xref: /freebsd/sys/netinet/ip_input.c (revision 94df3271d6b2e3511f1ff234bcc16e8b031ce6df)
/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 1982, 1986, 1988, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
 */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4033553d6eSBjoern A. Zeeb #include "opt_route.h"
41b8bc95cdSAdrian Chadd #include "opt_rss.h"
4274a9466cSGary Palmer 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47b715f178SLuigi Rizzo #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/domain.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
50df8bae1dSRodney W. Grimes #include <sys/socket.h>
51df8bae1dSRodney W. Grimes #include <sys/time.h>
52df8bae1dSRodney W. Grimes #include <sys/kernel.h>
53385195c0SMarko Zec #include <sys/lock.h>
54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
55385195c0SMarko Zec #include <sys/rwlock.h>
5657f60867SMark Johnston #include <sys/sdt.h>
571025071fSGarrett Wollman #include <sys/syslog.h>
58b5e8ce9fSBruce Evans #include <sys/sysctl.h>
59df8bae1dSRodney W. Grimes 
60df8bae1dSRodney W. Grimes #include <net/if.h>
619494d596SBrooks Davis #include <net/if_types.h>
62d314ad7bSJulian Elischer #include <net/if_var.h>
6382c23ebaSBill Fenner #include <net/if_dl.h>
64b252313fSGleb Smirnoff #include <net/pfil.h>
65df8bae1dSRodney W. Grimes #include <net/route.h>
66983066f0SAlexander V. Chernikov #include <net/route/nhop.h>
67748e0b0aSGarrett Wollman #include <net/netisr.h>
68b2bdc62aSAdrian Chadd #include <net/rss_config.h>
694b79449eSBjoern A. Zeeb #include <net/vnet.h>
70df8bae1dSRodney W. Grimes 
71df8bae1dSRodney W. Grimes #include <netinet/in.h>
7257f60867SMark Johnston #include <netinet/in_kdtrace.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
74b5e8ce9fSBruce Evans #include <netinet/in_var.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip.h>
76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h>
77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
79eddfbb76SRobert Watson #include <netinet/ip_fw.h>
80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
81ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8258938916SGarrett Wollman #include <machine/in_cksum.h>
83a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
8565634ae7SWojciech Macek #include <netinet/ip_mroute.h>
86df8bae1dSRodney W. Grimes 
87fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
88fcf59617SAndrey V. Elsukov 
89f0068c4aSGarrett Wollman #include <sys/socketvar.h>
906ddbf1e2SGary Palmer 
91aed55708SRobert Watson #include <security/mac/mac_framework.h>
92aed55708SRobert Watson 
/* Compile-time check that struct ip is exactly 20 bytes. */
#ifdef CTASSERT
CTASSERT(sizeof(struct ip) == 20);
#endif

/*
 * Prototypes for the IP reassembly functions, which are defined in
 * ip_reass.c.
 */
void	ipreass_init(void);
void	ipreass_drain(void);
void	ipreass_slowtimo(void);
#ifdef VIMAGE
void	ipreass_destroy(void);
#endif
1041dbefcc0SGleb Smirnoff 
10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10682cea7e6SBjoern A. Zeeb 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
109eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1108b615593SMarko Zec     "Enable IP forwarding between interfaces");
1110312fbe9SPoul-Henning Kamp 
1128ad114c0SGeorge V. Neville-Neil /*
1138ad114c0SGeorge V. Neville-Neil  * Respond with an ICMP host redirect when we forward a packet out of
1148ad114c0SGeorge V. Neville-Neil  * the same interface on which it was received.  See RFC 792.
1158ad114c0SGeorge V. Neville-Neil  */
1168ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1;
1176df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
118eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1198b615593SMarko Zec     "Enable sending IP redirects");
1200312fbe9SPoul-Henning Kamp 
121*94df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false;
122*94df3271SGleb Smirnoff #define	V_ip_strong_es	VNET(ip_strong_es)
123*94df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es,
124*94df3271SGleb Smirnoff     CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false,
125*94df3271SGleb Smirnoff     "Packet's IP destination address must match address on arrival interface");
126b3e95d4eSJonathan Lemon 
127b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head);	/* Packet filter hooks */
128df8bae1dSRodney W. Grimes 
129d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
130d4b5cae4SRobert Watson 	.nh_name = "ip",
131d4b5cae4SRobert Watson 	.nh_handler = ip_input,
132d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
133b8bc95cdSAdrian Chadd #ifdef	RSS
1342527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
135b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
136b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
137b8bc95cdSAdrian Chadd #else
138d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
139b8bc95cdSAdrian Chadd #endif
140d4b5cae4SRobert Watson };
141ca925d9cSJonathan Lemon 
#ifdef	RSS
/*
 * netisr handler description for the "direct" IPv4 input path, served
 * by ip_direct_input().
 *
 * Directly dispatched frames are currently assumed to have a flowid
 * already calculated.  There should likely be something here that
 * asserts the frame actually has valid flow details.
 */
static struct netisr_handler ip_direct_nh = {
	.nh_proto = NETISR_IP_DIRECT,
	.nh_name = "ip_direct",
	.nh_handler = ip_direct_input,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
};
#endif
159b8bc95cdSAdrian Chadd 
160df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
161f0ffb944SJulian Elischer extern	struct protosw inetsw[];
162df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
16382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
16482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
16582cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
166ca925d9cSJonathan Lemon 
167c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */
168c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr)));
169c8ee75f2SGleb Smirnoff 
/* Default MTU knob; only compiled when IPCTL_DEFMTU is defined. */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0,
    "Default MTU");
#endif

/* Stealth mode knob; only compiled with the IPSTEALTH option. */
#ifdef IPSTEALTH
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
181eddfbb76SRobert Watson 
182315e3e38SRobert Watson /*
1835da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1845923c293SGleb Smirnoff  */
1855da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1865da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1875da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1885da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1895923c293SGleb Smirnoff 
1905923c293SGleb Smirnoff #ifdef VIMAGE
1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
1925923c293SGleb Smirnoff #endif /* VIMAGE */
1935923c293SGleb Smirnoff 
1945923c293SGleb Smirnoff /*
195315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
1965923c293SGleb Smirnoff  * into ipstat treated as an array.
197315e3e38SRobert Watson  */
198315e3e38SRobert Watson void
199315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
200315e3e38SRobert Watson {
201315e3e38SRobert Watson 
2025da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
203315e3e38SRobert Watson }
204315e3e38SRobert Watson 
205315e3e38SRobert Watson void
206315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
207315e3e38SRobert Watson {
208315e3e38SRobert Watson 
2095da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
210315e3e38SRobert Watson }
211315e3e38SRobert Watson 
212d4b5cae4SRobert Watson static int
213d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
214d4b5cae4SRobert Watson {
215d4b5cae4SRobert Watson 	int error, qlimit;
216d4b5cae4SRobert Watson 
217d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
218d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
219d4b5cae4SRobert Watson 	if (error || !req->newptr)
220d4b5cae4SRobert Watson 		return (error);
221d4b5cae4SRobert Watson 	if (qlimit < 1)
222d4b5cae4SRobert Watson 		return (EINVAL);
223d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
224d4b5cae4SRobert Watson }
225d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
2267029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
2277029da5cSPawel Biernacki     sysctl_netinet_intr_queue_maxlen, "I",
228d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
229d4b5cae4SRobert Watson 
230d4b5cae4SRobert Watson static int
231d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
232d4b5cae4SRobert Watson {
233d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
234d4b5cae4SRobert Watson 	int error, qdrops;
235d4b5cae4SRobert Watson 
236d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
237d4b5cae4SRobert Watson 	qdrops = qdrops_long;
238d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
239d4b5cae4SRobert Watson 	if (error || !req->newptr)
240d4b5cae4SRobert Watson 		return (error);
241d4b5cae4SRobert Watson 	if (qdrops != 0)
242d4b5cae4SRobert Watson 		return (EINVAL);
243d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
244d4b5cae4SRobert Watson 	return (0);
245d4b5cae4SRobert Watson }
246d4b5cae4SRobert Watson 
247d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
2487029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE,
2497029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_queue_drops, "I",
250d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
251d4b5cae4SRobert Watson 
252b8bc95cdSAdrian Chadd #ifdef	RSS
253b8bc95cdSAdrian Chadd static int
254b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
255b8bc95cdSAdrian Chadd {
256b8bc95cdSAdrian Chadd 	int error, qlimit;
257b8bc95cdSAdrian Chadd 
258b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
259b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
260b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
261b8bc95cdSAdrian Chadd 		return (error);
262b8bc95cdSAdrian Chadd 	if (qlimit < 1)
263b8bc95cdSAdrian Chadd 		return (EINVAL);
264b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
265b8bc95cdSAdrian Chadd }
2667faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2677029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
2687029da5cSPawel Biernacki     0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2697faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
270b8bc95cdSAdrian Chadd 
271b8bc95cdSAdrian Chadd static int
272b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
273b8bc95cdSAdrian Chadd {
274b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
275b8bc95cdSAdrian Chadd 	int error, qdrops;
276b8bc95cdSAdrian Chadd 
277b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
278b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
279b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
280b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
281b8bc95cdSAdrian Chadd 		return (error);
282b8bc95cdSAdrian Chadd 	if (qdrops != 0)
283b8bc95cdSAdrian Chadd 		return (EINVAL);
284b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
285b8bc95cdSAdrian Chadd 	return (0);
286b8bc95cdSAdrian Chadd }
287b8bc95cdSAdrian Chadd 
2887faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
2897029da5cSPawel Biernacki     CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0,
2907029da5cSPawel Biernacki     sysctl_netinet_intr_direct_queue_drops, "I",
291b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
292b8bc95cdSAdrian Chadd #endif	/* RSS */
293b8bc95cdSAdrian Chadd 
294df8bae1dSRodney W. Grimes /*
295df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
296df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
297df8bae1dSRodney W. Grimes  */
298df8bae1dSRodney W. Grimes void
299f2565d68SRobert Watson ip_init(void)
300df8bae1dSRodney W. Grimes {
301b252313fSGleb Smirnoff 	struct pfil_head_args args;
302f2565d68SRobert Watson 	struct protosw *pr;
303f2565d68SRobert Watson 	int i;
304df8bae1dSRodney W. Grimes 
305d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
306603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3071ed81b73SMarko Zec 
3081ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3091dbefcc0SGleb Smirnoff 	ipreass_init();
3101ed81b73SMarko Zec 
3110b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
312b252313fSGleb Smirnoff 	args.pa_version = PFIL_VERSION;
313b252313fSGleb Smirnoff 	args.pa_flags = PFIL_IN | PFIL_OUT;
314b252313fSGleb Smirnoff 	args.pa_type = PFIL_TYPE_IP4;
315b252313fSGleb Smirnoff 	args.pa_headname = PFIL_INET_NAME;
316b252313fSGleb Smirnoff 	V_inet_pfil_head = pfil_head_register(&args);
3170b4b0b0fSJulian Elischer 
318ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
319ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
320ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
321ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
322ef91a976SAndrey V. Elsukov 		    __func__);
323ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
324ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
325ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
326ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
327ef91a976SAndrey V. Elsukov 		    __func__);
328ef91a976SAndrey V. Elsukov 
3291ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
330484149deSBjoern A. Zeeb #ifdef VIMAGE
331484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
332484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
333484149deSBjoern A. Zeeb #ifdef	RSS
334484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
335484149deSBjoern A. Zeeb #endif
3361ed81b73SMarko Zec 		return;
337484149deSBjoern A. Zeeb 	}
338484149deSBjoern A. Zeeb #endif
3391ed81b73SMarko Zec 
340f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34102410549SRobert Watson 	if (pr == NULL)
342db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
343db09bef3SAndre Oppermann 
344db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
345df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
346df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
347db09bef3SAndre Oppermann 	/*
348db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
349db09bef3SAndre Oppermann 	 * in ip_protox[].
350db09bef3SAndre Oppermann 	 */
351f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
352f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
353df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
354db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
355db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
356db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
357df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
358db09bef3SAndre Oppermann 		}
359194a213eSAndrey A. Chernov 
360d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
361b8bc95cdSAdrian Chadd #ifdef	RSS
362b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
363b8bc95cdSAdrian Chadd #endif
364df8bae1dSRodney W. Grimes }
365df8bae1dSRodney W. Grimes 
3669802380eSBjoern A. Zeeb #ifdef VIMAGE
3673f58662dSBjoern A. Zeeb static void
3683f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3699802380eSBjoern A. Zeeb {
370ef91a976SAndrey V. Elsukov 	int error;
3714d3dfd45SMikolaj Golub 
372484149deSBjoern A. Zeeb #ifdef	RSS
373484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
374484149deSBjoern A. Zeeb #endif
375484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
376484149deSBjoern A. Zeeb 
377b252313fSGleb Smirnoff 	pfil_head_unregister(V_inet_pfil_head);
378ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
379ef91a976SAndrey V. Elsukov 	if (error != 0) {
380ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
381ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
382ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
383ef91a976SAndrey V. Elsukov 	}
384ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
385ef91a976SAndrey V. Elsukov 	if (error != 0) {
386ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
387ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
388ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
389ef91a976SAndrey V. Elsukov 	}
39089856f7eSBjoern A. Zeeb 
39189856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
39289856f7eSBjoern A. Zeeb 	in_ifscrub_all();
39389856f7eSBjoern A. Zeeb 
39489856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
395b1d63265SAlexander V. Chernikov 	rib_flush_routes_family(AF_INET);
3969802380eSBjoern A. Zeeb 
397e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
3981dbefcc0SGleb Smirnoff 	ipreass_destroy();
39989856f7eSBjoern A. Zeeb 
40089856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
40189856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4029802380eSBjoern A. Zeeb }
4033f58662dSBjoern A. Zeeb 
4043f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4059802380eSBjoern A. Zeeb #endif
4069802380eSBjoern A. Zeeb 
407b8bc95cdSAdrian Chadd #ifdef	RSS
408b8bc95cdSAdrian Chadd /*
409b8bc95cdSAdrian Chadd  * IP direct input routine.
410b8bc95cdSAdrian Chadd  *
411b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
412b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
413b8bc95cdSAdrian Chadd  */
414b8bc95cdSAdrian Chadd void
415b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
416b8bc95cdSAdrian Chadd {
417b8bc95cdSAdrian Chadd 	struct ip *ip;
418b8bc95cdSAdrian Chadd 	int hlen;
419b8bc95cdSAdrian Chadd 
420b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
421b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
422b8bc95cdSAdrian Chadd 
423fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
424fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
425fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
426fcf59617SAndrey V. Elsukov 			return;
427fcf59617SAndrey V. Elsukov 	}
428fcf59617SAndrey V. Elsukov #endif /* IPSEC */
429b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
430b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
431b8bc95cdSAdrian Chadd 	return;
432b8bc95cdSAdrian Chadd }
433b8bc95cdSAdrian Chadd #endif
434b8bc95cdSAdrian Chadd 
4354d2e3692SLuigi Rizzo /*
436df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
437df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
438df8bae1dSRodney W. Grimes  */
439c67b1d17SGarrett Wollman void
440c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
441df8bae1dSRodney W. Grimes {
44265634ae7SWojciech Macek 	MROUTER_RLOCK_TRACKER;
4439188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4445da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
445ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4460aade26eSRobert Watson 	struct ifnet *ifp;
447*94df3271SGleb Smirnoff 	int hlen = 0;
44821d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
44902c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
450f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
451*94df3271SGleb Smirnoff 	bool strong_es;
452b715f178SLuigi Rizzo 
453fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
454b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
455db40007dSAndrew R. Reiter 
456ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
45776ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
45876ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4592b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
46053be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4618f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4629b932e9eSAndre Oppermann 		goto ours;
4632b25acc1SLuigi Rizzo 	}
4642b25acc1SLuigi Rizzo 
46586425c62SRobert Watson 	IPSTAT_INC(ips_total);
46658938916SGarrett Wollman 
4670359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < sizeof(struct ip)))
46858938916SGarrett Wollman 		goto tooshort;
46958938916SGarrett Wollman 
4700359e7a5SMateusz Guzik 	if (m->m_len < sizeof(struct ip)) {
4710359e7a5SMateusz Guzik 		m = m_pullup(m, sizeof(struct ip));
4720359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
47386425c62SRobert Watson 			IPSTAT_INC(ips_toosmall);
474c67b1d17SGarrett Wollman 			return;
475df8bae1dSRodney W. Grimes 		}
4760359e7a5SMateusz Guzik 	}
477df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
47858938916SGarrett Wollman 
4790359e7a5SMateusz Guzik 	if (__predict_false(ip->ip_v != IPVERSION)) {
48086425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
481df8bae1dSRodney W. Grimes 		goto bad;
482df8bae1dSRodney W. Grimes 	}
48358938916SGarrett Wollman 
48453be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
4850359e7a5SMateusz Guzik 	if (__predict_false(hlen < sizeof(struct ip))) {	/* minimum header length */
48686425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
487df8bae1dSRodney W. Grimes 		goto bad;
488df8bae1dSRodney W. Grimes 	}
489df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4900359e7a5SMateusz Guzik 		m = m_pullup(m, hlen);
4910359e7a5SMateusz Guzik 		if (__predict_false(m == NULL)) {
49286425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
493c67b1d17SGarrett Wollman 			return;
494df8bae1dSRodney W. Grimes 		}
495df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
496df8bae1dSRodney W. Grimes 	}
49733841545SHajimu UMEMOTO 
49857f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
49957f60867SMark Johnston 
5006c1c6ae5SRodney W. Grimes 	/* IN_LOOPBACK must not appear on the wire - RFC1122 */
5010aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5026c1c6ae5SRodney W. Grimes 	if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) ||
5036c1c6ae5SRodney W. Grimes 	    IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) {
5040aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
50586425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
50633841545SHajimu UMEMOTO 			goto bad;
50733841545SHajimu UMEMOTO 		}
50833841545SHajimu UMEMOTO 	}
50933841545SHajimu UMEMOTO 
510db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
511db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
512db4f9cc7SJonathan Lemon 	} else {
51358938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
51447c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
51558938916SGarrett Wollman 		} else {
51647c861ecSBrian Somers 			sum = in_cksum(m, hlen);
51758938916SGarrett Wollman 		}
518db4f9cc7SJonathan Lemon 	}
5190359e7a5SMateusz Guzik 	if (__predict_false(sum)) {
52086425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
521df8bae1dSRodney W. Grimes 		goto bad;
522df8bae1dSRodney W. Grimes 	}
523df8bae1dSRodney W. Grimes 
52402b199f1SMax Laier #ifdef ALTQ
52502b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
52602b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
52702b199f1SMax Laier 		return;
52802b199f1SMax Laier #endif
52902b199f1SMax Laier 
53021d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
5310359e7a5SMateusz Guzik 	if (__predict_false(ip_len < hlen)) {
53286425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
533df8bae1dSRodney W. Grimes 		goto bad;
534df8bae1dSRodney W. Grimes 	}
535df8bae1dSRodney W. Grimes 
536df8bae1dSRodney W. Grimes 	/*
537df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
538df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
539df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
540df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
541df8bae1dSRodney W. Grimes 	 */
5420359e7a5SMateusz Guzik 	if (__predict_false(m->m_pkthdr.len < ip_len)) {
54358938916SGarrett Wollman tooshort:
54486425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
545df8bae1dSRodney W. Grimes 		goto bad;
546df8bae1dSRodney W. Grimes 	}
54721d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
548df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
54921d172a3SGleb Smirnoff 			m->m_len = ip_len;
55021d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
551df8bae1dSRodney W. Grimes 		} else
55221d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
553df8bae1dSRodney W. Grimes 	}
554b8bc95cdSAdrian Chadd 
555ad9f4d6aSAndrey V. Elsukov 	/*
556ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
55762484790SAndrey V. Elsukov 	 * ip_tryforward() does not generate redirects, so fall
55862484790SAndrey V. Elsukov 	 * through to normal processing if redirects are required.
559ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
560ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
561ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
562ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
563ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
564ad9f4d6aSAndrey V. Elsukov 	 */
5658ad114c0SGeorge V. Neville-Neil 	if (V_ipforwarding != 0
566fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
567fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
568fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
569ad9f4d6aSAndrey V. Elsukov #endif
570ad9f4d6aSAndrey V. Elsukov 	    ) {
571ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
57233872124SGeorge V. Neville-Neil 			return;
573ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
574ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
575ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
576ad9f4d6aSAndrey V. Elsukov 			goto ours;
577ad9f4d6aSAndrey V. Elsukov 		}
578ad9f4d6aSAndrey V. Elsukov 	}
579fcf59617SAndrey V. Elsukov 
580fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
58114dd6717SSam Leffler 	/*
582ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
58314dd6717SSam Leffler 	 */
584fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
585fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
586c21fd232SAndre Oppermann 			goto passin;
587ad9f4d6aSAndrey V. Elsukov #endif
588fcf59617SAndrey V. Elsukov 
589c4ac87eaSDarren Reed 	/*
590134ea224SSam Leffler 	 * Run through list of hooks for input packets.
591f51f805fSSam Leffler 	 *
592f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
593f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
594f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
595c4ac87eaSDarren Reed 	 */
596c21fd232SAndre Oppermann 
597c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
598b252313fSGleb Smirnoff 	if (!PFIL_HOOKED_IN(V_inet_pfil_head))
599c21fd232SAndre Oppermann 		goto passin;
600c21fd232SAndre Oppermann 
601f51f805fSSam Leffler 	odst = ip->ip_dst;
602b252313fSGleb Smirnoff 	if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) !=
603b252313fSGleb Smirnoff 	    PFIL_PASS)
604beec8214SDarren Reed 		return;
605134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
606c4ac87eaSDarren Reed 		return;
6079b932e9eSAndre Oppermann 
608c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
60902c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6100aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
6119b932e9eSAndre Oppermann 
6129b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6139b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6149b932e9eSAndre Oppermann 		goto ours;
6159b932e9eSAndre Oppermann 	}
616ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
617de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
618099dd043SAndre Oppermann 			/*
619ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
620ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
621ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
622099dd043SAndre Oppermann 			 */
623ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
624099dd043SAndre Oppermann 			return;
625099dd043SAndre Oppermann 		}
626ffdbf9daSAndrey V. Elsukov 	}
627c21fd232SAndre Oppermann passin:
62821d172a3SGleb Smirnoff 
62921d172a3SGleb Smirnoff 	/*
630df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
631df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
632df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
633df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
634df8bae1dSRodney W. Grimes 	 */
6359b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
636c67b1d17SGarrett Wollman 		return;
637df8bae1dSRodney W. Grimes 
638f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
639f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
640f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
641f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
642f0068c4aSGarrett Wollman 	 * grabbing the packet.
643f0068c4aSGarrett Wollman          */
6440359e7a5SMateusz Guzik 	if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on)
645f0068c4aSGarrett Wollman 		goto ours;
646f0068c4aSGarrett Wollman 
647df8bae1dSRodney W. Grimes 	/*
648df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
649cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
650cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
651cc766e04SGarrett Wollman 	 * with it).
652df8bae1dSRodney W. Grimes 	 */
653d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
654cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
655cc766e04SGarrett Wollman 		goto ours;
656cc766e04SGarrett Wollman 
6577538a9a0SJonathan Lemon 	/*
658823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
659823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
660*94df3271SGleb Smirnoff 	 * strong ES model) with a list of additional predicates:
661*94df3271SGleb Smirnoff 	 * - if IP forwarding is disabled
662*94df3271SGleb Smirnoff 	 * - the packet is not locally generated
663*94df3271SGleb Smirnoff 	 * - the packet is not subject to 'ipfw fwd'
664*94df3271SGleb Smirnoff 	 * - Interface is not running CARP. If the packet got here, we already
665*94df3271SGleb Smirnoff 	 *   checked it with carp_iamatch() and carp_forus().
666823db0e9SDon Lewis 	 */
667*94df3271SGleb Smirnoff 	strong_es = V_ip_strong_es && (V_ipforwarding == 0) &&
6680aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
66954bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
670823db0e9SDon Lewis 
671ca925d9cSJonathan Lemon 	/*
672ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
673ca925d9cSJonathan Lemon 	 */
674c8ee75f2SGleb Smirnoff 	CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
675*94df3271SGleb Smirnoff 		if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr)
676*94df3271SGleb Smirnoff 			continue;
677*94df3271SGleb Smirnoff 
678f9e354dfSJulian Elischer 		/*
679*94df3271SGleb Smirnoff 		 * net.inet.ip.rfc1122_strong_es: the address matches, verify
680*94df3271SGleb Smirnoff 		 * that the packet arrived via the correct interface.
681f9e354dfSJulian Elischer 		 */
682*94df3271SGleb Smirnoff 		if (__predict_false(strong_es && ia->ia_ifp != ifp)) {
683*94df3271SGleb Smirnoff 			IPSTAT_INC(ips_badaddr);
684*94df3271SGleb Smirnoff 			goto bad;
685ca925d9cSJonathan Lemon 		}
686*94df3271SGleb Smirnoff 
687*94df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
688*94df3271SGleb Smirnoff 		counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len);
689*94df3271SGleb Smirnoff 		goto ours;
6908c0fec80SRobert Watson 	}
6912d9cfabaSRobert Watson 
692823db0e9SDon Lewis 	/*
693ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
694ca925d9cSJonathan Lemon 	 *
695ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
696ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
697ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
698ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
699823db0e9SDon Lewis 	 */
7000aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
701d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
702ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
703ca925d9cSJonathan Lemon 				continue;
704ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
705df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7060aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7077caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7087caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7097caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
710df8bae1dSRodney W. Grimes 				goto ours;
7110aade26eSRobert Watson 			}
7120ac40133SBrian Somers #ifdef BOOTP_COMPAT
7130aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7147caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7157caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7167caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
717ca925d9cSJonathan Lemon 				goto ours;
7180aade26eSRobert Watson 			}
7190ac40133SBrian Somers #endif
720df8bae1dSRodney W. Grimes 		}
72119e5b0a7SRobert Watson 		ia = NULL;
722df8bae1dSRodney W. Grimes 	}
723df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
72465634ae7SWojciech Macek 		MROUTER_RLOCK();
72503b0505bSZhenlei Huang 		/*
72603b0505bSZhenlei Huang 		 * RFC 3927 2.7: Do not forward multicast packets from
72703b0505bSZhenlei Huang 		 * IN_LINKLOCAL.
72803b0505bSZhenlei Huang 		 */
7293d846e48SZhenlei Huang 		if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
730df8bae1dSRodney W. Grimes 			/*
731df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
732df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
733df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
734df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
735df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
736df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
737df8bae1dSRodney W. Grimes 			 */
7380aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
73965634ae7SWojciech Macek 				MROUTER_RUNLOCK();
74086425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
741df8bae1dSRodney W. Grimes 				m_freem(m);
742c67b1d17SGarrett Wollman 				return;
743df8bae1dSRodney W. Grimes 			}
744df8bae1dSRodney W. Grimes 
745df8bae1dSRodney W. Grimes 			/*
74611612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
747df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
748df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
749df8bae1dSRodney W. Grimes 			 */
75065634ae7SWojciech Macek 			if (ip->ip_p == IPPROTO_IGMP) {
75165634ae7SWojciech Macek 				MROUTER_RUNLOCK();
752df8bae1dSRodney W. Grimes 				goto ours;
75365634ae7SWojciech Macek 			}
75486425c62SRobert Watson 			IPSTAT_INC(ips_forward);
755df8bae1dSRodney W. Grimes 		}
75665634ae7SWojciech Macek 		MROUTER_RUNLOCK();
757df8bae1dSRodney W. Grimes 		/*
758d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
759d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
760d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
761df8bae1dSRodney W. Grimes 		 */
762df8bae1dSRodney W. Grimes 		goto ours;
763df8bae1dSRodney W. Grimes 	}
764df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
765df8bae1dSRodney W. Grimes 		goto ours;
766df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
767df8bae1dSRodney W. Grimes 		goto ours;
76803b0505bSZhenlei Huang 	/* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */
7693d846e48SZhenlei Huang 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) ||
7703d846e48SZhenlei Huang 	    IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) {
7713d846e48SZhenlei Huang 		IPSTAT_INC(ips_cantforward);
7723d846e48SZhenlei Huang 		m_freem(m);
7733d846e48SZhenlei Huang 		return;
7743d846e48SZhenlei Huang 	}
775df8bae1dSRodney W. Grimes 
7766a800098SYoshinobu Inoue 	/*
777df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
778df8bae1dSRodney W. Grimes 	 */
779603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
78086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
781df8bae1dSRodney W. Grimes 		m_freem(m);
782546f251bSChris D. Faulhaber 	} else {
7839b932e9eSAndre Oppermann 		ip_forward(m, dchg);
784546f251bSChris D. Faulhaber 	}
785c67b1d17SGarrett Wollman 	return;
786df8bae1dSRodney W. Grimes 
787df8bae1dSRodney W. Grimes ours:
788d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
789d0ebc0d2SYaroslav Tykhiy 	/*
790d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
791d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
792d0ebc0d2SYaroslav Tykhiy 	 */
7937caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
794d0ebc0d2SYaroslav Tykhiy 		return;
795d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
796d0ebc0d2SYaroslav Tykhiy 
79763f8d699SJordan K. Hubbard 	/*
798b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
799ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
800df8bae1dSRodney W. Grimes 	 */
8018f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
802aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
803f0cada84SAndre Oppermann 		m = ip_reass(m);
804f0cada84SAndre Oppermann 		if (m == NULL)
805c67b1d17SGarrett Wollman 			return;
8066a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8077e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
80853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
809f0cada84SAndre Oppermann 	}
810f0cada84SAndre Oppermann 
811fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
812fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
813fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
814fcf59617SAndrey V. Elsukov 			return;
815fcf59617SAndrey V. Elsukov 	}
816b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
81733841545SHajimu UMEMOTO 
818df8bae1dSRodney W. Grimes 	/*
819df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
820df8bae1dSRodney W. Grimes 	 */
82186425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8229b932e9eSAndre Oppermann 
8238f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
824c67b1d17SGarrett Wollman 	return;
825df8bae1dSRodney W. Grimes bad:
826df8bae1dSRodney W. Grimes 	m_freem(m);
827c67b1d17SGarrett Wollman }
828c67b1d17SGarrett Wollman 
829c67b1d17SGarrett Wollman /*
830df8bae1dSRodney W. Grimes  * IP timer processing;
831df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
832df8bae1dSRodney W. Grimes  * queue, discard it.
833df8bae1dSRodney W. Grimes  */
834df8bae1dSRodney W. Grimes void
835f2565d68SRobert Watson ip_slowtimo(void)
836df8bae1dSRodney W. Grimes {
8378b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
838df8bae1dSRodney W. Grimes 
8395ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8408b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8418b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8421dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8438b615593SMarko Zec 		CURVNET_RESTORE();
8448b615593SMarko Zec 	}
8455ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
846df8bae1dSRodney W. Grimes }
847df8bae1dSRodney W. Grimes 
8489802380eSBjoern A. Zeeb void
8499802380eSBjoern A. Zeeb ip_drain(void)
8509802380eSBjoern A. Zeeb {
8519802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8529802380eSBjoern A. Zeeb 
8539802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8549802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8559802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8561dbefcc0SGleb Smirnoff 		ipreass_drain();
8578b615593SMarko Zec 		CURVNET_RESTORE();
8588b615593SMarko Zec 	}
8595ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
860df8bae1dSRodney W. Grimes }
861df8bae1dSRodney W. Grimes 
862df8bae1dSRodney W. Grimes /*
863de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
864de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
865de38924dSAndre Oppermann  */
866de38924dSAndre Oppermann int
8671b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
868de38924dSAndre Oppermann {
869de38924dSAndre Oppermann 	struct protosw *pr;
870de38924dSAndre Oppermann 
871de38924dSAndre Oppermann 	/* Sanity checks. */
8721b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
873de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
874de38924dSAndre Oppermann 
875de38924dSAndre Oppermann 	/*
876de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
877de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
878de38924dSAndre Oppermann 	 */
879de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
880de38924dSAndre Oppermann 	if (pr == NULL)
881de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
882de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
883de38924dSAndre Oppermann 		return (EEXIST);
884de38924dSAndre Oppermann 
885de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
886de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
887de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
888de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
889de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
890de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
891de38924dSAndre Oppermann 			return (0);
892de38924dSAndre Oppermann 		}
893de38924dSAndre Oppermann 	}
894de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
895de38924dSAndre Oppermann }
896de38924dSAndre Oppermann 
897de38924dSAndre Oppermann int
8981b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
899de38924dSAndre Oppermann {
900de38924dSAndre Oppermann 	struct protosw *pr;
901de38924dSAndre Oppermann 
902de38924dSAndre Oppermann 	/* Sanity checks. */
9031b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
904de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
905de38924dSAndre Oppermann 
906de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
907de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
908de38924dSAndre Oppermann 	if (pr == NULL)
909de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
910de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
911de38924dSAndre Oppermann 		return (ENOENT);
912de38924dSAndre Oppermann 
913de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
914de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
915de38924dSAndre Oppermann 	return (0);
916de38924dSAndre Oppermann }
917de38924dSAndre Oppermann 
/*
 * Table of errno values, PRC_NCMDS entries long.  A zero entry means no
 * errno is associated with that position.
 * NOTE(review): presumably indexed by PRC_* command code -- confirm against
 * the users of this table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	/*  0 */ 0,
	/*  1 */ 0,
	/*  2 */ 0,
	/*  3 */ 0,
	/*  4 */ 0,
	/*  5 */ EMSGSIZE,
	/*  6 */ EHOSTDOWN,
	/*  7 */ EHOSTUNREACH,
	/*  8 */ EHOSTUNREACH,
	/*  9 */ EHOSTUNREACH,
	/* 10 */ ECONNREFUSED,
	/* 11 */ ECONNREFUSED,
	/* 12 */ EMSGSIZE,
	/* 13 */ EHOSTUNREACH,
	/* 14 */ 0,
	/* 15 */ 0,
	/* 16 */ 0,
	/* 17 */ 0,
	/* 18 */ EHOSTUNREACH,
	/* 19 */ 0,
	/* 20 */ ENOPROTOOPT,
	/* 21 */ ECONNREFUSED
};
926df8bae1dSRodney W. Grimes 
927df8bae1dSRodney W. Grimes /*
928df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
929df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
930df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
931df8bae1dSRodney W. Grimes  * of codes and types.
932df8bae1dSRodney W. Grimes  *
933df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
934df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
935df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
936df8bae1dSRodney W. Grimes  * protocol deal with that.
937df8bae1dSRodney W. Grimes  *
938df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
939df8bae1dSRodney W. Grimes  * via a source route.
940df8bae1dSRodney W. Grimes  */
9419b932e9eSAndre Oppermann void
9429b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
943df8bae1dSRodney W. Grimes {
9442b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
945efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
946df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
947d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9489b932e9eSAndre Oppermann 	struct in_addr dest;
949b835b6feSBjoern A. Zeeb 	struct route ro;
9504043ee3cSAlexander V. Chernikov 	uint32_t flowid;
951c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9523efc3014SJulian Elischer 
953b8a6e03fSGleb Smirnoff 	NET_EPOCH_ASSERT();
954b8a6e03fSGleb Smirnoff 
9559b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
95686425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
957df8bae1dSRodney W. Grimes 		m_freem(m);
958df8bae1dSRodney W. Grimes 		return;
959df8bae1dSRodney W. Grimes 	}
960fcf59617SAndrey V. Elsukov 	if (
961fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
962fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
963fcf59617SAndrey V. Elsukov #endif
964fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
965fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9668922ddbeSAndrey V. Elsukov 		return;
9678922ddbeSAndrey V. Elsukov 	}
968df8bae1dSRodney W. Grimes 
969d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
970d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
971d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
972d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
973d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
9744043ee3cSAlexander V. Chernikov 	flowid = m->m_pkthdr.flowid;
9754043ee3cSAlexander V. Chernikov 	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
976983066f0SAlexander V. Chernikov 	if (ro.ro_nh != NULL) {
977983066f0SAlexander V. Chernikov 		ia = ifatoia(ro.ro_nh->nh_ifa);
97856844a62SErmal Luçi 	} else
97956844a62SErmal Luçi 		ia = NULL;
980df8bae1dSRodney W. Grimes 	/*
981bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
982bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
983bfef7ed4SIan Dowse 	 *
9844d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9854d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9864d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9874d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9884d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9894d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9904d2e3692SLuigi Rizzo 	 *
991c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
992bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
993bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
994bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
995df8bae1dSRodney W. Grimes 	 */
996dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
997eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
9989967cafcSSam Leffler 		/*
9999967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10009967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10019967cafcSSam Leffler 		 * be conservative and just discard the copy since
10029967cafcSSam Leffler 		 * code below may some day want the tags.
10039967cafcSSam Leffler 		 */
10049967cafcSSam Leffler 		m_free(mcopy);
10059967cafcSSam Leffler 		mcopy = NULL;
10069967cafcSSam Leffler 	}
1007bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10088f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1009e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1010bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1011bfef7ed4SIan Dowse 	}
101204287599SRuslan Ermilov #ifdef IPSTEALTH
1013fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
101404287599SRuslan Ermilov #endif
101504287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1016fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1017fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1018fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1019fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1020d16a2e47SMark Johnston 			RO_NHFREE(&ro);
1021fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1022fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1023fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1024b8a6e03fSGleb Smirnoff 			return;
102504287599SRuslan Ermilov 		}
1026fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1027fcf59617SAndrey V. Elsukov 	}
1028fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1029df8bae1dSRodney W. Grimes 	/*
1030df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1031df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1032df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1033df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1034df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1035df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1036df8bae1dSRodney W. Grimes 	 */
10379b932e9eSAndre Oppermann 	dest.s_addr = 0;
1038efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1039efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
1040983066f0SAlexander V. Chernikov 		struct nhop_object *nh;
104102c1c707SAndre Oppermann 
1042983066f0SAlexander V. Chernikov 		nh = ro.ro_nh;
104302c1c707SAndre Oppermann 
1044983066f0SAlexander V. Chernikov 		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
1045983066f0SAlexander V. Chernikov 			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
1046df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1047df8bae1dSRodney W. Grimes 
1048983066f0SAlexander V. Chernikov 			if (nh_ia != NULL &&
1049983066f0SAlexander V. Chernikov 			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
1050df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1051df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1052df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
105362e1a437SZhenlei Huang 				if (nh->nh_flags & NHF_GATEWAY) {
105462e1a437SZhenlei Huang 				    if (nh->gw_sa.sa_family == AF_INET)
105562e1a437SZhenlei Huang 					dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
105662e1a437SZhenlei Huang 				    else /* Do not redirect in case gw is AF_INET6 */
105762e1a437SZhenlei Huang 					type = 0;
105862e1a437SZhenlei Huang 				} else
105962e1a437SZhenlei Huang 					dest.s_addr = ip->ip_dst.s_addr;
1060df8bae1dSRodney W. Grimes 			}
1061df8bae1dSRodney W. Grimes 		}
106202c1c707SAndre Oppermann 	}
1063df8bae1dSRodney W. Grimes 
1064b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1065b835b6feSBjoern A. Zeeb 
1066983066f0SAlexander V. Chernikov 	if (error == EMSGSIZE && ro.ro_nh)
1067983066f0SAlexander V. Chernikov 		mtu = ro.ro_nh->nh_mtu;
1068983066f0SAlexander V. Chernikov 	RO_NHFREE(&ro);
1069b835b6feSBjoern A. Zeeb 
1070df8bae1dSRodney W. Grimes 	if (error)
107186425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1072df8bae1dSRodney W. Grimes 	else {
107386425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1074df8bae1dSRodney W. Grimes 		if (type)
107586425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1076df8bae1dSRodney W. Grimes 		else {
10779188b4a1SAndre Oppermann 			if (mcopy)
1078df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1079b8a6e03fSGleb Smirnoff 			return;
1080df8bae1dSRodney W. Grimes 		}
1081df8bae1dSRodney W. Grimes 	}
10824f6c66ccSMatt Macy 	if (mcopy == NULL)
1083b8a6e03fSGleb Smirnoff 		return;
10844f6c66ccSMatt Macy 
1085df8bae1dSRodney W. Grimes 	switch (error) {
1086df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1087df8bae1dSRodney W. Grimes 		/* type, code set above */
1088df8bae1dSRodney W. Grimes 		break;
1089df8bae1dSRodney W. Grimes 
1090efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1091df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1092df8bae1dSRodney W. Grimes 	case ENETDOWN:
1093df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1094df8bae1dSRodney W. Grimes 	default:
1095df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1096df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1097df8bae1dSRodney W. Grimes 		break;
1098df8bae1dSRodney W. Grimes 
1099df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1100df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1101df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11029b932e9eSAndre Oppermann 		/*
1103b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1104b835b6feSBjoern A. Zeeb 		 * interface MTU.
1105ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1106ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1107ab48768bSAndre Oppermann 		 * current packet size.
11089b932e9eSAndre Oppermann 		 */
1109b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1110b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1111b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1112b835b6feSBjoern A. Zeeb 		} else {
1113ab48768bSAndre Oppermann 			if (ia != NULL)
1114c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1115ab48768bSAndre Oppermann 			else
11168f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1117ab48768bSAndre Oppermann 		}
111886425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1119df8bae1dSRodney W. Grimes 		break;
1120df8bae1dSRodney W. Grimes 
1121df8bae1dSRodney W. Grimes 	case ENOBUFS:
11223a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11233a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1124b8a6e03fSGleb Smirnoff 		return;
1125df8bae1dSRodney W. Grimes 	}
1126c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1127df8bae1dSRodney W. Grimes }
1128df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when SO_TIMESTAMP is set in (sp)->so_options and
 * (sp)->so_ts_clock equals ct; evaluates to 0 otherwise.
 *
 * Both arguments are parenthesized so that expressions such as `&sock'
 * or `a + b' expand correctly.  Note that `sp' is evaluated twice, so it
 * must not have side effects.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1131339efd75SMaxim Sobolev 
113282c23ebaSBill Fenner void
1133f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1134f2565d68SRobert Watson     struct mbuf *m)
113582c23ebaSBill Fenner {
113606193f0bSKonstantin Belousov 	bool stamped;
11378b615593SMarko Zec 
113806193f0bSKonstantin Belousov 	stamped = false;
1139339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1140339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
114106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
114206193f0bSKonstantin Belousov 		struct timespec ts1;
1143be8a62e8SPoul-Henning Kamp 
114406193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
114506193f0bSKonstantin Belousov 		    M_TSTMP)) {
114606193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
114706193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
114806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
114906193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
115006193f0bSKonstantin Belousov 		} else {
1151be8a62e8SPoul-Henning Kamp 			bintime(&bt);
115206193f0bSKonstantin Belousov 		}
1153be8a62e8SPoul-Henning Kamp 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1154be8a62e8SPoul-Henning Kamp 		    SCM_BINTIME, SOL_SOCKET);
115506193f0bSKonstantin Belousov 		if (*mp != NULL) {
1156be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
115706193f0bSKonstantin Belousov 			stamped = true;
115806193f0bSKonstantin Belousov 		}
1159be8a62e8SPoul-Henning Kamp 	}
1160339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
116106193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
1162c012cfe6SEd Maste 		struct timespec ts1;
116382c23ebaSBill Fenner 		struct timeval tv;
116482c23ebaSBill Fenner 
116506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
116606193f0bSKonstantin Belousov 		    M_TSTMP)) {
116706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
116806193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
116906193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
117006193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
117106193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
117206193f0bSKonstantin Belousov 		} else {
1173339efd75SMaxim Sobolev 			microtime(&tv);
117406193f0bSKonstantin Belousov 		}
117582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
117682c23ebaSBill Fenner 		    SCM_TIMESTAMP, SOL_SOCKET);
117706193f0bSKonstantin Belousov 		if (*mp != NULL) {
117882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
117906193f0bSKonstantin Belousov 			stamped = true;
118006193f0bSKonstantin Belousov 		}
1181339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
118206193f0bSKonstantin Belousov 		struct bintime boottimebin;
118306193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1184339efd75SMaxim Sobolev 
118506193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
118606193f0bSKonstantin Belousov 		    M_TSTMP)) {
118706193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
118806193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
118906193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
11906040822cSAlan Somers 			timespecadd(&ts, &ts1, &ts);
119106193f0bSKonstantin Belousov 		} else {
1192339efd75SMaxim Sobolev 			nanotime(&ts);
119306193f0bSKonstantin Belousov 		}
1194339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1195339efd75SMaxim Sobolev 		    SCM_REALTIME, SOL_SOCKET);
119606193f0bSKonstantin Belousov 		if (*mp != NULL) {
1197339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
119806193f0bSKonstantin Belousov 			stamped = true;
119906193f0bSKonstantin Belousov 		}
1200339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1201339efd75SMaxim Sobolev 		struct timespec ts;
1202339efd75SMaxim Sobolev 
120306193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
120406193f0bSKonstantin Belousov 		    M_TSTMP))
120506193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
120606193f0bSKonstantin Belousov 		else
1207339efd75SMaxim Sobolev 			nanouptime(&ts);
1208339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1209339efd75SMaxim Sobolev 		    SCM_MONOTONIC, SOL_SOCKET);
121006193f0bSKonstantin Belousov 		if (*mp != NULL) {
121106193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
121206193f0bSKonstantin Belousov 			stamped = true;
121306193f0bSKonstantin Belousov 		}
121406193f0bSKonstantin Belousov 	}
121506193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
121606193f0bSKonstantin Belousov 	    M_TSTMP)) {
121706193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
121806193f0bSKonstantin Belousov 
121906193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
122006193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
122106193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
122206193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
122306193f0bSKonstantin Belousov 		*mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO,
122406193f0bSKonstantin Belousov 		    SOL_SOCKET);
122506193f0bSKonstantin Belousov 		if (*mp != NULL)
1226339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1227be8a62e8SPoul-Henning Kamp 	}
122882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
122982c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
123082c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
123182c23ebaSBill Fenner 		if (*mp)
123282c23ebaSBill Fenner 			mp = &(*mp)->m_next;
123382c23ebaSBill Fenner 	}
12344957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
12354957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
12364957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
12374957466bSMatthew N. Dodd 		if (*mp)
12384957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
12394957466bSMatthew N. Dodd 	}
124082c23ebaSBill Fenner #ifdef notyet
124182c23ebaSBill Fenner 	/* XXX
124282c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
124382c23ebaSBill Fenner 	 * than they already were.
124482c23ebaSBill Fenner 	 */
124582c23ebaSBill Fenner 	/* options were tossed already */
124682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
124782c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
124882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
124982c23ebaSBill Fenner 		if (*mp)
125082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
125182c23ebaSBill Fenner 	}
125282c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
125382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1254e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
125582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
125682c23ebaSBill Fenner 		if (*mp)
125782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
125882c23ebaSBill Fenner 	}
125982c23ebaSBill Fenner #endif
126082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1261d314ad7bSJulian Elischer 		struct ifnet *ifp;
1262d314ad7bSJulian Elischer 		struct sdlbuf {
126382c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1264d314ad7bSJulian Elischer 			u_char	pad[32];
1265d314ad7bSJulian Elischer 		} sdlbuf;
1266d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1267d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
126882c23ebaSBill Fenner 
126946f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
127046f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12714a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1272d314ad7bSJulian Elischer 			/*
1273d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1274d314ad7bSJulian Elischer 			 */
127546f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
127646f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1277d314ad7bSJulian Elischer 				goto makedummy;
1278d314ad7bSJulian Elischer 			}
1279d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1280d314ad7bSJulian Elischer 		} else {
1281d314ad7bSJulian Elischer makedummy:
128246f2df9cSSergey Kandaurov 			sdl2->sdl_len =
128346f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1284d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1285d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1286d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1287d314ad7bSJulian Elischer 		}
1288d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
128982c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
129082c23ebaSBill Fenner 		if (*mp)
129182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
129282c23ebaSBill Fenner 	}
12933cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
12943cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
12953cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
12963cca425bSMichael Tuexen 		if (*mp)
12973cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12983cca425bSMichael Tuexen 	}
12999d3ddf43SAdrian Chadd 
13009d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
13019d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13029d3ddf43SAdrian Chadd 
13039d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13049d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13059d3ddf43SAdrian Chadd 
13069d3ddf43SAdrian Chadd 		/*
13079d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
13089d3ddf43SAdrian Chadd 		 * other - don't populate both?
13099d3ddf43SAdrian Chadd 		 */
13109d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
13119d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
13129d3ddf43SAdrian Chadd 		if (*mp)
13139d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13149d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
13159d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
13169d3ddf43SAdrian Chadd 		if (*mp)
13179d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13189d3ddf43SAdrian Chadd 	}
13199d3ddf43SAdrian Chadd 
13209d3ddf43SAdrian Chadd #ifdef	RSS
13219d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
13229d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13239d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
13249d3ddf43SAdrian Chadd 
13259d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13269d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13279d3ddf43SAdrian Chadd 
13289d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
13299d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
13309d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
13319d3ddf43SAdrian Chadd 			if (*mp)
13329d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
13339d3ddf43SAdrian Chadd 		}
13349d3ddf43SAdrian Chadd 	}
13359d3ddf43SAdrian Chadd #endif
133682c23ebaSBill Fenner }
133782c23ebaSBill Fenner 
13384d2e3692SLuigi Rizzo /*
133930916a2dSRobert Watson  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
134030916a2dSRobert Watson  * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
134130916a2dSRobert Watson  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
134230916a2dSRobert Watson  * compiled.
13434d2e3692SLuigi Rizzo  */
/*
 * Nonzero while ip_rsvp_init() has bumped V_rsvp_on and ip_rsvp_done()
 * has not yet taken that increment back.
 */
13445f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on);
/* Socket recorded by ip_rsvp_init(); reset to NULL by ip_rsvp_done(). */
134582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
134682cea7e6SBjoern A. Zeeb 
/* Accessor for ip_rsvp_on, expanded through VNET(). */
134782cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
134882cea7e6SBjoern A. Zeeb 
1349df8bae1dSRodney W. Grimes int
1350f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1351f0068c4aSGarrett Wollman {
13528b615593SMarko Zec 
1353f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1354f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1355f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1356f0068c4aSGarrett Wollman 
1357603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1358f0068c4aSGarrett Wollman 		return EADDRINUSE;
1359f0068c4aSGarrett Wollman 
1360603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13611c5de19aSGarrett Wollman 	/*
13621c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13631c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13641c5de19aSGarrett Wollman 	 */
1365603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1366603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1367603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13681c5de19aSGarrett Wollman 	}
1369f0068c4aSGarrett Wollman 
1370f0068c4aSGarrett Wollman 	return 0;
1371f0068c4aSGarrett Wollman }
1372f0068c4aSGarrett Wollman 
1373f0068c4aSGarrett Wollman int
1374f0068c4aSGarrett Wollman ip_rsvp_done(void)
1375f0068c4aSGarrett Wollman {
13768b615593SMarko Zec 
1377603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13781c5de19aSGarrett Wollman 	/*
13791c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13801c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13811c5de19aSGarrett Wollman 	 */
1382603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1383603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1384603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13851c5de19aSGarrett Wollman 	}
1386f0068c4aSGarrett Wollman 	return 0;
1387f0068c4aSGarrett Wollman }
1388bbb4330bSLuigi Rizzo 
13898f5a8818SKevin Lo int
13908f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1391bbb4330bSLuigi Rizzo {
13928f5a8818SKevin Lo 	struct mbuf *m;
13938f5a8818SKevin Lo 
13948f5a8818SKevin Lo 	m = *mp;
13958f5a8818SKevin Lo 	*mp = NULL;
13968b615593SMarko Zec 
1397bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13988f5a8818SKevin Lo 		*mp = m;
13998f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
14008f5a8818SKevin Lo 		return (IPPROTO_DONE);
1401bbb4330bSLuigi Rizzo 	}
1402bbb4330bSLuigi Rizzo 
1403bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1404bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1405bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1406bbb4330bSLuigi Rizzo 	 */
1407bbb4330bSLuigi Rizzo 
1408603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1409bbb4330bSLuigi Rizzo 		m_freem(m);
14108f5a8818SKevin Lo 		return (IPPROTO_DONE);
1411bbb4330bSLuigi Rizzo 	}
1412bbb4330bSLuigi Rizzo 
1413603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
14148f5a8818SKevin Lo 		*mp = m;
14158f5a8818SKevin Lo 		rip_input(mp, offp, proto);
14168f5a8818SKevin Lo 		return (IPPROTO_DONE);
1417bbb4330bSLuigi Rizzo 	}
1418bbb4330bSLuigi Rizzo 	/* Drop the packet */
1419bbb4330bSLuigi Rizzo 	m_freem(m);
14208f5a8818SKevin Lo 	return (IPPROTO_DONE);
1421bbb4330bSLuigi Rizzo }
1422