xref: /freebsd/sys/netinet/ip_input.c (revision 4f6c66cc9c75c857341b6ca6ca92d4808be8d3b5)
1c398230bSWarner Losh /*-
251369649SPedro F. Giffuni  * SPDX-License-Identifier: BSD-3-Clause
351369649SPedro F. Giffuni  *
4df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
5df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
6df8bae1dSRodney W. Grimes  *
7df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
8df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
9df8bae1dSRodney W. Grimes  * are met:
10df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
12df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
13df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
14df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
15fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
16df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
17df8bae1dSRodney W. Grimes  *    without specific prior written permission.
18df8bae1dSRodney W. Grimes  *
19df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
30df8bae1dSRodney W. Grimes  *
31df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
32df8bae1dSRodney W. Grimes  */
33df8bae1dSRodney W. Grimes 
344b421e2dSMike Silbersack #include <sys/cdefs.h>
354b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
364b421e2dSMike Silbersack 
370ac40133SBrian Somers #include "opt_bootp.h"
3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
396a800098SYoshinobu Inoue #include "opt_ipsec.h"
4033553d6eSBjoern A. Zeeb #include "opt_route.h"
41b8bc95cdSAdrian Chadd #include "opt_rss.h"
4274a9466cSGary Palmer 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
46df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
47b715f178SLuigi Rizzo #include <sys/malloc.h>
48df8bae1dSRodney W. Grimes #include <sys/domain.h>
49df8bae1dSRodney W. Grimes #include <sys/protosw.h>
50df8bae1dSRodney W. Grimes #include <sys/socket.h>
51df8bae1dSRodney W. Grimes #include <sys/time.h>
52df8bae1dSRodney W. Grimes #include <sys/kernel.h>
53385195c0SMarko Zec #include <sys/lock.h>
54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
55385195c0SMarko Zec #include <sys/rwlock.h>
5657f60867SMark Johnston #include <sys/sdt.h>
571025071fSGarrett Wollman #include <sys/syslog.h>
58b5e8ce9fSBruce Evans #include <sys/sysctl.h>
59df8bae1dSRodney W. Grimes 
60c85540ddSAndrey A. Chernov #include <net/pfil.h>
61df8bae1dSRodney W. Grimes #include <net/if.h>
629494d596SBrooks Davis #include <net/if_types.h>
63d314ad7bSJulian Elischer #include <net/if_var.h>
6482c23ebaSBill Fenner #include <net/if_dl.h>
65df8bae1dSRodney W. Grimes #include <net/route.h>
66748e0b0aSGarrett Wollman #include <net/netisr.h>
67b2bdc62aSAdrian Chadd #include <net/rss_config.h>
684b79449eSBjoern A. Zeeb #include <net/vnet.h>
69df8bae1dSRodney W. Grimes 
70df8bae1dSRodney W. Grimes #include <netinet/in.h>
7157f60867SMark Johnston #include <netinet/in_kdtrace.h>
72df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
73b5e8ce9fSBruce Evans #include <netinet/in_var.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip.h>
75df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
76df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
77eddfbb76SRobert Watson #include <netinet/ip_fw.h>
78df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
79ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
8058938916SGarrett Wollman #include <machine/in_cksum.h>
81a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
82b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
83df8bae1dSRodney W. Grimes 
84fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
85fcf59617SAndrey V. Elsukov 
86f0068c4aSGarrett Wollman #include <sys/socketvar.h>
876ddbf1e2SGary Palmer 
88aed55708SRobert Watson #include <security/mac/mac_framework.h>
89aed55708SRobert Watson 
90d2035ffbSEd Maste #ifdef CTASSERT
91d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20);
92d2035ffbSEd Maste #endif
93d2035ffbSEd Maste 
941dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */
95843b0e57SXin LI extern void ipreass_init(void);
96843b0e57SXin LI extern void ipreass_drain(void);
97843b0e57SXin LI extern void ipreass_slowtimo(void);
981dbefcc0SGleb Smirnoff #ifdef VIMAGE
99843b0e57SXin LI extern void ipreass_destroy(void);
1001dbefcc0SGleb Smirnoff #endif
1011dbefcc0SGleb Smirnoff 
102cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock;
103cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
104f0068c4aSGarrett Wollman 
10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10682cea7e6SBjoern A. Zeeb 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
109eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1108b615593SMarko Zec     "Enable IP forwarding between interfaces");
1110312fbe9SPoul-Henning Kamp 
1123e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
11382cea7e6SBjoern A. Zeeb #define	V_ipsendredirects	VNET(ipsendredirects)
1146df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
115eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1168b615593SMarko Zec     "Enable sending IP redirects");
1170312fbe9SPoul-Henning Kamp 
118823db0e9SDon Lewis /*
119823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
120823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
121a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
122823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1233f67c834SDon Lewis  *
124a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
125a8f12100SDon Lewis  * to translate the destination address to another local interface.
1263f67c834SDon Lewis  *
1273f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1283f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1293f67c834SDon Lewis  * packets for those addresses are received.
130823db0e9SDon Lewis  */
1313e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface);
13282cea7e6SBjoern A. Zeeb #define	V_ip_checkinterface	VNET(ip_checkinterface)
1336df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
134eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1358b615593SMarko Zec     "Verify packet arrives on correct interface");
136b3e95d4eSJonathan Lemon 
1370b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
138df8bae1dSRodney W. Grimes 
139d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
140d4b5cae4SRobert Watson 	.nh_name = "ip",
141d4b5cae4SRobert Watson 	.nh_handler = ip_input,
142d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
143b8bc95cdSAdrian Chadd #ifdef	RSS
1442527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
145b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
146b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
147b8bc95cdSAdrian Chadd #else
148d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
149b8bc95cdSAdrian Chadd #endif
150d4b5cae4SRobert Watson };
151ca925d9cSJonathan Lemon 
152b8bc95cdSAdrian Chadd #ifdef	RSS
153b8bc95cdSAdrian Chadd /*
154b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
155b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
156b8bc95cdSAdrian Chadd  *
157b8bc95cdSAdrian Chadd  * It should likely have something that assert it
158b8bc95cdSAdrian Chadd  * actually has valid flow details.
159b8bc95cdSAdrian Chadd  */
160b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
161b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
162b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
163b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
164499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
165b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
166b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
167b8bc95cdSAdrian Chadd };
168b8bc95cdSAdrian Chadd #endif
169b8bc95cdSAdrian Chadd 
170df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
171f0ffb944SJulian Elischer extern	struct protosw inetsw[];
172df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
17382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17582cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
176ca925d9cSJonathan Lemon 
/* net.inet.ip.mtu, only present when IPCTL_DEFMTU is defined. */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, &ip_mtu, 0,
    "Default MTU");
#endif

/* net.inet.ip.stealth, only present in IPSTEALTH kernels; per-vnet. */
#ifdef IPSTEALTH
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
188eddfbb76SRobert Watson 
189315e3e38SRobert Watson /*
1905da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1915923c293SGleb Smirnoff  */
1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1935da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1945da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1955da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1965923c293SGleb Smirnoff 
1975923c293SGleb Smirnoff #ifdef VIMAGE
1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
1995923c293SGleb Smirnoff #endif /* VIMAGE */
2005923c293SGleb Smirnoff 
2015923c293SGleb Smirnoff /*
202315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2035923c293SGleb Smirnoff  * into ipstat treated as an array.
204315e3e38SRobert Watson  */
205315e3e38SRobert Watson void
206315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
207315e3e38SRobert Watson {
208315e3e38SRobert Watson 
2095da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
210315e3e38SRobert Watson }
211315e3e38SRobert Watson 
212315e3e38SRobert Watson void
213315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
214315e3e38SRobert Watson {
215315e3e38SRobert Watson 
2165da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
217315e3e38SRobert Watson }
218315e3e38SRobert Watson 
219d4b5cae4SRobert Watson static int
220d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
221d4b5cae4SRobert Watson {
222d4b5cae4SRobert Watson 	int error, qlimit;
223d4b5cae4SRobert Watson 
224d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
225d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
226d4b5cae4SRobert Watson 	if (error || !req->newptr)
227d4b5cae4SRobert Watson 		return (error);
228d4b5cae4SRobert Watson 	if (qlimit < 1)
229d4b5cae4SRobert Watson 		return (EINVAL);
230d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
231d4b5cae4SRobert Watson }
232d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
233d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
234d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
235d4b5cae4SRobert Watson 
236d4b5cae4SRobert Watson static int
237d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
238d4b5cae4SRobert Watson {
239d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
240d4b5cae4SRobert Watson 	int error, qdrops;
241d4b5cae4SRobert Watson 
242d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
243d4b5cae4SRobert Watson 	qdrops = qdrops_long;
244d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
245d4b5cae4SRobert Watson 	if (error || !req->newptr)
246d4b5cae4SRobert Watson 		return (error);
247d4b5cae4SRobert Watson 	if (qdrops != 0)
248d4b5cae4SRobert Watson 		return (EINVAL);
249d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
250d4b5cae4SRobert Watson 	return (0);
251d4b5cae4SRobert Watson }
252d4b5cae4SRobert Watson 
253d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
254d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
255d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
256d4b5cae4SRobert Watson 
257b8bc95cdSAdrian Chadd #ifdef	RSS
258b8bc95cdSAdrian Chadd static int
259b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
260b8bc95cdSAdrian Chadd {
261b8bc95cdSAdrian Chadd 	int error, qlimit;
262b8bc95cdSAdrian Chadd 
263b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
264b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
265b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
266b8bc95cdSAdrian Chadd 		return (error);
267b8bc95cdSAdrian Chadd 	if (qlimit < 1)
268b8bc95cdSAdrian Chadd 		return (EINVAL);
269b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
270b8bc95cdSAdrian Chadd }
2717faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen,
2727faa0d21SAndrey V. Elsukov     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen,
2737faa0d21SAndrey V. Elsukov     "I", "Maximum size of the IP direct input queue");
274b8bc95cdSAdrian Chadd 
275b8bc95cdSAdrian Chadd static int
276b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
277b8bc95cdSAdrian Chadd {
278b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
279b8bc95cdSAdrian Chadd 	int error, qdrops;
280b8bc95cdSAdrian Chadd 
281b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
282b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
283b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
284b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
285b8bc95cdSAdrian Chadd 		return (error);
286b8bc95cdSAdrian Chadd 	if (qdrops != 0)
287b8bc95cdSAdrian Chadd 		return (EINVAL);
288b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
289b8bc95cdSAdrian Chadd 	return (0);
290b8bc95cdSAdrian Chadd }
291b8bc95cdSAdrian Chadd 
2927faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops,
293b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
294b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
295b8bc95cdSAdrian Chadd #endif	/* RSS */
296b8bc95cdSAdrian Chadd 
297df8bae1dSRodney W. Grimes /*
298df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
299df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
300df8bae1dSRodney W. Grimes  */
301df8bae1dSRodney W. Grimes void
302f2565d68SRobert Watson ip_init(void)
303df8bae1dSRodney W. Grimes {
304f2565d68SRobert Watson 	struct protosw *pr;
305f2565d68SRobert Watson 	int i;
306df8bae1dSRodney W. Grimes 
307d7c5a620SMatt Macy 	CK_STAILQ_INIT(&V_in_ifaddrhead);
308603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3091ed81b73SMarko Zec 
3101ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3111dbefcc0SGleb Smirnoff 	ipreass_init();
3121ed81b73SMarko Zec 
3130b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3140b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3150b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3160b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3170b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3180b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3190b4b0b0fSJulian Elischer 
320ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
321ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
322ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
323ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
324ef91a976SAndrey V. Elsukov 		    __func__);
325ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
326ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
327ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
328ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
329ef91a976SAndrey V. Elsukov 		    __func__);
330ef91a976SAndrey V. Elsukov 
3311ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
332484149deSBjoern A. Zeeb #ifdef VIMAGE
333484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
334484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
335484149deSBjoern A. Zeeb #ifdef	RSS
336484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
337484149deSBjoern A. Zeeb #endif
3381ed81b73SMarko Zec 		return;
339484149deSBjoern A. Zeeb 	}
340484149deSBjoern A. Zeeb #endif
3411ed81b73SMarko Zec 
342f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34302410549SRobert Watson 	if (pr == NULL)
344db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
345db09bef3SAndre Oppermann 
346db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
347df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
348df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
349db09bef3SAndre Oppermann 	/*
350db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
351db09bef3SAndre Oppermann 	 * in ip_protox[].
352db09bef3SAndre Oppermann 	 */
353f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
354f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
355df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
356db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
357db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
358db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
359df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
360db09bef3SAndre Oppermann 		}
361194a213eSAndrey A. Chernov 
362d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
363b8bc95cdSAdrian Chadd #ifdef	RSS
364b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
365b8bc95cdSAdrian Chadd #endif
366df8bae1dSRodney W. Grimes }
367df8bae1dSRodney W. Grimes 
3689802380eSBjoern A. Zeeb #ifdef VIMAGE
3693f58662dSBjoern A. Zeeb static void
3703f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3719802380eSBjoern A. Zeeb {
37289856f7eSBjoern A. Zeeb 	struct ifnet *ifp;
373ef91a976SAndrey V. Elsukov 	int error;
3744d3dfd45SMikolaj Golub 
375484149deSBjoern A. Zeeb #ifdef	RSS
376484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
377484149deSBjoern A. Zeeb #endif
378484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
379484149deSBjoern A. Zeeb 
380ef91a976SAndrey V. Elsukov 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
3814d3dfd45SMikolaj Golub 		printf("%s: WARNING: unable to unregister pfil hook, "
382ef91a976SAndrey V. Elsukov 		    "error %d\n", __func__, error);
3839802380eSBjoern A. Zeeb 
384ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
385ef91a976SAndrey V. Elsukov 	if (error != 0) {
386ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
387ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
388ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
389ef91a976SAndrey V. Elsukov 	}
390ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
391ef91a976SAndrey V. Elsukov 	if (error != 0) {
392ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
393ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
394ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
395ef91a976SAndrey V. Elsukov 	}
39689856f7eSBjoern A. Zeeb 
39789856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
39889856f7eSBjoern A. Zeeb 	in_ifscrub_all();
39989856f7eSBjoern A. Zeeb 
40089856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
40189856f7eSBjoern A. Zeeb 	IFNET_RLOCK();
402*4f6c66ccSMatt Macy 	CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link)
40389856f7eSBjoern A. Zeeb 		rt_flushifroutes_af(ifp, AF_INET);
40489856f7eSBjoern A. Zeeb 	IFNET_RUNLOCK();
4059802380eSBjoern A. Zeeb 
406e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4071dbefcc0SGleb Smirnoff 	ipreass_destroy();
40889856f7eSBjoern A. Zeeb 
40989856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
41089856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4119802380eSBjoern A. Zeeb }
4123f58662dSBjoern A. Zeeb 
4133f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4149802380eSBjoern A. Zeeb #endif
4159802380eSBjoern A. Zeeb 
416b8bc95cdSAdrian Chadd #ifdef	RSS
417b8bc95cdSAdrian Chadd /*
418b8bc95cdSAdrian Chadd  * IP direct input routine.
419b8bc95cdSAdrian Chadd  *
420b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
421b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
422b8bc95cdSAdrian Chadd  */
423b8bc95cdSAdrian Chadd void
424b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
425b8bc95cdSAdrian Chadd {
426b8bc95cdSAdrian Chadd 	struct ip *ip;
427b8bc95cdSAdrian Chadd 	int hlen;
428b8bc95cdSAdrian Chadd 
429b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
430b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
431b8bc95cdSAdrian Chadd 
432fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
433fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
434fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
435fcf59617SAndrey V. Elsukov 			return;
436fcf59617SAndrey V. Elsukov 	}
437fcf59617SAndrey V. Elsukov #endif /* IPSEC */
438b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
439b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
440b8bc95cdSAdrian Chadd 	return;
441b8bc95cdSAdrian Chadd }
442b8bc95cdSAdrian Chadd #endif
443b8bc95cdSAdrian Chadd 
4444d2e3692SLuigi Rizzo /*
445df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
446df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
447df8bae1dSRodney W. Grimes  */
448c67b1d17SGarrett Wollman void
449c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
450df8bae1dSRodney W. Grimes {
4519188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4525da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
453ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4540aade26eSRobert Watson 	struct ifnet *ifp;
4559b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
45621d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
45702c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
458f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
459b715f178SLuigi Rizzo 
460fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
461db40007dSAndrew R. Reiter 
462ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46376ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
46476ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4652b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
46653be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4678f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4689b932e9eSAndre Oppermann 		goto ours;
4692b25acc1SLuigi Rizzo 	}
4702b25acc1SLuigi Rizzo 
47186425c62SRobert Watson 	IPSTAT_INC(ips_total);
47258938916SGarrett Wollman 
47358938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
47458938916SGarrett Wollman 		goto tooshort;
47558938916SGarrett Wollman 
476df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4770b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
47886425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
479c67b1d17SGarrett Wollman 		return;
480df8bae1dSRodney W. Grimes 	}
481df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
48258938916SGarrett Wollman 
48353be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
48486425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
485df8bae1dSRodney W. Grimes 		goto bad;
486df8bae1dSRodney W. Grimes 	}
48758938916SGarrett Wollman 
48853be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
489df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
49086425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
491df8bae1dSRodney W. Grimes 		goto bad;
492df8bae1dSRodney W. Grimes 	}
493df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4940b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
49586425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
496c67b1d17SGarrett Wollman 			return;
497df8bae1dSRodney W. Grimes 		}
498df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
499df8bae1dSRodney W. Grimes 	}
50033841545SHajimu UMEMOTO 
50157f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
50257f60867SMark Johnston 
50333841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
5040aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
50533841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
50633841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
5070aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
50886425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
50933841545SHajimu UMEMOTO 			goto bad;
51033841545SHajimu UMEMOTO 		}
51133841545SHajimu UMEMOTO 	}
51233841545SHajimu UMEMOTO 
513db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
514db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
515db4f9cc7SJonathan Lemon 	} else {
51658938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
51747c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
51858938916SGarrett Wollman 		} else {
51947c861ecSBrian Somers 			sum = in_cksum(m, hlen);
52058938916SGarrett Wollman 		}
521db4f9cc7SJonathan Lemon 	}
52247c861ecSBrian Somers 	if (sum) {
52386425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
524df8bae1dSRodney W. Grimes 		goto bad;
525df8bae1dSRodney W. Grimes 	}
526df8bae1dSRodney W. Grimes 
52702b199f1SMax Laier #ifdef ALTQ
52802b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
52902b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
53002b199f1SMax Laier 		return;
53102b199f1SMax Laier #endif
53202b199f1SMax Laier 
53321d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
53421d172a3SGleb Smirnoff 	if (ip_len < hlen) {
53586425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
536df8bae1dSRodney W. Grimes 		goto bad;
537df8bae1dSRodney W. Grimes 	}
538df8bae1dSRodney W. Grimes 
539df8bae1dSRodney W. Grimes 	/*
540df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
541df8bae1dSRodney W. Grimes 	 * is at least as much as the IP header would have us expect.
542df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
543df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
544df8bae1dSRodney W. Grimes 	 */
54521d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
54658938916SGarrett Wollman tooshort:
54786425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
548df8bae1dSRodney W. Grimes 		goto bad;
549df8bae1dSRodney W. Grimes 	}
55021d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
551df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55221d172a3SGleb Smirnoff 			m->m_len = ip_len;
55321d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
554df8bae1dSRodney W. Grimes 		} else
55521d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
556df8bae1dSRodney W. Grimes 	}
557b8bc95cdSAdrian Chadd 
558ad9f4d6aSAndrey V. Elsukov 	/*
559ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
560ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
561ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
562ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
563ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
564ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
565ad9f4d6aSAndrey V. Elsukov 	 */
566ad9f4d6aSAndrey V. Elsukov 	if (V_ipforwarding != 0
567fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
568fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
569fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
570ad9f4d6aSAndrey V. Elsukov #endif
571ad9f4d6aSAndrey V. Elsukov 	    ) {
572ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
57333872124SGeorge V. Neville-Neil 			return;
574ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
575ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
576ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
577ad9f4d6aSAndrey V. Elsukov 			goto ours;
578ad9f4d6aSAndrey V. Elsukov 		}
579ad9f4d6aSAndrey V. Elsukov 	}
580fcf59617SAndrey V. Elsukov 
581fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
58214dd6717SSam Leffler 	/*
583ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
58414dd6717SSam Leffler 	 */
585fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
586fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
587c21fd232SAndre Oppermann 			goto passin;
588ad9f4d6aSAndrey V. Elsukov #endif
589fcf59617SAndrey V. Elsukov 
590c4ac87eaSDarren Reed 	/*
591134ea224SSam Leffler 	 * Run through list of hooks for input packets.
592f51f805fSSam Leffler 	 *
593f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
594f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
595f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
596c4ac87eaSDarren Reed 	 */
597c21fd232SAndre Oppermann 
598c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5990b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
600c21fd232SAndre Oppermann 		goto passin;
601c21fd232SAndre Oppermann 
602f51f805fSSam Leffler 	odst = ip->ip_dst;
603effaab88SKristof Provost 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, 0, NULL) != 0)
604beec8214SDarren Reed 		return;
605134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
606c4ac87eaSDarren Reed 		return;
6079b932e9eSAndre Oppermann 
608c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
60902c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6100aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
6119b932e9eSAndre Oppermann 
6129b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6139b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6149b932e9eSAndre Oppermann 		goto ours;
6159b932e9eSAndre Oppermann 	}
616ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
617de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
618099dd043SAndre Oppermann 			/*
619ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
620ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
621ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
622099dd043SAndre Oppermann 			 */
623ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
624099dd043SAndre Oppermann 			return;
625099dd043SAndre Oppermann 		}
626ffdbf9daSAndrey V. Elsukov 	}
627c21fd232SAndre Oppermann passin:
62821d172a3SGleb Smirnoff 
62921d172a3SGleb Smirnoff 	/*
630df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
631df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
632df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
633df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
634df8bae1dSRodney W. Grimes 	 */
6359b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
636c67b1d17SGarrett Wollman 		return;
637df8bae1dSRodney W. Grimes 
638f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
639f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
640f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
641f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
642f0068c4aSGarrett Wollman 	 * grabbing the packet.
643f0068c4aSGarrett Wollman          */
644603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
645f0068c4aSGarrett Wollman 		goto ours;
646f0068c4aSGarrett Wollman 
647df8bae1dSRodney W. Grimes 	/*
648df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
649cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
650cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
651cc766e04SGarrett Wollman 	 * with it).
652df8bae1dSRodney W. Grimes 	 */
653d7c5a620SMatt Macy 	if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) &&
654cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
655cc766e04SGarrett Wollman 		goto ours;
656cc766e04SGarrett Wollman 
6577538a9a0SJonathan Lemon 	/*
658823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
659823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
660823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
661e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
662e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6633f67c834SDon Lewis 	 *
6643f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6653f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6663f67c834SDon Lewis 	 *
667a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6683f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6693f67c834SDon Lewis 	 * the packets are received.
670a9771948SGleb Smirnoff 	 *
671a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
672a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
673a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
674823db0e9SDon Lewis 	 */
675603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6760aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
67754bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
678823db0e9SDon Lewis 
679ca925d9cSJonathan Lemon 	/*
680ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
681ca925d9cSJonathan Lemon 	 */
6822d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6839b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
684f9e354dfSJulian Elischer 		/*
685823db0e9SDon Lewis 		 * If the address matches, verify that the packet
686823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
687823db0e9SDon Lewis 		 * enabled.
688f9e354dfSJulian Elischer 		 */
6899b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6908c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6917caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6927caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
6937caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
6942d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
695ed1ff184SJulian Elischer 			goto ours;
696ca925d9cSJonathan Lemon 		}
6978c0fec80SRobert Watson 	}
6982d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6992d9cfabaSRobert Watson 
700823db0e9SDon Lewis 	/*
701ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
702ca925d9cSJonathan Lemon 	 *
703ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
704ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
705ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
706ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
707823db0e9SDon Lewis 	 */
7080aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
709137f91e8SJohn Baldwin 		IF_ADDR_RLOCK(ifp);
710d7c5a620SMatt Macy 		CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
711ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
712ca925d9cSJonathan Lemon 				continue;
713ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
714df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7150aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7167caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7177caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7187caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
719137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
720df8bae1dSRodney W. Grimes 				goto ours;
7210aade26eSRobert Watson 			}
7220ac40133SBrian Somers #ifdef BOOTP_COMPAT
7230aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7247caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7257caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7267caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
727137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
728ca925d9cSJonathan Lemon 				goto ours;
7290aade26eSRobert Watson 			}
7300ac40133SBrian Somers #endif
731df8bae1dSRodney W. Grimes 		}
732137f91e8SJohn Baldwin 		IF_ADDR_RUNLOCK(ifp);
73319e5b0a7SRobert Watson 		ia = NULL;
734df8bae1dSRodney W. Grimes 	}
735f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
736f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
73786425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
738f8429ca2SBruce M Simpson 		m_freem(m);
739f8429ca2SBruce M Simpson 		return;
740f8429ca2SBruce M Simpson 	}
741df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
742603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
743df8bae1dSRodney W. Grimes 			/*
744df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
745df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
746df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
747df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
748df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
749df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
750df8bae1dSRodney W. Grimes 			 */
7510aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
75286425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
753df8bae1dSRodney W. Grimes 				m_freem(m);
754c67b1d17SGarrett Wollman 				return;
755df8bae1dSRodney W. Grimes 			}
756df8bae1dSRodney W. Grimes 
757df8bae1dSRodney W. Grimes 			/*
75811612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
759df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
760df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
761df8bae1dSRodney W. Grimes 			 */
762df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
763df8bae1dSRodney W. Grimes 				goto ours;
76486425c62SRobert Watson 			IPSTAT_INC(ips_forward);
765df8bae1dSRodney W. Grimes 		}
766df8bae1dSRodney W. Grimes 		/*
767d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
768d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
769d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
770df8bae1dSRodney W. Grimes 		 */
771df8bae1dSRodney W. Grimes 		goto ours;
772df8bae1dSRodney W. Grimes 	}
773df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
774df8bae1dSRodney W. Grimes 		goto ours;
775df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
776df8bae1dSRodney W. Grimes 		goto ours;
777df8bae1dSRodney W. Grimes 
7786a800098SYoshinobu Inoue 	/*
779df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
780df8bae1dSRodney W. Grimes 	 */
781603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
78286425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
783df8bae1dSRodney W. Grimes 		m_freem(m);
784546f251bSChris D. Faulhaber 	} else {
7859b932e9eSAndre Oppermann 		ip_forward(m, dchg);
786546f251bSChris D. Faulhaber 	}
787c67b1d17SGarrett Wollman 	return;
788df8bae1dSRodney W. Grimes 
789df8bae1dSRodney W. Grimes ours:
790d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
791d0ebc0d2SYaroslav Tykhiy 	/*
792d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
793d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
794d0ebc0d2SYaroslav Tykhiy 	 */
7957caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
796d0ebc0d2SYaroslav Tykhiy 		return;
797d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
798d0ebc0d2SYaroslav Tykhiy 
79963f8d699SJordan K. Hubbard 	/*
800b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
801ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
802df8bae1dSRodney W. Grimes 	 */
8038f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
804aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
805f0cada84SAndre Oppermann 		m = ip_reass(m);
806f0cada84SAndre Oppermann 		if (m == NULL)
807c67b1d17SGarrett Wollman 			return;
8086a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8097e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
81053be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
811f0cada84SAndre Oppermann 	}
812f0cada84SAndre Oppermann 
813fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
814fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
815fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
816fcf59617SAndrey V. Elsukov 			return;
817fcf59617SAndrey V. Elsukov 	}
818b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
81933841545SHajimu UMEMOTO 
820df8bae1dSRodney W. Grimes 	/*
821df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
822df8bae1dSRodney W. Grimes 	 */
82386425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8249b932e9eSAndre Oppermann 
8258f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
826c67b1d17SGarrett Wollman 	return;
827df8bae1dSRodney W. Grimes bad:
828df8bae1dSRodney W. Grimes 	m_freem(m);
829c67b1d17SGarrett Wollman }
830c67b1d17SGarrett Wollman 
831c67b1d17SGarrett Wollman /*
832df8bae1dSRodney W. Grimes  * IP timer processing;
833df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
834df8bae1dSRodney W. Grimes  * queue, discard it.
835df8bae1dSRodney W. Grimes  */
836df8bae1dSRodney W. Grimes void
837f2565d68SRobert Watson ip_slowtimo(void)
838df8bae1dSRodney W. Grimes {
8398b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
840df8bae1dSRodney W. Grimes 
8415ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8428b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8438b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8441dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8458b615593SMarko Zec 		CURVNET_RESTORE();
8468b615593SMarko Zec 	}
8475ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
848df8bae1dSRodney W. Grimes }
849df8bae1dSRodney W. Grimes 
8509802380eSBjoern A. Zeeb void
8519802380eSBjoern A. Zeeb ip_drain(void)
8529802380eSBjoern A. Zeeb {
8539802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8549802380eSBjoern A. Zeeb 
8559802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8569802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8579802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8581dbefcc0SGleb Smirnoff 		ipreass_drain();
8598b615593SMarko Zec 		CURVNET_RESTORE();
8608b615593SMarko Zec 	}
8615ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
862df8bae1dSRodney W. Grimes }
863df8bae1dSRodney W. Grimes 
864df8bae1dSRodney W. Grimes /*
865de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
866de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
867de38924dSAndre Oppermann  */
868de38924dSAndre Oppermann int
8691b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
870de38924dSAndre Oppermann {
871de38924dSAndre Oppermann 	struct protosw *pr;
872de38924dSAndre Oppermann 
873de38924dSAndre Oppermann 	/* Sanity checks. */
8741b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
875de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
876de38924dSAndre Oppermann 
877de38924dSAndre Oppermann 	/*
878de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
879de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
880de38924dSAndre Oppermann 	 */
881de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
882de38924dSAndre Oppermann 	if (pr == NULL)
883de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
884de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
885de38924dSAndre Oppermann 		return (EEXIST);
886de38924dSAndre Oppermann 
887de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
888de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
889de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
890de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
891de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
892de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
893de38924dSAndre Oppermann 			return (0);
894de38924dSAndre Oppermann 		}
895de38924dSAndre Oppermann 	}
896de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
897de38924dSAndre Oppermann }
898de38924dSAndre Oppermann 
899de38924dSAndre Oppermann int
9001b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
901de38924dSAndre Oppermann {
902de38924dSAndre Oppermann 	struct protosw *pr;
903de38924dSAndre Oppermann 
904de38924dSAndre Oppermann 	/* Sanity checks. */
9051b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
906de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
907de38924dSAndre Oppermann 
908de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
909de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
910de38924dSAndre Oppermann 	if (pr == NULL)
911de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
912de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
913de38924dSAndre Oppermann 		return (ENOENT);
914de38924dSAndre Oppermann 
915de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
916de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
917de38924dSAndre Oppermann 	return (0);
918de38924dSAndre Oppermann }
919de38924dSAndre Oppermann 
/*
 * Table of PRC_NCMDS errno values.  The number in each comment is the
 * slot index; slots holding 0 have no errno assigned.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		/*  0 */
	0,		/*  1 */
	0,		/*  2 */
	0,		/*  3 */
	0,		/*  4 */
	EMSGSIZE,	/*  5 */
	EHOSTDOWN,	/*  6 */
	EHOSTUNREACH,	/*  7 */
	EHOSTUNREACH,	/*  8 */
	EHOSTUNREACH,	/*  9 */
	ECONNREFUSED,	/* 10 */
	ECONNREFUSED,	/* 11 */
	EMSGSIZE,	/* 12 */
	EHOSTUNREACH,	/* 13 */
	0,		/* 14 */
	0,		/* 15 */
	0,		/* 16 */
	0,		/* 17 */
	EHOSTUNREACH,	/* 18 */
	0,		/* 19 */
	ENOPROTOOPT,	/* 20 */
	ECONNREFUSED	/* 21 */
};
928df8bae1dSRodney W. Grimes 
929df8bae1dSRodney W. Grimes /*
930df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
931df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
932df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
933df8bae1dSRodney W. Grimes  * of codes and types.
934df8bae1dSRodney W. Grimes  *
935df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
936df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
937df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
938df8bae1dSRodney W. Grimes  * protocol deal with that.
939df8bae1dSRodney W. Grimes  *
940df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
941df8bae1dSRodney W. Grimes  * via a source route.
942df8bae1dSRodney W. Grimes  */
9439b932e9eSAndre Oppermann void
9449b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
945df8bae1dSRodney W. Grimes {
9462b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
947efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
948df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
949d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9509b932e9eSAndre Oppermann 	struct in_addr dest;
951b835b6feSBjoern A. Zeeb 	struct route ro;
952c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9533efc3014SJulian Elischer 
9549b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
95586425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
956df8bae1dSRodney W. Grimes 		m_freem(m);
957df8bae1dSRodney W. Grimes 		return;
958df8bae1dSRodney W. Grimes 	}
959fcf59617SAndrey V. Elsukov 	if (
960fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
961fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
962fcf59617SAndrey V. Elsukov #endif
963fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
964fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9658922ddbeSAndrey V. Elsukov 		return;
9668922ddbeSAndrey V. Elsukov 	}
967df8bae1dSRodney W. Grimes 
968d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
969d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
970d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
971d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
972d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
973d14122b0SErmal Luçi #ifdef RADIX_MPATH
974d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
975d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
976d14122b0SErmal Luçi 	    M_GETFIB(m));
977d14122b0SErmal Luçi #else
978d14122b0SErmal Luçi 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
979d14122b0SErmal Luçi #endif
980*4f6c66ccSMatt Macy 	NET_EPOCH_ENTER();
981d14122b0SErmal Luçi 	if (ro.ro_rt != NULL) {
982d14122b0SErmal Luçi 		ia = ifatoia(ro.ro_rt->rt_ifa);
98356844a62SErmal Luçi 	} else
98456844a62SErmal Luçi 		ia = NULL;
985df8bae1dSRodney W. Grimes 	/*
986bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
987bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
988bfef7ed4SIan Dowse 	 *
9894d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9904d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9914d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9924d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9934d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9944d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9954d2e3692SLuigi Rizzo 	 *
996c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
997bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
998bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
999bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1000df8bae1dSRodney W. Grimes 	 */
1001dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1002eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10039967cafcSSam Leffler 		/*
10049967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10059967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10069967cafcSSam Leffler 		 * be conservative and just discard the copy since
10079967cafcSSam Leffler 		 * code below may some day want the tags.
10089967cafcSSam Leffler 		 */
10099967cafcSSam Leffler 		m_free(mcopy);
10109967cafcSSam Leffler 		mcopy = NULL;
10119967cafcSSam Leffler 	}
1012bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10138f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1014e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1015bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1016bfef7ed4SIan Dowse 	}
101704287599SRuslan Ermilov #ifdef IPSTEALTH
1018fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
101904287599SRuslan Ermilov #endif
102004287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1021fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1022fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1023fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1024fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1025fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1026fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1027fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1028*4f6c66ccSMatt Macy 			goto out;
102904287599SRuslan Ermilov 		}
1030fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1031fcf59617SAndrey V. Elsukov 	}
1032fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1033df8bae1dSRodney W. Grimes 	/*
1034df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1035df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1036df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1037df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1038df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1039df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1040df8bae1dSRodney W. Grimes 	 */
10419b932e9eSAndre Oppermann 	dest.s_addr = 0;
1042efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1043efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
104402c1c707SAndre Oppermann 		struct rtentry *rt;
104502c1c707SAndre Oppermann 
104602c1c707SAndre Oppermann 		rt = ro.ro_rt;
104702c1c707SAndre Oppermann 
104802c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
10499b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1050df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1051df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1052df8bae1dSRodney W. Grimes 
1053df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1054df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1055df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
10569b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1057df8bae1dSRodney W. Grimes 				else
10589b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1059df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1060df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1061df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1062df8bae1dSRodney W. Grimes 			}
1063df8bae1dSRodney W. Grimes 		}
106402c1c707SAndre Oppermann 	}
1065df8bae1dSRodney W. Grimes 
1066b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1067b835b6feSBjoern A. Zeeb 
1068b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1069e3a7aa6fSGleb Smirnoff 		mtu = ro.ro_rt->rt_mtu;
1070bf984051SGleb Smirnoff 	RO_RTFREE(&ro);
1071b835b6feSBjoern A. Zeeb 
1072df8bae1dSRodney W. Grimes 	if (error)
107386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1074df8bae1dSRodney W. Grimes 	else {
107586425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1076df8bae1dSRodney W. Grimes 		if (type)
107786425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1078df8bae1dSRodney W. Grimes 		else {
10799188b4a1SAndre Oppermann 			if (mcopy)
1080df8bae1dSRodney W. Grimes 				m_freem(mcopy);
1081*4f6c66ccSMatt Macy 			goto out;
1082df8bae1dSRodney W. Grimes 		}
1083df8bae1dSRodney W. Grimes 	}
1084*4f6c66ccSMatt Macy 	if (mcopy == NULL)
1085*4f6c66ccSMatt Macy 		goto out;
1086*4f6c66ccSMatt Macy 
1087df8bae1dSRodney W. Grimes 
1088df8bae1dSRodney W. Grimes 	switch (error) {
1089df8bae1dSRodney W. Grimes 
1090df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1091df8bae1dSRodney W. Grimes 		/* type, code set above */
1092df8bae1dSRodney W. Grimes 		break;
1093df8bae1dSRodney W. Grimes 
1094efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1095df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1096df8bae1dSRodney W. Grimes 	case ENETDOWN:
1097df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1098df8bae1dSRodney W. Grimes 	default:
1099df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1100df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1101df8bae1dSRodney W. Grimes 		break;
1102df8bae1dSRodney W. Grimes 
1103df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1104df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1105df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11069b932e9eSAndre Oppermann 		/*
1107b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1108b835b6feSBjoern A. Zeeb 		 * interface MTU.
1109ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1110ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1111ab48768bSAndre Oppermann 		 * current packet size.
11129b932e9eSAndre Oppermann 		 */
1113b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1114b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1115b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1116b835b6feSBjoern A. Zeeb 		} else {
1117ab48768bSAndre Oppermann 			if (ia != NULL)
1118c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1119ab48768bSAndre Oppermann 			else
11208f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1121ab48768bSAndre Oppermann 		}
112286425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1123df8bae1dSRodney W. Grimes 		break;
1124df8bae1dSRodney W. Grimes 
1125df8bae1dSRodney W. Grimes 	case ENOBUFS:
11263a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11273a06e3e0SRuslan Ermilov 		m_freem(mcopy);
1128*4f6c66ccSMatt Macy 		goto out;
1129df8bae1dSRodney W. Grimes 	}
1130c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1131*4f6c66ccSMatt Macy  out:
1132*4f6c66ccSMatt Macy 	NET_EPOCH_EXIT();
1133df8bae1dSRodney W. Grimes }
1134df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when socket 'sp' has SO_TIMESTAMP set in so_options and
 * its so_ts_clock equals 'ct', otherwise 0.  Both arguments are
 * parenthesized so that arbitrary expressions can be passed safely;
 * note that 'sp' is evaluated twice.
 */
#define	CHECK_SO_CT(sp, ct) \
    (((((sp)->so_options & SO_TIMESTAMP) != 0) && \
    ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1137339efd75SMaxim Sobolev 
113882c23ebaSBill Fenner void
1139f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1140f2565d68SRobert Watson     struct mbuf *m)
114182c23ebaSBill Fenner {
114206193f0bSKonstantin Belousov 	bool stamped;
11438b615593SMarko Zec 
114406193f0bSKonstantin Belousov 	stamped = false;
1145339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1146339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
114706193f0bSKonstantin Belousov 		struct bintime boottimebin, bt;
114806193f0bSKonstantin Belousov 		struct timespec ts1;
1149be8a62e8SPoul-Henning Kamp 
115006193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
115106193f0bSKonstantin Belousov 		    M_TSTMP)) {
115206193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
115306193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt);
115406193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
115506193f0bSKonstantin Belousov 			bintime_add(&bt, &boottimebin);
115606193f0bSKonstantin Belousov 		} else {
1157be8a62e8SPoul-Henning Kamp 			bintime(&bt);
115806193f0bSKonstantin Belousov 		}
1159be8a62e8SPoul-Henning Kamp 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1160be8a62e8SPoul-Henning Kamp 		    SCM_BINTIME, SOL_SOCKET);
116106193f0bSKonstantin Belousov 		if (*mp != NULL) {
1162be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
116306193f0bSKonstantin Belousov 			stamped = true;
116406193f0bSKonstantin Belousov 		}
1165be8a62e8SPoul-Henning Kamp 	}
1166339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
116706193f0bSKonstantin Belousov 		struct bintime boottimebin, bt1;
116806193f0bSKonstantin Belousov 		struct timespec ts1;;
116982c23ebaSBill Fenner 		struct timeval tv;
117082c23ebaSBill Fenner 
117106193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
117206193f0bSKonstantin Belousov 		    M_TSTMP)) {
117306193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts1);
117406193f0bSKonstantin Belousov 			timespec2bintime(&ts1, &bt1);
117506193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
117606193f0bSKonstantin Belousov 			bintime_add(&bt1, &boottimebin);
117706193f0bSKonstantin Belousov 			bintime2timeval(&bt1, &tv);
117806193f0bSKonstantin Belousov 		} else {
1179339efd75SMaxim Sobolev 			microtime(&tv);
118006193f0bSKonstantin Belousov 		}
118182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
118282c23ebaSBill Fenner 		    SCM_TIMESTAMP, SOL_SOCKET);
118306193f0bSKonstantin Belousov 		if (*mp != NULL) {
118482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118506193f0bSKonstantin Belousov 			stamped = true;
118606193f0bSKonstantin Belousov 		}
1187339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
118806193f0bSKonstantin Belousov 		struct bintime boottimebin;
118906193f0bSKonstantin Belousov 		struct timespec ts, ts1;
1190339efd75SMaxim Sobolev 
119106193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
119206193f0bSKonstantin Belousov 		    M_TSTMP)) {
119306193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
119406193f0bSKonstantin Belousov 			getboottimebin(&boottimebin);
119506193f0bSKonstantin Belousov 			bintime2timespec(&boottimebin, &ts1);
119606193f0bSKonstantin Belousov 			timespecadd(&ts, &ts1);
119706193f0bSKonstantin Belousov 		} else {
1198339efd75SMaxim Sobolev 			nanotime(&ts);
119906193f0bSKonstantin Belousov 		}
1200339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1201339efd75SMaxim Sobolev 		    SCM_REALTIME, SOL_SOCKET);
120206193f0bSKonstantin Belousov 		if (*mp != NULL) {
1203339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
120406193f0bSKonstantin Belousov 			stamped = true;
120506193f0bSKonstantin Belousov 		}
1206339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1207339efd75SMaxim Sobolev 		struct timespec ts;
1208339efd75SMaxim Sobolev 
120906193f0bSKonstantin Belousov 		if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
121006193f0bSKonstantin Belousov 		    M_TSTMP))
121106193f0bSKonstantin Belousov 			mbuf_tstmp2timespec(m, &ts);
121206193f0bSKonstantin Belousov 		else
1213339efd75SMaxim Sobolev 			nanouptime(&ts);
1214339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1215339efd75SMaxim Sobolev 		    SCM_MONOTONIC, SOL_SOCKET);
121606193f0bSKonstantin Belousov 		if (*mp != NULL) {
121706193f0bSKonstantin Belousov 			mp = &(*mp)->m_next;
121806193f0bSKonstantin Belousov 			stamped = true;
121906193f0bSKonstantin Belousov 		}
122006193f0bSKonstantin Belousov 	}
122106193f0bSKonstantin Belousov 	if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR |
122206193f0bSKonstantin Belousov 	    M_TSTMP)) {
122306193f0bSKonstantin Belousov 		struct sock_timestamp_info sti;
122406193f0bSKonstantin Belousov 
122506193f0bSKonstantin Belousov 		bzero(&sti, sizeof(sti));
122606193f0bSKonstantin Belousov 		sti.st_info_flags = ST_INFO_HW;
122706193f0bSKonstantin Belousov 		if ((m->m_flags & M_TSTMP_HPREC) != 0)
122806193f0bSKonstantin Belousov 			sti.st_info_flags |= ST_INFO_HW_HPREC;
122906193f0bSKonstantin Belousov 		*mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO,
123006193f0bSKonstantin Belousov 		    SOL_SOCKET);
123106193f0bSKonstantin Belousov 		if (*mp != NULL)
1232339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1233be8a62e8SPoul-Henning Kamp 	}
123482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
123582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
123682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
123782c23ebaSBill Fenner 		if (*mp)
123882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
123982c23ebaSBill Fenner 	}
12404957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
12414957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
12424957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
12434957466bSMatthew N. Dodd 		if (*mp)
12444957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
12454957466bSMatthew N. Dodd 	}
124682c23ebaSBill Fenner #ifdef notyet
124782c23ebaSBill Fenner 	/* XXX
124882c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
124982c23ebaSBill Fenner 	 * than they already were.
125082c23ebaSBill Fenner 	 */
125182c23ebaSBill Fenner 	/* options were tossed already */
125282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
125382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
125482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
125582c23ebaSBill Fenner 		if (*mp)
125682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
125782c23ebaSBill Fenner 	}
125882c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
125982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1260e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
126182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
126282c23ebaSBill Fenner 		if (*mp)
126382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
126482c23ebaSBill Fenner 	}
126582c23ebaSBill Fenner #endif
126682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1267d314ad7bSJulian Elischer 		struct ifnet *ifp;
1268d314ad7bSJulian Elischer 		struct sdlbuf {
126982c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1270d314ad7bSJulian Elischer 			u_char	pad[32];
1271d314ad7bSJulian Elischer 		} sdlbuf;
1272d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1273d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
127482c23ebaSBill Fenner 
127546f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
127646f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12774a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1278d314ad7bSJulian Elischer 			/*
1279d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1280d314ad7bSJulian Elischer 			 */
128146f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
128246f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1283d314ad7bSJulian Elischer 				goto makedummy;
1284d314ad7bSJulian Elischer 			}
1285d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1286d314ad7bSJulian Elischer 		} else {
1287d314ad7bSJulian Elischer makedummy:
128846f2df9cSSergey Kandaurov 			sdl2->sdl_len =
128946f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1290d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1291d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1292d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1293d314ad7bSJulian Elischer 		}
1294d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
129582c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
129682c23ebaSBill Fenner 		if (*mp)
129782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
129882c23ebaSBill Fenner 	}
12993cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
13003cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
13013cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
13023cca425bSMichael Tuexen 		if (*mp)
13033cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
13043cca425bSMichael Tuexen 	}
13059d3ddf43SAdrian Chadd 
13069d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
13079d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13089d3ddf43SAdrian Chadd 
13099d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13109d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13119d3ddf43SAdrian Chadd 
13129d3ddf43SAdrian Chadd 		/*
13139d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
13149d3ddf43SAdrian Chadd 		 * other - don't populate both?
13159d3ddf43SAdrian Chadd 		 */
13169d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
13179d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
13189d3ddf43SAdrian Chadd 		if (*mp)
13199d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13209d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
13219d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
13229d3ddf43SAdrian Chadd 		if (*mp)
13239d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
13249d3ddf43SAdrian Chadd 	}
13259d3ddf43SAdrian Chadd 
13269d3ddf43SAdrian Chadd #ifdef	RSS
13279d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
13289d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
13299d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
13309d3ddf43SAdrian Chadd 
13319d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
13329d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
13339d3ddf43SAdrian Chadd 
13349d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
13359d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
13369d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
13379d3ddf43SAdrian Chadd 			if (*mp)
13389d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
13399d3ddf43SAdrian Chadd 		}
13409d3ddf43SAdrian Chadd 	}
13419d3ddf43SAdrian Chadd #endif
134282c23ebaSBill Fenner }
134382c23ebaSBill Fenner 
13444d2e3692SLuigi Rizzo /*
134530916a2dSRobert Watson  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
134630916a2dSRobert Watson  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
134730916a2dSRobert Watson  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
134830916a2dSRobert Watson  * compiled.
13494d2e3692SLuigi Rizzo  */
13503e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on);
135182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
135282cea7e6SBjoern A. Zeeb 
135382cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
135482cea7e6SBjoern A. Zeeb 
1355df8bae1dSRodney W. Grimes int
1356f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1357f0068c4aSGarrett Wollman {
13588b615593SMarko Zec 
1359f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1360f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1361f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1362f0068c4aSGarrett Wollman 
1363603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1364f0068c4aSGarrett Wollman 		return EADDRINUSE;
1365f0068c4aSGarrett Wollman 
1366603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13671c5de19aSGarrett Wollman 	/*
13681c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13691c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13701c5de19aSGarrett Wollman 	 */
1371603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1372603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1373603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13741c5de19aSGarrett Wollman 	}
1375f0068c4aSGarrett Wollman 
1376f0068c4aSGarrett Wollman 	return 0;
1377f0068c4aSGarrett Wollman }
1378f0068c4aSGarrett Wollman 
1379f0068c4aSGarrett Wollman int
1380f0068c4aSGarrett Wollman ip_rsvp_done(void)
1381f0068c4aSGarrett Wollman {
13828b615593SMarko Zec 
1383603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13841c5de19aSGarrett Wollman 	/*
13851c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13861c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13871c5de19aSGarrett Wollman 	 */
1388603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1389603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1390603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13911c5de19aSGarrett Wollman 	}
1392f0068c4aSGarrett Wollman 	return 0;
1393f0068c4aSGarrett Wollman }
1394bbb4330bSLuigi Rizzo 
13958f5a8818SKevin Lo int
13968f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1397bbb4330bSLuigi Rizzo {
13988f5a8818SKevin Lo 	struct mbuf *m;
13998f5a8818SKevin Lo 
14008f5a8818SKevin Lo 	m = *mp;
14018f5a8818SKevin Lo 	*mp = NULL;
14028b615593SMarko Zec 
1403bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
14048f5a8818SKevin Lo 		*mp = m;
14058f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
14068f5a8818SKevin Lo 		return (IPPROTO_DONE);
1407bbb4330bSLuigi Rizzo 	}
1408bbb4330bSLuigi Rizzo 
1409bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1410bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1411bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1412bbb4330bSLuigi Rizzo 	 */
1413bbb4330bSLuigi Rizzo 
1414603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1415bbb4330bSLuigi Rizzo 		m_freem(m);
14168f5a8818SKevin Lo 		return (IPPROTO_DONE);
1417bbb4330bSLuigi Rizzo 	}
1418bbb4330bSLuigi Rizzo 
1419603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
14208f5a8818SKevin Lo 		*mp = m;
14218f5a8818SKevin Lo 		rip_input(mp, offp, proto);
14228f5a8818SKevin Lo 		return (IPPROTO_DONE);
1423bbb4330bSLuigi Rizzo 	}
1424bbb4330bSLuigi Rizzo 	/* Drop the packet */
1425bbb4330bSLuigi Rizzo 	m_freem(m);
14268f5a8818SKevin Lo 	return (IPPROTO_DONE);
1427bbb4330bSLuigi Rizzo }
1428