xref: /freebsd/sys/netinet/ip_input.c (revision fbbd9655e5107c68e4e0146ff22b73d7350475bc)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13*fbbd9655SWarner Losh  * 3. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
350ac40133SBrian Somers #include "opt_bootp.h"
3627108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
376a800098SYoshinobu Inoue #include "opt_ipsec.h"
3833553d6eSBjoern A. Zeeb #include "opt_route.h"
39b8bc95cdSAdrian Chadd #include "opt_rss.h"
4074a9466cSGary Palmer 
41df8bae1dSRodney W. Grimes #include <sys/param.h>
42df8bae1dSRodney W. Grimes #include <sys/systm.h>
43ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
44df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
45b715f178SLuigi Rizzo #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/domain.h>
47df8bae1dSRodney W. Grimes #include <sys/protosw.h>
48df8bae1dSRodney W. Grimes #include <sys/socket.h>
49df8bae1dSRodney W. Grimes #include <sys/time.h>
50df8bae1dSRodney W. Grimes #include <sys/kernel.h>
51385195c0SMarko Zec #include <sys/lock.h>
52cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
53385195c0SMarko Zec #include <sys/rwlock.h>
5457f60867SMark Johnston #include <sys/sdt.h>
551025071fSGarrett Wollman #include <sys/syslog.h>
56b5e8ce9fSBruce Evans #include <sys/sysctl.h>
57df8bae1dSRodney W. Grimes 
58c85540ddSAndrey A. Chernov #include <net/pfil.h>
59df8bae1dSRodney W. Grimes #include <net/if.h>
609494d596SBrooks Davis #include <net/if_types.h>
61d314ad7bSJulian Elischer #include <net/if_var.h>
6282c23ebaSBill Fenner #include <net/if_dl.h>
63df8bae1dSRodney W. Grimes #include <net/route.h>
64748e0b0aSGarrett Wollman #include <net/netisr.h>
65b2bdc62aSAdrian Chadd #include <net/rss_config.h>
664b79449eSBjoern A. Zeeb #include <net/vnet.h>
67df8bae1dSRodney W. Grimes 
68df8bae1dSRodney W. Grimes #include <netinet/in.h>
6957f60867SMark Johnston #include <netinet/in_kdtrace.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75eddfbb76SRobert Watson #include <netinet/ip_fw.h>
76df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
77ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7858938916SGarrett Wollman #include <machine/in_cksum.h>
79a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
80b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
81df8bae1dSRodney W. Grimes 
82fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h>
83fcf59617SAndrey V. Elsukov 
84f0068c4aSGarrett Wollman #include <sys/socketvar.h>
856ddbf1e2SGary Palmer 
86aed55708SRobert Watson #include <security/mac/mac_framework.h>
87aed55708SRobert Watson 
/*
 * Compile-time check that struct ip has the expected size of 20; only
 * performed when the CTASSERT macro is available.
 */
#ifdef CTASSERT
CTASSERT(sizeof(struct ip) == 20);
#endif

/*
 * IP reassembly functions are defined in ip_reass.c.
 * ipreass_destroy() is only declared for VIMAGE kernels.
 */
extern void ipreass_init(void);
extern void ipreass_drain(void);
extern void ipreass_slowtimo(void);
#ifdef VIMAGE
extern void ipreass_destroy(void);
#endif

/*
 * Global rmlock, set up through RM_SYSINIT under the name "in_ifaddr_lock".
 * NOTE(review): presumably guards the in_ifaddr list and hash table defined
 * later in this file -- confirm against the users of the lock.
 */
struct rmlock in_ifaddr_lock;
RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
102f0068c4aSGarrett Wollman 
/* Per-vnet flag; only defined here, not exported through sysctl in this file. */
VNET_DEFINE(int, rsvp_on);

/* Per-vnet knob net.inet.ip.forwarding; tested by ip_input() before forwarding. */
VNET_DEFINE(int, ipforwarding);
SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipforwarding), 0,
    "Enable IP forwarding between interfaces");

/* Per-vnet knob net.inet.ip.redirect; initialized to 1. */
static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
#define	V_ipsendredirects	VNET(ipsendredirects)
SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipsendredirects), 0,
    "Enable sending IP redirects");

/*
 * XXX - Setting ip_checkinterface mostly implements the receive side of
 * the Strong ES model described in RFC 1122, but since the routing table
 * and transmit implementation do not implement the Strong ES model,
 * setting this to 1 results in an odd hybrid.
 *
 * XXX - ip_checkinterface currently must be disabled if you use ipnat
 * to translate the destination address to another local interface.
 *
 * XXX - ip_checkinterface must be disabled if you add IP aliases
 * to the loopback interface instead of the interface where the
 * packets for those addresses are received.
 */
/* Per-vnet knob net.inet.ip.check_interface; no explicit initializer. */
static VNET_DEFINE(int, ip_checkinterface);
#define	V_ip_checkinterface	VNET(ip_checkinterface)
SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ip_checkinterface), 0,
    "Verify packet arrives on correct interface");

/*
 * Per-vnet pfil head for AF_INET; registered in ip_init() and run on
 * inbound packets by ip_input().
 */
VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
136df8bae1dSRodney W. Grimes 
137d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
138d4b5cae4SRobert Watson 	.nh_name = "ip",
139d4b5cae4SRobert Watson 	.nh_handler = ip_input,
140d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
141b8bc95cdSAdrian Chadd #ifdef	RSS
1422527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
143b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
144b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
145b8bc95cdSAdrian Chadd #else
146d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
147b8bc95cdSAdrian Chadd #endif
148d4b5cae4SRobert Watson };
149ca925d9cSJonathan Lemon 
#ifdef	RSS
/*
 * netisr handler description for NETISR_IP_DIRECT; packets are handed to
 * ip_direct_input().
 *
 * Frames dispatched directly through this handler are currently assumed
 * to already carry a calculated flowid.  Something should probably assert
 * that the flow details are actually valid.
 */
static struct netisr_handler ip_direct_nh = {
	.nh_proto = NETISR_IP_DIRECT,
	.nh_name = "ip_direct",
	.nh_handler = ip_direct_input,
	.nh_policy = NETISR_POLICY_CPU,
	.nh_dispatch = NETISR_DISPATCH_HYBRID,
	.nh_m2cpuid = rss_soft_m2cpuid_v4,
};
#endif
167b8bc95cdSAdrian Chadd 
extern	struct domain inetdomain;
extern	struct protosw inetsw[];
/*
 * Maps an IP protocol number to an index into inetsw[]; filled in by
 * ip_init(), with every unclaimed slot pointing at the raw IP entry.
 */
u_char	ip_protox[IPPROTO_MAX];
VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */

/*
 * Only compiled when IPCTL_DEFMTU is defined.
 * NOTE(review): ip_mtu is not declared in the visible part of this file --
 * confirm it exists wherever IPCTL_DEFMTU is defined.
 */
#ifdef IPCTL_DEFMTU
SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
    &ip_mtu, 0, "Default MTU");
#endif

/* Per-vnet knob net.inet.ip.stealth; only present in IPSTEALTH kernels. */
#ifdef IPSTEALTH
VNET_DEFINE(int, ipstealth);
SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
    &VNET_NAME(ipstealth), 0,
    "IP stealth mode, no TTL decrementation on forwarding");
#endif
186eddfbb76SRobert Watson 
/*
 * IP statistics are stored in the "array" of counter(9)s.
 * The array is defined per vnet, set up by VNET_PCPUSTAT_SYSINIT and
 * exported as net.inet.ip.stats in the layout of struct ipstat.
 */
VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
VNET_PCPUSTAT_SYSINIT(ipstat);
SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
    "IP statistics (struct ipstat, netinet/ip_var.h)");

/*
 * NOTE(review): presumably releases the counters when a vnet is torn
 * down -- confirm against the VNET_PCPUSTAT_SYSUNINIT definition.
 */
#ifdef VIMAGE
VNET_PCPUSTAT_SYSUNINIT(ipstat);
#endif /* VIMAGE */
1985923c293SGleb Smirnoff 
1995923c293SGleb Smirnoff /*
200315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2015923c293SGleb Smirnoff  * into ipstat treated as an array.
202315e3e38SRobert Watson  */
203315e3e38SRobert Watson void
204315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
205315e3e38SRobert Watson {
206315e3e38SRobert Watson 
2075da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
208315e3e38SRobert Watson }
209315e3e38SRobert Watson 
210315e3e38SRobert Watson void
211315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
212315e3e38SRobert Watson {
213315e3e38SRobert Watson 
2145da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
215315e3e38SRobert Watson }
216315e3e38SRobert Watson 
217d4b5cae4SRobert Watson static int
218d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
219d4b5cae4SRobert Watson {
220d4b5cae4SRobert Watson 	int error, qlimit;
221d4b5cae4SRobert Watson 
222d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
223d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
224d4b5cae4SRobert Watson 	if (error || !req->newptr)
225d4b5cae4SRobert Watson 		return (error);
226d4b5cae4SRobert Watson 	if (qlimit < 1)
227d4b5cae4SRobert Watson 		return (EINVAL);
228d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
229d4b5cae4SRobert Watson }
230d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
231d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
232d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
233d4b5cae4SRobert Watson 
234d4b5cae4SRobert Watson static int
235d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
236d4b5cae4SRobert Watson {
237d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
238d4b5cae4SRobert Watson 	int error, qdrops;
239d4b5cae4SRobert Watson 
240d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
241d4b5cae4SRobert Watson 	qdrops = qdrops_long;
242d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
243d4b5cae4SRobert Watson 	if (error || !req->newptr)
244d4b5cae4SRobert Watson 		return (error);
245d4b5cae4SRobert Watson 	if (qdrops != 0)
246d4b5cae4SRobert Watson 		return (EINVAL);
247d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
248d4b5cae4SRobert Watson 	return (0);
249d4b5cae4SRobert Watson }
250d4b5cae4SRobert Watson 
251d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
252d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
253d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
254d4b5cae4SRobert Watson 
255b8bc95cdSAdrian Chadd #ifdef	RSS
256b8bc95cdSAdrian Chadd static int
257b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
258b8bc95cdSAdrian Chadd {
259b8bc95cdSAdrian Chadd 	int error, qlimit;
260b8bc95cdSAdrian Chadd 
261b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
262b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
263b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
264b8bc95cdSAdrian Chadd 		return (error);
265b8bc95cdSAdrian Chadd 	if (qlimit < 1)
266b8bc95cdSAdrian Chadd 		return (EINVAL);
267b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
268b8bc95cdSAdrian Chadd }
269b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
270b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
271b8bc95cdSAdrian Chadd     "Maximum size of the IP direct input queue");
272b8bc95cdSAdrian Chadd 
273b8bc95cdSAdrian Chadd static int
274b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
275b8bc95cdSAdrian Chadd {
276b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
277b8bc95cdSAdrian Chadd 	int error, qdrops;
278b8bc95cdSAdrian Chadd 
279b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
280b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
281b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
282b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
283b8bc95cdSAdrian Chadd 		return (error);
284b8bc95cdSAdrian Chadd 	if (qdrops != 0)
285b8bc95cdSAdrian Chadd 		return (EINVAL);
286b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
287b8bc95cdSAdrian Chadd 	return (0);
288b8bc95cdSAdrian Chadd }
289b8bc95cdSAdrian Chadd 
290b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
291b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
292b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
293b8bc95cdSAdrian Chadd #endif	/* RSS */
294b8bc95cdSAdrian Chadd 
295df8bae1dSRodney W. Grimes /*
296df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
297df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
298df8bae1dSRodney W. Grimes  */
299df8bae1dSRodney W. Grimes void
300f2565d68SRobert Watson ip_init(void)
301df8bae1dSRodney W. Grimes {
302f2565d68SRobert Watson 	struct protosw *pr;
303f2565d68SRobert Watson 	int i;
304df8bae1dSRodney W. Grimes 
305603724d3SBjoern A. Zeeb 	TAILQ_INIT(&V_in_ifaddrhead);
306603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3071ed81b73SMarko Zec 
3081ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3091dbefcc0SGleb Smirnoff 	ipreass_init();
3101ed81b73SMarko Zec 
3110b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3120b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3130b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3140b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3150b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3160b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3170b4b0b0fSJulian Elischer 
318ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
319ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
320ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
321ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
322ef91a976SAndrey V. Elsukov 		    __func__);
323ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
324ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
325ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
326ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
327ef91a976SAndrey V. Elsukov 		    __func__);
328ef91a976SAndrey V. Elsukov 
3291ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
330484149deSBjoern A. Zeeb #ifdef VIMAGE
331484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
332484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
333484149deSBjoern A. Zeeb #ifdef	RSS
334484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
335484149deSBjoern A. Zeeb #endif
3361ed81b73SMarko Zec 		return;
337484149deSBjoern A. Zeeb 	}
338484149deSBjoern A. Zeeb #endif
3391ed81b73SMarko Zec 
340f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34102410549SRobert Watson 	if (pr == NULL)
342db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
343db09bef3SAndre Oppermann 
344db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
345df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
346df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
347db09bef3SAndre Oppermann 	/*
348db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
349db09bef3SAndre Oppermann 	 * in ip_protox[].
350db09bef3SAndre Oppermann 	 */
351f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
352f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
353df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
354db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
355db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
356db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
357df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
358db09bef3SAndre Oppermann 		}
359194a213eSAndrey A. Chernov 
360d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
361b8bc95cdSAdrian Chadd #ifdef	RSS
362b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
363b8bc95cdSAdrian Chadd #endif
364df8bae1dSRodney W. Grimes }
365df8bae1dSRodney W. Grimes 
#ifdef VIMAGE
/*
 * Per-vnet teardown of the IPv4 input path, run as a VNET_SYSUNINIT.
 *
 * Detaches from the netisr handlers, unregisters the pfil head and the
 * IPsec helper hook heads (failures are only reported), removes IPv4
 * addresses and routes from all interfaces, and finally destroys the
 * reassembly queues and the interface address hash table.
 */
static void
ip_destroy(void *unused __unused)
{
	struct ifnet *iface;
	int rc;

#ifdef	RSS
	netisr_unregister_vnet(&ip_direct_nh);
#endif
	netisr_unregister_vnet(&ip_nh);

	rc = pfil_head_unregister(&V_inet_pfil_hook);
	if (rc != 0) {
		printf("%s: WARNING: unable to unregister pfil hook, "
		    "error %d\n", __func__, rc);
	}

	rc = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
	if (rc != 0) {
		printf("%s: WARNING: unable to deregister input helper hook "
		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
		    "error %d returned\n", __func__, rc);
	}
	rc = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
	if (rc != 0) {
		printf("%s: WARNING: unable to deregister output helper hook "
		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
		    "error %d returned\n", __func__, rc);
	}

	/* Strip the IPv4 addresses off every interface. */
	in_ifscrub_all();

	/* Flush the IPv4 routes of every interface as well. */
	IFNET_RLOCK();
	TAILQ_FOREACH(iface, &V_ifnet, if_link) {
		rt_flushifroutes_af(iface, AF_INET);
	}
	IFNET_RUNLOCK();

	/* Tear down the IP reassembly queues. */
	ipreass_destroy();

	/* The address hash table should be empty by now; release it. */
	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
}

VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
#endif
4139802380eSBjoern A. Zeeb 
#ifdef	RSS
/*
 * IP direct input routine.
 *
 * Used when completed fragments are reinjected, i.e. all of the earlier
 * checking and book-keeping has already been done.  The packet is offered
 * to IPsec input processing when that is enabled (a non-zero result ends
 * processing here); otherwise it is counted as delivered and handed to the
 * input routine of the protocol named in the IP header.
 */
void
ip_direct_input(struct mbuf *m)
{
	struct ip *iph = mtod(m, struct ip *);
	int hdrlen = iph->ip_hl << 2;	/* ip_hl counts 32-bit words */

#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	if (IPSEC_ENABLED(ipv4) &&
	    IPSEC_INPUT(ipv4, m, hdrlen, iph->ip_p) != 0)
		return;
#endif /* IPSEC */
	IPSTAT_INC(ips_delivered);
	(*inetsw[ip_protox[iph->ip_p]].pr_input)(&m, &hdrlen, iph->ip_p);
}
#endif
441b8bc95cdSAdrian Chadd 
4424d2e3692SLuigi Rizzo /*
443df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
444df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
445df8bae1dSRodney W. Grimes  */
446c67b1d17SGarrett Wollman void
447c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
448df8bae1dSRodney W. Grimes {
4499188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4505da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
451ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4520aade26eSRobert Watson 	struct ifnet *ifp;
4539b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
45421d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
45502c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
456f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
457b715f178SLuigi Rizzo 
458fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
459db40007dSAndrew R. Reiter 
460ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
46176ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
46276ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4632b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
46453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4658f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4669b932e9eSAndre Oppermann 		goto ours;
4672b25acc1SLuigi Rizzo 	}
4682b25acc1SLuigi Rizzo 
46986425c62SRobert Watson 	IPSTAT_INC(ips_total);
47058938916SGarrett Wollman 
47158938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
47258938916SGarrett Wollman 		goto tooshort;
47358938916SGarrett Wollman 
474df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4750b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
47686425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
477c67b1d17SGarrett Wollman 		return;
478df8bae1dSRodney W. Grimes 	}
479df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
48058938916SGarrett Wollman 
48153be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
48286425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
483df8bae1dSRodney W. Grimes 		goto bad;
484df8bae1dSRodney W. Grimes 	}
48558938916SGarrett Wollman 
48653be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
487df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
48886425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
489df8bae1dSRodney W. Grimes 		goto bad;
490df8bae1dSRodney W. Grimes 	}
491df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4920b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
49386425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
494c67b1d17SGarrett Wollman 			return;
495df8bae1dSRodney W. Grimes 		}
496df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
497df8bae1dSRodney W. Grimes 	}
49833841545SHajimu UMEMOTO 
49957f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
50057f60867SMark Johnston 
50133841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
5020aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
50333841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
50433841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
5050aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
50686425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
50733841545SHajimu UMEMOTO 			goto bad;
50833841545SHajimu UMEMOTO 		}
50933841545SHajimu UMEMOTO 	}
51033841545SHajimu UMEMOTO 
511db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
512db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
513db4f9cc7SJonathan Lemon 	} else {
51458938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
51547c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
51658938916SGarrett Wollman 		} else {
51747c861ecSBrian Somers 			sum = in_cksum(m, hlen);
51858938916SGarrett Wollman 		}
519db4f9cc7SJonathan Lemon 	}
52047c861ecSBrian Somers 	if (sum) {
52186425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
522df8bae1dSRodney W. Grimes 		goto bad;
523df8bae1dSRodney W. Grimes 	}
524df8bae1dSRodney W. Grimes 
52502b199f1SMax Laier #ifdef ALTQ
52602b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
52702b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
52802b199f1SMax Laier 		return;
52902b199f1SMax Laier #endif
53002b199f1SMax Laier 
53121d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
53221d172a3SGleb Smirnoff 	if (ip_len < hlen) {
53386425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
534df8bae1dSRodney W. Grimes 		goto bad;
535df8bae1dSRodney W. Grimes 	}
536df8bae1dSRodney W. Grimes 
537df8bae1dSRodney W. Grimes 	/*
538df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
539df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
540df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
541df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
542df8bae1dSRodney W. Grimes 	 */
54321d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
54458938916SGarrett Wollman tooshort:
54586425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
546df8bae1dSRodney W. Grimes 		goto bad;
547df8bae1dSRodney W. Grimes 	}
54821d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
549df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
55021d172a3SGleb Smirnoff 			m->m_len = ip_len;
55121d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
552df8bae1dSRodney W. Grimes 		} else
55321d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
554df8bae1dSRodney W. Grimes 	}
555b8bc95cdSAdrian Chadd 
556ad9f4d6aSAndrey V. Elsukov 	/*
557ad9f4d6aSAndrey V. Elsukov 	 * Try to forward the packet, but if we fail continue.
558ad9f4d6aSAndrey V. Elsukov 	 * ip_tryforward() does inbound and outbound packet firewall
559ad9f4d6aSAndrey V. Elsukov 	 * processing. If firewall has decided that destination becomes
560ad9f4d6aSAndrey V. Elsukov 	 * our local address, it sets M_FASTFWD_OURS flag. In this
561ad9f4d6aSAndrey V. Elsukov 	 * case skip another inbound firewall processing and update
562ad9f4d6aSAndrey V. Elsukov 	 * ip pointer.
563ad9f4d6aSAndrey V. Elsukov 	 */
564ad9f4d6aSAndrey V. Elsukov 	if (V_ipforwarding != 0
565fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
566fcf59617SAndrey V. Elsukov 	    && (!IPSEC_ENABLED(ipv4) ||
567fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0)
568ad9f4d6aSAndrey V. Elsukov #endif
569ad9f4d6aSAndrey V. Elsukov 	    ) {
570ad9f4d6aSAndrey V. Elsukov 		if ((m = ip_tryforward(m)) == NULL)
57133872124SGeorge V. Neville-Neil 			return;
572ad9f4d6aSAndrey V. Elsukov 		if (m->m_flags & M_FASTFWD_OURS) {
573ad9f4d6aSAndrey V. Elsukov 			m->m_flags &= ~M_FASTFWD_OURS;
574ad9f4d6aSAndrey V. Elsukov 			ip = mtod(m, struct ip *);
575ad9f4d6aSAndrey V. Elsukov 			goto ours;
576ad9f4d6aSAndrey V. Elsukov 		}
577ad9f4d6aSAndrey V. Elsukov 	}
578fcf59617SAndrey V. Elsukov 
579fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
58014dd6717SSam Leffler 	/*
581ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
58214dd6717SSam Leffler 	 */
583fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4) &&
584fcf59617SAndrey V. Elsukov 	    IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0)
585c21fd232SAndre Oppermann 			goto passin;
586ad9f4d6aSAndrey V. Elsukov #endif
587fcf59617SAndrey V. Elsukov 
588c4ac87eaSDarren Reed 	/*
589134ea224SSam Leffler 	 * Run through list of hooks for input packets.
590f51f805fSSam Leffler 	 *
591f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
592f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
593f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
594c4ac87eaSDarren Reed 	 */
595c21fd232SAndre Oppermann 
596c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5970b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
598c21fd232SAndre Oppermann 		goto passin;
599c21fd232SAndre Oppermann 
600f51f805fSSam Leffler 	odst = ip->ip_dst;
6010b4b0b0fSJulian Elischer 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
602beec8214SDarren Reed 		return;
603134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
604c4ac87eaSDarren Reed 		return;
6059b932e9eSAndre Oppermann 
606c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
60702c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
6080aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
6099b932e9eSAndre Oppermann 
6109b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
6119b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
6129b932e9eSAndre Oppermann 		goto ours;
6139b932e9eSAndre Oppermann 	}
614ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
615de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
616099dd043SAndre Oppermann 			/*
617ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
618ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
619ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
620099dd043SAndre Oppermann 			 */
621ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
622099dd043SAndre Oppermann 			return;
623099dd043SAndre Oppermann 		}
624ffdbf9daSAndrey V. Elsukov 	}
625c21fd232SAndre Oppermann passin:
62621d172a3SGleb Smirnoff 
62721d172a3SGleb Smirnoff 	/*
628df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
629df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
630df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
631df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
632df8bae1dSRodney W. Grimes 	 */
6339b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
634c67b1d17SGarrett Wollman 		return;
635df8bae1dSRodney W. Grimes 
636f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
637f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
638f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
639f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
640f0068c4aSGarrett Wollman 	 * grabbing the packet.
641f0068c4aSGarrett Wollman          */
642603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
643f0068c4aSGarrett Wollman 		goto ours;
644f0068c4aSGarrett Wollman 
645df8bae1dSRodney W. Grimes 	/*
646df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
647cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
648cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
649cc766e04SGarrett Wollman 	 * with it).
650df8bae1dSRodney W. Grimes 	 */
651603724d3SBjoern A. Zeeb 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
652cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
653cc766e04SGarrett Wollman 		goto ours;
654cc766e04SGarrett Wollman 
6557538a9a0SJonathan Lemon 	/*
656823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
657823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
658823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
659e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
660e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6613f67c834SDon Lewis 	 *
6623f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6633f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6643f67c834SDon Lewis 	 *
665a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6663f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6673f67c834SDon Lewis 	 * the packets are received.
668a9771948SGleb Smirnoff 	 *
669a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
670a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
671a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
672823db0e9SDon Lewis 	 */
673603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6740aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
67554bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
676823db0e9SDon Lewis 
677ca925d9cSJonathan Lemon 	/*
678ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
679ca925d9cSJonathan Lemon 	 */
6802d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6819b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
682f9e354dfSJulian Elischer 		/*
683823db0e9SDon Lewis 		 * If the address matches, verify that the packet
684823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
685823db0e9SDon Lewis 		 * enabled.
686f9e354dfSJulian Elischer 		 */
6879b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6888c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6897caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6907caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
6917caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
6922d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
693ed1ff184SJulian Elischer 			goto ours;
694ca925d9cSJonathan Lemon 		}
6958c0fec80SRobert Watson 	}
6962d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6972d9cfabaSRobert Watson 
698823db0e9SDon Lewis 	/*
699ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
700ca925d9cSJonathan Lemon 	 *
701ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
702ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
703ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
704ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
705823db0e9SDon Lewis 	 */
7060aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
707137f91e8SJohn Baldwin 		IF_ADDR_RLOCK(ifp);
7080aade26eSRobert Watson 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
709ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
710ca925d9cSJonathan Lemon 				continue;
711ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
712df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
7130aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
7147caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7157caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7167caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
717137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
718df8bae1dSRodney W. Grimes 				goto ours;
7190aade26eSRobert Watson 			}
7200ac40133SBrian Somers #ifdef BOOTP_COMPAT
7210aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7227caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7237caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7247caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
725137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
726ca925d9cSJonathan Lemon 				goto ours;
7270aade26eSRobert Watson 			}
7280ac40133SBrian Somers #endif
729df8bae1dSRodney W. Grimes 		}
730137f91e8SJohn Baldwin 		IF_ADDR_RUNLOCK(ifp);
73119e5b0a7SRobert Watson 		ia = NULL;
732df8bae1dSRodney W. Grimes 	}
733f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
734f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
73586425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
736f8429ca2SBruce M Simpson 		m_freem(m);
737f8429ca2SBruce M Simpson 		return;
738f8429ca2SBruce M Simpson 	}
739df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
740603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
741df8bae1dSRodney W. Grimes 			/*
742df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
743df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
744df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
745df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
746df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
747df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
748df8bae1dSRodney W. Grimes 			 */
7490aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
75086425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
751df8bae1dSRodney W. Grimes 				m_freem(m);
752c67b1d17SGarrett Wollman 				return;
753df8bae1dSRodney W. Grimes 			}
754df8bae1dSRodney W. Grimes 
755df8bae1dSRodney W. Grimes 			/*
75611612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
757df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
758df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
759df8bae1dSRodney W. Grimes 			 */
760df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
761df8bae1dSRodney W. Grimes 				goto ours;
76286425c62SRobert Watson 			IPSTAT_INC(ips_forward);
763df8bae1dSRodney W. Grimes 		}
764df8bae1dSRodney W. Grimes 		/*
765d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
766d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
767d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
768df8bae1dSRodney W. Grimes 		 */
769df8bae1dSRodney W. Grimes 		goto ours;
770df8bae1dSRodney W. Grimes 	}
771df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
772df8bae1dSRodney W. Grimes 		goto ours;
773df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
774df8bae1dSRodney W. Grimes 		goto ours;
775df8bae1dSRodney W. Grimes 
7766a800098SYoshinobu Inoue 	/*
777df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
778df8bae1dSRodney W. Grimes 	 */
779603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
78086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
781df8bae1dSRodney W. Grimes 		m_freem(m);
782546f251bSChris D. Faulhaber 	} else {
7839b932e9eSAndre Oppermann 		ip_forward(m, dchg);
784546f251bSChris D. Faulhaber 	}
785c67b1d17SGarrett Wollman 	return;
786df8bae1dSRodney W. Grimes 
787df8bae1dSRodney W. Grimes ours:
788d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
789d0ebc0d2SYaroslav Tykhiy 	/*
790d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
791d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
792d0ebc0d2SYaroslav Tykhiy 	 */
7937caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
794d0ebc0d2SYaroslav Tykhiy 		return;
795d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
796d0ebc0d2SYaroslav Tykhiy 
79763f8d699SJordan K. Hubbard 	/*
798b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
799ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
800df8bae1dSRodney W. Grimes 	 */
8018f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
802aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
803f0cada84SAndre Oppermann 		m = ip_reass(m);
804f0cada84SAndre Oppermann 		if (m == NULL)
805c67b1d17SGarrett Wollman 			return;
8066a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
8077e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
80853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
809f0cada84SAndre Oppermann 	}
810f0cada84SAndre Oppermann 
811fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
812fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
813fcf59617SAndrey V. Elsukov 		if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0)
814fcf59617SAndrey V. Elsukov 			return;
815fcf59617SAndrey V. Elsukov 	}
816b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
81733841545SHajimu UMEMOTO 
818df8bae1dSRodney W. Grimes 	/*
819df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
820df8bae1dSRodney W. Grimes 	 */
82186425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8229b932e9eSAndre Oppermann 
8238f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
824c67b1d17SGarrett Wollman 	return;
825df8bae1dSRodney W. Grimes bad:
826df8bae1dSRodney W. Grimes 	m_freem(m);
827c67b1d17SGarrett Wollman }
828c67b1d17SGarrett Wollman 
829c67b1d17SGarrett Wollman /*
830df8bae1dSRodney W. Grimes  * IP timer processing;
831df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
832df8bae1dSRodney W. Grimes  * queue, discard it.
833df8bae1dSRodney W. Grimes  */
834df8bae1dSRodney W. Grimes void
835f2565d68SRobert Watson ip_slowtimo(void)
836df8bae1dSRodney W. Grimes {
8378b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
838df8bae1dSRodney W. Grimes 
8395ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8408b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8418b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8421dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8438b615593SMarko Zec 		CURVNET_RESTORE();
8448b615593SMarko Zec 	}
8455ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
846df8bae1dSRodney W. Grimes }
847df8bae1dSRodney W. Grimes 
8489802380eSBjoern A. Zeeb void
8499802380eSBjoern A. Zeeb ip_drain(void)
8509802380eSBjoern A. Zeeb {
8519802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8529802380eSBjoern A. Zeeb 
8539802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8549802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8559802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8561dbefcc0SGleb Smirnoff 		ipreass_drain();
8578b615593SMarko Zec 		CURVNET_RESTORE();
8588b615593SMarko Zec 	}
8595ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
860df8bae1dSRodney W. Grimes }
861df8bae1dSRodney W. Grimes 
862df8bae1dSRodney W. Grimes /*
863de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
864de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
865de38924dSAndre Oppermann  */
866de38924dSAndre Oppermann int
8671b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
868de38924dSAndre Oppermann {
869de38924dSAndre Oppermann 	struct protosw *pr;
870de38924dSAndre Oppermann 
871de38924dSAndre Oppermann 	/* Sanity checks. */
8721b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
873de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
874de38924dSAndre Oppermann 
875de38924dSAndre Oppermann 	/*
876de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
877de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
878de38924dSAndre Oppermann 	 */
879de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
880de38924dSAndre Oppermann 	if (pr == NULL)
881de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
882de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
883de38924dSAndre Oppermann 		return (EEXIST);
884de38924dSAndre Oppermann 
885de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
886de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
887de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
888de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
889de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
890de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
891de38924dSAndre Oppermann 			return (0);
892de38924dSAndre Oppermann 		}
893de38924dSAndre Oppermann 	}
894de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
895de38924dSAndre Oppermann }
896de38924dSAndre Oppermann 
897de38924dSAndre Oppermann int
8981b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
899de38924dSAndre Oppermann {
900de38924dSAndre Oppermann 	struct protosw *pr;
901de38924dSAndre Oppermann 
902de38924dSAndre Oppermann 	/* Sanity checks. */
9031b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
904de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
905de38924dSAndre Oppermann 
906de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
907de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
908de38924dSAndre Oppermann 	if (pr == NULL)
909de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
910de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
911de38924dSAndre Oppermann 		return (ENOENT);
912de38924dSAndre Oppermann 
913de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
914de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
915de38924dSAndre Oppermann 	return (0);
916de38924dSAndre Oppermann }
917de38924dSAndre Oppermann 
918df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
919df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
920df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
921df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
922df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
923fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
9243b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
925df8bae1dSRodney W. Grimes };
926df8bae1dSRodney W. Grimes 
927df8bae1dSRodney W. Grimes /*
928df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
929df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
930df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
931df8bae1dSRodney W. Grimes  * of codes and types.
932df8bae1dSRodney W. Grimes  *
933df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
934df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
935df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
936df8bae1dSRodney W. Grimes  * protocol deal with that.
937df8bae1dSRodney W. Grimes  *
938df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
939df8bae1dSRodney W. Grimes  * via a source route.
940df8bae1dSRodney W. Grimes  */
9419b932e9eSAndre Oppermann void
9429b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
943df8bae1dSRodney W. Grimes {
9442b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
945efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
946df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
947d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9489b932e9eSAndre Oppermann 	struct in_addr dest;
949b835b6feSBjoern A. Zeeb 	struct route ro;
950c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9513efc3014SJulian Elischer 
9529b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
95386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
954df8bae1dSRodney W. Grimes 		m_freem(m);
955df8bae1dSRodney W. Grimes 		return;
956df8bae1dSRodney W. Grimes 	}
957fcf59617SAndrey V. Elsukov 	if (
958fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH
959fcf59617SAndrey V. Elsukov 	    V_ipstealth == 0 &&
960fcf59617SAndrey V. Elsukov #endif
961fcf59617SAndrey V. Elsukov 	    ip->ip_ttl <= IPTTLDEC) {
962fcf59617SAndrey V. Elsukov 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
9638922ddbeSAndrey V. Elsukov 		return;
9648922ddbeSAndrey V. Elsukov 	}
965df8bae1dSRodney W. Grimes 
966d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
967d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
968d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
969d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
970d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
971d14122b0SErmal Luçi #ifdef RADIX_MPATH
972d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
973d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
974d14122b0SErmal Luçi 	    M_GETFIB(m));
975d14122b0SErmal Luçi #else
976d14122b0SErmal Luçi 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
977d14122b0SErmal Luçi #endif
978d14122b0SErmal Luçi 	if (ro.ro_rt != NULL) {
979d14122b0SErmal Luçi 		ia = ifatoia(ro.ro_rt->rt_ifa);
980d14122b0SErmal Luçi 		ifa_ref(&ia->ia_ifa);
98156844a62SErmal Luçi 	} else
98256844a62SErmal Luçi 		ia = NULL;
983df8bae1dSRodney W. Grimes 	/*
984bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
985bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
986bfef7ed4SIan Dowse 	 *
9874d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9884d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9894d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9904d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9914d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9924d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9934d2e3692SLuigi Rizzo 	 *
994c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
995bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
996bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
997bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
998df8bae1dSRodney W. Grimes 	 */
999dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1000eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10019967cafcSSam Leffler 		/*
10029967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10039967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10049967cafcSSam Leffler 		 * be conservative and just discard the copy since
10059967cafcSSam Leffler 		 * code below may some day want the tags.
10069967cafcSSam Leffler 		 */
10079967cafcSSam Leffler 		m_free(mcopy);
10089967cafcSSam Leffler 		mcopy = NULL;
10099967cafcSSam Leffler 	}
1010bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10118f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1012e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1013bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1014bfef7ed4SIan Dowse 	}
101504287599SRuslan Ermilov #ifdef IPSTEALTH
1016fcf59617SAndrey V. Elsukov 	if (V_ipstealth == 0)
101704287599SRuslan Ermilov #endif
101804287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
1019fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT)
1020fcf59617SAndrey V. Elsukov 	if (IPSEC_ENABLED(ipv4)) {
1021fcf59617SAndrey V. Elsukov 		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
1022fcf59617SAndrey V. Elsukov 			/* mbuf consumed by IPsec */
1023fcf59617SAndrey V. Elsukov 			m_freem(mcopy);
1024fcf59617SAndrey V. Elsukov 			if (error != EINPROGRESS)
1025fcf59617SAndrey V. Elsukov 				IPSTAT_INC(ips_cantforward);
1026fcf59617SAndrey V. Elsukov 			return;
102704287599SRuslan Ermilov 		}
1028fcf59617SAndrey V. Elsukov 		/* No IPsec processing required */
1029fcf59617SAndrey V. Elsukov 	}
1030fcf59617SAndrey V. Elsukov #endif /* IPSEC */
1031df8bae1dSRodney W. Grimes 	/*
1032df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1033df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1034df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1035df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1036df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1037df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1038df8bae1dSRodney W. Grimes 	 */
10399b932e9eSAndre Oppermann 	dest.s_addr = 0;
1040efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1041efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
104202c1c707SAndre Oppermann 		struct rtentry *rt;
104302c1c707SAndre Oppermann 
104402c1c707SAndre Oppermann 		rt = ro.ro_rt;
104502c1c707SAndre Oppermann 
104602c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
10479b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1048df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1049df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1050df8bae1dSRodney W. Grimes 
1051df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1052df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1053df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
10549b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1055df8bae1dSRodney W. Grimes 				else
10569b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1057df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1058df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1059df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1060df8bae1dSRodney W. Grimes 			}
1061df8bae1dSRodney W. Grimes 		}
106202c1c707SAndre Oppermann 	}
1063df8bae1dSRodney W. Grimes 
1064b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1065b835b6feSBjoern A. Zeeb 
1066b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1067e3a7aa6fSGleb Smirnoff 		mtu = ro.ro_rt->rt_mtu;
1068bf984051SGleb Smirnoff 	RO_RTFREE(&ro);
1069b835b6feSBjoern A. Zeeb 
1070df8bae1dSRodney W. Grimes 	if (error)
107186425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1072df8bae1dSRodney W. Grimes 	else {
107386425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1074df8bae1dSRodney W. Grimes 		if (type)
107586425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1076df8bae1dSRodney W. Grimes 		else {
10779188b4a1SAndre Oppermann 			if (mcopy)
1078df8bae1dSRodney W. Grimes 				m_freem(mcopy);
10798c0fec80SRobert Watson 			if (ia != NULL)
10808c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1081df8bae1dSRodney W. Grimes 			return;
1082df8bae1dSRodney W. Grimes 		}
1083df8bae1dSRodney W. Grimes 	}
10848c0fec80SRobert Watson 	if (mcopy == NULL) {
10858c0fec80SRobert Watson 		if (ia != NULL)
10868c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
1087df8bae1dSRodney W. Grimes 		return;
10888c0fec80SRobert Watson 	}
1089df8bae1dSRodney W. Grimes 
1090df8bae1dSRodney W. Grimes 	switch (error) {
1091df8bae1dSRodney W. Grimes 
1092df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1093df8bae1dSRodney W. Grimes 		/* type, code set above */
1094df8bae1dSRodney W. Grimes 		break;
1095df8bae1dSRodney W. Grimes 
1096efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1097df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1098df8bae1dSRodney W. Grimes 	case ENETDOWN:
1099df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1100df8bae1dSRodney W. Grimes 	default:
1101df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1102df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1103df8bae1dSRodney W. Grimes 		break;
1104df8bae1dSRodney W. Grimes 
1105df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1106df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1107df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11089b932e9eSAndre Oppermann 		/*
1109b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1110b835b6feSBjoern A. Zeeb 		 * interface MTU.
1111ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1112ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1113ab48768bSAndre Oppermann 		 * current packet size.
11149b932e9eSAndre Oppermann 		 */
1115b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1116b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1117b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1118b835b6feSBjoern A. Zeeb 		} else {
1119ab48768bSAndre Oppermann 			if (ia != NULL)
1120c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1121ab48768bSAndre Oppermann 			else
11228f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1123ab48768bSAndre Oppermann 		}
112486425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1125df8bae1dSRodney W. Grimes 		break;
1126df8bae1dSRodney W. Grimes 
1127df8bae1dSRodney W. Grimes 	case ENOBUFS:
11283a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11293a06e3e0SRuslan Ermilov 		m_freem(mcopy);
11308c0fec80SRobert Watson 		if (ia != NULL)
11318c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
11323a06e3e0SRuslan Ermilov 		return;
1133df8bae1dSRodney W. Grimes 	}
11348c0fec80SRobert Watson 	if (ia != NULL)
11358c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
1136c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1137df8bae1dSRodney W. Grimes }
1138df8bae1dSRodney W. Grimes 
/*
 * Evaluates to 1 when the socket pointed to by sp has SO_TIMESTAMP set in
 * so_options and its so_ts_clock equals ct, and to 0 otherwise.
 *
 * Both arguments are parenthesized in the expansion so that any pointer
 * expression (e.g. &sock) or compound clock expression can be passed.
 * Note that sp is evaluated twice.
 */
#define	CHECK_SO_CT(sp, ct) \
    ((((sp)->so_options & SO_TIMESTAMP) && ((sp)->so_ts_clock == (ct))) ? 1 : 0)
1141339efd75SMaxim Sobolev 
114282c23ebaSBill Fenner void
1143f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1144f2565d68SRobert Watson     struct mbuf *m)
114582c23ebaSBill Fenner {
11468b615593SMarko Zec 
1147339efd75SMaxim Sobolev 	if ((inp->inp_socket->so_options & SO_BINTIME) ||
1148339efd75SMaxim Sobolev 	    CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) {
1149be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1150be8a62e8SPoul-Henning Kamp 
1151be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1152be8a62e8SPoul-Henning Kamp 		*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1153be8a62e8SPoul-Henning Kamp 		    SCM_BINTIME, SOL_SOCKET);
1154be8a62e8SPoul-Henning Kamp 		if (*mp)
1155be8a62e8SPoul-Henning Kamp 			mp = &(*mp)->m_next;
1156be8a62e8SPoul-Henning Kamp 	}
1157339efd75SMaxim Sobolev 	if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) {
115882c23ebaSBill Fenner 		struct timeval tv;
115982c23ebaSBill Fenner 
1160339efd75SMaxim Sobolev 		microtime(&tv);
116182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
116282c23ebaSBill Fenner 		    SCM_TIMESTAMP, SOL_SOCKET);
116382c23ebaSBill Fenner 		if (*mp)
116482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
1165339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) {
1166339efd75SMaxim Sobolev 		struct timespec ts;
1167339efd75SMaxim Sobolev 
1168339efd75SMaxim Sobolev 		nanotime(&ts);
1169339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1170339efd75SMaxim Sobolev 		    SCM_REALTIME, SOL_SOCKET);
1171339efd75SMaxim Sobolev 		if (*mp)
1172339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1173339efd75SMaxim Sobolev 	} else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) {
1174339efd75SMaxim Sobolev 		struct timespec ts;
1175339efd75SMaxim Sobolev 
1176339efd75SMaxim Sobolev 		nanouptime(&ts);
1177339efd75SMaxim Sobolev 		*mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts),
1178339efd75SMaxim Sobolev 		    SCM_MONOTONIC, SOL_SOCKET);
1179339efd75SMaxim Sobolev 		if (*mp)
1180339efd75SMaxim Sobolev 			mp = &(*mp)->m_next;
1181be8a62e8SPoul-Henning Kamp 	}
118282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
118382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
118482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
118582c23ebaSBill Fenner 		if (*mp)
118682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118782c23ebaSBill Fenner 	}
11884957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
11894957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
11904957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
11914957466bSMatthew N. Dodd 		if (*mp)
11924957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
11934957466bSMatthew N. Dodd 	}
119482c23ebaSBill Fenner #ifdef notyet
119582c23ebaSBill Fenner 	/* XXX
119682c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
119782c23ebaSBill Fenner 	 * than they already were.
119882c23ebaSBill Fenner 	 */
119982c23ebaSBill Fenner 	/* options were tossed already */
120082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
120182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
120282c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
120382c23ebaSBill Fenner 		if (*mp)
120482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120582c23ebaSBill Fenner 	}
120682c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
120782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1208e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
120982c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
121082c23ebaSBill Fenner 		if (*mp)
121182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
121282c23ebaSBill Fenner 	}
121382c23ebaSBill Fenner #endif
121482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1215d314ad7bSJulian Elischer 		struct ifnet *ifp;
1216d314ad7bSJulian Elischer 		struct sdlbuf {
121782c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1218d314ad7bSJulian Elischer 			u_char	pad[32];
1219d314ad7bSJulian Elischer 		} sdlbuf;
1220d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1221d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
122282c23ebaSBill Fenner 
122346f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
122446f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12254a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1226d314ad7bSJulian Elischer 			/*
1227d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1228d314ad7bSJulian Elischer 			 */
122946f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
123046f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1231d314ad7bSJulian Elischer 				goto makedummy;
1232d314ad7bSJulian Elischer 			}
1233d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1234d314ad7bSJulian Elischer 		} else {
1235d314ad7bSJulian Elischer makedummy:
123646f2df9cSSergey Kandaurov 			sdl2->sdl_len =
123746f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1238d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1239d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1240d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1241d314ad7bSJulian Elischer 		}
1242d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
124382c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
124482c23ebaSBill Fenner 		if (*mp)
124582c23ebaSBill Fenner 			mp = &(*mp)->m_next;
124682c23ebaSBill Fenner 	}
12473cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
12483cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
12493cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
12503cca425bSMichael Tuexen 		if (*mp)
12513cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12523cca425bSMichael Tuexen 	}
12539d3ddf43SAdrian Chadd 
12549d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12559d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12569d3ddf43SAdrian Chadd 
12579d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12589d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12599d3ddf43SAdrian Chadd 
12609d3ddf43SAdrian Chadd 		/*
12619d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
12629d3ddf43SAdrian Chadd 		 * other - don't populate both?
12639d3ddf43SAdrian Chadd 		 */
12649d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
12659d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
12669d3ddf43SAdrian Chadd 		if (*mp)
12679d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12689d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
12699d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
12709d3ddf43SAdrian Chadd 		if (*mp)
12719d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12729d3ddf43SAdrian Chadd 	}
12739d3ddf43SAdrian Chadd 
12749d3ddf43SAdrian Chadd #ifdef	RSS
12759d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
12769d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12779d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
12789d3ddf43SAdrian Chadd 
12799d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12809d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12819d3ddf43SAdrian Chadd 
12829d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
12839d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
12849d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
12859d3ddf43SAdrian Chadd 			if (*mp)
12869d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
12879d3ddf43SAdrian Chadd 		}
12889d3ddf43SAdrian Chadd 	}
12899d3ddf43SAdrian Chadd #endif
129082c23ebaSBill Fenner }
129182c23ebaSBill Fenner 
12924d2e3692SLuigi Rizzo /*
129330916a2dSRobert Watson  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
129430916a2dSRobert Watson  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
129530916a2dSRobert Watson  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
129630916a2dSRobert Watson  * compiled.
12974d2e3692SLuigi Rizzo  */
12983e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on);
129982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
130082cea7e6SBjoern A. Zeeb 
130182cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
130282cea7e6SBjoern A. Zeeb 
1303df8bae1dSRodney W. Grimes int
1304f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1305f0068c4aSGarrett Wollman {
13068b615593SMarko Zec 
1307f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1308f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1309f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1310f0068c4aSGarrett Wollman 
1311603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1312f0068c4aSGarrett Wollman 		return EADDRINUSE;
1313f0068c4aSGarrett Wollman 
1314603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13151c5de19aSGarrett Wollman 	/*
13161c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13171c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13181c5de19aSGarrett Wollman 	 */
1319603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1320603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1321603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13221c5de19aSGarrett Wollman 	}
1323f0068c4aSGarrett Wollman 
1324f0068c4aSGarrett Wollman 	return 0;
1325f0068c4aSGarrett Wollman }
1326f0068c4aSGarrett Wollman 
1327f0068c4aSGarrett Wollman int
1328f0068c4aSGarrett Wollman ip_rsvp_done(void)
1329f0068c4aSGarrett Wollman {
13308b615593SMarko Zec 
1331603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13321c5de19aSGarrett Wollman 	/*
13331c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13341c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13351c5de19aSGarrett Wollman 	 */
1336603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1337603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1338603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13391c5de19aSGarrett Wollman 	}
1340f0068c4aSGarrett Wollman 	return 0;
1341f0068c4aSGarrett Wollman }
1342bbb4330bSLuigi Rizzo 
13438f5a8818SKevin Lo int
13448f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1345bbb4330bSLuigi Rizzo {
13468f5a8818SKevin Lo 	struct mbuf *m;
13478f5a8818SKevin Lo 
13488f5a8818SKevin Lo 	m = *mp;
13498f5a8818SKevin Lo 	*mp = NULL;
13508b615593SMarko Zec 
1351bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13528f5a8818SKevin Lo 		*mp = m;
13538f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13548f5a8818SKevin Lo 		return (IPPROTO_DONE);
1355bbb4330bSLuigi Rizzo 	}
1356bbb4330bSLuigi Rizzo 
1357bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1358bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1359bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1360bbb4330bSLuigi Rizzo 	 */
1361bbb4330bSLuigi Rizzo 
1362603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1363bbb4330bSLuigi Rizzo 		m_freem(m);
13648f5a8818SKevin Lo 		return (IPPROTO_DONE);
1365bbb4330bSLuigi Rizzo 	}
1366bbb4330bSLuigi Rizzo 
1367603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
13688f5a8818SKevin Lo 		*mp = m;
13698f5a8818SKevin Lo 		rip_input(mp, offp, proto);
13708f5a8818SKevin Lo 		return (IPPROTO_DONE);
1371bbb4330bSLuigi Rizzo 	}
1372bbb4330bSLuigi Rizzo 	/* Drop the packet */
1373bbb4330bSLuigi Rizzo 	m_freem(m);
13748f5a8818SKevin Lo 	return (IPPROTO_DONE);
1375bbb4330bSLuigi Rizzo }
1376