xref: /freebsd/sys/netinet/ip_input.c (revision c3bef61e584084a8f86fba71cb344f15fc20491c)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
350ac40133SBrian Somers #include "opt_bootp.h"
3627108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
376a800098SYoshinobu Inoue #include "opt_ipsec.h"
3833553d6eSBjoern A. Zeeb #include "opt_route.h"
39b8bc95cdSAdrian Chadd #include "opt_rss.h"
4074a9466cSGary Palmer 
41df8bae1dSRodney W. Grimes #include <sys/param.h>
42df8bae1dSRodney W. Grimes #include <sys/systm.h>
43ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
44df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
45b715f178SLuigi Rizzo #include <sys/malloc.h>
46df8bae1dSRodney W. Grimes #include <sys/domain.h>
47df8bae1dSRodney W. Grimes #include <sys/protosw.h>
48df8bae1dSRodney W. Grimes #include <sys/socket.h>
49df8bae1dSRodney W. Grimes #include <sys/time.h>
50df8bae1dSRodney W. Grimes #include <sys/kernel.h>
51385195c0SMarko Zec #include <sys/lock.h>
52cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
53385195c0SMarko Zec #include <sys/rwlock.h>
5457f60867SMark Johnston #include <sys/sdt.h>
551025071fSGarrett Wollman #include <sys/syslog.h>
56b5e8ce9fSBruce Evans #include <sys/sysctl.h>
57df8bae1dSRodney W. Grimes 
58c85540ddSAndrey A. Chernov #include <net/pfil.h>
59df8bae1dSRodney W. Grimes #include <net/if.h>
609494d596SBrooks Davis #include <net/if_types.h>
61d314ad7bSJulian Elischer #include <net/if_var.h>
6282c23ebaSBill Fenner #include <net/if_dl.h>
63df8bae1dSRodney W. Grimes #include <net/route.h>
64748e0b0aSGarrett Wollman #include <net/netisr.h>
65b2bdc62aSAdrian Chadd #include <net/rss_config.h>
664b79449eSBjoern A. Zeeb #include <net/vnet.h>
67df8bae1dSRodney W. Grimes 
68df8bae1dSRodney W. Grimes #include <netinet/in.h>
6957f60867SMark Johnston #include <netinet/in_kdtrace.h>
70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
71b5e8ce9fSBruce Evans #include <netinet/in_var.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip.h>
73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
75eddfbb76SRobert Watson #include <netinet/ip_fw.h>
76df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
77ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7858938916SGarrett Wollman #include <machine/in_cksum.h>
79a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
80b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
811dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h>
8233872124SGeorge V. Neville-Neil #include <netipsec/ipsec.h>
8333872124SGeorge V. Neville-Neil #include <netipsec/key.h>
84b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
85b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
86df8bae1dSRodney W. Grimes 
87f0068c4aSGarrett Wollman #include <sys/socketvar.h>
886ddbf1e2SGary Palmer 
89aed55708SRobert Watson #include <security/mac/mac_framework.h>
90aed55708SRobert Watson 
91d2035ffbSEd Maste #ifdef CTASSERT
92d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20);
93d2035ffbSEd Maste #endif
94d2035ffbSEd Maste 
951dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */
96843b0e57SXin LI extern void ipreass_init(void);
97843b0e57SXin LI extern void ipreass_drain(void);
98843b0e57SXin LI extern void ipreass_slowtimo(void);
991dbefcc0SGleb Smirnoff #ifdef VIMAGE
100843b0e57SXin LI extern void ipreass_destroy(void);
1011dbefcc0SGleb Smirnoff #endif
1021dbefcc0SGleb Smirnoff 
103cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock;
104cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
105f0068c4aSGarrett Wollman 
10682cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10782cea7e6SBjoern A. Zeeb 
10882cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1096df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
110eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1118b615593SMarko Zec     "Enable IP forwarding between interfaces");
1120312fbe9SPoul-Henning Kamp 
1133e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
11482cea7e6SBjoern A. Zeeb #define	V_ipsendredirects	VNET(ipsendredirects)
1156df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
116eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1178b615593SMarko Zec     "Enable sending IP redirects");
1180312fbe9SPoul-Henning Kamp 
119823db0e9SDon Lewis /*
120823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
121823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
122a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
123823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1243f67c834SDon Lewis  *
125a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
126a8f12100SDon Lewis  * to translate the destination address to another local interface.
1273f67c834SDon Lewis  *
1283f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1293f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1303f67c834SDon Lewis  * packets for those addresses are received.
131823db0e9SDon Lewis  */
1323e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface);
13382cea7e6SBjoern A. Zeeb #define	V_ip_checkinterface	VNET(ip_checkinterface)
1346df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
135eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1368b615593SMarko Zec     "Verify packet arrives on correct interface");
137b3e95d4eSJonathan Lemon 
1380b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
139df8bae1dSRodney W. Grimes 
140d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
141d4b5cae4SRobert Watson 	.nh_name = "ip",
142d4b5cae4SRobert Watson 	.nh_handler = ip_input,
143d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
144b8bc95cdSAdrian Chadd #ifdef	RSS
1452527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
146b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
147b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
148b8bc95cdSAdrian Chadd #else
149d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
150b8bc95cdSAdrian Chadd #endif
151d4b5cae4SRobert Watson };
152ca925d9cSJonathan Lemon 
153b8bc95cdSAdrian Chadd #ifdef	RSS
154b8bc95cdSAdrian Chadd /*
155b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
156b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
157b8bc95cdSAdrian Chadd  *
158b8bc95cdSAdrian Chadd  * It should likely have something that assert it
159b8bc95cdSAdrian Chadd  * actually has valid flow details.
160b8bc95cdSAdrian Chadd  */
161b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
162b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
163b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
164b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
165499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
166b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
167b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
168b8bc95cdSAdrian Chadd };
169b8bc95cdSAdrian Chadd #endif
170b8bc95cdSAdrian Chadd 
171df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
172f0ffb944SJulian Elischer extern	struct protosw inetsw[];
173df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
17482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
177ca925d9cSJonathan Lemon 
1780312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1790312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1803d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1810312fbe9SPoul-Henning Kamp #endif
1820312fbe9SPoul-Henning Kamp 
1831b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18482cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth);
1856df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
186eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
187eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
1881b968362SDag-Erling Smørgrav #endif
189eddfbb76SRobert Watson 
190315e3e38SRobert Watson /*
1915da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1925923c293SGleb Smirnoff  */
1935da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1945da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1955da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1965da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1975923c293SGleb Smirnoff 
1985923c293SGleb Smirnoff #ifdef VIMAGE
1995da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2005923c293SGleb Smirnoff #endif /* VIMAGE */
2015923c293SGleb Smirnoff 
2025923c293SGleb Smirnoff /*
203315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2045923c293SGleb Smirnoff  * into ipstat treated as an array.
205315e3e38SRobert Watson  */
206315e3e38SRobert Watson void
207315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
208315e3e38SRobert Watson {
209315e3e38SRobert Watson 
2105da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
211315e3e38SRobert Watson }
212315e3e38SRobert Watson 
213315e3e38SRobert Watson void
214315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
215315e3e38SRobert Watson {
216315e3e38SRobert Watson 
2175da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
218315e3e38SRobert Watson }
219315e3e38SRobert Watson 
220d4b5cae4SRobert Watson static int
221d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
222d4b5cae4SRobert Watson {
223d4b5cae4SRobert Watson 	int error, qlimit;
224d4b5cae4SRobert Watson 
225d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
226d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
227d4b5cae4SRobert Watson 	if (error || !req->newptr)
228d4b5cae4SRobert Watson 		return (error);
229d4b5cae4SRobert Watson 	if (qlimit < 1)
230d4b5cae4SRobert Watson 		return (EINVAL);
231d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
232d4b5cae4SRobert Watson }
233d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
234d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
235d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
236d4b5cae4SRobert Watson 
237d4b5cae4SRobert Watson static int
238d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
239d4b5cae4SRobert Watson {
240d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
241d4b5cae4SRobert Watson 	int error, qdrops;
242d4b5cae4SRobert Watson 
243d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
244d4b5cae4SRobert Watson 	qdrops = qdrops_long;
245d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
246d4b5cae4SRobert Watson 	if (error || !req->newptr)
247d4b5cae4SRobert Watson 		return (error);
248d4b5cae4SRobert Watson 	if (qdrops != 0)
249d4b5cae4SRobert Watson 		return (EINVAL);
250d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
251d4b5cae4SRobert Watson 	return (0);
252d4b5cae4SRobert Watson }
253d4b5cae4SRobert Watson 
254d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
255d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
256d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
257d4b5cae4SRobert Watson 
258b8bc95cdSAdrian Chadd #ifdef	RSS
259b8bc95cdSAdrian Chadd static int
260b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
261b8bc95cdSAdrian Chadd {
262b8bc95cdSAdrian Chadd 	int error, qlimit;
263b8bc95cdSAdrian Chadd 
264b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
265b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
266b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
267b8bc95cdSAdrian Chadd 		return (error);
268b8bc95cdSAdrian Chadd 	if (qlimit < 1)
269b8bc95cdSAdrian Chadd 		return (EINVAL);
270b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
271b8bc95cdSAdrian Chadd }
272b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
273b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
274b8bc95cdSAdrian Chadd     "Maximum size of the IP direct input queue");
275b8bc95cdSAdrian Chadd 
276b8bc95cdSAdrian Chadd static int
277b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
278b8bc95cdSAdrian Chadd {
279b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
280b8bc95cdSAdrian Chadd 	int error, qdrops;
281b8bc95cdSAdrian Chadd 
282b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
283b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
284b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
285b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
286b8bc95cdSAdrian Chadd 		return (error);
287b8bc95cdSAdrian Chadd 	if (qdrops != 0)
288b8bc95cdSAdrian Chadd 		return (EINVAL);
289b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
290b8bc95cdSAdrian Chadd 	return (0);
291b8bc95cdSAdrian Chadd }
292b8bc95cdSAdrian Chadd 
293b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
294b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
295b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
296b8bc95cdSAdrian Chadd #endif	/* RSS */
297b8bc95cdSAdrian Chadd 
298df8bae1dSRodney W. Grimes /*
299df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
300df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
301df8bae1dSRodney W. Grimes  */
302df8bae1dSRodney W. Grimes void
303f2565d68SRobert Watson ip_init(void)
304df8bae1dSRodney W. Grimes {
305f2565d68SRobert Watson 	struct protosw *pr;
306f2565d68SRobert Watson 	int i;
307df8bae1dSRodney W. Grimes 
308603724d3SBjoern A. Zeeb 	TAILQ_INIT(&V_in_ifaddrhead);
309603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3101ed81b73SMarko Zec 
3111ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3121dbefcc0SGleb Smirnoff 	ipreass_init();
3131ed81b73SMarko Zec 
3140b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3150b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3160b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3170b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3180b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3190b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3200b4b0b0fSJulian Elischer 
321ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
322ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
323ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
324ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
325ef91a976SAndrey V. Elsukov 		    __func__);
326ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
327ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
328ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
329ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
330ef91a976SAndrey V. Elsukov 		    __func__);
331ef91a976SAndrey V. Elsukov 
3321ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
333484149deSBjoern A. Zeeb #ifdef VIMAGE
334484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
335484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
336484149deSBjoern A. Zeeb #ifdef	RSS
337484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
338484149deSBjoern A. Zeeb #endif
3391ed81b73SMarko Zec 		return;
340484149deSBjoern A. Zeeb 	}
341484149deSBjoern A. Zeeb #endif
3421ed81b73SMarko Zec 
343f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34402410549SRobert Watson 	if (pr == NULL)
345db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
346db09bef3SAndre Oppermann 
347db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
348df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
349df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
350db09bef3SAndre Oppermann 	/*
351db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
352db09bef3SAndre Oppermann 	 * in ip_protox[].
353db09bef3SAndre Oppermann 	 */
354f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
355f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
356df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
357db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
358db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
359db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
360df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
361db09bef3SAndre Oppermann 		}
362194a213eSAndrey A. Chernov 
363d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
364b8bc95cdSAdrian Chadd #ifdef	RSS
365b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
366b8bc95cdSAdrian Chadd #endif
367df8bae1dSRodney W. Grimes }
368df8bae1dSRodney W. Grimes 
3699802380eSBjoern A. Zeeb #ifdef VIMAGE
3703f58662dSBjoern A. Zeeb static void
3713f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3729802380eSBjoern A. Zeeb {
37389856f7eSBjoern A. Zeeb 	struct ifnet *ifp;
374ef91a976SAndrey V. Elsukov 	int error;
3754d3dfd45SMikolaj Golub 
376484149deSBjoern A. Zeeb #ifdef	RSS
377484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
378484149deSBjoern A. Zeeb #endif
379484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
380484149deSBjoern A. Zeeb 
381ef91a976SAndrey V. Elsukov 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
3824d3dfd45SMikolaj Golub 		printf("%s: WARNING: unable to unregister pfil hook, "
383ef91a976SAndrey V. Elsukov 		    "error %d\n", __func__, error);
3849802380eSBjoern A. Zeeb 
385ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
386ef91a976SAndrey V. Elsukov 	if (error != 0) {
387ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
388ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
389ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
390ef91a976SAndrey V. Elsukov 	}
391ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
392ef91a976SAndrey V. Elsukov 	if (error != 0) {
393ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
394ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
395ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
396ef91a976SAndrey V. Elsukov 	}
39789856f7eSBjoern A. Zeeb 
39889856f7eSBjoern A. Zeeb 	/* Remove the IPv4 addresses from all interfaces. */
39989856f7eSBjoern A. Zeeb 	in_ifscrub_all();
40089856f7eSBjoern A. Zeeb 
40189856f7eSBjoern A. Zeeb 	/* Make sure the IPv4 routes are gone as well. */
40289856f7eSBjoern A. Zeeb 	IFNET_RLOCK();
40389856f7eSBjoern A. Zeeb 	TAILQ_FOREACH(ifp, &V_ifnet, if_link)
40489856f7eSBjoern A. Zeeb 		rt_flushifroutes_af(ifp, AF_INET);
40589856f7eSBjoern A. Zeeb 	IFNET_RUNLOCK();
4069802380eSBjoern A. Zeeb 
407e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4081dbefcc0SGleb Smirnoff 	ipreass_destroy();
40989856f7eSBjoern A. Zeeb 
41089856f7eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
41189856f7eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
4129802380eSBjoern A. Zeeb }
4133f58662dSBjoern A. Zeeb 
4143f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4159802380eSBjoern A. Zeeb #endif
4169802380eSBjoern A. Zeeb 
417b8bc95cdSAdrian Chadd #ifdef	RSS
418b8bc95cdSAdrian Chadd /*
419b8bc95cdSAdrian Chadd  * IP direct input routine.
420b8bc95cdSAdrian Chadd  *
421b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
422b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
423b8bc95cdSAdrian Chadd  */
424b8bc95cdSAdrian Chadd void
425b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
426b8bc95cdSAdrian Chadd {
427b8bc95cdSAdrian Chadd 	struct ip *ip;
428b8bc95cdSAdrian Chadd 	int hlen;
429b8bc95cdSAdrian Chadd 
430b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
431b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
432b8bc95cdSAdrian Chadd 
433b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
434b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
435b8bc95cdSAdrian Chadd 	return;
436b8bc95cdSAdrian Chadd }
437b8bc95cdSAdrian Chadd #endif
438b8bc95cdSAdrian Chadd 
4394d2e3692SLuigi Rizzo /*
440df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
441df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
442df8bae1dSRodney W. Grimes  */
443c67b1d17SGarrett Wollman void
444c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
445df8bae1dSRodney W. Grimes {
4469188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4475da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
448ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4490aade26eSRobert Watson 	struct ifnet *ifp;
4509b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
45121d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
45202c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
453f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
454b715f178SLuigi Rizzo 
455fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
456db40007dSAndrew R. Reiter 
457ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
45876ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
45976ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4602b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
46153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4628f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4639b932e9eSAndre Oppermann 		goto ours;
4642b25acc1SLuigi Rizzo 	}
4652b25acc1SLuigi Rizzo 
46686425c62SRobert Watson 	IPSTAT_INC(ips_total);
46758938916SGarrett Wollman 
46858938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
46958938916SGarrett Wollman 		goto tooshort;
47058938916SGarrett Wollman 
471df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4720b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
47386425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
474c67b1d17SGarrett Wollman 		return;
475df8bae1dSRodney W. Grimes 	}
476df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
47758938916SGarrett Wollman 
47853be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
47986425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
480df8bae1dSRodney W. Grimes 		goto bad;
481df8bae1dSRodney W. Grimes 	}
48258938916SGarrett Wollman 
48353be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
484df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
48586425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
486df8bae1dSRodney W. Grimes 		goto bad;
487df8bae1dSRodney W. Grimes 	}
488df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4890b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
49086425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
491c67b1d17SGarrett Wollman 			return;
492df8bae1dSRodney W. Grimes 		}
493df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
494df8bae1dSRodney W. Grimes 	}
49533841545SHajimu UMEMOTO 
49657f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
49757f60867SMark Johnston 
49833841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
4990aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
50033841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
50133841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
5020aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
50386425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
50433841545SHajimu UMEMOTO 			goto bad;
50533841545SHajimu UMEMOTO 		}
50633841545SHajimu UMEMOTO 	}
50733841545SHajimu UMEMOTO 
508db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
509db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
510db4f9cc7SJonathan Lemon 	} else {
51158938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
51247c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
51358938916SGarrett Wollman 		} else {
51447c861ecSBrian Somers 			sum = in_cksum(m, hlen);
51558938916SGarrett Wollman 		}
516db4f9cc7SJonathan Lemon 	}
51747c861ecSBrian Somers 	if (sum) {
51886425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
519df8bae1dSRodney W. Grimes 		goto bad;
520df8bae1dSRodney W. Grimes 	}
521df8bae1dSRodney W. Grimes 
52202b199f1SMax Laier #ifdef ALTQ
52302b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
52402b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
52502b199f1SMax Laier 		return;
52602b199f1SMax Laier #endif
52702b199f1SMax Laier 
52821d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
52921d172a3SGleb Smirnoff 	if (ip_len < hlen) {
53086425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
531df8bae1dSRodney W. Grimes 		goto bad;
532df8bae1dSRodney W. Grimes 	}
533df8bae1dSRodney W. Grimes 
534df8bae1dSRodney W. Grimes 	/*
535df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
536df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
537df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
538df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
539df8bae1dSRodney W. Grimes 	 */
54021d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
54158938916SGarrett Wollman tooshort:
54286425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
543df8bae1dSRodney W. Grimes 		goto bad;
544df8bae1dSRodney W. Grimes 	}
54521d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
546df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
54721d172a3SGleb Smirnoff 			m->m_len = ip_len;
54821d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
549df8bae1dSRodney W. Grimes 		} else
55021d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
551df8bae1dSRodney W. Grimes 	}
552b8bc95cdSAdrian Chadd 
55333872124SGeorge V. Neville-Neil 	/* Try to forward the packet, but if we fail continue */
554b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
55533872124SGeorge V. Neville-Neil 	/* For now we do not handle IPSEC in tryforward. */
55633872124SGeorge V. Neville-Neil 	if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
55733872124SGeorge V. Neville-Neil 	    (V_ipforwarding == 1))
55833872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
55933872124SGeorge V. Neville-Neil 			return;
56014dd6717SSam Leffler 	/*
561ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
56214dd6717SSam Leffler 	 */
563cc977adcSBjoern A. Zeeb 	if (ip_ipsec_filtertunnel(m))
564c21fd232SAndre Oppermann 		goto passin;
56533872124SGeorge V. Neville-Neil #else
56633872124SGeorge V. Neville-Neil 	if (V_ipforwarding == 1)
56733872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
56833872124SGeorge V. Neville-Neil 			return;
569b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
5703f67c834SDon Lewis 
571c4ac87eaSDarren Reed 	/*
572134ea224SSam Leffler 	 * Run through list of hooks for input packets.
573f51f805fSSam Leffler 	 *
574f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
575f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
576f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
577c4ac87eaSDarren Reed 	 */
578c21fd232SAndre Oppermann 
579c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5800b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
581c21fd232SAndre Oppermann 		goto passin;
582c21fd232SAndre Oppermann 
583f51f805fSSam Leffler 	odst = ip->ip_dst;
5840b4b0b0fSJulian Elischer 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
585beec8214SDarren Reed 		return;
586134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
587c4ac87eaSDarren Reed 		return;
5889b932e9eSAndre Oppermann 
589c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
59002c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
5910aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5929b932e9eSAndre Oppermann 
5939b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
5949b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
5959b932e9eSAndre Oppermann 		goto ours;
5969b932e9eSAndre Oppermann 	}
597ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
598de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
599099dd043SAndre Oppermann 			/*
600ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
601ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
602ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
603099dd043SAndre Oppermann 			 */
604ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
605099dd043SAndre Oppermann 			return;
606099dd043SAndre Oppermann 		}
607ffdbf9daSAndrey V. Elsukov 	}
608c21fd232SAndre Oppermann passin:
60921d172a3SGleb Smirnoff 
61021d172a3SGleb Smirnoff 	/*
611df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
612df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
613df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
614df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
615df8bae1dSRodney W. Grimes 	 */
6169b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
617c67b1d17SGarrett Wollman 		return;
618df8bae1dSRodney W. Grimes 
619f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
620f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
621f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
622f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
623f0068c4aSGarrett Wollman 	 * grabbing the packet.
624f0068c4aSGarrett Wollman          */
625603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
626f0068c4aSGarrett Wollman 		goto ours;
627f0068c4aSGarrett Wollman 
628df8bae1dSRodney W. Grimes 	/*
629df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
630cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
631cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
632cc766e04SGarrett Wollman 	 * with it).
633df8bae1dSRodney W. Grimes 	 */
634603724d3SBjoern A. Zeeb 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
635cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
636cc766e04SGarrett Wollman 		goto ours;
637cc766e04SGarrett Wollman 
6387538a9a0SJonathan Lemon 	/*
639823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
640823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
641823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
642e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
643e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6443f67c834SDon Lewis 	 *
6453f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6463f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6473f67c834SDon Lewis 	 *
648a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6493f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6503f67c834SDon Lewis 	 * the packets are received.
651a9771948SGleb Smirnoff 	 *
652a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
653a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
654a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
655823db0e9SDon Lewis 	 */
656603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6570aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
65854bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
659823db0e9SDon Lewis 
660ca925d9cSJonathan Lemon 	/*
661ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
662ca925d9cSJonathan Lemon 	 */
6632d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6649b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
665f9e354dfSJulian Elischer 		/*
666823db0e9SDon Lewis 		 * If the address matches, verify that the packet
667823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
668823db0e9SDon Lewis 		 * enabled.
669f9e354dfSJulian Elischer 		 */
6709b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6718c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6727caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6737caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
6747caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
6752d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
676ed1ff184SJulian Elischer 			goto ours;
677ca925d9cSJonathan Lemon 		}
6788c0fec80SRobert Watson 	}
6792d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6802d9cfabaSRobert Watson 
681823db0e9SDon Lewis 	/*
682ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
683ca925d9cSJonathan Lemon 	 *
684ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
685ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
686ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
687ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
688823db0e9SDon Lewis 	 */
6890aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
690137f91e8SJohn Baldwin 		IF_ADDR_RLOCK(ifp);
6910aade26eSRobert Watson 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
692ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
693ca925d9cSJonathan Lemon 				continue;
694ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
695df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6960aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
6977caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6987caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
6997caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
700137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
701df8bae1dSRodney W. Grimes 				goto ours;
7020aade26eSRobert Watson 			}
7030ac40133SBrian Somers #ifdef BOOTP_COMPAT
7040aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
7057caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
7067caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
7077caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
708137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
709ca925d9cSJonathan Lemon 				goto ours;
7100aade26eSRobert Watson 			}
7110ac40133SBrian Somers #endif
712df8bae1dSRodney W. Grimes 		}
713137f91e8SJohn Baldwin 		IF_ADDR_RUNLOCK(ifp);
71419e5b0a7SRobert Watson 		ia = NULL;
715df8bae1dSRodney W. Grimes 	}
716f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
717f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
71886425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
719f8429ca2SBruce M Simpson 		m_freem(m);
720f8429ca2SBruce M Simpson 		return;
721f8429ca2SBruce M Simpson 	}
722df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
723603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
724df8bae1dSRodney W. Grimes 			/*
725df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
726df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
727df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
728df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
729df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
730df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
731df8bae1dSRodney W. Grimes 			 */
7320aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
73386425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
734df8bae1dSRodney W. Grimes 				m_freem(m);
735c67b1d17SGarrett Wollman 				return;
736df8bae1dSRodney W. Grimes 			}
737df8bae1dSRodney W. Grimes 
738df8bae1dSRodney W. Grimes 			/*
73911612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
740df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
741df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
742df8bae1dSRodney W. Grimes 			 */
743df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
744df8bae1dSRodney W. Grimes 				goto ours;
74586425c62SRobert Watson 			IPSTAT_INC(ips_forward);
746df8bae1dSRodney W. Grimes 		}
747df8bae1dSRodney W. Grimes 		/*
748d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
749d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
750d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
751df8bae1dSRodney W. Grimes 		 */
752df8bae1dSRodney W. Grimes 		goto ours;
753df8bae1dSRodney W. Grimes 	}
754df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
755df8bae1dSRodney W. Grimes 		goto ours;
756df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
757df8bae1dSRodney W. Grimes 		goto ours;
758df8bae1dSRodney W. Grimes 
7596a800098SYoshinobu Inoue 	/*
760df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
761df8bae1dSRodney W. Grimes 	 */
762603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
76386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
764df8bae1dSRodney W. Grimes 		m_freem(m);
765546f251bSChris D. Faulhaber 	} else {
7669b932e9eSAndre Oppermann 		ip_forward(m, dchg);
767546f251bSChris D. Faulhaber 	}
768c67b1d17SGarrett Wollman 	return;
769df8bae1dSRodney W. Grimes 
770df8bae1dSRodney W. Grimes ours:
771d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
772d0ebc0d2SYaroslav Tykhiy 	/*
773d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
774d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
775d0ebc0d2SYaroslav Tykhiy 	 */
7767caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
777d0ebc0d2SYaroslav Tykhiy 		return;
778d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
779d0ebc0d2SYaroslav Tykhiy 
78063f8d699SJordan K. Hubbard 	/*
781b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
782ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
783df8bae1dSRodney W. Grimes 	 */
7848f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
785aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
786f0cada84SAndre Oppermann 		m = ip_reass(m);
787f0cada84SAndre Oppermann 		if (m == NULL)
788c67b1d17SGarrett Wollman 			return;
7896a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7907e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
79153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
792f0cada84SAndre Oppermann 	}
793f0cada84SAndre Oppermann 
794b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
79533841545SHajimu UMEMOTO 	/*
79633841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
79733841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
79833841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
79933841545SHajimu UMEMOTO 	 */
800e58320f1SAndrey V. Elsukov 	if (ip_ipsec_input(m, ip->ip_p) != 0)
80133841545SHajimu UMEMOTO 		goto bad;
802b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
80333841545SHajimu UMEMOTO 
804df8bae1dSRodney W. Grimes 	/*
805df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
806df8bae1dSRodney W. Grimes 	 */
80786425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
8089b932e9eSAndre Oppermann 
8098f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
810c67b1d17SGarrett Wollman 	return;
811df8bae1dSRodney W. Grimes bad:
812df8bae1dSRodney W. Grimes 	m_freem(m);
813c67b1d17SGarrett Wollman }
814c67b1d17SGarrett Wollman 
815c67b1d17SGarrett Wollman /*
816df8bae1dSRodney W. Grimes  * IP timer processing;
817df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
818df8bae1dSRodney W. Grimes  * queue, discard it.
819df8bae1dSRodney W. Grimes  */
820df8bae1dSRodney W. Grimes void
821f2565d68SRobert Watson ip_slowtimo(void)
822df8bae1dSRodney W. Grimes {
8238b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
824df8bae1dSRodney W. Grimes 
8255ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8268b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8278b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8281dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8298b615593SMarko Zec 		CURVNET_RESTORE();
8308b615593SMarko Zec 	}
8315ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
832df8bae1dSRodney W. Grimes }
833df8bae1dSRodney W. Grimes 
8349802380eSBjoern A. Zeeb void
8359802380eSBjoern A. Zeeb ip_drain(void)
8369802380eSBjoern A. Zeeb {
8379802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8389802380eSBjoern A. Zeeb 
8399802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8409802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8419802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8421dbefcc0SGleb Smirnoff 		ipreass_drain();
8438b615593SMarko Zec 		CURVNET_RESTORE();
8448b615593SMarko Zec 	}
8455ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
846df8bae1dSRodney W. Grimes }
847df8bae1dSRodney W. Grimes 
848df8bae1dSRodney W. Grimes /*
849de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
850de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
851de38924dSAndre Oppermann  */
852de38924dSAndre Oppermann int
8531b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
854de38924dSAndre Oppermann {
855de38924dSAndre Oppermann 	struct protosw *pr;
856de38924dSAndre Oppermann 
857de38924dSAndre Oppermann 	/* Sanity checks. */
8581b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
859de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
860de38924dSAndre Oppermann 
861de38924dSAndre Oppermann 	/*
862de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
863de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
864de38924dSAndre Oppermann 	 */
865de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
866de38924dSAndre Oppermann 	if (pr == NULL)
867de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
868de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
869de38924dSAndre Oppermann 		return (EEXIST);
870de38924dSAndre Oppermann 
871de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
872de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
873de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
874de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
875de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
876de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
877de38924dSAndre Oppermann 			return (0);
878de38924dSAndre Oppermann 		}
879de38924dSAndre Oppermann 	}
880de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
881de38924dSAndre Oppermann }
882de38924dSAndre Oppermann 
883de38924dSAndre Oppermann int
8841b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
885de38924dSAndre Oppermann {
886de38924dSAndre Oppermann 	struct protosw *pr;
887de38924dSAndre Oppermann 
888de38924dSAndre Oppermann 	/* Sanity checks. */
8891b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
890de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
891de38924dSAndre Oppermann 
892de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
893de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
894de38924dSAndre Oppermann 	if (pr == NULL)
895de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
896de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
897de38924dSAndre Oppermann 		return (ENOENT);
898de38924dSAndre Oppermann 
899de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
900de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
901de38924dSAndre Oppermann 	return (0);
902de38924dSAndre Oppermann }
903de38924dSAndre Oppermann 
/*
 * Table of errno values with one slot for each command code below
 * PRC_NCMDS.
 * NOTE(review): presumably indexed by the PRC_* control-input codes,
 * with 0 meaning "no error to report" -- confirm against the users
 * of this table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	EHOSTUNREACH,		/* 18 */
	0,			/* 19 */
	ENOPROTOOPT,		/* 20 */
	ECONNREFUSED		/* 21 */
};
912df8bae1dSRodney W. Grimes 
913df8bae1dSRodney W. Grimes /*
914df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
915df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
916df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
917df8bae1dSRodney W. Grimes  * of codes and types.
918df8bae1dSRodney W. Grimes  *
919df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
920df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
921df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
922df8bae1dSRodney W. Grimes  * protocol deal with that.
923df8bae1dSRodney W. Grimes  *
924df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
925df8bae1dSRodney W. Grimes  * via a source route.
926df8bae1dSRodney W. Grimes  */
9279b932e9eSAndre Oppermann void
9289b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
929df8bae1dSRodney W. Grimes {
9302b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
931efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
932df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
933d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9349b932e9eSAndre Oppermann 	struct in_addr dest;
935b835b6feSBjoern A. Zeeb 	struct route ro;
936c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9373efc3014SJulian Elischer 
9389b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
93986425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
940df8bae1dSRodney W. Grimes 		m_freem(m);
941df8bae1dSRodney W. Grimes 		return;
942df8bae1dSRodney W. Grimes 	}
9438922ddbeSAndrey V. Elsukov #ifdef IPSEC
9448922ddbeSAndrey V. Elsukov 	if (ip_ipsec_fwd(m) != 0) {
9458922ddbeSAndrey V. Elsukov 		IPSTAT_INC(ips_cantforward);
9468922ddbeSAndrey V. Elsukov 		m_freem(m);
9478922ddbeSAndrey V. Elsukov 		return;
9488922ddbeSAndrey V. Elsukov 	}
9498922ddbeSAndrey V. Elsukov #endif /* IPSEC */
9501b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
951603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
9521b968362SDag-Erling Smørgrav #endif
953df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
9541b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
95502c1c707SAndre Oppermann 			    0, 0);
956df8bae1dSRodney W. Grimes 			return;
957df8bae1dSRodney W. Grimes 		}
9581b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
9591b968362SDag-Erling Smørgrav 	}
9601b968362SDag-Erling Smørgrav #endif
961df8bae1dSRodney W. Grimes 
962d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
963d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
964d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
965d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
966d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
967d14122b0SErmal Luçi #ifdef RADIX_MPATH
968d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
969d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
970d14122b0SErmal Luçi 	    M_GETFIB(m));
971d14122b0SErmal Luçi #else
972d14122b0SErmal Luçi 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
973d14122b0SErmal Luçi #endif
974d14122b0SErmal Luçi 	if (ro.ro_rt != NULL) {
975d14122b0SErmal Luçi 		ia = ifatoia(ro.ro_rt->rt_ifa);
976d14122b0SErmal Luçi 		ifa_ref(&ia->ia_ifa);
97756844a62SErmal Luçi 	} else
97856844a62SErmal Luçi 		ia = NULL;
979efbad259SEdward Tomasz Napierala #ifndef IPSEC
980efbad259SEdward Tomasz Napierala 	/*
981efbad259SEdward Tomasz Napierala 	 * 'ia' may be NULL if there is no route for this destination.
982efbad259SEdward Tomasz Napierala 	 * In case of IPsec, Don't discard it just yet, but pass it to
983efbad259SEdward Tomasz Napierala 	 * ip_output in case of outgoing IPsec policy.
984efbad259SEdward Tomasz Napierala 	 */
985d23d475fSGuido van Rooij 	if (!srcrt && ia == NULL) {
98602c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
987d14122b0SErmal Luçi 		RO_RTFREE(&ro);
988df8bae1dSRodney W. Grimes 		return;
98902c1c707SAndre Oppermann 	}
990efbad259SEdward Tomasz Napierala #endif
991df8bae1dSRodney W. Grimes 
992df8bae1dSRodney W. Grimes 	/*
993bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
994bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
995bfef7ed4SIan Dowse 	 *
9964d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9974d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9984d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9994d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
10004d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
10014d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
10024d2e3692SLuigi Rizzo 	 *
1003*c3bef61eSKevin Lo 	 * We don't use m_copym() because it might return a reference
1004bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1005bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1006bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1007df8bae1dSRodney W. Grimes 	 */
1008dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1009eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10109967cafcSSam Leffler 		/*
10119967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10129967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10139967cafcSSam Leffler 		 * be conservative and just discard the copy since
10149967cafcSSam Leffler 		 * code below may some day want the tags.
10159967cafcSSam Leffler 		 */
10169967cafcSSam Leffler 		m_free(mcopy);
10179967cafcSSam Leffler 		mcopy = NULL;
10189967cafcSSam Leffler 	}
1019bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10208f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1021e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1022bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1023bfef7ed4SIan Dowse 	}
102404287599SRuslan Ermilov 
102504287599SRuslan Ermilov #ifdef IPSTEALTH
1026603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
102704287599SRuslan Ermilov #endif
102804287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
102904287599SRuslan Ermilov #ifdef IPSTEALTH
103004287599SRuslan Ermilov 	}
103104287599SRuslan Ermilov #endif
1032df8bae1dSRodney W. Grimes 
1033df8bae1dSRodney W. Grimes 	/*
1034df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1035df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1036df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1037df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1038df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1039df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1040df8bae1dSRodney W. Grimes 	 */
10419b932e9eSAndre Oppermann 	dest.s_addr = 0;
1042efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1043efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
104402c1c707SAndre Oppermann 		struct rtentry *rt;
104502c1c707SAndre Oppermann 
104602c1c707SAndre Oppermann 		rt = ro.ro_rt;
104702c1c707SAndre Oppermann 
104802c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
10499b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1050df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1051df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1052df8bae1dSRodney W. Grimes 
1053df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1054df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1055df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
10569b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1057df8bae1dSRodney W. Grimes 				else
10589b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1059df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1060df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1061df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1062df8bae1dSRodney W. Grimes 			}
1063df8bae1dSRodney W. Grimes 		}
106402c1c707SAndre Oppermann 	}
1065df8bae1dSRodney W. Grimes 
1066b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1067b835b6feSBjoern A. Zeeb 
1068b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1069e3a7aa6fSGleb Smirnoff 		mtu = ro.ro_rt->rt_mtu;
1070bf984051SGleb Smirnoff 	RO_RTFREE(&ro);
1071b835b6feSBjoern A. Zeeb 
1072df8bae1dSRodney W. Grimes 	if (error)
107386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1074df8bae1dSRodney W. Grimes 	else {
107586425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1076df8bae1dSRodney W. Grimes 		if (type)
107786425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1078df8bae1dSRodney W. Grimes 		else {
10799188b4a1SAndre Oppermann 			if (mcopy)
1080df8bae1dSRodney W. Grimes 				m_freem(mcopy);
10818c0fec80SRobert Watson 			if (ia != NULL)
10828c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1083df8bae1dSRodney W. Grimes 			return;
1084df8bae1dSRodney W. Grimes 		}
1085df8bae1dSRodney W. Grimes 	}
10868c0fec80SRobert Watson 	if (mcopy == NULL) {
10878c0fec80SRobert Watson 		if (ia != NULL)
10888c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
1089df8bae1dSRodney W. Grimes 		return;
10908c0fec80SRobert Watson 	}
1091df8bae1dSRodney W. Grimes 
1092df8bae1dSRodney W. Grimes 	switch (error) {
1093df8bae1dSRodney W. Grimes 
1094df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1095df8bae1dSRodney W. Grimes 		/* type, code set above */
1096df8bae1dSRodney W. Grimes 		break;
1097df8bae1dSRodney W. Grimes 
1098efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1099df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1100df8bae1dSRodney W. Grimes 	case ENETDOWN:
1101df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1102df8bae1dSRodney W. Grimes 	default:
1103df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1104df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1105df8bae1dSRodney W. Grimes 		break;
1106df8bae1dSRodney W. Grimes 
1107df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1108df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1109df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11101dfcf0d2SAndre Oppermann 
1111b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
1112b835b6feSBjoern A. Zeeb 		/*
1113b835b6feSBjoern A. Zeeb 		 * If IPsec is configured for this path,
1114b835b6feSBjoern A. Zeeb 		 * override any possibly mtu value set by ip_output.
1115b835b6feSBjoern A. Zeeb 		 */
11161c044382SBjoern A. Zeeb 		mtu = ip_ipsec_mtu(mcopy, mtu);
1117b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
11189b932e9eSAndre Oppermann 		/*
1119b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1120b835b6feSBjoern A. Zeeb 		 * interface MTU.
1121ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1122ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1123ab48768bSAndre Oppermann 		 * current packet size.
11249b932e9eSAndre Oppermann 		 */
1125b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1126b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1127b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1128b835b6feSBjoern A. Zeeb 		} else {
1129ab48768bSAndre Oppermann 			if (ia != NULL)
1130c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1131ab48768bSAndre Oppermann 			else
11328f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1133ab48768bSAndre Oppermann 		}
113486425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1135df8bae1dSRodney W. Grimes 		break;
1136df8bae1dSRodney W. Grimes 
1137df8bae1dSRodney W. Grimes 	case ENOBUFS:
11383a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11393a06e3e0SRuslan Ermilov 		m_freem(mcopy);
11408c0fec80SRobert Watson 		if (ia != NULL)
11418c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
11423a06e3e0SRuslan Ermilov 		return;
1143df8bae1dSRodney W. Grimes 	}
11448c0fec80SRobert Watson 	if (ia != NULL)
11458c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
1146c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1147df8bae1dSRodney W. Grimes }
1148df8bae1dSRodney W. Grimes 
114982c23ebaSBill Fenner void
1150f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1151f2565d68SRobert Watson     struct mbuf *m)
115282c23ebaSBill Fenner {
11538b615593SMarko Zec 
1154be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
1155be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1156be8a62e8SPoul-Henning Kamp 
1157be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1158be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
1159be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1160be8a62e8SPoul-Henning Kamp 			    SCM_BINTIME, SOL_SOCKET);
1161be8a62e8SPoul-Henning Kamp 			if (*mp)
1162be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
1163be8a62e8SPoul-Henning Kamp 		}
116482c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
116582c23ebaSBill Fenner 			struct timeval tv;
116682c23ebaSBill Fenner 
1167be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
116882c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
116982c23ebaSBill Fenner 			    SCM_TIMESTAMP, SOL_SOCKET);
117082c23ebaSBill Fenner 			if (*mp)
117182c23ebaSBill Fenner 				mp = &(*mp)->m_next;
11724cc20ab1SSeigo Tanimura 		}
1173be8a62e8SPoul-Henning Kamp 	}
117482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
117582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
117682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
117782c23ebaSBill Fenner 		if (*mp)
117882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
117982c23ebaSBill Fenner 	}
11804957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
11814957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
11824957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
11834957466bSMatthew N. Dodd 		if (*mp)
11844957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
11854957466bSMatthew N. Dodd 	}
118682c23ebaSBill Fenner #ifdef notyet
118782c23ebaSBill Fenner 	/* XXX
118882c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
118982c23ebaSBill Fenner 	 * than they already were.
119082c23ebaSBill Fenner 	 */
119182c23ebaSBill Fenner 	/* options were tossed already */
119282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
119382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
119482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
119582c23ebaSBill Fenner 		if (*mp)
119682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
119782c23ebaSBill Fenner 	}
119882c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
119982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1200e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
120182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
120282c23ebaSBill Fenner 		if (*mp)
120382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120482c23ebaSBill Fenner 	}
120582c23ebaSBill Fenner #endif
120682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1207d314ad7bSJulian Elischer 		struct ifnet *ifp;
1208d314ad7bSJulian Elischer 		struct sdlbuf {
120982c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1210d314ad7bSJulian Elischer 			u_char	pad[32];
1211d314ad7bSJulian Elischer 		} sdlbuf;
1212d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1213d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
121482c23ebaSBill Fenner 
121546f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
121646f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12174a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1218d314ad7bSJulian Elischer 			/*
1219d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1220d314ad7bSJulian Elischer 			 */
122146f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
122246f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1223d314ad7bSJulian Elischer 				goto makedummy;
1224d314ad7bSJulian Elischer 			}
1225d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1226d314ad7bSJulian Elischer 		} else {
1227d314ad7bSJulian Elischer makedummy:
122846f2df9cSSergey Kandaurov 			sdl2->sdl_len =
122946f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1230d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1231d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1232d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1233d314ad7bSJulian Elischer 		}
1234d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
123582c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
123682c23ebaSBill Fenner 		if (*mp)
123782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
123882c23ebaSBill Fenner 	}
12393cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
12403cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
12413cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
12423cca425bSMichael Tuexen 		if (*mp)
12433cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12443cca425bSMichael Tuexen 	}
12459d3ddf43SAdrian Chadd 
12469d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12479d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12489d3ddf43SAdrian Chadd 
12499d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12509d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12519d3ddf43SAdrian Chadd 
12529d3ddf43SAdrian Chadd 		/*
12539d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
12549d3ddf43SAdrian Chadd 		 * other - don't populate both?
12559d3ddf43SAdrian Chadd 		 */
12569d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
12579d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
12589d3ddf43SAdrian Chadd 		if (*mp)
12599d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12609d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
12619d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
12629d3ddf43SAdrian Chadd 		if (*mp)
12639d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12649d3ddf43SAdrian Chadd 	}
12659d3ddf43SAdrian Chadd 
12669d3ddf43SAdrian Chadd #ifdef	RSS
12679d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
12689d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12699d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
12709d3ddf43SAdrian Chadd 
12719d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12729d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12739d3ddf43SAdrian Chadd 
12749d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
12759d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
12769d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
12779d3ddf43SAdrian Chadd 			if (*mp)
12789d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
12799d3ddf43SAdrian Chadd 		}
12809d3ddf43SAdrian Chadd 	}
12819d3ddf43SAdrian Chadd #endif
128282c23ebaSBill Fenner }
128382c23ebaSBill Fenner 
12844d2e3692SLuigi Rizzo /*
128530916a2dSRobert Watson  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
128630916a2dSRobert Watson  * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
128730916a2dSRobert Watson  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
128830916a2dSRobert Watson  * compiled.
12894d2e3692SLuigi Rizzo  */
/*
 * Flag set to 1 by ip_rsvp_init() and cleared by ip_rsvp_done(); it guards
 * the matching increment/decrement of V_rsvp_on so each happens only once.
 */
12903e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on);
/*
 * Socket recorded by ip_rsvp_init() and reset to NULL by ip_rsvp_done();
 * rsvp_input() hands packets to rip_input() only while it is non-NULL.
 */
129182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
129282cea7e6SBjoern A. Zeeb 
/* Accessor for ip_rsvp_on via VNET(); presumably per-vnet - confirm. */
129382cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
129482cea7e6SBjoern A. Zeeb 
1295df8bae1dSRodney W. Grimes int
1296f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1297f0068c4aSGarrett Wollman {
12988b615593SMarko Zec 
1299f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1300f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1301f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1302f0068c4aSGarrett Wollman 
1303603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1304f0068c4aSGarrett Wollman 		return EADDRINUSE;
1305f0068c4aSGarrett Wollman 
1306603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13071c5de19aSGarrett Wollman 	/*
13081c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13091c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13101c5de19aSGarrett Wollman 	 */
1311603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1312603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1313603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13141c5de19aSGarrett Wollman 	}
1315f0068c4aSGarrett Wollman 
1316f0068c4aSGarrett Wollman 	return 0;
1317f0068c4aSGarrett Wollman }
1318f0068c4aSGarrett Wollman 
1319f0068c4aSGarrett Wollman int
1320f0068c4aSGarrett Wollman ip_rsvp_done(void)
1321f0068c4aSGarrett Wollman {
13228b615593SMarko Zec 
1323603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13241c5de19aSGarrett Wollman 	/*
13251c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13261c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13271c5de19aSGarrett Wollman 	 */
1328603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1329603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1330603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13311c5de19aSGarrett Wollman 	}
1332f0068c4aSGarrett Wollman 	return 0;
1333f0068c4aSGarrett Wollman }
1334bbb4330bSLuigi Rizzo 
13358f5a8818SKevin Lo int
13368f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1337bbb4330bSLuigi Rizzo {
13388f5a8818SKevin Lo 	struct mbuf *m;
13398f5a8818SKevin Lo 
13408f5a8818SKevin Lo 	m = *mp;
13418f5a8818SKevin Lo 	*mp = NULL;
13428b615593SMarko Zec 
1343bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13448f5a8818SKevin Lo 		*mp = m;
13458f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13468f5a8818SKevin Lo 		return (IPPROTO_DONE);
1347bbb4330bSLuigi Rizzo 	}
1348bbb4330bSLuigi Rizzo 
1349bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1350bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1351bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1352bbb4330bSLuigi Rizzo 	 */
1353bbb4330bSLuigi Rizzo 
1354603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1355bbb4330bSLuigi Rizzo 		m_freem(m);
13568f5a8818SKevin Lo 		return (IPPROTO_DONE);
1357bbb4330bSLuigi Rizzo 	}
1358bbb4330bSLuigi Rizzo 
1359603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
13608f5a8818SKevin Lo 		*mp = m;
13618f5a8818SKevin Lo 		rip_input(mp, offp, proto);
13628f5a8818SKevin Lo 		return (IPPROTO_DONE);
1363bbb4330bSLuigi Rizzo 	}
1364bbb4330bSLuigi Rizzo 	/* Drop the packet */
1365bbb4330bSLuigi Rizzo 	m_freem(m);
13668f5a8818SKevin Lo 	return (IPPROTO_DONE);
1367bbb4330bSLuigi Rizzo }
1368