xref: /freebsd/sys/netinet/ip_input.c (revision 484149def80975bda4e583a1d77c4c6ee2baf074)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
350ac40133SBrian Somers #include "opt_bootp.h"
3674a9466cSGary Palmer #include "opt_ipfw.h"
3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
386a800098SYoshinobu Inoue #include "opt_ipsec.h"
3933553d6eSBjoern A. Zeeb #include "opt_route.h"
40b8bc95cdSAdrian Chadd #include "opt_rss.h"
4174a9466cSGary Palmer 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
43df8bae1dSRodney W. Grimes #include <sys/systm.h>
44ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
45df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
46b715f178SLuigi Rizzo #include <sys/malloc.h>
47df8bae1dSRodney W. Grimes #include <sys/domain.h>
48df8bae1dSRodney W. Grimes #include <sys/protosw.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/time.h>
51df8bae1dSRodney W. Grimes #include <sys/kernel.h>
52385195c0SMarko Zec #include <sys/lock.h>
53cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
54385195c0SMarko Zec #include <sys/rwlock.h>
5557f60867SMark Johnston #include <sys/sdt.h>
561025071fSGarrett Wollman #include <sys/syslog.h>
57b5e8ce9fSBruce Evans #include <sys/sysctl.h>
58df8bae1dSRodney W. Grimes 
59c85540ddSAndrey A. Chernov #include <net/pfil.h>
60df8bae1dSRodney W. Grimes #include <net/if.h>
619494d596SBrooks Davis #include <net/if_types.h>
62d314ad7bSJulian Elischer #include <net/if_var.h>
6382c23ebaSBill Fenner #include <net/if_dl.h>
64df8bae1dSRodney W. Grimes #include <net/route.h>
65748e0b0aSGarrett Wollman #include <net/netisr.h>
66b2bdc62aSAdrian Chadd #include <net/rss_config.h>
674b79449eSBjoern A. Zeeb #include <net/vnet.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
7057f60867SMark Johnston #include <netinet/in_kdtrace.h>
71df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
72b5e8ce9fSBruce Evans #include <netinet/in_var.h>
73df8bae1dSRodney W. Grimes #include <netinet/ip.h>
74df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
76eddfbb76SRobert Watson #include <netinet/ip_fw.h>
77df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
78ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7958938916SGarrett Wollman #include <machine/in_cksum.h>
80a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
81b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
821dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h>
8333872124SGeorge V. Neville-Neil #include <netipsec/ipsec.h>
8433872124SGeorge V. Neville-Neil #include <netipsec/key.h>
85b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
86b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
87df8bae1dSRodney W. Grimes 
88f0068c4aSGarrett Wollman #include <sys/socketvar.h>
896ddbf1e2SGary Palmer 
90aed55708SRobert Watson #include <security/mac/mac_framework.h>
91aed55708SRobert Watson 
92d2035ffbSEd Maste #ifdef CTASSERT
93d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20);
94d2035ffbSEd Maste #endif
95d2035ffbSEd Maste 
961dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */
97843b0e57SXin LI extern void ipreass_init(void);
98843b0e57SXin LI extern void ipreass_drain(void);
99843b0e57SXin LI extern void ipreass_slowtimo(void);
1001dbefcc0SGleb Smirnoff #ifdef VIMAGE
101843b0e57SXin LI extern void ipreass_destroy(void);
1021dbefcc0SGleb Smirnoff #endif
1031dbefcc0SGleb Smirnoff 
104cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock;
105cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
106f0068c4aSGarrett Wollman 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10882cea7e6SBjoern A. Zeeb 
10982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1106df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
111eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1128b615593SMarko Zec     "Enable IP forwarding between interfaces");
1130312fbe9SPoul-Henning Kamp 
1143e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
11582cea7e6SBjoern A. Zeeb #define	V_ipsendredirects	VNET(ipsendredirects)
1166df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
117eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1188b615593SMarko Zec     "Enable sending IP redirects");
1190312fbe9SPoul-Henning Kamp 
120823db0e9SDon Lewis /*
121823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
122823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
123a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
124823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1253f67c834SDon Lewis  *
126a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
127a8f12100SDon Lewis  * to translate the destination address to another local interface.
1283f67c834SDon Lewis  *
1293f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1303f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1313f67c834SDon Lewis  * packets for those addresses are received.
132823db0e9SDon Lewis  */
1333e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface);
13482cea7e6SBjoern A. Zeeb #define	V_ip_checkinterface	VNET(ip_checkinterface)
1356df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
136eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1378b615593SMarko Zec     "Verify packet arrives on correct interface");
138b3e95d4eSJonathan Lemon 
1390b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
140df8bae1dSRodney W. Grimes 
141d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
142d4b5cae4SRobert Watson 	.nh_name = "ip",
143d4b5cae4SRobert Watson 	.nh_handler = ip_input,
144d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
145b8bc95cdSAdrian Chadd #ifdef	RSS
1462527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
147b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
148b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
149b8bc95cdSAdrian Chadd #else
150d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
151b8bc95cdSAdrian Chadd #endif
152d4b5cae4SRobert Watson };
153ca925d9cSJonathan Lemon 
154b8bc95cdSAdrian Chadd #ifdef	RSS
155b8bc95cdSAdrian Chadd /*
156b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
157b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
158b8bc95cdSAdrian Chadd  *
159b8bc95cdSAdrian Chadd  * It should likely have something that assert it
160b8bc95cdSAdrian Chadd  * actually has valid flow details.
161b8bc95cdSAdrian Chadd  */
162b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
163b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
164b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
165b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
166499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
167b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
168b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
169b8bc95cdSAdrian Chadd };
170b8bc95cdSAdrian Chadd #endif
171b8bc95cdSAdrian Chadd 
172df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
173f0ffb944SJulian Elischer extern	struct protosw inetsw[];
174df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
17582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17782cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
178ca925d9cSJonathan Lemon 
1790312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1800312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1813d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1820312fbe9SPoul-Henning Kamp #endif
1830312fbe9SPoul-Henning Kamp 
1841b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth);
1866df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
187eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
188eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
1891b968362SDag-Erling Smørgrav #endif
190eddfbb76SRobert Watson 
191315e3e38SRobert Watson /*
1925da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1935923c293SGleb Smirnoff  */
1945da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1955da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1965da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1975da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1985923c293SGleb Smirnoff 
1995923c293SGleb Smirnoff #ifdef VIMAGE
2005da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2015923c293SGleb Smirnoff #endif /* VIMAGE */
2025923c293SGleb Smirnoff 
2035923c293SGleb Smirnoff /*
204315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2055923c293SGleb Smirnoff  * into ipstat treated as an array.
206315e3e38SRobert Watson  */
207315e3e38SRobert Watson void
208315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
209315e3e38SRobert Watson {
210315e3e38SRobert Watson 
2115da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
212315e3e38SRobert Watson }
213315e3e38SRobert Watson 
214315e3e38SRobert Watson void
215315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
216315e3e38SRobert Watson {
217315e3e38SRobert Watson 
2185da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
219315e3e38SRobert Watson }
220315e3e38SRobert Watson 
221d4b5cae4SRobert Watson static int
222d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
223d4b5cae4SRobert Watson {
224d4b5cae4SRobert Watson 	int error, qlimit;
225d4b5cae4SRobert Watson 
226d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
227d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
228d4b5cae4SRobert Watson 	if (error || !req->newptr)
229d4b5cae4SRobert Watson 		return (error);
230d4b5cae4SRobert Watson 	if (qlimit < 1)
231d4b5cae4SRobert Watson 		return (EINVAL);
232d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
233d4b5cae4SRobert Watson }
234d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
235d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
236d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
237d4b5cae4SRobert Watson 
238d4b5cae4SRobert Watson static int
239d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
240d4b5cae4SRobert Watson {
241d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
242d4b5cae4SRobert Watson 	int error, qdrops;
243d4b5cae4SRobert Watson 
244d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
245d4b5cae4SRobert Watson 	qdrops = qdrops_long;
246d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
247d4b5cae4SRobert Watson 	if (error || !req->newptr)
248d4b5cae4SRobert Watson 		return (error);
249d4b5cae4SRobert Watson 	if (qdrops != 0)
250d4b5cae4SRobert Watson 		return (EINVAL);
251d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
252d4b5cae4SRobert Watson 	return (0);
253d4b5cae4SRobert Watson }
254d4b5cae4SRobert Watson 
255d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
256d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
257d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
258d4b5cae4SRobert Watson 
259b8bc95cdSAdrian Chadd #ifdef	RSS
260b8bc95cdSAdrian Chadd static int
261b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
262b8bc95cdSAdrian Chadd {
263b8bc95cdSAdrian Chadd 	int error, qlimit;
264b8bc95cdSAdrian Chadd 
265b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
266b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
267b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
268b8bc95cdSAdrian Chadd 		return (error);
269b8bc95cdSAdrian Chadd 	if (qlimit < 1)
270b8bc95cdSAdrian Chadd 		return (EINVAL);
271b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
272b8bc95cdSAdrian Chadd }
273b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
274b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
275b8bc95cdSAdrian Chadd     "Maximum size of the IP direct input queue");
276b8bc95cdSAdrian Chadd 
277b8bc95cdSAdrian Chadd static int
278b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
279b8bc95cdSAdrian Chadd {
280b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
281b8bc95cdSAdrian Chadd 	int error, qdrops;
282b8bc95cdSAdrian Chadd 
283b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
284b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
285b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
286b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
287b8bc95cdSAdrian Chadd 		return (error);
288b8bc95cdSAdrian Chadd 	if (qdrops != 0)
289b8bc95cdSAdrian Chadd 		return (EINVAL);
290b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
291b8bc95cdSAdrian Chadd 	return (0);
292b8bc95cdSAdrian Chadd }
293b8bc95cdSAdrian Chadd 
294b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
295b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
296b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
297b8bc95cdSAdrian Chadd #endif	/* RSS */
298b8bc95cdSAdrian Chadd 
299df8bae1dSRodney W. Grimes /*
300df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
301df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
302df8bae1dSRodney W. Grimes  */
303df8bae1dSRodney W. Grimes void
304f2565d68SRobert Watson ip_init(void)
305df8bae1dSRodney W. Grimes {
306f2565d68SRobert Watson 	struct protosw *pr;
307f2565d68SRobert Watson 	int i;
308df8bae1dSRodney W. Grimes 
309603724d3SBjoern A. Zeeb 	TAILQ_INIT(&V_in_ifaddrhead);
310603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3111ed81b73SMarko Zec 
3121ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3131dbefcc0SGleb Smirnoff 	ipreass_init();
3141ed81b73SMarko Zec 
3150b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3160b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3170b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3180b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3190b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3200b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3210b4b0b0fSJulian Elischer 
322ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
323ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
324ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
325ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
326ef91a976SAndrey V. Elsukov 		    __func__);
327ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
328ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
329ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
330ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
331ef91a976SAndrey V. Elsukov 		    __func__);
332ef91a976SAndrey V. Elsukov 
3331ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
334*484149deSBjoern A. Zeeb #ifdef VIMAGE
335*484149deSBjoern A. Zeeb 	if (!IS_DEFAULT_VNET(curvnet)) {
336*484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_nh);
337*484149deSBjoern A. Zeeb #ifdef	RSS
338*484149deSBjoern A. Zeeb 		netisr_register_vnet(&ip_direct_nh);
339*484149deSBjoern A. Zeeb #endif
3401ed81b73SMarko Zec 		return;
341*484149deSBjoern A. Zeeb 	}
342*484149deSBjoern A. Zeeb #endif
3431ed81b73SMarko Zec 
344f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
34502410549SRobert Watson 	if (pr == NULL)
346db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
347db09bef3SAndre Oppermann 
348db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
349df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
350df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
351db09bef3SAndre Oppermann 	/*
352db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
353db09bef3SAndre Oppermann 	 * in ip_protox[].
354db09bef3SAndre Oppermann 	 */
355f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
356f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
357df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
358db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
359db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
360db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
361df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
362db09bef3SAndre Oppermann 		}
363194a213eSAndrey A. Chernov 
364d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
365b8bc95cdSAdrian Chadd #ifdef	RSS
366b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
367b8bc95cdSAdrian Chadd #endif
368df8bae1dSRodney W. Grimes }
369df8bae1dSRodney W. Grimes 
3709802380eSBjoern A. Zeeb #ifdef VIMAGE
3713f58662dSBjoern A. Zeeb static void
3723f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused)
3739802380eSBjoern A. Zeeb {
374ef91a976SAndrey V. Elsukov 	int error;
3754d3dfd45SMikolaj Golub 
376*484149deSBjoern A. Zeeb #ifdef	RSS
377*484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_direct_nh);
378*484149deSBjoern A. Zeeb #endif
379*484149deSBjoern A. Zeeb 	netisr_unregister_vnet(&ip_nh);
380*484149deSBjoern A. Zeeb 
381ef91a976SAndrey V. Elsukov 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
3824d3dfd45SMikolaj Golub 		printf("%s: WARNING: unable to unregister pfil hook, "
383ef91a976SAndrey V. Elsukov 		    "error %d\n", __func__, error);
3849802380eSBjoern A. Zeeb 
385ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
386ef91a976SAndrey V. Elsukov 	if (error != 0) {
387ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
388ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
389ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
390ef91a976SAndrey V. Elsukov 	}
391ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
392ef91a976SAndrey V. Elsukov 	if (error != 0) {
393ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
394ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
395ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
396ef91a976SAndrey V. Elsukov 	}
3979802380eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
3989802380eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
3999802380eSBjoern A. Zeeb 
400e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
4011dbefcc0SGleb Smirnoff 	ipreass_destroy();
4029802380eSBjoern A. Zeeb }
4033f58662dSBjoern A. Zeeb 
4043f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL);
4059802380eSBjoern A. Zeeb #endif
4069802380eSBjoern A. Zeeb 
407b8bc95cdSAdrian Chadd #ifdef	RSS
408b8bc95cdSAdrian Chadd /*
409b8bc95cdSAdrian Chadd  * IP direct input routine.
410b8bc95cdSAdrian Chadd  *
411b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
412b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
413b8bc95cdSAdrian Chadd  */
414b8bc95cdSAdrian Chadd void
415b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
416b8bc95cdSAdrian Chadd {
417b8bc95cdSAdrian Chadd 	struct ip *ip;
418b8bc95cdSAdrian Chadd 	int hlen;
419b8bc95cdSAdrian Chadd 
420b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
421b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
422b8bc95cdSAdrian Chadd 
423b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
424b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
425b8bc95cdSAdrian Chadd 	return;
426b8bc95cdSAdrian Chadd }
427b8bc95cdSAdrian Chadd #endif
428b8bc95cdSAdrian Chadd 
4294d2e3692SLuigi Rizzo /*
430df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
431df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
432df8bae1dSRodney W. Grimes  */
433c67b1d17SGarrett Wollman void
434c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
435df8bae1dSRodney W. Grimes {
4369188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4375da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
438ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4390aade26eSRobert Watson 	struct ifnet *ifp;
4409b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
44121d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
44202c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
443f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
444b715f178SLuigi Rizzo 
445fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
446db40007dSAndrew R. Reiter 
447ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
44876ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
44976ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4502b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
45153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4528f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4539b932e9eSAndre Oppermann 		goto ours;
4542b25acc1SLuigi Rizzo 	}
4552b25acc1SLuigi Rizzo 
45686425c62SRobert Watson 	IPSTAT_INC(ips_total);
45758938916SGarrett Wollman 
45858938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
45958938916SGarrett Wollman 		goto tooshort;
46058938916SGarrett Wollman 
461df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4620b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
46386425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
464c67b1d17SGarrett Wollman 		return;
465df8bae1dSRodney W. Grimes 	}
466df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
46758938916SGarrett Wollman 
46853be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
46986425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
470df8bae1dSRodney W. Grimes 		goto bad;
471df8bae1dSRodney W. Grimes 	}
47258938916SGarrett Wollman 
47353be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
474df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
47586425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
476df8bae1dSRodney W. Grimes 		goto bad;
477df8bae1dSRodney W. Grimes 	}
478df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4790b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
48086425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
481c67b1d17SGarrett Wollman 			return;
482df8bae1dSRodney W. Grimes 		}
483df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
484df8bae1dSRodney W. Grimes 	}
48533841545SHajimu UMEMOTO 
48657f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
48757f60867SMark Johnston 
48833841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
4890aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
49033841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
49133841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
4920aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
49386425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
49433841545SHajimu UMEMOTO 			goto bad;
49533841545SHajimu UMEMOTO 		}
49633841545SHajimu UMEMOTO 	}
49733841545SHajimu UMEMOTO 
498db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
499db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
500db4f9cc7SJonathan Lemon 	} else {
50158938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
50247c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
50358938916SGarrett Wollman 		} else {
50447c861ecSBrian Somers 			sum = in_cksum(m, hlen);
50558938916SGarrett Wollman 		}
506db4f9cc7SJonathan Lemon 	}
50747c861ecSBrian Somers 	if (sum) {
50886425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
509df8bae1dSRodney W. Grimes 		goto bad;
510df8bae1dSRodney W. Grimes 	}
511df8bae1dSRodney W. Grimes 
51202b199f1SMax Laier #ifdef ALTQ
51302b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
51402b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
51502b199f1SMax Laier 		return;
51602b199f1SMax Laier #endif
51702b199f1SMax Laier 
51821d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
51921d172a3SGleb Smirnoff 	if (ip_len < hlen) {
52086425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
521df8bae1dSRodney W. Grimes 		goto bad;
522df8bae1dSRodney W. Grimes 	}
523df8bae1dSRodney W. Grimes 
524df8bae1dSRodney W. Grimes 	/*
525df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
526df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
527df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
528df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
529df8bae1dSRodney W. Grimes 	 */
53021d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
53158938916SGarrett Wollman tooshort:
53286425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
533df8bae1dSRodney W. Grimes 		goto bad;
534df8bae1dSRodney W. Grimes 	}
53521d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
536df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
53721d172a3SGleb Smirnoff 			m->m_len = ip_len;
53821d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
539df8bae1dSRodney W. Grimes 		} else
54021d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
541df8bae1dSRodney W. Grimes 	}
542b8bc95cdSAdrian Chadd 
54333872124SGeorge V. Neville-Neil 	/* Try to forward the packet, but if we fail continue */
544b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
54533872124SGeorge V. Neville-Neil 	/* For now we do not handle IPSEC in tryforward. */
54633872124SGeorge V. Neville-Neil 	if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
54733872124SGeorge V. Neville-Neil 	    (V_ipforwarding == 1))
54833872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
54933872124SGeorge V. Neville-Neil 			return;
55014dd6717SSam Leffler 	/*
551ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
55214dd6717SSam Leffler 	 */
553cc977adcSBjoern A. Zeeb 	if (ip_ipsec_filtertunnel(m))
554c21fd232SAndre Oppermann 		goto passin;
55533872124SGeorge V. Neville-Neil #else
55633872124SGeorge V. Neville-Neil 	if (V_ipforwarding == 1)
55733872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
55833872124SGeorge V. Neville-Neil 			return;
559b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
5603f67c834SDon Lewis 
561c4ac87eaSDarren Reed 	/*
562134ea224SSam Leffler 	 * Run through list of hooks for input packets.
563f51f805fSSam Leffler 	 *
564f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
565f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
566f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
567c4ac87eaSDarren Reed 	 */
568c21fd232SAndre Oppermann 
569c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5700b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
571c21fd232SAndre Oppermann 		goto passin;
572c21fd232SAndre Oppermann 
573f51f805fSSam Leffler 	odst = ip->ip_dst;
5740b4b0b0fSJulian Elischer 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
575beec8214SDarren Reed 		return;
576134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
577c4ac87eaSDarren Reed 		return;
5789b932e9eSAndre Oppermann 
579c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
58002c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
5810aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5829b932e9eSAndre Oppermann 
5839b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
5849b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
5859b932e9eSAndre Oppermann 		goto ours;
5869b932e9eSAndre Oppermann 	}
587ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
588de89d74bSLuiz Otavio O Souza 		if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) {
589099dd043SAndre Oppermann 			/*
590ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
591ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
592ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
593099dd043SAndre Oppermann 			 */
594ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
595099dd043SAndre Oppermann 			return;
596099dd043SAndre Oppermann 		}
597ffdbf9daSAndrey V. Elsukov 	}
598c21fd232SAndre Oppermann passin:
59921d172a3SGleb Smirnoff 
60021d172a3SGleb Smirnoff 	/*
601df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
602df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
603df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
604df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
605df8bae1dSRodney W. Grimes 	 */
6069b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
607c67b1d17SGarrett Wollman 		return;
608df8bae1dSRodney W. Grimes 
609f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
610f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
611f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
612f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
613f0068c4aSGarrett Wollman 	 * grabbing the packet.
614f0068c4aSGarrett Wollman          */
615603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
616f0068c4aSGarrett Wollman 		goto ours;
617f0068c4aSGarrett Wollman 
618df8bae1dSRodney W. Grimes 	/*
619df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
620cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
621cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
622cc766e04SGarrett Wollman 	 * with it).
623df8bae1dSRodney W. Grimes 	 */
624603724d3SBjoern A. Zeeb 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
625cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
626cc766e04SGarrett Wollman 		goto ours;
627cc766e04SGarrett Wollman 
6287538a9a0SJonathan Lemon 	/*
629823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
630823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
631823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
632e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
633e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6343f67c834SDon Lewis 	 *
6353f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6363f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6373f67c834SDon Lewis 	 *
638a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6393f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6403f67c834SDon Lewis 	 * the packets are received.
641a9771948SGleb Smirnoff 	 *
642a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
643a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
644a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
645823db0e9SDon Lewis 	 */
646603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6470aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
64854bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
649823db0e9SDon Lewis 
650ca925d9cSJonathan Lemon 	/*
651ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
652ca925d9cSJonathan Lemon 	 */
6532d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6549b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
655f9e354dfSJulian Elischer 		/*
656823db0e9SDon Lewis 		 * If the address matches, verify that the packet
657823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
658823db0e9SDon Lewis 		 * enabled.
659f9e354dfSJulian Elischer 		 */
6609b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6618c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6627caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6637caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
6647caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
6652d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
666ed1ff184SJulian Elischer 			goto ours;
667ca925d9cSJonathan Lemon 		}
6688c0fec80SRobert Watson 	}
6692d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6702d9cfabaSRobert Watson 
671823db0e9SDon Lewis 	/*
672ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
673ca925d9cSJonathan Lemon 	 *
674ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
675ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
676ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
677ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
678823db0e9SDon Lewis 	 */
6790aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
680137f91e8SJohn Baldwin 		IF_ADDR_RLOCK(ifp);
6810aade26eSRobert Watson 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
682ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
683ca925d9cSJonathan Lemon 				continue;
684ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
685df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6860aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
6877caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6887caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
6897caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
690137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
691df8bae1dSRodney W. Grimes 				goto ours;
6920aade26eSRobert Watson 			}
6930ac40133SBrian Somers #ifdef BOOTP_COMPAT
6940aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
6957caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6967caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
6977caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
698137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
699ca925d9cSJonathan Lemon 				goto ours;
7000aade26eSRobert Watson 			}
7010ac40133SBrian Somers #endif
702df8bae1dSRodney W. Grimes 		}
703137f91e8SJohn Baldwin 		IF_ADDR_RUNLOCK(ifp);
70419e5b0a7SRobert Watson 		ia = NULL;
705df8bae1dSRodney W. Grimes 	}
706f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
707f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
70886425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
709f8429ca2SBruce M Simpson 		m_freem(m);
710f8429ca2SBruce M Simpson 		return;
711f8429ca2SBruce M Simpson 	}
712df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
713603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
714df8bae1dSRodney W. Grimes 			/*
715df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
716df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
717df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
718df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
719df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
720df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
721df8bae1dSRodney W. Grimes 			 */
7220aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
72386425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
724df8bae1dSRodney W. Grimes 				m_freem(m);
725c67b1d17SGarrett Wollman 				return;
726df8bae1dSRodney W. Grimes 			}
727df8bae1dSRodney W. Grimes 
728df8bae1dSRodney W. Grimes 			/*
72911612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
730df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
731df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
732df8bae1dSRodney W. Grimes 			 */
733df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
734df8bae1dSRodney W. Grimes 				goto ours;
73586425c62SRobert Watson 			IPSTAT_INC(ips_forward);
736df8bae1dSRodney W. Grimes 		}
737df8bae1dSRodney W. Grimes 		/*
738d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
739d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
740d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
741df8bae1dSRodney W. Grimes 		 */
742df8bae1dSRodney W. Grimes 		goto ours;
743df8bae1dSRodney W. Grimes 	}
744df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
745df8bae1dSRodney W. Grimes 		goto ours;
746df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
747df8bae1dSRodney W. Grimes 		goto ours;
748df8bae1dSRodney W. Grimes 
7496a800098SYoshinobu Inoue 	/*
750df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
751df8bae1dSRodney W. Grimes 	 */
752603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
75386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
754df8bae1dSRodney W. Grimes 		m_freem(m);
755546f251bSChris D. Faulhaber 	} else {
7569b932e9eSAndre Oppermann 		ip_forward(m, dchg);
757546f251bSChris D. Faulhaber 	}
758c67b1d17SGarrett Wollman 	return;
759df8bae1dSRodney W. Grimes 
760df8bae1dSRodney W. Grimes ours:
761d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
762d0ebc0d2SYaroslav Tykhiy 	/*
763d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
764d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
765d0ebc0d2SYaroslav Tykhiy 	 */
7667caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
767d0ebc0d2SYaroslav Tykhiy 		return;
768d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
769d0ebc0d2SYaroslav Tykhiy 
77063f8d699SJordan K. Hubbard 	/*
771b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
772ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
773df8bae1dSRodney W. Grimes 	 */
7748f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
775aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
776f0cada84SAndre Oppermann 		m = ip_reass(m);
777f0cada84SAndre Oppermann 		if (m == NULL)
778c67b1d17SGarrett Wollman 			return;
7796a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7807e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
78153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
782f0cada84SAndre Oppermann 	}
783f0cada84SAndre Oppermann 
784b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
78533841545SHajimu UMEMOTO 	/*
78633841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
78733841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
78833841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
78933841545SHajimu UMEMOTO 	 */
790e58320f1SAndrey V. Elsukov 	if (ip_ipsec_input(m, ip->ip_p) != 0)
79133841545SHajimu UMEMOTO 		goto bad;
792b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
79333841545SHajimu UMEMOTO 
794df8bae1dSRodney W. Grimes 	/*
795df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
796df8bae1dSRodney W. Grimes 	 */
79786425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
7989b932e9eSAndre Oppermann 
7998f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
800c67b1d17SGarrett Wollman 	return;
801df8bae1dSRodney W. Grimes bad:
802df8bae1dSRodney W. Grimes 	m_freem(m);
803c67b1d17SGarrett Wollman }
804c67b1d17SGarrett Wollman 
805c67b1d17SGarrett Wollman /*
806df8bae1dSRodney W. Grimes  * IP timer processing;
807df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
808df8bae1dSRodney W. Grimes  * queue, discard it.
809df8bae1dSRodney W. Grimes  */
810df8bae1dSRodney W. Grimes void
811f2565d68SRobert Watson ip_slowtimo(void)
812df8bae1dSRodney W. Grimes {
8138b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
814df8bae1dSRodney W. Grimes 
8155ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8168b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
8178b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8181dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8198b615593SMarko Zec 		CURVNET_RESTORE();
8208b615593SMarko Zec 	}
8215ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
822df8bae1dSRodney W. Grimes }
823df8bae1dSRodney W. Grimes 
8249802380eSBjoern A. Zeeb void
8259802380eSBjoern A. Zeeb ip_drain(void)
8269802380eSBjoern A. Zeeb {
8279802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8289802380eSBjoern A. Zeeb 
8299802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8309802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
8319802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8321dbefcc0SGleb Smirnoff 		ipreass_drain();
8338b615593SMarko Zec 		CURVNET_RESTORE();
8348b615593SMarko Zec 	}
8355ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
836df8bae1dSRodney W. Grimes }
837df8bae1dSRodney W. Grimes 
838df8bae1dSRodney W. Grimes /*
839de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
840de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
841de38924dSAndre Oppermann  */
842de38924dSAndre Oppermann int
8431b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
844de38924dSAndre Oppermann {
845de38924dSAndre Oppermann 	struct protosw *pr;
846de38924dSAndre Oppermann 
847de38924dSAndre Oppermann 	/* Sanity checks. */
8481b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
849de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
850de38924dSAndre Oppermann 
851de38924dSAndre Oppermann 	/*
852de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
853de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
854de38924dSAndre Oppermann 	 */
855de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
856de38924dSAndre Oppermann 	if (pr == NULL)
857de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
858de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
859de38924dSAndre Oppermann 		return (EEXIST);
860de38924dSAndre Oppermann 
861de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
862de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
863de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
864de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
865de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
866de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
867de38924dSAndre Oppermann 			return (0);
868de38924dSAndre Oppermann 		}
869de38924dSAndre Oppermann 	}
870de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
871de38924dSAndre Oppermann }
872de38924dSAndre Oppermann 
873de38924dSAndre Oppermann int
8741b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
875de38924dSAndre Oppermann {
876de38924dSAndre Oppermann 	struct protosw *pr;
877de38924dSAndre Oppermann 
878de38924dSAndre Oppermann 	/* Sanity checks. */
8791b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
880de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
881de38924dSAndre Oppermann 
882de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
883de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
884de38924dSAndre Oppermann 	if (pr == NULL)
885de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
886de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
887de38924dSAndre Oppermann 		return (ENOENT);
888de38924dSAndre Oppermann 
889de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
890de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
891de38924dSAndre Oppermann 	return (0);
892de38924dSAndre Oppermann }
893de38924dSAndre Oppermann 
/*
 * errno value for each of the PRC_NCMDS control commands, one entry per
 * command in index order.
 * NOTE(review): presumably indexed by the PRC_* command code, with 0
 * meaning "no error to report" -- confirm against the users of this table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,			/*  0 */
	0,			/*  1 */
	0,			/*  2 */
	0,			/*  3 */
	0,			/*  4 */
	EMSGSIZE,		/*  5 */
	EHOSTDOWN,		/*  6 */
	EHOSTUNREACH,		/*  7 */
	EHOSTUNREACH,		/*  8 */
	EHOSTUNREACH,		/*  9 */
	ECONNREFUSED,		/* 10 */
	ECONNREFUSED,		/* 11 */
	EMSGSIZE,		/* 12 */
	EHOSTUNREACH,		/* 13 */
	0,			/* 14 */
	0,			/* 15 */
	0,			/* 16 */
	0,			/* 17 */
	EHOSTUNREACH,		/* 18 */
	0,			/* 19 */
	ENOPROTOOPT,		/* 20 */
	ECONNREFUSED		/* 21 */
};
902df8bae1dSRodney W. Grimes 
903df8bae1dSRodney W. Grimes /*
904df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
905df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
906df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
907df8bae1dSRodney W. Grimes  * of codes and types.
908df8bae1dSRodney W. Grimes  *
909df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
910df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
911df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
912df8bae1dSRodney W. Grimes  * protocol deal with that.
913df8bae1dSRodney W. Grimes  *
914df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
915df8bae1dSRodney W. Grimes  * via a source route.
916df8bae1dSRodney W. Grimes  */
9179b932e9eSAndre Oppermann void
9189b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
919df8bae1dSRodney W. Grimes {
9202b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
921efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
922df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
923d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9249b932e9eSAndre Oppermann 	struct in_addr dest;
925b835b6feSBjoern A. Zeeb 	struct route ro;
926c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9273efc3014SJulian Elischer 
9289b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
92986425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
930df8bae1dSRodney W. Grimes 		m_freem(m);
931df8bae1dSRodney W. Grimes 		return;
932df8bae1dSRodney W. Grimes 	}
9338922ddbeSAndrey V. Elsukov #ifdef IPSEC
9348922ddbeSAndrey V. Elsukov 	if (ip_ipsec_fwd(m) != 0) {
9358922ddbeSAndrey V. Elsukov 		IPSTAT_INC(ips_cantforward);
9368922ddbeSAndrey V. Elsukov 		m_freem(m);
9378922ddbeSAndrey V. Elsukov 		return;
9388922ddbeSAndrey V. Elsukov 	}
9398922ddbeSAndrey V. Elsukov #endif /* IPSEC */
9401b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
941603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
9421b968362SDag-Erling Smørgrav #endif
943df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
9441b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
94502c1c707SAndre Oppermann 			    0, 0);
946df8bae1dSRodney W. Grimes 			return;
947df8bae1dSRodney W. Grimes 		}
9481b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
9491b968362SDag-Erling Smørgrav 	}
9501b968362SDag-Erling Smørgrav #endif
951df8bae1dSRodney W. Grimes 
952d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
953d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
954d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
955d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
956d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
957d14122b0SErmal Luçi #ifdef RADIX_MPATH
958d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
959d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
960d14122b0SErmal Luçi 	    M_GETFIB(m));
961d14122b0SErmal Luçi #else
962d14122b0SErmal Luçi 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
963d14122b0SErmal Luçi #endif
964d14122b0SErmal Luçi 	if (ro.ro_rt != NULL) {
965d14122b0SErmal Luçi 		ia = ifatoia(ro.ro_rt->rt_ifa);
966d14122b0SErmal Luçi 		ifa_ref(&ia->ia_ifa);
96756844a62SErmal Luçi 	} else
96856844a62SErmal Luçi 		ia = NULL;
969efbad259SEdward Tomasz Napierala #ifndef IPSEC
970efbad259SEdward Tomasz Napierala 	/*
971efbad259SEdward Tomasz Napierala 	 * 'ia' may be NULL if there is no route for this destination.
972efbad259SEdward Tomasz Napierala 	 * In case of IPsec, Don't discard it just yet, but pass it to
973efbad259SEdward Tomasz Napierala 	 * ip_output in case of outgoing IPsec policy.
974efbad259SEdward Tomasz Napierala 	 */
975d23d475fSGuido van Rooij 	if (!srcrt && ia == NULL) {
97602c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
977d14122b0SErmal Luçi 		RO_RTFREE(&ro);
978df8bae1dSRodney W. Grimes 		return;
97902c1c707SAndre Oppermann 	}
980efbad259SEdward Tomasz Napierala #endif
981df8bae1dSRodney W. Grimes 
982df8bae1dSRodney W. Grimes 	/*
983bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
984bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
985bfef7ed4SIan Dowse 	 *
9864d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
9874d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
9884d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
9894d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
9904d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
9914d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
9924d2e3692SLuigi Rizzo 	 *
993bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
994bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
995bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
996bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
997df8bae1dSRodney W. Grimes 	 */
998dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
999eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10009967cafcSSam Leffler 		/*
10019967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10029967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10039967cafcSSam Leffler 		 * be conservative and just discard the copy since
10049967cafcSSam Leffler 		 * code below may some day want the tags.
10059967cafcSSam Leffler 		 */
10069967cafcSSam Leffler 		m_free(mcopy);
10079967cafcSSam Leffler 		mcopy = NULL;
10089967cafcSSam Leffler 	}
1009bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10108f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1011e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1012bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1013bfef7ed4SIan Dowse 	}
101404287599SRuslan Ermilov 
101504287599SRuslan Ermilov #ifdef IPSTEALTH
1016603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
101704287599SRuslan Ermilov #endif
101804287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
101904287599SRuslan Ermilov #ifdef IPSTEALTH
102004287599SRuslan Ermilov 	}
102104287599SRuslan Ermilov #endif
1022df8bae1dSRodney W. Grimes 
1023df8bae1dSRodney W. Grimes 	/*
1024df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1025df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1026df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1027df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1028df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1029df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1030df8bae1dSRodney W. Grimes 	 */
10319b932e9eSAndre Oppermann 	dest.s_addr = 0;
1032efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1033efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
103402c1c707SAndre Oppermann 		struct rtentry *rt;
103502c1c707SAndre Oppermann 
103602c1c707SAndre Oppermann 		rt = ro.ro_rt;
103702c1c707SAndre Oppermann 
103802c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
10399b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1040df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1041df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1042df8bae1dSRodney W. Grimes 
1043df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1044df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1045df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
10469b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1047df8bae1dSRodney W. Grimes 				else
10489b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1049df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1050df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1051df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1052df8bae1dSRodney W. Grimes 			}
1053df8bae1dSRodney W. Grimes 		}
105402c1c707SAndre Oppermann 	}
1055df8bae1dSRodney W. Grimes 
1056b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1057b835b6feSBjoern A. Zeeb 
1058b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1059e3a7aa6fSGleb Smirnoff 		mtu = ro.ro_rt->rt_mtu;
1060bf984051SGleb Smirnoff 	RO_RTFREE(&ro);
1061b835b6feSBjoern A. Zeeb 
1062df8bae1dSRodney W. Grimes 	if (error)
106386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1064df8bae1dSRodney W. Grimes 	else {
106586425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1066df8bae1dSRodney W. Grimes 		if (type)
106786425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1068df8bae1dSRodney W. Grimes 		else {
10699188b4a1SAndre Oppermann 			if (mcopy)
1070df8bae1dSRodney W. Grimes 				m_freem(mcopy);
10718c0fec80SRobert Watson 			if (ia != NULL)
10728c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1073df8bae1dSRodney W. Grimes 			return;
1074df8bae1dSRodney W. Grimes 		}
1075df8bae1dSRodney W. Grimes 	}
10768c0fec80SRobert Watson 	if (mcopy == NULL) {
10778c0fec80SRobert Watson 		if (ia != NULL)
10788c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
1079df8bae1dSRodney W. Grimes 		return;
10808c0fec80SRobert Watson 	}
1081df8bae1dSRodney W. Grimes 
1082df8bae1dSRodney W. Grimes 	switch (error) {
1083df8bae1dSRodney W. Grimes 
1084df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1085df8bae1dSRodney W. Grimes 		/* type, code set above */
1086df8bae1dSRodney W. Grimes 		break;
1087df8bae1dSRodney W. Grimes 
1088efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1089df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1090df8bae1dSRodney W. Grimes 	case ENETDOWN:
1091df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1092df8bae1dSRodney W. Grimes 	default:
1093df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1094df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1095df8bae1dSRodney W. Grimes 		break;
1096df8bae1dSRodney W. Grimes 
1097df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1098df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1099df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11001dfcf0d2SAndre Oppermann 
1101b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
1102b835b6feSBjoern A. Zeeb 		/*
1103b835b6feSBjoern A. Zeeb 		 * If IPsec is configured for this path,
1104b835b6feSBjoern A. Zeeb 		 * override any possibly mtu value set by ip_output.
1105b835b6feSBjoern A. Zeeb 		 */
11061c044382SBjoern A. Zeeb 		mtu = ip_ipsec_mtu(mcopy, mtu);
1107b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
11089b932e9eSAndre Oppermann 		/*
1109b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1110b835b6feSBjoern A. Zeeb 		 * interface MTU.
1111ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1112ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1113ab48768bSAndre Oppermann 		 * current packet size.
11149b932e9eSAndre Oppermann 		 */
1115b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1116b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1117b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1118b835b6feSBjoern A. Zeeb 		} else {
1119ab48768bSAndre Oppermann 			if (ia != NULL)
1120c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1121ab48768bSAndre Oppermann 			else
11228f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1123ab48768bSAndre Oppermann 		}
112486425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1125df8bae1dSRodney W. Grimes 		break;
1126df8bae1dSRodney W. Grimes 
1127df8bae1dSRodney W. Grimes 	case ENOBUFS:
11283a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11293a06e3e0SRuslan Ermilov 		m_freem(mcopy);
11308c0fec80SRobert Watson 		if (ia != NULL)
11318c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
11323a06e3e0SRuslan Ermilov 		return;
1133df8bae1dSRodney W. Grimes 	}
11348c0fec80SRobert Watson 	if (ia != NULL)
11358c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
1136c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1137df8bae1dSRodney W. Grimes }
1138df8bae1dSRodney W. Grimes 
113982c23ebaSBill Fenner void
1140f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1141f2565d68SRobert Watson     struct mbuf *m)
114282c23ebaSBill Fenner {
11438b615593SMarko Zec 
1144be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
1145be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1146be8a62e8SPoul-Henning Kamp 
1147be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1148be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
1149be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1150be8a62e8SPoul-Henning Kamp 			    SCM_BINTIME, SOL_SOCKET);
1151be8a62e8SPoul-Henning Kamp 			if (*mp)
1152be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
1153be8a62e8SPoul-Henning Kamp 		}
115482c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
115582c23ebaSBill Fenner 			struct timeval tv;
115682c23ebaSBill Fenner 
1157be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
115882c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
115982c23ebaSBill Fenner 			    SCM_TIMESTAMP, SOL_SOCKET);
116082c23ebaSBill Fenner 			if (*mp)
116182c23ebaSBill Fenner 				mp = &(*mp)->m_next;
11624cc20ab1SSeigo Tanimura 		}
1163be8a62e8SPoul-Henning Kamp 	}
116482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
116582c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
116682c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
116782c23ebaSBill Fenner 		if (*mp)
116882c23ebaSBill Fenner 			mp = &(*mp)->m_next;
116982c23ebaSBill Fenner 	}
11704957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
11714957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
11724957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
11734957466bSMatthew N. Dodd 		if (*mp)
11744957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
11754957466bSMatthew N. Dodd 	}
117682c23ebaSBill Fenner #ifdef notyet
117782c23ebaSBill Fenner 	/* XXX
117882c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
117982c23ebaSBill Fenner 	 * than they already were.
118082c23ebaSBill Fenner 	 */
118182c23ebaSBill Fenner 	/* options were tossed already */
118282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
118382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
118482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
118582c23ebaSBill Fenner 		if (*mp)
118682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118782c23ebaSBill Fenner 	}
118882c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
118982c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1190e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
119182c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
119282c23ebaSBill Fenner 		if (*mp)
119382c23ebaSBill Fenner 			mp = &(*mp)->m_next;
119482c23ebaSBill Fenner 	}
119582c23ebaSBill Fenner #endif
119682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1197d314ad7bSJulian Elischer 		struct ifnet *ifp;
1198d314ad7bSJulian Elischer 		struct sdlbuf {
119982c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1200d314ad7bSJulian Elischer 			u_char	pad[32];
1201d314ad7bSJulian Elischer 		} sdlbuf;
1202d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1203d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
120482c23ebaSBill Fenner 
120546f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
120646f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12074a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1208d314ad7bSJulian Elischer 			/*
1209d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1210d314ad7bSJulian Elischer 			 */
121146f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
121246f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1213d314ad7bSJulian Elischer 				goto makedummy;
1214d314ad7bSJulian Elischer 			}
1215d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1216d314ad7bSJulian Elischer 		} else {
1217d314ad7bSJulian Elischer makedummy:
121846f2df9cSSergey Kandaurov 			sdl2->sdl_len =
121946f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1220d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1221d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1222d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1223d314ad7bSJulian Elischer 		}
1224d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
122582c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
122682c23ebaSBill Fenner 		if (*mp)
122782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
122882c23ebaSBill Fenner 	}
12293cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
12303cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
12313cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
12323cca425bSMichael Tuexen 		if (*mp)
12333cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12343cca425bSMichael Tuexen 	}
12359d3ddf43SAdrian Chadd 
12369d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12379d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12389d3ddf43SAdrian Chadd 
12399d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12409d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12419d3ddf43SAdrian Chadd 
12429d3ddf43SAdrian Chadd 		/*
12439d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
12449d3ddf43SAdrian Chadd 		 * other - don't populate both?
12459d3ddf43SAdrian Chadd 		 */
12469d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
12479d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
12489d3ddf43SAdrian Chadd 		if (*mp)
12499d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12509d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
12519d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
12529d3ddf43SAdrian Chadd 		if (*mp)
12539d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12549d3ddf43SAdrian Chadd 	}
12559d3ddf43SAdrian Chadd 
12569d3ddf43SAdrian Chadd #ifdef	RSS
12579d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
12589d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12599d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
12609d3ddf43SAdrian Chadd 
12619d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12629d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12639d3ddf43SAdrian Chadd 
12649d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
12659d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
12669d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
12679d3ddf43SAdrian Chadd 			if (*mp)
12689d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
12699d3ddf43SAdrian Chadd 		}
12709d3ddf43SAdrian Chadd 	}
12719d3ddf43SAdrian Chadd #endif
127282c23ebaSBill Fenner }
127382c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
/*
 * Per-VNET flag: set to 1 by ip_rsvp_init() and back to 0 by ip_rsvp_done(),
 * so that rsvp_on is adjusted at most once in each direction.
 */
static VNET_DEFINE(int, ip_rsvp_on);
/*
 * Per-VNET socket registered by ip_rsvp_init() and cleared to NULL by
 * ip_rsvp_done().
 */
VNET_DEFINE(struct socket *, ip_rsvpd);

/* Accessor for the file-local ip_rsvp_on flag defined above. */
#define	V_ip_rsvp_on		VNET(ip_rsvp_on)
128482cea7e6SBjoern A. Zeeb 
1285df8bae1dSRodney W. Grimes int
1286f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1287f0068c4aSGarrett Wollman {
12888b615593SMarko Zec 
1289f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1290f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1291f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1292f0068c4aSGarrett Wollman 
1293603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1294f0068c4aSGarrett Wollman 		return EADDRINUSE;
1295f0068c4aSGarrett Wollman 
1296603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
12971c5de19aSGarrett Wollman 	/*
12981c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
12991c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13001c5de19aSGarrett Wollman 	 */
1301603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1302603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1303603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13041c5de19aSGarrett Wollman 	}
1305f0068c4aSGarrett Wollman 
1306f0068c4aSGarrett Wollman 	return 0;
1307f0068c4aSGarrett Wollman }
1308f0068c4aSGarrett Wollman 
1309f0068c4aSGarrett Wollman int
1310f0068c4aSGarrett Wollman ip_rsvp_done(void)
1311f0068c4aSGarrett Wollman {
13128b615593SMarko Zec 
1313603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13141c5de19aSGarrett Wollman 	/*
13151c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13161c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13171c5de19aSGarrett Wollman 	 */
1318603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1319603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1320603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13211c5de19aSGarrett Wollman 	}
1322f0068c4aSGarrett Wollman 	return 0;
1323f0068c4aSGarrett Wollman }
1324bbb4330bSLuigi Rizzo 
13258f5a8818SKevin Lo int
13268f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1327bbb4330bSLuigi Rizzo {
13288f5a8818SKevin Lo 	struct mbuf *m;
13298f5a8818SKevin Lo 
13308f5a8818SKevin Lo 	m = *mp;
13318f5a8818SKevin Lo 	*mp = NULL;
13328b615593SMarko Zec 
1333bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13348f5a8818SKevin Lo 		*mp = m;
13358f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13368f5a8818SKevin Lo 		return (IPPROTO_DONE);
1337bbb4330bSLuigi Rizzo 	}
1338bbb4330bSLuigi Rizzo 
1339bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1340bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1341bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1342bbb4330bSLuigi Rizzo 	 */
1343bbb4330bSLuigi Rizzo 
1344603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1345bbb4330bSLuigi Rizzo 		m_freem(m);
13468f5a8818SKevin Lo 		return (IPPROTO_DONE);
1347bbb4330bSLuigi Rizzo 	}
1348bbb4330bSLuigi Rizzo 
1349603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
13508f5a8818SKevin Lo 		*mp = m;
13518f5a8818SKevin Lo 		rip_input(mp, offp, proto);
13528f5a8818SKevin Lo 		return (IPPROTO_DONE);
1353bbb4330bSLuigi Rizzo 	}
1354bbb4330bSLuigi Rizzo 	/* Drop the packet */
1355bbb4330bSLuigi Rizzo 	m_freem(m);
13568f5a8818SKevin Lo 	return (IPPROTO_DONE);
1357bbb4330bSLuigi Rizzo }
1358