xref: /freebsd/sys/netinet/ip_input.c (revision ef91a9765de04e47ad6812d9be6e76fc955ffd87)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
350ac40133SBrian Somers #include "opt_bootp.h"
3674a9466cSGary Palmer #include "opt_ipfw.h"
3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
386a800098SYoshinobu Inoue #include "opt_ipsec.h"
3933553d6eSBjoern A. Zeeb #include "opt_route.h"
40b8bc95cdSAdrian Chadd #include "opt_rss.h"
4174a9466cSGary Palmer 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
43df8bae1dSRodney W. Grimes #include <sys/systm.h>
44*ef91a976SAndrey V. Elsukov #include <sys/hhook.h>
45df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
46b715f178SLuigi Rizzo #include <sys/malloc.h>
47df8bae1dSRodney W. Grimes #include <sys/domain.h>
48df8bae1dSRodney W. Grimes #include <sys/protosw.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/time.h>
51df8bae1dSRodney W. Grimes #include <sys/kernel.h>
52385195c0SMarko Zec #include <sys/lock.h>
53cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h>
54385195c0SMarko Zec #include <sys/rwlock.h>
5557f60867SMark Johnston #include <sys/sdt.h>
561025071fSGarrett Wollman #include <sys/syslog.h>
57b5e8ce9fSBruce Evans #include <sys/sysctl.h>
58df8bae1dSRodney W. Grimes 
59c85540ddSAndrey A. Chernov #include <net/pfil.h>
60df8bae1dSRodney W. Grimes #include <net/if.h>
619494d596SBrooks Davis #include <net/if_types.h>
62d314ad7bSJulian Elischer #include <net/if_var.h>
6382c23ebaSBill Fenner #include <net/if_dl.h>
64df8bae1dSRodney W. Grimes #include <net/route.h>
65748e0b0aSGarrett Wollman #include <net/netisr.h>
66b2bdc62aSAdrian Chadd #include <net/rss_config.h>
674b79449eSBjoern A. Zeeb #include <net/vnet.h>
68df8bae1dSRodney W. Grimes 
69df8bae1dSRodney W. Grimes #include <netinet/in.h>
7057f60867SMark Johnston #include <netinet/in_kdtrace.h>
71df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
72b5e8ce9fSBruce Evans #include <netinet/in_var.h>
73df8bae1dSRodney W. Grimes #include <netinet/ip.h>
74df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
75df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
76eddfbb76SRobert Watson #include <netinet/ip_fw.h>
77df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
78ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7958938916SGarrett Wollman #include <machine/in_cksum.h>
80a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
81b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
821dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h>
8333872124SGeorge V. Neville-Neil #include <netipsec/ipsec.h>
8433872124SGeorge V. Neville-Neil #include <netipsec/key.h>
85b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
86b8bc95cdSAdrian Chadd #include <netinet/in_rss.h>
87df8bae1dSRodney W. Grimes 
88f0068c4aSGarrett Wollman #include <sys/socketvar.h>
896ddbf1e2SGary Palmer 
90aed55708SRobert Watson #include <security/mac/mac_framework.h>
91aed55708SRobert Watson 
92d2035ffbSEd Maste #ifdef CTASSERT
93d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20);
94d2035ffbSEd Maste #endif
95d2035ffbSEd Maste 
961dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */
97843b0e57SXin LI extern void ipreass_init(void);
98843b0e57SXin LI extern void ipreass_drain(void);
99843b0e57SXin LI extern void ipreass_slowtimo(void);
1001dbefcc0SGleb Smirnoff #ifdef VIMAGE
101843b0e57SXin LI extern void ipreass_destroy(void);
1021dbefcc0SGleb Smirnoff #endif
1031dbefcc0SGleb Smirnoff 
104cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock;
105cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
106f0068c4aSGarrett Wollman 
10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on);
10882cea7e6SBjoern A. Zeeb 
10982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding);
1106df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW,
111eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1128b615593SMarko Zec     "Enable IP forwarding between interfaces");
1130312fbe9SPoul-Henning Kamp 
1143e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
11582cea7e6SBjoern A. Zeeb #define	V_ipsendredirects	VNET(ipsendredirects)
1166df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW,
117eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1188b615593SMarko Zec     "Enable sending IP redirects");
1190312fbe9SPoul-Henning Kamp 
120823db0e9SDon Lewis /*
121823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
122823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
123a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
124823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1253f67c834SDon Lewis  *
126a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
127a8f12100SDon Lewis  * to translate the destination address to another local interface.
1283f67c834SDon Lewis  *
1293f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1303f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1313f67c834SDon Lewis  * packets for those addresses are received.
132823db0e9SDon Lewis  */
1333e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface);
13482cea7e6SBjoern A. Zeeb #define	V_ip_checkinterface	VNET(ip_checkinterface)
1356df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW,
136eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1378b615593SMarko Zec     "Verify packet arrives on correct interface");
138b3e95d4eSJonathan Lemon 
1390b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
140df8bae1dSRodney W. Grimes 
141d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
142d4b5cae4SRobert Watson 	.nh_name = "ip",
143d4b5cae4SRobert Watson 	.nh_handler = ip_input,
144d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
145b8bc95cdSAdrian Chadd #ifdef	RSS
1462527ccadSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
147b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
148b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
149b8bc95cdSAdrian Chadd #else
150d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
151b8bc95cdSAdrian Chadd #endif
152d4b5cae4SRobert Watson };
153ca925d9cSJonathan Lemon 
154b8bc95cdSAdrian Chadd #ifdef	RSS
155b8bc95cdSAdrian Chadd /*
156b8bc95cdSAdrian Chadd  * Directly dispatched frames are currently assumed
157b8bc95cdSAdrian Chadd  * to have a flowid already calculated.
158b8bc95cdSAdrian Chadd  *
159b8bc95cdSAdrian Chadd  * It should likely have something that assert it
160b8bc95cdSAdrian Chadd  * actually has valid flow details.
161b8bc95cdSAdrian Chadd  */
162b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = {
163b8bc95cdSAdrian Chadd 	.nh_name = "ip_direct",
164b8bc95cdSAdrian Chadd 	.nh_handler = ip_direct_input,
165b8bc95cdSAdrian Chadd 	.nh_proto = NETISR_IP_DIRECT,
166499baf0aSAdrian Chadd 	.nh_m2cpuid = rss_soft_m2cpuid_v4,
167b8bc95cdSAdrian Chadd 	.nh_policy = NETISR_POLICY_CPU,
168b8bc95cdSAdrian Chadd 	.nh_dispatch = NETISR_DISPATCH_HYBRID,
169b8bc95cdSAdrian Chadd };
170b8bc95cdSAdrian Chadd #endif
171b8bc95cdSAdrian Chadd 
172df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
173f0ffb944SJulian Elischer extern	struct protosw inetsw[];
174df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
17582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
17782cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
178ca925d9cSJonathan Lemon 
1790312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
1800312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
1813d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
1820312fbe9SPoul-Henning Kamp #endif
1830312fbe9SPoul-Henning Kamp 
1841b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
18582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth);
1866df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW,
187eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
188eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
1891b968362SDag-Erling Smørgrav #endif
190eddfbb76SRobert Watson 
191315e3e38SRobert Watson /*
1925da0521fSAndrey V. Elsukov  * IP statistics are stored in the "array" of counter(9)s.
1935923c293SGleb Smirnoff  */
1945da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat);
1955da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat);
1965da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat,
1975da0521fSAndrey V. Elsukov     "IP statistics (struct ipstat, netinet/ip_var.h)");
1985923c293SGleb Smirnoff 
1995923c293SGleb Smirnoff #ifdef VIMAGE
2005da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat);
2015923c293SGleb Smirnoff #endif /* VIMAGE */
2025923c293SGleb Smirnoff 
2035923c293SGleb Smirnoff /*
204315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
2055923c293SGleb Smirnoff  * into ipstat treated as an array.
206315e3e38SRobert Watson  */
207315e3e38SRobert Watson void
208315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
209315e3e38SRobert Watson {
210315e3e38SRobert Watson 
2115da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], 1);
212315e3e38SRobert Watson }
213315e3e38SRobert Watson 
214315e3e38SRobert Watson void
215315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
216315e3e38SRobert Watson {
217315e3e38SRobert Watson 
2185da0521fSAndrey V. Elsukov 	counter_u64_add(VNET(ipstat)[statnum], -1);
219315e3e38SRobert Watson }
220315e3e38SRobert Watson 
221d4b5cae4SRobert Watson static int
222d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
223d4b5cae4SRobert Watson {
224d4b5cae4SRobert Watson 	int error, qlimit;
225d4b5cae4SRobert Watson 
226d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
227d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
228d4b5cae4SRobert Watson 	if (error || !req->newptr)
229d4b5cae4SRobert Watson 		return (error);
230d4b5cae4SRobert Watson 	if (qlimit < 1)
231d4b5cae4SRobert Watson 		return (EINVAL);
232d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
233d4b5cae4SRobert Watson }
234d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
235d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
236d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
237d4b5cae4SRobert Watson 
238d4b5cae4SRobert Watson static int
239d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
240d4b5cae4SRobert Watson {
241d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
242d4b5cae4SRobert Watson 	int error, qdrops;
243d4b5cae4SRobert Watson 
244d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
245d4b5cae4SRobert Watson 	qdrops = qdrops_long;
246d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
247d4b5cae4SRobert Watson 	if (error || !req->newptr)
248d4b5cae4SRobert Watson 		return (error);
249d4b5cae4SRobert Watson 	if (qdrops != 0)
250d4b5cae4SRobert Watson 		return (EINVAL);
251d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
252d4b5cae4SRobert Watson 	return (0);
253d4b5cae4SRobert Watson }
254d4b5cae4SRobert Watson 
255d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
256d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
257d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
258d4b5cae4SRobert Watson 
259b8bc95cdSAdrian Chadd #ifdef	RSS
260b8bc95cdSAdrian Chadd static int
261b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS)
262b8bc95cdSAdrian Chadd {
263b8bc95cdSAdrian Chadd 	int error, qlimit;
264b8bc95cdSAdrian Chadd 
265b8bc95cdSAdrian Chadd 	netisr_getqlimit(&ip_direct_nh, &qlimit);
266b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
267b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
268b8bc95cdSAdrian Chadd 		return (error);
269b8bc95cdSAdrian Chadd 	if (qlimit < 1)
270b8bc95cdSAdrian Chadd 		return (EINVAL);
271b8bc95cdSAdrian Chadd 	return (netisr_setqlimit(&ip_direct_nh, qlimit));
272b8bc95cdSAdrian Chadd }
273b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen,
274b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I",
275b8bc95cdSAdrian Chadd     "Maximum size of the IP direct input queue");
276b8bc95cdSAdrian Chadd 
277b8bc95cdSAdrian Chadd static int
278b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS)
279b8bc95cdSAdrian Chadd {
280b8bc95cdSAdrian Chadd 	u_int64_t qdrops_long;
281b8bc95cdSAdrian Chadd 	int error, qdrops;
282b8bc95cdSAdrian Chadd 
283b8bc95cdSAdrian Chadd 	netisr_getqdrops(&ip_direct_nh, &qdrops_long);
284b8bc95cdSAdrian Chadd 	qdrops = qdrops_long;
285b8bc95cdSAdrian Chadd 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
286b8bc95cdSAdrian Chadd 	if (error || !req->newptr)
287b8bc95cdSAdrian Chadd 		return (error);
288b8bc95cdSAdrian Chadd 	if (qdrops != 0)
289b8bc95cdSAdrian Chadd 		return (EINVAL);
290b8bc95cdSAdrian Chadd 	netisr_clearqdrops(&ip_direct_nh);
291b8bc95cdSAdrian Chadd 	return (0);
292b8bc95cdSAdrian Chadd }
293b8bc95cdSAdrian Chadd 
294b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops,
295b8bc95cdSAdrian Chadd     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I",
296b8bc95cdSAdrian Chadd     "Number of packets dropped from the IP direct input queue");
297b8bc95cdSAdrian Chadd #endif	/* RSS */
298b8bc95cdSAdrian Chadd 
299df8bae1dSRodney W. Grimes /*
300df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
301df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
302df8bae1dSRodney W. Grimes  */
303df8bae1dSRodney W. Grimes void
304f2565d68SRobert Watson ip_init(void)
305df8bae1dSRodney W. Grimes {
306f2565d68SRobert Watson 	struct protosw *pr;
307f2565d68SRobert Watson 	int i;
308df8bae1dSRodney W. Grimes 
309603724d3SBjoern A. Zeeb 	TAILQ_INIT(&V_in_ifaddrhead);
310603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3111ed81b73SMarko Zec 
3121ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3131dbefcc0SGleb Smirnoff 	ipreass_init();
3141ed81b73SMarko Zec 
3150b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3160b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3170b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3180b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3190b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3200b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3210b4b0b0fSJulian Elischer 
322*ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET,
323*ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_in[HHOOK_IPSEC_INET],
324*ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
325*ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register input helper hook\n",
326*ef91a976SAndrey V. Elsukov 		    __func__);
327*ef91a976SAndrey V. Elsukov 	if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET,
328*ef91a976SAndrey V. Elsukov 	    &V_ipsec_hhh_out[HHOOK_IPSEC_INET],
329*ef91a976SAndrey V. Elsukov 	    HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0)
330*ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to register output helper hook\n",
331*ef91a976SAndrey V. Elsukov 		    __func__);
332*ef91a976SAndrey V. Elsukov 
3331ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
3341ed81b73SMarko Zec 	if (!IS_DEFAULT_VNET(curvnet))
3351ed81b73SMarko Zec 		return;
3361ed81b73SMarko Zec 
337f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
33802410549SRobert Watson 	if (pr == NULL)
339db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
340db09bef3SAndre Oppermann 
341db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
342df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
343df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
344db09bef3SAndre Oppermann 	/*
345db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
346db09bef3SAndre Oppermann 	 * in ip_protox[].
347db09bef3SAndre Oppermann 	 */
348f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
349f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
350df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
351db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
352db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
353db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
354df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
355db09bef3SAndre Oppermann 		}
356194a213eSAndrey A. Chernov 
357d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
358b8bc95cdSAdrian Chadd #ifdef	RSS
359b8bc95cdSAdrian Chadd 	netisr_register(&ip_direct_nh);
360b8bc95cdSAdrian Chadd #endif
361df8bae1dSRodney W. Grimes }
362df8bae1dSRodney W. Grimes 
3639802380eSBjoern A. Zeeb #ifdef VIMAGE
3649802380eSBjoern A. Zeeb void
3659802380eSBjoern A. Zeeb ip_destroy(void)
3669802380eSBjoern A. Zeeb {
367*ef91a976SAndrey V. Elsukov 	int error;
3684d3dfd45SMikolaj Golub 
369*ef91a976SAndrey V. Elsukov 	if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0)
3704d3dfd45SMikolaj Golub 		printf("%s: WARNING: unable to unregister pfil hook, "
371*ef91a976SAndrey V. Elsukov 		    "error %d\n", __func__, error);
3729802380eSBjoern A. Zeeb 
373*ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]);
374*ef91a976SAndrey V. Elsukov 	if (error != 0) {
375*ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister input helper hook "
376*ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: "
377*ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
378*ef91a976SAndrey V. Elsukov 	}
379*ef91a976SAndrey V. Elsukov 	error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]);
380*ef91a976SAndrey V. Elsukov 	if (error != 0) {
381*ef91a976SAndrey V. Elsukov 		printf("%s: WARNING: unable to deregister output helper hook "
382*ef91a976SAndrey V. Elsukov 		    "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: "
383*ef91a976SAndrey V. Elsukov 		    "error %d returned\n", __func__, error);
384*ef91a976SAndrey V. Elsukov 	}
3859802380eSBjoern A. Zeeb 	/* Cleanup in_ifaddr hash table; should be empty. */
3869802380eSBjoern A. Zeeb 	hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask);
3879802380eSBjoern A. Zeeb 
388e3c2c634SGleb Smirnoff 	/* Destroy IP reassembly queue. */
3891dbefcc0SGleb Smirnoff 	ipreass_destroy();
3909802380eSBjoern A. Zeeb }
3919802380eSBjoern A. Zeeb #endif
3929802380eSBjoern A. Zeeb 
393b8bc95cdSAdrian Chadd #ifdef	RSS
394b8bc95cdSAdrian Chadd /*
395b8bc95cdSAdrian Chadd  * IP direct input routine.
396b8bc95cdSAdrian Chadd  *
397b8bc95cdSAdrian Chadd  * This is called when reinjecting completed fragments where
398b8bc95cdSAdrian Chadd  * all of the previous checking and book-keeping has been done.
399b8bc95cdSAdrian Chadd  */
400b8bc95cdSAdrian Chadd void
401b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m)
402b8bc95cdSAdrian Chadd {
403b8bc95cdSAdrian Chadd 	struct ip *ip;
404b8bc95cdSAdrian Chadd 	int hlen;
405b8bc95cdSAdrian Chadd 
406b8bc95cdSAdrian Chadd 	ip = mtod(m, struct ip *);
407b8bc95cdSAdrian Chadd 	hlen = ip->ip_hl << 2;
408b8bc95cdSAdrian Chadd 
409b8bc95cdSAdrian Chadd 	IPSTAT_INC(ips_delivered);
410b8bc95cdSAdrian Chadd 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
411b8bc95cdSAdrian Chadd 	return;
412b8bc95cdSAdrian Chadd }
413b8bc95cdSAdrian Chadd #endif
414b8bc95cdSAdrian Chadd 
4154d2e3692SLuigi Rizzo /*
416df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
417df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
418df8bae1dSRodney W. Grimes  */
419c67b1d17SGarrett Wollman void
420c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
421df8bae1dSRodney W. Grimes {
4229188b4a1SAndre Oppermann 	struct ip *ip = NULL;
4235da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
424ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
4250aade26eSRobert Watson 	struct ifnet *ifp;
4269b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
42721d172a3SGleb Smirnoff 	uint16_t sum, ip_len;
42802c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
429f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
430b715f178SLuigi Rizzo 
431fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
432db40007dSAndrew R. Reiter 
433ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
43476ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
43576ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4362b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
43753be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4388f134647SGleb Smirnoff 		ip_len = ntohs(ip->ip_len);
4399b932e9eSAndre Oppermann 		goto ours;
4402b25acc1SLuigi Rizzo 	}
4412b25acc1SLuigi Rizzo 
44286425c62SRobert Watson 	IPSTAT_INC(ips_total);
44358938916SGarrett Wollman 
44458938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
44558938916SGarrett Wollman 		goto tooshort;
44658938916SGarrett Wollman 
447df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4480b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
44986425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
450c67b1d17SGarrett Wollman 		return;
451df8bae1dSRodney W. Grimes 	}
452df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
45358938916SGarrett Wollman 
45453be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
45586425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
456df8bae1dSRodney W. Grimes 		goto bad;
457df8bae1dSRodney W. Grimes 	}
45858938916SGarrett Wollman 
45953be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
460df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
46186425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
462df8bae1dSRodney W. Grimes 		goto bad;
463df8bae1dSRodney W. Grimes 	}
464df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4650b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
46686425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
467c67b1d17SGarrett Wollman 			return;
468df8bae1dSRodney W. Grimes 		}
469df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
470df8bae1dSRodney W. Grimes 	}
47133841545SHajimu UMEMOTO 
47257f60867SMark Johnston 	IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL);
47357f60867SMark Johnston 
47433841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
4750aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
47633841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
47733841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
4780aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
47986425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
48033841545SHajimu UMEMOTO 			goto bad;
48133841545SHajimu UMEMOTO 		}
48233841545SHajimu UMEMOTO 	}
48333841545SHajimu UMEMOTO 
484db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
485db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
486db4f9cc7SJonathan Lemon 	} else {
48758938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
48847c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
48958938916SGarrett Wollman 		} else {
49047c861ecSBrian Somers 			sum = in_cksum(m, hlen);
49158938916SGarrett Wollman 		}
492db4f9cc7SJonathan Lemon 	}
49347c861ecSBrian Somers 	if (sum) {
49486425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
495df8bae1dSRodney W. Grimes 		goto bad;
496df8bae1dSRodney W. Grimes 	}
497df8bae1dSRodney W. Grimes 
49802b199f1SMax Laier #ifdef ALTQ
49902b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
50002b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
50102b199f1SMax Laier 		return;
50202b199f1SMax Laier #endif
50302b199f1SMax Laier 
50421d172a3SGleb Smirnoff 	ip_len = ntohs(ip->ip_len);
50521d172a3SGleb Smirnoff 	if (ip_len < hlen) {
50686425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
507df8bae1dSRodney W. Grimes 		goto bad;
508df8bae1dSRodney W. Grimes 	}
509df8bae1dSRodney W. Grimes 
510df8bae1dSRodney W. Grimes 	/*
511df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
512df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
513df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
514df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
515df8bae1dSRodney W. Grimes 	 */
51621d172a3SGleb Smirnoff 	if (m->m_pkthdr.len < ip_len) {
51758938916SGarrett Wollman tooshort:
51886425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
519df8bae1dSRodney W. Grimes 		goto bad;
520df8bae1dSRodney W. Grimes 	}
52121d172a3SGleb Smirnoff 	if (m->m_pkthdr.len > ip_len) {
522df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
52321d172a3SGleb Smirnoff 			m->m_len = ip_len;
52421d172a3SGleb Smirnoff 			m->m_pkthdr.len = ip_len;
525df8bae1dSRodney W. Grimes 		} else
52621d172a3SGleb Smirnoff 			m_adj(m, ip_len - m->m_pkthdr.len);
527df8bae1dSRodney W. Grimes 	}
528b8bc95cdSAdrian Chadd 
52933872124SGeorge V. Neville-Neil 	/* Try to forward the packet, but if we fail continue */
530b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
53133872124SGeorge V. Neville-Neil 	/* For now we do not handle IPSEC in tryforward. */
53233872124SGeorge V. Neville-Neil 	if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) &&
53333872124SGeorge V. Neville-Neil 	    (V_ipforwarding == 1))
53433872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
53533872124SGeorge V. Neville-Neil 			return;
53614dd6717SSam Leffler 	/*
537ffe8cd7bSBjoern A. Zeeb 	 * Bypass packet filtering for packets previously handled by IPsec.
53814dd6717SSam Leffler 	 */
539cc977adcSBjoern A. Zeeb 	if (ip_ipsec_filtertunnel(m))
540c21fd232SAndre Oppermann 		goto passin;
54133872124SGeorge V. Neville-Neil #else
54233872124SGeorge V. Neville-Neil 	if (V_ipforwarding == 1)
54333872124SGeorge V. Neville-Neil 		if (ip_tryforward(m) == NULL)
54433872124SGeorge V. Neville-Neil 			return;
545b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
5463f67c834SDon Lewis 
547c4ac87eaSDarren Reed 	/*
548134ea224SSam Leffler 	 * Run through list of hooks for input packets.
549f51f805fSSam Leffler 	 *
550f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
551f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
552f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
553c4ac87eaSDarren Reed 	 */
554c21fd232SAndre Oppermann 
555c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5560b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
557c21fd232SAndre Oppermann 		goto passin;
558c21fd232SAndre Oppermann 
559f51f805fSSam Leffler 	odst = ip->ip_dst;
5600b4b0b0fSJulian Elischer 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
561beec8214SDarren Reed 		return;
562134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
563c4ac87eaSDarren Reed 		return;
5649b932e9eSAndre Oppermann 
565c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
56602c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
5670aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5689b932e9eSAndre Oppermann 
5699b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
5709b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
5719b932e9eSAndre Oppermann 		goto ours;
5729b932e9eSAndre Oppermann 	}
573ffdbf9daSAndrey V. Elsukov 	if (m->m_flags & M_IP_NEXTHOP) {
574ffdbf9daSAndrey V. Elsukov 		dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL);
575ffdbf9daSAndrey V. Elsukov 		if (dchg != 0) {
576099dd043SAndre Oppermann 			/*
577ffdbf9daSAndrey V. Elsukov 			 * Directly ship the packet on.  This allows
578ffdbf9daSAndrey V. Elsukov 			 * forwarding packets originally destined to us
579ffdbf9daSAndrey V. Elsukov 			 * to some other directly connected host.
580099dd043SAndre Oppermann 			 */
581ffdbf9daSAndrey V. Elsukov 			ip_forward(m, 1);
582099dd043SAndre Oppermann 			return;
583099dd043SAndre Oppermann 		}
584ffdbf9daSAndrey V. Elsukov 	}
585c21fd232SAndre Oppermann passin:
58621d172a3SGleb Smirnoff 
58721d172a3SGleb Smirnoff 	/*
588df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
589df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
590df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
591df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
592df8bae1dSRodney W. Grimes 	 */
5939b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
594c67b1d17SGarrett Wollman 		return;
595df8bae1dSRodney W. Grimes 
596f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
597f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
598f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
599f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
600f0068c4aSGarrett Wollman 	 * grabbing the packet.
601f0068c4aSGarrett Wollman          */
602603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
603f0068c4aSGarrett Wollman 		goto ours;
604f0068c4aSGarrett Wollman 
605df8bae1dSRodney W. Grimes 	/*
606df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
607cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
608cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
609cc766e04SGarrett Wollman 	 * with it).
610df8bae1dSRodney W. Grimes 	 */
611603724d3SBjoern A. Zeeb 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
612cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
613cc766e04SGarrett Wollman 		goto ours;
614cc766e04SGarrett Wollman 
6157538a9a0SJonathan Lemon 	/*
616823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
617823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
618823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
619e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
620e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
6213f67c834SDon Lewis 	 *
6223f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
6233f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
6243f67c834SDon Lewis 	 *
625a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
6263f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
6273f67c834SDon Lewis 	 * the packets are received.
628a9771948SGleb Smirnoff 	 *
629a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
630a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
631a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
632823db0e9SDon Lewis 	 */
633603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
6340aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
63554bfbd51SWill Andrews 	    ifp->if_carp == NULL && (dchg == 0);
636823db0e9SDon Lewis 
637ca925d9cSJonathan Lemon 	/*
638ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
639ca925d9cSJonathan Lemon 	 */
6402d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6419b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
642f9e354dfSJulian Elischer 		/*
643823db0e9SDon Lewis 		 * If the address matches, verify that the packet
644823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
645823db0e9SDon Lewis 		 * enabled.
646f9e354dfSJulian Elischer 		 */
6479b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6488c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6497caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6507caf4ab7SGleb Smirnoff 			counter_u64_add(ia->ia_ifa.ifa_ibytes,
6517caf4ab7SGleb Smirnoff 			    m->m_pkthdr.len);
6522d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
653ed1ff184SJulian Elischer 			goto ours;
654ca925d9cSJonathan Lemon 		}
6558c0fec80SRobert Watson 	}
6562d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6572d9cfabaSRobert Watson 
658823db0e9SDon Lewis 	/*
659ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
660ca925d9cSJonathan Lemon 	 *
661ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
662ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
663ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
664ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
665823db0e9SDon Lewis 	 */
6660aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
667137f91e8SJohn Baldwin 		IF_ADDR_RLOCK(ifp);
6680aade26eSRobert Watson 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
669ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
670ca925d9cSJonathan Lemon 				continue;
671ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
672df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6730aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
6747caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6757caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
6767caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
677137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
678df8bae1dSRodney W. Grimes 				goto ours;
6790aade26eSRobert Watson 			}
6800ac40133SBrian Somers #ifdef BOOTP_COMPAT
6810aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
6827caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ipackets, 1);
6837caf4ab7SGleb Smirnoff 				counter_u64_add(ia->ia_ifa.ifa_ibytes,
6847caf4ab7SGleb Smirnoff 				    m->m_pkthdr.len);
685137f91e8SJohn Baldwin 				IF_ADDR_RUNLOCK(ifp);
686ca925d9cSJonathan Lemon 				goto ours;
6870aade26eSRobert Watson 			}
6880ac40133SBrian Somers #endif
689df8bae1dSRodney W. Grimes 		}
690137f91e8SJohn Baldwin 		IF_ADDR_RUNLOCK(ifp);
69119e5b0a7SRobert Watson 		ia = NULL;
692df8bae1dSRodney W. Grimes 	}
693f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
694f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
69586425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
696f8429ca2SBruce M Simpson 		m_freem(m);
697f8429ca2SBruce M Simpson 		return;
698f8429ca2SBruce M Simpson 	}
699df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
700603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
701df8bae1dSRodney W. Grimes 			/*
702df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
703df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
704df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
705df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
706df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
707df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
708df8bae1dSRodney W. Grimes 			 */
7090aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
71086425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
711df8bae1dSRodney W. Grimes 				m_freem(m);
712c67b1d17SGarrett Wollman 				return;
713df8bae1dSRodney W. Grimes 			}
714df8bae1dSRodney W. Grimes 
715df8bae1dSRodney W. Grimes 			/*
71611612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
717df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
718df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
719df8bae1dSRodney W. Grimes 			 */
720df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
721df8bae1dSRodney W. Grimes 				goto ours;
72286425c62SRobert Watson 			IPSTAT_INC(ips_forward);
723df8bae1dSRodney W. Grimes 		}
724df8bae1dSRodney W. Grimes 		/*
725d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
726d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
727d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
728df8bae1dSRodney W. Grimes 		 */
729df8bae1dSRodney W. Grimes 		goto ours;
730df8bae1dSRodney W. Grimes 	}
731df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
732df8bae1dSRodney W. Grimes 		goto ours;
733df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
734df8bae1dSRodney W. Grimes 		goto ours;
735df8bae1dSRodney W. Grimes 
7366a800098SYoshinobu Inoue 	/*
737df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
738df8bae1dSRodney W. Grimes 	 */
739603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
74086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
741df8bae1dSRodney W. Grimes 		m_freem(m);
742546f251bSChris D. Faulhaber 	} else {
7439b932e9eSAndre Oppermann 		ip_forward(m, dchg);
744546f251bSChris D. Faulhaber 	}
745c67b1d17SGarrett Wollman 	return;
746df8bae1dSRodney W. Grimes 
747df8bae1dSRodney W. Grimes ours:
748d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
749d0ebc0d2SYaroslav Tykhiy 	/*
750d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
751d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
752d0ebc0d2SYaroslav Tykhiy 	 */
7537caf4ab7SGleb Smirnoff 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1))
754d0ebc0d2SYaroslav Tykhiy 		return;
755d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
756d0ebc0d2SYaroslav Tykhiy 
75763f8d699SJordan K. Hubbard 	/*
758b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
759ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
760df8bae1dSRodney W. Grimes 	 */
7618f134647SGleb Smirnoff 	if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) {
762aa69c612SGleb Smirnoff 		/* XXXGL: shouldn't we save & set m_flags? */
763f0cada84SAndre Oppermann 		m = ip_reass(m);
764f0cada84SAndre Oppermann 		if (m == NULL)
765c67b1d17SGarrett Wollman 			return;
7666a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7677e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
76853be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
769f0cada84SAndre Oppermann 	}
770f0cada84SAndre Oppermann 
771b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
77233841545SHajimu UMEMOTO 	/*
77333841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
77433841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
77533841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
77633841545SHajimu UMEMOTO 	 */
777e58320f1SAndrey V. Elsukov 	if (ip_ipsec_input(m, ip->ip_p) != 0)
77833841545SHajimu UMEMOTO 		goto bad;
779b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
78033841545SHajimu UMEMOTO 
781df8bae1dSRodney W. Grimes 	/*
782df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
783df8bae1dSRodney W. Grimes 	 */
78486425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
7859b932e9eSAndre Oppermann 
7868f5a8818SKevin Lo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p);
787c67b1d17SGarrett Wollman 	return;
788df8bae1dSRodney W. Grimes bad:
789df8bae1dSRodney W. Grimes 	m_freem(m);
790c67b1d17SGarrett Wollman }
791c67b1d17SGarrett Wollman 
792c67b1d17SGarrett Wollman /*
793df8bae1dSRodney W. Grimes  * IP timer processing;
794df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
795df8bae1dSRodney W. Grimes  * queue, discard it.
 *
 * This function only drives the iteration: it calls ipreass_slowtimo()
 * once for every vnet, with that vnet installed as the current one.
 * Takes no arguments and returns nothing.
796df8bae1dSRodney W. Grimes  */
797df8bae1dSRodney W. Grimes void
798f2565d68SRobert Watson ip_slowtimo(void)
799df8bae1dSRodney W. Grimes {
8008b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
801df8bae1dSRodney W. Grimes 
	/* The vnet list stays locked from here until the walk is finished. */
8025ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
8038b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
		/* Switch to this vnet, run its reassembly timer, switch back. */
8048b615593SMarko Zec 		CURVNET_SET(vnet_iter);
8051dbefcc0SGleb Smirnoff 		ipreass_slowtimo();
8068b615593SMarko Zec 		CURVNET_RESTORE();
8078b615593SMarko Zec 	}
8085ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
809df8bae1dSRodney W. Grimes }
810df8bae1dSRodney W. Grimes 
/*
 * Call ipreass_drain() once for every vnet, with that vnet installed as
 * the current one.  Takes no arguments and returns nothing.
 *
 * NOTE(review): presumably this releases held reassembly state when the
 * system asks protocols to give memory back -- confirm against
 * ipreass_drain() and the callers of ip_drain().
 */
8119802380eSBjoern A. Zeeb void
8129802380eSBjoern A. Zeeb ip_drain(void)
8139802380eSBjoern A. Zeeb {
8149802380eSBjoern A. Zeeb 	VNET_ITERATOR_DECL(vnet_iter);
8159802380eSBjoern A. Zeeb 
	/* The vnet list stays locked from here until the walk is finished. */
8169802380eSBjoern A. Zeeb 	VNET_LIST_RLOCK_NOSLEEP();
8179802380eSBjoern A. Zeeb 	VNET_FOREACH(vnet_iter) {
		/* Switch to this vnet, drain it, switch back. */
8189802380eSBjoern A. Zeeb 		CURVNET_SET(vnet_iter);
8191dbefcc0SGleb Smirnoff 		ipreass_drain();
8208b615593SMarko Zec 		CURVNET_RESTORE();
8218b615593SMarko Zec 	}
8225ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
823df8bae1dSRodney W. Grimes }
824df8bae1dSRodney W. Grimes 
825df8bae1dSRodney W. Grimes /*
826de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
827de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
828de38924dSAndre Oppermann  */
829de38924dSAndre Oppermann int
8301b48d245SBjoern A. Zeeb ipproto_register(short ipproto)
831de38924dSAndre Oppermann {
832de38924dSAndre Oppermann 	struct protosw *pr;
833de38924dSAndre Oppermann 
834de38924dSAndre Oppermann 	/* Sanity checks. */
8351b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
836de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
837de38924dSAndre Oppermann 
838de38924dSAndre Oppermann 	/*
839de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
840de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
841de38924dSAndre Oppermann 	 */
842de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
843de38924dSAndre Oppermann 	if (pr == NULL)
844de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
845de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
846de38924dSAndre Oppermann 		return (EEXIST);
847de38924dSAndre Oppermann 
848de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
849de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
850de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
851de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
852de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
853de38924dSAndre Oppermann 			ip_protox[pr->pr_protocol] = pr - inetsw;
854de38924dSAndre Oppermann 			return (0);
855de38924dSAndre Oppermann 		}
856de38924dSAndre Oppermann 	}
857de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
858de38924dSAndre Oppermann }
859de38924dSAndre Oppermann 
860de38924dSAndre Oppermann int
8611b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto)
862de38924dSAndre Oppermann {
863de38924dSAndre Oppermann 	struct protosw *pr;
864de38924dSAndre Oppermann 
865de38924dSAndre Oppermann 	/* Sanity checks. */
8661b48d245SBjoern A. Zeeb 	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
867de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
868de38924dSAndre Oppermann 
869de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
870de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
871de38924dSAndre Oppermann 	if (pr == NULL)
872de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
873de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
874de38924dSAndre Oppermann 		return (ENOENT);
875de38924dSAndre Oppermann 
876de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
877de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
878de38924dSAndre Oppermann 	return (0);
879de38924dSAndre Oppermann }
880de38924dSAndre Oppermann 
881df8bae1dSRodney W. Grimes /*
8828c0fec80SRobert Watson  * Given address of next destination (final or next hop), return (referenced)
8838c0fec80SRobert Watson  * internet address info of interface to be used to get there.
884df8bae1dSRodney W. Grimes  */
885bd714208SRuslan Ermilov struct in_ifaddr *
8868b07e49aSJulian Elischer ip_rtaddr(struct in_addr dst, u_int fibnum)
887df8bae1dSRodney W. Grimes {
88897d8d152SAndre Oppermann 	struct route sro;
88902c1c707SAndre Oppermann 	struct sockaddr_in *sin;
89019e5b0a7SRobert Watson 	struct in_ifaddr *ia;
891df8bae1dSRodney W. Grimes 
8920cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
89397d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
894df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
895df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
896df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
8976e6b3f7cSQing Li 	in_rtalloc_ign(&sro, 0, fibnum);
898df8bae1dSRodney W. Grimes 
89997d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
90002410549SRobert Watson 		return (NULL);
90102c1c707SAndre Oppermann 
90219e5b0a7SRobert Watson 	ia = ifatoia(sro.ro_rt->rt_ifa);
90319e5b0a7SRobert Watson 	ifa_ref(&ia->ia_ifa);
90497d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
90519e5b0a7SRobert Watson 	return (ia);
906df8bae1dSRodney W. Grimes }
907df8bae1dSRodney W. Grimes 
/*
 * Table of PRC_NCMDS entries, each holding either 0 or an errno value.
 *
 * NOTE(review): presumably indexed by PRC_* control command code, with 0
 * meaning "no error to report" -- confirm against the PRC_* definitions
 * and the users of this table before relying on a particular position.
 */
908df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
909df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
910df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
911df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
912df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
913fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
9143b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
915df8bae1dSRodney W. Grimes };
916df8bae1dSRodney W. Grimes 
917df8bae1dSRodney W. Grimes /*
918df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
919df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
920df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
921df8bae1dSRodney W. Grimes  * of codes and types.
922df8bae1dSRodney W. Grimes  *
923df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
924df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
925df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
926df8bae1dSRodney W. Grimes  * protocol deal with that.
927df8bae1dSRodney W. Grimes  *
928df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
929df8bae1dSRodney W. Grimes  * via a source route.
930df8bae1dSRodney W. Grimes  */
9319b932e9eSAndre Oppermann void
9329b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
933df8bae1dSRodney W. Grimes {
9342b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
935efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
936df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
937d14122b0SErmal Luçi 	struct sockaddr_in *sin;
9389b932e9eSAndre Oppermann 	struct in_addr dest;
939b835b6feSBjoern A. Zeeb 	struct route ro;
940c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
9413efc3014SJulian Elischer 
9429b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
94386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
944df8bae1dSRodney W. Grimes 		m_freem(m);
945df8bae1dSRodney W. Grimes 		return;
946df8bae1dSRodney W. Grimes 	}
9478922ddbeSAndrey V. Elsukov #ifdef IPSEC
9488922ddbeSAndrey V. Elsukov 	if (ip_ipsec_fwd(m) != 0) {
9498922ddbeSAndrey V. Elsukov 		IPSTAT_INC(ips_cantforward);
9508922ddbeSAndrey V. Elsukov 		m_freem(m);
9518922ddbeSAndrey V. Elsukov 		return;
9528922ddbeSAndrey V. Elsukov 	}
9538922ddbeSAndrey V. Elsukov #endif /* IPSEC */
9541b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
955603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
9561b968362SDag-Erling Smørgrav #endif
957df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
9581b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
95902c1c707SAndre Oppermann 			    0, 0);
960df8bae1dSRodney W. Grimes 			return;
961df8bae1dSRodney W. Grimes 		}
9621b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
9631b968362SDag-Erling Smørgrav 	}
9641b968362SDag-Erling Smørgrav #endif
965df8bae1dSRodney W. Grimes 
966d14122b0SErmal Luçi 	bzero(&ro, sizeof(ro));
967d14122b0SErmal Luçi 	sin = (struct sockaddr_in *)&ro.ro_dst;
968d14122b0SErmal Luçi 	sin->sin_family = AF_INET;
969d14122b0SErmal Luçi 	sin->sin_len = sizeof(*sin);
970d14122b0SErmal Luçi 	sin->sin_addr = ip->ip_dst;
971d14122b0SErmal Luçi #ifdef RADIX_MPATH
972d14122b0SErmal Luçi 	rtalloc_mpath_fib(&ro,
973d14122b0SErmal Luçi 	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
974d14122b0SErmal Luçi 	    M_GETFIB(m));
975d14122b0SErmal Luçi #else
976d14122b0SErmal Luçi 	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
977d14122b0SErmal Luçi #endif
978d14122b0SErmal Luçi 	if (ro.ro_rt != NULL) {
979d14122b0SErmal Luçi 		ia = ifatoia(ro.ro_rt->rt_ifa);
980d14122b0SErmal Luçi 		ifa_ref(&ia->ia_ifa);
98156844a62SErmal Luçi 	} else
98256844a62SErmal Luçi 		ia = NULL;
983efbad259SEdward Tomasz Napierala #ifndef IPSEC
984efbad259SEdward Tomasz Napierala 	/*
985efbad259SEdward Tomasz Napierala 	 * 'ia' may be NULL if there is no route for this destination.
986efbad259SEdward Tomasz Napierala 	 * In case of IPsec, Don't discard it just yet, but pass it to
987efbad259SEdward Tomasz Napierala 	 * ip_output in case of outgoing IPsec policy.
988efbad259SEdward Tomasz Napierala 	 */
989d23d475fSGuido van Rooij 	if (!srcrt && ia == NULL) {
99002c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
991d14122b0SErmal Luçi 		RO_RTFREE(&ro);
992df8bae1dSRodney W. Grimes 		return;
99302c1c707SAndre Oppermann 	}
994efbad259SEdward Tomasz Napierala #endif
995df8bae1dSRodney W. Grimes 
996df8bae1dSRodney W. Grimes 	/*
997bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
998bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
999bfef7ed4SIan Dowse 	 *
10004d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
10014d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
10024d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
10034d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
10044d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
10054d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
10064d2e3692SLuigi Rizzo 	 *
1007bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1008bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1009bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1010bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1011df8bae1dSRodney W. Grimes 	 */
1012dc4ad05eSGleb Smirnoff 	mcopy = m_gethdr(M_NOWAIT, m->m_type);
1013eb1b1807SGleb Smirnoff 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
10149967cafcSSam Leffler 		/*
10159967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
10169967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
10179967cafcSSam Leffler 		 * be conservative and just discard the copy since
10189967cafcSSam Leffler 		 * code below may some day want the tags.
10199967cafcSSam Leffler 		 */
10209967cafcSSam Leffler 		m_free(mcopy);
10219967cafcSSam Leffler 		mcopy = NULL;
10229967cafcSSam Leffler 	}
1023bfef7ed4SIan Dowse 	if (mcopy != NULL) {
10248f134647SGleb Smirnoff 		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
1025e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1026bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1027bfef7ed4SIan Dowse 	}
102804287599SRuslan Ermilov 
102904287599SRuslan Ermilov #ifdef IPSTEALTH
1030603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
103104287599SRuslan Ermilov #endif
103204287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
103304287599SRuslan Ermilov #ifdef IPSTEALTH
103404287599SRuslan Ermilov 	}
103504287599SRuslan Ermilov #endif
1036df8bae1dSRodney W. Grimes 
1037df8bae1dSRodney W. Grimes 	/*
1038df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1039df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1040df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1041df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1042df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1043df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1044df8bae1dSRodney W. Grimes 	 */
10459b932e9eSAndre Oppermann 	dest.s_addr = 0;
1046efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1047efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
104802c1c707SAndre Oppermann 		struct rtentry *rt;
104902c1c707SAndre Oppermann 
105002c1c707SAndre Oppermann 		rt = ro.ro_rt;
105102c1c707SAndre Oppermann 
105202c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
10539b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1054df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1055df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1056df8bae1dSRodney W. Grimes 
1057df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1058df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1059df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
10609b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1061df8bae1dSRodney W. Grimes 				else
10629b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1063df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1064df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1065df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1066df8bae1dSRodney W. Grimes 			}
1067df8bae1dSRodney W. Grimes 		}
106802c1c707SAndre Oppermann 	}
1069df8bae1dSRodney W. Grimes 
1070b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1071b835b6feSBjoern A. Zeeb 
1072b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1073e3a7aa6fSGleb Smirnoff 		mtu = ro.ro_rt->rt_mtu;
1074bf984051SGleb Smirnoff 	RO_RTFREE(&ro);
1075b835b6feSBjoern A. Zeeb 
1076df8bae1dSRodney W. Grimes 	if (error)
107786425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1078df8bae1dSRodney W. Grimes 	else {
107986425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1080df8bae1dSRodney W. Grimes 		if (type)
108186425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1082df8bae1dSRodney W. Grimes 		else {
10839188b4a1SAndre Oppermann 			if (mcopy)
1084df8bae1dSRodney W. Grimes 				m_freem(mcopy);
10858c0fec80SRobert Watson 			if (ia != NULL)
10868c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1087df8bae1dSRodney W. Grimes 			return;
1088df8bae1dSRodney W. Grimes 		}
1089df8bae1dSRodney W. Grimes 	}
10908c0fec80SRobert Watson 	if (mcopy == NULL) {
10918c0fec80SRobert Watson 		if (ia != NULL)
10928c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
1093df8bae1dSRodney W. Grimes 		return;
10948c0fec80SRobert Watson 	}
1095df8bae1dSRodney W. Grimes 
1096df8bae1dSRodney W. Grimes 	switch (error) {
1097df8bae1dSRodney W. Grimes 
1098df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1099df8bae1dSRodney W. Grimes 		/* type, code set above */
1100df8bae1dSRodney W. Grimes 		break;
1101df8bae1dSRodney W. Grimes 
1102efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1103df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1104df8bae1dSRodney W. Grimes 	case ENETDOWN:
1105df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1106df8bae1dSRodney W. Grimes 	default:
1107df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1108df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1109df8bae1dSRodney W. Grimes 		break;
1110df8bae1dSRodney W. Grimes 
1111df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1112df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1113df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
11141dfcf0d2SAndre Oppermann 
1115b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
1116b835b6feSBjoern A. Zeeb 		/*
1117b835b6feSBjoern A. Zeeb 		 * If IPsec is configured for this path,
1118b835b6feSBjoern A. Zeeb 		 * override any possibly mtu value set by ip_output.
1119b835b6feSBjoern A. Zeeb 		 */
11201c044382SBjoern A. Zeeb 		mtu = ip_ipsec_mtu(mcopy, mtu);
1121b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
11229b932e9eSAndre Oppermann 		/*
1123b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1124b835b6feSBjoern A. Zeeb 		 * interface MTU.
1125ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1126ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1127ab48768bSAndre Oppermann 		 * current packet size.
11289b932e9eSAndre Oppermann 		 */
1129b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1130b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1131b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1132b835b6feSBjoern A. Zeeb 		} else {
1133ab48768bSAndre Oppermann 			if (ia != NULL)
1134c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1135ab48768bSAndre Oppermann 			else
11368f134647SGleb Smirnoff 				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
1137ab48768bSAndre Oppermann 		}
113886425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1139df8bae1dSRodney W. Grimes 		break;
1140df8bae1dSRodney W. Grimes 
1141df8bae1dSRodney W. Grimes 	case ENOBUFS:
11423a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
11433a06e3e0SRuslan Ermilov 		m_freem(mcopy);
11448c0fec80SRobert Watson 		if (ia != NULL)
11458c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
11463a06e3e0SRuslan Ermilov 		return;
1147df8bae1dSRodney W. Grimes 	}
11488c0fec80SRobert Watson 	if (ia != NULL)
11498c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
1150c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1151df8bae1dSRodney W. Grimes }
1152df8bae1dSRodney W. Grimes 
115382c23ebaSBill Fenner void
1154f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1155f2565d68SRobert Watson     struct mbuf *m)
115682c23ebaSBill Fenner {
11578b615593SMarko Zec 
1158be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
1159be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1160be8a62e8SPoul-Henning Kamp 
1161be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1162be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
1163be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt),
1164be8a62e8SPoul-Henning Kamp 			    SCM_BINTIME, SOL_SOCKET);
1165be8a62e8SPoul-Henning Kamp 			if (*mp)
1166be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
1167be8a62e8SPoul-Henning Kamp 		}
116882c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
116982c23ebaSBill Fenner 			struct timeval tv;
117082c23ebaSBill Fenner 
1171be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
117282c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv),
117382c23ebaSBill Fenner 			    SCM_TIMESTAMP, SOL_SOCKET);
117482c23ebaSBill Fenner 			if (*mp)
117582c23ebaSBill Fenner 				mp = &(*mp)->m_next;
11764cc20ab1SSeigo Tanimura 		}
1177be8a62e8SPoul-Henning Kamp 	}
117882c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
117982c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)&ip->ip_dst,
118082c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
118182c23ebaSBill Fenner 		if (*mp)
118282c23ebaSBill Fenner 			mp = &(*mp)->m_next;
118382c23ebaSBill Fenner 	}
11844957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
11854957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t)&ip->ip_ttl,
11864957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
11874957466bSMatthew N. Dodd 		if (*mp)
11884957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
11894957466bSMatthew N. Dodd 	}
119082c23ebaSBill Fenner #ifdef notyet
119182c23ebaSBill Fenner 	/* XXX
119282c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
119382c23ebaSBill Fenner 	 * than they already were.
119482c23ebaSBill Fenner 	 */
119582c23ebaSBill Fenner 	/* options were tossed already */
119682c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
119782c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t)opts_deleted_above,
119882c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
119982c23ebaSBill Fenner 		if (*mp)
120082c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120182c23ebaSBill Fenner 	}
120282c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
120382c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1204e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t)ip_srcroute(m),
120582c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
120682c23ebaSBill Fenner 		if (*mp)
120782c23ebaSBill Fenner 			mp = &(*mp)->m_next;
120882c23ebaSBill Fenner 	}
120982c23ebaSBill Fenner #endif
121082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1211d314ad7bSJulian Elischer 		struct ifnet *ifp;
1212d314ad7bSJulian Elischer 		struct sdlbuf {
121382c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1214d314ad7bSJulian Elischer 			u_char	pad[32];
1215d314ad7bSJulian Elischer 		} sdlbuf;
1216d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1217d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
121882c23ebaSBill Fenner 
121946f2df9cSSergey Kandaurov 		if ((ifp = m->m_pkthdr.rcvif) &&
122046f2df9cSSergey Kandaurov 		    ifp->if_index && ifp->if_index <= V_if_index) {
12214a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1222d314ad7bSJulian Elischer 			/*
1223d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1224d314ad7bSJulian Elischer 			 */
122546f2df9cSSergey Kandaurov 			if (sdp->sdl_family != AF_LINK ||
122646f2df9cSSergey Kandaurov 			    sdp->sdl_len > sizeof(sdlbuf)) {
1227d314ad7bSJulian Elischer 				goto makedummy;
1228d314ad7bSJulian Elischer 			}
1229d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1230d314ad7bSJulian Elischer 		} else {
1231d314ad7bSJulian Elischer makedummy:
123246f2df9cSSergey Kandaurov 			sdl2->sdl_len =
123346f2df9cSSergey Kandaurov 			    offsetof(struct sockaddr_dl, sdl_data[0]);
1234d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1235d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1236d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1237d314ad7bSJulian Elischer 		}
1238d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len,
123982c23ebaSBill Fenner 		    IP_RECVIF, IPPROTO_IP);
124082c23ebaSBill Fenner 		if (*mp)
124182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
124282c23ebaSBill Fenner 	}
12433cca425bSMichael Tuexen 	if (inp->inp_flags & INP_RECVTOS) {
12443cca425bSMichael Tuexen 		*mp = sbcreatecontrol((caddr_t)&ip->ip_tos,
12453cca425bSMichael Tuexen 		    sizeof(u_char), IP_RECVTOS, IPPROTO_IP);
12463cca425bSMichael Tuexen 		if (*mp)
12473cca425bSMichael Tuexen 			mp = &(*mp)->m_next;
12483cca425bSMichael Tuexen 	}
12499d3ddf43SAdrian Chadd 
12509d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVFLOWID) {
12519d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12529d3ddf43SAdrian Chadd 
12539d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12549d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12559d3ddf43SAdrian Chadd 
12569d3ddf43SAdrian Chadd 		/*
12579d3ddf43SAdrian Chadd 		 * XXX should handle the failure of one or the
12589d3ddf43SAdrian Chadd 		 * other - don't populate both?
12599d3ddf43SAdrian Chadd 		 */
12609d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flowid,
12619d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWID, IPPROTO_IP);
12629d3ddf43SAdrian Chadd 		if (*mp)
12639d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12649d3ddf43SAdrian Chadd 		*mp = sbcreatecontrol((caddr_t) &flow_type,
12659d3ddf43SAdrian Chadd 		    sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP);
12669d3ddf43SAdrian Chadd 		if (*mp)
12679d3ddf43SAdrian Chadd 			mp = &(*mp)->m_next;
12689d3ddf43SAdrian Chadd 	}
12699d3ddf43SAdrian Chadd 
12709d3ddf43SAdrian Chadd #ifdef	RSS
12719d3ddf43SAdrian Chadd 	if (inp->inp_flags2 & INP_RECVRSSBUCKETID) {
12729d3ddf43SAdrian Chadd 		uint32_t flowid, flow_type;
12739d3ddf43SAdrian Chadd 		uint32_t rss_bucketid;
12749d3ddf43SAdrian Chadd 
12759d3ddf43SAdrian Chadd 		flowid = m->m_pkthdr.flowid;
12769d3ddf43SAdrian Chadd 		flow_type = M_HASHTYPE_GET(m);
12779d3ddf43SAdrian Chadd 
12789d3ddf43SAdrian Chadd 		if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) {
12799d3ddf43SAdrian Chadd 			*mp = sbcreatecontrol((caddr_t) &rss_bucketid,
12809d3ddf43SAdrian Chadd 			   sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP);
12819d3ddf43SAdrian Chadd 			if (*mp)
12829d3ddf43SAdrian Chadd 				mp = &(*mp)->m_next;
12839d3ddf43SAdrian Chadd 		}
12849d3ddf43SAdrian Chadd 	}
12859d3ddf43SAdrian Chadd #endif
128682c23ebaSBill Fenner }
128782c23ebaSBill Fenner 
/*
 * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
 * ip_rsvpd and ip_rsvp_on variables need to be interlocked with rsvp_on
 * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
 * compiled.
 */
12943e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on);
129582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd);
129682cea7e6SBjoern A. Zeeb 
129782cea7e6SBjoern A. Zeeb #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
129882cea7e6SBjoern A. Zeeb 
1299df8bae1dSRodney W. Grimes int
1300f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1301f0068c4aSGarrett Wollman {
13028b615593SMarko Zec 
1303f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1304f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1305f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1306f0068c4aSGarrett Wollman 
1307603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1308f0068c4aSGarrett Wollman 		return EADDRINUSE;
1309f0068c4aSGarrett Wollman 
1310603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
13111c5de19aSGarrett Wollman 	/*
13121c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
13131c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13141c5de19aSGarrett Wollman 	 */
1315603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1316603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1317603724d3SBjoern A. Zeeb 		V_rsvp_on++;
13181c5de19aSGarrett Wollman 	}
1319f0068c4aSGarrett Wollman 
1320f0068c4aSGarrett Wollman 	return 0;
1321f0068c4aSGarrett Wollman }
1322f0068c4aSGarrett Wollman 
1323f0068c4aSGarrett Wollman int
1324f0068c4aSGarrett Wollman ip_rsvp_done(void)
1325f0068c4aSGarrett Wollman {
13268b615593SMarko Zec 
1327603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
13281c5de19aSGarrett Wollman 	/*
13291c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
13301c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
13311c5de19aSGarrett Wollman 	 */
1332603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1333603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1334603724d3SBjoern A. Zeeb 		V_rsvp_on--;
13351c5de19aSGarrett Wollman 	}
1336f0068c4aSGarrett Wollman 	return 0;
1337f0068c4aSGarrett Wollman }
1338bbb4330bSLuigi Rizzo 
13398f5a8818SKevin Lo int
13408f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto)
1341bbb4330bSLuigi Rizzo {
13428f5a8818SKevin Lo 	struct mbuf *m;
13438f5a8818SKevin Lo 
13448f5a8818SKevin Lo 	m = *mp;
13458f5a8818SKevin Lo 	*mp = NULL;
13468b615593SMarko Zec 
1347bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
13488f5a8818SKevin Lo 		*mp = m;
13498f5a8818SKevin Lo 		rsvp_input_p(mp, offp, proto);
13508f5a8818SKevin Lo 		return (IPPROTO_DONE);
1351bbb4330bSLuigi Rizzo 	}
1352bbb4330bSLuigi Rizzo 
1353bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1354bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1355bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1356bbb4330bSLuigi Rizzo 	 */
1357bbb4330bSLuigi Rizzo 
1358603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1359bbb4330bSLuigi Rizzo 		m_freem(m);
13608f5a8818SKevin Lo 		return (IPPROTO_DONE);
1361bbb4330bSLuigi Rizzo 	}
1362bbb4330bSLuigi Rizzo 
1363603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
13648f5a8818SKevin Lo 		*mp = m;
13658f5a8818SKevin Lo 		rip_input(mp, offp, proto);
13668f5a8818SKevin Lo 		return (IPPROTO_DONE);
1367bbb4330bSLuigi Rizzo 	}
1368bbb4330bSLuigi Rizzo 	/* Drop the packet */
1369bbb4330bSLuigi Rizzo 	m_freem(m);
13708f5a8818SKevin Lo 	return (IPPROTO_DONE);
1371bbb4330bSLuigi Rizzo }
1372