xref: /freebsd/sys/netinet/ip_input.c (revision 0b4b0b0feea2734cdf46d8542dee3cc4a56fa52b)
1c398230bSWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1988, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  *
5df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
6df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
7df8bae1dSRodney W. Grimes  * are met:
8df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
9df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
10df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
11df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
12df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
13df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
14df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
15df8bae1dSRodney W. Grimes  *    without specific prior written permission.
16df8bae1dSRodney W. Grimes  *
17df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
28df8bae1dSRodney W. Grimes  *
29df8bae1dSRodney W. Grimes  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
30df8bae1dSRodney W. Grimes  */
31df8bae1dSRodney W. Grimes 
324b421e2dSMike Silbersack #include <sys/cdefs.h>
334b421e2dSMike Silbersack __FBSDID("$FreeBSD$");
344b421e2dSMike Silbersack 
350ac40133SBrian Somers #include "opt_bootp.h"
3674a9466cSGary Palmer #include "opt_ipfw.h"
3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h"
386a800098SYoshinobu Inoue #include "opt_ipsec.h"
3933553d6eSBjoern A. Zeeb #include "opt_route.h"
40a9771948SGleb Smirnoff #include "opt_carp.h"
4174a9466cSGary Palmer 
42df8bae1dSRodney W. Grimes #include <sys/param.h>
43df8bae1dSRodney W. Grimes #include <sys/systm.h>
445f311da2SMike Silbersack #include <sys/callout.h>
45df8bae1dSRodney W. Grimes #include <sys/mbuf.h>
46b715f178SLuigi Rizzo #include <sys/malloc.h>
47df8bae1dSRodney W. Grimes #include <sys/domain.h>
48df8bae1dSRodney W. Grimes #include <sys/protosw.h>
49df8bae1dSRodney W. Grimes #include <sys/socket.h>
50df8bae1dSRodney W. Grimes #include <sys/time.h>
51df8bae1dSRodney W. Grimes #include <sys/kernel.h>
52385195c0SMarko Zec #include <sys/lock.h>
53385195c0SMarko Zec #include <sys/rwlock.h>
541025071fSGarrett Wollman #include <sys/syslog.h>
55b5e8ce9fSBruce Evans #include <sys/sysctl.h>
56df8bae1dSRodney W. Grimes 
57c85540ddSAndrey A. Chernov #include <net/pfil.h>
58df8bae1dSRodney W. Grimes #include <net/if.h>
599494d596SBrooks Davis #include <net/if_types.h>
60d314ad7bSJulian Elischer #include <net/if_var.h>
6182c23ebaSBill Fenner #include <net/if_dl.h>
62df8bae1dSRodney W. Grimes #include <net/route.h>
63748e0b0aSGarrett Wollman #include <net/netisr.h>
644b79449eSBjoern A. Zeeb #include <net/vnet.h>
6565111ec7SKip Macy #include <net/flowtable.h>
66df8bae1dSRodney W. Grimes 
67df8bae1dSRodney W. Grimes #include <netinet/in.h>
68df8bae1dSRodney W. Grimes #include <netinet/in_systm.h>
69b5e8ce9fSBruce Evans #include <netinet/in_var.h>
70df8bae1dSRodney W. Grimes #include <netinet/ip.h>
71df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h>
72df8bae1dSRodney W. Grimes #include <netinet/ip_var.h>
73eddfbb76SRobert Watson #include <netinet/ip_fw.h>
74df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h>
75ef39adf0SAndre Oppermann #include <netinet/ip_options.h>
7658938916SGarrett Wollman #include <machine/in_cksum.h>
77a9771948SGleb Smirnoff #ifdef DEV_CARP
78a9771948SGleb Smirnoff #include <netinet/ip_carp.h>
79a9771948SGleb Smirnoff #endif
80b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
811dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h>
82b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
83df8bae1dSRodney W. Grimes 
84f0068c4aSGarrett Wollman #include <sys/socketvar.h>
856ddbf1e2SGary Palmer 
86aed55708SRobert Watson #include <security/mac/mac_framework.h>
87aed55708SRobert Watson 
/*
 * Compile-time check that struct ip is exactly 20 bytes.  Only emitted
 * when the CTASSERT macro is available.
 */
#if defined(CTASSERT)
CTASSERT(sizeof(struct ip) == 20);
#endif
91d2035ffbSEd Maste 
92eddfbb76SRobert Watson static VNET_DEFINE(int, ipsendredirects) = 1;	/* XXX */
93eddfbb76SRobert Watson static VNET_DEFINE(int, ip_checkinterface);
94eddfbb76SRobert Watson static VNET_DEFINE(int, ip_keepfaith);
95eddfbb76SRobert Watson static VNET_DEFINE(int, ip_sendsourcequench);
96385195c0SMarko Zec 
971e77c105SRobert Watson #define	V_ipsendredirects	VNET(ipsendredirects)
981e77c105SRobert Watson #define	V_ip_checkinterface	VNET(ip_checkinterface)
991e77c105SRobert Watson #define	V_ip_keepfaith		VNET(ip_keepfaith)
1001e77c105SRobert Watson #define	V_ip_sendsourcequench	VNET(ip_sendsourcequench)
101eddfbb76SRobert Watson 
102eddfbb76SRobert Watson VNET_DEFINE(int, ip_defttl) = IPDEFTTL;
103eddfbb76SRobert Watson VNET_DEFINE(int, ip_do_randomid);
104eddfbb76SRobert Watson VNET_DEFINE(int, ipforwarding);
105eddfbb76SRobert Watson 
106eddfbb76SRobert Watson VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead);  /* first inet address */
107eddfbb76SRobert Watson VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table  */
108eddfbb76SRobert Watson VNET_DEFINE(u_long, in_ifaddrhmask);		/* mask for hash table */
109eddfbb76SRobert Watson VNET_DEFINE(struct ipstat, ipstat);
110eddfbb76SRobert Watson 
111eddfbb76SRobert Watson static VNET_DEFINE(int, ip_rsvp_on);
112eddfbb76SRobert Watson VNET_DEFINE(struct socket *, ip_rsvpd);
113eddfbb76SRobert Watson VNET_DEFINE(int, rsvp_on);
114eddfbb76SRobert Watson 
1151e77c105SRobert Watson #define	V_ip_rsvp_on		VNET(ip_rsvp_on)
116eddfbb76SRobert Watson 
117eddfbb76SRobert Watson static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]);
118eddfbb76SRobert Watson static VNET_DEFINE(int, maxnipq);  /* Administrative limit on # reass queues. */
119eddfbb76SRobert Watson static VNET_DEFINE(int, maxfragsperpacket);
120eddfbb76SRobert Watson static VNET_DEFINE(int, nipq);			/* Total # of reass queues */
121eddfbb76SRobert Watson 
1221e77c105SRobert Watson #define	V_ipq			VNET(ipq)
1231e77c105SRobert Watson #define	V_maxnipq		VNET(maxnipq)
1241e77c105SRobert Watson #define	V_maxfragsperpacket	VNET(maxfragsperpacket)
1251e77c105SRobert Watson #define	V_nipq			VNET(nipq)
126eddfbb76SRobert Watson 
127eddfbb76SRobert Watson VNET_DEFINE(int, ipstealth);
12864aeca7bSRobert Watson 
1292d9cfabaSRobert Watson struct	rwlock in_ifaddr_lock;
13064aeca7bSRobert Watson RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock");
131f0068c4aSGarrett Wollman 
132eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW,
133eddfbb76SRobert Watson     &VNET_NAME(ipforwarding), 0,
1348b615593SMarko Zec     "Enable IP forwarding between interfaces");
1350312fbe9SPoul-Henning Kamp 
136eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW,
137eddfbb76SRobert Watson     &VNET_NAME(ipsendredirects), 0,
1388b615593SMarko Zec     "Enable sending IP redirects");
1390312fbe9SPoul-Henning Kamp 
140eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW,
141eddfbb76SRobert Watson     &VNET_NAME(ip_defttl), 0,
142eddfbb76SRobert Watson     "Maximum TTL on IP packets");
1430312fbe9SPoul-Henning Kamp 
144eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW,
145eddfbb76SRobert Watson     &VNET_NAME(ip_keepfaith), 0,
1466a800098SYoshinobu Inoue     "Enable packet capture for FAITH IPv4->IPv6 translater daemon");
1476a800098SYoshinobu Inoue 
148eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW,
149eddfbb76SRobert Watson     &VNET_NAME(ip_sendsourcequench), 0,
150df285b3dSMike Silbersack     "Enable the transmission of source quench packets");
151df285b3dSMike Silbersack 
152eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW,
153eddfbb76SRobert Watson     &VNET_NAME(ip_do_randomid), 0,
154eddfbb76SRobert Watson     "Assign random ip_id values");
1551f44b0a1SDavid Malone 
156823db0e9SDon Lewis /*
157823db0e9SDon Lewis  * XXX - Setting ip_checkinterface mostly implements the receive side of
158823db0e9SDon Lewis  * the Strong ES model described in RFC 1122, but since the routing table
159a8f12100SDon Lewis  * and transmit implementation do not implement the Strong ES model,
160823db0e9SDon Lewis  * setting this to 1 results in an odd hybrid.
1613f67c834SDon Lewis  *
162a8f12100SDon Lewis  * XXX - ip_checkinterface currently must be disabled if you use ipnat
163a8f12100SDon Lewis  * to translate the destination address to another local interface.
1643f67c834SDon Lewis  *
1653f67c834SDon Lewis  * XXX - ip_checkinterface must be disabled if you add IP aliases
1663f67c834SDon Lewis  * to the loopback interface instead of the interface where the
1673f67c834SDon Lewis  * packets for those addresses are received.
168823db0e9SDon Lewis  */
169eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW,
170eddfbb76SRobert Watson     &VNET_NAME(ip_checkinterface), 0,
1718b615593SMarko Zec     "Verify packet arrives on correct interface");
172b3e95d4eSJonathan Lemon 
1730b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook);	/* Packet filter hooks */
174df8bae1dSRodney W. Grimes 
175d4b5cae4SRobert Watson static struct netisr_handler ip_nh = {
176d4b5cae4SRobert Watson 	.nh_name = "ip",
177d4b5cae4SRobert Watson 	.nh_handler = ip_input,
178d4b5cae4SRobert Watson 	.nh_proto = NETISR_IP,
179d4b5cae4SRobert Watson 	.nh_policy = NETISR_POLICY_FLOW,
180d4b5cae4SRobert Watson };
181ca925d9cSJonathan Lemon 
182df8bae1dSRodney W. Grimes extern	struct domain inetdomain;
183f0ffb944SJulian Elischer extern	struct protosw inetsw[];
184df8bae1dSRodney W. Grimes u_char	ip_protox[IPPROTO_MAX];
185ca925d9cSJonathan Lemon 
186eddfbb76SRobert Watson SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW,
187eddfbb76SRobert Watson     &VNET_NAME(ipstat), ipstat,
188eddfbb76SRobert Watson     "IP statistics (struct ipstat, netinet/ip_var.h)");
189df8bae1dSRodney W. Grimes 
190eddfbb76SRobert Watson static VNET_DEFINE(uma_zone_t, ipq_zone);
1911e77c105SRobert Watson #define	V_ipq_zone		VNET(ipq_zone)
192194a213eSAndrey A. Chernov 
193dfa60d93SRobert Watson static struct mtx ipqlock;
1942fad1e93SSam Leffler 
1952fad1e93SSam Leffler #define	IPQ_LOCK()	mtx_lock(&ipqlock)
1962fad1e93SSam Leffler #define	IPQ_UNLOCK()	mtx_unlock(&ipqlock)
197888c2a3cSSam Leffler #define	IPQ_LOCK_INIT()	mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF)
198888c2a3cSSam Leffler #define	IPQ_LOCK_ASSERT()	mtx_assert(&ipqlock, MA_OWNED)
199f23b4c91SGarrett Wollman 
200d248c7d7SRobert Watson static void	maxnipq_update(void);
2014f590175SPaul Saab static void	ipq_zone_change(void *);
202d248c7d7SRobert Watson 
203eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD,
204eddfbb76SRobert Watson     &VNET_NAME(nipq), 0,
2058b615593SMarko Zec     "Current number of IPv4 fragment reassembly queue entries");
206d248c7d7SRobert Watson 
207eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW,
208eddfbb76SRobert Watson     &VNET_NAME(maxfragsperpacket), 0,
209d248c7d7SRobert Watson     "Maximum number of IPv4 fragments allowed per packet");
210d248c7d7SRobert Watson 
211d248c7d7SRobert Watson struct callout	ipport_tick_callout;
212d248c7d7SRobert Watson 
2130312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU
2140312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW,
2153d177f46SBill Fumerola     &ip_mtu, 0, "Default MTU");
2160312fbe9SPoul-Henning Kamp #endif
2170312fbe9SPoul-Henning Kamp 
2181b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
219eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_RW,
220eddfbb76SRobert Watson     &VNET_NAME(ipstealth), 0,
221eddfbb76SRobert Watson     "IP stealth mode, no TTL decrementation on forwarding");
2221b968362SDag-Erling Smørgrav #endif
223eddfbb76SRobert Watson 
22453be8fcaSBjoern A. Zeeb #ifdef FLOWTABLE
225eddfbb76SRobert Watson static VNET_DEFINE(int, ip_output_flowtable_size) = 2048;
226eddfbb76SRobert Watson VNET_DEFINE(struct flowtable *, ip_ft);
2271e77c105SRobert Watson #define	V_ip_output_flowtable_size	VNET(ip_output_flowtable_size)
228eddfbb76SRobert Watson 
229eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN,
230eddfbb76SRobert Watson     &VNET_NAME(ip_output_flowtable_size), 2048,
23165111ec7SKip Macy     "number of entries in the per-cpu output flow caches");
23253be8fcaSBjoern A. Zeeb #endif
23353be8fcaSBjoern A. Zeeb 
234eddfbb76SRobert Watson VNET_DEFINE(int, fw_one_pass) = 1;
235010b65f5SJulian Elischer 
2364d77a549SAlfred Perlstein static void	ip_freef(struct ipqhead *, struct ipq *);
2378948e4baSArchie Cobbs 
238315e3e38SRobert Watson /*
239315e3e38SRobert Watson  * Kernel module interface for updating ipstat.  The argument is an index
240315e3e38SRobert Watson  * into ipstat treated as an array of u_long.  While this encodes the general
241315e3e38SRobert Watson  * layout of ipstat into the caller, it doesn't encode its location, so that
242315e3e38SRobert Watson  * future changes to add, for example, per-CPU stats support won't cause
243315e3e38SRobert Watson  * binary compatibility problems for kernel modules.
244315e3e38SRobert Watson  */
245315e3e38SRobert Watson void
246315e3e38SRobert Watson kmod_ipstat_inc(int statnum)
247315e3e38SRobert Watson {
248315e3e38SRobert Watson 
249315e3e38SRobert Watson 	(*((u_long *)&V_ipstat + statnum))++;
250315e3e38SRobert Watson }
251315e3e38SRobert Watson 
252315e3e38SRobert Watson void
253315e3e38SRobert Watson kmod_ipstat_dec(int statnum)
254315e3e38SRobert Watson {
255315e3e38SRobert Watson 
256315e3e38SRobert Watson 	(*((u_long *)&V_ipstat + statnum))--;
257315e3e38SRobert Watson }
258315e3e38SRobert Watson 
259d4b5cae4SRobert Watson static int
260d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS)
261d4b5cae4SRobert Watson {
262d4b5cae4SRobert Watson 	int error, qlimit;
263d4b5cae4SRobert Watson 
264d4b5cae4SRobert Watson 	netisr_getqlimit(&ip_nh, &qlimit);
265d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qlimit, 0, req);
266d4b5cae4SRobert Watson 	if (error || !req->newptr)
267d4b5cae4SRobert Watson 		return (error);
268d4b5cae4SRobert Watson 	if (qlimit < 1)
269d4b5cae4SRobert Watson 		return (EINVAL);
270d4b5cae4SRobert Watson 	return (netisr_setqlimit(&ip_nh, qlimit));
271d4b5cae4SRobert Watson }
272d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen,
273d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I",
274d4b5cae4SRobert Watson     "Maximum size of the IP input queue");
275d4b5cae4SRobert Watson 
276d4b5cae4SRobert Watson static int
277d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS)
278d4b5cae4SRobert Watson {
279d4b5cae4SRobert Watson 	u_int64_t qdrops_long;
280d4b5cae4SRobert Watson 	int error, qdrops;
281d4b5cae4SRobert Watson 
282d4b5cae4SRobert Watson 	netisr_getqdrops(&ip_nh, &qdrops_long);
283d4b5cae4SRobert Watson 	qdrops = qdrops_long;
284d4b5cae4SRobert Watson 	error = sysctl_handle_int(oidp, &qdrops, 0, req);
285d4b5cae4SRobert Watson 	if (error || !req->newptr)
286d4b5cae4SRobert Watson 		return (error);
287d4b5cae4SRobert Watson 	if (qdrops != 0)
288d4b5cae4SRobert Watson 		return (EINVAL);
289d4b5cae4SRobert Watson 	netisr_clearqdrops(&ip_nh);
290d4b5cae4SRobert Watson 	return (0);
291d4b5cae4SRobert Watson }
292d4b5cae4SRobert Watson 
293d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops,
294d4b5cae4SRobert Watson     CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I",
295d4b5cae4SRobert Watson     "Number of packets dropped from the IP input queue");
296d4b5cae4SRobert Watson 
297df8bae1dSRodney W. Grimes /*
298df8bae1dSRodney W. Grimes  * IP initialization: fill in IP protocol switch table.
299df8bae1dSRodney W. Grimes  * All protocols not implemented in kernel go to raw IP protocol handler.
300df8bae1dSRodney W. Grimes  */
301df8bae1dSRodney W. Grimes void
302f2565d68SRobert Watson ip_init(void)
303df8bae1dSRodney W. Grimes {
304f2565d68SRobert Watson 	struct protosw *pr;
305f2565d68SRobert Watson 	int i;
306df8bae1dSRodney W. Grimes 
307a511354aSRobert Watson 	V_ip_id = time_second & 0xffff;
308a511354aSRobert Watson 
309603724d3SBjoern A. Zeeb 	TAILQ_INIT(&V_in_ifaddrhead);
310603724d3SBjoern A. Zeeb 	V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask);
3111ed81b73SMarko Zec 
3121ed81b73SMarko Zec 	/* Initialize IP reassembly queue. */
3131ed81b73SMarko Zec 	for (i = 0; i < IPREASS_NHASH; i++)
3141ed81b73SMarko Zec 		TAILQ_INIT(&V_ipq[i]);
3151ed81b73SMarko Zec 	V_maxnipq = nmbclusters / 32;
3161ed81b73SMarko Zec 	V_maxfragsperpacket = 16;
3171ed81b73SMarko Zec 	V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL,
3181ed81b73SMarko Zec 	    NULL, UMA_ALIGN_PTR, 0);
3191ed81b73SMarko Zec 	maxnipq_update();
3201ed81b73SMarko Zec 
3210b4b0b0fSJulian Elischer 	/* Initialize packet filter hooks. */
3220b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_type = PFIL_TYPE_AF;
3230b4b0b0fSJulian Elischer 	V_inet_pfil_hook.ph_af = AF_INET;
3240b4b0b0fSJulian Elischer 	if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0)
3250b4b0b0fSJulian Elischer 		printf("%s: WARNING: unable to register pfil hook, "
3260b4b0b0fSJulian Elischer 			"error %d\n", __func__, i);
3270b4b0b0fSJulian Elischer 
328fa057b15SMarko Zec #ifdef FLOWTABLE
329fa057b15SMarko Zec 	TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size",
330fa057b15SMarko Zec 	    &V_ip_output_flowtable_size);
331fa057b15SMarko Zec 	V_ip_ft = flowtable_alloc(V_ip_output_flowtable_size, FL_PCPU);
332fa057b15SMarko Zec #endif
333fa057b15SMarko Zec 
3341ed81b73SMarko Zec 	/* Skip initialization of globals for non-default instances. */
3351ed81b73SMarko Zec 	if (!IS_DEFAULT_VNET(curvnet))
3361ed81b73SMarko Zec 		return;
3371ed81b73SMarko Zec 
338f0ffb944SJulian Elischer 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
33902410549SRobert Watson 	if (pr == NULL)
340db09bef3SAndre Oppermann 		panic("ip_init: PF_INET not found");
341db09bef3SAndre Oppermann 
342db09bef3SAndre Oppermann 	/* Initialize the entire ip_protox[] array to IPPROTO_RAW. */
343df8bae1dSRodney W. Grimes 	for (i = 0; i < IPPROTO_MAX; i++)
344df8bae1dSRodney W. Grimes 		ip_protox[i] = pr - inetsw;
345db09bef3SAndre Oppermann 	/*
346db09bef3SAndre Oppermann 	 * Cycle through IP protocols and put them into the appropriate place
347db09bef3SAndre Oppermann 	 * in ip_protox[].
348db09bef3SAndre Oppermann 	 */
349f0ffb944SJulian Elischer 	for (pr = inetdomain.dom_protosw;
350f0ffb944SJulian Elischer 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
351df8bae1dSRodney W. Grimes 		if (pr->pr_domain->dom_family == PF_INET &&
352db09bef3SAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) {
353db09bef3SAndre Oppermann 			/* Be careful to only index valid IP protocols. */
354db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX)
355df8bae1dSRodney W. Grimes 				ip_protox[pr->pr_protocol] = pr - inetsw;
356db09bef3SAndre Oppermann 		}
357194a213eSAndrey A. Chernov 
3585f311da2SMike Silbersack 	/* Start ipport_tick. */
3595f311da2SMike Silbersack 	callout_init(&ipport_tick_callout, CALLOUT_MPSAFE);
36021ca7b57SMarko Zec 	callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL);
3615f311da2SMike Silbersack 	EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL,
3625f311da2SMike Silbersack 		SHUTDOWN_PRI_DEFAULT);
3634f590175SPaul Saab 	EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change,
3644f590175SPaul Saab 		NULL, EVENTHANDLER_PRI_ANY);
3655f311da2SMike Silbersack 
366db09bef3SAndre Oppermann 	/* Initialize various other remaining things. */
3671ed81b73SMarko Zec 	IPQ_LOCK_INIT();
368d4b5cae4SRobert Watson 	netisr_register(&ip_nh);
369df8bae1dSRodney W. Grimes }
370df8bae1dSRodney W. Grimes 
371f2565d68SRobert Watson void
372f2565d68SRobert Watson ip_fini(void *xtp)
3735f311da2SMike Silbersack {
374f2565d68SRobert Watson 
3755f311da2SMike Silbersack 	callout_stop(&ipport_tick_callout);
3765f311da2SMike Silbersack }
3775f311da2SMike Silbersack 
3784d2e3692SLuigi Rizzo /*
379df8bae1dSRodney W. Grimes  * Ip input routine.  Checksum and byte swap header.  If fragmented
380df8bae1dSRodney W. Grimes  * try to reassemble.  Process options.  Pass to next level.
381df8bae1dSRodney W. Grimes  */
382c67b1d17SGarrett Wollman void
383c67b1d17SGarrett Wollman ip_input(struct mbuf *m)
384df8bae1dSRodney W. Grimes {
3859188b4a1SAndre Oppermann 	struct ip *ip = NULL;
3865da9f8faSJosef Karthauser 	struct in_ifaddr *ia = NULL;
387ca925d9cSJonathan Lemon 	struct ifaddr *ifa;
3880aade26eSRobert Watson 	struct ifnet *ifp;
3899b932e9eSAndre Oppermann 	int    checkif, hlen = 0;
39047c861ecSBrian Somers 	u_short sum;
39102c1c707SAndre Oppermann 	int dchg = 0;				/* dest changed after fw */
392f51f805fSSam Leffler 	struct in_addr odst;			/* original dst address */
393b715f178SLuigi Rizzo 
394fe584538SDag-Erling Smørgrav 	M_ASSERTPKTHDR(m);
395db40007dSAndrew R. Reiter 
396ac9d7e26SMax Laier 	if (m->m_flags & M_FASTFWD_OURS) {
3979b932e9eSAndre Oppermann 		/*
39876ff6dcfSAndre Oppermann 		 * Firewall or NAT changed destination to local.
39976ff6dcfSAndre Oppermann 		 * We expect ip_len and ip_off to be in host byte order.
4009b932e9eSAndre Oppermann 		 */
40176ff6dcfSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
40276ff6dcfSAndre Oppermann 		/* Set up some basics that will be used later. */
4032b25acc1SLuigi Rizzo 		ip = mtod(m, struct ip *);
40453be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
4059b932e9eSAndre Oppermann 		goto ours;
4062b25acc1SLuigi Rizzo 	}
4072b25acc1SLuigi Rizzo 
40886425c62SRobert Watson 	IPSTAT_INC(ips_total);
40958938916SGarrett Wollman 
41058938916SGarrett Wollman 	if (m->m_pkthdr.len < sizeof(struct ip))
41158938916SGarrett Wollman 		goto tooshort;
41258938916SGarrett Wollman 
413df8bae1dSRodney W. Grimes 	if (m->m_len < sizeof (struct ip) &&
4140b17fba7SAndre Oppermann 	    (m = m_pullup(m, sizeof (struct ip))) == NULL) {
41586425c62SRobert Watson 		IPSTAT_INC(ips_toosmall);
416c67b1d17SGarrett Wollman 		return;
417df8bae1dSRodney W. Grimes 	}
418df8bae1dSRodney W. Grimes 	ip = mtod(m, struct ip *);
41958938916SGarrett Wollman 
42053be11f6SPoul-Henning Kamp 	if (ip->ip_v != IPVERSION) {
42186425c62SRobert Watson 		IPSTAT_INC(ips_badvers);
422df8bae1dSRodney W. Grimes 		goto bad;
423df8bae1dSRodney W. Grimes 	}
42458938916SGarrett Wollman 
42553be11f6SPoul-Henning Kamp 	hlen = ip->ip_hl << 2;
426df8bae1dSRodney W. Grimes 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
42786425c62SRobert Watson 		IPSTAT_INC(ips_badhlen);
428df8bae1dSRodney W. Grimes 		goto bad;
429df8bae1dSRodney W. Grimes 	}
430df8bae1dSRodney W. Grimes 	if (hlen > m->m_len) {
4310b17fba7SAndre Oppermann 		if ((m = m_pullup(m, hlen)) == NULL) {
43286425c62SRobert Watson 			IPSTAT_INC(ips_badhlen);
433c67b1d17SGarrett Wollman 			return;
434df8bae1dSRodney W. Grimes 		}
435df8bae1dSRodney W. Grimes 		ip = mtod(m, struct ip *);
436df8bae1dSRodney W. Grimes 	}
43733841545SHajimu UMEMOTO 
43833841545SHajimu UMEMOTO 	/* 127/8 must not appear on wire - RFC1122 */
4390aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
44033841545SHajimu UMEMOTO 	if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET ||
44133841545SHajimu UMEMOTO 	    (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) {
4420aade26eSRobert Watson 		if ((ifp->if_flags & IFF_LOOPBACK) == 0) {
44386425c62SRobert Watson 			IPSTAT_INC(ips_badaddr);
44433841545SHajimu UMEMOTO 			goto bad;
44533841545SHajimu UMEMOTO 		}
44633841545SHajimu UMEMOTO 	}
44733841545SHajimu UMEMOTO 
448db4f9cc7SJonathan Lemon 	if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) {
449db4f9cc7SJonathan Lemon 		sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID);
450db4f9cc7SJonathan Lemon 	} else {
45158938916SGarrett Wollman 		if (hlen == sizeof(struct ip)) {
45247c861ecSBrian Somers 			sum = in_cksum_hdr(ip);
45358938916SGarrett Wollman 		} else {
45447c861ecSBrian Somers 			sum = in_cksum(m, hlen);
45558938916SGarrett Wollman 		}
456db4f9cc7SJonathan Lemon 	}
45747c861ecSBrian Somers 	if (sum) {
45886425c62SRobert Watson 		IPSTAT_INC(ips_badsum);
459df8bae1dSRodney W. Grimes 		goto bad;
460df8bae1dSRodney W. Grimes 	}
461df8bae1dSRodney W. Grimes 
46202b199f1SMax Laier #ifdef ALTQ
46302b199f1SMax Laier 	if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0)
46402b199f1SMax Laier 		/* packet is dropped by traffic conditioner */
46502b199f1SMax Laier 		return;
46602b199f1SMax Laier #endif
46702b199f1SMax Laier 
468df8bae1dSRodney W. Grimes 	/*
469df8bae1dSRodney W. Grimes 	 * Convert fields to host representation.
470df8bae1dSRodney W. Grimes 	 */
471fd8e4ebcSMike Barcroft 	ip->ip_len = ntohs(ip->ip_len);
472df8bae1dSRodney W. Grimes 	if (ip->ip_len < hlen) {
47386425c62SRobert Watson 		IPSTAT_INC(ips_badlen);
474df8bae1dSRodney W. Grimes 		goto bad;
475df8bae1dSRodney W. Grimes 	}
476fd8e4ebcSMike Barcroft 	ip->ip_off = ntohs(ip->ip_off);
477df8bae1dSRodney W. Grimes 
478df8bae1dSRodney W. Grimes 	/*
479df8bae1dSRodney W. Grimes 	 * Check that the amount of data in the buffers
480df8bae1dSRodney W. Grimes 	 * is as at least much as the IP header would have us expect.
481df8bae1dSRodney W. Grimes 	 * Trim mbufs if longer than we expect.
482df8bae1dSRodney W. Grimes 	 * Drop packet if shorter than we expect.
483df8bae1dSRodney W. Grimes 	 */
484df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len < ip->ip_len) {
48558938916SGarrett Wollman tooshort:
48686425c62SRobert Watson 		IPSTAT_INC(ips_tooshort);
487df8bae1dSRodney W. Grimes 		goto bad;
488df8bae1dSRodney W. Grimes 	}
489df8bae1dSRodney W. Grimes 	if (m->m_pkthdr.len > ip->ip_len) {
490df8bae1dSRodney W. Grimes 		if (m->m_len == m->m_pkthdr.len) {
491df8bae1dSRodney W. Grimes 			m->m_len = ip->ip_len;
492df8bae1dSRodney W. Grimes 			m->m_pkthdr.len = ip->ip_len;
493df8bae1dSRodney W. Grimes 		} else
494df8bae1dSRodney W. Grimes 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
495df8bae1dSRodney W. Grimes 	}
496b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
49714dd6717SSam Leffler 	/*
49814dd6717SSam Leffler 	 * Bypass packet filtering for packets from a tunnel (gif).
49914dd6717SSam Leffler 	 */
500cc977adcSBjoern A. Zeeb 	if (ip_ipsec_filtertunnel(m))
501c21fd232SAndre Oppermann 		goto passin;
502b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
5033f67c834SDon Lewis 
504c4ac87eaSDarren Reed 	/*
505134ea224SSam Leffler 	 * Run through list of hooks for input packets.
506f51f805fSSam Leffler 	 *
507f51f805fSSam Leffler 	 * NB: Beware of the destination address changing (e.g.
508f51f805fSSam Leffler 	 *     by NAT rewriting).  When this happens, tell
509f51f805fSSam Leffler 	 *     ip_forward to do the right thing.
510c4ac87eaSDarren Reed 	 */
511c21fd232SAndre Oppermann 
512c21fd232SAndre Oppermann 	/* Jump over all PFIL processing if hooks are not active. */
5130b4b0b0fSJulian Elischer 	if (!PFIL_HOOKED(&V_inet_pfil_hook))
514c21fd232SAndre Oppermann 		goto passin;
515c21fd232SAndre Oppermann 
516f51f805fSSam Leffler 	odst = ip->ip_dst;
5170b4b0b0fSJulian Elischer 	if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0)
518beec8214SDarren Reed 		return;
519134ea224SSam Leffler 	if (m == NULL)			/* consumed by filter */
520c4ac87eaSDarren Reed 		return;
5219b932e9eSAndre Oppermann 
522c4ac87eaSDarren Reed 	ip = mtod(m, struct ip *);
52302c1c707SAndre Oppermann 	dchg = (odst.s_addr != ip->ip_dst.s_addr);
5240aade26eSRobert Watson 	ifp = m->m_pkthdr.rcvif;
5259b932e9eSAndre Oppermann 
5269b932e9eSAndre Oppermann #ifdef IPFIREWALL_FORWARD
5279b932e9eSAndre Oppermann 	if (m->m_flags & M_FASTFWD_OURS) {
5289b932e9eSAndre Oppermann 		m->m_flags &= ~M_FASTFWD_OURS;
5299b932e9eSAndre Oppermann 		goto ours;
5309b932e9eSAndre Oppermann 	}
531099dd043SAndre Oppermann 	if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) {
532099dd043SAndre Oppermann 		/*
533099dd043SAndre Oppermann 		 * Directly ship on the packet.  This allows to forward packets
534099dd043SAndre Oppermann 		 * that were destined for us to some other directly connected
535099dd043SAndre Oppermann 		 * host.
536099dd043SAndre Oppermann 		 */
537099dd043SAndre Oppermann 		ip_forward(m, dchg);
538099dd043SAndre Oppermann 		return;
539099dd043SAndre Oppermann 	}
5409b932e9eSAndre Oppermann #endif /* IPFIREWALL_FORWARD */
5419b932e9eSAndre Oppermann 
542c21fd232SAndre Oppermann passin:
543df8bae1dSRodney W. Grimes 	/*
544df8bae1dSRodney W. Grimes 	 * Process options and, if not destined for us,
545df8bae1dSRodney W. Grimes 	 * ship it on.  ip_dooptions returns 1 when an
546df8bae1dSRodney W. Grimes 	 * error was detected (causing an icmp message
547df8bae1dSRodney W. Grimes 	 * to be sent and the original packet to be freed).
548df8bae1dSRodney W. Grimes 	 */
5499b932e9eSAndre Oppermann 	if (hlen > sizeof (struct ip) && ip_dooptions(m, 0))
550c67b1d17SGarrett Wollman 		return;
551df8bae1dSRodney W. Grimes 
552f0068c4aSGarrett Wollman         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
553f0068c4aSGarrett Wollman          * matter if it is destined to another node, or whether it is
554f0068c4aSGarrett Wollman          * a multicast one, RSVP wants it! and prevents it from being forwarded
555f0068c4aSGarrett Wollman          * anywhere else. Also checks if the rsvp daemon is running before
556f0068c4aSGarrett Wollman 	 * grabbing the packet.
557f0068c4aSGarrett Wollman          */
558603724d3SBjoern A. Zeeb 	if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP)
559f0068c4aSGarrett Wollman 		goto ours;
560f0068c4aSGarrett Wollman 
561df8bae1dSRodney W. Grimes 	/*
562df8bae1dSRodney W. Grimes 	 * Check our list of addresses, to see if the packet is for us.
563cc766e04SGarrett Wollman 	 * If we don't have any addresses, assume any unicast packet
564cc766e04SGarrett Wollman 	 * we receive might be for us (and let the upper layers deal
565cc766e04SGarrett Wollman 	 * with it).
566df8bae1dSRodney W. Grimes 	 */
567603724d3SBjoern A. Zeeb 	if (TAILQ_EMPTY(&V_in_ifaddrhead) &&
568cc766e04SGarrett Wollman 	    (m->m_flags & (M_MCAST|M_BCAST)) == 0)
569cc766e04SGarrett Wollman 		goto ours;
570cc766e04SGarrett Wollman 
5717538a9a0SJonathan Lemon 	/*
572823db0e9SDon Lewis 	 * Enable a consistency check between the destination address
573823db0e9SDon Lewis 	 * and the arrival interface for a unicast packet (the RFC 1122
574823db0e9SDon Lewis 	 * strong ES model) if IP forwarding is disabled and the packet
575e15ae1b2SDon Lewis 	 * is not locally generated and the packet is not subject to
576e15ae1b2SDon Lewis 	 * 'ipfw fwd'.
5773f67c834SDon Lewis 	 *
5783f67c834SDon Lewis 	 * XXX - Checking also should be disabled if the destination
5793f67c834SDon Lewis 	 * address is ipnat'ed to a different interface.
5803f67c834SDon Lewis 	 *
581a8f12100SDon Lewis 	 * XXX - Checking is incompatible with IP aliases added
5823f67c834SDon Lewis 	 * to the loopback interface instead of the interface where
5833f67c834SDon Lewis 	 * the packets are received.
584a9771948SGleb Smirnoff 	 *
585a9771948SGleb Smirnoff 	 * XXX - This is the case for carp vhost IPs as well so we
586a9771948SGleb Smirnoff 	 * insert a workaround. If the packet got here, we already
587a9771948SGleb Smirnoff 	 * checked with carp_iamatch() and carp_forus().
588823db0e9SDon Lewis 	 */
589603724d3SBjoern A. Zeeb 	checkif = V_ip_checkinterface && (V_ipforwarding == 0) &&
5900aade26eSRobert Watson 	    ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) &&
591a9771948SGleb Smirnoff #ifdef DEV_CARP
5920aade26eSRobert Watson 	    !ifp->if_carp &&
593a9771948SGleb Smirnoff #endif
5949b932e9eSAndre Oppermann 	    (dchg == 0);
595823db0e9SDon Lewis 
596ca925d9cSJonathan Lemon 	/*
597ca925d9cSJonathan Lemon 	 * Check for exact addresses in the hash bucket.
598ca925d9cSJonathan Lemon 	 */
5992d9cfabaSRobert Watson 	/* IN_IFADDR_RLOCK(); */
6009b932e9eSAndre Oppermann 	LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) {
601f9e354dfSJulian Elischer 		/*
602823db0e9SDon Lewis 		 * If the address matches, verify that the packet
603823db0e9SDon Lewis 		 * arrived via the correct interface if checking is
604823db0e9SDon Lewis 		 * enabled.
605f9e354dfSJulian Elischer 		 */
6069b932e9eSAndre Oppermann 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr &&
6078c0fec80SRobert Watson 		    (!checkif || ia->ia_ifp == ifp)) {
6088c0fec80SRobert Watson 			ifa_ref(&ia->ia_ifa);
6092d9cfabaSRobert Watson 			/* IN_IFADDR_RUNLOCK(); */
610ed1ff184SJulian Elischer 			goto ours;
611ca925d9cSJonathan Lemon 		}
6128c0fec80SRobert Watson 	}
6132d9cfabaSRobert Watson 	/* IN_IFADDR_RUNLOCK(); */
6142d9cfabaSRobert Watson 
615823db0e9SDon Lewis 	/*
616ca925d9cSJonathan Lemon 	 * Check for broadcast addresses.
617ca925d9cSJonathan Lemon 	 *
618ca925d9cSJonathan Lemon 	 * Only accept broadcast packets that arrive via the matching
619ca925d9cSJonathan Lemon 	 * interface.  Reception of forwarded directed broadcasts would
620ca925d9cSJonathan Lemon 	 * be handled via ip_forward() and ether_output() with the loopback
621ca925d9cSJonathan Lemon 	 * into the stack for SIMPLEX interfaces handled by ether_output().
622823db0e9SDon Lewis 	 */
6230aade26eSRobert Watson 	if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) {
6240aade26eSRobert Watson 		IF_ADDR_LOCK(ifp);
6250aade26eSRobert Watson 	        TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
626ca925d9cSJonathan Lemon 			if (ifa->ifa_addr->sa_family != AF_INET)
627ca925d9cSJonathan Lemon 				continue;
628ca925d9cSJonathan Lemon 			ia = ifatoia(ifa);
629df8bae1dSRodney W. Grimes 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
6300aade26eSRobert Watson 			    ip->ip_dst.s_addr) {
6318c0fec80SRobert Watson 				ifa_ref(ifa);
6320aade26eSRobert Watson 				IF_ADDR_UNLOCK(ifp);
633df8bae1dSRodney W. Grimes 				goto ours;
6340aade26eSRobert Watson 			}
6350aade26eSRobert Watson 			if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) {
6368c0fec80SRobert Watson 				ifa_ref(ifa);
6370aade26eSRobert Watson 				IF_ADDR_UNLOCK(ifp);
638df8bae1dSRodney W. Grimes 				goto ours;
6390aade26eSRobert Watson 			}
6400ac40133SBrian Somers #ifdef BOOTP_COMPAT
6410aade26eSRobert Watson 			if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) {
6428c0fec80SRobert Watson 				ifa_ref(ifa);
6430aade26eSRobert Watson 				IF_ADDR_UNLOCK(ifp);
644ca925d9cSJonathan Lemon 				goto ours;
6450aade26eSRobert Watson 			}
6460ac40133SBrian Somers #endif
647df8bae1dSRodney W. Grimes 		}
6480aade26eSRobert Watson 		IF_ADDR_UNLOCK(ifp);
64919e5b0a7SRobert Watson 		ia = NULL;
650df8bae1dSRodney W. Grimes 	}
651f8429ca2SBruce M Simpson 	/* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */
652f8429ca2SBruce M Simpson 	if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) {
65386425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
654f8429ca2SBruce M Simpson 		m_freem(m);
655f8429ca2SBruce M Simpson 		return;
656f8429ca2SBruce M Simpson 	}
657df8bae1dSRodney W. Grimes 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
658603724d3SBjoern A. Zeeb 		if (V_ip_mrouter) {
659df8bae1dSRodney W. Grimes 			/*
660df8bae1dSRodney W. Grimes 			 * If we are acting as a multicast router, all
661df8bae1dSRodney W. Grimes 			 * incoming multicast packets are passed to the
662df8bae1dSRodney W. Grimes 			 * kernel-level multicast forwarding function.
663df8bae1dSRodney W. Grimes 			 * The packet is returned (relatively) intact; if
664df8bae1dSRodney W. Grimes 			 * ip_mforward() returns a non-zero value, the packet
665df8bae1dSRodney W. Grimes 			 * must be discarded, else it may be accepted below.
666df8bae1dSRodney W. Grimes 			 */
6670aade26eSRobert Watson 			if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) {
66886425c62SRobert Watson 				IPSTAT_INC(ips_cantforward);
669df8bae1dSRodney W. Grimes 				m_freem(m);
670c67b1d17SGarrett Wollman 				return;
671df8bae1dSRodney W. Grimes 			}
672df8bae1dSRodney W. Grimes 
673df8bae1dSRodney W. Grimes 			/*
67411612afaSDima Dorfman 			 * The process-level routing daemon needs to receive
675df8bae1dSRodney W. Grimes 			 * all multicast IGMP packets, whether or not this
676df8bae1dSRodney W. Grimes 			 * host belongs to their destination groups.
677df8bae1dSRodney W. Grimes 			 */
678df8bae1dSRodney W. Grimes 			if (ip->ip_p == IPPROTO_IGMP)
679df8bae1dSRodney W. Grimes 				goto ours;
68086425c62SRobert Watson 			IPSTAT_INC(ips_forward);
681df8bae1dSRodney W. Grimes 		}
682df8bae1dSRodney W. Grimes 		/*
683d10910e6SBruce M Simpson 		 * Assume the packet is for us, to avoid prematurely taking
684d10910e6SBruce M Simpson 		 * a lock on the in_multi hash. Protocols must perform
685d10910e6SBruce M Simpson 		 * their own filtering and update statistics accordingly.
686df8bae1dSRodney W. Grimes 		 */
687df8bae1dSRodney W. Grimes 		goto ours;
688df8bae1dSRodney W. Grimes 	}
689df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
690df8bae1dSRodney W. Grimes 		goto ours;
691df8bae1dSRodney W. Grimes 	if (ip->ip_dst.s_addr == INADDR_ANY)
692df8bae1dSRodney W. Grimes 		goto ours;
693df8bae1dSRodney W. Grimes 
6946a800098SYoshinobu Inoue 	/*
6956a800098SYoshinobu Inoue 	 * FAITH(Firewall Aided Internet Translator)
6966a800098SYoshinobu Inoue 	 */
6970aade26eSRobert Watson 	if (ifp && ifp->if_type == IFT_FAITH) {
698603724d3SBjoern A. Zeeb 		if (V_ip_keepfaith) {
6996a800098SYoshinobu Inoue 			if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP)
7006a800098SYoshinobu Inoue 				goto ours;
7016a800098SYoshinobu Inoue 		}
7026a800098SYoshinobu Inoue 		m_freem(m);
7036a800098SYoshinobu Inoue 		return;
7046a800098SYoshinobu Inoue 	}
7059494d596SBrooks Davis 
706df8bae1dSRodney W. Grimes 	/*
707df8bae1dSRodney W. Grimes 	 * Not for us; forward if possible and desirable.
708df8bae1dSRodney W. Grimes 	 */
709603724d3SBjoern A. Zeeb 	if (V_ipforwarding == 0) {
71086425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
711df8bae1dSRodney W. Grimes 		m_freem(m);
712546f251bSChris D. Faulhaber 	} else {
713b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
7141dfcf0d2SAndre Oppermann 		if (ip_ipsec_fwd(m))
715546f251bSChris D. Faulhaber 			goto bad;
716b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
7179b932e9eSAndre Oppermann 		ip_forward(m, dchg);
718546f251bSChris D. Faulhaber 	}
719c67b1d17SGarrett Wollman 	return;
720df8bae1dSRodney W. Grimes 
721df8bae1dSRodney W. Grimes ours:
722d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH
723d0ebc0d2SYaroslav Tykhiy 	/*
724d0ebc0d2SYaroslav Tykhiy 	 * IPSTEALTH: Process non-routing options only
725d0ebc0d2SYaroslav Tykhiy 	 * if the packet is destined for us.
726d0ebc0d2SYaroslav Tykhiy 	 */
72719e5b0a7SRobert Watson 	if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) {
72819e5b0a7SRobert Watson 		if (ia != NULL)
72919e5b0a7SRobert Watson 			ifa_free(&ia->ia_ifa);
730d0ebc0d2SYaroslav Tykhiy 		return;
73119e5b0a7SRobert Watson 	}
732d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */
733d0ebc0d2SYaroslav Tykhiy 
7345da9f8faSJosef Karthauser 	/* Count the packet in the ip address stats */
7355da9f8faSJosef Karthauser 	if (ia != NULL) {
7365da9f8faSJosef Karthauser 		ia->ia_ifa.if_ipackets++;
7375da9f8faSJosef Karthauser 		ia->ia_ifa.if_ibytes += m->m_pkthdr.len;
7388c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
7395da9f8faSJosef Karthauser 	}
740100ba1a6SJordan K. Hubbard 
74163f8d699SJordan K. Hubbard 	/*
742b6ea1aa5SRuslan Ermilov 	 * Attempt reassembly; if it succeeds, proceed.
743ac9d7e26SMax Laier 	 * ip_reass() will return a different mbuf.
744df8bae1dSRodney W. Grimes 	 */
745f0cada84SAndre Oppermann 	if (ip->ip_off & (IP_MF | IP_OFFMASK)) {
746f0cada84SAndre Oppermann 		m = ip_reass(m);
747f0cada84SAndre Oppermann 		if (m == NULL)
748c67b1d17SGarrett Wollman 			return;
7496a800098SYoshinobu Inoue 		ip = mtod(m, struct ip *);
7507e2df452SRuslan Ermilov 		/* Get the header length of the reassembled packet */
75153be11f6SPoul-Henning Kamp 		hlen = ip->ip_hl << 2;
752f0cada84SAndre Oppermann 	}
753f0cada84SAndre Oppermann 
754f0cada84SAndre Oppermann 	/*
755f0cada84SAndre Oppermann 	 * Further protocols expect the packet length to be w/o the
756f0cada84SAndre Oppermann 	 * IP header.
757f0cada84SAndre Oppermann 	 */
758df8bae1dSRodney W. Grimes 	ip->ip_len -= hlen;
759df8bae1dSRodney W. Grimes 
760b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
76133841545SHajimu UMEMOTO 	/*
76233841545SHajimu UMEMOTO 	 * enforce IPsec policy checking if we are seeing last header.
76333841545SHajimu UMEMOTO 	 * note that we do not visit this with protocols with pcb layer
76433841545SHajimu UMEMOTO 	 * code - like udp/tcp/raw ip.
76533841545SHajimu UMEMOTO 	 */
7661dfcf0d2SAndre Oppermann 	if (ip_ipsec_input(m))
76733841545SHajimu UMEMOTO 		goto bad;
768b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
76933841545SHajimu UMEMOTO 
770df8bae1dSRodney W. Grimes 	/*
771df8bae1dSRodney W. Grimes 	 * Switch out to protocol's input routine.
772df8bae1dSRodney W. Grimes 	 */
77386425c62SRobert Watson 	IPSTAT_INC(ips_delivered);
7749b932e9eSAndre Oppermann 
7752b25acc1SLuigi Rizzo 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
776c67b1d17SGarrett Wollman 	return;
777df8bae1dSRodney W. Grimes bad:
778df8bae1dSRodney W. Grimes 	m_freem(m);
779c67b1d17SGarrett Wollman }
780c67b1d17SGarrett Wollman 
781c67b1d17SGarrett Wollman /*
782d248c7d7SRobert Watson  * After maxnipq has been updated, propagate the change to UMA.  The UMA zone
783d248c7d7SRobert Watson  * max has slightly different semantics than the sysctl, for historical
784d248c7d7SRobert Watson  * reasons.
785d248c7d7SRobert Watson  */
786d248c7d7SRobert Watson static void
787d248c7d7SRobert Watson maxnipq_update(void)
788d248c7d7SRobert Watson {
789d248c7d7SRobert Watson 
790d248c7d7SRobert Watson 	/*
791d248c7d7SRobert Watson 	 * -1 for unlimited allocation.
792d248c7d7SRobert Watson 	 */
793603724d3SBjoern A. Zeeb 	if (V_maxnipq < 0)
794603724d3SBjoern A. Zeeb 		uma_zone_set_max(V_ipq_zone, 0);
795d248c7d7SRobert Watson 	/*
796d248c7d7SRobert Watson 	 * Positive number for specific bound.
797d248c7d7SRobert Watson 	 */
798603724d3SBjoern A. Zeeb 	if (V_maxnipq > 0)
799603724d3SBjoern A. Zeeb 		uma_zone_set_max(V_ipq_zone, V_maxnipq);
800d248c7d7SRobert Watson 	/*
801d248c7d7SRobert Watson 	 * Zero specifies no further fragment queue allocation -- set the
802d248c7d7SRobert Watson 	 * bound very low, but rely on implementation elsewhere to actually
803d248c7d7SRobert Watson 	 * prevent allocation and reclaim current queues.
804d248c7d7SRobert Watson 	 */
805603724d3SBjoern A. Zeeb 	if (V_maxnipq == 0)
806603724d3SBjoern A. Zeeb 		uma_zone_set_max(V_ipq_zone, 1);
807d248c7d7SRobert Watson }
808d248c7d7SRobert Watson 
8094f590175SPaul Saab static void
8104f590175SPaul Saab ipq_zone_change(void *tag)
8114f590175SPaul Saab {
8124f590175SPaul Saab 
813603724d3SBjoern A. Zeeb 	if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) {
814603724d3SBjoern A. Zeeb 		V_maxnipq = nmbclusters / 32;
8154f590175SPaul Saab 		maxnipq_update();
8164f590175SPaul Saab 	}
8174f590175SPaul Saab }
8184f590175SPaul Saab 
819d248c7d7SRobert Watson static int
820d248c7d7SRobert Watson sysctl_maxnipq(SYSCTL_HANDLER_ARGS)
821d248c7d7SRobert Watson {
822d248c7d7SRobert Watson 	int error, i;
823d248c7d7SRobert Watson 
824603724d3SBjoern A. Zeeb 	i = V_maxnipq;
825d248c7d7SRobert Watson 	error = sysctl_handle_int(oidp, &i, 0, req);
826d248c7d7SRobert Watson 	if (error || !req->newptr)
827d248c7d7SRobert Watson 		return (error);
828d248c7d7SRobert Watson 
829d248c7d7SRobert Watson 	/*
830d248c7d7SRobert Watson 	 * XXXRW: Might be a good idea to sanity check the argument and place
831d248c7d7SRobert Watson 	 * an extreme upper bound.
832d248c7d7SRobert Watson 	 */
833d248c7d7SRobert Watson 	if (i < -1)
834d248c7d7SRobert Watson 		return (EINVAL);
835603724d3SBjoern A. Zeeb 	V_maxnipq = i;
836d248c7d7SRobert Watson 	maxnipq_update();
837d248c7d7SRobert Watson 	return (0);
838d248c7d7SRobert Watson }
839d248c7d7SRobert Watson 
840d248c7d7SRobert Watson SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW,
841d248c7d7SRobert Watson     NULL, 0, sysctl_maxnipq, "I",
842d248c7d7SRobert Watson     "Maximum number of IPv4 fragment reassembly queue entries");
843d248c7d7SRobert Watson 
844d248c7d7SRobert Watson /*
8458948e4baSArchie Cobbs  * Take incoming datagram fragment and try to reassemble it into
846f0cada84SAndre Oppermann  * whole datagram.  If the argument is the first fragment or one
847f0cada84SAndre Oppermann  * in between the function will return NULL and store the mbuf
848f0cada84SAndre Oppermann  * in the fragment chain.  If the argument is the last fragment
849f0cada84SAndre Oppermann  * the packet will be reassembled and the pointer to the new
850f0cada84SAndre Oppermann  * mbuf returned for further processing.  Only m_tags attached
851f0cada84SAndre Oppermann  * to the first packet/fragment are preserved.
852f0cada84SAndre Oppermann  * The IP header is *NOT* adjusted out of iplen.
853df8bae1dSRodney W. Grimes  */
854f0cada84SAndre Oppermann struct mbuf *
855f0cada84SAndre Oppermann ip_reass(struct mbuf *m)
856df8bae1dSRodney W. Grimes {
857f0cada84SAndre Oppermann 	struct ip *ip;
858f0cada84SAndre Oppermann 	struct mbuf *p, *q, *nq, *t;
859f0cada84SAndre Oppermann 	struct ipq *fp = NULL;
860f0cada84SAndre Oppermann 	struct ipqhead *head;
861f0cada84SAndre Oppermann 	int i, hlen, next;
86259dfcba4SHajimu UMEMOTO 	u_int8_t ecn, ecn0;
863f0cada84SAndre Oppermann 	u_short hash;
864df8bae1dSRodney W. Grimes 
865800af1fbSMaxim Konovalov 	/* If maxnipq or maxfragsperpacket are 0, never accept fragments. */
866603724d3SBjoern A. Zeeb 	if (V_maxnipq == 0 || V_maxfragsperpacket == 0) {
86786425c62SRobert Watson 		IPSTAT_INC(ips_fragments);
86886425c62SRobert Watson 		IPSTAT_INC(ips_fragdropped);
8699d804f81SAndre Oppermann 		m_freem(m);
8709d804f81SAndre Oppermann 		return (NULL);
871f0cada84SAndre Oppermann 	}
8722fad1e93SSam Leffler 
873f0cada84SAndre Oppermann 	ip = mtod(m, struct ip *);
874f0cada84SAndre Oppermann 	hlen = ip->ip_hl << 2;
875f0cada84SAndre Oppermann 
876f0cada84SAndre Oppermann 	hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id);
877603724d3SBjoern A. Zeeb 	head = &V_ipq[hash];
878f0cada84SAndre Oppermann 	IPQ_LOCK();
879f0cada84SAndre Oppermann 
880f0cada84SAndre Oppermann 	/*
881f0cada84SAndre Oppermann 	 * Look for queue of fragments
882f0cada84SAndre Oppermann 	 * of this datagram.
883f0cada84SAndre Oppermann 	 */
884f0cada84SAndre Oppermann 	TAILQ_FOREACH(fp, head, ipq_list)
885f0cada84SAndre Oppermann 		if (ip->ip_id == fp->ipq_id &&
886f0cada84SAndre Oppermann 		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
887f0cada84SAndre Oppermann 		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
888f0cada84SAndre Oppermann #ifdef MAC
88930d239bcSRobert Watson 		    mac_ipq_match(m, fp) &&
890f0cada84SAndre Oppermann #endif
891f0cada84SAndre Oppermann 		    ip->ip_p == fp->ipq_p)
892f0cada84SAndre Oppermann 			goto found;
893f0cada84SAndre Oppermann 
894f0cada84SAndre Oppermann 	fp = NULL;
895f0cada84SAndre Oppermann 
896f0cada84SAndre Oppermann 	/*
897d248c7d7SRobert Watson 	 * Attempt to trim the number of allocated fragment queues if it
898d248c7d7SRobert Watson 	 * exceeds the administrative limit.
899f0cada84SAndre Oppermann 	 */
900603724d3SBjoern A. Zeeb 	if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) {
901f0cada84SAndre Oppermann 		/*
902f0cada84SAndre Oppermann 		 * drop something from the tail of the current queue
903f0cada84SAndre Oppermann 		 * before proceeding further
904f0cada84SAndre Oppermann 		 */
905f0cada84SAndre Oppermann 		struct ipq *q = TAILQ_LAST(head, ipqhead);
906f0cada84SAndre Oppermann 		if (q == NULL) {   /* gak */
907f0cada84SAndre Oppermann 			for (i = 0; i < IPREASS_NHASH; i++) {
908603724d3SBjoern A. Zeeb 				struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead);
909f0cada84SAndre Oppermann 				if (r) {
91086425c62SRobert Watson 					IPSTAT_ADD(ips_fragtimeout,
91186425c62SRobert Watson 					    r->ipq_nfrags);
912603724d3SBjoern A. Zeeb 					ip_freef(&V_ipq[i], r);
913f0cada84SAndre Oppermann 					break;
914f0cada84SAndre Oppermann 				}
915f0cada84SAndre Oppermann 			}
916f0cada84SAndre Oppermann 		} else {
91786425c62SRobert Watson 			IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags);
918f0cada84SAndre Oppermann 			ip_freef(head, q);
919f0cada84SAndre Oppermann 		}
920f0cada84SAndre Oppermann 	}
921f0cada84SAndre Oppermann 
922f0cada84SAndre Oppermann found:
923f0cada84SAndre Oppermann 	/*
924f0cada84SAndre Oppermann 	 * Adjust ip_len to not reflect header,
925f0cada84SAndre Oppermann 	 * convert offset of this to bytes.
926f0cada84SAndre Oppermann 	 */
927f0cada84SAndre Oppermann 	ip->ip_len -= hlen;
928f0cada84SAndre Oppermann 	if (ip->ip_off & IP_MF) {
929f0cada84SAndre Oppermann 		/*
930f0cada84SAndre Oppermann 		 * Make sure that fragments have a data length
931f0cada84SAndre Oppermann 		 * that's a non-zero multiple of 8 bytes.
932f0cada84SAndre Oppermann 		 */
933f0cada84SAndre Oppermann 		if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) {
93486425c62SRobert Watson 			IPSTAT_INC(ips_toosmall); /* XXX */
935f0cada84SAndre Oppermann 			goto dropfrag;
936f0cada84SAndre Oppermann 		}
937f0cada84SAndre Oppermann 		m->m_flags |= M_FRAG;
938f0cada84SAndre Oppermann 	} else
939f0cada84SAndre Oppermann 		m->m_flags &= ~M_FRAG;
940f0cada84SAndre Oppermann 	ip->ip_off <<= 3;
941f0cada84SAndre Oppermann 
942f0cada84SAndre Oppermann 
943f0cada84SAndre Oppermann 	/*
944f0cada84SAndre Oppermann 	 * Attempt reassembly; if it succeeds, proceed.
945f0cada84SAndre Oppermann 	 * ip_reass() will return a different mbuf.
946f0cada84SAndre Oppermann 	 */
94786425c62SRobert Watson 	IPSTAT_INC(ips_fragments);
948f0cada84SAndre Oppermann 	m->m_pkthdr.header = ip;
949f0cada84SAndre Oppermann 
950f0cada84SAndre Oppermann 	/* Previous ip_reass() started here. */
951df8bae1dSRodney W. Grimes 	/*
952df8bae1dSRodney W. Grimes 	 * Presence of header sizes in mbufs
953df8bae1dSRodney W. Grimes 	 * would confuse code below.
954df8bae1dSRodney W. Grimes 	 */
955df8bae1dSRodney W. Grimes 	m->m_data += hlen;
956df8bae1dSRodney W. Grimes 	m->m_len -= hlen;
957df8bae1dSRodney W. Grimes 
958df8bae1dSRodney W. Grimes 	/*
959df8bae1dSRodney W. Grimes 	 * If first fragment to arrive, create a reassembly queue.
960df8bae1dSRodney W. Grimes 	 */
961042bbfa3SRobert Watson 	if (fp == NULL) {
962603724d3SBjoern A. Zeeb 		fp = uma_zalloc(V_ipq_zone, M_NOWAIT);
963d248c7d7SRobert Watson 		if (fp == NULL)
964df8bae1dSRodney W. Grimes 			goto dropfrag;
96536b0360bSRobert Watson #ifdef MAC
96630d239bcSRobert Watson 		if (mac_ipq_init(fp, M_NOWAIT) != 0) {
967603724d3SBjoern A. Zeeb 			uma_zfree(V_ipq_zone, fp);
9681d7d0bfeSPawel Jakub Dawidek 			fp = NULL;
9695e7ce478SRobert Watson 			goto dropfrag;
9705e7ce478SRobert Watson 		}
97130d239bcSRobert Watson 		mac_ipq_create(m, fp);
97236b0360bSRobert Watson #endif
973462b86feSPoul-Henning Kamp 		TAILQ_INSERT_HEAD(head, fp, ipq_list);
974603724d3SBjoern A. Zeeb 		V_nipq++;
975375386e2SMike Silbersack 		fp->ipq_nfrags = 1;
976df8bae1dSRodney W. Grimes 		fp->ipq_ttl = IPFRAGTTL;
977df8bae1dSRodney W. Grimes 		fp->ipq_p = ip->ip_p;
978df8bae1dSRodney W. Grimes 		fp->ipq_id = ip->ip_id;
9796effc713SDoug Rabson 		fp->ipq_src = ip->ip_src;
9806effc713SDoug Rabson 		fp->ipq_dst = ip->ip_dst;
981af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
982af38c68cSLuigi Rizzo 		m->m_nextpkt = NULL;
983800af1fbSMaxim Konovalov 		goto done;
98436b0360bSRobert Watson 	} else {
985375386e2SMike Silbersack 		fp->ipq_nfrags++;
98636b0360bSRobert Watson #ifdef MAC
98730d239bcSRobert Watson 		mac_ipq_update(m, fp);
98836b0360bSRobert Watson #endif
989df8bae1dSRodney W. Grimes 	}
990df8bae1dSRodney W. Grimes 
9916effc713SDoug Rabson #define GETIP(m)	((struct ip*)((m)->m_pkthdr.header))
9926effc713SDoug Rabson 
993df8bae1dSRodney W. Grimes 	/*
99459dfcba4SHajimu UMEMOTO 	 * Handle ECN by comparing this segment with the first one;
99559dfcba4SHajimu UMEMOTO 	 * if CE is set, do not lose CE.
99659dfcba4SHajimu UMEMOTO 	 * drop if CE and not-ECT are mixed for the same packet.
99759dfcba4SHajimu UMEMOTO 	 */
99859dfcba4SHajimu UMEMOTO 	ecn = ip->ip_tos & IPTOS_ECN_MASK;
99959dfcba4SHajimu UMEMOTO 	ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK;
100059dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_CE) {
100159dfcba4SHajimu UMEMOTO 		if (ecn0 == IPTOS_ECN_NOTECT)
100259dfcba4SHajimu UMEMOTO 			goto dropfrag;
100359dfcba4SHajimu UMEMOTO 		if (ecn0 != IPTOS_ECN_CE)
100459dfcba4SHajimu UMEMOTO 			GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE;
100559dfcba4SHajimu UMEMOTO 	}
100659dfcba4SHajimu UMEMOTO 	if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT)
100759dfcba4SHajimu UMEMOTO 		goto dropfrag;
100859dfcba4SHajimu UMEMOTO 
100959dfcba4SHajimu UMEMOTO 	/*
1010df8bae1dSRodney W. Grimes 	 * Find a segment which begins after this one does.
1011df8bae1dSRodney W. Grimes 	 */
10126effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt)
10136effc713SDoug Rabson 		if (GETIP(q)->ip_off > ip->ip_off)
1014df8bae1dSRodney W. Grimes 			break;
1015df8bae1dSRodney W. Grimes 
1016df8bae1dSRodney W. Grimes 	/*
1017df8bae1dSRodney W. Grimes 	 * If there is a preceding segment, it may provide some of
1018df8bae1dSRodney W. Grimes 	 * our data already.  If so, drop the data from the incoming
1019af38c68cSLuigi Rizzo 	 * segment.  If it provides all of our data, drop us, otherwise
1020af38c68cSLuigi Rizzo 	 * stick new segment in the proper place.
1021db4f9cc7SJonathan Lemon 	 *
1022db4f9cc7SJonathan Lemon 	 * If some of the data is dropped from the the preceding
1023db4f9cc7SJonathan Lemon 	 * segment, then it's checksum is invalidated.
1024df8bae1dSRodney W. Grimes 	 */
10256effc713SDoug Rabson 	if (p) {
10266effc713SDoug Rabson 		i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off;
1027df8bae1dSRodney W. Grimes 		if (i > 0) {
1028df8bae1dSRodney W. Grimes 			if (i >= ip->ip_len)
1029df8bae1dSRodney W. Grimes 				goto dropfrag;
10306a800098SYoshinobu Inoue 			m_adj(m, i);
1031db4f9cc7SJonathan Lemon 			m->m_pkthdr.csum_flags = 0;
1032df8bae1dSRodney W. Grimes 			ip->ip_off += i;
1033df8bae1dSRodney W. Grimes 			ip->ip_len -= i;
1034df8bae1dSRodney W. Grimes 		}
1035af38c68cSLuigi Rizzo 		m->m_nextpkt = p->m_nextpkt;
1036af38c68cSLuigi Rizzo 		p->m_nextpkt = m;
1037af38c68cSLuigi Rizzo 	} else {
1038af38c68cSLuigi Rizzo 		m->m_nextpkt = fp->ipq_frags;
1039af38c68cSLuigi Rizzo 		fp->ipq_frags = m;
1040df8bae1dSRodney W. Grimes 	}
1041df8bae1dSRodney W. Grimes 
1042df8bae1dSRodney W. Grimes 	/*
1043df8bae1dSRodney W. Grimes 	 * While we overlap succeeding segments trim them or,
1044df8bae1dSRodney W. Grimes 	 * if they are completely covered, dequeue them.
1045df8bae1dSRodney W. Grimes 	 */
10466effc713SDoug Rabson 	for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off;
1047af38c68cSLuigi Rizzo 	     q = nq) {
1048b36f5b37SMaxim Konovalov 		i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off;
10496effc713SDoug Rabson 		if (i < GETIP(q)->ip_len) {
10506effc713SDoug Rabson 			GETIP(q)->ip_len -= i;
10516effc713SDoug Rabson 			GETIP(q)->ip_off += i;
10526effc713SDoug Rabson 			m_adj(q, i);
1053db4f9cc7SJonathan Lemon 			q->m_pkthdr.csum_flags = 0;
1054df8bae1dSRodney W. Grimes 			break;
1055df8bae1dSRodney W. Grimes 		}
10566effc713SDoug Rabson 		nq = q->m_nextpkt;
1057af38c68cSLuigi Rizzo 		m->m_nextpkt = nq;
105886425c62SRobert Watson 		IPSTAT_INC(ips_fragdropped);
1059375386e2SMike Silbersack 		fp->ipq_nfrags--;
10606effc713SDoug Rabson 		m_freem(q);
1061df8bae1dSRodney W. Grimes 	}
1062df8bae1dSRodney W. Grimes 
1063df8bae1dSRodney W. Grimes 	/*
1064375386e2SMike Silbersack 	 * Check for complete reassembly and perform frag per packet
1065375386e2SMike Silbersack 	 * limiting.
1066375386e2SMike Silbersack 	 *
1067375386e2SMike Silbersack 	 * Frag limiting is performed here so that the nth frag has
1068375386e2SMike Silbersack 	 * a chance to complete the packet before we drop the packet.
1069375386e2SMike Silbersack 	 * As a result, n+1 frags are actually allowed per packet, but
1070375386e2SMike Silbersack 	 * only n will ever be stored. (n = maxfragsperpacket.)
1071375386e2SMike Silbersack 	 *
1072df8bae1dSRodney W. Grimes 	 */
10736effc713SDoug Rabson 	next = 0;
10746effc713SDoug Rabson 	for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) {
1075375386e2SMike Silbersack 		if (GETIP(q)->ip_off != next) {
1076603724d3SBjoern A. Zeeb 			if (fp->ipq_nfrags > V_maxfragsperpacket) {
107786425c62SRobert Watson 				IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
1078375386e2SMike Silbersack 				ip_freef(head, fp);
107999e8617dSMaxim Konovalov 			}
1080f0cada84SAndre Oppermann 			goto done;
1081375386e2SMike Silbersack 		}
10826effc713SDoug Rabson 		next += GETIP(q)->ip_len;
10836effc713SDoug Rabson 	}
10846effc713SDoug Rabson 	/* Make sure the last packet didn't have the IP_MF flag */
1085375386e2SMike Silbersack 	if (p->m_flags & M_FRAG) {
1086603724d3SBjoern A. Zeeb 		if (fp->ipq_nfrags > V_maxfragsperpacket) {
108786425c62SRobert Watson 			IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
1088375386e2SMike Silbersack 			ip_freef(head, fp);
108999e8617dSMaxim Konovalov 		}
1090f0cada84SAndre Oppermann 		goto done;
1091375386e2SMike Silbersack 	}
1092df8bae1dSRodney W. Grimes 
1093df8bae1dSRodney W. Grimes 	/*
1094430d30d8SBill Fenner 	 * Reassembly is complete.  Make sure the packet is a sane size.
1095430d30d8SBill Fenner 	 */
10966effc713SDoug Rabson 	q = fp->ipq_frags;
10976effc713SDoug Rabson 	ip = GETIP(q);
109853be11f6SPoul-Henning Kamp 	if (next + (ip->ip_hl << 2) > IP_MAXPACKET) {
109986425c62SRobert Watson 		IPSTAT_INC(ips_toolong);
110086425c62SRobert Watson 		IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags);
1101462b86feSPoul-Henning Kamp 		ip_freef(head, fp);
1102f0cada84SAndre Oppermann 		goto done;
1103430d30d8SBill Fenner 	}
1104430d30d8SBill Fenner 
1105430d30d8SBill Fenner 	/*
1106430d30d8SBill Fenner 	 * Concatenate fragments.
1107df8bae1dSRodney W. Grimes 	 */
11086effc713SDoug Rabson 	m = q;
1109df8bae1dSRodney W. Grimes 	t = m->m_next;
111002410549SRobert Watson 	m->m_next = NULL;
1111df8bae1dSRodney W. Grimes 	m_cat(m, t);
11126effc713SDoug Rabson 	nq = q->m_nextpkt;
111302410549SRobert Watson 	q->m_nextpkt = NULL;
11146effc713SDoug Rabson 	for (q = nq; q != NULL; q = nq) {
11156effc713SDoug Rabson 		nq = q->m_nextpkt;
1116945aa40dSDoug Rabson 		q->m_nextpkt = NULL;
1117db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags;
1118db4f9cc7SJonathan Lemon 		m->m_pkthdr.csum_data += q->m_pkthdr.csum_data;
1119a8db1d93SJonathan Lemon 		m_cat(m, q);
1120df8bae1dSRodney W. Grimes 	}
11216edb555dSOleg Bulyzhin 	/*
11226edb555dSOleg Bulyzhin 	 * In order to do checksumming faster we do 'end-around carry' here
11236edb555dSOleg Bulyzhin 	 * (and not in for{} loop), though it implies we are not going to
11246edb555dSOleg Bulyzhin 	 * reassemble more than 64k fragments.
11256edb555dSOleg Bulyzhin 	 */
11266edb555dSOleg Bulyzhin 	m->m_pkthdr.csum_data =
11276edb555dSOleg Bulyzhin 	    (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16);
112836b0360bSRobert Watson #ifdef MAC
112930d239bcSRobert Watson 	mac_ipq_reassemble(fp, m);
113030d239bcSRobert Watson 	mac_ipq_destroy(fp);
113136b0360bSRobert Watson #endif
1132df8bae1dSRodney W. Grimes 
1133df8bae1dSRodney W. Grimes 	/*
1134f0cada84SAndre Oppermann 	 * Create header for new ip packet by modifying header of first
1135f0cada84SAndre Oppermann 	 * packet;  dequeue and discard fragment reassembly header.
1136df8bae1dSRodney W. Grimes 	 * Make header visible.
1137df8bae1dSRodney W. Grimes 	 */
1138f0cada84SAndre Oppermann 	ip->ip_len = (ip->ip_hl << 2) + next;
11396effc713SDoug Rabson 	ip->ip_src = fp->ipq_src;
11406effc713SDoug Rabson 	ip->ip_dst = fp->ipq_dst;
1141462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(head, fp, ipq_list);
1142603724d3SBjoern A. Zeeb 	V_nipq--;
1143603724d3SBjoern A. Zeeb 	uma_zfree(V_ipq_zone, fp);
114453be11f6SPoul-Henning Kamp 	m->m_len += (ip->ip_hl << 2);
114553be11f6SPoul-Henning Kamp 	m->m_data -= (ip->ip_hl << 2);
1146df8bae1dSRodney W. Grimes 	/* some debugging cruft by sklower, below, will go away soon */
1147a5554bf0SPoul-Henning Kamp 	if (m->m_flags & M_PKTHDR)	/* XXX this should be done elsewhere */
1148a5554bf0SPoul-Henning Kamp 		m_fixhdr(m);
114986425c62SRobert Watson 	IPSTAT_INC(ips_reassembled);
1150f0cada84SAndre Oppermann 	IPQ_UNLOCK();
11516a800098SYoshinobu Inoue 	return (m);
1152df8bae1dSRodney W. Grimes 
1153df8bae1dSRodney W. Grimes dropfrag:
115486425c62SRobert Watson 	IPSTAT_INC(ips_fragdropped);
1155042bbfa3SRobert Watson 	if (fp != NULL)
1156375386e2SMike Silbersack 		fp->ipq_nfrags--;
1157df8bae1dSRodney W. Grimes 	m_freem(m);
1158f0cada84SAndre Oppermann done:
1159f0cada84SAndre Oppermann 	IPQ_UNLOCK();
1160f0cada84SAndre Oppermann 	return (NULL);
11616effc713SDoug Rabson 
11626effc713SDoug Rabson #undef GETIP
1163df8bae1dSRodney W. Grimes }
1164df8bae1dSRodney W. Grimes 
1165df8bae1dSRodney W. Grimes /*
1166df8bae1dSRodney W. Grimes  * Free a fragment reassembly header and all
1167df8bae1dSRodney W. Grimes  * associated datagrams.
1168df8bae1dSRodney W. Grimes  */
11690312fbe9SPoul-Henning Kamp static void
1170f2565d68SRobert Watson ip_freef(struct ipqhead *fhp, struct ipq *fp)
1171df8bae1dSRodney W. Grimes {
1172f2565d68SRobert Watson 	struct mbuf *q;
1173df8bae1dSRodney W. Grimes 
11742fad1e93SSam Leffler 	IPQ_LOCK_ASSERT();
11752fad1e93SSam Leffler 
11766effc713SDoug Rabson 	while (fp->ipq_frags) {
11776effc713SDoug Rabson 		q = fp->ipq_frags;
11786effc713SDoug Rabson 		fp->ipq_frags = q->m_nextpkt;
11796effc713SDoug Rabson 		m_freem(q);
1180df8bae1dSRodney W. Grimes 	}
1181462b86feSPoul-Henning Kamp 	TAILQ_REMOVE(fhp, fp, ipq_list);
1182603724d3SBjoern A. Zeeb 	uma_zfree(V_ipq_zone, fp);
1183603724d3SBjoern A. Zeeb 	V_nipq--;
1184df8bae1dSRodney W. Grimes }
1185df8bae1dSRodney W. Grimes 
1186df8bae1dSRodney W. Grimes /*
1187df8bae1dSRodney W. Grimes  * IP timer processing;
1188df8bae1dSRodney W. Grimes  * if a timer expires on a reassembly
1189df8bae1dSRodney W. Grimes  * queue, discard it.
1190df8bae1dSRodney W. Grimes  */
1191df8bae1dSRodney W. Grimes void
1192f2565d68SRobert Watson ip_slowtimo(void)
1193df8bae1dSRodney W. Grimes {
11948b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
1195f2565d68SRobert Watson 	struct ipq *fp;
1196194a213eSAndrey A. Chernov 	int i;
1197df8bae1dSRodney W. Grimes 
11985ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
11992fad1e93SSam Leffler 	IPQ_LOCK();
12008b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
12018b615593SMarko Zec 		CURVNET_SET(vnet_iter);
1202194a213eSAndrey A. Chernov 		for (i = 0; i < IPREASS_NHASH; i++) {
1203603724d3SBjoern A. Zeeb 			for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) {
1204462b86feSPoul-Henning Kamp 				struct ipq *fpp;
1205462b86feSPoul-Henning Kamp 
1206462b86feSPoul-Henning Kamp 				fpp = fp;
1207462b86feSPoul-Henning Kamp 				fp = TAILQ_NEXT(fp, ipq_list);
1208462b86feSPoul-Henning Kamp 				if(--fpp->ipq_ttl == 0) {
120986425c62SRobert Watson 					IPSTAT_ADD(ips_fragtimeout,
121086425c62SRobert Watson 					    fpp->ipq_nfrags);
1211603724d3SBjoern A. Zeeb 					ip_freef(&V_ipq[i], fpp);
1212df8bae1dSRodney W. Grimes 				}
1213df8bae1dSRodney W. Grimes 			}
1214194a213eSAndrey A. Chernov 		}
1215690a6055SJesper Skriver 		/*
1216690a6055SJesper Skriver 		 * If we are over the maximum number of fragments
1217690a6055SJesper Skriver 		 * (due to the limit being lowered), drain off
1218690a6055SJesper Skriver 		 * enough to get down to the new limit.
1219690a6055SJesper Skriver 		 */
1220603724d3SBjoern A. Zeeb 		if (V_maxnipq >= 0 && V_nipq > V_maxnipq) {
1221690a6055SJesper Skriver 			for (i = 0; i < IPREASS_NHASH; i++) {
12228b615593SMarko Zec 				while (V_nipq > V_maxnipq &&
12238b615593SMarko Zec 				    !TAILQ_EMPTY(&V_ipq[i])) {
122486425c62SRobert Watson 					IPSTAT_ADD(ips_fragdropped,
122586425c62SRobert Watson 					    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
12268b615593SMarko Zec 					ip_freef(&V_ipq[i],
12278b615593SMarko Zec 					    TAILQ_FIRST(&V_ipq[i]));
1228690a6055SJesper Skriver 				}
1229690a6055SJesper Skriver 			}
1230690a6055SJesper Skriver 		}
12318b615593SMarko Zec 		CURVNET_RESTORE();
12328b615593SMarko Zec 	}
12332fad1e93SSam Leffler 	IPQ_UNLOCK();
12345ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
1235df8bae1dSRodney W. Grimes }
1236df8bae1dSRodney W. Grimes 
1237df8bae1dSRodney W. Grimes /*
1238df8bae1dSRodney W. Grimes  * Drain off all datagram fragments.
1239df8bae1dSRodney W. Grimes  */
1240df8bae1dSRodney W. Grimes void
1241f2565d68SRobert Watson ip_drain(void)
1242df8bae1dSRodney W. Grimes {
12438b615593SMarko Zec 	VNET_ITERATOR_DECL(vnet_iter);
1244194a213eSAndrey A. Chernov 	int     i;
1245ce29ab3aSGarrett Wollman 
12465ee847d3SRobert Watson 	VNET_LIST_RLOCK_NOSLEEP();
12472fad1e93SSam Leffler 	IPQ_LOCK();
12488b615593SMarko Zec 	VNET_FOREACH(vnet_iter) {
12498b615593SMarko Zec 		CURVNET_SET(vnet_iter);
1250194a213eSAndrey A. Chernov 		for (i = 0; i < IPREASS_NHASH; i++) {
1251603724d3SBjoern A. Zeeb 			while(!TAILQ_EMPTY(&V_ipq[i])) {
125286425c62SRobert Watson 				IPSTAT_ADD(ips_fragdropped,
125386425c62SRobert Watson 				    TAILQ_FIRST(&V_ipq[i])->ipq_nfrags);
1254603724d3SBjoern A. Zeeb 				ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i]));
1255194a213eSAndrey A. Chernov 			}
1256194a213eSAndrey A. Chernov 		}
12578b615593SMarko Zec 		CURVNET_RESTORE();
12588b615593SMarko Zec 	}
12592fad1e93SSam Leffler 	IPQ_UNLOCK();
12605ee847d3SRobert Watson 	VNET_LIST_RUNLOCK_NOSLEEP();
1261ce29ab3aSGarrett Wollman 	in_rtqdrain();
1262df8bae1dSRodney W. Grimes }
1263df8bae1dSRodney W. Grimes 
1264df8bae1dSRodney W. Grimes /*
1265de38924dSAndre Oppermann  * The protocol to be inserted into ip_protox[] must be already registered
1266de38924dSAndre Oppermann  * in inetsw[], either statically or through pf_proto_register().
1267de38924dSAndre Oppermann  */
1268de38924dSAndre Oppermann int
1269de38924dSAndre Oppermann ipproto_register(u_char ipproto)
1270de38924dSAndre Oppermann {
1271de38924dSAndre Oppermann 	struct protosw *pr;
1272de38924dSAndre Oppermann 
1273de38924dSAndre Oppermann 	/* Sanity checks. */
1274de38924dSAndre Oppermann 	if (ipproto == 0)
1275de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
1276de38924dSAndre Oppermann 
1277de38924dSAndre Oppermann 	/*
1278de38924dSAndre Oppermann 	 * The protocol slot must not be occupied by another protocol
1279de38924dSAndre Oppermann 	 * already.  An index pointing to IPPROTO_RAW is unused.
1280de38924dSAndre Oppermann 	 */
1281de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
1282de38924dSAndre Oppermann 	if (pr == NULL)
1283de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
1284de38924dSAndre Oppermann 	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
1285de38924dSAndre Oppermann 		return (EEXIST);
1286de38924dSAndre Oppermann 
1287de38924dSAndre Oppermann 	/* Find the protocol position in inetsw[] and set the index. */
1288de38924dSAndre Oppermann 	for (pr = inetdomain.dom_protosw;
1289de38924dSAndre Oppermann 	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
1290de38924dSAndre Oppermann 		if (pr->pr_domain->dom_family == PF_INET &&
1291de38924dSAndre Oppermann 		    pr->pr_protocol && pr->pr_protocol == ipproto) {
1292de38924dSAndre Oppermann 			/* Be careful to only index valid IP protocols. */
1293db77984cSSam Leffler 			if (pr->pr_protocol < IPPROTO_MAX) {
1294de38924dSAndre Oppermann 				ip_protox[pr->pr_protocol] = pr - inetsw;
1295de38924dSAndre Oppermann 				return (0);
1296de38924dSAndre Oppermann 			} else
1297de38924dSAndre Oppermann 				return (EINVAL);
1298de38924dSAndre Oppermann 		}
1299de38924dSAndre Oppermann 	}
1300de38924dSAndre Oppermann 	return (EPROTONOSUPPORT);
1301de38924dSAndre Oppermann }
1302de38924dSAndre Oppermann 
1303de38924dSAndre Oppermann int
1304de38924dSAndre Oppermann ipproto_unregister(u_char ipproto)
1305de38924dSAndre Oppermann {
1306de38924dSAndre Oppermann 	struct protosw *pr;
1307de38924dSAndre Oppermann 
1308de38924dSAndre Oppermann 	/* Sanity checks. */
1309de38924dSAndre Oppermann 	if (ipproto == 0)
1310de38924dSAndre Oppermann 		return (EPROTONOSUPPORT);
1311de38924dSAndre Oppermann 
1312de38924dSAndre Oppermann 	/* Check if the protocol was indeed registered. */
1313de38924dSAndre Oppermann 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
1314de38924dSAndre Oppermann 	if (pr == NULL)
1315de38924dSAndre Oppermann 		return (EPFNOSUPPORT);
1316de38924dSAndre Oppermann 	if (ip_protox[ipproto] == pr - inetsw)  /* IPPROTO_RAW */
1317de38924dSAndre Oppermann 		return (ENOENT);
1318de38924dSAndre Oppermann 
1319de38924dSAndre Oppermann 	/* Reset the protocol slot to IPPROTO_RAW. */
1320de38924dSAndre Oppermann 	ip_protox[ipproto] = pr - inetsw;
1321de38924dSAndre Oppermann 	return (0);
1322de38924dSAndre Oppermann }
1323de38924dSAndre Oppermann 
1324df8bae1dSRodney W. Grimes /*
13258c0fec80SRobert Watson  * Given address of next destination (final or next hop), return (referenced)
13268c0fec80SRobert Watson  * internet address info of interface to be used to get there.
1327df8bae1dSRodney W. Grimes  */
1328bd714208SRuslan Ermilov struct in_ifaddr *
13298b07e49aSJulian Elischer ip_rtaddr(struct in_addr dst, u_int fibnum)
1330df8bae1dSRodney W. Grimes {
133197d8d152SAndre Oppermann 	struct route sro;
133202c1c707SAndre Oppermann 	struct sockaddr_in *sin;
133319e5b0a7SRobert Watson 	struct in_ifaddr *ia;
1334df8bae1dSRodney W. Grimes 
13350cfbbe3bSAndre Oppermann 	bzero(&sro, sizeof(sro));
133697d8d152SAndre Oppermann 	sin = (struct sockaddr_in *)&sro.ro_dst;
1337df8bae1dSRodney W. Grimes 	sin->sin_family = AF_INET;
1338df8bae1dSRodney W. Grimes 	sin->sin_len = sizeof(*sin);
1339df8bae1dSRodney W. Grimes 	sin->sin_addr = dst;
13406e6b3f7cSQing Li 	in_rtalloc_ign(&sro, 0, fibnum);
1341df8bae1dSRodney W. Grimes 
134297d8d152SAndre Oppermann 	if (sro.ro_rt == NULL)
134302410549SRobert Watson 		return (NULL);
134402c1c707SAndre Oppermann 
134519e5b0a7SRobert Watson 	ia = ifatoia(sro.ro_rt->rt_ifa);
134619e5b0a7SRobert Watson 	ifa_ref(&ia->ia_ifa);
134797d8d152SAndre Oppermann 	RTFREE(sro.ro_rt);
134819e5b0a7SRobert Watson 	return (ia);
1349df8bae1dSRodney W. Grimes }
1350df8bae1dSRodney W. Grimes 
1351df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = {
1352df8bae1dSRodney W. Grimes 	0,		0,		0,		0,
1353df8bae1dSRodney W. Grimes 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1354df8bae1dSRodney W. Grimes 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1355df8bae1dSRodney W. Grimes 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1356fcaf9f91SMike Silbersack 	0,		0,		EHOSTUNREACH,	0,
13573b8123b7SJesper Skriver 	ENOPROTOOPT,	ECONNREFUSED
1358df8bae1dSRodney W. Grimes };
1359df8bae1dSRodney W. Grimes 
1360df8bae1dSRodney W. Grimes /*
1361df8bae1dSRodney W. Grimes  * Forward a packet.  If some error occurs return the sender
1362df8bae1dSRodney W. Grimes  * an icmp packet.  Note we can't always generate a meaningful
1363df8bae1dSRodney W. Grimes  * icmp message because icmp doesn't have a large enough repertoire
1364df8bae1dSRodney W. Grimes  * of codes and types.
1365df8bae1dSRodney W. Grimes  *
1366df8bae1dSRodney W. Grimes  * If not forwarding, just drop the packet.  This could be confusing
1367df8bae1dSRodney W. Grimes  * if ipforwarding was zero but some routing protocol was advancing
1368df8bae1dSRodney W. Grimes  * us as a gateway to somewhere.  However, we must let the routing
1369df8bae1dSRodney W. Grimes  * protocol deal with that.
1370df8bae1dSRodney W. Grimes  *
1371df8bae1dSRodney W. Grimes  * The srcrt parameter indicates whether the packet is being forwarded
1372df8bae1dSRodney W. Grimes  * via a source route.
1373df8bae1dSRodney W. Grimes  */
13749b932e9eSAndre Oppermann void
13759b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt)
1376df8bae1dSRodney W. Grimes {
13772b25acc1SLuigi Rizzo 	struct ip *ip = mtod(m, struct ip *);
1378efbad259SEdward Tomasz Napierala 	struct in_ifaddr *ia;
1379df8bae1dSRodney W. Grimes 	struct mbuf *mcopy;
13809b932e9eSAndre Oppermann 	struct in_addr dest;
1381b835b6feSBjoern A. Zeeb 	struct route ro;
1382c773494eSAndre Oppermann 	int error, type = 0, code = 0, mtu = 0;
13833efc3014SJulian Elischer 
13849b932e9eSAndre Oppermann 	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
138586425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1386df8bae1dSRodney W. Grimes 		m_freem(m);
1387df8bae1dSRodney W. Grimes 		return;
1388df8bae1dSRodney W. Grimes 	}
13891b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
1390603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
13911b968362SDag-Erling Smørgrav #endif
1392df8bae1dSRodney W. Grimes 		if (ip->ip_ttl <= IPTTLDEC) {
13931b968362SDag-Erling Smørgrav 			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
139402c1c707SAndre Oppermann 			    0, 0);
1395df8bae1dSRodney W. Grimes 			return;
1396df8bae1dSRodney W. Grimes 		}
13971b968362SDag-Erling Smørgrav #ifdef IPSTEALTH
13981b968362SDag-Erling Smørgrav 	}
13991b968362SDag-Erling Smørgrav #endif
1400df8bae1dSRodney W. Grimes 
14018b07e49aSJulian Elischer 	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
1402efbad259SEdward Tomasz Napierala #ifndef IPSEC
1403efbad259SEdward Tomasz Napierala 	/*
1404efbad259SEdward Tomasz Napierala 	 * 'ia' may be NULL if there is no route for this destination.
1405efbad259SEdward Tomasz Napierala 	 * In case of IPsec, Don't discard it just yet, but pass it to
1406efbad259SEdward Tomasz Napierala 	 * ip_output in case of outgoing IPsec policy.
1407efbad259SEdward Tomasz Napierala 	 */
1408d23d475fSGuido van Rooij 	if (!srcrt && ia == NULL) {
140902c1c707SAndre Oppermann 		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
1410df8bae1dSRodney W. Grimes 		return;
141102c1c707SAndre Oppermann 	}
1412efbad259SEdward Tomasz Napierala #endif
1413df8bae1dSRodney W. Grimes 
1414df8bae1dSRodney W. Grimes 	/*
1415bfef7ed4SIan Dowse 	 * Save the IP header and at most 8 bytes of the payload,
1416bfef7ed4SIan Dowse 	 * in case we need to generate an ICMP message to the src.
1417bfef7ed4SIan Dowse 	 *
14184d2e3692SLuigi Rizzo 	 * XXX this can be optimized a lot by saving the data in a local
14194d2e3692SLuigi Rizzo 	 * buffer on the stack (72 bytes at most), and only allocating the
14204d2e3692SLuigi Rizzo 	 * mbuf if really necessary. The vast majority of the packets
14214d2e3692SLuigi Rizzo 	 * are forwarded without having to send an ICMP back (either
14224d2e3692SLuigi Rizzo 	 * because unnecessary, or because rate limited), so we are
14234d2e3692SLuigi Rizzo 	 * really we are wasting a lot of work here.
14244d2e3692SLuigi Rizzo 	 *
1425bfef7ed4SIan Dowse 	 * We don't use m_copy() because it might return a reference
1426bfef7ed4SIan Dowse 	 * to a shared cluster. Both this function and ip_output()
1427bfef7ed4SIan Dowse 	 * assume exclusive access to the IP header in `m', so any
1428bfef7ed4SIan Dowse 	 * data in a cluster may change before we reach icmp_error().
1429df8bae1dSRodney W. Grimes 	 */
1430780b2f69SAndre Oppermann 	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
1431a163d034SWarner Losh 	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
14329967cafcSSam Leffler 		/*
14339967cafcSSam Leffler 		 * It's probably ok if the pkthdr dup fails (because
14349967cafcSSam Leffler 		 * the deep copy of the tag chain failed), but for now
14359967cafcSSam Leffler 		 * be conservative and just discard the copy since
14369967cafcSSam Leffler 		 * code below may some day want the tags.
14379967cafcSSam Leffler 		 */
14389967cafcSSam Leffler 		m_free(mcopy);
14399967cafcSSam Leffler 		mcopy = NULL;
14409967cafcSSam Leffler 	}
1441bfef7ed4SIan Dowse 	if (mcopy != NULL) {
1442780b2f69SAndre Oppermann 		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
1443e6b0a570SBruce M Simpson 		mcopy->m_pkthdr.len = mcopy->m_len;
1444bfef7ed4SIan Dowse 		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
1445bfef7ed4SIan Dowse 	}
144604287599SRuslan Ermilov 
144704287599SRuslan Ermilov #ifdef IPSTEALTH
1448603724d3SBjoern A. Zeeb 	if (!V_ipstealth) {
144904287599SRuslan Ermilov #endif
145004287599SRuslan Ermilov 		ip->ip_ttl -= IPTTLDEC;
145104287599SRuslan Ermilov #ifdef IPSTEALTH
145204287599SRuslan Ermilov 	}
145304287599SRuslan Ermilov #endif
1454df8bae1dSRodney W. Grimes 
1455df8bae1dSRodney W. Grimes 	/*
1456df8bae1dSRodney W. Grimes 	 * If forwarding packet using same interface that it came in on,
1457df8bae1dSRodney W. Grimes 	 * perhaps should send a redirect to sender to shortcut a hop.
1458df8bae1dSRodney W. Grimes 	 * Only send redirect if source is sending directly to us,
1459df8bae1dSRodney W. Grimes 	 * and if packet was not source routed (or has any options).
1460df8bae1dSRodney W. Grimes 	 * Also, don't send redirect if forwarding using a default route
1461df8bae1dSRodney W. Grimes 	 * or a route modified by a redirect.
1462df8bae1dSRodney W. Grimes 	 */
14639b932e9eSAndre Oppermann 	dest.s_addr = 0;
1464efbad259SEdward Tomasz Napierala 	if (!srcrt && V_ipsendredirects &&
1465efbad259SEdward Tomasz Napierala 	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
146602c1c707SAndre Oppermann 		struct sockaddr_in *sin;
146702c1c707SAndre Oppermann 		struct rtentry *rt;
146802c1c707SAndre Oppermann 
14690cfbbe3bSAndre Oppermann 		bzero(&ro, sizeof(ro));
147002c1c707SAndre Oppermann 		sin = (struct sockaddr_in *)&ro.ro_dst;
147102c1c707SAndre Oppermann 		sin->sin_family = AF_INET;
147202c1c707SAndre Oppermann 		sin->sin_len = sizeof(*sin);
14739b932e9eSAndre Oppermann 		sin->sin_addr = ip->ip_dst;
14746e6b3f7cSQing Li 		in_rtalloc_ign(&ro, 0, M_GETFIB(m));
147502c1c707SAndre Oppermann 
147602c1c707SAndre Oppermann 		rt = ro.ro_rt;
147702c1c707SAndre Oppermann 
147802c1c707SAndre Oppermann 		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
14799b932e9eSAndre Oppermann 		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
1480df8bae1dSRodney W. Grimes #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1481df8bae1dSRodney W. Grimes 			u_long src = ntohl(ip->ip_src.s_addr);
1482df8bae1dSRodney W. Grimes 
1483df8bae1dSRodney W. Grimes 			if (RTA(rt) &&
1484df8bae1dSRodney W. Grimes 			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1485df8bae1dSRodney W. Grimes 				if (rt->rt_flags & RTF_GATEWAY)
14869b932e9eSAndre Oppermann 					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
1487df8bae1dSRodney W. Grimes 				else
14889b932e9eSAndre Oppermann 					dest.s_addr = ip->ip_dst.s_addr;
1489df8bae1dSRodney W. Grimes 				/* Router requirements says to only send host redirects */
1490df8bae1dSRodney W. Grimes 				type = ICMP_REDIRECT;
1491df8bae1dSRodney W. Grimes 				code = ICMP_REDIRECT_HOST;
1492df8bae1dSRodney W. Grimes 			}
1493df8bae1dSRodney W. Grimes 		}
149402c1c707SAndre Oppermann 		if (rt)
149502c1c707SAndre Oppermann 			RTFREE(rt);
149602c1c707SAndre Oppermann 	}
1497df8bae1dSRodney W. Grimes 
1498b835b6feSBjoern A. Zeeb 	/*
1499b835b6feSBjoern A. Zeeb 	 * Try to cache the route MTU from ip_output so we can consider it for
1500b835b6feSBjoern A. Zeeb 	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
1501b835b6feSBjoern A. Zeeb 	 */
1502b835b6feSBjoern A. Zeeb 	bzero(&ro, sizeof(ro));
1503b835b6feSBjoern A. Zeeb 
1504b835b6feSBjoern A. Zeeb 	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
1505b835b6feSBjoern A. Zeeb 
1506b835b6feSBjoern A. Zeeb 	if (error == EMSGSIZE && ro.ro_rt)
1507b835b6feSBjoern A. Zeeb 		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
1508b835b6feSBjoern A. Zeeb 	if (ro.ro_rt)
1509b835b6feSBjoern A. Zeeb 		RTFREE(ro.ro_rt);
1510b835b6feSBjoern A. Zeeb 
1511df8bae1dSRodney W. Grimes 	if (error)
151286425c62SRobert Watson 		IPSTAT_INC(ips_cantforward);
1513df8bae1dSRodney W. Grimes 	else {
151486425c62SRobert Watson 		IPSTAT_INC(ips_forward);
1515df8bae1dSRodney W. Grimes 		if (type)
151686425c62SRobert Watson 			IPSTAT_INC(ips_redirectsent);
1517df8bae1dSRodney W. Grimes 		else {
15189188b4a1SAndre Oppermann 			if (mcopy)
1519df8bae1dSRodney W. Grimes 				m_freem(mcopy);
15208c0fec80SRobert Watson 			if (ia != NULL)
15218c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1522df8bae1dSRodney W. Grimes 			return;
1523df8bae1dSRodney W. Grimes 		}
1524df8bae1dSRodney W. Grimes 	}
15258c0fec80SRobert Watson 	if (mcopy == NULL) {
15268c0fec80SRobert Watson 		if (ia != NULL)
15278c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
1528df8bae1dSRodney W. Grimes 		return;
15298c0fec80SRobert Watson 	}
1530df8bae1dSRodney W. Grimes 
1531df8bae1dSRodney W. Grimes 	switch (error) {
1532df8bae1dSRodney W. Grimes 
1533df8bae1dSRodney W. Grimes 	case 0:				/* forwarded, but need redirect */
1534df8bae1dSRodney W. Grimes 		/* type, code set above */
1535df8bae1dSRodney W. Grimes 		break;
1536df8bae1dSRodney W. Grimes 
1537efbad259SEdward Tomasz Napierala 	case ENETUNREACH:
1538df8bae1dSRodney W. Grimes 	case EHOSTUNREACH:
1539df8bae1dSRodney W. Grimes 	case ENETDOWN:
1540df8bae1dSRodney W. Grimes 	case EHOSTDOWN:
1541df8bae1dSRodney W. Grimes 	default:
1542df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1543df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_HOST;
1544df8bae1dSRodney W. Grimes 		break;
1545df8bae1dSRodney W. Grimes 
1546df8bae1dSRodney W. Grimes 	case EMSGSIZE:
1547df8bae1dSRodney W. Grimes 		type = ICMP_UNREACH;
1548df8bae1dSRodney W. Grimes 		code = ICMP_UNREACH_NEEDFRAG;
15491dfcf0d2SAndre Oppermann 
1550b2630c29SGeorge V. Neville-Neil #ifdef IPSEC
1551b835b6feSBjoern A. Zeeb 		/*
1552b835b6feSBjoern A. Zeeb 		 * If IPsec is configured for this path,
1553b835b6feSBjoern A. Zeeb 		 * override any possibly mtu value set by ip_output.
1554b835b6feSBjoern A. Zeeb 		 */
1555b835b6feSBjoern A. Zeeb 		mtu = ip_ipsec_mtu(m, mtu);
1556b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */
15579b932e9eSAndre Oppermann 		/*
1558b835b6feSBjoern A. Zeeb 		 * If the MTU was set before make sure we are below the
1559b835b6feSBjoern A. Zeeb 		 * interface MTU.
1560ab48768bSAndre Oppermann 		 * If the MTU wasn't set before use the interface mtu or
1561ab48768bSAndre Oppermann 		 * fall back to the next smaller mtu step compared to the
1562ab48768bSAndre Oppermann 		 * current packet size.
15639b932e9eSAndre Oppermann 		 */
1564b835b6feSBjoern A. Zeeb 		if (mtu != 0) {
1565b835b6feSBjoern A. Zeeb 			if (ia != NULL)
1566b835b6feSBjoern A. Zeeb 				mtu = min(mtu, ia->ia_ifp->if_mtu);
1567b835b6feSBjoern A. Zeeb 		} else {
1568ab48768bSAndre Oppermann 			if (ia != NULL)
1569c773494eSAndre Oppermann 				mtu = ia->ia_ifp->if_mtu;
1570ab48768bSAndre Oppermann 			else
1571ab48768bSAndre Oppermann 				mtu = ip_next_mtu(ip->ip_len, 0);
1572ab48768bSAndre Oppermann 		}
157386425c62SRobert Watson 		IPSTAT_INC(ips_cantfrag);
1574df8bae1dSRodney W. Grimes 		break;
1575df8bae1dSRodney W. Grimes 
1576df8bae1dSRodney W. Grimes 	case ENOBUFS:
1577df285b3dSMike Silbersack 		/*
1578df285b3dSMike Silbersack 		 * A router should not generate ICMP_SOURCEQUENCH as
1579df285b3dSMike Silbersack 		 * required in RFC1812 Requirements for IP Version 4 Routers.
1580df285b3dSMike Silbersack 		 * Source quench could be a big problem under DoS attacks,
1581df285b3dSMike Silbersack 		 * or if the underlying interface is rate-limited.
1582df285b3dSMike Silbersack 		 * Those who need source quench packets may re-enable them
1583df285b3dSMike Silbersack 		 * via the net.inet.ip.sendsourcequench sysctl.
1584df285b3dSMike Silbersack 		 */
1585603724d3SBjoern A. Zeeb 		if (V_ip_sendsourcequench == 0) {
1586df285b3dSMike Silbersack 			m_freem(mcopy);
15878c0fec80SRobert Watson 			if (ia != NULL)
15888c0fec80SRobert Watson 				ifa_free(&ia->ia_ifa);
1589df285b3dSMike Silbersack 			return;
1590df285b3dSMike Silbersack 		} else {
1591df8bae1dSRodney W. Grimes 			type = ICMP_SOURCEQUENCH;
1592df8bae1dSRodney W. Grimes 			code = 0;
1593df285b3dSMike Silbersack 		}
1594df8bae1dSRodney W. Grimes 		break;
15953a06e3e0SRuslan Ermilov 
15963a06e3e0SRuslan Ermilov 	case EACCES:			/* ipfw denied packet */
15973a06e3e0SRuslan Ermilov 		m_freem(mcopy);
15988c0fec80SRobert Watson 		if (ia != NULL)
15998c0fec80SRobert Watson 			ifa_free(&ia->ia_ifa);
16003a06e3e0SRuslan Ermilov 		return;
1601df8bae1dSRodney W. Grimes 	}
16028c0fec80SRobert Watson 	if (ia != NULL)
16038c0fec80SRobert Watson 		ifa_free(&ia->ia_ifa);
1604c773494eSAndre Oppermann 	icmp_error(mcopy, type, code, dest.s_addr, mtu);
1605df8bae1dSRodney W. Grimes }
1606df8bae1dSRodney W. Grimes 
160782c23ebaSBill Fenner void
1608f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip,
1609f2565d68SRobert Watson     struct mbuf *m)
161082c23ebaSBill Fenner {
16118b615593SMarko Zec 
1612be8a62e8SPoul-Henning Kamp 	if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) {
1613be8a62e8SPoul-Henning Kamp 		struct bintime bt;
1614be8a62e8SPoul-Henning Kamp 
1615be8a62e8SPoul-Henning Kamp 		bintime(&bt);
1616be8a62e8SPoul-Henning Kamp 		if (inp->inp_socket->so_options & SO_BINTIME) {
1617be8a62e8SPoul-Henning Kamp 			*mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt),
1618be8a62e8SPoul-Henning Kamp 			SCM_BINTIME, SOL_SOCKET);
1619be8a62e8SPoul-Henning Kamp 			if (*mp)
1620be8a62e8SPoul-Henning Kamp 				mp = &(*mp)->m_next;
1621be8a62e8SPoul-Henning Kamp 		}
162282c23ebaSBill Fenner 		if (inp->inp_socket->so_options & SO_TIMESTAMP) {
162382c23ebaSBill Fenner 			struct timeval tv;
162482c23ebaSBill Fenner 
1625be8a62e8SPoul-Henning Kamp 			bintime2timeval(&bt, &tv);
162682c23ebaSBill Fenner 			*mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv),
162782c23ebaSBill Fenner 				SCM_TIMESTAMP, SOL_SOCKET);
162882c23ebaSBill Fenner 			if (*mp)
162982c23ebaSBill Fenner 				mp = &(*mp)->m_next;
16304cc20ab1SSeigo Tanimura 		}
1631be8a62e8SPoul-Henning Kamp 	}
163282c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVDSTADDR) {
163382c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) &ip->ip_dst,
163482c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP);
163582c23ebaSBill Fenner 		if (*mp)
163682c23ebaSBill Fenner 			mp = &(*mp)->m_next;
163782c23ebaSBill Fenner 	}
16384957466bSMatthew N. Dodd 	if (inp->inp_flags & INP_RECVTTL) {
16394957466bSMatthew N. Dodd 		*mp = sbcreatecontrol((caddr_t) &ip->ip_ttl,
16404957466bSMatthew N. Dodd 		    sizeof(u_char), IP_RECVTTL, IPPROTO_IP);
16414957466bSMatthew N. Dodd 		if (*mp)
16424957466bSMatthew N. Dodd 			mp = &(*mp)->m_next;
16434957466bSMatthew N. Dodd 	}
164482c23ebaSBill Fenner #ifdef notyet
164582c23ebaSBill Fenner 	/* XXX
164682c23ebaSBill Fenner 	 * Moving these out of udp_input() made them even more broken
164782c23ebaSBill Fenner 	 * than they already were.
164882c23ebaSBill Fenner 	 */
164982c23ebaSBill Fenner 	/* options were tossed already */
165082c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVOPTS) {
165182c23ebaSBill Fenner 		*mp = sbcreatecontrol((caddr_t) opts_deleted_above,
165282c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP);
165382c23ebaSBill Fenner 		if (*mp)
165482c23ebaSBill Fenner 			mp = &(*mp)->m_next;
165582c23ebaSBill Fenner 	}
165682c23ebaSBill Fenner 	/* ip_srcroute doesn't do what we want here, need to fix */
165782c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVRETOPTS) {
1658e0982661SAndre Oppermann 		*mp = sbcreatecontrol((caddr_t) ip_srcroute(m),
165982c23ebaSBill Fenner 		    sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP);
166082c23ebaSBill Fenner 		if (*mp)
166182c23ebaSBill Fenner 			mp = &(*mp)->m_next;
166282c23ebaSBill Fenner 	}
166382c23ebaSBill Fenner #endif
166482c23ebaSBill Fenner 	if (inp->inp_flags & INP_RECVIF) {
1665d314ad7bSJulian Elischer 		struct ifnet *ifp;
1666d314ad7bSJulian Elischer 		struct sdlbuf {
166782c23ebaSBill Fenner 			struct sockaddr_dl sdl;
1668d314ad7bSJulian Elischer 			u_char	pad[32];
1669d314ad7bSJulian Elischer 		} sdlbuf;
1670d314ad7bSJulian Elischer 		struct sockaddr_dl *sdp;
1671d314ad7bSJulian Elischer 		struct sockaddr_dl *sdl2 = &sdlbuf.sdl;
167282c23ebaSBill Fenner 
1673d314ad7bSJulian Elischer 		if (((ifp = m->m_pkthdr.rcvif))
1674603724d3SBjoern A. Zeeb 		&& ( ifp->if_index && (ifp->if_index <= V_if_index))) {
16754a0d6638SRuslan Ermilov 			sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr;
1676d314ad7bSJulian Elischer 			/*
1677d314ad7bSJulian Elischer 			 * Change our mind and don't try copy.
1678d314ad7bSJulian Elischer 			 */
1679d314ad7bSJulian Elischer 			if ((sdp->sdl_family != AF_LINK)
1680d314ad7bSJulian Elischer 			|| (sdp->sdl_len > sizeof(sdlbuf))) {
1681d314ad7bSJulian Elischer 				goto makedummy;
1682d314ad7bSJulian Elischer 			}
1683d314ad7bSJulian Elischer 			bcopy(sdp, sdl2, sdp->sdl_len);
1684d314ad7bSJulian Elischer 		} else {
1685d314ad7bSJulian Elischer makedummy:
1686d314ad7bSJulian Elischer 			sdl2->sdl_len
1687d314ad7bSJulian Elischer 				= offsetof(struct sockaddr_dl, sdl_data[0]);
1688d314ad7bSJulian Elischer 			sdl2->sdl_family = AF_LINK;
1689d314ad7bSJulian Elischer 			sdl2->sdl_index = 0;
1690d314ad7bSJulian Elischer 			sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0;
1691d314ad7bSJulian Elischer 		}
1692d314ad7bSJulian Elischer 		*mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len,
169382c23ebaSBill Fenner 			IP_RECVIF, IPPROTO_IP);
169482c23ebaSBill Fenner 		if (*mp)
169582c23ebaSBill Fenner 			mp = &(*mp)->m_next;
169682c23ebaSBill Fenner 	}
169782c23ebaSBill Fenner }
169882c23ebaSBill Fenner 
16994d2e3692SLuigi Rizzo /*
170030916a2dSRobert Watson  * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the
170130916a2dSRobert Watson  * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on
170230916a2dSRobert Watson  * locking.  This code remains in ip_input.c as ip_mroute.c is optionally
170330916a2dSRobert Watson  * compiled.
17044d2e3692SLuigi Rizzo  */
1705df8bae1dSRodney W. Grimes int
1706f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so)
1707f0068c4aSGarrett Wollman {
17088b615593SMarko Zec 
1709f0068c4aSGarrett Wollman 	if (so->so_type != SOCK_RAW ||
1710f0068c4aSGarrett Wollman 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1711f0068c4aSGarrett Wollman 		return EOPNOTSUPP;
1712f0068c4aSGarrett Wollman 
1713603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL)
1714f0068c4aSGarrett Wollman 		return EADDRINUSE;
1715f0068c4aSGarrett Wollman 
1716603724d3SBjoern A. Zeeb 	V_ip_rsvpd = so;
17171c5de19aSGarrett Wollman 	/*
17181c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-increment
17191c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
17201c5de19aSGarrett Wollman 	 */
1721603724d3SBjoern A. Zeeb 	if (!V_ip_rsvp_on) {
1722603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 1;
1723603724d3SBjoern A. Zeeb 		V_rsvp_on++;
17241c5de19aSGarrett Wollman 	}
1725f0068c4aSGarrett Wollman 
1726f0068c4aSGarrett Wollman 	return 0;
1727f0068c4aSGarrett Wollman }
1728f0068c4aSGarrett Wollman 
1729f0068c4aSGarrett Wollman int
1730f0068c4aSGarrett Wollman ip_rsvp_done(void)
1731f0068c4aSGarrett Wollman {
17328b615593SMarko Zec 
1733603724d3SBjoern A. Zeeb 	V_ip_rsvpd = NULL;
17341c5de19aSGarrett Wollman 	/*
17351c5de19aSGarrett Wollman 	 * This may seem silly, but we need to be sure we don't over-decrement
17361c5de19aSGarrett Wollman 	 * the RSVP counter, in case something slips up.
17371c5de19aSGarrett Wollman 	 */
1738603724d3SBjoern A. Zeeb 	if (V_ip_rsvp_on) {
1739603724d3SBjoern A. Zeeb 		V_ip_rsvp_on = 0;
1740603724d3SBjoern A. Zeeb 		V_rsvp_on--;
17411c5de19aSGarrett Wollman 	}
1742f0068c4aSGarrett Wollman 	return 0;
1743f0068c4aSGarrett Wollman }
1744bbb4330bSLuigi Rizzo 
1745bbb4330bSLuigi Rizzo void
1746bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off)	/* XXX must fixup manually */
1747bbb4330bSLuigi Rizzo {
17488b615593SMarko Zec 
1749bbb4330bSLuigi Rizzo 	if (rsvp_input_p) { /* call the real one if loaded */
1750bbb4330bSLuigi Rizzo 		rsvp_input_p(m, off);
1751bbb4330bSLuigi Rizzo 		return;
1752bbb4330bSLuigi Rizzo 	}
1753bbb4330bSLuigi Rizzo 
1754bbb4330bSLuigi Rizzo 	/* Can still get packets with rsvp_on = 0 if there is a local member
1755bbb4330bSLuigi Rizzo 	 * of the group to which the RSVP packet is addressed.  But in this
1756bbb4330bSLuigi Rizzo 	 * case we want to throw the packet away.
1757bbb4330bSLuigi Rizzo 	 */
1758bbb4330bSLuigi Rizzo 
1759603724d3SBjoern A. Zeeb 	if (!V_rsvp_on) {
1760bbb4330bSLuigi Rizzo 		m_freem(m);
1761bbb4330bSLuigi Rizzo 		return;
1762bbb4330bSLuigi Rizzo 	}
1763bbb4330bSLuigi Rizzo 
1764603724d3SBjoern A. Zeeb 	if (V_ip_rsvpd != NULL) {
1765bbb4330bSLuigi Rizzo 		rip_input(m, off);
1766bbb4330bSLuigi Rizzo 		return;
1767bbb4330bSLuigi Rizzo 	}
1768bbb4330bSLuigi Rizzo 	/* Drop the packet */
1769bbb4330bSLuigi Rizzo 	m_freem(m);
1770bbb4330bSLuigi Rizzo }
1771