1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66983066f0SAlexander V. Chernikov #include <net/route/nhop.h> 67748e0b0aSGarrett Wollman #include <net/netisr.h> 68b2bdc62aSAdrian Chadd #include <net/rss_config.h> 694b79449eSBjoern A. Zeeb #include <net/vnet.h> 70df8bae1dSRodney W. Grimes 71df8bae1dSRodney W. Grimes #include <netinet/in.h> 7257f60867SMark Johnston #include <netinet/in_kdtrace.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 74b5e8ce9fSBruce Evans #include <netinet/in_var.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip.h> 76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h> 77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 79eddfbb76SRobert Watson #include <netinet/ip_fw.h> 80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 81ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8258938916SGarrett Wollman #include <machine/in_cksum.h> 83a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 85df8bae1dSRodney W. Grimes 86fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 87fcf59617SAndrey V. Elsukov 88f0068c4aSGarrett Wollman #include <sys/socketvar.h> 896ddbf1e2SGary Palmer 90aed55708SRobert Watson #include <security/mac/mac_framework.h> 91aed55708SRobert Watson 92d2035ffbSEd Maste #ifdef CTASSERT 93d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 94d2035ffbSEd Maste #endif 95d2035ffbSEd Maste 961dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 97843b0e57SXin LI extern void ipreass_init(void); 98843b0e57SXin LI extern void ipreass_drain(void); 99843b0e57SXin LI extern void ipreass_slowtimo(void); 1001dbefcc0SGleb Smirnoff #ifdef VIMAGE 101843b0e57SXin LI extern void ipreass_destroy(void); 1021dbefcc0SGleb Smirnoff #endif 1031dbefcc0SGleb Smirnoff 104cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 105cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 106f0068c4aSGarrett Wollman 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10882cea7e6SBjoern A. Zeeb 10982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1106df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 111eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1128b615593SMarko Zec "Enable IP forwarding between interfaces"); 1130312fbe9SPoul-Henning Kamp 1148ad114c0SGeorge V. Neville-Neil /* 1158ad114c0SGeorge V. Neville-Neil * Respond with an ICMP host redirect when we forward a packet out of 1168ad114c0SGeorge V. Neville-Neil * the same interface on which it was received. See RFC 792. 1178ad114c0SGeorge V. Neville-Neil */ 1188ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1; 1196df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 120eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1218b615593SMarko Zec "Enable sending IP redirects"); 1220312fbe9SPoul-Henning Kamp 123823db0e9SDon Lewis /* 124823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 125823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 126a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 127823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1283f67c834SDon Lewis * 129a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 130a8f12100SDon Lewis * to translate the destination address to another local interface. 1313f67c834SDon Lewis * 1323f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1333f67c834SDon Lewis * to the loopback interface instead of the interface where the 1343f67c834SDon Lewis * packets for those addresses are received. 135823db0e9SDon Lewis */ 1365f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_checkinterface); 13782cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1386df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 139eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1408b615593SMarko Zec "Verify packet arrives on correct interface"); 141b3e95d4eSJonathan Lemon 142b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 143df8bae1dSRodney W. Grimes 144d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 145d4b5cae4SRobert Watson .nh_name = "ip", 146d4b5cae4SRobert Watson .nh_handler = ip_input, 147d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 148b8bc95cdSAdrian Chadd #ifdef RSS 1492527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 150b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 151b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 152b8bc95cdSAdrian Chadd #else 153d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 154b8bc95cdSAdrian Chadd #endif 155d4b5cae4SRobert Watson }; 156ca925d9cSJonathan Lemon 157b8bc95cdSAdrian Chadd #ifdef RSS 158b8bc95cdSAdrian Chadd /* 159b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 160b8bc95cdSAdrian Chadd * to have a flowid already calculated. 161b8bc95cdSAdrian Chadd * 162b8bc95cdSAdrian Chadd * It should likely have something that assert it 163b8bc95cdSAdrian Chadd * actually has valid flow details. 164b8bc95cdSAdrian Chadd */ 165b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 166b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 167b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 168b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 169499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 170b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 171b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 172b8bc95cdSAdrian Chadd }; 173b8bc95cdSAdrian Chadd #endif 174b8bc95cdSAdrian Chadd 175df8bae1dSRodney W. Grimes extern struct domain inetdomain; 176f0ffb944SJulian Elischer extern struct protosw inetsw[]; 177df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17882cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 18082cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 181ca925d9cSJonathan Lemon 1820312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1830312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1843d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1850312fbe9SPoul-Henning Kamp #endif 1860312fbe9SPoul-Henning Kamp 1871b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18882cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1896df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 190eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 191eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1921b968362SDag-Erling Smørgrav #endif 193eddfbb76SRobert Watson 194315e3e38SRobert Watson /* 1955da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1965923c293SGleb Smirnoff */ 1975da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1995da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 2005da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 2015923c293SGleb Smirnoff 2025923c293SGleb Smirnoff #ifdef VIMAGE 2035da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 2045923c293SGleb Smirnoff #endif /* VIMAGE */ 2055923c293SGleb Smirnoff 2065923c293SGleb Smirnoff /* 207315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2085923c293SGleb Smirnoff * into ipstat treated as an array. 209315e3e38SRobert Watson */ 210315e3e38SRobert Watson void 211315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 212315e3e38SRobert Watson { 213315e3e38SRobert Watson 2145da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 215315e3e38SRobert Watson } 216315e3e38SRobert Watson 217315e3e38SRobert Watson void 218315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 219315e3e38SRobert Watson { 220315e3e38SRobert Watson 2215da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 222315e3e38SRobert Watson } 223315e3e38SRobert Watson 224d4b5cae4SRobert Watson static int 225d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 226d4b5cae4SRobert Watson { 227d4b5cae4SRobert Watson int error, qlimit; 228d4b5cae4SRobert Watson 229d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 230d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 231d4b5cae4SRobert Watson if (error || !req->newptr) 232d4b5cae4SRobert Watson return (error); 233d4b5cae4SRobert Watson if (qlimit < 1) 234d4b5cae4SRobert Watson return (EINVAL); 235d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 236d4b5cae4SRobert Watson } 237d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 2387029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2397029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 240d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 241d4b5cae4SRobert Watson 242d4b5cae4SRobert Watson static int 243d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 244d4b5cae4SRobert Watson { 245d4b5cae4SRobert Watson u_int64_t qdrops_long; 246d4b5cae4SRobert Watson int error, qdrops; 247d4b5cae4SRobert Watson 248d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 249d4b5cae4SRobert Watson qdrops = qdrops_long; 250d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 251d4b5cae4SRobert Watson if (error || !req->newptr) 252d4b5cae4SRobert Watson return (error); 253d4b5cae4SRobert Watson if (qdrops != 0) 254d4b5cae4SRobert Watson return (EINVAL); 255d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 256d4b5cae4SRobert Watson return (0); 257d4b5cae4SRobert Watson } 258d4b5cae4SRobert Watson 259d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 2607029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 2617029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 262d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 263d4b5cae4SRobert Watson 264b8bc95cdSAdrian Chadd #ifdef RSS 265b8bc95cdSAdrian Chadd static int 266b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 267b8bc95cdSAdrian Chadd { 268b8bc95cdSAdrian Chadd int error, qlimit; 269b8bc95cdSAdrian Chadd 270b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 271b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 272b8bc95cdSAdrian Chadd if (error || !req->newptr) 273b8bc95cdSAdrian Chadd return (error); 274b8bc95cdSAdrian Chadd if (qlimit < 1) 275b8bc95cdSAdrian Chadd return (EINVAL); 276b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 277b8bc95cdSAdrian Chadd } 2787faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2797029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2807029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2817faa0d21SAndrey V. Elsukov "I", "Maximum size of the IP direct input queue"); 282b8bc95cdSAdrian Chadd 283b8bc95cdSAdrian Chadd static int 284b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 285b8bc95cdSAdrian Chadd { 286b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 287b8bc95cdSAdrian Chadd int error, qdrops; 288b8bc95cdSAdrian Chadd 289b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 290b8bc95cdSAdrian Chadd qdrops = qdrops_long; 291b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 292b8bc95cdSAdrian Chadd if (error || !req->newptr) 293b8bc95cdSAdrian Chadd return (error); 294b8bc95cdSAdrian Chadd if (qdrops != 0) 295b8bc95cdSAdrian Chadd return (EINVAL); 296b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 297b8bc95cdSAdrian Chadd return (0); 298b8bc95cdSAdrian Chadd } 299b8bc95cdSAdrian Chadd 3007faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 3017029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 3027029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 303b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 304b8bc95cdSAdrian Chadd #endif /* RSS */ 305b8bc95cdSAdrian Chadd 306df8bae1dSRodney W. Grimes /* 307df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 308df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 309df8bae1dSRodney W. Grimes */ 310df8bae1dSRodney W. Grimes void 311f2565d68SRobert Watson ip_init(void) 312df8bae1dSRodney W. Grimes { 313b252313fSGleb Smirnoff struct pfil_head_args args; 314f2565d68SRobert Watson struct protosw *pr; 315f2565d68SRobert Watson int i; 316df8bae1dSRodney W. Grimes 317d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 318603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3191ed81b73SMarko Zec 3201ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3211dbefcc0SGleb Smirnoff ipreass_init(); 3221ed81b73SMarko Zec 3230b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 324b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 325b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 326b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 327b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 328b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3290b4b0b0fSJulian Elischer 330ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 331ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 332ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 333ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 334ef91a976SAndrey V. Elsukov __func__); 335ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 336ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 337ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 338ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 339ef91a976SAndrey V. Elsukov __func__); 340ef91a976SAndrey V. Elsukov 3411ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 342484149deSBjoern A. Zeeb #ifdef VIMAGE 343484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 344484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 345484149deSBjoern A. Zeeb #ifdef RSS 346484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 347484149deSBjoern A. Zeeb #endif 3481ed81b73SMarko Zec return; 349484149deSBjoern A. Zeeb } 350484149deSBjoern A. Zeeb #endif 3511ed81b73SMarko Zec 352f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 35302410549SRobert Watson if (pr == NULL) 354db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 355db09bef3SAndre Oppermann 356db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 357df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 358df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 359db09bef3SAndre Oppermann /* 360db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 361db09bef3SAndre Oppermann * in ip_protox[]. 362db09bef3SAndre Oppermann */ 363f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 364f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 365df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 366db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 367db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 368db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 369df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 370db09bef3SAndre Oppermann } 371194a213eSAndrey A. Chernov 372d4b5cae4SRobert Watson netisr_register(&ip_nh); 373b8bc95cdSAdrian Chadd #ifdef RSS 374b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 375b8bc95cdSAdrian Chadd #endif 376df8bae1dSRodney W. Grimes } 377df8bae1dSRodney W. Grimes 3789802380eSBjoern A. Zeeb #ifdef VIMAGE 3793f58662dSBjoern A. Zeeb static void 3803f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3819802380eSBjoern A. Zeeb { 382ef91a976SAndrey V. Elsukov int error; 3834d3dfd45SMikolaj Golub 384484149deSBjoern A. Zeeb #ifdef RSS 385484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 386484149deSBjoern A. Zeeb #endif 387484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 388484149deSBjoern A. Zeeb 389b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 390ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 391ef91a976SAndrey V. Elsukov if (error != 0) { 392ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 393ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 394ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 395ef91a976SAndrey V. Elsukov } 396ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 397ef91a976SAndrey V. Elsukov if (error != 0) { 398ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 399ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 400ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 401ef91a976SAndrey V. Elsukov } 40289856f7eSBjoern A. Zeeb 40389856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 40489856f7eSBjoern A. Zeeb in_ifscrub_all(); 40589856f7eSBjoern A. Zeeb 40689856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 407*b1d63265SAlexander V. Chernikov rib_flush_routes_family(AF_INET); 4089802380eSBjoern A. Zeeb 409e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4101dbefcc0SGleb Smirnoff ipreass_destroy(); 41189856f7eSBjoern A. Zeeb 41289856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 41389856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4149802380eSBjoern A. Zeeb } 4153f58662dSBjoern A. Zeeb 4163f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4179802380eSBjoern A. Zeeb #endif 4189802380eSBjoern A. Zeeb 419b8bc95cdSAdrian Chadd #ifdef RSS 420b8bc95cdSAdrian Chadd /* 421b8bc95cdSAdrian Chadd * IP direct input routine. 422b8bc95cdSAdrian Chadd * 423b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 424b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 425b8bc95cdSAdrian Chadd */ 426b8bc95cdSAdrian Chadd void 427b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 428b8bc95cdSAdrian Chadd { 429b8bc95cdSAdrian Chadd struct ip *ip; 430b8bc95cdSAdrian Chadd int hlen; 431b8bc95cdSAdrian Chadd 432b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 433b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 434b8bc95cdSAdrian Chadd 435fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 436fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 437fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 438fcf59617SAndrey V. Elsukov return; 439fcf59617SAndrey V. Elsukov } 440fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 441b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 442b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 443b8bc95cdSAdrian Chadd return; 444b8bc95cdSAdrian Chadd } 445b8bc95cdSAdrian Chadd #endif 446b8bc95cdSAdrian Chadd 4474d2e3692SLuigi Rizzo /* 448df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 449df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 450df8bae1dSRodney W. Grimes */ 451c67b1d17SGarrett Wollman void 452c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 453df8bae1dSRodney W. Grimes { 4541a5995ccSEugene Grosbein struct rm_priotracker in_ifa_tracker; 4559188b4a1SAndre Oppermann struct ip *ip = NULL; 4565da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 457ca925d9cSJonathan Lemon struct ifaddr *ifa; 4580aade26eSRobert Watson struct ifnet *ifp; 4599b932e9eSAndre Oppermann int checkif, hlen = 0; 46021d172a3SGleb Smirnoff uint16_t sum, ip_len; 46102c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 462f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 463b715f178SLuigi Rizzo 464fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 465b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 466db40007dSAndrew R. Reiter 467ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 46876ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 46976ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4702b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 47153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4728f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4739b932e9eSAndre Oppermann goto ours; 4742b25acc1SLuigi Rizzo } 4752b25acc1SLuigi Rizzo 47686425c62SRobert Watson IPSTAT_INC(ips_total); 47758938916SGarrett Wollman 47858938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 47958938916SGarrett Wollman goto tooshort; 48058938916SGarrett Wollman 481df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4820b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 48386425c62SRobert Watson IPSTAT_INC(ips_toosmall); 484c67b1d17SGarrett Wollman return; 485df8bae1dSRodney W. Grimes } 486df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48758938916SGarrett Wollman 48853be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 48986425c62SRobert Watson IPSTAT_INC(ips_badvers); 490df8bae1dSRodney W. Grimes goto bad; 491df8bae1dSRodney W. Grimes } 49258938916SGarrett Wollman 49353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 494df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 49586425c62SRobert Watson IPSTAT_INC(ips_badhlen); 496df8bae1dSRodney W. Grimes goto bad; 497df8bae1dSRodney W. Grimes } 498df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4990b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 50086425c62SRobert Watson IPSTAT_INC(ips_badhlen); 501c67b1d17SGarrett Wollman return; 502df8bae1dSRodney W. Grimes } 503df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 504df8bae1dSRodney W. Grimes } 50533841545SHajimu UMEMOTO 50657f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50757f60867SMark Johnston 5086c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5090aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5106c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5116c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5120aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 51386425c62SRobert Watson IPSTAT_INC(ips_badaddr); 51433841545SHajimu UMEMOTO goto bad; 51533841545SHajimu UMEMOTO } 51633841545SHajimu UMEMOTO } 51733841545SHajimu UMEMOTO 518db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 519db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 520db4f9cc7SJonathan Lemon } else { 52158938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 52247c861ecSBrian Somers sum = in_cksum_hdr(ip); 52358938916SGarrett Wollman } else { 52447c861ecSBrian Somers sum = in_cksum(m, hlen); 52558938916SGarrett Wollman } 526db4f9cc7SJonathan Lemon } 52747c861ecSBrian Somers if (sum) { 52886425c62SRobert Watson IPSTAT_INC(ips_badsum); 529df8bae1dSRodney W. Grimes goto bad; 530df8bae1dSRodney W. Grimes } 531df8bae1dSRodney W. Grimes 53202b199f1SMax Laier #ifdef ALTQ 53302b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 53402b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53502b199f1SMax Laier return; 53602b199f1SMax Laier #endif 53702b199f1SMax Laier 53821d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 53921d172a3SGleb Smirnoff if (ip_len < hlen) { 54086425c62SRobert Watson IPSTAT_INC(ips_badlen); 541df8bae1dSRodney W. Grimes goto bad; 542df8bae1dSRodney W. Grimes } 543df8bae1dSRodney W. Grimes 544df8bae1dSRodney W. Grimes /* 545df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 546df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 547df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 548df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 549df8bae1dSRodney W. Grimes */ 55021d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 55158938916SGarrett Wollman tooshort: 55286425c62SRobert Watson IPSTAT_INC(ips_tooshort); 553df8bae1dSRodney W. Grimes goto bad; 554df8bae1dSRodney W. Grimes } 55521d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 556df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55721d172a3SGleb Smirnoff m->m_len = ip_len; 55821d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 559df8bae1dSRodney W. Grimes } else 56021d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 561df8bae1dSRodney W. Grimes } 562b8bc95cdSAdrian Chadd 563ad9f4d6aSAndrey V. Elsukov /* 564ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 56562484790SAndrey V. Elsukov * ip_tryforward() does not generate redirects, so fall 56662484790SAndrey V. Elsukov * through to normal processing if redirects are required. 567ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 568ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 569ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 570ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 571ad9f4d6aSAndrey V. Elsukov * ip pointer. 572ad9f4d6aSAndrey V. Elsukov */ 5738ad114c0SGeorge V. Neville-Neil if (V_ipforwarding != 0 574fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 575fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 576fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 577ad9f4d6aSAndrey V. Elsukov #endif 578ad9f4d6aSAndrey V. Elsukov ) { 579ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 58033872124SGeorge V. Neville-Neil return; 581ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 582ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 583ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 584ad9f4d6aSAndrey V. Elsukov goto ours; 585ad9f4d6aSAndrey V. Elsukov } 586ad9f4d6aSAndrey V. Elsukov } 587fcf59617SAndrey V. Elsukov 588fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 58914dd6717SSam Leffler /* 590ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 59114dd6717SSam Leffler */ 592fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 593fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 594c21fd232SAndre Oppermann goto passin; 595ad9f4d6aSAndrey V. Elsukov #endif 596fcf59617SAndrey V. Elsukov 597c4ac87eaSDarren Reed /* 598134ea224SSam Leffler * Run through list of hooks for input packets. 599f51f805fSSam Leffler * 600f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 601f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 602f51f805fSSam Leffler * ip_forward to do the right thing. 603c4ac87eaSDarren Reed */ 604c21fd232SAndre Oppermann 605c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 606b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 607c21fd232SAndre Oppermann goto passin; 608c21fd232SAndre Oppermann 609f51f805fSSam Leffler odst = ip->ip_dst; 610b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 611b252313fSGleb Smirnoff PFIL_PASS) 612beec8214SDarren Reed return; 613134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 614c4ac87eaSDarren Reed return; 6159b932e9eSAndre Oppermann 616c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 61702c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6180aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 6199b932e9eSAndre Oppermann 6209b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6219b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6229b932e9eSAndre Oppermann goto ours; 6239b932e9eSAndre Oppermann } 624ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 625de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 626099dd043SAndre Oppermann /* 627ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 628ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 629ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 630099dd043SAndre Oppermann */ 631ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 632099dd043SAndre Oppermann return; 633099dd043SAndre Oppermann } 634ffdbf9daSAndrey V. Elsukov } 635c21fd232SAndre Oppermann passin: 63621d172a3SGleb Smirnoff 63721d172a3SGleb Smirnoff /* 638df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 639df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 640df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 641df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 642df8bae1dSRodney W. Grimes */ 6439b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 644c67b1d17SGarrett Wollman return; 645df8bae1dSRodney W. Grimes 646f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 647f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 648f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 649f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 650f0068c4aSGarrett Wollman * grabbing the packet. 651f0068c4aSGarrett Wollman */ 652603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 653f0068c4aSGarrett Wollman goto ours; 654f0068c4aSGarrett Wollman 655df8bae1dSRodney W. Grimes /* 656df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 657cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 658cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 659cc766e04SGarrett Wollman * with it). 660df8bae1dSRodney W. Grimes */ 661d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 662cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 663cc766e04SGarrett Wollman goto ours; 664cc766e04SGarrett Wollman 6657538a9a0SJonathan Lemon /* 666823db0e9SDon Lewis * Enable a consistency check between the destination address 667823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 668823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 669e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 670e15ae1b2SDon Lewis * 'ipfw fwd'. 6713f67c834SDon Lewis * 6723f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 6733f67c834SDon Lewis * address is ipnat'ed to a different interface. 6743f67c834SDon Lewis * 675a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 6763f67c834SDon Lewis * to the loopback interface instead of the interface where 6773f67c834SDon Lewis * the packets are received. 678a9771948SGleb Smirnoff * 679a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 680a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 681a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 682823db0e9SDon Lewis */ 683603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 6840aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 68554bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 686823db0e9SDon Lewis 687ca925d9cSJonathan Lemon /* 688ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 689ca925d9cSJonathan Lemon */ 6901a5995ccSEugene Grosbein IN_IFADDR_RLOCK(&in_ifa_tracker); 6919b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 692f9e354dfSJulian Elischer /* 693823db0e9SDon Lewis * If the address matches, verify that the packet 694823db0e9SDon Lewis * arrived via the correct interface if checking is 695823db0e9SDon Lewis * enabled. 696f9e354dfSJulian Elischer */ 6979b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6988c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6997caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7007caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7017caf4ab7SGleb Smirnoff m->m_pkthdr.len); 7021a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 703ed1ff184SJulian Elischer goto ours; 704ca925d9cSJonathan Lemon } 7058c0fec80SRobert Watson } 7061a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 7072d9cfabaSRobert Watson 708823db0e9SDon Lewis /* 709ca925d9cSJonathan Lemon * Check for broadcast addresses. 710ca925d9cSJonathan Lemon * 711ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 712ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 713ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 714ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 715823db0e9SDon Lewis */ 7160aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 717d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 718ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 719ca925d9cSJonathan Lemon continue; 720ca925d9cSJonathan Lemon ia = ifatoia(ifa); 721df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7220aade26eSRobert Watson ip->ip_dst.s_addr) { 7237caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7247caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7257caf4ab7SGleb Smirnoff m->m_pkthdr.len); 726df8bae1dSRodney W. Grimes goto ours; 7270aade26eSRobert Watson } 7280ac40133SBrian Somers #ifdef BOOTP_COMPAT 7290aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7307caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7317caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7327caf4ab7SGleb Smirnoff m->m_pkthdr.len); 733ca925d9cSJonathan Lemon goto ours; 7340aade26eSRobert Watson } 7350ac40133SBrian Somers #endif 736df8bae1dSRodney W. Grimes } 73719e5b0a7SRobert Watson ia = NULL; 738df8bae1dSRodney W. Grimes } 739f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 740f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 74186425c62SRobert Watson IPSTAT_INC(ips_cantforward); 742f8429ca2SBruce M Simpson m_freem(m); 743f8429ca2SBruce M Simpson return; 744f8429ca2SBruce M Simpson } 745df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 746603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 747df8bae1dSRodney W. Grimes /* 748df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 749df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 750df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 751df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 752df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 753df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 754df8bae1dSRodney W. Grimes */ 7550aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 75686425c62SRobert Watson IPSTAT_INC(ips_cantforward); 757df8bae1dSRodney W. Grimes m_freem(m); 758c67b1d17SGarrett Wollman return; 759df8bae1dSRodney W. Grimes } 760df8bae1dSRodney W. Grimes 761df8bae1dSRodney W. Grimes /* 76211612afaSDima Dorfman * The process-level routing daemon needs to receive 763df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 764df8bae1dSRodney W. Grimes * host belongs to their destination groups. 765df8bae1dSRodney W. Grimes */ 766df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 767df8bae1dSRodney W. Grimes goto ours; 76886425c62SRobert Watson IPSTAT_INC(ips_forward); 769df8bae1dSRodney W. Grimes } 770df8bae1dSRodney W. Grimes /* 771d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 772d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 773d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 774df8bae1dSRodney W. Grimes */ 775df8bae1dSRodney W. Grimes goto ours; 776df8bae1dSRodney W. Grimes } 777df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 778df8bae1dSRodney W. Grimes goto ours; 779df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 780df8bae1dSRodney W. Grimes goto ours; 781df8bae1dSRodney W. Grimes 7826a800098SYoshinobu Inoue /* 783df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 784df8bae1dSRodney W. Grimes */ 785603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 78686425c62SRobert Watson IPSTAT_INC(ips_cantforward); 787df8bae1dSRodney W. Grimes m_freem(m); 788546f251bSChris D. Faulhaber } else { 7899b932e9eSAndre Oppermann ip_forward(m, dchg); 790546f251bSChris D. Faulhaber } 791c67b1d17SGarrett Wollman return; 792df8bae1dSRodney W. Grimes 793df8bae1dSRodney W. Grimes ours: 794d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 795d0ebc0d2SYaroslav Tykhiy /* 796d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 797d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 798d0ebc0d2SYaroslav Tykhiy */ 7997caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 800d0ebc0d2SYaroslav Tykhiy return; 801d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 802d0ebc0d2SYaroslav Tykhiy 80363f8d699SJordan K. Hubbard /* 804b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 805ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 806df8bae1dSRodney W. Grimes */ 8078f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 808aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 809f0cada84SAndre Oppermann m = ip_reass(m); 810f0cada84SAndre Oppermann if (m == NULL) 811c67b1d17SGarrett Wollman return; 8126a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8137e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 81453be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 815f0cada84SAndre Oppermann } 816f0cada84SAndre Oppermann 817fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 818fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 819fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 820fcf59617SAndrey V. Elsukov return; 821fcf59617SAndrey V. Elsukov } 822b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 82333841545SHajimu UMEMOTO 824df8bae1dSRodney W. Grimes /* 825df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 826df8bae1dSRodney W. Grimes */ 82786425c62SRobert Watson IPSTAT_INC(ips_delivered); 8289b932e9eSAndre Oppermann 8298f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 830c67b1d17SGarrett Wollman return; 831df8bae1dSRodney W. Grimes bad: 832df8bae1dSRodney W. Grimes m_freem(m); 833c67b1d17SGarrett Wollman } 834c67b1d17SGarrett Wollman 835c67b1d17SGarrett Wollman /* 836df8bae1dSRodney W. Grimes * IP timer processing; 837df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 838df8bae1dSRodney W. Grimes * queue, discard it. 839df8bae1dSRodney W. Grimes */ 840df8bae1dSRodney W. Grimes void 841f2565d68SRobert Watson ip_slowtimo(void) 842df8bae1dSRodney W. Grimes { 8438b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 844df8bae1dSRodney W. Grimes 8455ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8468b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8478b615593SMarko Zec CURVNET_SET(vnet_iter); 8481dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8498b615593SMarko Zec CURVNET_RESTORE(); 8508b615593SMarko Zec } 8515ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 852df8bae1dSRodney W. Grimes } 853df8bae1dSRodney W. Grimes 8549802380eSBjoern A. Zeeb void 8559802380eSBjoern A. Zeeb ip_drain(void) 8569802380eSBjoern A. Zeeb { 8579802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8589802380eSBjoern A. Zeeb 8599802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8609802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8619802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8621dbefcc0SGleb Smirnoff ipreass_drain(); 8638b615593SMarko Zec CURVNET_RESTORE(); 8648b615593SMarko Zec } 8655ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 866df8bae1dSRodney W. Grimes } 867df8bae1dSRodney W. Grimes 868df8bae1dSRodney W. Grimes /* 869de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 870de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 871de38924dSAndre Oppermann */ 872de38924dSAndre Oppermann int 8731b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 874de38924dSAndre Oppermann { 875de38924dSAndre Oppermann struct protosw *pr; 876de38924dSAndre Oppermann 877de38924dSAndre Oppermann /* Sanity checks. */ 8781b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 879de38924dSAndre Oppermann return (EPROTONOSUPPORT); 880de38924dSAndre Oppermann 881de38924dSAndre Oppermann /* 882de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 883de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 884de38924dSAndre Oppermann */ 885de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 886de38924dSAndre Oppermann if (pr == NULL) 887de38924dSAndre Oppermann return (EPFNOSUPPORT); 888de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 889de38924dSAndre Oppermann return (EEXIST); 890de38924dSAndre Oppermann 891de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 892de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 893de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 894de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 895de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 896de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 897de38924dSAndre Oppermann return (0); 898de38924dSAndre Oppermann } 899de38924dSAndre Oppermann } 900de38924dSAndre Oppermann return (EPROTONOSUPPORT); 901de38924dSAndre Oppermann } 902de38924dSAndre Oppermann 903de38924dSAndre Oppermann int 9041b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 905de38924dSAndre Oppermann { 906de38924dSAndre Oppermann struct protosw *pr; 907de38924dSAndre Oppermann 908de38924dSAndre Oppermann /* Sanity checks. */ 9091b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 910de38924dSAndre Oppermann return (EPROTONOSUPPORT); 911de38924dSAndre Oppermann 912de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 913de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 914de38924dSAndre Oppermann if (pr == NULL) 915de38924dSAndre Oppermann return (EPFNOSUPPORT); 916de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 917de38924dSAndre Oppermann return (ENOENT); 918de38924dSAndre Oppermann 919de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 920de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 921de38924dSAndre Oppermann return (0); 922de38924dSAndre Oppermann } 923de38924dSAndre Oppermann 924df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 925df8bae1dSRodney W. Grimes 0, 0, 0, 0, 926df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 927df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 928df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 929fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 9303b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 931df8bae1dSRodney W. Grimes }; 932df8bae1dSRodney W. Grimes 933df8bae1dSRodney W. Grimes /* 934df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 935df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 936df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 937df8bae1dSRodney W. Grimes * of codes and types. 938df8bae1dSRodney W. Grimes * 939df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 940df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 941df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 942df8bae1dSRodney W. Grimes * protocol deal with that. 943df8bae1dSRodney W. Grimes * 944df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 945df8bae1dSRodney W. Grimes * via a source route. 946df8bae1dSRodney W. Grimes */ 9479b932e9eSAndre Oppermann void 9489b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 949df8bae1dSRodney W. Grimes { 9502b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 951efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 952df8bae1dSRodney W. Grimes struct mbuf *mcopy; 953d14122b0SErmal Luçi struct sockaddr_in *sin; 9549b932e9eSAndre Oppermann struct in_addr dest; 955b835b6feSBjoern A. Zeeb struct route ro; 9564043ee3cSAlexander V. Chernikov uint32_t flowid; 957c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9583efc3014SJulian Elischer 959b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 960b8a6e03fSGleb Smirnoff 9619b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 96286425c62SRobert Watson IPSTAT_INC(ips_cantforward); 963df8bae1dSRodney W. Grimes m_freem(m); 964df8bae1dSRodney W. Grimes return; 965df8bae1dSRodney W. Grimes } 966fcf59617SAndrey V. Elsukov if ( 967fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH 968fcf59617SAndrey V. Elsukov V_ipstealth == 0 && 969fcf59617SAndrey V. Elsukov #endif 970fcf59617SAndrey V. Elsukov ip->ip_ttl <= IPTTLDEC) { 971fcf59617SAndrey V. Elsukov icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); 9728922ddbeSAndrey V. Elsukov return; 9738922ddbeSAndrey V. Elsukov } 974df8bae1dSRodney W. Grimes 975d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 976d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 977d14122b0SErmal Luçi sin->sin_family = AF_INET; 978d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 979d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 9804043ee3cSAlexander V. Chernikov flowid = m->m_pkthdr.flowid; 9814043ee3cSAlexander V. Chernikov ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); 982983066f0SAlexander V. Chernikov if (ro.ro_nh != NULL) { 983983066f0SAlexander V. Chernikov ia = ifatoia(ro.ro_nh->nh_ifa); 98456844a62SErmal Luçi } else 98556844a62SErmal Luçi ia = NULL; 986df8bae1dSRodney W. Grimes /* 987bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 988bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 989bfef7ed4SIan Dowse * 9904d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 9914d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 9924d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 9934d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 9944d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 9954d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 9964d2e3692SLuigi Rizzo * 997c3bef61eSKevin Lo * We don't use m_copym() because it might return a reference 998bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 999bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 1000bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 1001df8bae1dSRodney W. Grimes */ 1002dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 1003eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10049967cafcSSam Leffler /* 10059967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10069967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10079967cafcSSam Leffler * be conservative and just discard the copy since 10089967cafcSSam Leffler * code below may some day want the tags. 10099967cafcSSam Leffler */ 10109967cafcSSam Leffler m_free(mcopy); 10119967cafcSSam Leffler mcopy = NULL; 10129967cafcSSam Leffler } 1013bfef7ed4SIan Dowse if (mcopy != NULL) { 10148f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1015e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1016bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1017bfef7ed4SIan Dowse } 101804287599SRuslan Ermilov #ifdef IPSTEALTH 1019fcf59617SAndrey V. Elsukov if (V_ipstealth == 0) 102004287599SRuslan Ermilov #endif 102104287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 1022fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1023fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 1024fcf59617SAndrey V. Elsukov if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { 1025fcf59617SAndrey V. Elsukov /* mbuf consumed by IPsec */ 1026d16a2e47SMark Johnston RO_NHFREE(&ro); 1027fcf59617SAndrey V. Elsukov m_freem(mcopy); 1028fcf59617SAndrey V. Elsukov if (error != EINPROGRESS) 1029fcf59617SAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 1030b8a6e03fSGleb Smirnoff return; 103104287599SRuslan Ermilov } 1032fcf59617SAndrey V. Elsukov /* No IPsec processing required */ 1033fcf59617SAndrey V. Elsukov } 1034fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 1035df8bae1dSRodney W. Grimes /* 1036df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1037df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1038df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1039df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1040df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1041df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1042df8bae1dSRodney W. Grimes */ 10439b932e9eSAndre Oppermann dest.s_addr = 0; 1044efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1045efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1046983066f0SAlexander V. Chernikov struct nhop_object *nh; 104702c1c707SAndre Oppermann 1048983066f0SAlexander V. Chernikov nh = ro.ro_nh; 104902c1c707SAndre Oppermann 1050983066f0SAlexander V. Chernikov if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { 1051983066f0SAlexander V. Chernikov struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); 1052df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1053df8bae1dSRodney W. Grimes 1054983066f0SAlexander V. Chernikov if (nh_ia != NULL && 1055983066f0SAlexander V. Chernikov (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { 1056983066f0SAlexander V. Chernikov if (nh->nh_flags & NHF_GATEWAY) 1057983066f0SAlexander V. Chernikov dest.s_addr = nh->gw4_sa.sin_addr.s_addr; 1058df8bae1dSRodney W. Grimes else 10599b932e9eSAndre Oppermann dest.s_addr = ip->ip_dst.s_addr; 1060df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1061df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1062df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 1063df8bae1dSRodney W. Grimes } 1064df8bae1dSRodney W. Grimes } 106502c1c707SAndre Oppermann } 1066df8bae1dSRodney W. Grimes 1067b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1068b835b6feSBjoern A. Zeeb 1069983066f0SAlexander V. Chernikov if (error == EMSGSIZE && ro.ro_nh) 1070983066f0SAlexander V. Chernikov mtu = ro.ro_nh->nh_mtu; 1071983066f0SAlexander V. Chernikov RO_NHFREE(&ro); 1072b835b6feSBjoern A. Zeeb 1073df8bae1dSRodney W. Grimes if (error) 107486425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1075df8bae1dSRodney W. Grimes else { 107686425c62SRobert Watson IPSTAT_INC(ips_forward); 1077df8bae1dSRodney W. Grimes if (type) 107886425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1079df8bae1dSRodney W. Grimes else { 10809188b4a1SAndre Oppermann if (mcopy) 1081df8bae1dSRodney W. Grimes m_freem(mcopy); 1082b8a6e03fSGleb Smirnoff return; 1083df8bae1dSRodney W. Grimes } 1084df8bae1dSRodney W. Grimes } 10854f6c66ccSMatt Macy if (mcopy == NULL) 1086b8a6e03fSGleb Smirnoff return; 10874f6c66ccSMatt Macy 1088df8bae1dSRodney W. Grimes switch (error) { 1089df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1090df8bae1dSRodney W. Grimes /* type, code set above */ 1091df8bae1dSRodney W. Grimes break; 1092df8bae1dSRodney W. Grimes 1093efbad259SEdward Tomasz Napierala case ENETUNREACH: 1094df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1095df8bae1dSRodney W. Grimes case ENETDOWN: 1096df8bae1dSRodney W. Grimes case EHOSTDOWN: 1097df8bae1dSRodney W. Grimes default: 1098df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1099df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1100df8bae1dSRodney W. Grimes break; 1101df8bae1dSRodney W. Grimes 1102df8bae1dSRodney W. Grimes case EMSGSIZE: 1103df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1104df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 11059b932e9eSAndre Oppermann /* 1106b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1107b835b6feSBjoern A. Zeeb * interface MTU. 1108ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1109ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1110ab48768bSAndre Oppermann * current packet size. 11119b932e9eSAndre Oppermann */ 1112b835b6feSBjoern A. Zeeb if (mtu != 0) { 1113b835b6feSBjoern A. Zeeb if (ia != NULL) 1114b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1115b835b6feSBjoern A. Zeeb } else { 1116ab48768bSAndre Oppermann if (ia != NULL) 1117c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1118ab48768bSAndre Oppermann else 11198f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1120ab48768bSAndre Oppermann } 112186425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1122df8bae1dSRodney W. Grimes break; 1123df8bae1dSRodney W. Grimes 1124df8bae1dSRodney W. Grimes case ENOBUFS: 11253a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11263a06e3e0SRuslan Ermilov m_freem(mcopy); 1127b8a6e03fSGleb Smirnoff return; 1128df8bae1dSRodney W. Grimes } 1129c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1130df8bae1dSRodney W. Grimes } 1131df8bae1dSRodney W. Grimes 1132339efd75SMaxim Sobolev #define CHECK_SO_CT(sp, ct) \ 1133339efd75SMaxim Sobolev (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) 1134339efd75SMaxim Sobolev 113582c23ebaSBill Fenner void 1136f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1137f2565d68SRobert Watson struct mbuf *m) 113882c23ebaSBill Fenner { 113906193f0bSKonstantin Belousov bool stamped; 11408b615593SMarko Zec 114106193f0bSKonstantin Belousov stamped = false; 1142339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1143339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 114406193f0bSKonstantin Belousov struct bintime boottimebin, bt; 114506193f0bSKonstantin Belousov struct timespec ts1; 1146be8a62e8SPoul-Henning Kamp 114706193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 114806193f0bSKonstantin Belousov M_TSTMP)) { 114906193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 115006193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 115106193f0bSKonstantin Belousov getboottimebin(&boottimebin); 115206193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 115306193f0bSKonstantin Belousov } else { 1154be8a62e8SPoul-Henning Kamp bintime(&bt); 115506193f0bSKonstantin Belousov } 1156be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1157be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 115806193f0bSKonstantin Belousov if (*mp != NULL) { 1159be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 116006193f0bSKonstantin Belousov stamped = true; 116106193f0bSKonstantin Belousov } 1162be8a62e8SPoul-Henning Kamp } 1163339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 116406193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 1165c012cfe6SEd Maste struct timespec ts1; 116682c23ebaSBill Fenner struct timeval tv; 116782c23ebaSBill Fenner 116806193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 116906193f0bSKonstantin Belousov M_TSTMP)) { 117006193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 117106193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 117206193f0bSKonstantin Belousov getboottimebin(&boottimebin); 117306193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 117406193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 117506193f0bSKonstantin Belousov } else { 1176339efd75SMaxim Sobolev microtime(&tv); 117706193f0bSKonstantin Belousov } 117882c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 117982c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 118006193f0bSKonstantin Belousov if (*mp != NULL) { 118182c23ebaSBill Fenner mp = &(*mp)->m_next; 118206193f0bSKonstantin Belousov stamped = true; 118306193f0bSKonstantin Belousov } 1184339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 118506193f0bSKonstantin Belousov struct bintime boottimebin; 118606193f0bSKonstantin Belousov struct timespec ts, ts1; 1187339efd75SMaxim Sobolev 118806193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 118906193f0bSKonstantin Belousov M_TSTMP)) { 119006193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 119106193f0bSKonstantin Belousov getboottimebin(&boottimebin); 119206193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 11936040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 119406193f0bSKonstantin Belousov } else { 1195339efd75SMaxim Sobolev nanotime(&ts); 119606193f0bSKonstantin Belousov } 1197339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1198339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 119906193f0bSKonstantin Belousov if (*mp != NULL) { 1200339efd75SMaxim Sobolev mp = &(*mp)->m_next; 120106193f0bSKonstantin Belousov stamped = true; 120206193f0bSKonstantin Belousov } 1203339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1204339efd75SMaxim Sobolev struct timespec ts; 1205339efd75SMaxim Sobolev 120606193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 120706193f0bSKonstantin Belousov M_TSTMP)) 120806193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 120906193f0bSKonstantin Belousov else 1210339efd75SMaxim Sobolev nanouptime(&ts); 1211339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1212339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 121306193f0bSKonstantin Belousov if (*mp != NULL) { 121406193f0bSKonstantin Belousov mp = &(*mp)->m_next; 121506193f0bSKonstantin Belousov stamped = true; 121606193f0bSKonstantin Belousov } 121706193f0bSKonstantin Belousov } 121806193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 121906193f0bSKonstantin Belousov M_TSTMP)) { 122006193f0bSKonstantin Belousov struct sock_timestamp_info sti; 122106193f0bSKonstantin Belousov 122206193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 122306193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 122406193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 122506193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 122606193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 122706193f0bSKonstantin Belousov SOL_SOCKET); 122806193f0bSKonstantin Belousov if (*mp != NULL) 1229339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1230be8a62e8SPoul-Henning Kamp } 123182c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 123282c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 123382c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 123482c23ebaSBill Fenner if (*mp) 123582c23ebaSBill Fenner mp = &(*mp)->m_next; 123682c23ebaSBill Fenner } 12374957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12384957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12394957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12404957466bSMatthew N. Dodd if (*mp) 12414957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12424957466bSMatthew N. Dodd } 124382c23ebaSBill Fenner #ifdef notyet 124482c23ebaSBill Fenner /* XXX 124582c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 124682c23ebaSBill Fenner * than they already were. 124782c23ebaSBill Fenner */ 124882c23ebaSBill Fenner /* options were tossed already */ 124982c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 125082c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 125182c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 125282c23ebaSBill Fenner if (*mp) 125382c23ebaSBill Fenner mp = &(*mp)->m_next; 125482c23ebaSBill Fenner } 125582c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 125682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1257e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 125882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 125982c23ebaSBill Fenner if (*mp) 126082c23ebaSBill Fenner mp = &(*mp)->m_next; 126182c23ebaSBill Fenner } 126282c23ebaSBill Fenner #endif 126382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1264d314ad7bSJulian Elischer struct ifnet *ifp; 1265d314ad7bSJulian Elischer struct sdlbuf { 126682c23ebaSBill Fenner struct sockaddr_dl sdl; 1267d314ad7bSJulian Elischer u_char pad[32]; 1268d314ad7bSJulian Elischer } sdlbuf; 1269d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1270d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 127182c23ebaSBill Fenner 127246f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 127346f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12744a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1275d314ad7bSJulian Elischer /* 1276d314ad7bSJulian Elischer * Change our mind and don't try copy. 1277d314ad7bSJulian Elischer */ 127846f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 127946f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1280d314ad7bSJulian Elischer goto makedummy; 1281d314ad7bSJulian Elischer } 1282d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1283d314ad7bSJulian Elischer } else { 1284d314ad7bSJulian Elischer makedummy: 128546f2df9cSSergey Kandaurov sdl2->sdl_len = 128646f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1287d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1288d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1289d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1290d314ad7bSJulian Elischer } 1291d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 129282c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 129382c23ebaSBill Fenner if (*mp) 129482c23ebaSBill Fenner mp = &(*mp)->m_next; 129582c23ebaSBill Fenner } 12963cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 12973cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 12983cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 12993cca425bSMichael Tuexen if (*mp) 13003cca425bSMichael Tuexen mp = &(*mp)->m_next; 13013cca425bSMichael Tuexen } 13029d3ddf43SAdrian Chadd 13039d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13049d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13059d3ddf43SAdrian Chadd 13069d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13079d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13089d3ddf43SAdrian Chadd 13099d3ddf43SAdrian Chadd /* 13109d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13119d3ddf43SAdrian Chadd * other - don't populate both? 13129d3ddf43SAdrian Chadd */ 13139d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13149d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13159d3ddf43SAdrian Chadd if (*mp) 13169d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13179d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13189d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13199d3ddf43SAdrian Chadd if (*mp) 13209d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13219d3ddf43SAdrian Chadd } 13229d3ddf43SAdrian Chadd 13239d3ddf43SAdrian Chadd #ifdef RSS 13249d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13259d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13269d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13279d3ddf43SAdrian Chadd 13289d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13299d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13309d3ddf43SAdrian Chadd 13319d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13329d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13339d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13349d3ddf43SAdrian Chadd if (*mp) 13359d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13369d3ddf43SAdrian Chadd } 13379d3ddf43SAdrian Chadd } 13389d3ddf43SAdrian Chadd #endif 133982c23ebaSBill Fenner } 134082c23ebaSBill Fenner 13414d2e3692SLuigi Rizzo /* 134230916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 134330916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 134430916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 134530916a2dSRobert Watson * compiled. 13464d2e3692SLuigi Rizzo */ 13475f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 134882cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 134982cea7e6SBjoern A. Zeeb 135082cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 135182cea7e6SBjoern A. Zeeb 1352df8bae1dSRodney W. Grimes int 1353f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1354f0068c4aSGarrett Wollman { 13558b615593SMarko Zec 1356f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1357f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1358f0068c4aSGarrett Wollman return EOPNOTSUPP; 1359f0068c4aSGarrett Wollman 1360603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1361f0068c4aSGarrett Wollman return EADDRINUSE; 1362f0068c4aSGarrett Wollman 1363603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13641c5de19aSGarrett Wollman /* 13651c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13661c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13671c5de19aSGarrett Wollman */ 1368603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1369603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1370603724d3SBjoern A. Zeeb V_rsvp_on++; 13711c5de19aSGarrett Wollman } 1372f0068c4aSGarrett Wollman 1373f0068c4aSGarrett Wollman return 0; 1374f0068c4aSGarrett Wollman } 1375f0068c4aSGarrett Wollman 1376f0068c4aSGarrett Wollman int 1377f0068c4aSGarrett Wollman ip_rsvp_done(void) 1378f0068c4aSGarrett Wollman { 13798b615593SMarko Zec 1380603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13811c5de19aSGarrett Wollman /* 13821c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13831c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13841c5de19aSGarrett Wollman */ 1385603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1386603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1387603724d3SBjoern A. Zeeb V_rsvp_on--; 13881c5de19aSGarrett Wollman } 1389f0068c4aSGarrett Wollman return 0; 1390f0068c4aSGarrett Wollman } 1391bbb4330bSLuigi Rizzo 13928f5a8818SKevin Lo int 13938f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1394bbb4330bSLuigi Rizzo { 13958f5a8818SKevin Lo struct mbuf *m; 13968f5a8818SKevin Lo 13978f5a8818SKevin Lo m = *mp; 13988f5a8818SKevin Lo *mp = NULL; 13998b615593SMarko Zec 1400bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14018f5a8818SKevin Lo *mp = m; 14028f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14038f5a8818SKevin Lo return (IPPROTO_DONE); 1404bbb4330bSLuigi Rizzo } 1405bbb4330bSLuigi Rizzo 1406bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1407bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1408bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1409bbb4330bSLuigi Rizzo */ 1410bbb4330bSLuigi Rizzo 1411603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1412bbb4330bSLuigi Rizzo m_freem(m); 14138f5a8818SKevin Lo return (IPPROTO_DONE); 1414bbb4330bSLuigi Rizzo } 1415bbb4330bSLuigi Rizzo 1416603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14178f5a8818SKevin Lo *mp = m; 14188f5a8818SKevin Lo rip_input(mp, offp, proto); 14198f5a8818SKevin Lo return (IPPROTO_DONE); 1420bbb4330bSLuigi Rizzo } 1421bbb4330bSLuigi Rizzo /* Drop the packet */ 1422bbb4330bSLuigi Rizzo m_freem(m); 14238f5a8818SKevin Lo return (IPPROTO_DONE); 1424bbb4330bSLuigi Rizzo } 1425