1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. 
Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66748e0b0aSGarrett Wollman #include <net/netisr.h> 67b2bdc62aSAdrian Chadd #include <net/rss_config.h> 684b79449eSBjoern A. Zeeb #include <net/vnet.h> 69df8bae1dSRodney W. Grimes 70df8bae1dSRodney W. Grimes #include <netinet/in.h> 7157f60867SMark Johnston #include <netinet/in_kdtrace.h> 72df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 73b5e8ce9fSBruce Evans #include <netinet/in_var.h> 74df8bae1dSRodney W. Grimes #include <netinet/ip.h> 75df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 76df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 77eddfbb76SRobert Watson #include <netinet/ip_fw.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 79ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8058938916SGarrett Wollman #include <machine/in_cksum.h> 81a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 82b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 83df8bae1dSRodney W. Grimes 84fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 85fcf59617SAndrey V. Elsukov 86f0068c4aSGarrett Wollman #include <sys/socketvar.h> 876ddbf1e2SGary Palmer 88aed55708SRobert Watson #include <security/mac/mac_framework.h> 89aed55708SRobert Watson 90d2035ffbSEd Maste #ifdef CTASSERT 91d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 92d2035ffbSEd Maste #endif 93d2035ffbSEd Maste 941dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. 
*/ 95843b0e57SXin LI extern void ipreass_init(void); 96843b0e57SXin LI extern void ipreass_drain(void); 97843b0e57SXin LI extern void ipreass_slowtimo(void); 981dbefcc0SGleb Smirnoff #ifdef VIMAGE 99843b0e57SXin LI extern void ipreass_destroy(void); 1001dbefcc0SGleb Smirnoff #endif 1011dbefcc0SGleb Smirnoff 102cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 103cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 104f0068c4aSGarrett Wollman 10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10682cea7e6SBjoern A. Zeeb 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 109eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1108b615593SMarko Zec "Enable IP forwarding between interfaces"); 1110312fbe9SPoul-Henning Kamp 1125f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ipsendredirects) = 1; /* XXX */ 11382cea7e6SBjoern A. Zeeb #define V_ipsendredirects VNET(ipsendredirects) 1146df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 115eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1168b615593SMarko Zec "Enable sending IP redirects"); 1170312fbe9SPoul-Henning Kamp 118823db0e9SDon Lewis /* 119823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 120823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 121a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 122823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1233f67c834SDon Lewis * 124a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 125a8f12100SDon Lewis * to translate the destination address to another local interface. 
1263f67c834SDon Lewis * 1273f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1283f67c834SDon Lewis * to the loopback interface instead of the interface where the 1293f67c834SDon Lewis * packets for those addresses are received. 130823db0e9SDon Lewis */ 1315f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_checkinterface); 13282cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1336df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 134eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1358b615593SMarko Zec "Verify packet arrives on correct interface"); 136b3e95d4eSJonathan Lemon 137b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 138df8bae1dSRodney W. Grimes 139d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 140d4b5cae4SRobert Watson .nh_name = "ip", 141d4b5cae4SRobert Watson .nh_handler = ip_input, 142d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 143b8bc95cdSAdrian Chadd #ifdef RSS 1442527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 145b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 146b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 147b8bc95cdSAdrian Chadd #else 148d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 149b8bc95cdSAdrian Chadd #endif 150d4b5cae4SRobert Watson }; 151ca925d9cSJonathan Lemon 152b8bc95cdSAdrian Chadd #ifdef RSS 153b8bc95cdSAdrian Chadd /* 154b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 155b8bc95cdSAdrian Chadd * to have a flowid already calculated. 156b8bc95cdSAdrian Chadd * 157b8bc95cdSAdrian Chadd * It should likely have something that assert it 158b8bc95cdSAdrian Chadd * actually has valid flow details. 
159b8bc95cdSAdrian Chadd */ 160b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 161b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 162b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 163b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 164499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 165b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 166b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 167b8bc95cdSAdrian Chadd }; 168b8bc95cdSAdrian Chadd #endif 169b8bc95cdSAdrian Chadd 170df8bae1dSRodney W. Grimes extern struct domain inetdomain; 171f0ffb944SJulian Elischer extern struct protosw inetsw[]; 172df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17582cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 176ca925d9cSJonathan Lemon 1770312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1780312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1793d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1800312fbe9SPoul-Henning Kamp #endif 1810312fbe9SPoul-Henning Kamp 1821b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18382cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1846df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 185eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 186eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1871b968362SDag-Erling Smørgrav #endif 188eddfbb76SRobert Watson 189315e3e38SRobert Watson /* 1905da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1915923c293SGleb Smirnoff */ 1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1935da0521fSAndrey V. 
Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1945da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1955da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1965923c293SGleb Smirnoff 1975923c293SGleb Smirnoff #ifdef VIMAGE 1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1995923c293SGleb Smirnoff #endif /* VIMAGE */ 2005923c293SGleb Smirnoff 2015923c293SGleb Smirnoff /* 202315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2035923c293SGleb Smirnoff * into ipstat treated as an array. 204315e3e38SRobert Watson */ 205315e3e38SRobert Watson void 206315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 207315e3e38SRobert Watson { 208315e3e38SRobert Watson 2095da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 210315e3e38SRobert Watson } 211315e3e38SRobert Watson 212315e3e38SRobert Watson void 213315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 214315e3e38SRobert Watson { 215315e3e38SRobert Watson 2165da0521fSAndrey V. 
Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 217315e3e38SRobert Watson } 218315e3e38SRobert Watson 219d4b5cae4SRobert Watson static int 220d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 221d4b5cae4SRobert Watson { 222d4b5cae4SRobert Watson int error, qlimit; 223d4b5cae4SRobert Watson 224d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 225d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 226d4b5cae4SRobert Watson if (error || !req->newptr) 227d4b5cae4SRobert Watson return (error); 228d4b5cae4SRobert Watson if (qlimit < 1) 229d4b5cae4SRobert Watson return (EINVAL); 230d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 231d4b5cae4SRobert Watson } 232d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 233*7029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 234*7029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 235d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 236d4b5cae4SRobert Watson 237d4b5cae4SRobert Watson static int 238d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 239d4b5cae4SRobert Watson { 240d4b5cae4SRobert Watson u_int64_t qdrops_long; 241d4b5cae4SRobert Watson int error, qdrops; 242d4b5cae4SRobert Watson 243d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 244d4b5cae4SRobert Watson qdrops = qdrops_long; 245d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 246d4b5cae4SRobert Watson if (error || !req->newptr) 247d4b5cae4SRobert Watson return (error); 248d4b5cae4SRobert Watson if (qdrops != 0) 249d4b5cae4SRobert Watson return (EINVAL); 250d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 251d4b5cae4SRobert Watson return (0); 252d4b5cae4SRobert Watson } 253d4b5cae4SRobert Watson 254d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 255*7029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | 
CTLFLAG_MPSAFE, 256*7029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 257d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 258d4b5cae4SRobert Watson 259b8bc95cdSAdrian Chadd #ifdef RSS 260b8bc95cdSAdrian Chadd static int 261b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 262b8bc95cdSAdrian Chadd { 263b8bc95cdSAdrian Chadd int error, qlimit; 264b8bc95cdSAdrian Chadd 265b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 266b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 267b8bc95cdSAdrian Chadd if (error || !req->newptr) 268b8bc95cdSAdrian Chadd return (error); 269b8bc95cdSAdrian Chadd if (qlimit < 1) 270b8bc95cdSAdrian Chadd return (EINVAL); 271b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 272b8bc95cdSAdrian Chadd } 2737faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 274*7029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 275*7029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2767faa0d21SAndrey V. 
Elsukov "I", "Maximum size of the IP direct input queue"); 277b8bc95cdSAdrian Chadd 278b8bc95cdSAdrian Chadd static int 279b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 280b8bc95cdSAdrian Chadd { 281b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 282b8bc95cdSAdrian Chadd int error, qdrops; 283b8bc95cdSAdrian Chadd 284b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 285b8bc95cdSAdrian Chadd qdrops = qdrops_long; 286b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 287b8bc95cdSAdrian Chadd if (error || !req->newptr) 288b8bc95cdSAdrian Chadd return (error); 289b8bc95cdSAdrian Chadd if (qdrops != 0) 290b8bc95cdSAdrian Chadd return (EINVAL); 291b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 292b8bc95cdSAdrian Chadd return (0); 293b8bc95cdSAdrian Chadd } 294b8bc95cdSAdrian Chadd 2957faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 296*7029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 297*7029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 298b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 299b8bc95cdSAdrian Chadd #endif /* RSS */ 300b8bc95cdSAdrian Chadd 301df8bae1dSRodney W. Grimes /* 302df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 303df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 304df8bae1dSRodney W. Grimes */ 305df8bae1dSRodney W. Grimes void 306f2565d68SRobert Watson ip_init(void) 307df8bae1dSRodney W. Grimes { 308b252313fSGleb Smirnoff struct pfil_head_args args; 309f2565d68SRobert Watson struct protosw *pr; 310f2565d68SRobert Watson int i; 311df8bae1dSRodney W. Grimes 312d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 313603724d3SBjoern A. 
Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3141ed81b73SMarko Zec 3151ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3161dbefcc0SGleb Smirnoff ipreass_init(); 3171ed81b73SMarko Zec 3180b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 319b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 320b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 321b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 322b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 323b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3240b4b0b0fSJulian Elischer 325ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 326ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 327ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 328ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 329ef91a976SAndrey V. Elsukov __func__); 330ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 331ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 332ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 333ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 334ef91a976SAndrey V. Elsukov __func__); 335ef91a976SAndrey V. Elsukov 3361ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 337484149deSBjoern A. Zeeb #ifdef VIMAGE 338484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 339484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 340484149deSBjoern A. Zeeb #ifdef RSS 341484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 342484149deSBjoern A. Zeeb #endif 3431ed81b73SMarko Zec return; 344484149deSBjoern A. Zeeb } 345484149deSBjoern A. 
Zeeb #endif 3461ed81b73SMarko Zec 347f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 34802410549SRobert Watson if (pr == NULL) 349db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 350db09bef3SAndre Oppermann 351db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 352df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 353df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 354db09bef3SAndre Oppermann /* 355db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 356db09bef3SAndre Oppermann * in ip_protox[]. 357db09bef3SAndre Oppermann */ 358f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 359f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 360df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 361db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 362db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 363db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 364df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 365db09bef3SAndre Oppermann } 366194a213eSAndrey A. Chernov 367d4b5cae4SRobert Watson netisr_register(&ip_nh); 368b8bc95cdSAdrian Chadd #ifdef RSS 369b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 370b8bc95cdSAdrian Chadd #endif 371df8bae1dSRodney W. Grimes } 372df8bae1dSRodney W. Grimes 3739802380eSBjoern A. Zeeb #ifdef VIMAGE 3743f58662dSBjoern A. Zeeb static void 3753f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3769802380eSBjoern A. Zeeb { 37789856f7eSBjoern A. Zeeb struct ifnet *ifp; 378ef91a976SAndrey V. Elsukov int error; 3794d3dfd45SMikolaj Golub 380484149deSBjoern A. Zeeb #ifdef RSS 381484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 382484149deSBjoern A. Zeeb #endif 383484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 384484149deSBjoern A. 
Zeeb 385b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 386ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 387ef91a976SAndrey V. Elsukov if (error != 0) { 388ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 389ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 390ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 391ef91a976SAndrey V. Elsukov } 392ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 393ef91a976SAndrey V. Elsukov if (error != 0) { 394ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 395ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 396ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 397ef91a976SAndrey V. Elsukov } 39889856f7eSBjoern A. Zeeb 39989856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 40089856f7eSBjoern A. Zeeb in_ifscrub_all(); 40189856f7eSBjoern A. Zeeb 40289856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 40389856f7eSBjoern A. Zeeb IFNET_RLOCK(); 4044f6c66ccSMatt Macy CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) 40589856f7eSBjoern A. Zeeb rt_flushifroutes_af(ifp, AF_INET); 40689856f7eSBjoern A. Zeeb IFNET_RUNLOCK(); 4079802380eSBjoern A. Zeeb 408e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4091dbefcc0SGleb Smirnoff ipreass_destroy(); 41089856f7eSBjoern A. Zeeb 41189856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 41289856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4139802380eSBjoern A. Zeeb } 4143f58662dSBjoern A. Zeeb 4153f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4169802380eSBjoern A. Zeeb #endif 4179802380eSBjoern A. 
Zeeb 418b8bc95cdSAdrian Chadd #ifdef RSS 419b8bc95cdSAdrian Chadd /* 420b8bc95cdSAdrian Chadd * IP direct input routine. 421b8bc95cdSAdrian Chadd * 422b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 423b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 424b8bc95cdSAdrian Chadd */ 425b8bc95cdSAdrian Chadd void 426b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 427b8bc95cdSAdrian Chadd { 428b8bc95cdSAdrian Chadd struct ip *ip; 429b8bc95cdSAdrian Chadd int hlen; 430b8bc95cdSAdrian Chadd 431b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 432b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 433b8bc95cdSAdrian Chadd 434fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 435fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 436fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 437fcf59617SAndrey V. Elsukov return; 438fcf59617SAndrey V. Elsukov } 439fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 440b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 441b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 442b8bc95cdSAdrian Chadd return; 443b8bc95cdSAdrian Chadd } 444b8bc95cdSAdrian Chadd #endif 445b8bc95cdSAdrian Chadd 4464d2e3692SLuigi Rizzo /* 447df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 448df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 449df8bae1dSRodney W. Grimes */ 450c67b1d17SGarrett Wollman void 451c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 452df8bae1dSRodney W. 
Grimes { 4531a5995ccSEugene Grosbein struct rm_priotracker in_ifa_tracker; 4549188b4a1SAndre Oppermann struct ip *ip = NULL; 4555da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 456ca925d9cSJonathan Lemon struct ifaddr *ifa; 4570aade26eSRobert Watson struct ifnet *ifp; 4589b932e9eSAndre Oppermann int checkif, hlen = 0; 45921d172a3SGleb Smirnoff uint16_t sum, ip_len; 46002c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 461f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 462b715f178SLuigi Rizzo 463fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 464b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 465db40007dSAndrew R. Reiter 466ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 46776ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 46876ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4692b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 47053be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4718f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4729b932e9eSAndre Oppermann goto ours; 4732b25acc1SLuigi Rizzo } 4742b25acc1SLuigi Rizzo 47586425c62SRobert Watson IPSTAT_INC(ips_total); 47658938916SGarrett Wollman 47758938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 47858938916SGarrett Wollman goto tooshort; 47958938916SGarrett Wollman 480df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4810b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 48286425c62SRobert Watson IPSTAT_INC(ips_toosmall); 483c67b1d17SGarrett Wollman return; 484df8bae1dSRodney W. Grimes } 485df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48658938916SGarrett Wollman 48753be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 48886425c62SRobert Watson IPSTAT_INC(ips_badvers); 489df8bae1dSRodney W. Grimes goto bad; 490df8bae1dSRodney W. Grimes } 49158938916SGarrett Wollman 49253be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 493df8bae1dSRodney W. 
Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 49486425c62SRobert Watson IPSTAT_INC(ips_badhlen); 495df8bae1dSRodney W. Grimes goto bad; 496df8bae1dSRodney W. Grimes } 497df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4980b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 49986425c62SRobert Watson IPSTAT_INC(ips_badhlen); 500c67b1d17SGarrett Wollman return; 501df8bae1dSRodney W. Grimes } 502df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 503df8bae1dSRodney W. Grimes } 50433841545SHajimu UMEMOTO 50557f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50657f60867SMark Johnston 5076c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5080aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5096c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5106c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5110aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 51286425c62SRobert Watson IPSTAT_INC(ips_badaddr); 51333841545SHajimu UMEMOTO goto bad; 51433841545SHajimu UMEMOTO } 51533841545SHajimu UMEMOTO } 51633841545SHajimu UMEMOTO 517db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 518db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 519db4f9cc7SJonathan Lemon } else { 52058938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 52147c861ecSBrian Somers sum = in_cksum_hdr(ip); 52258938916SGarrett Wollman } else { 52347c861ecSBrian Somers sum = in_cksum(m, hlen); 52458938916SGarrett Wollman } 525db4f9cc7SJonathan Lemon } 52647c861ecSBrian Somers if (sum) { 52786425c62SRobert Watson IPSTAT_INC(ips_badsum); 528df8bae1dSRodney W. Grimes goto bad; 529df8bae1dSRodney W. Grimes } 530df8bae1dSRodney W. 
Grimes 53102b199f1SMax Laier #ifdef ALTQ 53202b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 53302b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53402b199f1SMax Laier return; 53502b199f1SMax Laier #endif 53602b199f1SMax Laier 53721d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 53821d172a3SGleb Smirnoff if (ip_len < hlen) { 53986425c62SRobert Watson IPSTAT_INC(ips_badlen); 540df8bae1dSRodney W. Grimes goto bad; 541df8bae1dSRodney W. Grimes } 542df8bae1dSRodney W. Grimes 543df8bae1dSRodney W. Grimes /* 544df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 545df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 546df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 547df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 548df8bae1dSRodney W. Grimes */ 54921d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 55058938916SGarrett Wollman tooshort: 55186425c62SRobert Watson IPSTAT_INC(ips_tooshort); 552df8bae1dSRodney W. Grimes goto bad; 553df8bae1dSRodney W. Grimes } 55421d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 555df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55621d172a3SGleb Smirnoff m->m_len = ip_len; 55721d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 558df8bae1dSRodney W. Grimes } else 55921d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 560df8bae1dSRodney W. Grimes } 561b8bc95cdSAdrian Chadd 562ad9f4d6aSAndrey V. Elsukov /* 563ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 56462484790SAndrey V. Elsukov * ip_tryforward() does not generate redirects, so fall 56562484790SAndrey V. Elsukov * through to normal processing if redirects are required. 566ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 567ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 568ad9f4d6aSAndrey V. 
Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 569ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 570ad9f4d6aSAndrey V. Elsukov * ip pointer. 571ad9f4d6aSAndrey V. Elsukov */ 57262484790SAndrey V. Elsukov if (V_ipforwarding != 0 && V_ipsendredirects == 0 573fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 574fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 575fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 576ad9f4d6aSAndrey V. Elsukov #endif 577ad9f4d6aSAndrey V. Elsukov ) { 578ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 57933872124SGeorge V. Neville-Neil return; 580ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 581ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 582ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 583ad9f4d6aSAndrey V. Elsukov goto ours; 584ad9f4d6aSAndrey V. Elsukov } 585ad9f4d6aSAndrey V. Elsukov } 586fcf59617SAndrey V. Elsukov 587fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 58814dd6717SSam Leffler /* 589ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 59014dd6717SSam Leffler */ 591fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 592fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 593c21fd232SAndre Oppermann goto passin; 594ad9f4d6aSAndrey V. Elsukov #endif 595fcf59617SAndrey V. Elsukov 596c4ac87eaSDarren Reed /* 597134ea224SSam Leffler * Run through list of hooks for input packets. 598f51f805fSSam Leffler * 599f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 600f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 601f51f805fSSam Leffler * ip_forward to do the right thing. 602c4ac87eaSDarren Reed */ 603c21fd232SAndre Oppermann 604c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. 
*/ 605b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 606c21fd232SAndre Oppermann goto passin; 607c21fd232SAndre Oppermann 608f51f805fSSam Leffler odst = ip->ip_dst; 609b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 610b252313fSGleb Smirnoff PFIL_PASS) 611beec8214SDarren Reed return; 612134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 613c4ac87eaSDarren Reed return; 6149b932e9eSAndre Oppermann 615c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 61602c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6170aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 6189b932e9eSAndre Oppermann 6199b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6209b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6219b932e9eSAndre Oppermann goto ours; 6229b932e9eSAndre Oppermann } 623ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 624de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 625099dd043SAndre Oppermann /* 626ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 627ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 628ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 629099dd043SAndre Oppermann */ 630ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 631099dd043SAndre Oppermann return; 632099dd043SAndre Oppermann } 633ffdbf9daSAndrey V. Elsukov } 634c21fd232SAndre Oppermann passin: 63521d172a3SGleb Smirnoff 63621d172a3SGleb Smirnoff /* 637df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 638df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 639df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 640df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 641df8bae1dSRodney W. 
Grimes */ 6429b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 643c67b1d17SGarrett Wollman return; 644df8bae1dSRodney W. Grimes 645f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 646f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 647f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 648f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 649f0068c4aSGarrett Wollman * grabbing the packet. 650f0068c4aSGarrett Wollman */ 651603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 652f0068c4aSGarrett Wollman goto ours; 653f0068c4aSGarrett Wollman 654df8bae1dSRodney W. Grimes /* 655df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 656cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 657cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 658cc766e04SGarrett Wollman * with it). 659df8bae1dSRodney W. Grimes */ 660d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 661cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 662cc766e04SGarrett Wollman goto ours; 663cc766e04SGarrett Wollman 6647538a9a0SJonathan Lemon /* 665823db0e9SDon Lewis * Enable a consistency check between the destination address 666823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 667823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 668e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 669e15ae1b2SDon Lewis * 'ipfw fwd'. 6703f67c834SDon Lewis * 6713f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 6723f67c834SDon Lewis * address is ipnat'ed to a different interface. 
6733f67c834SDon Lewis * 674a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 6753f67c834SDon Lewis * to the loopback interface instead of the interface where 6763f67c834SDon Lewis * the packets are received. 677a9771948SGleb Smirnoff * 678a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 679a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 680a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 681823db0e9SDon Lewis */ 682603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 6830aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 68454bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 685823db0e9SDon Lewis 686ca925d9cSJonathan Lemon /* 687ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 688ca925d9cSJonathan Lemon */ 6891a5995ccSEugene Grosbein IN_IFADDR_RLOCK(&in_ifa_tracker); 6909b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 691f9e354dfSJulian Elischer /* 692823db0e9SDon Lewis * If the address matches, verify that the packet 693823db0e9SDon Lewis * arrived via the correct interface if checking is 694823db0e9SDon Lewis * enabled. 695f9e354dfSJulian Elischer */ 6969b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6978c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6987caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6997caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7007caf4ab7SGleb Smirnoff m->m_pkthdr.len); 7011a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 702ed1ff184SJulian Elischer goto ours; 703ca925d9cSJonathan Lemon } 7048c0fec80SRobert Watson } 7051a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 7062d9cfabaSRobert Watson 707823db0e9SDon Lewis /* 708ca925d9cSJonathan Lemon * Check for broadcast addresses. 
709ca925d9cSJonathan Lemon * 710ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 711ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 712ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 713ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 714823db0e9SDon Lewis */ 7150aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 716d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 717ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 718ca925d9cSJonathan Lemon continue; 719ca925d9cSJonathan Lemon ia = ifatoia(ifa); 720df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7210aade26eSRobert Watson ip->ip_dst.s_addr) { 7227caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7237caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7247caf4ab7SGleb Smirnoff m->m_pkthdr.len); 725df8bae1dSRodney W. Grimes goto ours; 7260aade26eSRobert Watson } 7270ac40133SBrian Somers #ifdef BOOTP_COMPAT 7280aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7297caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7307caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7317caf4ab7SGleb Smirnoff m->m_pkthdr.len); 732ca925d9cSJonathan Lemon goto ours; 7330aade26eSRobert Watson } 7340ac40133SBrian Somers #endif 735df8bae1dSRodney W. Grimes } 73619e5b0a7SRobert Watson ia = NULL; 737df8bae1dSRodney W. Grimes } 738f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 739f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 74086425c62SRobert Watson IPSTAT_INC(ips_cantforward); 741f8429ca2SBruce M Simpson m_freem(m); 742f8429ca2SBruce M Simpson return; 743f8429ca2SBruce M Simpson } 744df8bae1dSRodney W. 
Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 745603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 746df8bae1dSRodney W. Grimes /* 747df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 748df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 749df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 750df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 751df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 752df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 753df8bae1dSRodney W. Grimes */ 7540aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 75586425c62SRobert Watson IPSTAT_INC(ips_cantforward); 756df8bae1dSRodney W. Grimes m_freem(m); 757c67b1d17SGarrett Wollman return; 758df8bae1dSRodney W. Grimes } 759df8bae1dSRodney W. Grimes 760df8bae1dSRodney W. Grimes /* 76111612afaSDima Dorfman * The process-level routing daemon needs to receive 762df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 763df8bae1dSRodney W. Grimes * host belongs to their destination groups. 764df8bae1dSRodney W. Grimes */ 765df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 766df8bae1dSRodney W. Grimes goto ours; 76786425c62SRobert Watson IPSTAT_INC(ips_forward); 768df8bae1dSRodney W. Grimes } 769df8bae1dSRodney W. Grimes /* 770d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 771d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 772d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 773df8bae1dSRodney W. Grimes */ 774df8bae1dSRodney W. Grimes goto ours; 775df8bae1dSRodney W. Grimes } 776df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 777df8bae1dSRodney W. Grimes goto ours; 778df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 779df8bae1dSRodney W. 
Grimes goto ours; 780df8bae1dSRodney W. Grimes 7816a800098SYoshinobu Inoue /* 782df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 783df8bae1dSRodney W. Grimes */ 784603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 78586425c62SRobert Watson IPSTAT_INC(ips_cantforward); 786df8bae1dSRodney W. Grimes m_freem(m); 787546f251bSChris D. Faulhaber } else { 7889b932e9eSAndre Oppermann ip_forward(m, dchg); 789546f251bSChris D. Faulhaber } 790c67b1d17SGarrett Wollman return; 791df8bae1dSRodney W. Grimes 792df8bae1dSRodney W. Grimes ours: 793d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 794d0ebc0d2SYaroslav Tykhiy /* 795d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 796d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 797d0ebc0d2SYaroslav Tykhiy */ 7987caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 799d0ebc0d2SYaroslav Tykhiy return; 800d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 801d0ebc0d2SYaroslav Tykhiy 80263f8d699SJordan K. Hubbard /* 803b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 804ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 805df8bae1dSRodney W. Grimes */ 8068f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 807aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 808f0cada84SAndre Oppermann m = ip_reass(m); 809f0cada84SAndre Oppermann if (m == NULL) 810c67b1d17SGarrett Wollman return; 8116a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8127e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 81353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 814f0cada84SAndre Oppermann } 815f0cada84SAndre Oppermann 816fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 817fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 818fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 819fcf59617SAndrey V. 
Elsukov return; 820fcf59617SAndrey V. Elsukov } 821b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 82233841545SHajimu UMEMOTO 823df8bae1dSRodney W. Grimes /* 824df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 825df8bae1dSRodney W. Grimes */ 82686425c62SRobert Watson IPSTAT_INC(ips_delivered); 8279b932e9eSAndre Oppermann 8288f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 829c67b1d17SGarrett Wollman return; 830df8bae1dSRodney W. Grimes bad: 831df8bae1dSRodney W. Grimes m_freem(m); 832c67b1d17SGarrett Wollman } 833c67b1d17SGarrett Wollman 834c67b1d17SGarrett Wollman /* 835df8bae1dSRodney W. Grimes * IP timer processing; 836df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 837df8bae1dSRodney W. Grimes * queue, discard it. 838df8bae1dSRodney W. Grimes */ 839df8bae1dSRodney W. Grimes void 840f2565d68SRobert Watson ip_slowtimo(void) 841df8bae1dSRodney W. Grimes { 8428b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 843df8bae1dSRodney W. Grimes 8445ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8458b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8468b615593SMarko Zec CURVNET_SET(vnet_iter); 8471dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8488b615593SMarko Zec CURVNET_RESTORE(); 8498b615593SMarko Zec } 8505ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 851df8bae1dSRodney W. Grimes } 852df8bae1dSRodney W. Grimes 8539802380eSBjoern A. Zeeb void 8549802380eSBjoern A. Zeeb ip_drain(void) 8559802380eSBjoern A. Zeeb { 8569802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8579802380eSBjoern A. Zeeb 8589802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8599802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8609802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8611dbefcc0SGleb Smirnoff ipreass_drain(); 8628b615593SMarko Zec CURVNET_RESTORE(); 8638b615593SMarko Zec } 8645ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 865df8bae1dSRodney W. Grimes } 866df8bae1dSRodney W. Grimes 867df8bae1dSRodney W. 
/*
 * The protocol to be inserted into ip_protox[] must be already registered
 * in inetsw[], either statically or through pf_proto_register().
 *
 * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range or has
 * no matching PF_INET entry in the domain's protosw table, EPFNOSUPPORT if
 * the raw IP protosw entry cannot be found, and EEXIST if the slot is
 * already claimed by another protocol.
 */
int
ipproto_register(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/*
	 * The protocol slot must not be occupied by another protocol
	 * already.  An index pointing to IPPROTO_RAW is unused.
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/* Find the protocol position in inetsw[] and set the index. */
	for (pr = inetdomain.dom_protosw;
	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol == ipproto) {
			ip_protox[pr->pr_protocol] = pr - inetsw;
			return (0);
		}
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): point the ip_protox[] slot for ipproto back at
 * the raw IP protosw entry.
 *
 * Returns 0 on success, EPROTONOSUPPORT if ipproto is out of range,
 * EPFNOSUPPORT if the raw IP protosw entry cannot be found, and ENOENT if
 * the slot already points at the raw entry (i.e. nothing was registered).
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/* Check if the protocol was indeed registered. */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] == pr - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

/*
 * Errno value associated with each PRC_* control command, indexed by
 * command number; 0 means no error is reported for that command.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 *
 * The caller must be inside the network epoch (NET_EPOCH_ASSERT()).
 * This function does not touch `m' again after it has been passed to
 * m_freem(), icmp_error(), ip_output(), or to IPSEC_FORWARD() when that
 * returns non-zero.  The route looked up into the local `ro' is released
 * with RO_RTFREE() on every path that returns after the lookup.
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct mbuf *mcopy;
	struct sockaddr_in *sin;
	struct in_addr dest;
	struct route ro;
	int error, type = 0, code = 0, mtu = 0;

	NET_EPOCH_ASSERT();

	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
	if (
#ifdef IPSTEALTH
	    V_ipstealth == 0 &&
#endif
	    ip->ip_ttl <= IPTTLDEC) {
		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
		return;
	}

	/* Look up the route towards the packet's destination. */
	bzero(&ro, sizeof(ro));
	sin = (struct sockaddr_in *)&ro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = ip->ip_dst;
#ifdef RADIX_MPATH
	rtalloc_mpath_fib(&ro,
	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
	    M_GETFIB(m));
#else
	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
#endif
	if (ro.ro_rt != NULL) {
		ia = ifatoia(ro.ro_rt->rt_ifa);
	} else
		ia = NULL;
	/*
	 * Save the IP header and at most 8 bytes of the payload,
	 * in case we need to generate an ICMP message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary.  The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copym() because it might return a reference
	 * to a shared cluster.  Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	mcopy = m_gethdr(M_NOWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}
#ifdef IPSTEALTH
	if (V_ipstealth == 0)
#endif
		ip->ip_ttl -= IPTTLDEC;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	if (IPSEC_ENABLED(ipv4)) {
		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
			/* mbuf consumed by IPsec */
			/*
			 * Release the route looked up above; this early
			 * return never reaches the RO_RTFREE() that
			 * follows ip_output().
			 */
			RO_RTFREE(&ro);
			m_freem(mcopy);
			if (error != EINPROGRESS)
				IPSTAT_INC(ips_cantforward);
			return;
		}
		/* No IPsec processing required */
	}
#endif /* IPSEC */
	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	if (!srcrt && V_ipsendredirects &&
	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct rtentry *rt;

		rt = ro.ro_rt;

		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
			u_long src = ntohl(ip->ip_src.s_addr);

			if (RTA(rt) &&
			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
				if (rt->rt_flags & RTF_GATEWAY)
					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
				else
					dest.s_addr = ip->ip_dst.s_addr;
				/* Router requirements says to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
			}
		}
	}

	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);

	/* Remember the route MTU before the route is released. */
	if (error == EMSGSIZE && ro.ro_rt)
		mtu = ro.ro_rt->rt_mtu;
	RO_RTFREE(&ro);

	if (error)
		IPSTAT_INC(ips_cantforward);
	else {
		IPSTAT_INC(ips_forward);
		if (type)
			IPSTAT_INC(ips_redirectsent);
		else {
			/* Forwarded and no redirect needed: copy is unused. */
			if (mcopy)
				m_freem(mcopy);
			return;
		}
	}
	/* Without the saved copy there is nothing to build an ICMP from. */
	if (mcopy == NULL)
		return;


	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
		/*
		 * If the MTU was set before make sure we are below the
		 * interface MTU.
		 * If the MTU wasn't set before use the interface mtu or
		 * fall back to the next smaller mtu step compared to the
		 * current packet size.
		 */
		if (mtu != 0) {
			if (ia != NULL)
				mtu = min(mtu, ia->ia_ifp->if_mtu);
		} else {
			if (ia != NULL)
				mtu = ia->ia_ifp->if_mtu;
			else
				mtu = ip_next_mtu(ntohs(ip->ip_len), 0);
		}
		IPSTAT_INC(ips_cantfrag);
		break;

	case ENOBUFS:
	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		return;
	}
	icmp_error(mcopy, type, code, dest.s_addr, mtu);
}

#define	CHECK_SO_CT(sp, ct) \
    (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ?
1 : 0) 1139339efd75SMaxim Sobolev 114082c23ebaSBill Fenner void 1141f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1142f2565d68SRobert Watson struct mbuf *m) 114382c23ebaSBill Fenner { 114406193f0bSKonstantin Belousov bool stamped; 11458b615593SMarko Zec 114606193f0bSKonstantin Belousov stamped = false; 1147339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1148339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 114906193f0bSKonstantin Belousov struct bintime boottimebin, bt; 115006193f0bSKonstantin Belousov struct timespec ts1; 1151be8a62e8SPoul-Henning Kamp 115206193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 115306193f0bSKonstantin Belousov M_TSTMP)) { 115406193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 115506193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 115606193f0bSKonstantin Belousov getboottimebin(&boottimebin); 115706193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 115806193f0bSKonstantin Belousov } else { 1159be8a62e8SPoul-Henning Kamp bintime(&bt); 116006193f0bSKonstantin Belousov } 1161be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1162be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 116306193f0bSKonstantin Belousov if (*mp != NULL) { 1164be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 116506193f0bSKonstantin Belousov stamped = true; 116606193f0bSKonstantin Belousov } 1167be8a62e8SPoul-Henning Kamp } 1168339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 116906193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 117006193f0bSKonstantin Belousov struct timespec ts1;; 117182c23ebaSBill Fenner struct timeval tv; 117282c23ebaSBill Fenner 117306193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 117406193f0bSKonstantin Belousov M_TSTMP)) { 117506193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 
117606193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 117706193f0bSKonstantin Belousov getboottimebin(&boottimebin); 117806193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 117906193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 118006193f0bSKonstantin Belousov } else { 1181339efd75SMaxim Sobolev microtime(&tv); 118206193f0bSKonstantin Belousov } 118382c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 118482c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 118506193f0bSKonstantin Belousov if (*mp != NULL) { 118682c23ebaSBill Fenner mp = &(*mp)->m_next; 118706193f0bSKonstantin Belousov stamped = true; 118806193f0bSKonstantin Belousov } 1189339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 119006193f0bSKonstantin Belousov struct bintime boottimebin; 119106193f0bSKonstantin Belousov struct timespec ts, ts1; 1192339efd75SMaxim Sobolev 119306193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 119406193f0bSKonstantin Belousov M_TSTMP)) { 119506193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 119606193f0bSKonstantin Belousov getboottimebin(&boottimebin); 119706193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 11986040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 119906193f0bSKonstantin Belousov } else { 1200339efd75SMaxim Sobolev nanotime(&ts); 120106193f0bSKonstantin Belousov } 1202339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1203339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 120406193f0bSKonstantin Belousov if (*mp != NULL) { 1205339efd75SMaxim Sobolev mp = &(*mp)->m_next; 120606193f0bSKonstantin Belousov stamped = true; 120706193f0bSKonstantin Belousov } 1208339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1209339efd75SMaxim Sobolev struct timespec ts; 1210339efd75SMaxim Sobolev 121106193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 
121206193f0bSKonstantin Belousov M_TSTMP)) 121306193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 121406193f0bSKonstantin Belousov else 1215339efd75SMaxim Sobolev nanouptime(&ts); 1216339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1217339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 121806193f0bSKonstantin Belousov if (*mp != NULL) { 121906193f0bSKonstantin Belousov mp = &(*mp)->m_next; 122006193f0bSKonstantin Belousov stamped = true; 122106193f0bSKonstantin Belousov } 122206193f0bSKonstantin Belousov } 122306193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 122406193f0bSKonstantin Belousov M_TSTMP)) { 122506193f0bSKonstantin Belousov struct sock_timestamp_info sti; 122606193f0bSKonstantin Belousov 122706193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 122806193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 122906193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 123006193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 123106193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 123206193f0bSKonstantin Belousov SOL_SOCKET); 123306193f0bSKonstantin Belousov if (*mp != NULL) 1234339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1235be8a62e8SPoul-Henning Kamp } 123682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 123782c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 123882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 123982c23ebaSBill Fenner if (*mp) 124082c23ebaSBill Fenner mp = &(*mp)->m_next; 124182c23ebaSBill Fenner } 12424957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12434957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12444957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12454957466bSMatthew N. Dodd if (*mp) 12464957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12474957466bSMatthew N. 
Dodd } 124882c23ebaSBill Fenner #ifdef notyet 124982c23ebaSBill Fenner /* XXX 125082c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 125182c23ebaSBill Fenner * than they already were. 125282c23ebaSBill Fenner */ 125382c23ebaSBill Fenner /* options were tossed already */ 125482c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 125582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 125682c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 125782c23ebaSBill Fenner if (*mp) 125882c23ebaSBill Fenner mp = &(*mp)->m_next; 125982c23ebaSBill Fenner } 126082c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 126182c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1262e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 126382c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 126482c23ebaSBill Fenner if (*mp) 126582c23ebaSBill Fenner mp = &(*mp)->m_next; 126682c23ebaSBill Fenner } 126782c23ebaSBill Fenner #endif 126882c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1269d314ad7bSJulian Elischer struct ifnet *ifp; 1270d314ad7bSJulian Elischer struct sdlbuf { 127182c23ebaSBill Fenner struct sockaddr_dl sdl; 1272d314ad7bSJulian Elischer u_char pad[32]; 1273d314ad7bSJulian Elischer } sdlbuf; 1274d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1275d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 127682c23ebaSBill Fenner 127746f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 127846f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12794a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1280d314ad7bSJulian Elischer /* 1281d314ad7bSJulian Elischer * Change our mind and don't try copy. 
1282d314ad7bSJulian Elischer */ 128346f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 128446f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1285d314ad7bSJulian Elischer goto makedummy; 1286d314ad7bSJulian Elischer } 1287d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1288d314ad7bSJulian Elischer } else { 1289d314ad7bSJulian Elischer makedummy: 129046f2df9cSSergey Kandaurov sdl2->sdl_len = 129146f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1292d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1293d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1294d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1295d314ad7bSJulian Elischer } 1296d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 129782c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 129882c23ebaSBill Fenner if (*mp) 129982c23ebaSBill Fenner mp = &(*mp)->m_next; 130082c23ebaSBill Fenner } 13013cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 13023cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 13033cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 13043cca425bSMichael Tuexen if (*mp) 13053cca425bSMichael Tuexen mp = &(*mp)->m_next; 13063cca425bSMichael Tuexen } 13079d3ddf43SAdrian Chadd 13089d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13099d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13109d3ddf43SAdrian Chadd 13119d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13129d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13139d3ddf43SAdrian Chadd 13149d3ddf43SAdrian Chadd /* 13159d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13169d3ddf43SAdrian Chadd * other - don't populate both? 
13179d3ddf43SAdrian Chadd */ 13189d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13199d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13209d3ddf43SAdrian Chadd if (*mp) 13219d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13229d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13239d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13249d3ddf43SAdrian Chadd if (*mp) 13259d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13269d3ddf43SAdrian Chadd } 13279d3ddf43SAdrian Chadd 13289d3ddf43SAdrian Chadd #ifdef RSS 13299d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13309d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13319d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13329d3ddf43SAdrian Chadd 13339d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13349d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13359d3ddf43SAdrian Chadd 13369d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13379d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13389d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13399d3ddf43SAdrian Chadd if (*mp) 13409d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13419d3ddf43SAdrian Chadd } 13429d3ddf43SAdrian Chadd } 13439d3ddf43SAdrian Chadd #endif 134482c23ebaSBill Fenner } 134582c23ebaSBill Fenner 13464d2e3692SLuigi Rizzo /* 134730916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 134830916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 134930916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 135030916a2dSRobert Watson * compiled. 13514d2e3692SLuigi Rizzo */ 13525f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 135382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 135482cea7e6SBjoern A. Zeeb 135582cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 135682cea7e6SBjoern A. 
Zeeb 1357df8bae1dSRodney W. Grimes int 1358f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1359f0068c4aSGarrett Wollman { 13608b615593SMarko Zec 1361f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1362f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1363f0068c4aSGarrett Wollman return EOPNOTSUPP; 1364f0068c4aSGarrett Wollman 1365603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1366f0068c4aSGarrett Wollman return EADDRINUSE; 1367f0068c4aSGarrett Wollman 1368603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13691c5de19aSGarrett Wollman /* 13701c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13711c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13721c5de19aSGarrett Wollman */ 1373603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1374603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1375603724d3SBjoern A. Zeeb V_rsvp_on++; 13761c5de19aSGarrett Wollman } 1377f0068c4aSGarrett Wollman 1378f0068c4aSGarrett Wollman return 0; 1379f0068c4aSGarrett Wollman } 1380f0068c4aSGarrett Wollman 1381f0068c4aSGarrett Wollman int 1382f0068c4aSGarrett Wollman ip_rsvp_done(void) 1383f0068c4aSGarrett Wollman { 13848b615593SMarko Zec 1385603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13861c5de19aSGarrett Wollman /* 13871c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13881c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13891c5de19aSGarrett Wollman */ 1390603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1391603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1392603724d3SBjoern A. 
Zeeb V_rsvp_on--; 13931c5de19aSGarrett Wollman } 1394f0068c4aSGarrett Wollman return 0; 1395f0068c4aSGarrett Wollman } 1396bbb4330bSLuigi Rizzo 13978f5a8818SKevin Lo int 13988f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1399bbb4330bSLuigi Rizzo { 14008f5a8818SKevin Lo struct mbuf *m; 14018f5a8818SKevin Lo 14028f5a8818SKevin Lo m = *mp; 14038f5a8818SKevin Lo *mp = NULL; 14048b615593SMarko Zec 1405bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14068f5a8818SKevin Lo *mp = m; 14078f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14088f5a8818SKevin Lo return (IPPROTO_DONE); 1409bbb4330bSLuigi Rizzo } 1410bbb4330bSLuigi Rizzo 1411bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1412bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1413bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1414bbb4330bSLuigi Rizzo */ 1415bbb4330bSLuigi Rizzo 1416603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1417bbb4330bSLuigi Rizzo m_freem(m); 14188f5a8818SKevin Lo return (IPPROTO_DONE); 1419bbb4330bSLuigi Rizzo } 1420bbb4330bSLuigi Rizzo 1421603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14228f5a8818SKevin Lo *mp = m; 14238f5a8818SKevin Lo rip_input(mp, offp, proto); 14248f5a8818SKevin Lo return (IPPROTO_DONE); 1425bbb4330bSLuigi Rizzo } 1426bbb4330bSLuigi Rizzo /* Drop the packet */ 1427bbb4330bSLuigi Rizzo m_freem(m); 14288f5a8818SKevin Lo return (IPPROTO_DONE); 1429bbb4330bSLuigi Rizzo } 1430