1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66983066f0SAlexander V. Chernikov #include <net/route/nhop.h> 67748e0b0aSGarrett Wollman #include <net/netisr.h> 68b2bdc62aSAdrian Chadd #include <net/rss_config.h> 694b79449eSBjoern A. Zeeb #include <net/vnet.h> 70df8bae1dSRodney W. Grimes 71df8bae1dSRodney W. Grimes #include <netinet/in.h> 7257f60867SMark Johnston #include <netinet/in_kdtrace.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 74b5e8ce9fSBruce Evans #include <netinet/in_var.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip.h> 76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h> 77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 79eddfbb76SRobert Watson #include <netinet/ip_fw.h> 80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 81ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8258938916SGarrett Wollman #include <machine/in_cksum.h> 83a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 85*65634ae7SWojciech Macek #include <netinet/ip_mroute.h> 86df8bae1dSRodney W. Grimes 87fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 88fcf59617SAndrey V. Elsukov 89f0068c4aSGarrett Wollman #include <sys/socketvar.h> 906ddbf1e2SGary Palmer 91aed55708SRobert Watson #include <security/mac/mac_framework.h> 92aed55708SRobert Watson 93d2035ffbSEd Maste #ifdef CTASSERT 94d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 95d2035ffbSEd Maste #endif 96d2035ffbSEd Maste 971dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 98843b0e57SXin LI extern void ipreass_init(void); 99843b0e57SXin LI extern void ipreass_drain(void); 100843b0e57SXin LI extern void ipreass_slowtimo(void); 1011dbefcc0SGleb Smirnoff #ifdef VIMAGE 102843b0e57SXin LI extern void ipreass_destroy(void); 1031dbefcc0SGleb Smirnoff #endif 1041dbefcc0SGleb Smirnoff 105cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 106cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 107f0068c4aSGarrett Wollman 10882cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10982cea7e6SBjoern A. Zeeb 11082cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1116df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 112eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1138b615593SMarko Zec "Enable IP forwarding between interfaces"); 1140312fbe9SPoul-Henning Kamp 1158ad114c0SGeorge V. Neville-Neil /* 1168ad114c0SGeorge V. Neville-Neil * Respond with an ICMP host redirect when we forward a packet out of 1178ad114c0SGeorge V. Neville-Neil * the same interface on which it was received. See RFC 792. 1188ad114c0SGeorge V. Neville-Neil */ 1198ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1; 1206df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 121eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1228b615593SMarko Zec "Enable sending IP redirects"); 1230312fbe9SPoul-Henning Kamp 124823db0e9SDon Lewis /* 125823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 126823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 127a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 128823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1293f67c834SDon Lewis * 130a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 131a8f12100SDon Lewis * to translate the destination address to another local interface. 1323f67c834SDon Lewis * 1333f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1343f67c834SDon Lewis * to the loopback interface instead of the interface where the 1353f67c834SDon Lewis * packets for those addresses are received. 136823db0e9SDon Lewis */ 1375f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_checkinterface); 13882cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1396df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 140eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1418b615593SMarko Zec "Verify packet arrives on correct interface"); 142b3e95d4eSJonathan Lemon 143b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 144df8bae1dSRodney W. Grimes 145d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 146d4b5cae4SRobert Watson .nh_name = "ip", 147d4b5cae4SRobert Watson .nh_handler = ip_input, 148d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 149b8bc95cdSAdrian Chadd #ifdef RSS 1502527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 151b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 152b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 153b8bc95cdSAdrian Chadd #else 154d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 155b8bc95cdSAdrian Chadd #endif 156d4b5cae4SRobert Watson }; 157ca925d9cSJonathan Lemon 158b8bc95cdSAdrian Chadd #ifdef RSS 159b8bc95cdSAdrian Chadd /* 160b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 161b8bc95cdSAdrian Chadd * to have a flowid already calculated. 162b8bc95cdSAdrian Chadd * 163b8bc95cdSAdrian Chadd * It should likely have something that assert it 164b8bc95cdSAdrian Chadd * actually has valid flow details. 165b8bc95cdSAdrian Chadd */ 166b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 167b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 168b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 169b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 170499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 171b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 172b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 173b8bc95cdSAdrian Chadd }; 174b8bc95cdSAdrian Chadd #endif 175b8bc95cdSAdrian Chadd 176df8bae1dSRodney W. Grimes extern struct domain inetdomain; 177f0ffb944SJulian Elischer extern struct protosw inetsw[]; 178df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 18082cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 18182cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 182ca925d9cSJonathan Lemon 1830312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1840312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1853d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1860312fbe9SPoul-Henning Kamp #endif 1870312fbe9SPoul-Henning Kamp 1881b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1906df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 191eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 192eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1931b968362SDag-Erling Smørgrav #endif 194eddfbb76SRobert Watson 195315e3e38SRobert Watson /* 1965da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1975923c293SGleb Smirnoff */ 1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1995da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 2005da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 2015da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 2025923c293SGleb Smirnoff 2035923c293SGleb Smirnoff #ifdef VIMAGE 2045da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 2055923c293SGleb Smirnoff #endif /* VIMAGE */ 2065923c293SGleb Smirnoff 2075923c293SGleb Smirnoff /* 208315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2095923c293SGleb Smirnoff * into ipstat treated as an array. 210315e3e38SRobert Watson */ 211315e3e38SRobert Watson void 212315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 213315e3e38SRobert Watson { 214315e3e38SRobert Watson 2155da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 216315e3e38SRobert Watson } 217315e3e38SRobert Watson 218315e3e38SRobert Watson void 219315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 220315e3e38SRobert Watson { 221315e3e38SRobert Watson 2225da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 223315e3e38SRobert Watson } 224315e3e38SRobert Watson 225d4b5cae4SRobert Watson static int 226d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 227d4b5cae4SRobert Watson { 228d4b5cae4SRobert Watson int error, qlimit; 229d4b5cae4SRobert Watson 230d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 231d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 232d4b5cae4SRobert Watson if (error || !req->newptr) 233d4b5cae4SRobert Watson return (error); 234d4b5cae4SRobert Watson if (qlimit < 1) 235d4b5cae4SRobert Watson return (EINVAL); 236d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 237d4b5cae4SRobert Watson } 238d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 2397029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2407029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 241d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 242d4b5cae4SRobert Watson 243d4b5cae4SRobert Watson static int 244d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 245d4b5cae4SRobert Watson { 246d4b5cae4SRobert Watson u_int64_t qdrops_long; 247d4b5cae4SRobert Watson int error, qdrops; 248d4b5cae4SRobert Watson 249d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 250d4b5cae4SRobert Watson qdrops = qdrops_long; 251d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 252d4b5cae4SRobert Watson if (error || !req->newptr) 253d4b5cae4SRobert Watson return (error); 254d4b5cae4SRobert Watson if (qdrops != 0) 255d4b5cae4SRobert Watson return (EINVAL); 256d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 257d4b5cae4SRobert Watson return (0); 258d4b5cae4SRobert Watson } 259d4b5cae4SRobert Watson 260d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 2617029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 2627029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 263d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 264d4b5cae4SRobert Watson 265b8bc95cdSAdrian Chadd #ifdef RSS 266b8bc95cdSAdrian Chadd static int 267b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 268b8bc95cdSAdrian Chadd { 269b8bc95cdSAdrian Chadd int error, qlimit; 270b8bc95cdSAdrian Chadd 271b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 272b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 273b8bc95cdSAdrian Chadd if (error || !req->newptr) 274b8bc95cdSAdrian Chadd return (error); 275b8bc95cdSAdrian Chadd if (qlimit < 1) 276b8bc95cdSAdrian Chadd return (EINVAL); 277b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 278b8bc95cdSAdrian Chadd } 2797faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2807029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2817029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2827faa0d21SAndrey V. Elsukov "I", "Maximum size of the IP direct input queue"); 283b8bc95cdSAdrian Chadd 284b8bc95cdSAdrian Chadd static int 285b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 286b8bc95cdSAdrian Chadd { 287b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 288b8bc95cdSAdrian Chadd int error, qdrops; 289b8bc95cdSAdrian Chadd 290b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 291b8bc95cdSAdrian Chadd qdrops = qdrops_long; 292b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 293b8bc95cdSAdrian Chadd if (error || !req->newptr) 294b8bc95cdSAdrian Chadd return (error); 295b8bc95cdSAdrian Chadd if (qdrops != 0) 296b8bc95cdSAdrian Chadd return (EINVAL); 297b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 298b8bc95cdSAdrian Chadd return (0); 299b8bc95cdSAdrian Chadd } 300b8bc95cdSAdrian Chadd 3017faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 3027029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 3037029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 304b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 305b8bc95cdSAdrian Chadd #endif /* RSS */ 306b8bc95cdSAdrian Chadd 307df8bae1dSRodney W. Grimes /* 308df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 309df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 310df8bae1dSRodney W. Grimes */ 311df8bae1dSRodney W. Grimes void 312f2565d68SRobert Watson ip_init(void) 313df8bae1dSRodney W. Grimes { 314b252313fSGleb Smirnoff struct pfil_head_args args; 315f2565d68SRobert Watson struct protosw *pr; 316f2565d68SRobert Watson int i; 317df8bae1dSRodney W. Grimes 318d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 319603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3201ed81b73SMarko Zec 3211ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3221dbefcc0SGleb Smirnoff ipreass_init(); 3231ed81b73SMarko Zec 3240b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 325b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 326b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 327b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 328b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 329b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3300b4b0b0fSJulian Elischer 331ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 332ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 333ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 334ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 335ef91a976SAndrey V. Elsukov __func__); 336ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 337ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 338ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 339ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 340ef91a976SAndrey V. Elsukov __func__); 341ef91a976SAndrey V. Elsukov 3421ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 343484149deSBjoern A. Zeeb #ifdef VIMAGE 344484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 345484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 346484149deSBjoern A. Zeeb #ifdef RSS 347484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 348484149deSBjoern A. Zeeb #endif 3491ed81b73SMarko Zec return; 350484149deSBjoern A. Zeeb } 351484149deSBjoern A. Zeeb #endif 3521ed81b73SMarko Zec 353f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 35402410549SRobert Watson if (pr == NULL) 355db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 356db09bef3SAndre Oppermann 357db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 358df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 359df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 360db09bef3SAndre Oppermann /* 361db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 362db09bef3SAndre Oppermann * in ip_protox[]. 363db09bef3SAndre Oppermann */ 364f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 365f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 366df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 367db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 368db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 369db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 370df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 371db09bef3SAndre Oppermann } 372194a213eSAndrey A. Chernov 373d4b5cae4SRobert Watson netisr_register(&ip_nh); 374b8bc95cdSAdrian Chadd #ifdef RSS 375b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 376b8bc95cdSAdrian Chadd #endif 377df8bae1dSRodney W. Grimes } 378df8bae1dSRodney W. Grimes 3799802380eSBjoern A. Zeeb #ifdef VIMAGE 3803f58662dSBjoern A. Zeeb static void 3813f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3829802380eSBjoern A. Zeeb { 383ef91a976SAndrey V. Elsukov int error; 3844d3dfd45SMikolaj Golub 385484149deSBjoern A. Zeeb #ifdef RSS 386484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 387484149deSBjoern A. Zeeb #endif 388484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 389484149deSBjoern A. Zeeb 390b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 391ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 392ef91a976SAndrey V. Elsukov if (error != 0) { 393ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 394ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 395ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 396ef91a976SAndrey V. Elsukov } 397ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 398ef91a976SAndrey V. Elsukov if (error != 0) { 399ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 400ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 401ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 402ef91a976SAndrey V. Elsukov } 40389856f7eSBjoern A. Zeeb 40489856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 40589856f7eSBjoern A. Zeeb in_ifscrub_all(); 40689856f7eSBjoern A. Zeeb 40789856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 408b1d63265SAlexander V. Chernikov rib_flush_routes_family(AF_INET); 4099802380eSBjoern A. Zeeb 410e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4111dbefcc0SGleb Smirnoff ipreass_destroy(); 41289856f7eSBjoern A. Zeeb 41389856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 41489856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4159802380eSBjoern A. Zeeb } 4163f58662dSBjoern A. Zeeb 4173f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4189802380eSBjoern A. Zeeb #endif 4199802380eSBjoern A. Zeeb 420b8bc95cdSAdrian Chadd #ifdef RSS 421b8bc95cdSAdrian Chadd /* 422b8bc95cdSAdrian Chadd * IP direct input routine. 423b8bc95cdSAdrian Chadd * 424b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 425b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 426b8bc95cdSAdrian Chadd */ 427b8bc95cdSAdrian Chadd void 428b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 429b8bc95cdSAdrian Chadd { 430b8bc95cdSAdrian Chadd struct ip *ip; 431b8bc95cdSAdrian Chadd int hlen; 432b8bc95cdSAdrian Chadd 433b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 434b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 435b8bc95cdSAdrian Chadd 436fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 437fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 438fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 439fcf59617SAndrey V. Elsukov return; 440fcf59617SAndrey V. Elsukov } 441fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 442b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 443b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 444b8bc95cdSAdrian Chadd return; 445b8bc95cdSAdrian Chadd } 446b8bc95cdSAdrian Chadd #endif 447b8bc95cdSAdrian Chadd 4484d2e3692SLuigi Rizzo /* 449df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 450df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 451df8bae1dSRodney W. Grimes */ 452c67b1d17SGarrett Wollman void 453c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 454df8bae1dSRodney W. Grimes { 455*65634ae7SWojciech Macek MROUTER_RLOCK_TRACKER; 4561a5995ccSEugene Grosbein struct rm_priotracker in_ifa_tracker; 4579188b4a1SAndre Oppermann struct ip *ip = NULL; 4585da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 459ca925d9cSJonathan Lemon struct ifaddr *ifa; 4600aade26eSRobert Watson struct ifnet *ifp; 4619b932e9eSAndre Oppermann int checkif, hlen = 0; 46221d172a3SGleb Smirnoff uint16_t sum, ip_len; 46302c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 464f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 465b715f178SLuigi Rizzo 466fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 467b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 468db40007dSAndrew R. Reiter 469ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 47076ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 47176ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4722b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 47353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4748f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4759b932e9eSAndre Oppermann goto ours; 4762b25acc1SLuigi Rizzo } 4772b25acc1SLuigi Rizzo 47886425c62SRobert Watson IPSTAT_INC(ips_total); 47958938916SGarrett Wollman 48058938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 48158938916SGarrett Wollman goto tooshort; 48258938916SGarrett Wollman 483df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4840b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 48586425c62SRobert Watson IPSTAT_INC(ips_toosmall); 486c67b1d17SGarrett Wollman return; 487df8bae1dSRodney W. Grimes } 488df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48958938916SGarrett Wollman 49053be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 49186425c62SRobert Watson IPSTAT_INC(ips_badvers); 492df8bae1dSRodney W. Grimes goto bad; 493df8bae1dSRodney W. Grimes } 49458938916SGarrett Wollman 49553be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 496df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 49786425c62SRobert Watson IPSTAT_INC(ips_badhlen); 498df8bae1dSRodney W. Grimes goto bad; 499df8bae1dSRodney W. Grimes } 500df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 5010b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 50286425c62SRobert Watson IPSTAT_INC(ips_badhlen); 503c67b1d17SGarrett Wollman return; 504df8bae1dSRodney W. Grimes } 505df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 506df8bae1dSRodney W. Grimes } 50733841545SHajimu UMEMOTO 50857f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50957f60867SMark Johnston 5106c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5110aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5126c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5136c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5140aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 51586425c62SRobert Watson IPSTAT_INC(ips_badaddr); 51633841545SHajimu UMEMOTO goto bad; 51733841545SHajimu UMEMOTO } 51833841545SHajimu UMEMOTO } 51933841545SHajimu UMEMOTO 520db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 521db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 522db4f9cc7SJonathan Lemon } else { 52358938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 52447c861ecSBrian Somers sum = in_cksum_hdr(ip); 52558938916SGarrett Wollman } else { 52647c861ecSBrian Somers sum = in_cksum(m, hlen); 52758938916SGarrett Wollman } 528db4f9cc7SJonathan Lemon } 52947c861ecSBrian Somers if (sum) { 53086425c62SRobert Watson IPSTAT_INC(ips_badsum); 531df8bae1dSRodney W. Grimes goto bad; 532df8bae1dSRodney W. Grimes } 533df8bae1dSRodney W. Grimes 53402b199f1SMax Laier #ifdef ALTQ 53502b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 53602b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53702b199f1SMax Laier return; 53802b199f1SMax Laier #endif 53902b199f1SMax Laier 54021d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 54121d172a3SGleb Smirnoff if (ip_len < hlen) { 54286425c62SRobert Watson IPSTAT_INC(ips_badlen); 543df8bae1dSRodney W. Grimes goto bad; 544df8bae1dSRodney W. Grimes } 545df8bae1dSRodney W. Grimes 546df8bae1dSRodney W. Grimes /* 547df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 548df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 549df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 550df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 551df8bae1dSRodney W. Grimes */ 55221d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 55358938916SGarrett Wollman tooshort: 55486425c62SRobert Watson IPSTAT_INC(ips_tooshort); 555df8bae1dSRodney W. Grimes goto bad; 556df8bae1dSRodney W. Grimes } 55721d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 558df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55921d172a3SGleb Smirnoff m->m_len = ip_len; 56021d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 561df8bae1dSRodney W. Grimes } else 56221d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 563df8bae1dSRodney W. Grimes } 564b8bc95cdSAdrian Chadd 565ad9f4d6aSAndrey V. Elsukov /* 566ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 56762484790SAndrey V. Elsukov * ip_tryforward() does not generate redirects, so fall 56862484790SAndrey V. Elsukov * through to normal processing if redirects are required. 569ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 570ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 571ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 572ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 573ad9f4d6aSAndrey V. Elsukov * ip pointer. 574ad9f4d6aSAndrey V. Elsukov */ 5758ad114c0SGeorge V. Neville-Neil if (V_ipforwarding != 0 576fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 577fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 578fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 579ad9f4d6aSAndrey V. Elsukov #endif 580ad9f4d6aSAndrey V. Elsukov ) { 581ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 58233872124SGeorge V. Neville-Neil return; 583ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 584ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 585ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 586ad9f4d6aSAndrey V. Elsukov goto ours; 587ad9f4d6aSAndrey V. Elsukov } 588ad9f4d6aSAndrey V. Elsukov } 589fcf59617SAndrey V. Elsukov 590fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 59114dd6717SSam Leffler /* 592ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 59314dd6717SSam Leffler */ 594fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 595fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 596c21fd232SAndre Oppermann goto passin; 597ad9f4d6aSAndrey V. Elsukov #endif 598fcf59617SAndrey V. Elsukov 599c4ac87eaSDarren Reed /* 600134ea224SSam Leffler * Run through list of hooks for input packets. 601f51f805fSSam Leffler * 602f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 603f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 604f51f805fSSam Leffler * ip_forward to do the right thing. 605c4ac87eaSDarren Reed */ 606c21fd232SAndre Oppermann 607c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 608b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 609c21fd232SAndre Oppermann goto passin; 610c21fd232SAndre Oppermann 611f51f805fSSam Leffler odst = ip->ip_dst; 612b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 613b252313fSGleb Smirnoff PFIL_PASS) 614beec8214SDarren Reed return; 615134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 616c4ac87eaSDarren Reed return; 6179b932e9eSAndre Oppermann 618c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 61902c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6200aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 6219b932e9eSAndre Oppermann 6229b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6239b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6249b932e9eSAndre Oppermann goto ours; 6259b932e9eSAndre Oppermann } 626ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 627de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 628099dd043SAndre Oppermann /* 629ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 630ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 631ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 632099dd043SAndre Oppermann */ 633ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 634099dd043SAndre Oppermann return; 635099dd043SAndre Oppermann } 636ffdbf9daSAndrey V. Elsukov } 637c21fd232SAndre Oppermann passin: 63821d172a3SGleb Smirnoff 63921d172a3SGleb Smirnoff /* 640df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 641df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 642df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 643df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 644df8bae1dSRodney W. Grimes */ 6459b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 646c67b1d17SGarrett Wollman return; 647df8bae1dSRodney W. Grimes 648f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 649f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 650f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 651f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 652f0068c4aSGarrett Wollman * grabbing the packet. 653f0068c4aSGarrett Wollman */ 654603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 655f0068c4aSGarrett Wollman goto ours; 656f0068c4aSGarrett Wollman 657df8bae1dSRodney W. Grimes /* 658df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 659cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 660cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 661cc766e04SGarrett Wollman * with it). 662df8bae1dSRodney W. Grimes */ 663d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 664cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 665cc766e04SGarrett Wollman goto ours; 666cc766e04SGarrett Wollman 6677538a9a0SJonathan Lemon /* 668823db0e9SDon Lewis * Enable a consistency check between the destination address 669823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 670823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 671e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 672e15ae1b2SDon Lewis * 'ipfw fwd'. 6733f67c834SDon Lewis * 6743f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 6753f67c834SDon Lewis * address is ipnat'ed to a different interface. 6763f67c834SDon Lewis * 677a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 6783f67c834SDon Lewis * to the loopback interface instead of the interface where 6793f67c834SDon Lewis * the packets are received. 680a9771948SGleb Smirnoff * 681a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 682a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 683a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 684823db0e9SDon Lewis */ 685603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 6860aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 68754bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 688823db0e9SDon Lewis 689ca925d9cSJonathan Lemon /* 690ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 691ca925d9cSJonathan Lemon */ 6921a5995ccSEugene Grosbein IN_IFADDR_RLOCK(&in_ifa_tracker); 6939b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 694f9e354dfSJulian Elischer /* 695823db0e9SDon Lewis * If the address matches, verify that the packet 696823db0e9SDon Lewis * arrived via the correct interface if checking is 697823db0e9SDon Lewis * enabled. 698f9e354dfSJulian Elischer */ 6999b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 7008c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 7017caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7027caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7037caf4ab7SGleb Smirnoff m->m_pkthdr.len); 7041a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 705ed1ff184SJulian Elischer goto ours; 706ca925d9cSJonathan Lemon } 7078c0fec80SRobert Watson } 7081a5995ccSEugene Grosbein IN_IFADDR_RUNLOCK(&in_ifa_tracker); 7092d9cfabaSRobert Watson 710823db0e9SDon Lewis /* 711ca925d9cSJonathan Lemon * Check for broadcast addresses. 712ca925d9cSJonathan Lemon * 713ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 714ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 715ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 716ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 717823db0e9SDon Lewis */ 7180aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 719d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 720ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 721ca925d9cSJonathan Lemon continue; 722ca925d9cSJonathan Lemon ia = ifatoia(ifa); 723df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7240aade26eSRobert Watson ip->ip_dst.s_addr) { 7257caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7267caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7277caf4ab7SGleb Smirnoff m->m_pkthdr.len); 728df8bae1dSRodney W. Grimes goto ours; 7290aade26eSRobert Watson } 7300ac40133SBrian Somers #ifdef BOOTP_COMPAT 7310aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7327caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7337caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7347caf4ab7SGleb Smirnoff m->m_pkthdr.len); 735ca925d9cSJonathan Lemon goto ours; 7360aade26eSRobert Watson } 7370ac40133SBrian Somers #endif 738df8bae1dSRodney W. Grimes } 73919e5b0a7SRobert Watson ia = NULL; 740df8bae1dSRodney W. Grimes } 741f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 742f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 74386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 744f8429ca2SBruce M Simpson m_freem(m); 745f8429ca2SBruce M Simpson return; 746f8429ca2SBruce M Simpson } 747df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 748*65634ae7SWojciech Macek MROUTER_RLOCK(); 749603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 750df8bae1dSRodney W. Grimes /* 751df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 752df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 753df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 754df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 755df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 756df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 757df8bae1dSRodney W. Grimes */ 7580aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 759*65634ae7SWojciech Macek MROUTER_RUNLOCK(); 76086425c62SRobert Watson IPSTAT_INC(ips_cantforward); 761df8bae1dSRodney W. Grimes m_freem(m); 762c67b1d17SGarrett Wollman return; 763df8bae1dSRodney W. Grimes } 764df8bae1dSRodney W. Grimes 765df8bae1dSRodney W. Grimes /* 76611612afaSDima Dorfman * The process-level routing daemon needs to receive 767df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 768df8bae1dSRodney W. Grimes * host belongs to their destination groups. 769df8bae1dSRodney W. Grimes */ 770*65634ae7SWojciech Macek if (ip->ip_p == IPPROTO_IGMP) { 771*65634ae7SWojciech Macek MROUTER_RUNLOCK(); 772df8bae1dSRodney W. Grimes goto ours; 773*65634ae7SWojciech Macek } 77486425c62SRobert Watson IPSTAT_INC(ips_forward); 775df8bae1dSRodney W. Grimes } 776*65634ae7SWojciech Macek MROUTER_RUNLOCK(); 777df8bae1dSRodney W. Grimes /* 778d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 779d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 780d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 781df8bae1dSRodney W. Grimes */ 782df8bae1dSRodney W. Grimes goto ours; 783df8bae1dSRodney W. Grimes } 784df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 785df8bae1dSRodney W. Grimes goto ours; 786df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 787df8bae1dSRodney W. Grimes goto ours; 788df8bae1dSRodney W. Grimes 7896a800098SYoshinobu Inoue /* 790df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 791df8bae1dSRodney W. Grimes */ 792603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 79386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 794df8bae1dSRodney W. Grimes m_freem(m); 795546f251bSChris D. Faulhaber } else { 7969b932e9eSAndre Oppermann ip_forward(m, dchg); 797546f251bSChris D. Faulhaber } 798c67b1d17SGarrett Wollman return; 799df8bae1dSRodney W. Grimes 800df8bae1dSRodney W. Grimes ours: 801d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 802d0ebc0d2SYaroslav Tykhiy /* 803d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 804d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 805d0ebc0d2SYaroslav Tykhiy */ 8067caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 807d0ebc0d2SYaroslav Tykhiy return; 808d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 809d0ebc0d2SYaroslav Tykhiy 81063f8d699SJordan K. Hubbard /* 811b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 812ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 813df8bae1dSRodney W. Grimes */ 8148f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 815aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 816f0cada84SAndre Oppermann m = ip_reass(m); 817f0cada84SAndre Oppermann if (m == NULL) 818c67b1d17SGarrett Wollman return; 8196a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8207e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 82153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 822f0cada84SAndre Oppermann } 823f0cada84SAndre Oppermann 824fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 825fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 826fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 827fcf59617SAndrey V. Elsukov return; 828fcf59617SAndrey V. Elsukov } 829b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 83033841545SHajimu UMEMOTO 831df8bae1dSRodney W. Grimes /* 832df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 833df8bae1dSRodney W. Grimes */ 83486425c62SRobert Watson IPSTAT_INC(ips_delivered); 8359b932e9eSAndre Oppermann 8368f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 837c67b1d17SGarrett Wollman return; 838df8bae1dSRodney W. Grimes bad: 839df8bae1dSRodney W. Grimes m_freem(m); 840c67b1d17SGarrett Wollman } 841c67b1d17SGarrett Wollman 842c67b1d17SGarrett Wollman /* 843df8bae1dSRodney W. Grimes * IP timer processing; 844df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 845df8bae1dSRodney W. Grimes * queue, discard it. 846df8bae1dSRodney W. Grimes */ 847df8bae1dSRodney W. Grimes void 848f2565d68SRobert Watson ip_slowtimo(void) 849df8bae1dSRodney W. Grimes { 8508b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 851df8bae1dSRodney W. Grimes 8525ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8538b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8548b615593SMarko Zec CURVNET_SET(vnet_iter); 8551dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8568b615593SMarko Zec CURVNET_RESTORE(); 8578b615593SMarko Zec } 8585ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 859df8bae1dSRodney W. Grimes } 860df8bae1dSRodney W. Grimes 8619802380eSBjoern A. Zeeb void 8629802380eSBjoern A. Zeeb ip_drain(void) 8639802380eSBjoern A. Zeeb { 8649802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8659802380eSBjoern A. Zeeb 8669802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8679802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8689802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8691dbefcc0SGleb Smirnoff ipreass_drain(); 8708b615593SMarko Zec CURVNET_RESTORE(); 8718b615593SMarko Zec } 8725ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 873df8bae1dSRodney W. Grimes } 874df8bae1dSRodney W. Grimes 875df8bae1dSRodney W. Grimes /* 876de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 877de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 878de38924dSAndre Oppermann */ 879de38924dSAndre Oppermann int 8801b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 881de38924dSAndre Oppermann { 882de38924dSAndre Oppermann struct protosw *pr; 883de38924dSAndre Oppermann 884de38924dSAndre Oppermann /* Sanity checks. */ 8851b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 886de38924dSAndre Oppermann return (EPROTONOSUPPORT); 887de38924dSAndre Oppermann 888de38924dSAndre Oppermann /* 889de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 890de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 891de38924dSAndre Oppermann */ 892de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 893de38924dSAndre Oppermann if (pr == NULL) 894de38924dSAndre Oppermann return (EPFNOSUPPORT); 895de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 896de38924dSAndre Oppermann return (EEXIST); 897de38924dSAndre Oppermann 898de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 899de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 900de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 901de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 902de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 903de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 904de38924dSAndre Oppermann return (0); 905de38924dSAndre Oppermann } 906de38924dSAndre Oppermann } 907de38924dSAndre Oppermann return (EPROTONOSUPPORT); 908de38924dSAndre Oppermann } 909de38924dSAndre Oppermann 910de38924dSAndre Oppermann int 9111b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 912de38924dSAndre Oppermann { 913de38924dSAndre Oppermann struct protosw *pr; 914de38924dSAndre Oppermann 915de38924dSAndre Oppermann /* Sanity checks. */ 9161b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 917de38924dSAndre Oppermann return (EPROTONOSUPPORT); 918de38924dSAndre Oppermann 919de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 920de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 921de38924dSAndre Oppermann if (pr == NULL) 922de38924dSAndre Oppermann return (EPFNOSUPPORT); 923de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 924de38924dSAndre Oppermann return (ENOENT); 925de38924dSAndre Oppermann 926de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 927de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 928de38924dSAndre Oppermann return (0); 929de38924dSAndre Oppermann } 930de38924dSAndre Oppermann 931df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 932df8bae1dSRodney W. Grimes 0, 0, 0, 0, 933df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 934df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 935df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 936fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 9373b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 938df8bae1dSRodney W. Grimes }; 939df8bae1dSRodney W. Grimes 940df8bae1dSRodney W. Grimes /* 941df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 942df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 943df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 944df8bae1dSRodney W. Grimes * of codes and types. 945df8bae1dSRodney W. Grimes * 946df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 947df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 948df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 949df8bae1dSRodney W. Grimes * protocol deal with that. 950df8bae1dSRodney W. Grimes * 951df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 952df8bae1dSRodney W. Grimes * via a source route. 953df8bae1dSRodney W. Grimes */ 9549b932e9eSAndre Oppermann void 9559b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 956df8bae1dSRodney W. Grimes { 9572b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 958efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 959df8bae1dSRodney W. Grimes struct mbuf *mcopy; 960d14122b0SErmal Luçi struct sockaddr_in *sin; 9619b932e9eSAndre Oppermann struct in_addr dest; 962b835b6feSBjoern A. Zeeb struct route ro; 9634043ee3cSAlexander V. Chernikov uint32_t flowid; 964c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9653efc3014SJulian Elischer 966b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 967b8a6e03fSGleb Smirnoff 9689b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 96986425c62SRobert Watson IPSTAT_INC(ips_cantforward); 970df8bae1dSRodney W. Grimes m_freem(m); 971df8bae1dSRodney W. Grimes return; 972df8bae1dSRodney W. Grimes } 973fcf59617SAndrey V. Elsukov if ( 974fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH 975fcf59617SAndrey V. Elsukov V_ipstealth == 0 && 976fcf59617SAndrey V. Elsukov #endif 977fcf59617SAndrey V. Elsukov ip->ip_ttl <= IPTTLDEC) { 978fcf59617SAndrey V. Elsukov icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); 9798922ddbeSAndrey V. Elsukov return; 9808922ddbeSAndrey V. Elsukov } 981df8bae1dSRodney W. Grimes 982d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 983d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 984d14122b0SErmal Luçi sin->sin_family = AF_INET; 985d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 986d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 9874043ee3cSAlexander V. Chernikov flowid = m->m_pkthdr.flowid; 9884043ee3cSAlexander V. Chernikov ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); 989983066f0SAlexander V. Chernikov if (ro.ro_nh != NULL) { 990983066f0SAlexander V. Chernikov ia = ifatoia(ro.ro_nh->nh_ifa); 99156844a62SErmal Luçi } else 99256844a62SErmal Luçi ia = NULL; 993df8bae1dSRodney W. Grimes /* 994bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 995bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 996bfef7ed4SIan Dowse * 9974d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 9984d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 9994d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 10004d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 10014d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 10024d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 10034d2e3692SLuigi Rizzo * 1004c3bef61eSKevin Lo * We don't use m_copym() because it might return a reference 1005bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 1006bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 1007bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 1008df8bae1dSRodney W. Grimes */ 1009dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 1010eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10119967cafcSSam Leffler /* 10129967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10139967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10149967cafcSSam Leffler * be conservative and just discard the copy since 10159967cafcSSam Leffler * code below may some day want the tags. 10169967cafcSSam Leffler */ 10179967cafcSSam Leffler m_free(mcopy); 10189967cafcSSam Leffler mcopy = NULL; 10199967cafcSSam Leffler } 1020bfef7ed4SIan Dowse if (mcopy != NULL) { 10218f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1022e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1023bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1024bfef7ed4SIan Dowse } 102504287599SRuslan Ermilov #ifdef IPSTEALTH 1026fcf59617SAndrey V. Elsukov if (V_ipstealth == 0) 102704287599SRuslan Ermilov #endif 102804287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 1029fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1030fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 1031fcf59617SAndrey V. Elsukov if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { 1032fcf59617SAndrey V. Elsukov /* mbuf consumed by IPsec */ 1033d16a2e47SMark Johnston RO_NHFREE(&ro); 1034fcf59617SAndrey V. Elsukov m_freem(mcopy); 1035fcf59617SAndrey V. Elsukov if (error != EINPROGRESS) 1036fcf59617SAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 1037b8a6e03fSGleb Smirnoff return; 103804287599SRuslan Ermilov } 1039fcf59617SAndrey V. Elsukov /* No IPsec processing required */ 1040fcf59617SAndrey V. Elsukov } 1041fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 1042df8bae1dSRodney W. Grimes /* 1043df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1044df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1045df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1046df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1047df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1048df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1049df8bae1dSRodney W. Grimes */ 10509b932e9eSAndre Oppermann dest.s_addr = 0; 1051efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1052efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1053983066f0SAlexander V. Chernikov struct nhop_object *nh; 105402c1c707SAndre Oppermann 1055983066f0SAlexander V. Chernikov nh = ro.ro_nh; 105602c1c707SAndre Oppermann 1057983066f0SAlexander V. Chernikov if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { 1058983066f0SAlexander V. Chernikov struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); 1059df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1060df8bae1dSRodney W. Grimes 1061983066f0SAlexander V. Chernikov if (nh_ia != NULL && 1062983066f0SAlexander V. Chernikov (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { 1063983066f0SAlexander V. Chernikov if (nh->nh_flags & NHF_GATEWAY) 1064983066f0SAlexander V. Chernikov dest.s_addr = nh->gw4_sa.sin_addr.s_addr; 1065df8bae1dSRodney W. Grimes else 10669b932e9eSAndre Oppermann dest.s_addr = ip->ip_dst.s_addr; 1067df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1068df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1069df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 1070df8bae1dSRodney W. Grimes } 1071df8bae1dSRodney W. Grimes } 107202c1c707SAndre Oppermann } 1073df8bae1dSRodney W. Grimes 1074b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1075b835b6feSBjoern A. Zeeb 1076983066f0SAlexander V. Chernikov if (error == EMSGSIZE && ro.ro_nh) 1077983066f0SAlexander V. Chernikov mtu = ro.ro_nh->nh_mtu; 1078983066f0SAlexander V. Chernikov RO_NHFREE(&ro); 1079b835b6feSBjoern A. Zeeb 1080df8bae1dSRodney W. Grimes if (error) 108186425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1082df8bae1dSRodney W. Grimes else { 108386425c62SRobert Watson IPSTAT_INC(ips_forward); 1084df8bae1dSRodney W. Grimes if (type) 108586425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1086df8bae1dSRodney W. Grimes else { 10879188b4a1SAndre Oppermann if (mcopy) 1088df8bae1dSRodney W. Grimes m_freem(mcopy); 1089b8a6e03fSGleb Smirnoff return; 1090df8bae1dSRodney W. Grimes } 1091df8bae1dSRodney W. Grimes } 10924f6c66ccSMatt Macy if (mcopy == NULL) 1093b8a6e03fSGleb Smirnoff return; 10944f6c66ccSMatt Macy 1095df8bae1dSRodney W. Grimes switch (error) { 1096df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1097df8bae1dSRodney W. Grimes /* type, code set above */ 1098df8bae1dSRodney W. Grimes break; 1099df8bae1dSRodney W. Grimes 1100efbad259SEdward Tomasz Napierala case ENETUNREACH: 1101df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1102df8bae1dSRodney W. Grimes case ENETDOWN: 1103df8bae1dSRodney W. Grimes case EHOSTDOWN: 1104df8bae1dSRodney W. Grimes default: 1105df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1106df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1107df8bae1dSRodney W. Grimes break; 1108df8bae1dSRodney W. Grimes 1109df8bae1dSRodney W. Grimes case EMSGSIZE: 1110df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1111df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 11129b932e9eSAndre Oppermann /* 1113b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1114b835b6feSBjoern A. Zeeb * interface MTU. 1115ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1116ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1117ab48768bSAndre Oppermann * current packet size. 11189b932e9eSAndre Oppermann */ 1119b835b6feSBjoern A. Zeeb if (mtu != 0) { 1120b835b6feSBjoern A. Zeeb if (ia != NULL) 1121b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1122b835b6feSBjoern A. Zeeb } else { 1123ab48768bSAndre Oppermann if (ia != NULL) 1124c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1125ab48768bSAndre Oppermann else 11268f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1127ab48768bSAndre Oppermann } 112886425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1129df8bae1dSRodney W. Grimes break; 1130df8bae1dSRodney W. Grimes 1131df8bae1dSRodney W. Grimes case ENOBUFS: 11323a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11333a06e3e0SRuslan Ermilov m_freem(mcopy); 1134b8a6e03fSGleb Smirnoff return; 1135df8bae1dSRodney W. Grimes } 1136c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1137df8bae1dSRodney W. Grimes } 1138df8bae1dSRodney W. Grimes 1139339efd75SMaxim Sobolev #define CHECK_SO_CT(sp, ct) \ 1140339efd75SMaxim Sobolev (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) 1141339efd75SMaxim Sobolev 114282c23ebaSBill Fenner void 1143f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1144f2565d68SRobert Watson struct mbuf *m) 114582c23ebaSBill Fenner { 114606193f0bSKonstantin Belousov bool stamped; 11478b615593SMarko Zec 114806193f0bSKonstantin Belousov stamped = false; 1149339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1150339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 115106193f0bSKonstantin Belousov struct bintime boottimebin, bt; 115206193f0bSKonstantin Belousov struct timespec ts1; 1153be8a62e8SPoul-Henning Kamp 115406193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 115506193f0bSKonstantin Belousov M_TSTMP)) { 115606193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 115706193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 115806193f0bSKonstantin Belousov getboottimebin(&boottimebin); 115906193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 116006193f0bSKonstantin Belousov } else { 1161be8a62e8SPoul-Henning Kamp bintime(&bt); 116206193f0bSKonstantin Belousov } 1163be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1164be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 116506193f0bSKonstantin Belousov if (*mp != NULL) { 1166be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 116706193f0bSKonstantin Belousov stamped = true; 116806193f0bSKonstantin Belousov } 1169be8a62e8SPoul-Henning Kamp } 1170339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 117106193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 1172c012cfe6SEd Maste struct timespec ts1; 117382c23ebaSBill Fenner struct timeval tv; 117482c23ebaSBill Fenner 117506193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 117606193f0bSKonstantin Belousov M_TSTMP)) { 117706193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 117806193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 117906193f0bSKonstantin Belousov getboottimebin(&boottimebin); 118006193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 118106193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 118206193f0bSKonstantin Belousov } else { 1183339efd75SMaxim Sobolev microtime(&tv); 118406193f0bSKonstantin Belousov } 118582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 118682c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 118706193f0bSKonstantin Belousov if (*mp != NULL) { 118882c23ebaSBill Fenner mp = &(*mp)->m_next; 118906193f0bSKonstantin Belousov stamped = true; 119006193f0bSKonstantin Belousov } 1191339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 119206193f0bSKonstantin Belousov struct bintime boottimebin; 119306193f0bSKonstantin Belousov struct timespec ts, ts1; 1194339efd75SMaxim Sobolev 119506193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 119606193f0bSKonstantin Belousov M_TSTMP)) { 119706193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 119806193f0bSKonstantin Belousov getboottimebin(&boottimebin); 119906193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 12006040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 120106193f0bSKonstantin Belousov } else { 1202339efd75SMaxim Sobolev nanotime(&ts); 120306193f0bSKonstantin Belousov } 1204339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1205339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 120606193f0bSKonstantin Belousov if (*mp != NULL) { 1207339efd75SMaxim Sobolev mp = &(*mp)->m_next; 120806193f0bSKonstantin Belousov stamped = true; 120906193f0bSKonstantin Belousov } 1210339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1211339efd75SMaxim Sobolev struct timespec ts; 1212339efd75SMaxim Sobolev 121306193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 121406193f0bSKonstantin Belousov M_TSTMP)) 121506193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 121606193f0bSKonstantin Belousov else 1217339efd75SMaxim Sobolev nanouptime(&ts); 1218339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1219339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 122006193f0bSKonstantin Belousov if (*mp != NULL) { 122106193f0bSKonstantin Belousov mp = &(*mp)->m_next; 122206193f0bSKonstantin Belousov stamped = true; 122306193f0bSKonstantin Belousov } 122406193f0bSKonstantin Belousov } 122506193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 122606193f0bSKonstantin Belousov M_TSTMP)) { 122706193f0bSKonstantin Belousov struct sock_timestamp_info sti; 122806193f0bSKonstantin Belousov 122906193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 123006193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 123106193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 123206193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 123306193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 123406193f0bSKonstantin Belousov SOL_SOCKET); 123506193f0bSKonstantin Belousov if (*mp != NULL) 1236339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1237be8a62e8SPoul-Henning Kamp } 123882c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 123982c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 124082c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 124182c23ebaSBill Fenner if (*mp) 124282c23ebaSBill Fenner mp = &(*mp)->m_next; 124382c23ebaSBill Fenner } 12444957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12454957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12464957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12474957466bSMatthew N. Dodd if (*mp) 12484957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12494957466bSMatthew N. Dodd } 125082c23ebaSBill Fenner #ifdef notyet 125182c23ebaSBill Fenner /* XXX 125282c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 125382c23ebaSBill Fenner * than they already were. 125482c23ebaSBill Fenner */ 125582c23ebaSBill Fenner /* options were tossed already */ 125682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 125782c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 125882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 125982c23ebaSBill Fenner if (*mp) 126082c23ebaSBill Fenner mp = &(*mp)->m_next; 126182c23ebaSBill Fenner } 126282c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 126382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1264e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 126582c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 126682c23ebaSBill Fenner if (*mp) 126782c23ebaSBill Fenner mp = &(*mp)->m_next; 126882c23ebaSBill Fenner } 126982c23ebaSBill Fenner #endif 127082c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1271d314ad7bSJulian Elischer struct ifnet *ifp; 1272d314ad7bSJulian Elischer struct sdlbuf { 127382c23ebaSBill Fenner struct sockaddr_dl sdl; 1274d314ad7bSJulian Elischer u_char pad[32]; 1275d314ad7bSJulian Elischer } sdlbuf; 1276d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1277d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 127882c23ebaSBill Fenner 127946f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 128046f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12814a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1282d314ad7bSJulian Elischer /* 1283d314ad7bSJulian Elischer * Change our mind and don't try copy. 1284d314ad7bSJulian Elischer */ 128546f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 128646f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1287d314ad7bSJulian Elischer goto makedummy; 1288d314ad7bSJulian Elischer } 1289d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1290d314ad7bSJulian Elischer } else { 1291d314ad7bSJulian Elischer makedummy: 129246f2df9cSSergey Kandaurov sdl2->sdl_len = 129346f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1294d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1295d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1296d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1297d314ad7bSJulian Elischer } 1298d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 129982c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 130082c23ebaSBill Fenner if (*mp) 130182c23ebaSBill Fenner mp = &(*mp)->m_next; 130282c23ebaSBill Fenner } 13033cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 13043cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 13053cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 13063cca425bSMichael Tuexen if (*mp) 13073cca425bSMichael Tuexen mp = &(*mp)->m_next; 13083cca425bSMichael Tuexen } 13099d3ddf43SAdrian Chadd 13109d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13119d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13129d3ddf43SAdrian Chadd 13139d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13149d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13159d3ddf43SAdrian Chadd 13169d3ddf43SAdrian Chadd /* 13179d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13189d3ddf43SAdrian Chadd * other - don't populate both? 13199d3ddf43SAdrian Chadd */ 13209d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13219d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13229d3ddf43SAdrian Chadd if (*mp) 13239d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13249d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13259d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13269d3ddf43SAdrian Chadd if (*mp) 13279d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13289d3ddf43SAdrian Chadd } 13299d3ddf43SAdrian Chadd 13309d3ddf43SAdrian Chadd #ifdef RSS 13319d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13329d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13339d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13349d3ddf43SAdrian Chadd 13359d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13369d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13379d3ddf43SAdrian Chadd 13389d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13399d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13409d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13419d3ddf43SAdrian Chadd if (*mp) 13429d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13439d3ddf43SAdrian Chadd } 13449d3ddf43SAdrian Chadd } 13459d3ddf43SAdrian Chadd #endif 134682c23ebaSBill Fenner } 134782c23ebaSBill Fenner 13484d2e3692SLuigi Rizzo /* 134930916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 135030916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 135130916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 135230916a2dSRobert Watson * compiled. 13534d2e3692SLuigi Rizzo */ 13545f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 135582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 135682cea7e6SBjoern A. Zeeb 135782cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 135882cea7e6SBjoern A. Zeeb 1359df8bae1dSRodney W. Grimes int 1360f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1361f0068c4aSGarrett Wollman { 13628b615593SMarko Zec 1363f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1364f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1365f0068c4aSGarrett Wollman return EOPNOTSUPP; 1366f0068c4aSGarrett Wollman 1367603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1368f0068c4aSGarrett Wollman return EADDRINUSE; 1369f0068c4aSGarrett Wollman 1370603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13711c5de19aSGarrett Wollman /* 13721c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13731c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13741c5de19aSGarrett Wollman */ 1375603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1376603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1377603724d3SBjoern A. Zeeb V_rsvp_on++; 13781c5de19aSGarrett Wollman } 1379f0068c4aSGarrett Wollman 1380f0068c4aSGarrett Wollman return 0; 1381f0068c4aSGarrett Wollman } 1382f0068c4aSGarrett Wollman 1383f0068c4aSGarrett Wollman int 1384f0068c4aSGarrett Wollman ip_rsvp_done(void) 1385f0068c4aSGarrett Wollman { 13868b615593SMarko Zec 1387603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13881c5de19aSGarrett Wollman /* 13891c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13901c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13911c5de19aSGarrett Wollman */ 1392603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1393603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1394603724d3SBjoern A. Zeeb V_rsvp_on--; 13951c5de19aSGarrett Wollman } 1396f0068c4aSGarrett Wollman return 0; 1397f0068c4aSGarrett Wollman } 1398bbb4330bSLuigi Rizzo 13998f5a8818SKevin Lo int 14008f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1401bbb4330bSLuigi Rizzo { 14028f5a8818SKevin Lo struct mbuf *m; 14038f5a8818SKevin Lo 14048f5a8818SKevin Lo m = *mp; 14058f5a8818SKevin Lo *mp = NULL; 14068b615593SMarko Zec 1407bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14088f5a8818SKevin Lo *mp = m; 14098f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14108f5a8818SKevin Lo return (IPPROTO_DONE); 1411bbb4330bSLuigi Rizzo } 1412bbb4330bSLuigi Rizzo 1413bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1414bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1415bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1416bbb4330bSLuigi Rizzo */ 1417bbb4330bSLuigi Rizzo 1418603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1419bbb4330bSLuigi Rizzo m_freem(m); 14208f5a8818SKevin Lo return (IPPROTO_DONE); 1421bbb4330bSLuigi Rizzo } 1422bbb4330bSLuigi Rizzo 1423603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14248f5a8818SKevin Lo *mp = m; 14258f5a8818SKevin Lo rip_input(mp, offp, proto); 14268f5a8818SKevin Lo return (IPPROTO_DONE); 1427bbb4330bSLuigi Rizzo } 1428bbb4330bSLuigi Rizzo /* Drop the packet */ 1429bbb4330bSLuigi Rizzo m_freem(m); 14308f5a8818SKevin Lo return (IPPROTO_DONE); 1431bbb4330bSLuigi Rizzo } 1432