1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60c85540ddSAndrey A. Chernov #include <net/pfil.h> 61df8bae1dSRodney W. Grimes #include <net/if.h> 629494d596SBrooks Davis #include <net/if_types.h> 63d314ad7bSJulian Elischer #include <net/if_var.h> 6482c23ebaSBill Fenner #include <net/if_dl.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66748e0b0aSGarrett Wollman #include <net/netisr.h> 67b2bdc62aSAdrian Chadd #include <net/rss_config.h> 684b79449eSBjoern A. Zeeb #include <net/vnet.h> 69df8bae1dSRodney W. Grimes 70df8bae1dSRodney W. Grimes #include <netinet/in.h> 7157f60867SMark Johnston #include <netinet/in_kdtrace.h> 72df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 73b5e8ce9fSBruce Evans #include <netinet/in_var.h> 74df8bae1dSRodney W. Grimes #include <netinet/ip.h> 75df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 76df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 77eddfbb76SRobert Watson #include <netinet/ip_fw.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 79ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8058938916SGarrett Wollman #include <machine/in_cksum.h> 81a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 82b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 83df8bae1dSRodney W. Grimes 84fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 85fcf59617SAndrey V. Elsukov 86f0068c4aSGarrett Wollman #include <sys/socketvar.h> 876ddbf1e2SGary Palmer 88aed55708SRobert Watson #include <security/mac/mac_framework.h> 89aed55708SRobert Watson 90d2035ffbSEd Maste #ifdef CTASSERT 91d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 92d2035ffbSEd Maste #endif 93d2035ffbSEd Maste 941dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 95843b0e57SXin LI extern void ipreass_init(void); 96843b0e57SXin LI extern void ipreass_drain(void); 97843b0e57SXin LI extern void ipreass_slowtimo(void); 981dbefcc0SGleb Smirnoff #ifdef VIMAGE 99843b0e57SXin LI extern void ipreass_destroy(void); 1001dbefcc0SGleb Smirnoff #endif 1011dbefcc0SGleb Smirnoff 102cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 103cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 104f0068c4aSGarrett Wollman 10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10682cea7e6SBjoern A. Zeeb 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 109eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1108b615593SMarko Zec "Enable IP forwarding between interfaces"); 1110312fbe9SPoul-Henning Kamp 1123e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 11382cea7e6SBjoern A. Zeeb #define V_ipsendredirects VNET(ipsendredirects) 1146df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 115eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1168b615593SMarko Zec "Enable sending IP redirects"); 1170312fbe9SPoul-Henning Kamp 118823db0e9SDon Lewis /* 119823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 120823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 121a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 122823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1233f67c834SDon Lewis * 124a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 125a8f12100SDon Lewis * to translate the destination address to another local interface. 1263f67c834SDon Lewis * 1273f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1283f67c834SDon Lewis * to the loopback interface instead of the interface where the 1293f67c834SDon Lewis * packets for those addresses are received. 130823db0e9SDon Lewis */ 1313e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface); 13282cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1336df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 134eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1358b615593SMarko Zec "Verify packet arrives on correct interface"); 136b3e95d4eSJonathan Lemon 1370b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 138df8bae1dSRodney W. Grimes 139d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 140d4b5cae4SRobert Watson .nh_name = "ip", 141d4b5cae4SRobert Watson .nh_handler = ip_input, 142d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 143b8bc95cdSAdrian Chadd #ifdef RSS 1442527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 145b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 146b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 147b8bc95cdSAdrian Chadd #else 148d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 149b8bc95cdSAdrian Chadd #endif 150d4b5cae4SRobert Watson }; 151ca925d9cSJonathan Lemon 152b8bc95cdSAdrian Chadd #ifdef RSS 153b8bc95cdSAdrian Chadd /* 154b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 155b8bc95cdSAdrian Chadd * to have a flowid already calculated. 156b8bc95cdSAdrian Chadd * 157b8bc95cdSAdrian Chadd * It should likely have something that assert it 158b8bc95cdSAdrian Chadd * actually has valid flow details. 159b8bc95cdSAdrian Chadd */ 160b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 161b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 162b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 163b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 164499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 165b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 166b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 167b8bc95cdSAdrian Chadd }; 168b8bc95cdSAdrian Chadd #endif 169b8bc95cdSAdrian Chadd 170df8bae1dSRodney W. Grimes extern struct domain inetdomain; 171f0ffb944SJulian Elischer extern struct protosw inetsw[]; 172df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17582cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 176ca925d9cSJonathan Lemon 1770312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1780312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1793d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1800312fbe9SPoul-Henning Kamp #endif 1810312fbe9SPoul-Henning Kamp 1821b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18382cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1846df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 185eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 186eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1871b968362SDag-Erling Smørgrav #endif 188eddfbb76SRobert Watson 189315e3e38SRobert Watson /* 1905da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1915923c293SGleb Smirnoff */ 1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1935da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1945da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1955da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1965923c293SGleb Smirnoff 1975923c293SGleb Smirnoff #ifdef VIMAGE 1985da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1995923c293SGleb Smirnoff #endif /* VIMAGE */ 2005923c293SGleb Smirnoff 2015923c293SGleb Smirnoff /* 202315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2035923c293SGleb Smirnoff * into ipstat treated as an array. 204315e3e38SRobert Watson */ 205315e3e38SRobert Watson void 206315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 207315e3e38SRobert Watson { 208315e3e38SRobert Watson 2095da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 210315e3e38SRobert Watson } 211315e3e38SRobert Watson 212315e3e38SRobert Watson void 213315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 214315e3e38SRobert Watson { 215315e3e38SRobert Watson 2165da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 217315e3e38SRobert Watson } 218315e3e38SRobert Watson 219d4b5cae4SRobert Watson static int 220d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 221d4b5cae4SRobert Watson { 222d4b5cae4SRobert Watson int error, qlimit; 223d4b5cae4SRobert Watson 224d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 225d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 226d4b5cae4SRobert Watson if (error || !req->newptr) 227d4b5cae4SRobert Watson return (error); 228d4b5cae4SRobert Watson if (qlimit < 1) 229d4b5cae4SRobert Watson return (EINVAL); 230d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 231d4b5cae4SRobert Watson } 232d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 233d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 234d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 235d4b5cae4SRobert Watson 236d4b5cae4SRobert Watson static int 237d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 238d4b5cae4SRobert Watson { 239d4b5cae4SRobert Watson u_int64_t qdrops_long; 240d4b5cae4SRobert Watson int error, qdrops; 241d4b5cae4SRobert Watson 242d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 243d4b5cae4SRobert Watson qdrops = qdrops_long; 244d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 245d4b5cae4SRobert Watson if (error || !req->newptr) 246d4b5cae4SRobert Watson return (error); 247d4b5cae4SRobert Watson if (qdrops != 0) 248d4b5cae4SRobert Watson return (EINVAL); 249d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 250d4b5cae4SRobert Watson return (0); 251d4b5cae4SRobert Watson } 252d4b5cae4SRobert Watson 253d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 254d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 255d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 256d4b5cae4SRobert Watson 257b8bc95cdSAdrian Chadd #ifdef RSS 258b8bc95cdSAdrian Chadd static int 259b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 260b8bc95cdSAdrian Chadd { 261b8bc95cdSAdrian Chadd int error, qlimit; 262b8bc95cdSAdrian Chadd 263b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 264b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 265b8bc95cdSAdrian Chadd if (error || !req->newptr) 266b8bc95cdSAdrian Chadd return (error); 267b8bc95cdSAdrian Chadd if (qlimit < 1) 268b8bc95cdSAdrian Chadd return (EINVAL); 269b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 270b8bc95cdSAdrian Chadd } 2717faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2727faa0d21SAndrey V. Elsukov CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2737faa0d21SAndrey V. Elsukov "I", "Maximum size of the IP direct input queue"); 274b8bc95cdSAdrian Chadd 275b8bc95cdSAdrian Chadd static int 276b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 277b8bc95cdSAdrian Chadd { 278b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 279b8bc95cdSAdrian Chadd int error, qdrops; 280b8bc95cdSAdrian Chadd 281b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 282b8bc95cdSAdrian Chadd qdrops = qdrops_long; 283b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 284b8bc95cdSAdrian Chadd if (error || !req->newptr) 285b8bc95cdSAdrian Chadd return (error); 286b8bc95cdSAdrian Chadd if (qdrops != 0) 287b8bc95cdSAdrian Chadd return (EINVAL); 288b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 289b8bc95cdSAdrian Chadd return (0); 290b8bc95cdSAdrian Chadd } 291b8bc95cdSAdrian Chadd 2927faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 293b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", 294b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 295b8bc95cdSAdrian Chadd #endif /* RSS */ 296b8bc95cdSAdrian Chadd 297df8bae1dSRodney W. Grimes /* 298df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 299df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 300df8bae1dSRodney W. Grimes */ 301df8bae1dSRodney W. Grimes void 302f2565d68SRobert Watson ip_init(void) 303df8bae1dSRodney W. Grimes { 304f2565d68SRobert Watson struct protosw *pr; 305f2565d68SRobert Watson int i; 306df8bae1dSRodney W. Grimes 307d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 308603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3091ed81b73SMarko Zec 3101ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3111dbefcc0SGleb Smirnoff ipreass_init(); 3121ed81b73SMarko Zec 3130b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 3140b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 3150b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_af = AF_INET; 3160b4b0b0fSJulian Elischer if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 3170b4b0b0fSJulian Elischer printf("%s: WARNING: unable to register pfil hook, " 3180b4b0b0fSJulian Elischer "error %d\n", __func__, i); 3190b4b0b0fSJulian Elischer 320ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 321ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 322ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 323ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 324ef91a976SAndrey V. Elsukov __func__); 325ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 326ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 327ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 328ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 329ef91a976SAndrey V. Elsukov __func__); 330ef91a976SAndrey V. Elsukov 3311ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 332484149deSBjoern A. Zeeb #ifdef VIMAGE 333484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 334484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 335484149deSBjoern A. Zeeb #ifdef RSS 336484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 337484149deSBjoern A. Zeeb #endif 3381ed81b73SMarko Zec return; 339484149deSBjoern A. Zeeb } 340484149deSBjoern A. Zeeb #endif 3411ed81b73SMarko Zec 342f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 34302410549SRobert Watson if (pr == NULL) 344db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 345db09bef3SAndre Oppermann 346db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 347df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 348df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 349db09bef3SAndre Oppermann /* 350db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 351db09bef3SAndre Oppermann * in ip_protox[]. 352db09bef3SAndre Oppermann */ 353f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 354f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 355df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 356db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 357db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 358db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 359df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 360db09bef3SAndre Oppermann } 361194a213eSAndrey A. Chernov 362d4b5cae4SRobert Watson netisr_register(&ip_nh); 363b8bc95cdSAdrian Chadd #ifdef RSS 364b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 365b8bc95cdSAdrian Chadd #endif 366df8bae1dSRodney W. Grimes } 367df8bae1dSRodney W. Grimes 3689802380eSBjoern A. Zeeb #ifdef VIMAGE 3693f58662dSBjoern A. Zeeb static void 3703f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3719802380eSBjoern A. Zeeb { 37289856f7eSBjoern A. Zeeb struct ifnet *ifp; 373ef91a976SAndrey V. Elsukov int error; 3744d3dfd45SMikolaj Golub 375484149deSBjoern A. Zeeb #ifdef RSS 376484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 377484149deSBjoern A. Zeeb #endif 378484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 379484149deSBjoern A. Zeeb 380ef91a976SAndrey V. Elsukov if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 3814d3dfd45SMikolaj Golub printf("%s: WARNING: unable to unregister pfil hook, " 382ef91a976SAndrey V. Elsukov "error %d\n", __func__, error); 3839802380eSBjoern A. Zeeb 384ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 385ef91a976SAndrey V. Elsukov if (error != 0) { 386ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 387ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 388ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 389ef91a976SAndrey V. Elsukov } 390ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 391ef91a976SAndrey V. Elsukov if (error != 0) { 392ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 393ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 394ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 395ef91a976SAndrey V. Elsukov } 39689856f7eSBjoern A. Zeeb 39789856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 39889856f7eSBjoern A. Zeeb in_ifscrub_all(); 39989856f7eSBjoern A. Zeeb 40089856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 40189856f7eSBjoern A. Zeeb IFNET_RLOCK(); 402*4f6c66ccSMatt Macy CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) 40389856f7eSBjoern A. Zeeb rt_flushifroutes_af(ifp, AF_INET); 40489856f7eSBjoern A. Zeeb IFNET_RUNLOCK(); 4059802380eSBjoern A. Zeeb 406e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4071dbefcc0SGleb Smirnoff ipreass_destroy(); 40889856f7eSBjoern A. Zeeb 40989856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 41089856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4119802380eSBjoern A. Zeeb } 4123f58662dSBjoern A. Zeeb 4133f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4149802380eSBjoern A. Zeeb #endif 4159802380eSBjoern A. Zeeb 416b8bc95cdSAdrian Chadd #ifdef RSS 417b8bc95cdSAdrian Chadd /* 418b8bc95cdSAdrian Chadd * IP direct input routine. 419b8bc95cdSAdrian Chadd * 420b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 421b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 422b8bc95cdSAdrian Chadd */ 423b8bc95cdSAdrian Chadd void 424b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 425b8bc95cdSAdrian Chadd { 426b8bc95cdSAdrian Chadd struct ip *ip; 427b8bc95cdSAdrian Chadd int hlen; 428b8bc95cdSAdrian Chadd 429b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 430b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 431b8bc95cdSAdrian Chadd 432fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 433fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 434fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 435fcf59617SAndrey V. Elsukov return; 436fcf59617SAndrey V. Elsukov } 437fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 438b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 439b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 440b8bc95cdSAdrian Chadd return; 441b8bc95cdSAdrian Chadd } 442b8bc95cdSAdrian Chadd #endif 443b8bc95cdSAdrian Chadd 4444d2e3692SLuigi Rizzo /* 445df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 446df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 447df8bae1dSRodney W. Grimes */ 448c67b1d17SGarrett Wollman void 449c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 450df8bae1dSRodney W. Grimes { 4519188b4a1SAndre Oppermann struct ip *ip = NULL; 4525da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 453ca925d9cSJonathan Lemon struct ifaddr *ifa; 4540aade26eSRobert Watson struct ifnet *ifp; 4559b932e9eSAndre Oppermann int checkif, hlen = 0; 45621d172a3SGleb Smirnoff uint16_t sum, ip_len; 45702c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 458f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 459b715f178SLuigi Rizzo 460fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 461db40007dSAndrew R. Reiter 462ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 46376ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 46476ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4652b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 46653be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4678f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4689b932e9eSAndre Oppermann goto ours; 4692b25acc1SLuigi Rizzo } 4702b25acc1SLuigi Rizzo 47186425c62SRobert Watson IPSTAT_INC(ips_total); 47258938916SGarrett Wollman 47358938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 47458938916SGarrett Wollman goto tooshort; 47558938916SGarrett Wollman 476df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4770b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 47886425c62SRobert Watson IPSTAT_INC(ips_toosmall); 479c67b1d17SGarrett Wollman return; 480df8bae1dSRodney W. Grimes } 481df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48258938916SGarrett Wollman 48353be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 48486425c62SRobert Watson IPSTAT_INC(ips_badvers); 485df8bae1dSRodney W. Grimes goto bad; 486df8bae1dSRodney W. Grimes } 48758938916SGarrett Wollman 48853be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 489df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 49086425c62SRobert Watson IPSTAT_INC(ips_badhlen); 491df8bae1dSRodney W. Grimes goto bad; 492df8bae1dSRodney W. Grimes } 493df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4940b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 49586425c62SRobert Watson IPSTAT_INC(ips_badhlen); 496c67b1d17SGarrett Wollman return; 497df8bae1dSRodney W. Grimes } 498df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 499df8bae1dSRodney W. Grimes } 50033841545SHajimu UMEMOTO 50157f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50257f60867SMark Johnston 50333841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 5040aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 50533841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 50633841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 5070aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 50886425c62SRobert Watson IPSTAT_INC(ips_badaddr); 50933841545SHajimu UMEMOTO goto bad; 51033841545SHajimu UMEMOTO } 51133841545SHajimu UMEMOTO } 51233841545SHajimu UMEMOTO 513db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 514db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 515db4f9cc7SJonathan Lemon } else { 51658938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 51747c861ecSBrian Somers sum = in_cksum_hdr(ip); 51858938916SGarrett Wollman } else { 51947c861ecSBrian Somers sum = in_cksum(m, hlen); 52058938916SGarrett Wollman } 521db4f9cc7SJonathan Lemon } 52247c861ecSBrian Somers if (sum) { 52386425c62SRobert Watson IPSTAT_INC(ips_badsum); 524df8bae1dSRodney W. Grimes goto bad; 525df8bae1dSRodney W. Grimes } 526df8bae1dSRodney W. Grimes 52702b199f1SMax Laier #ifdef ALTQ 52802b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 52902b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53002b199f1SMax Laier return; 53102b199f1SMax Laier #endif 53202b199f1SMax Laier 53321d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 53421d172a3SGleb Smirnoff if (ip_len < hlen) { 53586425c62SRobert Watson IPSTAT_INC(ips_badlen); 536df8bae1dSRodney W. Grimes goto bad; 537df8bae1dSRodney W. Grimes } 538df8bae1dSRodney W. Grimes 539df8bae1dSRodney W. Grimes /* 540df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 541df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 542df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 543df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 544df8bae1dSRodney W. Grimes */ 54521d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 54658938916SGarrett Wollman tooshort: 54786425c62SRobert Watson IPSTAT_INC(ips_tooshort); 548df8bae1dSRodney W. Grimes goto bad; 549df8bae1dSRodney W. Grimes } 55021d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 551df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55221d172a3SGleb Smirnoff m->m_len = ip_len; 55321d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 554df8bae1dSRodney W. Grimes } else 55521d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 556df8bae1dSRodney W. Grimes } 557b8bc95cdSAdrian Chadd 558ad9f4d6aSAndrey V. Elsukov /* 559ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 560ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 561ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 562ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 563ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 564ad9f4d6aSAndrey V. Elsukov * ip pointer. 565ad9f4d6aSAndrey V. Elsukov */ 566ad9f4d6aSAndrey V. Elsukov if (V_ipforwarding != 0 567fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 568fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 569fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 570ad9f4d6aSAndrey V. Elsukov #endif 571ad9f4d6aSAndrey V. Elsukov ) { 572ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 57333872124SGeorge V. Neville-Neil return; 574ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 575ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 576ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 577ad9f4d6aSAndrey V. Elsukov goto ours; 578ad9f4d6aSAndrey V. Elsukov } 579ad9f4d6aSAndrey V. Elsukov } 580fcf59617SAndrey V. Elsukov 581fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 58214dd6717SSam Leffler /* 583ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 58414dd6717SSam Leffler */ 585fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 586fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 587c21fd232SAndre Oppermann goto passin; 588ad9f4d6aSAndrey V. Elsukov #endif 589fcf59617SAndrey V. Elsukov 590c4ac87eaSDarren Reed /* 591134ea224SSam Leffler * Run through list of hooks for input packets. 592f51f805fSSam Leffler * 593f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 594f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 595f51f805fSSam Leffler * ip_forward to do the right thing. 596c4ac87eaSDarren Reed */ 597c21fd232SAndre Oppermann 598c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 5990b4b0b0fSJulian Elischer if (!PFIL_HOOKED(&V_inet_pfil_hook)) 600c21fd232SAndre Oppermann goto passin; 601c21fd232SAndre Oppermann 602f51f805fSSam Leffler odst = ip->ip_dst; 603effaab88SKristof Provost if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, 0, NULL) != 0) 604beec8214SDarren Reed return; 605134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 606c4ac87eaSDarren Reed return; 6079b932e9eSAndre Oppermann 608c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 60902c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6100aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 6119b932e9eSAndre Oppermann 6129b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6139b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6149b932e9eSAndre Oppermann goto ours; 6159b932e9eSAndre Oppermann } 616ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 617de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 618099dd043SAndre Oppermann /* 619ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 620ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 621ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 622099dd043SAndre Oppermann */ 623ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 624099dd043SAndre Oppermann return; 625099dd043SAndre Oppermann } 626ffdbf9daSAndrey V. Elsukov } 627c21fd232SAndre Oppermann passin: 62821d172a3SGleb Smirnoff 62921d172a3SGleb Smirnoff /* 630df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 631df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 632df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 633df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 634df8bae1dSRodney W. Grimes */ 6359b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 636c67b1d17SGarrett Wollman return; 637df8bae1dSRodney W. Grimes 638f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 639f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 640f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 641f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 642f0068c4aSGarrett Wollman * grabbing the packet. 643f0068c4aSGarrett Wollman */ 644603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 645f0068c4aSGarrett Wollman goto ours; 646f0068c4aSGarrett Wollman 647df8bae1dSRodney W. Grimes /* 648df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 649cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 650cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 651cc766e04SGarrett Wollman * with it). 652df8bae1dSRodney W. Grimes */ 653d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 654cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 655cc766e04SGarrett Wollman goto ours; 656cc766e04SGarrett Wollman 6577538a9a0SJonathan Lemon /* 658823db0e9SDon Lewis * Enable a consistency check between the destination address 659823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 660823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 661e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 662e15ae1b2SDon Lewis * 'ipfw fwd'. 6633f67c834SDon Lewis * 6643f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 6653f67c834SDon Lewis * address is ipnat'ed to a different interface. 6663f67c834SDon Lewis * 667a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 6683f67c834SDon Lewis * to the loopback interface instead of the interface where 6693f67c834SDon Lewis * the packets are received. 670a9771948SGleb Smirnoff * 671a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 672a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 673a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 674823db0e9SDon Lewis */ 675603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 6760aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 67754bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 678823db0e9SDon Lewis 679ca925d9cSJonathan Lemon /* 680ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 681ca925d9cSJonathan Lemon */ 6822d9cfabaSRobert Watson /* IN_IFADDR_RLOCK(); */ 6839b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 684f9e354dfSJulian Elischer /* 685823db0e9SDon Lewis * If the address matches, verify that the packet 686823db0e9SDon Lewis * arrived via the correct interface if checking is 687823db0e9SDon Lewis * enabled. 688f9e354dfSJulian Elischer */ 6899b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6908c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6917caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6927caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6937caf4ab7SGleb Smirnoff m->m_pkthdr.len); 6942d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 695ed1ff184SJulian Elischer goto ours; 696ca925d9cSJonathan Lemon } 6978c0fec80SRobert Watson } 6982d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 6992d9cfabaSRobert Watson 700823db0e9SDon Lewis /* 701ca925d9cSJonathan Lemon * Check for broadcast addresses. 702ca925d9cSJonathan Lemon * 703ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 704ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 705ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 706ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 707823db0e9SDon Lewis */ 7080aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 709137f91e8SJohn Baldwin IF_ADDR_RLOCK(ifp); 710d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 711ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 712ca925d9cSJonathan Lemon continue; 713ca925d9cSJonathan Lemon ia = ifatoia(ifa); 714df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7150aade26eSRobert Watson ip->ip_dst.s_addr) { 7167caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7177caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7187caf4ab7SGleb Smirnoff m->m_pkthdr.len); 719137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 720df8bae1dSRodney W. Grimes goto ours; 7210aade26eSRobert Watson } 7220ac40133SBrian Somers #ifdef BOOTP_COMPAT 7230aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7247caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7257caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7267caf4ab7SGleb Smirnoff m->m_pkthdr.len); 727137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 728ca925d9cSJonathan Lemon goto ours; 7290aade26eSRobert Watson } 7300ac40133SBrian Somers #endif 731df8bae1dSRodney W. Grimes } 732137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 73319e5b0a7SRobert Watson ia = NULL; 734df8bae1dSRodney W. Grimes } 735f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 736f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 73786425c62SRobert Watson IPSTAT_INC(ips_cantforward); 738f8429ca2SBruce M Simpson m_freem(m); 739f8429ca2SBruce M Simpson return; 740f8429ca2SBruce M Simpson } 741df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 742603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 743df8bae1dSRodney W. Grimes /* 744df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 745df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 746df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 747df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 748df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 749df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 750df8bae1dSRodney W. Grimes */ 7510aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 75286425c62SRobert Watson IPSTAT_INC(ips_cantforward); 753df8bae1dSRodney W. Grimes m_freem(m); 754c67b1d17SGarrett Wollman return; 755df8bae1dSRodney W. Grimes } 756df8bae1dSRodney W. Grimes 757df8bae1dSRodney W. Grimes /* 75811612afaSDima Dorfman * The process-level routing daemon needs to receive 759df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 760df8bae1dSRodney W. Grimes * host belongs to their destination groups. 761df8bae1dSRodney W. Grimes */ 762df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 763df8bae1dSRodney W. Grimes goto ours; 76486425c62SRobert Watson IPSTAT_INC(ips_forward); 765df8bae1dSRodney W. Grimes } 766df8bae1dSRodney W. Grimes /* 767d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 768d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 769d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 770df8bae1dSRodney W. Grimes */ 771df8bae1dSRodney W. Grimes goto ours; 772df8bae1dSRodney W. Grimes } 773df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 774df8bae1dSRodney W. Grimes goto ours; 775df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 776df8bae1dSRodney W. Grimes goto ours; 777df8bae1dSRodney W. Grimes 7786a800098SYoshinobu Inoue /* 779df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 780df8bae1dSRodney W. Grimes */ 781603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 78286425c62SRobert Watson IPSTAT_INC(ips_cantforward); 783df8bae1dSRodney W. Grimes m_freem(m); 784546f251bSChris D. Faulhaber } else { 7859b932e9eSAndre Oppermann ip_forward(m, dchg); 786546f251bSChris D. Faulhaber } 787c67b1d17SGarrett Wollman return; 788df8bae1dSRodney W. Grimes 789df8bae1dSRodney W. Grimes ours: 790d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 791d0ebc0d2SYaroslav Tykhiy /* 792d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 793d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 794d0ebc0d2SYaroslav Tykhiy */ 7957caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 796d0ebc0d2SYaroslav Tykhiy return; 797d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 798d0ebc0d2SYaroslav Tykhiy 79963f8d699SJordan K. Hubbard /* 800b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 801ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 802df8bae1dSRodney W. Grimes */ 8038f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 804aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 805f0cada84SAndre Oppermann m = ip_reass(m); 806f0cada84SAndre Oppermann if (m == NULL) 807c67b1d17SGarrett Wollman return; 8086a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8097e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 81053be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 811f0cada84SAndre Oppermann } 812f0cada84SAndre Oppermann 813fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 814fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 815fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 816fcf59617SAndrey V. Elsukov return; 817fcf59617SAndrey V. Elsukov } 818b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 81933841545SHajimu UMEMOTO 820df8bae1dSRodney W. Grimes /* 821df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 822df8bae1dSRodney W. Grimes */ 82386425c62SRobert Watson IPSTAT_INC(ips_delivered); 8249b932e9eSAndre Oppermann 8258f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 826c67b1d17SGarrett Wollman return; 827df8bae1dSRodney W. Grimes bad: 828df8bae1dSRodney W. Grimes m_freem(m); 829c67b1d17SGarrett Wollman } 830c67b1d17SGarrett Wollman 831c67b1d17SGarrett Wollman /* 832df8bae1dSRodney W. Grimes * IP timer processing; 833df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 834df8bae1dSRodney W. Grimes * queue, discard it. 835df8bae1dSRodney W. Grimes */ 836df8bae1dSRodney W. Grimes void 837f2565d68SRobert Watson ip_slowtimo(void) 838df8bae1dSRodney W. Grimes { 8398b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 840df8bae1dSRodney W. Grimes 8415ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8428b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8438b615593SMarko Zec CURVNET_SET(vnet_iter); 8441dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8458b615593SMarko Zec CURVNET_RESTORE(); 8468b615593SMarko Zec } 8475ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 848df8bae1dSRodney W. Grimes } 849df8bae1dSRodney W. Grimes 8509802380eSBjoern A. Zeeb void 8519802380eSBjoern A. Zeeb ip_drain(void) 8529802380eSBjoern A. Zeeb { 8539802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8549802380eSBjoern A. Zeeb 8559802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8569802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8579802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8581dbefcc0SGleb Smirnoff ipreass_drain(); 8598b615593SMarko Zec CURVNET_RESTORE(); 8608b615593SMarko Zec } 8615ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 862df8bae1dSRodney W. Grimes } 863df8bae1dSRodney W. Grimes 864df8bae1dSRodney W. Grimes /* 865de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 866de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 867de38924dSAndre Oppermann */ 868de38924dSAndre Oppermann int 8691b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 870de38924dSAndre Oppermann { 871de38924dSAndre Oppermann struct protosw *pr; 872de38924dSAndre Oppermann 873de38924dSAndre Oppermann /* Sanity checks. */ 8741b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 875de38924dSAndre Oppermann return (EPROTONOSUPPORT); 876de38924dSAndre Oppermann 877de38924dSAndre Oppermann /* 878de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 879de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 880de38924dSAndre Oppermann */ 881de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 882de38924dSAndre Oppermann if (pr == NULL) 883de38924dSAndre Oppermann return (EPFNOSUPPORT); 884de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 885de38924dSAndre Oppermann return (EEXIST); 886de38924dSAndre Oppermann 887de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 888de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 889de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 890de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 891de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 892de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 893de38924dSAndre Oppermann return (0); 894de38924dSAndre Oppermann } 895de38924dSAndre Oppermann } 896de38924dSAndre Oppermann return (EPROTONOSUPPORT); 897de38924dSAndre Oppermann } 898de38924dSAndre Oppermann 899de38924dSAndre Oppermann int 9001b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 901de38924dSAndre Oppermann { 902de38924dSAndre Oppermann struct protosw *pr; 903de38924dSAndre Oppermann 904de38924dSAndre Oppermann /* Sanity checks. */ 9051b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 906de38924dSAndre Oppermann return (EPROTONOSUPPORT); 907de38924dSAndre Oppermann 908de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 909de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 910de38924dSAndre Oppermann if (pr == NULL) 911de38924dSAndre Oppermann return (EPFNOSUPPORT); 912de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 913de38924dSAndre Oppermann return (ENOENT); 914de38924dSAndre Oppermann 915de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 916de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 917de38924dSAndre Oppermann return (0); 918de38924dSAndre Oppermann } 919de38924dSAndre Oppermann 920df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 921df8bae1dSRodney W. Grimes 0, 0, 0, 0, 922df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 923df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 924df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 925fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 9263b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 927df8bae1dSRodney W. Grimes }; 928df8bae1dSRodney W. Grimes 929df8bae1dSRodney W. Grimes /* 930df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 931df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 932df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 933df8bae1dSRodney W. Grimes * of codes and types. 934df8bae1dSRodney W. Grimes * 935df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 936df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 937df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 938df8bae1dSRodney W. Grimes * protocol deal with that. 939df8bae1dSRodney W. Grimes * 940df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 941df8bae1dSRodney W. Grimes * via a source route. 942df8bae1dSRodney W. Grimes */ 9439b932e9eSAndre Oppermann void 9449b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 945df8bae1dSRodney W. Grimes { 9462b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 947efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 948df8bae1dSRodney W. Grimes struct mbuf *mcopy; 949d14122b0SErmal Luçi struct sockaddr_in *sin; 9509b932e9eSAndre Oppermann struct in_addr dest; 951b835b6feSBjoern A. Zeeb struct route ro; 952c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9533efc3014SJulian Elischer 9549b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 95586425c62SRobert Watson IPSTAT_INC(ips_cantforward); 956df8bae1dSRodney W. Grimes m_freem(m); 957df8bae1dSRodney W. Grimes return; 958df8bae1dSRodney W. Grimes } 959fcf59617SAndrey V. Elsukov if ( 960fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH 961fcf59617SAndrey V. Elsukov V_ipstealth == 0 && 962fcf59617SAndrey V. Elsukov #endif 963fcf59617SAndrey V. Elsukov ip->ip_ttl <= IPTTLDEC) { 964fcf59617SAndrey V. Elsukov icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); 9658922ddbeSAndrey V. Elsukov return; 9668922ddbeSAndrey V. Elsukov } 967df8bae1dSRodney W. Grimes 968d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 969d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 970d14122b0SErmal Luçi sin->sin_family = AF_INET; 971d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 972d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 973d14122b0SErmal Luçi #ifdef RADIX_MPATH 974d14122b0SErmal Luçi rtalloc_mpath_fib(&ro, 975d14122b0SErmal Luçi ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 976d14122b0SErmal Luçi M_GETFIB(m)); 977d14122b0SErmal Luçi #else 978d14122b0SErmal Luçi in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 979d14122b0SErmal Luçi #endif 980*4f6c66ccSMatt Macy NET_EPOCH_ENTER(); 981d14122b0SErmal Luçi if (ro.ro_rt != NULL) { 982d14122b0SErmal Luçi ia = ifatoia(ro.ro_rt->rt_ifa); 98356844a62SErmal Luçi } else 98456844a62SErmal Luçi ia = NULL; 985df8bae1dSRodney W. Grimes /* 986bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 987bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 988bfef7ed4SIan Dowse * 9894d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 9904d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 9914d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 9924d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 9934d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 9944d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 9954d2e3692SLuigi Rizzo * 996c3bef61eSKevin Lo * We don't use m_copym() because it might return a reference 997bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 998bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 999bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 1000df8bae1dSRodney W. Grimes */ 1001dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 1002eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10039967cafcSSam Leffler /* 10049967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10059967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10069967cafcSSam Leffler * be conservative and just discard the copy since 10079967cafcSSam Leffler * code below may some day want the tags. 10089967cafcSSam Leffler */ 10099967cafcSSam Leffler m_free(mcopy); 10109967cafcSSam Leffler mcopy = NULL; 10119967cafcSSam Leffler } 1012bfef7ed4SIan Dowse if (mcopy != NULL) { 10138f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1014e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1015bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1016bfef7ed4SIan Dowse } 101704287599SRuslan Ermilov #ifdef IPSTEALTH 1018fcf59617SAndrey V. Elsukov if (V_ipstealth == 0) 101904287599SRuslan Ermilov #endif 102004287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 1021fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1022fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 1023fcf59617SAndrey V. Elsukov if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { 1024fcf59617SAndrey V. Elsukov /* mbuf consumed by IPsec */ 1025fcf59617SAndrey V. Elsukov m_freem(mcopy); 1026fcf59617SAndrey V. Elsukov if (error != EINPROGRESS) 1027fcf59617SAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 1028*4f6c66ccSMatt Macy goto out; 102904287599SRuslan Ermilov } 1030fcf59617SAndrey V. Elsukov /* No IPsec processing required */ 1031fcf59617SAndrey V. Elsukov } 1032fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 1033df8bae1dSRodney W. Grimes /* 1034df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1035df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1036df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1037df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1038df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1039df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1040df8bae1dSRodney W. Grimes */ 10419b932e9eSAndre Oppermann dest.s_addr = 0; 1042efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1043efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 104402c1c707SAndre Oppermann struct rtentry *rt; 104502c1c707SAndre Oppermann 104602c1c707SAndre Oppermann rt = ro.ro_rt; 104702c1c707SAndre Oppermann 104802c1c707SAndre Oppermann if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 10499b932e9eSAndre Oppermann satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1050df8bae1dSRodney W. Grimes #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1051df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1052df8bae1dSRodney W. Grimes 1053df8bae1dSRodney W. Grimes if (RTA(rt) && 1054df8bae1dSRodney W. Grimes (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1055df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) 10569b932e9eSAndre Oppermann dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1057df8bae1dSRodney W. Grimes else 10589b932e9eSAndre Oppermann dest.s_addr = ip->ip_dst.s_addr; 1059df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1060df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1061df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 1062df8bae1dSRodney W. Grimes } 1063df8bae1dSRodney W. Grimes } 106402c1c707SAndre Oppermann } 1065df8bae1dSRodney W. Grimes 1066b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1067b835b6feSBjoern A. Zeeb 1068b835b6feSBjoern A. Zeeb if (error == EMSGSIZE && ro.ro_rt) 1069e3a7aa6fSGleb Smirnoff mtu = ro.ro_rt->rt_mtu; 1070bf984051SGleb Smirnoff RO_RTFREE(&ro); 1071b835b6feSBjoern A. Zeeb 1072df8bae1dSRodney W. Grimes if (error) 107386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1074df8bae1dSRodney W. Grimes else { 107586425c62SRobert Watson IPSTAT_INC(ips_forward); 1076df8bae1dSRodney W. Grimes if (type) 107786425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1078df8bae1dSRodney W. Grimes else { 10799188b4a1SAndre Oppermann if (mcopy) 1080df8bae1dSRodney W. Grimes m_freem(mcopy); 1081*4f6c66ccSMatt Macy goto out; 1082df8bae1dSRodney W. Grimes } 1083df8bae1dSRodney W. Grimes } 1084*4f6c66ccSMatt Macy if (mcopy == NULL) 1085*4f6c66ccSMatt Macy goto out; 1086*4f6c66ccSMatt Macy 1087df8bae1dSRodney W. Grimes 1088df8bae1dSRodney W. Grimes switch (error) { 1089df8bae1dSRodney W. Grimes 1090df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1091df8bae1dSRodney W. Grimes /* type, code set above */ 1092df8bae1dSRodney W. Grimes break; 1093df8bae1dSRodney W. Grimes 1094efbad259SEdward Tomasz Napierala case ENETUNREACH: 1095df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1096df8bae1dSRodney W. Grimes case ENETDOWN: 1097df8bae1dSRodney W. Grimes case EHOSTDOWN: 1098df8bae1dSRodney W. Grimes default: 1099df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1100df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1101df8bae1dSRodney W. Grimes break; 1102df8bae1dSRodney W. Grimes 1103df8bae1dSRodney W. Grimes case EMSGSIZE: 1104df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1105df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 11069b932e9eSAndre Oppermann /* 1107b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1108b835b6feSBjoern A. Zeeb * interface MTU. 1109ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1110ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1111ab48768bSAndre Oppermann * current packet size. 11129b932e9eSAndre Oppermann */ 1113b835b6feSBjoern A. Zeeb if (mtu != 0) { 1114b835b6feSBjoern A. Zeeb if (ia != NULL) 1115b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1116b835b6feSBjoern A. Zeeb } else { 1117ab48768bSAndre Oppermann if (ia != NULL) 1118c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1119ab48768bSAndre Oppermann else 11208f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1121ab48768bSAndre Oppermann } 112286425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1123df8bae1dSRodney W. Grimes break; 1124df8bae1dSRodney W. Grimes 1125df8bae1dSRodney W. Grimes case ENOBUFS: 11263a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11273a06e3e0SRuslan Ermilov m_freem(mcopy); 1128*4f6c66ccSMatt Macy goto out; 1129df8bae1dSRodney W. Grimes } 1130c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1131*4f6c66ccSMatt Macy out: 1132*4f6c66ccSMatt Macy NET_EPOCH_EXIT(); 1133df8bae1dSRodney W. Grimes } 1134df8bae1dSRodney W. Grimes 1135339efd75SMaxim Sobolev #define CHECK_SO_CT(sp, ct) \ 1136339efd75SMaxim Sobolev (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) 1137339efd75SMaxim Sobolev 113882c23ebaSBill Fenner void 1139f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1140f2565d68SRobert Watson struct mbuf *m) 114182c23ebaSBill Fenner { 114206193f0bSKonstantin Belousov bool stamped; 11438b615593SMarko Zec 114406193f0bSKonstantin Belousov stamped = false; 1145339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1146339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 114706193f0bSKonstantin Belousov struct bintime boottimebin, bt; 114806193f0bSKonstantin Belousov struct timespec ts1; 1149be8a62e8SPoul-Henning Kamp 115006193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 115106193f0bSKonstantin Belousov M_TSTMP)) { 115206193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 115306193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 115406193f0bSKonstantin Belousov getboottimebin(&boottimebin); 115506193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 115606193f0bSKonstantin Belousov } else { 1157be8a62e8SPoul-Henning Kamp bintime(&bt); 115806193f0bSKonstantin Belousov } 1159be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1160be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 116106193f0bSKonstantin Belousov if (*mp != NULL) { 1162be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 116306193f0bSKonstantin Belousov stamped = true; 116406193f0bSKonstantin Belousov } 1165be8a62e8SPoul-Henning Kamp } 1166339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 116706193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 116806193f0bSKonstantin Belousov struct timespec ts1;; 116982c23ebaSBill Fenner struct timeval tv; 117082c23ebaSBill Fenner 117106193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 117206193f0bSKonstantin Belousov M_TSTMP)) { 117306193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 117406193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 117506193f0bSKonstantin Belousov getboottimebin(&boottimebin); 117606193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 117706193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 117806193f0bSKonstantin Belousov } else { 1179339efd75SMaxim Sobolev microtime(&tv); 118006193f0bSKonstantin Belousov } 118182c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 118282c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 118306193f0bSKonstantin Belousov if (*mp != NULL) { 118482c23ebaSBill Fenner mp = &(*mp)->m_next; 118506193f0bSKonstantin Belousov stamped = true; 118606193f0bSKonstantin Belousov } 1187339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 118806193f0bSKonstantin Belousov struct bintime boottimebin; 118906193f0bSKonstantin Belousov struct timespec ts, ts1; 1190339efd75SMaxim Sobolev 119106193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 119206193f0bSKonstantin Belousov M_TSTMP)) { 119306193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 119406193f0bSKonstantin Belousov getboottimebin(&boottimebin); 119506193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 119606193f0bSKonstantin Belousov timespecadd(&ts, &ts1); 119706193f0bSKonstantin Belousov } else { 1198339efd75SMaxim Sobolev nanotime(&ts); 119906193f0bSKonstantin Belousov } 1200339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1201339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 120206193f0bSKonstantin Belousov if (*mp != NULL) { 1203339efd75SMaxim Sobolev mp = &(*mp)->m_next; 120406193f0bSKonstantin Belousov stamped = true; 120506193f0bSKonstantin Belousov } 1206339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1207339efd75SMaxim Sobolev struct timespec ts; 1208339efd75SMaxim Sobolev 120906193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 121006193f0bSKonstantin Belousov M_TSTMP)) 121106193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 121206193f0bSKonstantin Belousov else 1213339efd75SMaxim Sobolev nanouptime(&ts); 1214339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1215339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 121606193f0bSKonstantin Belousov if (*mp != NULL) { 121706193f0bSKonstantin Belousov mp = &(*mp)->m_next; 121806193f0bSKonstantin Belousov stamped = true; 121906193f0bSKonstantin Belousov } 122006193f0bSKonstantin Belousov } 122106193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 122206193f0bSKonstantin Belousov M_TSTMP)) { 122306193f0bSKonstantin Belousov struct sock_timestamp_info sti; 122406193f0bSKonstantin Belousov 122506193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 122606193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 122706193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 122806193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 122906193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 123006193f0bSKonstantin Belousov SOL_SOCKET); 123106193f0bSKonstantin Belousov if (*mp != NULL) 1232339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1233be8a62e8SPoul-Henning Kamp } 123482c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 123582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 123682c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 123782c23ebaSBill Fenner if (*mp) 123882c23ebaSBill Fenner mp = &(*mp)->m_next; 123982c23ebaSBill Fenner } 12404957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12414957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12424957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12434957466bSMatthew N. Dodd if (*mp) 12444957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12454957466bSMatthew N. Dodd } 124682c23ebaSBill Fenner #ifdef notyet 124782c23ebaSBill Fenner /* XXX 124882c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 124982c23ebaSBill Fenner * than they already were. 125082c23ebaSBill Fenner */ 125182c23ebaSBill Fenner /* options were tossed already */ 125282c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 125382c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 125482c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 125582c23ebaSBill Fenner if (*mp) 125682c23ebaSBill Fenner mp = &(*mp)->m_next; 125782c23ebaSBill Fenner } 125882c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 125982c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1260e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 126182c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 126282c23ebaSBill Fenner if (*mp) 126382c23ebaSBill Fenner mp = &(*mp)->m_next; 126482c23ebaSBill Fenner } 126582c23ebaSBill Fenner #endif 126682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1267d314ad7bSJulian Elischer struct ifnet *ifp; 1268d314ad7bSJulian Elischer struct sdlbuf { 126982c23ebaSBill Fenner struct sockaddr_dl sdl; 1270d314ad7bSJulian Elischer u_char pad[32]; 1271d314ad7bSJulian Elischer } sdlbuf; 1272d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1273d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 127482c23ebaSBill Fenner 127546f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 127646f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12774a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1278d314ad7bSJulian Elischer /* 1279d314ad7bSJulian Elischer * Change our mind and don't try copy. 1280d314ad7bSJulian Elischer */ 128146f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 128246f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1283d314ad7bSJulian Elischer goto makedummy; 1284d314ad7bSJulian Elischer } 1285d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1286d314ad7bSJulian Elischer } else { 1287d314ad7bSJulian Elischer makedummy: 128846f2df9cSSergey Kandaurov sdl2->sdl_len = 128946f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1290d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1291d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1292d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1293d314ad7bSJulian Elischer } 1294d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 129582c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 129682c23ebaSBill Fenner if (*mp) 129782c23ebaSBill Fenner mp = &(*mp)->m_next; 129882c23ebaSBill Fenner } 12993cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 13003cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 13013cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 13023cca425bSMichael Tuexen if (*mp) 13033cca425bSMichael Tuexen mp = &(*mp)->m_next; 13043cca425bSMichael Tuexen } 13059d3ddf43SAdrian Chadd 13069d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13079d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13089d3ddf43SAdrian Chadd 13099d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13109d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13119d3ddf43SAdrian Chadd 13129d3ddf43SAdrian Chadd /* 13139d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13149d3ddf43SAdrian Chadd * other - don't populate both? 13159d3ddf43SAdrian Chadd */ 13169d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13179d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13189d3ddf43SAdrian Chadd if (*mp) 13199d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13209d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13219d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13229d3ddf43SAdrian Chadd if (*mp) 13239d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13249d3ddf43SAdrian Chadd } 13259d3ddf43SAdrian Chadd 13269d3ddf43SAdrian Chadd #ifdef RSS 13279d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13289d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13299d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13309d3ddf43SAdrian Chadd 13319d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13329d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13339d3ddf43SAdrian Chadd 13349d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13359d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13369d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13379d3ddf43SAdrian Chadd if (*mp) 13389d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13399d3ddf43SAdrian Chadd } 13409d3ddf43SAdrian Chadd } 13419d3ddf43SAdrian Chadd #endif 134282c23ebaSBill Fenner } 134382c23ebaSBill Fenner 13444d2e3692SLuigi Rizzo /* 134530916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 134630916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 134730916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 134830916a2dSRobert Watson * compiled. 13494d2e3692SLuigi Rizzo */ 13503e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on); 135182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 135282cea7e6SBjoern A. Zeeb 135382cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 135482cea7e6SBjoern A. Zeeb 1355df8bae1dSRodney W. Grimes int 1356f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1357f0068c4aSGarrett Wollman { 13588b615593SMarko Zec 1359f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1360f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1361f0068c4aSGarrett Wollman return EOPNOTSUPP; 1362f0068c4aSGarrett Wollman 1363603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1364f0068c4aSGarrett Wollman return EADDRINUSE; 1365f0068c4aSGarrett Wollman 1366603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13671c5de19aSGarrett Wollman /* 13681c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13691c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13701c5de19aSGarrett Wollman */ 1371603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1372603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1373603724d3SBjoern A. Zeeb V_rsvp_on++; 13741c5de19aSGarrett Wollman } 1375f0068c4aSGarrett Wollman 1376f0068c4aSGarrett Wollman return 0; 1377f0068c4aSGarrett Wollman } 1378f0068c4aSGarrett Wollman 1379f0068c4aSGarrett Wollman int 1380f0068c4aSGarrett Wollman ip_rsvp_done(void) 1381f0068c4aSGarrett Wollman { 13828b615593SMarko Zec 1383603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13841c5de19aSGarrett Wollman /* 13851c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13861c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13871c5de19aSGarrett Wollman */ 1388603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1389603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1390603724d3SBjoern A. Zeeb V_rsvp_on--; 13911c5de19aSGarrett Wollman } 1392f0068c4aSGarrett Wollman return 0; 1393f0068c4aSGarrett Wollman } 1394bbb4330bSLuigi Rizzo 13958f5a8818SKevin Lo int 13968f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1397bbb4330bSLuigi Rizzo { 13988f5a8818SKevin Lo struct mbuf *m; 13998f5a8818SKevin Lo 14008f5a8818SKevin Lo m = *mp; 14018f5a8818SKevin Lo *mp = NULL; 14028b615593SMarko Zec 1403bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14048f5a8818SKevin Lo *mp = m; 14058f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14068f5a8818SKevin Lo return (IPPROTO_DONE); 1407bbb4330bSLuigi Rizzo } 1408bbb4330bSLuigi Rizzo 1409bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1410bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1411bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1412bbb4330bSLuigi Rizzo */ 1413bbb4330bSLuigi Rizzo 1414603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1415bbb4330bSLuigi Rizzo m_freem(m); 14168f5a8818SKevin Lo return (IPPROTO_DONE); 1417bbb4330bSLuigi Rizzo } 1418bbb4330bSLuigi Rizzo 1419603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14208f5a8818SKevin Lo *mp = m; 14218f5a8818SKevin Lo rip_input(mp, offp, proto); 14228f5a8818SKevin Lo return (IPPROTO_DONE); 1423bbb4330bSLuigi Rizzo } 1424bbb4330bSLuigi Rizzo /* Drop the packet */ 1425bbb4330bSLuigi Rizzo m_freem(m); 14268f5a8818SKevin Lo return (IPPROTO_DONE); 1427bbb4330bSLuigi Rizzo } 1428