1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66983066f0SAlexander V. Chernikov #include <net/route/nhop.h> 67748e0b0aSGarrett Wollman #include <net/netisr.h> 68b2bdc62aSAdrian Chadd #include <net/rss_config.h> 694b79449eSBjoern A. Zeeb #include <net/vnet.h> 70df8bae1dSRodney W. Grimes 71df8bae1dSRodney W. Grimes #include <netinet/in.h> 7257f60867SMark Johnston #include <netinet/in_kdtrace.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 74b5e8ce9fSBruce Evans #include <netinet/in_var.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip.h> 76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h> 77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 79eddfbb76SRobert Watson #include <netinet/ip_fw.h> 80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 81ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8258938916SGarrett Wollman #include <machine/in_cksum.h> 83a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 8565634ae7SWojciech Macek #include <netinet/ip_mroute.h> 86df8bae1dSRodney W. Grimes 87fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 88fcf59617SAndrey V. Elsukov 89f0068c4aSGarrett Wollman #include <sys/socketvar.h> 906ddbf1e2SGary Palmer 91aed55708SRobert Watson #include <security/mac/mac_framework.h> 92aed55708SRobert Watson 93d2035ffbSEd Maste #ifdef CTASSERT 94d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 95d2035ffbSEd Maste #endif 96d2035ffbSEd Maste 971dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 98843b0e57SXin LI extern void ipreass_init(void); 99843b0e57SXin LI extern void ipreass_drain(void); 100843b0e57SXin LI extern void ipreass_slowtimo(void); 1011dbefcc0SGleb Smirnoff #ifdef VIMAGE 102843b0e57SXin LI extern void ipreass_destroy(void); 1031dbefcc0SGleb Smirnoff #endif 1041dbefcc0SGleb Smirnoff 10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10682cea7e6SBjoern A. Zeeb 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 109eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1108b615593SMarko Zec "Enable IP forwarding between interfaces"); 1110312fbe9SPoul-Henning Kamp 1128ad114c0SGeorge V. Neville-Neil /* 1138ad114c0SGeorge V. Neville-Neil * Respond with an ICMP host redirect when we forward a packet out of 1148ad114c0SGeorge V. Neville-Neil * the same interface on which it was received. See RFC 792. 1158ad114c0SGeorge V. Neville-Neil */ 1168ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1; 1176df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 118eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1198b615593SMarko Zec "Enable sending IP redirects"); 1200312fbe9SPoul-Henning Kamp 12194df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false; 12294df3271SGleb Smirnoff #define V_ip_strong_es VNET(ip_strong_es) 12394df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es, 12494df3271SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false, 12594df3271SGleb Smirnoff "Packet's IP destination address must match address on arrival interface"); 126b3e95d4eSJonathan Lemon 1272ce85919SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_sav) = true; 1282ce85919SGleb Smirnoff #define V_ip_sav VNET(ip_sav) 1292ce85919SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation, 1302ce85919SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true, 1312ce85919SGleb Smirnoff "Drop incoming packets with source address that is a local address"); 1322ce85919SGleb Smirnoff 133b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 134df8bae1dSRodney W. Grimes 135d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 136d4b5cae4SRobert Watson .nh_name = "ip", 137d4b5cae4SRobert Watson .nh_handler = ip_input, 138d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 139b8bc95cdSAdrian Chadd #ifdef RSS 1402527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 141b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 142b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 143b8bc95cdSAdrian Chadd #else 144d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 145b8bc95cdSAdrian Chadd #endif 146d4b5cae4SRobert Watson }; 147ca925d9cSJonathan Lemon 148b8bc95cdSAdrian Chadd #ifdef RSS 149b8bc95cdSAdrian Chadd /* 150b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 151b8bc95cdSAdrian Chadd * to have a flowid already calculated. 152b8bc95cdSAdrian Chadd * 153b8bc95cdSAdrian Chadd * It should likely have something that assert it 154b8bc95cdSAdrian Chadd * actually has valid flow details. 155b8bc95cdSAdrian Chadd */ 156b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 157b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 158b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 159b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 160499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 161b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 162b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 163b8bc95cdSAdrian Chadd }; 164b8bc95cdSAdrian Chadd #endif 165b8bc95cdSAdrian Chadd 166df8bae1dSRodney W. Grimes extern struct domain inetdomain; 167f0ffb944SJulian Elischer extern struct protosw inetsw[]; 168df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 16982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17082cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17182cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 172ca925d9cSJonathan Lemon 173c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */ 174c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr))); 175c8ee75f2SGleb Smirnoff 1760312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1770312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1783d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1790312fbe9SPoul-Henning Kamp #endif 1800312fbe9SPoul-Henning Kamp 1811b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18282cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1836df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 184eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 185eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1861b968362SDag-Erling Smørgrav #endif 187eddfbb76SRobert Watson 188315e3e38SRobert Watson /* 1895da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1905923c293SGleb Smirnoff */ 1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1935da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1945da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1955923c293SGleb Smirnoff 1965923c293SGleb Smirnoff #ifdef VIMAGE 1975da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1985923c293SGleb Smirnoff #endif /* VIMAGE */ 1995923c293SGleb Smirnoff 2005923c293SGleb Smirnoff /* 201315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2025923c293SGleb Smirnoff * into ipstat treated as an array. 203315e3e38SRobert Watson */ 204315e3e38SRobert Watson void 205315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 206315e3e38SRobert Watson { 207315e3e38SRobert Watson 2085da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 209315e3e38SRobert Watson } 210315e3e38SRobert Watson 211315e3e38SRobert Watson void 212315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 213315e3e38SRobert Watson { 214315e3e38SRobert Watson 2155da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 216315e3e38SRobert Watson } 217315e3e38SRobert Watson 218d4b5cae4SRobert Watson static int 219d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 220d4b5cae4SRobert Watson { 221d4b5cae4SRobert Watson int error, qlimit; 222d4b5cae4SRobert Watson 223d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 224d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 225d4b5cae4SRobert Watson if (error || !req->newptr) 226d4b5cae4SRobert Watson return (error); 227d4b5cae4SRobert Watson if (qlimit < 1) 228d4b5cae4SRobert Watson return (EINVAL); 229d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 230d4b5cae4SRobert Watson } 231d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 2327029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2337029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 234d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 235d4b5cae4SRobert Watson 236d4b5cae4SRobert Watson static int 237d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 238d4b5cae4SRobert Watson { 239d4b5cae4SRobert Watson u_int64_t qdrops_long; 240d4b5cae4SRobert Watson int error, qdrops; 241d4b5cae4SRobert Watson 242d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 243d4b5cae4SRobert Watson qdrops = qdrops_long; 244d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 245d4b5cae4SRobert Watson if (error || !req->newptr) 246d4b5cae4SRobert Watson return (error); 247d4b5cae4SRobert Watson if (qdrops != 0) 248d4b5cae4SRobert Watson return (EINVAL); 249d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 250d4b5cae4SRobert Watson return (0); 251d4b5cae4SRobert Watson } 252d4b5cae4SRobert Watson 253d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 2547029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 2557029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 256d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 257d4b5cae4SRobert Watson 258b8bc95cdSAdrian Chadd #ifdef RSS 259b8bc95cdSAdrian Chadd static int 260b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 261b8bc95cdSAdrian Chadd { 262b8bc95cdSAdrian Chadd int error, qlimit; 263b8bc95cdSAdrian Chadd 264b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 265b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 266b8bc95cdSAdrian Chadd if (error || !req->newptr) 267b8bc95cdSAdrian Chadd return (error); 268b8bc95cdSAdrian Chadd if (qlimit < 1) 269b8bc95cdSAdrian Chadd return (EINVAL); 270b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 271b8bc95cdSAdrian Chadd } 2727faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2737029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2747029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2757faa0d21SAndrey V. Elsukov "I", "Maximum size of the IP direct input queue"); 276b8bc95cdSAdrian Chadd 277b8bc95cdSAdrian Chadd static int 278b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 279b8bc95cdSAdrian Chadd { 280b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 281b8bc95cdSAdrian Chadd int error, qdrops; 282b8bc95cdSAdrian Chadd 283b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 284b8bc95cdSAdrian Chadd qdrops = qdrops_long; 285b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 286b8bc95cdSAdrian Chadd if (error || !req->newptr) 287b8bc95cdSAdrian Chadd return (error); 288b8bc95cdSAdrian Chadd if (qdrops != 0) 289b8bc95cdSAdrian Chadd return (EINVAL); 290b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 291b8bc95cdSAdrian Chadd return (0); 292b8bc95cdSAdrian Chadd } 293b8bc95cdSAdrian Chadd 2947faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 2957029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 2967029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 297b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 298b8bc95cdSAdrian Chadd #endif /* RSS */ 299b8bc95cdSAdrian Chadd 300df8bae1dSRodney W. Grimes /* 301df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 302df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 303df8bae1dSRodney W. Grimes */ 304*89128ff3SGleb Smirnoff static void 305*89128ff3SGleb Smirnoff ip_vnet_init(void *arg __unused) 306df8bae1dSRodney W. Grimes { 307b252313fSGleb Smirnoff struct pfil_head_args args; 308df8bae1dSRodney W. Grimes 309d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 310603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3111ed81b73SMarko Zec 3121ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3131dbefcc0SGleb Smirnoff ipreass_init(); 3141ed81b73SMarko Zec 3150b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 316b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 317b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 318b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 319b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 320b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3210b4b0b0fSJulian Elischer 322ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 323ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 324ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 325ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 326ef91a976SAndrey V. Elsukov __func__); 327ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 328ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 329ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 330ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 331ef91a976SAndrey V. Elsukov __func__); 332ef91a976SAndrey V. Elsukov 333484149deSBjoern A. Zeeb #ifdef VIMAGE 334484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 335484149deSBjoern A. Zeeb #ifdef RSS 336484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 337484149deSBjoern A. Zeeb #endif 338484149deSBjoern A. Zeeb #endif 339*89128ff3SGleb Smirnoff } 340*89128ff3SGleb Smirnoff VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 341*89128ff3SGleb Smirnoff ip_vnet_init, NULL); 342*89128ff3SGleb Smirnoff 343*89128ff3SGleb Smirnoff 344*89128ff3SGleb Smirnoff static void 345*89128ff3SGleb Smirnoff ip_init(const void *unused __unused) 346*89128ff3SGleb Smirnoff { 347*89128ff3SGleb Smirnoff struct protosw *pr; 3481ed81b73SMarko Zec 349f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 350*89128ff3SGleb Smirnoff KASSERT(pr, ("%s: PF_INET not found", __func__)); 351db09bef3SAndre Oppermann 352db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 353*89128ff3SGleb Smirnoff for (int i = 0; i < IPPROTO_MAX; i++) 354df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 355db09bef3SAndre Oppermann /* 356db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 357db09bef3SAndre Oppermann * in ip_protox[]. 358db09bef3SAndre Oppermann */ 359f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 360f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 361df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 362db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 363db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 364db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 365df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 366db09bef3SAndre Oppermann } 367194a213eSAndrey A. Chernov 368d4b5cae4SRobert Watson netisr_register(&ip_nh); 369b8bc95cdSAdrian Chadd #ifdef RSS 370b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 371b8bc95cdSAdrian Chadd #endif 372df8bae1dSRodney W. Grimes } 373*89128ff3SGleb Smirnoff SYSINIT(ip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_init, NULL); 374df8bae1dSRodney W. Grimes 3759802380eSBjoern A. Zeeb #ifdef VIMAGE 3763f58662dSBjoern A. Zeeb static void 3773f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3789802380eSBjoern A. Zeeb { 379ef91a976SAndrey V. Elsukov int error; 3804d3dfd45SMikolaj Golub 381484149deSBjoern A. Zeeb #ifdef RSS 382484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 383484149deSBjoern A. Zeeb #endif 384484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 385484149deSBjoern A. Zeeb 386b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 387ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 388ef91a976SAndrey V. Elsukov if (error != 0) { 389ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 390ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 391ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 392ef91a976SAndrey V. Elsukov } 393ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 394ef91a976SAndrey V. Elsukov if (error != 0) { 395ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 396ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 397ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 398ef91a976SAndrey V. Elsukov } 39989856f7eSBjoern A. Zeeb 40089856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 40189856f7eSBjoern A. Zeeb in_ifscrub_all(); 40289856f7eSBjoern A. Zeeb 40389856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 404b1d63265SAlexander V. Chernikov rib_flush_routes_family(AF_INET); 4059802380eSBjoern A. Zeeb 406e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4071dbefcc0SGleb Smirnoff ipreass_destroy(); 40889856f7eSBjoern A. Zeeb 40989856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 41089856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4119802380eSBjoern A. Zeeb } 4123f58662dSBjoern A. Zeeb 4133f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4149802380eSBjoern A. Zeeb #endif 4159802380eSBjoern A. Zeeb 416b8bc95cdSAdrian Chadd #ifdef RSS 417b8bc95cdSAdrian Chadd /* 418b8bc95cdSAdrian Chadd * IP direct input routine. 419b8bc95cdSAdrian Chadd * 420b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 421b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 422b8bc95cdSAdrian Chadd */ 423b8bc95cdSAdrian Chadd void 424b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 425b8bc95cdSAdrian Chadd { 426b8bc95cdSAdrian Chadd struct ip *ip; 427b8bc95cdSAdrian Chadd int hlen; 428b8bc95cdSAdrian Chadd 429b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 430b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 431b8bc95cdSAdrian Chadd 432fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 433fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 434fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 435fcf59617SAndrey V. Elsukov return; 436fcf59617SAndrey V. Elsukov } 437fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 438b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 439b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 440b8bc95cdSAdrian Chadd return; 441b8bc95cdSAdrian Chadd } 442b8bc95cdSAdrian Chadd #endif 443b8bc95cdSAdrian Chadd 4444d2e3692SLuigi Rizzo /* 445df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 446df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 447df8bae1dSRodney W. Grimes */ 448c67b1d17SGarrett Wollman void 449c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 450df8bae1dSRodney W. Grimes { 45165634ae7SWojciech Macek MROUTER_RLOCK_TRACKER; 4529188b4a1SAndre Oppermann struct ip *ip = NULL; 4535da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 454ca925d9cSJonathan Lemon struct ifaddr *ifa; 4550aade26eSRobert Watson struct ifnet *ifp; 45694df3271SGleb Smirnoff int hlen = 0; 45721d172a3SGleb Smirnoff uint16_t sum, ip_len; 45802c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 459f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 46094df3271SGleb Smirnoff bool strong_es; 461b715f178SLuigi Rizzo 462fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 463b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 464db40007dSAndrew R. Reiter 465ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 46676ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 46776ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4682b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 46953be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4708f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4719b932e9eSAndre Oppermann goto ours; 4722b25acc1SLuigi Rizzo } 4732b25acc1SLuigi Rizzo 47486425c62SRobert Watson IPSTAT_INC(ips_total); 47558938916SGarrett Wollman 4760359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < sizeof(struct ip))) 47758938916SGarrett Wollman goto tooshort; 47858938916SGarrett Wollman 4790359e7a5SMateusz Guzik if (m->m_len < sizeof(struct ip)) { 4800359e7a5SMateusz Guzik m = m_pullup(m, sizeof(struct ip)); 4810359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 48286425c62SRobert Watson IPSTAT_INC(ips_toosmall); 483c67b1d17SGarrett Wollman return; 484df8bae1dSRodney W. Grimes } 4850359e7a5SMateusz Guzik } 486df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48758938916SGarrett Wollman 4880359e7a5SMateusz Guzik if (__predict_false(ip->ip_v != IPVERSION)) { 48986425c62SRobert Watson IPSTAT_INC(ips_badvers); 490df8bae1dSRodney W. Grimes goto bad; 491df8bae1dSRodney W. Grimes } 49258938916SGarrett Wollman 49353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4940359e7a5SMateusz Guzik if (__predict_false(hlen < sizeof(struct ip))) { /* minimum header length */ 49586425c62SRobert Watson IPSTAT_INC(ips_badhlen); 496df8bae1dSRodney W. Grimes goto bad; 497df8bae1dSRodney W. Grimes } 498df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4990359e7a5SMateusz Guzik m = m_pullup(m, hlen); 5000359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 50186425c62SRobert Watson IPSTAT_INC(ips_badhlen); 502c67b1d17SGarrett Wollman return; 503df8bae1dSRodney W. Grimes } 504df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 505df8bae1dSRodney W. Grimes } 50633841545SHajimu UMEMOTO 50757f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50857f60867SMark Johnston 5096c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5100aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5116c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5126c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5130aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 51486425c62SRobert Watson IPSTAT_INC(ips_badaddr); 51533841545SHajimu UMEMOTO goto bad; 51633841545SHajimu UMEMOTO } 51733841545SHajimu UMEMOTO } 51833841545SHajimu UMEMOTO 519db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 520db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 521db4f9cc7SJonathan Lemon } else { 52258938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 52347c861ecSBrian Somers sum = in_cksum_hdr(ip); 52458938916SGarrett Wollman } else { 52547c861ecSBrian Somers sum = in_cksum(m, hlen); 52658938916SGarrett Wollman } 527db4f9cc7SJonathan Lemon } 5280359e7a5SMateusz Guzik if (__predict_false(sum)) { 52986425c62SRobert Watson IPSTAT_INC(ips_badsum); 530df8bae1dSRodney W. Grimes goto bad; 531df8bae1dSRodney W. Grimes } 532df8bae1dSRodney W. Grimes 53302b199f1SMax Laier #ifdef ALTQ 53402b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 53502b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53602b199f1SMax Laier return; 53702b199f1SMax Laier #endif 53802b199f1SMax Laier 53921d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 5400359e7a5SMateusz Guzik if (__predict_false(ip_len < hlen)) { 54186425c62SRobert Watson IPSTAT_INC(ips_badlen); 542df8bae1dSRodney W. Grimes goto bad; 543df8bae1dSRodney W. Grimes } 544df8bae1dSRodney W. Grimes 545df8bae1dSRodney W. Grimes /* 546df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 547df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 548df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 549df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 550df8bae1dSRodney W. Grimes */ 5510359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < ip_len)) { 55258938916SGarrett Wollman tooshort: 55386425c62SRobert Watson IPSTAT_INC(ips_tooshort); 554df8bae1dSRodney W. Grimes goto bad; 555df8bae1dSRodney W. Grimes } 55621d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 557df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55821d172a3SGleb Smirnoff m->m_len = ip_len; 55921d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 560df8bae1dSRodney W. Grimes } else 56121d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 562df8bae1dSRodney W. Grimes } 563b8bc95cdSAdrian Chadd 564ad9f4d6aSAndrey V. Elsukov /* 565ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 566f389439fSBjoern A. Zeeb * ip_tryforward() may generate redirects these days. 567f389439fSBjoern A. Zeeb * XXX the logic below falling through to normal processing 568f389439fSBjoern A. Zeeb * if redirects are required should be revisited as well. 569ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 570ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 571ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 572ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 573ad9f4d6aSAndrey V. Elsukov * ip pointer. 574ad9f4d6aSAndrey V. Elsukov */ 5758ad114c0SGeorge V. Neville-Neil if (V_ipforwarding != 0 576fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 577fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 578fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 579ad9f4d6aSAndrey V. Elsukov #endif 580ad9f4d6aSAndrey V. Elsukov ) { 581f389439fSBjoern A. Zeeb /* 582f389439fSBjoern A. Zeeb * ip_dooptions() was run so we can ignore the source route (or 583f389439fSBjoern A. Zeeb * any IP options case) case for redirects in ip_tryforward(). 584f389439fSBjoern A. Zeeb */ 585ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 58633872124SGeorge V. Neville-Neil return; 587ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 588ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 589ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 590ad9f4d6aSAndrey V. Elsukov goto ours; 591ad9f4d6aSAndrey V. Elsukov } 592ad9f4d6aSAndrey V. Elsukov } 593fcf59617SAndrey V. Elsukov 594fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 59514dd6717SSam Leffler /* 596ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 59714dd6717SSam Leffler */ 598fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 599fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 600c21fd232SAndre Oppermann goto passin; 601ad9f4d6aSAndrey V. Elsukov #endif 602fcf59617SAndrey V. Elsukov 603c4ac87eaSDarren Reed /* 604134ea224SSam Leffler * Run through list of hooks for input packets. 605f51f805fSSam Leffler * 606f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 607f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 608f51f805fSSam Leffler * ip_forward to do the right thing. 609c4ac87eaSDarren Reed */ 610c21fd232SAndre Oppermann 611c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 612b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 613c21fd232SAndre Oppermann goto passin; 614c21fd232SAndre Oppermann 615f51f805fSSam Leffler odst = ip->ip_dst; 616b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 617b252313fSGleb Smirnoff PFIL_PASS) 618beec8214SDarren Reed return; 619134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 620c4ac87eaSDarren Reed return; 6219b932e9eSAndre Oppermann 622c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 62302c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6249b932e9eSAndre Oppermann 6259b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6269b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6279b932e9eSAndre Oppermann goto ours; 6289b932e9eSAndre Oppermann } 629ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 630de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 631099dd043SAndre Oppermann /* 632ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 633ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 634ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 635099dd043SAndre Oppermann */ 636ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 637099dd043SAndre Oppermann return; 638099dd043SAndre Oppermann } 639ffdbf9daSAndrey V. Elsukov } 640c21fd232SAndre Oppermann passin: 64121d172a3SGleb Smirnoff 64221d172a3SGleb Smirnoff /* 643df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 644df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 645df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 646df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 647df8bae1dSRodney W. Grimes */ 6489b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 649c67b1d17SGarrett Wollman return; 650df8bae1dSRodney W. Grimes 651f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 652f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 653f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 654f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 655f0068c4aSGarrett Wollman * grabbing the packet. 656f0068c4aSGarrett Wollman */ 6570359e7a5SMateusz Guzik if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on) 658f0068c4aSGarrett Wollman goto ours; 659f0068c4aSGarrett Wollman 660df8bae1dSRodney W. Grimes /* 661df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 662cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 663cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 664cc766e04SGarrett Wollman * with it). 665df8bae1dSRodney W. Grimes */ 666d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 667cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 668cc766e04SGarrett Wollman goto ours; 669cc766e04SGarrett Wollman 6707538a9a0SJonathan Lemon /* 671823db0e9SDon Lewis * Enable a consistency check between the destination address 672823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 67394df3271SGleb Smirnoff * strong ES model) with a list of additional predicates: 67494df3271SGleb Smirnoff * - if IP forwarding is disabled 67594df3271SGleb Smirnoff * - the packet is not locally generated 67694df3271SGleb Smirnoff * - the packet is not subject to 'ipfw fwd' 67794df3271SGleb Smirnoff * - Interface is not running CARP. If the packet got here, we already 67894df3271SGleb Smirnoff * checked it with carp_iamatch() and carp_forus(). 679823db0e9SDon Lewis */ 68094df3271SGleb Smirnoff strong_es = V_ip_strong_es && (V_ipforwarding == 0) && 68181674f12SGleb Smirnoff ((ifp->if_flags & IFF_LOOPBACK) == 0) && 68254bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 683823db0e9SDon Lewis 684ca925d9cSJonathan Lemon /* 685ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 686ca925d9cSJonathan Lemon */ 687c8ee75f2SGleb Smirnoff CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 68894df3271SGleb Smirnoff if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr) 68994df3271SGleb Smirnoff continue; 69094df3271SGleb Smirnoff 691f9e354dfSJulian Elischer /* 69294df3271SGleb Smirnoff * net.inet.ip.rfc1122_strong_es: the address matches, verify 69394df3271SGleb Smirnoff * that the packet arrived via the correct interface. 694f9e354dfSJulian Elischer */ 69594df3271SGleb Smirnoff if (__predict_false(strong_es && ia->ia_ifp != ifp)) { 69694df3271SGleb Smirnoff IPSTAT_INC(ips_badaddr); 69794df3271SGleb Smirnoff goto bad; 698ca925d9cSJonathan Lemon } 69994df3271SGleb Smirnoff 7002ce85919SGleb Smirnoff /* 7012ce85919SGleb Smirnoff * net.inet.ip.source_address_validation: drop incoming 7022ce85919SGleb Smirnoff * packets that pretend to be ours. 7032ce85919SGleb Smirnoff */ 7042ce85919SGleb Smirnoff if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) && 7052ce85919SGleb Smirnoff __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) { 7062ce85919SGleb Smirnoff IPSTAT_INC(ips_badaddr); 7072ce85919SGleb Smirnoff goto bad; 7082ce85919SGleb Smirnoff } 7092ce85919SGleb Smirnoff 71094df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 71194df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); 71294df3271SGleb Smirnoff goto ours; 7138c0fec80SRobert Watson } 7142d9cfabaSRobert Watson 715823db0e9SDon Lewis /* 716ca925d9cSJonathan Lemon * Check for broadcast addresses. 717ca925d9cSJonathan Lemon * 718ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 719ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 720ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 721ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 722823db0e9SDon Lewis */ 72381674f12SGleb Smirnoff if (ifp->if_flags & IFF_BROADCAST) { 724d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 725ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 726ca925d9cSJonathan Lemon continue; 727ca925d9cSJonathan Lemon ia = ifatoia(ifa); 728df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7290aade26eSRobert Watson ip->ip_dst.s_addr) { 7307caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7317caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7327caf4ab7SGleb Smirnoff m->m_pkthdr.len); 733df8bae1dSRodney W. Grimes goto ours; 7340aade26eSRobert Watson } 7350ac40133SBrian Somers #ifdef BOOTP_COMPAT 7360aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7377caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7387caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7397caf4ab7SGleb Smirnoff m->m_pkthdr.len); 740ca925d9cSJonathan Lemon goto ours; 7410aade26eSRobert Watson } 7420ac40133SBrian Somers #endif 743df8bae1dSRodney W. Grimes } 74419e5b0a7SRobert Watson ia = NULL; 745df8bae1dSRodney W. Grimes } 746df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 74765634ae7SWojciech Macek MROUTER_RLOCK(); 74803b0505bSZhenlei Huang /* 74903b0505bSZhenlei Huang * RFC 3927 2.7: Do not forward multicast packets from 75003b0505bSZhenlei Huang * IN_LINKLOCAL. 75103b0505bSZhenlei Huang */ 7523d846e48SZhenlei Huang if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 753df8bae1dSRodney W. Grimes /* 754df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 755df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 756df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 757df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 758df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 759df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 760df8bae1dSRodney W. Grimes */ 7610aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 76265634ae7SWojciech Macek MROUTER_RUNLOCK(); 76386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 764df8bae1dSRodney W. Grimes m_freem(m); 765c67b1d17SGarrett Wollman return; 766df8bae1dSRodney W. Grimes } 767df8bae1dSRodney W. Grimes 768df8bae1dSRodney W. Grimes /* 76911612afaSDima Dorfman * The process-level routing daemon needs to receive 770df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 771df8bae1dSRodney W. Grimes * host belongs to their destination groups. 772df8bae1dSRodney W. Grimes */ 77365634ae7SWojciech Macek if (ip->ip_p == IPPROTO_IGMP) { 77465634ae7SWojciech Macek MROUTER_RUNLOCK(); 775df8bae1dSRodney W. Grimes goto ours; 77665634ae7SWojciech Macek } 77786425c62SRobert Watson IPSTAT_INC(ips_forward); 778df8bae1dSRodney W. Grimes } 77965634ae7SWojciech Macek MROUTER_RUNLOCK(); 780df8bae1dSRodney W. Grimes /* 781d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 782d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 783d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 784df8bae1dSRodney W. Grimes */ 785df8bae1dSRodney W. Grimes goto ours; 786df8bae1dSRodney W. Grimes } 787df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 788df8bae1dSRodney W. Grimes goto ours; 789df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 790df8bae1dSRodney W. Grimes goto ours; 79103b0505bSZhenlei Huang /* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */ 7923d846e48SZhenlei Huang if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || 7933d846e48SZhenlei Huang IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 7943d846e48SZhenlei Huang IPSTAT_INC(ips_cantforward); 7953d846e48SZhenlei Huang m_freem(m); 7963d846e48SZhenlei Huang return; 7973d846e48SZhenlei Huang } 798df8bae1dSRodney W. Grimes 7996a800098SYoshinobu Inoue /* 800df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 801df8bae1dSRodney W. Grimes */ 802603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 80386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 804df8bae1dSRodney W. Grimes m_freem(m); 805546f251bSChris D. Faulhaber } else { 8069b932e9eSAndre Oppermann ip_forward(m, dchg); 807546f251bSChris D. Faulhaber } 808c67b1d17SGarrett Wollman return; 809df8bae1dSRodney W. Grimes 810df8bae1dSRodney W. Grimes ours: 811d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 812d0ebc0d2SYaroslav Tykhiy /* 813d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 814d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 815d0ebc0d2SYaroslav Tykhiy */ 8167caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 817d0ebc0d2SYaroslav Tykhiy return; 818d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 819d0ebc0d2SYaroslav Tykhiy 82063f8d699SJordan K. Hubbard /* 821b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 822ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 823df8bae1dSRodney W. Grimes */ 8248f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 825aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 826f0cada84SAndre Oppermann m = ip_reass(m); 827f0cada84SAndre Oppermann if (m == NULL) 828c67b1d17SGarrett Wollman return; 8296a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8307e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 83153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 832f0cada84SAndre Oppermann } 833f0cada84SAndre Oppermann 834fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 835fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 836fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 837fcf59617SAndrey V. Elsukov return; 838fcf59617SAndrey V. Elsukov } 839b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 84033841545SHajimu UMEMOTO 841df8bae1dSRodney W. Grimes /* 842df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 843df8bae1dSRodney W. Grimes */ 84486425c62SRobert Watson IPSTAT_INC(ips_delivered); 8459b932e9eSAndre Oppermann 8468f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 847c67b1d17SGarrett Wollman return; 848df8bae1dSRodney W. Grimes bad: 849df8bae1dSRodney W. Grimes m_freem(m); 850c67b1d17SGarrett Wollman } 851c67b1d17SGarrett Wollman 852c67b1d17SGarrett Wollman /* 853df8bae1dSRodney W. Grimes * IP timer processing; 854df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 855df8bae1dSRodney W. Grimes * queue, discard it. 856df8bae1dSRodney W. Grimes */ 857df8bae1dSRodney W. Grimes void 858f2565d68SRobert Watson ip_slowtimo(void) 859df8bae1dSRodney W. Grimes { 8608b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 861df8bae1dSRodney W. Grimes 8625ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8638b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8648b615593SMarko Zec CURVNET_SET(vnet_iter); 8651dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8668b615593SMarko Zec CURVNET_RESTORE(); 8678b615593SMarko Zec } 8685ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 869df8bae1dSRodney W. Grimes } 870df8bae1dSRodney W. Grimes 8719802380eSBjoern A. Zeeb void 8729802380eSBjoern A. Zeeb ip_drain(void) 8739802380eSBjoern A. Zeeb { 8749802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8759802380eSBjoern A. Zeeb 8769802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8779802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8789802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8791dbefcc0SGleb Smirnoff ipreass_drain(); 8808b615593SMarko Zec CURVNET_RESTORE(); 8818b615593SMarko Zec } 8825ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 883df8bae1dSRodney W. Grimes } 884df8bae1dSRodney W. Grimes 885df8bae1dSRodney W. Grimes /* 886de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 887de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 888de38924dSAndre Oppermann */ 889de38924dSAndre Oppermann int 8901b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 891de38924dSAndre Oppermann { 892de38924dSAndre Oppermann struct protosw *pr; 893de38924dSAndre Oppermann 894de38924dSAndre Oppermann /* Sanity checks. */ 8951b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 896de38924dSAndre Oppermann return (EPROTONOSUPPORT); 897de38924dSAndre Oppermann 898de38924dSAndre Oppermann /* 899de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 900de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 901de38924dSAndre Oppermann */ 902de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 903de38924dSAndre Oppermann if (pr == NULL) 904de38924dSAndre Oppermann return (EPFNOSUPPORT); 905de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 906de38924dSAndre Oppermann return (EEXIST); 907de38924dSAndre Oppermann 908de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 909de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 910de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 911de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 912de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 913de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 914de38924dSAndre Oppermann return (0); 915de38924dSAndre Oppermann } 916de38924dSAndre Oppermann } 917de38924dSAndre Oppermann return (EPROTONOSUPPORT); 918de38924dSAndre Oppermann } 919de38924dSAndre Oppermann 920de38924dSAndre Oppermann int 9211b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 922de38924dSAndre Oppermann { 923de38924dSAndre Oppermann struct protosw *pr; 924de38924dSAndre Oppermann 925de38924dSAndre Oppermann /* Sanity checks. */ 9261b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 927de38924dSAndre Oppermann return (EPROTONOSUPPORT); 928de38924dSAndre Oppermann 929de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 930de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 931de38924dSAndre Oppermann if (pr == NULL) 932de38924dSAndre Oppermann return (EPFNOSUPPORT); 933de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 934de38924dSAndre Oppermann return (ENOENT); 935de38924dSAndre Oppermann 936de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 937de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 938de38924dSAndre Oppermann return (0); 939de38924dSAndre Oppermann } 940de38924dSAndre Oppermann 941df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 942df8bae1dSRodney W. Grimes 0, 0, 0, 0, 943df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 944df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 945df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 946fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 9473b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 948df8bae1dSRodney W. Grimes }; 949df8bae1dSRodney W. Grimes 950df8bae1dSRodney W. Grimes /* 951df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 952df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 953df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 954df8bae1dSRodney W. Grimes * of codes and types. 955df8bae1dSRodney W. Grimes * 956df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 957df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 958df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 959df8bae1dSRodney W. Grimes * protocol deal with that. 960df8bae1dSRodney W. Grimes * 961df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 962df8bae1dSRodney W. Grimes * via a source route. 963df8bae1dSRodney W. Grimes */ 9649b932e9eSAndre Oppermann void 9659b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 966df8bae1dSRodney W. Grimes { 9672b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 968efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 969df8bae1dSRodney W. Grimes struct mbuf *mcopy; 970d14122b0SErmal Luçi struct sockaddr_in *sin; 9719b932e9eSAndre Oppermann struct in_addr dest; 972b835b6feSBjoern A. Zeeb struct route ro; 9734043ee3cSAlexander V. Chernikov uint32_t flowid; 974c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9753efc3014SJulian Elischer 976b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 977b8a6e03fSGleb Smirnoff 9789b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 97986425c62SRobert Watson IPSTAT_INC(ips_cantforward); 980df8bae1dSRodney W. Grimes m_freem(m); 981df8bae1dSRodney W. Grimes return; 982df8bae1dSRodney W. Grimes } 983fcf59617SAndrey V. Elsukov if ( 984fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH 985fcf59617SAndrey V. Elsukov V_ipstealth == 0 && 986fcf59617SAndrey V. Elsukov #endif 987fcf59617SAndrey V. Elsukov ip->ip_ttl <= IPTTLDEC) { 988fcf59617SAndrey V. Elsukov icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); 9898922ddbeSAndrey V. Elsukov return; 9908922ddbeSAndrey V. Elsukov } 991df8bae1dSRodney W. Grimes 992d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 993d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 994d14122b0SErmal Luçi sin->sin_family = AF_INET; 995d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 996d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 9974043ee3cSAlexander V. Chernikov flowid = m->m_pkthdr.flowid; 9984043ee3cSAlexander V. Chernikov ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); 999983066f0SAlexander V. Chernikov if (ro.ro_nh != NULL) { 1000983066f0SAlexander V. Chernikov ia = ifatoia(ro.ro_nh->nh_ifa); 100156844a62SErmal Luçi } else 100256844a62SErmal Luçi ia = NULL; 1003df8bae1dSRodney W. Grimes /* 1004bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 1005bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 1006bfef7ed4SIan Dowse * 10074d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 10084d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 10094d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 10104d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 10114d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 10124d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 10134d2e3692SLuigi Rizzo * 1014c3bef61eSKevin Lo * We don't use m_copym() because it might return a reference 1015bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 1016bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 1017bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 1018df8bae1dSRodney W. Grimes */ 1019dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 1020eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10219967cafcSSam Leffler /* 10229967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10239967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10249967cafcSSam Leffler * be conservative and just discard the copy since 10259967cafcSSam Leffler * code below may some day want the tags. 10269967cafcSSam Leffler */ 10279967cafcSSam Leffler m_free(mcopy); 10289967cafcSSam Leffler mcopy = NULL; 10299967cafcSSam Leffler } 1030bfef7ed4SIan Dowse if (mcopy != NULL) { 10318f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1032e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1033bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1034bfef7ed4SIan Dowse } 103504287599SRuslan Ermilov #ifdef IPSTEALTH 1036fcf59617SAndrey V. Elsukov if (V_ipstealth == 0) 103704287599SRuslan Ermilov #endif 103804287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 1039fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1040fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 1041fcf59617SAndrey V. Elsukov if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { 1042fcf59617SAndrey V. Elsukov /* mbuf consumed by IPsec */ 1043d16a2e47SMark Johnston RO_NHFREE(&ro); 1044fcf59617SAndrey V. Elsukov m_freem(mcopy); 1045fcf59617SAndrey V. Elsukov if (error != EINPROGRESS) 1046fcf59617SAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 1047b8a6e03fSGleb Smirnoff return; 104804287599SRuslan Ermilov } 1049fcf59617SAndrey V. Elsukov /* No IPsec processing required */ 1050fcf59617SAndrey V. Elsukov } 1051fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 1052df8bae1dSRodney W. Grimes /* 1053df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1054df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1055df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1056df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1057df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1058df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1059df8bae1dSRodney W. Grimes */ 10609b932e9eSAndre Oppermann dest.s_addr = 0; 1061efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1062efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1063983066f0SAlexander V. Chernikov struct nhop_object *nh; 106402c1c707SAndre Oppermann 1065983066f0SAlexander V. Chernikov nh = ro.ro_nh; 106602c1c707SAndre Oppermann 1067983066f0SAlexander V. Chernikov if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { 1068983066f0SAlexander V. Chernikov struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); 1069df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1070df8bae1dSRodney W. Grimes 1071983066f0SAlexander V. Chernikov if (nh_ia != NULL && 1072983066f0SAlexander V. Chernikov (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { 1073df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1074df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1075df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 107662e1a437SZhenlei Huang if (nh->nh_flags & NHF_GATEWAY) { 107762e1a437SZhenlei Huang if (nh->gw_sa.sa_family == AF_INET) 107862e1a437SZhenlei Huang dest.s_addr = nh->gw4_sa.sin_addr.s_addr; 107962e1a437SZhenlei Huang else /* Do not redirect in case gw is AF_INET6 */ 108062e1a437SZhenlei Huang type = 0; 108162e1a437SZhenlei Huang } else 108262e1a437SZhenlei Huang dest.s_addr = ip->ip_dst.s_addr; 1083df8bae1dSRodney W. Grimes } 1084df8bae1dSRodney W. Grimes } 108502c1c707SAndre Oppermann } 1086df8bae1dSRodney W. Grimes 1087b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1088b835b6feSBjoern A. Zeeb 1089983066f0SAlexander V. Chernikov if (error == EMSGSIZE && ro.ro_nh) 1090983066f0SAlexander V. Chernikov mtu = ro.ro_nh->nh_mtu; 1091983066f0SAlexander V. Chernikov RO_NHFREE(&ro); 1092b835b6feSBjoern A. Zeeb 1093df8bae1dSRodney W. Grimes if (error) 109486425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1095df8bae1dSRodney W. Grimes else { 109686425c62SRobert Watson IPSTAT_INC(ips_forward); 1097df8bae1dSRodney W. Grimes if (type) 109886425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1099df8bae1dSRodney W. Grimes else { 11009188b4a1SAndre Oppermann if (mcopy) 1101df8bae1dSRodney W. Grimes m_freem(mcopy); 1102b8a6e03fSGleb Smirnoff return; 1103df8bae1dSRodney W. Grimes } 1104df8bae1dSRodney W. Grimes } 11054f6c66ccSMatt Macy if (mcopy == NULL) 1106b8a6e03fSGleb Smirnoff return; 11074f6c66ccSMatt Macy 1108df8bae1dSRodney W. Grimes switch (error) { 1109df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1110df8bae1dSRodney W. Grimes /* type, code set above */ 1111df8bae1dSRodney W. Grimes break; 1112df8bae1dSRodney W. Grimes 1113efbad259SEdward Tomasz Napierala case ENETUNREACH: 1114df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1115df8bae1dSRodney W. Grimes case ENETDOWN: 1116df8bae1dSRodney W. Grimes case EHOSTDOWN: 1117df8bae1dSRodney W. Grimes default: 1118df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1119df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1120df8bae1dSRodney W. Grimes break; 1121df8bae1dSRodney W. Grimes 1122df8bae1dSRodney W. Grimes case EMSGSIZE: 1123df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1124df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 11259b932e9eSAndre Oppermann /* 1126b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1127b835b6feSBjoern A. Zeeb * interface MTU. 1128ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1129ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1130ab48768bSAndre Oppermann * current packet size. 11319b932e9eSAndre Oppermann */ 1132b835b6feSBjoern A. Zeeb if (mtu != 0) { 1133b835b6feSBjoern A. Zeeb if (ia != NULL) 1134b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1135b835b6feSBjoern A. Zeeb } else { 1136ab48768bSAndre Oppermann if (ia != NULL) 1137c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1138ab48768bSAndre Oppermann else 11398f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1140ab48768bSAndre Oppermann } 114186425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1142df8bae1dSRodney W. Grimes break; 1143df8bae1dSRodney W. Grimes 1144df8bae1dSRodney W. Grimes case ENOBUFS: 11453a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11463a06e3e0SRuslan Ermilov m_freem(mcopy); 1147b8a6e03fSGleb Smirnoff return; 1148df8bae1dSRodney W. Grimes } 1149c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1150df8bae1dSRodney W. Grimes } 1151df8bae1dSRodney W. Grimes 1152339efd75SMaxim Sobolev #define CHECK_SO_CT(sp, ct) \ 1153339efd75SMaxim Sobolev (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) 1154339efd75SMaxim Sobolev 115582c23ebaSBill Fenner void 1156f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1157f2565d68SRobert Watson struct mbuf *m) 115882c23ebaSBill Fenner { 115906193f0bSKonstantin Belousov bool stamped; 11608b615593SMarko Zec 116106193f0bSKonstantin Belousov stamped = false; 1162339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1163339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 116406193f0bSKonstantin Belousov struct bintime boottimebin, bt; 116506193f0bSKonstantin Belousov struct timespec ts1; 1166be8a62e8SPoul-Henning Kamp 116706193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 116806193f0bSKonstantin Belousov M_TSTMP)) { 116906193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 117006193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 117106193f0bSKonstantin Belousov getboottimebin(&boottimebin); 117206193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 117306193f0bSKonstantin Belousov } else { 1174be8a62e8SPoul-Henning Kamp bintime(&bt); 117506193f0bSKonstantin Belousov } 1176be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1177be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 117806193f0bSKonstantin Belousov if (*mp != NULL) { 1179be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 118006193f0bSKonstantin Belousov stamped = true; 118106193f0bSKonstantin Belousov } 1182be8a62e8SPoul-Henning Kamp } 1183339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 118406193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 1185c012cfe6SEd Maste struct timespec ts1; 118682c23ebaSBill Fenner struct timeval tv; 118782c23ebaSBill Fenner 118806193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 118906193f0bSKonstantin Belousov M_TSTMP)) { 119006193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 119106193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 119206193f0bSKonstantin Belousov getboottimebin(&boottimebin); 119306193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 119406193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 119506193f0bSKonstantin Belousov } else { 1196339efd75SMaxim Sobolev microtime(&tv); 119706193f0bSKonstantin Belousov } 119882c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 119982c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 120006193f0bSKonstantin Belousov if (*mp != NULL) { 120182c23ebaSBill Fenner mp = &(*mp)->m_next; 120206193f0bSKonstantin Belousov stamped = true; 120306193f0bSKonstantin Belousov } 1204339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 120506193f0bSKonstantin Belousov struct bintime boottimebin; 120606193f0bSKonstantin Belousov struct timespec ts, ts1; 1207339efd75SMaxim Sobolev 120806193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 120906193f0bSKonstantin Belousov M_TSTMP)) { 121006193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 121106193f0bSKonstantin Belousov getboottimebin(&boottimebin); 121206193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 12136040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 121406193f0bSKonstantin Belousov } else { 1215339efd75SMaxim Sobolev nanotime(&ts); 121606193f0bSKonstantin Belousov } 1217339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1218339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 121906193f0bSKonstantin Belousov if (*mp != NULL) { 1220339efd75SMaxim Sobolev mp = &(*mp)->m_next; 122106193f0bSKonstantin Belousov stamped = true; 122206193f0bSKonstantin Belousov } 1223339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1224339efd75SMaxim Sobolev struct timespec ts; 1225339efd75SMaxim Sobolev 122606193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 122706193f0bSKonstantin Belousov M_TSTMP)) 122806193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 122906193f0bSKonstantin Belousov else 1230339efd75SMaxim Sobolev nanouptime(&ts); 1231339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1232339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 123306193f0bSKonstantin Belousov if (*mp != NULL) { 123406193f0bSKonstantin Belousov mp = &(*mp)->m_next; 123506193f0bSKonstantin Belousov stamped = true; 123606193f0bSKonstantin Belousov } 123706193f0bSKonstantin Belousov } 123806193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 123906193f0bSKonstantin Belousov M_TSTMP)) { 124006193f0bSKonstantin Belousov struct sock_timestamp_info sti; 124106193f0bSKonstantin Belousov 124206193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 124306193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 124406193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 124506193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 124606193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 124706193f0bSKonstantin Belousov SOL_SOCKET); 124806193f0bSKonstantin Belousov if (*mp != NULL) 1249339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1250be8a62e8SPoul-Henning Kamp } 125182c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 125282c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 125382c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 125482c23ebaSBill Fenner if (*mp) 125582c23ebaSBill Fenner mp = &(*mp)->m_next; 125682c23ebaSBill Fenner } 12574957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12584957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12594957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12604957466bSMatthew N. Dodd if (*mp) 12614957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12624957466bSMatthew N. Dodd } 126382c23ebaSBill Fenner #ifdef notyet 126482c23ebaSBill Fenner /* XXX 126582c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 126682c23ebaSBill Fenner * than they already were. 126782c23ebaSBill Fenner */ 126882c23ebaSBill Fenner /* options were tossed already */ 126982c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 127082c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 127182c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 127282c23ebaSBill Fenner if (*mp) 127382c23ebaSBill Fenner mp = &(*mp)->m_next; 127482c23ebaSBill Fenner } 127582c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 127682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1277e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 127882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 127982c23ebaSBill Fenner if (*mp) 128082c23ebaSBill Fenner mp = &(*mp)->m_next; 128182c23ebaSBill Fenner } 128282c23ebaSBill Fenner #endif 128382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1284d314ad7bSJulian Elischer struct ifnet *ifp; 1285d314ad7bSJulian Elischer struct sdlbuf { 128682c23ebaSBill Fenner struct sockaddr_dl sdl; 1287d314ad7bSJulian Elischer u_char pad[32]; 1288d314ad7bSJulian Elischer } sdlbuf; 1289d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1290d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 129182c23ebaSBill Fenner 1292db0ac6deSCy Schubert if ((ifp = m->m_pkthdr.rcvif)) { 12934a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1294d314ad7bSJulian Elischer /* 1295d314ad7bSJulian Elischer * Change our mind and don't try copy. 1296d314ad7bSJulian Elischer */ 129746f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 129846f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1299d314ad7bSJulian Elischer goto makedummy; 1300d314ad7bSJulian Elischer } 1301d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1302d314ad7bSJulian Elischer } else { 1303d314ad7bSJulian Elischer makedummy: 130446f2df9cSSergey Kandaurov sdl2->sdl_len = 130546f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1306d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1307d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1308d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1309d314ad7bSJulian Elischer } 1310d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 131182c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 131282c23ebaSBill Fenner if (*mp) 131382c23ebaSBill Fenner mp = &(*mp)->m_next; 131482c23ebaSBill Fenner } 13153cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 13163cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 13173cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 13183cca425bSMichael Tuexen if (*mp) 13193cca425bSMichael Tuexen mp = &(*mp)->m_next; 13203cca425bSMichael Tuexen } 13219d3ddf43SAdrian Chadd 13229d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13239d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13249d3ddf43SAdrian Chadd 13259d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13269d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13279d3ddf43SAdrian Chadd 13289d3ddf43SAdrian Chadd /* 13299d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13309d3ddf43SAdrian Chadd * other - don't populate both? 13319d3ddf43SAdrian Chadd */ 13329d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13339d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13349d3ddf43SAdrian Chadd if (*mp) 13359d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13369d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13379d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13389d3ddf43SAdrian Chadd if (*mp) 13399d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13409d3ddf43SAdrian Chadd } 13419d3ddf43SAdrian Chadd 13429d3ddf43SAdrian Chadd #ifdef RSS 13439d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13449d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13459d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13469d3ddf43SAdrian Chadd 13479d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13489d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13499d3ddf43SAdrian Chadd 13509d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13519d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13529d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13539d3ddf43SAdrian Chadd if (*mp) 13549d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13559d3ddf43SAdrian Chadd } 13569d3ddf43SAdrian Chadd } 13579d3ddf43SAdrian Chadd #endif 135882c23ebaSBill Fenner } 135982c23ebaSBill Fenner 13604d2e3692SLuigi Rizzo /* 136130916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 136230916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 136330916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 136430916a2dSRobert Watson * compiled. 13654d2e3692SLuigi Rizzo */ 13665f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 136782cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 136882cea7e6SBjoern A. Zeeb 136982cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 137082cea7e6SBjoern A. Zeeb 1371df8bae1dSRodney W. Grimes int 1372f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1373f0068c4aSGarrett Wollman { 13748b615593SMarko Zec 1375f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1376f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1377f0068c4aSGarrett Wollman return EOPNOTSUPP; 1378f0068c4aSGarrett Wollman 1379603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1380f0068c4aSGarrett Wollman return EADDRINUSE; 1381f0068c4aSGarrett Wollman 1382603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13831c5de19aSGarrett Wollman /* 13841c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13851c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13861c5de19aSGarrett Wollman */ 1387603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1388603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1389603724d3SBjoern A. Zeeb V_rsvp_on++; 13901c5de19aSGarrett Wollman } 1391f0068c4aSGarrett Wollman 1392f0068c4aSGarrett Wollman return 0; 1393f0068c4aSGarrett Wollman } 1394f0068c4aSGarrett Wollman 1395f0068c4aSGarrett Wollman int 1396f0068c4aSGarrett Wollman ip_rsvp_done(void) 1397f0068c4aSGarrett Wollman { 13988b615593SMarko Zec 1399603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 14001c5de19aSGarrett Wollman /* 14011c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 14021c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 14031c5de19aSGarrett Wollman */ 1404603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1405603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1406603724d3SBjoern A. Zeeb V_rsvp_on--; 14071c5de19aSGarrett Wollman } 1408f0068c4aSGarrett Wollman return 0; 1409f0068c4aSGarrett Wollman } 1410bbb4330bSLuigi Rizzo 14118f5a8818SKevin Lo int 14128f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1413bbb4330bSLuigi Rizzo { 14148f5a8818SKevin Lo struct mbuf *m; 14158f5a8818SKevin Lo 14168f5a8818SKevin Lo m = *mp; 14178f5a8818SKevin Lo *mp = NULL; 14188b615593SMarko Zec 1419bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14208f5a8818SKevin Lo *mp = m; 14218f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14228f5a8818SKevin Lo return (IPPROTO_DONE); 1423bbb4330bSLuigi Rizzo } 1424bbb4330bSLuigi Rizzo 1425bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1426bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1427bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1428bbb4330bSLuigi Rizzo */ 1429bbb4330bSLuigi Rizzo 1430603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1431bbb4330bSLuigi Rizzo m_freem(m); 14328f5a8818SKevin Lo return (IPPROTO_DONE); 1433bbb4330bSLuigi Rizzo } 1434bbb4330bSLuigi Rizzo 1435603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14368f5a8818SKevin Lo *mp = m; 14378f5a8818SKevin Lo rip_input(mp, offp, proto); 14388f5a8818SKevin Lo return (IPPROTO_DONE); 1439bbb4330bSLuigi Rizzo } 1440bbb4330bSLuigi Rizzo /* Drop the packet */ 1441bbb4330bSLuigi Rizzo m_freem(m); 14428f5a8818SKevin Lo return (IPPROTO_DONE); 1443bbb4330bSLuigi Rizzo } 1444