1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66983066f0SAlexander V. Chernikov #include <net/route/nhop.h> 67748e0b0aSGarrett Wollman #include <net/netisr.h> 68b2bdc62aSAdrian Chadd #include <net/rss_config.h> 694b79449eSBjoern A. Zeeb #include <net/vnet.h> 70df8bae1dSRodney W. Grimes 71df8bae1dSRodney W. Grimes #include <netinet/in.h> 7257f60867SMark Johnston #include <netinet/in_kdtrace.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 74b5e8ce9fSBruce Evans #include <netinet/in_var.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip.h> 76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h> 77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 79eddfbb76SRobert Watson #include <netinet/ip_fw.h> 80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 81ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8258938916SGarrett Wollman #include <machine/in_cksum.h> 83a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 85df8bae1dSRodney W. Grimes 86fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 87fcf59617SAndrey V. Elsukov 88f0068c4aSGarrett Wollman #include <sys/socketvar.h> 896ddbf1e2SGary Palmer 90aed55708SRobert Watson #include <security/mac/mac_framework.h> 91aed55708SRobert Watson 92d2035ffbSEd Maste #ifdef CTASSERT 93d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 94d2035ffbSEd Maste #endif 95d2035ffbSEd Maste 961dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 97843b0e57SXin LI extern void ipreass_init(void); 98843b0e57SXin LI extern void ipreass_drain(void); 99843b0e57SXin LI extern void ipreass_slowtimo(void); 1001dbefcc0SGleb Smirnoff #ifdef VIMAGE 101843b0e57SXin LI extern void ipreass_destroy(void); 1021dbefcc0SGleb Smirnoff #endif 1031dbefcc0SGleb Smirnoff 10482cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10582cea7e6SBjoern A. Zeeb 10682cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1076df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 108eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1098b615593SMarko Zec "Enable IP forwarding between interfaces"); 1100312fbe9SPoul-Henning Kamp 1118ad114c0SGeorge V. Neville-Neil /* 1128ad114c0SGeorge V. Neville-Neil * Respond with an ICMP host redirect when we forward a packet out of 1138ad114c0SGeorge V. Neville-Neil * the same interface on which it was received. See RFC 792. 1148ad114c0SGeorge V. Neville-Neil */ 1158ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1; 1166df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 117eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1188b615593SMarko Zec "Enable sending IP redirects"); 1190312fbe9SPoul-Henning Kamp 12094df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false; 12194df3271SGleb Smirnoff #define V_ip_strong_es VNET(ip_strong_es) 12294df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es, 12394df3271SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false, 12494df3271SGleb Smirnoff "Packet's IP destination address must match address on arrival interface"); 125b3e95d4eSJonathan Lemon 1262ce85919SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_sav) = true; 1272ce85919SGleb Smirnoff #define V_ip_sav VNET(ip_sav) 1282ce85919SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, source_address_validation, 1292ce85919SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_sav), true, 1302ce85919SGleb Smirnoff "Drop incoming packets with source address that is a local address"); 1312ce85919SGleb Smirnoff 132b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 133df8bae1dSRodney W. Grimes 134d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 135d4b5cae4SRobert Watson .nh_name = "ip", 136d4b5cae4SRobert Watson .nh_handler = ip_input, 137d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 138b8bc95cdSAdrian Chadd #ifdef RSS 1392527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 140b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 141b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 142b8bc95cdSAdrian Chadd #else 143d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 144b8bc95cdSAdrian Chadd #endif 145d4b5cae4SRobert Watson }; 146ca925d9cSJonathan Lemon 147b8bc95cdSAdrian Chadd #ifdef RSS 148b8bc95cdSAdrian Chadd /* 149b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 150b8bc95cdSAdrian Chadd * to have a flowid already calculated. 151b8bc95cdSAdrian Chadd * 152b8bc95cdSAdrian Chadd * It should likely have something that assert it 153b8bc95cdSAdrian Chadd * actually has valid flow details. 154b8bc95cdSAdrian Chadd */ 155b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 156b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 157b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 158b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 159499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 160b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 161b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 162b8bc95cdSAdrian Chadd }; 163b8bc95cdSAdrian Chadd #endif 164b8bc95cdSAdrian Chadd 165df8bae1dSRodney W. Grimes extern struct domain inetdomain; 166f0ffb944SJulian Elischer extern struct protosw inetsw[]; 167df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 16882cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 16982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17082cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 171ca925d9cSJonathan Lemon 172c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */ 173c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr))); 174c8ee75f2SGleb Smirnoff 1750312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1760312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1773d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1780312fbe9SPoul-Henning Kamp #endif 1790312fbe9SPoul-Henning Kamp 1801b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18182cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1826df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 183eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 184eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1851b968362SDag-Erling Smørgrav #endif 186eddfbb76SRobert Watson 187315e3e38SRobert Watson /* 1885da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1895923c293SGleb Smirnoff */ 1905da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1925da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1935da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1945923c293SGleb Smirnoff 1955923c293SGleb Smirnoff #ifdef VIMAGE 1965da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1975923c293SGleb Smirnoff #endif /* VIMAGE */ 1985923c293SGleb Smirnoff 1995923c293SGleb Smirnoff /* 200315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2015923c293SGleb Smirnoff * into ipstat treated as an array. 202315e3e38SRobert Watson */ 203315e3e38SRobert Watson void 204315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 205315e3e38SRobert Watson { 206315e3e38SRobert Watson 2075da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 208315e3e38SRobert Watson } 209315e3e38SRobert Watson 210315e3e38SRobert Watson void 211315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 212315e3e38SRobert Watson { 213315e3e38SRobert Watson 2145da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 215315e3e38SRobert Watson } 216315e3e38SRobert Watson 217d4b5cae4SRobert Watson static int 218d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 219d4b5cae4SRobert Watson { 220d4b5cae4SRobert Watson int error, qlimit; 221d4b5cae4SRobert Watson 222d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 223d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 224d4b5cae4SRobert Watson if (error || !req->newptr) 225d4b5cae4SRobert Watson return (error); 226d4b5cae4SRobert Watson if (qlimit < 1) 227d4b5cae4SRobert Watson return (EINVAL); 228d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 229d4b5cae4SRobert Watson } 230d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 2317029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2327029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 233d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 234d4b5cae4SRobert Watson 235d4b5cae4SRobert Watson static int 236d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 237d4b5cae4SRobert Watson { 238d4b5cae4SRobert Watson u_int64_t qdrops_long; 239d4b5cae4SRobert Watson int error, qdrops; 240d4b5cae4SRobert Watson 241d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 242d4b5cae4SRobert Watson qdrops = qdrops_long; 243d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 244d4b5cae4SRobert Watson if (error || !req->newptr) 245d4b5cae4SRobert Watson return (error); 246d4b5cae4SRobert Watson if (qdrops != 0) 247d4b5cae4SRobert Watson return (EINVAL); 248d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 249d4b5cae4SRobert Watson return (0); 250d4b5cae4SRobert Watson } 251d4b5cae4SRobert Watson 252d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 2537029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 2547029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 255d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 256d4b5cae4SRobert Watson 257b8bc95cdSAdrian Chadd #ifdef RSS 258b8bc95cdSAdrian Chadd static int 259b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 260b8bc95cdSAdrian Chadd { 261b8bc95cdSAdrian Chadd int error, qlimit; 262b8bc95cdSAdrian Chadd 263b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 264b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 265b8bc95cdSAdrian Chadd if (error || !req->newptr) 266b8bc95cdSAdrian Chadd return (error); 267b8bc95cdSAdrian Chadd if (qlimit < 1) 268b8bc95cdSAdrian Chadd return (EINVAL); 269b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 270b8bc95cdSAdrian Chadd } 2717faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2727029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2737029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2747faa0d21SAndrey V. Elsukov "I", "Maximum size of the IP direct input queue"); 275b8bc95cdSAdrian Chadd 276b8bc95cdSAdrian Chadd static int 277b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 278b8bc95cdSAdrian Chadd { 279b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 280b8bc95cdSAdrian Chadd int error, qdrops; 281b8bc95cdSAdrian Chadd 282b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 283b8bc95cdSAdrian Chadd qdrops = qdrops_long; 284b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 285b8bc95cdSAdrian Chadd if (error || !req->newptr) 286b8bc95cdSAdrian Chadd return (error); 287b8bc95cdSAdrian Chadd if (qdrops != 0) 288b8bc95cdSAdrian Chadd return (EINVAL); 289b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 290b8bc95cdSAdrian Chadd return (0); 291b8bc95cdSAdrian Chadd } 292b8bc95cdSAdrian Chadd 2937faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 2947029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 2957029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 296b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 297b8bc95cdSAdrian Chadd #endif /* RSS */ 298b8bc95cdSAdrian Chadd 299df8bae1dSRodney W. Grimes /* 300df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 301df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 302df8bae1dSRodney W. Grimes */ 30389128ff3SGleb Smirnoff static void 30489128ff3SGleb Smirnoff ip_vnet_init(void *arg __unused) 305df8bae1dSRodney W. Grimes { 306b252313fSGleb Smirnoff struct pfil_head_args args; 307df8bae1dSRodney W. Grimes 308d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 309603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3101ed81b73SMarko Zec 3111ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3121dbefcc0SGleb Smirnoff ipreass_init(); 3131ed81b73SMarko Zec 3140b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 315b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 316b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 317b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 318b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 319b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3200b4b0b0fSJulian Elischer 321ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 322ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 323ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 324ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 325ef91a976SAndrey V. Elsukov __func__); 326ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 327ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 328ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 329ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 330ef91a976SAndrey V. Elsukov __func__); 331ef91a976SAndrey V. Elsukov 332484149deSBjoern A. Zeeb #ifdef VIMAGE 333484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 334484149deSBjoern A. Zeeb #ifdef RSS 335484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 336484149deSBjoern A. Zeeb #endif 337484149deSBjoern A. Zeeb #endif 33889128ff3SGleb Smirnoff } 33989128ff3SGleb Smirnoff VNET_SYSINIT(ip_vnet_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH, 34089128ff3SGleb Smirnoff ip_vnet_init, NULL); 34189128ff3SGleb Smirnoff 34289128ff3SGleb Smirnoff 34389128ff3SGleb Smirnoff static void 34489128ff3SGleb Smirnoff ip_init(const void *unused __unused) 34589128ff3SGleb Smirnoff { 34689128ff3SGleb Smirnoff struct protosw *pr; 3471ed81b73SMarko Zec 348f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 34989128ff3SGleb Smirnoff KASSERT(pr, ("%s: PF_INET not found", __func__)); 350db09bef3SAndre Oppermann 351db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 35289128ff3SGleb Smirnoff for (int i = 0; i < IPPROTO_MAX; i++) 353df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 354db09bef3SAndre Oppermann /* 355db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 356db09bef3SAndre Oppermann * in ip_protox[]. 357db09bef3SAndre Oppermann */ 358f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 359f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 360df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 361db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 362db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 363db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 364df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 365db09bef3SAndre Oppermann } 366194a213eSAndrey A. Chernov 367d4b5cae4SRobert Watson netisr_register(&ip_nh); 368b8bc95cdSAdrian Chadd #ifdef RSS 369b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 370b8bc95cdSAdrian Chadd #endif 371df8bae1dSRodney W. Grimes } 37289128ff3SGleb Smirnoff SYSINIT(ip_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_init, NULL); 373df8bae1dSRodney W. Grimes 3749802380eSBjoern A. Zeeb #ifdef VIMAGE 3753f58662dSBjoern A. Zeeb static void 3763f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3779802380eSBjoern A. Zeeb { 378ef91a976SAndrey V. Elsukov int error; 3794d3dfd45SMikolaj Golub 380484149deSBjoern A. Zeeb #ifdef RSS 381484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 382484149deSBjoern A. Zeeb #endif 383484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 384484149deSBjoern A. Zeeb 385b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 386ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 387ef91a976SAndrey V. Elsukov if (error != 0) { 388ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 389ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 390ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 391ef91a976SAndrey V. Elsukov } 392ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 393ef91a976SAndrey V. Elsukov if (error != 0) { 394ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 395ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 396ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 397ef91a976SAndrey V. Elsukov } 39889856f7eSBjoern A. Zeeb 39989856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 40089856f7eSBjoern A. Zeeb in_ifscrub_all(); 40189856f7eSBjoern A. Zeeb 40289856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 403b1d63265SAlexander V. Chernikov rib_flush_routes_family(AF_INET); 4049802380eSBjoern A. Zeeb 405e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4061dbefcc0SGleb Smirnoff ipreass_destroy(); 40789856f7eSBjoern A. Zeeb 40889856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 40989856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4109802380eSBjoern A. Zeeb } 4113f58662dSBjoern A. Zeeb 4123f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4139802380eSBjoern A. Zeeb #endif 4149802380eSBjoern A. Zeeb 415b8bc95cdSAdrian Chadd #ifdef RSS 416b8bc95cdSAdrian Chadd /* 417b8bc95cdSAdrian Chadd * IP direct input routine. 418b8bc95cdSAdrian Chadd * 419b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 420b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 421b8bc95cdSAdrian Chadd */ 422b8bc95cdSAdrian Chadd void 423b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 424b8bc95cdSAdrian Chadd { 425b8bc95cdSAdrian Chadd struct ip *ip; 426b8bc95cdSAdrian Chadd int hlen; 427b8bc95cdSAdrian Chadd 428b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 429b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 430b8bc95cdSAdrian Chadd 431fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 432fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 433fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 434fcf59617SAndrey V. Elsukov return; 435fcf59617SAndrey V. Elsukov } 436fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 437b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 438b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 439b8bc95cdSAdrian Chadd return; 440b8bc95cdSAdrian Chadd } 441b8bc95cdSAdrian Chadd #endif 442b8bc95cdSAdrian Chadd 4434d2e3692SLuigi Rizzo /* 444df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 445df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 446df8bae1dSRodney W. Grimes */ 447c67b1d17SGarrett Wollman void 448c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 449df8bae1dSRodney W. Grimes { 4509188b4a1SAndre Oppermann struct ip *ip = NULL; 4515da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 452ca925d9cSJonathan Lemon struct ifaddr *ifa; 4530aade26eSRobert Watson struct ifnet *ifp; 45494df3271SGleb Smirnoff int hlen = 0; 45521d172a3SGleb Smirnoff uint16_t sum, ip_len; 45602c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 457f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 45894df3271SGleb Smirnoff bool strong_es; 459b715f178SLuigi Rizzo 460fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 461b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 462db40007dSAndrew R. Reiter 463ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 46476ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 46576ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4662b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 46753be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4688f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4699b932e9eSAndre Oppermann goto ours; 4702b25acc1SLuigi Rizzo } 4712b25acc1SLuigi Rizzo 47286425c62SRobert Watson IPSTAT_INC(ips_total); 47358938916SGarrett Wollman 4740359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < sizeof(struct ip))) 47558938916SGarrett Wollman goto tooshort; 47658938916SGarrett Wollman 4770359e7a5SMateusz Guzik if (m->m_len < sizeof(struct ip)) { 4780359e7a5SMateusz Guzik m = m_pullup(m, sizeof(struct ip)); 4790359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 48086425c62SRobert Watson IPSTAT_INC(ips_toosmall); 481c67b1d17SGarrett Wollman return; 482df8bae1dSRodney W. Grimes } 4830359e7a5SMateusz Guzik } 484df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 48558938916SGarrett Wollman 4860359e7a5SMateusz Guzik if (__predict_false(ip->ip_v != IPVERSION)) { 48786425c62SRobert Watson IPSTAT_INC(ips_badvers); 488df8bae1dSRodney W. Grimes goto bad; 489df8bae1dSRodney W. Grimes } 49058938916SGarrett Wollman 49153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4920359e7a5SMateusz Guzik if (__predict_false(hlen < sizeof(struct ip))) { /* minimum header length */ 49386425c62SRobert Watson IPSTAT_INC(ips_badhlen); 494df8bae1dSRodney W. Grimes goto bad; 495df8bae1dSRodney W. Grimes } 496df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4970359e7a5SMateusz Guzik m = m_pullup(m, hlen); 4980359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 49986425c62SRobert Watson IPSTAT_INC(ips_badhlen); 500c67b1d17SGarrett Wollman return; 501df8bae1dSRodney W. Grimes } 502df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 503df8bae1dSRodney W. Grimes } 50433841545SHajimu UMEMOTO 50557f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 50657f60867SMark Johnston 5076c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5080aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5096c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5106c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5110aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 51286425c62SRobert Watson IPSTAT_INC(ips_badaddr); 51333841545SHajimu UMEMOTO goto bad; 51433841545SHajimu UMEMOTO } 51533841545SHajimu UMEMOTO } 51633841545SHajimu UMEMOTO 517db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 518db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 519db4f9cc7SJonathan Lemon } else { 52058938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 52147c861ecSBrian Somers sum = in_cksum_hdr(ip); 52258938916SGarrett Wollman } else { 52347c861ecSBrian Somers sum = in_cksum(m, hlen); 52458938916SGarrett Wollman } 525db4f9cc7SJonathan Lemon } 5260359e7a5SMateusz Guzik if (__predict_false(sum)) { 52786425c62SRobert Watson IPSTAT_INC(ips_badsum); 528df8bae1dSRodney W. Grimes goto bad; 529df8bae1dSRodney W. Grimes } 530df8bae1dSRodney W. Grimes 53102b199f1SMax Laier #ifdef ALTQ 53202b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 53302b199f1SMax Laier /* packet is dropped by traffic conditioner */ 53402b199f1SMax Laier return; 53502b199f1SMax Laier #endif 53602b199f1SMax Laier 53721d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 5380359e7a5SMateusz Guzik if (__predict_false(ip_len < hlen)) { 53986425c62SRobert Watson IPSTAT_INC(ips_badlen); 540df8bae1dSRodney W. Grimes goto bad; 541df8bae1dSRodney W. Grimes } 542df8bae1dSRodney W. Grimes 543df8bae1dSRodney W. Grimes /* 544df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 545df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 546df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 547df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 548df8bae1dSRodney W. Grimes */ 5490359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < ip_len)) { 55058938916SGarrett Wollman tooshort: 55186425c62SRobert Watson IPSTAT_INC(ips_tooshort); 552df8bae1dSRodney W. Grimes goto bad; 553df8bae1dSRodney W. Grimes } 55421d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 555df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 55621d172a3SGleb Smirnoff m->m_len = ip_len; 55721d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 558df8bae1dSRodney W. Grimes } else 55921d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 560df8bae1dSRodney W. Grimes } 561b8bc95cdSAdrian Chadd 562ad9f4d6aSAndrey V. Elsukov /* 563ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 564f389439fSBjoern A. Zeeb * ip_tryforward() may generate redirects these days. 565f389439fSBjoern A. Zeeb * XXX the logic below falling through to normal processing 566f389439fSBjoern A. Zeeb * if redirects are required should be revisited as well. 567ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 568ad9f4d6aSAndrey V. Elsukov * processing. If firewall has decided that destination becomes 569ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 570ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 571ad9f4d6aSAndrey V. Elsukov * ip pointer. 572ad9f4d6aSAndrey V. Elsukov */ 5738ad114c0SGeorge V. Neville-Neil if (V_ipforwarding != 0 574fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 575fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 576fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 577ad9f4d6aSAndrey V. Elsukov #endif 578ad9f4d6aSAndrey V. Elsukov ) { 579f389439fSBjoern A. Zeeb /* 580f389439fSBjoern A. Zeeb * ip_dooptions() was run so we can ignore the source route (or 581f389439fSBjoern A. Zeeb * any IP options case) case for redirects in ip_tryforward(). 582f389439fSBjoern A. Zeeb */ 583ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 58433872124SGeorge V. Neville-Neil return; 585ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 586ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 587ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 588ad9f4d6aSAndrey V. Elsukov goto ours; 589ad9f4d6aSAndrey V. Elsukov } 590ad9f4d6aSAndrey V. Elsukov } 591fcf59617SAndrey V. Elsukov 592fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 59314dd6717SSam Leffler /* 594ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 59514dd6717SSam Leffler */ 596fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 597fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 598c21fd232SAndre Oppermann goto passin; 599ad9f4d6aSAndrey V. Elsukov #endif 600fcf59617SAndrey V. Elsukov 601c4ac87eaSDarren Reed /* 602134ea224SSam Leffler * Run through list of hooks for input packets. 603f51f805fSSam Leffler * 604f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 605f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 606f51f805fSSam Leffler * ip_forward to do the right thing. 607c4ac87eaSDarren Reed */ 608c21fd232SAndre Oppermann 609c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 610b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 611c21fd232SAndre Oppermann goto passin; 612c21fd232SAndre Oppermann 613f51f805fSSam Leffler odst = ip->ip_dst; 614b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 615b252313fSGleb Smirnoff PFIL_PASS) 616beec8214SDarren Reed return; 617134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 618c4ac87eaSDarren Reed return; 6199b932e9eSAndre Oppermann 620c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 62102c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6229b932e9eSAndre Oppermann 6239b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6249b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6259b932e9eSAndre Oppermann goto ours; 6269b932e9eSAndre Oppermann } 627ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 628de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 629099dd043SAndre Oppermann /* 630ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 631ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 632ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 633099dd043SAndre Oppermann */ 634ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 635099dd043SAndre Oppermann return; 636099dd043SAndre Oppermann } 637ffdbf9daSAndrey V. Elsukov } 638c21fd232SAndre Oppermann passin: 63921d172a3SGleb Smirnoff 64021d172a3SGleb Smirnoff /* 641df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 642df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 643df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 644df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 645df8bae1dSRodney W. Grimes */ 6469b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 647c67b1d17SGarrett Wollman return; 648df8bae1dSRodney W. Grimes 649f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 650f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 651f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 652f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 653f0068c4aSGarrett Wollman * grabbing the packet. 654f0068c4aSGarrett Wollman */ 6550359e7a5SMateusz Guzik if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on) 656f0068c4aSGarrett Wollman goto ours; 657f0068c4aSGarrett Wollman 658df8bae1dSRodney W. Grimes /* 659df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 660cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 661cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 662cc766e04SGarrett Wollman * with it). 663df8bae1dSRodney W. Grimes */ 664d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 665cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 666cc766e04SGarrett Wollman goto ours; 667cc766e04SGarrett Wollman 6687538a9a0SJonathan Lemon /* 669823db0e9SDon Lewis * Enable a consistency check between the destination address 670823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 67194df3271SGleb Smirnoff * strong ES model) with a list of additional predicates: 67294df3271SGleb Smirnoff * - if IP forwarding is disabled 67394df3271SGleb Smirnoff * - the packet is not locally generated 67494df3271SGleb Smirnoff * - the packet is not subject to 'ipfw fwd' 67594df3271SGleb Smirnoff * - Interface is not running CARP. If the packet got here, we already 67694df3271SGleb Smirnoff * checked it with carp_iamatch() and carp_forus(). 677823db0e9SDon Lewis */ 67894df3271SGleb Smirnoff strong_es = V_ip_strong_es && (V_ipforwarding == 0) && 67981674f12SGleb Smirnoff ((ifp->if_flags & IFF_LOOPBACK) == 0) && 68054bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 681823db0e9SDon Lewis 682ca925d9cSJonathan Lemon /* 683ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 684ca925d9cSJonathan Lemon */ 685c8ee75f2SGleb Smirnoff CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 68694df3271SGleb Smirnoff if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr) 68794df3271SGleb Smirnoff continue; 68894df3271SGleb Smirnoff 689f9e354dfSJulian Elischer /* 69094df3271SGleb Smirnoff * net.inet.ip.rfc1122_strong_es: the address matches, verify 69194df3271SGleb Smirnoff * that the packet arrived via the correct interface. 692f9e354dfSJulian Elischer */ 69394df3271SGleb Smirnoff if (__predict_false(strong_es && ia->ia_ifp != ifp)) { 69494df3271SGleb Smirnoff IPSTAT_INC(ips_badaddr); 69594df3271SGleb Smirnoff goto bad; 696ca925d9cSJonathan Lemon } 69794df3271SGleb Smirnoff 6982ce85919SGleb Smirnoff /* 6992ce85919SGleb Smirnoff * net.inet.ip.source_address_validation: drop incoming 7002ce85919SGleb Smirnoff * packets that pretend to be ours. 7012ce85919SGleb Smirnoff */ 7022ce85919SGleb Smirnoff if (V_ip_sav && !(ifp->if_flags & IFF_LOOPBACK) && 7032ce85919SGleb Smirnoff __predict_false(in_localip_fib(ip->ip_src, ifp->if_fib))) { 7042ce85919SGleb Smirnoff IPSTAT_INC(ips_badaddr); 7052ce85919SGleb Smirnoff goto bad; 7062ce85919SGleb Smirnoff } 7072ce85919SGleb Smirnoff 70894df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 70994df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); 71094df3271SGleb Smirnoff goto ours; 7118c0fec80SRobert Watson } 7122d9cfabaSRobert Watson 713823db0e9SDon Lewis /* 714ca925d9cSJonathan Lemon * Check for broadcast addresses. 715ca925d9cSJonathan Lemon * 716ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 717ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 718ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 719ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 720823db0e9SDon Lewis */ 72181674f12SGleb Smirnoff if (ifp->if_flags & IFF_BROADCAST) { 722d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 723ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 724ca925d9cSJonathan Lemon continue; 725ca925d9cSJonathan Lemon ia = ifatoia(ifa); 726df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7270aade26eSRobert Watson ip->ip_dst.s_addr) { 7287caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7297caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7307caf4ab7SGleb Smirnoff m->m_pkthdr.len); 731df8bae1dSRodney W. Grimes goto ours; 7320aade26eSRobert Watson } 7330ac40133SBrian Somers #ifdef BOOTP_COMPAT 7340aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7357caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7367caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7377caf4ab7SGleb Smirnoff m->m_pkthdr.len); 738ca925d9cSJonathan Lemon goto ours; 7390aade26eSRobert Watson } 7400ac40133SBrian Somers #endif 741df8bae1dSRodney W. Grimes } 74219e5b0a7SRobert Watson ia = NULL; 743df8bae1dSRodney W. Grimes } 744df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 74503b0505bSZhenlei Huang /* 74603b0505bSZhenlei Huang * RFC 3927 2.7: Do not forward multicast packets from 74703b0505bSZhenlei Huang * IN_LINKLOCAL. 74803b0505bSZhenlei Huang */ 7493d846e48SZhenlei Huang if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 750df8bae1dSRodney W. Grimes /* 751df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 752df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 753df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 754df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 755df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 756df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 757df8bae1dSRodney W. Grimes */ 7580aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 75986425c62SRobert Watson IPSTAT_INC(ips_cantforward); 760df8bae1dSRodney W. Grimes m_freem(m); 761c67b1d17SGarrett Wollman return; 762df8bae1dSRodney W. Grimes } 763df8bae1dSRodney W. Grimes 764df8bae1dSRodney W. Grimes /* 76511612afaSDima Dorfman * The process-level routing daemon needs to receive 766df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 767df8bae1dSRodney W. Grimes * host belongs to their destination groups. 768df8bae1dSRodney W. Grimes */ 76965634ae7SWojciech Macek if (ip->ip_p == IPPROTO_IGMP) { 770df8bae1dSRodney W. Grimes goto ours; 77165634ae7SWojciech Macek } 77286425c62SRobert Watson IPSTAT_INC(ips_forward); 773df8bae1dSRodney W. Grimes } 774df8bae1dSRodney W. Grimes /* 775d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 776d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 777d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 778df8bae1dSRodney W. Grimes */ 779df8bae1dSRodney W. Grimes goto ours; 780df8bae1dSRodney W. Grimes } 781df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 782df8bae1dSRodney W. Grimes goto ours; 783df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 784df8bae1dSRodney W. Grimes goto ours; 78503b0505bSZhenlei Huang /* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. */ 7863d846e48SZhenlei Huang if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || 7873d846e48SZhenlei Huang IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 7883d846e48SZhenlei Huang IPSTAT_INC(ips_cantforward); 7893d846e48SZhenlei Huang m_freem(m); 7903d846e48SZhenlei Huang return; 7913d846e48SZhenlei Huang } 792df8bae1dSRodney W. Grimes 7936a800098SYoshinobu Inoue /* 794df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 795df8bae1dSRodney W. Grimes */ 796603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 79786425c62SRobert Watson IPSTAT_INC(ips_cantforward); 798df8bae1dSRodney W. Grimes m_freem(m); 799546f251bSChris D. Faulhaber } else { 8009b932e9eSAndre Oppermann ip_forward(m, dchg); 801546f251bSChris D. Faulhaber } 802c67b1d17SGarrett Wollman return; 803df8bae1dSRodney W. Grimes 804df8bae1dSRodney W. Grimes ours: 805d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 806d0ebc0d2SYaroslav Tykhiy /* 807d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 808d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 809d0ebc0d2SYaroslav Tykhiy */ 8107caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 811d0ebc0d2SYaroslav Tykhiy return; 812d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 813d0ebc0d2SYaroslav Tykhiy 81463f8d699SJordan K. Hubbard /* 815b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 816ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 817df8bae1dSRodney W. Grimes */ 8188f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 819aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 820f0cada84SAndre Oppermann m = ip_reass(m); 821f0cada84SAndre Oppermann if (m == NULL) 822c67b1d17SGarrett Wollman return; 8236a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8247e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 82553be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 826f0cada84SAndre Oppermann } 827f0cada84SAndre Oppermann 828fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 829fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 830fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 831fcf59617SAndrey V. Elsukov return; 832fcf59617SAndrey V. Elsukov } 833b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 83433841545SHajimu UMEMOTO 835df8bae1dSRodney W. Grimes /* 836df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 837df8bae1dSRodney W. Grimes */ 83886425c62SRobert Watson IPSTAT_INC(ips_delivered); 8399b932e9eSAndre Oppermann 8408f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 841c67b1d17SGarrett Wollman return; 842df8bae1dSRodney W. Grimes bad: 843df8bae1dSRodney W. Grimes m_freem(m); 844c67b1d17SGarrett Wollman } 845c67b1d17SGarrett Wollman 846c67b1d17SGarrett Wollman /* 847df8bae1dSRodney W. Grimes * IP timer processing; 848df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 849df8bae1dSRodney W. Grimes * queue, discard it. 850df8bae1dSRodney W. Grimes */ 851df8bae1dSRodney W. Grimes void 852f2565d68SRobert Watson ip_slowtimo(void) 853df8bae1dSRodney W. Grimes { 8548b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 855df8bae1dSRodney W. Grimes 8565ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8578b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8588b615593SMarko Zec CURVNET_SET(vnet_iter); 8591dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8608b615593SMarko Zec CURVNET_RESTORE(); 8618b615593SMarko Zec } 8625ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 863df8bae1dSRodney W. Grimes } 864df8bae1dSRodney W. Grimes 8659802380eSBjoern A. Zeeb void 8669802380eSBjoern A. Zeeb ip_drain(void) 8679802380eSBjoern A. Zeeb { 8689802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8699802380eSBjoern A. Zeeb 8709802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8719802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8729802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8731dbefcc0SGleb Smirnoff ipreass_drain(); 8748b615593SMarko Zec CURVNET_RESTORE(); 8758b615593SMarko Zec } 8765ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 877df8bae1dSRodney W. Grimes } 878df8bae1dSRodney W. Grimes 879df8bae1dSRodney W. Grimes /* 880de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 881de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 882de38924dSAndre Oppermann */ 883de38924dSAndre Oppermann int 8841b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 885de38924dSAndre Oppermann { 886de38924dSAndre Oppermann struct protosw *pr; 887de38924dSAndre Oppermann 888de38924dSAndre Oppermann /* Sanity checks. */ 8891b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 890de38924dSAndre Oppermann return (EPROTONOSUPPORT); 891de38924dSAndre Oppermann 892de38924dSAndre Oppermann /* 893de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 894de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 895de38924dSAndre Oppermann */ 896de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 897de38924dSAndre Oppermann if (pr == NULL) 898de38924dSAndre Oppermann return (EPFNOSUPPORT); 899de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 900de38924dSAndre Oppermann return (EEXIST); 901de38924dSAndre Oppermann 902de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 903de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 904de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 905de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 906de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 907de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 908de38924dSAndre Oppermann return (0); 909de38924dSAndre Oppermann } 910de38924dSAndre Oppermann } 911de38924dSAndre Oppermann return (EPROTONOSUPPORT); 912de38924dSAndre Oppermann } 913de38924dSAndre Oppermann 914de38924dSAndre Oppermann int 9151b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 916de38924dSAndre Oppermann { 917de38924dSAndre Oppermann struct protosw *pr; 918de38924dSAndre Oppermann 919de38924dSAndre Oppermann /* Sanity checks. */ 9201b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 921de38924dSAndre Oppermann return (EPROTONOSUPPORT); 922de38924dSAndre Oppermann 923de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 924de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 925de38924dSAndre Oppermann if (pr == NULL) 926de38924dSAndre Oppermann return (EPFNOSUPPORT); 927de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 928de38924dSAndre Oppermann return (ENOENT); 929de38924dSAndre Oppermann 930de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 931de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 932de38924dSAndre Oppermann return (0); 933de38924dSAndre Oppermann } 934de38924dSAndre Oppermann 935df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 936df8bae1dSRodney W. Grimes 0, 0, 0, 0, 937df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 938df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 939df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 940fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 9413b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 942df8bae1dSRodney W. Grimes }; 943df8bae1dSRodney W. Grimes 944df8bae1dSRodney W. Grimes /* 945df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 946df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 947df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 948df8bae1dSRodney W. Grimes * of codes and types. 949df8bae1dSRodney W. Grimes * 950df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 951df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 952df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 953df8bae1dSRodney W. Grimes * protocol deal with that. 954df8bae1dSRodney W. Grimes * 955df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 956df8bae1dSRodney W. Grimes * via a source route. 957df8bae1dSRodney W. Grimes */ 9589b932e9eSAndre Oppermann void 9599b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 960df8bae1dSRodney W. Grimes { 9612b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 962efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 963df8bae1dSRodney W. Grimes struct mbuf *mcopy; 964d14122b0SErmal Luçi struct sockaddr_in *sin; 9659b932e9eSAndre Oppermann struct in_addr dest; 966b835b6feSBjoern A. Zeeb struct route ro; 9674043ee3cSAlexander V. Chernikov uint32_t flowid; 968c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9693efc3014SJulian Elischer 970b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 971b8a6e03fSGleb Smirnoff 9729b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 97386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 974df8bae1dSRodney W. Grimes m_freem(m); 975df8bae1dSRodney W. Grimes return; 976df8bae1dSRodney W. Grimes } 977fcf59617SAndrey V. Elsukov if ( 978fcf59617SAndrey V. Elsukov #ifdef IPSTEALTH 979fcf59617SAndrey V. Elsukov V_ipstealth == 0 && 980fcf59617SAndrey V. Elsukov #endif 981fcf59617SAndrey V. Elsukov ip->ip_ttl <= IPTTLDEC) { 982fcf59617SAndrey V. Elsukov icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0); 9838922ddbeSAndrey V. Elsukov return; 9848922ddbeSAndrey V. Elsukov } 985df8bae1dSRodney W. Grimes 986d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 987d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 988d14122b0SErmal Luçi sin->sin_family = AF_INET; 989d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 990d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 9914043ee3cSAlexander V. Chernikov flowid = m->m_pkthdr.flowid; 9924043ee3cSAlexander V. Chernikov ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid); 993983066f0SAlexander V. Chernikov if (ro.ro_nh != NULL) { 994983066f0SAlexander V. Chernikov ia = ifatoia(ro.ro_nh->nh_ifa); 99556844a62SErmal Luçi } else 99656844a62SErmal Luçi ia = NULL; 997df8bae1dSRodney W. Grimes /* 998bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 999bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 1000bfef7ed4SIan Dowse * 10014d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 10024d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 10034d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 10044d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 10054d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 10064d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 10074d2e3692SLuigi Rizzo * 1008c3bef61eSKevin Lo * We don't use m_copym() because it might return a reference 1009bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 1010bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 1011bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 1012df8bae1dSRodney W. Grimes */ 1013dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 1014eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10159967cafcSSam Leffler /* 10169967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10179967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10189967cafcSSam Leffler * be conservative and just discard the copy since 10199967cafcSSam Leffler * code below may some day want the tags. 10209967cafcSSam Leffler */ 10219967cafcSSam Leffler m_free(mcopy); 10229967cafcSSam Leffler mcopy = NULL; 10239967cafcSSam Leffler } 1024bfef7ed4SIan Dowse if (mcopy != NULL) { 10258f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1026e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1027bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1028bfef7ed4SIan Dowse } 102904287599SRuslan Ermilov #ifdef IPSTEALTH 1030fcf59617SAndrey V. Elsukov if (V_ipstealth == 0) 103104287599SRuslan Ermilov #endif 103204287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 1033fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 1034fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 1035fcf59617SAndrey V. Elsukov if ((error = IPSEC_FORWARD(ipv4, m)) != 0) { 1036fcf59617SAndrey V. Elsukov /* mbuf consumed by IPsec */ 1037d16a2e47SMark Johnston RO_NHFREE(&ro); 1038fcf59617SAndrey V. Elsukov m_freem(mcopy); 1039fcf59617SAndrey V. Elsukov if (error != EINPROGRESS) 1040fcf59617SAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 1041b8a6e03fSGleb Smirnoff return; 104204287599SRuslan Ermilov } 1043fcf59617SAndrey V. Elsukov /* No IPsec processing required */ 1044fcf59617SAndrey V. Elsukov } 1045fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 1046df8bae1dSRodney W. Grimes /* 1047df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1048df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1049df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1050df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1051df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1052df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1053df8bae1dSRodney W. Grimes */ 10549b932e9eSAndre Oppermann dest.s_addr = 0; 1055efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1056efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 1057983066f0SAlexander V. Chernikov struct nhop_object *nh; 105802c1c707SAndre Oppermann 1059983066f0SAlexander V. Chernikov nh = ro.ro_nh; 106002c1c707SAndre Oppermann 1061983066f0SAlexander V. Chernikov if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { 1062983066f0SAlexander V. Chernikov struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); 1063df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1064df8bae1dSRodney W. Grimes 1065983066f0SAlexander V. Chernikov if (nh_ia != NULL && 1066983066f0SAlexander V. Chernikov (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { 1067df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1068df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1069df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 107062e1a437SZhenlei Huang if (nh->nh_flags & NHF_GATEWAY) { 107162e1a437SZhenlei Huang if (nh->gw_sa.sa_family == AF_INET) 107262e1a437SZhenlei Huang dest.s_addr = nh->gw4_sa.sin_addr.s_addr; 107362e1a437SZhenlei Huang else /* Do not redirect in case gw is AF_INET6 */ 107462e1a437SZhenlei Huang type = 0; 107562e1a437SZhenlei Huang } else 107662e1a437SZhenlei Huang dest.s_addr = ip->ip_dst.s_addr; 1077df8bae1dSRodney W. Grimes } 1078df8bae1dSRodney W. Grimes } 107902c1c707SAndre Oppermann } 1080df8bae1dSRodney W. Grimes 1081b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1082b835b6feSBjoern A. Zeeb 1083983066f0SAlexander V. Chernikov if (error == EMSGSIZE && ro.ro_nh) 1084983066f0SAlexander V. Chernikov mtu = ro.ro_nh->nh_mtu; 1085983066f0SAlexander V. Chernikov RO_NHFREE(&ro); 1086b835b6feSBjoern A. Zeeb 1087df8bae1dSRodney W. Grimes if (error) 108886425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1089df8bae1dSRodney W. Grimes else { 109086425c62SRobert Watson IPSTAT_INC(ips_forward); 1091df8bae1dSRodney W. Grimes if (type) 109286425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1093df8bae1dSRodney W. Grimes else { 10949188b4a1SAndre Oppermann if (mcopy) 1095df8bae1dSRodney W. Grimes m_freem(mcopy); 1096b8a6e03fSGleb Smirnoff return; 1097df8bae1dSRodney W. Grimes } 1098df8bae1dSRodney W. Grimes } 10994f6c66ccSMatt Macy if (mcopy == NULL) 1100b8a6e03fSGleb Smirnoff return; 11014f6c66ccSMatt Macy 1102df8bae1dSRodney W. Grimes switch (error) { 1103df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1104df8bae1dSRodney W. Grimes /* type, code set above */ 1105df8bae1dSRodney W. Grimes break; 1106df8bae1dSRodney W. Grimes 1107efbad259SEdward Tomasz Napierala case ENETUNREACH: 1108df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1109df8bae1dSRodney W. Grimes case ENETDOWN: 1110df8bae1dSRodney W. Grimes case EHOSTDOWN: 1111df8bae1dSRodney W. Grimes default: 1112df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1113df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1114df8bae1dSRodney W. Grimes break; 1115df8bae1dSRodney W. Grimes 1116df8bae1dSRodney W. Grimes case EMSGSIZE: 1117df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1118df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 11199b932e9eSAndre Oppermann /* 1120b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1121b835b6feSBjoern A. Zeeb * interface MTU. 1122ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1123ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1124ab48768bSAndre Oppermann * current packet size. 11259b932e9eSAndre Oppermann */ 1126b835b6feSBjoern A. Zeeb if (mtu != 0) { 1127b835b6feSBjoern A. Zeeb if (ia != NULL) 1128b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1129b835b6feSBjoern A. Zeeb } else { 1130ab48768bSAndre Oppermann if (ia != NULL) 1131c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1132ab48768bSAndre Oppermann else 11338f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1134ab48768bSAndre Oppermann } 113586425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1136df8bae1dSRodney W. Grimes break; 1137df8bae1dSRodney W. Grimes 1138df8bae1dSRodney W. Grimes case ENOBUFS: 11393a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11403a06e3e0SRuslan Ermilov m_freem(mcopy); 1141b8a6e03fSGleb Smirnoff return; 1142df8bae1dSRodney W. Grimes } 1143c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1144df8bae1dSRodney W. Grimes } 1145df8bae1dSRodney W. Grimes 1146339efd75SMaxim Sobolev #define CHECK_SO_CT(sp, ct) \ 1147339efd75SMaxim Sobolev (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ? 1 : 0) 1148339efd75SMaxim Sobolev 114982c23ebaSBill Fenner void 1150f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1151f2565d68SRobert Watson struct mbuf *m) 115282c23ebaSBill Fenner { 115306193f0bSKonstantin Belousov bool stamped; 11548b615593SMarko Zec 115506193f0bSKonstantin Belousov stamped = false; 1156339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1157339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 115806193f0bSKonstantin Belousov struct bintime boottimebin, bt; 115906193f0bSKonstantin Belousov struct timespec ts1; 1160be8a62e8SPoul-Henning Kamp 116106193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 116206193f0bSKonstantin Belousov M_TSTMP)) { 116306193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 116406193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 116506193f0bSKonstantin Belousov getboottimebin(&boottimebin); 116606193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 116706193f0bSKonstantin Belousov } else { 1168be8a62e8SPoul-Henning Kamp bintime(&bt); 116906193f0bSKonstantin Belousov } 1170*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&bt, sizeof(bt), SCM_BINTIME, 1171*b46667c6SGleb Smirnoff SOL_SOCKET, M_NOWAIT); 117206193f0bSKonstantin Belousov if (*mp != NULL) { 1173be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 117406193f0bSKonstantin Belousov stamped = true; 117506193f0bSKonstantin Belousov } 1176be8a62e8SPoul-Henning Kamp } 1177339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 117806193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 1179c012cfe6SEd Maste struct timespec ts1; 118082c23ebaSBill Fenner struct timeval tv; 118182c23ebaSBill Fenner 118206193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 118306193f0bSKonstantin Belousov M_TSTMP)) { 118406193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 118506193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt1); 118606193f0bSKonstantin Belousov getboottimebin(&boottimebin); 118706193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 118806193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 118906193f0bSKonstantin Belousov } else { 1190339efd75SMaxim Sobolev microtime(&tv); 119106193f0bSKonstantin Belousov } 1192*b46667c6SGleb Smirnoff *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), SCM_TIMESTAMP, 1193*b46667c6SGleb Smirnoff SOL_SOCKET, M_NOWAIT); 119406193f0bSKonstantin Belousov if (*mp != NULL) { 119582c23ebaSBill Fenner mp = &(*mp)->m_next; 119606193f0bSKonstantin Belousov stamped = true; 119706193f0bSKonstantin Belousov } 1198339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 119906193f0bSKonstantin Belousov struct bintime boottimebin; 120006193f0bSKonstantin Belousov struct timespec ts, ts1; 1201339efd75SMaxim Sobolev 120206193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 120306193f0bSKonstantin Belousov M_TSTMP)) { 120406193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 120506193f0bSKonstantin Belousov getboottimebin(&boottimebin); 120606193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 12076040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 120806193f0bSKonstantin Belousov } else { 1209339efd75SMaxim Sobolev nanotime(&ts); 121006193f0bSKonstantin Belousov } 1211*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&ts, sizeof(ts), SCM_REALTIME, 1212*b46667c6SGleb Smirnoff SOL_SOCKET, M_NOWAIT); 121306193f0bSKonstantin Belousov if (*mp != NULL) { 1214339efd75SMaxim Sobolev mp = &(*mp)->m_next; 121506193f0bSKonstantin Belousov stamped = true; 121606193f0bSKonstantin Belousov } 1217339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1218339efd75SMaxim Sobolev struct timespec ts; 1219339efd75SMaxim Sobolev 122006193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 122106193f0bSKonstantin Belousov M_TSTMP)) 122206193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 122306193f0bSKonstantin Belousov else 1224339efd75SMaxim Sobolev nanouptime(&ts); 1225*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&ts, sizeof(ts), SCM_MONOTONIC, 1226*b46667c6SGleb Smirnoff SOL_SOCKET, M_NOWAIT); 122706193f0bSKonstantin Belousov if (*mp != NULL) { 122806193f0bSKonstantin Belousov mp = &(*mp)->m_next; 122906193f0bSKonstantin Belousov stamped = true; 123006193f0bSKonstantin Belousov } 123106193f0bSKonstantin Belousov } 123206193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 123306193f0bSKonstantin Belousov M_TSTMP)) { 123406193f0bSKonstantin Belousov struct sock_timestamp_info sti; 123506193f0bSKonstantin Belousov 123606193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 123706193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 123806193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 123906193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 1240*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&sti, sizeof(sti), SCM_TIME_INFO, 1241*b46667c6SGleb Smirnoff SOL_SOCKET, M_NOWAIT); 124206193f0bSKonstantin Belousov if (*mp != NULL) 1243339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1244be8a62e8SPoul-Henning Kamp } 124582c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 1246*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&ip->ip_dst, sizeof(struct in_addr), 1247*b46667c6SGleb Smirnoff IP_RECVDSTADDR, IPPROTO_IP, M_NOWAIT); 124882c23ebaSBill Fenner if (*mp) 124982c23ebaSBill Fenner mp = &(*mp)->m_next; 125082c23ebaSBill Fenner } 12514957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 1252*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&ip->ip_ttl, sizeof(u_char), IP_RECVTTL, 1253*b46667c6SGleb Smirnoff IPPROTO_IP, M_NOWAIT); 12544957466bSMatthew N. Dodd if (*mp) 12554957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12564957466bSMatthew N. Dodd } 125782c23ebaSBill Fenner #ifdef notyet 125882c23ebaSBill Fenner /* XXX 125982c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 126082c23ebaSBill Fenner * than they already were. 126182c23ebaSBill Fenner */ 126282c23ebaSBill Fenner /* options were tossed already */ 126382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 1264*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(opts_deleted_above, 1265*b46667c6SGleb Smirnoff sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP, M_NOWAIT); 126682c23ebaSBill Fenner if (*mp) 126782c23ebaSBill Fenner mp = &(*mp)->m_next; 126882c23ebaSBill Fenner } 126982c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 127082c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1271*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(ip_srcroute(m), sizeof(struct in_addr), 1272*b46667c6SGleb Smirnoff IP_RECVRETOPTS, IPPROTO_IP, M_NOWAIT); 127382c23ebaSBill Fenner if (*mp) 127482c23ebaSBill Fenner mp = &(*mp)->m_next; 127582c23ebaSBill Fenner } 127682c23ebaSBill Fenner #endif 127782c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1278d314ad7bSJulian Elischer struct ifnet *ifp; 1279d314ad7bSJulian Elischer struct sdlbuf { 128082c23ebaSBill Fenner struct sockaddr_dl sdl; 1281d314ad7bSJulian Elischer u_char pad[32]; 1282d314ad7bSJulian Elischer } sdlbuf; 1283d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1284d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 128582c23ebaSBill Fenner 1286db0ac6deSCy Schubert if ((ifp = m->m_pkthdr.rcvif)) { 12874a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1288d314ad7bSJulian Elischer /* 1289d314ad7bSJulian Elischer * Change our mind and don't try copy. 1290d314ad7bSJulian Elischer */ 129146f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 129246f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1293d314ad7bSJulian Elischer goto makedummy; 1294d314ad7bSJulian Elischer } 1295d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1296d314ad7bSJulian Elischer } else { 1297d314ad7bSJulian Elischer makedummy: 129846f2df9cSSergey Kandaurov sdl2->sdl_len = 129946f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1300d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1301d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1302d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1303d314ad7bSJulian Elischer } 1304*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(sdl2, sdl2->sdl_len, IP_RECVIF, 1305*b46667c6SGleb Smirnoff IPPROTO_IP, M_NOWAIT); 130682c23ebaSBill Fenner if (*mp) 130782c23ebaSBill Fenner mp = &(*mp)->m_next; 130882c23ebaSBill Fenner } 13093cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 1310*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&ip->ip_tos, sizeof(u_char), IP_RECVTOS, 1311*b46667c6SGleb Smirnoff IPPROTO_IP, M_NOWAIT); 13123cca425bSMichael Tuexen if (*mp) 13133cca425bSMichael Tuexen mp = &(*mp)->m_next; 13143cca425bSMichael Tuexen } 13159d3ddf43SAdrian Chadd 13169d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13179d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13189d3ddf43SAdrian Chadd 13199d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13209d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13219d3ddf43SAdrian Chadd 13229d3ddf43SAdrian Chadd /* 13239d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13249d3ddf43SAdrian Chadd * other - don't populate both? 13259d3ddf43SAdrian Chadd */ 1326*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&flowid, sizeof(uint32_t), IP_FLOWID, 1327*b46667c6SGleb Smirnoff IPPROTO_IP, M_NOWAIT); 13289d3ddf43SAdrian Chadd if (*mp) 13299d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 1330*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&flow_type, sizeof(uint32_t), 1331*b46667c6SGleb Smirnoff IP_FLOWTYPE, IPPROTO_IP, M_NOWAIT); 13329d3ddf43SAdrian Chadd if (*mp) 13339d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13349d3ddf43SAdrian Chadd } 13359d3ddf43SAdrian Chadd 13369d3ddf43SAdrian Chadd #ifdef RSS 13379d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13389d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13399d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13409d3ddf43SAdrian Chadd 13419d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13429d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13439d3ddf43SAdrian Chadd 13449d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 1345*b46667c6SGleb Smirnoff *mp = sbcreatecontrol(&rss_bucketid, sizeof(uint32_t), 1346*b46667c6SGleb Smirnoff IP_RSSBUCKETID, IPPROTO_IP, M_NOWAIT); 13479d3ddf43SAdrian Chadd if (*mp) 13489d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13499d3ddf43SAdrian Chadd } 13509d3ddf43SAdrian Chadd } 13519d3ddf43SAdrian Chadd #endif 135282c23ebaSBill Fenner } 135382c23ebaSBill Fenner 13544d2e3692SLuigi Rizzo /* 135530916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 135630916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 135730916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 135830916a2dSRobert Watson * compiled. 13594d2e3692SLuigi Rizzo */ 13605f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 136182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 136282cea7e6SBjoern A. Zeeb 136382cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 136482cea7e6SBjoern A. Zeeb 1365df8bae1dSRodney W. Grimes int 1366f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1367f0068c4aSGarrett Wollman { 13688b615593SMarko Zec 1369f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1370f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1371f0068c4aSGarrett Wollman return EOPNOTSUPP; 1372f0068c4aSGarrett Wollman 1373603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1374f0068c4aSGarrett Wollman return EADDRINUSE; 1375f0068c4aSGarrett Wollman 1376603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13771c5de19aSGarrett Wollman /* 13781c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13791c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13801c5de19aSGarrett Wollman */ 1381603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1382603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1383603724d3SBjoern A. Zeeb V_rsvp_on++; 13841c5de19aSGarrett Wollman } 1385f0068c4aSGarrett Wollman 1386f0068c4aSGarrett Wollman return 0; 1387f0068c4aSGarrett Wollman } 1388f0068c4aSGarrett Wollman 1389f0068c4aSGarrett Wollman int 1390f0068c4aSGarrett Wollman ip_rsvp_done(void) 1391f0068c4aSGarrett Wollman { 13928b615593SMarko Zec 1393603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13941c5de19aSGarrett Wollman /* 13951c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13961c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13971c5de19aSGarrett Wollman */ 1398603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1399603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1400603724d3SBjoern A. Zeeb V_rsvp_on--; 14011c5de19aSGarrett Wollman } 1402f0068c4aSGarrett Wollman return 0; 1403f0068c4aSGarrett Wollman } 1404bbb4330bSLuigi Rizzo 14058f5a8818SKevin Lo int 14068f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1407bbb4330bSLuigi Rizzo { 14088f5a8818SKevin Lo struct mbuf *m; 14098f5a8818SKevin Lo 14108f5a8818SKevin Lo m = *mp; 14118f5a8818SKevin Lo *mp = NULL; 14128b615593SMarko Zec 1413bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 14148f5a8818SKevin Lo *mp = m; 14158f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14168f5a8818SKevin Lo return (IPPROTO_DONE); 1417bbb4330bSLuigi Rizzo } 1418bbb4330bSLuigi Rizzo 1419bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1420bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1421bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1422bbb4330bSLuigi Rizzo */ 1423bbb4330bSLuigi Rizzo 1424603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1425bbb4330bSLuigi Rizzo m_freem(m); 14268f5a8818SKevin Lo return (IPPROTO_DONE); 1427bbb4330bSLuigi Rizzo } 1428bbb4330bSLuigi Rizzo 1429603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14308f5a8818SKevin Lo *mp = m; 14318f5a8818SKevin Lo rip_input(mp, offp, proto); 14328f5a8818SKevin Lo return (IPPROTO_DONE); 1433bbb4330bSLuigi Rizzo } 1434bbb4330bSLuigi Rizzo /* Drop the packet */ 1435bbb4330bSLuigi Rizzo m_freem(m); 14368f5a8818SKevin Lo return (IPPROTO_DONE); 1437bbb4330bSLuigi Rizzo } 1438