1c398230bSWarner Losh /*- 251369649SPedro F. Giffuni * SPDX-License-Identifier: BSD-3-Clause 351369649SPedro F. Giffuni * 4df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 5df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 6df8bae1dSRodney W. Grimes * 7df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 8df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 9df8bae1dSRodney W. Grimes * are met: 10df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 12df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 13df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 14df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 15fbbd9655SWarner Losh * 3. Neither the name of the University nor the names of its contributors 16df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 17df8bae1dSRodney W. Grimes * without specific prior written permission. 18df8bae1dSRodney W. Grimes * 19df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25df8bae1dSRodney W. 
Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29df8bae1dSRodney W. Grimes * SUCH DAMAGE. 30df8bae1dSRodney W. Grimes * 31df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 32df8bae1dSRodney W. Grimes */ 33df8bae1dSRodney W. Grimes 344b421e2dSMike Silbersack #include <sys/cdefs.h> 354b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 364b421e2dSMike Silbersack 370ac40133SBrian Somers #include "opt_bootp.h" 3827108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 396a800098SYoshinobu Inoue #include "opt_ipsec.h" 4033553d6eSBjoern A. Zeeb #include "opt_route.h" 41b8bc95cdSAdrian Chadd #include "opt_rss.h" 4274a9466cSGary Palmer 43df8bae1dSRodney W. Grimes #include <sys/param.h> 44df8bae1dSRodney W. Grimes #include <sys/systm.h> 45ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 46df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 47b715f178SLuigi Rizzo #include <sys/malloc.h> 48df8bae1dSRodney W. Grimes #include <sys/domain.h> 49df8bae1dSRodney W. Grimes #include <sys/protosw.h> 50df8bae1dSRodney W. Grimes #include <sys/socket.h> 51df8bae1dSRodney W. Grimes #include <sys/time.h> 52df8bae1dSRodney W. Grimes #include <sys/kernel.h> 53385195c0SMarko Zec #include <sys/lock.h> 54cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 55385195c0SMarko Zec #include <sys/rwlock.h> 5657f60867SMark Johnston #include <sys/sdt.h> 571025071fSGarrett Wollman #include <sys/syslog.h> 58b5e8ce9fSBruce Evans #include <sys/sysctl.h> 59df8bae1dSRodney W. Grimes 60df8bae1dSRodney W. 
Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64b252313fSGleb Smirnoff #include <net/pfil.h> 65df8bae1dSRodney W. Grimes #include <net/route.h> 66983066f0SAlexander V. Chernikov #include <net/route/nhop.h> 67748e0b0aSGarrett Wollman #include <net/netisr.h> 68b2bdc62aSAdrian Chadd #include <net/rss_config.h> 694b79449eSBjoern A. Zeeb #include <net/vnet.h> 70df8bae1dSRodney W. Grimes 71df8bae1dSRodney W. Grimes #include <netinet/in.h> 7257f60867SMark Johnston #include <netinet/in_kdtrace.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 74b5e8ce9fSBruce Evans #include <netinet/in_var.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip.h> 76983066f0SAlexander V. Chernikov #include <netinet/in_fib.h> 77df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 78df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 79eddfbb76SRobert Watson #include <netinet/ip_fw.h> 80df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 81ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 8258938916SGarrett Wollman #include <machine/in_cksum.h> 83a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 84b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 8565634ae7SWojciech Macek #include <netinet/ip_mroute.h> 86df8bae1dSRodney W. Grimes 87fcf59617SAndrey V. Elsukov #include <netipsec/ipsec_support.h> 88fcf59617SAndrey V. Elsukov 89f0068c4aSGarrett Wollman #include <sys/socketvar.h> 906ddbf1e2SGary Palmer 91aed55708SRobert Watson #include <security/mac/mac_framework.h> 92aed55708SRobert Watson 93d2035ffbSEd Maste #ifdef CTASSERT 94d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 95d2035ffbSEd Maste #endif 96d2035ffbSEd Maste 971dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. 
*/ 98843b0e57SXin LI extern void ipreass_init(void); 99843b0e57SXin LI extern void ipreass_drain(void); 100843b0e57SXin LI extern void ipreass_slowtimo(void); 1011dbefcc0SGleb Smirnoff #ifdef VIMAGE 102843b0e57SXin LI extern void ipreass_destroy(void); 1031dbefcc0SGleb Smirnoff #endif 1041dbefcc0SGleb Smirnoff 10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10682cea7e6SBjoern A. Zeeb 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1086df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 109eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1108b615593SMarko Zec "Enable IP forwarding between interfaces"); 1110312fbe9SPoul-Henning Kamp 1128ad114c0SGeorge V. Neville-Neil /* 1138ad114c0SGeorge V. Neville-Neil * Respond with an ICMP host redirect when we forward a packet out of 1148ad114c0SGeorge V. Neville-Neil * the same interface on which it was received. See RFC 792. 1158ad114c0SGeorge V. Neville-Neil */ 1168ad114c0SGeorge V. Neville-Neil VNET_DEFINE(int, ipsendredirects) = 1; 1176df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 118eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1198b615593SMarko Zec "Enable sending IP redirects"); 1200312fbe9SPoul-Henning Kamp 121*94df3271SGleb Smirnoff VNET_DEFINE_STATIC(bool, ip_strong_es) = false; 122*94df3271SGleb Smirnoff #define V_ip_strong_es VNET(ip_strong_es) 123*94df3271SGleb Smirnoff SYSCTL_BOOL(_net_inet_ip, OID_AUTO, rfc1122_strong_es, 124*94df3271SGleb Smirnoff CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(ip_strong_es), false, 125*94df3271SGleb Smirnoff "Packet's IP destination address must match address on arrival interface"); 126b3e95d4eSJonathan Lemon 127b252313fSGleb Smirnoff VNET_DEFINE(pfil_head_t, inet_pfil_head); /* Packet filter hooks */ 128df8bae1dSRodney W. 
Grimes 129d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 130d4b5cae4SRobert Watson .nh_name = "ip", 131d4b5cae4SRobert Watson .nh_handler = ip_input, 132d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 133b8bc95cdSAdrian Chadd #ifdef RSS 1342527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 135b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 136b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 137b8bc95cdSAdrian Chadd #else 138d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 139b8bc95cdSAdrian Chadd #endif 140d4b5cae4SRobert Watson }; 141ca925d9cSJonathan Lemon 142b8bc95cdSAdrian Chadd #ifdef RSS 143b8bc95cdSAdrian Chadd /* 144b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 145b8bc95cdSAdrian Chadd * to have a flowid already calculated. 146b8bc95cdSAdrian Chadd * 147b8bc95cdSAdrian Chadd * It should likely have something that assert it 148b8bc95cdSAdrian Chadd * actually has valid flow details. 149b8bc95cdSAdrian Chadd */ 150b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 151b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 152b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 153b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 154499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 155b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 156b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 157b8bc95cdSAdrian Chadd }; 158b8bc95cdSAdrian Chadd #endif 159b8bc95cdSAdrian Chadd 160df8bae1dSRodney W. Grimes extern struct domain inetdomain; 161f0ffb944SJulian Elischer extern struct protosw inetsw[]; 162df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 16382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 16482cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 16582cea7e6SBjoern A. 
Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 166ca925d9cSJonathan Lemon 167c8ee75f2SGleb Smirnoff /* Make sure it is safe to use hashinit(9) on CK_LIST. */ 168c8ee75f2SGleb Smirnoff CTASSERT(sizeof(struct in_ifaddrhashhead) == sizeof(LIST_HEAD(, in_addr))); 169c8ee75f2SGleb Smirnoff 1700312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1710312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1723d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1730312fbe9SPoul-Henning Kamp #endif 1740312fbe9SPoul-Henning Kamp 1751b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 17682cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1776df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 178eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 179eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1801b968362SDag-Erling Smørgrav #endif 181eddfbb76SRobert Watson 182315e3e38SRobert Watson /* 1835da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1845923c293SGleb Smirnoff */ 1855da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1865da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1875da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1885da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1895923c293SGleb Smirnoff 1905923c293SGleb Smirnoff #ifdef VIMAGE 1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1925923c293SGleb Smirnoff #endif /* VIMAGE */ 1935923c293SGleb Smirnoff 1945923c293SGleb Smirnoff /* 195315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 1965923c293SGleb Smirnoff * into ipstat treated as an array. 
197315e3e38SRobert Watson */ 198315e3e38SRobert Watson void 199315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 200315e3e38SRobert Watson { 201315e3e38SRobert Watson 2025da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 203315e3e38SRobert Watson } 204315e3e38SRobert Watson 205315e3e38SRobert Watson void 206315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 207315e3e38SRobert Watson { 208315e3e38SRobert Watson 2095da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 210315e3e38SRobert Watson } 211315e3e38SRobert Watson 212d4b5cae4SRobert Watson static int 213d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 214d4b5cae4SRobert Watson { 215d4b5cae4SRobert Watson int error, qlimit; 216d4b5cae4SRobert Watson 217d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 218d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 219d4b5cae4SRobert Watson if (error || !req->newptr) 220d4b5cae4SRobert Watson return (error); 221d4b5cae4SRobert Watson if (qlimit < 1) 222d4b5cae4SRobert Watson return (EINVAL); 223d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 224d4b5cae4SRobert Watson } 225d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 2267029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, 2277029da5cSPawel Biernacki sysctl_netinet_intr_queue_maxlen, "I", 228d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 229d4b5cae4SRobert Watson 230d4b5cae4SRobert Watson static int 231d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 232d4b5cae4SRobert Watson { 233d4b5cae4SRobert Watson u_int64_t qdrops_long; 234d4b5cae4SRobert Watson int error, qdrops; 235d4b5cae4SRobert Watson 236d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 237d4b5cae4SRobert Watson qdrops = qdrops_long; 238d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 239d4b5cae4SRobert Watson 
if (error || !req->newptr) 240d4b5cae4SRobert Watson return (error); 241d4b5cae4SRobert Watson if (qdrops != 0) 242d4b5cae4SRobert Watson return (EINVAL); 243d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 244d4b5cae4SRobert Watson return (0); 245d4b5cae4SRobert Watson } 246d4b5cae4SRobert Watson 247d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 2487029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 2497029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_queue_drops, "I", 250d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 251d4b5cae4SRobert Watson 252b8bc95cdSAdrian Chadd #ifdef RSS 253b8bc95cdSAdrian Chadd static int 254b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 255b8bc95cdSAdrian Chadd { 256b8bc95cdSAdrian Chadd int error, qlimit; 257b8bc95cdSAdrian Chadd 258b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 259b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 260b8bc95cdSAdrian Chadd if (error || !req->newptr) 261b8bc95cdSAdrian Chadd return (error); 262b8bc95cdSAdrian Chadd if (qlimit < 1) 263b8bc95cdSAdrian Chadd return (EINVAL); 264b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 265b8bc95cdSAdrian Chadd } 2667faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQMAXLEN, intr_direct_queue_maxlen, 2677029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 2687029da5cSPawel Biernacki 0, 0, sysctl_netinet_intr_direct_queue_maxlen, 2697faa0d21SAndrey V. 
Elsukov "I", "Maximum size of the IP direct input queue"); 270b8bc95cdSAdrian Chadd 271b8bc95cdSAdrian Chadd static int 272b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 273b8bc95cdSAdrian Chadd { 274b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 275b8bc95cdSAdrian Chadd int error, qdrops; 276b8bc95cdSAdrian Chadd 277b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 278b8bc95cdSAdrian Chadd qdrops = qdrops_long; 279b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 280b8bc95cdSAdrian Chadd if (error || !req->newptr) 281b8bc95cdSAdrian Chadd return (error); 282b8bc95cdSAdrian Chadd if (qdrops != 0) 283b8bc95cdSAdrian Chadd return (EINVAL); 284b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 285b8bc95cdSAdrian Chadd return (0); 286b8bc95cdSAdrian Chadd } 287b8bc95cdSAdrian Chadd 2887faa0d21SAndrey V. Elsukov SYSCTL_PROC(_net_inet_ip, IPCTL_INTRDQDROPS, intr_direct_queue_drops, 2897029da5cSPawel Biernacki CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 0, 2907029da5cSPawel Biernacki sysctl_netinet_intr_direct_queue_drops, "I", 291b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 292b8bc95cdSAdrian Chadd #endif /* RSS */ 293b8bc95cdSAdrian Chadd 294df8bae1dSRodney W. Grimes /* 295df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 296df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 297df8bae1dSRodney W. Grimes */ 298df8bae1dSRodney W. Grimes void 299f2565d68SRobert Watson ip_init(void) 300df8bae1dSRodney W. Grimes { 301b252313fSGleb Smirnoff struct pfil_head_args args; 302f2565d68SRobert Watson struct protosw *pr; 303f2565d68SRobert Watson int i; 304df8bae1dSRodney W. Grimes 305d7c5a620SMatt Macy CK_STAILQ_INIT(&V_in_ifaddrhead); 306603724d3SBjoern A. 
Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3071ed81b73SMarko Zec 3081ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3091dbefcc0SGleb Smirnoff ipreass_init(); 3101ed81b73SMarko Zec 3110b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 312b252313fSGleb Smirnoff args.pa_version = PFIL_VERSION; 313b252313fSGleb Smirnoff args.pa_flags = PFIL_IN | PFIL_OUT; 314b252313fSGleb Smirnoff args.pa_type = PFIL_TYPE_IP4; 315b252313fSGleb Smirnoff args.pa_headname = PFIL_INET_NAME; 316b252313fSGleb Smirnoff V_inet_pfil_head = pfil_head_register(&args); 3170b4b0b0fSJulian Elischer 318ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 319ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 320ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 321ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 322ef91a976SAndrey V. Elsukov __func__); 323ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 324ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 325ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 326ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 327ef91a976SAndrey V. Elsukov __func__); 328ef91a976SAndrey V. Elsukov 3291ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 330484149deSBjoern A. Zeeb #ifdef VIMAGE 331484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 332484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 333484149deSBjoern A. Zeeb #ifdef RSS 334484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 335484149deSBjoern A. Zeeb #endif 3361ed81b73SMarko Zec return; 337484149deSBjoern A. Zeeb } 338484149deSBjoern A. 
Zeeb #endif 3391ed81b73SMarko Zec 340f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 34102410549SRobert Watson if (pr == NULL) 342db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 343db09bef3SAndre Oppermann 344db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 345df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 346df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 347db09bef3SAndre Oppermann /* 348db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 349db09bef3SAndre Oppermann * in ip_protox[]. 350db09bef3SAndre Oppermann */ 351f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 352f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 353df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 354db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 355db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 356db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 357df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 358db09bef3SAndre Oppermann } 359194a213eSAndrey A. Chernov 360d4b5cae4SRobert Watson netisr_register(&ip_nh); 361b8bc95cdSAdrian Chadd #ifdef RSS 362b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 363b8bc95cdSAdrian Chadd #endif 364df8bae1dSRodney W. Grimes } 365df8bae1dSRodney W. Grimes 3669802380eSBjoern A. Zeeb #ifdef VIMAGE 3673f58662dSBjoern A. Zeeb static void 3683f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3699802380eSBjoern A. Zeeb { 370ef91a976SAndrey V. Elsukov int error; 3714d3dfd45SMikolaj Golub 372484149deSBjoern A. Zeeb #ifdef RSS 373484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 374484149deSBjoern A. Zeeb #endif 375484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 376484149deSBjoern A. 
Zeeb 377b252313fSGleb Smirnoff pfil_head_unregister(V_inet_pfil_head); 378ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 379ef91a976SAndrey V. Elsukov if (error != 0) { 380ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 381ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 382ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 383ef91a976SAndrey V. Elsukov } 384ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 385ef91a976SAndrey V. Elsukov if (error != 0) { 386ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 387ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 388ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 389ef91a976SAndrey V. Elsukov } 39089856f7eSBjoern A. Zeeb 39189856f7eSBjoern A. Zeeb /* Remove the IPv4 addresses from all interfaces. */ 39289856f7eSBjoern A. Zeeb in_ifscrub_all(); 39389856f7eSBjoern A. Zeeb 39489856f7eSBjoern A. Zeeb /* Make sure the IPv4 routes are gone as well. */ 395b1d63265SAlexander V. Chernikov rib_flush_routes_family(AF_INET); 3969802380eSBjoern A. Zeeb 397e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 3981dbefcc0SGleb Smirnoff ipreass_destroy(); 39989856f7eSBjoern A. Zeeb 40089856f7eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 40189856f7eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 4029802380eSBjoern A. Zeeb } 4033f58662dSBjoern A. Zeeb 4043f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4059802380eSBjoern A. Zeeb #endif 4069802380eSBjoern A. Zeeb 407b8bc95cdSAdrian Chadd #ifdef RSS 408b8bc95cdSAdrian Chadd /* 409b8bc95cdSAdrian Chadd * IP direct input routine. 
410b8bc95cdSAdrian Chadd * 411b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 412b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 413b8bc95cdSAdrian Chadd */ 414b8bc95cdSAdrian Chadd void 415b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 416b8bc95cdSAdrian Chadd { 417b8bc95cdSAdrian Chadd struct ip *ip; 418b8bc95cdSAdrian Chadd int hlen; 419b8bc95cdSAdrian Chadd 420b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 421b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 422b8bc95cdSAdrian Chadd 423fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 424fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 425fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 426fcf59617SAndrey V. Elsukov return; 427fcf59617SAndrey V. Elsukov } 428fcf59617SAndrey V. Elsukov #endif /* IPSEC */ 429b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 430b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 431b8bc95cdSAdrian Chadd return; 432b8bc95cdSAdrian Chadd } 433b8bc95cdSAdrian Chadd #endif 434b8bc95cdSAdrian Chadd 4354d2e3692SLuigi Rizzo /* 436df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 437df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 438df8bae1dSRodney W. Grimes */ 439c67b1d17SGarrett Wollman void 440c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 441df8bae1dSRodney W. 
Grimes { 44265634ae7SWojciech Macek MROUTER_RLOCK_TRACKER; 4439188b4a1SAndre Oppermann struct ip *ip = NULL; 4445da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 445ca925d9cSJonathan Lemon struct ifaddr *ifa; 4460aade26eSRobert Watson struct ifnet *ifp; 447*94df3271SGleb Smirnoff int hlen = 0; 44821d172a3SGleb Smirnoff uint16_t sum, ip_len; 44902c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 450f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 451*94df3271SGleb Smirnoff bool strong_es; 452b715f178SLuigi Rizzo 453fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 454b8a6e03fSGleb Smirnoff NET_EPOCH_ASSERT(); 455db40007dSAndrew R. Reiter 456ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 45776ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 45876ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4592b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 46053be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4618f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4629b932e9eSAndre Oppermann goto ours; 4632b25acc1SLuigi Rizzo } 4642b25acc1SLuigi Rizzo 46586425c62SRobert Watson IPSTAT_INC(ips_total); 46658938916SGarrett Wollman 4670359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < sizeof(struct ip))) 46858938916SGarrett Wollman goto tooshort; 46958938916SGarrett Wollman 4700359e7a5SMateusz Guzik if (m->m_len < sizeof(struct ip)) { 4710359e7a5SMateusz Guzik m = m_pullup(m, sizeof(struct ip)); 4720359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 47386425c62SRobert Watson IPSTAT_INC(ips_toosmall); 474c67b1d17SGarrett Wollman return; 475df8bae1dSRodney W. Grimes } 4760359e7a5SMateusz Guzik } 477df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 47858938916SGarrett Wollman 4790359e7a5SMateusz Guzik if (__predict_false(ip->ip_v != IPVERSION)) { 48086425c62SRobert Watson IPSTAT_INC(ips_badvers); 481df8bae1dSRodney W. Grimes goto bad; 482df8bae1dSRodney W. 
Grimes } 48358938916SGarrett Wollman 48453be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4850359e7a5SMateusz Guzik if (__predict_false(hlen < sizeof(struct ip))) { /* minimum header length */ 48686425c62SRobert Watson IPSTAT_INC(ips_badhlen); 487df8bae1dSRodney W. Grimes goto bad; 488df8bae1dSRodney W. Grimes } 489df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4900359e7a5SMateusz Guzik m = m_pullup(m, hlen); 4910359e7a5SMateusz Guzik if (__predict_false(m == NULL)) { 49286425c62SRobert Watson IPSTAT_INC(ips_badhlen); 493c67b1d17SGarrett Wollman return; 494df8bae1dSRodney W. Grimes } 495df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 496df8bae1dSRodney W. Grimes } 49733841545SHajimu UMEMOTO 49857f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 49957f60867SMark Johnston 5006c1c6ae5SRodney W. Grimes /* IN_LOOPBACK must not appear on the wire - RFC1122 */ 5010aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5026c1c6ae5SRodney W. Grimes if (IN_LOOPBACK(ntohl(ip->ip_dst.s_addr)) || 5036c1c6ae5SRodney W. Grimes IN_LOOPBACK(ntohl(ip->ip_src.s_addr))) { 5040aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 50586425c62SRobert Watson IPSTAT_INC(ips_badaddr); 50633841545SHajimu UMEMOTO goto bad; 50733841545SHajimu UMEMOTO } 50833841545SHajimu UMEMOTO } 50933841545SHajimu UMEMOTO 510db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 511db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 512db4f9cc7SJonathan Lemon } else { 51358938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 51447c861ecSBrian Somers sum = in_cksum_hdr(ip); 51558938916SGarrett Wollman } else { 51647c861ecSBrian Somers sum = in_cksum(m, hlen); 51758938916SGarrett Wollman } 518db4f9cc7SJonathan Lemon } 5190359e7a5SMateusz Guzik if (__predict_false(sum)) { 52086425c62SRobert Watson IPSTAT_INC(ips_badsum); 521df8bae1dSRodney W. Grimes goto bad; 522df8bae1dSRodney W. Grimes } 523df8bae1dSRodney W. 
Grimes 52402b199f1SMax Laier #ifdef ALTQ 52502b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 52602b199f1SMax Laier /* packet is dropped by traffic conditioner */ 52702b199f1SMax Laier return; 52802b199f1SMax Laier #endif 52902b199f1SMax Laier 53021d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 5310359e7a5SMateusz Guzik if (__predict_false(ip_len < hlen)) { 53286425c62SRobert Watson IPSTAT_INC(ips_badlen); 533df8bae1dSRodney W. Grimes goto bad; 534df8bae1dSRodney W. Grimes } 535df8bae1dSRodney W. Grimes 536df8bae1dSRodney W. Grimes /* 537df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 538df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 539df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 540df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 541df8bae1dSRodney W. Grimes */ 5420359e7a5SMateusz Guzik if (__predict_false(m->m_pkthdr.len < ip_len)) { 54358938916SGarrett Wollman tooshort: 54486425c62SRobert Watson IPSTAT_INC(ips_tooshort); 545df8bae1dSRodney W. Grimes goto bad; 546df8bae1dSRodney W. Grimes } 54721d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 548df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 54921d172a3SGleb Smirnoff m->m_len = ip_len; 55021d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 551df8bae1dSRodney W. Grimes } else 55221d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 553df8bae1dSRodney W. Grimes } 554b8bc95cdSAdrian Chadd 555ad9f4d6aSAndrey V. Elsukov /* 556ad9f4d6aSAndrey V. Elsukov * Try to forward the packet, but if we fail continue. 55762484790SAndrey V. Elsukov * ip_tryforward() does not generate redirects, so fall 55862484790SAndrey V. Elsukov * through to normal processing if redirects are required. 559ad9f4d6aSAndrey V. Elsukov * ip_tryforward() does inbound and outbound packet firewall 560ad9f4d6aSAndrey V. Elsukov * processing. 
If firewall has decided that destination becomes 561ad9f4d6aSAndrey V. Elsukov * our local address, it sets M_FASTFWD_OURS flag. In this 562ad9f4d6aSAndrey V. Elsukov * case skip another inbound firewall processing and update 563ad9f4d6aSAndrey V. Elsukov * ip pointer. 564ad9f4d6aSAndrey V. Elsukov */ 5658ad114c0SGeorge V. Neville-Neil if (V_ipforwarding != 0 566fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 567fcf59617SAndrey V. Elsukov && (!IPSEC_ENABLED(ipv4) || 568fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_OPERABLE) == 0) 569ad9f4d6aSAndrey V. Elsukov #endif 570ad9f4d6aSAndrey V. Elsukov ) { 571ad9f4d6aSAndrey V. Elsukov if ((m = ip_tryforward(m)) == NULL) 57233872124SGeorge V. Neville-Neil return; 573ad9f4d6aSAndrey V. Elsukov if (m->m_flags & M_FASTFWD_OURS) { 574ad9f4d6aSAndrey V. Elsukov m->m_flags &= ~M_FASTFWD_OURS; 575ad9f4d6aSAndrey V. Elsukov ip = mtod(m, struct ip *); 576ad9f4d6aSAndrey V. Elsukov goto ours; 577ad9f4d6aSAndrey V. Elsukov } 578ad9f4d6aSAndrey V. Elsukov } 579fcf59617SAndrey V. Elsukov 580fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 58114dd6717SSam Leffler /* 582ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 58314dd6717SSam Leffler */ 584fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4) && 585fcf59617SAndrey V. Elsukov IPSEC_CAPS(ipv4, m, IPSEC_CAP_BYPASS_FILTER) != 0) 586c21fd232SAndre Oppermann goto passin; 587ad9f4d6aSAndrey V. Elsukov #endif 588fcf59617SAndrey V. Elsukov 589c4ac87eaSDarren Reed /* 590134ea224SSam Leffler * Run through list of hooks for input packets. 591f51f805fSSam Leffler * 592f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 593f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 594f51f805fSSam Leffler * ip_forward to do the right thing. 
595c4ac87eaSDarren Reed */ 596c21fd232SAndre Oppermann 597c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 598b252313fSGleb Smirnoff if (!PFIL_HOOKED_IN(V_inet_pfil_head)) 599c21fd232SAndre Oppermann goto passin; 600c21fd232SAndre Oppermann 601f51f805fSSam Leffler odst = ip->ip_dst; 602b252313fSGleb Smirnoff if (pfil_run_hooks(V_inet_pfil_head, &m, ifp, PFIL_IN, NULL) != 603b252313fSGleb Smirnoff PFIL_PASS) 604beec8214SDarren Reed return; 605134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 606c4ac87eaSDarren Reed return; 6079b932e9eSAndre Oppermann 608c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 60902c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 6100aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 6119b932e9eSAndre Oppermann 6129b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 6139b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 6149b932e9eSAndre Oppermann goto ours; 6159b932e9eSAndre Oppermann } 616ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 617de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 618099dd043SAndre Oppermann /* 619ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 620ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 621ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 622099dd043SAndre Oppermann */ 623ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 624099dd043SAndre Oppermann return; 625099dd043SAndre Oppermann } 626ffdbf9daSAndrey V. Elsukov } 627c21fd232SAndre Oppermann passin: 62821d172a3SGleb Smirnoff 62921d172a3SGleb Smirnoff /* 630df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 631df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 632df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 633df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 634df8bae1dSRodney W. 
Grimes */ 6359b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 636c67b1d17SGarrett Wollman return; 637df8bae1dSRodney W. Grimes 638f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 639f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 640f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 641f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 642f0068c4aSGarrett Wollman * grabbing the packet. 643f0068c4aSGarrett Wollman */ 6440359e7a5SMateusz Guzik if (ip->ip_p == IPPROTO_RSVP && V_rsvp_on) 645f0068c4aSGarrett Wollman goto ours; 646f0068c4aSGarrett Wollman 647df8bae1dSRodney W. Grimes /* 648df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 649cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 650cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 651cc766e04SGarrett Wollman * with it). 652df8bae1dSRodney W. Grimes */ 653d7c5a620SMatt Macy if (CK_STAILQ_EMPTY(&V_in_ifaddrhead) && 654cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 655cc766e04SGarrett Wollman goto ours; 656cc766e04SGarrett Wollman 6577538a9a0SJonathan Lemon /* 658823db0e9SDon Lewis * Enable a consistency check between the destination address 659823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 660*94df3271SGleb Smirnoff * strong ES model) with a list of additional predicates: 661*94df3271SGleb Smirnoff * - if IP forwarding is disabled 662*94df3271SGleb Smirnoff * - the packet is not locally generated 663*94df3271SGleb Smirnoff * - the packet is not subject to 'ipfw fwd' 664*94df3271SGleb Smirnoff * - Interface is not running CARP. If the packet got here, we already 665*94df3271SGleb Smirnoff * checked it with carp_iamatch() and carp_forus(). 
666823db0e9SDon Lewis */ 667*94df3271SGleb Smirnoff strong_es = V_ip_strong_es && (V_ipforwarding == 0) && 6680aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 66954bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 670823db0e9SDon Lewis 671ca925d9cSJonathan Lemon /* 672ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 673ca925d9cSJonathan Lemon */ 674c8ee75f2SGleb Smirnoff CK_LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 675*94df3271SGleb Smirnoff if (IA_SIN(ia)->sin_addr.s_addr != ip->ip_dst.s_addr) 676*94df3271SGleb Smirnoff continue; 677*94df3271SGleb Smirnoff 678f9e354dfSJulian Elischer /* 679*94df3271SGleb Smirnoff * net.inet.ip.rfc1122_strong_es: the address matches, verify 680*94df3271SGleb Smirnoff * that the packet arrived via the correct interface. 681f9e354dfSJulian Elischer */ 682*94df3271SGleb Smirnoff if (__predict_false(strong_es && ia->ia_ifp != ifp)) { 683*94df3271SGleb Smirnoff IPSTAT_INC(ips_badaddr); 684*94df3271SGleb Smirnoff goto bad; 685ca925d9cSJonathan Lemon } 686*94df3271SGleb Smirnoff 687*94df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 688*94df3271SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); 689*94df3271SGleb Smirnoff goto ours; 6908c0fec80SRobert Watson } 6912d9cfabaSRobert Watson 692823db0e9SDon Lewis /* 693ca925d9cSJonathan Lemon * Check for broadcast addresses. 694ca925d9cSJonathan Lemon * 695ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 696ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 697ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 698ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 
699823db0e9SDon Lewis */ 7000aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 701d7c5a620SMatt Macy CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 702ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 703ca925d9cSJonathan Lemon continue; 704ca925d9cSJonathan Lemon ia = ifatoia(ifa); 705df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 7060aade26eSRobert Watson ip->ip_dst.s_addr) { 7077caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7087caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7097caf4ab7SGleb Smirnoff m->m_pkthdr.len); 710df8bae1dSRodney W. Grimes goto ours; 7110aade26eSRobert Watson } 7120ac40133SBrian Somers #ifdef BOOTP_COMPAT 7130aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 7147caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 7157caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 7167caf4ab7SGleb Smirnoff m->m_pkthdr.len); 717ca925d9cSJonathan Lemon goto ours; 7180aade26eSRobert Watson } 7190ac40133SBrian Somers #endif 720df8bae1dSRodney W. Grimes } 72119e5b0a7SRobert Watson ia = NULL; 722df8bae1dSRodney W. Grimes } 723df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 72465634ae7SWojciech Macek MROUTER_RLOCK(); 72503b0505bSZhenlei Huang /* 72603b0505bSZhenlei Huang * RFC 3927 2.7: Do not forward multicast packets from 72703b0505bSZhenlei Huang * IN_LINKLOCAL. 72803b0505bSZhenlei Huang */ 7293d846e48SZhenlei Huang if (V_ip_mrouter && !IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 730df8bae1dSRodney W. Grimes /* 731df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 732df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 733df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 734df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 735df8bae1dSRodney W. 
Grimes * ip_mforward() returns a non-zero value, the packet 736df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 737df8bae1dSRodney W. Grimes */ 7380aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 73965634ae7SWojciech Macek MROUTER_RUNLOCK(); 74086425c62SRobert Watson IPSTAT_INC(ips_cantforward); 741df8bae1dSRodney W. Grimes m_freem(m); 742c67b1d17SGarrett Wollman return; 743df8bae1dSRodney W. Grimes } 744df8bae1dSRodney W. Grimes 745df8bae1dSRodney W. Grimes /* 74611612afaSDima Dorfman * The process-level routing daemon needs to receive 747df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 748df8bae1dSRodney W. Grimes * host belongs to their destination groups. 749df8bae1dSRodney W. Grimes */ 75065634ae7SWojciech Macek if (ip->ip_p == IPPROTO_IGMP) { 75165634ae7SWojciech Macek MROUTER_RUNLOCK(); 752df8bae1dSRodney W. Grimes goto ours; 75365634ae7SWojciech Macek } 75486425c62SRobert Watson IPSTAT_INC(ips_forward); 755df8bae1dSRodney W. Grimes } 75665634ae7SWojciech Macek MROUTER_RUNLOCK(); 757df8bae1dSRodney W. Grimes /* 758d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 759d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 760d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 761df8bae1dSRodney W. Grimes */ 762df8bae1dSRodney W. Grimes goto ours; 763df8bae1dSRodney W. Grimes } 764df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 765df8bae1dSRodney W. Grimes goto ours; 766df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 767df8bae1dSRodney W. Grimes goto ours; 76803b0505bSZhenlei Huang /* RFC 3927 2.7: Do not forward packets to or from IN_LINKLOCAL. 
*/ 7693d846e48SZhenlei Huang if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr)) || 7703d846e48SZhenlei Huang IN_LINKLOCAL(ntohl(ip->ip_src.s_addr))) { 7713d846e48SZhenlei Huang IPSTAT_INC(ips_cantforward); 7723d846e48SZhenlei Huang m_freem(m); 7733d846e48SZhenlei Huang return; 7743d846e48SZhenlei Huang } 775df8bae1dSRodney W. Grimes 7766a800098SYoshinobu Inoue /* 777df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 778df8bae1dSRodney W. Grimes */ 779603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 78086425c62SRobert Watson IPSTAT_INC(ips_cantforward); 781df8bae1dSRodney W. Grimes m_freem(m); 782546f251bSChris D. Faulhaber } else { 7839b932e9eSAndre Oppermann ip_forward(m, dchg); 784546f251bSChris D. Faulhaber } 785c67b1d17SGarrett Wollman return; 786df8bae1dSRodney W. Grimes 787df8bae1dSRodney W. Grimes ours: 788d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 789d0ebc0d2SYaroslav Tykhiy /* 790d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 791d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 792d0ebc0d2SYaroslav Tykhiy */ 7937caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 794d0ebc0d2SYaroslav Tykhiy return; 795d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 796d0ebc0d2SYaroslav Tykhiy 79763f8d699SJordan K. Hubbard /* 798b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 799ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 800df8bae1dSRodney W. Grimes */ 8018f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 802aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? 
*/ 803f0cada84SAndre Oppermann m = ip_reass(m); 804f0cada84SAndre Oppermann if (m == NULL) 805c67b1d17SGarrett Wollman return; 8066a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 8077e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 80853be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 809f0cada84SAndre Oppermann } 810f0cada84SAndre Oppermann 811fcf59617SAndrey V. Elsukov #if defined(IPSEC) || defined(IPSEC_SUPPORT) 812fcf59617SAndrey V. Elsukov if (IPSEC_ENABLED(ipv4)) { 813fcf59617SAndrey V. Elsukov if (IPSEC_INPUT(ipv4, m, hlen, ip->ip_p) != 0) 814fcf59617SAndrey V. Elsukov return; 815fcf59617SAndrey V. Elsukov } 816b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 81733841545SHajimu UMEMOTO 818df8bae1dSRodney W. Grimes /* 819df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 820df8bae1dSRodney W. Grimes */ 82186425c62SRobert Watson IPSTAT_INC(ips_delivered); 8229b932e9eSAndre Oppermann 8238f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 824c67b1d17SGarrett Wollman return; 825df8bae1dSRodney W. Grimes bad: 826df8bae1dSRodney W. Grimes m_freem(m); 827c67b1d17SGarrett Wollman } 828c67b1d17SGarrett Wollman 829c67b1d17SGarrett Wollman /* 830df8bae1dSRodney W. Grimes * IP timer processing; 831df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 832df8bae1dSRodney W. Grimes * queue, discard it. 833df8bae1dSRodney W. Grimes */ 834df8bae1dSRodney W. Grimes void 835f2565d68SRobert Watson ip_slowtimo(void) 836df8bae1dSRodney W. Grimes { 8378b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 838df8bae1dSRodney W. Grimes 8395ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8408b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8418b615593SMarko Zec CURVNET_SET(vnet_iter); 8421dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8438b615593SMarko Zec CURVNET_RESTORE(); 8448b615593SMarko Zec } 8455ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 846df8bae1dSRodney W. Grimes } 847df8bae1dSRodney W. 

/*
 * Run ipreass_drain() once in the context of every virtual network stack.
 * The vnet list is held read-locked (non-sleepable) for the whole walk.
 */
void
ip_drain(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		ipreass_drain();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * The protocol to be inserted into ip_protox[] must be already registered
 * in inetsw[], either statically or through pf_proto_register().
 *
 * Returns 0 on success, or:
 *   EPROTONOSUPPORT  ipproto is out of range, or no PF_INET entry in
 *                    inetsw[] carries that protocol number;
 *   EPFNOSUPPORT     the raw IP protosw entry could not be found;
 *   EEXIST           the ip_protox[] slot already points at something
 *                    other than the raw IP entry.
 */
int
ipproto_register(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/*
	 * The protocol slot must not be occupied by another protocol
	 * already.  An index pointing to IPPROTO_RAW is unused.
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/* Find the protocol position in inetsw[] and set the index. */
	for (pr = inetdomain.dom_protosw;
	    pr < inetdomain.dom_protoswNPROTOSW; pr++) {
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol == ipproto) {
			ip_protox[pr->pr_protocol] = pr - inetsw;
			return (0);
		}
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): point the ip_protox[] slot for ipproto back at
 * the raw IP protosw entry.
 *
 * Returns 0 on success, or:
 *   EPROTONOSUPPORT  ipproto is out of range;
 *   EPFNOSUPPORT     the raw IP protosw entry could not be found;
 *   ENOENT           the slot already points at the raw IP entry, i.e.
 *                    nothing was registered for ipproto.
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/* Check if the protocol was indeed registered. */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] == pr - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

/*
 * errno value for each of the PRC_NCMDS control commands, indexed by
 * command number; 0 means no error is associated with that command.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advertising
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.  When it is non-zero no ICMP redirect is generated.
 *
 * Ownership: m is given up on every path.  It is either freed here,
 * handed to icmp_error(), taken by IPSEC_FORWARD(), or handed to
 * ip_output().  Nothing reachable through m -- in particular the `ip'
 * header pointer -- may be dereferenced after ip_output() has been called,
 * so every header field needed afterwards is captured in a local first.
 *
 * Must be called inside the network epoch.
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct mbuf *mcopy;
	struct sockaddr_in *sin;
	struct in_addr dest;
	struct route ro;
	uint32_t flowid;
	uint16_t pktlen;
	int error, type = 0, code = 0, mtu = 0;

	NET_EPOCH_ASSERT();

	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
	if (
#ifdef IPSTEALTH
	    V_ipstealth == 0 &&
#endif
	    ip->ip_ttl <= IPTTLDEC) {
		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 0, 0);
		return;
	}

	/*
	 * Total packet length in host byte order.  Captured now because it
	 * is still needed for the EMSGSIZE fallback below, at which point
	 * m (and therefore ip) no longer belongs to us.
	 */
	pktlen = ntohs(ip->ip_len);

	bzero(&ro, sizeof(ro));
	sin = (struct sockaddr_in *)&ro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = ip->ip_dst;
	flowid = m->m_pkthdr.flowid;
	ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, flowid);
	if (ro.ro_nh != NULL) {
		ia = ifatoia(ro.ro_nh->nh_ifa);
	} else
		ia = NULL;
	/*
	 * Save the IP header and at most 8 bytes of the payload,
	 * in case we need to generate an ICMP message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary.  The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copym() because it might return a reference
	 * to a shared cluster.  Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	mcopy = m_gethdr(M_NOWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = min(pktlen, M_TRAILINGSPACE(mcopy));
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}
#ifdef IPSTEALTH
	if (V_ipstealth == 0)
#endif
		ip->ip_ttl -= IPTTLDEC;
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
	if (IPSEC_ENABLED(ipv4)) {
		if ((error = IPSEC_FORWARD(ipv4, m)) != 0) {
			/* mbuf consumed by IPsec */
			RO_NHFREE(&ro);
			m_freem(mcopy);
			if (error != EINPROGRESS)
				IPSTAT_INC(ips_cantforward);
			return;
		}
		/* No IPsec processing required */
	}
#endif /* IPSEC */
	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	if (!srcrt && V_ipsendredirects &&
	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct nhop_object *nh;

		nh = ro.ro_nh;

		if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
			struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
			u_long src = ntohl(ip->ip_src.s_addr);

			if (nh_ia != NULL &&
			    (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
				/* Router requirements says to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
				if (nh->nh_flags & NHF_GATEWAY) {
					if (nh->gw_sa.sa_family == AF_INET)
						dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
					else /* Do not redirect in case gw is AF_INET6 */
						type = 0;
				} else
					dest.s_addr = ip->ip_dst.s_addr;
			}
		}
	}

	/* From here on m and ip must not be touched. */
	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);

	if (error == EMSGSIZE && ro.ro_nh)
		mtu = ro.ro_nh->nh_mtu;
	RO_NHFREE(&ro);

	if (error)
		IPSTAT_INC(ips_cantforward);
	else {
		IPSTAT_INC(ips_forward);
		if (type)
			IPSTAT_INC(ips_redirectsent);
		else {
			/* Forwarded and nothing to report: drop the copy. */
			m_freem(mcopy);
			return;
		}
	}
	/* Without the saved copy there is nothing to build an ICMP from. */
	if (mcopy == NULL)
		return;

	switch (error) {
	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;
		/*
		 * If the MTU was set before make sure we are below the
		 * interface MTU.
		 * If the MTU wasn't set before use the interface mtu or
		 * fall back to the next smaller mtu step compared to the
		 * current packet size.
		 */
		if (mtu != 0) {
			if (ia != NULL)
				mtu = min(mtu, ia->ia_ifp->if_mtu);
		} else {
			if (ia != NULL)
				mtu = ia->ia_ifp->if_mtu;
			else
				mtu = ip_next_mtu(pktlen, 0);
		}
		IPSTAT_INC(ips_cantfrag);
		break;

	case ENOBUFS:
	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		return;
	}
	icmp_error(mcopy, type, code, dest.s_addr, mtu);
}

#define	CHECK_SO_CT(sp, ct) \
    (((sp->so_options & SO_TIMESTAMP) && (sp->so_ts_clock == ct)) ?
1 : 0) 1131339efd75SMaxim Sobolev 113282c23ebaSBill Fenner void 1133f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1134f2565d68SRobert Watson struct mbuf *m) 113582c23ebaSBill Fenner { 113606193f0bSKonstantin Belousov bool stamped; 11378b615593SMarko Zec 113806193f0bSKonstantin Belousov stamped = false; 1139339efd75SMaxim Sobolev if ((inp->inp_socket->so_options & SO_BINTIME) || 1140339efd75SMaxim Sobolev CHECK_SO_CT(inp->inp_socket, SO_TS_BINTIME)) { 114106193f0bSKonstantin Belousov struct bintime boottimebin, bt; 114206193f0bSKonstantin Belousov struct timespec ts1; 1143be8a62e8SPoul-Henning Kamp 114406193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 114506193f0bSKonstantin Belousov M_TSTMP)) { 114606193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 114706193f0bSKonstantin Belousov timespec2bintime(&ts1, &bt); 114806193f0bSKonstantin Belousov getboottimebin(&boottimebin); 114906193f0bSKonstantin Belousov bintime_add(&bt, &boottimebin); 115006193f0bSKonstantin Belousov } else { 1151be8a62e8SPoul-Henning Kamp bintime(&bt); 115206193f0bSKonstantin Belousov } 1153be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1154be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 115506193f0bSKonstantin Belousov if (*mp != NULL) { 1156be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 115706193f0bSKonstantin Belousov stamped = true; 115806193f0bSKonstantin Belousov } 1159be8a62e8SPoul-Henning Kamp } 1160339efd75SMaxim Sobolev if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME_MICRO)) { 116106193f0bSKonstantin Belousov struct bintime boottimebin, bt1; 1162c012cfe6SEd Maste struct timespec ts1; 116382c23ebaSBill Fenner struct timeval tv; 116482c23ebaSBill Fenner 116506193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 116606193f0bSKonstantin Belousov M_TSTMP)) { 116706193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts1); 116806193f0bSKonstantin 
Belousov timespec2bintime(&ts1, &bt1); 116906193f0bSKonstantin Belousov getboottimebin(&boottimebin); 117006193f0bSKonstantin Belousov bintime_add(&bt1, &boottimebin); 117106193f0bSKonstantin Belousov bintime2timeval(&bt1, &tv); 117206193f0bSKonstantin Belousov } else { 1173339efd75SMaxim Sobolev microtime(&tv); 117406193f0bSKonstantin Belousov } 117582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 117682c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 117706193f0bSKonstantin Belousov if (*mp != NULL) { 117882c23ebaSBill Fenner mp = &(*mp)->m_next; 117906193f0bSKonstantin Belousov stamped = true; 118006193f0bSKonstantin Belousov } 1181339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_REALTIME)) { 118206193f0bSKonstantin Belousov struct bintime boottimebin; 118306193f0bSKonstantin Belousov struct timespec ts, ts1; 1184339efd75SMaxim Sobolev 118506193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 118606193f0bSKonstantin Belousov M_TSTMP)) { 118706193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 118806193f0bSKonstantin Belousov getboottimebin(&boottimebin); 118906193f0bSKonstantin Belousov bintime2timespec(&boottimebin, &ts1); 11906040822cSAlan Somers timespecadd(&ts, &ts1, &ts); 119106193f0bSKonstantin Belousov } else { 1192339efd75SMaxim Sobolev nanotime(&ts); 119306193f0bSKonstantin Belousov } 1194339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1195339efd75SMaxim Sobolev SCM_REALTIME, SOL_SOCKET); 119606193f0bSKonstantin Belousov if (*mp != NULL) { 1197339efd75SMaxim Sobolev mp = &(*mp)->m_next; 119806193f0bSKonstantin Belousov stamped = true; 119906193f0bSKonstantin Belousov } 1200339efd75SMaxim Sobolev } else if (CHECK_SO_CT(inp->inp_socket, SO_TS_MONOTONIC)) { 1201339efd75SMaxim Sobolev struct timespec ts; 1202339efd75SMaxim Sobolev 120306193f0bSKonstantin Belousov if ((m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 120406193f0bSKonstantin Belousov M_TSTMP)) 
120506193f0bSKonstantin Belousov mbuf_tstmp2timespec(m, &ts); 120606193f0bSKonstantin Belousov else 1207339efd75SMaxim Sobolev nanouptime(&ts); 1208339efd75SMaxim Sobolev *mp = sbcreatecontrol((caddr_t)&ts, sizeof(ts), 1209339efd75SMaxim Sobolev SCM_MONOTONIC, SOL_SOCKET); 121006193f0bSKonstantin Belousov if (*mp != NULL) { 121106193f0bSKonstantin Belousov mp = &(*mp)->m_next; 121206193f0bSKonstantin Belousov stamped = true; 121306193f0bSKonstantin Belousov } 121406193f0bSKonstantin Belousov } 121506193f0bSKonstantin Belousov if (stamped && (m->m_flags & (M_PKTHDR | M_TSTMP)) == (M_PKTHDR | 121606193f0bSKonstantin Belousov M_TSTMP)) { 121706193f0bSKonstantin Belousov struct sock_timestamp_info sti; 121806193f0bSKonstantin Belousov 121906193f0bSKonstantin Belousov bzero(&sti, sizeof(sti)); 122006193f0bSKonstantin Belousov sti.st_info_flags = ST_INFO_HW; 122106193f0bSKonstantin Belousov if ((m->m_flags & M_TSTMP_HPREC) != 0) 122206193f0bSKonstantin Belousov sti.st_info_flags |= ST_INFO_HW_HPREC; 122306193f0bSKonstantin Belousov *mp = sbcreatecontrol((caddr_t)&sti, sizeof(sti), SCM_TIME_INFO, 122406193f0bSKonstantin Belousov SOL_SOCKET); 122506193f0bSKonstantin Belousov if (*mp != NULL) 1226339efd75SMaxim Sobolev mp = &(*mp)->m_next; 1227be8a62e8SPoul-Henning Kamp } 122882c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 122982c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 123082c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 123182c23ebaSBill Fenner if (*mp) 123282c23ebaSBill Fenner mp = &(*mp)->m_next; 123382c23ebaSBill Fenner } 12344957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 12354957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 12364957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 12374957466bSMatthew N. Dodd if (*mp) 12384957466bSMatthew N. Dodd mp = &(*mp)->m_next; 12394957466bSMatthew N. 
Dodd } 124082c23ebaSBill Fenner #ifdef notyet 124182c23ebaSBill Fenner /* XXX 124282c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 124382c23ebaSBill Fenner * than they already were. 124482c23ebaSBill Fenner */ 124582c23ebaSBill Fenner /* options were tossed already */ 124682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 124782c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 124882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 124982c23ebaSBill Fenner if (*mp) 125082c23ebaSBill Fenner mp = &(*mp)->m_next; 125182c23ebaSBill Fenner } 125282c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 125382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1254e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 125582c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 125682c23ebaSBill Fenner if (*mp) 125782c23ebaSBill Fenner mp = &(*mp)->m_next; 125882c23ebaSBill Fenner } 125982c23ebaSBill Fenner #endif 126082c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1261d314ad7bSJulian Elischer struct ifnet *ifp; 1262d314ad7bSJulian Elischer struct sdlbuf { 126382c23ebaSBill Fenner struct sockaddr_dl sdl; 1264d314ad7bSJulian Elischer u_char pad[32]; 1265d314ad7bSJulian Elischer } sdlbuf; 1266d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1267d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 126882c23ebaSBill Fenner 126946f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 127046f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12714a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1272d314ad7bSJulian Elischer /* 1273d314ad7bSJulian Elischer * Change our mind and don't try copy. 
1274d314ad7bSJulian Elischer */ 127546f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 127646f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1277d314ad7bSJulian Elischer goto makedummy; 1278d314ad7bSJulian Elischer } 1279d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1280d314ad7bSJulian Elischer } else { 1281d314ad7bSJulian Elischer makedummy: 128246f2df9cSSergey Kandaurov sdl2->sdl_len = 128346f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1284d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1285d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1286d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1287d314ad7bSJulian Elischer } 1288d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 128982c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 129082c23ebaSBill Fenner if (*mp) 129182c23ebaSBill Fenner mp = &(*mp)->m_next; 129282c23ebaSBill Fenner } 12933cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 12943cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 12953cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 12963cca425bSMichael Tuexen if (*mp) 12973cca425bSMichael Tuexen mp = &(*mp)->m_next; 12983cca425bSMichael Tuexen } 12999d3ddf43SAdrian Chadd 13009d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 13019d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13029d3ddf43SAdrian Chadd 13039d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13049d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13059d3ddf43SAdrian Chadd 13069d3ddf43SAdrian Chadd /* 13079d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 13089d3ddf43SAdrian Chadd * other - don't populate both? 
13099d3ddf43SAdrian Chadd */ 13109d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 13119d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 13129d3ddf43SAdrian Chadd if (*mp) 13139d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13149d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 13159d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 13169d3ddf43SAdrian Chadd if (*mp) 13179d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13189d3ddf43SAdrian Chadd } 13199d3ddf43SAdrian Chadd 13209d3ddf43SAdrian Chadd #ifdef RSS 13219d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 13229d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 13239d3ddf43SAdrian Chadd uint32_t rss_bucketid; 13249d3ddf43SAdrian Chadd 13259d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 13269d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 13279d3ddf43SAdrian Chadd 13289d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 13299d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 13309d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 13319d3ddf43SAdrian Chadd if (*mp) 13329d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 13339d3ddf43SAdrian Chadd } 13349d3ddf43SAdrian Chadd } 13359d3ddf43SAdrian Chadd #endif 133682c23ebaSBill Fenner } 133782c23ebaSBill Fenner 13384d2e3692SLuigi Rizzo /* 133930916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 134030916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 134130916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 134230916a2dSRobert Watson * compiled. 13434d2e3692SLuigi Rizzo */ 13445f901c92SAndrew Turner VNET_DEFINE_STATIC(int, ip_rsvp_on); 134582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 134682cea7e6SBjoern A. Zeeb 134782cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 134882cea7e6SBjoern A. 
Zeeb 1349df8bae1dSRodney W. Grimes int 1350f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1351f0068c4aSGarrett Wollman { 13528b615593SMarko Zec 1353f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1354f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1355f0068c4aSGarrett Wollman return EOPNOTSUPP; 1356f0068c4aSGarrett Wollman 1357603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1358f0068c4aSGarrett Wollman return EADDRINUSE; 1359f0068c4aSGarrett Wollman 1360603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 13611c5de19aSGarrett Wollman /* 13621c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 13631c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13641c5de19aSGarrett Wollman */ 1365603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1366603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1367603724d3SBjoern A. Zeeb V_rsvp_on++; 13681c5de19aSGarrett Wollman } 1369f0068c4aSGarrett Wollman 1370f0068c4aSGarrett Wollman return 0; 1371f0068c4aSGarrett Wollman } 1372f0068c4aSGarrett Wollman 1373f0068c4aSGarrett Wollman int 1374f0068c4aSGarrett Wollman ip_rsvp_done(void) 1375f0068c4aSGarrett Wollman { 13768b615593SMarko Zec 1377603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13781c5de19aSGarrett Wollman /* 13791c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13801c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13811c5de19aSGarrett Wollman */ 1382603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1383603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1384603724d3SBjoern A. 
Zeeb V_rsvp_on--; 13851c5de19aSGarrett Wollman } 1386f0068c4aSGarrett Wollman return 0; 1387f0068c4aSGarrett Wollman } 1388bbb4330bSLuigi Rizzo 13898f5a8818SKevin Lo int 13908f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1391bbb4330bSLuigi Rizzo { 13928f5a8818SKevin Lo struct mbuf *m; 13938f5a8818SKevin Lo 13948f5a8818SKevin Lo m = *mp; 13958f5a8818SKevin Lo *mp = NULL; 13968b615593SMarko Zec 1397bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 13988f5a8818SKevin Lo *mp = m; 13998f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 14008f5a8818SKevin Lo return (IPPROTO_DONE); 1401bbb4330bSLuigi Rizzo } 1402bbb4330bSLuigi Rizzo 1403bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1404bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1405bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1406bbb4330bSLuigi Rizzo */ 1407bbb4330bSLuigi Rizzo 1408603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1409bbb4330bSLuigi Rizzo m_freem(m); 14108f5a8818SKevin Lo return (IPPROTO_DONE); 1411bbb4330bSLuigi Rizzo } 1412bbb4330bSLuigi Rizzo 1413603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 14148f5a8818SKevin Lo *mp = m; 14158f5a8818SKevin Lo rip_input(mp, offp, proto); 14168f5a8818SKevin Lo return (IPPROTO_DONE); 1417bbb4330bSLuigi Rizzo } 1418bbb4330bSLuigi Rizzo /* Drop the packet */ 1419bbb4330bSLuigi Rizzo m_freem(m); 14208f5a8818SKevin Lo return (IPPROTO_DONE); 1421bbb4330bSLuigi Rizzo } 1422