1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 350ac40133SBrian Somers #include "opt_bootp.h" 3674a9466cSGary Palmer #include "opt_ipfw.h" 3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 386a800098SYoshinobu Inoue #include "opt_ipsec.h" 3933553d6eSBjoern A. Zeeb #include "opt_route.h" 40b8bc95cdSAdrian Chadd #include "opt_rss.h" 4174a9466cSGary Palmer 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 44df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 45b715f178SLuigi Rizzo #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/domain.h> 47df8bae1dSRodney W. Grimes #include <sys/protosw.h> 48df8bae1dSRodney W. Grimes #include <sys/socket.h> 49df8bae1dSRodney W. Grimes #include <sys/time.h> 50df8bae1dSRodney W. Grimes #include <sys/kernel.h> 51385195c0SMarko Zec #include <sys/lock.h> 52cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 53385195c0SMarko Zec #include <sys/rwlock.h> 5457f60867SMark Johnston #include <sys/sdt.h> 551025071fSGarrett Wollman #include <sys/syslog.h> 56b5e8ce9fSBruce Evans #include <sys/sysctl.h> 57df8bae1dSRodney W. Grimes 58c85540ddSAndrey A. Chernov #include <net/pfil.h> 59df8bae1dSRodney W. Grimes #include <net/if.h> 609494d596SBrooks Davis #include <net/if_types.h> 61d314ad7bSJulian Elischer #include <net/if_var.h> 6282c23ebaSBill Fenner #include <net/if_dl.h> 63df8bae1dSRodney W. Grimes #include <net/route.h> 64748e0b0aSGarrett Wollman #include <net/netisr.h> 65b2bdc62aSAdrian Chadd #include <net/rss_config.h> 664b79449eSBjoern A. Zeeb #include <net/vnet.h> 67df8bae1dSRodney W. Grimes 68df8bae1dSRodney W. Grimes #include <netinet/in.h> 6957f60867SMark Johnston #include <netinet/in_kdtrace.h> 70df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 71b5e8ce9fSBruce Evans #include <netinet/in_var.h> 72df8bae1dSRodney W. Grimes #include <netinet/ip.h> 73df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 74df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 75eddfbb76SRobert Watson #include <netinet/ip_fw.h> 76df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 77ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 7858938916SGarrett Wollman #include <machine/in_cksum.h> 79a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 80b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 811dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h> 82b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 83b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 84df8bae1dSRodney W. Grimes 85f0068c4aSGarrett Wollman #include <sys/socketvar.h> 866ddbf1e2SGary Palmer 87aed55708SRobert Watson #include <security/mac/mac_framework.h> 88aed55708SRobert Watson 89d2035ffbSEd Maste #ifdef CTASSERT 90d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 91d2035ffbSEd Maste #endif 92d2035ffbSEd Maste 931dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 94843b0e57SXin LI extern void ipreass_init(void); 95843b0e57SXin LI extern void ipreass_drain(void); 96843b0e57SXin LI extern void ipreass_slowtimo(void); 971dbefcc0SGleb Smirnoff #ifdef VIMAGE 98843b0e57SXin LI extern void ipreass_destroy(void); 991dbefcc0SGleb Smirnoff #endif 1001dbefcc0SGleb Smirnoff 101cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 102cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 103f0068c4aSGarrett Wollman 10482cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10582cea7e6SBjoern A. Zeeb 10682cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1076df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 108eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1098b615593SMarko Zec "Enable IP forwarding between interfaces"); 1100312fbe9SPoul-Henning Kamp 1113e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 11282cea7e6SBjoern A. Zeeb #define V_ipsendredirects VNET(ipsendredirects) 1136df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 114eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1158b615593SMarko Zec "Enable sending IP redirects"); 1160312fbe9SPoul-Henning Kamp 117823db0e9SDon Lewis /* 118823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 119823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 120a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 121823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1223f67c834SDon Lewis * 123a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 124a8f12100SDon Lewis * to translate the destination address to another local interface. 1253f67c834SDon Lewis * 1263f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1273f67c834SDon Lewis * to the loopback interface instead of the interface where the 1283f67c834SDon Lewis * packets for those addresses are received. 129823db0e9SDon Lewis */ 1303e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface); 13182cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1326df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 133eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1348b615593SMarko Zec "Verify packet arrives on correct interface"); 135b3e95d4eSJonathan Lemon 1360b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 137df8bae1dSRodney W. Grimes 138d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 139d4b5cae4SRobert Watson .nh_name = "ip", 140d4b5cae4SRobert Watson .nh_handler = ip_input, 141d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 142b8bc95cdSAdrian Chadd #ifdef RSS 1432527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 144b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 145b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 146b8bc95cdSAdrian Chadd #else 147d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 148b8bc95cdSAdrian Chadd #endif 149d4b5cae4SRobert Watson }; 150ca925d9cSJonathan Lemon 151b8bc95cdSAdrian Chadd #ifdef RSS 152b8bc95cdSAdrian Chadd /* 153b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 154b8bc95cdSAdrian Chadd * to have a flowid already calculated. 155b8bc95cdSAdrian Chadd * 156b8bc95cdSAdrian Chadd * It should likely have something that assert it 157b8bc95cdSAdrian Chadd * actually has valid flow details. 158b8bc95cdSAdrian Chadd */ 159b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 160b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 161b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 162b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 163*499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 164b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 165b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 166b8bc95cdSAdrian Chadd }; 167b8bc95cdSAdrian Chadd #endif 168b8bc95cdSAdrian Chadd 169df8bae1dSRodney W. Grimes extern struct domain inetdomain; 170f0ffb944SJulian Elischer extern struct protosw inetsw[]; 171df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17282cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17382cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17482cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 175ca925d9cSJonathan Lemon 1760312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1770312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1783d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1790312fbe9SPoul-Henning Kamp #endif 1800312fbe9SPoul-Henning Kamp 1811b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18282cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1836df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 184eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 185eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1861b968362SDag-Erling Smørgrav #endif 187eddfbb76SRobert Watson 188315e3e38SRobert Watson /* 1895da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1905923c293SGleb Smirnoff */ 1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1925da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1935da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1945da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1955923c293SGleb Smirnoff 1965923c293SGleb Smirnoff #ifdef VIMAGE 1975da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1985923c293SGleb Smirnoff #endif /* VIMAGE */ 1995923c293SGleb Smirnoff 2005923c293SGleb Smirnoff /* 201315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2025923c293SGleb Smirnoff * into ipstat treated as an array. 203315e3e38SRobert Watson */ 204315e3e38SRobert Watson void 205315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 206315e3e38SRobert Watson { 207315e3e38SRobert Watson 2085da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 209315e3e38SRobert Watson } 210315e3e38SRobert Watson 211315e3e38SRobert Watson void 212315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 213315e3e38SRobert Watson { 214315e3e38SRobert Watson 2155da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 216315e3e38SRobert Watson } 217315e3e38SRobert Watson 218d4b5cae4SRobert Watson static int 219d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 220d4b5cae4SRobert Watson { 221d4b5cae4SRobert Watson int error, qlimit; 222d4b5cae4SRobert Watson 223d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 224d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 225d4b5cae4SRobert Watson if (error || !req->newptr) 226d4b5cae4SRobert Watson return (error); 227d4b5cae4SRobert Watson if (qlimit < 1) 228d4b5cae4SRobert Watson return (EINVAL); 229d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 230d4b5cae4SRobert Watson } 231d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 232d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 233d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 234d4b5cae4SRobert Watson 235d4b5cae4SRobert Watson static int 236d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 237d4b5cae4SRobert Watson { 238d4b5cae4SRobert Watson u_int64_t qdrops_long; 239d4b5cae4SRobert Watson int error, qdrops; 240d4b5cae4SRobert Watson 241d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 242d4b5cae4SRobert Watson qdrops = qdrops_long; 243d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 244d4b5cae4SRobert Watson if (error || !req->newptr) 245d4b5cae4SRobert Watson return (error); 246d4b5cae4SRobert Watson if (qdrops != 0) 247d4b5cae4SRobert Watson return (EINVAL); 248d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 249d4b5cae4SRobert Watson return (0); 250d4b5cae4SRobert Watson } 251d4b5cae4SRobert Watson 252d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 253d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 254d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 255d4b5cae4SRobert Watson 256b8bc95cdSAdrian Chadd #ifdef RSS 257b8bc95cdSAdrian Chadd static int 258b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 259b8bc95cdSAdrian Chadd { 260b8bc95cdSAdrian Chadd int error, qlimit; 261b8bc95cdSAdrian Chadd 262b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 263b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 264b8bc95cdSAdrian Chadd if (error || !req->newptr) 265b8bc95cdSAdrian Chadd return (error); 266b8bc95cdSAdrian Chadd if (qlimit < 1) 267b8bc95cdSAdrian Chadd return (EINVAL); 268b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 269b8bc95cdSAdrian Chadd } 270b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, 271b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", 272b8bc95cdSAdrian Chadd "Maximum size of the IP direct input queue"); 273b8bc95cdSAdrian Chadd 274b8bc95cdSAdrian Chadd static int 275b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 276b8bc95cdSAdrian Chadd { 277b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 278b8bc95cdSAdrian Chadd int error, qdrops; 279b8bc95cdSAdrian Chadd 280b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 281b8bc95cdSAdrian Chadd qdrops = qdrops_long; 282b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 283b8bc95cdSAdrian Chadd if (error || !req->newptr) 284b8bc95cdSAdrian Chadd return (error); 285b8bc95cdSAdrian Chadd if (qdrops != 0) 286b8bc95cdSAdrian Chadd return (EINVAL); 287b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 288b8bc95cdSAdrian Chadd return (0); 289b8bc95cdSAdrian Chadd } 290b8bc95cdSAdrian Chadd 291b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, 292b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", 293b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 294b8bc95cdSAdrian Chadd #endif /* RSS */ 295b8bc95cdSAdrian Chadd 296df8bae1dSRodney W. Grimes /* 297df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 298df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 299df8bae1dSRodney W. Grimes */ 300df8bae1dSRodney W. Grimes void 301f2565d68SRobert Watson ip_init(void) 302df8bae1dSRodney W. Grimes { 303f2565d68SRobert Watson struct protosw *pr; 304f2565d68SRobert Watson int i; 305df8bae1dSRodney W. Grimes 306603724d3SBjoern A. Zeeb TAILQ_INIT(&V_in_ifaddrhead); 307603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3081ed81b73SMarko Zec 3091ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3101dbefcc0SGleb Smirnoff ipreass_init(); 3111ed81b73SMarko Zec 3120b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 3130b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 3140b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_af = AF_INET; 3150b4b0b0fSJulian Elischer if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 3160b4b0b0fSJulian Elischer printf("%s: WARNING: unable to register pfil hook, " 3170b4b0b0fSJulian Elischer "error %d\n", __func__, i); 3180b4b0b0fSJulian Elischer 3191ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 3201ed81b73SMarko Zec if (!IS_DEFAULT_VNET(curvnet)) 3211ed81b73SMarko Zec return; 3221ed81b73SMarko Zec 323f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 32402410549SRobert Watson if (pr == NULL) 325db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 326db09bef3SAndre Oppermann 327db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 328df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 329df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 330db09bef3SAndre Oppermann /* 331db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 332db09bef3SAndre Oppermann * in ip_protox[]. 333db09bef3SAndre Oppermann */ 334f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 335f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 336df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 337db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 338db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 339db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 340df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 341db09bef3SAndre Oppermann } 342194a213eSAndrey A. Chernov 343d4b5cae4SRobert Watson netisr_register(&ip_nh); 344b8bc95cdSAdrian Chadd #ifdef RSS 345b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 346b8bc95cdSAdrian Chadd #endif 347df8bae1dSRodney W. Grimes } 348df8bae1dSRodney W. Grimes 3499802380eSBjoern A. Zeeb #ifdef VIMAGE 3509802380eSBjoern A. Zeeb void 3519802380eSBjoern A. Zeeb ip_destroy(void) 3529802380eSBjoern A. Zeeb { 3534d3dfd45SMikolaj Golub int i; 3544d3dfd45SMikolaj Golub 3554d3dfd45SMikolaj Golub if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 3564d3dfd45SMikolaj Golub printf("%s: WARNING: unable to unregister pfil hook, " 3574d3dfd45SMikolaj Golub "error %d\n", __func__, i); 3589802380eSBjoern A. Zeeb 3599802380eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 3609802380eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 3619802380eSBjoern A. Zeeb 362e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 3631dbefcc0SGleb Smirnoff ipreass_destroy(); 3649802380eSBjoern A. Zeeb } 3659802380eSBjoern A. Zeeb #endif 3669802380eSBjoern A. Zeeb 367b8bc95cdSAdrian Chadd #ifdef RSS 368b8bc95cdSAdrian Chadd /* 369b8bc95cdSAdrian Chadd * IP direct input routine. 370b8bc95cdSAdrian Chadd * 371b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 372b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 373b8bc95cdSAdrian Chadd */ 374b8bc95cdSAdrian Chadd void 375b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 376b8bc95cdSAdrian Chadd { 377b8bc95cdSAdrian Chadd struct ip *ip; 378b8bc95cdSAdrian Chadd int hlen; 379b8bc95cdSAdrian Chadd 380b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 381b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 382b8bc95cdSAdrian Chadd 383b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 384b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 385b8bc95cdSAdrian Chadd return; 386b8bc95cdSAdrian Chadd } 387b8bc95cdSAdrian Chadd #endif 388b8bc95cdSAdrian Chadd 3894d2e3692SLuigi Rizzo /* 390df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 391df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 392df8bae1dSRodney W. Grimes */ 393c67b1d17SGarrett Wollman void 394c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 395df8bae1dSRodney W. Grimes { 3969188b4a1SAndre Oppermann struct ip *ip = NULL; 3975da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 398ca925d9cSJonathan Lemon struct ifaddr *ifa; 3990aade26eSRobert Watson struct ifnet *ifp; 4009b932e9eSAndre Oppermann int checkif, hlen = 0; 40121d172a3SGleb Smirnoff uint16_t sum, ip_len; 40202c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 403f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 404b715f178SLuigi Rizzo 405fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 406db40007dSAndrew R. Reiter 407ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 40876ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 40976ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4102b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 41153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4128f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4139b932e9eSAndre Oppermann goto ours; 4142b25acc1SLuigi Rizzo } 4152b25acc1SLuigi Rizzo 41686425c62SRobert Watson IPSTAT_INC(ips_total); 41758938916SGarrett Wollman 41858938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 41958938916SGarrett Wollman goto tooshort; 42058938916SGarrett Wollman 421df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4220b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 42386425c62SRobert Watson IPSTAT_INC(ips_toosmall); 424c67b1d17SGarrett Wollman return; 425df8bae1dSRodney W. Grimes } 426df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 42758938916SGarrett Wollman 42853be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 42986425c62SRobert Watson IPSTAT_INC(ips_badvers); 430df8bae1dSRodney W. Grimes goto bad; 431df8bae1dSRodney W. Grimes } 43258938916SGarrett Wollman 43353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 434df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 43586425c62SRobert Watson IPSTAT_INC(ips_badhlen); 436df8bae1dSRodney W. Grimes goto bad; 437df8bae1dSRodney W. Grimes } 438df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4390b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 44086425c62SRobert Watson IPSTAT_INC(ips_badhlen); 441c67b1d17SGarrett Wollman return; 442df8bae1dSRodney W. Grimes } 443df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 444df8bae1dSRodney W. Grimes } 44533841545SHajimu UMEMOTO 44657f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 44757f60867SMark Johnston 44833841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 4490aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 45033841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 45133841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 4520aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 45386425c62SRobert Watson IPSTAT_INC(ips_badaddr); 45433841545SHajimu UMEMOTO goto bad; 45533841545SHajimu UMEMOTO } 45633841545SHajimu UMEMOTO } 45733841545SHajimu UMEMOTO 458db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 459db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 460db4f9cc7SJonathan Lemon } else { 46158938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 46247c861ecSBrian Somers sum = in_cksum_hdr(ip); 46358938916SGarrett Wollman } else { 46447c861ecSBrian Somers sum = in_cksum(m, hlen); 46558938916SGarrett Wollman } 466db4f9cc7SJonathan Lemon } 46747c861ecSBrian Somers if (sum) { 46886425c62SRobert Watson IPSTAT_INC(ips_badsum); 469df8bae1dSRodney W. Grimes goto bad; 470df8bae1dSRodney W. Grimes } 471df8bae1dSRodney W. Grimes 47202b199f1SMax Laier #ifdef ALTQ 47302b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 47402b199f1SMax Laier /* packet is dropped by traffic conditioner */ 47502b199f1SMax Laier return; 47602b199f1SMax Laier #endif 47702b199f1SMax Laier 47821d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 47921d172a3SGleb Smirnoff if (ip_len < hlen) { 48086425c62SRobert Watson IPSTAT_INC(ips_badlen); 481df8bae1dSRodney W. Grimes goto bad; 482df8bae1dSRodney W. Grimes } 483df8bae1dSRodney W. Grimes 484df8bae1dSRodney W. Grimes /* 485df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 486df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 487df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 488df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 489df8bae1dSRodney W. Grimes */ 49021d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 49158938916SGarrett Wollman tooshort: 49286425c62SRobert Watson IPSTAT_INC(ips_tooshort); 493df8bae1dSRodney W. Grimes goto bad; 494df8bae1dSRodney W. Grimes } 49521d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 496df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 49721d172a3SGleb Smirnoff m->m_len = ip_len; 49821d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 499df8bae1dSRodney W. Grimes } else 50021d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 501df8bae1dSRodney W. Grimes } 502b8bc95cdSAdrian Chadd 503b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 50414dd6717SSam Leffler /* 505ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 50614dd6717SSam Leffler */ 507cc977adcSBjoern A. Zeeb if (ip_ipsec_filtertunnel(m)) 508c21fd232SAndre Oppermann goto passin; 509b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 5103f67c834SDon Lewis 511c4ac87eaSDarren Reed /* 512134ea224SSam Leffler * Run through list of hooks for input packets. 513f51f805fSSam Leffler * 514f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 515f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 516f51f805fSSam Leffler * ip_forward to do the right thing. 517c4ac87eaSDarren Reed */ 518c21fd232SAndre Oppermann 519c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 5200b4b0b0fSJulian Elischer if (!PFIL_HOOKED(&V_inet_pfil_hook)) 521c21fd232SAndre Oppermann goto passin; 522c21fd232SAndre Oppermann 523f51f805fSSam Leffler odst = ip->ip_dst; 5240b4b0b0fSJulian Elischer if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 525beec8214SDarren Reed return; 526134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 527c4ac87eaSDarren Reed return; 5289b932e9eSAndre Oppermann 529c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 53002c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 5310aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5329b932e9eSAndre Oppermann 5339b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 5349b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 5359b932e9eSAndre Oppermann goto ours; 5369b932e9eSAndre Oppermann } 537ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 538ffdbf9daSAndrey V. Elsukov dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); 539ffdbf9daSAndrey V. Elsukov if (dchg != 0) { 540099dd043SAndre Oppermann /* 541ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 542ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 543ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 544099dd043SAndre Oppermann */ 545ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 546099dd043SAndre Oppermann return; 547099dd043SAndre Oppermann } 548ffdbf9daSAndrey V. Elsukov } 549c21fd232SAndre Oppermann passin: 55021d172a3SGleb Smirnoff 55121d172a3SGleb Smirnoff /* 552df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 553df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 554df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 555df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 556df8bae1dSRodney W. Grimes */ 5579b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 558c67b1d17SGarrett Wollman return; 559df8bae1dSRodney W. Grimes 560f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 561f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 562f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 563f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 564f0068c4aSGarrett Wollman * grabbing the packet. 565f0068c4aSGarrett Wollman */ 566603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 567f0068c4aSGarrett Wollman goto ours; 568f0068c4aSGarrett Wollman 569df8bae1dSRodney W. Grimes /* 570df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 571cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 572cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 573cc766e04SGarrett Wollman * with it). 574df8bae1dSRodney W. Grimes */ 575603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_in_ifaddrhead) && 576cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 577cc766e04SGarrett Wollman goto ours; 578cc766e04SGarrett Wollman 5797538a9a0SJonathan Lemon /* 580823db0e9SDon Lewis * Enable a consistency check between the destination address 581823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 582823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 583e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 584e15ae1b2SDon Lewis * 'ipfw fwd'. 5853f67c834SDon Lewis * 5863f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 5873f67c834SDon Lewis * address is ipnat'ed to a different interface. 5883f67c834SDon Lewis * 589a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 5903f67c834SDon Lewis * to the loopback interface instead of the interface where 5913f67c834SDon Lewis * the packets are received. 592a9771948SGleb Smirnoff * 593a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 594a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 595a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 596823db0e9SDon Lewis */ 597603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 5980aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 59954bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 600823db0e9SDon Lewis 601ca925d9cSJonathan Lemon /* 602ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 603ca925d9cSJonathan Lemon */ 6042d9cfabaSRobert Watson /* IN_IFADDR_RLOCK(); */ 6059b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 606f9e354dfSJulian Elischer /* 607823db0e9SDon Lewis * If the address matches, verify that the packet 608823db0e9SDon Lewis * arrived via the correct interface if checking is 609823db0e9SDon Lewis * enabled. 610f9e354dfSJulian Elischer */ 6119b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6128c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6137caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6147caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6157caf4ab7SGleb Smirnoff m->m_pkthdr.len); 6162d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 617ed1ff184SJulian Elischer goto ours; 618ca925d9cSJonathan Lemon } 6198c0fec80SRobert Watson } 6202d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 6212d9cfabaSRobert Watson 622823db0e9SDon Lewis /* 623ca925d9cSJonathan Lemon * Check for broadcast addresses. 624ca925d9cSJonathan Lemon * 625ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 626ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 627ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 628ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 629823db0e9SDon Lewis */ 6300aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 631137f91e8SJohn Baldwin IF_ADDR_RLOCK(ifp); 6320aade26eSRobert Watson TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 633ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 634ca925d9cSJonathan Lemon continue; 635ca925d9cSJonathan Lemon ia = ifatoia(ifa); 636df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 6370aade26eSRobert Watson ip->ip_dst.s_addr) { 6387caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6397caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6407caf4ab7SGleb Smirnoff m->m_pkthdr.len); 641137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 642df8bae1dSRodney W. Grimes goto ours; 6430aade26eSRobert Watson } 6440ac40133SBrian Somers #ifdef BOOTP_COMPAT 6450aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 6467caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6477caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6487caf4ab7SGleb Smirnoff m->m_pkthdr.len); 649137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 650ca925d9cSJonathan Lemon goto ours; 6510aade26eSRobert Watson } 6520ac40133SBrian Somers #endif 653df8bae1dSRodney W. Grimes } 654137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 65519e5b0a7SRobert Watson ia = NULL; 656df8bae1dSRodney W. Grimes } 657f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 658f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 65986425c62SRobert Watson IPSTAT_INC(ips_cantforward); 660f8429ca2SBruce M Simpson m_freem(m); 661f8429ca2SBruce M Simpson return; 662f8429ca2SBruce M Simpson } 663df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 664603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 665df8bae1dSRodney W. Grimes /* 666df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 667df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 668df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 669df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 670df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 671df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 672df8bae1dSRodney W. Grimes */ 6730aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 67486425c62SRobert Watson IPSTAT_INC(ips_cantforward); 675df8bae1dSRodney W. Grimes m_freem(m); 676c67b1d17SGarrett Wollman return; 677df8bae1dSRodney W. Grimes } 678df8bae1dSRodney W. Grimes 679df8bae1dSRodney W. Grimes /* 68011612afaSDima Dorfman * The process-level routing daemon needs to receive 681df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 682df8bae1dSRodney W. Grimes * host belongs to their destination groups. 683df8bae1dSRodney W. Grimes */ 684df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 685df8bae1dSRodney W. Grimes goto ours; 68686425c62SRobert Watson IPSTAT_INC(ips_forward); 687df8bae1dSRodney W. Grimes } 688df8bae1dSRodney W. Grimes /* 689d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 690d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 691d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 692df8bae1dSRodney W. Grimes */ 693df8bae1dSRodney W. Grimes goto ours; 694df8bae1dSRodney W. Grimes } 695df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 696df8bae1dSRodney W. Grimes goto ours; 697df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 698df8bae1dSRodney W. Grimes goto ours; 699df8bae1dSRodney W. Grimes 7006a800098SYoshinobu Inoue /* 701df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 702df8bae1dSRodney W. Grimes */ 703603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 70486425c62SRobert Watson IPSTAT_INC(ips_cantforward); 705df8bae1dSRodney W. Grimes m_freem(m); 706546f251bSChris D. Faulhaber } else { 7079b932e9eSAndre Oppermann ip_forward(m, dchg); 708546f251bSChris D. Faulhaber } 709c67b1d17SGarrett Wollman return; 710df8bae1dSRodney W. Grimes 711df8bae1dSRodney W. Grimes ours: 712d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 713d0ebc0d2SYaroslav Tykhiy /* 714d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 715d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 716d0ebc0d2SYaroslav Tykhiy */ 7177caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 718d0ebc0d2SYaroslav Tykhiy return; 719d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 720d0ebc0d2SYaroslav Tykhiy 72163f8d699SJordan K. Hubbard /* 722b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 723ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 724df8bae1dSRodney W. Grimes */ 7258f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 726aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 727f0cada84SAndre Oppermann m = ip_reass(m); 728f0cada84SAndre Oppermann if (m == NULL) 729c67b1d17SGarrett Wollman return; 7306a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 7317e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 73253be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 733f0cada84SAndre Oppermann } 734f0cada84SAndre Oppermann 735b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 73633841545SHajimu UMEMOTO /* 73733841545SHajimu UMEMOTO * enforce IPsec policy checking if we are seeing last header. 73833841545SHajimu UMEMOTO * note that we do not visit this with protocols with pcb layer 73933841545SHajimu UMEMOTO * code - like udp/tcp/raw ip. 74033841545SHajimu UMEMOTO */ 741e58320f1SAndrey V. Elsukov if (ip_ipsec_input(m, ip->ip_p) != 0) 74233841545SHajimu UMEMOTO goto bad; 743b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 74433841545SHajimu UMEMOTO 745df8bae1dSRodney W. Grimes /* 746df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 747df8bae1dSRodney W. Grimes */ 74886425c62SRobert Watson IPSTAT_INC(ips_delivered); 7499b932e9eSAndre Oppermann 7508f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 751c67b1d17SGarrett Wollman return; 752df8bae1dSRodney W. Grimes bad: 753df8bae1dSRodney W. Grimes m_freem(m); 754c67b1d17SGarrett Wollman } 755c67b1d17SGarrett Wollman 756c67b1d17SGarrett Wollman /* 757df8bae1dSRodney W. Grimes * IP timer processing; 758df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 759df8bae1dSRodney W. Grimes * queue, discard it. 760df8bae1dSRodney W. Grimes */ 761df8bae1dSRodney W. Grimes void 762f2565d68SRobert Watson ip_slowtimo(void) 763df8bae1dSRodney W. Grimes { 7648b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 765df8bae1dSRodney W. Grimes 7665ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 7678b615593SMarko Zec VNET_FOREACH(vnet_iter) { 7688b615593SMarko Zec CURVNET_SET(vnet_iter); 7691dbefcc0SGleb Smirnoff ipreass_slowtimo(); 7708b615593SMarko Zec CURVNET_RESTORE(); 7718b615593SMarko Zec } 7725ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 773df8bae1dSRodney W. Grimes } 774df8bae1dSRodney W. Grimes 7759802380eSBjoern A. Zeeb void 7769802380eSBjoern A. Zeeb ip_drain(void) 7779802380eSBjoern A. Zeeb { 7789802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 7799802380eSBjoern A. Zeeb 7809802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 7819802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 7829802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 7831dbefcc0SGleb Smirnoff ipreass_drain(); 7848b615593SMarko Zec CURVNET_RESTORE(); 7858b615593SMarko Zec } 7865ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 787df8bae1dSRodney W. Grimes } 788df8bae1dSRodney W. Grimes 789df8bae1dSRodney W. Grimes /* 790de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 791de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 792de38924dSAndre Oppermann */ 793de38924dSAndre Oppermann int 7941b48d245SBjoern A. Zeeb ipproto_register(short ipproto) 795de38924dSAndre Oppermann { 796de38924dSAndre Oppermann struct protosw *pr; 797de38924dSAndre Oppermann 798de38924dSAndre Oppermann /* Sanity checks. */ 7991b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 800de38924dSAndre Oppermann return (EPROTONOSUPPORT); 801de38924dSAndre Oppermann 802de38924dSAndre Oppermann /* 803de38924dSAndre Oppermann * The protocol slot must not be occupied by another protocol 804de38924dSAndre Oppermann * already. An index pointing to IPPROTO_RAW is unused. 805de38924dSAndre Oppermann */ 806de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 807de38924dSAndre Oppermann if (pr == NULL) 808de38924dSAndre Oppermann return (EPFNOSUPPORT); 809de38924dSAndre Oppermann if (ip_protox[ipproto] != pr - inetsw) /* IPPROTO_RAW */ 810de38924dSAndre Oppermann return (EEXIST); 811de38924dSAndre Oppermann 812de38924dSAndre Oppermann /* Find the protocol position in inetsw[] and set the index. */ 813de38924dSAndre Oppermann for (pr = inetdomain.dom_protosw; 814de38924dSAndre Oppermann pr < inetdomain.dom_protoswNPROTOSW; pr++) { 815de38924dSAndre Oppermann if (pr->pr_domain->dom_family == PF_INET && 816de38924dSAndre Oppermann pr->pr_protocol && pr->pr_protocol == ipproto) { 817de38924dSAndre Oppermann ip_protox[pr->pr_protocol] = pr - inetsw; 818de38924dSAndre Oppermann return (0); 819de38924dSAndre Oppermann } 820de38924dSAndre Oppermann } 821de38924dSAndre Oppermann return (EPROTONOSUPPORT); 822de38924dSAndre Oppermann } 823de38924dSAndre Oppermann 824de38924dSAndre Oppermann int 8251b48d245SBjoern A. Zeeb ipproto_unregister(short ipproto) 826de38924dSAndre Oppermann { 827de38924dSAndre Oppermann struct protosw *pr; 828de38924dSAndre Oppermann 829de38924dSAndre Oppermann /* Sanity checks. */ 8301b48d245SBjoern A. Zeeb if (ipproto <= 0 || ipproto >= IPPROTO_MAX) 831de38924dSAndre Oppermann return (EPROTONOSUPPORT); 832de38924dSAndre Oppermann 833de38924dSAndre Oppermann /* Check if the protocol was indeed registered. */ 834de38924dSAndre Oppermann pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 835de38924dSAndre Oppermann if (pr == NULL) 836de38924dSAndre Oppermann return (EPFNOSUPPORT); 837de38924dSAndre Oppermann if (ip_protox[ipproto] == pr - inetsw) /* IPPROTO_RAW */ 838de38924dSAndre Oppermann return (ENOENT); 839de38924dSAndre Oppermann 840de38924dSAndre Oppermann /* Reset the protocol slot to IPPROTO_RAW. */ 841de38924dSAndre Oppermann ip_protox[ipproto] = pr - inetsw; 842de38924dSAndre Oppermann return (0); 843de38924dSAndre Oppermann } 844de38924dSAndre Oppermann 845df8bae1dSRodney W. Grimes /* 8468c0fec80SRobert Watson * Given address of next destination (final or next hop), return (referenced) 8478c0fec80SRobert Watson * internet address info of interface to be used to get there. 848df8bae1dSRodney W. Grimes */ 849bd714208SRuslan Ermilov struct in_ifaddr * 8508b07e49aSJulian Elischer ip_rtaddr(struct in_addr dst, u_int fibnum) 851df8bae1dSRodney W. Grimes { 85297d8d152SAndre Oppermann struct route sro; 85302c1c707SAndre Oppermann struct sockaddr_in *sin; 85419e5b0a7SRobert Watson struct in_ifaddr *ia; 855df8bae1dSRodney W. Grimes 8560cfbbe3bSAndre Oppermann bzero(&sro, sizeof(sro)); 85797d8d152SAndre Oppermann sin = (struct sockaddr_in *)&sro.ro_dst; 858df8bae1dSRodney W. Grimes sin->sin_family = AF_INET; 859df8bae1dSRodney W. Grimes sin->sin_len = sizeof(*sin); 860df8bae1dSRodney W. Grimes sin->sin_addr = dst; 8616e6b3f7cSQing Li in_rtalloc_ign(&sro, 0, fibnum); 862df8bae1dSRodney W. Grimes 86397d8d152SAndre Oppermann if (sro.ro_rt == NULL) 86402410549SRobert Watson return (NULL); 86502c1c707SAndre Oppermann 86619e5b0a7SRobert Watson ia = ifatoia(sro.ro_rt->rt_ifa); 86719e5b0a7SRobert Watson ifa_ref(&ia->ia_ifa); 86897d8d152SAndre Oppermann RTFREE(sro.ro_rt); 86919e5b0a7SRobert Watson return (ia); 870df8bae1dSRodney W. Grimes } 871df8bae1dSRodney W. Grimes 872df8bae1dSRodney W. Grimes u_char inetctlerrmap[PRC_NCMDS] = { 873df8bae1dSRodney W. Grimes 0, 0, 0, 0, 874df8bae1dSRodney W. Grimes 0, EMSGSIZE, EHOSTDOWN, EHOSTUNREACH, 875df8bae1dSRodney W. Grimes EHOSTUNREACH, EHOSTUNREACH, ECONNREFUSED, ECONNREFUSED, 876df8bae1dSRodney W. Grimes EMSGSIZE, EHOSTUNREACH, 0, 0, 877fcaf9f91SMike Silbersack 0, 0, EHOSTUNREACH, 0, 8783b8123b7SJesper Skriver ENOPROTOOPT, ECONNREFUSED 879df8bae1dSRodney W. Grimes }; 880df8bae1dSRodney W. Grimes 881df8bae1dSRodney W. Grimes /* 882df8bae1dSRodney W. Grimes * Forward a packet. If some error occurs return the sender 883df8bae1dSRodney W. Grimes * an icmp packet. Note we can't always generate a meaningful 884df8bae1dSRodney W. Grimes * icmp message because icmp doesn't have a large enough repertoire 885df8bae1dSRodney W. Grimes * of codes and types. 886df8bae1dSRodney W. Grimes * 887df8bae1dSRodney W. Grimes * If not forwarding, just drop the packet. This could be confusing 888df8bae1dSRodney W. Grimes * if ipforwarding was zero but some routing protocol was advancing 889df8bae1dSRodney W. Grimes * us as a gateway to somewhere. However, we must let the routing 890df8bae1dSRodney W. Grimes * protocol deal with that. 891df8bae1dSRodney W. Grimes * 892df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 893df8bae1dSRodney W. Grimes * via a source route. 894df8bae1dSRodney W. Grimes */ 8959b932e9eSAndre Oppermann void 8969b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 897df8bae1dSRodney W. Grimes { 8982b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 899efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 900df8bae1dSRodney W. Grimes struct mbuf *mcopy; 901d14122b0SErmal Luçi struct sockaddr_in *sin; 9029b932e9eSAndre Oppermann struct in_addr dest; 903b835b6feSBjoern A. Zeeb struct route ro; 904c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9053efc3014SJulian Elischer 9069b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 90786425c62SRobert Watson IPSTAT_INC(ips_cantforward); 908df8bae1dSRodney W. Grimes m_freem(m); 909df8bae1dSRodney W. Grimes return; 910df8bae1dSRodney W. Grimes } 9118922ddbeSAndrey V. Elsukov #ifdef IPSEC 9128922ddbeSAndrey V. Elsukov if (ip_ipsec_fwd(m) != 0) { 9138922ddbeSAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 9148922ddbeSAndrey V. Elsukov m_freem(m); 9158922ddbeSAndrey V. Elsukov return; 9168922ddbeSAndrey V. Elsukov } 9178922ddbeSAndrey V. Elsukov #endif /* IPSEC */ 9181b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 919603724d3SBjoern A. Zeeb if (!V_ipstealth) { 9201b968362SDag-Erling Smørgrav #endif 921df8bae1dSRodney W. Grimes if (ip->ip_ttl <= IPTTLDEC) { 9221b968362SDag-Erling Smørgrav icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 92302c1c707SAndre Oppermann 0, 0); 924df8bae1dSRodney W. Grimes return; 925df8bae1dSRodney W. Grimes } 9261b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 9271b968362SDag-Erling Smørgrav } 9281b968362SDag-Erling Smørgrav #endif 929df8bae1dSRodney W. Grimes 930d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 931d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 932d14122b0SErmal Luçi sin->sin_family = AF_INET; 933d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 934d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 935d14122b0SErmal Luçi #ifdef RADIX_MPATH 936d14122b0SErmal Luçi rtalloc_mpath_fib(&ro, 937d14122b0SErmal Luçi ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 938d14122b0SErmal Luçi M_GETFIB(m)); 939d14122b0SErmal Luçi #else 940d14122b0SErmal Luçi in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 941d14122b0SErmal Luçi #endif 942d14122b0SErmal Luçi if (ro.ro_rt != NULL) { 943d14122b0SErmal Luçi ia = ifatoia(ro.ro_rt->rt_ifa); 944d14122b0SErmal Luçi ifa_ref(&ia->ia_ifa); 94556844a62SErmal Luçi } else 94656844a62SErmal Luçi ia = NULL; 947efbad259SEdward Tomasz Napierala #ifndef IPSEC 948efbad259SEdward Tomasz Napierala /* 949efbad259SEdward Tomasz Napierala * 'ia' may be NULL if there is no route for this destination. 950efbad259SEdward Tomasz Napierala * In case of IPsec, Don't discard it just yet, but pass it to 951efbad259SEdward Tomasz Napierala * ip_output in case of outgoing IPsec policy. 952efbad259SEdward Tomasz Napierala */ 953d23d475fSGuido van Rooij if (!srcrt && ia == NULL) { 95402c1c707SAndre Oppermann icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 955d14122b0SErmal Luçi RO_RTFREE(&ro); 956df8bae1dSRodney W. Grimes return; 95702c1c707SAndre Oppermann } 958efbad259SEdward Tomasz Napierala #endif 959df8bae1dSRodney W. Grimes 960df8bae1dSRodney W. Grimes /* 961bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 962bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 963bfef7ed4SIan Dowse * 9644d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 9654d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 9664d2e3692SLuigi Rizzo * mbuf if really necessary. The vast majority of the packets 9674d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 9684d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 9694d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 9704d2e3692SLuigi Rizzo * 971bfef7ed4SIan Dowse * We don't use m_copy() because it might return a reference 972bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 973bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 974bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 975df8bae1dSRodney W. Grimes */ 976dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 977eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 9789967cafcSSam Leffler /* 9799967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 9809967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 9819967cafcSSam Leffler * be conservative and just discard the copy since 9829967cafcSSam Leffler * code below may some day want the tags. 9839967cafcSSam Leffler */ 9849967cafcSSam Leffler m_free(mcopy); 9859967cafcSSam Leffler mcopy = NULL; 9869967cafcSSam Leffler } 987bfef7ed4SIan Dowse if (mcopy != NULL) { 9888f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 989e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 990bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 991bfef7ed4SIan Dowse } 99204287599SRuslan Ermilov 99304287599SRuslan Ermilov #ifdef IPSTEALTH 994603724d3SBjoern A. Zeeb if (!V_ipstealth) { 99504287599SRuslan Ermilov #endif 99604287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 99704287599SRuslan Ermilov #ifdef IPSTEALTH 99804287599SRuslan Ermilov } 99904287599SRuslan Ermilov #endif 1000df8bae1dSRodney W. Grimes 1001df8bae1dSRodney W. Grimes /* 1002df8bae1dSRodney W. Grimes * If forwarding packet using same interface that it came in on, 1003df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1004df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1005df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1006df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1007df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1008df8bae1dSRodney W. Grimes */ 10099b932e9eSAndre Oppermann dest.s_addr = 0; 1010efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1011efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 101202c1c707SAndre Oppermann struct rtentry *rt; 101302c1c707SAndre Oppermann 101402c1c707SAndre Oppermann rt = ro.ro_rt; 101502c1c707SAndre Oppermann 101602c1c707SAndre Oppermann if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 10179b932e9eSAndre Oppermann satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1018df8bae1dSRodney W. Grimes #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1019df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1020df8bae1dSRodney W. Grimes 1021df8bae1dSRodney W. Grimes if (RTA(rt) && 1022df8bae1dSRodney W. Grimes (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1023df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) 10249b932e9eSAndre Oppermann dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1025df8bae1dSRodney W. Grimes else 10269b932e9eSAndre Oppermann dest.s_addr = ip->ip_dst.s_addr; 1027df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1028df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1029df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 1030df8bae1dSRodney W. Grimes } 1031df8bae1dSRodney W. Grimes } 103202c1c707SAndre Oppermann } 1033df8bae1dSRodney W. Grimes 1034b835b6feSBjoern A. Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1035b835b6feSBjoern A. Zeeb 1036b835b6feSBjoern A. Zeeb if (error == EMSGSIZE && ro.ro_rt) 1037e3a7aa6fSGleb Smirnoff mtu = ro.ro_rt->rt_mtu; 1038bf984051SGleb Smirnoff RO_RTFREE(&ro); 1039b835b6feSBjoern A. Zeeb 1040df8bae1dSRodney W. Grimes if (error) 104186425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1042df8bae1dSRodney W. Grimes else { 104386425c62SRobert Watson IPSTAT_INC(ips_forward); 1044df8bae1dSRodney W. Grimes if (type) 104586425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1046df8bae1dSRodney W. Grimes else { 10479188b4a1SAndre Oppermann if (mcopy) 1048df8bae1dSRodney W. Grimes m_freem(mcopy); 10498c0fec80SRobert Watson if (ia != NULL) 10508c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1051df8bae1dSRodney W. Grimes return; 1052df8bae1dSRodney W. Grimes } 1053df8bae1dSRodney W. Grimes } 10548c0fec80SRobert Watson if (mcopy == NULL) { 10558c0fec80SRobert Watson if (ia != NULL) 10568c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1057df8bae1dSRodney W. Grimes return; 10588c0fec80SRobert Watson } 1059df8bae1dSRodney W. Grimes 1060df8bae1dSRodney W. Grimes switch (error) { 1061df8bae1dSRodney W. Grimes 1062df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1063df8bae1dSRodney W. Grimes /* type, code set above */ 1064df8bae1dSRodney W. Grimes break; 1065df8bae1dSRodney W. Grimes 1066efbad259SEdward Tomasz Napierala case ENETUNREACH: 1067df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1068df8bae1dSRodney W. Grimes case ENETDOWN: 1069df8bae1dSRodney W. Grimes case EHOSTDOWN: 1070df8bae1dSRodney W. Grimes default: 1071df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1072df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1073df8bae1dSRodney W. Grimes break; 1074df8bae1dSRodney W. Grimes 1075df8bae1dSRodney W. Grimes case EMSGSIZE: 1076df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1077df8bae1dSRodney W. Grimes code = ICMP_UNREACH_NEEDFRAG; 10781dfcf0d2SAndre Oppermann 1079b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 1080b835b6feSBjoern A. Zeeb /* 1081b835b6feSBjoern A. Zeeb * If IPsec is configured for this path, 1082b835b6feSBjoern A. Zeeb * override any possibly mtu value set by ip_output. 1083b835b6feSBjoern A. Zeeb */ 10841c044382SBjoern A. Zeeb mtu = ip_ipsec_mtu(mcopy, mtu); 1085b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 10869b932e9eSAndre Oppermann /* 1087b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1088b835b6feSBjoern A. Zeeb * interface MTU. 1089ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1090ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1091ab48768bSAndre Oppermann * current packet size. 10929b932e9eSAndre Oppermann */ 1093b835b6feSBjoern A. Zeeb if (mtu != 0) { 1094b835b6feSBjoern A. Zeeb if (ia != NULL) 1095b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1096b835b6feSBjoern A. Zeeb } else { 1097ab48768bSAndre Oppermann if (ia != NULL) 1098c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1099ab48768bSAndre Oppermann else 11008f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1101ab48768bSAndre Oppermann } 110286425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1103df8bae1dSRodney W. Grimes break; 1104df8bae1dSRodney W. Grimes 1105df8bae1dSRodney W. Grimes case ENOBUFS: 11063a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11073a06e3e0SRuslan Ermilov m_freem(mcopy); 11088c0fec80SRobert Watson if (ia != NULL) 11098c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 11103a06e3e0SRuslan Ermilov return; 1111df8bae1dSRodney W. Grimes } 11128c0fec80SRobert Watson if (ia != NULL) 11138c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1114c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1115df8bae1dSRodney W. Grimes } 1116df8bae1dSRodney W. Grimes 111782c23ebaSBill Fenner void 1118f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1119f2565d68SRobert Watson struct mbuf *m) 112082c23ebaSBill Fenner { 11218b615593SMarko Zec 1122be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1123be8a62e8SPoul-Henning Kamp struct bintime bt; 1124be8a62e8SPoul-Henning Kamp 1125be8a62e8SPoul-Henning Kamp bintime(&bt); 1126be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & SO_BINTIME) { 1127be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1128be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 1129be8a62e8SPoul-Henning Kamp if (*mp) 1130be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 1131be8a62e8SPoul-Henning Kamp } 113282c23ebaSBill Fenner if (inp->inp_socket->so_options & SO_TIMESTAMP) { 113382c23ebaSBill Fenner struct timeval tv; 113482c23ebaSBill Fenner 1135be8a62e8SPoul-Henning Kamp bintime2timeval(&bt, &tv); 113682c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 113782c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 113882c23ebaSBill Fenner if (*mp) 113982c23ebaSBill Fenner mp = &(*mp)->m_next; 11404cc20ab1SSeigo Tanimura } 1141be8a62e8SPoul-Henning Kamp } 114282c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 114382c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 114482c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 114582c23ebaSBill Fenner if (*mp) 114682c23ebaSBill Fenner mp = &(*mp)->m_next; 114782c23ebaSBill Fenner } 11484957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 11494957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 11504957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 11514957466bSMatthew N. Dodd if (*mp) 11524957466bSMatthew N. Dodd mp = &(*mp)->m_next; 11534957466bSMatthew N. Dodd } 115482c23ebaSBill Fenner #ifdef notyet 115582c23ebaSBill Fenner /* XXX 115682c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 115782c23ebaSBill Fenner * than they already were. 115882c23ebaSBill Fenner */ 115982c23ebaSBill Fenner /* options were tossed already */ 116082c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 116182c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 116282c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 116382c23ebaSBill Fenner if (*mp) 116482c23ebaSBill Fenner mp = &(*mp)->m_next; 116582c23ebaSBill Fenner } 116682c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 116782c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1168e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 116982c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 117082c23ebaSBill Fenner if (*mp) 117182c23ebaSBill Fenner mp = &(*mp)->m_next; 117282c23ebaSBill Fenner } 117382c23ebaSBill Fenner #endif 117482c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1175d314ad7bSJulian Elischer struct ifnet *ifp; 1176d314ad7bSJulian Elischer struct sdlbuf { 117782c23ebaSBill Fenner struct sockaddr_dl sdl; 1178d314ad7bSJulian Elischer u_char pad[32]; 1179d314ad7bSJulian Elischer } sdlbuf; 1180d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1181d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 118282c23ebaSBill Fenner 118346f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 118446f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 11854a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1186d314ad7bSJulian Elischer /* 1187d314ad7bSJulian Elischer * Change our mind and don't try copy. 1188d314ad7bSJulian Elischer */ 118946f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 119046f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1191d314ad7bSJulian Elischer goto makedummy; 1192d314ad7bSJulian Elischer } 1193d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1194d314ad7bSJulian Elischer } else { 1195d314ad7bSJulian Elischer makedummy: 119646f2df9cSSergey Kandaurov sdl2->sdl_len = 119746f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1198d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1199d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1200d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1201d314ad7bSJulian Elischer } 1202d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 120382c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 120482c23ebaSBill Fenner if (*mp) 120582c23ebaSBill Fenner mp = &(*mp)->m_next; 120682c23ebaSBill Fenner } 12073cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 12083cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 12093cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 12103cca425bSMichael Tuexen if (*mp) 12113cca425bSMichael Tuexen mp = &(*mp)->m_next; 12123cca425bSMichael Tuexen } 12139d3ddf43SAdrian Chadd 12149d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 12159d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12169d3ddf43SAdrian Chadd 12179d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12189d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12199d3ddf43SAdrian Chadd 12209d3ddf43SAdrian Chadd /* 12219d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 12229d3ddf43SAdrian Chadd * other - don't populate both? 12239d3ddf43SAdrian Chadd */ 12249d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 12259d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 12269d3ddf43SAdrian Chadd if (*mp) 12279d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12289d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 12299d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 12309d3ddf43SAdrian Chadd if (*mp) 12319d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12329d3ddf43SAdrian Chadd } 12339d3ddf43SAdrian Chadd 12349d3ddf43SAdrian Chadd #ifdef RSS 12359d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 12369d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12379d3ddf43SAdrian Chadd uint32_t rss_bucketid; 12389d3ddf43SAdrian Chadd 12399d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12409d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12419d3ddf43SAdrian Chadd 12429d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 12439d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 12449d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 12459d3ddf43SAdrian Chadd if (*mp) 12469d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12479d3ddf43SAdrian Chadd } 12489d3ddf43SAdrian Chadd } 12499d3ddf43SAdrian Chadd #endif 125082c23ebaSBill Fenner } 125182c23ebaSBill Fenner 12524d2e3692SLuigi Rizzo /* 125330916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 125430916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 125530916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 125630916a2dSRobert Watson * compiled. 12574d2e3692SLuigi Rizzo */ 12583e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on); 125982cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 126082cea7e6SBjoern A. Zeeb 126182cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 126282cea7e6SBjoern A. Zeeb 1263df8bae1dSRodney W. Grimes int 1264f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1265f0068c4aSGarrett Wollman { 12668b615593SMarko Zec 1267f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1268f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1269f0068c4aSGarrett Wollman return EOPNOTSUPP; 1270f0068c4aSGarrett Wollman 1271603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1272f0068c4aSGarrett Wollman return EADDRINUSE; 1273f0068c4aSGarrett Wollman 1274603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 12751c5de19aSGarrett Wollman /* 12761c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 12771c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 12781c5de19aSGarrett Wollman */ 1279603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1280603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1281603724d3SBjoern A. Zeeb V_rsvp_on++; 12821c5de19aSGarrett Wollman } 1283f0068c4aSGarrett Wollman 1284f0068c4aSGarrett Wollman return 0; 1285f0068c4aSGarrett Wollman } 1286f0068c4aSGarrett Wollman 1287f0068c4aSGarrett Wollman int 1288f0068c4aSGarrett Wollman ip_rsvp_done(void) 1289f0068c4aSGarrett Wollman { 12908b615593SMarko Zec 1291603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 12921c5de19aSGarrett Wollman /* 12931c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 12941c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 12951c5de19aSGarrett Wollman */ 1296603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1297603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1298603724d3SBjoern A. Zeeb V_rsvp_on--; 12991c5de19aSGarrett Wollman } 1300f0068c4aSGarrett Wollman return 0; 1301f0068c4aSGarrett Wollman } 1302bbb4330bSLuigi Rizzo 13038f5a8818SKevin Lo int 13048f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1305bbb4330bSLuigi Rizzo { 13068f5a8818SKevin Lo struct mbuf *m; 13078f5a8818SKevin Lo 13088f5a8818SKevin Lo m = *mp; 13098f5a8818SKevin Lo *mp = NULL; 13108b615593SMarko Zec 1311bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 13128f5a8818SKevin Lo *mp = m; 13138f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 13148f5a8818SKevin Lo return (IPPROTO_DONE); 1315bbb4330bSLuigi Rizzo } 1316bbb4330bSLuigi Rizzo 1317bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1318bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1319bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1320bbb4330bSLuigi Rizzo */ 1321bbb4330bSLuigi Rizzo 1322603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1323bbb4330bSLuigi Rizzo m_freem(m); 13248f5a8818SKevin Lo return (IPPROTO_DONE); 1325bbb4330bSLuigi Rizzo } 1326bbb4330bSLuigi Rizzo 1327603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 13288f5a8818SKevin Lo *mp = m; 13298f5a8818SKevin Lo rip_input(mp, offp, proto); 13308f5a8818SKevin Lo return (IPPROTO_DONE); 1331bbb4330bSLuigi Rizzo } 1332bbb4330bSLuigi Rizzo /* Drop the packet */ 1333bbb4330bSLuigi Rizzo m_freem(m); 13348f5a8818SKevin Lo return (IPPROTO_DONE); 1335bbb4330bSLuigi Rizzo } 1336