1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 350ac40133SBrian Somers #include "opt_bootp.h" 3674a9466cSGary Palmer #include "opt_ipfw.h" 3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 386a800098SYoshinobu Inoue #include "opt_ipsec.h" 3933553d6eSBjoern A. Zeeb #include "opt_route.h" 40b8bc95cdSAdrian Chadd #include "opt_rss.h" 4174a9466cSGary Palmer 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 44ef91a976SAndrey V. Elsukov #include <sys/hhook.h> 45df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 46b715f178SLuigi Rizzo #include <sys/malloc.h> 47df8bae1dSRodney W. Grimes #include <sys/domain.h> 48df8bae1dSRodney W. Grimes #include <sys/protosw.h> 49df8bae1dSRodney W. Grimes #include <sys/socket.h> 50df8bae1dSRodney W. Grimes #include <sys/time.h> 51df8bae1dSRodney W. Grimes #include <sys/kernel.h> 52385195c0SMarko Zec #include <sys/lock.h> 53cc0a3c8cSAndrey V. Elsukov #include <sys/rmlock.h> 54385195c0SMarko Zec #include <sys/rwlock.h> 5557f60867SMark Johnston #include <sys/sdt.h> 561025071fSGarrett Wollman #include <sys/syslog.h> 57b5e8ce9fSBruce Evans #include <sys/sysctl.h> 58df8bae1dSRodney W. Grimes 59c85540ddSAndrey A. Chernov #include <net/pfil.h> 60df8bae1dSRodney W. 
Grimes #include <net/if.h> 619494d596SBrooks Davis #include <net/if_types.h> 62d314ad7bSJulian Elischer #include <net/if_var.h> 6382c23ebaSBill Fenner #include <net/if_dl.h> 64df8bae1dSRodney W. Grimes #include <net/route.h> 65748e0b0aSGarrett Wollman #include <net/netisr.h> 66b2bdc62aSAdrian Chadd #include <net/rss_config.h> 674b79449eSBjoern A. Zeeb #include <net/vnet.h> 68df8bae1dSRodney W. Grimes 69df8bae1dSRodney W. Grimes #include <netinet/in.h> 7057f60867SMark Johnston #include <netinet/in_kdtrace.h> 71df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 72b5e8ce9fSBruce Evans #include <netinet/in_var.h> 73df8bae1dSRodney W. Grimes #include <netinet/ip.h> 74df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 76eddfbb76SRobert Watson #include <netinet/ip_fw.h> 77df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 78ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 7958938916SGarrett Wollman #include <machine/in_cksum.h> 80a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 81b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 821dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h> 8333872124SGeorge V. Neville-Neil #include <netipsec/ipsec.h> 8433872124SGeorge V. Neville-Neil #include <netipsec/key.h> 85b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 86b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 87df8bae1dSRodney W. Grimes 88f0068c4aSGarrett Wollman #include <sys/socketvar.h> 896ddbf1e2SGary Palmer 90aed55708SRobert Watson #include <security/mac/mac_framework.h> 91aed55708SRobert Watson 92d2035ffbSEd Maste #ifdef CTASSERT 93d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 94d2035ffbSEd Maste #endif 95d2035ffbSEd Maste 961dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. 
*/ 97843b0e57SXin LI extern void ipreass_init(void); 98843b0e57SXin LI extern void ipreass_drain(void); 99843b0e57SXin LI extern void ipreass_slowtimo(void); 1001dbefcc0SGleb Smirnoff #ifdef VIMAGE 101843b0e57SXin LI extern void ipreass_destroy(void); 1021dbefcc0SGleb Smirnoff #endif 1031dbefcc0SGleb Smirnoff 104cc0a3c8cSAndrey V. Elsukov struct rmlock in_ifaddr_lock; 105cc0a3c8cSAndrey V. Elsukov RM_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 106f0068c4aSGarrett Wollman 10782cea7e6SBjoern A. Zeeb VNET_DEFINE(int, rsvp_on); 10882cea7e6SBjoern A. Zeeb 10982cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1106df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 111eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1128b615593SMarko Zec "Enable IP forwarding between interfaces"); 1130312fbe9SPoul-Henning Kamp 1143e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 11582cea7e6SBjoern A. Zeeb #define V_ipsendredirects VNET(ipsendredirects) 1166df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 117eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1188b615593SMarko Zec "Enable sending IP redirects"); 1190312fbe9SPoul-Henning Kamp 120823db0e9SDon Lewis /* 121823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 122823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 123a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 124823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1253f67c834SDon Lewis * 126a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 127a8f12100SDon Lewis * to translate the destination address to another local interface. 
1283f67c834SDon Lewis * 1293f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1303f67c834SDon Lewis * to the loopback interface instead of the interface where the 1313f67c834SDon Lewis * packets for those addresses are received. 132823db0e9SDon Lewis */ 1333e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface); 13482cea7e6SBjoern A. Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1356df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 136eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1378b615593SMarko Zec "Verify packet arrives on correct interface"); 138b3e95d4eSJonathan Lemon 1390b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 140df8bae1dSRodney W. Grimes 141d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 142d4b5cae4SRobert Watson .nh_name = "ip", 143d4b5cae4SRobert Watson .nh_handler = ip_input, 144d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 145b8bc95cdSAdrian Chadd #ifdef RSS 1462527ccadSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 147b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 148b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 149b8bc95cdSAdrian Chadd #else 150d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 151b8bc95cdSAdrian Chadd #endif 152d4b5cae4SRobert Watson }; 153ca925d9cSJonathan Lemon 154b8bc95cdSAdrian Chadd #ifdef RSS 155b8bc95cdSAdrian Chadd /* 156b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 157b8bc95cdSAdrian Chadd * to have a flowid already calculated. 158b8bc95cdSAdrian Chadd * 159b8bc95cdSAdrian Chadd * It should likely have something that assert it 160b8bc95cdSAdrian Chadd * actually has valid flow details. 
161b8bc95cdSAdrian Chadd */ 162b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 163b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 164b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 165b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 166499baf0aSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid_v4, 167b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 168b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 169b8bc95cdSAdrian Chadd }; 170b8bc95cdSAdrian Chadd #endif 171b8bc95cdSAdrian Chadd 172df8bae1dSRodney W. Grimes extern struct domain inetdomain; 173f0ffb944SJulian Elischer extern struct protosw inetsw[]; 174df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17582cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17682cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17782cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 178ca925d9cSJonathan Lemon 1790312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1800312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1813d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1820312fbe9SPoul-Henning Kamp #endif 1830312fbe9SPoul-Henning Kamp 1841b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1866df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 187eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 188eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1891b968362SDag-Erling Smørgrav #endif 190eddfbb76SRobert Watson 191315e3e38SRobert Watson /* 1925da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1935923c293SGleb Smirnoff */ 1945da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1955da0521fSAndrey V. 
Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1965da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1975da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1985923c293SGleb Smirnoff 1995923c293SGleb Smirnoff #ifdef VIMAGE 2005da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 2015923c293SGleb Smirnoff #endif /* VIMAGE */ 2025923c293SGleb Smirnoff 2035923c293SGleb Smirnoff /* 204315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 2055923c293SGleb Smirnoff * into ipstat treated as an array. 206315e3e38SRobert Watson */ 207315e3e38SRobert Watson void 208315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 209315e3e38SRobert Watson { 210315e3e38SRobert Watson 2115da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 212315e3e38SRobert Watson } 213315e3e38SRobert Watson 214315e3e38SRobert Watson void 215315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 216315e3e38SRobert Watson { 217315e3e38SRobert Watson 2185da0521fSAndrey V. 
Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 219315e3e38SRobert Watson } 220315e3e38SRobert Watson 221d4b5cae4SRobert Watson static int 222d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 223d4b5cae4SRobert Watson { 224d4b5cae4SRobert Watson int error, qlimit; 225d4b5cae4SRobert Watson 226d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 227d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 228d4b5cae4SRobert Watson if (error || !req->newptr) 229d4b5cae4SRobert Watson return (error); 230d4b5cae4SRobert Watson if (qlimit < 1) 231d4b5cae4SRobert Watson return (EINVAL); 232d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 233d4b5cae4SRobert Watson } 234d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 235d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 236d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 237d4b5cae4SRobert Watson 238d4b5cae4SRobert Watson static int 239d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 240d4b5cae4SRobert Watson { 241d4b5cae4SRobert Watson u_int64_t qdrops_long; 242d4b5cae4SRobert Watson int error, qdrops; 243d4b5cae4SRobert Watson 244d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 245d4b5cae4SRobert Watson qdrops = qdrops_long; 246d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 247d4b5cae4SRobert Watson if (error || !req->newptr) 248d4b5cae4SRobert Watson return (error); 249d4b5cae4SRobert Watson if (qdrops != 0) 250d4b5cae4SRobert Watson return (EINVAL); 251d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 252d4b5cae4SRobert Watson return (0); 253d4b5cae4SRobert Watson } 254d4b5cae4SRobert Watson 255d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 256d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 257d4b5cae4SRobert 
Watson "Number of packets dropped from the IP input queue"); 258d4b5cae4SRobert Watson 259b8bc95cdSAdrian Chadd #ifdef RSS 260b8bc95cdSAdrian Chadd static int 261b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 262b8bc95cdSAdrian Chadd { 263b8bc95cdSAdrian Chadd int error, qlimit; 264b8bc95cdSAdrian Chadd 265b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 266b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 267b8bc95cdSAdrian Chadd if (error || !req->newptr) 268b8bc95cdSAdrian Chadd return (error); 269b8bc95cdSAdrian Chadd if (qlimit < 1) 270b8bc95cdSAdrian Chadd return (EINVAL); 271b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 272b8bc95cdSAdrian Chadd } 273b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, 274b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", 275b8bc95cdSAdrian Chadd "Maximum size of the IP direct input queue"); 276b8bc95cdSAdrian Chadd 277b8bc95cdSAdrian Chadd static int 278b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 279b8bc95cdSAdrian Chadd { 280b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 281b8bc95cdSAdrian Chadd int error, qdrops; 282b8bc95cdSAdrian Chadd 283b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 284b8bc95cdSAdrian Chadd qdrops = qdrops_long; 285b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 286b8bc95cdSAdrian Chadd if (error || !req->newptr) 287b8bc95cdSAdrian Chadd return (error); 288b8bc95cdSAdrian Chadd if (qdrops != 0) 289b8bc95cdSAdrian Chadd return (EINVAL); 290b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 291b8bc95cdSAdrian Chadd return (0); 292b8bc95cdSAdrian Chadd } 293b8bc95cdSAdrian Chadd 294b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, 295b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RD, 0, 0, 
sysctl_netinet_intr_direct_queue_drops, "I", 296b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 297b8bc95cdSAdrian Chadd #endif /* RSS */ 298b8bc95cdSAdrian Chadd 299df8bae1dSRodney W. Grimes /* 300df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 301df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 302df8bae1dSRodney W. Grimes */ 303df8bae1dSRodney W. Grimes void 304f2565d68SRobert Watson ip_init(void) 305df8bae1dSRodney W. Grimes { 306f2565d68SRobert Watson struct protosw *pr; 307f2565d68SRobert Watson int i; 308df8bae1dSRodney W. Grimes 309603724d3SBjoern A. Zeeb TAILQ_INIT(&V_in_ifaddrhead); 310603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3111ed81b73SMarko Zec 3121ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3131dbefcc0SGleb Smirnoff ipreass_init(); 3141ed81b73SMarko Zec 3150b4b0b0fSJulian Elischer /* Initialize packet filter hooks. */ 3160b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 3170b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_af = AF_INET; 3180b4b0b0fSJulian Elischer if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 3190b4b0b0fSJulian Elischer printf("%s: WARNING: unable to register pfil hook, " 3200b4b0b0fSJulian Elischer "error %d\n", __func__, i); 3210b4b0b0fSJulian Elischer 322ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_IN, AF_INET, 323ef91a976SAndrey V. Elsukov &V_ipsec_hhh_in[HHOOK_IPSEC_INET], 324ef91a976SAndrey V. Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 325ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register input helper hook\n", 326ef91a976SAndrey V. Elsukov __func__); 327ef91a976SAndrey V. Elsukov if (hhook_head_register(HHOOK_TYPE_IPSEC_OUT, AF_INET, 328ef91a976SAndrey V. Elsukov &V_ipsec_hhh_out[HHOOK_IPSEC_INET], 329ef91a976SAndrey V. 
Elsukov HHOOK_WAITOK | HHOOK_HEADISINVNET) != 0) 330ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to register output helper hook\n", 331ef91a976SAndrey V. Elsukov __func__); 332ef91a976SAndrey V. Elsukov 3331ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 334*484149deSBjoern A. Zeeb #ifdef VIMAGE 335*484149deSBjoern A. Zeeb if (!IS_DEFAULT_VNET(curvnet)) { 336*484149deSBjoern A. Zeeb netisr_register_vnet(&ip_nh); 337*484149deSBjoern A. Zeeb #ifdef RSS 338*484149deSBjoern A. Zeeb netisr_register_vnet(&ip_direct_nh); 339*484149deSBjoern A. Zeeb #endif 3401ed81b73SMarko Zec return; 341*484149deSBjoern A. Zeeb } 342*484149deSBjoern A. Zeeb #endif 3431ed81b73SMarko Zec 344f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 34502410549SRobert Watson if (pr == NULL) 346db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 347db09bef3SAndre Oppermann 348db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 349df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 350df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 351db09bef3SAndre Oppermann /* 352db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 353db09bef3SAndre Oppermann * in ip_protox[]. 354db09bef3SAndre Oppermann */ 355f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 356f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 357df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 358db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 359db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 360db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 361df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 362db09bef3SAndre Oppermann } 363194a213eSAndrey A. 
Chernov 364d4b5cae4SRobert Watson netisr_register(&ip_nh); 365b8bc95cdSAdrian Chadd #ifdef RSS 366b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 367b8bc95cdSAdrian Chadd #endif 368df8bae1dSRodney W. Grimes } 369df8bae1dSRodney W. Grimes 3709802380eSBjoern A. Zeeb #ifdef VIMAGE 3713f58662dSBjoern A. Zeeb static void 3723f58662dSBjoern A. Zeeb ip_destroy(void *unused __unused) 3739802380eSBjoern A. Zeeb { 374ef91a976SAndrey V. Elsukov int error; 3754d3dfd45SMikolaj Golub 376*484149deSBjoern A. Zeeb #ifdef RSS 377*484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_direct_nh); 378*484149deSBjoern A. Zeeb #endif 379*484149deSBjoern A. Zeeb netisr_unregister_vnet(&ip_nh); 380*484149deSBjoern A. Zeeb 381ef91a976SAndrey V. Elsukov if ((error = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 3824d3dfd45SMikolaj Golub printf("%s: WARNING: unable to unregister pfil hook, " 383ef91a976SAndrey V. Elsukov "error %d\n", __func__, error); 3849802380eSBjoern A. Zeeb 385ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_in[HHOOK_IPSEC_INET]); 386ef91a976SAndrey V. Elsukov if (error != 0) { 387ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister input helper hook " 388ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_IN, id HHOOK_IPSEC_INET: " 389ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 390ef91a976SAndrey V. Elsukov } 391ef91a976SAndrey V. Elsukov error = hhook_head_deregister(V_ipsec_hhh_out[HHOOK_IPSEC_INET]); 392ef91a976SAndrey V. Elsukov if (error != 0) { 393ef91a976SAndrey V. Elsukov printf("%s: WARNING: unable to deregister output helper hook " 394ef91a976SAndrey V. Elsukov "type HHOOK_TYPE_IPSEC_OUT, id HHOOK_IPSEC_INET: " 395ef91a976SAndrey V. Elsukov "error %d returned\n", __func__, error); 396ef91a976SAndrey V. Elsukov } 3979802380eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 3989802380eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 3999802380eSBjoern A. 
Zeeb 400e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 4011dbefcc0SGleb Smirnoff ipreass_destroy(); 4029802380eSBjoern A. Zeeb } 4033f58662dSBjoern A. Zeeb 4043f58662dSBjoern A. Zeeb VNET_SYSUNINIT(ip, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, ip_destroy, NULL); 4059802380eSBjoern A. Zeeb #endif 4069802380eSBjoern A. Zeeb 407b8bc95cdSAdrian Chadd #ifdef RSS 408b8bc95cdSAdrian Chadd /* 409b8bc95cdSAdrian Chadd * IP direct input routine. 410b8bc95cdSAdrian Chadd * 411b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 412b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 413b8bc95cdSAdrian Chadd */ 414b8bc95cdSAdrian Chadd void 415b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 416b8bc95cdSAdrian Chadd { 417b8bc95cdSAdrian Chadd struct ip *ip; 418b8bc95cdSAdrian Chadd int hlen; 419b8bc95cdSAdrian Chadd 420b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 421b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 422b8bc95cdSAdrian Chadd 423b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 424b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 425b8bc95cdSAdrian Chadd return; 426b8bc95cdSAdrian Chadd } 427b8bc95cdSAdrian Chadd #endif 428b8bc95cdSAdrian Chadd 4294d2e3692SLuigi Rizzo /* 430df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 431df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 432df8bae1dSRodney W. Grimes */ 433c67b1d17SGarrett Wollman void 434c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 435df8bae1dSRodney W. 
Grimes { 4369188b4a1SAndre Oppermann struct ip *ip = NULL; 4375da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 438ca925d9cSJonathan Lemon struct ifaddr *ifa; 4390aade26eSRobert Watson struct ifnet *ifp; 4409b932e9eSAndre Oppermann int checkif, hlen = 0; 44121d172a3SGleb Smirnoff uint16_t sum, ip_len; 44202c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 443f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 444b715f178SLuigi Rizzo 445fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 446db40007dSAndrew R. Reiter 447ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 44876ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 44976ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4502b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 45153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4528f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4539b932e9eSAndre Oppermann goto ours; 4542b25acc1SLuigi Rizzo } 4552b25acc1SLuigi Rizzo 45686425c62SRobert Watson IPSTAT_INC(ips_total); 45758938916SGarrett Wollman 45858938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 45958938916SGarrett Wollman goto tooshort; 46058938916SGarrett Wollman 461df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4620b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 46386425c62SRobert Watson IPSTAT_INC(ips_toosmall); 464c67b1d17SGarrett Wollman return; 465df8bae1dSRodney W. Grimes } 466df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 46758938916SGarrett Wollman 46853be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 46986425c62SRobert Watson IPSTAT_INC(ips_badvers); 470df8bae1dSRodney W. Grimes goto bad; 471df8bae1dSRodney W. Grimes } 47258938916SGarrett Wollman 47353be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 474df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 47586425c62SRobert Watson IPSTAT_INC(ips_badhlen); 476df8bae1dSRodney W. 
Grimes goto bad; 477df8bae1dSRodney W. Grimes } 478df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4790b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 48086425c62SRobert Watson IPSTAT_INC(ips_badhlen); 481c67b1d17SGarrett Wollman return; 482df8bae1dSRodney W. Grimes } 483df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 484df8bae1dSRodney W. Grimes } 48533841545SHajimu UMEMOTO 48657f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 48757f60867SMark Johnston 48833841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 4890aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 49033841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 49133841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 4920aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 49386425c62SRobert Watson IPSTAT_INC(ips_badaddr); 49433841545SHajimu UMEMOTO goto bad; 49533841545SHajimu UMEMOTO } 49633841545SHajimu UMEMOTO } 49733841545SHajimu UMEMOTO 498db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 499db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 500db4f9cc7SJonathan Lemon } else { 50158938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 50247c861ecSBrian Somers sum = in_cksum_hdr(ip); 50358938916SGarrett Wollman } else { 50447c861ecSBrian Somers sum = in_cksum(m, hlen); 50558938916SGarrett Wollman } 506db4f9cc7SJonathan Lemon } 50747c861ecSBrian Somers if (sum) { 50886425c62SRobert Watson IPSTAT_INC(ips_badsum); 509df8bae1dSRodney W. Grimes goto bad; 510df8bae1dSRodney W. Grimes } 511df8bae1dSRodney W. 
Grimes 51202b199f1SMax Laier #ifdef ALTQ 51302b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 51402b199f1SMax Laier /* packet is dropped by traffic conditioner */ 51502b199f1SMax Laier return; 51602b199f1SMax Laier #endif 51702b199f1SMax Laier 51821d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 51921d172a3SGleb Smirnoff if (ip_len < hlen) { 52086425c62SRobert Watson IPSTAT_INC(ips_badlen); 521df8bae1dSRodney W. Grimes goto bad; 522df8bae1dSRodney W. Grimes } 523df8bae1dSRodney W. Grimes 524df8bae1dSRodney W. Grimes /* 525df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 526df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 527df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 528df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 529df8bae1dSRodney W. Grimes */ 53021d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 53158938916SGarrett Wollman tooshort: 53286425c62SRobert Watson IPSTAT_INC(ips_tooshort); 533df8bae1dSRodney W. Grimes goto bad; 534df8bae1dSRodney W. Grimes } 53521d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 536df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 53721d172a3SGleb Smirnoff m->m_len = ip_len; 53821d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 539df8bae1dSRodney W. Grimes } else 54021d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 541df8bae1dSRodney W. Grimes } 542b8bc95cdSAdrian Chadd 54333872124SGeorge V. Neville-Neil /* Try to forward the packet, but if we fail continue */ 544b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 54533872124SGeorge V. Neville-Neil /* For now we do not handle IPSEC in tryforward. */ 54633872124SGeorge V. Neville-Neil if (!key_havesp(IPSEC_DIR_INBOUND) && !key_havesp(IPSEC_DIR_OUTBOUND) && 54733872124SGeorge V. Neville-Neil (V_ipforwarding == 1)) 54833872124SGeorge V. Neville-Neil if (ip_tryforward(m) == NULL) 54933872124SGeorge V. 
Neville-Neil return; 55014dd6717SSam Leffler /* 551ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 55214dd6717SSam Leffler */ 553cc977adcSBjoern A. Zeeb if (ip_ipsec_filtertunnel(m)) 554c21fd232SAndre Oppermann goto passin; 55533872124SGeorge V. Neville-Neil #else 55633872124SGeorge V. Neville-Neil if (V_ipforwarding == 1) 55733872124SGeorge V. Neville-Neil if (ip_tryforward(m) == NULL) 55833872124SGeorge V. Neville-Neil return; 559b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 5603f67c834SDon Lewis 561c4ac87eaSDarren Reed /* 562134ea224SSam Leffler * Run through list of hooks for input packets. 563f51f805fSSam Leffler * 564f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 565f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 566f51f805fSSam Leffler * ip_forward to do the right thing. 567c4ac87eaSDarren Reed */ 568c21fd232SAndre Oppermann 569c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 5700b4b0b0fSJulian Elischer if (!PFIL_HOOKED(&V_inet_pfil_hook)) 571c21fd232SAndre Oppermann goto passin; 572c21fd232SAndre Oppermann 573f51f805fSSam Leffler odst = ip->ip_dst; 5740b4b0b0fSJulian Elischer if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 575beec8214SDarren Reed return; 576134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 577c4ac87eaSDarren Reed return; 5789b932e9eSAndre Oppermann 579c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 58002c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 5810aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5829b932e9eSAndre Oppermann 5839b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 5849b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 5859b932e9eSAndre Oppermann goto ours; 5869b932e9eSAndre Oppermann } 587ffdbf9daSAndrey V. 
Elsukov if (m->m_flags & M_IP_NEXTHOP) { 588de89d74bSLuiz Otavio O Souza if (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL) { 589099dd043SAndre Oppermann /* 590ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 591ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 592ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 593099dd043SAndre Oppermann */ 594ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 595099dd043SAndre Oppermann return; 596099dd043SAndre Oppermann } 597ffdbf9daSAndrey V. Elsukov } 598c21fd232SAndre Oppermann passin: 59921d172a3SGleb Smirnoff 60021d172a3SGleb Smirnoff /* 601df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 602df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 603df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 604df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 605df8bae1dSRodney W. Grimes */ 6069b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 607c67b1d17SGarrett Wollman return; 608df8bae1dSRodney W. Grimes 609f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 610f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 611f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 612f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 613f0068c4aSGarrett Wollman * grabbing the packet. 614f0068c4aSGarrett Wollman */ 615603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 616f0068c4aSGarrett Wollman goto ours; 617f0068c4aSGarrett Wollman 618df8bae1dSRodney W. Grimes /* 619df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 
620cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 621cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 622cc766e04SGarrett Wollman * with it). 623df8bae1dSRodney W. Grimes */ 624603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_in_ifaddrhead) && 625cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 626cc766e04SGarrett Wollman goto ours; 627cc766e04SGarrett Wollman 6287538a9a0SJonathan Lemon /* 629823db0e9SDon Lewis * Enable a consistency check between the destination address 630823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 631823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 632e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 633e15ae1b2SDon Lewis * 'ipfw fwd'. 6343f67c834SDon Lewis * 6353f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 6363f67c834SDon Lewis * address is ipnat'ed to a different interface. 6373f67c834SDon Lewis * 638a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 6393f67c834SDon Lewis * to the loopback interface instead of the interface where 6403f67c834SDon Lewis * the packets are received. 641a9771948SGleb Smirnoff * 642a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 643a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 644a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 645823db0e9SDon Lewis */ 646603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 6470aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 64854bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 649823db0e9SDon Lewis 650ca925d9cSJonathan Lemon /* 651ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 
652ca925d9cSJonathan Lemon */ 6532d9cfabaSRobert Watson /* IN_IFADDR_RLOCK(); */ 6549b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 655f9e354dfSJulian Elischer /* 656823db0e9SDon Lewis * If the address matches, verify that the packet 657823db0e9SDon Lewis * arrived via the correct interface if checking is 658823db0e9SDon Lewis * enabled. 659f9e354dfSJulian Elischer */ 6609b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6618c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6627caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6637caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6647caf4ab7SGleb Smirnoff m->m_pkthdr.len); 6652d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 666ed1ff184SJulian Elischer goto ours; 667ca925d9cSJonathan Lemon } 6688c0fec80SRobert Watson } 6692d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 6702d9cfabaSRobert Watson 671823db0e9SDon Lewis /* 672ca925d9cSJonathan Lemon * Check for broadcast addresses. 673ca925d9cSJonathan Lemon * 674ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 675ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 676ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 677ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 678823db0e9SDon Lewis */ 6790aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 680137f91e8SJohn Baldwin IF_ADDR_RLOCK(ifp); 6810aade26eSRobert Watson TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 682ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 683ca925d9cSJonathan Lemon continue; 684ca925d9cSJonathan Lemon ia = ifatoia(ifa); 685df8bae1dSRodney W. 
Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 6860aade26eSRobert Watson ip->ip_dst.s_addr) { 6877caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6887caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6897caf4ab7SGleb Smirnoff m->m_pkthdr.len); 690137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 691df8bae1dSRodney W. Grimes goto ours; 6920aade26eSRobert Watson } 6930ac40133SBrian Somers #ifdef BOOTP_COMPAT 6940aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 6957caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6967caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6977caf4ab7SGleb Smirnoff m->m_pkthdr.len); 698137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 699ca925d9cSJonathan Lemon goto ours; 7000aade26eSRobert Watson } 7010ac40133SBrian Somers #endif 702df8bae1dSRodney W. Grimes } 703137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 70419e5b0a7SRobert Watson ia = NULL; 705df8bae1dSRodney W. Grimes } 706f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 707f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 70886425c62SRobert Watson IPSTAT_INC(ips_cantforward); 709f8429ca2SBruce M Simpson m_freem(m); 710f8429ca2SBruce M Simpson return; 711f8429ca2SBruce M Simpson } 712df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 713603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 714df8bae1dSRodney W. Grimes /* 715df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 716df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 717df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 718df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 719df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 720df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 721df8bae1dSRodney W. 
Grimes */ 7220aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 72386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 724df8bae1dSRodney W. Grimes m_freem(m); 725c67b1d17SGarrett Wollman return; 726df8bae1dSRodney W. Grimes } 727df8bae1dSRodney W. Grimes 728df8bae1dSRodney W. Grimes /* 72911612afaSDima Dorfman * The process-level routing daemon needs to receive 730df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 731df8bae1dSRodney W. Grimes * host belongs to their destination groups. 732df8bae1dSRodney W. Grimes */ 733df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 734df8bae1dSRodney W. Grimes goto ours; 73586425c62SRobert Watson IPSTAT_INC(ips_forward); 736df8bae1dSRodney W. Grimes } 737df8bae1dSRodney W. Grimes /* 738d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 739d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 740d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 741df8bae1dSRodney W. Grimes */ 742df8bae1dSRodney W. Grimes goto ours; 743df8bae1dSRodney W. Grimes } 744df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 745df8bae1dSRodney W. Grimes goto ours; 746df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 747df8bae1dSRodney W. Grimes goto ours; 748df8bae1dSRodney W. Grimes 7496a800098SYoshinobu Inoue /* 750df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 751df8bae1dSRodney W. Grimes */ 752603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 75386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 754df8bae1dSRodney W. Grimes m_freem(m); 755546f251bSChris D. Faulhaber } else { 7569b932e9eSAndre Oppermann ip_forward(m, dchg); 757546f251bSChris D. Faulhaber } 758c67b1d17SGarrett Wollman return; 759df8bae1dSRodney W. Grimes 760df8bae1dSRodney W. 
Grimes ours: 761d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 762d0ebc0d2SYaroslav Tykhiy /* 763d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 764d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 765d0ebc0d2SYaroslav Tykhiy */ 7667caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 767d0ebc0d2SYaroslav Tykhiy return; 768d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 769d0ebc0d2SYaroslav Tykhiy 77063f8d699SJordan K. Hubbard /* 771b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 772ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 773df8bae1dSRodney W. Grimes */ 7748f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 775aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 776f0cada84SAndre Oppermann m = ip_reass(m); 777f0cada84SAndre Oppermann if (m == NULL) 778c67b1d17SGarrett Wollman return; 7796a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 7807e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 78153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 782f0cada84SAndre Oppermann } 783f0cada84SAndre Oppermann 784b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 78533841545SHajimu UMEMOTO /* 78633841545SHajimu UMEMOTO * enforce IPsec policy checking if we are seeing last header. 78733841545SHajimu UMEMOTO * note that we do not visit this with protocols with pcb layer 78833841545SHajimu UMEMOTO * code - like udp/tcp/raw ip. 78933841545SHajimu UMEMOTO */ 790e58320f1SAndrey V. Elsukov if (ip_ipsec_input(m, ip->ip_p) != 0) 79133841545SHajimu UMEMOTO goto bad; 792b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 79333841545SHajimu UMEMOTO 794df8bae1dSRodney W. Grimes /* 795df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 796df8bae1dSRodney W. 
Grimes */ 79786425c62SRobert Watson IPSTAT_INC(ips_delivered); 7989b932e9eSAndre Oppermann 7998f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 800c67b1d17SGarrett Wollman return; 801df8bae1dSRodney W. Grimes bad: 802df8bae1dSRodney W. Grimes m_freem(m); 803c67b1d17SGarrett Wollman } 804c67b1d17SGarrett Wollman 805c67b1d17SGarrett Wollman /* 806df8bae1dSRodney W. Grimes * IP timer processing; 807df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 808df8bae1dSRodney W. Grimes * queue, discard it. 809df8bae1dSRodney W. Grimes */ 810df8bae1dSRodney W. Grimes void 811f2565d68SRobert Watson ip_slowtimo(void) 812df8bae1dSRodney W. Grimes { 8138b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 814df8bae1dSRodney W. Grimes 8155ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 8168b615593SMarko Zec VNET_FOREACH(vnet_iter) { 8178b615593SMarko Zec CURVNET_SET(vnet_iter); 8181dbefcc0SGleb Smirnoff ipreass_slowtimo(); 8198b615593SMarko Zec CURVNET_RESTORE(); 8208b615593SMarko Zec } 8215ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 822df8bae1dSRodney W. Grimes } 823df8bae1dSRodney W. Grimes 8249802380eSBjoern A. Zeeb void 8259802380eSBjoern A. Zeeb ip_drain(void) 8269802380eSBjoern A. Zeeb { 8279802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 8289802380eSBjoern A. Zeeb 8299802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 8309802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 8319802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 8321dbefcc0SGleb Smirnoff ipreass_drain(); 8338b615593SMarko Zec CURVNET_RESTORE(); 8348b615593SMarko Zec } 8355ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 836df8bae1dSRodney W. Grimes } 837df8bae1dSRodney W. Grimes 838df8bae1dSRodney W. Grimes /* 839de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 840de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 
*/
int
ipproto_register(short ipproto)
{
	struct protosw *rawsw, *psw;

	/* Reject protocol numbers outside 1 .. IPPROTO_MAX - 1. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/*
	 * A slot is free only while it still points at the IPPROTO_RAW
	 * entry of inetsw[]; anything else means another protocol
	 * already claimed it.
	 */
	rawsw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (rawsw == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != rawsw - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/*
	 * Scan the inet domain's protosw table for the entry that carries
	 * this protocol number and record its inetsw[] index.
	 */
	for (psw = inetdomain.dom_protosw;
	    psw < inetdomain.dom_protoswNPROTOSW; psw++) {
		if (psw->pr_domain->dom_family != PF_INET)
			continue;
		if (psw->pr_protocol == 0 || psw->pr_protocol != ipproto)
			continue;
		ip_protox[psw->pr_protocol] = psw - inetsw;
		return (0);
	}

	/* No protosw entry for this protocol number. */
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): point the ip_protox[] slot for 'ipproto' back
 * at the IPPROTO_RAW entry.  Returns ENOENT if the slot already points
 * there, i.e. the protocol was not registered.
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *rawsw;

	/* Reject protocol numbers outside 1 .. IPPROTO_MAX - 1. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/* Locate the IPPROTO_RAW entry that marks an unused slot. */
	rawsw = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (rawsw == NULL)
		return (EPFNOSUPPORT);

	/* Nothing to undo if the slot is already the raw entry. */
	if (ip_protox[ipproto] == rawsw - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = rawsw - inetsw;
	return (0);
}

/*
 * Table of PRC_NCMDS errno values; a zero entry means no error code.
 * NOTE(review): presumably indexed by PRC_* command code -- confirm
 * against the users of this table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,	/*  0 -  3 */
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,	/*  4 -  7 */
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,	/*  8 - 11 */
	EMSGSIZE,	EHOSTUNREACH,	0,		0,	/* 12 - 15 */
	0,		0,		EHOSTUNREACH,	0,	/* 16 - 19 */
	ENOPROTOOPT,	ECONNREFUSED				/* 20 - 21 */
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
913df8bae1dSRodney W. Grimes * 914df8bae1dSRodney W. Grimes * The srcrt parameter indicates whether the packet is being forwarded 915df8bae1dSRodney W. Grimes * via a source route. 916df8bae1dSRodney W. Grimes */ 9179b932e9eSAndre Oppermann void 9189b932e9eSAndre Oppermann ip_forward(struct mbuf *m, int srcrt) 919df8bae1dSRodney W. Grimes { 9202b25acc1SLuigi Rizzo struct ip *ip = mtod(m, struct ip *); 921efbad259SEdward Tomasz Napierala struct in_ifaddr *ia; 922df8bae1dSRodney W. Grimes struct mbuf *mcopy; 923d14122b0SErmal Luçi struct sockaddr_in *sin; 9249b932e9eSAndre Oppermann struct in_addr dest; 925b835b6feSBjoern A. Zeeb struct route ro; 926c773494eSAndre Oppermann int error, type = 0, code = 0, mtu = 0; 9273efc3014SJulian Elischer 9289b932e9eSAndre Oppermann if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) { 92986425c62SRobert Watson IPSTAT_INC(ips_cantforward); 930df8bae1dSRodney W. Grimes m_freem(m); 931df8bae1dSRodney W. Grimes return; 932df8bae1dSRodney W. Grimes } 9338922ddbeSAndrey V. Elsukov #ifdef IPSEC 9348922ddbeSAndrey V. Elsukov if (ip_ipsec_fwd(m) != 0) { 9358922ddbeSAndrey V. Elsukov IPSTAT_INC(ips_cantforward); 9368922ddbeSAndrey V. Elsukov m_freem(m); 9378922ddbeSAndrey V. Elsukov return; 9388922ddbeSAndrey V. Elsukov } 9398922ddbeSAndrey V. Elsukov #endif /* IPSEC */ 9401b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 941603724d3SBjoern A. Zeeb if (!V_ipstealth) { 9421b968362SDag-Erling Smørgrav #endif 943df8bae1dSRodney W. Grimes if (ip->ip_ttl <= IPTTLDEC) { 9441b968362SDag-Erling Smørgrav icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, 94502c1c707SAndre Oppermann 0, 0); 946df8bae1dSRodney W. Grimes return; 947df8bae1dSRodney W. Grimes } 9481b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 9491b968362SDag-Erling Smørgrav } 9501b968362SDag-Erling Smørgrav #endif 951df8bae1dSRodney W. 
Grimes 952d14122b0SErmal Luçi bzero(&ro, sizeof(ro)); 953d14122b0SErmal Luçi sin = (struct sockaddr_in *)&ro.ro_dst; 954d14122b0SErmal Luçi sin->sin_family = AF_INET; 955d14122b0SErmal Luçi sin->sin_len = sizeof(*sin); 956d14122b0SErmal Luçi sin->sin_addr = ip->ip_dst; 957d14122b0SErmal Luçi #ifdef RADIX_MPATH 958d14122b0SErmal Luçi rtalloc_mpath_fib(&ro, 959d14122b0SErmal Luçi ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), 960d14122b0SErmal Luçi M_GETFIB(m)); 961d14122b0SErmal Luçi #else 962d14122b0SErmal Luçi in_rtalloc_ign(&ro, 0, M_GETFIB(m)); 963d14122b0SErmal Luçi #endif 964d14122b0SErmal Luçi if (ro.ro_rt != NULL) { 965d14122b0SErmal Luçi ia = ifatoia(ro.ro_rt->rt_ifa); 966d14122b0SErmal Luçi ifa_ref(&ia->ia_ifa); 96756844a62SErmal Luçi } else 96856844a62SErmal Luçi ia = NULL; 969efbad259SEdward Tomasz Napierala #ifndef IPSEC 970efbad259SEdward Tomasz Napierala /* 971efbad259SEdward Tomasz Napierala * 'ia' may be NULL if there is no route for this destination. 972efbad259SEdward Tomasz Napierala * In case of IPsec, Don't discard it just yet, but pass it to 973efbad259SEdward Tomasz Napierala * ip_output in case of outgoing IPsec policy. 974efbad259SEdward Tomasz Napierala */ 975d23d475fSGuido van Rooij if (!srcrt && ia == NULL) { 97602c1c707SAndre Oppermann icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0); 977d14122b0SErmal Luçi RO_RTFREE(&ro); 978df8bae1dSRodney W. Grimes return; 97902c1c707SAndre Oppermann } 980efbad259SEdward Tomasz Napierala #endif 981df8bae1dSRodney W. Grimes 982df8bae1dSRodney W. Grimes /* 983bfef7ed4SIan Dowse * Save the IP header and at most 8 bytes of the payload, 984bfef7ed4SIan Dowse * in case we need to generate an ICMP message to the src. 985bfef7ed4SIan Dowse * 9864d2e3692SLuigi Rizzo * XXX this can be optimized a lot by saving the data in a local 9874d2e3692SLuigi Rizzo * buffer on the stack (72 bytes at most), and only allocating the 9884d2e3692SLuigi Rizzo * mbuf if really necessary. 
The vast majority of the packets 9894d2e3692SLuigi Rizzo * are forwarded without having to send an ICMP back (either 9904d2e3692SLuigi Rizzo * because unnecessary, or because rate limited), so we are 9914d2e3692SLuigi Rizzo * really we are wasting a lot of work here. 9924d2e3692SLuigi Rizzo * 993bfef7ed4SIan Dowse * We don't use m_copy() because it might return a reference 994bfef7ed4SIan Dowse * to a shared cluster. Both this function and ip_output() 995bfef7ed4SIan Dowse * assume exclusive access to the IP header in `m', so any 996bfef7ed4SIan Dowse * data in a cluster may change before we reach icmp_error(). 997df8bae1dSRodney W. Grimes */ 998dc4ad05eSGleb Smirnoff mcopy = m_gethdr(M_NOWAIT, m->m_type); 999eb1b1807SGleb Smirnoff if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) { 10009967cafcSSam Leffler /* 10019967cafcSSam Leffler * It's probably ok if the pkthdr dup fails (because 10029967cafcSSam Leffler * the deep copy of the tag chain failed), but for now 10039967cafcSSam Leffler * be conservative and just discard the copy since 10049967cafcSSam Leffler * code below may some day want the tags. 10059967cafcSSam Leffler */ 10069967cafcSSam Leffler m_free(mcopy); 10079967cafcSSam Leffler mcopy = NULL; 10089967cafcSSam Leffler } 1009bfef7ed4SIan Dowse if (mcopy != NULL) { 10108f134647SGleb Smirnoff mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy)); 1011e6b0a570SBruce M Simpson mcopy->m_pkthdr.len = mcopy->m_len; 1012bfef7ed4SIan Dowse m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t)); 1013bfef7ed4SIan Dowse } 101404287599SRuslan Ermilov 101504287599SRuslan Ermilov #ifdef IPSTEALTH 1016603724d3SBjoern A. Zeeb if (!V_ipstealth) { 101704287599SRuslan Ermilov #endif 101804287599SRuslan Ermilov ip->ip_ttl -= IPTTLDEC; 101904287599SRuslan Ermilov #ifdef IPSTEALTH 102004287599SRuslan Ermilov } 102104287599SRuslan Ermilov #endif 1022df8bae1dSRodney W. Grimes 1023df8bae1dSRodney W. Grimes /* 1024df8bae1dSRodney W. 
Grimes * If forwarding packet using same interface that it came in on, 1025df8bae1dSRodney W. Grimes * perhaps should send a redirect to sender to shortcut a hop. 1026df8bae1dSRodney W. Grimes * Only send redirect if source is sending directly to us, 1027df8bae1dSRodney W. Grimes * and if packet was not source routed (or has any options). 1028df8bae1dSRodney W. Grimes * Also, don't send redirect if forwarding using a default route 1029df8bae1dSRodney W. Grimes * or a route modified by a redirect. 1030df8bae1dSRodney W. Grimes */ 10319b932e9eSAndre Oppermann dest.s_addr = 0; 1032efbad259SEdward Tomasz Napierala if (!srcrt && V_ipsendredirects && 1033efbad259SEdward Tomasz Napierala ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { 103402c1c707SAndre Oppermann struct rtentry *rt; 103502c1c707SAndre Oppermann 103602c1c707SAndre Oppermann rt = ro.ro_rt; 103702c1c707SAndre Oppermann 103802c1c707SAndre Oppermann if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && 10399b932e9eSAndre Oppermann satosin(rt_key(rt))->sin_addr.s_addr != 0) { 1040df8bae1dSRodney W. Grimes #define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) 1041df8bae1dSRodney W. Grimes u_long src = ntohl(ip->ip_src.s_addr); 1042df8bae1dSRodney W. Grimes 1043df8bae1dSRodney W. Grimes if (RTA(rt) && 1044df8bae1dSRodney W. Grimes (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { 1045df8bae1dSRodney W. Grimes if (rt->rt_flags & RTF_GATEWAY) 10469b932e9eSAndre Oppermann dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; 1047df8bae1dSRodney W. Grimes else 10489b932e9eSAndre Oppermann dest.s_addr = ip->ip_dst.s_addr; 1049df8bae1dSRodney W. Grimes /* Router requirements says to only send host redirects */ 1050df8bae1dSRodney W. Grimes type = ICMP_REDIRECT; 1051df8bae1dSRodney W. Grimes code = ICMP_REDIRECT_HOST; 1052df8bae1dSRodney W. Grimes } 1053df8bae1dSRodney W. Grimes } 105402c1c707SAndre Oppermann } 1055df8bae1dSRodney W. Grimes 1056b835b6feSBjoern A. 
Zeeb error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); 1057b835b6feSBjoern A. Zeeb 1058b835b6feSBjoern A. Zeeb if (error == EMSGSIZE && ro.ro_rt) 1059e3a7aa6fSGleb Smirnoff mtu = ro.ro_rt->rt_mtu; 1060bf984051SGleb Smirnoff RO_RTFREE(&ro); 1061b835b6feSBjoern A. Zeeb 1062df8bae1dSRodney W. Grimes if (error) 106386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 1064df8bae1dSRodney W. Grimes else { 106586425c62SRobert Watson IPSTAT_INC(ips_forward); 1066df8bae1dSRodney W. Grimes if (type) 106786425c62SRobert Watson IPSTAT_INC(ips_redirectsent); 1068df8bae1dSRodney W. Grimes else { 10699188b4a1SAndre Oppermann if (mcopy) 1070df8bae1dSRodney W. Grimes m_freem(mcopy); 10718c0fec80SRobert Watson if (ia != NULL) 10728c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1073df8bae1dSRodney W. Grimes return; 1074df8bae1dSRodney W. Grimes } 1075df8bae1dSRodney W. Grimes } 10768c0fec80SRobert Watson if (mcopy == NULL) { 10778c0fec80SRobert Watson if (ia != NULL) 10788c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1079df8bae1dSRodney W. Grimes return; 10808c0fec80SRobert Watson } 1081df8bae1dSRodney W. Grimes 1082df8bae1dSRodney W. Grimes switch (error) { 1083df8bae1dSRodney W. Grimes 1084df8bae1dSRodney W. Grimes case 0: /* forwarded, but need redirect */ 1085df8bae1dSRodney W. Grimes /* type, code set above */ 1086df8bae1dSRodney W. Grimes break; 1087df8bae1dSRodney W. Grimes 1088efbad259SEdward Tomasz Napierala case ENETUNREACH: 1089df8bae1dSRodney W. Grimes case EHOSTUNREACH: 1090df8bae1dSRodney W. Grimes case ENETDOWN: 1091df8bae1dSRodney W. Grimes case EHOSTDOWN: 1092df8bae1dSRodney W. Grimes default: 1093df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1094df8bae1dSRodney W. Grimes code = ICMP_UNREACH_HOST; 1095df8bae1dSRodney W. Grimes break; 1096df8bae1dSRodney W. Grimes 1097df8bae1dSRodney W. Grimes case EMSGSIZE: 1098df8bae1dSRodney W. Grimes type = ICMP_UNREACH; 1099df8bae1dSRodney W. 
Grimes code = ICMP_UNREACH_NEEDFRAG; 11001dfcf0d2SAndre Oppermann 1101b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 1102b835b6feSBjoern A. Zeeb /* 1103b835b6feSBjoern A. Zeeb * If IPsec is configured for this path, 1104b835b6feSBjoern A. Zeeb * override any possibly mtu value set by ip_output. 1105b835b6feSBjoern A. Zeeb */ 11061c044382SBjoern A. Zeeb mtu = ip_ipsec_mtu(mcopy, mtu); 1107b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 11089b932e9eSAndre Oppermann /* 1109b835b6feSBjoern A. Zeeb * If the MTU was set before make sure we are below the 1110b835b6feSBjoern A. Zeeb * interface MTU. 1111ab48768bSAndre Oppermann * If the MTU wasn't set before use the interface mtu or 1112ab48768bSAndre Oppermann * fall back to the next smaller mtu step compared to the 1113ab48768bSAndre Oppermann * current packet size. 11149b932e9eSAndre Oppermann */ 1115b835b6feSBjoern A. Zeeb if (mtu != 0) { 1116b835b6feSBjoern A. Zeeb if (ia != NULL) 1117b835b6feSBjoern A. Zeeb mtu = min(mtu, ia->ia_ifp->if_mtu); 1118b835b6feSBjoern A. Zeeb } else { 1119ab48768bSAndre Oppermann if (ia != NULL) 1120c773494eSAndre Oppermann mtu = ia->ia_ifp->if_mtu; 1121ab48768bSAndre Oppermann else 11228f134647SGleb Smirnoff mtu = ip_next_mtu(ntohs(ip->ip_len), 0); 1123ab48768bSAndre Oppermann } 112486425c62SRobert Watson IPSTAT_INC(ips_cantfrag); 1125df8bae1dSRodney W. Grimes break; 1126df8bae1dSRodney W. Grimes 1127df8bae1dSRodney W. Grimes case ENOBUFS: 11283a06e3e0SRuslan Ermilov case EACCES: /* ipfw denied packet */ 11293a06e3e0SRuslan Ermilov m_freem(mcopy); 11308c0fec80SRobert Watson if (ia != NULL) 11318c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 11323a06e3e0SRuslan Ermilov return; 1133df8bae1dSRodney W. Grimes } 11348c0fec80SRobert Watson if (ia != NULL) 11358c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 1136c773494eSAndre Oppermann icmp_error(mcopy, type, code, dest.s_addr, mtu); 1137df8bae1dSRodney W. Grimes } 1138df8bae1dSRodney W. 
Grimes 113982c23ebaSBill Fenner void 1140f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1141f2565d68SRobert Watson struct mbuf *m) 114282c23ebaSBill Fenner { 11438b615593SMarko Zec 1144be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1145be8a62e8SPoul-Henning Kamp struct bintime bt; 1146be8a62e8SPoul-Henning Kamp 1147be8a62e8SPoul-Henning Kamp bintime(&bt); 1148be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & SO_BINTIME) { 1149be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1150be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 1151be8a62e8SPoul-Henning Kamp if (*mp) 1152be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 1153be8a62e8SPoul-Henning Kamp } 115482c23ebaSBill Fenner if (inp->inp_socket->so_options & SO_TIMESTAMP) { 115582c23ebaSBill Fenner struct timeval tv; 115682c23ebaSBill Fenner 1157be8a62e8SPoul-Henning Kamp bintime2timeval(&bt, &tv); 115882c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 115982c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 116082c23ebaSBill Fenner if (*mp) 116182c23ebaSBill Fenner mp = &(*mp)->m_next; 11624cc20ab1SSeigo Tanimura } 1163be8a62e8SPoul-Henning Kamp } 116482c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 116582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 116682c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 116782c23ebaSBill Fenner if (*mp) 116882c23ebaSBill Fenner mp = &(*mp)->m_next; 116982c23ebaSBill Fenner } 11704957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 11714957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 11724957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 11734957466bSMatthew N. Dodd if (*mp) 11744957466bSMatthew N. Dodd mp = &(*mp)->m_next; 11754957466bSMatthew N. 
Dodd } 117682c23ebaSBill Fenner #ifdef notyet 117782c23ebaSBill Fenner /* XXX 117882c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 117982c23ebaSBill Fenner * than they already were. 118082c23ebaSBill Fenner */ 118182c23ebaSBill Fenner /* options were tossed already */ 118282c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 118382c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 118482c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 118582c23ebaSBill Fenner if (*mp) 118682c23ebaSBill Fenner mp = &(*mp)->m_next; 118782c23ebaSBill Fenner } 118882c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 118982c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1190e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 119182c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 119282c23ebaSBill Fenner if (*mp) 119382c23ebaSBill Fenner mp = &(*mp)->m_next; 119482c23ebaSBill Fenner } 119582c23ebaSBill Fenner #endif 119682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1197d314ad7bSJulian Elischer struct ifnet *ifp; 1198d314ad7bSJulian Elischer struct sdlbuf { 119982c23ebaSBill Fenner struct sockaddr_dl sdl; 1200d314ad7bSJulian Elischer u_char pad[32]; 1201d314ad7bSJulian Elischer } sdlbuf; 1202d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1203d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 120482c23ebaSBill Fenner 120546f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 120646f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 12074a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1208d314ad7bSJulian Elischer /* 1209d314ad7bSJulian Elischer * Change our mind and don't try copy. 
1210d314ad7bSJulian Elischer */ 121146f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 121246f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1213d314ad7bSJulian Elischer goto makedummy; 1214d314ad7bSJulian Elischer } 1215d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1216d314ad7bSJulian Elischer } else { 1217d314ad7bSJulian Elischer makedummy: 121846f2df9cSSergey Kandaurov sdl2->sdl_len = 121946f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1220d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1221d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1222d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1223d314ad7bSJulian Elischer } 1224d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 122582c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 122682c23ebaSBill Fenner if (*mp) 122782c23ebaSBill Fenner mp = &(*mp)->m_next; 122882c23ebaSBill Fenner } 12293cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 12303cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 12313cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 12323cca425bSMichael Tuexen if (*mp) 12333cca425bSMichael Tuexen mp = &(*mp)->m_next; 12343cca425bSMichael Tuexen } 12359d3ddf43SAdrian Chadd 12369d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 12379d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12389d3ddf43SAdrian Chadd 12399d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12409d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12419d3ddf43SAdrian Chadd 12429d3ddf43SAdrian Chadd /* 12439d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 12449d3ddf43SAdrian Chadd * other - don't populate both? 
12459d3ddf43SAdrian Chadd */ 12469d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 12479d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 12489d3ddf43SAdrian Chadd if (*mp) 12499d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12509d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 12519d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 12529d3ddf43SAdrian Chadd if (*mp) 12539d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12549d3ddf43SAdrian Chadd } 12559d3ddf43SAdrian Chadd 12569d3ddf43SAdrian Chadd #ifdef RSS 12579d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 12589d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12599d3ddf43SAdrian Chadd uint32_t rss_bucketid; 12609d3ddf43SAdrian Chadd 12619d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12629d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12639d3ddf43SAdrian Chadd 12649d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 12659d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 12669d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 12679d3ddf43SAdrian Chadd if (*mp) 12689d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12699d3ddf43SAdrian Chadd } 12709d3ddf43SAdrian Chadd } 12719d3ddf43SAdrian Chadd #endif 127282c23ebaSBill Fenner } 127382c23ebaSBill Fenner 12744d2e3692SLuigi Rizzo /* 127530916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 127630916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 127730916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 127830916a2dSRobert Watson * compiled. 12794d2e3692SLuigi Rizzo */ 12803e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on); 128182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 128282cea7e6SBjoern A. Zeeb 128382cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 128482cea7e6SBjoern A. 
Zeeb 1285df8bae1dSRodney W. Grimes int 1286f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1287f0068c4aSGarrett Wollman { 12888b615593SMarko Zec 1289f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1290f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1291f0068c4aSGarrett Wollman return EOPNOTSUPP; 1292f0068c4aSGarrett Wollman 1293603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1294f0068c4aSGarrett Wollman return EADDRINUSE; 1295f0068c4aSGarrett Wollman 1296603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 12971c5de19aSGarrett Wollman /* 12981c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 12991c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13001c5de19aSGarrett Wollman */ 1301603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1302603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1303603724d3SBjoern A. Zeeb V_rsvp_on++; 13041c5de19aSGarrett Wollman } 1305f0068c4aSGarrett Wollman 1306f0068c4aSGarrett Wollman return 0; 1307f0068c4aSGarrett Wollman } 1308f0068c4aSGarrett Wollman 1309f0068c4aSGarrett Wollman int 1310f0068c4aSGarrett Wollman ip_rsvp_done(void) 1311f0068c4aSGarrett Wollman { 13128b615593SMarko Zec 1313603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 13141c5de19aSGarrett Wollman /* 13151c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 13161c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 13171c5de19aSGarrett Wollman */ 1318603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1319603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1320603724d3SBjoern A. 
Zeeb V_rsvp_on--; 13211c5de19aSGarrett Wollman } 1322f0068c4aSGarrett Wollman return 0; 1323f0068c4aSGarrett Wollman } 1324bbb4330bSLuigi Rizzo 13258f5a8818SKevin Lo int 13268f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1327bbb4330bSLuigi Rizzo { 13288f5a8818SKevin Lo struct mbuf *m; 13298f5a8818SKevin Lo 13308f5a8818SKevin Lo m = *mp; 13318f5a8818SKevin Lo *mp = NULL; 13328b615593SMarko Zec 1333bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 13348f5a8818SKevin Lo *mp = m; 13358f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 13368f5a8818SKevin Lo return (IPPROTO_DONE); 1337bbb4330bSLuigi Rizzo } 1338bbb4330bSLuigi Rizzo 1339bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1340bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1341bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1342bbb4330bSLuigi Rizzo */ 1343bbb4330bSLuigi Rizzo 1344603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1345bbb4330bSLuigi Rizzo m_freem(m); 13468f5a8818SKevin Lo return (IPPROTO_DONE); 1347bbb4330bSLuigi Rizzo } 1348bbb4330bSLuigi Rizzo 1349603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 13508f5a8818SKevin Lo *mp = m; 13518f5a8818SKevin Lo rip_input(mp, offp, proto); 13528f5a8818SKevin Lo return (IPPROTO_DONE); 1353bbb4330bSLuigi Rizzo } 1354bbb4330bSLuigi Rizzo /* Drop the packet */ 1355bbb4330bSLuigi Rizzo m_freem(m); 13568f5a8818SKevin Lo return (IPPROTO_DONE); 1357bbb4330bSLuigi Rizzo } 1358