1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 350ac40133SBrian Somers #include "opt_bootp.h" 3674a9466cSGary Palmer #include "opt_ipfw.h" 3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 386a800098SYoshinobu Inoue #include "opt_ipsec.h" 3933553d6eSBjoern A. Zeeb #include "opt_route.h" 40b8bc95cdSAdrian Chadd #include "opt_rss.h" 4174a9466cSGary Palmer 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 44df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 45b715f178SLuigi Rizzo #include <sys/malloc.h> 46df8bae1dSRodney W. Grimes #include <sys/domain.h> 47df8bae1dSRodney W. Grimes #include <sys/protosw.h> 48df8bae1dSRodney W. Grimes #include <sys/socket.h> 49df8bae1dSRodney W. Grimes #include <sys/time.h> 50df8bae1dSRodney W. Grimes #include <sys/kernel.h> 51385195c0SMarko Zec #include <sys/lock.h> 52385195c0SMarko Zec #include <sys/rwlock.h> 5357f60867SMark Johnston #include <sys/sdt.h> 541025071fSGarrett Wollman #include <sys/syslog.h> 55b5e8ce9fSBruce Evans #include <sys/sysctl.h> 56df8bae1dSRodney W. Grimes 57c85540ddSAndrey A. Chernov #include <net/pfil.h> 58df8bae1dSRodney W. Grimes #include <net/if.h> 599494d596SBrooks Davis #include <net/if_types.h> 60d314ad7bSJulian Elischer #include <net/if_var.h> 6182c23ebaSBill Fenner #include <net/if_dl.h> 62df8bae1dSRodney W. 
Grimes #include <net/route.h> 63748e0b0aSGarrett Wollman #include <net/netisr.h> 64b2bdc62aSAdrian Chadd #include <net/rss_config.h> 654b79449eSBjoern A. Zeeb #include <net/vnet.h> 66df8bae1dSRodney W. Grimes 67df8bae1dSRodney W. Grimes #include <netinet/in.h> 6857f60867SMark Johnston #include <netinet/in_kdtrace.h> 69df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 70b5e8ce9fSBruce Evans #include <netinet/in_var.h> 71df8bae1dSRodney W. Grimes #include <netinet/ip.h> 72df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 73df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 74eddfbb76SRobert Watson #include <netinet/ip_fw.h> 75df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 76ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 7758938916SGarrett Wollman #include <machine/in_cksum.h> 78a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 79b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 801dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h> 81b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 82b8bc95cdSAdrian Chadd #include <netinet/in_rss.h> 83df8bae1dSRodney W. Grimes 84f0068c4aSGarrett Wollman #include <sys/socketvar.h> 856ddbf1e2SGary Palmer 86aed55708SRobert Watson #include <security/mac/mac_framework.h> 87aed55708SRobert Watson 88d2035ffbSEd Maste #ifdef CTASSERT 89d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 90d2035ffbSEd Maste #endif 91d2035ffbSEd Maste 921dbefcc0SGleb Smirnoff /* IP reassembly functions are defined in ip_reass.c. */ 93843b0e57SXin LI extern void ipreass_init(void); 94843b0e57SXin LI extern void ipreass_drain(void); 95843b0e57SXin LI extern void ipreass_slowtimo(void); 961dbefcc0SGleb Smirnoff #ifdef VIMAGE 97843b0e57SXin LI extern void ipreass_destroy(void); 981dbefcc0SGleb Smirnoff #endif 991dbefcc0SGleb Smirnoff 100f89d4c3aSAndre Oppermann struct rwlock in_ifaddr_lock; 10164aeca7bSRobert Watson RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 102f0068c4aSGarrett Wollman 10382cea7e6SBjoern A. 
Zeeb VNET_DEFINE(int, rsvp_on); 10482cea7e6SBjoern A. Zeeb 10582cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipforwarding); 1066df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_VNET | CTLFLAG_RW, 107eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1088b615593SMarko Zec "Enable IP forwarding between interfaces"); 1090312fbe9SPoul-Henning Kamp 1103e288e62SDimitry Andric static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 11182cea7e6SBjoern A. Zeeb #define V_ipsendredirects VNET(ipsendredirects) 1126df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_VNET | CTLFLAG_RW, 113eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1148b615593SMarko Zec "Enable sending IP redirects"); 1150312fbe9SPoul-Henning Kamp 116823db0e9SDon Lewis /* 117823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 118823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 119a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 120823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1213f67c834SDon Lewis * 122a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 123a8f12100SDon Lewis * to translate the destination address to another local interface. 1243f67c834SDon Lewis * 1253f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1263f67c834SDon Lewis * to the loopback interface instead of the interface where the 1273f67c834SDon Lewis * packets for those addresses are received. 128823db0e9SDon Lewis */ 1293e288e62SDimitry Andric static VNET_DEFINE(int, ip_checkinterface); 13082cea7e6SBjoern A. 
Zeeb #define V_ip_checkinterface VNET(ip_checkinterface) 1316df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_VNET | CTLFLAG_RW, 132eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1338b615593SMarko Zec "Verify packet arrives on correct interface"); 134b3e95d4eSJonathan Lemon 1350b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 136df8bae1dSRodney W. Grimes 137d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 138d4b5cae4SRobert Watson .nh_name = "ip", 139d4b5cae4SRobert Watson .nh_handler = ip_input, 140d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 141b8bc95cdSAdrian Chadd #ifdef RSS 142b8bc95cdSAdrian Chadd .nh_m2cpuid = rss_soft_m2cpuid, 143b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 144b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 145b8bc95cdSAdrian Chadd #else 146d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 147b8bc95cdSAdrian Chadd #endif 148d4b5cae4SRobert Watson }; 149ca925d9cSJonathan Lemon 150b8bc95cdSAdrian Chadd #ifdef RSS 151b8bc95cdSAdrian Chadd /* 152b8bc95cdSAdrian Chadd * Directly dispatched frames are currently assumed 153b8bc95cdSAdrian Chadd * to have a flowid already calculated. 154b8bc95cdSAdrian Chadd * 155b8bc95cdSAdrian Chadd * It should likely have something that assert it 156b8bc95cdSAdrian Chadd * actually has valid flow details. 157b8bc95cdSAdrian Chadd */ 158b8bc95cdSAdrian Chadd static struct netisr_handler ip_direct_nh = { 159b8bc95cdSAdrian Chadd .nh_name = "ip_direct", 160b8bc95cdSAdrian Chadd .nh_handler = ip_direct_input, 161b8bc95cdSAdrian Chadd .nh_proto = NETISR_IP_DIRECT, 162b8bc95cdSAdrian Chadd .nh_m2cpuid = rss_m2cpuid, 163b8bc95cdSAdrian Chadd .nh_policy = NETISR_POLICY_CPU, 164b8bc95cdSAdrian Chadd .nh_dispatch = NETISR_DISPATCH_HYBRID, 165b8bc95cdSAdrian Chadd }; 166b8bc95cdSAdrian Chadd #endif 167b8bc95cdSAdrian Chadd 168df8bae1dSRodney W. 
Grimes extern struct domain inetdomain; 169f0ffb944SJulian Elischer extern struct protosw inetsw[]; 170df8bae1dSRodney W. Grimes u_char ip_protox[IPPROTO_MAX]; 17182cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 17282cea7e6SBjoern A. Zeeb VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 17382cea7e6SBjoern A. Zeeb VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 174ca925d9cSJonathan Lemon 1750312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 1760312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 1773d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 1780312fbe9SPoul-Henning Kamp #endif 1790312fbe9SPoul-Henning Kamp 1801b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 18182cea7e6SBjoern A. Zeeb VNET_DEFINE(int, ipstealth); 1826df8a710SGleb Smirnoff SYSCTL_INT(_net_inet_ip, OID_AUTO, stealth, CTLFLAG_VNET | CTLFLAG_RW, 183eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 184eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 1851b968362SDag-Erling Smørgrav #endif 186eddfbb76SRobert Watson 187315e3e38SRobert Watson /* 1885da0521fSAndrey V. Elsukov * IP statistics are stored in the "array" of counter(9)s. 1895923c293SGleb Smirnoff */ 1905da0521fSAndrey V. Elsukov VNET_PCPUSTAT_DEFINE(struct ipstat, ipstat); 1915da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSINIT(ipstat); 1925da0521fSAndrey V. Elsukov SYSCTL_VNET_PCPUSTAT(_net_inet_ip, IPCTL_STATS, stats, struct ipstat, ipstat, 1935da0521fSAndrey V. Elsukov "IP statistics (struct ipstat, netinet/ip_var.h)"); 1945923c293SGleb Smirnoff 1955923c293SGleb Smirnoff #ifdef VIMAGE 1965da0521fSAndrey V. Elsukov VNET_PCPUSTAT_SYSUNINIT(ipstat); 1975923c293SGleb Smirnoff #endif /* VIMAGE */ 1985923c293SGleb Smirnoff 1995923c293SGleb Smirnoff /* 200315e3e38SRobert Watson * Kernel module interface for updating ipstat. 
The argument is an index 2015923c293SGleb Smirnoff * into ipstat treated as an array. 202315e3e38SRobert Watson */ 203315e3e38SRobert Watson void 204315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 205315e3e38SRobert Watson { 206315e3e38SRobert Watson 2075da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], 1); 208315e3e38SRobert Watson } 209315e3e38SRobert Watson 210315e3e38SRobert Watson void 211315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 212315e3e38SRobert Watson { 213315e3e38SRobert Watson 2145da0521fSAndrey V. Elsukov counter_u64_add(VNET(ipstat)[statnum], -1); 215315e3e38SRobert Watson } 216315e3e38SRobert Watson 217d4b5cae4SRobert Watson static int 218d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 219d4b5cae4SRobert Watson { 220d4b5cae4SRobert Watson int error, qlimit; 221d4b5cae4SRobert Watson 222d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 223d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 224d4b5cae4SRobert Watson if (error || !req->newptr) 225d4b5cae4SRobert Watson return (error); 226d4b5cae4SRobert Watson if (qlimit < 1) 227d4b5cae4SRobert Watson return (EINVAL); 228d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 229d4b5cae4SRobert Watson } 230d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 231d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 232d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 233d4b5cae4SRobert Watson 234d4b5cae4SRobert Watson static int 235d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 236d4b5cae4SRobert Watson { 237d4b5cae4SRobert Watson u_int64_t qdrops_long; 238d4b5cae4SRobert Watson int error, qdrops; 239d4b5cae4SRobert Watson 240d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 241d4b5cae4SRobert Watson qdrops = qdrops_long; 242d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, 
&qdrops, 0, req); 243d4b5cae4SRobert Watson if (error || !req->newptr) 244d4b5cae4SRobert Watson return (error); 245d4b5cae4SRobert Watson if (qdrops != 0) 246d4b5cae4SRobert Watson return (EINVAL); 247d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 248d4b5cae4SRobert Watson return (0); 249d4b5cae4SRobert Watson } 250d4b5cae4SRobert Watson 251d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 252d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 253d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 254d4b5cae4SRobert Watson 255b8bc95cdSAdrian Chadd #ifdef RSS 256b8bc95cdSAdrian Chadd static int 257b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_maxlen(SYSCTL_HANDLER_ARGS) 258b8bc95cdSAdrian Chadd { 259b8bc95cdSAdrian Chadd int error, qlimit; 260b8bc95cdSAdrian Chadd 261b8bc95cdSAdrian Chadd netisr_getqlimit(&ip_direct_nh, &qlimit); 262b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qlimit, 0, req); 263b8bc95cdSAdrian Chadd if (error || !req->newptr) 264b8bc95cdSAdrian Chadd return (error); 265b8bc95cdSAdrian Chadd if (qlimit < 1) 266b8bc95cdSAdrian Chadd return (EINVAL); 267b8bc95cdSAdrian Chadd return (netisr_setqlimit(&ip_direct_nh, qlimit)); 268b8bc95cdSAdrian Chadd } 269b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_direct_queue_maxlen, 270b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_direct_queue_maxlen, "I", 271b8bc95cdSAdrian Chadd "Maximum size of the IP direct input queue"); 272b8bc95cdSAdrian Chadd 273b8bc95cdSAdrian Chadd static int 274b8bc95cdSAdrian Chadd sysctl_netinet_intr_direct_queue_drops(SYSCTL_HANDLER_ARGS) 275b8bc95cdSAdrian Chadd { 276b8bc95cdSAdrian Chadd u_int64_t qdrops_long; 277b8bc95cdSAdrian Chadd int error, qdrops; 278b8bc95cdSAdrian Chadd 279b8bc95cdSAdrian Chadd netisr_getqdrops(&ip_direct_nh, &qdrops_long); 280b8bc95cdSAdrian Chadd qdrops = qdrops_long; 
281b8bc95cdSAdrian Chadd error = sysctl_handle_int(oidp, &qdrops, 0, req); 282b8bc95cdSAdrian Chadd if (error || !req->newptr) 283b8bc95cdSAdrian Chadd return (error); 284b8bc95cdSAdrian Chadd if (qdrops != 0) 285b8bc95cdSAdrian Chadd return (EINVAL); 286b8bc95cdSAdrian Chadd netisr_clearqdrops(&ip_direct_nh); 287b8bc95cdSAdrian Chadd return (0); 288b8bc95cdSAdrian Chadd } 289b8bc95cdSAdrian Chadd 290b8bc95cdSAdrian Chadd SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_direct_queue_drops, 291b8bc95cdSAdrian Chadd CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_direct_queue_drops, "I", 292b8bc95cdSAdrian Chadd "Number of packets dropped from the IP direct input queue"); 293b8bc95cdSAdrian Chadd #endif /* RSS */ 294b8bc95cdSAdrian Chadd 295df8bae1dSRodney W. Grimes /* 296df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 297df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 298df8bae1dSRodney W. Grimes */ 299df8bae1dSRodney W. Grimes void 300f2565d68SRobert Watson ip_init(void) 301df8bae1dSRodney W. Grimes { 302f2565d68SRobert Watson struct protosw *pr; 303f2565d68SRobert Watson int i; 304df8bae1dSRodney W. Grimes 305603724d3SBjoern A. Zeeb TAILQ_INIT(&V_in_ifaddrhead); 306603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3071ed81b73SMarko Zec 3081ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3091dbefcc0SGleb Smirnoff ipreass_init(); 3101ed81b73SMarko Zec 3110b4b0b0fSJulian Elischer /* Initialize packet filter hooks. 
*/ 3120b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 3130b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_af = AF_INET; 3140b4b0b0fSJulian Elischer if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 3150b4b0b0fSJulian Elischer printf("%s: WARNING: unable to register pfil hook, " 3160b4b0b0fSJulian Elischer "error %d\n", __func__, i); 3170b4b0b0fSJulian Elischer 3181ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 3191ed81b73SMarko Zec if (!IS_DEFAULT_VNET(curvnet)) 3201ed81b73SMarko Zec return; 3211ed81b73SMarko Zec 322f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 32302410549SRobert Watson if (pr == NULL) 324db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 325db09bef3SAndre Oppermann 326db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 327df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 328df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 329db09bef3SAndre Oppermann /* 330db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 331db09bef3SAndre Oppermann * in ip_protox[]. 332db09bef3SAndre Oppermann */ 333f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 334f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 335df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 336db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 337db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 338db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 339df8bae1dSRodney W. Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 340db09bef3SAndre Oppermann } 341194a213eSAndrey A. Chernov 342d4b5cae4SRobert Watson netisr_register(&ip_nh); 343b8bc95cdSAdrian Chadd #ifdef RSS 344b8bc95cdSAdrian Chadd netisr_register(&ip_direct_nh); 345b8bc95cdSAdrian Chadd #endif 346df8bae1dSRodney W. Grimes } 347df8bae1dSRodney W. 
Grimes 3489802380eSBjoern A. Zeeb #ifdef VIMAGE 3499802380eSBjoern A. Zeeb void 3509802380eSBjoern A. Zeeb ip_destroy(void) 3519802380eSBjoern A. Zeeb { 3524d3dfd45SMikolaj Golub int i; 3534d3dfd45SMikolaj Golub 3544d3dfd45SMikolaj Golub if ((i = pfil_head_unregister(&V_inet_pfil_hook)) != 0) 3554d3dfd45SMikolaj Golub printf("%s: WARNING: unable to unregister pfil hook, " 3564d3dfd45SMikolaj Golub "error %d\n", __func__, i); 3579802380eSBjoern A. Zeeb 3589802380eSBjoern A. Zeeb /* Cleanup in_ifaddr hash table; should be empty. */ 3599802380eSBjoern A. Zeeb hashdestroy(V_in_ifaddrhashtbl, M_IFADDR, V_in_ifaddrhmask); 3609802380eSBjoern A. Zeeb 361e3c2c634SGleb Smirnoff /* Destroy IP reassembly queue. */ 3621dbefcc0SGleb Smirnoff ipreass_destroy(); 3639802380eSBjoern A. Zeeb } 3649802380eSBjoern A. Zeeb #endif 3659802380eSBjoern A. Zeeb 366b8bc95cdSAdrian Chadd #ifdef RSS 367b8bc95cdSAdrian Chadd /* 368b8bc95cdSAdrian Chadd * IP direct input routine. 369b8bc95cdSAdrian Chadd * 370b8bc95cdSAdrian Chadd * This is called when reinjecting completed fragments where 371b8bc95cdSAdrian Chadd * all of the previous checking and book-keeping has been done. 372b8bc95cdSAdrian Chadd */ 373b8bc95cdSAdrian Chadd void 374b8bc95cdSAdrian Chadd ip_direct_input(struct mbuf *m) 375b8bc95cdSAdrian Chadd { 376b8bc95cdSAdrian Chadd struct ip *ip; 377b8bc95cdSAdrian Chadd int hlen; 378b8bc95cdSAdrian Chadd 379b8bc95cdSAdrian Chadd ip = mtod(m, struct ip *); 380b8bc95cdSAdrian Chadd hlen = ip->ip_hl << 2; 381b8bc95cdSAdrian Chadd 382b8bc95cdSAdrian Chadd IPSTAT_INC(ips_delivered); 383b8bc95cdSAdrian Chadd (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 384b8bc95cdSAdrian Chadd return; 385b8bc95cdSAdrian Chadd } 386b8bc95cdSAdrian Chadd #endif 387b8bc95cdSAdrian Chadd 3884d2e3692SLuigi Rizzo /* 389df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 390df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 
391df8bae1dSRodney W. Grimes */ 392c67b1d17SGarrett Wollman void 393c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 394df8bae1dSRodney W. Grimes { 3959188b4a1SAndre Oppermann struct ip *ip = NULL; 3965da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 397ca925d9cSJonathan Lemon struct ifaddr *ifa; 3980aade26eSRobert Watson struct ifnet *ifp; 3999b932e9eSAndre Oppermann int checkif, hlen = 0; 40021d172a3SGleb Smirnoff uint16_t sum, ip_len; 40102c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 402f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 403b715f178SLuigi Rizzo 404fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 405db40007dSAndrew R. Reiter 406ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 40776ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 40876ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4092b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 41053be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4118f134647SGleb Smirnoff ip_len = ntohs(ip->ip_len); 4129b932e9eSAndre Oppermann goto ours; 4132b25acc1SLuigi Rizzo } 4142b25acc1SLuigi Rizzo 41586425c62SRobert Watson IPSTAT_INC(ips_total); 41658938916SGarrett Wollman 41758938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 41858938916SGarrett Wollman goto tooshort; 41958938916SGarrett Wollman 420df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4210b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 42286425c62SRobert Watson IPSTAT_INC(ips_toosmall); 423c67b1d17SGarrett Wollman return; 424df8bae1dSRodney W. Grimes } 425df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 42658938916SGarrett Wollman 42753be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 42886425c62SRobert Watson IPSTAT_INC(ips_badvers); 429df8bae1dSRodney W. Grimes goto bad; 430df8bae1dSRodney W. Grimes } 43158938916SGarrett Wollman 43253be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 433df8bae1dSRodney W. 
Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 43486425c62SRobert Watson IPSTAT_INC(ips_badhlen); 435df8bae1dSRodney W. Grimes goto bad; 436df8bae1dSRodney W. Grimes } 437df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4380b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 43986425c62SRobert Watson IPSTAT_INC(ips_badhlen); 440c67b1d17SGarrett Wollman return; 441df8bae1dSRodney W. Grimes } 442df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 443df8bae1dSRodney W. Grimes } 44433841545SHajimu UMEMOTO 44557f60867SMark Johnston IP_PROBE(receive, NULL, NULL, ip, m->m_pkthdr.rcvif, ip, NULL); 44657f60867SMark Johnston 44733841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 4480aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 44933841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 45033841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 4510aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 45286425c62SRobert Watson IPSTAT_INC(ips_badaddr); 45333841545SHajimu UMEMOTO goto bad; 45433841545SHajimu UMEMOTO } 45533841545SHajimu UMEMOTO } 45633841545SHajimu UMEMOTO 457db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 458db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 459db4f9cc7SJonathan Lemon } else { 46058938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 46147c861ecSBrian Somers sum = in_cksum_hdr(ip); 46258938916SGarrett Wollman } else { 46347c861ecSBrian Somers sum = in_cksum(m, hlen); 46458938916SGarrett Wollman } 465db4f9cc7SJonathan Lemon } 46647c861ecSBrian Somers if (sum) { 46786425c62SRobert Watson IPSTAT_INC(ips_badsum); 468df8bae1dSRodney W. Grimes goto bad; 469df8bae1dSRodney W. Grimes } 470df8bae1dSRodney W. 
Grimes 47102b199f1SMax Laier #ifdef ALTQ 47202b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 47302b199f1SMax Laier /* packet is dropped by traffic conditioner */ 47402b199f1SMax Laier return; 47502b199f1SMax Laier #endif 47602b199f1SMax Laier 47721d172a3SGleb Smirnoff ip_len = ntohs(ip->ip_len); 47821d172a3SGleb Smirnoff if (ip_len < hlen) { 47986425c62SRobert Watson IPSTAT_INC(ips_badlen); 480df8bae1dSRodney W. Grimes goto bad; 481df8bae1dSRodney W. Grimes } 482df8bae1dSRodney W. Grimes 483df8bae1dSRodney W. Grimes /* 484df8bae1dSRodney W. Grimes * Check that the amount of data in the buffers 485df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 486df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 487df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 488df8bae1dSRodney W. Grimes */ 48921d172a3SGleb Smirnoff if (m->m_pkthdr.len < ip_len) { 49058938916SGarrett Wollman tooshort: 49186425c62SRobert Watson IPSTAT_INC(ips_tooshort); 492df8bae1dSRodney W. Grimes goto bad; 493df8bae1dSRodney W. Grimes } 49421d172a3SGleb Smirnoff if (m->m_pkthdr.len > ip_len) { 495df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 49621d172a3SGleb Smirnoff m->m_len = ip_len; 49721d172a3SGleb Smirnoff m->m_pkthdr.len = ip_len; 498df8bae1dSRodney W. Grimes } else 49921d172a3SGleb Smirnoff m_adj(m, ip_len - m->m_pkthdr.len); 500df8bae1dSRodney W. Grimes } 501b8bc95cdSAdrian Chadd 502b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 50314dd6717SSam Leffler /* 504ffe8cd7bSBjoern A. Zeeb * Bypass packet filtering for packets previously handled by IPsec. 50514dd6717SSam Leffler */ 506cc977adcSBjoern A. Zeeb if (ip_ipsec_filtertunnel(m)) 507c21fd232SAndre Oppermann goto passin; 508b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 5093f67c834SDon Lewis 510c4ac87eaSDarren Reed /* 511134ea224SSam Leffler * Run through list of hooks for input packets. 
512f51f805fSSam Leffler * 513f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 514f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 515f51f805fSSam Leffler * ip_forward to do the right thing. 516c4ac87eaSDarren Reed */ 517c21fd232SAndre Oppermann 518c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. */ 5190b4b0b0fSJulian Elischer if (!PFIL_HOOKED(&V_inet_pfil_hook)) 520c21fd232SAndre Oppermann goto passin; 521c21fd232SAndre Oppermann 522f51f805fSSam Leffler odst = ip->ip_dst; 5230b4b0b0fSJulian Elischer if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 524beec8214SDarren Reed return; 525134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 526c4ac87eaSDarren Reed return; 5279b932e9eSAndre Oppermann 528c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 52902c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 5300aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5319b932e9eSAndre Oppermann 5329b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 5339b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 5349b932e9eSAndre Oppermann goto ours; 5359b932e9eSAndre Oppermann } 536ffdbf9daSAndrey V. Elsukov if (m->m_flags & M_IP_NEXTHOP) { 537ffdbf9daSAndrey V. Elsukov dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL); 538ffdbf9daSAndrey V. Elsukov if (dchg != 0) { 539099dd043SAndre Oppermann /* 540ffdbf9daSAndrey V. Elsukov * Directly ship the packet on. This allows 541ffdbf9daSAndrey V. Elsukov * forwarding packets originally destined to us 542ffdbf9daSAndrey V. Elsukov * to some other directly connected host. 543099dd043SAndre Oppermann */ 544ffdbf9daSAndrey V. Elsukov ip_forward(m, 1); 545099dd043SAndre Oppermann return; 546099dd043SAndre Oppermann } 547ffdbf9daSAndrey V. Elsukov } 548c21fd232SAndre Oppermann passin: 54921d172a3SGleb Smirnoff 55021d172a3SGleb Smirnoff /* 551df8bae1dSRodney W. 
Grimes * Process options and, if not destined for us, 552df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 553df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 554df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 555df8bae1dSRodney W. Grimes */ 5569b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 557c67b1d17SGarrett Wollman return; 558df8bae1dSRodney W. Grimes 559f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 560f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 561f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 562f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 563f0068c4aSGarrett Wollman * grabbing the packet. 564f0068c4aSGarrett Wollman */ 565603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 566f0068c4aSGarrett Wollman goto ours; 567f0068c4aSGarrett Wollman 568df8bae1dSRodney W. Grimes /* 569df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 570cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 571cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 572cc766e04SGarrett Wollman * with it). 573df8bae1dSRodney W. Grimes */ 574603724d3SBjoern A. 
Zeeb if (TAILQ_EMPTY(&V_in_ifaddrhead) && 575cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 576cc766e04SGarrett Wollman goto ours; 577cc766e04SGarrett Wollman 5787538a9a0SJonathan Lemon /* 579823db0e9SDon Lewis * Enable a consistency check between the destination address 580823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 581823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 582e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 583e15ae1b2SDon Lewis * 'ipfw fwd'. 5843f67c834SDon Lewis * 5853f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 5863f67c834SDon Lewis * address is ipnat'ed to a different interface. 5873f67c834SDon Lewis * 588a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 5893f67c834SDon Lewis * to the loopback interface instead of the interface where 5903f67c834SDon Lewis * the packets are received. 591a9771948SGleb Smirnoff * 592a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 593a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 594a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 595823db0e9SDon Lewis */ 596603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 5970aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 59854bfbd51SWill Andrews ifp->if_carp == NULL && (dchg == 0); 599823db0e9SDon Lewis 600ca925d9cSJonathan Lemon /* 601ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 
602ca925d9cSJonathan Lemon */ 6032d9cfabaSRobert Watson /* IN_IFADDR_RLOCK(); */ 6049b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 605f9e354dfSJulian Elischer /* 606823db0e9SDon Lewis * If the address matches, verify that the packet 607823db0e9SDon Lewis * arrived via the correct interface if checking is 608823db0e9SDon Lewis * enabled. 609f9e354dfSJulian Elischer */ 6109b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6118c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6127caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6137caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6147caf4ab7SGleb Smirnoff m->m_pkthdr.len); 6152d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 616ed1ff184SJulian Elischer goto ours; 617ca925d9cSJonathan Lemon } 6188c0fec80SRobert Watson } 6192d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 6202d9cfabaSRobert Watson 621823db0e9SDon Lewis /* 622ca925d9cSJonathan Lemon * Check for broadcast addresses. 623ca925d9cSJonathan Lemon * 624ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 625ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 626ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 627ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 628823db0e9SDon Lewis */ 6290aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 630137f91e8SJohn Baldwin IF_ADDR_RLOCK(ifp); 6310aade26eSRobert Watson TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 632ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 633ca925d9cSJonathan Lemon continue; 634ca925d9cSJonathan Lemon ia = ifatoia(ifa); 635df8bae1dSRodney W. 
Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 6360aade26eSRobert Watson ip->ip_dst.s_addr) { 6377caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6387caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6397caf4ab7SGleb Smirnoff m->m_pkthdr.len); 640137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 641df8bae1dSRodney W. Grimes goto ours; 6420aade26eSRobert Watson } 6430ac40133SBrian Somers #ifdef BOOTP_COMPAT 6440aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 6457caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); 6467caf4ab7SGleb Smirnoff counter_u64_add(ia->ia_ifa.ifa_ibytes, 6477caf4ab7SGleb Smirnoff m->m_pkthdr.len); 648137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 649ca925d9cSJonathan Lemon goto ours; 6500aade26eSRobert Watson } 6510ac40133SBrian Somers #endif 652df8bae1dSRodney W. Grimes } 653137f91e8SJohn Baldwin IF_ADDR_RUNLOCK(ifp); 65419e5b0a7SRobert Watson ia = NULL; 655df8bae1dSRodney W. Grimes } 656f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. */ 657f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 65886425c62SRobert Watson IPSTAT_INC(ips_cantforward); 659f8429ca2SBruce M Simpson m_freem(m); 660f8429ca2SBruce M Simpson return; 661f8429ca2SBruce M Simpson } 662df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 663603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 664df8bae1dSRodney W. Grimes /* 665df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 666df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 667df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 668df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 669df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 670df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 671df8bae1dSRodney W. 
Grimes */ 6720aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 67386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 674df8bae1dSRodney W. Grimes m_freem(m); 675c67b1d17SGarrett Wollman return; 676df8bae1dSRodney W. Grimes } 677df8bae1dSRodney W. Grimes 678df8bae1dSRodney W. Grimes /* 67911612afaSDima Dorfman * The process-level routing daemon needs to receive 680df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 681df8bae1dSRodney W. Grimes * host belongs to their destination groups. 682df8bae1dSRodney W. Grimes */ 683df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 684df8bae1dSRodney W. Grimes goto ours; 68586425c62SRobert Watson IPSTAT_INC(ips_forward); 686df8bae1dSRodney W. Grimes } 687df8bae1dSRodney W. Grimes /* 688d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 689d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 690d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 691df8bae1dSRodney W. Grimes */ 692df8bae1dSRodney W. Grimes goto ours; 693df8bae1dSRodney W. Grimes } 694df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 695df8bae1dSRodney W. Grimes goto ours; 696df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 697df8bae1dSRodney W. Grimes goto ours; 698df8bae1dSRodney W. Grimes 6996a800098SYoshinobu Inoue /* 700df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 701df8bae1dSRodney W. Grimes */ 702603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 70386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 704df8bae1dSRodney W. Grimes m_freem(m); 705546f251bSChris D. Faulhaber } else { 7069b932e9eSAndre Oppermann ip_forward(m, dchg); 707546f251bSChris D. Faulhaber } 708c67b1d17SGarrett Wollman return; 709df8bae1dSRodney W. Grimes 710df8bae1dSRodney W. 
Grimes ours: 711d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 712d0ebc0d2SYaroslav Tykhiy /* 713d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 714d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 715d0ebc0d2SYaroslav Tykhiy */ 7167caf4ab7SGleb Smirnoff if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) 717d0ebc0d2SYaroslav Tykhiy return; 718d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 719d0ebc0d2SYaroslav Tykhiy 72063f8d699SJordan K. Hubbard /* 721b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 722ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 723df8bae1dSRodney W. Grimes */ 7248f134647SGleb Smirnoff if (ip->ip_off & htons(IP_MF | IP_OFFMASK)) { 725aa69c612SGleb Smirnoff /* XXXGL: shouldn't we save & set m_flags? */ 726f0cada84SAndre Oppermann m = ip_reass(m); 727f0cada84SAndre Oppermann if (m == NULL) 728c67b1d17SGarrett Wollman return; 7296a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 7307e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 73153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 732f0cada84SAndre Oppermann } 733f0cada84SAndre Oppermann 734b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 73533841545SHajimu UMEMOTO /* 73633841545SHajimu UMEMOTO * enforce IPsec policy checking if we are seeing last header. 73733841545SHajimu UMEMOTO * note that we do not visit this with protocols with pcb layer 73833841545SHajimu UMEMOTO * code - like udp/tcp/raw ip. 73933841545SHajimu UMEMOTO */ 740e58320f1SAndrey V. Elsukov if (ip_ipsec_input(m, ip->ip_p) != 0) 74133841545SHajimu UMEMOTO goto bad; 742b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 74333841545SHajimu UMEMOTO 744df8bae1dSRodney W. Grimes /* 745df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 746df8bae1dSRodney W. 
Grimes */ 74786425c62SRobert Watson IPSTAT_INC(ips_delivered); 7489b932e9eSAndre Oppermann 7498f5a8818SKevin Lo (*inetsw[ip_protox[ip->ip_p]].pr_input)(&m, &hlen, ip->ip_p); 750c67b1d17SGarrett Wollman return; 751df8bae1dSRodney W. Grimes bad: 752df8bae1dSRodney W. Grimes m_freem(m); 753c67b1d17SGarrett Wollman } 754c67b1d17SGarrett Wollman 755c67b1d17SGarrett Wollman /* 756df8bae1dSRodney W. Grimes * IP timer processing; 757df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 758df8bae1dSRodney W. Grimes * queue, discard it. 759df8bae1dSRodney W. Grimes */ 760df8bae1dSRodney W. Grimes void 761f2565d68SRobert Watson ip_slowtimo(void) 762df8bae1dSRodney W. Grimes { 7638b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 764df8bae1dSRodney W. Grimes 7655ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 7668b615593SMarko Zec VNET_FOREACH(vnet_iter) { 7678b615593SMarko Zec CURVNET_SET(vnet_iter); 7681dbefcc0SGleb Smirnoff ipreass_slowtimo(); 7698b615593SMarko Zec CURVNET_RESTORE(); 7708b615593SMarko Zec } 7715ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 772df8bae1dSRodney W. Grimes } 773df8bae1dSRodney W. Grimes 7749802380eSBjoern A. Zeeb void 7759802380eSBjoern A. Zeeb ip_drain(void) 7769802380eSBjoern A. Zeeb { 7779802380eSBjoern A. Zeeb VNET_ITERATOR_DECL(vnet_iter); 7789802380eSBjoern A. Zeeb 7799802380eSBjoern A. Zeeb VNET_LIST_RLOCK_NOSLEEP(); 7809802380eSBjoern A. Zeeb VNET_FOREACH(vnet_iter) { 7819802380eSBjoern A. Zeeb CURVNET_SET(vnet_iter); 7821dbefcc0SGleb Smirnoff ipreass_drain(); 7838b615593SMarko Zec CURVNET_RESTORE(); 7848b615593SMarko Zec } 7855ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 786df8bae1dSRodney W. Grimes } 787df8bae1dSRodney W. Grimes 788df8bae1dSRodney W. Grimes /* 789de38924dSAndre Oppermann * The protocol to be inserted into ip_protox[] must be already registered 790de38924dSAndre Oppermann * in inetsw[], either statically or through pf_proto_register(). 
*/
int
ipproto_register(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks: protocol 0 and anything >= IPPROTO_MAX is refused. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/*
	 * The protocol slot must not be occupied by another protocol
	 * already.  An index pointing to IPPROTO_RAW is unused.
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/* Find the protocol position in inetsw[] and set the index. */
	for (pr = inetdomain.dom_protosw;
	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol == ipproto) {
			ip_protox[pr->pr_protocol] = pr - inetsw;
			return (0);
		}
	}
	/* No inetsw[] entry carries this protocol number. */
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): point the ip_protox[] slot for 'ipproto' back
 * at the IPPROTO_RAW entry of inetsw[].
 *
 * Returns 0 on success, EPROTONOSUPPORT for an out-of-range protocol
 * number, EPFNOSUPPORT if the raw protocol entry cannot be found, and
 * ENOENT if the slot already points at the raw entry (i.e. nothing was
 * registered).
 */
int
ipproto_unregister(short ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto <= 0 || ipproto >= IPPROTO_MAX)
		return (EPROTONOSUPPORT);

	/* Check if the protocol was indeed registered. */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] == pr - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

/*
 * Given address of next destination (final or next hop), return (referenced)
 * internet address info of interface to be used to get there.
 *
 * The lookup is done in routing table 'fibnum'.  NULL is returned when no
 * route is found.  On success a reference is taken on the returned address
 * with ifa_ref() before the route itself is released, so the caller owns
 * that reference.
 */
struct in_ifaddr *
ip_rtaddr(struct in_addr dst, u_int fibnum)
{
	struct route sro;
	struct sockaddr_in *sin;
	struct in_ifaddr *ia;

	bzero(&sro, sizeof(sro));
	sin = (struct sockaddr_in *)&sro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = dst;
	in_rtalloc_ign(&sro, 0, fibnum);

	if (sro.ro_rt == NULL)
		return (NULL);

	ia = ifatoia(sro.ro_rt->rt_ifa);
	ifa_ref(&ia->ia_ifa);
	RTFREE(sro.ro_rt);
	return (ia);
}

/*
 * Table of errno values with PRC_NCMDS entries.
 * NOTE(review): presumably indexed by PRC_* control command, with 0 meaning
 * "no error to report" -- confirm against the users of this table.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advertising
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 *
 * The mbuf chain 'm' is always consumed: it is either freed here, handed
 * to icmp_error(), or handed to ip_output().
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct mbuf *mcopy;
	struct sockaddr_in *sin;
	struct in_addr dest;
	struct route ro;
	int error, type = 0, code = 0, mtu = 0;

	/* Link-level broadcast/multicast and unforwardable destinations. */
	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
#ifdef IPSEC
	if (ip_ipsec_fwd(m) != 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
#endif /* IPSEC */
	/* In stealth mode the TTL is neither checked nor decremented. */
#ifdef IPSTEALTH
	if (!V_ipstealth) {
#endif
		if (ip->ip_ttl <= IPTTLDEC) {
			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
			    0, 0);
			return;
		}
#ifdef IPSTEALTH
	}
#endif

	/* Look up the route for the packet's destination in its FIB. */
	bzero(&ro, sizeof(ro));
	sin = (struct sockaddr_in *)&ro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = ip->ip_dst;
#ifdef RADIX_MPATH
	rtalloc_mpath_fib(&ro,
	    ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
	    M_GETFIB(m));
#else
	in_rtalloc_ign(&ro, 0, M_GETFIB(m));
#endif
	if (ro.ro_rt != NULL) {
		/* Hold our own reference; released on every exit below. */
		ia = ifatoia(ro.ro_rt->rt_ifa);
		ifa_ref(&ia->ia_ifa);
	} else
		ia = NULL;
#ifndef IPSEC
	/*
	 * 'ia' may be NULL if there is no route for this destination.
	 * In case of IPsec, don't discard it just yet, but pass it to
	 * ip_output in case of outgoing IPsec policy.
	 */
	if (!srcrt && ia == NULL) {
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
		RO_RTFREE(&ro);
		return;
	}
#endif

	/*
	 * Save the IP header and as much of the payload as fits in a
	 * single header mbuf, in case we need to generate an ICMP
	 * message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary.  The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copy() because it might return a reference
	 * to a shared cluster.  Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	mcopy = m_gethdr(M_NOWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_NOWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = min(ntohs(ip->ip_len), M_TRAILINGSPACE(mcopy));
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}

#ifdef IPSTEALTH
	if (!V_ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	if (!srcrt && V_ipsendredirects &&
	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct rtentry *rt;

		rt = ro.ro_rt;

		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
			u_long src = ntohl(ip->ip_src.s_addr);

			/* Is the sender on the outgoing interface's subnet? */
			if (RTA(rt) &&
			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
				if (rt->rt_flags & RTF_GATEWAY)
					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
				else
					dest.s_addr = ip->ip_dst.s_addr;
				/* Router requirements say to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
			}
		}
	}

	/*
	 * ip_output() takes ownership of 'm'.  From here on neither 'm'
	 * nor 'ip' (which points into it) may be dereferenced; use the
	 * private copy in 'mcopy' instead.
	 */
	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);

	if (error == EMSGSIZE && ro.ro_rt)
		mtu = ro.ro_rt->rt_mtu;
	RO_RTFREE(&ro);

	if (error)
		IPSTAT_INC(ips_cantforward);
	else {
		IPSTAT_INC(ips_forward);
		if (type)
			IPSTAT_INC(ips_redirectsent);
		else {
			/* Forwarded and no redirect wanted: nothing to report. */
			if (mcopy)
				m_freem(mcopy);
			if (ia != NULL)
				ifa_free(&ia->ia_ifa);
			return;
		}
	}
	/* Without the saved header no ICMP message can be built. */
	if (mcopy == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return;
	}

	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;

#ifdef IPSEC
		/*
		 * If IPsec is configured for this path,
		 * override any possible mtu value set by ip_output.
		 */
		mtu = ip_ipsec_mtu(mcopy, mtu);
#endif /* IPSEC */
		/*
		 * If the MTU was set before make sure we are below the
		 * interface MTU.
		 * If the MTU wasn't set before use the interface mtu or
		 * fall back to the next smaller mtu step compared to the
		 * current packet size.
		 */
		if (mtu != 0) {
			if (ia != NULL)
				mtu = min(mtu, ia->ia_ifp->if_mtu);
		} else {
			if (ia != NULL)
				mtu = ia->ia_ifp->if_mtu;
			else {
				/*
				 * Read the length from the saved header:
				 * the original packet was handed to
				 * ip_output() and must not be touched.
				 */
				mtu = ip_next_mtu(
				    ntohs(mtod(mcopy, struct ip *)->ip_len),
				    0);
			}
		}
		IPSTAT_INC(ips_cantfrag);
		break;

	case ENOBUFS:
	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return;
	}
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	/* icmp_error() consumes mcopy. */
	icmp_error(mcopy, type, code, dest.s_addr, mtu);
}

Grimes 111682c23ebaSBill Fenner void 1117f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1118f2565d68SRobert Watson struct mbuf *m) 111982c23ebaSBill Fenner { 11208b615593SMarko Zec 1121be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1122be8a62e8SPoul-Henning Kamp struct bintime bt; 1123be8a62e8SPoul-Henning Kamp 1124be8a62e8SPoul-Henning Kamp bintime(&bt); 1125be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & SO_BINTIME) { 1126be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t)&bt, sizeof(bt), 1127be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 1128be8a62e8SPoul-Henning Kamp if (*mp) 1129be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 1130be8a62e8SPoul-Henning Kamp } 113182c23ebaSBill Fenner if (inp->inp_socket->so_options & SO_TIMESTAMP) { 113282c23ebaSBill Fenner struct timeval tv; 113382c23ebaSBill Fenner 1134be8a62e8SPoul-Henning Kamp bintime2timeval(&bt, &tv); 113582c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&tv, sizeof(tv), 113682c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 113782c23ebaSBill Fenner if (*mp) 113882c23ebaSBill Fenner mp = &(*mp)->m_next; 11394cc20ab1SSeigo Tanimura } 1140be8a62e8SPoul-Henning Kamp } 114182c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 114282c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)&ip->ip_dst, 114382c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 114482c23ebaSBill Fenner if (*mp) 114582c23ebaSBill Fenner mp = &(*mp)->m_next; 114682c23ebaSBill Fenner } 11474957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 11484957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t)&ip->ip_ttl, 11494957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 11504957466bSMatthew N. Dodd if (*mp) 11514957466bSMatthew N. Dodd mp = &(*mp)->m_next; 11524957466bSMatthew N. 
Dodd } 115382c23ebaSBill Fenner #ifdef notyet 115482c23ebaSBill Fenner /* XXX 115582c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 115682c23ebaSBill Fenner * than they already were. 115782c23ebaSBill Fenner */ 115882c23ebaSBill Fenner /* options were tossed already */ 115982c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 116082c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t)opts_deleted_above, 116182c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 116282c23ebaSBill Fenner if (*mp) 116382c23ebaSBill Fenner mp = &(*mp)->m_next; 116482c23ebaSBill Fenner } 116582c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 116682c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1167e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t)ip_srcroute(m), 116882c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 116982c23ebaSBill Fenner if (*mp) 117082c23ebaSBill Fenner mp = &(*mp)->m_next; 117182c23ebaSBill Fenner } 117282c23ebaSBill Fenner #endif 117382c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1174d314ad7bSJulian Elischer struct ifnet *ifp; 1175d314ad7bSJulian Elischer struct sdlbuf { 117682c23ebaSBill Fenner struct sockaddr_dl sdl; 1177d314ad7bSJulian Elischer u_char pad[32]; 1178d314ad7bSJulian Elischer } sdlbuf; 1179d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1180d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 118182c23ebaSBill Fenner 118246f2df9cSSergey Kandaurov if ((ifp = m->m_pkthdr.rcvif) && 118346f2df9cSSergey Kandaurov ifp->if_index && ifp->if_index <= V_if_index) { 11844a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1185d314ad7bSJulian Elischer /* 1186d314ad7bSJulian Elischer * Change our mind and don't try copy. 
1187d314ad7bSJulian Elischer */ 118846f2df9cSSergey Kandaurov if (sdp->sdl_family != AF_LINK || 118946f2df9cSSergey Kandaurov sdp->sdl_len > sizeof(sdlbuf)) { 1190d314ad7bSJulian Elischer goto makedummy; 1191d314ad7bSJulian Elischer } 1192d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1193d314ad7bSJulian Elischer } else { 1194d314ad7bSJulian Elischer makedummy: 119546f2df9cSSergey Kandaurov sdl2->sdl_len = 119646f2df9cSSergey Kandaurov offsetof(struct sockaddr_dl, sdl_data[0]); 1197d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1198d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1199d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1200d314ad7bSJulian Elischer } 1201d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t)sdl2, sdl2->sdl_len, 120282c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 120382c23ebaSBill Fenner if (*mp) 120482c23ebaSBill Fenner mp = &(*mp)->m_next; 120582c23ebaSBill Fenner } 12063cca425bSMichael Tuexen if (inp->inp_flags & INP_RECVTOS) { 12073cca425bSMichael Tuexen *mp = sbcreatecontrol((caddr_t)&ip->ip_tos, 12083cca425bSMichael Tuexen sizeof(u_char), IP_RECVTOS, IPPROTO_IP); 12093cca425bSMichael Tuexen if (*mp) 12103cca425bSMichael Tuexen mp = &(*mp)->m_next; 12113cca425bSMichael Tuexen } 12129d3ddf43SAdrian Chadd 12139d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVFLOWID) { 12149d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12159d3ddf43SAdrian Chadd 12169d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12179d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12189d3ddf43SAdrian Chadd 12199d3ddf43SAdrian Chadd /* 12209d3ddf43SAdrian Chadd * XXX should handle the failure of one or the 12219d3ddf43SAdrian Chadd * other - don't populate both? 
12229d3ddf43SAdrian Chadd */ 12239d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flowid, 12249d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWID, IPPROTO_IP); 12259d3ddf43SAdrian Chadd if (*mp) 12269d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12279d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &flow_type, 12289d3ddf43SAdrian Chadd sizeof(uint32_t), IP_FLOWTYPE, IPPROTO_IP); 12299d3ddf43SAdrian Chadd if (*mp) 12309d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12319d3ddf43SAdrian Chadd } 12329d3ddf43SAdrian Chadd 12339d3ddf43SAdrian Chadd #ifdef RSS 12349d3ddf43SAdrian Chadd if (inp->inp_flags2 & INP_RECVRSSBUCKETID) { 12359d3ddf43SAdrian Chadd uint32_t flowid, flow_type; 12369d3ddf43SAdrian Chadd uint32_t rss_bucketid; 12379d3ddf43SAdrian Chadd 12389d3ddf43SAdrian Chadd flowid = m->m_pkthdr.flowid; 12399d3ddf43SAdrian Chadd flow_type = M_HASHTYPE_GET(m); 12409d3ddf43SAdrian Chadd 12419d3ddf43SAdrian Chadd if (rss_hash2bucket(flowid, flow_type, &rss_bucketid) == 0) { 12429d3ddf43SAdrian Chadd *mp = sbcreatecontrol((caddr_t) &rss_bucketid, 12439d3ddf43SAdrian Chadd sizeof(uint32_t), IP_RSSBUCKETID, IPPROTO_IP); 12449d3ddf43SAdrian Chadd if (*mp) 12459d3ddf43SAdrian Chadd mp = &(*mp)->m_next; 12469d3ddf43SAdrian Chadd } 12479d3ddf43SAdrian Chadd } 12489d3ddf43SAdrian Chadd #endif 124982c23ebaSBill Fenner } 125082c23ebaSBill Fenner 12514d2e3692SLuigi Rizzo /* 125230916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 125330916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 125430916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 125530916a2dSRobert Watson * compiled. 12564d2e3692SLuigi Rizzo */ 12573e288e62SDimitry Andric static VNET_DEFINE(int, ip_rsvp_on); 125882cea7e6SBjoern A. Zeeb VNET_DEFINE(struct socket *, ip_rsvpd); 125982cea7e6SBjoern A. Zeeb 126082cea7e6SBjoern A. Zeeb #define V_ip_rsvp_on VNET(ip_rsvp_on) 126182cea7e6SBjoern A. 
Zeeb 1262df8bae1dSRodney W. Grimes int 1263f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1264f0068c4aSGarrett Wollman { 12658b615593SMarko Zec 1266f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1267f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1268f0068c4aSGarrett Wollman return EOPNOTSUPP; 1269f0068c4aSGarrett Wollman 1270603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1271f0068c4aSGarrett Wollman return EADDRINUSE; 1272f0068c4aSGarrett Wollman 1273603724d3SBjoern A. Zeeb V_ip_rsvpd = so; 12741c5de19aSGarrett Wollman /* 12751c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 12761c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 12771c5de19aSGarrett Wollman */ 1278603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1279603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1280603724d3SBjoern A. Zeeb V_rsvp_on++; 12811c5de19aSGarrett Wollman } 1282f0068c4aSGarrett Wollman 1283f0068c4aSGarrett Wollman return 0; 1284f0068c4aSGarrett Wollman } 1285f0068c4aSGarrett Wollman 1286f0068c4aSGarrett Wollman int 1287f0068c4aSGarrett Wollman ip_rsvp_done(void) 1288f0068c4aSGarrett Wollman { 12898b615593SMarko Zec 1290603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 12911c5de19aSGarrett Wollman /* 12921c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 12931c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 12941c5de19aSGarrett Wollman */ 1295603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1296603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1297603724d3SBjoern A. 
Zeeb V_rsvp_on--; 12981c5de19aSGarrett Wollman } 1299f0068c4aSGarrett Wollman return 0; 1300f0068c4aSGarrett Wollman } 1301bbb4330bSLuigi Rizzo 13028f5a8818SKevin Lo int 13038f5a8818SKevin Lo rsvp_input(struct mbuf **mp, int *offp, int proto) 1304bbb4330bSLuigi Rizzo { 13058f5a8818SKevin Lo struct mbuf *m; 13068f5a8818SKevin Lo 13078f5a8818SKevin Lo m = *mp; 13088f5a8818SKevin Lo *mp = NULL; 13098b615593SMarko Zec 1310bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 13118f5a8818SKevin Lo *mp = m; 13128f5a8818SKevin Lo rsvp_input_p(mp, offp, proto); 13138f5a8818SKevin Lo return (IPPROTO_DONE); 1314bbb4330bSLuigi Rizzo } 1315bbb4330bSLuigi Rizzo 1316bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1317bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1318bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1319bbb4330bSLuigi Rizzo */ 1320bbb4330bSLuigi Rizzo 1321603724d3SBjoern A. Zeeb if (!V_rsvp_on) { 1322bbb4330bSLuigi Rizzo m_freem(m); 13238f5a8818SKevin Lo return (IPPROTO_DONE); 1324bbb4330bSLuigi Rizzo } 1325bbb4330bSLuigi Rizzo 1326603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 13278f5a8818SKevin Lo *mp = m; 13288f5a8818SKevin Lo rip_input(mp, offp, proto); 13298f5a8818SKevin Lo return (IPPROTO_DONE); 1330bbb4330bSLuigi Rizzo } 1331bbb4330bSLuigi Rizzo /* Drop the packet */ 1332bbb4330bSLuigi Rizzo m_freem(m); 13338f5a8818SKevin Lo return (IPPROTO_DONE); 1334bbb4330bSLuigi Rizzo } 1335