1c398230bSWarner Losh /*- 2df8bae1dSRodney W. Grimes * Copyright (c) 1982, 1986, 1988, 1993 3df8bae1dSRodney W. Grimes * The Regents of the University of California. All rights reserved. 4df8bae1dSRodney W. Grimes * 5df8bae1dSRodney W. Grimes * Redistribution and use in source and binary forms, with or without 6df8bae1dSRodney W. Grimes * modification, are permitted provided that the following conditions 7df8bae1dSRodney W. Grimes * are met: 8df8bae1dSRodney W. Grimes * 1. Redistributions of source code must retain the above copyright 9df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer. 10df8bae1dSRodney W. Grimes * 2. Redistributions in binary form must reproduce the above copyright 11df8bae1dSRodney W. Grimes * notice, this list of conditions and the following disclaimer in the 12df8bae1dSRodney W. Grimes * documentation and/or other materials provided with the distribution. 13df8bae1dSRodney W. Grimes * 4. Neither the name of the University nor the names of its contributors 14df8bae1dSRodney W. Grimes * may be used to endorse or promote products derived from this software 15df8bae1dSRodney W. Grimes * without specific prior written permission. 16df8bae1dSRodney W. Grimes * 17df8bae1dSRodney W. Grimes * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18df8bae1dSRodney W. Grimes * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19df8bae1dSRodney W. Grimes * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20df8bae1dSRodney W. Grimes * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21df8bae1dSRodney W. Grimes * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22df8bae1dSRodney W. Grimes * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23df8bae1dSRodney W. Grimes * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24df8bae1dSRodney W. 
Grimes * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25df8bae1dSRodney W. Grimes * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26df8bae1dSRodney W. Grimes * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27df8bae1dSRodney W. Grimes * SUCH DAMAGE. 28df8bae1dSRodney W. Grimes * 29df8bae1dSRodney W. Grimes * @(#)ip_input.c 8.2 (Berkeley) 1/4/94 30df8bae1dSRodney W. Grimes */ 31df8bae1dSRodney W. Grimes 324b421e2dSMike Silbersack #include <sys/cdefs.h> 334b421e2dSMike Silbersack __FBSDID("$FreeBSD$"); 344b421e2dSMike Silbersack 350ac40133SBrian Somers #include "opt_bootp.h" 3674a9466cSGary Palmer #include "opt_ipfw.h" 3727108a15SDag-Erling Smørgrav #include "opt_ipstealth.h" 386a800098SYoshinobu Inoue #include "opt_ipsec.h" 3933553d6eSBjoern A. Zeeb #include "opt_route.h" 40a9771948SGleb Smirnoff #include "opt_carp.h" 4174a9466cSGary Palmer 42df8bae1dSRodney W. Grimes #include <sys/param.h> 43df8bae1dSRodney W. Grimes #include <sys/systm.h> 445f311da2SMike Silbersack #include <sys/callout.h> 45df8bae1dSRodney W. Grimes #include <sys/mbuf.h> 46b715f178SLuigi Rizzo #include <sys/malloc.h> 47df8bae1dSRodney W. Grimes #include <sys/domain.h> 48df8bae1dSRodney W. Grimes #include <sys/protosw.h> 49df8bae1dSRodney W. Grimes #include <sys/socket.h> 50df8bae1dSRodney W. Grimes #include <sys/time.h> 51df8bae1dSRodney W. Grimes #include <sys/kernel.h> 52385195c0SMarko Zec #include <sys/lock.h> 53385195c0SMarko Zec #include <sys/rwlock.h> 541025071fSGarrett Wollman #include <sys/syslog.h> 55b5e8ce9fSBruce Evans #include <sys/sysctl.h> 56df8bae1dSRodney W. Grimes 57c85540ddSAndrey A. Chernov #include <net/pfil.h> 58df8bae1dSRodney W. Grimes #include <net/if.h> 599494d596SBrooks Davis #include <net/if_types.h> 60d314ad7bSJulian Elischer #include <net/if_var.h> 6182c23ebaSBill Fenner #include <net/if_dl.h> 62df8bae1dSRodney W. 
Grimes #include <net/route.h> 63748e0b0aSGarrett Wollman #include <net/netisr.h> 644b79449eSBjoern A. Zeeb #include <net/vnet.h> 6565111ec7SKip Macy #include <net/flowtable.h> 66df8bae1dSRodney W. Grimes 67df8bae1dSRodney W. Grimes #include <netinet/in.h> 68df8bae1dSRodney W. Grimes #include <netinet/in_systm.h> 69b5e8ce9fSBruce Evans #include <netinet/in_var.h> 70df8bae1dSRodney W. Grimes #include <netinet/ip.h> 71df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 72df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 73eddfbb76SRobert Watson #include <netinet/ip_fw.h> 74df8bae1dSRodney W. Grimes #include <netinet/ip_icmp.h> 75ef39adf0SAndre Oppermann #include <netinet/ip_options.h> 7658938916SGarrett Wollman #include <machine/in_cksum.h> 77a9771948SGleb Smirnoff #ifdef DEV_CARP 78a9771948SGleb Smirnoff #include <netinet/ip_carp.h> 79a9771948SGleb Smirnoff #endif 80b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 811dfcf0d2SAndre Oppermann #include <netinet/ip_ipsec.h> 82b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 83df8bae1dSRodney W. 
Grimes 84f0068c4aSGarrett Wollman #include <sys/socketvar.h> 856ddbf1e2SGary Palmer 86aed55708SRobert Watson #include <security/mac/mac_framework.h> 87aed55708SRobert Watson 88d2035ffbSEd Maste #ifdef CTASSERT 89d2035ffbSEd Maste CTASSERT(sizeof(struct ip) == 20); 90d2035ffbSEd Maste #endif 91d2035ffbSEd Maste 92eddfbb76SRobert Watson static VNET_DEFINE(int, ipsendredirects) = 1; /* XXX */ 93eddfbb76SRobert Watson static VNET_DEFINE(int, ip_checkinterface); 94eddfbb76SRobert Watson static VNET_DEFINE(int, ip_keepfaith); 95eddfbb76SRobert Watson static VNET_DEFINE(int, ip_sendsourcequench); 96385195c0SMarko Zec 971e77c105SRobert Watson #define V_ipsendredirects VNET(ipsendredirects) 981e77c105SRobert Watson #define V_ip_checkinterface VNET(ip_checkinterface) 991e77c105SRobert Watson #define V_ip_keepfaith VNET(ip_keepfaith) 1001e77c105SRobert Watson #define V_ip_sendsourcequench VNET(ip_sendsourcequench) 101eddfbb76SRobert Watson 102eddfbb76SRobert Watson VNET_DEFINE(int, ip_defttl) = IPDEFTTL; 103eddfbb76SRobert Watson VNET_DEFINE(int, ip_do_randomid); 104eddfbb76SRobert Watson VNET_DEFINE(int, ipforwarding); 105eddfbb76SRobert Watson 106eddfbb76SRobert Watson VNET_DEFINE(struct in_ifaddrhead, in_ifaddrhead); /* first inet address */ 107eddfbb76SRobert Watson VNET_DEFINE(struct in_ifaddrhashhead *, in_ifaddrhashtbl); /* inet addr hash table */ 108eddfbb76SRobert Watson VNET_DEFINE(u_long, in_ifaddrhmask); /* mask for hash table */ 109eddfbb76SRobert Watson VNET_DEFINE(struct ipstat, ipstat); 110eddfbb76SRobert Watson 111eddfbb76SRobert Watson static VNET_DEFINE(int, ip_rsvp_on); 112eddfbb76SRobert Watson VNET_DEFINE(struct socket *, ip_rsvpd); 113eddfbb76SRobert Watson VNET_DEFINE(int, rsvp_on); 114eddfbb76SRobert Watson 1151e77c105SRobert Watson #define V_ip_rsvp_on VNET(ip_rsvp_on) 116eddfbb76SRobert Watson 117eddfbb76SRobert Watson static VNET_DEFINE(TAILQ_HEAD(ipqhead, ipq), ipq[IPREASS_NHASH]); 118eddfbb76SRobert Watson static VNET_DEFINE(int, maxnipq); /* 
Administrative limit on # reass queues. */ 119eddfbb76SRobert Watson static VNET_DEFINE(int, maxfragsperpacket); 120eddfbb76SRobert Watson static VNET_DEFINE(int, nipq); /* Total # of reass queues */ 121eddfbb76SRobert Watson 1221e77c105SRobert Watson #define V_ipq VNET(ipq) 1231e77c105SRobert Watson #define V_maxnipq VNET(maxnipq) 1241e77c105SRobert Watson #define V_maxfragsperpacket VNET(maxfragsperpacket) 1251e77c105SRobert Watson #define V_nipq VNET(nipq) 126eddfbb76SRobert Watson 127eddfbb76SRobert Watson VNET_DEFINE(int, ipstealth); 12864aeca7bSRobert Watson 1292d9cfabaSRobert Watson struct rwlock in_ifaddr_lock; 13064aeca7bSRobert Watson RW_SYSINIT(in_ifaddr_lock, &in_ifaddr_lock, "in_ifaddr_lock"); 131f0068c4aSGarrett Wollman 132eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_FORWARDING, forwarding, CTLFLAG_RW, 133eddfbb76SRobert Watson &VNET_NAME(ipforwarding), 0, 1348b615593SMarko Zec "Enable IP forwarding between interfaces"); 1350312fbe9SPoul-Henning Kamp 136eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_SENDREDIRECTS, redirect, CTLFLAG_RW, 137eddfbb76SRobert Watson &VNET_NAME(ipsendredirects), 0, 1388b615593SMarko Zec "Enable sending IP redirects"); 1390312fbe9SPoul-Henning Kamp 140eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_DEFTTL, ttl, CTLFLAG_RW, 141eddfbb76SRobert Watson &VNET_NAME(ip_defttl), 0, 142eddfbb76SRobert Watson "Maximum TTL on IP packets"); 1430312fbe9SPoul-Henning Kamp 144eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, IPCTL_KEEPFAITH, keepfaith, CTLFLAG_RW, 145eddfbb76SRobert Watson &VNET_NAME(ip_keepfaith), 0, 1466a800098SYoshinobu Inoue "Enable packet capture for FAITH IPv4->IPv6 translater daemon"); 1476a800098SYoshinobu Inoue 148eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, sendsourcequench, CTLFLAG_RW, 149eddfbb76SRobert Watson &VNET_NAME(ip_sendsourcequench), 0, 150df285b3dSMike Silbersack "Enable the transmission of source quench packets"); 151df285b3dSMike Silbersack 
152eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, random_id, CTLFLAG_RW, 153eddfbb76SRobert Watson &VNET_NAME(ip_do_randomid), 0, 154eddfbb76SRobert Watson "Assign random ip_id values"); 1551f44b0a1SDavid Malone 156823db0e9SDon Lewis /* 157823db0e9SDon Lewis * XXX - Setting ip_checkinterface mostly implements the receive side of 158823db0e9SDon Lewis * the Strong ES model described in RFC 1122, but since the routing table 159a8f12100SDon Lewis * and transmit implementation do not implement the Strong ES model, 160823db0e9SDon Lewis * setting this to 1 results in an odd hybrid. 1613f67c834SDon Lewis * 162a8f12100SDon Lewis * XXX - ip_checkinterface currently must be disabled if you use ipnat 163a8f12100SDon Lewis * to translate the destination address to another local interface. 1643f67c834SDon Lewis * 1653f67c834SDon Lewis * XXX - ip_checkinterface must be disabled if you add IP aliases 1663f67c834SDon Lewis * to the loopback interface instead of the interface where the 1673f67c834SDon Lewis * packets for those addresses are received. 168823db0e9SDon Lewis */ 169eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, check_interface, CTLFLAG_RW, 170eddfbb76SRobert Watson &VNET_NAME(ip_checkinterface), 0, 1718b615593SMarko Zec "Verify packet arrives on correct interface"); 172b3e95d4eSJonathan Lemon 1730b4b0b0fSJulian Elischer VNET_DEFINE(struct pfil_head, inet_pfil_hook); /* Packet filter hooks */ 174df8bae1dSRodney W. Grimes 175d4b5cae4SRobert Watson static struct netisr_handler ip_nh = { 176d4b5cae4SRobert Watson .nh_name = "ip", 177d4b5cae4SRobert Watson .nh_handler = ip_input, 178d4b5cae4SRobert Watson .nh_proto = NETISR_IP, 179d4b5cae4SRobert Watson .nh_policy = NETISR_POLICY_FLOW, 180d4b5cae4SRobert Watson }; 181ca925d9cSJonathan Lemon 182df8bae1dSRodney W. Grimes extern struct domain inetdomain; 183f0ffb944SJulian Elischer extern struct protosw inetsw[]; 184df8bae1dSRodney W. 
Grimes u_char ip_protox[IPPROTO_MAX]; 185ca925d9cSJonathan Lemon 186eddfbb76SRobert Watson SYSCTL_VNET_STRUCT(_net_inet_ip, IPCTL_STATS, stats, CTLFLAG_RW, 187eddfbb76SRobert Watson &VNET_NAME(ipstat), ipstat, 188eddfbb76SRobert Watson "IP statistics (struct ipstat, netinet/ip_var.h)"); 189df8bae1dSRodney W. Grimes 190eddfbb76SRobert Watson static VNET_DEFINE(uma_zone_t, ipq_zone); 1911e77c105SRobert Watson #define V_ipq_zone VNET(ipq_zone) 192194a213eSAndrey A. Chernov 193dfa60d93SRobert Watson static struct mtx ipqlock; 1942fad1e93SSam Leffler 1952fad1e93SSam Leffler #define IPQ_LOCK() mtx_lock(&ipqlock) 1962fad1e93SSam Leffler #define IPQ_UNLOCK() mtx_unlock(&ipqlock) 197888c2a3cSSam Leffler #define IPQ_LOCK_INIT() mtx_init(&ipqlock, "ipqlock", NULL, MTX_DEF) 198888c2a3cSSam Leffler #define IPQ_LOCK_ASSERT() mtx_assert(&ipqlock, MA_OWNED) 199f23b4c91SGarrett Wollman 200d248c7d7SRobert Watson static void maxnipq_update(void); 2014f590175SPaul Saab static void ipq_zone_change(void *); 202d248c7d7SRobert Watson 203eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, fragpackets, CTLFLAG_RD, 204eddfbb76SRobert Watson &VNET_NAME(nipq), 0, 2058b615593SMarko Zec "Current number of IPv4 fragment reassembly queue entries"); 206d248c7d7SRobert Watson 207eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, maxfragsperpacket, CTLFLAG_RW, 208eddfbb76SRobert Watson &VNET_NAME(maxfragsperpacket), 0, 209d248c7d7SRobert Watson "Maximum number of IPv4 fragments allowed per packet"); 210d248c7d7SRobert Watson 211d248c7d7SRobert Watson struct callout ipport_tick_callout; 212d248c7d7SRobert Watson 2130312fbe9SPoul-Henning Kamp #ifdef IPCTL_DEFMTU 2140312fbe9SPoul-Henning Kamp SYSCTL_INT(_net_inet_ip, IPCTL_DEFMTU, mtu, CTLFLAG_RW, 2153d177f46SBill Fumerola &ip_mtu, 0, "Default MTU"); 2160312fbe9SPoul-Henning Kamp #endif 2170312fbe9SPoul-Henning Kamp 2181b968362SDag-Erling Smørgrav #ifdef IPSTEALTH 219eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, 
stealth, CTLFLAG_RW, 220eddfbb76SRobert Watson &VNET_NAME(ipstealth), 0, 221eddfbb76SRobert Watson "IP stealth mode, no TTL decrementation on forwarding"); 2221b968362SDag-Erling Smørgrav #endif 223eddfbb76SRobert Watson 22453be8fcaSBjoern A. Zeeb #ifdef FLOWTABLE 225eddfbb76SRobert Watson static VNET_DEFINE(int, ip_output_flowtable_size) = 2048; 226eddfbb76SRobert Watson VNET_DEFINE(struct flowtable *, ip_ft); 2271e77c105SRobert Watson #define V_ip_output_flowtable_size VNET(ip_output_flowtable_size) 228eddfbb76SRobert Watson 229eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip, OID_AUTO, output_flowtable_size, CTLFLAG_RDTUN, 230eddfbb76SRobert Watson &VNET_NAME(ip_output_flowtable_size), 2048, 23165111ec7SKip Macy "number of entries in the per-cpu output flow caches"); 23253be8fcaSBjoern A. Zeeb #endif 23353be8fcaSBjoern A. Zeeb 234eddfbb76SRobert Watson VNET_DEFINE(int, fw_one_pass) = 1; 235010b65f5SJulian Elischer 2364d77a549SAlfred Perlstein static void ip_freef(struct ipqhead *, struct ipq *); 2378948e4baSArchie Cobbs 238315e3e38SRobert Watson /* 239315e3e38SRobert Watson * Kernel module interface for updating ipstat. The argument is an index 240315e3e38SRobert Watson * into ipstat treated as an array of u_long. While this encodes the general 241315e3e38SRobert Watson * layout of ipstat into the caller, it doesn't encode its location, so that 242315e3e38SRobert Watson * future changes to add, for example, per-CPU stats support won't cause 243315e3e38SRobert Watson * binary compatibility problems for kernel modules. 
244315e3e38SRobert Watson */ 245315e3e38SRobert Watson void 246315e3e38SRobert Watson kmod_ipstat_inc(int statnum) 247315e3e38SRobert Watson { 248315e3e38SRobert Watson 249315e3e38SRobert Watson (*((u_long *)&V_ipstat + statnum))++; 250315e3e38SRobert Watson } 251315e3e38SRobert Watson 252315e3e38SRobert Watson void 253315e3e38SRobert Watson kmod_ipstat_dec(int statnum) 254315e3e38SRobert Watson { 255315e3e38SRobert Watson 256315e3e38SRobert Watson (*((u_long *)&V_ipstat + statnum))--; 257315e3e38SRobert Watson } 258315e3e38SRobert Watson 259d4b5cae4SRobert Watson static int 260d4b5cae4SRobert Watson sysctl_netinet_intr_queue_maxlen(SYSCTL_HANDLER_ARGS) 261d4b5cae4SRobert Watson { 262d4b5cae4SRobert Watson int error, qlimit; 263d4b5cae4SRobert Watson 264d4b5cae4SRobert Watson netisr_getqlimit(&ip_nh, &qlimit); 265d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qlimit, 0, req); 266d4b5cae4SRobert Watson if (error || !req->newptr) 267d4b5cae4SRobert Watson return (error); 268d4b5cae4SRobert Watson if (qlimit < 1) 269d4b5cae4SRobert Watson return (EINVAL); 270d4b5cae4SRobert Watson return (netisr_setqlimit(&ip_nh, qlimit)); 271d4b5cae4SRobert Watson } 272d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQMAXLEN, intr_queue_maxlen, 273d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_netinet_intr_queue_maxlen, "I", 274d4b5cae4SRobert Watson "Maximum size of the IP input queue"); 275d4b5cae4SRobert Watson 276d4b5cae4SRobert Watson static int 277d4b5cae4SRobert Watson sysctl_netinet_intr_queue_drops(SYSCTL_HANDLER_ARGS) 278d4b5cae4SRobert Watson { 279d4b5cae4SRobert Watson u_int64_t qdrops_long; 280d4b5cae4SRobert Watson int error, qdrops; 281d4b5cae4SRobert Watson 282d4b5cae4SRobert Watson netisr_getqdrops(&ip_nh, &qdrops_long); 283d4b5cae4SRobert Watson qdrops = qdrops_long; 284d4b5cae4SRobert Watson error = sysctl_handle_int(oidp, &qdrops, 0, req); 285d4b5cae4SRobert Watson if (error || !req->newptr) 286d4b5cae4SRobert Watson return (error); 
287d4b5cae4SRobert Watson if (qdrops != 0) 288d4b5cae4SRobert Watson return (EINVAL); 289d4b5cae4SRobert Watson netisr_clearqdrops(&ip_nh); 290d4b5cae4SRobert Watson return (0); 291d4b5cae4SRobert Watson } 292d4b5cae4SRobert Watson 293d4b5cae4SRobert Watson SYSCTL_PROC(_net_inet_ip, IPCTL_INTRQDROPS, intr_queue_drops, 294d4b5cae4SRobert Watson CTLTYPE_INT|CTLFLAG_RD, 0, 0, sysctl_netinet_intr_queue_drops, "I", 295d4b5cae4SRobert Watson "Number of packets dropped from the IP input queue"); 296d4b5cae4SRobert Watson 297df8bae1dSRodney W. Grimes /* 298df8bae1dSRodney W. Grimes * IP initialization: fill in IP protocol switch table. 299df8bae1dSRodney W. Grimes * All protocols not implemented in kernel go to raw IP protocol handler. 300df8bae1dSRodney W. Grimes */ 301df8bae1dSRodney W. Grimes void 302f2565d68SRobert Watson ip_init(void) 303df8bae1dSRodney W. Grimes { 304f2565d68SRobert Watson struct protosw *pr; 305f2565d68SRobert Watson int i; 306df8bae1dSRodney W. Grimes 307a511354aSRobert Watson V_ip_id = time_second & 0xffff; 308a511354aSRobert Watson 309603724d3SBjoern A. Zeeb TAILQ_INIT(&V_in_ifaddrhead); 310603724d3SBjoern A. Zeeb V_in_ifaddrhashtbl = hashinit(INADDR_NHASH, M_IFADDR, &V_in_ifaddrhmask); 3111ed81b73SMarko Zec 3121ed81b73SMarko Zec /* Initialize IP reassembly queue. */ 3131ed81b73SMarko Zec for (i = 0; i < IPREASS_NHASH; i++) 3141ed81b73SMarko Zec TAILQ_INIT(&V_ipq[i]); 3151ed81b73SMarko Zec V_maxnipq = nmbclusters / 32; 3161ed81b73SMarko Zec V_maxfragsperpacket = 16; 3171ed81b73SMarko Zec V_ipq_zone = uma_zcreate("ipq", sizeof(struct ipq), NULL, NULL, NULL, 3181ed81b73SMarko Zec NULL, UMA_ALIGN_PTR, 0); 3191ed81b73SMarko Zec maxnipq_update(); 3201ed81b73SMarko Zec 3210b4b0b0fSJulian Elischer /* Initialize packet filter hooks. 
*/ 3220b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_type = PFIL_TYPE_AF; 3230b4b0b0fSJulian Elischer V_inet_pfil_hook.ph_af = AF_INET; 3240b4b0b0fSJulian Elischer if ((i = pfil_head_register(&V_inet_pfil_hook)) != 0) 3250b4b0b0fSJulian Elischer printf("%s: WARNING: unable to register pfil hook, " 3260b4b0b0fSJulian Elischer "error %d\n", __func__, i); 3270b4b0b0fSJulian Elischer 328fa057b15SMarko Zec #ifdef FLOWTABLE 329fa057b15SMarko Zec TUNABLE_INT_FETCH("net.inet.ip.output_flowtable_size", 330fa057b15SMarko Zec &V_ip_output_flowtable_size); 331fa057b15SMarko Zec V_ip_ft = flowtable_alloc(V_ip_output_flowtable_size, FL_PCPU); 332fa057b15SMarko Zec #endif 333fa057b15SMarko Zec 3341ed81b73SMarko Zec /* Skip initialization of globals for non-default instances. */ 3351ed81b73SMarko Zec if (!IS_DEFAULT_VNET(curvnet)) 3361ed81b73SMarko Zec return; 3371ed81b73SMarko Zec 338f0ffb944SJulian Elischer pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW); 33902410549SRobert Watson if (pr == NULL) 340db09bef3SAndre Oppermann panic("ip_init: PF_INET not found"); 341db09bef3SAndre Oppermann 342db09bef3SAndre Oppermann /* Initialize the entire ip_protox[] array to IPPROTO_RAW. */ 343df8bae1dSRodney W. Grimes for (i = 0; i < IPPROTO_MAX; i++) 344df8bae1dSRodney W. Grimes ip_protox[i] = pr - inetsw; 345db09bef3SAndre Oppermann /* 346db09bef3SAndre Oppermann * Cycle through IP protocols and put them into the appropriate place 347db09bef3SAndre Oppermann * in ip_protox[]. 348db09bef3SAndre Oppermann */ 349f0ffb944SJulian Elischer for (pr = inetdomain.dom_protosw; 350f0ffb944SJulian Elischer pr < inetdomain.dom_protoswNPROTOSW; pr++) 351df8bae1dSRodney W. Grimes if (pr->pr_domain->dom_family == PF_INET && 352db09bef3SAndre Oppermann pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW) { 353db09bef3SAndre Oppermann /* Be careful to only index valid IP protocols. */ 354db77984cSSam Leffler if (pr->pr_protocol < IPPROTO_MAX) 355df8bae1dSRodney W. 
Grimes ip_protox[pr->pr_protocol] = pr - inetsw; 356db09bef3SAndre Oppermann } 357194a213eSAndrey A. Chernov 3585f311da2SMike Silbersack /* Start ipport_tick. */ 3595f311da2SMike Silbersack callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); 36021ca7b57SMarko Zec callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL); 3615f311da2SMike Silbersack EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, 3625f311da2SMike Silbersack SHUTDOWN_PRI_DEFAULT); 3634f590175SPaul Saab EVENTHANDLER_REGISTER(nmbclusters_change, ipq_zone_change, 3644f590175SPaul Saab NULL, EVENTHANDLER_PRI_ANY); 3655f311da2SMike Silbersack 366db09bef3SAndre Oppermann /* Initialize various other remaining things. */ 3671ed81b73SMarko Zec IPQ_LOCK_INIT(); 368d4b5cae4SRobert Watson netisr_register(&ip_nh); 369df8bae1dSRodney W. Grimes } 370df8bae1dSRodney W. Grimes 371f2565d68SRobert Watson void 372f2565d68SRobert Watson ip_fini(void *xtp) 3735f311da2SMike Silbersack { 374f2565d68SRobert Watson 3755f311da2SMike Silbersack callout_stop(&ipport_tick_callout); 3765f311da2SMike Silbersack } 3775f311da2SMike Silbersack 3784d2e3692SLuigi Rizzo /* 379df8bae1dSRodney W. Grimes * Ip input routine. Checksum and byte swap header. If fragmented 380df8bae1dSRodney W. Grimes * try to reassemble. Process options. Pass to next level. 381df8bae1dSRodney W. Grimes */ 382c67b1d17SGarrett Wollman void 383c67b1d17SGarrett Wollman ip_input(struct mbuf *m) 384df8bae1dSRodney W. Grimes { 3859188b4a1SAndre Oppermann struct ip *ip = NULL; 3865da9f8faSJosef Karthauser struct in_ifaddr *ia = NULL; 387ca925d9cSJonathan Lemon struct ifaddr *ifa; 3880aade26eSRobert Watson struct ifnet *ifp; 3899b932e9eSAndre Oppermann int checkif, hlen = 0; 39047c861ecSBrian Somers u_short sum; 39102c1c707SAndre Oppermann int dchg = 0; /* dest changed after fw */ 392f51f805fSSam Leffler struct in_addr odst; /* original dst address */ 393b715f178SLuigi Rizzo 394fe584538SDag-Erling Smørgrav M_ASSERTPKTHDR(m); 395db40007dSAndrew R. 
Reiter 396ac9d7e26SMax Laier if (m->m_flags & M_FASTFWD_OURS) { 3979b932e9eSAndre Oppermann /* 39876ff6dcfSAndre Oppermann * Firewall or NAT changed destination to local. 39976ff6dcfSAndre Oppermann * We expect ip_len and ip_off to be in host byte order. 4009b932e9eSAndre Oppermann */ 40176ff6dcfSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 40276ff6dcfSAndre Oppermann /* Set up some basics that will be used later. */ 4032b25acc1SLuigi Rizzo ip = mtod(m, struct ip *); 40453be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 4059b932e9eSAndre Oppermann goto ours; 4062b25acc1SLuigi Rizzo } 4072b25acc1SLuigi Rizzo 40886425c62SRobert Watson IPSTAT_INC(ips_total); 40958938916SGarrett Wollman 41058938916SGarrett Wollman if (m->m_pkthdr.len < sizeof(struct ip)) 41158938916SGarrett Wollman goto tooshort; 41258938916SGarrett Wollman 413df8bae1dSRodney W. Grimes if (m->m_len < sizeof (struct ip) && 4140b17fba7SAndre Oppermann (m = m_pullup(m, sizeof (struct ip))) == NULL) { 41586425c62SRobert Watson IPSTAT_INC(ips_toosmall); 416c67b1d17SGarrett Wollman return; 417df8bae1dSRodney W. Grimes } 418df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 41958938916SGarrett Wollman 42053be11f6SPoul-Henning Kamp if (ip->ip_v != IPVERSION) { 42186425c62SRobert Watson IPSTAT_INC(ips_badvers); 422df8bae1dSRodney W. Grimes goto bad; 423df8bae1dSRodney W. Grimes } 42458938916SGarrett Wollman 42553be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 426df8bae1dSRodney W. Grimes if (hlen < sizeof(struct ip)) { /* minimum header length */ 42786425c62SRobert Watson IPSTAT_INC(ips_badhlen); 428df8bae1dSRodney W. Grimes goto bad; 429df8bae1dSRodney W. Grimes } 430df8bae1dSRodney W. Grimes if (hlen > m->m_len) { 4310b17fba7SAndre Oppermann if ((m = m_pullup(m, hlen)) == NULL) { 43286425c62SRobert Watson IPSTAT_INC(ips_badhlen); 433c67b1d17SGarrett Wollman return; 434df8bae1dSRodney W. Grimes } 435df8bae1dSRodney W. Grimes ip = mtod(m, struct ip *); 436df8bae1dSRodney W. 
Grimes } 43733841545SHajimu UMEMOTO 43833841545SHajimu UMEMOTO /* 127/8 must not appear on wire - RFC1122 */ 4390aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 44033841545SHajimu UMEMOTO if ((ntohl(ip->ip_dst.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET || 44133841545SHajimu UMEMOTO (ntohl(ip->ip_src.s_addr) >> IN_CLASSA_NSHIFT) == IN_LOOPBACKNET) { 4420aade26eSRobert Watson if ((ifp->if_flags & IFF_LOOPBACK) == 0) { 44386425c62SRobert Watson IPSTAT_INC(ips_badaddr); 44433841545SHajimu UMEMOTO goto bad; 44533841545SHajimu UMEMOTO } 44633841545SHajimu UMEMOTO } 44733841545SHajimu UMEMOTO 448db4f9cc7SJonathan Lemon if (m->m_pkthdr.csum_flags & CSUM_IP_CHECKED) { 449db4f9cc7SJonathan Lemon sum = !(m->m_pkthdr.csum_flags & CSUM_IP_VALID); 450db4f9cc7SJonathan Lemon } else { 45158938916SGarrett Wollman if (hlen == sizeof(struct ip)) { 45247c861ecSBrian Somers sum = in_cksum_hdr(ip); 45358938916SGarrett Wollman } else { 45447c861ecSBrian Somers sum = in_cksum(m, hlen); 45558938916SGarrett Wollman } 456db4f9cc7SJonathan Lemon } 45747c861ecSBrian Somers if (sum) { 45886425c62SRobert Watson IPSTAT_INC(ips_badsum); 459df8bae1dSRodney W. Grimes goto bad; 460df8bae1dSRodney W. Grimes } 461df8bae1dSRodney W. Grimes 46202b199f1SMax Laier #ifdef ALTQ 46302b199f1SMax Laier if (altq_input != NULL && (*altq_input)(m, AF_INET) == 0) 46402b199f1SMax Laier /* packet is dropped by traffic conditioner */ 46502b199f1SMax Laier return; 46602b199f1SMax Laier #endif 46702b199f1SMax Laier 468df8bae1dSRodney W. Grimes /* 469df8bae1dSRodney W. Grimes * Convert fields to host representation. 470df8bae1dSRodney W. Grimes */ 471fd8e4ebcSMike Barcroft ip->ip_len = ntohs(ip->ip_len); 472df8bae1dSRodney W. Grimes if (ip->ip_len < hlen) { 47386425c62SRobert Watson IPSTAT_INC(ips_badlen); 474df8bae1dSRodney W. Grimes goto bad; 475df8bae1dSRodney W. Grimes } 476fd8e4ebcSMike Barcroft ip->ip_off = ntohs(ip->ip_off); 477df8bae1dSRodney W. Grimes 478df8bae1dSRodney W. Grimes /* 479df8bae1dSRodney W. 
Grimes * Check that the amount of data in the buffers 480df8bae1dSRodney W. Grimes * is as at least much as the IP header would have us expect. 481df8bae1dSRodney W. Grimes * Trim mbufs if longer than we expect. 482df8bae1dSRodney W. Grimes * Drop packet if shorter than we expect. 483df8bae1dSRodney W. Grimes */ 484df8bae1dSRodney W. Grimes if (m->m_pkthdr.len < ip->ip_len) { 48558938916SGarrett Wollman tooshort: 48686425c62SRobert Watson IPSTAT_INC(ips_tooshort); 487df8bae1dSRodney W. Grimes goto bad; 488df8bae1dSRodney W. Grimes } 489df8bae1dSRodney W. Grimes if (m->m_pkthdr.len > ip->ip_len) { 490df8bae1dSRodney W. Grimes if (m->m_len == m->m_pkthdr.len) { 491df8bae1dSRodney W. Grimes m->m_len = ip->ip_len; 492df8bae1dSRodney W. Grimes m->m_pkthdr.len = ip->ip_len; 493df8bae1dSRodney W. Grimes } else 494df8bae1dSRodney W. Grimes m_adj(m, ip->ip_len - m->m_pkthdr.len); 495df8bae1dSRodney W. Grimes } 496b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 49714dd6717SSam Leffler /* 49814dd6717SSam Leffler * Bypass packet filtering for packets from a tunnel (gif). 49914dd6717SSam Leffler */ 500cc977adcSBjoern A. Zeeb if (ip_ipsec_filtertunnel(m)) 501c21fd232SAndre Oppermann goto passin; 502b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 5033f67c834SDon Lewis 504c4ac87eaSDarren Reed /* 505134ea224SSam Leffler * Run through list of hooks for input packets. 506f51f805fSSam Leffler * 507f51f805fSSam Leffler * NB: Beware of the destination address changing (e.g. 508f51f805fSSam Leffler * by NAT rewriting). When this happens, tell 509f51f805fSSam Leffler * ip_forward to do the right thing. 510c4ac87eaSDarren Reed */ 511c21fd232SAndre Oppermann 512c21fd232SAndre Oppermann /* Jump over all PFIL processing if hooks are not active. 
*/ 5130b4b0b0fSJulian Elischer if (!PFIL_HOOKED(&V_inet_pfil_hook)) 514c21fd232SAndre Oppermann goto passin; 515c21fd232SAndre Oppermann 516f51f805fSSam Leffler odst = ip->ip_dst; 5170b4b0b0fSJulian Elischer if (pfil_run_hooks(&V_inet_pfil_hook, &m, ifp, PFIL_IN, NULL) != 0) 518beec8214SDarren Reed return; 519134ea224SSam Leffler if (m == NULL) /* consumed by filter */ 520c4ac87eaSDarren Reed return; 5219b932e9eSAndre Oppermann 522c4ac87eaSDarren Reed ip = mtod(m, struct ip *); 52302c1c707SAndre Oppermann dchg = (odst.s_addr != ip->ip_dst.s_addr); 5240aade26eSRobert Watson ifp = m->m_pkthdr.rcvif; 5259b932e9eSAndre Oppermann 5269b932e9eSAndre Oppermann #ifdef IPFIREWALL_FORWARD 5279b932e9eSAndre Oppermann if (m->m_flags & M_FASTFWD_OURS) { 5289b932e9eSAndre Oppermann m->m_flags &= ~M_FASTFWD_OURS; 5299b932e9eSAndre Oppermann goto ours; 5309b932e9eSAndre Oppermann } 531099dd043SAndre Oppermann if ((dchg = (m_tag_find(m, PACKET_TAG_IPFORWARD, NULL) != NULL)) != 0) { 532099dd043SAndre Oppermann /* 533099dd043SAndre Oppermann * Directly ship on the packet. This allows to forward packets 534099dd043SAndre Oppermann * that were destined for us to some other directly connected 535099dd043SAndre Oppermann * host. 536099dd043SAndre Oppermann */ 537099dd043SAndre Oppermann ip_forward(m, dchg); 538099dd043SAndre Oppermann return; 539099dd043SAndre Oppermann } 5409b932e9eSAndre Oppermann #endif /* IPFIREWALL_FORWARD */ 5419b932e9eSAndre Oppermann 542c21fd232SAndre Oppermann passin: 543df8bae1dSRodney W. Grimes /* 544df8bae1dSRodney W. Grimes * Process options and, if not destined for us, 545df8bae1dSRodney W. Grimes * ship it on. ip_dooptions returns 1 when an 546df8bae1dSRodney W. Grimes * error was detected (causing an icmp message 547df8bae1dSRodney W. Grimes * to be sent and the original packet to be freed). 548df8bae1dSRodney W. 
Grimes */ 5499b932e9eSAndre Oppermann if (hlen > sizeof (struct ip) && ip_dooptions(m, 0)) 550c67b1d17SGarrett Wollman return; 551df8bae1dSRodney W. Grimes 552f0068c4aSGarrett Wollman /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no 553f0068c4aSGarrett Wollman * matter if it is destined to another node, or whether it is 554f0068c4aSGarrett Wollman * a multicast one, RSVP wants it! and prevents it from being forwarded 555f0068c4aSGarrett Wollman * anywhere else. Also checks if the rsvp daemon is running before 556f0068c4aSGarrett Wollman * grabbing the packet. 557f0068c4aSGarrett Wollman */ 558603724d3SBjoern A. Zeeb if (V_rsvp_on && ip->ip_p==IPPROTO_RSVP) 559f0068c4aSGarrett Wollman goto ours; 560f0068c4aSGarrett Wollman 561df8bae1dSRodney W. Grimes /* 562df8bae1dSRodney W. Grimes * Check our list of addresses, to see if the packet is for us. 563cc766e04SGarrett Wollman * If we don't have any addresses, assume any unicast packet 564cc766e04SGarrett Wollman * we receive might be for us (and let the upper layers deal 565cc766e04SGarrett Wollman * with it). 566df8bae1dSRodney W. Grimes */ 567603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_in_ifaddrhead) && 568cc766e04SGarrett Wollman (m->m_flags & (M_MCAST|M_BCAST)) == 0) 569cc766e04SGarrett Wollman goto ours; 570cc766e04SGarrett Wollman 5717538a9a0SJonathan Lemon /* 572823db0e9SDon Lewis * Enable a consistency check between the destination address 573823db0e9SDon Lewis * and the arrival interface for a unicast packet (the RFC 1122 574823db0e9SDon Lewis * strong ES model) if IP forwarding is disabled and the packet 575e15ae1b2SDon Lewis * is not locally generated and the packet is not subject to 576e15ae1b2SDon Lewis * 'ipfw fwd'. 5773f67c834SDon Lewis * 5783f67c834SDon Lewis * XXX - Checking also should be disabled if the destination 5793f67c834SDon Lewis * address is ipnat'ed to a different interface. 
5803f67c834SDon Lewis * 581a8f12100SDon Lewis * XXX - Checking is incompatible with IP aliases added 5823f67c834SDon Lewis * to the loopback interface instead of the interface where 5833f67c834SDon Lewis * the packets are received. 584a9771948SGleb Smirnoff * 585a9771948SGleb Smirnoff * XXX - This is the case for carp vhost IPs as well so we 586a9771948SGleb Smirnoff * insert a workaround. If the packet got here, we already 587a9771948SGleb Smirnoff * checked with carp_iamatch() and carp_forus(). 588823db0e9SDon Lewis */ 589603724d3SBjoern A. Zeeb checkif = V_ip_checkinterface && (V_ipforwarding == 0) && 5900aade26eSRobert Watson ifp != NULL && ((ifp->if_flags & IFF_LOOPBACK) == 0) && 591a9771948SGleb Smirnoff #ifdef DEV_CARP 5920aade26eSRobert Watson !ifp->if_carp && 593a9771948SGleb Smirnoff #endif 5949b932e9eSAndre Oppermann (dchg == 0); 595823db0e9SDon Lewis 596ca925d9cSJonathan Lemon /* 597ca925d9cSJonathan Lemon * Check for exact addresses in the hash bucket. 598ca925d9cSJonathan Lemon */ 5992d9cfabaSRobert Watson /* IN_IFADDR_RLOCK(); */ 6009b932e9eSAndre Oppermann LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { 601f9e354dfSJulian Elischer /* 602823db0e9SDon Lewis * If the address matches, verify that the packet 603823db0e9SDon Lewis * arrived via the correct interface if checking is 604823db0e9SDon Lewis * enabled. 605f9e354dfSJulian Elischer */ 6069b932e9eSAndre Oppermann if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr && 6078c0fec80SRobert Watson (!checkif || ia->ia_ifp == ifp)) { 6088c0fec80SRobert Watson ifa_ref(&ia->ia_ifa); 6092d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 610ed1ff184SJulian Elischer goto ours; 611ca925d9cSJonathan Lemon } 6128c0fec80SRobert Watson } 6132d9cfabaSRobert Watson /* IN_IFADDR_RUNLOCK(); */ 6142d9cfabaSRobert Watson 615823db0e9SDon Lewis /* 616ca925d9cSJonathan Lemon * Check for broadcast addresses. 
617ca925d9cSJonathan Lemon * 618ca925d9cSJonathan Lemon * Only accept broadcast packets that arrive via the matching 619ca925d9cSJonathan Lemon * interface. Reception of forwarded directed broadcasts would 620ca925d9cSJonathan Lemon * be handled via ip_forward() and ether_output() with the loopback 621ca925d9cSJonathan Lemon * into the stack for SIMPLEX interfaces handled by ether_output(). 622823db0e9SDon Lewis */ 6230aade26eSRobert Watson if (ifp != NULL && ifp->if_flags & IFF_BROADCAST) { 6240aade26eSRobert Watson IF_ADDR_LOCK(ifp); 6250aade26eSRobert Watson TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 626ca925d9cSJonathan Lemon if (ifa->ifa_addr->sa_family != AF_INET) 627ca925d9cSJonathan Lemon continue; 628ca925d9cSJonathan Lemon ia = ifatoia(ifa); 629df8bae1dSRodney W. Grimes if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr == 6300aade26eSRobert Watson ip->ip_dst.s_addr) { 6318c0fec80SRobert Watson ifa_ref(ifa); 6320aade26eSRobert Watson IF_ADDR_UNLOCK(ifp); 633df8bae1dSRodney W. Grimes goto ours; 6340aade26eSRobert Watson } 6350aade26eSRobert Watson if (ia->ia_netbroadcast.s_addr == ip->ip_dst.s_addr) { 6368c0fec80SRobert Watson ifa_ref(ifa); 6370aade26eSRobert Watson IF_ADDR_UNLOCK(ifp); 638df8bae1dSRodney W. Grimes goto ours; 6390aade26eSRobert Watson } 6400ac40133SBrian Somers #ifdef BOOTP_COMPAT 6410aade26eSRobert Watson if (IA_SIN(ia)->sin_addr.s_addr == INADDR_ANY) { 6428c0fec80SRobert Watson ifa_ref(ifa); 6430aade26eSRobert Watson IF_ADDR_UNLOCK(ifp); 644ca925d9cSJonathan Lemon goto ours; 6450aade26eSRobert Watson } 6460ac40133SBrian Somers #endif 647df8bae1dSRodney W. Grimes } 6480aade26eSRobert Watson IF_ADDR_UNLOCK(ifp); 64919e5b0a7SRobert Watson ia = NULL; 650df8bae1dSRodney W. Grimes } 651f8429ca2SBruce M Simpson /* RFC 3927 2.7: Do not forward datagrams for 169.254.0.0/16. 
*/ 652f8429ca2SBruce M Simpson if (IN_LINKLOCAL(ntohl(ip->ip_dst.s_addr))) { 65386425c62SRobert Watson IPSTAT_INC(ips_cantforward); 654f8429ca2SBruce M Simpson m_freem(m); 655f8429ca2SBruce M Simpson return; 656f8429ca2SBruce M Simpson } 657df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { 658603724d3SBjoern A. Zeeb if (V_ip_mrouter) { 659df8bae1dSRodney W. Grimes /* 660df8bae1dSRodney W. Grimes * If we are acting as a multicast router, all 661df8bae1dSRodney W. Grimes * incoming multicast packets are passed to the 662df8bae1dSRodney W. Grimes * kernel-level multicast forwarding function. 663df8bae1dSRodney W. Grimes * The packet is returned (relatively) intact; if 664df8bae1dSRodney W. Grimes * ip_mforward() returns a non-zero value, the packet 665df8bae1dSRodney W. Grimes * must be discarded, else it may be accepted below. 666df8bae1dSRodney W. Grimes */ 6670aade26eSRobert Watson if (ip_mforward && ip_mforward(ip, ifp, m, 0) != 0) { 66886425c62SRobert Watson IPSTAT_INC(ips_cantforward); 669df8bae1dSRodney W. Grimes m_freem(m); 670c67b1d17SGarrett Wollman return; 671df8bae1dSRodney W. Grimes } 672df8bae1dSRodney W. Grimes 673df8bae1dSRodney W. Grimes /* 67411612afaSDima Dorfman * The process-level routing daemon needs to receive 675df8bae1dSRodney W. Grimes * all multicast IGMP packets, whether or not this 676df8bae1dSRodney W. Grimes * host belongs to their destination groups. 677df8bae1dSRodney W. Grimes */ 678df8bae1dSRodney W. Grimes if (ip->ip_p == IPPROTO_IGMP) 679df8bae1dSRodney W. Grimes goto ours; 68086425c62SRobert Watson IPSTAT_INC(ips_forward); 681df8bae1dSRodney W. Grimes } 682df8bae1dSRodney W. Grimes /* 683d10910e6SBruce M Simpson * Assume the packet is for us, to avoid prematurely taking 684d10910e6SBruce M Simpson * a lock on the in_multi hash. Protocols must perform 685d10910e6SBruce M Simpson * their own filtering and update statistics accordingly. 686df8bae1dSRodney W. Grimes */ 687df8bae1dSRodney W. 
Grimes goto ours; 688df8bae1dSRodney W. Grimes } 689df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST) 690df8bae1dSRodney W. Grimes goto ours; 691df8bae1dSRodney W. Grimes if (ip->ip_dst.s_addr == INADDR_ANY) 692df8bae1dSRodney W. Grimes goto ours; 693df8bae1dSRodney W. Grimes 6946a800098SYoshinobu Inoue /* 6956a800098SYoshinobu Inoue * FAITH(Firewall Aided Internet Translator) 6966a800098SYoshinobu Inoue */ 6970aade26eSRobert Watson if (ifp && ifp->if_type == IFT_FAITH) { 698603724d3SBjoern A. Zeeb if (V_ip_keepfaith) { 6996a800098SYoshinobu Inoue if (ip->ip_p == IPPROTO_TCP || ip->ip_p == IPPROTO_ICMP) 7006a800098SYoshinobu Inoue goto ours; 7016a800098SYoshinobu Inoue } 7026a800098SYoshinobu Inoue m_freem(m); 7036a800098SYoshinobu Inoue return; 7046a800098SYoshinobu Inoue } 7059494d596SBrooks Davis 706df8bae1dSRodney W. Grimes /* 707df8bae1dSRodney W. Grimes * Not for us; forward if possible and desirable. 708df8bae1dSRodney W. Grimes */ 709603724d3SBjoern A. Zeeb if (V_ipforwarding == 0) { 71086425c62SRobert Watson IPSTAT_INC(ips_cantforward); 711df8bae1dSRodney W. Grimes m_freem(m); 712546f251bSChris D. Faulhaber } else { 713b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 7141dfcf0d2SAndre Oppermann if (ip_ipsec_fwd(m)) 715546f251bSChris D. Faulhaber goto bad; 716b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 7179b932e9eSAndre Oppermann ip_forward(m, dchg); 718546f251bSChris D. Faulhaber } 719c67b1d17SGarrett Wollman return; 720df8bae1dSRodney W. Grimes 721df8bae1dSRodney W. Grimes ours: 722d0ebc0d2SYaroslav Tykhiy #ifdef IPSTEALTH 723d0ebc0d2SYaroslav Tykhiy /* 724d0ebc0d2SYaroslav Tykhiy * IPSTEALTH: Process non-routing options only 725d0ebc0d2SYaroslav Tykhiy * if the packet is destined for us. 
726d0ebc0d2SYaroslav Tykhiy */ 72719e5b0a7SRobert Watson if (V_ipstealth && hlen > sizeof (struct ip) && ip_dooptions(m, 1)) { 72819e5b0a7SRobert Watson if (ia != NULL) 72919e5b0a7SRobert Watson ifa_free(&ia->ia_ifa); 730d0ebc0d2SYaroslav Tykhiy return; 73119e5b0a7SRobert Watson } 732d0ebc0d2SYaroslav Tykhiy #endif /* IPSTEALTH */ 733d0ebc0d2SYaroslav Tykhiy 7345da9f8faSJosef Karthauser /* Count the packet in the ip address stats */ 7355da9f8faSJosef Karthauser if (ia != NULL) { 7365da9f8faSJosef Karthauser ia->ia_ifa.if_ipackets++; 7375da9f8faSJosef Karthauser ia->ia_ifa.if_ibytes += m->m_pkthdr.len; 7388c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 7395da9f8faSJosef Karthauser } 740100ba1a6SJordan K. Hubbard 74163f8d699SJordan K. Hubbard /* 742b6ea1aa5SRuslan Ermilov * Attempt reassembly; if it succeeds, proceed. 743ac9d7e26SMax Laier * ip_reass() will return a different mbuf. 744df8bae1dSRodney W. Grimes */ 745f0cada84SAndre Oppermann if (ip->ip_off & (IP_MF | IP_OFFMASK)) { 746f0cada84SAndre Oppermann m = ip_reass(m); 747f0cada84SAndre Oppermann if (m == NULL) 748c67b1d17SGarrett Wollman return; 7496a800098SYoshinobu Inoue ip = mtod(m, struct ip *); 7507e2df452SRuslan Ermilov /* Get the header length of the reassembled packet */ 75153be11f6SPoul-Henning Kamp hlen = ip->ip_hl << 2; 752f0cada84SAndre Oppermann } 753f0cada84SAndre Oppermann 754f0cada84SAndre Oppermann /* 755f0cada84SAndre Oppermann * Further protocols expect the packet length to be w/o the 756f0cada84SAndre Oppermann * IP header. 757f0cada84SAndre Oppermann */ 758df8bae1dSRodney W. Grimes ip->ip_len -= hlen; 759df8bae1dSRodney W. Grimes 760b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 76133841545SHajimu UMEMOTO /* 76233841545SHajimu UMEMOTO * enforce IPsec policy checking if we are seeing last header. 76333841545SHajimu UMEMOTO * note that we do not visit this with protocols with pcb layer 76433841545SHajimu UMEMOTO * code - like udp/tcp/raw ip. 
76533841545SHajimu UMEMOTO */ 7661dfcf0d2SAndre Oppermann if (ip_ipsec_input(m)) 76733841545SHajimu UMEMOTO goto bad; 768b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 76933841545SHajimu UMEMOTO 770df8bae1dSRodney W. Grimes /* 771df8bae1dSRodney W. Grimes * Switch out to protocol's input routine. 772df8bae1dSRodney W. Grimes */ 77386425c62SRobert Watson IPSTAT_INC(ips_delivered); 7749b932e9eSAndre Oppermann 7752b25acc1SLuigi Rizzo (*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen); 776c67b1d17SGarrett Wollman return; 777df8bae1dSRodney W. Grimes bad: 778df8bae1dSRodney W. Grimes m_freem(m); 779c67b1d17SGarrett Wollman } 780c67b1d17SGarrett Wollman 781c67b1d17SGarrett Wollman /* 782d248c7d7SRobert Watson * After maxnipq has been updated, propagate the change to UMA. The UMA zone 783d248c7d7SRobert Watson * max has slightly different semantics than the sysctl, for historical 784d248c7d7SRobert Watson * reasons. 785d248c7d7SRobert Watson */ 786d248c7d7SRobert Watson static void 787d248c7d7SRobert Watson maxnipq_update(void) 788d248c7d7SRobert Watson { 789d248c7d7SRobert Watson 790d248c7d7SRobert Watson /* 791d248c7d7SRobert Watson * -1 for unlimited allocation. 792d248c7d7SRobert Watson */ 793603724d3SBjoern A. Zeeb if (V_maxnipq < 0) 794603724d3SBjoern A. Zeeb uma_zone_set_max(V_ipq_zone, 0); 795d248c7d7SRobert Watson /* 796d248c7d7SRobert Watson * Positive number for specific bound. 797d248c7d7SRobert Watson */ 798603724d3SBjoern A. Zeeb if (V_maxnipq > 0) 799603724d3SBjoern A. Zeeb uma_zone_set_max(V_ipq_zone, V_maxnipq); 800d248c7d7SRobert Watson /* 801d248c7d7SRobert Watson * Zero specifies no further fragment queue allocation -- set the 802d248c7d7SRobert Watson * bound very low, but rely on implementation elsewhere to actually 803d248c7d7SRobert Watson * prevent allocation and reclaim current queues. 804d248c7d7SRobert Watson */ 805603724d3SBjoern A. Zeeb if (V_maxnipq == 0) 806603724d3SBjoern A. 
Zeeb uma_zone_set_max(V_ipq_zone, 1); 807d248c7d7SRobert Watson } 808d248c7d7SRobert Watson 8094f590175SPaul Saab static void 8104f590175SPaul Saab ipq_zone_change(void *tag) 8114f590175SPaul Saab { 8124f590175SPaul Saab 813603724d3SBjoern A. Zeeb if (V_maxnipq > 0 && V_maxnipq < (nmbclusters / 32)) { 814603724d3SBjoern A. Zeeb V_maxnipq = nmbclusters / 32; 8154f590175SPaul Saab maxnipq_update(); 8164f590175SPaul Saab } 8174f590175SPaul Saab } 8184f590175SPaul Saab 819d248c7d7SRobert Watson static int 820d248c7d7SRobert Watson sysctl_maxnipq(SYSCTL_HANDLER_ARGS) 821d248c7d7SRobert Watson { 822d248c7d7SRobert Watson int error, i; 823d248c7d7SRobert Watson 824603724d3SBjoern A. Zeeb i = V_maxnipq; 825d248c7d7SRobert Watson error = sysctl_handle_int(oidp, &i, 0, req); 826d248c7d7SRobert Watson if (error || !req->newptr) 827d248c7d7SRobert Watson return (error); 828d248c7d7SRobert Watson 829d248c7d7SRobert Watson /* 830d248c7d7SRobert Watson * XXXRW: Might be a good idea to sanity check the argument and place 831d248c7d7SRobert Watson * an extreme upper bound. 832d248c7d7SRobert Watson */ 833d248c7d7SRobert Watson if (i < -1) 834d248c7d7SRobert Watson return (EINVAL); 835603724d3SBjoern A. Zeeb V_maxnipq = i; 836d248c7d7SRobert Watson maxnipq_update(); 837d248c7d7SRobert Watson return (0); 838d248c7d7SRobert Watson } 839d248c7d7SRobert Watson 840d248c7d7SRobert Watson SYSCTL_PROC(_net_inet_ip, OID_AUTO, maxfragpackets, CTLTYPE_INT|CTLFLAG_RW, 841d248c7d7SRobert Watson NULL, 0, sysctl_maxnipq, "I", 842d248c7d7SRobert Watson "Maximum number of IPv4 fragment reassembly queue entries"); 843d248c7d7SRobert Watson 844d248c7d7SRobert Watson /* 8458948e4baSArchie Cobbs * Take incoming datagram fragment and try to reassemble it into 846f0cada84SAndre Oppermann * whole datagram. If the argument is the first fragment or one 847f0cada84SAndre Oppermann * in between the function will return NULL and store the mbuf 848f0cada84SAndre Oppermann * in the fragment chain. 
If the argument is the last fragment 849f0cada84SAndre Oppermann * the packet will be reassembled and the pointer to the new 850f0cada84SAndre Oppermann * mbuf returned for further processing. Only m_tags attached 851f0cada84SAndre Oppermann * to the first packet/fragment are preserved. 852f0cada84SAndre Oppermann * The IP header is *NOT* adjusted out of iplen. 853df8bae1dSRodney W. Grimes */ 854f0cada84SAndre Oppermann struct mbuf * 855f0cada84SAndre Oppermann ip_reass(struct mbuf *m) 856df8bae1dSRodney W. Grimes { 857f0cada84SAndre Oppermann struct ip *ip; 858f0cada84SAndre Oppermann struct mbuf *p, *q, *nq, *t; 859f0cada84SAndre Oppermann struct ipq *fp = NULL; 860f0cada84SAndre Oppermann struct ipqhead *head; 861f0cada84SAndre Oppermann int i, hlen, next; 86259dfcba4SHajimu UMEMOTO u_int8_t ecn, ecn0; 863f0cada84SAndre Oppermann u_short hash; 864df8bae1dSRodney W. Grimes 865800af1fbSMaxim Konovalov /* If maxnipq or maxfragsperpacket are 0, never accept fragments. */ 866603724d3SBjoern A. Zeeb if (V_maxnipq == 0 || V_maxfragsperpacket == 0) { 86786425c62SRobert Watson IPSTAT_INC(ips_fragments); 86886425c62SRobert Watson IPSTAT_INC(ips_fragdropped); 8699d804f81SAndre Oppermann m_freem(m); 8709d804f81SAndre Oppermann return (NULL); 871f0cada84SAndre Oppermann } 8722fad1e93SSam Leffler 873f0cada84SAndre Oppermann ip = mtod(m, struct ip *); 874f0cada84SAndre Oppermann hlen = ip->ip_hl << 2; 875f0cada84SAndre Oppermann 876f0cada84SAndre Oppermann hash = IPREASS_HASH(ip->ip_src.s_addr, ip->ip_id); 877603724d3SBjoern A. Zeeb head = &V_ipq[hash]; 878f0cada84SAndre Oppermann IPQ_LOCK(); 879f0cada84SAndre Oppermann 880f0cada84SAndre Oppermann /* 881f0cada84SAndre Oppermann * Look for queue of fragments 882f0cada84SAndre Oppermann * of this datagram. 
883f0cada84SAndre Oppermann */ 884f0cada84SAndre Oppermann TAILQ_FOREACH(fp, head, ipq_list) 885f0cada84SAndre Oppermann if (ip->ip_id == fp->ipq_id && 886f0cada84SAndre Oppermann ip->ip_src.s_addr == fp->ipq_src.s_addr && 887f0cada84SAndre Oppermann ip->ip_dst.s_addr == fp->ipq_dst.s_addr && 888f0cada84SAndre Oppermann #ifdef MAC 88930d239bcSRobert Watson mac_ipq_match(m, fp) && 890f0cada84SAndre Oppermann #endif 891f0cada84SAndre Oppermann ip->ip_p == fp->ipq_p) 892f0cada84SAndre Oppermann goto found; 893f0cada84SAndre Oppermann 894f0cada84SAndre Oppermann fp = NULL; 895f0cada84SAndre Oppermann 896f0cada84SAndre Oppermann /* 897d248c7d7SRobert Watson * Attempt to trim the number of allocated fragment queues if it 898d248c7d7SRobert Watson * exceeds the administrative limit. 899f0cada84SAndre Oppermann */ 900603724d3SBjoern A. Zeeb if ((V_nipq > V_maxnipq) && (V_maxnipq > 0)) { 901f0cada84SAndre Oppermann /* 902f0cada84SAndre Oppermann * drop something from the tail of the current queue 903f0cada84SAndre Oppermann * before proceeding further 904f0cada84SAndre Oppermann */ 905f0cada84SAndre Oppermann struct ipq *q = TAILQ_LAST(head, ipqhead); 906f0cada84SAndre Oppermann if (q == NULL) { /* gak */ 907f0cada84SAndre Oppermann for (i = 0; i < IPREASS_NHASH; i++) { 908603724d3SBjoern A. Zeeb struct ipq *r = TAILQ_LAST(&V_ipq[i], ipqhead); 909f0cada84SAndre Oppermann if (r) { 91086425c62SRobert Watson IPSTAT_ADD(ips_fragtimeout, 91186425c62SRobert Watson r->ipq_nfrags); 912603724d3SBjoern A. 
Zeeb ip_freef(&V_ipq[i], r); 913f0cada84SAndre Oppermann break; 914f0cada84SAndre Oppermann } 915f0cada84SAndre Oppermann } 916f0cada84SAndre Oppermann } else { 91786425c62SRobert Watson IPSTAT_ADD(ips_fragtimeout, q->ipq_nfrags); 918f0cada84SAndre Oppermann ip_freef(head, q); 919f0cada84SAndre Oppermann } 920f0cada84SAndre Oppermann } 921f0cada84SAndre Oppermann 922f0cada84SAndre Oppermann found: 923f0cada84SAndre Oppermann /* 924f0cada84SAndre Oppermann * Adjust ip_len to not reflect header, 925f0cada84SAndre Oppermann * convert offset of this to bytes. 926f0cada84SAndre Oppermann */ 927f0cada84SAndre Oppermann ip->ip_len -= hlen; 928f0cada84SAndre Oppermann if (ip->ip_off & IP_MF) { 929f0cada84SAndre Oppermann /* 930f0cada84SAndre Oppermann * Make sure that fragments have a data length 931f0cada84SAndre Oppermann * that's a non-zero multiple of 8 bytes. 932f0cada84SAndre Oppermann */ 933f0cada84SAndre Oppermann if (ip->ip_len == 0 || (ip->ip_len & 0x7) != 0) { 93486425c62SRobert Watson IPSTAT_INC(ips_toosmall); /* XXX */ 935f0cada84SAndre Oppermann goto dropfrag; 936f0cada84SAndre Oppermann } 937f0cada84SAndre Oppermann m->m_flags |= M_FRAG; 938f0cada84SAndre Oppermann } else 939f0cada84SAndre Oppermann m->m_flags &= ~M_FRAG; 940f0cada84SAndre Oppermann ip->ip_off <<= 3; 941f0cada84SAndre Oppermann 942f0cada84SAndre Oppermann 943f0cada84SAndre Oppermann /* 944f0cada84SAndre Oppermann * Attempt reassembly; if it succeeds, proceed. 945f0cada84SAndre Oppermann * ip_reass() will return a different mbuf. 946f0cada84SAndre Oppermann */ 94786425c62SRobert Watson IPSTAT_INC(ips_fragments); 948f0cada84SAndre Oppermann m->m_pkthdr.header = ip; 949f0cada84SAndre Oppermann 950f0cada84SAndre Oppermann /* Previous ip_reass() started here. */ 951df8bae1dSRodney W. Grimes /* 952df8bae1dSRodney W. Grimes * Presence of header sizes in mbufs 953df8bae1dSRodney W. Grimes * would confuse code below. 954df8bae1dSRodney W. Grimes */ 955df8bae1dSRodney W. 
Grimes m->m_data += hlen; 956df8bae1dSRodney W. Grimes m->m_len -= hlen; 957df8bae1dSRodney W. Grimes 958df8bae1dSRodney W. Grimes /* 959df8bae1dSRodney W. Grimes * If first fragment to arrive, create a reassembly queue. 960df8bae1dSRodney W. Grimes */ 961042bbfa3SRobert Watson if (fp == NULL) { 962603724d3SBjoern A. Zeeb fp = uma_zalloc(V_ipq_zone, M_NOWAIT); 963d248c7d7SRobert Watson if (fp == NULL) 964df8bae1dSRodney W. Grimes goto dropfrag; 96536b0360bSRobert Watson #ifdef MAC 96630d239bcSRobert Watson if (mac_ipq_init(fp, M_NOWAIT) != 0) { 967603724d3SBjoern A. Zeeb uma_zfree(V_ipq_zone, fp); 9681d7d0bfeSPawel Jakub Dawidek fp = NULL; 9695e7ce478SRobert Watson goto dropfrag; 9705e7ce478SRobert Watson } 97130d239bcSRobert Watson mac_ipq_create(m, fp); 97236b0360bSRobert Watson #endif 973462b86feSPoul-Henning Kamp TAILQ_INSERT_HEAD(head, fp, ipq_list); 974603724d3SBjoern A. Zeeb V_nipq++; 975375386e2SMike Silbersack fp->ipq_nfrags = 1; 976df8bae1dSRodney W. Grimes fp->ipq_ttl = IPFRAGTTL; 977df8bae1dSRodney W. Grimes fp->ipq_p = ip->ip_p; 978df8bae1dSRodney W. Grimes fp->ipq_id = ip->ip_id; 9796effc713SDoug Rabson fp->ipq_src = ip->ip_src; 9806effc713SDoug Rabson fp->ipq_dst = ip->ip_dst; 981af38c68cSLuigi Rizzo fp->ipq_frags = m; 982af38c68cSLuigi Rizzo m->m_nextpkt = NULL; 983800af1fbSMaxim Konovalov goto done; 98436b0360bSRobert Watson } else { 985375386e2SMike Silbersack fp->ipq_nfrags++; 98636b0360bSRobert Watson #ifdef MAC 98730d239bcSRobert Watson mac_ipq_update(m, fp); 98836b0360bSRobert Watson #endif 989df8bae1dSRodney W. Grimes } 990df8bae1dSRodney W. Grimes 9916effc713SDoug Rabson #define GETIP(m) ((struct ip*)((m)->m_pkthdr.header)) 9926effc713SDoug Rabson 993df8bae1dSRodney W. Grimes /* 99459dfcba4SHajimu UMEMOTO * Handle ECN by comparing this segment with the first one; 99559dfcba4SHajimu UMEMOTO * if CE is set, do not lose CE. 99659dfcba4SHajimu UMEMOTO * drop if CE and not-ECT are mixed for the same packet. 
99759dfcba4SHajimu UMEMOTO */ 99859dfcba4SHajimu UMEMOTO ecn = ip->ip_tos & IPTOS_ECN_MASK; 99959dfcba4SHajimu UMEMOTO ecn0 = GETIP(fp->ipq_frags)->ip_tos & IPTOS_ECN_MASK; 100059dfcba4SHajimu UMEMOTO if (ecn == IPTOS_ECN_CE) { 100159dfcba4SHajimu UMEMOTO if (ecn0 == IPTOS_ECN_NOTECT) 100259dfcba4SHajimu UMEMOTO goto dropfrag; 100359dfcba4SHajimu UMEMOTO if (ecn0 != IPTOS_ECN_CE) 100459dfcba4SHajimu UMEMOTO GETIP(fp->ipq_frags)->ip_tos |= IPTOS_ECN_CE; 100559dfcba4SHajimu UMEMOTO } 100659dfcba4SHajimu UMEMOTO if (ecn == IPTOS_ECN_NOTECT && ecn0 != IPTOS_ECN_NOTECT) 100759dfcba4SHajimu UMEMOTO goto dropfrag; 100859dfcba4SHajimu UMEMOTO 100959dfcba4SHajimu UMEMOTO /* 1010df8bae1dSRodney W. Grimes * Find a segment which begins after this one does. 1011df8bae1dSRodney W. Grimes */ 10126effc713SDoug Rabson for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) 10136effc713SDoug Rabson if (GETIP(q)->ip_off > ip->ip_off) 1014df8bae1dSRodney W. Grimes break; 1015df8bae1dSRodney W. Grimes 1016df8bae1dSRodney W. Grimes /* 1017df8bae1dSRodney W. Grimes * If there is a preceding segment, it may provide some of 1018df8bae1dSRodney W. Grimes * our data already. If so, drop the data from the incoming 1019af38c68cSLuigi Rizzo * segment. If it provides all of our data, drop us, otherwise 1020af38c68cSLuigi Rizzo * stick new segment in the proper place. 1021db4f9cc7SJonathan Lemon * 1022db4f9cc7SJonathan Lemon * If some of the data is dropped from the the preceding 1023db4f9cc7SJonathan Lemon * segment, then it's checksum is invalidated. 1024df8bae1dSRodney W. Grimes */ 10256effc713SDoug Rabson if (p) { 10266effc713SDoug Rabson i = GETIP(p)->ip_off + GETIP(p)->ip_len - ip->ip_off; 1027df8bae1dSRodney W. Grimes if (i > 0) { 1028df8bae1dSRodney W. Grimes if (i >= ip->ip_len) 1029df8bae1dSRodney W. Grimes goto dropfrag; 10306a800098SYoshinobu Inoue m_adj(m, i); 1031db4f9cc7SJonathan Lemon m->m_pkthdr.csum_flags = 0; 1032df8bae1dSRodney W. 
Grimes ip->ip_off += i; 1033df8bae1dSRodney W. Grimes ip->ip_len -= i; 1034df8bae1dSRodney W. Grimes } 1035af38c68cSLuigi Rizzo m->m_nextpkt = p->m_nextpkt; 1036af38c68cSLuigi Rizzo p->m_nextpkt = m; 1037af38c68cSLuigi Rizzo } else { 1038af38c68cSLuigi Rizzo m->m_nextpkt = fp->ipq_frags; 1039af38c68cSLuigi Rizzo fp->ipq_frags = m; 1040df8bae1dSRodney W. Grimes } 1041df8bae1dSRodney W. Grimes 1042df8bae1dSRodney W. Grimes /* 1043df8bae1dSRodney W. Grimes * While we overlap succeeding segments trim them or, 1044df8bae1dSRodney W. Grimes * if they are completely covered, dequeue them. 1045df8bae1dSRodney W. Grimes */ 10466effc713SDoug Rabson for (; q != NULL && ip->ip_off + ip->ip_len > GETIP(q)->ip_off; 1047af38c68cSLuigi Rizzo q = nq) { 1048b36f5b37SMaxim Konovalov i = (ip->ip_off + ip->ip_len) - GETIP(q)->ip_off; 10496effc713SDoug Rabson if (i < GETIP(q)->ip_len) { 10506effc713SDoug Rabson GETIP(q)->ip_len -= i; 10516effc713SDoug Rabson GETIP(q)->ip_off += i; 10526effc713SDoug Rabson m_adj(q, i); 1053db4f9cc7SJonathan Lemon q->m_pkthdr.csum_flags = 0; 1054df8bae1dSRodney W. Grimes break; 1055df8bae1dSRodney W. Grimes } 10566effc713SDoug Rabson nq = q->m_nextpkt; 1057af38c68cSLuigi Rizzo m->m_nextpkt = nq; 105886425c62SRobert Watson IPSTAT_INC(ips_fragdropped); 1059375386e2SMike Silbersack fp->ipq_nfrags--; 10606effc713SDoug Rabson m_freem(q); 1061df8bae1dSRodney W. Grimes } 1062df8bae1dSRodney W. Grimes 1063df8bae1dSRodney W. Grimes /* 1064375386e2SMike Silbersack * Check for complete reassembly and perform frag per packet 1065375386e2SMike Silbersack * limiting. 1066375386e2SMike Silbersack * 1067375386e2SMike Silbersack * Frag limiting is performed here so that the nth frag has 1068375386e2SMike Silbersack * a chance to complete the packet before we drop the packet. 1069375386e2SMike Silbersack * As a result, n+1 frags are actually allowed per packet, but 1070375386e2SMike Silbersack * only n will ever be stored. (n = maxfragsperpacket.) 
1071375386e2SMike Silbersack * 1072df8bae1dSRodney W. Grimes */ 10736effc713SDoug Rabson next = 0; 10746effc713SDoug Rabson for (p = NULL, q = fp->ipq_frags; q; p = q, q = q->m_nextpkt) { 1075375386e2SMike Silbersack if (GETIP(q)->ip_off != next) { 1076603724d3SBjoern A. Zeeb if (fp->ipq_nfrags > V_maxfragsperpacket) { 107786425c62SRobert Watson IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1078375386e2SMike Silbersack ip_freef(head, fp); 107999e8617dSMaxim Konovalov } 1080f0cada84SAndre Oppermann goto done; 1081375386e2SMike Silbersack } 10826effc713SDoug Rabson next += GETIP(q)->ip_len; 10836effc713SDoug Rabson } 10846effc713SDoug Rabson /* Make sure the last packet didn't have the IP_MF flag */ 1085375386e2SMike Silbersack if (p->m_flags & M_FRAG) { 1086603724d3SBjoern A. Zeeb if (fp->ipq_nfrags > V_maxfragsperpacket) { 108786425c62SRobert Watson IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1088375386e2SMike Silbersack ip_freef(head, fp); 108999e8617dSMaxim Konovalov } 1090f0cada84SAndre Oppermann goto done; 1091375386e2SMike Silbersack } 1092df8bae1dSRodney W. Grimes 1093df8bae1dSRodney W. Grimes /* 1094430d30d8SBill Fenner * Reassembly is complete. Make sure the packet is a sane size. 1095430d30d8SBill Fenner */ 10966effc713SDoug Rabson q = fp->ipq_frags; 10976effc713SDoug Rabson ip = GETIP(q); 109853be11f6SPoul-Henning Kamp if (next + (ip->ip_hl << 2) > IP_MAXPACKET) { 109986425c62SRobert Watson IPSTAT_INC(ips_toolong); 110086425c62SRobert Watson IPSTAT_ADD(ips_fragdropped, fp->ipq_nfrags); 1101462b86feSPoul-Henning Kamp ip_freef(head, fp); 1102f0cada84SAndre Oppermann goto done; 1103430d30d8SBill Fenner } 1104430d30d8SBill Fenner 1105430d30d8SBill Fenner /* 1106430d30d8SBill Fenner * Concatenate fragments. 1107df8bae1dSRodney W. Grimes */ 11086effc713SDoug Rabson m = q; 1109df8bae1dSRodney W. Grimes t = m->m_next; 111002410549SRobert Watson m->m_next = NULL; 1111df8bae1dSRodney W. 
Grimes m_cat(m, t); 11126effc713SDoug Rabson nq = q->m_nextpkt; 111302410549SRobert Watson q->m_nextpkt = NULL; 11146effc713SDoug Rabson for (q = nq; q != NULL; q = nq) { 11156effc713SDoug Rabson nq = q->m_nextpkt; 1116945aa40dSDoug Rabson q->m_nextpkt = NULL; 1117db4f9cc7SJonathan Lemon m->m_pkthdr.csum_flags &= q->m_pkthdr.csum_flags; 1118db4f9cc7SJonathan Lemon m->m_pkthdr.csum_data += q->m_pkthdr.csum_data; 1119a8db1d93SJonathan Lemon m_cat(m, q); 1120df8bae1dSRodney W. Grimes } 11216edb555dSOleg Bulyzhin /* 11226edb555dSOleg Bulyzhin * In order to do checksumming faster we do 'end-around carry' here 11236edb555dSOleg Bulyzhin * (and not in for{} loop), though it implies we are not going to 11246edb555dSOleg Bulyzhin * reassemble more than 64k fragments. 11256edb555dSOleg Bulyzhin */ 11266edb555dSOleg Bulyzhin m->m_pkthdr.csum_data = 11276edb555dSOleg Bulyzhin (m->m_pkthdr.csum_data & 0xffff) + (m->m_pkthdr.csum_data >> 16); 112836b0360bSRobert Watson #ifdef MAC 112930d239bcSRobert Watson mac_ipq_reassemble(fp, m); 113030d239bcSRobert Watson mac_ipq_destroy(fp); 113136b0360bSRobert Watson #endif 1132df8bae1dSRodney W. Grimes 1133df8bae1dSRodney W. Grimes /* 1134f0cada84SAndre Oppermann * Create header for new ip packet by modifying header of first 1135f0cada84SAndre Oppermann * packet; dequeue and discard fragment reassembly header. 1136df8bae1dSRodney W. Grimes * Make header visible. 1137df8bae1dSRodney W. Grimes */ 1138f0cada84SAndre Oppermann ip->ip_len = (ip->ip_hl << 2) + next; 11396effc713SDoug Rabson ip->ip_src = fp->ipq_src; 11406effc713SDoug Rabson ip->ip_dst = fp->ipq_dst; 1141462b86feSPoul-Henning Kamp TAILQ_REMOVE(head, fp, ipq_list); 1142603724d3SBjoern A. Zeeb V_nipq--; 1143603724d3SBjoern A. Zeeb uma_zfree(V_ipq_zone, fp); 114453be11f6SPoul-Henning Kamp m->m_len += (ip->ip_hl << 2); 114553be11f6SPoul-Henning Kamp m->m_data -= (ip->ip_hl << 2); 1146df8bae1dSRodney W. 
Grimes /* some debugging cruft by sklower, below, will go away soon */ 1147a5554bf0SPoul-Henning Kamp if (m->m_flags & M_PKTHDR) /* XXX this should be done elsewhere */ 1148a5554bf0SPoul-Henning Kamp m_fixhdr(m); 114986425c62SRobert Watson IPSTAT_INC(ips_reassembled); 1150f0cada84SAndre Oppermann IPQ_UNLOCK(); 11516a800098SYoshinobu Inoue return (m); 1152df8bae1dSRodney W. Grimes 1153df8bae1dSRodney W. Grimes dropfrag: 115486425c62SRobert Watson IPSTAT_INC(ips_fragdropped); 1155042bbfa3SRobert Watson if (fp != NULL) 1156375386e2SMike Silbersack fp->ipq_nfrags--; 1157df8bae1dSRodney W. Grimes m_freem(m); 1158f0cada84SAndre Oppermann done: 1159f0cada84SAndre Oppermann IPQ_UNLOCK(); 1160f0cada84SAndre Oppermann return (NULL); 11616effc713SDoug Rabson 11626effc713SDoug Rabson #undef GETIP 1163df8bae1dSRodney W. Grimes } 1164df8bae1dSRodney W. Grimes 1165df8bae1dSRodney W. Grimes /* 1166df8bae1dSRodney W. Grimes * Free a fragment reassembly header and all 1167df8bae1dSRodney W. Grimes * associated datagrams. 1168df8bae1dSRodney W. Grimes */ 11690312fbe9SPoul-Henning Kamp static void 1170f2565d68SRobert Watson ip_freef(struct ipqhead *fhp, struct ipq *fp) 1171df8bae1dSRodney W. Grimes { 1172f2565d68SRobert Watson struct mbuf *q; 1173df8bae1dSRodney W. Grimes 11742fad1e93SSam Leffler IPQ_LOCK_ASSERT(); 11752fad1e93SSam Leffler 11766effc713SDoug Rabson while (fp->ipq_frags) { 11776effc713SDoug Rabson q = fp->ipq_frags; 11786effc713SDoug Rabson fp->ipq_frags = q->m_nextpkt; 11796effc713SDoug Rabson m_freem(q); 1180df8bae1dSRodney W. Grimes } 1181462b86feSPoul-Henning Kamp TAILQ_REMOVE(fhp, fp, ipq_list); 1182603724d3SBjoern A. Zeeb uma_zfree(V_ipq_zone, fp); 1183603724d3SBjoern A. Zeeb V_nipq--; 1184df8bae1dSRodney W. Grimes } 1185df8bae1dSRodney W. Grimes 1186df8bae1dSRodney W. Grimes /* 1187df8bae1dSRodney W. Grimes * IP timer processing; 1188df8bae1dSRodney W. Grimes * if a timer expires on a reassembly 1189df8bae1dSRodney W. Grimes * queue, discard it. 
1190df8bae1dSRodney W. Grimes */ 1191df8bae1dSRodney W. Grimes void 1192f2565d68SRobert Watson ip_slowtimo(void) 1193df8bae1dSRodney W. Grimes { 11948b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 1195f2565d68SRobert Watson struct ipq *fp; 1196194a213eSAndrey A. Chernov int i; 1197df8bae1dSRodney W. Grimes 11985ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 11992fad1e93SSam Leffler IPQ_LOCK(); 12008b615593SMarko Zec VNET_FOREACH(vnet_iter) { 12018b615593SMarko Zec CURVNET_SET(vnet_iter); 1202194a213eSAndrey A. Chernov for (i = 0; i < IPREASS_NHASH; i++) { 1203603724d3SBjoern A. Zeeb for(fp = TAILQ_FIRST(&V_ipq[i]); fp;) { 1204462b86feSPoul-Henning Kamp struct ipq *fpp; 1205462b86feSPoul-Henning Kamp 1206462b86feSPoul-Henning Kamp fpp = fp; 1207462b86feSPoul-Henning Kamp fp = TAILQ_NEXT(fp, ipq_list); 1208462b86feSPoul-Henning Kamp if(--fpp->ipq_ttl == 0) { 120986425c62SRobert Watson IPSTAT_ADD(ips_fragtimeout, 121086425c62SRobert Watson fpp->ipq_nfrags); 1211603724d3SBjoern A. Zeeb ip_freef(&V_ipq[i], fpp); 1212df8bae1dSRodney W. Grimes } 1213df8bae1dSRodney W. Grimes } 1214194a213eSAndrey A. Chernov } 1215690a6055SJesper Skriver /* 1216690a6055SJesper Skriver * If we are over the maximum number of fragments 1217690a6055SJesper Skriver * (due to the limit being lowered), drain off 1218690a6055SJesper Skriver * enough to get down to the new limit. 1219690a6055SJesper Skriver */ 1220603724d3SBjoern A. 
Zeeb if (V_maxnipq >= 0 && V_nipq > V_maxnipq) { 1221690a6055SJesper Skriver for (i = 0; i < IPREASS_NHASH; i++) { 12228b615593SMarko Zec while (V_nipq > V_maxnipq && 12238b615593SMarko Zec !TAILQ_EMPTY(&V_ipq[i])) { 122486425c62SRobert Watson IPSTAT_ADD(ips_fragdropped, 122586425c62SRobert Watson TAILQ_FIRST(&V_ipq[i])->ipq_nfrags); 12268b615593SMarko Zec ip_freef(&V_ipq[i], 12278b615593SMarko Zec TAILQ_FIRST(&V_ipq[i])); 1228690a6055SJesper Skriver } 1229690a6055SJesper Skriver } 1230690a6055SJesper Skriver } 12318b615593SMarko Zec CURVNET_RESTORE(); 12328b615593SMarko Zec } 12332fad1e93SSam Leffler IPQ_UNLOCK(); 12345ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 1235df8bae1dSRodney W. Grimes } 1236df8bae1dSRodney W. Grimes 1237df8bae1dSRodney W. Grimes /* 1238df8bae1dSRodney W. Grimes * Drain off all datagram fragments. 1239df8bae1dSRodney W. Grimes */ 1240df8bae1dSRodney W. Grimes void 1241f2565d68SRobert Watson ip_drain(void) 1242df8bae1dSRodney W. Grimes { 12438b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 1244194a213eSAndrey A. Chernov int i; 1245ce29ab3aSGarrett Wollman 12465ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 12472fad1e93SSam Leffler IPQ_LOCK(); 12488b615593SMarko Zec VNET_FOREACH(vnet_iter) { 12498b615593SMarko Zec CURVNET_SET(vnet_iter); 1250194a213eSAndrey A. Chernov for (i = 0; i < IPREASS_NHASH; i++) { 1251603724d3SBjoern A. Zeeb while(!TAILQ_EMPTY(&V_ipq[i])) { 125286425c62SRobert Watson IPSTAT_ADD(ips_fragdropped, 125386425c62SRobert Watson TAILQ_FIRST(&V_ipq[i])->ipq_nfrags); 1254603724d3SBjoern A. Zeeb ip_freef(&V_ipq[i], TAILQ_FIRST(&V_ipq[i])); 1255194a213eSAndrey A. Chernov } 1256194a213eSAndrey A. Chernov } 12578b615593SMarko Zec CURVNET_RESTORE(); 12588b615593SMarko Zec } 12592fad1e93SSam Leffler IPQ_UNLOCK(); 12605ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 1261ce29ab3aSGarrett Wollman in_rtqdrain(); 1262df8bae1dSRodney W. Grimes } 1263df8bae1dSRodney W. Grimes 1264df8bae1dSRodney W. 
/*
 * Point the ip_protox[] slot for `ipproto' at that protocol's entry in
 * inetsw[].
 *
 * The protocol to be inserted into ip_protox[] must be already registered
 * in inetsw[], either statically or through pf_proto_register().
 *
 * Returns 0 on success, or:
 *	EPROTONOSUPPORT	ipproto is 0, or no PF_INET protosw entry carries it;
 *	EPFNOSUPPORT	the raw IP protosw entry could not be found;
 *	EEXIST		the slot no longer points at the raw IP entry;
 *	EINVAL		the matching entry's protocol is >= IPPROTO_MAX.
 */
int
ipproto_register(u_char ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto == 0)
		return (EPROTONOSUPPORT);

	/*
	 * The protocol slot must not be occupied by another protocol
	 * already.  An index pointing to IPPROTO_RAW is unused.
	 */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] != pr - inetsw)	/* IPPROTO_RAW */
		return (EEXIST);

	/* Find the protocol position in inetsw[] and set the index. */
	for (pr = inetdomain.dom_protosw;
	     pr < inetdomain.dom_protoswNPROTOSW; pr++) {
		if (pr->pr_domain->dom_family == PF_INET &&
		    pr->pr_protocol && pr->pr_protocol == ipproto) {
			/* Be careful to only index valid IP protocols. */
			if (pr->pr_protocol < IPPROTO_MAX) {
				ip_protox[pr->pr_protocol] = pr - inetsw;
				return (0);
			} else
				return (EINVAL);
		}
	}
	return (EPROTONOSUPPORT);
}

/*
 * Undo ipproto_register(): point the ip_protox[] slot for `ipproto' back
 * at the raw IP protosw entry, which marks the slot as unused.
 *
 * Returns 0 on success, or:
 *	EPROTONOSUPPORT	ipproto is 0;
 *	EPFNOSUPPORT	the raw IP protosw entry could not be found;
 *	ENOENT		the slot already points at the raw IP entry.
 */
int
ipproto_unregister(u_char ipproto)
{
	struct protosw *pr;

	/* Sanity checks. */
	if (ipproto == 0)
		return (EPROTONOSUPPORT);

	/* Check if the protocol was indeed registered. */
	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
	if (pr == NULL)
		return (EPFNOSUPPORT);
	if (ip_protox[ipproto] == pr - inetsw)	/* IPPROTO_RAW */
		return (ENOENT);

	/* Reset the protocol slot to IPPROTO_RAW. */
	ip_protox[ipproto] = pr - inetsw;
	return (0);
}

/*
 * Given address of next destination (final or next hop), return (referenced)
 * internet address info of interface to be used to get there.
 *
 * The lookup is done in routing table `fibnum'.  Returns NULL when no
 * route is found.  On success a reference has been taken with ifa_ref();
 * the caller is responsible for dropping it with ifa_free().
 */
struct in_ifaddr *
ip_rtaddr(struct in_addr dst, u_int fibnum)
{
	struct route sro;
	struct sockaddr_in *sin;
	struct in_ifaddr *ia;

	bzero(&sro, sizeof(sro));
	sin = (struct sockaddr_in *)&sro.ro_dst;
	sin->sin_family = AF_INET;
	sin->sin_len = sizeof(*sin);
	sin->sin_addr = dst;
	in_rtalloc_ign(&sro, 0, fibnum);

	if (sro.ro_rt == NULL)
		return (NULL);

	/* Take our own reference before the route (and its ifa) is released. */
	ia = ifatoia(sro.ro_rt->rt_ifa);
	ifa_ref(&ia->ia_ifa);
	RTFREE(sro.ro_rt);
	return (ia);
}

/*
 * errno value for each of the PRC_NCMDS protocol control commands,
 * indexed by command number; 0 means no error is associated with it.
 */
u_char inetctlerrmap[PRC_NCMDS] = {
	0,		0,		0,		0,
	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
	EMSGSIZE,	EHOSTUNREACH,	0,		0,
	0,		0,		EHOSTUNREACH,	0,
	ENOPROTOOPT,	ECONNREFUSED
};

/*
 * Forward a packet.  If some error occurs return the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advancing
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
 *
 * The srcrt parameter indicates whether the packet is being forwarded
 * via a source route.
 *
 * `m' is given away on every path (m_freem(), icmp_error() or
 * ip_output()), so nothing may touch `m' -- or `ip', which points into
 * it -- once ip_output() has been called.  Anything needed afterwards
 * must come from `mcopy' or from a value saved beforehand.
 */
void
ip_forward(struct mbuf *m, int srcrt)
{
	struct ip *ip = mtod(m, struct ip *);
	struct in_ifaddr *ia;
	struct mbuf *mcopy;
	struct in_addr dest;
	struct route ro;
	int error, type = 0, code = 0, mtu = 0;
	int pktlen;

	if (m->m_flags & (M_BCAST|M_MCAST) || in_canforward(ip->ip_dst) == 0) {
		IPSTAT_INC(ips_cantforward);
		m_freem(m);
		return;
	}
#ifdef IPSTEALTH
	if (!V_ipstealth) {
#endif
		if (ip->ip_ttl <= IPTTLDEC) {
			icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS,
			    0, 0);
			return;
		}
#ifdef IPSTEALTH
	}
#endif

	/* Referenced; every return below this point must ifa_free() it. */
	ia = ip_rtaddr(ip->ip_dst, M_GETFIB(m));
#ifndef IPSEC
	/*
	 * 'ia' may be NULL if there is no route for this destination.
	 * In case of IPsec, Don't discard it just yet, but pass it to
	 * ip_output in case of outgoing IPsec policy.
	 */
	if (!srcrt && ia == NULL) {
		icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, 0, 0);
		return;
	}
#endif

	/*
	 * Save the IP header and at most 8 bytes of the payload,
	 * in case we need to generate an ICMP message to the src.
	 *
	 * XXX this can be optimized a lot by saving the data in a local
	 * buffer on the stack (72 bytes at most), and only allocating the
	 * mbuf if really necessary. The vast majority of the packets
	 * are forwarded without having to send an ICMP back (either
	 * because unnecessary, or because rate limited), so we are
	 * really wasting a lot of work here.
	 *
	 * We don't use m_copy() because it might return a reference
	 * to a shared cluster. Both this function and ip_output()
	 * assume exclusive access to the IP header in `m', so any
	 * data in a cluster may change before we reach icmp_error().
	 */
	MGETHDR(mcopy, M_DONTWAIT, m->m_type);
	if (mcopy != NULL && !m_dup_pkthdr(mcopy, m, M_DONTWAIT)) {
		/*
		 * It's probably ok if the pkthdr dup fails (because
		 * the deep copy of the tag chain failed), but for now
		 * be conservative and just discard the copy since
		 * code below may some day want the tags.
		 */
		m_free(mcopy);
		mcopy = NULL;
	}
	if (mcopy != NULL) {
		mcopy->m_len = min(ip->ip_len, M_TRAILINGSPACE(mcopy));
		mcopy->m_pkthdr.len = mcopy->m_len;
		m_copydata(m, 0, mcopy->m_len, mtod(mcopy, caddr_t));
	}

#ifdef IPSTEALTH
	if (!V_ipstealth) {
#endif
		ip->ip_ttl -= IPTTLDEC;
#ifdef IPSTEALTH
	}
#endif

	/*
	 * If forwarding packet using same interface that it came in on,
	 * perhaps should send a redirect to sender to shortcut a hop.
	 * Only send redirect if source is sending directly to us,
	 * and if packet was not source routed (or has any options).
	 * Also, don't send redirect if forwarding using a default route
	 * or a route modified by a redirect.
	 */
	dest.s_addr = 0;
	if (!srcrt && V_ipsendredirects &&
	    ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
		struct sockaddr_in *sin;
		struct rtentry *rt;

		bzero(&ro, sizeof(ro));
		sin = (struct sockaddr_in *)&ro.ro_dst;
		sin->sin_family = AF_INET;
		sin->sin_len = sizeof(*sin);
		sin->sin_addr = ip->ip_dst;
		in_rtalloc_ign(&ro, 0, M_GETFIB(m));

		rt = ro.ro_rt;

		if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
		    satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
			u_long src = ntohl(ip->ip_src.s_addr);

			if (RTA(rt) &&
			    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
				if (rt->rt_flags & RTF_GATEWAY)
					dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
				else
					dest.s_addr = ip->ip_dst.s_addr;
				/* Router requirements says to only send host redirects */
				type = ICMP_REDIRECT;
				code = ICMP_REDIRECT_HOST;
			}
		}
		if (rt)
			RTFREE(rt);
	}

	/*
	 * Remember the packet length now: the EMSGSIZE handling below may
	 * need it, and by then `m' (and therefore `ip') is gone.
	 */
	pktlen = ip->ip_len;

	/*
	 * Try to cache the route MTU from ip_output so we can consider it for
	 * the ICMP_UNREACH_NEEDFRAG "Next-Hop MTU" field described in RFC1191.
	 */
	bzero(&ro, sizeof(ro));

	error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);

	if (error == EMSGSIZE && ro.ro_rt)
		mtu = ro.ro_rt->rt_rmx.rmx_mtu;
	if (ro.ro_rt)
		RTFREE(ro.ro_rt);

	if (error)
		IPSTAT_INC(ips_cantforward);
	else {
		IPSTAT_INC(ips_forward);
		if (type)
			IPSTAT_INC(ips_redirectsent);
		else {
			/* Forwarded and no redirect wanted: nothing to report. */
			if (mcopy)
				m_freem(mcopy);
			if (ia != NULL)
				ifa_free(&ia->ia_ifa);
			return;
		}
	}
	/* Without the saved copy there is nothing to build an ICMP from. */
	if (mcopy == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return;
	}

	switch (error) {

	case 0:				/* forwarded, but need redirect */
		/* type, code set above */
		break;

	case ENETUNREACH:
	case EHOSTUNREACH:
	case ENETDOWN:
	case EHOSTDOWN:
	default:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_HOST;
		break;

	case EMSGSIZE:
		type = ICMP_UNREACH;
		code = ICMP_UNREACH_NEEDFRAG;

#ifdef IPSEC
		/*
		 * If IPsec is configured for this path,
		 * override any possibly mtu value set by ip_output.
		 *
		 * Use the saved copy of the packet here: `m' was handed
		 * to ip_output() above and must not be referenced again.
		 */
		mtu = ip_ipsec_mtu(mcopy, mtu);
#endif /* IPSEC */
		/*
		 * If the MTU was set before make sure we are below the
		 * interface MTU.
		 * If the MTU wasn't set before use the interface mtu or
		 * fall back to the next smaller mtu step compared to the
		 * current packet size.
		 */
		if (mtu != 0) {
			if (ia != NULL)
				mtu = min(mtu, ia->ia_ifp->if_mtu);
		} else {
			if (ia != NULL)
				mtu = ia->ia_ifp->if_mtu;
			else
				mtu = ip_next_mtu(pktlen, 0);
		}
		IPSTAT_INC(ips_cantfrag);
		break;

	case ENOBUFS:
		/*
		 * A router should not generate ICMP_SOURCEQUENCH as
		 * required in RFC1812 Requirements for IP Version 4 Routers.
		 * Source quench could be a big problem under DoS attacks,
		 * or if the underlying interface is rate-limited.
		 * Those who need source quench packets may re-enable them
		 * via the net.inet.ip.sendsourcequench sysctl.
		 */
		if (V_ip_sendsourcequench == 0) {
			m_freem(mcopy);
			if (ia != NULL)
				ifa_free(&ia->ia_ifa);
			return;
		} else {
			type = ICMP_SOURCEQUENCH;
			code = 0;
		}
		break;

	case EACCES:			/* ipfw denied packet */
		m_freem(mcopy);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return;
	}
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	icmp_error(mcopy, type, code, dest.s_addr, mtu);
}
Grimes 160782c23ebaSBill Fenner void 1608f2565d68SRobert Watson ip_savecontrol(struct inpcb *inp, struct mbuf **mp, struct ip *ip, 1609f2565d68SRobert Watson struct mbuf *m) 161082c23ebaSBill Fenner { 16118b615593SMarko Zec 1612be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & (SO_BINTIME | SO_TIMESTAMP)) { 1613be8a62e8SPoul-Henning Kamp struct bintime bt; 1614be8a62e8SPoul-Henning Kamp 1615be8a62e8SPoul-Henning Kamp bintime(&bt); 1616be8a62e8SPoul-Henning Kamp if (inp->inp_socket->so_options & SO_BINTIME) { 1617be8a62e8SPoul-Henning Kamp *mp = sbcreatecontrol((caddr_t) &bt, sizeof(bt), 1618be8a62e8SPoul-Henning Kamp SCM_BINTIME, SOL_SOCKET); 1619be8a62e8SPoul-Henning Kamp if (*mp) 1620be8a62e8SPoul-Henning Kamp mp = &(*mp)->m_next; 1621be8a62e8SPoul-Henning Kamp } 162282c23ebaSBill Fenner if (inp->inp_socket->so_options & SO_TIMESTAMP) { 162382c23ebaSBill Fenner struct timeval tv; 162482c23ebaSBill Fenner 1625be8a62e8SPoul-Henning Kamp bintime2timeval(&bt, &tv); 162682c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t) &tv, sizeof(tv), 162782c23ebaSBill Fenner SCM_TIMESTAMP, SOL_SOCKET); 162882c23ebaSBill Fenner if (*mp) 162982c23ebaSBill Fenner mp = &(*mp)->m_next; 16304cc20ab1SSeigo Tanimura } 1631be8a62e8SPoul-Henning Kamp } 163282c23ebaSBill Fenner if (inp->inp_flags & INP_RECVDSTADDR) { 163382c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t) &ip->ip_dst, 163482c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVDSTADDR, IPPROTO_IP); 163582c23ebaSBill Fenner if (*mp) 163682c23ebaSBill Fenner mp = &(*mp)->m_next; 163782c23ebaSBill Fenner } 16384957466bSMatthew N. Dodd if (inp->inp_flags & INP_RECVTTL) { 16394957466bSMatthew N. Dodd *mp = sbcreatecontrol((caddr_t) &ip->ip_ttl, 16404957466bSMatthew N. Dodd sizeof(u_char), IP_RECVTTL, IPPROTO_IP); 16414957466bSMatthew N. Dodd if (*mp) 16424957466bSMatthew N. Dodd mp = &(*mp)->m_next; 16434957466bSMatthew N. 
Dodd } 164482c23ebaSBill Fenner #ifdef notyet 164582c23ebaSBill Fenner /* XXX 164682c23ebaSBill Fenner * Moving these out of udp_input() made them even more broken 164782c23ebaSBill Fenner * than they already were. 164882c23ebaSBill Fenner */ 164982c23ebaSBill Fenner /* options were tossed already */ 165082c23ebaSBill Fenner if (inp->inp_flags & INP_RECVOPTS) { 165182c23ebaSBill Fenner *mp = sbcreatecontrol((caddr_t) opts_deleted_above, 165282c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVOPTS, IPPROTO_IP); 165382c23ebaSBill Fenner if (*mp) 165482c23ebaSBill Fenner mp = &(*mp)->m_next; 165582c23ebaSBill Fenner } 165682c23ebaSBill Fenner /* ip_srcroute doesn't do what we want here, need to fix */ 165782c23ebaSBill Fenner if (inp->inp_flags & INP_RECVRETOPTS) { 1658e0982661SAndre Oppermann *mp = sbcreatecontrol((caddr_t) ip_srcroute(m), 165982c23ebaSBill Fenner sizeof(struct in_addr), IP_RECVRETOPTS, IPPROTO_IP); 166082c23ebaSBill Fenner if (*mp) 166182c23ebaSBill Fenner mp = &(*mp)->m_next; 166282c23ebaSBill Fenner } 166382c23ebaSBill Fenner #endif 166482c23ebaSBill Fenner if (inp->inp_flags & INP_RECVIF) { 1665d314ad7bSJulian Elischer struct ifnet *ifp; 1666d314ad7bSJulian Elischer struct sdlbuf { 166782c23ebaSBill Fenner struct sockaddr_dl sdl; 1668d314ad7bSJulian Elischer u_char pad[32]; 1669d314ad7bSJulian Elischer } sdlbuf; 1670d314ad7bSJulian Elischer struct sockaddr_dl *sdp; 1671d314ad7bSJulian Elischer struct sockaddr_dl *sdl2 = &sdlbuf.sdl; 167282c23ebaSBill Fenner 1673d314ad7bSJulian Elischer if (((ifp = m->m_pkthdr.rcvif)) 1674603724d3SBjoern A. Zeeb && ( ifp->if_index && (ifp->if_index <= V_if_index))) { 16754a0d6638SRuslan Ermilov sdp = (struct sockaddr_dl *)ifp->if_addr->ifa_addr; 1676d314ad7bSJulian Elischer /* 1677d314ad7bSJulian Elischer * Change our mind and don't try copy. 
1678d314ad7bSJulian Elischer */ 1679d314ad7bSJulian Elischer if ((sdp->sdl_family != AF_LINK) 1680d314ad7bSJulian Elischer || (sdp->sdl_len > sizeof(sdlbuf))) { 1681d314ad7bSJulian Elischer goto makedummy; 1682d314ad7bSJulian Elischer } 1683d314ad7bSJulian Elischer bcopy(sdp, sdl2, sdp->sdl_len); 1684d314ad7bSJulian Elischer } else { 1685d314ad7bSJulian Elischer makedummy: 1686d314ad7bSJulian Elischer sdl2->sdl_len 1687d314ad7bSJulian Elischer = offsetof(struct sockaddr_dl, sdl_data[0]); 1688d314ad7bSJulian Elischer sdl2->sdl_family = AF_LINK; 1689d314ad7bSJulian Elischer sdl2->sdl_index = 0; 1690d314ad7bSJulian Elischer sdl2->sdl_nlen = sdl2->sdl_alen = sdl2->sdl_slen = 0; 1691d314ad7bSJulian Elischer } 1692d314ad7bSJulian Elischer *mp = sbcreatecontrol((caddr_t) sdl2, sdl2->sdl_len, 169382c23ebaSBill Fenner IP_RECVIF, IPPROTO_IP); 169482c23ebaSBill Fenner if (*mp) 169582c23ebaSBill Fenner mp = &(*mp)->m_next; 169682c23ebaSBill Fenner } 169782c23ebaSBill Fenner } 169882c23ebaSBill Fenner 16994d2e3692SLuigi Rizzo /* 170030916a2dSRobert Watson * XXXRW: Multicast routing code in ip_mroute.c is generally MPSAFE, but the 170130916a2dSRobert Watson * ip_rsvp and ip_rsvp_on variables need to be interlocked with rsvp_on 170230916a2dSRobert Watson * locking. This code remains in ip_input.c as ip_mroute.c is optionally 170330916a2dSRobert Watson * compiled. 17044d2e3692SLuigi Rizzo */ 1705df8bae1dSRodney W. Grimes int 1706f0068c4aSGarrett Wollman ip_rsvp_init(struct socket *so) 1707f0068c4aSGarrett Wollman { 17088b615593SMarko Zec 1709f0068c4aSGarrett Wollman if (so->so_type != SOCK_RAW || 1710f0068c4aSGarrett Wollman so->so_proto->pr_protocol != IPPROTO_RSVP) 1711f0068c4aSGarrett Wollman return EOPNOTSUPP; 1712f0068c4aSGarrett Wollman 1713603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) 1714f0068c4aSGarrett Wollman return EADDRINUSE; 1715f0068c4aSGarrett Wollman 1716603724d3SBjoern A. 
Zeeb V_ip_rsvpd = so; 17171c5de19aSGarrett Wollman /* 17181c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-increment 17191c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 17201c5de19aSGarrett Wollman */ 1721603724d3SBjoern A. Zeeb if (!V_ip_rsvp_on) { 1722603724d3SBjoern A. Zeeb V_ip_rsvp_on = 1; 1723603724d3SBjoern A. Zeeb V_rsvp_on++; 17241c5de19aSGarrett Wollman } 1725f0068c4aSGarrett Wollman 1726f0068c4aSGarrett Wollman return 0; 1727f0068c4aSGarrett Wollman } 1728f0068c4aSGarrett Wollman 1729f0068c4aSGarrett Wollman int 1730f0068c4aSGarrett Wollman ip_rsvp_done(void) 1731f0068c4aSGarrett Wollman { 17328b615593SMarko Zec 1733603724d3SBjoern A. Zeeb V_ip_rsvpd = NULL; 17341c5de19aSGarrett Wollman /* 17351c5de19aSGarrett Wollman * This may seem silly, but we need to be sure we don't over-decrement 17361c5de19aSGarrett Wollman * the RSVP counter, in case something slips up. 17371c5de19aSGarrett Wollman */ 1738603724d3SBjoern A. Zeeb if (V_ip_rsvp_on) { 1739603724d3SBjoern A. Zeeb V_ip_rsvp_on = 0; 1740603724d3SBjoern A. Zeeb V_rsvp_on--; 17411c5de19aSGarrett Wollman } 1742f0068c4aSGarrett Wollman return 0; 1743f0068c4aSGarrett Wollman } 1744bbb4330bSLuigi Rizzo 1745bbb4330bSLuigi Rizzo void 1746bbb4330bSLuigi Rizzo rsvp_input(struct mbuf *m, int off) /* XXX must fixup manually */ 1747bbb4330bSLuigi Rizzo { 17488b615593SMarko Zec 1749bbb4330bSLuigi Rizzo if (rsvp_input_p) { /* call the real one if loaded */ 1750bbb4330bSLuigi Rizzo rsvp_input_p(m, off); 1751bbb4330bSLuigi Rizzo return; 1752bbb4330bSLuigi Rizzo } 1753bbb4330bSLuigi Rizzo 1754bbb4330bSLuigi Rizzo /* Can still get packets with rsvp_on = 0 if there is a local member 1755bbb4330bSLuigi Rizzo * of the group to which the RSVP packet is addressed. But in this 1756bbb4330bSLuigi Rizzo * case we want to throw the packet away. 1757bbb4330bSLuigi Rizzo */ 1758bbb4330bSLuigi Rizzo 1759603724d3SBjoern A. 
Zeeb if (!V_rsvp_on) { 1760bbb4330bSLuigi Rizzo m_freem(m); 1761bbb4330bSLuigi Rizzo return; 1762bbb4330bSLuigi Rizzo } 1763bbb4330bSLuigi Rizzo 1764603724d3SBjoern A. Zeeb if (V_ip_rsvpd != NULL) { 1765bbb4330bSLuigi Rizzo rip_input(m, off); 1766bbb4330bSLuigi Rizzo return; 1767bbb4330bSLuigi Rizzo } 1768bbb4330bSLuigi Rizzo /* Drop the packet */ 1769bbb4330bSLuigi Rizzo m_freem(m); 1770bbb4330bSLuigi Rizzo } 1771