/*-
 * Copyright (c) 1982, 1986, 1991, 1993, 1995
 *	The Regents of the University of California.
 * Copyright (c) 2007-2009 Robert N. M. Watson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)in_pcb.c	8.4 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ddb.h"
#include "opt_ipsec.h"
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>

#ifdef DDB
#include <ddb/ddb.h>
#endif

#include <vm/uma.h>

#include <net/if.h>
#include <net/if_types.h>
Grimes #include <net/route.h> 65530c0060SRobert Watson #include <net/vnet.h> 66df8bae1dSRodney W. Grimes 67df8bae1dSRodney W. Grimes #include <netinet/in.h> 68df8bae1dSRodney W. Grimes #include <netinet/in_pcb.h> 69df8bae1dSRodney W. Grimes #include <netinet/in_var.h> 70df8bae1dSRodney W. Grimes #include <netinet/ip_var.h> 71340c35deSJonathan Lemon #include <netinet/tcp_var.h> 725f311da2SMike Silbersack #include <netinet/udp.h> 735f311da2SMike Silbersack #include <netinet/udp_var.h> 74cfa1ca9dSYoshinobu Inoue #ifdef INET6 75cfa1ca9dSYoshinobu Inoue #include <netinet/ip6.h> 76cfa1ca9dSYoshinobu Inoue #include <netinet6/ip6_var.h> 77*efc76f72SBjoern A. Zeeb #include <netinet6/in6_pcb.h> 78cfa1ca9dSYoshinobu Inoue #endif /* INET6 */ 79cfa1ca9dSYoshinobu Inoue 80df8bae1dSRodney W. Grimes 81b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 82b9234fafSSam Leffler #include <netipsec/ipsec.h> 83b9234fafSSam Leffler #include <netipsec/key.h> 84b2630c29SGeorge V. Neville-Neil #endif /* IPSEC */ 85b9234fafSSam Leffler 86aed55708SRobert Watson #include <security/mac/mac_framework.h> 87aed55708SRobert Watson 88101f9fc8SPeter Wemm /* 89101f9fc8SPeter Wemm * These configure the range of local port addresses assigned to 90101f9fc8SPeter Wemm * "unspecified" outgoing connections/packets/whatever. 91101f9fc8SPeter Wemm */ 92eddfbb76SRobert Watson VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ 93eddfbb76SRobert Watson VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ 94eddfbb76SRobert Watson VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */ 95eddfbb76SRobert Watson VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */ 96eddfbb76SRobert Watson VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */ 97eddfbb76SRobert Watson VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */ 98101f9fc8SPeter Wemm 99b0d22693SCrist J. Clark /* 100b0d22693SCrist J. 
Clark * Reserved ports accessible only to root. There are significant 101b0d22693SCrist J. Clark * security considerations that must be accounted for when changing these, 102b0d22693SCrist J. Clark * but the security benefits can be great. Please be careful. 103b0d22693SCrist J. Clark */ 104eddfbb76SRobert Watson VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */ 105eddfbb76SRobert Watson VNET_DEFINE(int, ipport_reservedlow); 106b0d22693SCrist J. Clark 1075f311da2SMike Silbersack /* Variables dealing with random ephemeral port allocation. */ 108eddfbb76SRobert Watson VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */ 109eddfbb76SRobert Watson VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */ 110eddfbb76SRobert Watson VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */ 111eddfbb76SRobert Watson VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */ 112eddfbb76SRobert Watson VNET_DEFINE(int, ipport_tcpallocs); 1133e288e62SDimitry Andric static VNET_DEFINE(int, ipport_tcplastcount); 114eddfbb76SRobert Watson 1151e77c105SRobert Watson #define V_ipport_tcplastcount VNET(ipport_tcplastcount) 1166ac48b74SMike Silbersack 117bbd42ad0SPeter Wemm #define RANGECHK(var, min, max) \ 118bbd42ad0SPeter Wemm if ((var) < (min)) { (var) = (min); } \ 119bbd42ad0SPeter Wemm else if ((var) > (max)) { (var) = (max); } 120bbd42ad0SPeter Wemm 1216d888973SRobert Watson static void in_pcbremlists(struct inpcb *inp); 1226d888973SRobert Watson 123bbd42ad0SPeter Wemm static int 12482d9ae4eSPoul-Henning Kamp sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 125bbd42ad0SPeter Wemm { 12630a4ab08SBruce Evans int error; 12730a4ab08SBruce Evans 128eddfbb76SRobert Watson #ifdef VIMAGE 129eddfbb76SRobert Watson error = vnet_sysctl_handle_int(oidp, arg1, arg2, req); 130eddfbb76SRobert Watson #else 131f6dfe47aSMarko Zec error = sysctl_handle_int(oidp, arg1, arg2, req); 132eddfbb76SRobert Watson #endif 
13330a4ab08SBruce Evans if (error == 0) { 134603724d3SBjoern A. Zeeb RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 135603724d3SBjoern A. Zeeb RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 136603724d3SBjoern A. Zeeb RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); 137603724d3SBjoern A. Zeeb RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); 138603724d3SBjoern A. Zeeb RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); 139603724d3SBjoern A. Zeeb RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); 140bbd42ad0SPeter Wemm } 14130a4ab08SBruce Evans return (error); 142bbd42ad0SPeter Wemm } 143bbd42ad0SPeter Wemm 144bbd42ad0SPeter Wemm #undef RANGECHK 145bbd42ad0SPeter Wemm 14633b3ac06SPeter Wemm SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 14733b3ac06SPeter Wemm 148eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, 149eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0, 1508b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 151eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, 152eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0, 1538b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 154eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first, 155eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0, 1568b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 157eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last, 158eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0, 1598b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 160eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, 161eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0, 1628b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 
163eddfbb76SRobert Watson SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, 164eddfbb76SRobert Watson CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0, 1658b615593SMarko Zec &sysctl_net_ipport_check, "I", ""); 166eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 167eddfbb76SRobert Watson CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, ""); 168eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 169eddfbb76SRobert Watson CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, ""); 170eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW, 171eddfbb76SRobert Watson &VNET_NAME(ipport_randomized), 0, "Enable random port allocation"); 172eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW, 173eddfbb76SRobert Watson &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " 1746ee79c59SMaxim Konovalov "allocations before switching to a sequental one"); 175eddfbb76SRobert Watson SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW, 176eddfbb76SRobert Watson &VNET_NAME(ipport_randomtime), 0, 1778b615593SMarko Zec "Minimum time to keep sequental port " 1786ee79c59SMaxim Konovalov "allocation before switching to a random one"); 1790312fbe9SPoul-Henning Kamp 180c3229e05SDavid Greenman /* 181c3229e05SDavid Greenman * in_pcb.c: manage the Protocol Control Blocks. 182c3229e05SDavid Greenman * 183de35559fSRobert Watson * NOTE: It is assumed that most of these functions will be called with 184de35559fSRobert Watson * the pcbinfo lock held, and often, the inpcb lock held, as these utility 185de35559fSRobert Watson * functions often modify hash chains or addresses in pcbs. 
186c3229e05SDavid Greenman */ 187c3229e05SDavid Greenman 188c3229e05SDavid Greenman /* 1899bcd427bSRobert Watson * Initialize an inpcbinfo -- we should be able to reduce the number of 1909bcd427bSRobert Watson * arguments in time. 1919bcd427bSRobert Watson */ 1929bcd427bSRobert Watson void 1939bcd427bSRobert Watson in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, 1949bcd427bSRobert Watson struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, 1959bcd427bSRobert Watson char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, 1969bcd427bSRobert Watson uint32_t inpcbzone_flags) 1979bcd427bSRobert Watson { 1989bcd427bSRobert Watson 1999bcd427bSRobert Watson INP_INFO_LOCK_INIT(pcbinfo, name); 2009bcd427bSRobert Watson #ifdef VIMAGE 2019bcd427bSRobert Watson pcbinfo->ipi_vnet = curvnet; 2029bcd427bSRobert Watson #endif 2039bcd427bSRobert Watson pcbinfo->ipi_listhead = listhead; 2049bcd427bSRobert Watson LIST_INIT(pcbinfo->ipi_listhead); 2059bcd427bSRobert Watson pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, 2069bcd427bSRobert Watson &pcbinfo->ipi_hashmask); 2079bcd427bSRobert Watson pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, 2089bcd427bSRobert Watson &pcbinfo->ipi_porthashmask); 2099bcd427bSRobert Watson pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), 2109bcd427bSRobert Watson NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, 2119bcd427bSRobert Watson inpcbzone_flags); 2129bcd427bSRobert Watson uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); 2139bcd427bSRobert Watson } 2149bcd427bSRobert Watson 2159bcd427bSRobert Watson /* 2169bcd427bSRobert Watson * Destroy an inpcbinfo. 
2179bcd427bSRobert Watson */ 2189bcd427bSRobert Watson void 2199bcd427bSRobert Watson in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) 2209bcd427bSRobert Watson { 2219bcd427bSRobert Watson 2229bcd427bSRobert Watson hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); 2239bcd427bSRobert Watson hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, 2249bcd427bSRobert Watson pcbinfo->ipi_porthashmask); 2259bcd427bSRobert Watson uma_zdestroy(pcbinfo->ipi_zone); 2269bcd427bSRobert Watson INP_INFO_LOCK_DESTROY(pcbinfo); 2279bcd427bSRobert Watson } 2289bcd427bSRobert Watson 2299bcd427bSRobert Watson /* 230c3229e05SDavid Greenman * Allocate a PCB and associate it with the socket. 231d915b280SStephan Uphoff * On success return with the PCB locked. 232c3229e05SDavid Greenman */ 233df8bae1dSRodney W. Grimes int 234d915b280SStephan Uphoff in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 235df8bae1dSRodney W. Grimes { 236136d4f1cSRobert Watson struct inpcb *inp; 23713cf67f3SHajimu UMEMOTO int error; 238a557af22SRobert Watson 23959daba27SSam Leffler INP_INFO_WLOCK_ASSERT(pcbinfo); 240a557af22SRobert Watson error = 0; 241d915b280SStephan Uphoff inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); 242df8bae1dSRodney W. Grimes if (inp == NULL) 243df8bae1dSRodney W. Grimes return (ENOBUFS); 244d915b280SStephan Uphoff bzero(inp, inp_zero_size); 24515bd2b43SDavid Greenman inp->inp_pcbinfo = pcbinfo; 246df8bae1dSRodney W. Grimes inp->inp_socket = so; 24786d02c5cSBjoern A. Zeeb inp->inp_cred = crhold(so->so_cred); 2488b07e49aSJulian Elischer inp->inp_inc.inc_fibnum = so->so_fibnum; 249a557af22SRobert Watson #ifdef MAC 25030d239bcSRobert Watson error = mac_inpcb_init(inp, M_NOWAIT); 251a557af22SRobert Watson if (error != 0) 252a557af22SRobert Watson goto out; 25330d239bcSRobert Watson mac_inpcb_create(so, inp); 254a557af22SRobert Watson #endif 255b2630c29SGeorge V. 
Neville-Neil #ifdef IPSEC 25613cf67f3SHajimu UMEMOTO error = ipsec_init_policy(so, &inp->inp_sp); 2570bffde27SRobert Watson if (error != 0) { 2580bffde27SRobert Watson #ifdef MAC 2590bffde27SRobert Watson mac_inpcb_destroy(inp); 2600bffde27SRobert Watson #endif 261a557af22SRobert Watson goto out; 2620bffde27SRobert Watson } 263b2630c29SGeorge V. Neville-Neil #endif /*IPSEC*/ 264e3fd5ffdSRobert Watson #ifdef INET6 265340c35deSJonathan Lemon if (INP_SOCKAF(so) == AF_INET6) { 266340c35deSJonathan Lemon inp->inp_vflag |= INP_IPV6PROTO; 267603724d3SBjoern A. Zeeb if (V_ip6_v6only) 26833841545SHajimu UMEMOTO inp->inp_flags |= IN6P_IPV6_V6ONLY; 269340c35deSJonathan Lemon } 27075daea93SPaul Saab #endif 271712fc218SRobert Watson LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); 2723d4d47f3SGarrett Wollman pcbinfo->ipi_count++; 273df8bae1dSRodney W. Grimes so->so_pcb = (caddr_t)inp; 27433841545SHajimu UMEMOTO #ifdef INET6 275603724d3SBjoern A. Zeeb if (V_ip6_auto_flowlabel) 27633841545SHajimu UMEMOTO inp->inp_flags |= IN6P_AUTOFLOWLABEL; 27733841545SHajimu UMEMOTO #endif 2788501a69cSRobert Watson INP_WLOCK(inp); 279d915b280SStephan Uphoff inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 28028696211SRobert Watson inp->inp_refcount = 1; /* Reference from the inpcbinfo */ 281b2630c29SGeorge V. Neville-Neil #if defined(IPSEC) || defined(MAC) 282a557af22SRobert Watson out: 28386d02c5cSBjoern A. Zeeb if (error != 0) { 28486d02c5cSBjoern A. Zeeb crfree(inp->inp_cred); 285a557af22SRobert Watson uma_zfree(pcbinfo->ipi_zone, inp); 28686d02c5cSBjoern A. Zeeb } 287a557af22SRobert Watson #endif 288a557af22SRobert Watson return (error); 289df8bae1dSRodney W. Grimes } 290df8bae1dSRodney W. Grimes 291df8bae1dSRodney W. Grimes int 292136d4f1cSRobert Watson in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 293df8bae1dSRodney W. 
Grimes { 2944b932371SIan Dowse int anonport, error; 2954b932371SIan Dowse 2961b73ca0bSSam Leffler INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 2978501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 29859daba27SSam Leffler 2994b932371SIan Dowse if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 3004b932371SIan Dowse return (EINVAL); 3014b932371SIan Dowse anonport = inp->inp_lport == 0 && (nam == NULL || 3024b932371SIan Dowse ((struct sockaddr_in *)nam)->sin_port == 0); 3034b932371SIan Dowse error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 304b0330ed9SPawel Jakub Dawidek &inp->inp_lport, cred); 3054b932371SIan Dowse if (error) 3064b932371SIan Dowse return (error); 3074b932371SIan Dowse if (in_pcbinshash(inp) != 0) { 3084b932371SIan Dowse inp->inp_laddr.s_addr = INADDR_ANY; 3094b932371SIan Dowse inp->inp_lport = 0; 3104b932371SIan Dowse return (EAGAIN); 3114b932371SIan Dowse } 3124b932371SIan Dowse if (anonport) 3134b932371SIan Dowse inp->inp_flags |= INP_ANONPORT; 3144b932371SIan Dowse return (0); 3154b932371SIan Dowse } 3164b932371SIan Dowse 317*efc76f72SBjoern A. Zeeb #if defined(INET) || defined(INET6) 318*efc76f72SBjoern A. Zeeb int 319*efc76f72SBjoern A. Zeeb in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, 320*efc76f72SBjoern A. Zeeb struct ucred *cred, int wild) 321*efc76f72SBjoern A. Zeeb { 322*efc76f72SBjoern A. Zeeb struct inpcbinfo *pcbinfo; 323*efc76f72SBjoern A. Zeeb struct inpcb *tmpinp; 324*efc76f72SBjoern A. Zeeb unsigned short *lastport; 325*efc76f72SBjoern A. Zeeb int count, dorandom, error; 326*efc76f72SBjoern A. Zeeb u_short aux, first, last, lport; 327*efc76f72SBjoern A. Zeeb #ifdef INET 328*efc76f72SBjoern A. Zeeb struct in_addr laddr; 329*efc76f72SBjoern A. Zeeb #endif 330*efc76f72SBjoern A. Zeeb 331*efc76f72SBjoern A. Zeeb pcbinfo = inp->inp_pcbinfo; 332*efc76f72SBjoern A. Zeeb 333*efc76f72SBjoern A. Zeeb /* 334*efc76f72SBjoern A. 
Zeeb * Because no actual state changes occur here, a global write lock on 335*efc76f72SBjoern A. Zeeb * the pcbinfo isn't required. 336*efc76f72SBjoern A. Zeeb */ 337*efc76f72SBjoern A. Zeeb INP_INFO_LOCK_ASSERT(pcbinfo); 338*efc76f72SBjoern A. Zeeb INP_LOCK_ASSERT(inp); 339*efc76f72SBjoern A. Zeeb 340*efc76f72SBjoern A. Zeeb if (inp->inp_flags & INP_HIGHPORT) { 341*efc76f72SBjoern A. Zeeb first = V_ipport_hifirstauto; /* sysctl */ 342*efc76f72SBjoern A. Zeeb last = V_ipport_hilastauto; 343*efc76f72SBjoern A. Zeeb lastport = &pcbinfo->ipi_lasthi; 344*efc76f72SBjoern A. Zeeb } else if (inp->inp_flags & INP_LOWPORT) { 345*efc76f72SBjoern A. Zeeb error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 346*efc76f72SBjoern A. Zeeb if (error) 347*efc76f72SBjoern A. Zeeb return (error); 348*efc76f72SBjoern A. Zeeb first = V_ipport_lowfirstauto; /* 1023 */ 349*efc76f72SBjoern A. Zeeb last = V_ipport_lowlastauto; /* 600 */ 350*efc76f72SBjoern A. Zeeb lastport = &pcbinfo->ipi_lastlow; 351*efc76f72SBjoern A. Zeeb } else { 352*efc76f72SBjoern A. Zeeb first = V_ipport_firstauto; /* sysctl */ 353*efc76f72SBjoern A. Zeeb last = V_ipport_lastauto; 354*efc76f72SBjoern A. Zeeb lastport = &pcbinfo->ipi_lastport; 355*efc76f72SBjoern A. Zeeb } 356*efc76f72SBjoern A. Zeeb /* 357*efc76f72SBjoern A. Zeeb * For UDP, use random port allocation as long as the user 358*efc76f72SBjoern A. Zeeb * allows it. For TCP (and as of yet unknown) connections, 359*efc76f72SBjoern A. Zeeb * use random port allocation only if the user allows it AND 360*efc76f72SBjoern A. Zeeb * ipport_tick() allows it. 361*efc76f72SBjoern A. Zeeb */ 362*efc76f72SBjoern A. Zeeb if (V_ipport_randomized && 363*efc76f72SBjoern A. Zeeb (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) 364*efc76f72SBjoern A. Zeeb dorandom = 1; 365*efc76f72SBjoern A. Zeeb else 366*efc76f72SBjoern A. Zeeb dorandom = 0; 367*efc76f72SBjoern A. Zeeb /* 368*efc76f72SBjoern A. 
Zeeb * It makes no sense to do random port allocation if 369*efc76f72SBjoern A. Zeeb * we have the only port available. 370*efc76f72SBjoern A. Zeeb */ 371*efc76f72SBjoern A. Zeeb if (first == last) 372*efc76f72SBjoern A. Zeeb dorandom = 0; 373*efc76f72SBjoern A. Zeeb /* Make sure to not include UDP packets in the count. */ 374*efc76f72SBjoern A. Zeeb if (pcbinfo != &V_udbinfo) 375*efc76f72SBjoern A. Zeeb V_ipport_tcpallocs++; 376*efc76f72SBjoern A. Zeeb /* 377*efc76f72SBjoern A. Zeeb * Instead of having two loops further down counting up or down 378*efc76f72SBjoern A. Zeeb * make sure that first is always <= last and go with only one 379*efc76f72SBjoern A. Zeeb * code path implementing all logic. 380*efc76f72SBjoern A. Zeeb */ 381*efc76f72SBjoern A. Zeeb if (first > last) { 382*efc76f72SBjoern A. Zeeb aux = first; 383*efc76f72SBjoern A. Zeeb first = last; 384*efc76f72SBjoern A. Zeeb last = aux; 385*efc76f72SBjoern A. Zeeb } 386*efc76f72SBjoern A. Zeeb 387*efc76f72SBjoern A. Zeeb #ifdef INET 388*efc76f72SBjoern A. Zeeb /* Make the compiler happy. */ 389*efc76f72SBjoern A. Zeeb laddr.s_addr = 0; 390*efc76f72SBjoern A. Zeeb if ((inp->inp_vflag & INP_IPV4) != 0) { 391*efc76f72SBjoern A. Zeeb KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", 392*efc76f72SBjoern A. Zeeb __func__, inp)); 393*efc76f72SBjoern A. Zeeb laddr = *laddrp; 394*efc76f72SBjoern A. Zeeb } 395*efc76f72SBjoern A. Zeeb #endif 396*efc76f72SBjoern A. Zeeb lport = *lportp; 397*efc76f72SBjoern A. Zeeb 398*efc76f72SBjoern A. Zeeb if (dorandom) 399*efc76f72SBjoern A. Zeeb *lastport = first + (arc4random() % (last - first)); 400*efc76f72SBjoern A. Zeeb 401*efc76f72SBjoern A. Zeeb count = last - first; 402*efc76f72SBjoern A. Zeeb 403*efc76f72SBjoern A. Zeeb do { 404*efc76f72SBjoern A. Zeeb if (count-- < 0) /* completely used? */ 405*efc76f72SBjoern A. Zeeb return (EADDRNOTAVAIL); 406*efc76f72SBjoern A. Zeeb ++*lastport; 407*efc76f72SBjoern A. 
Zeeb if (*lastport < first || *lastport > last) 408*efc76f72SBjoern A. Zeeb *lastport = first; 409*efc76f72SBjoern A. Zeeb lport = htons(*lastport); 410*efc76f72SBjoern A. Zeeb 411*efc76f72SBjoern A. Zeeb #ifdef INET6 412*efc76f72SBjoern A. Zeeb if ((inp->inp_vflag & INP_IPV6) != 0) 413*efc76f72SBjoern A. Zeeb tmpinp = in6_pcblookup_local(pcbinfo, 414*efc76f72SBjoern A. Zeeb &inp->in6p_laddr, lport, wild, cred); 415*efc76f72SBjoern A. Zeeb #endif 416*efc76f72SBjoern A. Zeeb #if defined(INET) && defined(INET6) 417*efc76f72SBjoern A. Zeeb else 418*efc76f72SBjoern A. Zeeb #endif 419*efc76f72SBjoern A. Zeeb #ifdef INET 420*efc76f72SBjoern A. Zeeb tmpinp = in_pcblookup_local(pcbinfo, laddr, 421*efc76f72SBjoern A. Zeeb lport, wild, cred); 422*efc76f72SBjoern A. Zeeb #endif 423*efc76f72SBjoern A. Zeeb } while (tmpinp != NULL); 424*efc76f72SBjoern A. Zeeb 425*efc76f72SBjoern A. Zeeb #ifdef INET 426*efc76f72SBjoern A. Zeeb if ((inp->inp_vflag & INP_IPV4) != 0) 427*efc76f72SBjoern A. Zeeb laddrp->s_addr = laddr.s_addr; 428*efc76f72SBjoern A. Zeeb #endif 429*efc76f72SBjoern A. Zeeb *lportp = lport; 430*efc76f72SBjoern A. Zeeb 431*efc76f72SBjoern A. Zeeb return (0); 432*efc76f72SBjoern A. Zeeb } 433*efc76f72SBjoern A. Zeeb #endif /* INET || INET6 */ 434*efc76f72SBjoern A. Zeeb 4354b932371SIan Dowse /* 4364b932371SIan Dowse * Set up a bind operation on a PCB, performing port allocation 4374b932371SIan Dowse * as required, but do not actually modify the PCB. Callers can 4384b932371SIan Dowse * either complete the bind by setting inp_laddr/inp_lport and 4394b932371SIan Dowse * calling in_pcbinshash(), or they can just use the resulting 4404b932371SIan Dowse * port and address to authorise the sending of a once-off packet. 4414b932371SIan Dowse * 4424b932371SIan Dowse * On error, the values of *laddrp and *lportp are not changed. 
4434b932371SIan Dowse */ 4444b932371SIan Dowse int 445136d4f1cSRobert Watson in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, 446136d4f1cSRobert Watson u_short *lportp, struct ucred *cred) 4474b932371SIan Dowse { 4484b932371SIan Dowse struct socket *so = inp->inp_socket; 44915bd2b43SDavid Greenman struct sockaddr_in *sin; 450c3229e05SDavid Greenman struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 4514b932371SIan Dowse struct in_addr laddr; 452df8bae1dSRodney W. Grimes u_short lport = 0; 4534cc20ab1SSeigo Tanimura int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 454413628a7SBjoern A. Zeeb int error; 455df8bae1dSRodney W. Grimes 4568501a69cSRobert Watson /* 45772bed082SRobert Watson * Because no actual state changes occur here, a global write lock on 45872bed082SRobert Watson * the pcbinfo isn't required. 4598501a69cSRobert Watson */ 4608501a69cSRobert Watson INP_INFO_LOCK_ASSERT(pcbinfo); 46159daba27SSam Leffler INP_LOCK_ASSERT(inp); 46259daba27SSam Leffler 463603724d3SBjoern A. Zeeb if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ 464df8bae1dSRodney W. Grimes return (EADDRNOTAVAIL); 4654b932371SIan Dowse laddr.s_addr = *laddrp; 4664b932371SIan Dowse if (nam != NULL && laddr.s_addr != INADDR_ANY) 467df8bae1dSRodney W. Grimes return (EINVAL); 468c3229e05SDavid Greenman if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 469421d8aa6SBjoern A. Zeeb wild = INPLOOKUP_WILDCARD; 4707c2f3cb9SJamie Gritton if (nam == NULL) { 4717c2f3cb9SJamie Gritton if ((error = prison_local_ip4(cred, &laddr)) != 0) 4727c2f3cb9SJamie Gritton return (error); 4737c2f3cb9SJamie Gritton } else { 47457bf258eSGarrett Wollman sin = (struct sockaddr_in *)nam; 47557bf258eSGarrett Wollman if (nam->sa_len != sizeof (*sin)) 476df8bae1dSRodney W. Grimes return (EINVAL); 477df8bae1dSRodney W. Grimes #ifdef notdef 478df8bae1dSRodney W. Grimes /* 479df8bae1dSRodney W. Grimes * We should check the family, but old programs 480df8bae1dSRodney W. 
Grimes * incorrectly fail to initialize it. 481df8bae1dSRodney W. Grimes */ 482df8bae1dSRodney W. Grimes if (sin->sin_family != AF_INET) 483df8bae1dSRodney W. Grimes return (EAFNOSUPPORT); 484df8bae1dSRodney W. Grimes #endif 485b89e82ddSJamie Gritton error = prison_local_ip4(cred, &sin->sin_addr); 486b89e82ddSJamie Gritton if (error) 487b89e82ddSJamie Gritton return (error); 4884b932371SIan Dowse if (sin->sin_port != *lportp) { 4894b932371SIan Dowse /* Don't allow the port to change. */ 4904b932371SIan Dowse if (*lportp != 0) 4914b932371SIan Dowse return (EINVAL); 492df8bae1dSRodney W. Grimes lport = sin->sin_port; 4934b932371SIan Dowse } 4944b932371SIan Dowse /* NB: lport is left as 0 if the port isn't being changed. */ 495df8bae1dSRodney W. Grimes if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 496df8bae1dSRodney W. Grimes /* 497df8bae1dSRodney W. Grimes * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 498df8bae1dSRodney W. Grimes * allow complete duplication of binding if 499df8bae1dSRodney W. Grimes * SO_REUSEPORT is set, or if SO_REUSEADDR is set 500df8bae1dSRodney W. Grimes * and a multicast address is bound on both 501df8bae1dSRodney W. Grimes * new and duplicated sockets. 502df8bae1dSRodney W. Grimes */ 503df8bae1dSRodney W. Grimes if (so->so_options & SO_REUSEADDR) 504df8bae1dSRodney W. Grimes reuseport = SO_REUSEADDR|SO_REUSEPORT; 505df8bae1dSRodney W. Grimes } else if (sin->sin_addr.s_addr != INADDR_ANY) { 506df8bae1dSRodney W. Grimes sin->sin_port = 0; /* yech... */ 50783103a73SAndrew R. Reiter bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 5084209e01aSAdrian Chadd /* 5094209e01aSAdrian Chadd * Is the address a local IP address? 510f44270e7SPawel Jakub Dawidek * If INP_BINDANY is set, then the socket may be bound 5118696873dSAdrian Chadd * to any endpoint address, local or not. 
5124209e01aSAdrian Chadd */ 513f44270e7SPawel Jakub Dawidek if ((inp->inp_flags & INP_BINDANY) == 0 && 5148896f83aSRobert Watson ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) 515df8bae1dSRodney W. Grimes return (EADDRNOTAVAIL); 516df8bae1dSRodney W. Grimes } 5174b932371SIan Dowse laddr = sin->sin_addr; 518df8bae1dSRodney W. Grimes if (lport) { 519df8bae1dSRodney W. Grimes struct inpcb *t; 520ae0e7143SRobert Watson struct tcptw *tw; 521ae0e7143SRobert Watson 522df8bae1dSRodney W. Grimes /* GROSS */ 523603724d3SBjoern A. Zeeb if (ntohs(lport) <= V_ipport_reservedhigh && 524603724d3SBjoern A. Zeeb ntohs(lport) >= V_ipport_reservedlow && 525acd3428bSRobert Watson priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 52632f9753cSRobert Watson 0)) 5272469dd60SGarrett Wollman return (EACCES); 528835d4b89SPawel Jakub Dawidek if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 52986d02c5cSBjoern A. Zeeb priv_check_cred(inp->inp_cred, 53032f9753cSRobert Watson PRIV_NETINET_REUSEPORT, 0) != 0) { 531078b7042SBjoern A. Zeeb t = in_pcblookup_local(pcbinfo, sin->sin_addr, 532413628a7SBjoern A. Zeeb lport, INPLOOKUP_WILDCARD, cred); 533340c35deSJonathan Lemon /* 534340c35deSJonathan Lemon * XXX 535340c35deSJonathan Lemon * This entire block sorely needs a rewrite. 536340c35deSJonathan Lemon */ 5374cc20ab1SSeigo Tanimura if (t && 538ad71fe3cSRobert Watson ((t->inp_flags & INP_TIMEWAIT) == 0) && 5394658dc83SYaroslav Tykhiy (so->so_type != SOCK_STREAM || 5404658dc83SYaroslav Tykhiy ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && 5414cc20ab1SSeigo Tanimura (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 54252b65dbeSBill Fenner ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 54352b65dbeSBill Fenner (t->inp_socket->so_options & 54452b65dbeSBill Fenner SO_REUSEPORT) == 0) && 54586d02c5cSBjoern A. Zeeb (inp->inp_cred->cr_uid != 54686d02c5cSBjoern A. 
Zeeb t->inp_cred->cr_uid)) 5474049a042SGuido van Rooij return (EADDRINUSE); 5484049a042SGuido van Rooij } 549c3229e05SDavid Greenman t = in_pcblookup_local(pcbinfo, sin->sin_addr, 550413628a7SBjoern A. Zeeb lport, wild, cred); 551ad71fe3cSRobert Watson if (t && (t->inp_flags & INP_TIMEWAIT)) { 552ae0e7143SRobert Watson /* 553ae0e7143SRobert Watson * XXXRW: If an incpb has had its timewait 554ae0e7143SRobert Watson * state recycled, we treat the address as 555ae0e7143SRobert Watson * being in use (for now). This is better 556ae0e7143SRobert Watson * than a panic, but not desirable. 557ae0e7143SRobert Watson */ 558ae0e7143SRobert Watson tw = intotw(inp); 559ae0e7143SRobert Watson if (tw == NULL || 560ae0e7143SRobert Watson (reuseport & tw->tw_so_options) == 0) 561340c35deSJonathan Lemon return (EADDRINUSE); 562ae0e7143SRobert Watson } else if (t && 5634cc20ab1SSeigo Tanimura (reuseport & t->inp_socket->so_options) == 0) { 564e3fd5ffdSRobert Watson #ifdef INET6 56533841545SHajimu UMEMOTO if (ntohl(sin->sin_addr.s_addr) != 566cfa1ca9dSYoshinobu Inoue INADDR_ANY || 567cfa1ca9dSYoshinobu Inoue ntohl(t->inp_laddr.s_addr) != 568cfa1ca9dSYoshinobu Inoue INADDR_ANY || 569cfa1ca9dSYoshinobu Inoue INP_SOCKAF(so) == 570cfa1ca9dSYoshinobu Inoue INP_SOCKAF(t->inp_socket)) 571e3fd5ffdSRobert Watson #endif 572df8bae1dSRodney W. Grimes return (EADDRINUSE); 573df8bae1dSRodney W. Grimes } 574cfa1ca9dSYoshinobu Inoue } 575df8bae1dSRodney W. Grimes } 5764b932371SIan Dowse if (*lportp != 0) 5774b932371SIan Dowse lport = *lportp; 57833b3ac06SPeter Wemm if (lport == 0) { 579*efc76f72SBjoern A. Zeeb error = in_pcb_lport(inp, &laddr, &lport, cred, wild); 580*efc76f72SBjoern A. Zeeb if (error != 0) 581*efc76f72SBjoern A. Zeeb return (error); 58233b3ac06SPeter Wemm 58333b3ac06SPeter Wemm } 5844b932371SIan Dowse *laddrp = laddr.s_addr; 5854b932371SIan Dowse *lportp = lport; 586df8bae1dSRodney W. Grimes return (0); 587df8bae1dSRodney W. Grimes } 588df8bae1dSRodney W. 
Grimes 589999f1343SGarrett Wollman /* 5905200e00eSIan Dowse * Connect from a socket to a specified address. 5915200e00eSIan Dowse * Both address and port must be specified in argument sin. 5925200e00eSIan Dowse * If don't have a local address for this socket yet, 5935200e00eSIan Dowse * then pick one. 594999f1343SGarrett Wollman */ 595999f1343SGarrett Wollman int 596136d4f1cSRobert Watson in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 597999f1343SGarrett Wollman { 5985200e00eSIan Dowse u_short lport, fport; 5995200e00eSIan Dowse in_addr_t laddr, faddr; 6005200e00eSIan Dowse int anonport, error; 601df8bae1dSRodney W. Grimes 60227f74fd0SRobert Watson INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 6038501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 60427f74fd0SRobert Watson 6055200e00eSIan Dowse lport = inp->inp_lport; 6065200e00eSIan Dowse laddr = inp->inp_laddr.s_addr; 6075200e00eSIan Dowse anonport = (lport == 0); 6085200e00eSIan Dowse error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 609b0330ed9SPawel Jakub Dawidek NULL, cred); 6105200e00eSIan Dowse if (error) 6115200e00eSIan Dowse return (error); 6125200e00eSIan Dowse 6135200e00eSIan Dowse /* Do the initial binding of the local address if required. */ 6145200e00eSIan Dowse if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 6155200e00eSIan Dowse inp->inp_lport = lport; 6165200e00eSIan Dowse inp->inp_laddr.s_addr = laddr; 6175200e00eSIan Dowse if (in_pcbinshash(inp) != 0) { 6185200e00eSIan Dowse inp->inp_laddr.s_addr = INADDR_ANY; 6195200e00eSIan Dowse inp->inp_lport = 0; 6205200e00eSIan Dowse return (EAGAIN); 6215200e00eSIan Dowse } 6225200e00eSIan Dowse } 6235200e00eSIan Dowse 6245200e00eSIan Dowse /* Commit the remaining changes. 
*/ 6255200e00eSIan Dowse inp->inp_lport = lport; 6265200e00eSIan Dowse inp->inp_laddr.s_addr = laddr; 6275200e00eSIan Dowse inp->inp_faddr.s_addr = faddr; 6285200e00eSIan Dowse inp->inp_fport = fport; 6295200e00eSIan Dowse in_pcbrehash(inp); 6302cb64cb2SGeorge V. Neville-Neil 6315200e00eSIan Dowse if (anonport) 6325200e00eSIan Dowse inp->inp_flags |= INP_ANONPORT; 6335200e00eSIan Dowse return (0); 6345200e00eSIan Dowse } 6355200e00eSIan Dowse 6365200e00eSIan Dowse /* 6370895aec3SBjoern A. Zeeb * Do proper source address selection on an unbound socket in case 6380895aec3SBjoern A. Zeeb * of connect. Take jails into account as well. 6390895aec3SBjoern A. Zeeb */ 6400895aec3SBjoern A. Zeeb static int 6410895aec3SBjoern A. Zeeb in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, 6420895aec3SBjoern A. Zeeb struct ucred *cred) 6430895aec3SBjoern A. Zeeb { 6440895aec3SBjoern A. Zeeb struct ifaddr *ifa; 6450895aec3SBjoern A. Zeeb struct sockaddr *sa; 6460895aec3SBjoern A. Zeeb struct sockaddr_in *sin; 6470895aec3SBjoern A. Zeeb struct route sro; 6480895aec3SBjoern A. Zeeb int error; 6490895aec3SBjoern A. Zeeb 650413628a7SBjoern A. Zeeb KASSERT(laddr != NULL, ("%s: laddr NULL", __func__)); 6510895aec3SBjoern A. Zeeb 652592bcae8SBjoern A. Zeeb /* 653592bcae8SBjoern A. Zeeb * Bypass source address selection and use the primary jail IP 654592bcae8SBjoern A. Zeeb * if requested. 655592bcae8SBjoern A. Zeeb */ 656592bcae8SBjoern A. Zeeb if (cred != NULL && !prison_saddrsel_ip4(cred, laddr)) 657592bcae8SBjoern A. Zeeb return (0); 658592bcae8SBjoern A. Zeeb 6590895aec3SBjoern A. Zeeb error = 0; 6600895aec3SBjoern A. Zeeb bzero(&sro, sizeof(sro)); 6610895aec3SBjoern A. Zeeb 6620895aec3SBjoern A. Zeeb sin = (struct sockaddr_in *)&sro.ro_dst; 6630895aec3SBjoern A. Zeeb sin->sin_family = AF_INET; 6640895aec3SBjoern A. Zeeb sin->sin_len = sizeof(struct sockaddr_in); 6650895aec3SBjoern A. Zeeb sin->sin_addr.s_addr = faddr->s_addr; 6660895aec3SBjoern A. 
Zeeb 6670895aec3SBjoern A. Zeeb /* 6680895aec3SBjoern A. Zeeb * If route is known our src addr is taken from the i/f, 6690895aec3SBjoern A. Zeeb * else punt. 6700895aec3SBjoern A. Zeeb * 6710895aec3SBjoern A. Zeeb * Find out route to destination. 6720895aec3SBjoern A. Zeeb */ 6730895aec3SBjoern A. Zeeb if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) 6746e6b3f7cSQing Li in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum); 6750895aec3SBjoern A. Zeeb 6760895aec3SBjoern A. Zeeb /* 6770895aec3SBjoern A. Zeeb * If we found a route, use the address corresponding to 6780895aec3SBjoern A. Zeeb * the outgoing interface. 6790895aec3SBjoern A. Zeeb * 6800895aec3SBjoern A. Zeeb * Otherwise assume faddr is reachable on a directly connected 6810895aec3SBjoern A. Zeeb * network and try to find a corresponding interface to take 6820895aec3SBjoern A. Zeeb * the source address from. 6830895aec3SBjoern A. Zeeb */ 6840895aec3SBjoern A. Zeeb if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { 6858c0fec80SRobert Watson struct in_ifaddr *ia; 6860895aec3SBjoern A. Zeeb struct ifnet *ifp; 6870895aec3SBjoern A. Zeeb 6880895aec3SBjoern A. Zeeb ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin)); 6890895aec3SBjoern A. Zeeb if (ia == NULL) 6900ed6142bSQing Li ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0)); 6910895aec3SBjoern A. Zeeb if (ia == NULL) { 6920895aec3SBjoern A. Zeeb error = ENETUNREACH; 6930895aec3SBjoern A. Zeeb goto done; 6940895aec3SBjoern A. Zeeb } 6950895aec3SBjoern A. Zeeb 6960304c731SJamie Gritton if (cred == NULL || !prison_flag(cred, PR_IP4)) { 6970895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 6988c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 6990895aec3SBjoern A. Zeeb goto done; 7000895aec3SBjoern A. Zeeb } 7010895aec3SBjoern A. Zeeb 7020895aec3SBjoern A. Zeeb ifp = ia->ia_ifp; 7038c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 7040895aec3SBjoern A. Zeeb ia = NULL; 7059317b04eSRobert Watson IF_ADDR_LOCK(ifp); 7060895aec3SBjoern A. 
Zeeb TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 7070895aec3SBjoern A. Zeeb 7080895aec3SBjoern A. Zeeb sa = ifa->ifa_addr; 7090895aec3SBjoern A. Zeeb if (sa->sa_family != AF_INET) 7100895aec3SBjoern A. Zeeb continue; 7110895aec3SBjoern A. Zeeb sin = (struct sockaddr_in *)sa; 712b89e82ddSJamie Gritton if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 7130895aec3SBjoern A. Zeeb ia = (struct in_ifaddr *)ifa; 7140895aec3SBjoern A. Zeeb break; 7150895aec3SBjoern A. Zeeb } 7160895aec3SBjoern A. Zeeb } 7170895aec3SBjoern A. Zeeb if (ia != NULL) { 7180895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 7199317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 7200895aec3SBjoern A. Zeeb goto done; 7210895aec3SBjoern A. Zeeb } 7229317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 7230895aec3SBjoern A. Zeeb 7240895aec3SBjoern A. Zeeb /* 3. As a last resort return the 'default' jail address. */ 725b89e82ddSJamie Gritton error = prison_get_ip4(cred, laddr); 7260895aec3SBjoern A. Zeeb goto done; 7270895aec3SBjoern A. Zeeb } 7280895aec3SBjoern A. Zeeb 7290895aec3SBjoern A. Zeeb /* 7300895aec3SBjoern A. Zeeb * If the outgoing interface on the route found is not 7310895aec3SBjoern A. Zeeb * a loopback interface, use the address from that interface. 7320895aec3SBjoern A. Zeeb * In case of jails do those three steps: 7330895aec3SBjoern A. Zeeb * 1. check if the interface address belongs to the jail. If so use it. 7340895aec3SBjoern A. Zeeb * 2. check if we have any address on the outgoing interface 7350895aec3SBjoern A. Zeeb * belonging to this jail. If so use it. 7360895aec3SBjoern A. Zeeb * 3. as a last resort return the 'default' jail address. 7370895aec3SBjoern A. Zeeb */ 7380895aec3SBjoern A. Zeeb if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { 7398c0fec80SRobert Watson struct in_ifaddr *ia; 7409317b04eSRobert Watson struct ifnet *ifp; 7410895aec3SBjoern A. Zeeb 7420895aec3SBjoern A. Zeeb /* If not jailed, use the default returned. 
*/ 7430304c731SJamie Gritton if (cred == NULL || !prison_flag(cred, PR_IP4)) { 7440895aec3SBjoern A. Zeeb ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; 7450895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 7460895aec3SBjoern A. Zeeb goto done; 7470895aec3SBjoern A. Zeeb } 7480895aec3SBjoern A. Zeeb 7490895aec3SBjoern A. Zeeb /* Jailed. */ 7500895aec3SBjoern A. Zeeb /* 1. Check if the iface address belongs to the jail. */ 7510895aec3SBjoern A. Zeeb sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr; 752b89e82ddSJamie Gritton if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 7530895aec3SBjoern A. Zeeb ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; 7540895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 7550895aec3SBjoern A. Zeeb goto done; 7560895aec3SBjoern A. Zeeb } 7570895aec3SBjoern A. Zeeb 7580895aec3SBjoern A. Zeeb /* 7590895aec3SBjoern A. Zeeb * 2. Check if we have any address on the outgoing interface 7600895aec3SBjoern A. Zeeb * belonging to this jail. 7610895aec3SBjoern A. Zeeb */ 7628c0fec80SRobert Watson ia = NULL; 7639317b04eSRobert Watson ifp = sro.ro_rt->rt_ifp; 7649317b04eSRobert Watson IF_ADDR_LOCK(ifp); 7659317b04eSRobert Watson TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 7660895aec3SBjoern A. Zeeb sa = ifa->ifa_addr; 7670895aec3SBjoern A. Zeeb if (sa->sa_family != AF_INET) 7680895aec3SBjoern A. Zeeb continue; 7690895aec3SBjoern A. Zeeb sin = (struct sockaddr_in *)sa; 770b89e82ddSJamie Gritton if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 7710895aec3SBjoern A. Zeeb ia = (struct in_ifaddr *)ifa; 7720895aec3SBjoern A. Zeeb break; 7730895aec3SBjoern A. Zeeb } 7740895aec3SBjoern A. Zeeb } 7750895aec3SBjoern A. Zeeb if (ia != NULL) { 7760895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 7779317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 7780895aec3SBjoern A. Zeeb goto done; 7790895aec3SBjoern A. Zeeb } 7809317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 7810895aec3SBjoern A. 
Zeeb 7820895aec3SBjoern A. Zeeb /* 3. As a last resort return the 'default' jail address. */ 783b89e82ddSJamie Gritton error = prison_get_ip4(cred, laddr); 7840895aec3SBjoern A. Zeeb goto done; 7850895aec3SBjoern A. Zeeb } 7860895aec3SBjoern A. Zeeb 7870895aec3SBjoern A. Zeeb /* 7880895aec3SBjoern A. Zeeb * The outgoing interface is marked with 'loopback net', so a route 7890895aec3SBjoern A. Zeeb * to ourselves is here. 7900895aec3SBjoern A. Zeeb * Try to find the interface of the destination address and then 7910895aec3SBjoern A. Zeeb * take the address from there. That interface is not necessarily 7920895aec3SBjoern A. Zeeb * a loopback interface. 7930895aec3SBjoern A. Zeeb * In case of jails, check that it is an address of the jail 7940895aec3SBjoern A. Zeeb * and if we cannot find, fall back to the 'default' jail address. 7950895aec3SBjoern A. Zeeb */ 7960895aec3SBjoern A. Zeeb if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { 7970895aec3SBjoern A. Zeeb struct sockaddr_in sain; 7988c0fec80SRobert Watson struct in_ifaddr *ia; 7990895aec3SBjoern A. Zeeb 8000895aec3SBjoern A. Zeeb bzero(&sain, sizeof(struct sockaddr_in)); 8010895aec3SBjoern A. Zeeb sain.sin_family = AF_INET; 8020895aec3SBjoern A. Zeeb sain.sin_len = sizeof(struct sockaddr_in); 8030895aec3SBjoern A. Zeeb sain.sin_addr.s_addr = faddr->s_addr; 8040895aec3SBjoern A. Zeeb 8050895aec3SBjoern A. Zeeb ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain))); 8060895aec3SBjoern A. Zeeb if (ia == NULL) 8070ed6142bSQing Li ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0)); 808f0bb05fcSQing Li if (ia == NULL) 809f0bb05fcSQing Li ia = ifatoia(ifa_ifwithaddr(sintosa(&sain))); 8100895aec3SBjoern A. Zeeb 8110304c731SJamie Gritton if (cred == NULL || !prison_flag(cred, PR_IP4)) { 8120895aec3SBjoern A. Zeeb if (ia == NULL) { 8130895aec3SBjoern A. Zeeb error = ENETUNREACH; 8140895aec3SBjoern A. Zeeb goto done; 8150895aec3SBjoern A. Zeeb } 8160895aec3SBjoern A. 
Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 8178c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 8180895aec3SBjoern A. Zeeb goto done; 8190895aec3SBjoern A. Zeeb } 8200895aec3SBjoern A. Zeeb 8210895aec3SBjoern A. Zeeb /* Jailed. */ 8220895aec3SBjoern A. Zeeb if (ia != NULL) { 8230895aec3SBjoern A. Zeeb struct ifnet *ifp; 8240895aec3SBjoern A. Zeeb 8250895aec3SBjoern A. Zeeb ifp = ia->ia_ifp; 8268c0fec80SRobert Watson ifa_free(&ia->ia_ifa); 8270895aec3SBjoern A. Zeeb ia = NULL; 8289317b04eSRobert Watson IF_ADDR_LOCK(ifp); 8290895aec3SBjoern A. Zeeb TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 8300895aec3SBjoern A. Zeeb 8310895aec3SBjoern A. Zeeb sa = ifa->ifa_addr; 8320895aec3SBjoern A. Zeeb if (sa->sa_family != AF_INET) 8330895aec3SBjoern A. Zeeb continue; 8340895aec3SBjoern A. Zeeb sin = (struct sockaddr_in *)sa; 835b89e82ddSJamie Gritton if (prison_check_ip4(cred, 836b89e82ddSJamie Gritton &sin->sin_addr) == 0) { 8370895aec3SBjoern A. Zeeb ia = (struct in_ifaddr *)ifa; 8380895aec3SBjoern A. Zeeb break; 8390895aec3SBjoern A. Zeeb } 8400895aec3SBjoern A. Zeeb } 8410895aec3SBjoern A. Zeeb if (ia != NULL) { 8420895aec3SBjoern A. Zeeb laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 8439317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 8440895aec3SBjoern A. Zeeb goto done; 8450895aec3SBjoern A. Zeeb } 8469317b04eSRobert Watson IF_ADDR_UNLOCK(ifp); 8470895aec3SBjoern A. Zeeb } 8480895aec3SBjoern A. Zeeb 8490895aec3SBjoern A. Zeeb /* 3. As a last resort return the 'default' jail address. */ 850b89e82ddSJamie Gritton error = prison_get_ip4(cred, laddr); 8510895aec3SBjoern A. Zeeb goto done; 8520895aec3SBjoern A. Zeeb } 8530895aec3SBjoern A. Zeeb 8540895aec3SBjoern A. Zeeb done: 8550895aec3SBjoern A. Zeeb if (sro.ro_rt != NULL) 8560895aec3SBjoern A. Zeeb RTFREE(sro.ro_rt); 8570895aec3SBjoern A. Zeeb return (error); 8580895aec3SBjoern A. Zeeb } 8590895aec3SBjoern A. Zeeb 8600895aec3SBjoern A. 
Zeeb /* 8615200e00eSIan Dowse * Set up for a connect from a socket to the specified address. 8625200e00eSIan Dowse * On entry, *laddrp and *lportp should contain the current local 8635200e00eSIan Dowse * address and port for the PCB; these are updated to the values 8645200e00eSIan Dowse * that should be placed in inp_laddr and inp_lport to complete 8655200e00eSIan Dowse * the connect. 8665200e00eSIan Dowse * 8675200e00eSIan Dowse * On success, *faddrp and *fportp will be set to the remote address 8685200e00eSIan Dowse * and port. These are not updated in the error case. 8695200e00eSIan Dowse * 8705200e00eSIan Dowse * If the operation fails because the connection already exists, 8715200e00eSIan Dowse * *oinpp will be set to the PCB of that connection so that the 8725200e00eSIan Dowse * caller can decide to override it. In all other cases, *oinpp 8735200e00eSIan Dowse * is set to NULL. 8745200e00eSIan Dowse */ 8755200e00eSIan Dowse int 876136d4f1cSRobert Watson in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, 877136d4f1cSRobert Watson in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp, 878136d4f1cSRobert Watson struct inpcb **oinpp, struct ucred *cred) 8795200e00eSIan Dowse { 8805200e00eSIan Dowse struct sockaddr_in *sin = (struct sockaddr_in *)nam; 8815200e00eSIan Dowse struct in_ifaddr *ia; 8825200e00eSIan Dowse struct inpcb *oinp; 883b89e82ddSJamie Gritton struct in_addr laddr, faddr; 8845200e00eSIan Dowse u_short lport, fport; 8855200e00eSIan Dowse int error; 8865200e00eSIan Dowse 8878501a69cSRobert Watson /* 8888501a69cSRobert Watson * Because a global state change doesn't actually occur here, a read 8898501a69cSRobert Watson * lock is sufficient. 
8908501a69cSRobert Watson */ 8918501a69cSRobert Watson INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo); 89227f74fd0SRobert Watson INP_LOCK_ASSERT(inp); 89327f74fd0SRobert Watson 8945200e00eSIan Dowse if (oinpp != NULL) 8955200e00eSIan Dowse *oinpp = NULL; 89657bf258eSGarrett Wollman if (nam->sa_len != sizeof (*sin)) 897df8bae1dSRodney W. Grimes return (EINVAL); 898df8bae1dSRodney W. Grimes if (sin->sin_family != AF_INET) 899df8bae1dSRodney W. Grimes return (EAFNOSUPPORT); 900df8bae1dSRodney W. Grimes if (sin->sin_port == 0) 901df8bae1dSRodney W. Grimes return (EADDRNOTAVAIL); 9025200e00eSIan Dowse laddr.s_addr = *laddrp; 9035200e00eSIan Dowse lport = *lportp; 9045200e00eSIan Dowse faddr = sin->sin_addr; 9055200e00eSIan Dowse fport = sin->sin_port; 9060895aec3SBjoern A. Zeeb 907603724d3SBjoern A. Zeeb if (!TAILQ_EMPTY(&V_in_ifaddrhead)) { 908df8bae1dSRodney W. Grimes /* 909df8bae1dSRodney W. Grimes * If the destination address is INADDR_ANY, 910df8bae1dSRodney W. Grimes * use the primary local address. 911df8bae1dSRodney W. Grimes * If the supplied address is INADDR_BROADCAST, 912df8bae1dSRodney W. Grimes * and the primary interface supports broadcast, 913df8bae1dSRodney W. Grimes * choose the broadcast address for that interface. 914df8bae1dSRodney W. Grimes */ 915413628a7SBjoern A. Zeeb if (faddr.s_addr == INADDR_ANY) { 9162d9cfabaSRobert Watson IN_IFADDR_RLOCK(); 917413628a7SBjoern A. 
Zeeb faddr = 918b89e82ddSJamie Gritton IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; 9192d9cfabaSRobert Watson IN_IFADDR_RUNLOCK(); 920b89e82ddSJamie Gritton if (cred != NULL && 921b89e82ddSJamie Gritton (error = prison_get_ip4(cred, &faddr)) != 0) 922b89e82ddSJamie Gritton return (error); 9232d9cfabaSRobert Watson } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) { 9242d9cfabaSRobert Watson IN_IFADDR_RLOCK(); 9252d9cfabaSRobert Watson if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags & 9262d9cfabaSRobert Watson IFF_BROADCAST) 9275200e00eSIan Dowse faddr = satosin(&TAILQ_FIRST( 928603724d3SBjoern A. Zeeb &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; 9292d9cfabaSRobert Watson IN_IFADDR_RUNLOCK(); 9302d9cfabaSRobert Watson } 931df8bae1dSRodney W. Grimes } 9325200e00eSIan Dowse if (laddr.s_addr == INADDR_ANY) { 933d79fdd98SDaniel Eischen error = in_pcbladdr(inp, &faddr, &laddr, cred); 934df8bae1dSRodney W. Grimes /* 935df8bae1dSRodney W. Grimes * If the destination address is multicast and an outgoing 936d79fdd98SDaniel Eischen * interface has been set as a multicast option, prefer the 937df8bae1dSRodney W. Grimes * address of that interface as our source address. 938df8bae1dSRodney W. Grimes */ 9395200e00eSIan Dowse if (IN_MULTICAST(ntohl(faddr.s_addr)) && 940df8bae1dSRodney W. Grimes inp->inp_moptions != NULL) { 941df8bae1dSRodney W. Grimes struct ip_moptions *imo; 942df8bae1dSRodney W. Grimes struct ifnet *ifp; 943df8bae1dSRodney W. Grimes 944df8bae1dSRodney W. Grimes imo = inp->inp_moptions; 945df8bae1dSRodney W. Grimes if (imo->imo_multicast_ifp != NULL) { 946df8bae1dSRodney W. Grimes ifp = imo->imo_multicast_ifp; 9472d9cfabaSRobert Watson IN_IFADDR_RLOCK(); 948e691be70SDaniel Eischen TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 949e691be70SDaniel Eischen if ((ia->ia_ifp == ifp) && 950e691be70SDaniel Eischen (cred == NULL || 951e691be70SDaniel Eischen prison_check_ip4(cred, 952e691be70SDaniel Eischen &ia->ia_addr.sin_addr) == 0)) 953df8bae1dSRodney W. 
Grimes break; 954e691be70SDaniel Eischen } 955e691be70SDaniel Eischen if (ia == NULL) 956d79fdd98SDaniel Eischen error = EADDRNOTAVAIL; 957e691be70SDaniel Eischen else { 9585200e00eSIan Dowse laddr = ia->ia_addr.sin_addr; 959d79fdd98SDaniel Eischen error = 0; 960999f1343SGarrett Wollman } 961e691be70SDaniel Eischen IN_IFADDR_RUNLOCK(); 962d79fdd98SDaniel Eischen } 963d79fdd98SDaniel Eischen } 96404215ed2SRandall Stewart if (error) 96504215ed2SRandall Stewart return (error); 9660895aec3SBjoern A. Zeeb } 9675200e00eSIan Dowse oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 9685200e00eSIan Dowse 0, NULL); 9695200e00eSIan Dowse if (oinp != NULL) { 9705200e00eSIan Dowse if (oinpp != NULL) 9715200e00eSIan Dowse *oinpp = oinp; 972df8bae1dSRodney W. Grimes return (EADDRINUSE); 973c3229e05SDavid Greenman } 9745200e00eSIan Dowse if (lport == 0) { 975b0330ed9SPawel Jakub Dawidek error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, 976b0330ed9SPawel Jakub Dawidek cred); 9775a903f8dSPierre Beyssac if (error) 9785a903f8dSPierre Beyssac return (error); 9795a903f8dSPierre Beyssac } 9805200e00eSIan Dowse *laddrp = laddr.s_addr; 9815200e00eSIan Dowse *lportp = lport; 9825200e00eSIan Dowse *faddrp = faddr.s_addr; 9835200e00eSIan Dowse *fportp = fport; 984df8bae1dSRodney W. Grimes return (0); 985df8bae1dSRodney W. Grimes } 986df8bae1dSRodney W. Grimes 98726f9a767SRodney W. Grimes void 988136d4f1cSRobert Watson in_pcbdisconnect(struct inpcb *inp) 989df8bae1dSRodney W. Grimes { 9906b348152SRobert Watson 991fe6bfc37SRobert Watson INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 9928501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 993df8bae1dSRodney W. Grimes 994df8bae1dSRodney W. Grimes inp->inp_faddr.s_addr = INADDR_ANY; 995df8bae1dSRodney W. Grimes inp->inp_fport = 0; 99615bd2b43SDavid Greenman in_pcbrehash(inp); 997df8bae1dSRodney W. Grimes } 998df8bae1dSRodney W. 
Grimes 9994c7c478dSRobert Watson /* 100028696211SRobert Watson * in_pcbdetach() is responsibe for disassociating a socket from an inpcb. 1001c0a211c5SRobert Watson * For most protocols, this will be invoked immediately prior to calling 100228696211SRobert Watson * in_pcbfree(). However, with TCP the inpcb may significantly outlive the 100328696211SRobert Watson * socket, in which case in_pcbfree() is deferred. 10044c7c478dSRobert Watson */ 100526f9a767SRodney W. Grimes void 1006136d4f1cSRobert Watson in_pcbdetach(struct inpcb *inp) 1007df8bae1dSRodney W. Grimes { 10084c7c478dSRobert Watson 1009a7df09e8SBjoern A. Zeeb KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); 1010c0a211c5SRobert Watson 10114c7c478dSRobert Watson inp->inp_socket->so_pcb = NULL; 10124c7c478dSRobert Watson inp->inp_socket = NULL; 10134c7c478dSRobert Watson } 10144c7c478dSRobert Watson 1015c0a211c5SRobert Watson /* 101628696211SRobert Watson * in_pcbfree_internal() frees an inpcb that has been detached from its 101728696211SRobert Watson * socket, and whose reference count has reached 0. It will also remove the 101828696211SRobert Watson * inpcb from any global lists it might remain on. 1019c0a211c5SRobert Watson */ 102028696211SRobert Watson static void 102128696211SRobert Watson in_pcbfree_internal(struct inpcb *inp) 10224c7c478dSRobert Watson { 10233d4d47f3SGarrett Wollman struct inpcbinfo *ipi = inp->inp_pcbinfo; 1024df8bae1dSRodney W. Grimes 1025a7df09e8SBjoern A. Zeeb KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); 102628696211SRobert Watson KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__)); 10278501a69cSRobert Watson 1028fe6bfc37SRobert Watson INP_INFO_WLOCK_ASSERT(ipi); 10298501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 103059daba27SSam Leffler 1031b2630c29SGeorge V. Neville-Neil #ifdef IPSEC 10326aee2fc5SBjoern A. Zeeb if (inp->inp_sp != NULL) 10336974bd9eSBjoern A. Zeeb ipsec_delete_pcbpolicy(inp); 1034b2630c29SGeorge V. 
Neville-Neil #endif /* IPSEC */ 10353d4d47f3SGarrett Wollman inp->inp_gencnt = ++ipi->ipi_gencnt; 1036c3229e05SDavid Greenman in_pcbremlists(inp); 10376aee2fc5SBjoern A. Zeeb #ifdef INET6 10386aee2fc5SBjoern A. Zeeb if (inp->inp_vflag & INP_IPV6PROTO) { 10396aee2fc5SBjoern A. Zeeb ip6_freepcbopts(inp->in6p_outputopts); 10401096332aSBruce M Simpson if (inp->in6p_moptions != NULL) 10416aee2fc5SBjoern A. Zeeb ip6_freemoptions(inp->in6p_moptions); 10426aee2fc5SBjoern A. Zeeb } 10436aee2fc5SBjoern A. Zeeb #endif 1044df8bae1dSRodney W. Grimes if (inp->inp_options) 1045df8bae1dSRodney W. Grimes (void)m_free(inp->inp_options); 104671498f30SBruce M Simpson if (inp->inp_moptions != NULL) 104771498f30SBruce M Simpson inp_freemoptions(inp->inp_moptions); 1048cfa1ca9dSYoshinobu Inoue inp->inp_vflag = 0; 104986d02c5cSBjoern A. Zeeb crfree(inp->inp_cred); 1050d915b280SStephan Uphoff 1051a557af22SRobert Watson #ifdef MAC 105230d239bcSRobert Watson mac_inpcb_destroy(inp); 1053a557af22SRobert Watson #endif 10548501a69cSRobert Watson INP_WUNLOCK(inp); 105569c2d429SJeff Roberson uma_zfree(ipi->ipi_zone, inp); 1056df8bae1dSRodney W. Grimes } 1057df8bae1dSRodney W. Grimes 105810702a28SRobert Watson /* 105928696211SRobert Watson * in_pcbref() bumps the reference count on an inpcb in order to maintain 106028696211SRobert Watson * stability of an inpcb pointer despite the inpcb lock being released. This 106128696211SRobert Watson * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, 106228696211SRobert Watson * but where the inpcb lock is already held. 106328696211SRobert Watson * 106428696211SRobert Watson * While the inpcb will not be freed, releasing the inpcb lock means that the 106528696211SRobert Watson * connection's state may change, so the caller should be careful to 106628696211SRobert Watson * revalidate any cached state on reacquiring the lock. Drop the reference 106728696211SRobert Watson * using in_pcbrele(). 
106828696211SRobert Watson */ 106928696211SRobert Watson void 107028696211SRobert Watson in_pcbref(struct inpcb *inp) 107128696211SRobert Watson { 107228696211SRobert Watson 107328696211SRobert Watson INP_WLOCK_ASSERT(inp); 107428696211SRobert Watson 107528696211SRobert Watson KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); 107628696211SRobert Watson 107728696211SRobert Watson inp->inp_refcount++; 107828696211SRobert Watson } 107928696211SRobert Watson 108028696211SRobert Watson /* 108128696211SRobert Watson * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to 108228696211SRobert Watson * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we 108328696211SRobert Watson * return a flag indicating whether or not the inpcb remains valid. If it is 108428696211SRobert Watson * valid, we return with the inpcb lock held. 108528696211SRobert Watson */ 108628696211SRobert Watson int 108728696211SRobert Watson in_pcbrele(struct inpcb *inp) 108828696211SRobert Watson { 108928696211SRobert Watson #ifdef INVARIANTS 109028696211SRobert Watson struct inpcbinfo *ipi = inp->inp_pcbinfo; 109128696211SRobert Watson #endif 109228696211SRobert Watson 109328696211SRobert Watson KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); 109428696211SRobert Watson 109528696211SRobert Watson INP_INFO_WLOCK_ASSERT(ipi); 109628696211SRobert Watson INP_WLOCK_ASSERT(inp); 109728696211SRobert Watson 109828696211SRobert Watson inp->inp_refcount--; 109928696211SRobert Watson if (inp->inp_refcount > 0) 110028696211SRobert Watson return (0); 110128696211SRobert Watson in_pcbfree_internal(inp); 110228696211SRobert Watson return (1); 110328696211SRobert Watson } 110428696211SRobert Watson 110528696211SRobert Watson /* 110628696211SRobert Watson * Unconditionally schedule an inpcb to be freed by decrementing its 110728696211SRobert Watson * reference count, which should occur only after the inpcb has been detached 110828696211SRobert Watson * 
from its socket. If another thread holds a temporary reference (acquired 110928696211SRobert Watson * using in_pcbref()) then the free is deferred until that reference is 111028696211SRobert Watson * released using in_pcbrele(), but the inpcb is still unlocked. 111128696211SRobert Watson */ 111228696211SRobert Watson void 111328696211SRobert Watson in_pcbfree(struct inpcb *inp) 111428696211SRobert Watson { 111528696211SRobert Watson #ifdef INVARIANTS 111628696211SRobert Watson struct inpcbinfo *ipi = inp->inp_pcbinfo; 111728696211SRobert Watson #endif 111828696211SRobert Watson 111928696211SRobert Watson KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", 112028696211SRobert Watson __func__)); 112128696211SRobert Watson 112228696211SRobert Watson INP_INFO_WLOCK_ASSERT(ipi); 112328696211SRobert Watson INP_WLOCK_ASSERT(inp); 112428696211SRobert Watson 112528696211SRobert Watson if (!in_pcbrele(inp)) 112628696211SRobert Watson INP_WUNLOCK(inp); 112728696211SRobert Watson } 112828696211SRobert Watson 112928696211SRobert Watson /* 1130c0a211c5SRobert Watson * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and 1131c0a211c5SRobert Watson * port reservation, and preventing it from being returned by inpcb lookups. 1132c0a211c5SRobert Watson * 1133c0a211c5SRobert Watson * It is used by TCP to mark an inpcb as unused and avoid future packet 1134c0a211c5SRobert Watson * delivery or event notification when a socket remains open but TCP has 1135c0a211c5SRobert Watson * closed. This might occur as a result of a shutdown()-initiated TCP close 1136c0a211c5SRobert Watson * or a RST on the wire, and allows the port binding to be reused while still 1137c0a211c5SRobert Watson * maintaining the invariant that so_pcb always points to a valid inpcb until 1138c0a211c5SRobert Watson * in_pcbdetach(). 
1139c0a211c5SRobert Watson * 1140c0a211c5SRobert Watson * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by 1141c0a211c5SRobert Watson * in_pcbnotifyall() and in_pcbpurgeif0()? 114210702a28SRobert Watson */ 114310702a28SRobert Watson void 114410702a28SRobert Watson in_pcbdrop(struct inpcb *inp) 114510702a28SRobert Watson { 114610702a28SRobert Watson 11477c5a8ab2SMarcel Moolenaar INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 11488501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 114910702a28SRobert Watson 1150ad71fe3cSRobert Watson inp->inp_flags |= INP_DROPPED; 1151111d57a6SRobert Watson if (inp->inp_flags & INP_INHASHLIST) { 115210702a28SRobert Watson struct inpcbport *phd = inp->inp_phd; 115310702a28SRobert Watson 115410702a28SRobert Watson LIST_REMOVE(inp, inp_hash); 115510702a28SRobert Watson LIST_REMOVE(inp, inp_portlist); 115610702a28SRobert Watson if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 115710702a28SRobert Watson LIST_REMOVE(phd, phd_hash); 115810702a28SRobert Watson free(phd, M_PCB); 115910702a28SRobert Watson } 1160111d57a6SRobert Watson inp->inp_flags &= ~INP_INHASHLIST; 116110702a28SRobert Watson } 116210702a28SRobert Watson } 116310702a28SRobert Watson 116454d642bbSRobert Watson /* 116554d642bbSRobert Watson * Common routines to return the socket addresses associated with inpcbs. 
116654d642bbSRobert Watson */ 116726ef6ac4SDon Lewis struct sockaddr * 1168136d4f1cSRobert Watson in_sockaddr(in_port_t port, struct in_addr *addr_p) 116926ef6ac4SDon Lewis { 117026ef6ac4SDon Lewis struct sockaddr_in *sin; 117126ef6ac4SDon Lewis 11721ede983cSDag-Erling Smørgrav sin = malloc(sizeof *sin, M_SONAME, 1173a163d034SWarner Losh M_WAITOK | M_ZERO); 117426ef6ac4SDon Lewis sin->sin_family = AF_INET; 117526ef6ac4SDon Lewis sin->sin_len = sizeof(*sin); 117626ef6ac4SDon Lewis sin->sin_addr = *addr_p; 117726ef6ac4SDon Lewis sin->sin_port = port; 117826ef6ac4SDon Lewis 117926ef6ac4SDon Lewis return (struct sockaddr *)sin; 118026ef6ac4SDon Lewis } 118126ef6ac4SDon Lewis 1182117bcae7SGarrett Wollman int 118354d642bbSRobert Watson in_getsockaddr(struct socket *so, struct sockaddr **nam) 1184df8bae1dSRodney W. Grimes { 1185136d4f1cSRobert Watson struct inpcb *inp; 118626ef6ac4SDon Lewis struct in_addr addr; 118726ef6ac4SDon Lewis in_port_t port; 118842fa505bSDavid Greenman 1189fdc984f7STor Egge inp = sotoinpcb(so); 119054d642bbSRobert Watson KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL")); 11916466b28aSRobert Watson 1192a69042a5SRobert Watson INP_RLOCK(inp); 119326ef6ac4SDon Lewis port = inp->inp_lport; 119426ef6ac4SDon Lewis addr = inp->inp_laddr; 1195a69042a5SRobert Watson INP_RUNLOCK(inp); 119642fa505bSDavid Greenman 119726ef6ac4SDon Lewis *nam = in_sockaddr(port, &addr); 1198117bcae7SGarrett Wollman return 0; 1199df8bae1dSRodney W. Grimes } 1200df8bae1dSRodney W. Grimes 1201117bcae7SGarrett Wollman int 120254d642bbSRobert Watson in_getpeeraddr(struct socket *so, struct sockaddr **nam) 1203df8bae1dSRodney W. 
Grimes { 1204136d4f1cSRobert Watson struct inpcb *inp; 120526ef6ac4SDon Lewis struct in_addr addr; 120626ef6ac4SDon Lewis in_port_t port; 120742fa505bSDavid Greenman 1208fdc984f7STor Egge inp = sotoinpcb(so); 120954d642bbSRobert Watson KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL")); 12106466b28aSRobert Watson 1211a69042a5SRobert Watson INP_RLOCK(inp); 121226ef6ac4SDon Lewis port = inp->inp_fport; 121326ef6ac4SDon Lewis addr = inp->inp_faddr; 1214a69042a5SRobert Watson INP_RUNLOCK(inp); 121542fa505bSDavid Greenman 121626ef6ac4SDon Lewis *nam = in_sockaddr(port, &addr); 1217117bcae7SGarrett Wollman return 0; 1218df8bae1dSRodney W. Grimes } 1219df8bae1dSRodney W. Grimes 122026f9a767SRodney W. Grimes void 1221136d4f1cSRobert Watson in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno, 1222136d4f1cSRobert Watson struct inpcb *(*notify)(struct inpcb *, int)) 1223d1c54148SJesper Skriver { 1224f457d580SRobert Watson struct inpcb *inp, *inp_temp; 1225d1c54148SJesper Skriver 12263dc7ebf9SJeffrey Hsu INP_INFO_WLOCK(pcbinfo); 1227f457d580SRobert Watson LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { 12288501a69cSRobert Watson INP_WLOCK(inp); 1229d1c54148SJesper Skriver #ifdef INET6 1230f76fcf6dSJeffrey Hsu if ((inp->inp_vflag & INP_IPV4) == 0) { 12318501a69cSRobert Watson INP_WUNLOCK(inp); 1232d1c54148SJesper Skriver continue; 1233f76fcf6dSJeffrey Hsu } 1234d1c54148SJesper Skriver #endif 1235d1c54148SJesper Skriver if (inp->inp_faddr.s_addr != faddr.s_addr || 1236f76fcf6dSJeffrey Hsu inp->inp_socket == NULL) { 12378501a69cSRobert Watson INP_WUNLOCK(inp); 1238d1c54148SJesper Skriver continue; 1239d1c54148SJesper Skriver } 12403dc7ebf9SJeffrey Hsu if ((*notify)(inp, errno)) 12418501a69cSRobert Watson INP_WUNLOCK(inp); 1242f76fcf6dSJeffrey Hsu } 12433dc7ebf9SJeffrey Hsu INP_INFO_WUNLOCK(pcbinfo); 1244d1c54148SJesper Skriver } 1245d1c54148SJesper Skriver 1246e43cc4aeSHajimu UMEMOTO void 1247136d4f1cSRobert Watson 
in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 1248e43cc4aeSHajimu UMEMOTO { 1249e43cc4aeSHajimu UMEMOTO struct inpcb *inp; 1250e43cc4aeSHajimu UMEMOTO struct ip_moptions *imo; 1251e43cc4aeSHajimu UMEMOTO int i, gap; 1252e43cc4aeSHajimu UMEMOTO 1253f76fcf6dSJeffrey Hsu INP_INFO_RLOCK(pcbinfo); 1254712fc218SRobert Watson LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { 12558501a69cSRobert Watson INP_WLOCK(inp); 1256e43cc4aeSHajimu UMEMOTO imo = inp->inp_moptions; 1257e43cc4aeSHajimu UMEMOTO if ((inp->inp_vflag & INP_IPV4) && 1258e43cc4aeSHajimu UMEMOTO imo != NULL) { 1259e43cc4aeSHajimu UMEMOTO /* 1260e43cc4aeSHajimu UMEMOTO * Unselect the outgoing interface if it is being 1261e43cc4aeSHajimu UMEMOTO * detached. 1262e43cc4aeSHajimu UMEMOTO */ 1263e43cc4aeSHajimu UMEMOTO if (imo->imo_multicast_ifp == ifp) 1264e43cc4aeSHajimu UMEMOTO imo->imo_multicast_ifp = NULL; 1265e43cc4aeSHajimu UMEMOTO 1266e43cc4aeSHajimu UMEMOTO /* 1267e43cc4aeSHajimu UMEMOTO * Drop multicast group membership if we joined 1268e43cc4aeSHajimu UMEMOTO * through the interface being detached. 1269e43cc4aeSHajimu UMEMOTO */ 1270e43cc4aeSHajimu UMEMOTO for (i = 0, gap = 0; i < imo->imo_num_memberships; 1271e43cc4aeSHajimu UMEMOTO i++) { 1272e43cc4aeSHajimu UMEMOTO if (imo->imo_membership[i]->inm_ifp == ifp) { 1273e43cc4aeSHajimu UMEMOTO in_delmulti(imo->imo_membership[i]); 1274e43cc4aeSHajimu UMEMOTO gap++; 1275e43cc4aeSHajimu UMEMOTO } else if (gap != 0) 1276e43cc4aeSHajimu UMEMOTO imo->imo_membership[i - gap] = 1277e43cc4aeSHajimu UMEMOTO imo->imo_membership[i]; 1278e43cc4aeSHajimu UMEMOTO } 1279e43cc4aeSHajimu UMEMOTO imo->imo_num_memberships -= gap; 1280e43cc4aeSHajimu UMEMOTO } 12818501a69cSRobert Watson INP_WUNLOCK(inp); 1282e43cc4aeSHajimu UMEMOTO } 12833cfcc388SJeffrey Hsu INP_INFO_RUNLOCK(pcbinfo); 1284e43cc4aeSHajimu UMEMOTO } 1285e43cc4aeSHajimu UMEMOTO 1286df8bae1dSRodney W. Grimes /* 1287c3229e05SDavid Greenman * Lookup a PCB based on the local address and port. 
1288c3229e05SDavid Greenman */ 1289d5e8a67eSHajimu UMEMOTO #define INP_LOOKUP_MAPPED_PCB_COST 3 1290df8bae1dSRodney W. Grimes struct inpcb * 1291136d4f1cSRobert Watson in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1292078b7042SBjoern A. Zeeb u_short lport, int wild_okay, struct ucred *cred) 1293df8bae1dSRodney W. Grimes { 1294136d4f1cSRobert Watson struct inpcb *inp; 1295d5e8a67eSHajimu UMEMOTO #ifdef INET6 1296d5e8a67eSHajimu UMEMOTO int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST; 1297d5e8a67eSHajimu UMEMOTO #else 1298d5e8a67eSHajimu UMEMOTO int matchwild = 3; 1299d5e8a67eSHajimu UMEMOTO #endif 1300d5e8a67eSHajimu UMEMOTO int wildcard; 13017bc4aca7SDavid Greenman 13028501a69cSRobert Watson INP_INFO_LOCK_ASSERT(pcbinfo); 13031b73ca0bSSam Leffler 1304c3229e05SDavid Greenman if (!wild_okay) { 1305c3229e05SDavid Greenman struct inpcbhead *head; 1306c3229e05SDavid Greenman /* 1307c3229e05SDavid Greenman * Look for an unconnected (wildcard foreign addr) PCB that 1308c3229e05SDavid Greenman * matches the local address and port we're looking for. 1309c3229e05SDavid Greenman */ 1310712fc218SRobert Watson head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1311712fc218SRobert Watson 0, pcbinfo->ipi_hashmask)]; 1312fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(inp, head, inp_hash) { 1313cfa1ca9dSYoshinobu Inoue #ifdef INET6 1314413628a7SBjoern A. Zeeb /* XXX inp locking */ 1315369dc8ceSEivind Eklund if ((inp->inp_vflag & INP_IPV4) == 0) 1316cfa1ca9dSYoshinobu Inoue continue; 1317cfa1ca9dSYoshinobu Inoue #endif 1318c3229e05SDavid Greenman if (inp->inp_faddr.s_addr == INADDR_ANY && 1319c3229e05SDavid Greenman inp->inp_laddr.s_addr == laddr.s_addr && 1320c3229e05SDavid Greenman inp->inp_lport == lport) { 1321c3229e05SDavid Greenman /* 1322413628a7SBjoern A. Zeeb * Found? 1323c3229e05SDavid Greenman */ 1324413628a7SBjoern A. 
Zeeb if (cred == NULL || 13250304c731SJamie Gritton prison_equal_ip4(cred->cr_prison, 13260304c731SJamie Gritton inp->inp_cred->cr_prison)) 1327c3229e05SDavid Greenman return (inp); 1328df8bae1dSRodney W. Grimes } 1329c3229e05SDavid Greenman } 1330c3229e05SDavid Greenman /* 1331c3229e05SDavid Greenman * Not found. 1332c3229e05SDavid Greenman */ 1333c3229e05SDavid Greenman return (NULL); 1334c3229e05SDavid Greenman } else { 1335c3229e05SDavid Greenman struct inpcbporthead *porthash; 1336c3229e05SDavid Greenman struct inpcbport *phd; 1337c3229e05SDavid Greenman struct inpcb *match = NULL; 1338c3229e05SDavid Greenman /* 1339c3229e05SDavid Greenman * Best fit PCB lookup. 1340c3229e05SDavid Greenman * 1341c3229e05SDavid Greenman * First see if this local port is in use by looking on the 1342c3229e05SDavid Greenman * port hash list. 1343c3229e05SDavid Greenman */ 1344712fc218SRobert Watson porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, 1345712fc218SRobert Watson pcbinfo->ipi_porthashmask)]; 1346fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(phd, porthash, phd_hash) { 1347c3229e05SDavid Greenman if (phd->phd_port == lport) 1348c3229e05SDavid Greenman break; 1349c3229e05SDavid Greenman } 1350c3229e05SDavid Greenman if (phd != NULL) { 1351c3229e05SDavid Greenman /* 1352c3229e05SDavid Greenman * Port is in use by one or more PCBs. Look for best 1353c3229e05SDavid Greenman * fit. 1354c3229e05SDavid Greenman */ 135537d40066SPoul-Henning Kamp LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1356c3229e05SDavid Greenman wildcard = 0; 1357413628a7SBjoern A. Zeeb if (cred != NULL && 13580304c731SJamie Gritton !prison_equal_ip4(inp->inp_cred->cr_prison, 13590304c731SJamie Gritton cred->cr_prison)) 1360413628a7SBjoern A. Zeeb continue; 1361cfa1ca9dSYoshinobu Inoue #ifdef INET6 1362413628a7SBjoern A. 
Zeeb /* XXX inp locking */ 1363369dc8ceSEivind Eklund if ((inp->inp_vflag & INP_IPV4) == 0) 1364cfa1ca9dSYoshinobu Inoue continue; 1365d5e8a67eSHajimu UMEMOTO /* 1366d5e8a67eSHajimu UMEMOTO * We never select the PCB that has 1367d5e8a67eSHajimu UMEMOTO * INP_IPV6 flag and is bound to :: if 1368d5e8a67eSHajimu UMEMOTO * we have another PCB which is bound 1369d5e8a67eSHajimu UMEMOTO * to 0.0.0.0. If a PCB has the 1370d5e8a67eSHajimu UMEMOTO * INP_IPV6 flag, then we set its cost 1371d5e8a67eSHajimu UMEMOTO * higher than IPv4 only PCBs. 1372d5e8a67eSHajimu UMEMOTO * 1373d5e8a67eSHajimu UMEMOTO * Note that the case only happens 1374d5e8a67eSHajimu UMEMOTO * when a socket is bound to ::, under 1375d5e8a67eSHajimu UMEMOTO * the condition that the use of the 1376d5e8a67eSHajimu UMEMOTO * mapped address is allowed. 1377d5e8a67eSHajimu UMEMOTO */ 1378d5e8a67eSHajimu UMEMOTO if ((inp->inp_vflag & INP_IPV6) != 0) 1379d5e8a67eSHajimu UMEMOTO wildcard += INP_LOOKUP_MAPPED_PCB_COST; 1380cfa1ca9dSYoshinobu Inoue #endif 1381c3229e05SDavid Greenman if (inp->inp_faddr.s_addr != INADDR_ANY) 1382c3229e05SDavid Greenman wildcard++; 138315bd2b43SDavid Greenman if (inp->inp_laddr.s_addr != INADDR_ANY) { 138415bd2b43SDavid Greenman if (laddr.s_addr == INADDR_ANY) 138515bd2b43SDavid Greenman wildcard++; 138615bd2b43SDavid Greenman else if (inp->inp_laddr.s_addr != laddr.s_addr) 138715bd2b43SDavid Greenman continue; 138815bd2b43SDavid Greenman } else { 138915bd2b43SDavid Greenman if (laddr.s_addr != INADDR_ANY) 139015bd2b43SDavid Greenman wildcard++; 139115bd2b43SDavid Greenman } 1392df8bae1dSRodney W. Grimes if (wildcard < matchwild) { 1393df8bae1dSRodney W. Grimes match = inp; 1394df8bae1dSRodney W. Grimes matchwild = wildcard; 1395413628a7SBjoern A. Zeeb if (matchwild == 0) 1396df8bae1dSRodney W. Grimes break; 1397df8bae1dSRodney W. Grimes } 1398df8bae1dSRodney W. Grimes } 13993dbdc25cSDavid Greenman } 1400df8bae1dSRodney W. Grimes return (match); 1401df8bae1dSRodney W. 
Grimes } 1402c3229e05SDavid Greenman } 1403d5e8a67eSHajimu UMEMOTO #undef INP_LOOKUP_MAPPED_PCB_COST 140415bd2b43SDavid Greenman 140515bd2b43SDavid Greenman /* 140615bd2b43SDavid Greenman * Lookup PCB in hash list. 140715bd2b43SDavid Greenman */ 140815bd2b43SDavid Greenman struct inpcb * 1409136d4f1cSRobert Watson in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1410136d4f1cSRobert Watson u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, 1411136d4f1cSRobert Watson struct ifnet *ifp) 141215bd2b43SDavid Greenman { 141315bd2b43SDavid Greenman struct inpcbhead *head; 1414413628a7SBjoern A. Zeeb struct inpcb *inp, *tmpinp; 141515bd2b43SDavid Greenman u_short fport = fport_arg, lport = lport_arg; 141615bd2b43SDavid Greenman 14178501a69cSRobert Watson INP_INFO_LOCK_ASSERT(pcbinfo); 1418602cc7f1SRobert Watson 141915bd2b43SDavid Greenman /* 142015bd2b43SDavid Greenman * First look for an exact match. 142115bd2b43SDavid Greenman */ 1422413628a7SBjoern A. Zeeb tmpinp = NULL; 1423712fc218SRobert Watson head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, 1424712fc218SRobert Watson pcbinfo->ipi_hashmask)]; 1425fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(inp, head, inp_hash) { 1426cfa1ca9dSYoshinobu Inoue #ifdef INET6 1427413628a7SBjoern A. Zeeb /* XXX inp locking */ 1428369dc8ceSEivind Eklund if ((inp->inp_vflag & INP_IPV4) == 0) 1429cfa1ca9dSYoshinobu Inoue continue; 1430cfa1ca9dSYoshinobu Inoue #endif 14316d6a026bSDavid Greenman if (inp->inp_faddr.s_addr == faddr.s_addr && 1432ca98b82cSDavid Greenman inp->inp_laddr.s_addr == laddr.s_addr && 1433ca98b82cSDavid Greenman inp->inp_fport == fport && 1434413628a7SBjoern A. Zeeb inp->inp_lport == lport) { 1435413628a7SBjoern A. Zeeb /* 1436413628a7SBjoern A. Zeeb * XXX We should be able to directly return 1437413628a7SBjoern A. Zeeb * the inp here, without any checks. 1438413628a7SBjoern A. Zeeb * Well unless both bound with SO_REUSEPORT? 1439413628a7SBjoern A. 
Zeeb */ 14400304c731SJamie Gritton if (prison_flag(inp->inp_cred, PR_IP4)) 1441c3229e05SDavid Greenman return (inp); 1442413628a7SBjoern A. Zeeb if (tmpinp == NULL) 1443413628a7SBjoern A. Zeeb tmpinp = inp; 1444c3229e05SDavid Greenman } 1445413628a7SBjoern A. Zeeb } 1446413628a7SBjoern A. Zeeb if (tmpinp != NULL) 1447413628a7SBjoern A. Zeeb return (tmpinp); 1448e3fd5ffdSRobert Watson 1449e3fd5ffdSRobert Watson /* 1450e3fd5ffdSRobert Watson * Then look for a wildcard match, if requested. 1451e3fd5ffdSRobert Watson */ 1452413628a7SBjoern A. Zeeb if (wildcard == INPLOOKUP_WILDCARD) { 1453413628a7SBjoern A. Zeeb struct inpcb *local_wild = NULL, *local_exact = NULL; 1454e3fd5ffdSRobert Watson #ifdef INET6 1455cfa1ca9dSYoshinobu Inoue struct inpcb *local_wild_mapped = NULL; 1456e3fd5ffdSRobert Watson #endif 1457413628a7SBjoern A. Zeeb struct inpcb *jail_wild = NULL; 1458413628a7SBjoern A. Zeeb int injail; 1459413628a7SBjoern A. Zeeb 1460413628a7SBjoern A. Zeeb /* 1461413628a7SBjoern A. Zeeb * Order of socket selection - we always prefer jails. 1462413628a7SBjoern A. Zeeb * 1. jailed, non-wild. 1463413628a7SBjoern A. Zeeb * 2. jailed, wild. 1464413628a7SBjoern A. Zeeb * 3. non-jailed, non-wild. 1465413628a7SBjoern A. Zeeb * 4. non-jailed, wild. 1466413628a7SBjoern A. Zeeb */ 14676d6a026bSDavid Greenman 1468712fc218SRobert Watson head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1469712fc218SRobert Watson 0, pcbinfo->ipi_hashmask)]; 1470fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(inp, head, inp_hash) { 1471cfa1ca9dSYoshinobu Inoue #ifdef INET6 1472413628a7SBjoern A. Zeeb /* XXX inp locking */ 1473369dc8ceSEivind Eklund if ((inp->inp_vflag & INP_IPV4) == 0) 1474cfa1ca9dSYoshinobu Inoue continue; 1475cfa1ca9dSYoshinobu Inoue #endif 1476413628a7SBjoern A. Zeeb if (inp->inp_faddr.s_addr != INADDR_ANY || 1477413628a7SBjoern A. Zeeb inp->inp_lport != lport) 1478413628a7SBjoern A. Zeeb continue; 1479413628a7SBjoern A. Zeeb 1480413628a7SBjoern A. 
Zeeb /* XXX inp locking */ 1481cfa1ca9dSYoshinobu Inoue if (ifp && ifp->if_type == IFT_FAITH && 1482cfa1ca9dSYoshinobu Inoue (inp->inp_flags & INP_FAITH) == 0) 1483cfa1ca9dSYoshinobu Inoue continue; 1484413628a7SBjoern A. Zeeb 14850304c731SJamie Gritton injail = prison_flag(inp->inp_cred, PR_IP4); 1486413628a7SBjoern A. Zeeb if (injail) { 1487b89e82ddSJamie Gritton if (prison_check_ip4(inp->inp_cred, 1488b89e82ddSJamie Gritton &laddr) != 0) 1489413628a7SBjoern A. Zeeb continue; 1490413628a7SBjoern A. Zeeb } else { 1491413628a7SBjoern A. Zeeb if (local_exact != NULL) 1492413628a7SBjoern A. Zeeb continue; 1493413628a7SBjoern A. Zeeb } 1494413628a7SBjoern A. Zeeb 1495413628a7SBjoern A. Zeeb if (inp->inp_laddr.s_addr == laddr.s_addr) { 1496413628a7SBjoern A. Zeeb if (injail) 1497c3229e05SDavid Greenman return (inp); 1498413628a7SBjoern A. Zeeb else 1499413628a7SBjoern A. Zeeb local_exact = inp; 1500413628a7SBjoern A. Zeeb } else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1501e3fd5ffdSRobert Watson #ifdef INET6 1502413628a7SBjoern A. Zeeb /* XXX inp locking, NULL check */ 15035cd54324SBjoern A. Zeeb if (inp->inp_vflag & INP_IPV6PROTO) 1504cfa1ca9dSYoshinobu Inoue local_wild_mapped = inp; 1505cfa1ca9dSYoshinobu Inoue else 1506413628a7SBjoern A. Zeeb #endif /* INET6 */ 1507413628a7SBjoern A. Zeeb if (injail) 1508413628a7SBjoern A. Zeeb jail_wild = inp; 1509413628a7SBjoern A. Zeeb else 15106d6a026bSDavid Greenman local_wild = inp; 15116d6a026bSDavid Greenman } 1512413628a7SBjoern A. Zeeb } /* LIST_FOREACH */ 1513413628a7SBjoern A. Zeeb if (jail_wild != NULL) 1514413628a7SBjoern A. Zeeb return (jail_wild); 1515413628a7SBjoern A. Zeeb if (local_exact != NULL) 1516413628a7SBjoern A. Zeeb return (local_exact); 1517413628a7SBjoern A. Zeeb if (local_wild != NULL) 1518c3229e05SDavid Greenman return (local_wild); 1519413628a7SBjoern A. Zeeb #ifdef INET6 1520413628a7SBjoern A. Zeeb if (local_wild_mapped != NULL) 1521413628a7SBjoern A. 
Zeeb return (local_wild_mapped); 1522413628a7SBjoern A. Zeeb #endif /* defined(INET6) */ 1523413628a7SBjoern A. Zeeb } /* if (wildcard == INPLOOKUP_WILDCARD) */ 1524413628a7SBjoern A. Zeeb 15256d6a026bSDavid Greenman return (NULL); 152615bd2b43SDavid Greenman } 152715bd2b43SDavid Greenman 15287bc4aca7SDavid Greenman /* 1529c3229e05SDavid Greenman * Insert PCB onto various hash lists. 15307bc4aca7SDavid Greenman */ 1531c3229e05SDavid Greenman int 1532136d4f1cSRobert Watson in_pcbinshash(struct inpcb *inp) 153315bd2b43SDavid Greenman { 1534c3229e05SDavid Greenman struct inpcbhead *pcbhash; 1535c3229e05SDavid Greenman struct inpcbporthead *pcbporthash; 1536c3229e05SDavid Greenman struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1537c3229e05SDavid Greenman struct inpcbport *phd; 1538cfa1ca9dSYoshinobu Inoue u_int32_t hashkey_faddr; 153915bd2b43SDavid Greenman 154059daba27SSam Leffler INP_INFO_WLOCK_ASSERT(pcbinfo); 15418501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 1542111d57a6SRobert Watson KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, 1543111d57a6SRobert Watson ("in_pcbinshash: INP_INHASHLIST")); 1544602cc7f1SRobert Watson 1545cfa1ca9dSYoshinobu Inoue #ifdef INET6 1546cfa1ca9dSYoshinobu Inoue if (inp->inp_vflag & INP_IPV6) 1547cfa1ca9dSYoshinobu Inoue hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1548cfa1ca9dSYoshinobu Inoue else 1549cfa1ca9dSYoshinobu Inoue #endif /* INET6 */ 1550cfa1ca9dSYoshinobu Inoue hashkey_faddr = inp->inp_faddr.s_addr; 1551cfa1ca9dSYoshinobu Inoue 1552712fc218SRobert Watson pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, 1553712fc218SRobert Watson inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; 155415bd2b43SDavid Greenman 1555712fc218SRobert Watson pcbporthash = &pcbinfo->ipi_porthashbase[ 1556712fc218SRobert Watson INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; 1557c3229e05SDavid Greenman 1558c3229e05SDavid Greenman /* 1559c3229e05SDavid Greenman * Go through port list and look for a head for this 
lport. 1560c3229e05SDavid Greenman */ 1561fc2ffbe6SPoul-Henning Kamp LIST_FOREACH(phd, pcbporthash, phd_hash) { 1562c3229e05SDavid Greenman if (phd->phd_port == inp->inp_lport) 1563c3229e05SDavid Greenman break; 1564c3229e05SDavid Greenman } 1565c3229e05SDavid Greenman /* 1566c3229e05SDavid Greenman * If none exists, malloc one and tack it on. 1567c3229e05SDavid Greenman */ 1568c3229e05SDavid Greenman if (phd == NULL) { 15691ede983cSDag-Erling Smørgrav phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1570c3229e05SDavid Greenman if (phd == NULL) { 1571c3229e05SDavid Greenman return (ENOBUFS); /* XXX */ 1572c3229e05SDavid Greenman } 1573c3229e05SDavid Greenman phd->phd_port = inp->inp_lport; 1574c3229e05SDavid Greenman LIST_INIT(&phd->phd_pcblist); 1575c3229e05SDavid Greenman LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1576c3229e05SDavid Greenman } 1577c3229e05SDavid Greenman inp->inp_phd = phd; 1578c3229e05SDavid Greenman LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1579c3229e05SDavid Greenman LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1580111d57a6SRobert Watson inp->inp_flags |= INP_INHASHLIST; 1581c3229e05SDavid Greenman return (0); 158215bd2b43SDavid Greenman } 158315bd2b43SDavid Greenman 1584c3229e05SDavid Greenman /* 1585c3229e05SDavid Greenman * Move PCB to the proper hash bucket when { faddr, fport } have been 1586c3229e05SDavid Greenman * changed. NOTE: This does not handle the case of the lport changing (the 1587c3229e05SDavid Greenman * hashed port list would have to be updated as well), so the lport must 1588c3229e05SDavid Greenman * not change after in_pcbinshash() has been called. 
1589c3229e05SDavid Greenman */ 159015bd2b43SDavid Greenman void 1591136d4f1cSRobert Watson in_pcbrehash(struct inpcb *inp) 159215bd2b43SDavid Greenman { 159359daba27SSam Leffler struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 159415bd2b43SDavid Greenman struct inpcbhead *head; 1595cfa1ca9dSYoshinobu Inoue u_int32_t hashkey_faddr; 159615bd2b43SDavid Greenman 159759daba27SSam Leffler INP_INFO_WLOCK_ASSERT(pcbinfo); 15988501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 1599111d57a6SRobert Watson KASSERT(inp->inp_flags & INP_INHASHLIST, 1600111d57a6SRobert Watson ("in_pcbrehash: !INP_INHASHLIST")); 1601602cc7f1SRobert Watson 1602cfa1ca9dSYoshinobu Inoue #ifdef INET6 1603cfa1ca9dSYoshinobu Inoue if (inp->inp_vflag & INP_IPV6) 1604cfa1ca9dSYoshinobu Inoue hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1605cfa1ca9dSYoshinobu Inoue else 1606cfa1ca9dSYoshinobu Inoue #endif /* INET6 */ 1607cfa1ca9dSYoshinobu Inoue hashkey_faddr = inp->inp_faddr.s_addr; 1608cfa1ca9dSYoshinobu Inoue 1609712fc218SRobert Watson head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, 1610712fc218SRobert Watson inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; 161115bd2b43SDavid Greenman 1612c3229e05SDavid Greenman LIST_REMOVE(inp, inp_hash); 161315bd2b43SDavid Greenman LIST_INSERT_HEAD(head, inp, inp_hash); 1614c3229e05SDavid Greenman } 1615c3229e05SDavid Greenman 1616c3229e05SDavid Greenman /* 1617c3229e05SDavid Greenman * Remove PCB from various lists. 
1618c3229e05SDavid Greenman */ 16196d888973SRobert Watson static void 1620136d4f1cSRobert Watson in_pcbremlists(struct inpcb *inp) 1621c3229e05SDavid Greenman { 162259daba27SSam Leffler struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 162359daba27SSam Leffler 162459daba27SSam Leffler INP_INFO_WLOCK_ASSERT(pcbinfo); 16258501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 162659daba27SSam Leffler 162759daba27SSam Leffler inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1628111d57a6SRobert Watson if (inp->inp_flags & INP_INHASHLIST) { 1629c3229e05SDavid Greenman struct inpcbport *phd = inp->inp_phd; 1630c3229e05SDavid Greenman 1631c3229e05SDavid Greenman LIST_REMOVE(inp, inp_hash); 1632c3229e05SDavid Greenman LIST_REMOVE(inp, inp_portlist); 1633fc2ffbe6SPoul-Henning Kamp if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1634c3229e05SDavid Greenman LIST_REMOVE(phd, phd_hash); 1635c3229e05SDavid Greenman free(phd, M_PCB); 1636c3229e05SDavid Greenman } 1637111d57a6SRobert Watson inp->inp_flags &= ~INP_INHASHLIST; 1638c3229e05SDavid Greenman } 1639c3229e05SDavid Greenman LIST_REMOVE(inp, inp_list); 164059daba27SSam Leffler pcbinfo->ipi_count--; 164115bd2b43SDavid Greenman } 164275c13541SPoul-Henning Kamp 1643a557af22SRobert Watson /* 1644a557af22SRobert Watson * A set label operation has occurred at the socket layer, propagate the 1645a557af22SRobert Watson * label change into the in_pcb for the socket. 
1646a557af22SRobert Watson */ 1647a557af22SRobert Watson void 1648136d4f1cSRobert Watson in_pcbsosetlabel(struct socket *so) 1649a557af22SRobert Watson { 1650a557af22SRobert Watson #ifdef MAC 1651a557af22SRobert Watson struct inpcb *inp; 1652a557af22SRobert Watson 16534c7c478dSRobert Watson inp = sotoinpcb(so); 16544c7c478dSRobert Watson KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL")); 1655602cc7f1SRobert Watson 16568501a69cSRobert Watson INP_WLOCK(inp); 1657310e7cebSRobert Watson SOCK_LOCK(so); 1658a557af22SRobert Watson mac_inpcb_sosetlabel(so, inp); 1659310e7cebSRobert Watson SOCK_UNLOCK(so); 16608501a69cSRobert Watson INP_WUNLOCK(inp); 1661a557af22SRobert Watson #endif 1662a557af22SRobert Watson } 16635f311da2SMike Silbersack 16645f311da2SMike Silbersack /* 1665ad3a630fSRobert Watson * ipport_tick runs once per second, determining if random port allocation 1666ad3a630fSRobert Watson * should be continued. If more than ipport_randomcps ports have been 1667ad3a630fSRobert Watson * allocated in the last second, then we return to sequential port 1668ad3a630fSRobert Watson * allocation. We return to random allocation only once we drop below 1669ad3a630fSRobert Watson * ipport_randomcps for at least ipport_randomtime seconds. 16705f311da2SMike Silbersack */ 16715f311da2SMike Silbersack void 1672136d4f1cSRobert Watson ipport_tick(void *xtp) 16735f311da2SMike Silbersack { 16748b615593SMarko Zec VNET_ITERATOR_DECL(vnet_iter); 1675ad3a630fSRobert Watson 16765ee847d3SRobert Watson VNET_LIST_RLOCK_NOSLEEP(); 16778b615593SMarko Zec VNET_FOREACH(vnet_iter) { 16788b615593SMarko Zec CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ 16798b615593SMarko Zec if (V_ipport_tcpallocs <= 16808b615593SMarko Zec V_ipport_tcplastcount + V_ipport_randomcps) { 1681603724d3SBjoern A. Zeeb if (V_ipport_stoprandom > 0) 1682603724d3SBjoern A. Zeeb V_ipport_stoprandom--; 1683ad3a630fSRobert Watson } else 1684603724d3SBjoern A. 
Zeeb V_ipport_stoprandom = V_ipport_randomtime; 1685603724d3SBjoern A. Zeeb V_ipport_tcplastcount = V_ipport_tcpallocs; 16868b615593SMarko Zec CURVNET_RESTORE(); 16878b615593SMarko Zec } 16885ee847d3SRobert Watson VNET_LIST_RUNLOCK_NOSLEEP(); 16895f311da2SMike Silbersack callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); 16905f311da2SMike Silbersack } 1691497057eeSRobert Watson 16923d585327SKip Macy void 16933d585327SKip Macy inp_wlock(struct inpcb *inp) 16943d585327SKip Macy { 16953d585327SKip Macy 16968501a69cSRobert Watson INP_WLOCK(inp); 16973d585327SKip Macy } 16983d585327SKip Macy 16993d585327SKip Macy void 17003d585327SKip Macy inp_wunlock(struct inpcb *inp) 17013d585327SKip Macy { 17023d585327SKip Macy 17038501a69cSRobert Watson INP_WUNLOCK(inp); 17043d585327SKip Macy } 17053d585327SKip Macy 17063d585327SKip Macy void 17073d585327SKip Macy inp_rlock(struct inpcb *inp) 17083d585327SKip Macy { 17093d585327SKip Macy 1710a69042a5SRobert Watson INP_RLOCK(inp); 17113d585327SKip Macy } 17123d585327SKip Macy 17133d585327SKip Macy void 17143d585327SKip Macy inp_runlock(struct inpcb *inp) 17153d585327SKip Macy { 17163d585327SKip Macy 1717a69042a5SRobert Watson INP_RUNLOCK(inp); 17183d585327SKip Macy } 17193d585327SKip Macy 17203d585327SKip Macy #ifdef INVARIANTS 17213d585327SKip Macy void 1722e79dd20dSKip Macy inp_lock_assert(struct inpcb *inp) 17233d585327SKip Macy { 17243d585327SKip Macy 17258501a69cSRobert Watson INP_WLOCK_ASSERT(inp); 17263d585327SKip Macy } 17273d585327SKip Macy 17283d585327SKip Macy void 1729e79dd20dSKip Macy inp_unlock_assert(struct inpcb *inp) 17303d585327SKip Macy { 17313d585327SKip Macy 17323d585327SKip Macy INP_UNLOCK_ASSERT(inp); 17333d585327SKip Macy } 17343d585327SKip Macy #endif 17353d585327SKip Macy 17369378e437SKip Macy void 17379378e437SKip Macy inp_apply_all(void (*func)(struct inpcb *, void *), void *arg) 17389378e437SKip Macy { 17399378e437SKip Macy struct inpcb *inp; 17409378e437SKip Macy 1741603724d3SBjoern A. 
Zeeb INP_INFO_RLOCK(&V_tcbinfo); 174297021c24SMarko Zec LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { 17439378e437SKip Macy INP_WLOCK(inp); 17449378e437SKip Macy func(inp, arg); 17459378e437SKip Macy INP_WUNLOCK(inp); 17469378e437SKip Macy } 1747603724d3SBjoern A. Zeeb INP_INFO_RUNLOCK(&V_tcbinfo); 17489378e437SKip Macy } 17499378e437SKip Macy 17509378e437SKip Macy struct socket * 17519378e437SKip Macy inp_inpcbtosocket(struct inpcb *inp) 17529378e437SKip Macy { 17539378e437SKip Macy 17549378e437SKip Macy INP_WLOCK_ASSERT(inp); 17559378e437SKip Macy return (inp->inp_socket); 17569378e437SKip Macy } 17579378e437SKip Macy 17589378e437SKip Macy struct tcpcb * 17599378e437SKip Macy inp_inpcbtotcpcb(struct inpcb *inp) 17609378e437SKip Macy { 17619378e437SKip Macy 17629378e437SKip Macy INP_WLOCK_ASSERT(inp); 17639378e437SKip Macy return ((struct tcpcb *)inp->inp_ppcb); 17649378e437SKip Macy } 17659378e437SKip Macy 17669378e437SKip Macy int 17679378e437SKip Macy inp_ip_tos_get(const struct inpcb *inp) 17689378e437SKip Macy { 17699378e437SKip Macy 17709378e437SKip Macy return (inp->inp_ip_tos); 17719378e437SKip Macy } 17729378e437SKip Macy 17739378e437SKip Macy void 17749378e437SKip Macy inp_ip_tos_set(struct inpcb *inp, int val) 17759378e437SKip Macy { 17769378e437SKip Macy 17779378e437SKip Macy inp->inp_ip_tos = val; 17789378e437SKip Macy } 17799378e437SKip Macy 17809378e437SKip Macy void 1781df9cf830STai-hwa Liang inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, 17829d29c635SKip Macy uint32_t *faddr, uint16_t *fp) 17839378e437SKip Macy { 17849378e437SKip Macy 17859d29c635SKip Macy INP_LOCK_ASSERT(inp); 1786df9cf830STai-hwa Liang *laddr = inp->inp_laddr.s_addr; 1787df9cf830STai-hwa Liang *faddr = inp->inp_faddr.s_addr; 17889378e437SKip Macy *lp = inp->inp_lport; 17899378e437SKip Macy *fp = inp->inp_fport; 17909378e437SKip Macy } 17919378e437SKip Macy 1792dd0e6c38SKip Macy struct inpcb * 1793dd0e6c38SKip Macy so_sotoinpcb(struct socket *so) 
1794dd0e6c38SKip Macy { 1795dd0e6c38SKip Macy 1796dd0e6c38SKip Macy return (sotoinpcb(so)); 1797dd0e6c38SKip Macy } 1798dd0e6c38SKip Macy 1799dd0e6c38SKip Macy struct tcpcb * 1800dd0e6c38SKip Macy so_sototcpcb(struct socket *so) 1801dd0e6c38SKip Macy { 1802dd0e6c38SKip Macy 1803dd0e6c38SKip Macy return (sototcpcb(so)); 1804dd0e6c38SKip Macy } 1805dd0e6c38SKip Macy 1806497057eeSRobert Watson #ifdef DDB 1807497057eeSRobert Watson static void 1808497057eeSRobert Watson db_print_indent(int indent) 1809497057eeSRobert Watson { 1810497057eeSRobert Watson int i; 1811497057eeSRobert Watson 1812497057eeSRobert Watson for (i = 0; i < indent; i++) 1813497057eeSRobert Watson db_printf(" "); 1814497057eeSRobert Watson } 1815497057eeSRobert Watson 1816497057eeSRobert Watson static void 1817497057eeSRobert Watson db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) 1818497057eeSRobert Watson { 1819497057eeSRobert Watson char faddr_str[48], laddr_str[48]; 1820497057eeSRobert Watson 1821497057eeSRobert Watson db_print_indent(indent); 1822497057eeSRobert Watson db_printf("%s at %p\n", name, inc); 1823497057eeSRobert Watson 1824497057eeSRobert Watson indent += 2; 1825497057eeSRobert Watson 182603dc38a4SRobert Watson #ifdef INET6 1827dcdb4371SBjoern A. Zeeb if (inc->inc_flags & INC_ISIPV6) { 1828497057eeSRobert Watson /* IPv6. */ 1829497057eeSRobert Watson ip6_sprintf(laddr_str, &inc->inc6_laddr); 1830497057eeSRobert Watson ip6_sprintf(faddr_str, &inc->inc6_faddr); 1831497057eeSRobert Watson } else { 183203dc38a4SRobert Watson #endif 1833497057eeSRobert Watson /* IPv4. 
*/ 1834497057eeSRobert Watson inet_ntoa_r(inc->inc_laddr, laddr_str); 1835497057eeSRobert Watson inet_ntoa_r(inc->inc_faddr, faddr_str); 183603dc38a4SRobert Watson #ifdef INET6 1837497057eeSRobert Watson } 183803dc38a4SRobert Watson #endif 1839497057eeSRobert Watson db_print_indent(indent); 1840497057eeSRobert Watson db_printf("inc_laddr %s inc_lport %u\n", laddr_str, 1841497057eeSRobert Watson ntohs(inc->inc_lport)); 1842497057eeSRobert Watson db_print_indent(indent); 1843497057eeSRobert Watson db_printf("inc_faddr %s inc_fport %u\n", faddr_str, 1844497057eeSRobert Watson ntohs(inc->inc_fport)); 1845497057eeSRobert Watson } 1846497057eeSRobert Watson 1847497057eeSRobert Watson static void 1848497057eeSRobert Watson db_print_inpflags(int inp_flags) 1849497057eeSRobert Watson { 1850497057eeSRobert Watson int comma; 1851497057eeSRobert Watson 1852497057eeSRobert Watson comma = 0; 1853497057eeSRobert Watson if (inp_flags & INP_RECVOPTS) { 1854497057eeSRobert Watson db_printf("%sINP_RECVOPTS", comma ? ", " : ""); 1855497057eeSRobert Watson comma = 1; 1856497057eeSRobert Watson } 1857497057eeSRobert Watson if (inp_flags & INP_RECVRETOPTS) { 1858497057eeSRobert Watson db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); 1859497057eeSRobert Watson comma = 1; 1860497057eeSRobert Watson } 1861497057eeSRobert Watson if (inp_flags & INP_RECVDSTADDR) { 1862497057eeSRobert Watson db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); 1863497057eeSRobert Watson comma = 1; 1864497057eeSRobert Watson } 1865497057eeSRobert Watson if (inp_flags & INP_HDRINCL) { 1866497057eeSRobert Watson db_printf("%sINP_HDRINCL", comma ? ", " : ""); 1867497057eeSRobert Watson comma = 1; 1868497057eeSRobert Watson } 1869497057eeSRobert Watson if (inp_flags & INP_HIGHPORT) { 1870497057eeSRobert Watson db_printf("%sINP_HIGHPORT", comma ? 
", " : ""); 1871497057eeSRobert Watson comma = 1; 1872497057eeSRobert Watson } 1873497057eeSRobert Watson if (inp_flags & INP_LOWPORT) { 1874497057eeSRobert Watson db_printf("%sINP_LOWPORT", comma ? ", " : ""); 1875497057eeSRobert Watson comma = 1; 1876497057eeSRobert Watson } 1877497057eeSRobert Watson if (inp_flags & INP_ANONPORT) { 1878497057eeSRobert Watson db_printf("%sINP_ANONPORT", comma ? ", " : ""); 1879497057eeSRobert Watson comma = 1; 1880497057eeSRobert Watson } 1881497057eeSRobert Watson if (inp_flags & INP_RECVIF) { 1882497057eeSRobert Watson db_printf("%sINP_RECVIF", comma ? ", " : ""); 1883497057eeSRobert Watson comma = 1; 1884497057eeSRobert Watson } 1885497057eeSRobert Watson if (inp_flags & INP_MTUDISC) { 1886497057eeSRobert Watson db_printf("%sINP_MTUDISC", comma ? ", " : ""); 1887497057eeSRobert Watson comma = 1; 1888497057eeSRobert Watson } 1889497057eeSRobert Watson if (inp_flags & INP_FAITH) { 1890497057eeSRobert Watson db_printf("%sINP_FAITH", comma ? ", " : ""); 1891497057eeSRobert Watson comma = 1; 1892497057eeSRobert Watson } 1893497057eeSRobert Watson if (inp_flags & INP_RECVTTL) { 1894497057eeSRobert Watson db_printf("%sINP_RECVTTL", comma ? ", " : ""); 1895497057eeSRobert Watson comma = 1; 1896497057eeSRobert Watson } 1897497057eeSRobert Watson if (inp_flags & INP_DONTFRAG) { 1898497057eeSRobert Watson db_printf("%sINP_DONTFRAG", comma ? ", " : ""); 1899497057eeSRobert Watson comma = 1; 1900497057eeSRobert Watson } 1901497057eeSRobert Watson if (inp_flags & IN6P_IPV6_V6ONLY) { 1902497057eeSRobert Watson db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); 1903497057eeSRobert Watson comma = 1; 1904497057eeSRobert Watson } 1905497057eeSRobert Watson if (inp_flags & IN6P_PKTINFO) { 1906497057eeSRobert Watson db_printf("%sIN6P_PKTINFO", comma ? 
", " : ""); 1907497057eeSRobert Watson comma = 1; 1908497057eeSRobert Watson } 1909497057eeSRobert Watson if (inp_flags & IN6P_HOPLIMIT) { 1910497057eeSRobert Watson db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); 1911497057eeSRobert Watson comma = 1; 1912497057eeSRobert Watson } 1913497057eeSRobert Watson if (inp_flags & IN6P_HOPOPTS) { 1914497057eeSRobert Watson db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); 1915497057eeSRobert Watson comma = 1; 1916497057eeSRobert Watson } 1917497057eeSRobert Watson if (inp_flags & IN6P_DSTOPTS) { 1918497057eeSRobert Watson db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); 1919497057eeSRobert Watson comma = 1; 1920497057eeSRobert Watson } 1921497057eeSRobert Watson if (inp_flags & IN6P_RTHDR) { 1922497057eeSRobert Watson db_printf("%sIN6P_RTHDR", comma ? ", " : ""); 1923497057eeSRobert Watson comma = 1; 1924497057eeSRobert Watson } 1925497057eeSRobert Watson if (inp_flags & IN6P_RTHDRDSTOPTS) { 1926497057eeSRobert Watson db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); 1927497057eeSRobert Watson comma = 1; 1928497057eeSRobert Watson } 1929497057eeSRobert Watson if (inp_flags & IN6P_TCLASS) { 1930497057eeSRobert Watson db_printf("%sIN6P_TCLASS", comma ? ", " : ""); 1931497057eeSRobert Watson comma = 1; 1932497057eeSRobert Watson } 1933497057eeSRobert Watson if (inp_flags & IN6P_AUTOFLOWLABEL) { 1934497057eeSRobert Watson db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); 1935497057eeSRobert Watson comma = 1; 1936497057eeSRobert Watson } 1937ad71fe3cSRobert Watson if (inp_flags & INP_TIMEWAIT) { 1938ad71fe3cSRobert Watson db_printf("%sINP_TIMEWAIT", comma ? ", " : ""); 1939ad71fe3cSRobert Watson comma = 1; 1940ad71fe3cSRobert Watson } 1941ad71fe3cSRobert Watson if (inp_flags & INP_ONESBCAST) { 1942ad71fe3cSRobert Watson db_printf("%sINP_ONESBCAST", comma ? 
", " : ""); 1943ad71fe3cSRobert Watson comma = 1; 1944ad71fe3cSRobert Watson } 1945ad71fe3cSRobert Watson if (inp_flags & INP_DROPPED) { 1946ad71fe3cSRobert Watson db_printf("%sINP_DROPPED", comma ? ", " : ""); 1947ad71fe3cSRobert Watson comma = 1; 1948ad71fe3cSRobert Watson } 1949ad71fe3cSRobert Watson if (inp_flags & INP_SOCKREF) { 1950ad71fe3cSRobert Watson db_printf("%sINP_SOCKREF", comma ? ", " : ""); 1951ad71fe3cSRobert Watson comma = 1; 1952ad71fe3cSRobert Watson } 1953497057eeSRobert Watson if (inp_flags & IN6P_RFC2292) { 1954497057eeSRobert Watson db_printf("%sIN6P_RFC2292", comma ? ", " : ""); 1955497057eeSRobert Watson comma = 1; 1956497057eeSRobert Watson } 1957497057eeSRobert Watson if (inp_flags & IN6P_MTU) { 1958497057eeSRobert Watson db_printf("IN6P_MTU%s", comma ? ", " : ""); 1959497057eeSRobert Watson comma = 1; 1960497057eeSRobert Watson } 1961497057eeSRobert Watson } 1962497057eeSRobert Watson 1963497057eeSRobert Watson static void 1964497057eeSRobert Watson db_print_inpvflag(u_char inp_vflag) 1965497057eeSRobert Watson { 1966497057eeSRobert Watson int comma; 1967497057eeSRobert Watson 1968497057eeSRobert Watson comma = 0; 1969497057eeSRobert Watson if (inp_vflag & INP_IPV4) { 1970497057eeSRobert Watson db_printf("%sINP_IPV4", comma ? ", " : ""); 1971497057eeSRobert Watson comma = 1; 1972497057eeSRobert Watson } 1973497057eeSRobert Watson if (inp_vflag & INP_IPV6) { 1974497057eeSRobert Watson db_printf("%sINP_IPV6", comma ? ", " : ""); 1975497057eeSRobert Watson comma = 1; 1976497057eeSRobert Watson } 1977497057eeSRobert Watson if (inp_vflag & INP_IPV6PROTO) { 1978497057eeSRobert Watson db_printf("%sINP_IPV6PROTO", comma ? 
", " : ""); 1979497057eeSRobert Watson comma = 1; 1980497057eeSRobert Watson } 1981497057eeSRobert Watson } 1982497057eeSRobert Watson 19836d888973SRobert Watson static void 1984497057eeSRobert Watson db_print_inpcb(struct inpcb *inp, const char *name, int indent) 1985497057eeSRobert Watson { 1986497057eeSRobert Watson 1987497057eeSRobert Watson db_print_indent(indent); 1988497057eeSRobert Watson db_printf("%s at %p\n", name, inp); 1989497057eeSRobert Watson 1990497057eeSRobert Watson indent += 2; 1991497057eeSRobert Watson 1992497057eeSRobert Watson db_print_indent(indent); 1993497057eeSRobert Watson db_printf("inp_flow: 0x%x\n", inp->inp_flow); 1994497057eeSRobert Watson 1995497057eeSRobert Watson db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); 1996497057eeSRobert Watson 1997497057eeSRobert Watson db_print_indent(indent); 1998497057eeSRobert Watson db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n", 1999497057eeSRobert Watson inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket); 2000497057eeSRobert Watson 2001497057eeSRobert Watson db_print_indent(indent); 2002497057eeSRobert Watson db_printf("inp_label: %p inp_flags: 0x%x (", 2003497057eeSRobert Watson inp->inp_label, inp->inp_flags); 2004497057eeSRobert Watson db_print_inpflags(inp->inp_flags); 2005497057eeSRobert Watson db_printf(")\n"); 2006497057eeSRobert Watson 2007497057eeSRobert Watson db_print_indent(indent); 2008497057eeSRobert Watson db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, 2009497057eeSRobert Watson inp->inp_vflag); 2010497057eeSRobert Watson db_print_inpvflag(inp->inp_vflag); 2011497057eeSRobert Watson db_printf(")\n"); 2012497057eeSRobert Watson 2013497057eeSRobert Watson db_print_indent(indent); 2014497057eeSRobert Watson db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", 2015497057eeSRobert Watson inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); 2016497057eeSRobert Watson 2017497057eeSRobert Watson db_print_indent(indent); 2018497057eeSRobert Watson 
#ifdef INET6 2019497057eeSRobert Watson if (inp->inp_vflag & INP_IPV6) { 2020497057eeSRobert Watson db_printf("in6p_options: %p in6p_outputopts: %p " 2021497057eeSRobert Watson "in6p_moptions: %p\n", inp->in6p_options, 2022497057eeSRobert Watson inp->in6p_outputopts, inp->in6p_moptions); 2023497057eeSRobert Watson db_printf("in6p_icmp6filt: %p in6p_cksum %d " 2024497057eeSRobert Watson "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, 2025497057eeSRobert Watson inp->in6p_hops); 2026497057eeSRobert Watson } else 2027497057eeSRobert Watson #endif 2028497057eeSRobert Watson { 2029497057eeSRobert Watson db_printf("inp_ip_tos: %d inp_ip_options: %p " 2030497057eeSRobert Watson "inp_ip_moptions: %p\n", inp->inp_ip_tos, 2031497057eeSRobert Watson inp->inp_options, inp->inp_moptions); 2032497057eeSRobert Watson } 2033497057eeSRobert Watson 2034497057eeSRobert Watson db_print_indent(indent); 2035497057eeSRobert Watson db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd, 2036497057eeSRobert Watson (uintmax_t)inp->inp_gencnt); 2037497057eeSRobert Watson } 2038497057eeSRobert Watson 2039497057eeSRobert Watson DB_SHOW_COMMAND(inpcb, db_show_inpcb) 2040497057eeSRobert Watson { 2041497057eeSRobert Watson struct inpcb *inp; 2042497057eeSRobert Watson 2043497057eeSRobert Watson if (!have_addr) { 2044497057eeSRobert Watson db_printf("usage: show inpcb <addr>\n"); 2045497057eeSRobert Watson return; 2046497057eeSRobert Watson } 2047497057eeSRobert Watson inp = (struct inpcb *)addr; 2048497057eeSRobert Watson 2049497057eeSRobert Watson db_print_inpcb(inp, "inpcb", 0); 2050497057eeSRobert Watson } 2051497057eeSRobert Watson #endif 2052