1 /*- 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. 4 * Copyright (c) 2007-2009 Robert N. M. Watson 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 4. Neither the name of the University nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 32 */ 33 34 #include <sys/cdefs.h> 35 __FBSDID("$FreeBSD$"); 36 37 #include "opt_ddb.h" 38 #include "opt_ipsec.h" 39 #include "opt_inet.h" 40 #include "opt_inet6.h" 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/callout.h> 47 #include <sys/domain.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/priv.h> 52 #include <sys/proc.h> 53 #include <sys/jail.h> 54 #include <sys/kernel.h> 55 #include <sys/sysctl.h> 56 57 #ifdef DDB 58 #include <ddb/ddb.h> 59 #endif 60 61 #include <vm/uma.h> 62 63 #include <net/if.h> 64 #include <net/if_types.h> 65 #include <net/route.h> 66 #include <net/vnet.h> 67 68 #include <netinet/in.h> 69 #include <netinet/in_pcb.h> 70 #include <netinet/in_var.h> 71 #include <netinet/ip_var.h> 72 #include <netinet/tcp_var.h> 73 #include <netinet/udp.h> 74 #include <netinet/udp_var.h> 75 #ifdef INET6 76 #include <netinet/ip6.h> 77 #include <netinet6/ip6_var.h> 78 #include <netinet6/in6_pcb.h> 79 #endif /* INET6 */ 80 81 82 #ifdef IPSEC 83 #include <netipsec/ipsec.h> 84 #include <netipsec/key.h> 85 #endif /* IPSEC */ 86 87 #include <security/mac/mac_framework.h> 88 89 static struct callout ipport_tick_callout; 90 91 /* 92 * These configure the range of local port addresses assigned to 93 * "unspecified" outgoing connections/packets/whatever. 94 */ 95 VNET_DEFINE(int, ipport_lowfirstauto) = IPPORT_RESERVED - 1; /* 1023 */ 96 VNET_DEFINE(int, ipport_lowlastauto) = IPPORT_RESERVEDSTART; /* 600 */ 97 VNET_DEFINE(int, ipport_firstauto) = IPPORT_EPHEMERALFIRST; /* 10000 */ 98 VNET_DEFINE(int, ipport_lastauto) = IPPORT_EPHEMERALLAST; /* 65535 */ 99 VNET_DEFINE(int, ipport_hifirstauto) = IPPORT_HIFIRSTAUTO; /* 49152 */ 100 VNET_DEFINE(int, ipport_hilastauto) = IPPORT_HILASTAUTO; /* 65535 */ 101 102 /* 103 * Reserved ports accessible only to root. There are significant 104 * security considerations that must be accounted for when changing these, 105 * but the security benefits can be great. Please be careful. 106 */ 107 VNET_DEFINE(int, ipport_reservedhigh) = IPPORT_RESERVED - 1; /* 1023 */ 108 VNET_DEFINE(int, ipport_reservedlow); 109 110 /* Variables dealing with random ephemeral port allocation. */ 111 VNET_DEFINE(int, ipport_randomized) = 1; /* user controlled via sysctl */ 112 VNET_DEFINE(int, ipport_randomcps) = 10; /* user controlled via sysctl */ 113 VNET_DEFINE(int, ipport_randomtime) = 45; /* user controlled via sysctl */ 114 VNET_DEFINE(int, ipport_stoprandom); /* toggled by ipport_tick */ 115 VNET_DEFINE(int, ipport_tcpallocs); 116 static VNET_DEFINE(int, ipport_tcplastcount); 117 118 #define V_ipport_tcplastcount VNET(ipport_tcplastcount) 119 120 #define RANGECHK(var, min, max) \ 121 if ((var) < (min)) { (var) = (min); } \ 122 else if ((var) > (max)) { (var) = (max); } 123 124 static void in_pcbremlists(struct inpcb *inp); 125 126 static int 127 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 128 { 129 int error; 130 131 #ifdef VIMAGE 132 error = vnet_sysctl_handle_int(oidp, arg1, arg2, req); 133 #else 134 error = sysctl_handle_int(oidp, arg1, arg2, req); 135 #endif 136 if (error == 0) { 137 RANGECHK(V_ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 138 RANGECHK(V_ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 139 RANGECHK(V_ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); 140 RANGECHK(V_ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); 141 RANGECHK(V_ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); 142 RANGECHK(V_ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); 143 } 144 return (error); 145 } 146 147 #undef RANGECHK 148 149 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 150 151 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, 152 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowfirstauto), 0, 153 &sysctl_net_ipport_check, "I", ""); 154 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, 155 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lowlastauto), 0, 156 &sysctl_net_ipport_check, "I", ""); 157 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, first, 158 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_firstauto), 0, 159 &sysctl_net_ipport_check, "I", ""); 160 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, last, 161 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_lastauto), 0, 162 &sysctl_net_ipport_check, "I", ""); 163 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, 164 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hifirstauto), 0, 165 &sysctl_net_ipport_check, "I", ""); 166 SYSCTL_VNET_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, 167 CTLTYPE_INT|CTLFLAG_RW, &VNET_NAME(ipport_hilastauto), 0, 168 &sysctl_net_ipport_check, "I", ""); 169 SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 170 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedhigh), 0, ""); 171 SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 172 CTLFLAG_RW|CTLFLAG_SECURE, &VNET_NAME(ipport_reservedlow), 0, ""); 173 SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW, 174 &VNET_NAME(ipport_randomized), 0, "Enable random port allocation"); 175 SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW, 176 &VNET_NAME(ipport_randomcps), 0, "Maximum number of random port " 177 "allocations before switching to a sequental one"); 178 SYSCTL_VNET_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW, 179 &VNET_NAME(ipport_randomtime), 0, 180 "Minimum time to keep sequental port " 181 "allocation before switching to a random one"); 182 183 /* 184 * in_pcb.c: manage the Protocol Control Blocks. 185 * 186 * NOTE: It is assumed that most of these functions will be called with 187 * the pcbinfo lock held, and often, the inpcb lock held, as these utility 188 * functions often modify hash chains or addresses in pcbs. 189 */ 190 191 /* 192 * Initialize an inpcbinfo -- we should be able to reduce the number of 193 * arguments in time. 194 */ 195 void 196 in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, 197 struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, 198 char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, 199 uint32_t inpcbzone_flags) 200 { 201 202 INP_INFO_LOCK_INIT(pcbinfo, name); 203 #ifdef VIMAGE 204 pcbinfo->ipi_vnet = curvnet; 205 #endif 206 pcbinfo->ipi_listhead = listhead; 207 LIST_INIT(pcbinfo->ipi_listhead); 208 pcbinfo->ipi_hashbase = hashinit(hash_nelements, M_PCB, 209 &pcbinfo->ipi_hashmask); 210 pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, 211 &pcbinfo->ipi_porthashmask); 212 pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), 213 NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, 214 inpcbzone_flags); 215 uma_zone_set_max(pcbinfo->ipi_zone, maxsockets); 216 } 217 218 /* 219 * Destroy an inpcbinfo. 220 */ 221 void 222 in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) 223 { 224 225 hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); 226 hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, 227 pcbinfo->ipi_porthashmask); 228 uma_zdestroy(pcbinfo->ipi_zone); 229 INP_INFO_LOCK_DESTROY(pcbinfo); 230 } 231 232 /* 233 * Allocate a PCB and associate it with the socket. 234 * On success return with the PCB locked. 235 */ 236 int 237 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) 238 { 239 struct inpcb *inp; 240 int error; 241 242 INP_INFO_WLOCK_ASSERT(pcbinfo); 243 error = 0; 244 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); 245 if (inp == NULL) 246 return (ENOBUFS); 247 bzero(inp, inp_zero_size); 248 inp->inp_pcbinfo = pcbinfo; 249 inp->inp_socket = so; 250 inp->inp_cred = crhold(so->so_cred); 251 inp->inp_inc.inc_fibnum = so->so_fibnum; 252 #ifdef MAC 253 error = mac_inpcb_init(inp, M_NOWAIT); 254 if (error != 0) 255 goto out; 256 mac_inpcb_create(so, inp); 257 #endif 258 #ifdef IPSEC 259 error = ipsec_init_policy(so, &inp->inp_sp); 260 if (error != 0) { 261 #ifdef MAC 262 mac_inpcb_destroy(inp); 263 #endif 264 goto out; 265 } 266 #endif /*IPSEC*/ 267 #ifdef INET6 268 if (INP_SOCKAF(so) == AF_INET6) { 269 inp->inp_vflag |= INP_IPV6PROTO; 270 if (V_ip6_v6only) 271 inp->inp_flags |= IN6P_IPV6_V6ONLY; 272 } 273 #endif 274 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list); 275 pcbinfo->ipi_count++; 276 so->so_pcb = (caddr_t)inp; 277 #ifdef INET6 278 if (V_ip6_auto_flowlabel) 279 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 280 #endif 281 INP_WLOCK(inp); 282 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 283 inp->inp_refcount = 1; /* Reference from the inpcbinfo */ 284 #if defined(IPSEC) || defined(MAC) 285 out: 286 if (error != 0) { 287 crfree(inp->inp_cred); 288 uma_zfree(pcbinfo->ipi_zone, inp); 289 } 290 #endif 291 return (error); 292 } 293 294 int 295 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 296 { 297 int anonport, error; 298 299 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 300 INP_WLOCK_ASSERT(inp); 301 302 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 303 return (EINVAL); 304 anonport = inp->inp_lport == 0 && (nam == NULL || 305 ((struct sockaddr_in *)nam)->sin_port == 0); 306 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 307 &inp->inp_lport, cred); 308 if (error) 309 return (error); 310 if (in_pcbinshash(inp) != 0) { 311 inp->inp_laddr.s_addr = INADDR_ANY; 312 inp->inp_lport = 0; 313 return (EAGAIN); 314 } 315 if (anonport) 316 inp->inp_flags |= INP_ANONPORT; 317 return (0); 318 } 319 320 #if defined(INET) || defined(INET6) 321 int 322 in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, 323 struct ucred *cred, int wild) 324 { 325 struct inpcbinfo *pcbinfo; 326 struct inpcb *tmpinp; 327 unsigned short *lastport; 328 int count, dorandom, error; 329 u_short aux, first, last, lport; 330 #ifdef INET 331 struct in_addr laddr; 332 #endif 333 334 pcbinfo = inp->inp_pcbinfo; 335 336 /* 337 * Because no actual state changes occur here, a global write lock on 338 * the pcbinfo isn't required. 339 */ 340 INP_INFO_LOCK_ASSERT(pcbinfo); 341 INP_LOCK_ASSERT(inp); 342 343 if (inp->inp_flags & INP_HIGHPORT) { 344 first = V_ipport_hifirstauto; /* sysctl */ 345 last = V_ipport_hilastauto; 346 lastport = &pcbinfo->ipi_lasthi; 347 } else if (inp->inp_flags & INP_LOWPORT) { 348 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 349 if (error) 350 return (error); 351 first = V_ipport_lowfirstauto; /* 1023 */ 352 last = V_ipport_lowlastauto; /* 600 */ 353 lastport = &pcbinfo->ipi_lastlow; 354 } else { 355 first = V_ipport_firstauto; /* sysctl */ 356 last = V_ipport_lastauto; 357 lastport = &pcbinfo->ipi_lastport; 358 } 359 /* 360 * For UDP, use random port allocation as long as the user 361 * allows it. For TCP (and as of yet unknown) connections, 362 * use random port allocation only if the user allows it AND 363 * ipport_tick() allows it. 364 */ 365 if (V_ipport_randomized && 366 (!V_ipport_stoprandom || pcbinfo == &V_udbinfo)) 367 dorandom = 1; 368 else 369 dorandom = 0; 370 /* 371 * It makes no sense to do random port allocation if 372 * we have the only port available. 373 */ 374 if (first == last) 375 dorandom = 0; 376 /* Make sure to not include UDP packets in the count. */ 377 if (pcbinfo != &V_udbinfo) 378 V_ipport_tcpallocs++; 379 /* 380 * Instead of having two loops further down counting up or down 381 * make sure that first is always <= last and go with only one 382 * code path implementing all logic. 383 */ 384 if (first > last) { 385 aux = first; 386 first = last; 387 last = aux; 388 } 389 390 #ifdef INET 391 /* Make the compiler happy. */ 392 laddr.s_addr = 0; 393 if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) { 394 KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", 395 __func__, inp)); 396 laddr = *laddrp; 397 } 398 #endif 399 lport = *lportp; 400 401 if (dorandom) 402 *lastport = first + (arc4random() % (last - first)); 403 404 count = last - first; 405 406 do { 407 if (count-- < 0) /* completely used? */ 408 return (EADDRNOTAVAIL); 409 ++*lastport; 410 if (*lastport < first || *lastport > last) 411 *lastport = first; 412 lport = htons(*lastport); 413 414 #ifdef INET6 415 if ((inp->inp_vflag & INP_IPV6) != 0) 416 tmpinp = in6_pcblookup_local(pcbinfo, 417 &inp->in6p_laddr, lport, wild, cred); 418 #endif 419 #if defined(INET) && defined(INET6) 420 else 421 #endif 422 #ifdef INET 423 tmpinp = in_pcblookup_local(pcbinfo, laddr, 424 lport, wild, cred); 425 #endif 426 } while (tmpinp != NULL); 427 428 #ifdef INET 429 if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) 430 laddrp->s_addr = laddr.s_addr; 431 #endif 432 *lportp = lport; 433 434 return (0); 435 } 436 #endif /* INET || INET6 */ 437 438 /* 439 * Set up a bind operation on a PCB, performing port allocation 440 * as required, but do not actually modify the PCB. Callers can 441 * either complete the bind by setting inp_laddr/inp_lport and 442 * calling in_pcbinshash(), or they can just use the resulting 443 * port and address to authorise the sending of a once-off packet. 444 * 445 * On error, the values of *laddrp and *lportp are not changed. 446 */ 447 int 448 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp, 449 u_short *lportp, struct ucred *cred) 450 { 451 struct socket *so = inp->inp_socket; 452 struct sockaddr_in *sin; 453 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 454 struct in_addr laddr; 455 u_short lport = 0; 456 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 457 int error; 458 459 /* 460 * Because no actual state changes occur here, a global write lock on 461 * the pcbinfo isn't required. 462 */ 463 INP_INFO_LOCK_ASSERT(pcbinfo); 464 INP_LOCK_ASSERT(inp); 465 466 if (TAILQ_EMPTY(&V_in_ifaddrhead)) /* XXX broken! */ 467 return (EADDRNOTAVAIL); 468 laddr.s_addr = *laddrp; 469 if (nam != NULL && laddr.s_addr != INADDR_ANY) 470 return (EINVAL); 471 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 472 wild = INPLOOKUP_WILDCARD; 473 if (nam == NULL) { 474 if ((error = prison_local_ip4(cred, &laddr)) != 0) 475 return (error); 476 } else { 477 sin = (struct sockaddr_in *)nam; 478 if (nam->sa_len != sizeof (*sin)) 479 return (EINVAL); 480 #ifdef notdef 481 /* 482 * We should check the family, but old programs 483 * incorrectly fail to initialize it. 484 */ 485 if (sin->sin_family != AF_INET) 486 return (EAFNOSUPPORT); 487 #endif 488 error = prison_local_ip4(cred, &sin->sin_addr); 489 if (error) 490 return (error); 491 if (sin->sin_port != *lportp) { 492 /* Don't allow the port to change. */ 493 if (*lportp != 0) 494 return (EINVAL); 495 lport = sin->sin_port; 496 } 497 /* NB: lport is left as 0 if the port isn't being changed. */ 498 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 499 /* 500 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 501 * allow complete duplication of binding if 502 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 503 * and a multicast address is bound on both 504 * new and duplicated sockets. 505 */ 506 if (so->so_options & SO_REUSEADDR) 507 reuseport = SO_REUSEADDR|SO_REUSEPORT; 508 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 509 sin->sin_port = 0; /* yech... */ 510 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 511 /* 512 * Is the address a local IP address? 513 * If INP_BINDANY is set, then the socket may be bound 514 * to any endpoint address, local or not. 515 */ 516 if ((inp->inp_flags & INP_BINDANY) == 0 && 517 ifa_ifwithaddr_check((struct sockaddr *)sin) == 0) 518 return (EADDRNOTAVAIL); 519 } 520 laddr = sin->sin_addr; 521 if (lport) { 522 struct inpcb *t; 523 struct tcptw *tw; 524 525 /* GROSS */ 526 if (ntohs(lport) <= V_ipport_reservedhigh && 527 ntohs(lport) >= V_ipport_reservedlow && 528 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 529 0)) 530 return (EACCES); 531 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 532 priv_check_cred(inp->inp_cred, 533 PRIV_NETINET_REUSEPORT, 0) != 0) { 534 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 535 lport, INPLOOKUP_WILDCARD, cred); 536 /* 537 * XXX 538 * This entire block sorely needs a rewrite. 539 */ 540 if (t && 541 ((t->inp_flags & INP_TIMEWAIT) == 0) && 542 (so->so_type != SOCK_STREAM || 543 ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && 544 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 545 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 546 (t->inp_socket->so_options & 547 SO_REUSEPORT) == 0) && 548 (inp->inp_cred->cr_uid != 549 t->inp_cred->cr_uid)) 550 return (EADDRINUSE); 551 } 552 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 553 lport, wild, cred); 554 if (t && (t->inp_flags & INP_TIMEWAIT)) { 555 /* 556 * XXXRW: If an incpb has had its timewait 557 * state recycled, we treat the address as 558 * being in use (for now). This is better 559 * than a panic, but not desirable. 560 */ 561 tw = intotw(inp); 562 if (tw == NULL || 563 (reuseport & tw->tw_so_options) == 0) 564 return (EADDRINUSE); 565 } else if (t && 566 (reuseport & t->inp_socket->so_options) == 0) { 567 #ifdef INET6 568 if (ntohl(sin->sin_addr.s_addr) != 569 INADDR_ANY || 570 ntohl(t->inp_laddr.s_addr) != 571 INADDR_ANY || 572 INP_SOCKAF(so) == 573 INP_SOCKAF(t->inp_socket)) 574 #endif 575 return (EADDRINUSE); 576 } 577 } 578 } 579 if (*lportp != 0) 580 lport = *lportp; 581 if (lport == 0) { 582 error = in_pcb_lport(inp, &laddr, &lport, cred, wild); 583 if (error != 0) 584 return (error); 585 586 } 587 *laddrp = laddr.s_addr; 588 *lportp = lport; 589 return (0); 590 } 591 592 /* 593 * Connect from a socket to a specified address. 594 * Both address and port must be specified in argument sin. 595 * If don't have a local address for this socket yet, 596 * then pick one. 597 */ 598 int 599 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 600 { 601 u_short lport, fport; 602 in_addr_t laddr, faddr; 603 int anonport, error; 604 605 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 606 INP_WLOCK_ASSERT(inp); 607 608 lport = inp->inp_lport; 609 laddr = inp->inp_laddr.s_addr; 610 anonport = (lport == 0); 611 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 612 NULL, cred); 613 if (error) 614 return (error); 615 616 /* Do the initial binding of the local address if required. */ 617 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 618 inp->inp_lport = lport; 619 inp->inp_laddr.s_addr = laddr; 620 if (in_pcbinshash(inp) != 0) { 621 inp->inp_laddr.s_addr = INADDR_ANY; 622 inp->inp_lport = 0; 623 return (EAGAIN); 624 } 625 } 626 627 /* Commit the remaining changes. */ 628 inp->inp_lport = lport; 629 inp->inp_laddr.s_addr = laddr; 630 inp->inp_faddr.s_addr = faddr; 631 inp->inp_fport = fport; 632 in_pcbrehash(inp); 633 634 if (anonport) 635 inp->inp_flags |= INP_ANONPORT; 636 return (0); 637 } 638 639 /* 640 * Do proper source address selection on an unbound socket in case 641 * of connect. Take jails into account as well. 642 */ 643 static int 644 in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr, 645 struct ucred *cred) 646 { 647 struct ifaddr *ifa; 648 struct sockaddr *sa; 649 struct sockaddr_in *sin; 650 struct route sro; 651 int error; 652 653 KASSERT(laddr != NULL, ("%s: laddr NULL", __func__)); 654 655 /* 656 * Bypass source address selection and use the primary jail IP 657 * if requested. 658 */ 659 if (cred != NULL && !prison_saddrsel_ip4(cred, laddr)) 660 return (0); 661 662 error = 0; 663 bzero(&sro, sizeof(sro)); 664 665 sin = (struct sockaddr_in *)&sro.ro_dst; 666 sin->sin_family = AF_INET; 667 sin->sin_len = sizeof(struct sockaddr_in); 668 sin->sin_addr.s_addr = faddr->s_addr; 669 670 /* 671 * If route is known our src addr is taken from the i/f, 672 * else punt. 673 * 674 * Find out route to destination. 675 */ 676 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) 677 in_rtalloc_ign(&sro, 0, inp->inp_inc.inc_fibnum); 678 679 /* 680 * If we found a route, use the address corresponding to 681 * the outgoing interface. 682 * 683 * Otherwise assume faddr is reachable on a directly connected 684 * network and try to find a corresponding interface to take 685 * the source address from. 686 */ 687 if (sro.ro_rt == NULL || sro.ro_rt->rt_ifp == NULL) { 688 struct in_ifaddr *ia; 689 struct ifnet *ifp; 690 691 ia = ifatoia(ifa_ifwithdstaddr((struct sockaddr *)sin)); 692 if (ia == NULL) 693 ia = ifatoia(ifa_ifwithnet((struct sockaddr *)sin, 0)); 694 if (ia == NULL) { 695 error = ENETUNREACH; 696 goto done; 697 } 698 699 if (cred == NULL || !prison_flag(cred, PR_IP4)) { 700 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 701 ifa_free(&ia->ia_ifa); 702 goto done; 703 } 704 705 ifp = ia->ia_ifp; 706 ifa_free(&ia->ia_ifa); 707 ia = NULL; 708 IF_ADDR_LOCK(ifp); 709 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 710 711 sa = ifa->ifa_addr; 712 if (sa->sa_family != AF_INET) 713 continue; 714 sin = (struct sockaddr_in *)sa; 715 if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 716 ia = (struct in_ifaddr *)ifa; 717 break; 718 } 719 } 720 if (ia != NULL) { 721 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 722 IF_ADDR_UNLOCK(ifp); 723 goto done; 724 } 725 IF_ADDR_UNLOCK(ifp); 726 727 /* 3. As a last resort return the 'default' jail address. */ 728 error = prison_get_ip4(cred, laddr); 729 goto done; 730 } 731 732 /* 733 * If the outgoing interface on the route found is not 734 * a loopback interface, use the address from that interface. 735 * In case of jails do those three steps: 736 * 1. check if the interface address belongs to the jail. If so use it. 737 * 2. check if we have any address on the outgoing interface 738 * belonging to this jail. If so use it. 739 * 3. as a last resort return the 'default' jail address. 740 */ 741 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) == 0) { 742 struct in_ifaddr *ia; 743 struct ifnet *ifp; 744 745 /* If not jailed, use the default returned. */ 746 if (cred == NULL || !prison_flag(cred, PR_IP4)) { 747 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; 748 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 749 goto done; 750 } 751 752 /* Jailed. */ 753 /* 1. Check if the iface address belongs to the jail. */ 754 sin = (struct sockaddr_in *)sro.ro_rt->rt_ifa->ifa_addr; 755 if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 756 ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa; 757 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 758 goto done; 759 } 760 761 /* 762 * 2. Check if we have any address on the outgoing interface 763 * belonging to this jail. 764 */ 765 ia = NULL; 766 ifp = sro.ro_rt->rt_ifp; 767 IF_ADDR_LOCK(ifp); 768 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 769 sa = ifa->ifa_addr; 770 if (sa->sa_family != AF_INET) 771 continue; 772 sin = (struct sockaddr_in *)sa; 773 if (prison_check_ip4(cred, &sin->sin_addr) == 0) { 774 ia = (struct in_ifaddr *)ifa; 775 break; 776 } 777 } 778 if (ia != NULL) { 779 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 780 IF_ADDR_UNLOCK(ifp); 781 goto done; 782 } 783 IF_ADDR_UNLOCK(ifp); 784 785 /* 3. As a last resort return the 'default' jail address. */ 786 error = prison_get_ip4(cred, laddr); 787 goto done; 788 } 789 790 /* 791 * The outgoing interface is marked with 'loopback net', so a route 792 * to ourselves is here. 793 * Try to find the interface of the destination address and then 794 * take the address from there. That interface is not necessarily 795 * a loopback interface. 796 * In case of jails, check that it is an address of the jail 797 * and if we cannot find, fall back to the 'default' jail address. 798 */ 799 if ((sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK) != 0) { 800 struct sockaddr_in sain; 801 struct in_ifaddr *ia; 802 803 bzero(&sain, sizeof(struct sockaddr_in)); 804 sain.sin_family = AF_INET; 805 sain.sin_len = sizeof(struct sockaddr_in); 806 sain.sin_addr.s_addr = faddr->s_addr; 807 808 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sain))); 809 if (ia == NULL) 810 ia = ifatoia(ifa_ifwithnet(sintosa(&sain), 0)); 811 if (ia == NULL) 812 ia = ifatoia(ifa_ifwithaddr(sintosa(&sain))); 813 814 if (cred == NULL || !prison_flag(cred, PR_IP4)) { 815 if (ia == NULL) { 816 error = ENETUNREACH; 817 goto done; 818 } 819 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 820 ifa_free(&ia->ia_ifa); 821 goto done; 822 } 823 824 /* Jailed. */ 825 if (ia != NULL) { 826 struct ifnet *ifp; 827 828 ifp = ia->ia_ifp; 829 ifa_free(&ia->ia_ifa); 830 ia = NULL; 831 IF_ADDR_LOCK(ifp); 832 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 833 834 sa = ifa->ifa_addr; 835 if (sa->sa_family != AF_INET) 836 continue; 837 sin = (struct sockaddr_in *)sa; 838 if (prison_check_ip4(cred, 839 &sin->sin_addr) == 0) { 840 ia = (struct in_ifaddr *)ifa; 841 break; 842 } 843 } 844 if (ia != NULL) { 845 laddr->s_addr = ia->ia_addr.sin_addr.s_addr; 846 IF_ADDR_UNLOCK(ifp); 847 goto done; 848 } 849 IF_ADDR_UNLOCK(ifp); 850 } 851 852 /* 3. As a last resort return the 'default' jail address. */ 853 error = prison_get_ip4(cred, laddr); 854 goto done; 855 } 856 857 done: 858 if (sro.ro_rt != NULL) 859 RTFREE(sro.ro_rt); 860 return (error); 861 } 862 863 /* 864 * Set up for a connect from a socket to the specified address. 865 * On entry, *laddrp and *lportp should contain the current local 866 * address and port for the PCB; these are updated to the values 867 * that should be placed in inp_laddr and inp_lport to complete 868 * the connect. 869 * 870 * On success, *faddrp and *fportp will be set to the remote address 871 * and port. These are not updated in the error case. 872 * 873 * If the operation fails because the connection already exists, 874 * *oinpp will be set to the PCB of that connection so that the 875 * caller can decide to override it. In all other cases, *oinpp 876 * is set to NULL. 877 */ 878 int 879 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, 880 in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp, 881 struct inpcb **oinpp, struct ucred *cred) 882 { 883 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 884 struct in_ifaddr *ia; 885 struct inpcb *oinp; 886 struct in_addr laddr, faddr; 887 u_short lport, fport; 888 int error; 889 890 /* 891 * Because a global state change doesn't actually occur here, a read 892 * lock is sufficient. 893 */ 894 INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo); 895 INP_LOCK_ASSERT(inp); 896 897 if (oinpp != NULL) 898 *oinpp = NULL; 899 if (nam->sa_len != sizeof (*sin)) 900 return (EINVAL); 901 if (sin->sin_family != AF_INET) 902 return (EAFNOSUPPORT); 903 if (sin->sin_port == 0) 904 return (EADDRNOTAVAIL); 905 laddr.s_addr = *laddrp; 906 lport = *lportp; 907 faddr = sin->sin_addr; 908 fport = sin->sin_port; 909 910 if (!TAILQ_EMPTY(&V_in_ifaddrhead)) { 911 /* 912 * If the destination address is INADDR_ANY, 913 * use the primary local address. 914 * If the supplied address is INADDR_BROADCAST, 915 * and the primary interface supports broadcast, 916 * choose the broadcast address for that interface. 917 */ 918 if (faddr.s_addr == INADDR_ANY) { 919 IN_IFADDR_RLOCK(); 920 faddr = 921 IA_SIN(TAILQ_FIRST(&V_in_ifaddrhead))->sin_addr; 922 IN_IFADDR_RUNLOCK(); 923 if (cred != NULL && 924 (error = prison_get_ip4(cred, &faddr)) != 0) 925 return (error); 926 } else if (faddr.s_addr == (u_long)INADDR_BROADCAST) { 927 IN_IFADDR_RLOCK(); 928 if (TAILQ_FIRST(&V_in_ifaddrhead)->ia_ifp->if_flags & 929 IFF_BROADCAST) 930 faddr = satosin(&TAILQ_FIRST( 931 &V_in_ifaddrhead)->ia_broadaddr)->sin_addr; 932 IN_IFADDR_RUNLOCK(); 933 } 934 } 935 if (laddr.s_addr == INADDR_ANY) { 936 error = in_pcbladdr(inp, &faddr, &laddr, cred); 937 /* 938 * If the destination address is multicast and an outgoing 939 * interface has been set as a multicast option, prefer the 940 * address of that interface as our source address. 941 */ 942 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 943 inp->inp_moptions != NULL) { 944 struct ip_moptions *imo; 945 struct ifnet *ifp; 946 947 imo = inp->inp_moptions; 948 if (imo->imo_multicast_ifp != NULL) { 949 ifp = imo->imo_multicast_ifp; 950 IN_IFADDR_RLOCK(); 951 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 952 if ((ia->ia_ifp == ifp) && 953 (cred == NULL || 954 prison_check_ip4(cred, 955 &ia->ia_addr.sin_addr) == 0)) 956 break; 957 } 958 if (ia == NULL) 959 error = EADDRNOTAVAIL; 960 else { 961 laddr = ia->ia_addr.sin_addr; 962 error = 0; 963 } 964 IN_IFADDR_RUNLOCK(); 965 } 966 } 967 if (error) 968 return (error); 969 } 970 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 971 0, NULL); 972 if (oinp != NULL) { 973 if (oinpp != NULL) 974 *oinpp = oinp; 975 return (EADDRINUSE); 976 } 977 if (lport == 0) { 978 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, 979 cred); 980 if (error) 981 return (error); 982 } 983 *laddrp = laddr.s_addr; 984 *lportp = lport; 985 *faddrp = faddr.s_addr; 986 *fportp = fport; 987 return (0); 988 } 989 990 void 991 in_pcbdisconnect(struct inpcb *inp) 992 { 993 994 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 995 INP_WLOCK_ASSERT(inp); 996 997 inp->inp_faddr.s_addr = INADDR_ANY; 998 inp->inp_fport = 0; 999 in_pcbrehash(inp); 1000 } 1001 1002 /* 1003 * in_pcbdetach() is responsibe for disassociating a socket from an inpcb. 1004 * For most protocols, this will be invoked immediately prior to calling 1005 * in_pcbfree(). However, with TCP the inpcb may significantly outlive the 1006 * socket, in which case in_pcbfree() is deferred. 1007 */ 1008 void 1009 in_pcbdetach(struct inpcb *inp) 1010 { 1011 1012 KASSERT(inp->inp_socket != NULL, ("%s: inp_socket == NULL", __func__)); 1013 1014 inp->inp_socket->so_pcb = NULL; 1015 inp->inp_socket = NULL; 1016 } 1017 1018 /* 1019 * in_pcbfree_internal() frees an inpcb that has been detached from its 1020 * socket, and whose reference count has reached 0. It will also remove the 1021 * inpcb from any global lists it might remain on. 1022 */ 1023 static void 1024 in_pcbfree_internal(struct inpcb *inp) 1025 { 1026 struct inpcbinfo *ipi = inp->inp_pcbinfo; 1027 1028 KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__)); 1029 KASSERT(inp->inp_refcount == 0, ("%s: refcount !0", __func__)); 1030 1031 INP_INFO_WLOCK_ASSERT(ipi); 1032 INP_WLOCK_ASSERT(inp); 1033 1034 #ifdef IPSEC 1035 if (inp->inp_sp != NULL) 1036 ipsec_delete_pcbpolicy(inp); 1037 #endif /* IPSEC */ 1038 inp->inp_gencnt = ++ipi->ipi_gencnt; 1039 in_pcbremlists(inp); 1040 #ifdef INET6 1041 if (inp->inp_vflag & INP_IPV6PROTO) { 1042 ip6_freepcbopts(inp->in6p_outputopts); 1043 if (inp->in6p_moptions != NULL) 1044 ip6_freemoptions(inp->in6p_moptions); 1045 } 1046 #endif 1047 if (inp->inp_options) 1048 (void)m_free(inp->inp_options); 1049 if (inp->inp_moptions != NULL) 1050 inp_freemoptions(inp->inp_moptions); 1051 inp->inp_vflag = 0; 1052 crfree(inp->inp_cred); 1053 1054 #ifdef MAC 1055 mac_inpcb_destroy(inp); 1056 #endif 1057 INP_WUNLOCK(inp); 1058 uma_zfree(ipi->ipi_zone, inp); 1059 } 1060 1061 /* 1062 * in_pcbref() bumps the reference count on an inpcb in order to maintain 1063 * stability of an inpcb pointer despite the inpcb lock being released. This 1064 * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, 1065 * but where the inpcb lock is already held. 1066 * 1067 * While the inpcb will not be freed, releasing the inpcb lock means that the 1068 * connection's state may change, so the caller should be careful to 1069 * revalidate any cached state on reacquiring the lock. Drop the reference 1070 * using in_pcbrele(). 1071 */ 1072 void 1073 in_pcbref(struct inpcb *inp) 1074 { 1075 1076 INP_WLOCK_ASSERT(inp); 1077 1078 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); 1079 1080 inp->inp_refcount++; 1081 } 1082 1083 /* 1084 * Drop a refcount on an inpcb elevated using in_pcbref(); because a call to 1085 * in_pcbfree() may have been made between in_pcbref() and in_pcbrele(), we 1086 * return a flag indicating whether or not the inpcb remains valid. If it is 1087 * valid, we return with the inpcb lock held. 1088 */ 1089 int 1090 in_pcbrele(struct inpcb *inp) 1091 { 1092 #ifdef INVARIANTS 1093 struct inpcbinfo *ipi = inp->inp_pcbinfo; 1094 #endif 1095 1096 KASSERT(inp->inp_refcount > 0, ("%s: refcount 0", __func__)); 1097 1098 INP_INFO_WLOCK_ASSERT(ipi); 1099 INP_WLOCK_ASSERT(inp); 1100 1101 inp->inp_refcount--; 1102 if (inp->inp_refcount > 0) 1103 return (0); 1104 in_pcbfree_internal(inp); 1105 return (1); 1106 } 1107 1108 /* 1109 * Unconditionally schedule an inpcb to be freed by decrementing its 1110 * reference count, which should occur only after the inpcb has been detached 1111 * from its socket. If another thread holds a temporary reference (acquired 1112 * using in_pcbref()) then the free is deferred until that reference is 1113 * released using in_pcbrele(), but the inpcb is still unlocked. 1114 */ 1115 void 1116 in_pcbfree(struct inpcb *inp) 1117 { 1118 #ifdef INVARIANTS 1119 struct inpcbinfo *ipi = inp->inp_pcbinfo; 1120 #endif 1121 1122 KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", 1123 __func__)); 1124 1125 INP_INFO_WLOCK_ASSERT(ipi); 1126 INP_WLOCK_ASSERT(inp); 1127 1128 if (!in_pcbrele(inp)) 1129 INP_WUNLOCK(inp); 1130 } 1131 1132 /* 1133 * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and 1134 * port reservation, and preventing it from being returned by inpcb lookups. 1135 * 1136 * It is used by TCP to mark an inpcb as unused and avoid future packet 1137 * delivery or event notification when a socket remains open but TCP has 1138 * closed. This might occur as a result of a shutdown()-initiated TCP close 1139 * or a RST on the wire, and allows the port binding to be reused while still 1140 * maintaining the invariant that so_pcb always points to a valid inpcb until 1141 * in_pcbdetach(). 1142 * 1143 * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by 1144 * in_pcbnotifyall() and in_pcbpurgeif0()? 1145 */ 1146 void 1147 in_pcbdrop(struct inpcb *inp) 1148 { 1149 1150 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 1151 INP_WLOCK_ASSERT(inp); 1152 1153 inp->inp_flags |= INP_DROPPED; 1154 if (inp->inp_flags & INP_INHASHLIST) { 1155 struct inpcbport *phd = inp->inp_phd; 1156 1157 LIST_REMOVE(inp, inp_hash); 1158 LIST_REMOVE(inp, inp_portlist); 1159 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1160 LIST_REMOVE(phd, phd_hash); 1161 free(phd, M_PCB); 1162 } 1163 inp->inp_flags &= ~INP_INHASHLIST; 1164 } 1165 } 1166 1167 /* 1168 * Common routines to return the socket addresses associated with inpcbs. 1169 */ 1170 struct sockaddr * 1171 in_sockaddr(in_port_t port, struct in_addr *addr_p) 1172 { 1173 struct sockaddr_in *sin; 1174 1175 sin = malloc(sizeof *sin, M_SONAME, 1176 M_WAITOK | M_ZERO); 1177 sin->sin_family = AF_INET; 1178 sin->sin_len = sizeof(*sin); 1179 sin->sin_addr = *addr_p; 1180 sin->sin_port = port; 1181 1182 return (struct sockaddr *)sin; 1183 } 1184 1185 int 1186 in_getsockaddr(struct socket *so, struct sockaddr **nam) 1187 { 1188 struct inpcb *inp; 1189 struct in_addr addr; 1190 in_port_t port; 1191 1192 inp = sotoinpcb(so); 1193 KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL")); 1194 1195 INP_RLOCK(inp); 1196 port = inp->inp_lport; 1197 addr = inp->inp_laddr; 1198 INP_RUNLOCK(inp); 1199 1200 *nam = in_sockaddr(port, &addr); 1201 return 0; 1202 } 1203 1204 int 1205 in_getpeeraddr(struct socket *so, struct sockaddr **nam) 1206 { 1207 struct inpcb *inp; 1208 struct in_addr addr; 1209 in_port_t port; 1210 1211 inp = sotoinpcb(so); 1212 KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL")); 1213 1214 INP_RLOCK(inp); 1215 port = inp->inp_fport; 1216 addr = inp->inp_faddr; 1217 INP_RUNLOCK(inp); 1218 1219 *nam = in_sockaddr(port, &addr); 1220 return 0; 1221 } 1222 1223 void 1224 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno, 1225 struct inpcb *(*notify)(struct inpcb *, int)) 1226 { 1227 struct inpcb *inp, *inp_temp; 1228 1229 INP_INFO_WLOCK(pcbinfo); 1230 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { 1231 INP_WLOCK(inp); 1232 #ifdef INET6 1233 if ((inp->inp_vflag & INP_IPV4) == 0) { 1234 INP_WUNLOCK(inp); 1235 continue; 1236 } 1237 #endif 1238 if (inp->inp_faddr.s_addr != faddr.s_addr || 1239 inp->inp_socket == NULL) { 1240 INP_WUNLOCK(inp); 1241 continue; 1242 } 1243 if ((*notify)(inp, errno)) 1244 INP_WUNLOCK(inp); 1245 } 1246 INP_INFO_WUNLOCK(pcbinfo); 1247 } 1248 1249 void 1250 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 1251 { 1252 struct inpcb *inp; 1253 struct ip_moptions *imo; 1254 int i, gap; 1255 1256 INP_INFO_RLOCK(pcbinfo); 1257 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) { 1258 INP_WLOCK(inp); 1259 imo = inp->inp_moptions; 1260 if ((inp->inp_vflag & INP_IPV4) && 1261 imo != NULL) { 1262 /* 1263 * Unselect the outgoing interface if it is being 1264 * detached. 1265 */ 1266 if (imo->imo_multicast_ifp == ifp) 1267 imo->imo_multicast_ifp = NULL; 1268 1269 /* 1270 * Drop multicast group membership if we joined 1271 * through the interface being detached. 1272 */ 1273 for (i = 0, gap = 0; i < imo->imo_num_memberships; 1274 i++) { 1275 if (imo->imo_membership[i]->inm_ifp == ifp) { 1276 in_delmulti(imo->imo_membership[i]); 1277 gap++; 1278 } else if (gap != 0) 1279 imo->imo_membership[i - gap] = 1280 imo->imo_membership[i]; 1281 } 1282 imo->imo_num_memberships -= gap; 1283 } 1284 INP_WUNLOCK(inp); 1285 } 1286 INP_INFO_RUNLOCK(pcbinfo); 1287 } 1288 1289 /* 1290 * Lookup a PCB based on the local address and port. 1291 */ 1292 #define INP_LOOKUP_MAPPED_PCB_COST 3 1293 struct inpcb * 1294 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, 1295 u_short lport, int wild_okay, struct ucred *cred) 1296 { 1297 struct inpcb *inp; 1298 #ifdef INET6 1299 int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST; 1300 #else 1301 int matchwild = 3; 1302 #endif 1303 int wildcard; 1304 1305 INP_INFO_LOCK_ASSERT(pcbinfo); 1306 1307 if (!wild_okay) { 1308 struct inpcbhead *head; 1309 /* 1310 * Look for an unconnected (wildcard foreign addr) PCB that 1311 * matches the local address and port we're looking for. 1312 */ 1313 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1314 0, pcbinfo->ipi_hashmask)]; 1315 LIST_FOREACH(inp, head, inp_hash) { 1316 #ifdef INET6 1317 /* XXX inp locking */ 1318 if ((inp->inp_vflag & INP_IPV4) == 0) 1319 continue; 1320 #endif 1321 if (inp->inp_faddr.s_addr == INADDR_ANY && 1322 inp->inp_laddr.s_addr == laddr.s_addr && 1323 inp->inp_lport == lport) { 1324 /* 1325 * Found? 1326 */ 1327 if (cred == NULL || 1328 prison_equal_ip4(cred->cr_prison, 1329 inp->inp_cred->cr_prison)) 1330 return (inp); 1331 } 1332 } 1333 /* 1334 * Not found. 1335 */ 1336 return (NULL); 1337 } else { 1338 struct inpcbporthead *porthash; 1339 struct inpcbport *phd; 1340 struct inpcb *match = NULL; 1341 /* 1342 * Best fit PCB lookup. 1343 * 1344 * First see if this local port is in use by looking on the 1345 * port hash list. 1346 */ 1347 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, 1348 pcbinfo->ipi_porthashmask)]; 1349 LIST_FOREACH(phd, porthash, phd_hash) { 1350 if (phd->phd_port == lport) 1351 break; 1352 } 1353 if (phd != NULL) { 1354 /* 1355 * Port is in use by one or more PCBs. Look for best 1356 * fit. 1357 */ 1358 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1359 wildcard = 0; 1360 if (cred != NULL && 1361 !prison_equal_ip4(inp->inp_cred->cr_prison, 1362 cred->cr_prison)) 1363 continue; 1364 #ifdef INET6 1365 /* XXX inp locking */ 1366 if ((inp->inp_vflag & INP_IPV4) == 0) 1367 continue; 1368 /* 1369 * We never select the PCB that has 1370 * INP_IPV6 flag and is bound to :: if 1371 * we have another PCB which is bound 1372 * to 0.0.0.0. If a PCB has the 1373 * INP_IPV6 flag, then we set its cost 1374 * higher than IPv4 only PCBs. 1375 * 1376 * Note that the case only happens 1377 * when a socket is bound to ::, under 1378 * the condition that the use of the 1379 * mapped address is allowed. 1380 */ 1381 if ((inp->inp_vflag & INP_IPV6) != 0) 1382 wildcard += INP_LOOKUP_MAPPED_PCB_COST; 1383 #endif 1384 if (inp->inp_faddr.s_addr != INADDR_ANY) 1385 wildcard++; 1386 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1387 if (laddr.s_addr == INADDR_ANY) 1388 wildcard++; 1389 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1390 continue; 1391 } else { 1392 if (laddr.s_addr != INADDR_ANY) 1393 wildcard++; 1394 } 1395 if (wildcard < matchwild) { 1396 match = inp; 1397 matchwild = wildcard; 1398 if (matchwild == 0) 1399 break; 1400 } 1401 } 1402 } 1403 return (match); 1404 } 1405 } 1406 #undef INP_LOOKUP_MAPPED_PCB_COST 1407 1408 /* 1409 * Lookup PCB in hash list. 1410 */ 1411 struct inpcb * 1412 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, 1413 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard, 1414 struct ifnet *ifp) 1415 { 1416 struct inpcbhead *head; 1417 struct inpcb *inp, *tmpinp; 1418 u_short fport = fport_arg, lport = lport_arg; 1419 1420 INP_INFO_LOCK_ASSERT(pcbinfo); 1421 1422 /* 1423 * First look for an exact match. 1424 */ 1425 tmpinp = NULL; 1426 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, 1427 pcbinfo->ipi_hashmask)]; 1428 LIST_FOREACH(inp, head, inp_hash) { 1429 #ifdef INET6 1430 /* XXX inp locking */ 1431 if ((inp->inp_vflag & INP_IPV4) == 0) 1432 continue; 1433 #endif 1434 if (inp->inp_faddr.s_addr == faddr.s_addr && 1435 inp->inp_laddr.s_addr == laddr.s_addr && 1436 inp->inp_fport == fport && 1437 inp->inp_lport == lport) { 1438 /* 1439 * XXX We should be able to directly return 1440 * the inp here, without any checks. 1441 * Well unless both bound with SO_REUSEPORT? 1442 */ 1443 if (prison_flag(inp->inp_cred, PR_IP4)) 1444 return (inp); 1445 if (tmpinp == NULL) 1446 tmpinp = inp; 1447 } 1448 } 1449 if (tmpinp != NULL) 1450 return (tmpinp); 1451 1452 /* 1453 * Then look for a wildcard match, if requested. 1454 */ 1455 if (wildcard == INPLOOKUP_WILDCARD) { 1456 struct inpcb *local_wild = NULL, *local_exact = NULL; 1457 #ifdef INET6 1458 struct inpcb *local_wild_mapped = NULL; 1459 #endif 1460 struct inpcb *jail_wild = NULL; 1461 int injail; 1462 1463 /* 1464 * Order of socket selection - we always prefer jails. 1465 * 1. jailed, non-wild. 1466 * 2. jailed, wild. 1467 * 3. non-jailed, non-wild. 1468 * 4. non-jailed, wild. 1469 */ 1470 1471 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1472 0, pcbinfo->ipi_hashmask)]; 1473 LIST_FOREACH(inp, head, inp_hash) { 1474 #ifdef INET6 1475 /* XXX inp locking */ 1476 if ((inp->inp_vflag & INP_IPV4) == 0) 1477 continue; 1478 #endif 1479 if (inp->inp_faddr.s_addr != INADDR_ANY || 1480 inp->inp_lport != lport) 1481 continue; 1482 1483 /* XXX inp locking */ 1484 if (ifp && ifp->if_type == IFT_FAITH && 1485 (inp->inp_flags & INP_FAITH) == 0) 1486 continue; 1487 1488 injail = prison_flag(inp->inp_cred, PR_IP4); 1489 if (injail) { 1490 if (prison_check_ip4(inp->inp_cred, 1491 &laddr) != 0) 1492 continue; 1493 } else { 1494 if (local_exact != NULL) 1495 continue; 1496 } 1497 1498 if (inp->inp_laddr.s_addr == laddr.s_addr) { 1499 if (injail) 1500 return (inp); 1501 else 1502 local_exact = inp; 1503 } else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1504 #ifdef INET6 1505 /* XXX inp locking, NULL check */ 1506 if (inp->inp_vflag & INP_IPV6PROTO) 1507 local_wild_mapped = inp; 1508 else 1509 #endif /* INET6 */ 1510 if (injail) 1511 jail_wild = inp; 1512 else 1513 local_wild = inp; 1514 } 1515 } /* LIST_FOREACH */ 1516 if (jail_wild != NULL) 1517 return (jail_wild); 1518 if (local_exact != NULL) 1519 return (local_exact); 1520 if (local_wild != NULL) 1521 return (local_wild); 1522 #ifdef INET6 1523 if (local_wild_mapped != NULL) 1524 return (local_wild_mapped); 1525 #endif /* defined(INET6) */ 1526 } /* if (wildcard == INPLOOKUP_WILDCARD) */ 1527 1528 return (NULL); 1529 } 1530 1531 /* 1532 * Insert PCB onto various hash lists. 1533 */ 1534 int 1535 in_pcbinshash(struct inpcb *inp) 1536 { 1537 struct inpcbhead *pcbhash; 1538 struct inpcbporthead *pcbporthash; 1539 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1540 struct inpcbport *phd; 1541 u_int32_t hashkey_faddr; 1542 1543 INP_INFO_WLOCK_ASSERT(pcbinfo); 1544 INP_WLOCK_ASSERT(inp); 1545 KASSERT((inp->inp_flags & INP_INHASHLIST) == 0, 1546 ("in_pcbinshash: INP_INHASHLIST")); 1547 1548 #ifdef INET6 1549 if (inp->inp_vflag & INP_IPV6) 1550 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1551 else 1552 #endif /* INET6 */ 1553 hashkey_faddr = inp->inp_faddr.s_addr; 1554 1555 pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, 1556 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; 1557 1558 pcbporthash = &pcbinfo->ipi_porthashbase[ 1559 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)]; 1560 1561 /* 1562 * Go through port list and look for a head for this lport. 1563 */ 1564 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1565 if (phd->phd_port == inp->inp_lport) 1566 break; 1567 } 1568 /* 1569 * If none exists, malloc one and tack it on. 1570 */ 1571 if (phd == NULL) { 1572 phd = malloc(sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1573 if (phd == NULL) { 1574 return (ENOBUFS); /* XXX */ 1575 } 1576 phd->phd_port = inp->inp_lport; 1577 LIST_INIT(&phd->phd_pcblist); 1578 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1579 } 1580 inp->inp_phd = phd; 1581 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1582 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1583 inp->inp_flags |= INP_INHASHLIST; 1584 return (0); 1585 } 1586 1587 /* 1588 * Move PCB to the proper hash bucket when { faddr, fport } have been 1589 * changed. NOTE: This does not handle the case of the lport changing (the 1590 * hashed port list would have to be updated as well), so the lport must 1591 * not change after in_pcbinshash() has been called. 1592 */ 1593 void 1594 in_pcbrehash(struct inpcb *inp) 1595 { 1596 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1597 struct inpcbhead *head; 1598 u_int32_t hashkey_faddr; 1599 1600 INP_INFO_WLOCK_ASSERT(pcbinfo); 1601 INP_WLOCK_ASSERT(inp); 1602 KASSERT(inp->inp_flags & INP_INHASHLIST, 1603 ("in_pcbrehash: !INP_INHASHLIST")); 1604 1605 #ifdef INET6 1606 if (inp->inp_vflag & INP_IPV6) 1607 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1608 else 1609 #endif /* INET6 */ 1610 hashkey_faddr = inp->inp_faddr.s_addr; 1611 1612 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr, 1613 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)]; 1614 1615 LIST_REMOVE(inp, inp_hash); 1616 LIST_INSERT_HEAD(head, inp, inp_hash); 1617 } 1618 1619 /* 1620 * Remove PCB from various lists. 1621 */ 1622 static void 1623 in_pcbremlists(struct inpcb *inp) 1624 { 1625 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1626 1627 INP_INFO_WLOCK_ASSERT(pcbinfo); 1628 INP_WLOCK_ASSERT(inp); 1629 1630 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1631 if (inp->inp_flags & INP_INHASHLIST) { 1632 struct inpcbport *phd = inp->inp_phd; 1633 1634 LIST_REMOVE(inp, inp_hash); 1635 LIST_REMOVE(inp, inp_portlist); 1636 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1637 LIST_REMOVE(phd, phd_hash); 1638 free(phd, M_PCB); 1639 } 1640 inp->inp_flags &= ~INP_INHASHLIST; 1641 } 1642 LIST_REMOVE(inp, inp_list); 1643 pcbinfo->ipi_count--; 1644 } 1645 1646 /* 1647 * A set label operation has occurred at the socket layer, propagate the 1648 * label change into the in_pcb for the socket. 1649 */ 1650 void 1651 in_pcbsosetlabel(struct socket *so) 1652 { 1653 #ifdef MAC 1654 struct inpcb *inp; 1655 1656 inp = sotoinpcb(so); 1657 KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL")); 1658 1659 INP_WLOCK(inp); 1660 SOCK_LOCK(so); 1661 mac_inpcb_sosetlabel(so, inp); 1662 SOCK_UNLOCK(so); 1663 INP_WUNLOCK(inp); 1664 #endif 1665 } 1666 1667 /* 1668 * ipport_tick runs once per second, determining if random port allocation 1669 * should be continued. If more than ipport_randomcps ports have been 1670 * allocated in the last second, then we return to sequential port 1671 * allocation. We return to random allocation only once we drop below 1672 * ipport_randomcps for at least ipport_randomtime seconds. 1673 */ 1674 static void 1675 ipport_tick(void *xtp) 1676 { 1677 VNET_ITERATOR_DECL(vnet_iter); 1678 1679 VNET_LIST_RLOCK_NOSLEEP(); 1680 VNET_FOREACH(vnet_iter) { 1681 CURVNET_SET(vnet_iter); /* XXX appease INVARIANTS here */ 1682 if (V_ipport_tcpallocs <= 1683 V_ipport_tcplastcount + V_ipport_randomcps) { 1684 if (V_ipport_stoprandom > 0) 1685 V_ipport_stoprandom--; 1686 } else 1687 V_ipport_stoprandom = V_ipport_randomtime; 1688 V_ipport_tcplastcount = V_ipport_tcpallocs; 1689 CURVNET_RESTORE(); 1690 } 1691 VNET_LIST_RUNLOCK_NOSLEEP(); 1692 callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL); 1693 } 1694 1695 static void 1696 ip_fini(void *xtp) 1697 { 1698 1699 callout_stop(&ipport_tick_callout); 1700 } 1701 1702 /* 1703 * The ipport_callout should start running at about the time we attach the 1704 * inet or inet6 domains. 1705 */ 1706 static void 1707 ipport_tick_init(const void *unused __unused) 1708 { 1709 1710 /* Start ipport_tick. */ 1711 callout_init(&ipport_tick_callout, CALLOUT_MPSAFE); 1712 callout_reset(&ipport_tick_callout, 1, ipport_tick, NULL); 1713 EVENTHANDLER_REGISTER(shutdown_pre_sync, ip_fini, NULL, 1714 SHUTDOWN_PRI_DEFAULT); 1715 } 1716 SYSINIT(ipport_tick_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_MIDDLE, 1717 ipport_tick_init, NULL); 1718 1719 void 1720 inp_wlock(struct inpcb *inp) 1721 { 1722 1723 INP_WLOCK(inp); 1724 } 1725 1726 void 1727 inp_wunlock(struct inpcb *inp) 1728 { 1729 1730 INP_WUNLOCK(inp); 1731 } 1732 1733 void 1734 inp_rlock(struct inpcb *inp) 1735 { 1736 1737 INP_RLOCK(inp); 1738 } 1739 1740 void 1741 inp_runlock(struct inpcb *inp) 1742 { 1743 1744 INP_RUNLOCK(inp); 1745 } 1746 1747 #ifdef INVARIANTS 1748 void 1749 inp_lock_assert(struct inpcb *inp) 1750 { 1751 1752 INP_WLOCK_ASSERT(inp); 1753 } 1754 1755 void 1756 inp_unlock_assert(struct inpcb *inp) 1757 { 1758 1759 INP_UNLOCK_ASSERT(inp); 1760 } 1761 #endif 1762 1763 void 1764 inp_apply_all(void (*func)(struct inpcb *, void *), void *arg) 1765 { 1766 struct inpcb *inp; 1767 1768 INP_INFO_RLOCK(&V_tcbinfo); 1769 LIST_FOREACH(inp, V_tcbinfo.ipi_listhead, inp_list) { 1770 INP_WLOCK(inp); 1771 func(inp, arg); 1772 INP_WUNLOCK(inp); 1773 } 1774 INP_INFO_RUNLOCK(&V_tcbinfo); 1775 } 1776 1777 struct socket * 1778 inp_inpcbtosocket(struct inpcb *inp) 1779 { 1780 1781 INP_WLOCK_ASSERT(inp); 1782 return (inp->inp_socket); 1783 } 1784 1785 struct tcpcb * 1786 inp_inpcbtotcpcb(struct inpcb *inp) 1787 { 1788 1789 INP_WLOCK_ASSERT(inp); 1790 return ((struct tcpcb *)inp->inp_ppcb); 1791 } 1792 1793 int 1794 inp_ip_tos_get(const struct inpcb *inp) 1795 { 1796 1797 return (inp->inp_ip_tos); 1798 } 1799 1800 void 1801 inp_ip_tos_set(struct inpcb *inp, int val) 1802 { 1803 1804 inp->inp_ip_tos = val; 1805 } 1806 1807 void 1808 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, 1809 uint32_t *faddr, uint16_t *fp) 1810 { 1811 1812 INP_LOCK_ASSERT(inp); 1813 *laddr = inp->inp_laddr.s_addr; 1814 *faddr = inp->inp_faddr.s_addr; 1815 *lp = inp->inp_lport; 1816 *fp = inp->inp_fport; 1817 } 1818 1819 struct inpcb * 1820 so_sotoinpcb(struct socket *so) 1821 { 1822 1823 return (sotoinpcb(so)); 1824 } 1825 1826 struct tcpcb * 1827 so_sototcpcb(struct socket *so) 1828 { 1829 1830 return (sototcpcb(so)); 1831 } 1832 1833 #ifdef DDB 1834 static void 1835 db_print_indent(int indent) 1836 { 1837 int i; 1838 1839 for (i = 0; i < indent; i++) 1840 db_printf(" "); 1841 } 1842 1843 static void 1844 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent) 1845 { 1846 char faddr_str[48], laddr_str[48]; 1847 1848 db_print_indent(indent); 1849 db_printf("%s at %p\n", name, inc); 1850 1851 indent += 2; 1852 1853 #ifdef INET6 1854 if (inc->inc_flags & INC_ISIPV6) { 1855 /* IPv6. */ 1856 ip6_sprintf(laddr_str, &inc->inc6_laddr); 1857 ip6_sprintf(faddr_str, &inc->inc6_faddr); 1858 } else { 1859 #endif 1860 /* IPv4. */ 1861 inet_ntoa_r(inc->inc_laddr, laddr_str); 1862 inet_ntoa_r(inc->inc_faddr, faddr_str); 1863 #ifdef INET6 1864 } 1865 #endif 1866 db_print_indent(indent); 1867 db_printf("inc_laddr %s inc_lport %u\n", laddr_str, 1868 ntohs(inc->inc_lport)); 1869 db_print_indent(indent); 1870 db_printf("inc_faddr %s inc_fport %u\n", faddr_str, 1871 ntohs(inc->inc_fport)); 1872 } 1873 1874 static void 1875 db_print_inpflags(int inp_flags) 1876 { 1877 int comma; 1878 1879 comma = 0; 1880 if (inp_flags & INP_RECVOPTS) { 1881 db_printf("%sINP_RECVOPTS", comma ? ", " : ""); 1882 comma = 1; 1883 } 1884 if (inp_flags & INP_RECVRETOPTS) { 1885 db_printf("%sINP_RECVRETOPTS", comma ? ", " : ""); 1886 comma = 1; 1887 } 1888 if (inp_flags & INP_RECVDSTADDR) { 1889 db_printf("%sINP_RECVDSTADDR", comma ? ", " : ""); 1890 comma = 1; 1891 } 1892 if (inp_flags & INP_HDRINCL) { 1893 db_printf("%sINP_HDRINCL", comma ? ", " : ""); 1894 comma = 1; 1895 } 1896 if (inp_flags & INP_HIGHPORT) { 1897 db_printf("%sINP_HIGHPORT", comma ? ", " : ""); 1898 comma = 1; 1899 } 1900 if (inp_flags & INP_LOWPORT) { 1901 db_printf("%sINP_LOWPORT", comma ? ", " : ""); 1902 comma = 1; 1903 } 1904 if (inp_flags & INP_ANONPORT) { 1905 db_printf("%sINP_ANONPORT", comma ? ", " : ""); 1906 comma = 1; 1907 } 1908 if (inp_flags & INP_RECVIF) { 1909 db_printf("%sINP_RECVIF", comma ? ", " : ""); 1910 comma = 1; 1911 } 1912 if (inp_flags & INP_MTUDISC) { 1913 db_printf("%sINP_MTUDISC", comma ? ", " : ""); 1914 comma = 1; 1915 } 1916 if (inp_flags & INP_FAITH) { 1917 db_printf("%sINP_FAITH", comma ? ", " : ""); 1918 comma = 1; 1919 } 1920 if (inp_flags & INP_RECVTTL) { 1921 db_printf("%sINP_RECVTTL", comma ? ", " : ""); 1922 comma = 1; 1923 } 1924 if (inp_flags & INP_DONTFRAG) { 1925 db_printf("%sINP_DONTFRAG", comma ? ", " : ""); 1926 comma = 1; 1927 } 1928 if (inp_flags & IN6P_IPV6_V6ONLY) { 1929 db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : ""); 1930 comma = 1; 1931 } 1932 if (inp_flags & IN6P_PKTINFO) { 1933 db_printf("%sIN6P_PKTINFO", comma ? ", " : ""); 1934 comma = 1; 1935 } 1936 if (inp_flags & IN6P_HOPLIMIT) { 1937 db_printf("%sIN6P_HOPLIMIT", comma ? ", " : ""); 1938 comma = 1; 1939 } 1940 if (inp_flags & IN6P_HOPOPTS) { 1941 db_printf("%sIN6P_HOPOPTS", comma ? ", " : ""); 1942 comma = 1; 1943 } 1944 if (inp_flags & IN6P_DSTOPTS) { 1945 db_printf("%sIN6P_DSTOPTS", comma ? ", " : ""); 1946 comma = 1; 1947 } 1948 if (inp_flags & IN6P_RTHDR) { 1949 db_printf("%sIN6P_RTHDR", comma ? ", " : ""); 1950 comma = 1; 1951 } 1952 if (inp_flags & IN6P_RTHDRDSTOPTS) { 1953 db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : ""); 1954 comma = 1; 1955 } 1956 if (inp_flags & IN6P_TCLASS) { 1957 db_printf("%sIN6P_TCLASS", comma ? ", " : ""); 1958 comma = 1; 1959 } 1960 if (inp_flags & IN6P_AUTOFLOWLABEL) { 1961 db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : ""); 1962 comma = 1; 1963 } 1964 if (inp_flags & INP_TIMEWAIT) { 1965 db_printf("%sINP_TIMEWAIT", comma ? ", " : ""); 1966 comma = 1; 1967 } 1968 if (inp_flags & INP_ONESBCAST) { 1969 db_printf("%sINP_ONESBCAST", comma ? ", " : ""); 1970 comma = 1; 1971 } 1972 if (inp_flags & INP_DROPPED) { 1973 db_printf("%sINP_DROPPED", comma ? ", " : ""); 1974 comma = 1; 1975 } 1976 if (inp_flags & INP_SOCKREF) { 1977 db_printf("%sINP_SOCKREF", comma ? ", " : ""); 1978 comma = 1; 1979 } 1980 if (inp_flags & IN6P_RFC2292) { 1981 db_printf("%sIN6P_RFC2292", comma ? ", " : ""); 1982 comma = 1; 1983 } 1984 if (inp_flags & IN6P_MTU) { 1985 db_printf("IN6P_MTU%s", comma ? ", " : ""); 1986 comma = 1; 1987 } 1988 } 1989 1990 static void 1991 db_print_inpvflag(u_char inp_vflag) 1992 { 1993 int comma; 1994 1995 comma = 0; 1996 if (inp_vflag & INP_IPV4) { 1997 db_printf("%sINP_IPV4", comma ? ", " : ""); 1998 comma = 1; 1999 } 2000 if (inp_vflag & INP_IPV6) { 2001 db_printf("%sINP_IPV6", comma ? ", " : ""); 2002 comma = 1; 2003 } 2004 if (inp_vflag & INP_IPV6PROTO) { 2005 db_printf("%sINP_IPV6PROTO", comma ? ", " : ""); 2006 comma = 1; 2007 } 2008 } 2009 2010 static void 2011 db_print_inpcb(struct inpcb *inp, const char *name, int indent) 2012 { 2013 2014 db_print_indent(indent); 2015 db_printf("%s at %p\n", name, inp); 2016 2017 indent += 2; 2018 2019 db_print_indent(indent); 2020 db_printf("inp_flow: 0x%x\n", inp->inp_flow); 2021 2022 db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent); 2023 2024 db_print_indent(indent); 2025 db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n", 2026 inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket); 2027 2028 db_print_indent(indent); 2029 db_printf("inp_label: %p inp_flags: 0x%x (", 2030 inp->inp_label, inp->inp_flags); 2031 db_print_inpflags(inp->inp_flags); 2032 db_printf(")\n"); 2033 2034 db_print_indent(indent); 2035 db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp, 2036 inp->inp_vflag); 2037 db_print_inpvflag(inp->inp_vflag); 2038 db_printf(")\n"); 2039 2040 db_print_indent(indent); 2041 db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n", 2042 inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl); 2043 2044 db_print_indent(indent); 2045 #ifdef INET6 2046 if (inp->inp_vflag & INP_IPV6) { 2047 db_printf("in6p_options: %p in6p_outputopts: %p " 2048 "in6p_moptions: %p\n", inp->in6p_options, 2049 inp->in6p_outputopts, inp->in6p_moptions); 2050 db_printf("in6p_icmp6filt: %p in6p_cksum %d " 2051 "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum, 2052 inp->in6p_hops); 2053 } else 2054 #endif 2055 { 2056 db_printf("inp_ip_tos: %d inp_ip_options: %p " 2057 "inp_ip_moptions: %p\n", inp->inp_ip_tos, 2058 inp->inp_options, inp->inp_moptions); 2059 } 2060 2061 db_print_indent(indent); 2062 db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd, 2063 (uintmax_t)inp->inp_gencnt); 2064 } 2065 2066 DB_SHOW_COMMAND(inpcb, db_show_inpcb) 2067 { 2068 struct inpcb *inp; 2069 2070 if (!have_addr) { 2071 db_printf("usage: show inpcb <addr>\n"); 2072 return; 2073 } 2074 inp = (struct inpcb *)addr; 2075 2076 db_print_inpcb(inp, "inpcb", 0); 2077 } 2078 #endif 2079