1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 30 * $FreeBSD$ 31 */ 32 33 #include "opt_ipsec.h" 34 #include "opt_inet6.h" 35 #include "opt_mac.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mac.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/domain.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/proc.h> 47 #include <sys/jail.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 51 #include <vm/uma.h> 52 53 #include <net/if.h> 54 #include <net/if_types.h> 55 #include <net/route.h> 56 57 #include <netinet/in.h> 58 #include <netinet/in_pcb.h> 59 #include <netinet/in_var.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/tcp_var.h> 62 #ifdef INET6 63 #include <netinet/ip6.h> 64 #include <netinet6/ip6_var.h> 65 #endif /* INET6 */ 66 67 #ifdef IPSEC 68 #include <netinet6/ipsec.h> 69 #include <netkey/key.h> 70 #endif /* IPSEC */ 71 72 #ifdef FAST_IPSEC 73 #if defined(IPSEC) || defined(IPSEC_ESP) 74 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 75 #endif 76 77 #include <netipsec/ipsec.h> 78 #include <netipsec/key.h> 79 #endif /* FAST_IPSEC */ 80 81 /* 82 * These configure the range of local port addresses assigned to 83 * "unspecified" outgoing connections/packets/whatever. 84 */ 85 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 86 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 87 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 88 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 89 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 90 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 91 92 /* 93 * Reserved ports accessible only to root. There are significant 94 * security considerations that must be accounted for when changing these, 95 * but the security benefits can be great. Please be careful. 96 */ 97 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ 98 int ipport_reservedlow = 0; 99 100 /* Shall we allocate ephemeral ports in random order? */ 101 int ipport_randomized = 1; 102 103 #define RANGECHK(var, min, max) \ 104 if ((var) < (min)) { (var) = (min); } \ 105 else if ((var) > (max)) { (var) = (max); } 106 107 static int 108 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 109 { 110 int error; 111 112 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 113 if (error == 0) { 114 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 115 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 116 RANGECHK(ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); 117 RANGECHK(ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); 118 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); 119 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); 120 } 121 return (error); 122 } 123 124 #undef RANGECHK 125 126 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 127 128 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 129 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 130 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 131 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 133 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 135 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 137 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 139 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 140 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 141 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); 142 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 143 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); 144 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, 145 CTLFLAG_RW, &ipport_randomized, 0, ""); 146 147 /* 148 * in_pcb.c: manage the Protocol Control Blocks. 149 * 150 * NOTE: It is assumed that most of these functions will be called at 151 * splnet(). XXX - There are, unfortunately, a few exceptions to this 152 * rule that should be fixed. 153 */ 154 155 /* 156 * Allocate a PCB and associate it with the socket. 157 */ 158 int 159 in_pcballoc(so, pcbinfo, type) 160 struct socket *so; 161 struct inpcbinfo *pcbinfo; 162 const char *type; 163 { 164 register struct inpcb *inp; 165 int error; 166 167 INP_INFO_WLOCK_ASSERT(pcbinfo); 168 error = 0; 169 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); 170 if (inp == NULL) 171 return (ENOBUFS); 172 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 173 inp->inp_pcbinfo = pcbinfo; 174 inp->inp_socket = so; 175 #ifdef MAC 176 error = mac_init_inpcb(inp, M_NOWAIT); 177 if (error != 0) 178 goto out; 179 mac_create_inpcb_from_socket(so, inp); 180 #endif 181 #if defined(IPSEC) || defined(FAST_IPSEC) 182 #ifdef FAST_IPSEC 183 error = ipsec_init_policy(so, &inp->inp_sp); 184 #else 185 error = ipsec_init_pcbpolicy(so, &inp->inp_sp); 186 #endif 187 if (error != 0) 188 goto out; 189 #endif /*IPSEC*/ 190 #if defined(INET6) 191 if (INP_SOCKAF(so) == AF_INET6) { 192 inp->inp_vflag |= INP_IPV6PROTO; 193 if (ip6_v6only) 194 inp->inp_flags |= IN6P_IPV6_V6ONLY; 195 } 196 #endif 197 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 198 pcbinfo->ipi_count++; 199 so->so_pcb = (caddr_t)inp; 200 INP_LOCK_INIT(inp, "inp", type); 201 #ifdef INET6 202 if (ip6_auto_flowlabel) 203 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 204 #endif 205 #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) 206 out: 207 if (error != 0) 208 uma_zfree(pcbinfo->ipi_zone, inp); 209 #endif 210 return (error); 211 } 212 213 int 214 in_pcbbind(inp, nam, cred) 215 register struct inpcb *inp; 216 struct sockaddr *nam; 217 struct ucred *cred; 218 { 219 int anonport, error; 220 221 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 222 INP_LOCK_ASSERT(inp); 223 224 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 225 return (EINVAL); 226 anonport = inp->inp_lport == 0 && (nam == NULL || 227 ((struct sockaddr_in *)nam)->sin_port == 0); 228 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 229 &inp->inp_lport, cred); 230 if (error) 231 return (error); 232 if (in_pcbinshash(inp) != 0) { 233 inp->inp_laddr.s_addr = INADDR_ANY; 234 inp->inp_lport = 0; 235 return (EAGAIN); 236 } 237 if (anonport) 238 inp->inp_flags |= INP_ANONPORT; 239 return (0); 240 } 241 242 /* 243 * Set up a bind operation on a PCB, performing port allocation 244 * as required, but do not actually modify the PCB. Callers can 245 * either complete the bind by setting inp_laddr/inp_lport and 246 * calling in_pcbinshash(), or they can just use the resulting 247 * port and address to authorise the sending of a once-off packet. 248 * 249 * On error, the values of *laddrp and *lportp are not changed. 250 */ 251 int 252 in_pcbbind_setup(inp, nam, laddrp, lportp, cred) 253 struct inpcb *inp; 254 struct sockaddr *nam; 255 in_addr_t *laddrp; 256 u_short *lportp; 257 struct ucred *cred; 258 { 259 struct socket *so = inp->inp_socket; 260 unsigned short *lastport; 261 struct sockaddr_in *sin; 262 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 263 struct in_addr laddr; 264 u_short lport = 0; 265 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 266 int error, prison = 0; 267 268 INP_INFO_WLOCK_ASSERT(pcbinfo); 269 INP_LOCK_ASSERT(inp); 270 271 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 272 return (EADDRNOTAVAIL); 273 laddr.s_addr = *laddrp; 274 if (nam != NULL && laddr.s_addr != INADDR_ANY) 275 return (EINVAL); 276 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 277 wild = 1; 278 if (nam) { 279 sin = (struct sockaddr_in *)nam; 280 if (nam->sa_len != sizeof (*sin)) 281 return (EINVAL); 282 #ifdef notdef 283 /* 284 * We should check the family, but old programs 285 * incorrectly fail to initialize it. 286 */ 287 if (sin->sin_family != AF_INET) 288 return (EAFNOSUPPORT); 289 #endif 290 if (sin->sin_addr.s_addr != INADDR_ANY) 291 if (prison_ip(cred, 0, &sin->sin_addr.s_addr)) 292 return(EINVAL); 293 if (sin->sin_port != *lportp) { 294 /* Don't allow the port to change. */ 295 if (*lportp != 0) 296 return (EINVAL); 297 lport = sin->sin_port; 298 } 299 /* NB: lport is left as 0 if the port isn't being changed. */ 300 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 301 /* 302 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 303 * allow complete duplication of binding if 304 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 305 * and a multicast address is bound on both 306 * new and duplicated sockets. 307 */ 308 if (so->so_options & SO_REUSEADDR) 309 reuseport = SO_REUSEADDR|SO_REUSEPORT; 310 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 311 sin->sin_port = 0; /* yech... */ 312 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 313 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 314 return (EADDRNOTAVAIL); 315 } 316 laddr = sin->sin_addr; 317 if (lport) { 318 struct inpcb *t; 319 /* GROSS */ 320 if (ntohs(lport) <= ipport_reservedhigh && 321 ntohs(lport) >= ipport_reservedlow && 322 suser_cred(cred, PRISON_ROOT)) 323 return (EACCES); 324 if (jailed(cred)) 325 prison = 1; 326 if (so->so_cred->cr_uid != 0 && 327 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 328 t = in_pcblookup_local(inp->inp_pcbinfo, 329 sin->sin_addr, lport, 330 prison ? 0 : INPLOOKUP_WILDCARD); 331 /* 332 * XXX 333 * This entire block sorely needs a rewrite. 334 */ 335 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 336 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 337 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 338 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && 339 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) 340 return (EADDRINUSE); 341 } else 342 if (t && 343 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 344 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 345 (t->inp_socket->so_options & 346 SO_REUSEPORT) == 0) && 347 (so->so_cred->cr_uid != 348 t->inp_socket->so_cred->cr_uid)) { 349 #if defined(INET6) 350 if (ntohl(sin->sin_addr.s_addr) != 351 INADDR_ANY || 352 ntohl(t->inp_laddr.s_addr) != 353 INADDR_ANY || 354 INP_SOCKAF(so) == 355 INP_SOCKAF(t->inp_socket)) 356 #endif /* defined(INET6) */ 357 return (EADDRINUSE); 358 } 359 } 360 if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr)) 361 return (EADDRNOTAVAIL); 362 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 363 lport, prison ? 0 : wild); 364 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 365 if ((reuseport & intotw(t)->tw_so_options) == 0) 366 return (EADDRINUSE); 367 } else 368 if (t && 369 (reuseport & t->inp_socket->so_options) == 0) { 370 #if defined(INET6) 371 if (ntohl(sin->sin_addr.s_addr) != 372 INADDR_ANY || 373 ntohl(t->inp_laddr.s_addr) != 374 INADDR_ANY || 375 INP_SOCKAF(so) == 376 INP_SOCKAF(t->inp_socket)) 377 #endif /* defined(INET6) */ 378 return (EADDRINUSE); 379 } 380 } 381 } 382 if (*lportp != 0) 383 lport = *lportp; 384 if (lport == 0) { 385 u_short first, last; 386 int count; 387 388 if (laddr.s_addr != INADDR_ANY) 389 if (prison_ip(cred, 0, &laddr.s_addr)) 390 return (EINVAL); 391 392 if (inp->inp_flags & INP_HIGHPORT) { 393 first = ipport_hifirstauto; /* sysctl */ 394 last = ipport_hilastauto; 395 lastport = &pcbinfo->lasthi; 396 } else if (inp->inp_flags & INP_LOWPORT) { 397 if ((error = suser_cred(cred, PRISON_ROOT)) != 0) 398 return error; 399 first = ipport_lowfirstauto; /* 1023 */ 400 last = ipport_lowlastauto; /* 600 */ 401 lastport = &pcbinfo->lastlow; 402 } else { 403 first = ipport_firstauto; /* sysctl */ 404 last = ipport_lastauto; 405 lastport = &pcbinfo->lastport; 406 } 407 /* 408 * Simple check to ensure all ports are not used up causing 409 * a deadlock here. 410 * 411 * We split the two cases (up and down) so that the direction 412 * is not being tested on each round of the loop. 413 */ 414 if (first > last) { 415 /* 416 * counting down 417 */ 418 if (ipport_randomized) 419 *lastport = first - 420 (arc4random() % (first - last)); 421 count = first - last; 422 423 do { 424 if (count-- < 0) /* completely used? */ 425 return (EADDRNOTAVAIL); 426 --*lastport; 427 if (*lastport > first || *lastport < last) 428 *lastport = first; 429 lport = htons(*lastport); 430 } while (in_pcblookup_local(pcbinfo, laddr, lport, 431 wild)); 432 } else { 433 /* 434 * counting up 435 */ 436 if (ipport_randomized) 437 *lastport = first + 438 (arc4random() % (last - first)); 439 count = last - first; 440 441 do { 442 if (count-- < 0) /* completely used? */ 443 return (EADDRNOTAVAIL); 444 ++*lastport; 445 if (*lastport < first || *lastport > last) 446 *lastport = first; 447 lport = htons(*lastport); 448 } while (in_pcblookup_local(pcbinfo, laddr, lport, 449 wild)); 450 } 451 } 452 if (prison_ip(cred, 0, &laddr.s_addr)) 453 return (EINVAL); 454 *laddrp = laddr.s_addr; 455 *lportp = lport; 456 return (0); 457 } 458 459 /* 460 * Connect from a socket to a specified address. 461 * Both address and port must be specified in argument sin. 462 * If don't have a local address for this socket yet, 463 * then pick one. 464 */ 465 int 466 in_pcbconnect(inp, nam, cred) 467 register struct inpcb *inp; 468 struct sockaddr *nam; 469 struct ucred *cred; 470 { 471 u_short lport, fport; 472 in_addr_t laddr, faddr; 473 int anonport, error; 474 475 lport = inp->inp_lport; 476 laddr = inp->inp_laddr.s_addr; 477 anonport = (lport == 0); 478 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 479 NULL, cred); 480 if (error) 481 return (error); 482 483 /* Do the initial binding of the local address if required. */ 484 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 485 inp->inp_lport = lport; 486 inp->inp_laddr.s_addr = laddr; 487 if (in_pcbinshash(inp) != 0) { 488 inp->inp_laddr.s_addr = INADDR_ANY; 489 inp->inp_lport = 0; 490 return (EAGAIN); 491 } 492 } 493 494 /* Commit the remaining changes. */ 495 inp->inp_lport = lport; 496 inp->inp_laddr.s_addr = laddr; 497 inp->inp_faddr.s_addr = faddr; 498 inp->inp_fport = fport; 499 in_pcbrehash(inp); 500 #ifdef IPSEC 501 if (inp->inp_socket->so_type == SOCK_STREAM) 502 ipsec_pcbconn(inp->inp_sp); 503 #endif 504 if (anonport) 505 inp->inp_flags |= INP_ANONPORT; 506 return (0); 507 } 508 509 /* 510 * Set up for a connect from a socket to the specified address. 511 * On entry, *laddrp and *lportp should contain the current local 512 * address and port for the PCB; these are updated to the values 513 * that should be placed in inp_laddr and inp_lport to complete 514 * the connect. 515 * 516 * On success, *faddrp and *fportp will be set to the remote address 517 * and port. These are not updated in the error case. 518 * 519 * If the operation fails because the connection already exists, 520 * *oinpp will be set to the PCB of that connection so that the 521 * caller can decide to override it. In all other cases, *oinpp 522 * is set to NULL. 523 */ 524 int 525 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, cred) 526 register struct inpcb *inp; 527 struct sockaddr *nam; 528 in_addr_t *laddrp; 529 u_short *lportp; 530 in_addr_t *faddrp; 531 u_short *fportp; 532 struct inpcb **oinpp; 533 struct ucred *cred; 534 { 535 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 536 struct in_ifaddr *ia; 537 struct sockaddr_in sa; 538 struct ucred *socred; 539 struct inpcb *oinp; 540 struct in_addr laddr, faddr; 541 u_short lport, fport; 542 int error; 543 544 if (oinpp != NULL) 545 *oinpp = NULL; 546 if (nam->sa_len != sizeof (*sin)) 547 return (EINVAL); 548 if (sin->sin_family != AF_INET) 549 return (EAFNOSUPPORT); 550 if (sin->sin_port == 0) 551 return (EADDRNOTAVAIL); 552 laddr.s_addr = *laddrp; 553 lport = *lportp; 554 faddr = sin->sin_addr; 555 fport = sin->sin_port; 556 socred = inp->inp_socket->so_cred; 557 if (laddr.s_addr == INADDR_ANY && jailed(socred)) { 558 bzero(&sa, sizeof(sa)); 559 sa.sin_addr.s_addr = htonl(prison_getip(socred)); 560 sa.sin_len = sizeof(sa); 561 sa.sin_family = AF_INET; 562 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 563 &laddr.s_addr, &lport, cred); 564 if (error) 565 return (error); 566 } 567 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 568 /* 569 * If the destination address is INADDR_ANY, 570 * use the primary local address. 571 * If the supplied address is INADDR_BROADCAST, 572 * and the primary interface supports broadcast, 573 * choose the broadcast address for that interface. 574 */ 575 if (faddr.s_addr == INADDR_ANY) 576 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 577 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 578 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 579 IFF_BROADCAST)) 580 faddr = satosin(&TAILQ_FIRST( 581 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 582 } 583 if (laddr.s_addr == INADDR_ANY) { 584 struct route sro; 585 586 bzero(&sro, sizeof(sro)); 587 ia = (struct in_ifaddr *)0; 588 /* 589 * If route is known our src addr is taken from the i/f, 590 * else punt. 591 */ 592 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { 593 /* Find out route to destination */ 594 sro.ro_dst.sa_family = AF_INET; 595 sro.ro_dst.sa_len = sizeof(struct sockaddr_in); 596 ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr; 597 rtalloc_ign(&sro, RTF_CLONING); 598 } 599 /* 600 * If we found a route, use the address 601 * corresponding to the outgoing interface 602 * unless it is the loopback (in case a route 603 * to our address on another net goes to loopback). 604 */ 605 if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 606 ia = ifatoia(sro.ro_rt->rt_ifa); 607 if (sro.ro_rt) 608 RTFREE(sro.ro_rt); 609 if (ia == 0) { 610 bzero(&sa, sizeof(sa)); 611 sa.sin_addr = faddr; 612 sa.sin_len = sizeof(sa); 613 sa.sin_family = AF_INET; 614 615 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 616 if (ia == 0) 617 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 618 if (ia == 0) 619 ia = TAILQ_FIRST(&in_ifaddrhead); 620 if (ia == 0) 621 return (EADDRNOTAVAIL); 622 } 623 /* 624 * If the destination address is multicast and an outgoing 625 * interface has been set as a multicast option, use the 626 * address of that interface as our source address. 627 */ 628 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 629 inp->inp_moptions != NULL) { 630 struct ip_moptions *imo; 631 struct ifnet *ifp; 632 633 imo = inp->inp_moptions; 634 if (imo->imo_multicast_ifp != NULL) { 635 ifp = imo->imo_multicast_ifp; 636 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 637 if (ia->ia_ifp == ifp) 638 break; 639 if (ia == 0) 640 return (EADDRNOTAVAIL); 641 } 642 } 643 laddr = ia->ia_addr.sin_addr; 644 } 645 646 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 647 0, NULL); 648 if (oinp != NULL) { 649 if (oinpp != NULL) 650 *oinpp = oinp; 651 return (EADDRINUSE); 652 } 653 if (lport == 0) { 654 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, 655 cred); 656 if (error) 657 return (error); 658 } 659 *laddrp = laddr.s_addr; 660 *lportp = lport; 661 *faddrp = faddr.s_addr; 662 *fportp = fport; 663 return (0); 664 } 665 666 void 667 in_pcbdisconnect(inp) 668 struct inpcb *inp; 669 { 670 INP_LOCK_ASSERT(inp); 671 672 inp->inp_faddr.s_addr = INADDR_ANY; 673 inp->inp_fport = 0; 674 in_pcbrehash(inp); 675 #ifdef IPSEC 676 ipsec_pcbdisconn(inp->inp_sp); 677 #endif 678 if (inp->inp_socket->so_state & SS_NOFDREF) 679 in_pcbdetach(inp); 680 } 681 682 void 683 in_pcbdetach(inp) 684 struct inpcb *inp; 685 { 686 struct socket *so = inp->inp_socket; 687 struct inpcbinfo *ipi = inp->inp_pcbinfo; 688 689 INP_LOCK_ASSERT(inp); 690 691 #if defined(IPSEC) || defined(FAST_IPSEC) 692 ipsec4_delete_pcbpolicy(inp); 693 #endif /*IPSEC*/ 694 inp->inp_gencnt = ++ipi->ipi_gencnt; 695 in_pcbremlists(inp); 696 if (so) { 697 so->so_pcb = 0; 698 sotryfree(so); 699 } 700 if (inp->inp_options) 701 (void)m_free(inp->inp_options); 702 ip_freemoptions(inp->inp_moptions); 703 inp->inp_vflag = 0; 704 INP_LOCK_DESTROY(inp); 705 #ifdef MAC 706 mac_destroy_inpcb(inp); 707 #endif 708 uma_zfree(ipi->ipi_zone, inp); 709 } 710 711 struct sockaddr * 712 in_sockaddr(port, addr_p) 713 in_port_t port; 714 struct in_addr *addr_p; 715 { 716 struct sockaddr_in *sin; 717 718 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 719 M_WAITOK | M_ZERO); 720 sin->sin_family = AF_INET; 721 sin->sin_len = sizeof(*sin); 722 sin->sin_addr = *addr_p; 723 sin->sin_port = port; 724 725 return (struct sockaddr *)sin; 726 } 727 728 /* 729 * The wrapper function will pass down the pcbinfo for this function to lock. 730 * The socket must have a valid 731 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 732 * except through a kernel programming error, so it is acceptable to panic 733 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 734 * because there actually /is/ a programming error somewhere... XXX) 735 */ 736 int 737 in_setsockaddr(so, nam, pcbinfo) 738 struct socket *so; 739 struct sockaddr **nam; 740 struct inpcbinfo *pcbinfo; 741 { 742 int s; 743 register struct inpcb *inp; 744 struct in_addr addr; 745 in_port_t port; 746 747 s = splnet(); 748 INP_INFO_RLOCK(pcbinfo); 749 inp = sotoinpcb(so); 750 if (!inp) { 751 INP_INFO_RUNLOCK(pcbinfo); 752 splx(s); 753 return ECONNRESET; 754 } 755 INP_LOCK(inp); 756 port = inp->inp_lport; 757 addr = inp->inp_laddr; 758 INP_UNLOCK(inp); 759 INP_INFO_RUNLOCK(pcbinfo); 760 splx(s); 761 762 *nam = in_sockaddr(port, &addr); 763 return 0; 764 } 765 766 /* 767 * The wrapper function will pass down the pcbinfo for this function to lock. 768 */ 769 int 770 in_setpeeraddr(so, nam, pcbinfo) 771 struct socket *so; 772 struct sockaddr **nam; 773 struct inpcbinfo *pcbinfo; 774 { 775 int s; 776 register struct inpcb *inp; 777 struct in_addr addr; 778 in_port_t port; 779 780 s = splnet(); 781 INP_INFO_RLOCK(pcbinfo); 782 inp = sotoinpcb(so); 783 if (!inp) { 784 INP_INFO_RUNLOCK(pcbinfo); 785 splx(s); 786 return ECONNRESET; 787 } 788 INP_LOCK(inp); 789 port = inp->inp_fport; 790 addr = inp->inp_faddr; 791 INP_UNLOCK(inp); 792 INP_INFO_RUNLOCK(pcbinfo); 793 splx(s); 794 795 *nam = in_sockaddr(port, &addr); 796 return 0; 797 } 798 799 void 800 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 801 struct inpcbinfo *pcbinfo; 802 struct in_addr faddr; 803 int errno; 804 struct inpcb *(*notify)(struct inpcb *, int); 805 { 806 struct inpcb *inp, *ninp; 807 struct inpcbhead *head; 808 int s; 809 810 s = splnet(); 811 INP_INFO_WLOCK(pcbinfo); 812 head = pcbinfo->listhead; 813 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 814 INP_LOCK(inp); 815 ninp = LIST_NEXT(inp, inp_list); 816 #ifdef INET6 817 if ((inp->inp_vflag & INP_IPV4) == 0) { 818 INP_UNLOCK(inp); 819 continue; 820 } 821 #endif 822 if (inp->inp_faddr.s_addr != faddr.s_addr || 823 inp->inp_socket == NULL) { 824 INP_UNLOCK(inp); 825 continue; 826 } 827 if ((*notify)(inp, errno)) 828 INP_UNLOCK(inp); 829 } 830 INP_INFO_WUNLOCK(pcbinfo); 831 splx(s); 832 } 833 834 void 835 in_pcbpurgeif0(pcbinfo, ifp) 836 struct inpcbinfo *pcbinfo; 837 struct ifnet *ifp; 838 { 839 struct inpcb *inp; 840 struct ip_moptions *imo; 841 int i, gap; 842 843 /* why no splnet here? XXX */ 844 INP_INFO_RLOCK(pcbinfo); 845 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 846 INP_LOCK(inp); 847 imo = inp->inp_moptions; 848 if ((inp->inp_vflag & INP_IPV4) && 849 imo != NULL) { 850 /* 851 * Unselect the outgoing interface if it is being 852 * detached. 853 */ 854 if (imo->imo_multicast_ifp == ifp) 855 imo->imo_multicast_ifp = NULL; 856 857 /* 858 * Drop multicast group membership if we joined 859 * through the interface being detached. 860 */ 861 for (i = 0, gap = 0; i < imo->imo_num_memberships; 862 i++) { 863 if (imo->imo_membership[i]->inm_ifp == ifp) { 864 in_delmulti(imo->imo_membership[i]); 865 gap++; 866 } else if (gap != 0) 867 imo->imo_membership[i - gap] = 868 imo->imo_membership[i]; 869 } 870 imo->imo_num_memberships -= gap; 871 } 872 INP_UNLOCK(inp); 873 } 874 INP_INFO_RUNLOCK(pcbinfo); 875 } 876 877 /* 878 * Lookup a PCB based on the local address and port. 879 */ 880 struct inpcb * 881 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 882 struct inpcbinfo *pcbinfo; 883 struct in_addr laddr; 884 u_int lport_arg; 885 int wild_okay; 886 { 887 register struct inpcb *inp; 888 int matchwild = 3, wildcard; 889 u_short lport = lport_arg; 890 891 INP_INFO_WLOCK_ASSERT(pcbinfo); 892 893 if (!wild_okay) { 894 struct inpcbhead *head; 895 /* 896 * Look for an unconnected (wildcard foreign addr) PCB that 897 * matches the local address and port we're looking for. 898 */ 899 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 900 LIST_FOREACH(inp, head, inp_hash) { 901 #ifdef INET6 902 if ((inp->inp_vflag & INP_IPV4) == 0) 903 continue; 904 #endif 905 if (inp->inp_faddr.s_addr == INADDR_ANY && 906 inp->inp_laddr.s_addr == laddr.s_addr && 907 inp->inp_lport == lport) { 908 /* 909 * Found. 910 */ 911 return (inp); 912 } 913 } 914 /* 915 * Not found. 916 */ 917 return (NULL); 918 } else { 919 struct inpcbporthead *porthash; 920 struct inpcbport *phd; 921 struct inpcb *match = NULL; 922 /* 923 * Best fit PCB lookup. 924 * 925 * First see if this local port is in use by looking on the 926 * port hash list. 927 */ 928 retrylookup: 929 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 930 pcbinfo->porthashmask)]; 931 LIST_FOREACH(phd, porthash, phd_hash) { 932 if (phd->phd_port == lport) 933 break; 934 } 935 if (phd != NULL) { 936 /* 937 * Port is in use by one or more PCBs. Look for best 938 * fit. 939 */ 940 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 941 wildcard = 0; 942 #ifdef INET6 943 if ((inp->inp_vflag & INP_IPV4) == 0) 944 continue; 945 #endif 946 /* 947 * Clean out old time_wait sockets if they 948 * are clogging up needed local ports. 949 */ 950 if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { 951 if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { 952 INP_LOCK(inp); 953 tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); 954 match = NULL; 955 goto retrylookup; 956 } 957 } 958 if (inp->inp_faddr.s_addr != INADDR_ANY) 959 wildcard++; 960 if (inp->inp_laddr.s_addr != INADDR_ANY) { 961 if (laddr.s_addr == INADDR_ANY) 962 wildcard++; 963 else if (inp->inp_laddr.s_addr != laddr.s_addr) 964 continue; 965 } else { 966 if (laddr.s_addr != INADDR_ANY) 967 wildcard++; 968 } 969 if (wildcard < matchwild) { 970 match = inp; 971 matchwild = wildcard; 972 if (matchwild == 0) { 973 break; 974 } 975 } 976 } 977 } 978 return (match); 979 } 980 } 981 982 /* 983 * Lookup PCB in hash list. 984 */ 985 struct inpcb * 986 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 987 ifp) 988 struct inpcbinfo *pcbinfo; 989 struct in_addr faddr, laddr; 990 u_int fport_arg, lport_arg; 991 int wildcard; 992 struct ifnet *ifp; 993 { 994 struct inpcbhead *head; 995 register struct inpcb *inp; 996 u_short fport = fport_arg, lport = lport_arg; 997 998 INP_INFO_RLOCK_ASSERT(pcbinfo); 999 /* 1000 * First look for an exact match. 1001 */ 1002 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 1003 LIST_FOREACH(inp, head, inp_hash) { 1004 #ifdef INET6 1005 if ((inp->inp_vflag & INP_IPV4) == 0) 1006 continue; 1007 #endif 1008 if (inp->inp_faddr.s_addr == faddr.s_addr && 1009 inp->inp_laddr.s_addr == laddr.s_addr && 1010 inp->inp_fport == fport && 1011 inp->inp_lport == lport) { 1012 /* 1013 * Found. 1014 */ 1015 return (inp); 1016 } 1017 } 1018 if (wildcard) { 1019 struct inpcb *local_wild = NULL; 1020 #if defined(INET6) 1021 struct inpcb *local_wild_mapped = NULL; 1022 #endif /* defined(INET6) */ 1023 1024 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1025 LIST_FOREACH(inp, head, inp_hash) { 1026 #ifdef INET6 1027 if ((inp->inp_vflag & INP_IPV4) == 0) 1028 continue; 1029 #endif 1030 if (inp->inp_faddr.s_addr == INADDR_ANY && 1031 inp->inp_lport == lport) { 1032 if (ifp && ifp->if_type == IFT_FAITH && 1033 (inp->inp_flags & INP_FAITH) == 0) 1034 continue; 1035 if (inp->inp_laddr.s_addr == laddr.s_addr) 1036 return (inp); 1037 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1038 #if defined(INET6) 1039 if (INP_CHECK_SOCKAF(inp->inp_socket, 1040 AF_INET6)) 1041 local_wild_mapped = inp; 1042 else 1043 #endif /* defined(INET6) */ 1044 local_wild = inp; 1045 } 1046 } 1047 } 1048 #if defined(INET6) 1049 if (local_wild == NULL) 1050 return (local_wild_mapped); 1051 #endif /* defined(INET6) */ 1052 return (local_wild); 1053 } 1054 1055 /* 1056 * Not found. 1057 */ 1058 return (NULL); 1059 } 1060 1061 /* 1062 * Insert PCB onto various hash lists. 1063 */ 1064 int 1065 in_pcbinshash(inp) 1066 struct inpcb *inp; 1067 { 1068 struct inpcbhead *pcbhash; 1069 struct inpcbporthead *pcbporthash; 1070 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1071 struct inpcbport *phd; 1072 u_int32_t hashkey_faddr; 1073 1074 INP_INFO_WLOCK_ASSERT(pcbinfo); 1075 #ifdef INET6 1076 if (inp->inp_vflag & INP_IPV6) 1077 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1078 else 1079 #endif /* INET6 */ 1080 hashkey_faddr = inp->inp_faddr.s_addr; 1081 1082 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1083 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1084 1085 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1086 pcbinfo->porthashmask)]; 1087 1088 /* 1089 * Go through port list and look for a head for this lport. 1090 */ 1091 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1092 if (phd->phd_port == inp->inp_lport) 1093 break; 1094 } 1095 /* 1096 * If none exists, malloc one and tack it on. 1097 */ 1098 if (phd == NULL) { 1099 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1100 if (phd == NULL) { 1101 return (ENOBUFS); /* XXX */ 1102 } 1103 phd->phd_port = inp->inp_lport; 1104 LIST_INIT(&phd->phd_pcblist); 1105 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1106 } 1107 inp->inp_phd = phd; 1108 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1109 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1110 return (0); 1111 } 1112 1113 /* 1114 * Move PCB to the proper hash bucket when { faddr, fport } have been 1115 * changed. NOTE: This does not handle the case of the lport changing (the 1116 * hashed port list would have to be updated as well), so the lport must 1117 * not change after in_pcbinshash() has been called. 1118 */ 1119 void 1120 in_pcbrehash(inp) 1121 struct inpcb *inp; 1122 { 1123 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1124 struct inpcbhead *head; 1125 u_int32_t hashkey_faddr; 1126 1127 INP_INFO_WLOCK_ASSERT(pcbinfo); 1128 /* XXX? INP_LOCK_ASSERT(inp); */ 1129 #ifdef INET6 1130 if (inp->inp_vflag & INP_IPV6) 1131 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1132 else 1133 #endif /* INET6 */ 1134 hashkey_faddr = inp->inp_faddr.s_addr; 1135 1136 head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1137 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1138 1139 LIST_REMOVE(inp, inp_hash); 1140 LIST_INSERT_HEAD(head, inp, inp_hash); 1141 } 1142 1143 /* 1144 * Remove PCB from various lists. 1145 */ 1146 void 1147 in_pcbremlists(inp) 1148 struct inpcb *inp; 1149 { 1150 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1151 1152 INP_INFO_WLOCK_ASSERT(pcbinfo); 1153 INP_LOCK_ASSERT(inp); 1154 1155 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1156 if (inp->inp_lport) { 1157 struct inpcbport *phd = inp->inp_phd; 1158 1159 LIST_REMOVE(inp, inp_hash); 1160 LIST_REMOVE(inp, inp_portlist); 1161 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1162 LIST_REMOVE(phd, phd_hash); 1163 free(phd, M_PCB); 1164 } 1165 } 1166 LIST_REMOVE(inp, inp_list); 1167 pcbinfo->ipi_count--; 1168 } 1169 1170 /* 1171 * A set label operation has occurred at the socket layer, propagate the 1172 * label change into the in_pcb for the socket. 1173 */ 1174 void 1175 in_pcbsosetlabel(so) 1176 struct socket *so; 1177 { 1178 #ifdef MAC 1179 struct inpcb *inp; 1180 1181 /* XXX: Will assert socket lock when we have them. */ 1182 inp = (struct inpcb *)so->so_pcb; 1183 INP_LOCK(inp); 1184 mac_inpcb_sosetlabel(so, inp); 1185 INP_UNLOCK(inp); 1186 #endif 1187 } 1188