1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 #include "opt_mac.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/limits.h> 44 #include <sys/mac.h> 45 #include <sys/malloc.h> 46 #include <sys/mbuf.h> 47 #include <sys/domain.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/proc.h> 52 #include <sys/jail.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 56 #include <vm/uma.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/in_var.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/tcp_var.h> 67 #ifdef INET6 68 #include <netinet/ip6.h> 69 #include <netinet6/ip6_var.h> 70 #endif /* INET6 */ 71 72 #ifdef IPSEC 73 #include <netinet6/ipsec.h> 74 #include <netkey/key.h> 75 #endif /* IPSEC */ 76 77 #ifdef FAST_IPSEC 78 #if defined(IPSEC) || defined(IPSEC_ESP) 79 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 80 #endif 81 82 #include <netipsec/ipsec.h> 83 #include <netipsec/key.h> 84 #endif /* FAST_IPSEC */ 85 86 /* 87 * These configure the range of local port addresses assigned to 88 * "unspecified" outgoing connections/packets/whatever. 89 */ 90 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 91 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 92 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 93 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 94 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 95 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 96 97 /* 98 * Reserved ports accessible only to root. There are significant 99 * security considerations that must be accounted for when changing these, 100 * but the security benefits can be great. Please be careful. 101 */ 102 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ 103 int ipport_reservedlow = 0; 104 105 #define RANGECHK(var, min, max) \ 106 if ((var) < (min)) { (var) = (min); } \ 107 else if ((var) > (max)) { (var) = (max); } 108 109 static int 110 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 111 { 112 int error = sysctl_handle_int(oidp, 113 oidp->oid_arg1, oidp->oid_arg2, req); 114 if (!error) { 115 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 116 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 117 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 118 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 119 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 120 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 121 } 122 return error; 123 } 124 125 #undef RANGECHK 126 127 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 128 129 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 130 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 131 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 132 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 133 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 134 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 135 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 136 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 137 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 138 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 139 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 140 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 141 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 142 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); 143 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 144 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); 145 146 /* 147 * in_pcb.c: manage the Protocol Control Blocks. 148 * 149 * NOTE: It is assumed that most of these functions will be called at 150 * splnet(). XXX - There are, unfortunately, a few exceptions to this 151 * rule that should be fixed. 152 */ 153 154 /* 155 * Allocate a PCB and associate it with the socket. 156 */ 157 int 158 in_pcballoc(so, pcbinfo, type) 159 struct socket *so; 160 struct inpcbinfo *pcbinfo; 161 const char *type; 162 { 163 register struct inpcb *inp; 164 int error; 165 166 INP_INFO_WLOCK_ASSERT(pcbinfo); 167 error = 0; 168 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); 169 if (inp == NULL) 170 return (ENOBUFS); 171 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 172 inp->inp_pcbinfo = pcbinfo; 173 inp->inp_socket = so; 174 #ifdef MAC 175 error = mac_init_inpcb(inp, M_NOWAIT); 176 if (error != 0) 177 goto out; 178 mac_create_inpcb_from_socket(so, inp); 179 #endif 180 #if defined(IPSEC) || defined(FAST_IPSEC) 181 #ifdef FAST_IPSEC 182 error = ipsec_init_policy(so, &inp->inp_sp); 183 #else 184 error = ipsec_init_pcbpolicy(so, &inp->inp_sp); 185 #endif 186 if (error != 0) 187 goto out; 188 #endif /*IPSEC*/ 189 #if defined(INET6) 190 if (INP_SOCKAF(so) == AF_INET6) { 191 inp->inp_vflag |= INP_IPV6PROTO; 192 if (ip6_v6only) 193 inp->inp_flags |= IN6P_IPV6_V6ONLY; 194 } 195 #endif 196 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 197 pcbinfo->ipi_count++; 198 so->so_pcb = (caddr_t)inp; 199 INP_LOCK_INIT(inp, "inp", type); 200 #ifdef INET6 201 if (ip6_auto_flowlabel) 202 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 203 #endif 204 #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) 205 out: 206 if (error != 0) 207 uma_zfree(pcbinfo->ipi_zone, inp); 208 #endif 209 return (error); 210 } 211 212 int 213 in_pcbbind(inp, nam, cred) 214 register struct inpcb *inp; 215 struct sockaddr *nam; 216 struct ucred *cred; 217 { 218 int anonport, error; 219 220 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 221 INP_LOCK_ASSERT(inp); 222 223 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 224 return (EINVAL); 225 anonport = inp->inp_lport == 0 && (nam == NULL || 226 ((struct sockaddr_in *)nam)->sin_port == 0); 227 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 228 &inp->inp_lport, cred); 229 if (error) 230 return (error); 231 if (in_pcbinshash(inp) != 0) { 232 inp->inp_laddr.s_addr = INADDR_ANY; 233 inp->inp_lport = 0; 234 return (EAGAIN); 235 } 236 if (anonport) 237 inp->inp_flags |= INP_ANONPORT; 238 return (0); 239 } 240 241 /* 242 * Set up a bind operation on a PCB, performing port allocation 243 * as required, but do not actually modify the PCB. Callers can 244 * either complete the bind by setting inp_laddr/inp_lport and 245 * calling in_pcbinshash(), or they can just use the resulting 246 * port and address to authorise the sending of a once-off packet. 247 * 248 * On error, the values of *laddrp and *lportp are not changed. 249 */ 250 int 251 in_pcbbind_setup(inp, nam, laddrp, lportp, cred) 252 struct inpcb *inp; 253 struct sockaddr *nam; 254 in_addr_t *laddrp; 255 u_short *lportp; 256 struct ucred *cred; 257 { 258 struct socket *so = inp->inp_socket; 259 unsigned short *lastport; 260 struct sockaddr_in *sin; 261 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 262 struct in_addr laddr; 263 u_short lport = 0; 264 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 265 int error, prison = 0; 266 267 INP_INFO_WLOCK_ASSERT(pcbinfo); 268 INP_LOCK_ASSERT(inp); 269 270 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 271 return (EADDRNOTAVAIL); 272 laddr.s_addr = *laddrp; 273 if (nam != NULL && laddr.s_addr != INADDR_ANY) 274 return (EINVAL); 275 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 276 wild = 1; 277 if (nam) { 278 sin = (struct sockaddr_in *)nam; 279 if (nam->sa_len != sizeof (*sin)) 280 return (EINVAL); 281 #ifdef notdef 282 /* 283 * We should check the family, but old programs 284 * incorrectly fail to initialize it. 285 */ 286 if (sin->sin_family != AF_INET) 287 return (EAFNOSUPPORT); 288 #endif 289 if (sin->sin_addr.s_addr != INADDR_ANY) 290 if (prison_ip(cred, 0, &sin->sin_addr.s_addr)) 291 return(EINVAL); 292 if (sin->sin_port != *lportp) { 293 /* Don't allow the port to change. */ 294 if (*lportp != 0) 295 return (EINVAL); 296 lport = sin->sin_port; 297 } 298 /* NB: lport is left as 0 if the port isn't being changed. */ 299 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 300 /* 301 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 302 * allow complete duplication of binding if 303 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 304 * and a multicast address is bound on both 305 * new and duplicated sockets. 306 */ 307 if (so->so_options & SO_REUSEADDR) 308 reuseport = SO_REUSEADDR|SO_REUSEPORT; 309 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 310 sin->sin_port = 0; /* yech... */ 311 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 312 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 313 return (EADDRNOTAVAIL); 314 } 315 laddr = sin->sin_addr; 316 if (lport) { 317 struct inpcb *t; 318 /* GROSS */ 319 if (ntohs(lport) <= ipport_reservedhigh && 320 ntohs(lport) >= ipport_reservedlow && 321 suser_cred(cred, PRISON_ROOT)) 322 return (EACCES); 323 if (jailed(cred)) 324 prison = 1; 325 if (so->so_cred->cr_uid != 0 && 326 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 327 t = in_pcblookup_local(inp->inp_pcbinfo, 328 sin->sin_addr, lport, 329 prison ? 0 : INPLOOKUP_WILDCARD); 330 /* 331 * XXX 332 * This entire block sorely needs a rewrite. 333 */ 334 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 335 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 336 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 337 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && 338 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) 339 return (EADDRINUSE); 340 } else 341 if (t && 342 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 343 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 344 (t->inp_socket->so_options & 345 SO_REUSEPORT) == 0) && 346 (so->so_cred->cr_uid != 347 t->inp_socket->so_cred->cr_uid)) { 348 #if defined(INET6) 349 if (ntohl(sin->sin_addr.s_addr) != 350 INADDR_ANY || 351 ntohl(t->inp_laddr.s_addr) != 352 INADDR_ANY || 353 INP_SOCKAF(so) == 354 INP_SOCKAF(t->inp_socket)) 355 #endif /* defined(INET6) */ 356 return (EADDRINUSE); 357 } 358 } 359 if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr)) 360 return (EADDRNOTAVAIL); 361 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 362 lport, prison ? 0 : wild); 363 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 364 if ((reuseport & intotw(t)->tw_so_options) == 0) 365 return (EADDRINUSE); 366 } else 367 if (t && 368 (reuseport & t->inp_socket->so_options) == 0) { 369 #if defined(INET6) 370 if (ntohl(sin->sin_addr.s_addr) != 371 INADDR_ANY || 372 ntohl(t->inp_laddr.s_addr) != 373 INADDR_ANY || 374 INP_SOCKAF(so) == 375 INP_SOCKAF(t->inp_socket)) 376 #endif /* defined(INET6) */ 377 return (EADDRINUSE); 378 } 379 } 380 } 381 if (*lportp != 0) 382 lport = *lportp; 383 if (lport == 0) { 384 u_short first, last; 385 int count; 386 387 if (laddr.s_addr != INADDR_ANY) 388 if (prison_ip(cred, 0, &laddr.s_addr)) 389 return (EINVAL); 390 391 if (inp->inp_flags & INP_HIGHPORT) { 392 first = ipport_hifirstauto; /* sysctl */ 393 last = ipport_hilastauto; 394 lastport = &pcbinfo->lasthi; 395 } else if (inp->inp_flags & INP_LOWPORT) { 396 if ((error = suser_cred(cred, PRISON_ROOT)) != 0) 397 return error; 398 first = ipport_lowfirstauto; /* 1023 */ 399 last = ipport_lowlastauto; /* 600 */ 400 lastport = &pcbinfo->lastlow; 401 } else { 402 first = ipport_firstauto; /* sysctl */ 403 last = ipport_lastauto; 404 lastport = &pcbinfo->lastport; 405 } 406 /* 407 * Simple check to ensure all ports are not used up causing 408 * a deadlock here. 409 * 410 * We split the two cases (up and down) so that the direction 411 * is not being tested on each round of the loop. 412 */ 413 if (first > last) { 414 /* 415 * counting down 416 */ 417 count = first - last; 418 419 do { 420 if (count-- < 0) /* completely used? */ 421 return (EADDRNOTAVAIL); 422 --*lastport; 423 if (*lastport > first || *lastport < last) 424 *lastport = first; 425 lport = htons(*lastport); 426 } while (in_pcblookup_local(pcbinfo, laddr, lport, 427 wild)); 428 } else { 429 /* 430 * counting up 431 */ 432 count = last - first; 433 434 do { 435 if (count-- < 0) /* completely used? */ 436 return (EADDRNOTAVAIL); 437 ++*lastport; 438 if (*lastport < first || *lastport > last) 439 *lastport = first; 440 lport = htons(*lastport); 441 } while (in_pcblookup_local(pcbinfo, laddr, lport, 442 wild)); 443 } 444 } 445 if (prison_ip(cred, 0, &laddr.s_addr)) 446 return (EINVAL); 447 *laddrp = laddr.s_addr; 448 *lportp = lport; 449 return (0); 450 } 451 452 /* 453 * Connect from a socket to a specified address. 454 * Both address and port must be specified in argument sin. 455 * If don't have a local address for this socket yet, 456 * then pick one. 457 */ 458 int 459 in_pcbconnect(inp, nam, cred) 460 register struct inpcb *inp; 461 struct sockaddr *nam; 462 struct ucred *cred; 463 { 464 u_short lport, fport; 465 in_addr_t laddr, faddr; 466 int anonport, error; 467 468 lport = inp->inp_lport; 469 laddr = inp->inp_laddr.s_addr; 470 anonport = (lport == 0); 471 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 472 NULL, cred); 473 if (error) 474 return (error); 475 476 /* Do the initial binding of the local address if required. */ 477 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 478 inp->inp_lport = lport; 479 inp->inp_laddr.s_addr = laddr; 480 if (in_pcbinshash(inp) != 0) { 481 inp->inp_laddr.s_addr = INADDR_ANY; 482 inp->inp_lport = 0; 483 return (EAGAIN); 484 } 485 } 486 487 /* Commit the remaining changes. */ 488 inp->inp_lport = lport; 489 inp->inp_laddr.s_addr = laddr; 490 inp->inp_faddr.s_addr = faddr; 491 inp->inp_fport = fport; 492 in_pcbrehash(inp); 493 #ifdef IPSEC 494 if (inp->inp_socket->so_type == SOCK_STREAM) 495 ipsec_pcbconn(inp->inp_sp); 496 #endif 497 if (anonport) 498 inp->inp_flags |= INP_ANONPORT; 499 return (0); 500 } 501 502 /* 503 * Set up for a connect from a socket to the specified address. 504 * On entry, *laddrp and *lportp should contain the current local 505 * address and port for the PCB; these are updated to the values 506 * that should be placed in inp_laddr and inp_lport to complete 507 * the connect. 508 * 509 * On success, *faddrp and *fportp will be set to the remote address 510 * and port. These are not updated in the error case. 511 * 512 * If the operation fails because the connection already exists, 513 * *oinpp will be set to the PCB of that connection so that the 514 * caller can decide to override it. In all other cases, *oinpp 515 * is set to NULL. 516 */ 517 int 518 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, cred) 519 register struct inpcb *inp; 520 struct sockaddr *nam; 521 in_addr_t *laddrp; 522 u_short *lportp; 523 in_addr_t *faddrp; 524 u_short *fportp; 525 struct inpcb **oinpp; 526 struct ucred *cred; 527 { 528 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 529 struct in_ifaddr *ia; 530 struct sockaddr_in sa; 531 struct ucred *socred; 532 struct inpcb *oinp; 533 struct in_addr laddr, faddr; 534 u_short lport, fport; 535 int error; 536 537 if (oinpp != NULL) 538 *oinpp = NULL; 539 if (nam->sa_len != sizeof (*sin)) 540 return (EINVAL); 541 if (sin->sin_family != AF_INET) 542 return (EAFNOSUPPORT); 543 if (sin->sin_port == 0) 544 return (EADDRNOTAVAIL); 545 laddr.s_addr = *laddrp; 546 lport = *lportp; 547 faddr = sin->sin_addr; 548 fport = sin->sin_port; 549 socred = inp->inp_socket->so_cred; 550 if (laddr.s_addr == INADDR_ANY && jailed(socred)) { 551 bzero(&sa, sizeof(sa)); 552 sa.sin_addr.s_addr = htonl(prison_getip(socred)); 553 sa.sin_len = sizeof(sa); 554 sa.sin_family = AF_INET; 555 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 556 &laddr.s_addr, &lport, cred); 557 if (error) 558 return (error); 559 } 560 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 561 /* 562 * If the destination address is INADDR_ANY, 563 * use the primary local address. 564 * If the supplied address is INADDR_BROADCAST, 565 * and the primary interface supports broadcast, 566 * choose the broadcast address for that interface. 567 */ 568 if (faddr.s_addr == INADDR_ANY) 569 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 570 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 571 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 572 IFF_BROADCAST)) 573 faddr = satosin(&TAILQ_FIRST( 574 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 575 } 576 if (laddr.s_addr == INADDR_ANY) { 577 struct route sro; 578 579 bzero(&sro, sizeof(sro)); 580 ia = (struct in_ifaddr *)0; 581 /* 582 * If route is known our src addr is taken from the i/f, 583 * else punt. 584 */ 585 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { 586 /* Find out route to destination */ 587 sro.ro_dst.sa_family = AF_INET; 588 sro.ro_dst.sa_len = sizeof(struct sockaddr_in); 589 ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr; 590 rtalloc_ign(&sro, RTF_CLONING); 591 } 592 /* 593 * If we found a route, use the address 594 * corresponding to the outgoing interface 595 * unless it is the loopback (in case a route 596 * to our address on another net goes to loopback). 597 */ 598 if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 599 ia = ifatoia(sro.ro_rt->rt_ifa); 600 if (sro.ro_rt) 601 RTFREE(sro.ro_rt); 602 if (ia == 0) { 603 bzero(&sa, sizeof(sa)); 604 sa.sin_addr = faddr; 605 sa.sin_len = sizeof(sa); 606 sa.sin_family = AF_INET; 607 608 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 609 if (ia == 0) 610 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 611 if (ia == 0) 612 ia = TAILQ_FIRST(&in_ifaddrhead); 613 if (ia == 0) 614 return (EADDRNOTAVAIL); 615 } 616 /* 617 * If the destination address is multicast and an outgoing 618 * interface has been set as a multicast option, use the 619 * address of that interface as our source address. 620 */ 621 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 622 inp->inp_moptions != NULL) { 623 struct ip_moptions *imo; 624 struct ifnet *ifp; 625 626 imo = inp->inp_moptions; 627 if (imo->imo_multicast_ifp != NULL) { 628 ifp = imo->imo_multicast_ifp; 629 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 630 if (ia->ia_ifp == ifp) 631 break; 632 if (ia == 0) 633 return (EADDRNOTAVAIL); 634 } 635 } 636 laddr = ia->ia_addr.sin_addr; 637 } 638 639 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 640 0, NULL); 641 if (oinp != NULL) { 642 if (oinpp != NULL) 643 *oinpp = oinp; 644 return (EADDRINUSE); 645 } 646 if (lport == 0) { 647 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, 648 cred); 649 if (error) 650 return (error); 651 } 652 *laddrp = laddr.s_addr; 653 *lportp = lport; 654 *faddrp = faddr.s_addr; 655 *fportp = fport; 656 return (0); 657 } 658 659 void 660 in_pcbdisconnect(inp) 661 struct inpcb *inp; 662 { 663 INP_LOCK_ASSERT(inp); 664 665 inp->inp_faddr.s_addr = INADDR_ANY; 666 inp->inp_fport = 0; 667 in_pcbrehash(inp); 668 #ifdef IPSEC 669 ipsec_pcbdisconn(inp->inp_sp); 670 #endif 671 if (inp->inp_socket->so_state & SS_NOFDREF) 672 in_pcbdetach(inp); 673 } 674 675 void 676 in_pcbdetach(inp) 677 struct inpcb *inp; 678 { 679 struct socket *so = inp->inp_socket; 680 struct inpcbinfo *ipi = inp->inp_pcbinfo; 681 682 INP_LOCK_ASSERT(inp); 683 684 #if defined(IPSEC) || defined(FAST_IPSEC) 685 ipsec4_delete_pcbpolicy(inp); 686 #endif /*IPSEC*/ 687 inp->inp_gencnt = ++ipi->ipi_gencnt; 688 in_pcbremlists(inp); 689 if (so) { 690 so->so_pcb = 0; 691 sotryfree(so); 692 } 693 if (inp->inp_options) 694 (void)m_free(inp->inp_options); 695 ip_freemoptions(inp->inp_moptions); 696 inp->inp_vflag = 0; 697 INP_LOCK_DESTROY(inp); 698 #ifdef MAC 699 mac_destroy_inpcb(inp); 700 #endif 701 uma_zfree(ipi->ipi_zone, inp); 702 } 703 704 struct sockaddr * 705 in_sockaddr(port, addr_p) 706 in_port_t port; 707 struct in_addr *addr_p; 708 { 709 struct sockaddr_in *sin; 710 711 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 712 M_WAITOK | M_ZERO); 713 sin->sin_family = AF_INET; 714 sin->sin_len = sizeof(*sin); 715 sin->sin_addr = *addr_p; 716 sin->sin_port = port; 717 718 return (struct sockaddr *)sin; 719 } 720 721 /* 722 * The wrapper function will pass down the pcbinfo for this function to lock. 723 * The socket must have a valid 724 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 725 * except through a kernel programming error, so it is acceptable to panic 726 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 727 * because there actually /is/ a programming error somewhere... XXX) 728 */ 729 int 730 in_setsockaddr(so, nam, pcbinfo) 731 struct socket *so; 732 struct sockaddr **nam; 733 struct inpcbinfo *pcbinfo; 734 { 735 int s; 736 register struct inpcb *inp; 737 struct in_addr addr; 738 in_port_t port; 739 740 s = splnet(); 741 INP_INFO_RLOCK(pcbinfo); 742 inp = sotoinpcb(so); 743 if (!inp) { 744 INP_INFO_RUNLOCK(pcbinfo); 745 splx(s); 746 return ECONNRESET; 747 } 748 INP_LOCK(inp); 749 port = inp->inp_lport; 750 addr = inp->inp_laddr; 751 INP_UNLOCK(inp); 752 INP_INFO_RUNLOCK(pcbinfo); 753 splx(s); 754 755 *nam = in_sockaddr(port, &addr); 756 return 0; 757 } 758 759 /* 760 * The wrapper function will pass down the pcbinfo for this function to lock. 761 */ 762 int 763 in_setpeeraddr(so, nam, pcbinfo) 764 struct socket *so; 765 struct sockaddr **nam; 766 struct inpcbinfo *pcbinfo; 767 { 768 int s; 769 register struct inpcb *inp; 770 struct in_addr addr; 771 in_port_t port; 772 773 s = splnet(); 774 INP_INFO_RLOCK(pcbinfo); 775 inp = sotoinpcb(so); 776 if (!inp) { 777 INP_INFO_RUNLOCK(pcbinfo); 778 splx(s); 779 return ECONNRESET; 780 } 781 INP_LOCK(inp); 782 port = inp->inp_fport; 783 addr = inp->inp_faddr; 784 INP_UNLOCK(inp); 785 INP_INFO_RUNLOCK(pcbinfo); 786 splx(s); 787 788 *nam = in_sockaddr(port, &addr); 789 return 0; 790 } 791 792 void 793 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 794 struct inpcbinfo *pcbinfo; 795 struct in_addr faddr; 796 int errno; 797 struct inpcb *(*notify)(struct inpcb *, int); 798 { 799 struct inpcb *inp, *ninp; 800 struct inpcbhead *head; 801 int s; 802 803 s = splnet(); 804 INP_INFO_WLOCK(pcbinfo); 805 head = pcbinfo->listhead; 806 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 807 INP_LOCK(inp); 808 ninp = LIST_NEXT(inp, inp_list); 809 #ifdef INET6 810 if ((inp->inp_vflag & INP_IPV4) == 0) { 811 INP_UNLOCK(inp); 812 continue; 813 } 814 #endif 815 if (inp->inp_faddr.s_addr != faddr.s_addr || 816 inp->inp_socket == NULL) { 817 INP_UNLOCK(inp); 818 continue; 819 } 820 if ((*notify)(inp, errno)) 821 INP_UNLOCK(inp); 822 } 823 INP_INFO_WUNLOCK(pcbinfo); 824 splx(s); 825 } 826 827 void 828 in_pcbpurgeif0(pcbinfo, ifp) 829 struct inpcbinfo *pcbinfo; 830 struct ifnet *ifp; 831 { 832 struct inpcb *inp; 833 struct ip_moptions *imo; 834 int i, gap; 835 836 /* why no splnet here? XXX */ 837 INP_INFO_RLOCK(pcbinfo); 838 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 839 INP_LOCK(inp); 840 imo = inp->inp_moptions; 841 if ((inp->inp_vflag & INP_IPV4) && 842 imo != NULL) { 843 /* 844 * Unselect the outgoing interface if it is being 845 * detached. 846 */ 847 if (imo->imo_multicast_ifp == ifp) 848 imo->imo_multicast_ifp = NULL; 849 850 /* 851 * Drop multicast group membership if we joined 852 * through the interface being detached. 853 */ 854 for (i = 0, gap = 0; i < imo->imo_num_memberships; 855 i++) { 856 if (imo->imo_membership[i]->inm_ifp == ifp) { 857 in_delmulti(imo->imo_membership[i]); 858 gap++; 859 } else if (gap != 0) 860 imo->imo_membership[i - gap] = 861 imo->imo_membership[i]; 862 } 863 imo->imo_num_memberships -= gap; 864 } 865 INP_UNLOCK(inp); 866 } 867 INP_INFO_RUNLOCK(pcbinfo); 868 } 869 870 /* 871 * Lookup a PCB based on the local address and port. 872 */ 873 struct inpcb * 874 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 875 struct inpcbinfo *pcbinfo; 876 struct in_addr laddr; 877 u_int lport_arg; 878 int wild_okay; 879 { 880 register struct inpcb *inp; 881 int matchwild = 3, wildcard; 882 u_short lport = lport_arg; 883 884 INP_INFO_WLOCK_ASSERT(pcbinfo); 885 886 if (!wild_okay) { 887 struct inpcbhead *head; 888 /* 889 * Look for an unconnected (wildcard foreign addr) PCB that 890 * matches the local address and port we're looking for. 891 */ 892 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 893 LIST_FOREACH(inp, head, inp_hash) { 894 #ifdef INET6 895 if ((inp->inp_vflag & INP_IPV4) == 0) 896 continue; 897 #endif 898 if (inp->inp_faddr.s_addr == INADDR_ANY && 899 inp->inp_laddr.s_addr == laddr.s_addr && 900 inp->inp_lport == lport) { 901 /* 902 * Found. 903 */ 904 return (inp); 905 } 906 } 907 /* 908 * Not found. 909 */ 910 return (NULL); 911 } else { 912 struct inpcbporthead *porthash; 913 struct inpcbport *phd; 914 struct inpcb *match = NULL; 915 /* 916 * Best fit PCB lookup. 917 * 918 * First see if this local port is in use by looking on the 919 * port hash list. 920 */ 921 retrylookup: 922 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 923 pcbinfo->porthashmask)]; 924 LIST_FOREACH(phd, porthash, phd_hash) { 925 if (phd->phd_port == lport) 926 break; 927 } 928 if (phd != NULL) { 929 /* 930 * Port is in use by one or more PCBs. Look for best 931 * fit. 932 */ 933 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 934 wildcard = 0; 935 #ifdef INET6 936 if ((inp->inp_vflag & INP_IPV4) == 0) 937 continue; 938 #endif 939 /* 940 * Clean out old time_wait sockets if they 941 * are clogging up needed local ports. 942 */ 943 if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { 944 if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { 945 INP_LOCK(inp); 946 tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); 947 match = NULL; 948 goto retrylookup; 949 } 950 } 951 if (inp->inp_faddr.s_addr != INADDR_ANY) 952 wildcard++; 953 if (inp->inp_laddr.s_addr != INADDR_ANY) { 954 if (laddr.s_addr == INADDR_ANY) 955 wildcard++; 956 else if (inp->inp_laddr.s_addr != laddr.s_addr) 957 continue; 958 } else { 959 if (laddr.s_addr != INADDR_ANY) 960 wildcard++; 961 } 962 if (wildcard < matchwild) { 963 match = inp; 964 matchwild = wildcard; 965 if (matchwild == 0) { 966 break; 967 } 968 } 969 } 970 } 971 return (match); 972 } 973 } 974 975 /* 976 * Lookup PCB in hash list. 977 */ 978 struct inpcb * 979 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 980 ifp) 981 struct inpcbinfo *pcbinfo; 982 struct in_addr faddr, laddr; 983 u_int fport_arg, lport_arg; 984 int wildcard; 985 struct ifnet *ifp; 986 { 987 struct inpcbhead *head; 988 register struct inpcb *inp; 989 u_short fport = fport_arg, lport = lport_arg; 990 991 INP_INFO_RLOCK_ASSERT(pcbinfo); 992 /* 993 * First look for an exact match. 994 */ 995 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 996 LIST_FOREACH(inp, head, inp_hash) { 997 #ifdef INET6 998 if ((inp->inp_vflag & INP_IPV4) == 0) 999 continue; 1000 #endif 1001 if (inp->inp_faddr.s_addr == faddr.s_addr && 1002 inp->inp_laddr.s_addr == laddr.s_addr && 1003 inp->inp_fport == fport && 1004 inp->inp_lport == lport) { 1005 /* 1006 * Found. 1007 */ 1008 return (inp); 1009 } 1010 } 1011 if (wildcard) { 1012 struct inpcb *local_wild = NULL; 1013 #if defined(INET6) 1014 struct inpcb *local_wild_mapped = NULL; 1015 #endif /* defined(INET6) */ 1016 1017 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1018 LIST_FOREACH(inp, head, inp_hash) { 1019 #ifdef INET6 1020 if ((inp->inp_vflag & INP_IPV4) == 0) 1021 continue; 1022 #endif 1023 if (inp->inp_faddr.s_addr == INADDR_ANY && 1024 inp->inp_lport == lport) { 1025 if (ifp && ifp->if_type == IFT_FAITH && 1026 (inp->inp_flags & INP_FAITH) == 0) 1027 continue; 1028 if (inp->inp_laddr.s_addr == laddr.s_addr) 1029 return (inp); 1030 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1031 #if defined(INET6) 1032 if (INP_CHECK_SOCKAF(inp->inp_socket, 1033 AF_INET6)) 1034 local_wild_mapped = inp; 1035 else 1036 #endif /* defined(INET6) */ 1037 local_wild = inp; 1038 } 1039 } 1040 } 1041 #if defined(INET6) 1042 if (local_wild == NULL) 1043 return (local_wild_mapped); 1044 #endif /* defined(INET6) */ 1045 return (local_wild); 1046 } 1047 1048 /* 1049 * Not found. 1050 */ 1051 return (NULL); 1052 } 1053 1054 /* 1055 * Insert PCB onto various hash lists. 1056 */ 1057 int 1058 in_pcbinshash(inp) 1059 struct inpcb *inp; 1060 { 1061 struct inpcbhead *pcbhash; 1062 struct inpcbporthead *pcbporthash; 1063 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1064 struct inpcbport *phd; 1065 u_int32_t hashkey_faddr; 1066 1067 INP_INFO_WLOCK_ASSERT(pcbinfo); 1068 #ifdef INET6 1069 if (inp->inp_vflag & INP_IPV6) 1070 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1071 else 1072 #endif /* INET6 */ 1073 hashkey_faddr = inp->inp_faddr.s_addr; 1074 1075 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1076 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1077 1078 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1079 pcbinfo->porthashmask)]; 1080 1081 /* 1082 * Go through port list and look for a head for this lport. 1083 */ 1084 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1085 if (phd->phd_port == inp->inp_lport) 1086 break; 1087 } 1088 /* 1089 * If none exists, malloc one and tack it on. 1090 */ 1091 if (phd == NULL) { 1092 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1093 if (phd == NULL) { 1094 return (ENOBUFS); /* XXX */ 1095 } 1096 phd->phd_port = inp->inp_lport; 1097 LIST_INIT(&phd->phd_pcblist); 1098 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1099 } 1100 inp->inp_phd = phd; 1101 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1102 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1103 return (0); 1104 } 1105 1106 /* 1107 * Move PCB to the proper hash bucket when { faddr, fport } have been 1108 * changed. NOTE: This does not handle the case of the lport changing (the 1109 * hashed port list would have to be updated as well), so the lport must 1110 * not change after in_pcbinshash() has been called. 1111 */ 1112 void 1113 in_pcbrehash(inp) 1114 struct inpcb *inp; 1115 { 1116 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1117 struct inpcbhead *head; 1118 u_int32_t hashkey_faddr; 1119 1120 INP_INFO_WLOCK_ASSERT(pcbinfo); 1121 /* XXX? INP_LOCK_ASSERT(inp); */ 1122 #ifdef INET6 1123 if (inp->inp_vflag & INP_IPV6) 1124 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1125 else 1126 #endif /* INET6 */ 1127 hashkey_faddr = inp->inp_faddr.s_addr; 1128 1129 head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1130 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1131 1132 LIST_REMOVE(inp, inp_hash); 1133 LIST_INSERT_HEAD(head, inp, inp_hash); 1134 } 1135 1136 /* 1137 * Remove PCB from various lists. 1138 */ 1139 void 1140 in_pcbremlists(inp) 1141 struct inpcb *inp; 1142 { 1143 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1144 1145 INP_INFO_WLOCK_ASSERT(pcbinfo); 1146 INP_LOCK_ASSERT(inp); 1147 1148 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1149 if (inp->inp_lport) { 1150 struct inpcbport *phd = inp->inp_phd; 1151 1152 LIST_REMOVE(inp, inp_hash); 1153 LIST_REMOVE(inp, inp_portlist); 1154 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1155 LIST_REMOVE(phd, phd_hash); 1156 free(phd, M_PCB); 1157 } 1158 } 1159 LIST_REMOVE(inp, inp_list); 1160 pcbinfo->ipi_count--; 1161 } 1162 1163 /* 1164 * A set label operation has occurred at the socket layer, propagate the 1165 * label change into the in_pcb for the socket. 1166 */ 1167 void 1168 in_pcbsosetlabel(so) 1169 struct socket *so; 1170 { 1171 #ifdef MAC 1172 struct inpcb *inp; 1173 1174 /* XXX: Will assert socket lock when we have them. */ 1175 inp = (struct inpcb *)so->so_pcb; 1176 INP_LOCK(inp); 1177 mac_inpcb_sosetlabel(so, inp); 1178 INP_UNLOCK(inp); 1179 #endif 1180 } 1181