1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 4. Neither the name of the University nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 30 * $FreeBSD$ 31 */ 32 33 #include "opt_ipsec.h" 34 #include "opt_inet6.h" 35 #include "opt_mac.h" 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/mac.h> 40 #include <sys/malloc.h> 41 #include <sys/mbuf.h> 42 #include <sys/domain.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/proc.h> 47 #include <sys/jail.h> 48 #include <sys/kernel.h> 49 #include <sys/sysctl.h> 50 51 #include <vm/uma.h> 52 53 #include <net/if.h> 54 #include <net/if_types.h> 55 #include <net/route.h> 56 57 #include <netinet/in.h> 58 #include <netinet/in_pcb.h> 59 #include <netinet/in_var.h> 60 #include <netinet/ip_var.h> 61 #include <netinet/tcp_var.h> 62 #ifdef INET6 63 #include <netinet/ip6.h> 64 #include <netinet6/ip6_var.h> 65 #endif /* INET6 */ 66 67 #ifdef IPSEC 68 #include <netinet6/ipsec.h> 69 #include <netkey/key.h> 70 #endif /* IPSEC */ 71 72 #ifdef FAST_IPSEC 73 #if defined(IPSEC) || defined(IPSEC_ESP) 74 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 75 #endif 76 77 #include <netipsec/ipsec.h> 78 #include <netipsec/key.h> 79 #endif /* FAST_IPSEC */ 80 81 /* 82 * These configure the range of local port addresses assigned to 83 * "unspecified" outgoing connections/packets/whatever. 84 */ 85 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 86 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 87 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 88 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 89 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 90 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 91 92 /* 93 * Reserved ports accessible only to root. There are significant 94 * security considerations that must be accounted for when changing these, 95 * but the security benefits can be great. Please be careful. 96 */ 97 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ 98 int ipport_reservedlow = 0; 99 100 /* Shall we allocate ephemeral ports in random order? */ 101 int ipport_randomized = 1; 102 103 #define RANGECHK(var, min, max) \ 104 if ((var) < (min)) { (var) = (min); } \ 105 else if ((var) > (max)) { (var) = (max); } 106 107 static int 108 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 109 { 110 int error; 111 112 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req); 113 if (error == 0) { 114 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 115 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 116 RANGECHK(ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX); 117 RANGECHK(ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX); 118 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX); 119 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX); 120 } 121 return (error); 122 } 123 124 #undef RANGECHK 125 126 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 127 128 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 129 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 130 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 131 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 133 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 135 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 137 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 139 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 140 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 141 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); 142 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 143 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); 144 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, 145 CTLFLAG_RW, &ipport_randomized, 0, ""); 146 147 /* 148 * in_pcb.c: manage the Protocol Control Blocks. 149 * 150 * NOTE: It is assumed that most of these functions will be called at 151 * splnet(). XXX - There are, unfortunately, a few exceptions to this 152 * rule that should be fixed. 153 */ 154 155 /* 156 * Allocate a PCB and associate it with the socket. 157 */ 158 int 159 in_pcballoc(so, pcbinfo, type) 160 struct socket *so; 161 struct inpcbinfo *pcbinfo; 162 const char *type; 163 { 164 register struct inpcb *inp; 165 int error; 166 167 INP_INFO_WLOCK_ASSERT(pcbinfo); 168 error = 0; 169 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); 170 if (inp == NULL) 171 return (ENOBUFS); 172 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 173 inp->inp_pcbinfo = pcbinfo; 174 inp->inp_socket = so; 175 #ifdef MAC 176 error = mac_init_inpcb(inp, M_NOWAIT); 177 if (error != 0) 178 goto out; 179 mac_create_inpcb_from_socket(so, inp); 180 #endif 181 #if defined(IPSEC) || defined(FAST_IPSEC) 182 #ifdef FAST_IPSEC 183 error = ipsec_init_policy(so, &inp->inp_sp); 184 #else 185 error = ipsec_init_pcbpolicy(so, &inp->inp_sp); 186 #endif 187 if (error != 0) 188 goto out; 189 #endif /*IPSEC*/ 190 #if defined(INET6) 191 if (INP_SOCKAF(so) == AF_INET6) { 192 inp->inp_vflag |= INP_IPV6PROTO; 193 if (ip6_v6only) 194 inp->inp_flags |= IN6P_IPV6_V6ONLY; 195 } 196 #endif 197 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 198 pcbinfo->ipi_count++; 199 so->so_pcb = (caddr_t)inp; 200 INP_LOCK_INIT(inp, "inp", type); 201 #ifdef INET6 202 if (ip6_auto_flowlabel) 203 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 204 #endif 205 #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) 206 out: 207 if (error != 0) 208 uma_zfree(pcbinfo->ipi_zone, inp); 209 #endif 210 return (error); 211 } 212 213 int 214 in_pcbbind(inp, nam, cred) 215 register struct inpcb *inp; 216 struct sockaddr *nam; 217 struct ucred *cred; 218 { 219 int anonport, error; 220 221 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 222 INP_LOCK_ASSERT(inp); 223 224 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 225 return (EINVAL); 226 anonport = inp->inp_lport == 0 && (nam == NULL || 227 ((struct sockaddr_in *)nam)->sin_port == 0); 228 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 229 &inp->inp_lport, cred); 230 if (error) 231 return (error); 232 if (in_pcbinshash(inp) != 0) { 233 inp->inp_laddr.s_addr = INADDR_ANY; 234 inp->inp_lport = 0; 235 return (EAGAIN); 236 } 237 if (anonport) 238 inp->inp_flags |= INP_ANONPORT; 239 return (0); 240 } 241 242 /* 243 * Set up a bind operation on a PCB, performing port allocation 244 * as required, but do not actually modify the PCB. Callers can 245 * either complete the bind by setting inp_laddr/inp_lport and 246 * calling in_pcbinshash(), or they can just use the resulting 247 * port and address to authorise the sending of a once-off packet. 248 * 249 * On error, the values of *laddrp and *lportp are not changed. 250 */ 251 int 252 in_pcbbind_setup(inp, nam, laddrp, lportp, cred) 253 struct inpcb *inp; 254 struct sockaddr *nam; 255 in_addr_t *laddrp; 256 u_short *lportp; 257 struct ucred *cred; 258 { 259 struct socket *so = inp->inp_socket; 260 unsigned short *lastport; 261 struct sockaddr_in *sin; 262 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 263 struct in_addr laddr; 264 u_short lport = 0; 265 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 266 int error, prison = 0; 267 268 INP_INFO_WLOCK_ASSERT(pcbinfo); 269 INP_LOCK_ASSERT(inp); 270 271 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 272 return (EADDRNOTAVAIL); 273 laddr.s_addr = *laddrp; 274 if (nam != NULL && laddr.s_addr != INADDR_ANY) 275 return (EINVAL); 276 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 277 wild = 1; 278 if (nam) { 279 sin = (struct sockaddr_in *)nam; 280 if (nam->sa_len != sizeof (*sin)) 281 return (EINVAL); 282 #ifdef notdef 283 /* 284 * We should check the family, but old programs 285 * incorrectly fail to initialize it. 286 */ 287 if (sin->sin_family != AF_INET) 288 return (EAFNOSUPPORT); 289 #endif 290 if (sin->sin_addr.s_addr != INADDR_ANY) 291 if (prison_ip(cred, 0, &sin->sin_addr.s_addr)) 292 return(EINVAL); 293 if (sin->sin_port != *lportp) { 294 /* Don't allow the port to change. */ 295 if (*lportp != 0) 296 return (EINVAL); 297 lport = sin->sin_port; 298 } 299 /* NB: lport is left as 0 if the port isn't being changed. */ 300 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 301 /* 302 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 303 * allow complete duplication of binding if 304 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 305 * and a multicast address is bound on both 306 * new and duplicated sockets. 307 */ 308 if (so->so_options & SO_REUSEADDR) 309 reuseport = SO_REUSEADDR|SO_REUSEPORT; 310 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 311 sin->sin_port = 0; /* yech... */ 312 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 313 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 314 return (EADDRNOTAVAIL); 315 } 316 laddr = sin->sin_addr; 317 if (lport) { 318 struct inpcb *t; 319 /* GROSS */ 320 if (ntohs(lport) <= ipport_reservedhigh && 321 ntohs(lport) >= ipport_reservedlow && 322 suser_cred(cred, PRISON_ROOT)) 323 return (EACCES); 324 if (jailed(cred)) 325 prison = 1; 326 if (so->so_cred->cr_uid != 0 && 327 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 328 t = in_pcblookup_local(inp->inp_pcbinfo, 329 sin->sin_addr, lport, 330 prison ? 0 : INPLOOKUP_WILDCARD); 331 /* 332 * XXX 333 * This entire block sorely needs a rewrite. 334 */ 335 if (t && 336 ((t->inp_vflag & INP_TIMEWAIT) == 0) && 337 (so->so_type != SOCK_STREAM || 338 ntohl(t->inp_faddr.s_addr) == INADDR_ANY) && 339 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 340 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 341 (t->inp_socket->so_options & 342 SO_REUSEPORT) == 0) && 343 (so->so_cred->cr_uid != 344 t->inp_socket->so_cred->cr_uid)) { 345 #if defined(INET6) 346 if (ntohl(sin->sin_addr.s_addr) != 347 INADDR_ANY || 348 ntohl(t->inp_laddr.s_addr) != 349 INADDR_ANY || 350 INP_SOCKAF(so) == 351 INP_SOCKAF(t->inp_socket)) 352 #endif /* defined(INET6) */ 353 return (EADDRINUSE); 354 } 355 } 356 if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr)) 357 return (EADDRNOTAVAIL); 358 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 359 lport, prison ? 0 : wild); 360 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 361 if ((reuseport & intotw(t)->tw_so_options) == 0) 362 return (EADDRINUSE); 363 } else 364 if (t && 365 (reuseport & t->inp_socket->so_options) == 0) { 366 #if defined(INET6) 367 if (ntohl(sin->sin_addr.s_addr) != 368 INADDR_ANY || 369 ntohl(t->inp_laddr.s_addr) != 370 INADDR_ANY || 371 INP_SOCKAF(so) == 372 INP_SOCKAF(t->inp_socket)) 373 #endif /* defined(INET6) */ 374 return (EADDRINUSE); 375 } 376 } 377 } 378 if (*lportp != 0) 379 lport = *lportp; 380 if (lport == 0) { 381 u_short first, last; 382 int count; 383 384 if (laddr.s_addr != INADDR_ANY) 385 if (prison_ip(cred, 0, &laddr.s_addr)) 386 return (EINVAL); 387 388 if (inp->inp_flags & INP_HIGHPORT) { 389 first = ipport_hifirstauto; /* sysctl */ 390 last = ipport_hilastauto; 391 lastport = &pcbinfo->lasthi; 392 } else if (inp->inp_flags & INP_LOWPORT) { 393 if ((error = suser_cred(cred, PRISON_ROOT)) != 0) 394 return error; 395 first = ipport_lowfirstauto; /* 1023 */ 396 last = ipport_lowlastauto; /* 600 */ 397 lastport = &pcbinfo->lastlow; 398 } else { 399 first = ipport_firstauto; /* sysctl */ 400 last = ipport_lastauto; 401 lastport = &pcbinfo->lastport; 402 } 403 /* 404 * Simple check to ensure all ports are not used up causing 405 * a deadlock here. 406 * 407 * We split the two cases (up and down) so that the direction 408 * is not being tested on each round of the loop. 409 */ 410 if (first > last) { 411 /* 412 * counting down 413 */ 414 if (ipport_randomized) 415 *lastport = first - 416 (arc4random() % (first - last)); 417 count = first - last; 418 419 do { 420 if (count-- < 0) /* completely used? */ 421 return (EADDRNOTAVAIL); 422 --*lastport; 423 if (*lastport > first || *lastport < last) 424 *lastport = first; 425 lport = htons(*lastport); 426 } while (in_pcblookup_local(pcbinfo, laddr, lport, 427 wild)); 428 } else { 429 /* 430 * counting up 431 */ 432 if (ipport_randomized) 433 *lastport = first + 434 (arc4random() % (last - first)); 435 count = last - first; 436 437 do { 438 if (count-- < 0) /* completely used? */ 439 return (EADDRNOTAVAIL); 440 ++*lastport; 441 if (*lastport < first || *lastport > last) 442 *lastport = first; 443 lport = htons(*lastport); 444 } while (in_pcblookup_local(pcbinfo, laddr, lport, 445 wild)); 446 } 447 } 448 if (prison_ip(cred, 0, &laddr.s_addr)) 449 return (EINVAL); 450 *laddrp = laddr.s_addr; 451 *lportp = lport; 452 return (0); 453 } 454 455 /* 456 * Connect from a socket to a specified address. 457 * Both address and port must be specified in argument sin. 458 * If don't have a local address for this socket yet, 459 * then pick one. 460 */ 461 int 462 in_pcbconnect(inp, nam, cred) 463 register struct inpcb *inp; 464 struct sockaddr *nam; 465 struct ucred *cred; 466 { 467 u_short lport, fport; 468 in_addr_t laddr, faddr; 469 int anonport, error; 470 471 lport = inp->inp_lport; 472 laddr = inp->inp_laddr.s_addr; 473 anonport = (lport == 0); 474 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 475 NULL, cred); 476 if (error) 477 return (error); 478 479 /* Do the initial binding of the local address if required. */ 480 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 481 inp->inp_lport = lport; 482 inp->inp_laddr.s_addr = laddr; 483 if (in_pcbinshash(inp) != 0) { 484 inp->inp_laddr.s_addr = INADDR_ANY; 485 inp->inp_lport = 0; 486 return (EAGAIN); 487 } 488 } 489 490 /* Commit the remaining changes. */ 491 inp->inp_lport = lport; 492 inp->inp_laddr.s_addr = laddr; 493 inp->inp_faddr.s_addr = faddr; 494 inp->inp_fport = fport; 495 in_pcbrehash(inp); 496 #ifdef IPSEC 497 if (inp->inp_socket->so_type == SOCK_STREAM) 498 ipsec_pcbconn(inp->inp_sp); 499 #endif 500 if (anonport) 501 inp->inp_flags |= INP_ANONPORT; 502 return (0); 503 } 504 505 /* 506 * Set up for a connect from a socket to the specified address. 507 * On entry, *laddrp and *lportp should contain the current local 508 * address and port for the PCB; these are updated to the values 509 * that should be placed in inp_laddr and inp_lport to complete 510 * the connect. 511 * 512 * On success, *faddrp and *fportp will be set to the remote address 513 * and port. These are not updated in the error case. 514 * 515 * If the operation fails because the connection already exists, 516 * *oinpp will be set to the PCB of that connection so that the 517 * caller can decide to override it. In all other cases, *oinpp 518 * is set to NULL. 519 */ 520 int 521 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, cred) 522 register struct inpcb *inp; 523 struct sockaddr *nam; 524 in_addr_t *laddrp; 525 u_short *lportp; 526 in_addr_t *faddrp; 527 u_short *fportp; 528 struct inpcb **oinpp; 529 struct ucred *cred; 530 { 531 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 532 struct in_ifaddr *ia; 533 struct sockaddr_in sa; 534 struct ucred *socred; 535 struct inpcb *oinp; 536 struct in_addr laddr, faddr; 537 u_short lport, fport; 538 int error; 539 540 if (oinpp != NULL) 541 *oinpp = NULL; 542 if (nam->sa_len != sizeof (*sin)) 543 return (EINVAL); 544 if (sin->sin_family != AF_INET) 545 return (EAFNOSUPPORT); 546 if (sin->sin_port == 0) 547 return (EADDRNOTAVAIL); 548 laddr.s_addr = *laddrp; 549 lport = *lportp; 550 faddr = sin->sin_addr; 551 fport = sin->sin_port; 552 socred = inp->inp_socket->so_cred; 553 if (laddr.s_addr == INADDR_ANY && jailed(socred)) { 554 bzero(&sa, sizeof(sa)); 555 sa.sin_addr.s_addr = htonl(prison_getip(socred)); 556 sa.sin_len = sizeof(sa); 557 sa.sin_family = AF_INET; 558 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 559 &laddr.s_addr, &lport, cred); 560 if (error) 561 return (error); 562 } 563 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 564 /* 565 * If the destination address is INADDR_ANY, 566 * use the primary local address. 567 * If the supplied address is INADDR_BROADCAST, 568 * and the primary interface supports broadcast, 569 * choose the broadcast address for that interface. 570 */ 571 if (faddr.s_addr == INADDR_ANY) 572 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 573 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 574 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 575 IFF_BROADCAST)) 576 faddr = satosin(&TAILQ_FIRST( 577 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 578 } 579 if (laddr.s_addr == INADDR_ANY) { 580 struct route sro; 581 582 bzero(&sro, sizeof(sro)); 583 ia = (struct in_ifaddr *)0; 584 /* 585 * If route is known our src addr is taken from the i/f, 586 * else punt. 587 */ 588 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) { 589 /* Find out route to destination */ 590 sro.ro_dst.sa_family = AF_INET; 591 sro.ro_dst.sa_len = sizeof(struct sockaddr_in); 592 ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr; 593 rtalloc_ign(&sro, RTF_CLONING); 594 } 595 /* 596 * If we found a route, use the address 597 * corresponding to the outgoing interface 598 * unless it is the loopback (in case a route 599 * to our address on another net goes to loopback). 600 */ 601 if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 602 ia = ifatoia(sro.ro_rt->rt_ifa); 603 if (sro.ro_rt) 604 RTFREE(sro.ro_rt); 605 if (ia == 0) { 606 bzero(&sa, sizeof(sa)); 607 sa.sin_addr = faddr; 608 sa.sin_len = sizeof(sa); 609 sa.sin_family = AF_INET; 610 611 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 612 if (ia == 0) 613 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 614 if (ia == 0) 615 ia = TAILQ_FIRST(&in_ifaddrhead); 616 if (ia == 0) 617 return (EADDRNOTAVAIL); 618 } 619 /* 620 * If the destination address is multicast and an outgoing 621 * interface has been set as a multicast option, use the 622 * address of that interface as our source address. 623 */ 624 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 625 inp->inp_moptions != NULL) { 626 struct ip_moptions *imo; 627 struct ifnet *ifp; 628 629 imo = inp->inp_moptions; 630 if (imo->imo_multicast_ifp != NULL) { 631 ifp = imo->imo_multicast_ifp; 632 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 633 if (ia->ia_ifp == ifp) 634 break; 635 if (ia == 0) 636 return (EADDRNOTAVAIL); 637 } 638 } 639 laddr = ia->ia_addr.sin_addr; 640 } 641 642 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 643 0, NULL); 644 if (oinp != NULL) { 645 if (oinpp != NULL) 646 *oinpp = oinp; 647 return (EADDRINUSE); 648 } 649 if (lport == 0) { 650 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, 651 cred); 652 if (error) 653 return (error); 654 } 655 *laddrp = laddr.s_addr; 656 *lportp = lport; 657 *faddrp = faddr.s_addr; 658 *fportp = fport; 659 return (0); 660 } 661 662 void 663 in_pcbdisconnect(inp) 664 struct inpcb *inp; 665 { 666 INP_LOCK_ASSERT(inp); 667 668 inp->inp_faddr.s_addr = INADDR_ANY; 669 inp->inp_fport = 0; 670 in_pcbrehash(inp); 671 #ifdef IPSEC 672 ipsec_pcbdisconn(inp->inp_sp); 673 #endif 674 if (inp->inp_socket->so_state & SS_NOFDREF) 675 in_pcbdetach(inp); 676 } 677 678 void 679 in_pcbdetach(inp) 680 struct inpcb *inp; 681 { 682 struct socket *so = inp->inp_socket; 683 struct inpcbinfo *ipi = inp->inp_pcbinfo; 684 685 INP_LOCK_ASSERT(inp); 686 687 #if defined(IPSEC) || defined(FAST_IPSEC) 688 ipsec4_delete_pcbpolicy(inp); 689 #endif /*IPSEC*/ 690 inp->inp_gencnt = ++ipi->ipi_gencnt; 691 in_pcbremlists(inp); 692 if (so) { 693 so->so_pcb = 0; 694 sotryfree(so); 695 } 696 if (inp->inp_options) 697 (void)m_free(inp->inp_options); 698 ip_freemoptions(inp->inp_moptions); 699 inp->inp_vflag = 0; 700 INP_LOCK_DESTROY(inp); 701 #ifdef MAC 702 mac_destroy_inpcb(inp); 703 #endif 704 uma_zfree(ipi->ipi_zone, inp); 705 } 706 707 struct sockaddr * 708 in_sockaddr(port, addr_p) 709 in_port_t port; 710 struct in_addr *addr_p; 711 { 712 struct sockaddr_in *sin; 713 714 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 715 M_WAITOK | M_ZERO); 716 sin->sin_family = AF_INET; 717 sin->sin_len = sizeof(*sin); 718 sin->sin_addr = *addr_p; 719 sin->sin_port = port; 720 721 return (struct sockaddr *)sin; 722 } 723 724 /* 725 * The wrapper function will pass down the pcbinfo for this function to lock. 726 * The socket must have a valid 727 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 728 * except through a kernel programming error, so it is acceptable to panic 729 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 730 * because there actually /is/ a programming error somewhere... XXX) 731 */ 732 int 733 in_setsockaddr(so, nam, pcbinfo) 734 struct socket *so; 735 struct sockaddr **nam; 736 struct inpcbinfo *pcbinfo; 737 { 738 int s; 739 register struct inpcb *inp; 740 struct in_addr addr; 741 in_port_t port; 742 743 s = splnet(); 744 INP_INFO_RLOCK(pcbinfo); 745 inp = sotoinpcb(so); 746 if (!inp) { 747 INP_INFO_RUNLOCK(pcbinfo); 748 splx(s); 749 return ECONNRESET; 750 } 751 INP_LOCK(inp); 752 port = inp->inp_lport; 753 addr = inp->inp_laddr; 754 INP_UNLOCK(inp); 755 INP_INFO_RUNLOCK(pcbinfo); 756 splx(s); 757 758 *nam = in_sockaddr(port, &addr); 759 return 0; 760 } 761 762 /* 763 * The wrapper function will pass down the pcbinfo for this function to lock. 764 */ 765 int 766 in_setpeeraddr(so, nam, pcbinfo) 767 struct socket *so; 768 struct sockaddr **nam; 769 struct inpcbinfo *pcbinfo; 770 { 771 int s; 772 register struct inpcb *inp; 773 struct in_addr addr; 774 in_port_t port; 775 776 s = splnet(); 777 INP_INFO_RLOCK(pcbinfo); 778 inp = sotoinpcb(so); 779 if (!inp) { 780 INP_INFO_RUNLOCK(pcbinfo); 781 splx(s); 782 return ECONNRESET; 783 } 784 INP_LOCK(inp); 785 port = inp->inp_fport; 786 addr = inp->inp_faddr; 787 INP_UNLOCK(inp); 788 INP_INFO_RUNLOCK(pcbinfo); 789 splx(s); 790 791 *nam = in_sockaddr(port, &addr); 792 return 0; 793 } 794 795 void 796 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 797 struct inpcbinfo *pcbinfo; 798 struct in_addr faddr; 799 int errno; 800 struct inpcb *(*notify)(struct inpcb *, int); 801 { 802 struct inpcb *inp, *ninp; 803 struct inpcbhead *head; 804 int s; 805 806 s = splnet(); 807 INP_INFO_WLOCK(pcbinfo); 808 head = pcbinfo->listhead; 809 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 810 INP_LOCK(inp); 811 ninp = LIST_NEXT(inp, inp_list); 812 #ifdef INET6 813 if ((inp->inp_vflag & INP_IPV4) == 0) { 814 INP_UNLOCK(inp); 815 continue; 816 } 817 #endif 818 if (inp->inp_faddr.s_addr != faddr.s_addr || 819 inp->inp_socket == NULL) { 820 INP_UNLOCK(inp); 821 continue; 822 } 823 if ((*notify)(inp, errno)) 824 INP_UNLOCK(inp); 825 } 826 INP_INFO_WUNLOCK(pcbinfo); 827 splx(s); 828 } 829 830 void 831 in_pcbpurgeif0(pcbinfo, ifp) 832 struct inpcbinfo *pcbinfo; 833 struct ifnet *ifp; 834 { 835 struct inpcb *inp; 836 struct ip_moptions *imo; 837 int i, gap; 838 839 /* why no splnet here? XXX */ 840 INP_INFO_RLOCK(pcbinfo); 841 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 842 INP_LOCK(inp); 843 imo = inp->inp_moptions; 844 if ((inp->inp_vflag & INP_IPV4) && 845 imo != NULL) { 846 /* 847 * Unselect the outgoing interface if it is being 848 * detached. 849 */ 850 if (imo->imo_multicast_ifp == ifp) 851 imo->imo_multicast_ifp = NULL; 852 853 /* 854 * Drop multicast group membership if we joined 855 * through the interface being detached. 856 */ 857 for (i = 0, gap = 0; i < imo->imo_num_memberships; 858 i++) { 859 if (imo->imo_membership[i]->inm_ifp == ifp) { 860 in_delmulti(imo->imo_membership[i]); 861 gap++; 862 } else if (gap != 0) 863 imo->imo_membership[i - gap] = 864 imo->imo_membership[i]; 865 } 866 imo->imo_num_memberships -= gap; 867 } 868 INP_UNLOCK(inp); 869 } 870 INP_INFO_RUNLOCK(pcbinfo); 871 } 872 873 /* 874 * Lookup a PCB based on the local address and port. 875 */ 876 struct inpcb * 877 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 878 struct inpcbinfo *pcbinfo; 879 struct in_addr laddr; 880 u_int lport_arg; 881 int wild_okay; 882 { 883 register struct inpcb *inp; 884 int matchwild = 3, wildcard; 885 u_short lport = lport_arg; 886 887 INP_INFO_WLOCK_ASSERT(pcbinfo); 888 889 if (!wild_okay) { 890 struct inpcbhead *head; 891 /* 892 * Look for an unconnected (wildcard foreign addr) PCB that 893 * matches the local address and port we're looking for. 894 */ 895 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 896 LIST_FOREACH(inp, head, inp_hash) { 897 #ifdef INET6 898 if ((inp->inp_vflag & INP_IPV4) == 0) 899 continue; 900 #endif 901 if (inp->inp_faddr.s_addr == INADDR_ANY && 902 inp->inp_laddr.s_addr == laddr.s_addr && 903 inp->inp_lport == lport) { 904 /* 905 * Found. 906 */ 907 return (inp); 908 } 909 } 910 /* 911 * Not found. 912 */ 913 return (NULL); 914 } else { 915 struct inpcbporthead *porthash; 916 struct inpcbport *phd; 917 struct inpcb *match = NULL; 918 /* 919 * Best fit PCB lookup. 920 * 921 * First see if this local port is in use by looking on the 922 * port hash list. 923 */ 924 retrylookup: 925 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 926 pcbinfo->porthashmask)]; 927 LIST_FOREACH(phd, porthash, phd_hash) { 928 if (phd->phd_port == lport) 929 break; 930 } 931 if (phd != NULL) { 932 /* 933 * Port is in use by one or more PCBs. Look for best 934 * fit. 935 */ 936 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 937 wildcard = 0; 938 #ifdef INET6 939 if ((inp->inp_vflag & INP_IPV4) == 0) 940 continue; 941 #endif 942 /* 943 * Clean out old time_wait sockets if they 944 * are clogging up needed local ports. 945 */ 946 if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { 947 if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { 948 INP_LOCK(inp); 949 tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); 950 match = NULL; 951 goto retrylookup; 952 } 953 } 954 if (inp->inp_faddr.s_addr != INADDR_ANY) 955 wildcard++; 956 if (inp->inp_laddr.s_addr != INADDR_ANY) { 957 if (laddr.s_addr == INADDR_ANY) 958 wildcard++; 959 else if (inp->inp_laddr.s_addr != laddr.s_addr) 960 continue; 961 } else { 962 if (laddr.s_addr != INADDR_ANY) 963 wildcard++; 964 } 965 if (wildcard < matchwild) { 966 match = inp; 967 matchwild = wildcard; 968 if (matchwild == 0) { 969 break; 970 } 971 } 972 } 973 } 974 return (match); 975 } 976 } 977 978 /* 979 * Lookup PCB in hash list. 980 */ 981 struct inpcb * 982 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 983 ifp) 984 struct inpcbinfo *pcbinfo; 985 struct in_addr faddr, laddr; 986 u_int fport_arg, lport_arg; 987 int wildcard; 988 struct ifnet *ifp; 989 { 990 struct inpcbhead *head; 991 register struct inpcb *inp; 992 u_short fport = fport_arg, lport = lport_arg; 993 994 INP_INFO_RLOCK_ASSERT(pcbinfo); 995 /* 996 * First look for an exact match. 997 */ 998 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 999 LIST_FOREACH(inp, head, inp_hash) { 1000 #ifdef INET6 1001 if ((inp->inp_vflag & INP_IPV4) == 0) 1002 continue; 1003 #endif 1004 if (inp->inp_faddr.s_addr == faddr.s_addr && 1005 inp->inp_laddr.s_addr == laddr.s_addr && 1006 inp->inp_fport == fport && 1007 inp->inp_lport == lport) { 1008 /* 1009 * Found. 1010 */ 1011 return (inp); 1012 } 1013 } 1014 if (wildcard) { 1015 struct inpcb *local_wild = NULL; 1016 #if defined(INET6) 1017 struct inpcb *local_wild_mapped = NULL; 1018 #endif /* defined(INET6) */ 1019 1020 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1021 LIST_FOREACH(inp, head, inp_hash) { 1022 #ifdef INET6 1023 if ((inp->inp_vflag & INP_IPV4) == 0) 1024 continue; 1025 #endif 1026 if (inp->inp_faddr.s_addr == INADDR_ANY && 1027 inp->inp_lport == lport) { 1028 if (ifp && ifp->if_type == IFT_FAITH && 1029 (inp->inp_flags & INP_FAITH) == 0) 1030 continue; 1031 if (inp->inp_laddr.s_addr == laddr.s_addr) 1032 return (inp); 1033 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1034 #if defined(INET6) 1035 if (INP_CHECK_SOCKAF(inp->inp_socket, 1036 AF_INET6)) 1037 local_wild_mapped = inp; 1038 else 1039 #endif /* defined(INET6) */ 1040 local_wild = inp; 1041 } 1042 } 1043 } 1044 #if defined(INET6) 1045 if (local_wild == NULL) 1046 return (local_wild_mapped); 1047 #endif /* defined(INET6) */ 1048 return (local_wild); 1049 } 1050 1051 /* 1052 * Not found. 1053 */ 1054 return (NULL); 1055 } 1056 1057 /* 1058 * Insert PCB onto various hash lists. 1059 */ 1060 int 1061 in_pcbinshash(inp) 1062 struct inpcb *inp; 1063 { 1064 struct inpcbhead *pcbhash; 1065 struct inpcbporthead *pcbporthash; 1066 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1067 struct inpcbport *phd; 1068 u_int32_t hashkey_faddr; 1069 1070 INP_INFO_WLOCK_ASSERT(pcbinfo); 1071 #ifdef INET6 1072 if (inp->inp_vflag & INP_IPV6) 1073 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1074 else 1075 #endif /* INET6 */ 1076 hashkey_faddr = inp->inp_faddr.s_addr; 1077 1078 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1079 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1080 1081 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1082 pcbinfo->porthashmask)]; 1083 1084 /* 1085 * Go through port list and look for a head for this lport. 1086 */ 1087 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1088 if (phd->phd_port == inp->inp_lport) 1089 break; 1090 } 1091 /* 1092 * If none exists, malloc one and tack it on. 1093 */ 1094 if (phd == NULL) { 1095 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1096 if (phd == NULL) { 1097 return (ENOBUFS); /* XXX */ 1098 } 1099 phd->phd_port = inp->inp_lport; 1100 LIST_INIT(&phd->phd_pcblist); 1101 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1102 } 1103 inp->inp_phd = phd; 1104 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1105 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1106 return (0); 1107 } 1108 1109 /* 1110 * Move PCB to the proper hash bucket when { faddr, fport } have been 1111 * changed. NOTE: This does not handle the case of the lport changing (the 1112 * hashed port list would have to be updated as well), so the lport must 1113 * not change after in_pcbinshash() has been called. 1114 */ 1115 void 1116 in_pcbrehash(inp) 1117 struct inpcb *inp; 1118 { 1119 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1120 struct inpcbhead *head; 1121 u_int32_t hashkey_faddr; 1122 1123 INP_INFO_WLOCK_ASSERT(pcbinfo); 1124 /* XXX? INP_LOCK_ASSERT(inp); */ 1125 #ifdef INET6 1126 if (inp->inp_vflag & INP_IPV6) 1127 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1128 else 1129 #endif /* INET6 */ 1130 hashkey_faddr = inp->inp_faddr.s_addr; 1131 1132 head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1133 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1134 1135 LIST_REMOVE(inp, inp_hash); 1136 LIST_INSERT_HEAD(head, inp, inp_hash); 1137 } 1138 1139 /* 1140 * Remove PCB from various lists. 1141 */ 1142 void 1143 in_pcbremlists(inp) 1144 struct inpcb *inp; 1145 { 1146 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1147 1148 INP_INFO_WLOCK_ASSERT(pcbinfo); 1149 INP_LOCK_ASSERT(inp); 1150 1151 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1152 if (inp->inp_lport) { 1153 struct inpcbport *phd = inp->inp_phd; 1154 1155 LIST_REMOVE(inp, inp_hash); 1156 LIST_REMOVE(inp, inp_portlist); 1157 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1158 LIST_REMOVE(phd, phd_hash); 1159 free(phd, M_PCB); 1160 } 1161 } 1162 LIST_REMOVE(inp, inp_list); 1163 pcbinfo->ipi_count--; 1164 } 1165 1166 /* 1167 * A set label operation has occurred at the socket layer, propagate the 1168 * label change into the in_pcb for the socket. 1169 */ 1170 void 1171 in_pcbsosetlabel(so) 1172 struct socket *so; 1173 { 1174 #ifdef MAC 1175 struct inpcb *inp; 1176 1177 /* XXX: Will assert socket lock when we have them. */ 1178 inp = (struct inpcb *)so->so_pcb; 1179 INP_LOCK(inp); 1180 mac_inpcb_sosetlabel(so, inp); 1181 INP_UNLOCK(inp); 1182 #endif 1183 } 1184