1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 #include "opt_mac.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/limits.h> 44 #include <sys/mac.h> 45 #include <sys/malloc.h> 46 #include <sys/mbuf.h> 47 #include <sys/domain.h> 48 #include <sys/protosw.h> 49 #include <sys/socket.h> 50 #include <sys/socketvar.h> 51 #include <sys/proc.h> 52 #include <sys/jail.h> 53 #include <sys/kernel.h> 54 #include <sys/sysctl.h> 55 56 #include <vm/uma.h> 57 58 #include <net/if.h> 59 #include <net/if_types.h> 60 #include <net/route.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/in_var.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/tcp_var.h> 67 #ifdef INET6 68 #include <netinet/ip6.h> 69 #include <netinet6/ip6_var.h> 70 #endif /* INET6 */ 71 72 #ifdef IPSEC 73 #include <netinet6/ipsec.h> 74 #include <netkey/key.h> 75 #endif /* IPSEC */ 76 77 #ifdef FAST_IPSEC 78 #if defined(IPSEC) || defined(IPSEC_ESP) 79 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 80 #endif 81 82 #include <netipsec/ipsec.h> 83 #include <netipsec/key.h> 84 #endif /* FAST_IPSEC */ 85 86 struct in_addr zeroin_addr; 87 88 /* 89 * These configure the range of local port addresses assigned to 90 * "unspecified" outgoing connections/packets/whatever. 91 */ 92 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 93 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 94 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 95 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 96 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 97 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 98 99 /* 100 * Reserved ports accessible only to root. There are significant 101 * security considerations that must be accounted for when changing these, 102 * but the security benefits can be great. Please be careful. 103 */ 104 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ 105 int ipport_reservedlow = 0; 106 107 #define RANGECHK(var, min, max) \ 108 if ((var) < (min)) { (var) = (min); } \ 109 else if ((var) > (max)) { (var) = (max); } 110 111 static int 112 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 113 { 114 int error = sysctl_handle_int(oidp, 115 oidp->oid_arg1, oidp->oid_arg2, req); 116 if (!error) { 117 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 118 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 119 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 120 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 121 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 122 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 123 } 124 return error; 125 } 126 127 #undef RANGECHK 128 129 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 130 131 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 132 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 133 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 134 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 135 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 136 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 137 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 138 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 139 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 140 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 141 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 142 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 143 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 144 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); 145 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 146 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); 147 148 /* 149 * in_pcb.c: manage the Protocol Control Blocks. 150 * 151 * NOTE: It is assumed that most of these functions will be called at 152 * splnet(). XXX - There are, unfortunately, a few exceptions to this 153 * rule that should be fixed. 154 */ 155 156 /* 157 * Allocate a PCB and associate it with the socket. 158 */ 159 int 160 in_pcballoc(so, pcbinfo, td) 161 struct socket *so; 162 struct inpcbinfo *pcbinfo; 163 struct thread *td; 164 { 165 register struct inpcb *inp; 166 int error; 167 168 INP_INFO_WLOCK_ASSERT(pcbinfo); 169 error = 0; 170 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO); 171 if (inp == NULL) 172 return (ENOBUFS); 173 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 174 inp->inp_pcbinfo = pcbinfo; 175 inp->inp_socket = so; 176 #ifdef MAC 177 error = mac_init_inpcb(inp, M_NOWAIT); 178 if (error != 0) 179 goto out; 180 mac_create_inpcb_from_socket(so, inp); 181 #endif 182 #if defined(IPSEC) || defined(FAST_IPSEC) 183 #ifdef FAST_IPSEC 184 error = ipsec_init_policy(so, &inp->inp_sp); 185 #else 186 error = ipsec_init_pcbpolicy(so, &inp->inp_sp); 187 #endif 188 if (error != 0) 189 goto out; 190 #endif /*IPSEC*/ 191 #if defined(INET6) 192 if (INP_SOCKAF(so) == AF_INET6) { 193 inp->inp_vflag |= INP_IPV6PROTO; 194 if (ip6_v6only) 195 inp->inp_flags |= IN6P_IPV6_V6ONLY; 196 } 197 #endif 198 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 199 pcbinfo->ipi_count++; 200 so->so_pcb = (caddr_t)inp; 201 INP_LOCK_INIT(inp, "inp"); 202 #ifdef INET6 203 if (ip6_auto_flowlabel) 204 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 205 #endif 206 #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC) 207 out: 208 if (error != 0) 209 uma_zfree(pcbinfo->ipi_zone, inp); 210 #endif 211 return (error); 212 } 213 214 int 215 in_pcbbind(inp, nam, td) 216 register struct inpcb *inp; 217 struct sockaddr *nam; 218 struct thread *td; 219 { 220 int anonport, error; 221 222 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo); 223 INP_LOCK_ASSERT(inp); 224 225 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 226 return (EINVAL); 227 anonport = inp->inp_lport == 0 && (nam == NULL || 228 ((struct sockaddr_in *)nam)->sin_port == 0); 229 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 230 &inp->inp_lport, td); 231 if (error) 232 return (error); 233 if (in_pcbinshash(inp) != 0) { 234 inp->inp_laddr.s_addr = INADDR_ANY; 235 inp->inp_lport = 0; 236 return (EAGAIN); 237 } 238 if (anonport) 239 inp->inp_flags |= INP_ANONPORT; 240 return (0); 241 } 242 243 /* 244 * Set up a bind operation on a PCB, performing port allocation 245 * as required, but do not actually modify the PCB. Callers can 246 * either complete the bind by setting inp_laddr/inp_lport and 247 * calling in_pcbinshash(), or they can just use the resulting 248 * port and address to authorise the sending of a once-off packet. 249 * 250 * On error, the values of *laddrp and *lportp are not changed. 251 */ 252 int 253 in_pcbbind_setup(inp, nam, laddrp, lportp, td) 254 struct inpcb *inp; 255 struct sockaddr *nam; 256 in_addr_t *laddrp; 257 u_short *lportp; 258 struct thread *td; 259 { 260 struct socket *so = inp->inp_socket; 261 unsigned short *lastport; 262 struct sockaddr_in *sin; 263 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 264 struct in_addr laddr; 265 u_short lport = 0; 266 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 267 int error, prison = 0; 268 269 INP_INFO_WLOCK_ASSERT(pcbinfo); 270 INP_LOCK_ASSERT(inp); 271 272 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 273 return (EADDRNOTAVAIL); 274 laddr.s_addr = *laddrp; 275 if (nam != NULL && laddr.s_addr != INADDR_ANY) 276 return (EINVAL); 277 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 278 wild = 1; 279 if (nam) { 280 sin = (struct sockaddr_in *)nam; 281 if (nam->sa_len != sizeof (*sin)) 282 return (EINVAL); 283 #ifdef notdef 284 /* 285 * We should check the family, but old programs 286 * incorrectly fail to initialize it. 287 */ 288 if (sin->sin_family != AF_INET) 289 return (EAFNOSUPPORT); 290 #endif 291 if (sin->sin_addr.s_addr != INADDR_ANY) 292 if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 293 return(EINVAL); 294 if (sin->sin_port != *lportp) { 295 /* Don't allow the port to change. */ 296 if (*lportp != 0) 297 return (EINVAL); 298 lport = sin->sin_port; 299 } 300 /* NB: lport is left as 0 if the port isn't being changed. */ 301 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 302 /* 303 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 304 * allow complete duplication of binding if 305 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 306 * and a multicast address is bound on both 307 * new and duplicated sockets. 308 */ 309 if (so->so_options & SO_REUSEADDR) 310 reuseport = SO_REUSEADDR|SO_REUSEPORT; 311 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 312 sin->sin_port = 0; /* yech... */ 313 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 314 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 315 return (EADDRNOTAVAIL); 316 } 317 laddr = sin->sin_addr; 318 if (lport) { 319 struct inpcb *t; 320 /* GROSS */ 321 if (ntohs(lport) <= ipport_reservedhigh && 322 ntohs(lport) >= ipport_reservedlow && 323 td && suser_cred(td->td_ucred, PRISON_ROOT)) 324 return (EACCES); 325 if (td && jailed(td->td_ucred)) 326 prison = 1; 327 if (so->so_cred->cr_uid != 0 && 328 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 329 t = in_pcblookup_local(inp->inp_pcbinfo, 330 sin->sin_addr, lport, 331 prison ? 0 : INPLOOKUP_WILDCARD); 332 /* 333 * XXX 334 * This entire block sorely needs a rewrite. 335 */ 336 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 337 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 338 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 339 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && 340 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) 341 return (EADDRINUSE); 342 } else 343 if (t && 344 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 345 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 346 (t->inp_socket->so_options & 347 SO_REUSEPORT) == 0) && 348 (so->so_cred->cr_uid != 349 t->inp_socket->so_cred->cr_uid)) { 350 #if defined(INET6) 351 if (ntohl(sin->sin_addr.s_addr) != 352 INADDR_ANY || 353 ntohl(t->inp_laddr.s_addr) != 354 INADDR_ANY || 355 INP_SOCKAF(so) == 356 INP_SOCKAF(t->inp_socket)) 357 #endif /* defined(INET6) */ 358 return (EADDRINUSE); 359 } 360 } 361 if (prison && 362 prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 363 return (EADDRNOTAVAIL); 364 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 365 lport, prison ? 0 : wild); 366 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 367 if ((reuseport & intotw(t)->tw_so_options) == 0) 368 return (EADDRINUSE); 369 } else 370 if (t && 371 (reuseport & t->inp_socket->so_options) == 0) { 372 #if defined(INET6) 373 if (ntohl(sin->sin_addr.s_addr) != 374 INADDR_ANY || 375 ntohl(t->inp_laddr.s_addr) != 376 INADDR_ANY || 377 INP_SOCKAF(so) == 378 INP_SOCKAF(t->inp_socket)) 379 #endif /* defined(INET6) */ 380 return (EADDRINUSE); 381 } 382 } 383 } 384 if (*lportp != 0) 385 lport = *lportp; 386 if (lport == 0) { 387 u_short first, last; 388 int count; 389 390 if (laddr.s_addr != INADDR_ANY) 391 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 392 return (EINVAL); 393 394 if (inp->inp_flags & INP_HIGHPORT) { 395 first = ipport_hifirstauto; /* sysctl */ 396 last = ipport_hilastauto; 397 lastport = &pcbinfo->lasthi; 398 } else if (inp->inp_flags & INP_LOWPORT) { 399 if (td && (error = suser_cred(td->td_ucred, 400 PRISON_ROOT)) != 0) 401 return error; 402 first = ipport_lowfirstauto; /* 1023 */ 403 last = ipport_lowlastauto; /* 600 */ 404 lastport = &pcbinfo->lastlow; 405 } else { 406 first = ipport_firstauto; /* sysctl */ 407 last = ipport_lastauto; 408 lastport = &pcbinfo->lastport; 409 } 410 /* 411 * Simple check to ensure all ports are not used up causing 412 * a deadlock here. 413 * 414 * We split the two cases (up and down) so that the direction 415 * is not being tested on each round of the loop. 416 */ 417 if (first > last) { 418 /* 419 * counting down 420 */ 421 count = first - last; 422 423 do { 424 if (count-- < 0) /* completely used? */ 425 return (EADDRNOTAVAIL); 426 --*lastport; 427 if (*lastport > first || *lastport < last) 428 *lastport = first; 429 lport = htons(*lastport); 430 } while (in_pcblookup_local(pcbinfo, laddr, lport, 431 wild)); 432 } else { 433 /* 434 * counting up 435 */ 436 count = last - first; 437 438 do { 439 if (count-- < 0) /* completely used? */ 440 return (EADDRNOTAVAIL); 441 ++*lastport; 442 if (*lastport < first || *lastport > last) 443 *lastport = first; 444 lport = htons(*lastport); 445 } while (in_pcblookup_local(pcbinfo, laddr, lport, 446 wild)); 447 } 448 } 449 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 450 return (EINVAL); 451 *laddrp = laddr.s_addr; 452 *lportp = lport; 453 return (0); 454 } 455 456 /* 457 * Connect from a socket to a specified address. 458 * Both address and port must be specified in argument sin. 459 * If don't have a local address for this socket yet, 460 * then pick one. 461 */ 462 int 463 in_pcbconnect(inp, nam, td) 464 register struct inpcb *inp; 465 struct sockaddr *nam; 466 struct thread *td; 467 { 468 u_short lport, fport; 469 in_addr_t laddr, faddr; 470 int anonport, error; 471 472 lport = inp->inp_lport; 473 laddr = inp->inp_laddr.s_addr; 474 anonport = (lport == 0); 475 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 476 NULL, td); 477 if (error) 478 return (error); 479 480 /* Do the initial binding of the local address if required. */ 481 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 482 inp->inp_lport = lport; 483 inp->inp_laddr.s_addr = laddr; 484 if (in_pcbinshash(inp) != 0) { 485 inp->inp_laddr.s_addr = INADDR_ANY; 486 inp->inp_lport = 0; 487 return (EAGAIN); 488 } 489 } 490 491 /* Commit the remaining changes. */ 492 inp->inp_lport = lport; 493 inp->inp_laddr.s_addr = laddr; 494 inp->inp_faddr.s_addr = faddr; 495 inp->inp_fport = fport; 496 in_pcbrehash(inp); 497 #ifdef IPSEC 498 if (inp->inp_socket->so_type == SOCK_STREAM) 499 ipsec_pcbconn(inp->inp_sp); 500 #endif 501 if (anonport) 502 inp->inp_flags |= INP_ANONPORT; 503 return (0); 504 } 505 506 /* 507 * Set up for a connect from a socket to the specified address. 508 * On entry, *laddrp and *lportp should contain the current local 509 * address and port for the PCB; these are updated to the values 510 * that should be placed in inp_laddr and inp_lport to complete 511 * the connect. 512 * 513 * On success, *faddrp and *fportp will be set to the remote address 514 * and port. These are not updated in the error case. 515 * 516 * If the operation fails because the connection already exists, 517 * *oinpp will be set to the PCB of that connection so that the 518 * caller can decide to override it. In all other cases, *oinpp 519 * is set to NULL. 520 */ 521 int 522 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) 523 register struct inpcb *inp; 524 struct sockaddr *nam; 525 in_addr_t *laddrp; 526 u_short *lportp; 527 in_addr_t *faddrp; 528 u_short *fportp; 529 struct inpcb **oinpp; 530 struct thread *td; 531 { 532 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 533 struct in_ifaddr *ia; 534 struct sockaddr_in sa; 535 struct ucred *cred; 536 struct inpcb *oinp; 537 struct in_addr laddr, faddr; 538 u_short lport, fport; 539 int error; 540 541 if (oinpp != NULL) 542 *oinpp = NULL; 543 if (nam->sa_len != sizeof (*sin)) 544 return (EINVAL); 545 if (sin->sin_family != AF_INET) 546 return (EAFNOSUPPORT); 547 if (sin->sin_port == 0) 548 return (EADDRNOTAVAIL); 549 laddr.s_addr = *laddrp; 550 lport = *lportp; 551 faddr = sin->sin_addr; 552 fport = sin->sin_port; 553 cred = inp->inp_socket->so_cred; 554 if (laddr.s_addr == INADDR_ANY && jailed(cred)) { 555 bzero(&sa, sizeof(sa)); 556 sa.sin_addr.s_addr = htonl(prison_getip(cred)); 557 sa.sin_len = sizeof(sa); 558 sa.sin_family = AF_INET; 559 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 560 &laddr.s_addr, &lport, td); 561 if (error) 562 return (error); 563 } 564 565 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 566 /* 567 * If the destination address is INADDR_ANY, 568 * use the primary local address. 569 * If the supplied address is INADDR_BROADCAST, 570 * and the primary interface supports broadcast, 571 * choose the broadcast address for that interface. 572 */ 573 if (faddr.s_addr == INADDR_ANY) 574 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 575 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 576 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 577 IFF_BROADCAST)) 578 faddr = satosin(&TAILQ_FIRST( 579 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 580 } 581 if (laddr.s_addr == INADDR_ANY) { 582 register struct route *ro; 583 584 ia = (struct in_ifaddr *)0; 585 /* 586 * If route is known or can be allocated now, 587 * our src addr is taken from the i/f, else punt. 588 * Note that we should check the address family of the cached 589 * destination, in case of sharing the cache with IPv6. 590 */ 591 ro = &inp->inp_route; 592 if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 || 593 ro->ro_dst.sa_family != AF_INET || 594 satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr || 595 inp->inp_socket->so_options & SO_DONTROUTE)) { 596 RTFREE(ro->ro_rt); 597 ro->ro_rt = (struct rtentry *)0; 598 } 599 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 600 (ro->ro_rt == (struct rtentry *)0 || 601 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 602 /* No route yet, so try to acquire one */ 603 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 604 ro->ro_dst.sa_family = AF_INET; 605 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 606 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr; 607 rtalloc(ro); 608 } 609 /* 610 * If we found a route, use the address 611 * corresponding to the outgoing interface 612 * unless it is the loopback (in case a route 613 * to our address on another net goes to loopback). 614 */ 615 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 616 ia = ifatoia(ro->ro_rt->rt_ifa); 617 if (ia == 0) { 618 bzero(&sa, sizeof(sa)); 619 sa.sin_addr = faddr; 620 sa.sin_len = sizeof(sa); 621 sa.sin_family = AF_INET; 622 623 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 624 if (ia == 0) 625 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 626 if (ia == 0) 627 ia = TAILQ_FIRST(&in_ifaddrhead); 628 if (ia == 0) 629 return (EADDRNOTAVAIL); 630 } 631 /* 632 * If the destination address is multicast and an outgoing 633 * interface has been set as a multicast option, use the 634 * address of that interface as our source address. 635 */ 636 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 637 inp->inp_moptions != NULL) { 638 struct ip_moptions *imo; 639 struct ifnet *ifp; 640 641 imo = inp->inp_moptions; 642 if (imo->imo_multicast_ifp != NULL) { 643 ifp = imo->imo_multicast_ifp; 644 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 645 if (ia->ia_ifp == ifp) 646 break; 647 if (ia == 0) 648 return (EADDRNOTAVAIL); 649 } 650 } 651 laddr = ia->ia_addr.sin_addr; 652 } 653 654 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 655 0, NULL); 656 if (oinp != NULL) { 657 if (oinpp != NULL) 658 *oinpp = oinp; 659 return (EADDRINUSE); 660 } 661 if (lport == 0) { 662 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); 663 if (error) 664 return (error); 665 } 666 *laddrp = laddr.s_addr; 667 *lportp = lport; 668 *faddrp = faddr.s_addr; 669 *fportp = fport; 670 return (0); 671 } 672 673 void 674 in_pcbdisconnect(inp) 675 struct inpcb *inp; 676 { 677 INP_LOCK_ASSERT(inp); 678 679 inp->inp_faddr.s_addr = INADDR_ANY; 680 inp->inp_fport = 0; 681 in_pcbrehash(inp); 682 if (inp->inp_socket->so_state & SS_NOFDREF) 683 in_pcbdetach(inp); 684 #ifdef IPSEC 685 ipsec_pcbdisconn(inp->inp_sp); 686 #endif 687 } 688 689 void 690 in_pcbdetach(inp) 691 struct inpcb *inp; 692 { 693 struct socket *so = inp->inp_socket; 694 struct inpcbinfo *ipi = inp->inp_pcbinfo; 695 696 INP_LOCK_ASSERT(inp); 697 698 #if defined(IPSEC) || defined(FAST_IPSEC) 699 ipsec4_delete_pcbpolicy(inp); 700 #endif /*IPSEC*/ 701 inp->inp_gencnt = ++ipi->ipi_gencnt; 702 in_pcbremlists(inp); 703 if (so) { 704 so->so_pcb = 0; 705 sotryfree(so); 706 } 707 if (inp->inp_options) 708 (void)m_free(inp->inp_options); 709 if (inp->inp_route.ro_rt) 710 RTFREE(inp->inp_route.ro_rt); 711 ip_freemoptions(inp->inp_moptions); 712 inp->inp_vflag = 0; 713 INP_LOCK_DESTROY(inp); 714 #ifdef MAC 715 mac_destroy_inpcb(inp); 716 #endif 717 uma_zfree(ipi->ipi_zone, inp); 718 } 719 720 struct sockaddr * 721 in_sockaddr(port, addr_p) 722 in_port_t port; 723 struct in_addr *addr_p; 724 { 725 struct sockaddr_in *sin; 726 727 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 728 M_WAITOK | M_ZERO); 729 sin->sin_family = AF_INET; 730 sin->sin_len = sizeof(*sin); 731 sin->sin_addr = *addr_p; 732 sin->sin_port = port; 733 734 return (struct sockaddr *)sin; 735 } 736 737 /* 738 * The wrapper function will pass down the pcbinfo for this function to lock. 739 * The socket must have a valid 740 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 741 * except through a kernel programming error, so it is acceptable to panic 742 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 743 * because there actually /is/ a programming error somewhere... XXX) 744 */ 745 int 746 in_setsockaddr(so, nam, pcbinfo) 747 struct socket *so; 748 struct sockaddr **nam; 749 struct inpcbinfo *pcbinfo; 750 { 751 int s; 752 register struct inpcb *inp; 753 struct in_addr addr; 754 in_port_t port; 755 756 s = splnet(); 757 INP_INFO_RLOCK(pcbinfo); 758 inp = sotoinpcb(so); 759 if (!inp) { 760 INP_INFO_RUNLOCK(pcbinfo); 761 splx(s); 762 return ECONNRESET; 763 } 764 INP_LOCK(inp); 765 port = inp->inp_lport; 766 addr = inp->inp_laddr; 767 INP_UNLOCK(inp); 768 INP_INFO_RUNLOCK(pcbinfo); 769 splx(s); 770 771 *nam = in_sockaddr(port, &addr); 772 return 0; 773 } 774 775 /* 776 * The wrapper function will pass down the pcbinfo for this function to lock. 777 */ 778 int 779 in_setpeeraddr(so, nam, pcbinfo) 780 struct socket *so; 781 struct sockaddr **nam; 782 struct inpcbinfo *pcbinfo; 783 { 784 int s; 785 register struct inpcb *inp; 786 struct in_addr addr; 787 in_port_t port; 788 789 s = splnet(); 790 INP_INFO_RLOCK(pcbinfo); 791 inp = sotoinpcb(so); 792 if (!inp) { 793 INP_INFO_RUNLOCK(pcbinfo); 794 splx(s); 795 return ECONNRESET; 796 } 797 INP_LOCK(inp); 798 port = inp->inp_fport; 799 addr = inp->inp_faddr; 800 INP_UNLOCK(inp); 801 INP_INFO_RUNLOCK(pcbinfo); 802 splx(s); 803 804 *nam = in_sockaddr(port, &addr); 805 return 0; 806 } 807 808 void 809 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 810 struct inpcbinfo *pcbinfo; 811 struct in_addr faddr; 812 int errno; 813 struct inpcb *(*notify)(struct inpcb *, int); 814 { 815 struct inpcb *inp, *ninp; 816 struct inpcbhead *head; 817 int s; 818 819 s = splnet(); 820 INP_INFO_WLOCK(pcbinfo); 821 head = pcbinfo->listhead; 822 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 823 INP_LOCK(inp); 824 ninp = LIST_NEXT(inp, inp_list); 825 #ifdef INET6 826 if ((inp->inp_vflag & INP_IPV4) == 0) { 827 INP_UNLOCK(inp); 828 continue; 829 } 830 #endif 831 if (inp->inp_faddr.s_addr != faddr.s_addr || 832 inp->inp_socket == NULL) { 833 INP_UNLOCK(inp); 834 continue; 835 } 836 if ((*notify)(inp, errno)) 837 INP_UNLOCK(inp); 838 } 839 INP_INFO_WUNLOCK(pcbinfo); 840 splx(s); 841 } 842 843 void 844 in_pcbpurgeif0(pcbinfo, ifp) 845 struct inpcbinfo *pcbinfo; 846 struct ifnet *ifp; 847 { 848 struct inpcb *inp; 849 struct ip_moptions *imo; 850 int i, gap; 851 852 /* why no splnet here? XXX */ 853 INP_INFO_RLOCK(pcbinfo); 854 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 855 INP_LOCK(inp); 856 imo = inp->inp_moptions; 857 if ((inp->inp_vflag & INP_IPV4) && 858 imo != NULL) { 859 /* 860 * Unselect the outgoing interface if it is being 861 * detached. 862 */ 863 if (imo->imo_multicast_ifp == ifp) 864 imo->imo_multicast_ifp = NULL; 865 866 /* 867 * Drop multicast group membership if we joined 868 * through the interface being detached. 869 */ 870 for (i = 0, gap = 0; i < imo->imo_num_memberships; 871 i++) { 872 if (imo->imo_membership[i]->inm_ifp == ifp) { 873 in_delmulti(imo->imo_membership[i]); 874 gap++; 875 } else if (gap != 0) 876 imo->imo_membership[i - gap] = 877 imo->imo_membership[i]; 878 } 879 imo->imo_num_memberships -= gap; 880 } 881 INP_UNLOCK(inp); 882 } 883 INP_INFO_RUNLOCK(pcbinfo); 884 } 885 886 /* 887 * Check for alternatives when higher level complains 888 * about service problems. For now, invalidate cached 889 * routing information. If the route was created dynamically 890 * (by a redirect), time to try a default gateway again. 891 */ 892 void 893 in_losing(inp) 894 struct inpcb *inp; 895 { 896 register struct rtentry *rt; 897 struct rt_addrinfo info; 898 899 INP_LOCK_ASSERT(inp); 900 901 if ((rt = inp->inp_route.ro_rt)) { 902 RT_LOCK(rt); 903 inp->inp_route.ro_rt = NULL; 904 bzero((caddr_t)&info, sizeof(info)); 905 info.rti_flags = rt->rt_flags; 906 info.rti_info[RTAX_DST] = rt_key(rt); 907 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 908 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 909 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 910 if (rt->rt_flags & RTF_DYNAMIC) 911 rtexpunge(rt); 912 RTFREE_LOCKED(rt); 913 /* 914 * A new route can be allocated 915 * the next time output is attempted. 916 */ 917 } 918 } 919 920 /* 921 * After a routing change, flush old routing 922 * and allocate a (hopefully) better one. 923 */ 924 struct inpcb * 925 in_rtchange(inp, errno) 926 register struct inpcb *inp; 927 int errno; 928 { 929 INP_LOCK_ASSERT(inp); 930 931 if (inp->inp_route.ro_rt) { 932 RTFREE(inp->inp_route.ro_rt); 933 inp->inp_route.ro_rt = 0; 934 /* 935 * A new route can be allocated the next time 936 * output is attempted. 937 */ 938 } 939 return inp; 940 } 941 942 /* 943 * Lookup a PCB based on the local address and port. 944 */ 945 struct inpcb * 946 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 947 struct inpcbinfo *pcbinfo; 948 struct in_addr laddr; 949 u_int lport_arg; 950 int wild_okay; 951 { 952 register struct inpcb *inp; 953 int matchwild = 3, wildcard; 954 u_short lport = lport_arg; 955 956 INP_INFO_WLOCK_ASSERT(pcbinfo); 957 958 if (!wild_okay) { 959 struct inpcbhead *head; 960 /* 961 * Look for an unconnected (wildcard foreign addr) PCB that 962 * matches the local address and port we're looking for. 963 */ 964 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 965 LIST_FOREACH(inp, head, inp_hash) { 966 #ifdef INET6 967 if ((inp->inp_vflag & INP_IPV4) == 0) 968 continue; 969 #endif 970 if (inp->inp_faddr.s_addr == INADDR_ANY && 971 inp->inp_laddr.s_addr == laddr.s_addr && 972 inp->inp_lport == lport) { 973 /* 974 * Found. 975 */ 976 return (inp); 977 } 978 } 979 /* 980 * Not found. 981 */ 982 return (NULL); 983 } else { 984 struct inpcbporthead *porthash; 985 struct inpcbport *phd; 986 struct inpcb *match = NULL; 987 /* 988 * Best fit PCB lookup. 989 * 990 * First see if this local port is in use by looking on the 991 * port hash list. 992 */ 993 retrylookup: 994 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 995 pcbinfo->porthashmask)]; 996 LIST_FOREACH(phd, porthash, phd_hash) { 997 if (phd->phd_port == lport) 998 break; 999 } 1000 if (phd != NULL) { 1001 /* 1002 * Port is in use by one or more PCBs. Look for best 1003 * fit. 1004 */ 1005 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 1006 wildcard = 0; 1007 #ifdef INET6 1008 if ((inp->inp_vflag & INP_IPV4) == 0) 1009 continue; 1010 #endif 1011 /* 1012 * Clean out old time_wait sockets if they 1013 * are clogging up needed local ports. 1014 */ 1015 if ((inp->inp_vflag & INP_TIMEWAIT) != 0) { 1016 if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) { 1017 INP_LOCK(inp); 1018 tcp_twclose((struct tcptw *)inp->inp_ppcb, 0); 1019 match = NULL; 1020 goto retrylookup; 1021 } 1022 } 1023 if (inp->inp_faddr.s_addr != INADDR_ANY) 1024 wildcard++; 1025 if (inp->inp_laddr.s_addr != INADDR_ANY) { 1026 if (laddr.s_addr == INADDR_ANY) 1027 wildcard++; 1028 else if (inp->inp_laddr.s_addr != laddr.s_addr) 1029 continue; 1030 } else { 1031 if (laddr.s_addr != INADDR_ANY) 1032 wildcard++; 1033 } 1034 if (wildcard < matchwild) { 1035 match = inp; 1036 matchwild = wildcard; 1037 if (matchwild == 0) { 1038 break; 1039 } 1040 } 1041 } 1042 } 1043 return (match); 1044 } 1045 } 1046 1047 /* 1048 * Lookup PCB in hash list. 1049 */ 1050 struct inpcb * 1051 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 1052 ifp) 1053 struct inpcbinfo *pcbinfo; 1054 struct in_addr faddr, laddr; 1055 u_int fport_arg, lport_arg; 1056 int wildcard; 1057 struct ifnet *ifp; 1058 { 1059 struct inpcbhead *head; 1060 register struct inpcb *inp; 1061 u_short fport = fport_arg, lport = lport_arg; 1062 1063 INP_INFO_RLOCK_ASSERT(pcbinfo); 1064 /* 1065 * First look for an exact match. 1066 */ 1067 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 1068 LIST_FOREACH(inp, head, inp_hash) { 1069 #ifdef INET6 1070 if ((inp->inp_vflag & INP_IPV4) == 0) 1071 continue; 1072 #endif 1073 if (inp->inp_faddr.s_addr == faddr.s_addr && 1074 inp->inp_laddr.s_addr == laddr.s_addr && 1075 inp->inp_fport == fport && 1076 inp->inp_lport == lport) { 1077 /* 1078 * Found. 1079 */ 1080 return (inp); 1081 } 1082 } 1083 if (wildcard) { 1084 struct inpcb *local_wild = NULL; 1085 #if defined(INET6) 1086 struct inpcb *local_wild_mapped = NULL; 1087 #endif /* defined(INET6) */ 1088 1089 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1090 LIST_FOREACH(inp, head, inp_hash) { 1091 #ifdef INET6 1092 if ((inp->inp_vflag & INP_IPV4) == 0) 1093 continue; 1094 #endif 1095 if (inp->inp_faddr.s_addr == INADDR_ANY && 1096 inp->inp_lport == lport) { 1097 if (ifp && ifp->if_type == IFT_FAITH && 1098 (inp->inp_flags & INP_FAITH) == 0) 1099 continue; 1100 if (inp->inp_laddr.s_addr == laddr.s_addr) 1101 return (inp); 1102 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1103 #if defined(INET6) 1104 if (INP_CHECK_SOCKAF(inp->inp_socket, 1105 AF_INET6)) 1106 local_wild_mapped = inp; 1107 else 1108 #endif /* defined(INET6) */ 1109 local_wild = inp; 1110 } 1111 } 1112 } 1113 #if defined(INET6) 1114 if (local_wild == NULL) 1115 return (local_wild_mapped); 1116 #endif /* defined(INET6) */ 1117 return (local_wild); 1118 } 1119 1120 /* 1121 * Not found. 1122 */ 1123 return (NULL); 1124 } 1125 1126 /* 1127 * Insert PCB onto various hash lists. 1128 */ 1129 int 1130 in_pcbinshash(inp) 1131 struct inpcb *inp; 1132 { 1133 struct inpcbhead *pcbhash; 1134 struct inpcbporthead *pcbporthash; 1135 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1136 struct inpcbport *phd; 1137 u_int32_t hashkey_faddr; 1138 1139 INP_INFO_WLOCK_ASSERT(pcbinfo); 1140 #ifdef INET6 1141 if (inp->inp_vflag & INP_IPV6) 1142 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1143 else 1144 #endif /* INET6 */ 1145 hashkey_faddr = inp->inp_faddr.s_addr; 1146 1147 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1148 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1149 1150 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1151 pcbinfo->porthashmask)]; 1152 1153 /* 1154 * Go through port list and look for a head for this lport. 1155 */ 1156 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1157 if (phd->phd_port == inp->inp_lport) 1158 break; 1159 } 1160 /* 1161 * If none exists, malloc one and tack it on. 1162 */ 1163 if (phd == NULL) { 1164 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1165 if (phd == NULL) { 1166 return (ENOBUFS); /* XXX */ 1167 } 1168 phd->phd_port = inp->inp_lport; 1169 LIST_INIT(&phd->phd_pcblist); 1170 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1171 } 1172 inp->inp_phd = phd; 1173 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1174 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1175 return (0); 1176 } 1177 1178 /* 1179 * Move PCB to the proper hash bucket when { faddr, fport } have been 1180 * changed. NOTE: This does not handle the case of the lport changing (the 1181 * hashed port list would have to be updated as well), so the lport must 1182 * not change after in_pcbinshash() has been called. 1183 */ 1184 void 1185 in_pcbrehash(inp) 1186 struct inpcb *inp; 1187 { 1188 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1189 struct inpcbhead *head; 1190 u_int32_t hashkey_faddr; 1191 1192 INP_INFO_WLOCK_ASSERT(pcbinfo); 1193 /* XXX? INP_LOCK_ASSERT(inp); */ 1194 #ifdef INET6 1195 if (inp->inp_vflag & INP_IPV6) 1196 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1197 else 1198 #endif /* INET6 */ 1199 hashkey_faddr = inp->inp_faddr.s_addr; 1200 1201 head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1202 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1203 1204 LIST_REMOVE(inp, inp_hash); 1205 LIST_INSERT_HEAD(head, inp, inp_hash); 1206 } 1207 1208 /* 1209 * Remove PCB from various lists. 1210 */ 1211 void 1212 in_pcbremlists(inp) 1213 struct inpcb *inp; 1214 { 1215 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1216 1217 INP_INFO_WLOCK_ASSERT(pcbinfo); 1218 INP_LOCK_ASSERT(inp); 1219 1220 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 1221 if (inp->inp_lport) { 1222 struct inpcbport *phd = inp->inp_phd; 1223 1224 LIST_REMOVE(inp, inp_hash); 1225 LIST_REMOVE(inp, inp_portlist); 1226 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1227 LIST_REMOVE(phd, phd_hash); 1228 free(phd, M_PCB); 1229 } 1230 } 1231 LIST_REMOVE(inp, inp_list); 1232 pcbinfo->ipi_count--; 1233 } 1234 1235 /* 1236 * A set label operation has occurred at the socket layer, propagate the 1237 * label change into the in_pcb for the socket. 1238 */ 1239 void 1240 in_pcbsosetlabel(so) 1241 struct socket *so; 1242 { 1243 #ifdef MAC 1244 struct inpcb *inp; 1245 1246 /* XXX: Will assert socket lock when we have them. */ 1247 inp = (struct inpcb *)so->so_pcb; 1248 INP_LOCK(inp); 1249 mac_inpcb_sosetlabel(so, inp); 1250 INP_UNLOCK(inp); 1251 #endif 1252 } 1253 1254 int 1255 prison_xinpcb(struct thread *td, struct inpcb *inp) 1256 { 1257 if (!jailed(td->td_ucred)) 1258 return (0); 1259 if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred)) 1260 return (0); 1261 return (1); 1262 } 1263