1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/limits.h> 43 #include <sys/malloc.h> 44 #include <sys/mbuf.h> 45 #include <sys/domain.h> 46 #include <sys/protosw.h> 47 #include <sys/socket.h> 48 #include <sys/socketvar.h> 49 #include <sys/proc.h> 50 #include <sys/jail.h> 51 #include <sys/kernel.h> 52 #include <sys/sysctl.h> 53 54 #include <vm/uma.h> 55 56 #include <net/if.h> 57 #include <net/if_types.h> 58 #include <net/route.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_pcb.h> 62 #include <netinet/in_var.h> 63 #include <netinet/ip_var.h> 64 #include <netinet/tcp_var.h> 65 #ifdef INET6 66 #include <netinet/ip6.h> 67 #include <netinet6/ip6_var.h> 68 #endif /* INET6 */ 69 70 #ifdef IPSEC 71 #include <netinet6/ipsec.h> 72 #include <netkey/key.h> 73 #endif /* IPSEC */ 74 75 #ifdef FAST_IPSEC 76 #if defined(IPSEC) || defined(IPSEC_ESP) 77 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 78 #endif 79 80 #include <netipsec/ipsec.h> 81 #include <netipsec/key.h> 82 #define IPSEC 83 #endif /* FAST_IPSEC */ 84 85 struct in_addr zeroin_addr; 86 87 /* 88 * These configure the range of local port addresses assigned to 89 * "unspecified" outgoing connections/packets/whatever. 90 */ 91 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 92 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 93 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 94 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 95 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 96 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 97 98 /* 99 * Reserved ports accessible only to root. There are significant 100 * security considerations that must be accounted for when changing these, 101 * but the security benefits can be great. Please be careful. 102 */ 103 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */ 104 int ipport_reservedlow = 0; 105 106 #define RANGECHK(var, min, max) \ 107 if ((var) < (min)) { (var) = (min); } \ 108 else if ((var) > (max)) { (var) = (max); } 109 110 static int 111 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 112 { 113 int error = sysctl_handle_int(oidp, 114 oidp->oid_arg1, oidp->oid_arg2, req); 115 if (!error) { 116 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 117 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 118 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 119 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 120 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 121 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 122 } 123 return error; 124 } 125 126 #undef RANGECHK 127 128 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 129 130 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 131 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 133 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 135 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 137 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 139 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 140 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 141 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 142 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh, 143 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, ""); 144 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow, 145 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, ""); 146 147 /* 148 * in_pcb.c: manage the Protocol Control Blocks. 149 * 150 * NOTE: It is assumed that most of these functions will be called at 151 * splnet(). XXX - There are, unfortunately, a few exceptions to this 152 * rule that should be fixed. 153 */ 154 155 /* 156 * Allocate a PCB and associate it with the socket. 157 */ 158 int 159 in_pcballoc(so, pcbinfo, td) 160 struct socket *so; 161 struct inpcbinfo *pcbinfo; 162 struct thread *td; 163 { 164 register struct inpcb *inp; 165 #ifdef IPSEC 166 int error; 167 #endif 168 169 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); 170 if (inp == NULL) 171 return (ENOBUFS); 172 bzero((caddr_t)inp, sizeof(*inp)); 173 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 174 inp->inp_pcbinfo = pcbinfo; 175 inp->inp_socket = so; 176 #ifdef IPSEC 177 error = ipsec_init_policy(so, &inp->inp_sp); 178 if (error != 0) { 179 uma_zfree(pcbinfo->ipi_zone, inp); 180 return error; 181 } 182 #endif /*IPSEC*/ 183 #if defined(INET6) 184 if (INP_SOCKAF(so) == AF_INET6) { 185 inp->inp_vflag |= INP_IPV6PROTO; 186 if (ip6_v6only) 187 inp->inp_flags |= IN6P_IPV6_V6ONLY; 188 } 189 #endif 190 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 191 pcbinfo->ipi_count++; 192 so->so_pcb = (caddr_t)inp; 193 INP_LOCK_INIT(inp, "inp"); 194 #ifdef INET6 195 if (ip6_auto_flowlabel) 196 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 197 #endif 198 return (0); 199 } 200 201 int 202 in_pcbbind(inp, nam, td) 203 register struct inpcb *inp; 204 struct sockaddr *nam; 205 struct thread *td; 206 { 207 int anonport, error; 208 209 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 210 return (EINVAL); 211 anonport = inp->inp_lport == 0 && (nam == NULL || 212 ((struct sockaddr_in *)nam)->sin_port == 0); 213 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 214 &inp->inp_lport, td); 215 if (error) 216 return (error); 217 if (in_pcbinshash(inp) != 0) { 218 inp->inp_laddr.s_addr = INADDR_ANY; 219 inp->inp_lport = 0; 220 return (EAGAIN); 221 } 222 if (anonport) 223 inp->inp_flags |= INP_ANONPORT; 224 return (0); 225 } 226 227 /* 228 * Set up a bind operation on a PCB, performing port allocation 229 * as required, but do not actually modify the PCB. Callers can 230 * either complete the bind by setting inp_laddr/inp_lport and 231 * calling in_pcbinshash(), or they can just use the resulting 232 * port and address to authorise the sending of a once-off packet. 233 * 234 * On error, the values of *laddrp and *lportp are not changed. 235 */ 236 int 237 in_pcbbind_setup(inp, nam, laddrp, lportp, td) 238 struct inpcb *inp; 239 struct sockaddr *nam; 240 in_addr_t *laddrp; 241 u_short *lportp; 242 struct thread *td; 243 { 244 struct socket *so = inp->inp_socket; 245 unsigned short *lastport; 246 struct sockaddr_in *sin; 247 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 248 struct in_addr laddr; 249 u_short lport = 0; 250 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 251 int error, prison = 0; 252 253 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 254 return (EADDRNOTAVAIL); 255 laddr.s_addr = *laddrp; 256 if (nam != NULL && laddr.s_addr != INADDR_ANY) 257 return (EINVAL); 258 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 259 wild = 1; 260 if (nam) { 261 sin = (struct sockaddr_in *)nam; 262 if (nam->sa_len != sizeof (*sin)) 263 return (EINVAL); 264 #ifdef notdef 265 /* 266 * We should check the family, but old programs 267 * incorrectly fail to initialize it. 268 */ 269 if (sin->sin_family != AF_INET) 270 return (EAFNOSUPPORT); 271 #endif 272 if (sin->sin_addr.s_addr != INADDR_ANY) 273 if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 274 return(EINVAL); 275 if (sin->sin_port != *lportp) { 276 /* Don't allow the port to change. */ 277 if (*lportp != 0) 278 return (EINVAL); 279 lport = sin->sin_port; 280 } 281 /* NB: lport is left as 0 if the port isn't being changed. */ 282 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 283 /* 284 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 285 * allow complete duplication of binding if 286 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 287 * and a multicast address is bound on both 288 * new and duplicated sockets. 289 */ 290 if (so->so_options & SO_REUSEADDR) 291 reuseport = SO_REUSEADDR|SO_REUSEPORT; 292 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 293 sin->sin_port = 0; /* yech... */ 294 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 295 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 296 return (EADDRNOTAVAIL); 297 } 298 laddr = sin->sin_addr; 299 if (lport) { 300 struct inpcb *t; 301 /* GROSS */ 302 if (ntohs(lport) <= ipport_reservedhigh && 303 ntohs(lport) >= ipport_reservedlow && 304 td && suser_cred(td->td_ucred, PRISON_ROOT)) 305 return (EACCES); 306 if (td && jailed(td->td_ucred)) 307 prison = 1; 308 if (so->so_cred->cr_uid != 0 && 309 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 310 t = in_pcblookup_local(inp->inp_pcbinfo, 311 sin->sin_addr, lport, 312 prison ? 0 : INPLOOKUP_WILDCARD); 313 /* 314 * XXX 315 * This entire block sorely needs a rewrite. 316 */ 317 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 318 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 319 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 320 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) && 321 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid)) 322 return (EADDRINUSE); 323 } else 324 if (t && 325 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 326 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 327 (t->inp_socket->so_options & 328 SO_REUSEPORT) == 0) && 329 (so->so_cred->cr_uid != 330 t->inp_socket->so_cred->cr_uid)) { 331 #if defined(INET6) 332 if (ntohl(sin->sin_addr.s_addr) != 333 INADDR_ANY || 334 ntohl(t->inp_laddr.s_addr) != 335 INADDR_ANY || 336 INP_SOCKAF(so) == 337 INP_SOCKAF(t->inp_socket)) 338 #endif /* defined(INET6) */ 339 return (EADDRINUSE); 340 } 341 } 342 if (prison && 343 prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 344 return (EADDRNOTAVAIL); 345 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 346 lport, prison ? 0 : wild); 347 if (t && (t->inp_vflag & INP_TIMEWAIT)) { 348 if ((reuseport & intotw(t)->tw_so_options) == 0) 349 return (EADDRINUSE); 350 } else 351 if (t && 352 (reuseport & t->inp_socket->so_options) == 0) { 353 #if defined(INET6) 354 if (ntohl(sin->sin_addr.s_addr) != 355 INADDR_ANY || 356 ntohl(t->inp_laddr.s_addr) != 357 INADDR_ANY || 358 INP_SOCKAF(so) == 359 INP_SOCKAF(t->inp_socket)) 360 #endif /* defined(INET6) */ 361 return (EADDRINUSE); 362 } 363 } 364 } 365 if (*lportp != 0) 366 lport = *lportp; 367 if (lport == 0) { 368 u_short first, last; 369 int count; 370 371 if (laddr.s_addr != INADDR_ANY) 372 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 373 return (EINVAL); 374 375 if (inp->inp_flags & INP_HIGHPORT) { 376 first = ipport_hifirstauto; /* sysctl */ 377 last = ipport_hilastauto; 378 lastport = &pcbinfo->lasthi; 379 } else if (inp->inp_flags & INP_LOWPORT) { 380 if (td && (error = suser_cred(td->td_ucred, 381 PRISON_ROOT)) != 0) 382 return error; 383 first = ipport_lowfirstauto; /* 1023 */ 384 last = ipport_lowlastauto; /* 600 */ 385 lastport = &pcbinfo->lastlow; 386 } else { 387 first = ipport_firstauto; /* sysctl */ 388 last = ipport_lastauto; 389 lastport = &pcbinfo->lastport; 390 } 391 /* 392 * Simple check to ensure all ports are not used up causing 393 * a deadlock here. 394 * 395 * We split the two cases (up and down) so that the direction 396 * is not being tested on each round of the loop. 397 */ 398 if (first > last) { 399 /* 400 * counting down 401 */ 402 count = first - last; 403 404 do { 405 if (count-- < 0) /* completely used? */ 406 return (EADDRNOTAVAIL); 407 --*lastport; 408 if (*lastport > first || *lastport < last) 409 *lastport = first; 410 lport = htons(*lastport); 411 } while (in_pcblookup_local(pcbinfo, laddr, lport, 412 wild)); 413 } else { 414 /* 415 * counting up 416 */ 417 count = last - first; 418 419 do { 420 if (count-- < 0) /* completely used? */ 421 return (EADDRNOTAVAIL); 422 ++*lastport; 423 if (*lastport < first || *lastport > last) 424 *lastport = first; 425 lport = htons(*lastport); 426 } while (in_pcblookup_local(pcbinfo, laddr, lport, 427 wild)); 428 } 429 } 430 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 431 return (EINVAL); 432 *laddrp = laddr.s_addr; 433 *lportp = lport; 434 return (0); 435 } 436 437 /* 438 * Connect from a socket to a specified address. 439 * Both address and port must be specified in argument sin. 440 * If don't have a local address for this socket yet, 441 * then pick one. 442 */ 443 int 444 in_pcbconnect(inp, nam, td) 445 register struct inpcb *inp; 446 struct sockaddr *nam; 447 struct thread *td; 448 { 449 u_short lport, fport; 450 in_addr_t laddr, faddr; 451 int anonport, error; 452 453 lport = inp->inp_lport; 454 laddr = inp->inp_laddr.s_addr; 455 anonport = (lport == 0); 456 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 457 NULL, td); 458 if (error) 459 return (error); 460 461 /* Do the initial binding of the local address if required. */ 462 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 463 inp->inp_lport = lport; 464 inp->inp_laddr.s_addr = laddr; 465 if (in_pcbinshash(inp) != 0) { 466 inp->inp_laddr.s_addr = INADDR_ANY; 467 inp->inp_lport = 0; 468 return (EAGAIN); 469 } 470 } 471 472 /* Commit the remaining changes. */ 473 inp->inp_lport = lport; 474 inp->inp_laddr.s_addr = laddr; 475 inp->inp_faddr.s_addr = faddr; 476 inp->inp_fport = fport; 477 in_pcbrehash(inp); 478 if (anonport) 479 inp->inp_flags |= INP_ANONPORT; 480 return (0); 481 } 482 483 /* 484 * Set up for a connect from a socket to the specified address. 485 * On entry, *laddrp and *lportp should contain the current local 486 * address and port for the PCB; these are updated to the values 487 * that should be placed in inp_laddr and inp_lport to complete 488 * the connect. 489 * 490 * On success, *faddrp and *fportp will be set to the remote address 491 * and port. These are not updated in the error case. 492 * 493 * If the operation fails because the connection already exists, 494 * *oinpp will be set to the PCB of that connection so that the 495 * caller can decide to override it. In all other cases, *oinpp 496 * is set to NULL. 497 */ 498 int 499 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) 500 register struct inpcb *inp; 501 struct sockaddr *nam; 502 in_addr_t *laddrp; 503 u_short *lportp; 504 in_addr_t *faddrp; 505 u_short *fportp; 506 struct inpcb **oinpp; 507 struct thread *td; 508 { 509 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 510 struct in_ifaddr *ia; 511 struct sockaddr_in sa; 512 struct ucred *cred; 513 struct inpcb *oinp; 514 struct in_addr laddr, faddr; 515 u_short lport, fport; 516 int error; 517 518 if (oinpp != NULL) 519 *oinpp = NULL; 520 if (nam->sa_len != sizeof (*sin)) 521 return (EINVAL); 522 if (sin->sin_family != AF_INET) 523 return (EAFNOSUPPORT); 524 if (sin->sin_port == 0) 525 return (EADDRNOTAVAIL); 526 laddr.s_addr = *laddrp; 527 lport = *lportp; 528 faddr = sin->sin_addr; 529 fport = sin->sin_port; 530 cred = inp->inp_socket->so_cred; 531 if (laddr.s_addr == INADDR_ANY && jailed(cred)) { 532 bzero(&sa, sizeof(sa)); 533 sa.sin_addr.s_addr = htonl(prison_getip(cred)); 534 sa.sin_len = sizeof(sa); 535 sa.sin_family = AF_INET; 536 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 537 &laddr.s_addr, &lport, td); 538 if (error) 539 return (error); 540 } 541 542 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 543 /* 544 * If the destination address is INADDR_ANY, 545 * use the primary local address. 546 * If the supplied address is INADDR_BROADCAST, 547 * and the primary interface supports broadcast, 548 * choose the broadcast address for that interface. 549 */ 550 if (faddr.s_addr == INADDR_ANY) 551 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 552 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 553 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 554 IFF_BROADCAST)) 555 faddr = satosin(&TAILQ_FIRST( 556 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 557 } 558 if (laddr.s_addr == INADDR_ANY) { 559 register struct route *ro; 560 561 ia = (struct in_ifaddr *)0; 562 /* 563 * If route is known or can be allocated now, 564 * our src addr is taken from the i/f, else punt. 565 * Note that we should check the address family of the cached 566 * destination, in case of sharing the cache with IPv6. 567 */ 568 ro = &inp->inp_route; 569 if (ro->ro_rt && 570 (ro->ro_dst.sa_family != AF_INET || 571 satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr || 572 inp->inp_socket->so_options & SO_DONTROUTE)) { 573 RTFREE(ro->ro_rt); 574 ro->ro_rt = (struct rtentry *)0; 575 } 576 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 577 (ro->ro_rt == (struct rtentry *)0 || 578 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 579 /* No route yet, so try to acquire one */ 580 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 581 ro->ro_dst.sa_family = AF_INET; 582 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 583 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr; 584 rtalloc(ro); 585 } 586 /* 587 * If we found a route, use the address 588 * corresponding to the outgoing interface 589 * unless it is the loopback (in case a route 590 * to our address on another net goes to loopback). 591 */ 592 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 593 ia = ifatoia(ro->ro_rt->rt_ifa); 594 if (ia == 0) { 595 bzero(&sa, sizeof(sa)); 596 sa.sin_addr = faddr; 597 sa.sin_len = sizeof(sa); 598 sa.sin_family = AF_INET; 599 600 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 601 if (ia == 0) 602 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 603 if (ia == 0) 604 ia = TAILQ_FIRST(&in_ifaddrhead); 605 if (ia == 0) 606 return (EADDRNOTAVAIL); 607 } 608 /* 609 * If the destination address is multicast and an outgoing 610 * interface has been set as a multicast option, use the 611 * address of that interface as our source address. 612 */ 613 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 614 inp->inp_moptions != NULL) { 615 struct ip_moptions *imo; 616 struct ifnet *ifp; 617 618 imo = inp->inp_moptions; 619 if (imo->imo_multicast_ifp != NULL) { 620 ifp = imo->imo_multicast_ifp; 621 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 622 if (ia->ia_ifp == ifp) 623 break; 624 if (ia == 0) 625 return (EADDRNOTAVAIL); 626 } 627 } 628 laddr = ia->ia_addr.sin_addr; 629 } 630 631 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 632 0, NULL); 633 if (oinp != NULL) { 634 if (oinpp != NULL) 635 *oinpp = oinp; 636 return (EADDRINUSE); 637 } 638 if (lport == 0) { 639 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); 640 if (error) 641 return (error); 642 } 643 *laddrp = laddr.s_addr; 644 *lportp = lport; 645 *faddrp = faddr.s_addr; 646 *fportp = fport; 647 return (0); 648 } 649 650 void 651 in_pcbdisconnect(inp) 652 struct inpcb *inp; 653 { 654 655 inp->inp_faddr.s_addr = INADDR_ANY; 656 inp->inp_fport = 0; 657 in_pcbrehash(inp); 658 if (inp->inp_socket->so_state & SS_NOFDREF) 659 in_pcbdetach(inp); 660 } 661 662 void 663 in_pcbdetach(inp) 664 struct inpcb *inp; 665 { 666 struct socket *so = inp->inp_socket; 667 struct inpcbinfo *ipi = inp->inp_pcbinfo; 668 669 #ifdef IPSEC 670 ipsec4_delete_pcbpolicy(inp); 671 #endif /*IPSEC*/ 672 inp->inp_gencnt = ++ipi->ipi_gencnt; 673 in_pcbremlists(inp); 674 if (so) { 675 so->so_pcb = 0; 676 sotryfree(so); 677 } 678 if (inp->inp_options) 679 (void)m_free(inp->inp_options); 680 if (inp->inp_route.ro_rt) 681 rtfree(inp->inp_route.ro_rt); 682 ip_freemoptions(inp->inp_moptions); 683 inp->inp_vflag = 0; 684 INP_LOCK_DESTROY(inp); 685 uma_zfree(ipi->ipi_zone, inp); 686 } 687 688 struct sockaddr * 689 in_sockaddr(port, addr_p) 690 in_port_t port; 691 struct in_addr *addr_p; 692 { 693 struct sockaddr_in *sin; 694 695 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 696 M_WAITOK | M_ZERO); 697 sin->sin_family = AF_INET; 698 sin->sin_len = sizeof(*sin); 699 sin->sin_addr = *addr_p; 700 sin->sin_port = port; 701 702 return (struct sockaddr *)sin; 703 } 704 705 /* 706 * The wrapper function will pass down the pcbinfo for this function to lock. 707 * The socket must have a valid 708 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 709 * except through a kernel programming error, so it is acceptable to panic 710 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 711 * because there actually /is/ a programming error somewhere... XXX) 712 */ 713 int 714 in_setsockaddr(so, nam, pcbinfo) 715 struct socket *so; 716 struct sockaddr **nam; 717 struct inpcbinfo *pcbinfo; 718 { 719 int s; 720 register struct inpcb *inp; 721 struct in_addr addr; 722 in_port_t port; 723 724 s = splnet(); 725 INP_INFO_RLOCK(pcbinfo); 726 inp = sotoinpcb(so); 727 if (!inp) { 728 INP_INFO_RUNLOCK(pcbinfo); 729 splx(s); 730 return ECONNRESET; 731 } 732 INP_LOCK(inp); 733 port = inp->inp_lport; 734 addr = inp->inp_laddr; 735 INP_UNLOCK(inp); 736 INP_INFO_RUNLOCK(pcbinfo); 737 splx(s); 738 739 *nam = in_sockaddr(port, &addr); 740 return 0; 741 } 742 743 /* 744 * The wrapper function will pass down the pcbinfo for this function to lock. 745 */ 746 int 747 in_setpeeraddr(so, nam, pcbinfo) 748 struct socket *so; 749 struct sockaddr **nam; 750 struct inpcbinfo *pcbinfo; 751 { 752 int s; 753 register struct inpcb *inp; 754 struct in_addr addr; 755 in_port_t port; 756 757 s = splnet(); 758 INP_INFO_RLOCK(pcbinfo); 759 inp = sotoinpcb(so); 760 if (!inp) { 761 INP_INFO_RUNLOCK(pcbinfo); 762 splx(s); 763 return ECONNRESET; 764 } 765 INP_LOCK(inp); 766 port = inp->inp_fport; 767 addr = inp->inp_faddr; 768 INP_UNLOCK(inp); 769 INP_INFO_RUNLOCK(pcbinfo); 770 splx(s); 771 772 *nam = in_sockaddr(port, &addr); 773 return 0; 774 } 775 776 void 777 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 778 struct inpcbinfo *pcbinfo; 779 struct in_addr faddr; 780 int errno; 781 struct inpcb *(*notify)(struct inpcb *, int); 782 { 783 struct inpcb *inp, *ninp; 784 struct inpcbhead *head; 785 int s; 786 787 s = splnet(); 788 INP_INFO_WLOCK(pcbinfo); 789 head = pcbinfo->listhead; 790 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 791 INP_LOCK(inp); 792 ninp = LIST_NEXT(inp, inp_list); 793 #ifdef INET6 794 if ((inp->inp_vflag & INP_IPV4) == 0) { 795 INP_UNLOCK(inp); 796 continue; 797 } 798 #endif 799 if (inp->inp_faddr.s_addr != faddr.s_addr || 800 inp->inp_socket == NULL) { 801 INP_UNLOCK(inp); 802 continue; 803 } 804 if ((*notify)(inp, errno)) 805 INP_UNLOCK(inp); 806 } 807 INP_INFO_WUNLOCK(pcbinfo); 808 splx(s); 809 } 810 811 void 812 in_pcbpurgeif0(pcbinfo, ifp) 813 struct inpcbinfo *pcbinfo; 814 struct ifnet *ifp; 815 { 816 struct inpcb *inp; 817 struct ip_moptions *imo; 818 int i, gap; 819 820 /* why no splnet here? XXX */ 821 INP_INFO_RLOCK(pcbinfo); 822 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 823 INP_LOCK(inp); 824 imo = inp->inp_moptions; 825 if ((inp->inp_vflag & INP_IPV4) && 826 imo != NULL) { 827 /* 828 * Unselect the outgoing interface if it is being 829 * detached. 830 */ 831 if (imo->imo_multicast_ifp == ifp) 832 imo->imo_multicast_ifp = NULL; 833 834 /* 835 * Drop multicast group membership if we joined 836 * through the interface being detached. 837 */ 838 for (i = 0, gap = 0; i < imo->imo_num_memberships; 839 i++) { 840 if (imo->imo_membership[i]->inm_ifp == ifp) { 841 in_delmulti(imo->imo_membership[i]); 842 gap++; 843 } else if (gap != 0) 844 imo->imo_membership[i - gap] = 845 imo->imo_membership[i]; 846 } 847 imo->imo_num_memberships -= gap; 848 } 849 INP_UNLOCK(inp); 850 } 851 INP_INFO_RUNLOCK(pcbinfo); 852 } 853 854 /* 855 * Check for alternatives when higher level complains 856 * about service problems. For now, invalidate cached 857 * routing information. If the route was created dynamically 858 * (by a redirect), time to try a default gateway again. 859 */ 860 void 861 in_losing(inp) 862 struct inpcb *inp; 863 { 864 register struct rtentry *rt; 865 struct rt_addrinfo info; 866 867 if ((rt = inp->inp_route.ro_rt)) { 868 bzero((caddr_t)&info, sizeof(info)); 869 info.rti_flags = rt->rt_flags; 870 info.rti_info[RTAX_DST] = rt_key(rt); 871 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 872 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 873 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 874 if (rt->rt_flags & RTF_DYNAMIC) 875 (void) rtrequest1(RTM_DELETE, &info, NULL); 876 inp->inp_route.ro_rt = NULL; 877 rtfree(rt); 878 /* 879 * A new route can be allocated 880 * the next time output is attempted. 881 */ 882 } 883 } 884 885 /* 886 * After a routing change, flush old routing 887 * and allocate a (hopefully) better one. 888 */ 889 struct inpcb * 890 in_rtchange(inp, errno) 891 register struct inpcb *inp; 892 int errno; 893 { 894 if (inp->inp_route.ro_rt) { 895 rtfree(inp->inp_route.ro_rt); 896 inp->inp_route.ro_rt = 0; 897 /* 898 * A new route can be allocated the next time 899 * output is attempted. 900 */ 901 } 902 return inp; 903 } 904 905 /* 906 * Lookup a PCB based on the local address and port. 907 */ 908 struct inpcb * 909 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 910 struct inpcbinfo *pcbinfo; 911 struct in_addr laddr; 912 u_int lport_arg; 913 int wild_okay; 914 { 915 register struct inpcb *inp; 916 int matchwild = 3, wildcard; 917 u_short lport = lport_arg; 918 919 if (!wild_okay) { 920 struct inpcbhead *head; 921 /* 922 * Look for an unconnected (wildcard foreign addr) PCB that 923 * matches the local address and port we're looking for. 924 */ 925 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 926 LIST_FOREACH(inp, head, inp_hash) { 927 #ifdef INET6 928 if ((inp->inp_vflag & INP_IPV4) == 0) 929 continue; 930 #endif 931 if (inp->inp_faddr.s_addr == INADDR_ANY && 932 inp->inp_laddr.s_addr == laddr.s_addr && 933 inp->inp_lport == lport) { 934 /* 935 * Found. 936 */ 937 return (inp); 938 } 939 } 940 /* 941 * Not found. 942 */ 943 return (NULL); 944 } else { 945 struct inpcbporthead *porthash; 946 struct inpcbport *phd; 947 struct inpcb *match = NULL; 948 /* 949 * Best fit PCB lookup. 950 * 951 * First see if this local port is in use by looking on the 952 * port hash list. 953 */ 954 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 955 pcbinfo->porthashmask)]; 956 LIST_FOREACH(phd, porthash, phd_hash) { 957 if (phd->phd_port == lport) 958 break; 959 } 960 if (phd != NULL) { 961 /* 962 * Port is in use by one or more PCBs. Look for best 963 * fit. 964 */ 965 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 966 wildcard = 0; 967 #ifdef INET6 968 if ((inp->inp_vflag & INP_IPV4) == 0) 969 continue; 970 #endif 971 if (inp->inp_faddr.s_addr != INADDR_ANY) 972 wildcard++; 973 if (inp->inp_laddr.s_addr != INADDR_ANY) { 974 if (laddr.s_addr == INADDR_ANY) 975 wildcard++; 976 else if (inp->inp_laddr.s_addr != laddr.s_addr) 977 continue; 978 } else { 979 if (laddr.s_addr != INADDR_ANY) 980 wildcard++; 981 } 982 if (wildcard < matchwild) { 983 match = inp; 984 matchwild = wildcard; 985 if (matchwild == 0) { 986 break; 987 } 988 } 989 } 990 } 991 return (match); 992 } 993 } 994 995 /* 996 * Lookup PCB in hash list. 997 */ 998 struct inpcb * 999 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 1000 ifp) 1001 struct inpcbinfo *pcbinfo; 1002 struct in_addr faddr, laddr; 1003 u_int fport_arg, lport_arg; 1004 int wildcard; 1005 struct ifnet *ifp; 1006 { 1007 struct inpcbhead *head; 1008 register struct inpcb *inp; 1009 u_short fport = fport_arg, lport = lport_arg; 1010 1011 /* 1012 * First look for an exact match. 1013 */ 1014 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 1015 LIST_FOREACH(inp, head, inp_hash) { 1016 #ifdef INET6 1017 if ((inp->inp_vflag & INP_IPV4) == 0) 1018 continue; 1019 #endif 1020 if (inp->inp_faddr.s_addr == faddr.s_addr && 1021 inp->inp_laddr.s_addr == laddr.s_addr && 1022 inp->inp_fport == fport && 1023 inp->inp_lport == lport) { 1024 /* 1025 * Found. 1026 */ 1027 return (inp); 1028 } 1029 } 1030 if (wildcard) { 1031 struct inpcb *local_wild = NULL; 1032 #if defined(INET6) 1033 struct inpcb *local_wild_mapped = NULL; 1034 #endif /* defined(INET6) */ 1035 1036 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1037 LIST_FOREACH(inp, head, inp_hash) { 1038 #ifdef INET6 1039 if ((inp->inp_vflag & INP_IPV4) == 0) 1040 continue; 1041 #endif 1042 if (inp->inp_faddr.s_addr == INADDR_ANY && 1043 inp->inp_lport == lport) { 1044 if (ifp && ifp->if_type == IFT_FAITH && 1045 (inp->inp_flags & INP_FAITH) == 0) 1046 continue; 1047 if (inp->inp_laddr.s_addr == laddr.s_addr) 1048 return (inp); 1049 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1050 #if defined(INET6) 1051 if (INP_CHECK_SOCKAF(inp->inp_socket, 1052 AF_INET6)) 1053 local_wild_mapped = inp; 1054 else 1055 #endif /* defined(INET6) */ 1056 local_wild = inp; 1057 } 1058 } 1059 } 1060 #if defined(INET6) 1061 if (local_wild == NULL) 1062 return (local_wild_mapped); 1063 #endif /* defined(INET6) */ 1064 return (local_wild); 1065 } 1066 1067 /* 1068 * Not found. 1069 */ 1070 return (NULL); 1071 } 1072 1073 /* 1074 * Insert PCB onto various hash lists. 1075 */ 1076 int 1077 in_pcbinshash(inp) 1078 struct inpcb *inp; 1079 { 1080 struct inpcbhead *pcbhash; 1081 struct inpcbporthead *pcbporthash; 1082 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1083 struct inpcbport *phd; 1084 u_int32_t hashkey_faddr; 1085 1086 #ifdef INET6 1087 if (inp->inp_vflag & INP_IPV6) 1088 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1089 else 1090 #endif /* INET6 */ 1091 hashkey_faddr = inp->inp_faddr.s_addr; 1092 1093 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1094 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1095 1096 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1097 pcbinfo->porthashmask)]; 1098 1099 /* 1100 * Go through port list and look for a head for this lport. 1101 */ 1102 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1103 if (phd->phd_port == inp->inp_lport) 1104 break; 1105 } 1106 /* 1107 * If none exists, malloc one and tack it on. 1108 */ 1109 if (phd == NULL) { 1110 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1111 if (phd == NULL) { 1112 return (ENOBUFS); /* XXX */ 1113 } 1114 phd->phd_port = inp->inp_lport; 1115 LIST_INIT(&phd->phd_pcblist); 1116 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1117 } 1118 inp->inp_phd = phd; 1119 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1120 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1121 return (0); 1122 } 1123 1124 /* 1125 * Move PCB to the proper hash bucket when { faddr, fport } have been 1126 * changed. NOTE: This does not handle the case of the lport changing (the 1127 * hashed port list would have to be updated as well), so the lport must 1128 * not change after in_pcbinshash() has been called. 1129 */ 1130 void 1131 in_pcbrehash(inp) 1132 struct inpcb *inp; 1133 { 1134 struct inpcbhead *head; 1135 u_int32_t hashkey_faddr; 1136 1137 #ifdef INET6 1138 if (inp->inp_vflag & INP_IPV6) 1139 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1140 else 1141 #endif /* INET6 */ 1142 hashkey_faddr = inp->inp_faddr.s_addr; 1143 1144 head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1145 inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; 1146 1147 LIST_REMOVE(inp, inp_hash); 1148 LIST_INSERT_HEAD(head, inp, inp_hash); 1149 } 1150 1151 /* 1152 * Remove PCB from various lists. 1153 */ 1154 void 1155 in_pcbremlists(inp) 1156 struct inpcb *inp; 1157 { 1158 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; 1159 if (inp->inp_lport) { 1160 struct inpcbport *phd = inp->inp_phd; 1161 1162 LIST_REMOVE(inp, inp_hash); 1163 LIST_REMOVE(inp, inp_portlist); 1164 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1165 LIST_REMOVE(phd, phd_hash); 1166 free(phd, M_PCB); 1167 } 1168 } 1169 LIST_REMOVE(inp, inp_list); 1170 inp->inp_pcbinfo->ipi_count--; 1171 } 1172 1173 int 1174 prison_xinpcb(struct thread *td, struct inpcb *inp) 1175 { 1176 if (!jailed(td->td_ucred)) 1177 return (0); 1178 if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred)) 1179 return (0); 1180 return (1); 1181 } 1182