1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/domain.h> 45 #include <sys/protosw.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/proc.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/sysctl.h> 52 53 #include <machine/limits.h> 54 55 #include <vm/uma.h> 56 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 61 #include <netinet/in.h> 62 #include <netinet/in_pcb.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip_var.h> 65 #ifdef INET6 66 #include <netinet/ip6.h> 67 #include <netinet6/ip6_var.h> 68 #endif /* INET6 */ 69 70 #ifdef IPSEC 71 #include <netinet6/ipsec.h> 72 #include <netkey/key.h> 73 #endif /* IPSEC */ 74 75 #ifdef FAST_IPSEC 76 #if defined(IPSEC) || defined(IPSEC_ESP) 77 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!" 78 #endif 79 80 #include <netipsec/ipsec.h> 81 #include <netipsec/key.h> 82 #define IPSEC 83 #endif /* FAST_IPSEC */ 84 85 struct in_addr zeroin_addr; 86 87 /* 88 * These configure the range of local port addresses assigned to 89 * "unspecified" outgoing connections/packets/whatever. 90 */ 91 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 92 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 93 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 94 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */ 95 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 96 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 97 98 #define RANGECHK(var, min, max) \ 99 if ((var) < (min)) { (var) = (min); } \ 100 else if ((var) > (max)) { (var) = (max); } 101 102 static int 103 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 104 { 105 int error = sysctl_handle_int(oidp, 106 oidp->oid_arg1, oidp->oid_arg2, req); 107 if (!error) { 108 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 109 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 110 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 111 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 112 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 113 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 114 } 115 return error; 116 } 117 118 #undef RANGECHK 119 120 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 121 122 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 123 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 124 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 125 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 126 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 127 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 128 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 129 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 130 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 131 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 133 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 134 135 /* 136 * in_pcb.c: manage the Protocol Control Blocks. 137 * 138 * NOTE: It is assumed that most of these functions will be called at 139 * splnet(). XXX - There are, unfortunately, a few exceptions to this 140 * rule that should be fixed. 141 */ 142 143 /* 144 * Allocate a PCB and associate it with the socket. 145 */ 146 int 147 in_pcballoc(so, pcbinfo, td) 148 struct socket *so; 149 struct inpcbinfo *pcbinfo; 150 struct thread *td; 151 { 152 register struct inpcb *inp; 153 #ifdef IPSEC 154 int error; 155 #endif 156 157 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT); 158 if (inp == NULL) 159 return (ENOBUFS); 160 bzero((caddr_t)inp, sizeof(*inp)); 161 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 162 inp->inp_pcbinfo = pcbinfo; 163 inp->inp_socket = so; 164 #ifdef IPSEC 165 error = ipsec_init_policy(so, &inp->inp_sp); 166 if (error != 0) { 167 uma_zfree(pcbinfo->ipi_zone, inp); 168 return error; 169 } 170 #endif /*IPSEC*/ 171 #if defined(INET6) 172 if (INP_SOCKAF(so) == AF_INET6 && ip6_v6only) 173 inp->inp_flags |= IN6P_IPV6_V6ONLY; 174 #endif 175 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 176 pcbinfo->ipi_count++; 177 so->so_pcb = (caddr_t)inp; 178 INP_LOCK_INIT(inp, "inp"); 179 #ifdef INET6 180 if (ip6_auto_flowlabel) 181 inp->inp_flags |= IN6P_AUTOFLOWLABEL; 182 #endif 183 return (0); 184 } 185 186 int 187 in_pcbbind(inp, nam, td) 188 register struct inpcb *inp; 189 struct sockaddr *nam; 190 struct thread *td; 191 { 192 int anonport, error; 193 194 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY) 195 return (EINVAL); 196 anonport = inp->inp_lport == 0 && (nam == NULL || 197 ((struct sockaddr_in *)nam)->sin_port == 0); 198 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr, 199 &inp->inp_lport, td); 200 if (error) 201 return (error); 202 if (in_pcbinshash(inp) != 0) { 203 inp->inp_laddr.s_addr = INADDR_ANY; 204 inp->inp_lport = 0; 205 return (EAGAIN); 206 } 207 if (anonport) 208 inp->inp_flags |= INP_ANONPORT; 209 return (0); 210 } 211 212 /* 213 * Set up a bind operation on a PCB, performing port allocation 214 * as required, but do not actually modify the PCB. Callers can 215 * either complete the bind by setting inp_laddr/inp_lport and 216 * calling in_pcbinshash(), or they can just use the resulting 217 * port and address to authorise the sending of a once-off packet. 218 * 219 * On error, the values of *laddrp and *lportp are not changed. 220 */ 221 int 222 in_pcbbind_setup(inp, nam, laddrp, lportp, td) 223 struct inpcb *inp; 224 struct sockaddr *nam; 225 in_addr_t *laddrp; 226 u_short *lportp; 227 struct thread *td; 228 { 229 struct socket *so = inp->inp_socket; 230 unsigned short *lastport; 231 struct sockaddr_in *sin; 232 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 233 struct in_addr laddr; 234 u_short lport = 0; 235 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 236 int error, prison = 0; 237 238 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 239 return (EADDRNOTAVAIL); 240 laddr.s_addr = *laddrp; 241 if (nam != NULL && laddr.s_addr != INADDR_ANY) 242 return (EINVAL); 243 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 244 wild = 1; 245 if (nam) { 246 sin = (struct sockaddr_in *)nam; 247 if (nam->sa_len != sizeof (*sin)) 248 return (EINVAL); 249 #ifdef notdef 250 /* 251 * We should check the family, but old programs 252 * incorrectly fail to initialize it. 253 */ 254 if (sin->sin_family != AF_INET) 255 return (EAFNOSUPPORT); 256 #endif 257 if (sin->sin_addr.s_addr != INADDR_ANY) 258 if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 259 return(EINVAL); 260 if (sin->sin_port != *lportp) { 261 /* Don't allow the port to change. */ 262 if (*lportp != 0) 263 return (EINVAL); 264 lport = sin->sin_port; 265 } 266 /* NB: lport is left as 0 if the port isn't being changed. */ 267 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 268 /* 269 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 270 * allow complete duplication of binding if 271 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 272 * and a multicast address is bound on both 273 * new and duplicated sockets. 274 */ 275 if (so->so_options & SO_REUSEADDR) 276 reuseport = SO_REUSEADDR|SO_REUSEPORT; 277 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 278 sin->sin_port = 0; /* yech... */ 279 bzero(&sin->sin_zero, sizeof(sin->sin_zero)); 280 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 281 return (EADDRNOTAVAIL); 282 } 283 laddr = sin->sin_addr; 284 if (lport) { 285 struct inpcb *t; 286 /* GROSS */ 287 if (ntohs(lport) < IPPORT_RESERVED && td && 288 suser_cred(td->td_ucred, PRISON_ROOT)) 289 return (EACCES); 290 if (td && jailed(td->td_ucred)) 291 prison = 1; 292 if (so->so_cred->cr_uid != 0 && 293 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 294 t = in_pcblookup_local(inp->inp_pcbinfo, 295 sin->sin_addr, lport, 296 prison ? 0 : INPLOOKUP_WILDCARD); 297 if (t && 298 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 299 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 300 (t->inp_socket->so_options & 301 SO_REUSEPORT) == 0) && 302 (so->so_cred->cr_uid != 303 t->inp_socket->so_cred->cr_uid)) { 304 #if defined(INET6) 305 if (ntohl(sin->sin_addr.s_addr) != 306 INADDR_ANY || 307 ntohl(t->inp_laddr.s_addr) != 308 INADDR_ANY || 309 INP_SOCKAF(so) == 310 INP_SOCKAF(t->inp_socket)) 311 #endif /* defined(INET6) */ 312 return (EADDRINUSE); 313 } 314 } 315 if (prison && 316 prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr)) 317 return (EADDRNOTAVAIL); 318 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 319 lport, prison ? 0 : wild); 320 if (t && 321 (reuseport & t->inp_socket->so_options) == 0) { 322 #if defined(INET6) 323 if (ntohl(sin->sin_addr.s_addr) != 324 INADDR_ANY || 325 ntohl(t->inp_laddr.s_addr) != 326 INADDR_ANY || 327 INP_SOCKAF(so) == 328 INP_SOCKAF(t->inp_socket)) 329 #endif /* defined(INET6) */ 330 return (EADDRINUSE); 331 } 332 } 333 } 334 if (*lportp != 0) 335 lport = *lportp; 336 if (lport == 0) { 337 ushort first, last; 338 int count; 339 340 if (laddr.s_addr != INADDR_ANY) 341 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 342 return (EINVAL); 343 344 if (inp->inp_flags & INP_HIGHPORT) { 345 first = ipport_hifirstauto; /* sysctl */ 346 last = ipport_hilastauto; 347 lastport = &pcbinfo->lasthi; 348 } else if (inp->inp_flags & INP_LOWPORT) { 349 if (td && (error = suser_cred(td->td_ucred, 350 PRISON_ROOT)) != 0) 351 return error; 352 first = ipport_lowfirstauto; /* 1023 */ 353 last = ipport_lowlastauto; /* 600 */ 354 lastport = &pcbinfo->lastlow; 355 } else { 356 first = ipport_firstauto; /* sysctl */ 357 last = ipport_lastauto; 358 lastport = &pcbinfo->lastport; 359 } 360 /* 361 * Simple check to ensure all ports are not used up causing 362 * a deadlock here. 363 * 364 * We split the two cases (up and down) so that the direction 365 * is not being tested on each round of the loop. 366 */ 367 if (first > last) { 368 /* 369 * counting down 370 */ 371 count = first - last; 372 373 do { 374 if (count-- < 0) /* completely used? */ 375 return (EADDRNOTAVAIL); 376 --*lastport; 377 if (*lastport > first || *lastport < last) 378 *lastport = first; 379 lport = htons(*lastport); 380 } while (in_pcblookup_local(pcbinfo, laddr, lport, 381 wild)); 382 } else { 383 /* 384 * counting up 385 */ 386 count = last - first; 387 388 do { 389 if (count-- < 0) /* completely used? */ 390 return (EADDRNOTAVAIL); 391 ++*lastport; 392 if (*lastport < first || *lastport > last) 393 *lastport = first; 394 lport = htons(*lastport); 395 } while (in_pcblookup_local(pcbinfo, laddr, lport, 396 wild)); 397 } 398 } 399 if (prison_ip(td->td_ucred, 0, &laddr.s_addr)) 400 return (EINVAL); 401 *laddrp = laddr.s_addr; 402 *lportp = lport; 403 return (0); 404 } 405 406 /* 407 * Connect from a socket to a specified address. 408 * Both address and port must be specified in argument sin. 409 * If don't have a local address for this socket yet, 410 * then pick one. 411 */ 412 int 413 in_pcbconnect(inp, nam, td) 414 register struct inpcb *inp; 415 struct sockaddr *nam; 416 struct thread *td; 417 { 418 u_short lport, fport; 419 in_addr_t laddr, faddr; 420 int anonport, error; 421 422 lport = inp->inp_lport; 423 laddr = inp->inp_laddr.s_addr; 424 anonport = (lport == 0); 425 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport, 426 NULL, td); 427 if (error) 428 return (error); 429 430 /* Do the initial binding of the local address if required. */ 431 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) { 432 inp->inp_lport = lport; 433 inp->inp_laddr.s_addr = laddr; 434 if (in_pcbinshash(inp) != 0) { 435 inp->inp_laddr.s_addr = INADDR_ANY; 436 inp->inp_lport = 0; 437 return (EAGAIN); 438 } 439 } 440 441 /* Commit the remaining changes. */ 442 inp->inp_lport = lport; 443 inp->inp_laddr.s_addr = laddr; 444 inp->inp_faddr.s_addr = faddr; 445 inp->inp_fport = fport; 446 in_pcbrehash(inp); 447 if (anonport) 448 inp->inp_flags |= INP_ANONPORT; 449 return (0); 450 } 451 452 /* 453 * Set up for a connect from a socket to the specified address. 454 * On entry, *laddrp and *lportp should contain the current local 455 * address and port for the PCB; these are updated to the values 456 * that should be placed in inp_laddr and inp_lport to complete 457 * the connect. 458 * 459 * On success, *faddrp and *fportp will be set to the remote address 460 * and port. These are not updated in the error case. 461 * 462 * If the operation fails because the connection already exists, 463 * *oinpp will be set to the PCB of that connection so that the 464 * caller can decide to override it. In all other cases, *oinpp 465 * is set to NULL. 466 */ 467 int 468 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td) 469 register struct inpcb *inp; 470 struct sockaddr *nam; 471 in_addr_t *laddrp; 472 u_short *lportp; 473 in_addr_t *faddrp; 474 u_short *fportp; 475 struct inpcb **oinpp; 476 struct thread *td; 477 { 478 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 479 struct in_ifaddr *ia; 480 struct sockaddr_in sa; 481 struct ucred *cred; 482 struct inpcb *oinp; 483 struct in_addr laddr, faddr; 484 u_short lport, fport; 485 int error; 486 487 if (oinpp != NULL) 488 *oinpp = NULL; 489 if (nam->sa_len != sizeof (*sin)) 490 return (EINVAL); 491 if (sin->sin_family != AF_INET) 492 return (EAFNOSUPPORT); 493 if (sin->sin_port == 0) 494 return (EADDRNOTAVAIL); 495 laddr.s_addr = *laddrp; 496 lport = *lportp; 497 faddr = sin->sin_addr; 498 fport = sin->sin_port; 499 cred = inp->inp_socket->so_cred; 500 if (laddr.s_addr == INADDR_ANY && jailed(cred)) { 501 bzero(&sa, sizeof(sa)); 502 sa.sin_addr.s_addr = htonl(prison_getip(cred)); 503 sa.sin_len = sizeof(sa); 504 sa.sin_family = AF_INET; 505 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa, 506 &laddr.s_addr, &lport, td); 507 if (error) 508 return (error); 509 } 510 511 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 512 /* 513 * If the destination address is INADDR_ANY, 514 * use the primary local address. 515 * If the supplied address is INADDR_BROADCAST, 516 * and the primary interface supports broadcast, 517 * choose the broadcast address for that interface. 518 */ 519 if (faddr.s_addr == INADDR_ANY) 520 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 521 else if (faddr.s_addr == (u_long)INADDR_BROADCAST && 522 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & 523 IFF_BROADCAST)) 524 faddr = satosin(&TAILQ_FIRST( 525 &in_ifaddrhead)->ia_broadaddr)->sin_addr; 526 } 527 if (laddr.s_addr == INADDR_ANY) { 528 register struct route *ro; 529 530 ia = (struct in_ifaddr *)0; 531 /* 532 * If route is known or can be allocated now, 533 * our src addr is taken from the i/f, else punt. 534 * Note that we should check the address family of the cached 535 * destination, in case of sharing the cache with IPv6. 536 */ 537 ro = &inp->inp_route; 538 if (ro->ro_rt && 539 (ro->ro_dst.sa_family != AF_INET || 540 satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr || 541 inp->inp_socket->so_options & SO_DONTROUTE)) { 542 RTFREE(ro->ro_rt); 543 ro->ro_rt = (struct rtentry *)0; 544 } 545 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 546 (ro->ro_rt == (struct rtentry *)0 || 547 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 548 /* No route yet, so try to acquire one */ 549 bzero(&ro->ro_dst, sizeof(struct sockaddr_in)); 550 ro->ro_dst.sa_family = AF_INET; 551 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 552 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr; 553 rtalloc(ro); 554 } 555 /* 556 * If we found a route, use the address 557 * corresponding to the outgoing interface 558 * unless it is the loopback (in case a route 559 * to our address on another net goes to loopback). 560 */ 561 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 562 ia = ifatoia(ro->ro_rt->rt_ifa); 563 if (ia == 0) { 564 bzero(&sa, sizeof(sa)); 565 sa.sin_addr = faddr; 566 sa.sin_len = sizeof(sa); 567 sa.sin_family = AF_INET; 568 569 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa))); 570 if (ia == 0) 571 ia = ifatoia(ifa_ifwithnet(sintosa(&sa))); 572 if (ia == 0) 573 ia = TAILQ_FIRST(&in_ifaddrhead); 574 if (ia == 0) 575 return (EADDRNOTAVAIL); 576 } 577 /* 578 * If the destination address is multicast and an outgoing 579 * interface has been set as a multicast option, use the 580 * address of that interface as our source address. 581 */ 582 if (IN_MULTICAST(ntohl(faddr.s_addr)) && 583 inp->inp_moptions != NULL) { 584 struct ip_moptions *imo; 585 struct ifnet *ifp; 586 587 imo = inp->inp_moptions; 588 if (imo->imo_multicast_ifp != NULL) { 589 ifp = imo->imo_multicast_ifp; 590 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 591 if (ia->ia_ifp == ifp) 592 break; 593 if (ia == 0) 594 return (EADDRNOTAVAIL); 595 } 596 } 597 laddr = ia->ia_addr.sin_addr; 598 } 599 600 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport, 601 0, NULL); 602 if (oinp != NULL) { 603 if (oinpp != NULL) 604 *oinpp = oinp; 605 return (EADDRINUSE); 606 } 607 if (lport == 0) { 608 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td); 609 if (error) 610 return (error); 611 } 612 *laddrp = laddr.s_addr; 613 *lportp = lport; 614 *faddrp = faddr.s_addr; 615 *fportp = fport; 616 return (0); 617 } 618 619 void 620 in_pcbdisconnect(inp) 621 struct inpcb *inp; 622 { 623 624 inp->inp_faddr.s_addr = INADDR_ANY; 625 inp->inp_fport = 0; 626 in_pcbrehash(inp); 627 if (inp->inp_socket->so_state & SS_NOFDREF) 628 in_pcbdetach(inp); 629 } 630 631 void 632 in_pcbdetach(inp) 633 struct inpcb *inp; 634 { 635 struct socket *so = inp->inp_socket; 636 struct inpcbinfo *ipi = inp->inp_pcbinfo; 637 638 #ifdef IPSEC 639 ipsec4_delete_pcbpolicy(inp); 640 #endif /*IPSEC*/ 641 inp->inp_gencnt = ++ipi->ipi_gencnt; 642 in_pcbremlists(inp); 643 so->so_pcb = 0; 644 sotryfree(so); 645 if (inp->inp_options) 646 (void)m_free(inp->inp_options); 647 if (inp->inp_route.ro_rt) 648 rtfree(inp->inp_route.ro_rt); 649 ip_freemoptions(inp->inp_moptions); 650 inp->inp_vflag = 0; 651 INP_LOCK_DESTROY(inp); 652 uma_zfree(ipi->ipi_zone, inp); 653 } 654 655 struct sockaddr * 656 in_sockaddr(port, addr_p) 657 in_port_t port; 658 struct in_addr *addr_p; 659 { 660 struct sockaddr_in *sin; 661 662 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 663 M_WAITOK | M_ZERO); 664 sin->sin_family = AF_INET; 665 sin->sin_len = sizeof(*sin); 666 sin->sin_addr = *addr_p; 667 sin->sin_port = port; 668 669 return (struct sockaddr *)sin; 670 } 671 672 /* 673 * The wrapper function will pass down the pcbinfo for this function to lock. 674 * The socket must have a valid 675 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 676 * except through a kernel programming error, so it is acceptable to panic 677 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 678 * because there actually /is/ a programming error somewhere... XXX) 679 */ 680 int 681 in_setsockaddr(so, nam, pcbinfo) 682 struct socket *so; 683 struct sockaddr **nam; 684 struct inpcbinfo *pcbinfo; 685 { 686 int s; 687 register struct inpcb *inp; 688 struct in_addr addr; 689 in_port_t port; 690 691 s = splnet(); 692 INP_INFO_RLOCK(pcbinfo); 693 inp = sotoinpcb(so); 694 if (!inp) { 695 INP_INFO_RUNLOCK(pcbinfo); 696 splx(s); 697 return ECONNRESET; 698 } 699 INP_LOCK(inp); 700 port = inp->inp_lport; 701 addr = inp->inp_laddr; 702 INP_UNLOCK(inp); 703 INP_INFO_RUNLOCK(pcbinfo); 704 splx(s); 705 706 *nam = in_sockaddr(port, &addr); 707 return 0; 708 } 709 710 /* 711 * The wrapper function will pass down the pcbinfo for this function to lock. 712 */ 713 int 714 in_setpeeraddr(so, nam, pcbinfo) 715 struct socket *so; 716 struct sockaddr **nam; 717 struct inpcbinfo *pcbinfo; 718 { 719 int s; 720 register struct inpcb *inp; 721 struct in_addr addr; 722 in_port_t port; 723 724 s = splnet(); 725 INP_INFO_RLOCK(pcbinfo); 726 inp = sotoinpcb(so); 727 if (!inp) { 728 INP_INFO_RUNLOCK(pcbinfo); 729 splx(s); 730 return ECONNRESET; 731 } 732 INP_LOCK(inp); 733 port = inp->inp_fport; 734 addr = inp->inp_faddr; 735 INP_UNLOCK(inp); 736 INP_INFO_RUNLOCK(pcbinfo); 737 splx(s); 738 739 *nam = in_sockaddr(port, &addr); 740 return 0; 741 } 742 743 void 744 in_pcbnotifyall(pcbinfo, faddr, errno, notify) 745 struct inpcbinfo *pcbinfo; 746 struct in_addr faddr; 747 int errno; 748 struct inpcb *(*notify)(struct inpcb *, int); 749 { 750 struct inpcb *inp, *ninp; 751 struct inpcbhead *head; 752 int s; 753 754 s = splnet(); 755 INP_INFO_RLOCK(pcbinfo); 756 head = pcbinfo->listhead; 757 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) { 758 INP_LOCK(inp); 759 ninp = LIST_NEXT(inp, inp_list); 760 #ifdef INET6 761 if ((inp->inp_vflag & INP_IPV4) == 0) { 762 INP_UNLOCK(inp); 763 continue; 764 } 765 #endif 766 if (inp->inp_faddr.s_addr != faddr.s_addr || 767 inp->inp_socket == NULL) { 768 INP_UNLOCK(inp); 769 continue; 770 } 771 (*notify)(inp, errno); 772 INP_UNLOCK(inp); 773 } 774 INP_INFO_RUNLOCK(pcbinfo); 775 splx(s); 776 } 777 778 void 779 in_pcbpurgeif0(pcbinfo, ifp) 780 struct inpcbinfo *pcbinfo; 781 struct ifnet *ifp; 782 { 783 struct inpcb *inp; 784 struct ip_moptions *imo; 785 int i, gap; 786 787 /* why no splnet here? XXX */ 788 INP_INFO_RLOCK(pcbinfo); 789 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) { 790 INP_LOCK(inp); 791 imo = inp->inp_moptions; 792 if ((inp->inp_vflag & INP_IPV4) && 793 imo != NULL) { 794 /* 795 * Unselect the outgoing interface if it is being 796 * detached. 797 */ 798 if (imo->imo_multicast_ifp == ifp) 799 imo->imo_multicast_ifp = NULL; 800 801 /* 802 * Drop multicast group membership if we joined 803 * through the interface being detached. 804 */ 805 for (i = 0, gap = 0; i < imo->imo_num_memberships; 806 i++) { 807 if (imo->imo_membership[i]->inm_ifp == ifp) { 808 in_delmulti(imo->imo_membership[i]); 809 gap++; 810 } else if (gap != 0) 811 imo->imo_membership[i - gap] = 812 imo->imo_membership[i]; 813 } 814 imo->imo_num_memberships -= gap; 815 } 816 INP_UNLOCK(inp); 817 } 818 INP_INFO_RUNLOCK(pcbinfo); 819 } 820 821 /* 822 * Check for alternatives when higher level complains 823 * about service problems. For now, invalidate cached 824 * routing information. If the route was created dynamically 825 * (by a redirect), time to try a default gateway again. 826 */ 827 void 828 in_losing(inp) 829 struct inpcb *inp; 830 { 831 register struct rtentry *rt; 832 struct rt_addrinfo info; 833 834 if ((rt = inp->inp_route.ro_rt)) { 835 bzero((caddr_t)&info, sizeof(info)); 836 info.rti_flags = rt->rt_flags; 837 info.rti_info[RTAX_DST] = rt_key(rt); 838 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 839 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 840 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 841 if (rt->rt_flags & RTF_DYNAMIC) 842 (void) rtrequest1(RTM_DELETE, &info, NULL); 843 inp->inp_route.ro_rt = NULL; 844 rtfree(rt); 845 /* 846 * A new route can be allocated 847 * the next time output is attempted. 848 */ 849 } 850 } 851 852 /* 853 * After a routing change, flush old routing 854 * and allocate a (hopefully) better one. 855 */ 856 struct inpcb * 857 in_rtchange(inp, errno) 858 register struct inpcb *inp; 859 int errno; 860 { 861 if (inp->inp_route.ro_rt) { 862 rtfree(inp->inp_route.ro_rt); 863 inp->inp_route.ro_rt = 0; 864 /* 865 * A new route can be allocated the next time 866 * output is attempted. 867 */ 868 } 869 return inp; 870 } 871 872 /* 873 * Lookup a PCB based on the local address and port. 874 */ 875 struct inpcb * 876 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 877 struct inpcbinfo *pcbinfo; 878 struct in_addr laddr; 879 u_int lport_arg; 880 int wild_okay; 881 { 882 register struct inpcb *inp; 883 int matchwild = 3, wildcard; 884 u_short lport = lport_arg; 885 886 if (!wild_okay) { 887 struct inpcbhead *head; 888 /* 889 * Look for an unconnected (wildcard foreign addr) PCB that 890 * matches the local address and port we're looking for. 891 */ 892 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 893 LIST_FOREACH(inp, head, inp_hash) { 894 #ifdef INET6 895 if ((inp->inp_vflag & INP_IPV4) == 0) 896 continue; 897 #endif 898 if (inp->inp_faddr.s_addr == INADDR_ANY && 899 inp->inp_laddr.s_addr == laddr.s_addr && 900 inp->inp_lport == lport) { 901 /* 902 * Found. 903 */ 904 return (inp); 905 } 906 } 907 /* 908 * Not found. 909 */ 910 return (NULL); 911 } else { 912 struct inpcbporthead *porthash; 913 struct inpcbport *phd; 914 struct inpcb *match = NULL; 915 /* 916 * Best fit PCB lookup. 917 * 918 * First see if this local port is in use by looking on the 919 * port hash list. 920 */ 921 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 922 pcbinfo->porthashmask)]; 923 LIST_FOREACH(phd, porthash, phd_hash) { 924 if (phd->phd_port == lport) 925 break; 926 } 927 if (phd != NULL) { 928 /* 929 * Port is in use by one or more PCBs. Look for best 930 * fit. 931 */ 932 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 933 wildcard = 0; 934 #ifdef INET6 935 if ((inp->inp_vflag & INP_IPV4) == 0) 936 continue; 937 #endif 938 if (inp->inp_faddr.s_addr != INADDR_ANY) 939 wildcard++; 940 if (inp->inp_laddr.s_addr != INADDR_ANY) { 941 if (laddr.s_addr == INADDR_ANY) 942 wildcard++; 943 else if (inp->inp_laddr.s_addr != laddr.s_addr) 944 continue; 945 } else { 946 if (laddr.s_addr != INADDR_ANY) 947 wildcard++; 948 } 949 if (wildcard < matchwild) { 950 match = inp; 951 matchwild = wildcard; 952 if (matchwild == 0) { 953 break; 954 } 955 } 956 } 957 } 958 return (match); 959 } 960 } 961 962 /* 963 * Lookup PCB in hash list. 964 */ 965 struct inpcb * 966 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 967 ifp) 968 struct inpcbinfo *pcbinfo; 969 struct in_addr faddr, laddr; 970 u_int fport_arg, lport_arg; 971 int wildcard; 972 struct ifnet *ifp; 973 { 974 struct inpcbhead *head; 975 register struct inpcb *inp; 976 u_short fport = fport_arg, lport = lport_arg; 977 978 /* 979 * First look for an exact match. 980 */ 981 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 982 LIST_FOREACH(inp, head, inp_hash) { 983 #ifdef INET6 984 if ((inp->inp_vflag & INP_IPV4) == 0) 985 continue; 986 #endif 987 if (inp->inp_faddr.s_addr == faddr.s_addr && 988 inp->inp_laddr.s_addr == laddr.s_addr && 989 inp->inp_fport == fport && 990 inp->inp_lport == lport) { 991 /* 992 * Found. 993 */ 994 return (inp); 995 } 996 } 997 if (wildcard) { 998 struct inpcb *local_wild = NULL; 999 #if defined(INET6) 1000 struct inpcb *local_wild_mapped = NULL; 1001 #endif /* defined(INET6) */ 1002 1003 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 1004 LIST_FOREACH(inp, head, inp_hash) { 1005 #ifdef INET6 1006 if ((inp->inp_vflag & INP_IPV4) == 0) 1007 continue; 1008 #endif 1009 if (inp->inp_faddr.s_addr == INADDR_ANY && 1010 inp->inp_lport == lport) { 1011 if (ifp && ifp->if_type == IFT_FAITH && 1012 (inp->inp_flags & INP_FAITH) == 0) 1013 continue; 1014 if (inp->inp_laddr.s_addr == laddr.s_addr) 1015 return (inp); 1016 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 1017 #if defined(INET6) 1018 if (INP_CHECK_SOCKAF(inp->inp_socket, 1019 AF_INET6)) 1020 local_wild_mapped = inp; 1021 else 1022 #endif /* defined(INET6) */ 1023 local_wild = inp; 1024 } 1025 } 1026 } 1027 #if defined(INET6) 1028 if (local_wild == NULL) 1029 return (local_wild_mapped); 1030 #endif /* defined(INET6) */ 1031 return (local_wild); 1032 } 1033 1034 /* 1035 * Not found. 1036 */ 1037 return (NULL); 1038 } 1039 1040 /* 1041 * Insert PCB onto various hash lists. 1042 */ 1043 int 1044 in_pcbinshash(inp) 1045 struct inpcb *inp; 1046 { 1047 struct inpcbhead *pcbhash; 1048 struct inpcbporthead *pcbporthash; 1049 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 1050 struct inpcbport *phd; 1051 u_int32_t hashkey_faddr; 1052 1053 #ifdef INET6 1054 if (inp->inp_vflag & INP_IPV6) 1055 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1056 else 1057 #endif /* INET6 */ 1058 hashkey_faddr = inp->inp_faddr.s_addr; 1059 1060 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1061 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 1062 1063 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 1064 pcbinfo->porthashmask)]; 1065 1066 /* 1067 * Go through port list and look for a head for this lport. 1068 */ 1069 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1070 if (phd->phd_port == inp->inp_lport) 1071 break; 1072 } 1073 /* 1074 * If none exists, malloc one and tack it on. 1075 */ 1076 if (phd == NULL) { 1077 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1078 if (phd == NULL) { 1079 return (ENOBUFS); /* XXX */ 1080 } 1081 phd->phd_port = inp->inp_lport; 1082 LIST_INIT(&phd->phd_pcblist); 1083 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1084 } 1085 inp->inp_phd = phd; 1086 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1087 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1088 return (0); 1089 } 1090 1091 /* 1092 * Move PCB to the proper hash bucket when { faddr, fport } have been 1093 * changed. NOTE: This does not handle the case of the lport changing (the 1094 * hashed port list would have to be updated as well), so the lport must 1095 * not change after in_pcbinshash() has been called. 1096 */ 1097 void 1098 in_pcbrehash(inp) 1099 struct inpcb *inp; 1100 { 1101 struct inpcbhead *head; 1102 u_int32_t hashkey_faddr; 1103 1104 #ifdef INET6 1105 if (inp->inp_vflag & INP_IPV6) 1106 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1107 else 1108 #endif /* INET6 */ 1109 hashkey_faddr = inp->inp_faddr.s_addr; 1110 1111 head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1112 inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; 1113 1114 LIST_REMOVE(inp, inp_hash); 1115 LIST_INSERT_HEAD(head, inp, inp_hash); 1116 } 1117 1118 /* 1119 * Remove PCB from various lists. 1120 */ 1121 void 1122 in_pcbremlists(inp) 1123 struct inpcb *inp; 1124 { 1125 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; 1126 if (inp->inp_lport) { 1127 struct inpcbport *phd = inp->inp_phd; 1128 1129 LIST_REMOVE(inp, inp_hash); 1130 LIST_REMOVE(inp, inp_portlist); 1131 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1132 LIST_REMOVE(phd, phd_hash); 1133 free(phd, M_PCB); 1134 } 1135 } 1136 LIST_REMOVE(inp, inp_list); 1137 inp->inp_pcbinfo->ipi_count--; 1138 } 1139 1140 int 1141 prison_xinpcb(struct thread *td, struct inpcb *inp) 1142 { 1143 if (!jailed(td->td_ucred)) 1144 return (0); 1145 if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred)) 1146 return (0); 1147 return (1); 1148 } 1149