1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/domain.h> 45 #include <sys/protosw.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/proc.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/sysctl.h> 52 53 #include <machine/limits.h> 54 55 #include <vm/vm_zone.h> 56 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 61 #include <netinet/in.h> 62 #include <netinet/in_pcb.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/tcp.h> 66 #include <netinet/tcp_var.h> 67 #ifdef INET6 68 #include <netinet/ip6.h> 69 #include <netinet6/ip6_var.h> 70 #endif /* INET6 */ 71 72 #include "faith.h" 73 74 #ifdef IPSEC 75 #include <netinet6/ipsec.h> 76 #include <netkey/key.h> 77 #endif /* IPSEC */ 78 79 struct in_addr zeroin_addr; 80 81 static void in_rtchange __P((struct inpcb *, int)); 82 83 /* 84 * These configure the range of local port addresses assigned to 85 * "unspecified" outgoing connections/packets/whatever. 86 */ 87 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 88 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 89 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 90 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 91 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 92 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 93 94 #define RANGECHK(var, min, max) \ 95 if ((var) < (min)) { (var) = (min); } \ 96 else if ((var) > (max)) { (var) = (max); } 97 98 static int 99 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 100 { 101 int error = sysctl_handle_int(oidp, 102 oidp->oid_arg1, oidp->oid_arg2, req); 103 if (!error) { 104 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 105 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 106 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 107 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 108 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 109 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 110 } 111 return error; 112 } 113 114 #undef RANGECHK 115 116 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 117 118 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 119 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 120 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 121 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 122 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 123 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 124 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 125 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 126 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 127 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 128 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 129 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 130 131 /* 132 * in_pcb.c: manage the Protocol Control Blocks. 133 * 134 * NOTE: It is assumed that most of these functions will be called at 135 * splnet(). XXX - There are, unfortunately, a few exceptions to this 136 * rule that should be fixed. 137 */ 138 139 /* 140 * Allocate a PCB and associate it with the socket. 141 */ 142 int 143 in_pcballoc(so, pcbinfo, p) 144 struct socket *so; 145 struct inpcbinfo *pcbinfo; 146 struct proc *p; 147 { 148 register struct inpcb *inp; 149 150 inp = zalloc(pcbinfo->ipi_zone); 151 if (inp == NULL) 152 return (ENOBUFS); 153 bzero((caddr_t)inp, sizeof(*inp)); 154 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 155 inp->inp_pcbinfo = pcbinfo; 156 inp->inp_socket = so; 157 #if defined(INET6) 158 if (ip6_mapped_addr_on) 159 inp->inp_flags &= ~IN6P_BINDV6ONLY; 160 else 161 inp->inp_flags |= IN6P_BINDV6ONLY; 162 #endif 163 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 164 pcbinfo->ipi_count++; 165 so->so_pcb = (caddr_t)inp; 166 return (0); 167 } 168 169 int 170 in_pcbbind(inp, nam, p) 171 register struct inpcb *inp; 172 struct sockaddr *nam; 173 struct proc *p; 174 { 175 register struct socket *so = inp->inp_socket; 176 unsigned short *lastport; 177 struct sockaddr_in *sin; 178 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 179 u_short lport = 0; 180 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 181 int error, prison = 0; 182 183 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 184 return (EADDRNOTAVAIL); 185 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 186 return (EINVAL); 187 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 188 wild = 1; 189 if (nam) { 190 sin = (struct sockaddr_in *)nam; 191 if (nam->sa_len != sizeof (*sin)) 192 return (EINVAL); 193 #ifdef notdef 194 /* 195 * We should check the family, but old programs 196 * incorrectly fail to initialize it. 197 */ 198 if (sin->sin_family != AF_INET) 199 return (EAFNOSUPPORT); 200 #endif 201 if (sin->sin_addr.s_addr != INADDR_ANY) 202 if (prison_ip(p, 0, &sin->sin_addr.s_addr)) 203 return(EINVAL); 204 lport = sin->sin_port; 205 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 206 /* 207 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 208 * allow complete duplication of binding if 209 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 210 * and a multicast address is bound on both 211 * new and duplicated sockets. 212 */ 213 if (so->so_options & SO_REUSEADDR) 214 reuseport = SO_REUSEADDR|SO_REUSEPORT; 215 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 216 sin->sin_port = 0; /* yech... */ 217 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 218 return (EADDRNOTAVAIL); 219 } 220 if (lport) { 221 struct inpcb *t; 222 223 /* GROSS */ 224 if (ntohs(lport) < IPPORT_RESERVED && p && 225 suser_xxx(0, p, PRISON_ROOT)) 226 return (EACCES); 227 if (p && p->p_prison) 228 prison = 1; 229 if (so->so_cred->cr_uid != 0 && 230 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 231 t = in_pcblookup_local(inp->inp_pcbinfo, 232 sin->sin_addr, lport, 233 prison ? 0 : INPLOOKUP_WILDCARD); 234 if (t && 235 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 236 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 237 (t->inp_socket->so_options & 238 SO_REUSEPORT) == 0) && 239 (so->so_cred->cr_uid != 240 t->inp_socket->so_cred->cr_uid)) { 241 #if defined(INET6) 242 if ((inp->inp_flags & 243 IN6P_BINDV6ONLY) != 0 || 244 ntohl(sin->sin_addr.s_addr) != 245 INADDR_ANY || 246 ntohl(t->inp_laddr.s_addr) != 247 INADDR_ANY || 248 INP_SOCKAF(so) == 249 INP_SOCKAF(t->inp_socket)) 250 #endif /* defined(INET6) */ 251 return (EADDRINUSE); 252 } 253 } 254 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 255 lport, prison ? 0 : wild); 256 if (t && 257 (reuseport & t->inp_socket->so_options) == 0) { 258 #if defined(INET6) 259 if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || 260 ntohl(sin->sin_addr.s_addr) != 261 INADDR_ANY || 262 ntohl(t->inp_laddr.s_addr) != 263 INADDR_ANY || 264 INP_SOCKAF(so) == 265 INP_SOCKAF(t->inp_socket)) 266 #endif /* defined(INET6) */ 267 return (EADDRINUSE); 268 } 269 } 270 inp->inp_laddr = sin->sin_addr; 271 } 272 if (lport == 0) { 273 ushort first, last; 274 int count; 275 276 if (inp->inp_laddr.s_addr != INADDR_ANY) 277 if (prison_ip(p, 0, &inp->inp_laddr.s_addr )) 278 return (EINVAL); 279 inp->inp_flags |= INP_ANONPORT; 280 281 if (inp->inp_flags & INP_HIGHPORT) { 282 first = ipport_hifirstauto; /* sysctl */ 283 last = ipport_hilastauto; 284 lastport = &pcbinfo->lasthi; 285 } else if (inp->inp_flags & INP_LOWPORT) { 286 if (p && (error = suser_xxx(0, p, PRISON_ROOT))) 287 return error; 288 first = ipport_lowfirstauto; /* 1023 */ 289 last = ipport_lowlastauto; /* 600 */ 290 lastport = &pcbinfo->lastlow; 291 } else { 292 first = ipport_firstauto; /* sysctl */ 293 last = ipport_lastauto; 294 lastport = &pcbinfo->lastport; 295 } 296 /* 297 * Simple check to ensure all ports are not used up causing 298 * a deadlock here. 299 * 300 * We split the two cases (up and down) so that the direction 301 * is not being tested on each round of the loop. 302 */ 303 if (first > last) { 304 /* 305 * counting down 306 */ 307 count = first - last; 308 309 do { 310 if (count-- < 0) { /* completely used? */ 311 /* 312 * Undo any address bind that may have 313 * occurred above. 314 */ 315 inp->inp_laddr.s_addr = INADDR_ANY; 316 return (EADDRNOTAVAIL); 317 } 318 --*lastport; 319 if (*lastport > first || *lastport < last) 320 *lastport = first; 321 lport = htons(*lastport); 322 } while (in_pcblookup_local(pcbinfo, 323 inp->inp_laddr, lport, wild)); 324 } else { 325 /* 326 * counting up 327 */ 328 count = last - first; 329 330 do { 331 if (count-- < 0) { /* completely used? */ 332 /* 333 * Undo any address bind that may have 334 * occurred above. 335 */ 336 inp->inp_laddr.s_addr = INADDR_ANY; 337 return (EADDRNOTAVAIL); 338 } 339 ++*lastport; 340 if (*lastport < first || *lastport > last) 341 *lastport = first; 342 lport = htons(*lastport); 343 } while (in_pcblookup_local(pcbinfo, 344 inp->inp_laddr, lport, wild)); 345 } 346 } 347 inp->inp_lport = lport; 348 if (prison_ip(p, 0, &inp->inp_laddr.s_addr)) 349 return(EINVAL); 350 if (in_pcbinshash(inp) != 0) { 351 inp->inp_laddr.s_addr = INADDR_ANY; 352 inp->inp_lport = 0; 353 return (EAGAIN); 354 } 355 return (0); 356 } 357 358 /* 359 * Transform old in_pcbconnect() into an inner subroutine for new 360 * in_pcbconnect(): Do some validity-checking on the remote 361 * address (in mbuf 'nam') and then determine local host address 362 * (i.e., which interface) to use to access that remote host. 363 * 364 * This preserves definition of in_pcbconnect(), while supporting a 365 * slightly different version for T/TCP. (This is more than 366 * a bit of a kludge, but cleaning up the internal interfaces would 367 * have forced minor changes in every protocol). 368 */ 369 370 int 371 in_pcbladdr(inp, nam, plocal_sin) 372 register struct inpcb *inp; 373 struct sockaddr *nam; 374 struct sockaddr_in **plocal_sin; 375 { 376 struct in_ifaddr *ia; 377 register struct sockaddr_in *sin = (struct sockaddr_in *)nam; 378 379 if (nam->sa_len != sizeof (*sin)) 380 return (EINVAL); 381 if (sin->sin_family != AF_INET) 382 return (EAFNOSUPPORT); 383 if (sin->sin_port == 0) 384 return (EADDRNOTAVAIL); 385 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 386 /* 387 * If the destination address is INADDR_ANY, 388 * use the primary local address. 389 * If the supplied address is INADDR_BROADCAST, 390 * and the primary interface supports broadcast, 391 * choose the broadcast address for that interface. 392 */ 393 #define satosin(sa) ((struct sockaddr_in *)(sa)) 394 #define sintosa(sin) ((struct sockaddr *)(sin)) 395 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) 396 if (sin->sin_addr.s_addr == INADDR_ANY) 397 sin->sin_addr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr; 398 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 399 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags & IFF_BROADCAST)) 400 sin->sin_addr = satosin(&TAILQ_FIRST(&in_ifaddrhead)->ia_broadaddr)->sin_addr; 401 } 402 if (inp->inp_laddr.s_addr == INADDR_ANY) { 403 register struct route *ro; 404 405 ia = (struct in_ifaddr *)0; 406 /* 407 * If route is known or can be allocated now, 408 * our src addr is taken from the i/f, else punt. 409 */ 410 ro = &inp->inp_route; 411 if (ro->ro_rt && 412 (satosin(&ro->ro_dst)->sin_addr.s_addr != 413 sin->sin_addr.s_addr || 414 inp->inp_socket->so_options & SO_DONTROUTE)) { 415 RTFREE(ro->ro_rt); 416 ro->ro_rt = (struct rtentry *)0; 417 } 418 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 419 (ro->ro_rt == (struct rtentry *)0 || 420 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 421 /* No route yet, so try to acquire one */ 422 ro->ro_dst.sa_family = AF_INET; 423 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 424 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 425 sin->sin_addr; 426 rtalloc(ro); 427 } 428 /* 429 * If we found a route, use the address 430 * corresponding to the outgoing interface 431 * unless it is the loopback (in case a route 432 * to our address on another net goes to loopback). 433 */ 434 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 435 ia = ifatoia(ro->ro_rt->rt_ifa); 436 if (ia == 0) { 437 u_short fport = sin->sin_port; 438 439 sin->sin_port = 0; 440 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 441 if (ia == 0) 442 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 443 sin->sin_port = fport; 444 if (ia == 0) 445 ia = TAILQ_FIRST(&in_ifaddrhead); 446 if (ia == 0) 447 return (EADDRNOTAVAIL); 448 } 449 /* 450 * If the destination address is multicast and an outgoing 451 * interface has been set as a multicast option, use the 452 * address of that interface as our source address. 453 */ 454 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 455 inp->inp_moptions != NULL) { 456 struct ip_moptions *imo; 457 struct ifnet *ifp; 458 459 imo = inp->inp_moptions; 460 if (imo->imo_multicast_ifp != NULL) { 461 ifp = imo->imo_multicast_ifp; 462 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) 463 if (ia->ia_ifp == ifp) 464 break; 465 if (ia == 0) 466 return (EADDRNOTAVAIL); 467 } 468 } 469 /* 470 * Don't do pcblookup call here; return interface in plocal_sin 471 * and exit to caller, that will do the lookup. 472 */ 473 *plocal_sin = &ia->ia_addr; 474 475 } 476 return(0); 477 } 478 479 /* 480 * Outer subroutine: 481 * Connect from a socket to a specified address. 482 * Both address and port must be specified in argument sin. 483 * If don't have a local address for this socket yet, 484 * then pick one. 485 */ 486 int 487 in_pcbconnect(inp, nam, p) 488 register struct inpcb *inp; 489 struct sockaddr *nam; 490 struct proc *p; 491 { 492 struct sockaddr_in *ifaddr; 493 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 494 struct sockaddr_in sa; 495 int error; 496 497 if (inp->inp_laddr.s_addr == INADDR_ANY && p->p_prison != NULL) { 498 bzero(&sa, sizeof (sa)); 499 sa.sin_addr.s_addr = htonl(p->p_prison->pr_ip); 500 sa.sin_len=sizeof (sa); 501 sa.sin_family = AF_INET; 502 error = in_pcbbind(inp, (struct sockaddr *)&sa, p); 503 if (error) 504 return (error); 505 } 506 /* 507 * Call inner routine, to assign local interface address. 508 */ 509 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0) 510 return(error); 511 512 if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, 513 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, 514 inp->inp_lport, 0, NULL) != NULL) { 515 return (EADDRINUSE); 516 } 517 if (inp->inp_laddr.s_addr == INADDR_ANY) { 518 if (inp->inp_lport == 0) { 519 error = in_pcbbind(inp, (struct sockaddr *)0, p); 520 if (error) 521 return (error); 522 } 523 inp->inp_laddr = ifaddr->sin_addr; 524 } 525 inp->inp_faddr = sin->sin_addr; 526 inp->inp_fport = sin->sin_port; 527 in_pcbrehash(inp); 528 return (0); 529 } 530 531 void 532 in_pcbdisconnect(inp) 533 struct inpcb *inp; 534 { 535 536 inp->inp_faddr.s_addr = INADDR_ANY; 537 inp->inp_fport = 0; 538 in_pcbrehash(inp); 539 if (inp->inp_socket->so_state & SS_NOFDREF) 540 in_pcbdetach(inp); 541 } 542 543 void 544 in_pcbdetach(inp) 545 struct inpcb *inp; 546 { 547 struct socket *so = inp->inp_socket; 548 struct inpcbinfo *ipi = inp->inp_pcbinfo; 549 struct rtentry *rt = inp->inp_route.ro_rt; 550 551 #ifdef IPSEC 552 ipsec4_delete_pcbpolicy(inp); 553 #endif /*IPSEC*/ 554 inp->inp_gencnt = ++ipi->ipi_gencnt; 555 in_pcbremlists(inp); 556 so->so_pcb = 0; 557 sofree(so); 558 if (inp->inp_options) 559 (void)m_free(inp->inp_options); 560 if (rt) { 561 /* 562 * route deletion requires reference count to be <= zero 563 */ 564 if ((rt->rt_flags & RTF_DELCLONE) && 565 (rt->rt_flags & RTF_WASCLONED)) { 566 if (--rt->rt_refcnt <= 0) { 567 rt->rt_flags &= ~RTF_UP; 568 rtrequest(RTM_DELETE, rt_key(rt), 569 rt->rt_gateway, rt_mask(rt), 570 rt->rt_flags, (struct rtentry **)0); 571 } 572 else 573 /* 574 * more than one reference, bump it up 575 * again. 576 */ 577 rt->rt_refcnt++; 578 } 579 else 580 rtfree(rt); 581 } 582 ip_freemoptions(inp->inp_moptions); 583 inp->inp_vflag = 0; 584 zfree(ipi->ipi_zone, inp); 585 } 586 587 /* 588 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 589 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 590 * in struct pr_usrreqs, so that protocols can just reference then directly 591 * without the need for a wrapper function. The socket must have a valid 592 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 593 * except through a kernel programming error, so it is acceptable to panic 594 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 595 * because there actually /is/ a programming error somewhere... XXX) 596 */ 597 int 598 in_setsockaddr(so, nam) 599 struct socket *so; 600 struct sockaddr **nam; 601 { 602 int s; 603 register struct inpcb *inp; 604 register struct sockaddr_in *sin; 605 606 /* 607 * Do the malloc first in case it blocks. 608 */ 609 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 610 M_WAITOK | M_ZERO); 611 sin->sin_family = AF_INET; 612 sin->sin_len = sizeof(*sin); 613 614 s = splnet(); 615 inp = sotoinpcb(so); 616 if (!inp) { 617 splx(s); 618 free(sin, M_SONAME); 619 return ECONNRESET; 620 } 621 sin->sin_port = inp->inp_lport; 622 sin->sin_addr = inp->inp_laddr; 623 splx(s); 624 625 *nam = (struct sockaddr *)sin; 626 return 0; 627 } 628 629 int 630 in_setpeeraddr(so, nam) 631 struct socket *so; 632 struct sockaddr **nam; 633 { 634 int s; 635 struct inpcb *inp; 636 register struct sockaddr_in *sin; 637 638 /* 639 * Do the malloc first in case it blocks. 640 */ 641 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 642 M_WAITOK | M_ZERO); 643 sin->sin_family = AF_INET; 644 sin->sin_len = sizeof(*sin); 645 646 s = splnet(); 647 inp = sotoinpcb(so); 648 if (!inp) { 649 splx(s); 650 free(sin, M_SONAME); 651 return ECONNRESET; 652 } 653 sin->sin_port = inp->inp_fport; 654 sin->sin_addr = inp->inp_faddr; 655 splx(s); 656 657 *nam = (struct sockaddr *)sin; 658 return 0; 659 } 660 661 /* 662 * Pass some notification to all connections of a protocol 663 * associated with address dst. The local address and/or port numbers 664 * may be specified to limit the search. The "usual action" will be 665 * taken, depending on the ctlinput cmd. The caller must filter any 666 * cmds that are uninteresting (e.g., no error in the map). 667 * Call the protocol specific routine (if any) to report 668 * any errors for each matching socket. 669 * 670 * If tcp_seq_check != 0 it also checks if tcp_sequence is 671 * a valid TCP sequence number for the session. 672 */ 673 void 674 in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify, tcp_sequence, tcp_seq_check) 675 struct inpcbhead *head; 676 struct sockaddr *dst; 677 u_int fport_arg, lport_arg; 678 struct in_addr laddr; 679 int cmd; 680 void (*notify) __P((struct inpcb *, int)); 681 u_int32_t tcp_sequence; 682 int tcp_seq_check; 683 { 684 register struct inpcb *inp, *oinp; 685 struct in_addr faddr; 686 u_short fport = fport_arg, lport = lport_arg; 687 int errno, s; 688 689 if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET) 690 return; 691 faddr = ((struct sockaddr_in *)dst)->sin_addr; 692 if (faddr.s_addr == INADDR_ANY) 693 return; 694 695 /* 696 * Redirects go to all references to the destination, 697 * and use in_rtchange to invalidate the route cache. 698 * Dead host indications: notify all references to the destination. 699 * Otherwise, if we have knowledge of the local port and address, 700 * deliver only to that socket. 701 */ 702 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 703 fport = 0; 704 lport = 0; 705 laddr.s_addr = 0; 706 if (cmd != PRC_HOSTDEAD) 707 notify = in_rtchange; 708 } 709 errno = inetctlerrmap[cmd]; 710 s = splnet(); 711 for (inp = LIST_FIRST(head); inp != NULL;) { 712 #ifdef INET6 713 if ((inp->inp_vflag & INP_IPV4) == 0) { 714 inp = LIST_NEXT(inp, inp_list); 715 continue; 716 } 717 #endif 718 if (inp->inp_faddr.s_addr != faddr.s_addr || 719 inp->inp_socket == 0 || 720 (lport && inp->inp_lport != lport) || 721 (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) || 722 (fport && inp->inp_fport != fport)) { 723 inp = LIST_NEXT(inp, inp_list); 724 continue; 725 } 726 /* 727 * If tcp_seq_check is set, then skip sessions where 728 * the sequence number is not one of a unacknowledged 729 * packet. 730 * 731 * If it doesn't match, we break the loop, as only a 732 * single session can match on src/dst ip addresses 733 * and TCP port numbers. 734 */ 735 if ((tcp_seq_check == 1) && (tcp_seq_vs_sess(inp, tcp_sequence) == 0)) { 736 inp = LIST_NEXT(inp, inp_list); 737 break; 738 } 739 oinp = inp; 740 inp = LIST_NEXT(inp, inp_list); 741 if (notify) 742 (*notify)(oinp, errno); 743 } 744 splx(s); 745 } 746 747 /* 748 * Check for alternatives when higher level complains 749 * about service problems. For now, invalidate cached 750 * routing information. If the route was created dynamically 751 * (by a redirect), time to try a default gateway again. 752 */ 753 void 754 in_losing(inp) 755 struct inpcb *inp; 756 { 757 register struct rtentry *rt; 758 struct rt_addrinfo info; 759 760 if ((rt = inp->inp_route.ro_rt)) { 761 inp->inp_route.ro_rt = 0; 762 bzero((caddr_t)&info, sizeof(info)); 763 info.rti_info[RTAX_DST] = 764 (struct sockaddr *)&inp->inp_route.ro_dst; 765 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 766 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 767 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 768 if (rt->rt_flags & RTF_DYNAMIC) 769 (void) rtrequest(RTM_DELETE, rt_key(rt), 770 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 771 (struct rtentry **)0); 772 else 773 /* 774 * A new route can be allocated 775 * the next time output is attempted. 776 */ 777 rtfree(rt); 778 } 779 } 780 781 /* 782 * After a routing change, flush old routing 783 * and allocate a (hopefully) better one. 784 */ 785 static void 786 in_rtchange(inp, errno) 787 register struct inpcb *inp; 788 int errno; 789 { 790 if (inp->inp_route.ro_rt) { 791 rtfree(inp->inp_route.ro_rt); 792 inp->inp_route.ro_rt = 0; 793 /* 794 * A new route can be allocated the next time 795 * output is attempted. 796 */ 797 } 798 } 799 800 /* 801 * Lookup a PCB based on the local address and port. 802 */ 803 struct inpcb * 804 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 805 struct inpcbinfo *pcbinfo; 806 struct in_addr laddr; 807 u_int lport_arg; 808 int wild_okay; 809 { 810 register struct inpcb *inp; 811 int matchwild = 3, wildcard; 812 u_short lport = lport_arg; 813 814 if (!wild_okay) { 815 struct inpcbhead *head; 816 /* 817 * Look for an unconnected (wildcard foreign addr) PCB that 818 * matches the local address and port we're looking for. 819 */ 820 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 821 LIST_FOREACH(inp, head, inp_hash) { 822 #ifdef INET6 823 if ((inp->inp_vflag & INP_IPV4) == 0) 824 continue; 825 #endif 826 if (inp->inp_faddr.s_addr == INADDR_ANY && 827 inp->inp_laddr.s_addr == laddr.s_addr && 828 inp->inp_lport == lport) { 829 /* 830 * Found. 831 */ 832 return (inp); 833 } 834 } 835 /* 836 * Not found. 837 */ 838 return (NULL); 839 } else { 840 struct inpcbporthead *porthash; 841 struct inpcbport *phd; 842 struct inpcb *match = NULL; 843 /* 844 * Best fit PCB lookup. 845 * 846 * First see if this local port is in use by looking on the 847 * port hash list. 848 */ 849 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 850 pcbinfo->porthashmask)]; 851 LIST_FOREACH(phd, porthash, phd_hash) { 852 if (phd->phd_port == lport) 853 break; 854 } 855 if (phd != NULL) { 856 /* 857 * Port is in use by one or more PCBs. Look for best 858 * fit. 859 */ 860 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 861 wildcard = 0; 862 #ifdef INET6 863 if ((inp->inp_vflag & INP_IPV4) == 0) 864 continue; 865 #endif 866 if (inp->inp_faddr.s_addr != INADDR_ANY) 867 wildcard++; 868 if (inp->inp_laddr.s_addr != INADDR_ANY) { 869 if (laddr.s_addr == INADDR_ANY) 870 wildcard++; 871 else if (inp->inp_laddr.s_addr != laddr.s_addr) 872 continue; 873 } else { 874 if (laddr.s_addr != INADDR_ANY) 875 wildcard++; 876 } 877 if (wildcard < matchwild) { 878 match = inp; 879 matchwild = wildcard; 880 if (matchwild == 0) { 881 break; 882 } 883 } 884 } 885 } 886 return (match); 887 } 888 } 889 890 /* 891 * Lookup PCB in hash list. 892 */ 893 struct inpcb * 894 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 895 ifp) 896 struct inpcbinfo *pcbinfo; 897 struct in_addr faddr, laddr; 898 u_int fport_arg, lport_arg; 899 int wildcard; 900 struct ifnet *ifp; 901 { 902 struct inpcbhead *head; 903 register struct inpcb *inp; 904 u_short fport = fport_arg, lport = lport_arg; 905 906 /* 907 * First look for an exact match. 908 */ 909 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 910 LIST_FOREACH(inp, head, inp_hash) { 911 #ifdef INET6 912 if ((inp->inp_vflag & INP_IPV4) == 0) 913 continue; 914 #endif 915 if (inp->inp_faddr.s_addr == faddr.s_addr && 916 inp->inp_laddr.s_addr == laddr.s_addr && 917 inp->inp_fport == fport && 918 inp->inp_lport == lport) { 919 /* 920 * Found. 921 */ 922 return (inp); 923 } 924 } 925 if (wildcard) { 926 struct inpcb *local_wild = NULL; 927 #if defined(INET6) 928 struct inpcb *local_wild_mapped = NULL; 929 #endif /* defined(INET6) */ 930 931 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 932 LIST_FOREACH(inp, head, inp_hash) { 933 #ifdef INET6 934 if ((inp->inp_vflag & INP_IPV4) == 0) 935 continue; 936 #endif 937 if (inp->inp_faddr.s_addr == INADDR_ANY && 938 inp->inp_lport == lport) { 939 #if defined(NFAITH) && NFAITH > 0 940 if (ifp && ifp->if_type == IFT_FAITH && 941 (inp->inp_flags & INP_FAITH) == 0) 942 continue; 943 #endif 944 if (inp->inp_laddr.s_addr == laddr.s_addr) 945 return (inp); 946 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 947 #if defined(INET6) 948 if (INP_CHECK_SOCKAF(inp->inp_socket, 949 AF_INET6)) 950 local_wild_mapped = inp; 951 else 952 #endif /* defined(INET6) */ 953 local_wild = inp; 954 } 955 } 956 } 957 #if defined(INET6) 958 if (local_wild == NULL) 959 return (local_wild_mapped); 960 #endif /* defined(INET6) */ 961 return (local_wild); 962 } 963 964 /* 965 * Not found. 966 */ 967 return (NULL); 968 } 969 970 /* 971 * Insert PCB onto various hash lists. 972 */ 973 int 974 in_pcbinshash(inp) 975 struct inpcb *inp; 976 { 977 struct inpcbhead *pcbhash; 978 struct inpcbporthead *pcbporthash; 979 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 980 struct inpcbport *phd; 981 u_int32_t hashkey_faddr; 982 983 #ifdef INET6 984 if (inp->inp_vflag & INP_IPV6) 985 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 986 else 987 #endif /* INET6 */ 988 hashkey_faddr = inp->inp_faddr.s_addr; 989 990 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 991 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 992 993 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 994 pcbinfo->porthashmask)]; 995 996 /* 997 * Go through port list and look for a head for this lport. 998 */ 999 LIST_FOREACH(phd, pcbporthash, phd_hash) { 1000 if (phd->phd_port == inp->inp_lport) 1001 break; 1002 } 1003 /* 1004 * If none exists, malloc one and tack it on. 1005 */ 1006 if (phd == NULL) { 1007 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1008 if (phd == NULL) { 1009 return (ENOBUFS); /* XXX */ 1010 } 1011 phd->phd_port = inp->inp_lport; 1012 LIST_INIT(&phd->phd_pcblist); 1013 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1014 } 1015 inp->inp_phd = phd; 1016 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1017 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1018 return (0); 1019 } 1020 1021 /* 1022 * Move PCB to the proper hash bucket when { faddr, fport } have been 1023 * changed. NOTE: This does not handle the case of the lport changing (the 1024 * hashed port list would have to be updated as well), so the lport must 1025 * not change after in_pcbinshash() has been called. 1026 */ 1027 void 1028 in_pcbrehash(inp) 1029 struct inpcb *inp; 1030 { 1031 struct inpcbhead *head; 1032 u_int32_t hashkey_faddr; 1033 1034 #ifdef INET6 1035 if (inp->inp_vflag & INP_IPV6) 1036 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1037 else 1038 #endif /* INET6 */ 1039 hashkey_faddr = inp->inp_faddr.s_addr; 1040 1041 head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1042 inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; 1043 1044 LIST_REMOVE(inp, inp_hash); 1045 LIST_INSERT_HEAD(head, inp, inp_hash); 1046 } 1047 1048 /* 1049 * Remove PCB from various lists. 1050 */ 1051 void 1052 in_pcbremlists(inp) 1053 struct inpcb *inp; 1054 { 1055 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; 1056 if (inp->inp_lport) { 1057 struct inpcbport *phd = inp->inp_phd; 1058 1059 LIST_REMOVE(inp, inp_hash); 1060 LIST_REMOVE(inp, inp_portlist); 1061 if (LIST_FIRST(&phd->phd_pcblist) == NULL) { 1062 LIST_REMOVE(phd, phd_hash); 1063 free(phd, M_PCB); 1064 } 1065 } 1066 LIST_REMOVE(inp, inp_list); 1067 inp->inp_pcbinfo->ipi_count--; 1068 } 1069 1070 int 1071 prison_xinpcb(struct proc *p, struct inpcb *inp) 1072 { 1073 if (!p->p_prison) 1074 return (0); 1075 if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip) 1076 return (0); 1077 return (1); 1078 } 1079