1 /* 2 * Copyright (c) 1982, 1986, 1991, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95 34 * $FreeBSD$ 35 */ 36 37 #include "opt_ipsec.h" 38 #include "opt_inet6.h" 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/domain.h> 45 #include <sys/protosw.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/proc.h> 49 #include <sys/jail.h> 50 #include <sys/kernel.h> 51 #include <sys/sysctl.h> 52 53 #include <machine/limits.h> 54 55 #include <vm/vm_zone.h> 56 57 #include <net/if.h> 58 #include <net/if_types.h> 59 #include <net/route.h> 60 61 #include <netinet/in.h> 62 #include <netinet/in_pcb.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/tcp.h> 66 #include <netinet/tcp_var.h> 67 #ifdef INET6 68 #include <netinet/ip6.h> 69 #include <netinet6/ip6_var.h> 70 #endif /* INET6 */ 71 72 #include "faith.h" 73 74 #ifdef IPSEC 75 #include <netinet6/ipsec.h> 76 #include <netkey/key.h> 77 #endif /* IPSEC */ 78 79 struct in_addr zeroin_addr; 80 81 static void in_rtchange __P((struct inpcb *, int)); 82 83 /* 84 * These configure the range of local port addresses assigned to 85 * "unspecified" outgoing connections/packets/whatever. 86 */ 87 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */ 88 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */ 89 int ipport_firstauto = IPPORT_RESERVED; /* 1024 */ 90 int ipport_lastauto = IPPORT_USERRESERVED; /* 5000 */ 91 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */ 92 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */ 93 94 #define RANGECHK(var, min, max) \ 95 if ((var) < (min)) { (var) = (min); } \ 96 else if ((var) > (max)) { (var) = (max); } 97 98 static int 99 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS) 100 { 101 int error = sysctl_handle_int(oidp, 102 oidp->oid_arg1, oidp->oid_arg2, req); 103 if (!error) { 104 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1); 105 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1); 106 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX); 107 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX); 108 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX); 109 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX); 110 } 111 return error; 112 } 113 114 #undef RANGECHK 115 116 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports"); 117 118 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW, 119 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", ""); 120 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW, 121 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", ""); 122 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW, 123 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", ""); 124 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW, 125 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", ""); 126 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW, 127 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", ""); 128 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW, 129 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", ""); 130 131 /* 132 * in_pcb.c: manage the Protocol Control Blocks. 133 * 134 * NOTE: It is assumed that most of these functions will be called at 135 * splnet(). XXX - There are, unfortunately, a few exceptions to this 136 * rule that should be fixed. 137 */ 138 139 /* 140 * Allocate a PCB and associate it with the socket. 141 */ 142 int 143 in_pcballoc(so, pcbinfo, p) 144 struct socket *so; 145 struct inpcbinfo *pcbinfo; 146 struct proc *p; 147 { 148 register struct inpcb *inp; 149 150 inp = zalloci(pcbinfo->ipi_zone); 151 if (inp == NULL) 152 return (ENOBUFS); 153 bzero((caddr_t)inp, sizeof(*inp)); 154 inp->inp_gencnt = ++pcbinfo->ipi_gencnt; 155 inp->inp_pcbinfo = pcbinfo; 156 inp->inp_socket = so; 157 #if defined(INET6) 158 if (ip6_mapped_addr_on) 159 inp->inp_flags &= ~IN6P_BINDV6ONLY; 160 else 161 inp->inp_flags |= IN6P_BINDV6ONLY; 162 #endif 163 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list); 164 pcbinfo->ipi_count++; 165 so->so_pcb = (caddr_t)inp; 166 return (0); 167 } 168 169 int 170 in_pcbbind(inp, nam, p) 171 register struct inpcb *inp; 172 struct sockaddr *nam; 173 struct proc *p; 174 { 175 register struct socket *so = inp->inp_socket; 176 unsigned short *lastport; 177 struct sockaddr_in *sin; 178 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 179 u_short lport = 0; 180 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT); 181 int error, prison = 0; 182 183 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */ 184 return (EADDRNOTAVAIL); 185 if (inp->inp_lport || inp->inp_laddr.s_addr != INADDR_ANY) 186 return (EINVAL); 187 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 188 wild = 1; 189 if (nam) { 190 sin = (struct sockaddr_in *)nam; 191 if (nam->sa_len != sizeof (*sin)) 192 return (EINVAL); 193 #ifdef notdef 194 /* 195 * We should check the family, but old programs 196 * incorrectly fail to initialize it. 197 */ 198 if (sin->sin_family != AF_INET) 199 return (EAFNOSUPPORT); 200 #endif 201 if (sin->sin_addr.s_addr != INADDR_ANY) 202 if (prison_ip(p, 0, &sin->sin_addr.s_addr)) 203 return(EINVAL); 204 lport = sin->sin_port; 205 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 206 /* 207 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 208 * allow complete duplication of binding if 209 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 210 * and a multicast address is bound on both 211 * new and duplicated sockets. 212 */ 213 if (so->so_options & SO_REUSEADDR) 214 reuseport = SO_REUSEADDR|SO_REUSEPORT; 215 } else if (sin->sin_addr.s_addr != INADDR_ANY) { 216 sin->sin_port = 0; /* yech... */ 217 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0) 218 return (EADDRNOTAVAIL); 219 } 220 if (lport) { 221 struct inpcb *t; 222 223 /* GROSS */ 224 if (ntohs(lport) < IPPORT_RESERVED && p && 225 suser_xxx(0, p, PRISON_ROOT)) 226 return (EACCES); 227 if (p && p->p_prison) 228 prison = 1; 229 if (so->so_cred->cr_uid != 0 && 230 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) { 231 t = in_pcblookup_local(inp->inp_pcbinfo, 232 sin->sin_addr, lport, 233 prison ? 0 : INPLOOKUP_WILDCARD); 234 if (t && 235 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY || 236 ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 237 (t->inp_socket->so_options & 238 SO_REUSEPORT) == 0) && 239 (so->so_cred->cr_uid != 240 t->inp_socket->so_cred->cr_uid)) { 241 #if defined(INET6) 242 if ((inp->inp_flags & 243 IN6P_BINDV6ONLY) != 0 || 244 ntohl(sin->sin_addr.s_addr) != 245 INADDR_ANY || 246 ntohl(t->inp_laddr.s_addr) != 247 INADDR_ANY || 248 INP_SOCKAF(so) == 249 INP_SOCKAF(t->inp_socket)) 250 #endif /* defined(INET6) */ 251 return (EADDRINUSE); 252 } 253 } 254 t = in_pcblookup_local(pcbinfo, sin->sin_addr, 255 lport, prison ? 0 : wild); 256 if (t && 257 (reuseport & t->inp_socket->so_options) == 0) { 258 #if defined(INET6) 259 if ((inp->inp_flags & IN6P_BINDV6ONLY) != 0 || 260 ntohl(sin->sin_addr.s_addr) != 261 INADDR_ANY || 262 ntohl(t->inp_laddr.s_addr) != 263 INADDR_ANY || 264 INP_SOCKAF(so) == 265 INP_SOCKAF(t->inp_socket)) 266 #endif /* defined(INET6) */ 267 return (EADDRINUSE); 268 } 269 } 270 inp->inp_laddr = sin->sin_addr; 271 } 272 if (lport == 0) { 273 ushort first, last; 274 int count; 275 276 if (inp->inp_laddr.s_addr != INADDR_ANY) 277 if (prison_ip(p, 0, &inp->inp_laddr.s_addr )) 278 return (EINVAL); 279 inp->inp_flags |= INP_ANONPORT; 280 281 if (inp->inp_flags & INP_HIGHPORT) { 282 first = ipport_hifirstauto; /* sysctl */ 283 last = ipport_hilastauto; 284 lastport = &pcbinfo->lasthi; 285 } else if (inp->inp_flags & INP_LOWPORT) { 286 if (p && (error = suser_xxx(0, p, PRISON_ROOT))) 287 return error; 288 first = ipport_lowfirstauto; /* 1023 */ 289 last = ipport_lowlastauto; /* 600 */ 290 lastport = &pcbinfo->lastlow; 291 } else { 292 first = ipport_firstauto; /* sysctl */ 293 last = ipport_lastauto; 294 lastport = &pcbinfo->lastport; 295 } 296 /* 297 * Simple check to ensure all ports are not used up causing 298 * a deadlock here. 299 * 300 * We split the two cases (up and down) so that the direction 301 * is not being tested on each round of the loop. 302 */ 303 if (first > last) { 304 /* 305 * counting down 306 */ 307 count = first - last; 308 309 do { 310 if (count-- < 0) { /* completely used? */ 311 /* 312 * Undo any address bind that may have 313 * occurred above. 314 */ 315 inp->inp_laddr.s_addr = INADDR_ANY; 316 return (EAGAIN); 317 } 318 --*lastport; 319 if (*lastport > first || *lastport < last) 320 *lastport = first; 321 lport = htons(*lastport); 322 } while (in_pcblookup_local(pcbinfo, 323 inp->inp_laddr, lport, wild)); 324 } else { 325 /* 326 * counting up 327 */ 328 count = last - first; 329 330 do { 331 if (count-- < 0) { /* completely used? */ 332 /* 333 * Undo any address bind that may have 334 * occurred above. 335 */ 336 inp->inp_laddr.s_addr = INADDR_ANY; 337 return (EAGAIN); 338 } 339 ++*lastport; 340 if (*lastport < first || *lastport > last) 341 *lastport = first; 342 lport = htons(*lastport); 343 } while (in_pcblookup_local(pcbinfo, 344 inp->inp_laddr, lport, wild)); 345 } 346 } 347 inp->inp_lport = lport; 348 if (prison_ip(p, 0, &inp->inp_laddr.s_addr)) 349 return(EINVAL); 350 if (in_pcbinshash(inp) != 0) { 351 inp->inp_laddr.s_addr = INADDR_ANY; 352 inp->inp_lport = 0; 353 return (EAGAIN); 354 } 355 return (0); 356 } 357 358 /* 359 * Transform old in_pcbconnect() into an inner subroutine for new 360 * in_pcbconnect(): Do some validity-checking on the remote 361 * address (in mbuf 'nam') and then determine local host address 362 * (i.e., which interface) to use to access that remote host. 363 * 364 * This preserves definition of in_pcbconnect(), while supporting a 365 * slightly different version for T/TCP. (This is more than 366 * a bit of a kludge, but cleaning up the internal interfaces would 367 * have forced minor changes in every protocol). 368 */ 369 370 int 371 in_pcbladdr(inp, nam, plocal_sin) 372 register struct inpcb *inp; 373 struct sockaddr *nam; 374 struct sockaddr_in **plocal_sin; 375 { 376 struct in_ifaddr *ia; 377 register struct sockaddr_in *sin = (struct sockaddr_in *)nam; 378 379 if (nam->sa_len != sizeof (*sin)) 380 return (EINVAL); 381 if (sin->sin_family != AF_INET) 382 return (EAFNOSUPPORT); 383 if (sin->sin_port == 0) 384 return (EADDRNOTAVAIL); 385 if (!TAILQ_EMPTY(&in_ifaddrhead)) { 386 /* 387 * If the destination address is INADDR_ANY, 388 * use the primary local address. 389 * If the supplied address is INADDR_BROADCAST, 390 * and the primary interface supports broadcast, 391 * choose the broadcast address for that interface. 392 */ 393 #define satosin(sa) ((struct sockaddr_in *)(sa)) 394 #define sintosa(sin) ((struct sockaddr *)(sin)) 395 #define ifatoia(ifa) ((struct in_ifaddr *)(ifa)) 396 if (sin->sin_addr.s_addr == INADDR_ANY) 397 sin->sin_addr = IA_SIN(in_ifaddrhead.tqh_first)->sin_addr; 398 else if (sin->sin_addr.s_addr == (u_long)INADDR_BROADCAST && 399 (in_ifaddrhead.tqh_first->ia_ifp->if_flags & IFF_BROADCAST)) 400 sin->sin_addr = satosin(&in_ifaddrhead.tqh_first->ia_broadaddr)->sin_addr; 401 } 402 if (inp->inp_laddr.s_addr == INADDR_ANY) { 403 register struct route *ro; 404 405 ia = (struct in_ifaddr *)0; 406 /* 407 * If route is known or can be allocated now, 408 * our src addr is taken from the i/f, else punt. 409 */ 410 ro = &inp->inp_route; 411 if (ro->ro_rt && 412 (satosin(&ro->ro_dst)->sin_addr.s_addr != 413 sin->sin_addr.s_addr || 414 inp->inp_socket->so_options & SO_DONTROUTE)) { 415 RTFREE(ro->ro_rt); 416 ro->ro_rt = (struct rtentry *)0; 417 } 418 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/ 419 (ro->ro_rt == (struct rtentry *)0 || 420 ro->ro_rt->rt_ifp == (struct ifnet *)0)) { 421 /* No route yet, so try to acquire one */ 422 ro->ro_dst.sa_family = AF_INET; 423 ro->ro_dst.sa_len = sizeof(struct sockaddr_in); 424 ((struct sockaddr_in *) &ro->ro_dst)->sin_addr = 425 sin->sin_addr; 426 rtalloc(ro); 427 } 428 /* 429 * If we found a route, use the address 430 * corresponding to the outgoing interface 431 * unless it is the loopback (in case a route 432 * to our address on another net goes to loopback). 433 */ 434 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK)) 435 ia = ifatoia(ro->ro_rt->rt_ifa); 436 if (ia == 0) { 437 u_short fport = sin->sin_port; 438 439 sin->sin_port = 0; 440 ia = ifatoia(ifa_ifwithdstaddr(sintosa(sin))); 441 if (ia == 0) 442 ia = ifatoia(ifa_ifwithnet(sintosa(sin))); 443 sin->sin_port = fport; 444 if (ia == 0) 445 ia = in_ifaddrhead.tqh_first; 446 if (ia == 0) 447 return (EADDRNOTAVAIL); 448 } 449 /* 450 * If the destination address is multicast and an outgoing 451 * interface has been set as a multicast option, use the 452 * address of that interface as our source address. 453 */ 454 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) && 455 inp->inp_moptions != NULL) { 456 struct ip_moptions *imo; 457 struct ifnet *ifp; 458 459 imo = inp->inp_moptions; 460 if (imo->imo_multicast_ifp != NULL) { 461 ifp = imo->imo_multicast_ifp; 462 for (ia = in_ifaddrhead.tqh_first; ia; 463 ia = ia->ia_link.tqe_next) 464 if (ia->ia_ifp == ifp) 465 break; 466 if (ia == 0) 467 return (EADDRNOTAVAIL); 468 } 469 } 470 /* 471 * Don't do pcblookup call here; return interface in plocal_sin 472 * and exit to caller, that will do the lookup. 473 */ 474 *plocal_sin = &ia->ia_addr; 475 476 } 477 return(0); 478 } 479 480 /* 481 * Outer subroutine: 482 * Connect from a socket to a specified address. 483 * Both address and port must be specified in argument sin. 484 * If don't have a local address for this socket yet, 485 * then pick one. 486 */ 487 int 488 in_pcbconnect(inp, nam, p) 489 register struct inpcb *inp; 490 struct sockaddr *nam; 491 struct proc *p; 492 { 493 struct sockaddr_in *ifaddr; 494 struct sockaddr_in *sin = (struct sockaddr_in *)nam; 495 struct sockaddr_in sa; 496 int error; 497 498 if (inp->inp_laddr.s_addr == INADDR_ANY && p->p_prison != NULL) { 499 bzero(&sa, sizeof (sa)); 500 sa.sin_addr.s_addr = htonl(p->p_prison->pr_ip); 501 sa.sin_len=sizeof (sa); 502 sa.sin_family = AF_INET; 503 error = in_pcbbind(inp, (struct sockaddr *)&sa, p); 504 if (error) 505 return (error); 506 } 507 /* 508 * Call inner routine, to assign local interface address. 509 */ 510 if ((error = in_pcbladdr(inp, nam, &ifaddr)) != 0) 511 return(error); 512 513 if (in_pcblookup_hash(inp->inp_pcbinfo, sin->sin_addr, sin->sin_port, 514 inp->inp_laddr.s_addr ? inp->inp_laddr : ifaddr->sin_addr, 515 inp->inp_lport, 0, NULL) != NULL) { 516 return (EADDRINUSE); 517 } 518 if (inp->inp_laddr.s_addr == INADDR_ANY) { 519 if (inp->inp_lport == 0) { 520 error = in_pcbbind(inp, (struct sockaddr *)0, p); 521 if (error) 522 return (error); 523 } 524 inp->inp_laddr = ifaddr->sin_addr; 525 } 526 inp->inp_faddr = sin->sin_addr; 527 inp->inp_fport = sin->sin_port; 528 in_pcbrehash(inp); 529 return (0); 530 } 531 532 void 533 in_pcbdisconnect(inp) 534 struct inpcb *inp; 535 { 536 537 inp->inp_faddr.s_addr = INADDR_ANY; 538 inp->inp_fport = 0; 539 in_pcbrehash(inp); 540 if (inp->inp_socket->so_state & SS_NOFDREF) 541 in_pcbdetach(inp); 542 } 543 544 void 545 in_pcbdetach(inp) 546 struct inpcb *inp; 547 { 548 struct socket *so = inp->inp_socket; 549 struct inpcbinfo *ipi = inp->inp_pcbinfo; 550 struct rtentry *rt = inp->inp_route.ro_rt; 551 552 #ifdef IPSEC 553 ipsec4_delete_pcbpolicy(inp); 554 #endif /*IPSEC*/ 555 inp->inp_gencnt = ++ipi->ipi_gencnt; 556 in_pcbremlists(inp); 557 so->so_pcb = 0; 558 sofree(so); 559 if (inp->inp_options) 560 (void)m_free(inp->inp_options); 561 if (rt) { 562 /* 563 * route deletion requires reference count to be <= zero 564 */ 565 if ((rt->rt_flags & RTF_DELCLONE) && 566 (rt->rt_flags & RTF_WASCLONED)) { 567 if (--rt->rt_refcnt <= 0) { 568 rt->rt_flags &= ~RTF_UP; 569 rtrequest(RTM_DELETE, rt_key(rt), 570 rt->rt_gateway, rt_mask(rt), 571 rt->rt_flags, (struct rtentry **)0); 572 } 573 else 574 /* 575 * more than one reference, bump it up 576 * again. 577 */ 578 rt->rt_refcnt++; 579 } 580 else 581 rtfree(rt); 582 } 583 ip_freemoptions(inp->inp_moptions); 584 inp->inp_vflag = 0; 585 zfreei(ipi->ipi_zone, inp); 586 } 587 588 /* 589 * The calling convention of in_setsockaddr() and in_setpeeraddr() was 590 * modified to match the pru_sockaddr() and pru_peeraddr() entry points 591 * in struct pr_usrreqs, so that protocols can just reference then directly 592 * without the need for a wrapper function. The socket must have a valid 593 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one 594 * except through a kernel programming error, so it is acceptable to panic 595 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap 596 * because there actually /is/ a programming error somewhere... XXX) 597 */ 598 int 599 in_setsockaddr(so, nam) 600 struct socket *so; 601 struct sockaddr **nam; 602 { 603 int s; 604 register struct inpcb *inp; 605 register struct sockaddr_in *sin; 606 607 /* 608 * Do the malloc first in case it blocks. 609 */ 610 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 611 M_WAITOK | M_ZERO); 612 sin->sin_family = AF_INET; 613 sin->sin_len = sizeof(*sin); 614 615 s = splnet(); 616 inp = sotoinpcb(so); 617 if (!inp) { 618 splx(s); 619 free(sin, M_SONAME); 620 return ECONNRESET; 621 } 622 sin->sin_port = inp->inp_lport; 623 sin->sin_addr = inp->inp_laddr; 624 splx(s); 625 626 *nam = (struct sockaddr *)sin; 627 return 0; 628 } 629 630 int 631 in_setpeeraddr(so, nam) 632 struct socket *so; 633 struct sockaddr **nam; 634 { 635 int s; 636 struct inpcb *inp; 637 register struct sockaddr_in *sin; 638 639 /* 640 * Do the malloc first in case it blocks. 641 */ 642 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME, 643 M_WAITOK | M_ZERO); 644 sin->sin_family = AF_INET; 645 sin->sin_len = sizeof(*sin); 646 647 s = splnet(); 648 inp = sotoinpcb(so); 649 if (!inp) { 650 splx(s); 651 free(sin, M_SONAME); 652 return ECONNRESET; 653 } 654 sin->sin_port = inp->inp_fport; 655 sin->sin_addr = inp->inp_faddr; 656 splx(s); 657 658 *nam = (struct sockaddr *)sin; 659 return 0; 660 } 661 662 /* 663 * Pass some notification to all connections of a protocol 664 * associated with address dst. The local address and/or port numbers 665 * may be specified to limit the search. The "usual action" will be 666 * taken, depending on the ctlinput cmd. The caller must filter any 667 * cmds that are uninteresting (e.g., no error in the map). 668 * Call the protocol specific routine (if any) to report 669 * any errors for each matching socket. 670 * 671 * If tcp_seq_check != 0 it also checks if tcp_sequence is 672 * a valid TCP sequence number for the session. 673 */ 674 void 675 in_pcbnotify(head, dst, fport_arg, laddr, lport_arg, cmd, notify, tcp_sequence, tcp_seq_check) 676 struct inpcbhead *head; 677 struct sockaddr *dst; 678 u_int fport_arg, lport_arg; 679 struct in_addr laddr; 680 int cmd; 681 void (*notify) __P((struct inpcb *, int)); 682 u_int32_t tcp_sequence; 683 int tcp_seq_check; 684 { 685 register struct inpcb *inp, *oinp; 686 struct in_addr faddr; 687 u_short fport = fport_arg, lport = lport_arg; 688 int errno, s; 689 690 if ((unsigned)cmd > PRC_NCMDS || dst->sa_family != AF_INET) 691 return; 692 faddr = ((struct sockaddr_in *)dst)->sin_addr; 693 if (faddr.s_addr == INADDR_ANY) 694 return; 695 696 /* 697 * Redirects go to all references to the destination, 698 * and use in_rtchange to invalidate the route cache. 699 * Dead host indications: notify all references to the destination. 700 * Otherwise, if we have knowledge of the local port and address, 701 * deliver only to that socket. 702 */ 703 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 704 fport = 0; 705 lport = 0; 706 laddr.s_addr = 0; 707 if (cmd != PRC_HOSTDEAD) 708 notify = in_rtchange; 709 } 710 errno = inetctlerrmap[cmd]; 711 s = splnet(); 712 for (inp = head->lh_first; inp != NULL;) { 713 #ifdef INET6 714 if ((inp->inp_vflag & INP_IPV4) == 0) { 715 inp = LIST_NEXT(inp, inp_list); 716 continue; 717 } 718 #endif 719 if (inp->inp_faddr.s_addr != faddr.s_addr || 720 inp->inp_socket == 0 || 721 (lport && inp->inp_lport != lport) || 722 (laddr.s_addr && inp->inp_laddr.s_addr != laddr.s_addr) || 723 (fport && inp->inp_fport != fport)) { 724 inp = inp->inp_list.le_next; 725 continue; 726 } 727 /* 728 * If tcp_seq_check is set, then skip sessions where 729 * the sequence number is not one of a unacknowledged 730 * packet. 731 * 732 * If it doesn't match, we break the loop, as only a 733 * single session can match on src/dst ip addresses 734 * and TCP port numbers. 735 */ 736 if ((tcp_seq_check == 1) && (tcp_seq_vs_sess(inp, tcp_sequence) == 0)) { 737 inp = inp->inp_list.le_next; 738 break; 739 } 740 oinp = inp; 741 inp = inp->inp_list.le_next; 742 if (notify) 743 (*notify)(oinp, errno); 744 } 745 splx(s); 746 } 747 748 /* 749 * Check for alternatives when higher level complains 750 * about service problems. For now, invalidate cached 751 * routing information. If the route was created dynamically 752 * (by a redirect), time to try a default gateway again. 753 */ 754 void 755 in_losing(inp) 756 struct inpcb *inp; 757 { 758 register struct rtentry *rt; 759 struct rt_addrinfo info; 760 761 if ((rt = inp->inp_route.ro_rt)) { 762 inp->inp_route.ro_rt = 0; 763 bzero((caddr_t)&info, sizeof(info)); 764 info.rti_info[RTAX_DST] = 765 (struct sockaddr *)&inp->inp_route.ro_dst; 766 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway; 767 info.rti_info[RTAX_NETMASK] = rt_mask(rt); 768 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0); 769 if (rt->rt_flags & RTF_DYNAMIC) 770 (void) rtrequest(RTM_DELETE, rt_key(rt), 771 rt->rt_gateway, rt_mask(rt), rt->rt_flags, 772 (struct rtentry **)0); 773 else 774 /* 775 * A new route can be allocated 776 * the next time output is attempted. 777 */ 778 rtfree(rt); 779 } 780 } 781 782 /* 783 * After a routing change, flush old routing 784 * and allocate a (hopefully) better one. 785 */ 786 static void 787 in_rtchange(inp, errno) 788 register struct inpcb *inp; 789 int errno; 790 { 791 if (inp->inp_route.ro_rt) { 792 rtfree(inp->inp_route.ro_rt); 793 inp->inp_route.ro_rt = 0; 794 /* 795 * A new route can be allocated the next time 796 * output is attempted. 797 */ 798 } 799 } 800 801 /* 802 * Lookup a PCB based on the local address and port. 803 */ 804 struct inpcb * 805 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay) 806 struct inpcbinfo *pcbinfo; 807 struct in_addr laddr; 808 u_int lport_arg; 809 int wild_okay; 810 { 811 register struct inpcb *inp; 812 int matchwild = 3, wildcard; 813 u_short lport = lport_arg; 814 815 if (!wild_okay) { 816 struct inpcbhead *head; 817 /* 818 * Look for an unconnected (wildcard foreign addr) PCB that 819 * matches the local address and port we're looking for. 820 */ 821 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 822 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { 823 #ifdef INET6 824 if ((inp->inp_vflag & INP_IPV4) == 0) 825 continue; 826 #endif 827 if (inp->inp_faddr.s_addr == INADDR_ANY && 828 inp->inp_laddr.s_addr == laddr.s_addr && 829 inp->inp_lport == lport) { 830 /* 831 * Found. 832 */ 833 return (inp); 834 } 835 } 836 /* 837 * Not found. 838 */ 839 return (NULL); 840 } else { 841 struct inpcbporthead *porthash; 842 struct inpcbport *phd; 843 struct inpcb *match = NULL; 844 /* 845 * Best fit PCB lookup. 846 * 847 * First see if this local port is in use by looking on the 848 * port hash list. 849 */ 850 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport, 851 pcbinfo->porthashmask)]; 852 for (phd = porthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { 853 if (phd->phd_port == lport) 854 break; 855 } 856 if (phd != NULL) { 857 /* 858 * Port is in use by one or more PCBs. Look for best 859 * fit. 860 */ 861 for (inp = phd->phd_pcblist.lh_first; inp != NULL; 862 inp = inp->inp_portlist.le_next) { 863 wildcard = 0; 864 #ifdef INET6 865 if ((inp->inp_vflag & INP_IPV4) == 0) 866 continue; 867 #endif 868 if (inp->inp_faddr.s_addr != INADDR_ANY) 869 wildcard++; 870 if (inp->inp_laddr.s_addr != INADDR_ANY) { 871 if (laddr.s_addr == INADDR_ANY) 872 wildcard++; 873 else if (inp->inp_laddr.s_addr != laddr.s_addr) 874 continue; 875 } else { 876 if (laddr.s_addr != INADDR_ANY) 877 wildcard++; 878 } 879 if (wildcard < matchwild) { 880 match = inp; 881 matchwild = wildcard; 882 if (matchwild == 0) { 883 break; 884 } 885 } 886 } 887 } 888 return (match); 889 } 890 } 891 892 /* 893 * Lookup PCB in hash list. 894 */ 895 struct inpcb * 896 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard, 897 ifp) 898 struct inpcbinfo *pcbinfo; 899 struct in_addr faddr, laddr; 900 u_int fport_arg, lport_arg; 901 int wildcard; 902 struct ifnet *ifp; 903 { 904 struct inpcbhead *head; 905 register struct inpcb *inp; 906 u_short fport = fport_arg, lport = lport_arg; 907 908 /* 909 * First look for an exact match. 910 */ 911 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)]; 912 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { 913 #ifdef INET6 914 if ((inp->inp_vflag & INP_IPV4) == 0) 915 continue; 916 #endif 917 if (inp->inp_faddr.s_addr == faddr.s_addr && 918 inp->inp_laddr.s_addr == laddr.s_addr && 919 inp->inp_fport == fport && 920 inp->inp_lport == lport) { 921 /* 922 * Found. 923 */ 924 return (inp); 925 } 926 } 927 if (wildcard) { 928 struct inpcb *local_wild = NULL; 929 #if defined(INET6) 930 struct inpcb *local_wild_mapped = NULL; 931 #endif /* defined(INET6) */ 932 933 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)]; 934 for (inp = head->lh_first; inp != NULL; inp = inp->inp_hash.le_next) { 935 #ifdef INET6 936 if ((inp->inp_vflag & INP_IPV4) == 0) 937 continue; 938 #endif 939 if (inp->inp_faddr.s_addr == INADDR_ANY && 940 inp->inp_lport == lport) { 941 #if defined(NFAITH) && NFAITH > 0 942 if (ifp && ifp->if_type == IFT_FAITH && 943 (inp->inp_flags & INP_FAITH) == 0) 944 continue; 945 #endif 946 if (inp->inp_laddr.s_addr == laddr.s_addr) 947 return (inp); 948 else if (inp->inp_laddr.s_addr == INADDR_ANY) { 949 #if defined(INET6) 950 if (INP_CHECK_SOCKAF(inp->inp_socket, 951 AF_INET6)) 952 local_wild_mapped = inp; 953 else 954 #endif /* defined(INET6) */ 955 local_wild = inp; 956 } 957 } 958 } 959 #if defined(INET6) 960 if (local_wild == NULL) 961 return (local_wild_mapped); 962 #endif /* defined(INET6) */ 963 return (local_wild); 964 } 965 966 /* 967 * Not found. 968 */ 969 return (NULL); 970 } 971 972 /* 973 * Insert PCB onto various hash lists. 974 */ 975 int 976 in_pcbinshash(inp) 977 struct inpcb *inp; 978 { 979 struct inpcbhead *pcbhash; 980 struct inpcbporthead *pcbporthash; 981 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 982 struct inpcbport *phd; 983 u_int32_t hashkey_faddr; 984 985 #ifdef INET6 986 if (inp->inp_vflag & INP_IPV6) 987 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 988 else 989 #endif /* INET6 */ 990 hashkey_faddr = inp->inp_faddr.s_addr; 991 992 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 993 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)]; 994 995 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport, 996 pcbinfo->porthashmask)]; 997 998 /* 999 * Go through port list and look for a head for this lport. 1000 */ 1001 for (phd = pcbporthash->lh_first; phd != NULL; phd = phd->phd_hash.le_next) { 1002 if (phd->phd_port == inp->inp_lport) 1003 break; 1004 } 1005 /* 1006 * If none exists, malloc one and tack it on. 1007 */ 1008 if (phd == NULL) { 1009 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT); 1010 if (phd == NULL) { 1011 return (ENOBUFS); /* XXX */ 1012 } 1013 phd->phd_port = inp->inp_lport; 1014 LIST_INIT(&phd->phd_pcblist); 1015 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash); 1016 } 1017 inp->inp_phd = phd; 1018 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); 1019 LIST_INSERT_HEAD(pcbhash, inp, inp_hash); 1020 return (0); 1021 } 1022 1023 /* 1024 * Move PCB to the proper hash bucket when { faddr, fport } have been 1025 * changed. NOTE: This does not handle the case of the lport changing (the 1026 * hashed port list would have to be updated as well), so the lport must 1027 * not change after in_pcbinshash() has been called. 1028 */ 1029 void 1030 in_pcbrehash(inp) 1031 struct inpcb *inp; 1032 { 1033 struct inpcbhead *head; 1034 u_int32_t hashkey_faddr; 1035 1036 #ifdef INET6 1037 if (inp->inp_vflag & INP_IPV6) 1038 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */; 1039 else 1040 #endif /* INET6 */ 1041 hashkey_faddr = inp->inp_faddr.s_addr; 1042 1043 head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr, 1044 inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)]; 1045 1046 LIST_REMOVE(inp, inp_hash); 1047 LIST_INSERT_HEAD(head, inp, inp_hash); 1048 } 1049 1050 /* 1051 * Remove PCB from various lists. 1052 */ 1053 void 1054 in_pcbremlists(inp) 1055 struct inpcb *inp; 1056 { 1057 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt; 1058 if (inp->inp_lport) { 1059 struct inpcbport *phd = inp->inp_phd; 1060 1061 LIST_REMOVE(inp, inp_hash); 1062 LIST_REMOVE(inp, inp_portlist); 1063 if (phd->phd_pcblist.lh_first == NULL) { 1064 LIST_REMOVE(phd, phd_hash); 1065 free(phd, M_PCB); 1066 } 1067 } 1068 LIST_REMOVE(inp, inp_list); 1069 inp->inp_pcbinfo->ipi_count--; 1070 } 1071 1072 int 1073 prison_xinpcb(struct proc *p, struct inpcb *inp) 1074 { 1075 if (!p->p_prison) 1076 return (0); 1077 if (ntohl(inp->inp_laddr.s_addr) == p->p_prison->pr_ip) 1078 return (0); 1079 return (1); 1080 } 1081