1 /*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * Copyright (c) 2010-2011 Juniper Networks, Inc. 4 * All rights reserved. 5 * 6 * Portions of this software were developed by Robert N. M. Watson under 7 * contract to Juniper Networks, Inc. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. Neither the name of the project nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * $KAME: in6_pcb.c,v 1.31 2001/05/21 05:45:10 jinmei Exp $ 34 */ 35 36 /*- 37 * Copyright (c) 1982, 1986, 1991, 1993 38 * The Regents of the University of California. All rights reserved. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 4. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 * 64 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 65 */ 66 67 #include <sys/cdefs.h> 68 __FBSDID("$FreeBSD$"); 69 70 #include "opt_inet.h" 71 #include "opt_inet6.h" 72 #include "opt_ipsec.h" 73 #include "opt_pcbgroup.h" 74 75 #include <sys/param.h> 76 #include <sys/systm.h> 77 #include <sys/malloc.h> 78 #include <sys/mbuf.h> 79 #include <sys/domain.h> 80 #include <sys/protosw.h> 81 #include <sys/socket.h> 82 #include <sys/socketvar.h> 83 #include <sys/sockio.h> 84 #include <sys/errno.h> 85 #include <sys/time.h> 86 #include <sys/priv.h> 87 #include <sys/proc.h> 88 #include <sys/jail.h> 89 90 #include <vm/uma.h> 91 92 #include <net/if.h> 93 #include <net/if_var.h> 94 #include <net/if_types.h> 95 #include <net/route.h> 96 97 #include <netinet/in.h> 98 #include <netinet/in_var.h> 99 #include <netinet/in_systm.h> 100 #include <netinet/tcp_var.h> 101 #include <netinet/ip6.h> 102 #include <netinet/ip_var.h> 103 104 #include <netinet6/ip6_var.h> 105 #include <netinet6/nd6.h> 106 #include <netinet/in_pcb.h> 107 #include <netinet6/in6_pcb.h> 108 #include <netinet6/scope6_var.h> 109 110 int 111 in6_pcbbind(register struct inpcb *inp, struct sockaddr *nam, 112 struct ucred *cred) 113 { 114 struct socket *so = inp->inp_socket; 115 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)NULL; 116 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 117 u_short lport = 0; 118 int error, lookupflags = 0; 119 int reuseport = (so->so_options & SO_REUSEPORT); 120 121 INP_WLOCK_ASSERT(inp); 122 INP_HASH_WLOCK_ASSERT(pcbinfo); 123 124 if (TAILQ_EMPTY(&V_in6_ifaddrhead)) /* XXX broken! */ 125 return (EADDRNOTAVAIL); 126 if (inp->inp_lport || !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) 127 return (EINVAL); 128 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 129 lookupflags = INPLOOKUP_WILDCARD; 130 if (nam == NULL) { 131 if ((error = prison_local_ip6(cred, &inp->in6p_laddr, 132 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 133 return (error); 134 } else { 135 sin6 = (struct sockaddr_in6 *)nam; 136 if (nam->sa_len != sizeof(*sin6)) 137 return (EINVAL); 138 /* 139 * family check. 140 */ 141 if (nam->sa_family != AF_INET6) 142 return (EAFNOSUPPORT); 143 144 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) 145 return(error); 146 147 if ((error = prison_local_ip6(cred, &sin6->sin6_addr, 148 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 149 return (error); 150 151 lport = sin6->sin6_port; 152 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) { 153 /* 154 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast; 155 * allow compepte duplication of binding if 156 * SO_REUSEPORT is set, or if SO_REUSEADDR is set 157 * and a multicast address is bound on both 158 * new and duplicated sockets. 159 */ 160 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) != 0) 161 reuseport = SO_REUSEADDR|SO_REUSEPORT; 162 } else if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 163 struct ifaddr *ifa; 164 165 sin6->sin6_port = 0; /* yech... */ 166 if ((ifa = ifa_ifwithaddr((struct sockaddr *)sin6)) == 167 NULL && 168 (inp->inp_flags & INP_BINDANY) == 0) { 169 return (EADDRNOTAVAIL); 170 } 171 172 /* 173 * XXX: bind to an anycast address might accidentally 174 * cause sending a packet with anycast source address. 175 * We should allow to bind to a deprecated address, since 176 * the application dares to use it. 177 */ 178 if (ifa != NULL && 179 ((struct in6_ifaddr *)ifa)->ia6_flags & 180 (IN6_IFF_ANYCAST|IN6_IFF_NOTREADY|IN6_IFF_DETACHED)) { 181 ifa_free(ifa); 182 return (EADDRNOTAVAIL); 183 } 184 if (ifa != NULL) 185 ifa_free(ifa); 186 } 187 if (lport) { 188 struct inpcb *t; 189 struct tcptw *tw; 190 191 /* GROSS */ 192 if (ntohs(lport) <= V_ipport_reservedhigh && 193 ntohs(lport) >= V_ipport_reservedlow && 194 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 195 0)) 196 return (EACCES); 197 if (!IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr) && 198 priv_check_cred(inp->inp_cred, 199 PRIV_NETINET_REUSEPORT, 0) != 0) { 200 t = in6_pcblookup_local(pcbinfo, 201 &sin6->sin6_addr, lport, 202 INPLOOKUP_WILDCARD, cred); 203 if (t && 204 ((t->inp_flags & INP_TIMEWAIT) == 0) && 205 (so->so_type != SOCK_STREAM || 206 IN6_IS_ADDR_UNSPECIFIED(&t->in6p_faddr)) && 207 (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) || 208 !IN6_IS_ADDR_UNSPECIFIED(&t->in6p_laddr) || 209 (t->inp_flags2 & INP_REUSEPORT) == 0) && 210 (inp->inp_cred->cr_uid != 211 t->inp_cred->cr_uid)) 212 return (EADDRINUSE); 213 #ifdef INET 214 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && 215 IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 216 struct sockaddr_in sin; 217 218 in6_sin6_2_sin(&sin, sin6); 219 t = in_pcblookup_local(pcbinfo, 220 sin.sin_addr, lport, 221 INPLOOKUP_WILDCARD, cred); 222 if (t && 223 ((t->inp_flags & 224 INP_TIMEWAIT) == 0) && 225 (so->so_type != SOCK_STREAM || 226 ntohl(t->inp_faddr.s_addr) == 227 INADDR_ANY) && 228 (inp->inp_cred->cr_uid != 229 t->inp_cred->cr_uid)) 230 return (EADDRINUSE); 231 } 232 #endif 233 } 234 t = in6_pcblookup_local(pcbinfo, &sin6->sin6_addr, 235 lport, lookupflags, cred); 236 if (t && (t->inp_flags & INP_TIMEWAIT)) { 237 /* 238 * XXXRW: If an incpb has had its timewait 239 * state recycled, we treat the address as 240 * being in use (for now). This is better 241 * than a panic, but not desirable. 242 */ 243 tw = intotw(t); 244 if (tw == NULL || 245 (reuseport & tw->tw_so_options) == 0) 246 return (EADDRINUSE); 247 } else if (t && (reuseport & inp_so_options(t)) == 0) { 248 return (EADDRINUSE); 249 } 250 #ifdef INET 251 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0 && 252 IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) { 253 struct sockaddr_in sin; 254 255 in6_sin6_2_sin(&sin, sin6); 256 t = in_pcblookup_local(pcbinfo, sin.sin_addr, 257 lport, lookupflags, cred); 258 if (t && t->inp_flags & INP_TIMEWAIT) { 259 tw = intotw(t); 260 if (tw == NULL) 261 return (EADDRINUSE); 262 if ((reuseport & tw->tw_so_options) == 0 263 && (ntohl(t->inp_laddr.s_addr) != 264 INADDR_ANY || ((inp->inp_vflag & 265 INP_IPV6PROTO) == 266 (t->inp_vflag & INP_IPV6PROTO)))) 267 return (EADDRINUSE); 268 } else if (t && 269 (reuseport & inp_so_options(t)) == 0 && 270 (ntohl(t->inp_laddr.s_addr) != INADDR_ANY || 271 (t->inp_vflag & INP_IPV6PROTO) != 0)) 272 return (EADDRINUSE); 273 } 274 #endif 275 } 276 inp->in6p_laddr = sin6->sin6_addr; 277 } 278 if (lport == 0) { 279 if ((error = in6_pcbsetport(&inp->in6p_laddr, inp, cred)) != 0) { 280 /* Undo an address bind that may have occurred. */ 281 inp->in6p_laddr = in6addr_any; 282 return (error); 283 } 284 } else { 285 inp->inp_lport = lport; 286 if (in_pcbinshash(inp) != 0) { 287 inp->in6p_laddr = in6addr_any; 288 inp->inp_lport = 0; 289 return (EAGAIN); 290 } 291 } 292 return (0); 293 } 294 295 /* 296 * Transform old in6_pcbconnect() into an inner subroutine for new 297 * in6_pcbconnect(): Do some validity-checking on the remote 298 * address (in mbuf 'nam') and then determine local host address 299 * (i.e., which interface) to use to access that remote host. 300 * 301 * This preserves definition of in6_pcbconnect(), while supporting a 302 * slightly different version for T/TCP. (This is more than 303 * a bit of a kludge, but cleaning up the internal interfaces would 304 * have forced minor changes in every protocol). 305 */ 306 int 307 in6_pcbladdr(register struct inpcb *inp, struct sockaddr *nam, 308 struct in6_addr *plocal_addr6) 309 { 310 register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 311 int error = 0; 312 struct ifnet *ifp = NULL; 313 int scope_ambiguous = 0; 314 struct in6_addr in6a; 315 316 INP_WLOCK_ASSERT(inp); 317 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); /* XXXRW: why? */ 318 319 if (nam->sa_len != sizeof (*sin6)) 320 return (EINVAL); 321 if (sin6->sin6_family != AF_INET6) 322 return (EAFNOSUPPORT); 323 if (sin6->sin6_port == 0) 324 return (EADDRNOTAVAIL); 325 326 if (sin6->sin6_scope_id == 0 && !V_ip6_use_defzone) 327 scope_ambiguous = 1; 328 if ((error = sa6_embedscope(sin6, V_ip6_use_defzone)) != 0) 329 return(error); 330 331 if (!TAILQ_EMPTY(&V_in6_ifaddrhead)) { 332 /* 333 * If the destination address is UNSPECIFIED addr, 334 * use the loopback addr, e.g ::1. 335 */ 336 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) 337 sin6->sin6_addr = in6addr_loopback; 338 } 339 if ((error = prison_remote_ip6(inp->inp_cred, &sin6->sin6_addr)) != 0) 340 return (error); 341 342 error = in6_selectsrc(sin6, inp->in6p_outputopts, 343 inp, NULL, inp->inp_cred, &ifp, &in6a); 344 if (error) 345 return (error); 346 347 if (ifp && scope_ambiguous && 348 (error = in6_setscope(&sin6->sin6_addr, ifp, NULL)) != 0) { 349 return(error); 350 } 351 352 /* 353 * Do not update this earlier, in case we return with an error. 354 * 355 * XXX: this in6_selectsrc result might replace the bound local 356 * address with the address specified by setsockopt(IPV6_PKTINFO). 357 * Is it the intended behavior? 358 */ 359 *plocal_addr6 = in6a; 360 361 /* 362 * Don't do pcblookup call here; return interface in 363 * plocal_addr6 364 * and exit to caller, that will do the lookup. 365 */ 366 367 return (0); 368 } 369 370 /* 371 * Outer subroutine: 372 * Connect from a socket to a specified address. 373 * Both address and port must be specified in argument sin. 374 * If don't have a local address for this socket yet, 375 * then pick one. 376 */ 377 int 378 in6_pcbconnect_mbuf(register struct inpcb *inp, struct sockaddr *nam, 379 struct ucred *cred, struct mbuf *m) 380 { 381 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 382 register struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; 383 struct in6_addr addr6; 384 int error; 385 386 INP_WLOCK_ASSERT(inp); 387 INP_HASH_WLOCK_ASSERT(pcbinfo); 388 389 /* 390 * Call inner routine, to assign local interface address. 391 * in6_pcbladdr() may automatically fill in sin6_scope_id. 392 */ 393 if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) 394 return (error); 395 396 if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, 397 sin6->sin6_port, 398 IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) 399 ? &addr6 : &inp->in6p_laddr, 400 inp->inp_lport, 0, NULL) != NULL) { 401 return (EADDRINUSE); 402 } 403 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 404 if (inp->inp_lport == 0) { 405 error = in6_pcbbind(inp, (struct sockaddr *)0, cred); 406 if (error) 407 return (error); 408 } 409 inp->in6p_laddr = addr6; 410 } 411 inp->in6p_faddr = sin6->sin6_addr; 412 inp->inp_fport = sin6->sin6_port; 413 /* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 414 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 415 if (inp->inp_flags & IN6P_AUTOFLOWLABEL) 416 inp->inp_flow |= 417 (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK); 418 419 in_pcbrehash_mbuf(inp, m); 420 421 return (0); 422 } 423 424 int 425 in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) 426 { 427 428 return (in6_pcbconnect_mbuf(inp, nam, cred, NULL)); 429 } 430 431 void 432 in6_pcbdisconnect(struct inpcb *inp) 433 { 434 435 INP_WLOCK_ASSERT(inp); 436 INP_HASH_WLOCK_ASSERT(inp->inp_pcbinfo); 437 438 bzero((caddr_t)&inp->in6p_faddr, sizeof(inp->in6p_faddr)); 439 inp->inp_fport = 0; 440 /* clear flowinfo - draft-itojun-ipv6-flowlabel-api-00 */ 441 inp->inp_flow &= ~IPV6_FLOWLABEL_MASK; 442 in_pcbrehash(inp); 443 } 444 445 struct sockaddr * 446 in6_sockaddr(in_port_t port, struct in6_addr *addr_p) 447 { 448 struct sockaddr_in6 *sin6; 449 450 sin6 = malloc(sizeof *sin6, M_SONAME, M_WAITOK); 451 bzero(sin6, sizeof *sin6); 452 sin6->sin6_family = AF_INET6; 453 sin6->sin6_len = sizeof(*sin6); 454 sin6->sin6_port = port; 455 sin6->sin6_addr = *addr_p; 456 (void)sa6_recoverscope(sin6); /* XXX: should catch errors */ 457 458 return (struct sockaddr *)sin6; 459 } 460 461 struct sockaddr * 462 in6_v4mapsin6_sockaddr(in_port_t port, struct in_addr *addr_p) 463 { 464 struct sockaddr_in sin; 465 struct sockaddr_in6 *sin6_p; 466 467 bzero(&sin, sizeof sin); 468 sin.sin_family = AF_INET; 469 sin.sin_len = sizeof(sin); 470 sin.sin_port = port; 471 sin.sin_addr = *addr_p; 472 473 sin6_p = malloc(sizeof *sin6_p, M_SONAME, 474 M_WAITOK); 475 in6_sin_2_v4mapsin6(&sin, sin6_p); 476 477 return (struct sockaddr *)sin6_p; 478 } 479 480 int 481 in6_getsockaddr(struct socket *so, struct sockaddr **nam) 482 { 483 register struct inpcb *inp; 484 struct in6_addr addr; 485 in_port_t port; 486 487 inp = sotoinpcb(so); 488 KASSERT(inp != NULL, ("in6_getsockaddr: inp == NULL")); 489 490 INP_RLOCK(inp); 491 port = inp->inp_lport; 492 addr = inp->in6p_laddr; 493 INP_RUNLOCK(inp); 494 495 *nam = in6_sockaddr(port, &addr); 496 return 0; 497 } 498 499 int 500 in6_getpeeraddr(struct socket *so, struct sockaddr **nam) 501 { 502 struct inpcb *inp; 503 struct in6_addr addr; 504 in_port_t port; 505 506 inp = sotoinpcb(so); 507 KASSERT(inp != NULL, ("in6_getpeeraddr: inp == NULL")); 508 509 INP_RLOCK(inp); 510 port = inp->inp_fport; 511 addr = inp->in6p_faddr; 512 INP_RUNLOCK(inp); 513 514 *nam = in6_sockaddr(port, &addr); 515 return 0; 516 } 517 518 int 519 in6_mapped_sockaddr(struct socket *so, struct sockaddr **nam) 520 { 521 struct inpcb *inp; 522 int error; 523 524 inp = sotoinpcb(so); 525 KASSERT(inp != NULL, ("in6_mapped_sockaddr: inp == NULL")); 526 527 #ifdef INET 528 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { 529 error = in_getsockaddr(so, nam); 530 if (error == 0) 531 in6_sin_2_v4mapsin6_in_sock(nam); 532 } else 533 #endif 534 { 535 /* scope issues will be handled in in6_getsockaddr(). */ 536 error = in6_getsockaddr(so, nam); 537 } 538 539 return error; 540 } 541 542 int 543 in6_mapped_peeraddr(struct socket *so, struct sockaddr **nam) 544 { 545 struct inpcb *inp; 546 int error; 547 548 inp = sotoinpcb(so); 549 KASSERT(inp != NULL, ("in6_mapped_peeraddr: inp == NULL")); 550 551 #ifdef INET 552 if ((inp->inp_vflag & (INP_IPV4 | INP_IPV6)) == INP_IPV4) { 553 error = in_getpeeraddr(so, nam); 554 if (error == 0) 555 in6_sin_2_v4mapsin6_in_sock(nam); 556 } else 557 #endif 558 /* scope issues will be handled in in6_getpeeraddr(). */ 559 error = in6_getpeeraddr(so, nam); 560 561 return error; 562 } 563 564 /* 565 * Pass some notification to all connections of a protocol 566 * associated with address dst. The local address and/or port numbers 567 * may be specified to limit the search. The "usual action" will be 568 * taken, depending on the ctlinput cmd. The caller must filter any 569 * cmds that are uninteresting (e.g., no error in the map). 570 * Call the protocol specific routine (if any) to report 571 * any errors for each matching socket. 572 */ 573 void 574 in6_pcbnotify(struct inpcbinfo *pcbinfo, struct sockaddr *dst, 575 u_int fport_arg, const struct sockaddr *src, u_int lport_arg, 576 int cmd, void *cmdarg, 577 struct inpcb *(*notify)(struct inpcb *, int)) 578 { 579 struct inpcb *inp, *inp_temp; 580 struct sockaddr_in6 sa6_src, *sa6_dst; 581 u_short fport = fport_arg, lport = lport_arg; 582 u_int32_t flowinfo; 583 int errno; 584 585 if ((unsigned)cmd >= PRC_NCMDS || dst->sa_family != AF_INET6) 586 return; 587 588 sa6_dst = (struct sockaddr_in6 *)dst; 589 if (IN6_IS_ADDR_UNSPECIFIED(&sa6_dst->sin6_addr)) 590 return; 591 592 /* 593 * note that src can be NULL when we get notify by local fragmentation. 594 */ 595 sa6_src = (src == NULL) ? sa6_any : *(const struct sockaddr_in6 *)src; 596 flowinfo = sa6_src.sin6_flowinfo; 597 598 /* 599 * Redirects go to all references to the destination, 600 * and use in6_rtchange to invalidate the route cache. 601 * Dead host indications: also use in6_rtchange to invalidate 602 * the cache, and deliver the error to all the sockets. 603 * Otherwise, if we have knowledge of the local port and address, 604 * deliver only to that socket. 605 */ 606 if (PRC_IS_REDIRECT(cmd) || cmd == PRC_HOSTDEAD) { 607 fport = 0; 608 lport = 0; 609 bzero((caddr_t)&sa6_src.sin6_addr, sizeof(sa6_src.sin6_addr)); 610 611 if (cmd != PRC_HOSTDEAD) 612 notify = in6_rtchange; 613 } 614 errno = inet6ctlerrmap[cmd]; 615 INP_INFO_WLOCK(pcbinfo); 616 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) { 617 INP_WLOCK(inp); 618 if ((inp->inp_vflag & INP_IPV6) == 0) { 619 INP_WUNLOCK(inp); 620 continue; 621 } 622 623 /* 624 * If the error designates a new path MTU for a destination 625 * and the application (associated with this socket) wanted to 626 * know the value, notify. Note that we notify for all 627 * disconnected sockets if the corresponding application 628 * wanted. This is because some UDP applications keep sending 629 * sockets disconnected. 630 * XXX: should we avoid to notify the value to TCP sockets? 631 */ 632 if (cmd == PRC_MSGSIZE && (inp->inp_flags & IN6P_MTU) != 0 && 633 (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 634 IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, &sa6_dst->sin6_addr))) { 635 ip6_notify_pmtu(inp, (struct sockaddr_in6 *)dst, 636 (u_int32_t *)cmdarg); 637 } 638 639 /* 640 * Detect if we should notify the error. If no source and 641 * destination ports are specifed, but non-zero flowinfo and 642 * local address match, notify the error. This is the case 643 * when the error is delivered with an encrypted buffer 644 * by ESP. Otherwise, just compare addresses and ports 645 * as usual. 646 */ 647 if (lport == 0 && fport == 0 && flowinfo && 648 inp->inp_socket != NULL && 649 flowinfo == (inp->inp_flow & IPV6_FLOWLABEL_MASK) && 650 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, &sa6_src.sin6_addr)) 651 goto do_notify; 652 else if (!IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, 653 &sa6_dst->sin6_addr) || 654 inp->inp_socket == 0 || 655 (lport && inp->inp_lport != lport) || 656 (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && 657 !IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, 658 &sa6_src.sin6_addr)) || 659 (fport && inp->inp_fport != fport)) { 660 INP_WUNLOCK(inp); 661 continue; 662 } 663 664 do_notify: 665 if (notify) { 666 if ((*notify)(inp, errno)) 667 INP_WUNLOCK(inp); 668 } else 669 INP_WUNLOCK(inp); 670 } 671 INP_INFO_WUNLOCK(pcbinfo); 672 } 673 674 /* 675 * Lookup a PCB based on the local address and port. Caller must hold the 676 * hash lock. No inpcb locks or references are acquired. 677 */ 678 struct inpcb * 679 in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr, 680 u_short lport, int lookupflags, struct ucred *cred) 681 { 682 register struct inpcb *inp; 683 int matchwild = 3, wildcard; 684 685 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, 686 ("%s: invalid lookup flags %d", __func__, lookupflags)); 687 688 INP_HASH_WLOCK_ASSERT(pcbinfo); 689 690 if ((lookupflags & INPLOOKUP_WILDCARD) == 0) { 691 struct inpcbhead *head; 692 /* 693 * Look for an unconnected (wildcard foreign addr) PCB that 694 * matches the local address and port we're looking for. 695 */ 696 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 697 0, pcbinfo->ipi_hashmask)]; 698 LIST_FOREACH(inp, head, inp_hash) { 699 /* XXX inp locking */ 700 if ((inp->inp_vflag & INP_IPV6) == 0) 701 continue; 702 if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) && 703 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 704 inp->inp_lport == lport) { 705 /* Found. */ 706 if (cred == NULL || 707 prison_equal_ip6(cred->cr_prison, 708 inp->inp_cred->cr_prison)) 709 return (inp); 710 } 711 } 712 /* 713 * Not found. 714 */ 715 return (NULL); 716 } else { 717 struct inpcbporthead *porthash; 718 struct inpcbport *phd; 719 struct inpcb *match = NULL; 720 /* 721 * Best fit PCB lookup. 722 * 723 * First see if this local port is in use by looking on the 724 * port hash list. 725 */ 726 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport, 727 pcbinfo->ipi_porthashmask)]; 728 LIST_FOREACH(phd, porthash, phd_hash) { 729 if (phd->phd_port == lport) 730 break; 731 } 732 if (phd != NULL) { 733 /* 734 * Port is in use by one or more PCBs. Look for best 735 * fit. 736 */ 737 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) { 738 wildcard = 0; 739 if (cred != NULL && 740 !prison_equal_ip6(cred->cr_prison, 741 inp->inp_cred->cr_prison)) 742 continue; 743 /* XXX inp locking */ 744 if ((inp->inp_vflag & INP_IPV6) == 0) 745 continue; 746 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) 747 wildcard++; 748 if (!IN6_IS_ADDR_UNSPECIFIED( 749 &inp->in6p_laddr)) { 750 if (IN6_IS_ADDR_UNSPECIFIED(laddr)) 751 wildcard++; 752 else if (!IN6_ARE_ADDR_EQUAL( 753 &inp->in6p_laddr, laddr)) 754 continue; 755 } else { 756 if (!IN6_IS_ADDR_UNSPECIFIED(laddr)) 757 wildcard++; 758 } 759 if (wildcard < matchwild) { 760 match = inp; 761 matchwild = wildcard; 762 if (matchwild == 0) 763 break; 764 } 765 } 766 } 767 return (match); 768 } 769 } 770 771 void 772 in6_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp) 773 { 774 struct inpcb *in6p; 775 struct ip6_moptions *im6o; 776 int i, gap; 777 778 INP_INFO_RLOCK(pcbinfo); 779 LIST_FOREACH(in6p, pcbinfo->ipi_listhead, inp_list) { 780 INP_WLOCK(in6p); 781 im6o = in6p->in6p_moptions; 782 if ((in6p->inp_vflag & INP_IPV6) && im6o != NULL) { 783 /* 784 * Unselect the outgoing ifp for multicast if it 785 * is being detached. 786 */ 787 if (im6o->im6o_multicast_ifp == ifp) 788 im6o->im6o_multicast_ifp = NULL; 789 /* 790 * Drop multicast group membership if we joined 791 * through the interface being detached. 792 */ 793 gap = 0; 794 for (i = 0; i < im6o->im6o_num_memberships; i++) { 795 if (im6o->im6o_membership[i]->in6m_ifp == 796 ifp) { 797 in6_mc_leave(im6o->im6o_membership[i], 798 NULL); 799 gap++; 800 } else if (gap != 0) { 801 im6o->im6o_membership[i - gap] = 802 im6o->im6o_membership[i]; 803 } 804 } 805 im6o->im6o_num_memberships -= gap; 806 } 807 INP_WUNLOCK(in6p); 808 } 809 INP_INFO_RUNLOCK(pcbinfo); 810 } 811 812 /* 813 * Check for alternatives when higher level complains 814 * about service problems. For now, invalidate cached 815 * routing information. If the route was created dynamically 816 * (by a redirect), time to try a default gateway again. 817 */ 818 void 819 in6_losing(struct inpcb *in6p) 820 { 821 822 /* 823 * We don't store route pointers in the routing table anymore 824 */ 825 return; 826 } 827 828 /* 829 * After a routing change, flush old routing 830 * and allocate a (hopefully) better one. 831 */ 832 struct inpcb * 833 in6_rtchange(struct inpcb *inp, int errno) 834 { 835 /* 836 * We don't store route pointers in the routing table anymore 837 */ 838 return inp; 839 } 840 841 #ifdef PCBGROUP 842 /* 843 * Lookup PCB in hash list, using pcbgroup tables. 844 */ 845 static struct inpcb * 846 in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, 847 struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, 848 u_int lport_arg, int lookupflags, struct ifnet *ifp) 849 { 850 struct inpcbhead *head; 851 struct inpcb *inp, *tmpinp; 852 u_short fport = fport_arg, lport = lport_arg; 853 int faith; 854 855 if (faithprefix_p != NULL) 856 faith = (*faithprefix_p)(laddr); 857 else 858 faith = 0; 859 860 /* 861 * First look for an exact match. 862 */ 863 tmpinp = NULL; 864 INP_GROUP_LOCK(pcbgroup); 865 head = &pcbgroup->ipg_hashbase[ 866 INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, 867 pcbgroup->ipg_hashmask)]; 868 LIST_FOREACH(inp, head, inp_pcbgrouphash) { 869 /* XXX inp locking */ 870 if ((inp->inp_vflag & INP_IPV6) == 0) 871 continue; 872 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 873 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 874 inp->inp_fport == fport && 875 inp->inp_lport == lport) { 876 /* 877 * XXX We should be able to directly return 878 * the inp here, without any checks. 879 * Well unless both bound with SO_REUSEPORT? 880 */ 881 if (prison_flag(inp->inp_cred, PR_IP6)) 882 goto found; 883 if (tmpinp == NULL) 884 tmpinp = inp; 885 } 886 } 887 if (tmpinp != NULL) { 888 inp = tmpinp; 889 goto found; 890 } 891 892 /* 893 * Then look for a wildcard match, if requested. 894 */ 895 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { 896 struct inpcb *local_wild = NULL, *local_exact = NULL; 897 struct inpcb *jail_wild = NULL; 898 int injail; 899 900 /* 901 * Order of socket selection - we always prefer jails. 902 * 1. jailed, non-wild. 903 * 2. jailed, wild. 904 * 3. non-jailed, non-wild. 905 * 4. non-jailed, wild. 906 */ 907 head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, 908 0, pcbinfo->ipi_wildmask)]; 909 LIST_FOREACH(inp, head, inp_pcbgroup_wild) { 910 /* XXX inp locking */ 911 if ((inp->inp_vflag & INP_IPV6) == 0) 912 continue; 913 914 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 915 inp->inp_lport != lport) { 916 continue; 917 } 918 919 /* XXX inp locking */ 920 if (faith && (inp->inp_flags & INP_FAITH) == 0) 921 continue; 922 923 injail = prison_flag(inp->inp_cred, PR_IP6); 924 if (injail) { 925 if (prison_check_ip6(inp->inp_cred, 926 laddr) != 0) 927 continue; 928 } else { 929 if (local_exact != NULL) 930 continue; 931 } 932 933 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { 934 if (injail) 935 goto found; 936 else 937 local_exact = inp; 938 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 939 if (injail) 940 jail_wild = inp; 941 else 942 local_wild = inp; 943 } 944 } /* LIST_FOREACH */ 945 946 inp = jail_wild; 947 if (inp == NULL) 948 inp = jail_wild; 949 if (inp == NULL) 950 inp = local_exact; 951 if (inp == NULL) 952 inp = local_wild; 953 if (inp != NULL) 954 goto found; 955 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ 956 INP_GROUP_UNLOCK(pcbgroup); 957 return (NULL); 958 959 found: 960 in_pcbref(inp); 961 INP_GROUP_UNLOCK(pcbgroup); 962 if (lookupflags & INPLOOKUP_WLOCKPCB) { 963 INP_WLOCK(inp); 964 if (in_pcbrele_wlocked(inp)) 965 return (NULL); 966 } else if (lookupflags & INPLOOKUP_RLOCKPCB) { 967 INP_RLOCK(inp); 968 if (in_pcbrele_rlocked(inp)) 969 return (NULL); 970 } else 971 panic("%s: locking buf", __func__); 972 return (inp); 973 } 974 #endif /* PCBGROUP */ 975 976 /* 977 * Lookup PCB in hash list. 978 */ 979 struct inpcb * 980 in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 981 u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, 982 int lookupflags, struct ifnet *ifp) 983 { 984 struct inpcbhead *head; 985 struct inpcb *inp, *tmpinp; 986 u_short fport = fport_arg, lport = lport_arg; 987 int faith; 988 989 KASSERT((lookupflags & ~(INPLOOKUP_WILDCARD)) == 0, 990 ("%s: invalid lookup flags %d", __func__, lookupflags)); 991 992 INP_HASH_LOCK_ASSERT(pcbinfo); 993 994 if (faithprefix_p != NULL) 995 faith = (*faithprefix_p)(laddr); 996 else 997 faith = 0; 998 999 /* 1000 * First look for an exact match. 1001 */ 1002 tmpinp = NULL; 1003 head = &pcbinfo->ipi_hashbase[ 1004 INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, 1005 pcbinfo->ipi_hashmask)]; 1006 LIST_FOREACH(inp, head, inp_hash) { 1007 /* XXX inp locking */ 1008 if ((inp->inp_vflag & INP_IPV6) == 0) 1009 continue; 1010 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && 1011 IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && 1012 inp->inp_fport == fport && 1013 inp->inp_lport == lport) { 1014 /* 1015 * XXX We should be able to directly return 1016 * the inp here, without any checks. 1017 * Well unless both bound with SO_REUSEPORT? 1018 */ 1019 if (prison_flag(inp->inp_cred, PR_IP6)) 1020 return (inp); 1021 if (tmpinp == NULL) 1022 tmpinp = inp; 1023 } 1024 } 1025 if (tmpinp != NULL) 1026 return (tmpinp); 1027 1028 /* 1029 * Then look for a wildcard match, if requested. 1030 */ 1031 if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { 1032 struct inpcb *local_wild = NULL, *local_exact = NULL; 1033 struct inpcb *jail_wild = NULL; 1034 int injail; 1035 1036 /* 1037 * Order of socket selection - we always prefer jails. 1038 * 1. jailed, non-wild. 1039 * 2. jailed, wild. 1040 * 3. non-jailed, non-wild. 1041 * 4. non-jailed, wild. 1042 */ 1043 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport, 1044 0, pcbinfo->ipi_hashmask)]; 1045 LIST_FOREACH(inp, head, inp_hash) { 1046 /* XXX inp locking */ 1047 if ((inp->inp_vflag & INP_IPV6) == 0) 1048 continue; 1049 1050 if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || 1051 inp->inp_lport != lport) { 1052 continue; 1053 } 1054 1055 /* XXX inp locking */ 1056 if (faith && (inp->inp_flags & INP_FAITH) == 0) 1057 continue; 1058 1059 injail = prison_flag(inp->inp_cred, PR_IP6); 1060 if (injail) { 1061 if (prison_check_ip6(inp->inp_cred, 1062 laddr) != 0) 1063 continue; 1064 } else { 1065 if (local_exact != NULL) 1066 continue; 1067 } 1068 1069 if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { 1070 if (injail) 1071 return (inp); 1072 else 1073 local_exact = inp; 1074 } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 1075 if (injail) 1076 jail_wild = inp; 1077 else 1078 local_wild = inp; 1079 } 1080 } /* LIST_FOREACH */ 1081 1082 if (jail_wild != NULL) 1083 return (jail_wild); 1084 if (local_exact != NULL) 1085 return (local_exact); 1086 if (local_wild != NULL) 1087 return (local_wild); 1088 } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ 1089 1090 /* 1091 * Not found. 1092 */ 1093 return (NULL); 1094 } 1095 1096 /* 1097 * Lookup PCB in hash list, using pcbinfo tables. This variation locks the 1098 * hash list lock, and will return the inpcb locked (i.e., requires 1099 * INPLOOKUP_LOCKPCB). 1100 */ 1101 static struct inpcb * 1102 in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 1103 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, 1104 struct ifnet *ifp) 1105 { 1106 struct inpcb *inp; 1107 1108 INP_HASH_RLOCK(pcbinfo); 1109 inp = in6_pcblookup_hash_locked(pcbinfo, faddr, fport, laddr, lport, 1110 (lookupflags & ~(INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)), ifp); 1111 if (inp != NULL) { 1112 in_pcbref(inp); 1113 INP_HASH_RUNLOCK(pcbinfo); 1114 if (lookupflags & INPLOOKUP_WLOCKPCB) { 1115 INP_WLOCK(inp); 1116 if (in_pcbrele_wlocked(inp)) 1117 return (NULL); 1118 } else if (lookupflags & INPLOOKUP_RLOCKPCB) { 1119 INP_RLOCK(inp); 1120 if (in_pcbrele_rlocked(inp)) 1121 return (NULL); 1122 } else 1123 panic("%s: locking bug", __func__); 1124 } else 1125 INP_HASH_RUNLOCK(pcbinfo); 1126 return (inp); 1127 } 1128 1129 /* 1130 * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf 1131 * from which a pre-calculated hash value may be extracted. 1132 * 1133 * Possibly more of this logic should be in in6_pcbgroup.c. 1134 */ 1135 struct inpcb * 1136 in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, 1137 struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) 1138 { 1139 #if defined(PCBGROUP) 1140 struct inpcbgroup *pcbgroup; 1141 #endif 1142 1143 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, 1144 ("%s: invalid lookup flags %d", __func__, lookupflags)); 1145 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, 1146 ("%s: LOCKPCB not set", __func__)); 1147 1148 #if defined(PCBGROUP) 1149 if (in_pcbgroup_enabled(pcbinfo)) { 1150 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, 1151 fport); 1152 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, 1153 laddr, lport, lookupflags, ifp)); 1154 } 1155 #endif 1156 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, 1157 lookupflags, ifp)); 1158 } 1159 1160 struct inpcb * 1161 in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, 1162 u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, 1163 struct ifnet *ifp, struct mbuf *m) 1164 { 1165 #ifdef PCBGROUP 1166 struct inpcbgroup *pcbgroup; 1167 #endif 1168 1169 KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, 1170 ("%s: invalid lookup flags %d", __func__, lookupflags)); 1171 KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, 1172 ("%s: LOCKPCB not set", __func__)); 1173 1174 #ifdef PCBGROUP 1175 if (in_pcbgroup_enabled(pcbinfo)) { 1176 pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), 1177 m->m_pkthdr.flowid); 1178 if (pcbgroup != NULL) 1179 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, 1180 fport, laddr, lport, lookupflags, ifp)); 1181 pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, 1182 fport); 1183 return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, 1184 laddr, lport, lookupflags, ifp)); 1185 } 1186 #endif 1187 return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, 1188 lookupflags, ifp)); 1189 } 1190 1191 void 1192 init_sin6(struct sockaddr_in6 *sin6, struct mbuf *m) 1193 { 1194 struct ip6_hdr *ip; 1195 1196 ip = mtod(m, struct ip6_hdr *); 1197 bzero(sin6, sizeof(*sin6)); 1198 sin6->sin6_len = sizeof(*sin6); 1199 sin6->sin6_family = AF_INET6; 1200 sin6->sin6_addr = ip->ip6_src; 1201 1202 (void)sa6_recoverscope(sin6); /* XXX: should catch errors... */ 1203 1204 return; 1205 } 1206