1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 3. Neither the name of the project nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 22 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 29 * SUCH DAMAGE. 30 * 31 * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ 32 */ 33 34 /*- 35 * Copyright (c) 1982, 1986, 1991, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * Redistribution and use in source and binary forms, with or without 39 * modification, are permitted provided that the following conditions 40 * are met: 41 * 1. Redistributions of source code must retain the above copyright 42 * notice, this list of conditions and the following disclaimer. 43 * 2. Redistributions in binary form must reproduce the above copyright 44 * notice, this list of conditions and the following disclaimer in the 45 * documentation and/or other materials provided with the distribution. 46 * 3. Neither the name of the University nor the names of its contributors 47 * may be used to endorse or promote products derived from this software 48 * without specific prior written permission. 49 * 50 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 51 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 52 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 53 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 54 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 55 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 56 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 57 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 58 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 59 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 60 * SUCH DAMAGE. 61 * 62 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 63 */ 64 65 #include <sys/cdefs.h> 66 #include "opt_inet.h" 67 #include "opt_inet6.h" 68 #include <sys/param.h> 69 #include <sys/systm.h> 70 #include <sys/lock.h> 71 #include <sys/malloc.h> 72 #include <sys/mbuf.h> 73 #include <sys/priv.h> 74 #include <sys/protosw.h> 75 #include <sys/socket.h> 76 #include <sys/socketvar.h> 77 #include <sys/sockio.h> 78 #include <sys/sysctl.h> 79 #include <sys/errno.h> 80 #include <sys/time.h> 81 #include <sys/jail.h> 82 #include <sys/kernel.h> 83 #include <sys/rmlock.h> 84 #include <sys/sx.h> 85 86 #include <net/if.h> 87 #include <net/if_var.h> 88 #include <net/if_dl.h> 89 #include <net/if_private.h> 90 #include <net/route.h> 91 #include <net/route/nhop.h> 92 #include <net/if_llatbl.h> 93 94 #include <netinet/in.h> 95 #include <netinet/in_var.h> 96 #include <netinet/in_systm.h> 97 #include <netinet/ip.h> 98 #include <netinet/in_pcb.h> 99 #include <netinet/ip_var.h> 100 #include <netinet/udp.h> 101 #include <netinet/udp_var.h> 102 103 #include <netinet6/in6_var.h> 104 #include <netinet/ip6.h> 105 #include <netinet6/in6_fib.h> 106 #include <netinet6/in6_pcb.h> 107 #include <netinet6/ip6_var.h> 108 #include <netinet6/scope6_var.h> 109 #include <netinet6/nd6.h> 110 111 static struct mtx addrsel_lock; 112 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 113 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 114 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 115 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 116 117 static struct sx addrsel_sxlock; 118 #define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") 119 #define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) 120 #define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) 121 #define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) 122 #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) 123 124 #define ADDR_LABEL_NOTAPP (-1) 125 VNET_DEFINE_STATIC(struct in6_addrpolicy, defaultaddrpolicy); 126 #define V_defaultaddrpolicy VNET(defaultaddrpolicy) 127 128 VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; 129 130 static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, 131 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 132 struct nhop_object **, int, u_int, uint32_t); 133 static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, 134 struct ip6_moptions *, struct ifnet **, 135 struct ifnet *, u_int); 136 static int in6_selectsrc(uint32_t, struct sockaddr_in6 *, 137 struct ip6_pktopts *, struct inpcb *, struct ucred *, 138 struct ifnet **, struct in6_addr *); 139 140 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); 141 142 static void init_policy_queue(void); 143 static int add_addrsel_policyent(struct in6_addrpolicy *); 144 static int delete_addrsel_policyent(struct in6_addrpolicy *); 145 static int walk_addrsel_policy(int (*)(struct in6_addrpolicy *, void *), 146 void *); 147 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); 148 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); 149 150 /* 151 * Return an IPv6 address, which is the most appropriate for a given 152 * destination and user specified options. 153 * If necessary, this function lookups the routing table and returns 154 * an entry to the caller for later use. 155 */ 156 #define REPLACE(r) do {\ 157 IP6STAT_INC(ip6s_sources_rule[(r)]); \ 158 /* { \ 159 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 160 printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 161 } */ \ 162 goto replace; \ 163 } while(0) 164 #define NEXT(r) do {\ 165 /* { \ 166 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 167 printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 168 } */ \ 169 goto next; /* XXX: we can't use 'continue' here */ \ 170 } while(0) 171 #define BREAK(r) do { \ 172 IP6STAT_INC(ip6s_sources_rule[(r)]); \ 173 goto out; /* XXX: we can't use 'break' here */ \ 174 } while(0) 175 176 static int 177 in6_selectsrc(uint32_t fibnum, struct sockaddr_in6 *dstsock, 178 struct ip6_pktopts *opts, struct inpcb *inp, struct ucred *cred, 179 struct ifnet **ifpp, struct in6_addr *srcp) 180 { 181 struct rm_priotracker in6_ifa_tracker; 182 struct in6_addr dst, tmp; 183 struct ifnet *ifp = NULL, *oifp = NULL; 184 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 185 struct in6_pktinfo *pi = NULL; 186 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 187 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 188 u_int32_t odstzone; 189 int prefer_tempaddr; 190 int error; 191 struct ip6_moptions *mopts; 192 193 NET_EPOCH_ASSERT(); 194 KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); 195 196 dst = dstsock->sin6_addr; /* make a copy for local operation */ 197 if (ifpp) { 198 /* 199 * Save a possibly passed in ifp for in6_selectsrc. Only 200 * neighbor discovery code should use this feature, where 201 * we may know the interface but not the FIB number holding 202 * the connected subnet in case someone deleted it from the 203 * default FIB and we need to check the interface. 204 */ 205 if (*ifpp != NULL) 206 oifp = *ifpp; 207 *ifpp = NULL; 208 } 209 210 if (inp != NULL) { 211 INP_LOCK_ASSERT(inp); 212 mopts = inp->in6p_moptions; 213 } else { 214 mopts = NULL; 215 } 216 217 /* 218 * If the source address is explicitly specified by the caller, 219 * check if the requested source address is indeed a unicast address 220 * assigned to the node, and can be used as the packet's source 221 * address. If everything is okay, use the address as source. 222 */ 223 if (opts && (pi = opts->ip6po_pktinfo) && 224 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 225 /* get the outgoing interface */ 226 if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, 227 fibnum)) 228 != 0) 229 return (error); 230 231 /* 232 * determine the appropriate zone id of the source based on 233 * the zone of the destination and the outgoing interface. 234 * If the specified address is ambiguous wrt the scope zone, 235 * the interface must be specified; otherwise, ifa_ifwithaddr() 236 * will fail matching the address. 237 */ 238 tmp = pi->ipi6_addr; 239 if (ifp) { 240 error = in6_setscope(&tmp, ifp, &odstzone); 241 if (error) 242 return (error); 243 } 244 if (cred != NULL && (error = prison_local_ip6(cred, 245 &tmp, (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) 246 return (error); 247 248 /* 249 * If IPV6_BINDANY socket option is set, we allow to specify 250 * non local addresses as source address in IPV6_PKTINFO 251 * ancillary data. 252 */ 253 if ((inp->inp_flags & INP_BINDANY) == 0) { 254 ia = in6ifa_ifwithaddr(&tmp, 0 /* XXX */, false); 255 if (ia == NULL || (ia->ia6_flags & (IN6_IFF_ANYCAST | 256 IN6_IFF_NOTREADY))) 257 return (EADDRNOTAVAIL); 258 bcopy(&ia->ia_addr.sin6_addr, srcp, sizeof(*srcp)); 259 } else 260 bcopy(&tmp, srcp, sizeof(*srcp)); 261 pi->ipi6_addr = tmp; /* XXX: this overrides pi */ 262 if (ifpp) 263 *ifpp = ifp; 264 return (0); 265 } 266 267 /* 268 * Otherwise, if the socket has already bound the source, just use it. 269 */ 270 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 271 if (cred != NULL && 272 (error = prison_local_ip6(cred, &inp->in6p_laddr, 273 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 274 return (error); 275 bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); 276 return (0); 277 } 278 279 /* 280 * Bypass source address selection and use the primary jail IP 281 * if requested. 282 */ 283 if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) 284 return (0); 285 286 /* 287 * If the address is not specified, choose the best one based on 288 * the outgoing interface and the destination address. 289 */ 290 /* get the outgoing interface */ 291 if ((error = in6_selectif(dstsock, opts, mopts, &ifp, oifp, 292 (inp != NULL) ? inp->inp_inc.inc_fibnum : fibnum)) != 0) 293 return (error); 294 295 #ifdef DIAGNOSTIC 296 if (ifp == NULL) /* this should not happen */ 297 panic("in6_selectsrc: NULL ifp"); 298 #endif 299 error = in6_setscope(&dst, ifp, &odstzone); 300 if (error) 301 return (error); 302 303 IN6_IFADDR_RLOCK(&in6_ifa_tracker); 304 CK_STAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 305 int new_scope = -1, new_matchlen = -1; 306 struct in6_addrpolicy *new_policy = NULL; 307 u_int32_t srczone, osrczone, dstzone; 308 struct in6_addr src; 309 struct ifnet *ifp1 = ia->ia_ifp; 310 311 /* 312 * We'll never take an address that breaks the scope zone 313 * of the destination. We also skip an address if its zone 314 * does not contain the outgoing interface. 315 * XXX: we should probably use sin6_scope_id here. 316 */ 317 if (in6_setscope(&dst, ifp1, &dstzone) || 318 odstzone != dstzone) { 319 continue; 320 } 321 src = ia->ia_addr.sin6_addr; 322 if (in6_setscope(&src, ifp, &osrczone) || 323 in6_setscope(&src, ifp1, &srczone) || 324 osrczone != srczone) { 325 continue; 326 } 327 328 /* avoid unusable addresses */ 329 if ((ia->ia6_flags & 330 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 331 continue; 332 } 333 if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 334 continue; 335 336 /* If jailed only take addresses of the jail into account. */ 337 if (cred != NULL && 338 prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) 339 continue; 340 341 /* Rule 1: Prefer same address */ 342 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 343 ia_best = ia; 344 BREAK(1); /* there should be no better candidate */ 345 } 346 347 if (ia_best == NULL) 348 REPLACE(0); 349 350 /* Rule 2: Prefer appropriate scope */ 351 if (dst_scope < 0) 352 dst_scope = in6_addrscope(&dst); 353 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 354 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 355 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 356 REPLACE(2); 357 NEXT(2); 358 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 359 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 360 NEXT(2); 361 REPLACE(2); 362 } 363 364 /* 365 * Rule 3: Avoid deprecated addresses. Note that the case of 366 * !ip6_use_deprecated is already rejected above. 367 */ 368 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 369 NEXT(3); 370 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 371 REPLACE(3); 372 373 /* Rule 4: Prefer home addresses */ 374 /* 375 * XXX: This is a TODO. We should probably merge the MIP6 376 * case above. 377 */ 378 379 /* Rule 5: Prefer outgoing interface */ 380 if (!(ND_IFINFO(ifp)->flags & ND6_IFF_NO_PREFER_IFACE)) { 381 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 382 NEXT(5); 383 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 384 REPLACE(5); 385 } 386 387 /* 388 * Rule 6: Prefer matching label 389 * Note that best_policy should be non-NULL here. 390 */ 391 if (dst_policy == NULL) 392 dst_policy = lookup_addrsel_policy(dstsock); 393 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 394 new_policy = lookup_addrsel_policy(&ia->ia_addr); 395 if (dst_policy->label == best_policy->label && 396 dst_policy->label != new_policy->label) 397 NEXT(6); 398 if (dst_policy->label != best_policy->label && 399 dst_policy->label == new_policy->label) 400 REPLACE(6); 401 } 402 403 /* 404 * Rule 7: Prefer public addresses. 405 * We allow users to reverse the logic by configuring 406 * a sysctl variable, so that privacy conscious users can 407 * always prefer temporary addresses. 408 */ 409 if (opts == NULL || 410 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 411 prefer_tempaddr = V_ip6_prefer_tempaddr; 412 } else if (opts->ip6po_prefer_tempaddr == 413 IP6PO_TEMPADDR_NOTPREFER) { 414 prefer_tempaddr = 0; 415 } else 416 prefer_tempaddr = 1; 417 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 418 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 419 if (prefer_tempaddr) 420 REPLACE(7); 421 else 422 NEXT(7); 423 } 424 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 425 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 426 if (prefer_tempaddr) 427 NEXT(7); 428 else 429 REPLACE(7); 430 } 431 432 /* 433 * Rule 8: prefer addresses on alive interfaces. 434 * This is a KAME specific rule. 435 */ 436 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 437 !(ia->ia_ifp->if_flags & IFF_UP)) 438 NEXT(8); 439 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 440 (ia->ia_ifp->if_flags & IFF_UP)) 441 REPLACE(8); 442 443 /* 444 * Rule 9: prefer address with better virtual status. 445 */ 446 if (ifa_preferred(&ia_best->ia_ifa, &ia->ia_ifa)) 447 REPLACE(9); 448 if (ifa_preferred(&ia->ia_ifa, &ia_best->ia_ifa)) 449 NEXT(9); 450 451 /* 452 * Rule 10: prefer address with `prefer_source' flag. 453 */ 454 if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0 && 455 (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0) 456 REPLACE(10); 457 if ((ia_best->ia6_flags & IN6_IFF_PREFER_SOURCE) != 0 && 458 (ia->ia6_flags & IN6_IFF_PREFER_SOURCE) == 0) 459 NEXT(10); 460 461 /* 462 * Rule 14: Use longest matching prefix. 463 * Note: in the address selection draft, this rule is 464 * documented as "Rule 8". However, since it is also 465 * documented that this rule can be overridden, we assign 466 * a large number so that it is easy to assign smaller numbers 467 * to more preferred rules. 468 */ 469 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 470 if (best_matchlen < new_matchlen) 471 REPLACE(14); 472 if (new_matchlen < best_matchlen) 473 NEXT(14); 474 475 /* Rule 15 is reserved. */ 476 477 /* 478 * Last resort: just keep the current candidate. 479 * Or, do we need more rules? 480 */ 481 continue; 482 483 replace: 484 ia_best = ia; 485 best_scope = (new_scope >= 0 ? new_scope : 486 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 487 best_policy = (new_policy ? new_policy : 488 lookup_addrsel_policy(&ia_best->ia_addr)); 489 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 490 in6_matchlen(&ia_best->ia_addr.sin6_addr, 491 &dst)); 492 493 next: 494 continue; 495 496 out: 497 break; 498 } 499 500 if ((ia = ia_best) == NULL) { 501 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 502 IP6STAT_INC(ip6s_sources_none); 503 return (EADDRNOTAVAIL); 504 } 505 506 /* 507 * At this point at least one of the addresses belonged to the jail 508 * but it could still be, that we want to further restrict it, e.g. 509 * theoratically IN6_IS_ADDR_LOOPBACK. 510 * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. 511 * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should 512 * let all others previously selected pass. 513 * Use tmp to not change ::1 on lo0 to the primary jail address. 514 */ 515 tmp = ia->ia_addr.sin6_addr; 516 if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && 517 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { 518 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 519 IP6STAT_INC(ip6s_sources_none); 520 return (EADDRNOTAVAIL); 521 } 522 523 if (ifpp) 524 *ifpp = ifp; 525 526 bcopy(&tmp, srcp, sizeof(*srcp)); 527 if (ia->ia_ifp == ifp) 528 IP6STAT_INC(ip6s_sources_sameif[best_scope]); 529 else 530 IP6STAT_INC(ip6s_sources_otherif[best_scope]); 531 if (dst_scope == best_scope) 532 IP6STAT_INC(ip6s_sources_samescope[best_scope]); 533 else 534 IP6STAT_INC(ip6s_sources_otherscope[best_scope]); 535 if (IFA6_IS_DEPRECATED(ia)) 536 IP6STAT_INC(ip6s_sources_deprecated[best_scope]); 537 IN6_IFADDR_RUNLOCK(&in6_ifa_tracker); 538 return (0); 539 } 540 541 /* 542 * Select source address based on @inp, @dstsock and @opts. 543 * Stores selected address to @srcp. If @scope_ambiguous is set, 544 * embed scope from selected outgoing interface. If @hlim pointer 545 * is provided, stores calculated hop limit there. 546 * Returns 0 on success. 547 */ 548 int 549 in6_selectsrc_socket(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 550 struct inpcb *inp, struct ucred *cred, int scope_ambiguous, 551 struct in6_addr *srcp, int *hlim) 552 { 553 struct ifnet *retifp; 554 uint32_t fibnum; 555 int error; 556 557 fibnum = inp->inp_inc.inc_fibnum; 558 retifp = NULL; 559 560 error = in6_selectsrc(fibnum, dstsock, opts, inp, cred, &retifp, srcp); 561 if (error != 0) 562 return (error); 563 564 if (hlim != NULL) 565 *hlim = in6_selecthlim(inp, retifp); 566 567 if (retifp == NULL || scope_ambiguous == 0) 568 return (0); 569 570 /* 571 * Application should provide a proper zone ID or the use of 572 * default zone IDs should be enabled. Unfortunately, some 573 * applications do not behave as it should, so we need a 574 * workaround. Even if an appropriate ID is not determined 575 * (when it's required), if we can determine the outgoing 576 * interface. determine the zone ID based on the interface. 577 */ 578 error = in6_setscope(&dstsock->sin6_addr, retifp, NULL); 579 580 return (error); 581 } 582 583 /* 584 * Select source address based on @fibnum, @dst and @scopeid. 585 * Stores selected address to @srcp. 586 * Returns 0 on success. 587 * 588 * Used by non-socket based consumers (ND code mostly) 589 */ 590 int 591 in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst, 592 uint32_t scopeid, struct ifnet *ifp, struct in6_addr *srcp, 593 int *hlim) 594 { 595 struct ifnet *retifp; 596 struct sockaddr_in6 dst_sa; 597 int error; 598 599 retifp = ifp; 600 bzero(&dst_sa, sizeof(dst_sa)); 601 dst_sa.sin6_family = AF_INET6; 602 dst_sa.sin6_len = sizeof(dst_sa); 603 dst_sa.sin6_addr = *dst; 604 dst_sa.sin6_scope_id = scopeid; 605 sa6_embedscope(&dst_sa, 0); 606 607 error = in6_selectsrc(fibnum, &dst_sa, NULL, NULL, NULL, &retifp, srcp); 608 if (hlim != NULL) 609 *hlim = in6_selecthlim(NULL, retifp); 610 611 return (error); 612 } 613 614 static struct nhop_object * 615 cache_route(uint32_t fibnum, const struct sockaddr_in6 *dst, struct route_in6 *ro, 616 uint32_t flowid) 617 { 618 /* 619 * Use a cached route if it exists and is valid, else try to allocate 620 * a new one. Note that we should check the address family of the 621 * cached destination, in case of sharing the cache with IPv4. 622 * Assumes that 'struct route_in6' is exclusively locked. 623 */ 624 if (ro->ro_nh != NULL && ( 625 !NH_IS_VALID(ro->ro_nh) || ro->ro_dst.sin6_family != AF_INET6 || 626 !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &dst->sin6_addr))) 627 RO_NHFREE(ro); 628 629 if (ro->ro_nh == NULL) { 630 ro->ro_dst = *dst; 631 632 const struct in6_addr *paddr; 633 struct in6_addr unscoped_addr; 634 uint32_t scopeid = 0; 635 if (IN6_IS_SCOPE_LINKLOCAL(&dst->sin6_addr)) { 636 in6_splitscope(&dst->sin6_addr, &unscoped_addr, &scopeid); 637 paddr = &unscoped_addr; 638 } else 639 paddr = &dst->sin6_addr; 640 ro->ro_nh = fib6_lookup(fibnum, paddr, scopeid, NHR_REF, flowid); 641 } 642 return (ro->ro_nh); 643 } 644 645 static struct nhop_object * 646 lookup_route(uint32_t fibnum, struct sockaddr_in6 *dst, struct route_in6 *ro, 647 struct ip6_pktopts *opts, uint32_t flowid) 648 { 649 struct nhop_object *nh = NULL; 650 651 /* 652 * If the next hop address for the packet is specified by the caller, 653 * use it as the gateway. 654 */ 655 if (opts && opts->ip6po_nexthop) { 656 struct route_in6 *ron = &opts->ip6po_nextroute; 657 struct sockaddr_in6 *sin6_next = satosin6(opts->ip6po_nexthop); 658 659 nh = cache_route(fibnum, sin6_next, ron, flowid); 660 661 /* 662 * The node identified by that address must be a 663 * neighbor of the sending host. 664 */ 665 if (nh != NULL && (nh->nh_flags & NHF_GATEWAY) != 0) 666 nh = NULL; 667 } else if (ro != NULL) { 668 nh = cache_route(fibnum, dst, ro, flowid); 669 if (nh == NULL) 670 return (NULL); 671 672 /* 673 * Check if the outgoing interface conflicts with 674 * the interface specified by ipi6_ifindex (if specified). 675 */ 676 struct in6_pktinfo *pi; 677 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 678 if (nh->nh_aifp->if_index != pi->ipi6_ifindex) 679 nh = NULL; 680 } 681 } 682 683 return (nh); 684 } 685 686 /* 687 * Finds outgoing nexthop or the outgoing interface for the 688 * @dstsock. 689 * Return 0 on success and stores the lookup result in @retnh and @retifp 690 */ 691 static int 692 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 693 struct ip6_moptions *mopts, struct route_in6 *ro, 694 struct ifnet **retifp, struct nhop_object **retnh, int norouteok, 695 u_int fibnum, uint32_t flowid) 696 { 697 int error = 0; 698 struct ifnet *ifp = NULL; 699 struct in6_pktinfo *pi = NULL; 700 struct in6_addr *dst = &dstsock->sin6_addr; 701 702 /* If the caller specify the outgoing interface explicitly, use it. */ 703 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 704 /* XXX boundary check is assumed to be already done. */ 705 ifp = ifnet_byindex(pi->ipi6_ifindex); 706 if (ifp != NULL && (norouteok || IN6_IS_ADDR_MULTICAST(dst))) { 707 /* 708 * we do not have to check or get the route for 709 * multicast. 710 */ 711 goto done; 712 } else 713 goto getroute; 714 } 715 /* 716 * If the destination address is a multicast address and the outgoing 717 * interface for the address is specified by the caller, use it. 718 */ 719 if (IN6_IS_ADDR_MULTICAST(dst) && 720 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 721 goto done; /* we do not need a route for multicast. */ 722 } 723 /* 724 * If destination address is LLA or link- or node-local multicast, 725 * use it's embedded scope zone id to determine outgoing interface. 726 */ 727 if (IN6_IS_ADDR_MC_LINKLOCAL(dst) || 728 IN6_IS_ADDR_MC_NODELOCAL(dst)) { 729 uint32_t zoneid = ntohs(in6_getscope(dst)); 730 if (zoneid > 0) { 731 ifp = in6_getlinkifnet(zoneid); 732 goto done; 733 } 734 } 735 736 getroute:; 737 struct nhop_object *nh = lookup_route(fibnum, dstsock, ro, opts, flowid); 738 if (nh != NULL) { 739 *retifp = nh->nh_aifp; 740 error = 0; 741 } else { 742 *retifp = NULL; 743 IP6STAT_INC(ip6s_noroute); 744 error = EHOSTUNREACH; 745 } 746 *retnh = nh; 747 return (error); 748 749 done: 750 if (ifp == NULL) { 751 /* 752 * This can happen if the caller did not pass a cached route 753 * nor any other hints. We treat this case an error. 754 */ 755 error = EHOSTUNREACH; 756 } 757 if (error == EHOSTUNREACH) 758 IP6STAT_INC(ip6s_noroute); 759 760 *retifp = ifp; 761 *retnh = NULL; 762 763 return (error); 764 } 765 766 static int 767 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 768 struct ip6_moptions *mopts, struct ifnet **retifp, 769 struct ifnet *oifp, u_int fibnum) 770 { 771 int error; 772 struct route_in6 sro; 773 struct nhop_object *nh = NULL; 774 uint16_t nh_flags; 775 776 KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); 777 778 bzero(&sro, sizeof(sro)); 779 nh_flags = 0; 780 781 error = selectroute(dstsock, opts, mopts, &sro, retifp, &nh, 1, fibnum, 0); 782 783 if (nh != NULL) 784 nh_flags = nh->nh_flags; 785 if (nh != NULL && nh == sro.ro_nh) 786 NH_FREE(nh); 787 788 if (error != 0) { 789 /* Help ND. See oifp comment in in6_selectsrc(). */ 790 if (oifp != NULL && fibnum == RT_DEFAULT_FIB) { 791 *retifp = oifp; 792 error = 0; 793 } 794 return (error); 795 } 796 797 /* 798 * do not use a rejected or black hole route. 799 * XXX: this check should be done in the L2 output routine. 800 * However, if we skipped this check here, we'd see the following 801 * scenario: 802 * - install a rejected route for a scoped address prefix 803 * (like fe80::/10) 804 * - send a packet to a destination that matches the scoped prefix, 805 * with ambiguity about the scope zone. 806 * - pick the outgoing interface from the route, and disambiguate the 807 * scope zone with the interface. 808 * - ip6_output() would try to get another route with the "new" 809 * destination, which may be valid. 810 * - we'd see no error on output. 811 * Although this may not be very harmful, it should still be confusing. 812 * We thus reject the case here. 813 */ 814 815 if (nh_flags & (NHF_REJECT | NHF_BLACKHOLE)) { 816 error = (nh_flags & NHF_HOST ? EHOSTUNREACH : ENETUNREACH); 817 return (error); 818 } 819 820 return (0); 821 } 822 823 /* Public wrapper function to selectroute(). */ 824 int 825 in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 826 struct ip6_moptions *mopts, struct route_in6 *ro, 827 struct ifnet **retifp, struct nhop_object **retnh, u_int fibnum, uint32_t flowid) 828 { 829 MPASS(retifp != NULL); 830 MPASS(retnh != NULL); 831 832 return (selectroute(dstsock, opts, mopts, ro, retifp, 833 retnh, 0, fibnum, flowid)); 834 } 835 836 /* 837 * Default hop limit selection. The precedence is as follows: 838 * 1. Hoplimit value specified via ioctl. 839 * 2. (If the outgoing interface is detected) the current 840 * hop limit of the interface specified by router advertisement. 841 * 3. The system default hoplimit. 842 */ 843 int 844 in6_selecthlim(struct inpcb *inp, struct ifnet *ifp) 845 { 846 847 if (inp && inp->in6p_hops >= 0) 848 return (inp->in6p_hops); 849 else if (ifp) 850 return (ND_IFINFO(ifp)->chlim); 851 else if (inp && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)) { 852 struct nhop_object *nh; 853 struct in6_addr dst; 854 uint32_t fibnum, scopeid; 855 int hlim; 856 857 fibnum = inp->inp_inc.inc_fibnum; 858 in6_splitscope(&inp->in6p_faddr, &dst, &scopeid); 859 nh = fib6_lookup(fibnum, &dst, scopeid, 0, 0); 860 if (nh != NULL) { 861 hlim = ND_IFINFO(nh->nh_ifp)->chlim; 862 return (hlim); 863 } 864 } 865 return (V_ip6_defhlim); 866 } 867 868 void 869 addrsel_policy_init(void) 870 { 871 872 init_policy_queue(); 873 874 /* initialize the "last resort" policy */ 875 bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); 876 V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 877 878 if (!IS_DEFAULT_VNET(curvnet)) 879 return; 880 881 ADDRSEL_LOCK_INIT(); 882 ADDRSEL_SXLOCK_INIT(); 883 } 884 885 static struct in6_addrpolicy * 886 lookup_addrsel_policy(struct sockaddr_in6 *key) 887 { 888 struct in6_addrpolicy *match = NULL; 889 890 ADDRSEL_LOCK(); 891 match = match_addrsel_policy(key); 892 893 if (match == NULL) 894 match = &V_defaultaddrpolicy; 895 else 896 match->use++; 897 ADDRSEL_UNLOCK(); 898 899 return (match); 900 } 901 902 /* 903 * Subroutines to manage the address selection policy table via sysctl. 904 */ 905 struct walkarg { 906 struct sysctl_req *w_req; 907 }; 908 909 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 910 SYSCTL_DECL(_net_inet6_ip6); 911 static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 912 CTLFLAG_RD | CTLFLAG_MPSAFE, in6_src_sysctl, 913 ""); 914 915 static int 916 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 917 { 918 struct walkarg w; 919 920 if (req->newptr) 921 return EPERM; 922 923 bzero(&w, sizeof(w)); 924 w.w_req = req; 925 926 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 927 } 928 929 int 930 in6_src_ioctl(u_long cmd, caddr_t data) 931 { 932 struct in6_addrpolicy ent0; 933 934 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 935 return (EOPNOTSUPP); /* check for safety */ 936 937 ent0 = *(struct in6_addrpolicy *)data; 938 939 if (ent0.label == ADDR_LABEL_NOTAPP) 940 return (EINVAL); 941 /* check if the prefix mask is consecutive. */ 942 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 943 return (EINVAL); 944 /* clear trailing garbages (if any) of the prefix address. */ 945 IN6_MASK_ADDR(&ent0.addr.sin6_addr, &ent0.addrmask.sin6_addr); 946 ent0.use = 0; 947 948 switch (cmd) { 949 case SIOCAADDRCTL_POLICY: 950 return (add_addrsel_policyent(&ent0)); 951 case SIOCDADDRCTL_POLICY: 952 return (delete_addrsel_policyent(&ent0)); 953 } 954 955 return (0); /* XXX: compromise compilers */ 956 } 957 958 /* 959 * The followings are implementation of the policy table using a 960 * simple tail queue. 961 * XXX such details should be hidden. 962 * XXX implementation using binary tree should be more efficient. 963 */ 964 struct addrsel_policyent { 965 TAILQ_ENTRY(addrsel_policyent) ape_entry; 966 struct in6_addrpolicy ape_policy; 967 }; 968 969 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 970 971 VNET_DEFINE_STATIC(struct addrsel_policyhead, addrsel_policytab); 972 #define V_addrsel_policytab VNET(addrsel_policytab) 973 974 static void 975 init_policy_queue(void) 976 { 977 978 TAILQ_INIT(&V_addrsel_policytab); 979 } 980 981 static int 982 add_addrsel_policyent(struct in6_addrpolicy *newpolicy) 983 { 984 struct addrsel_policyent *new, *pol; 985 986 new = malloc(sizeof(*new), M_IFADDR, 987 M_WAITOK); 988 ADDRSEL_XLOCK(); 989 ADDRSEL_LOCK(); 990 991 /* duplication check */ 992 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 993 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 994 &pol->ape_policy.addr.sin6_addr) && 995 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 996 &pol->ape_policy.addrmask.sin6_addr)) { 997 ADDRSEL_UNLOCK(); 998 ADDRSEL_XUNLOCK(); 999 free(new, M_IFADDR); 1000 return (EEXIST); /* or override it? */ 1001 } 1002 } 1003 1004 bzero(new, sizeof(*new)); 1005 1006 /* XXX: should validate entry */ 1007 new->ape_policy = *newpolicy; 1008 1009 TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); 1010 ADDRSEL_UNLOCK(); 1011 ADDRSEL_XUNLOCK(); 1012 1013 return (0); 1014 } 1015 1016 static int 1017 delete_addrsel_policyent(struct in6_addrpolicy *key) 1018 { 1019 struct addrsel_policyent *pol; 1020 1021 ADDRSEL_XLOCK(); 1022 ADDRSEL_LOCK(); 1023 1024 /* search for the entry in the table */ 1025 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1026 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1027 &pol->ape_policy.addr.sin6_addr) && 1028 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1029 &pol->ape_policy.addrmask.sin6_addr)) { 1030 break; 1031 } 1032 } 1033 if (pol == NULL) { 1034 ADDRSEL_UNLOCK(); 1035 ADDRSEL_XUNLOCK(); 1036 return (ESRCH); 1037 } 1038 1039 TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); 1040 ADDRSEL_UNLOCK(); 1041 ADDRSEL_XUNLOCK(); 1042 free(pol, M_IFADDR); 1043 1044 return (0); 1045 } 1046 1047 static int 1048 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), void *w) 1049 { 1050 struct addrsel_policyent *pol; 1051 int error = 0; 1052 1053 ADDRSEL_SLOCK(); 1054 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1055 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1056 ADDRSEL_SUNLOCK(); 1057 return (error); 1058 } 1059 } 1060 ADDRSEL_SUNLOCK(); 1061 return (error); 1062 } 1063 1064 static int 1065 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) 1066 { 1067 int error = 0; 1068 struct walkarg *w = arg; 1069 1070 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1071 1072 return (error); 1073 } 1074 1075 static struct in6_addrpolicy * 1076 match_addrsel_policy(struct sockaddr_in6 *key) 1077 { 1078 struct addrsel_policyent *pent; 1079 struct in6_addrpolicy *bestpol = NULL, *pol; 1080 int matchlen, bestmatchlen = -1; 1081 u_char *mp, *ep, *k, *p, m; 1082 1083 TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { 1084 matchlen = 0; 1085 1086 pol = &pent->ape_policy; 1087 mp = (u_char *)&pol->addrmask.sin6_addr; 1088 ep = mp + 16; /* XXX: scope field? */ 1089 k = (u_char *)&key->sin6_addr; 1090 p = (u_char *)&pol->addr.sin6_addr; 1091 for (; mp < ep && *mp; mp++, k++, p++) { 1092 m = *mp; 1093 if ((*k & m) != *p) 1094 goto next; /* not match */ 1095 if (m == 0xff) /* short cut for a typical case */ 1096 matchlen += 8; 1097 else { 1098 while (m >= 0x80) { 1099 matchlen++; 1100 m <<= 1; 1101 } 1102 } 1103 } 1104 1105 /* matched. check if this is better than the current best. */ 1106 if (bestpol == NULL || 1107 matchlen > bestmatchlen) { 1108 bestpol = pol; 1109 bestmatchlen = matchlen; 1110 } 1111 1112 next: 1113 continue; 1114 } 1115 1116 return (bestpol); 1117 } 1118