1 /*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the project nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ 30 */ 31 32 /*- 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 61 */ 62 63 #include <sys/cdefs.h> 64 __FBSDID("$FreeBSD$"); 65 66 #include "opt_inet.h" 67 #include "opt_inet6.h" 68 #include "opt_mpath.h" 69 70 #include <sys/param.h> 71 #include <sys/systm.h> 72 #include <sys/lock.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/priv.h> 76 #include <sys/protosw.h> 77 #include <sys/socket.h> 78 #include <sys/socketvar.h> 79 #include <sys/sockio.h> 80 #include <sys/sysctl.h> 81 #include <sys/errno.h> 82 #include <sys/time.h> 83 #include <sys/jail.h> 84 #include <sys/kernel.h> 85 #include <sys/sx.h> 86 87 #include <net/if.h> 88 #include <net/if_dl.h> 89 #include <net/route.h> 90 #include <net/if_llatbl.h> 91 #ifdef RADIX_MPATH 92 #include <net/radix_mpath.h> 93 #endif 94 95 #include <netinet/in.h> 96 #include <netinet/in_var.h> 97 #include <netinet/in_systm.h> 98 #include <netinet/ip.h> 99 #include <netinet/in_pcb.h> 100 #include <netinet/ip_var.h> 101 #include <netinet/udp.h> 102 #include <netinet/udp_var.h> 103 104 #include <netinet6/in6_var.h> 105 #include <netinet/ip6.h> 106 #include <netinet6/in6_pcb.h> 107 #include <netinet6/ip6_var.h> 108 #include <netinet6/scope6_var.h> 109 #include <netinet6/nd6.h> 110 111 static struct mtx addrsel_lock; 112 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 113 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 114 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 115 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 116 117 static struct sx addrsel_sxlock; 118 #define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") 119 #define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) 120 #define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) 121 #define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) 122 #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) 123 124 #define ADDR_LABEL_NOTAPP (-1) 125 static VNET_DEFINE(struct in6_addrpolicy, defaultaddrpolicy); 126 #define V_defaultaddrpolicy VNET(defaultaddrpolicy) 127 128 VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; 129 130 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 131 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 132 struct rtentry **, int, int)); 133 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 134 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **, int)); 135 136 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); 137 138 static void init_policy_queue(void); 139 static int add_addrsel_policyent(struct in6_addrpolicy *); 140 static int delete_addrsel_policyent(struct in6_addrpolicy *); 141 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 142 void *)); 143 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); 144 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); 145 146 /* 147 * Return an IPv6 address, which is the most appropriate for a given 148 * destination and user specified options. 149 * If necessary, this function lookups the routing table and returns 150 * an entry to the caller for later use. 151 */ 152 #define REPLACE(r) do {\ 153 if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ 154 sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 155 V_ip6stat.ip6s_sources_rule[(r)]++; \ 156 /* { \ 157 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 158 printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 159 } */ \ 160 goto replace; \ 161 } while(0) 162 #define NEXT(r) do {\ 163 if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ 164 sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 165 V_ip6stat.ip6s_sources_rule[(r)]++; \ 166 /* { \ 167 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 168 printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 169 } */ \ 170 goto next; /* XXX: we can't use 'continue' here */ \ 171 } while(0) 172 #define BREAK(r) do { \ 173 if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \ 174 sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 175 V_ip6stat.ip6s_sources_rule[(r)]++; \ 176 goto out; /* XXX: we can't use 'break' here */ \ 177 } while(0) 178 179 int 180 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 181 struct inpcb *inp, struct route_in6 *ro, struct ucred *cred, 182 struct ifnet **ifpp, struct in6_addr *srcp) 183 { 184 struct in6_addr dst, tmp; 185 struct ifnet *ifp = NULL; 186 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 187 struct in6_pktinfo *pi = NULL; 188 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 189 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 190 u_int32_t odstzone; 191 int prefer_tempaddr; 192 int error; 193 struct ip6_moptions *mopts; 194 195 KASSERT(srcp != NULL, ("%s: srcp is NULL", __func__)); 196 197 dst = dstsock->sin6_addr; /* make a copy for local operation */ 198 if (ifpp) 199 *ifpp = NULL; 200 201 if (inp != NULL) { 202 INP_LOCK_ASSERT(inp); 203 mopts = inp->in6p_moptions; 204 } else { 205 mopts = NULL; 206 } 207 208 /* 209 * If the source address is explicitly specified by the caller, 210 * check if the requested source address is indeed a unicast address 211 * assigned to the node, and can be used as the packet's source 212 * address. If everything is okay, use the address as source. 213 */ 214 if (opts && (pi = opts->ip6po_pktinfo) && 215 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 216 struct sockaddr_in6 srcsock; 217 struct in6_ifaddr *ia6; 218 219 /* get the outgoing interface */ 220 if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, 221 (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) 222 != 0) 223 return (error); 224 225 /* 226 * determine the appropriate zone id of the source based on 227 * the zone of the destination and the outgoing interface. 228 * If the specified address is ambiguous wrt the scope zone, 229 * the interface must be specified; otherwise, ifa_ifwithaddr() 230 * will fail matching the address. 231 */ 232 bzero(&srcsock, sizeof(srcsock)); 233 srcsock.sin6_family = AF_INET6; 234 srcsock.sin6_len = sizeof(srcsock); 235 srcsock.sin6_addr = pi->ipi6_addr; 236 if (ifp) { 237 error = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 238 if (error) 239 return (error); 240 } 241 if (cred != NULL && (error = prison_local_ip6(cred, 242 &srcsock.sin6_addr, (inp != NULL && 243 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 244 return (error); 245 246 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr( 247 (struct sockaddr *)&srcsock); 248 if (ia6 == NULL || 249 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 250 if (ia6 != NULL) 251 ifa_free(&ia6->ia_ifa); 252 return (EADDRNOTAVAIL); 253 } 254 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 255 if (ifpp) 256 *ifpp = ifp; 257 bcopy(&ia6->ia_addr.sin6_addr, srcp, sizeof(*srcp)); 258 ifa_free(&ia6->ia_ifa); 259 return (0); 260 } 261 262 /* 263 * Otherwise, if the socket has already bound the source, just use it. 264 */ 265 if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { 266 if (cred != NULL && 267 (error = prison_local_ip6(cred, &inp->in6p_laddr, 268 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0))) != 0) 269 return (error); 270 bcopy(&inp->in6p_laddr, srcp, sizeof(*srcp)); 271 return (0); 272 } 273 274 /* 275 * Bypass source address selection and use the primary jail IP 276 * if requested. 277 */ 278 if (cred != NULL && !prison_saddrsel_ip6(cred, srcp)) 279 return (0); 280 281 /* 282 * If the address is not specified, choose the best one based on 283 * the outgoing interface and the destination address. 284 */ 285 /* get the outgoing interface */ 286 if ((error = in6_selectif(dstsock, opts, mopts, ro, &ifp, 287 (inp != NULL) ? inp->inp_inc.inc_fibnum : RT_DEFAULT_FIB)) != 0) 288 return (error); 289 290 #ifdef DIAGNOSTIC 291 if (ifp == NULL) /* this should not happen */ 292 panic("in6_selectsrc: NULL ifp"); 293 #endif 294 error = in6_setscope(&dst, ifp, &odstzone); 295 if (error) 296 return (error); 297 298 IN6_IFADDR_RLOCK(); 299 TAILQ_FOREACH(ia, &V_in6_ifaddrhead, ia_link) { 300 int new_scope = -1, new_matchlen = -1; 301 struct in6_addrpolicy *new_policy = NULL; 302 u_int32_t srczone, osrczone, dstzone; 303 struct in6_addr src; 304 struct ifnet *ifp1 = ia->ia_ifp; 305 306 /* 307 * We'll never take an address that breaks the scope zone 308 * of the destination. We also skip an address if its zone 309 * does not contain the outgoing interface. 310 * XXX: we should probably use sin6_scope_id here. 311 */ 312 if (in6_setscope(&dst, ifp1, &dstzone) || 313 odstzone != dstzone) { 314 continue; 315 } 316 src = ia->ia_addr.sin6_addr; 317 if (in6_setscope(&src, ifp, &osrczone) || 318 in6_setscope(&src, ifp1, &srczone) || 319 osrczone != srczone) { 320 continue; 321 } 322 323 /* avoid unusable addresses */ 324 if ((ia->ia6_flags & 325 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 326 continue; 327 } 328 if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 329 continue; 330 331 /* If jailed only take addresses of the jail into account. */ 332 if (cred != NULL && 333 prison_check_ip6(cred, &ia->ia_addr.sin6_addr) != 0) 334 continue; 335 336 /* Rule 1: Prefer same address */ 337 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 338 ia_best = ia; 339 BREAK(1); /* there should be no better candidate */ 340 } 341 342 if (ia_best == NULL) 343 REPLACE(0); 344 345 /* Rule 2: Prefer appropriate scope */ 346 if (dst_scope < 0) 347 dst_scope = in6_addrscope(&dst); 348 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 349 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 350 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 351 REPLACE(2); 352 NEXT(2); 353 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 354 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 355 NEXT(2); 356 REPLACE(2); 357 } 358 359 /* 360 * Rule 3: Avoid deprecated addresses. Note that the case of 361 * !ip6_use_deprecated is already rejected above. 362 */ 363 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 364 NEXT(3); 365 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 366 REPLACE(3); 367 368 /* Rule 4: Prefer home addresses */ 369 /* 370 * XXX: This is a TODO. We should probably merge the MIP6 371 * case above. 372 */ 373 374 /* Rule 5: Prefer outgoing interface */ 375 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 376 NEXT(5); 377 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 378 REPLACE(5); 379 380 /* 381 * Rule 6: Prefer matching label 382 * Note that best_policy should be non-NULL here. 383 */ 384 if (dst_policy == NULL) 385 dst_policy = lookup_addrsel_policy(dstsock); 386 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 387 new_policy = lookup_addrsel_policy(&ia->ia_addr); 388 if (dst_policy->label == best_policy->label && 389 dst_policy->label != new_policy->label) 390 NEXT(6); 391 if (dst_policy->label != best_policy->label && 392 dst_policy->label == new_policy->label) 393 REPLACE(6); 394 } 395 396 /* 397 * Rule 7: Prefer public addresses. 398 * We allow users to reverse the logic by configuring 399 * a sysctl variable, so that privacy conscious users can 400 * always prefer temporary addresses. 401 */ 402 if (opts == NULL || 403 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 404 prefer_tempaddr = V_ip6_prefer_tempaddr; 405 } else if (opts->ip6po_prefer_tempaddr == 406 IP6PO_TEMPADDR_NOTPREFER) { 407 prefer_tempaddr = 0; 408 } else 409 prefer_tempaddr = 1; 410 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 411 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 412 if (prefer_tempaddr) 413 REPLACE(7); 414 else 415 NEXT(7); 416 } 417 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 418 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 419 if (prefer_tempaddr) 420 NEXT(7); 421 else 422 REPLACE(7); 423 } 424 425 /* 426 * Rule 8: prefer addresses on alive interfaces. 427 * This is a KAME specific rule. 428 */ 429 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 430 !(ia->ia_ifp->if_flags & IFF_UP)) 431 NEXT(8); 432 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 433 (ia->ia_ifp->if_flags & IFF_UP)) 434 REPLACE(8); 435 436 /* 437 * Rule 14: Use longest matching prefix. 438 * Note: in the address selection draft, this rule is 439 * documented as "Rule 8". However, since it is also 440 * documented that this rule can be overridden, we assign 441 * a large number so that it is easy to assign smaller numbers 442 * to more preferred rules. 443 */ 444 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 445 if (best_matchlen < new_matchlen) 446 REPLACE(14); 447 if (new_matchlen < best_matchlen) 448 NEXT(14); 449 450 /* Rule 15 is reserved. */ 451 452 /* 453 * Last resort: just keep the current candidate. 454 * Or, do we need more rules? 455 */ 456 continue; 457 458 replace: 459 ia_best = ia; 460 best_scope = (new_scope >= 0 ? new_scope : 461 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 462 best_policy = (new_policy ? new_policy : 463 lookup_addrsel_policy(&ia_best->ia_addr)); 464 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 465 in6_matchlen(&ia_best->ia_addr.sin6_addr, 466 &dst)); 467 468 next: 469 continue; 470 471 out: 472 break; 473 } 474 475 if ((ia = ia_best) == NULL) { 476 IN6_IFADDR_RUNLOCK(); 477 return (EADDRNOTAVAIL); 478 } 479 480 /* 481 * At this point at least one of the addresses belonged to the jail 482 * but it could still be, that we want to further restrict it, e.g. 483 * theoratically IN6_IS_ADDR_LOOPBACK. 484 * It must not be IN6_IS_ADDR_UNSPECIFIED anymore. 485 * prison_local_ip6() will fix an IN6_IS_ADDR_LOOPBACK but should 486 * let all others previously selected pass. 487 * Use tmp to not change ::1 on lo0 to the primary jail address. 488 */ 489 tmp = ia->ia_addr.sin6_addr; 490 if (cred != NULL && prison_local_ip6(cred, &tmp, (inp != NULL && 491 (inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)) != 0) { 492 IN6_IFADDR_RUNLOCK(); 493 return (EADDRNOTAVAIL); 494 } 495 496 if (ifpp) 497 *ifpp = ifp; 498 499 bcopy(&tmp, srcp, sizeof(*srcp)); 500 IN6_IFADDR_RUNLOCK(); 501 return (0); 502 } 503 504 /* 505 * clone - meaningful only for bsdi and freebsd 506 */ 507 static int 508 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 509 struct ip6_moptions *mopts, struct route_in6 *ro, 510 struct ifnet **retifp, struct rtentry **retrt, int norouteok, int fibnum) 511 { 512 int error = 0; 513 struct ifnet *ifp = NULL; 514 struct rtentry *rt = NULL; 515 struct sockaddr_in6 *sin6_next; 516 struct in6_pktinfo *pi = NULL; 517 struct in6_addr *dst = &dstsock->sin6_addr; 518 #if 0 519 char ip6buf[INET6_ADDRSTRLEN]; 520 521 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 522 dstsock->sin6_addr.s6_addr32[1] == 0 && 523 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 524 printf("in6_selectroute: strange destination %s\n", 525 ip6_sprintf(ip6buf, &dstsock->sin6_addr)); 526 } else { 527 printf("in6_selectroute: destination = %s%%%d\n", 528 ip6_sprintf(ip6buf, &dstsock->sin6_addr), 529 dstsock->sin6_scope_id); /* for debug */ 530 } 531 #endif 532 533 /* If the caller specify the outgoing interface explicitly, use it. */ 534 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 535 /* XXX boundary check is assumed to be already done. */ 536 ifp = ifnet_byindex(pi->ipi6_ifindex); 537 if (ifp != NULL && 538 (norouteok || retrt == NULL || 539 IN6_IS_ADDR_MULTICAST(dst))) { 540 /* 541 * we do not have to check or get the route for 542 * multicast. 543 */ 544 goto done; 545 } else 546 goto getroute; 547 } 548 549 /* 550 * If the destination address is a multicast address and the outgoing 551 * interface for the address is specified by the caller, use it. 552 */ 553 if (IN6_IS_ADDR_MULTICAST(dst) && 554 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 555 goto done; /* we do not need a route for multicast. */ 556 } 557 558 getroute: 559 /* 560 * If the next hop address for the packet is specified by the caller, 561 * use it as the gateway. 562 */ 563 if (opts && opts->ip6po_nexthop) { 564 struct route_in6 *ron; 565 struct llentry *la; 566 567 sin6_next = satosin6(opts->ip6po_nexthop); 568 569 /* at this moment, we only support AF_INET6 next hops */ 570 if (sin6_next->sin6_family != AF_INET6) { 571 error = EAFNOSUPPORT; /* or should we proceed? */ 572 goto done; 573 } 574 575 /* 576 * If the next hop is an IPv6 address, then the node identified 577 * by that address must be a neighbor of the sending host. 578 */ 579 ron = &opts->ip6po_nextroute; 580 /* 581 * XXX what do we do here? 582 * PLZ to be fixing 583 */ 584 585 586 if (ron->ro_rt == NULL) { 587 in6_rtalloc(ron, fibnum); /* multi path case? */ 588 if (ron->ro_rt == NULL) { 589 if (ron->ro_rt) { 590 RTFREE(ron->ro_rt); 591 ron->ro_rt = NULL; 592 } 593 error = EHOSTUNREACH; 594 goto done; 595 } 596 } 597 598 rt = ron->ro_rt; 599 ifp = rt->rt_ifp; 600 IF_AFDATA_LOCK(ifp); 601 la = lla_lookup(LLTABLE6(ifp), 0, (struct sockaddr *)&sin6_next->sin6_addr); 602 IF_AFDATA_UNLOCK(ifp); 603 if (la != NULL) 604 LLE_RUNLOCK(la); 605 else { 606 error = EHOSTUNREACH; 607 goto done; 608 } 609 #if 0 610 if ((ron->ro_rt && 611 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 612 (RTF_UP | RTF_LLINFO)) || 613 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 614 &sin6_next->sin6_addr)) { 615 if (ron->ro_rt) { 616 RTFREE(ron->ro_rt); 617 ron->ro_rt = NULL; 618 } 619 *satosin6(&ron->ro_dst) = *sin6_next; 620 } 621 if (ron->ro_rt == NULL) { 622 in6_rtalloc(ron); /* multi path case? */ 623 if (ron->ro_rt == NULL || 624 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 625 if (ron->ro_rt) { 626 RTFREE(ron->ro_rt); 627 ron->ro_rt = NULL; 628 } 629 error = EHOSTUNREACH; 630 goto done; 631 } 632 } 633 #endif 634 635 /* 636 * When cloning is required, try to allocate a route to the 637 * destination so that the caller can store path MTU 638 * information. 639 */ 640 goto done; 641 } 642 643 /* 644 * Use a cached route if it exists and is valid, else try to allocate 645 * a new one. Note that we should check the address family of the 646 * cached destination, in case of sharing the cache with IPv4. 647 */ 648 if (ro) { 649 if (ro->ro_rt && 650 (!(ro->ro_rt->rt_flags & RTF_UP) || 651 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 652 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 653 dst))) { 654 RTFREE(ro->ro_rt); 655 ro->ro_rt = (struct rtentry *)NULL; 656 } 657 if (ro->ro_rt == (struct rtentry *)NULL) { 658 struct sockaddr_in6 *sa6; 659 660 /* No route yet, so try to acquire one */ 661 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 662 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 663 *sa6 = *dstsock; 664 sa6->sin6_scope_id = 0; 665 666 #ifdef RADIX_MPATH 667 rtalloc_mpath_fib((struct route *)ro, 668 ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); 669 #else 670 ro->ro_rt = in6_rtalloc1((struct sockaddr *) 671 &ro->ro_dst, 0, 0UL, fibnum); 672 if (ro->ro_rt) 673 RT_UNLOCK(ro->ro_rt); 674 #endif 675 } 676 677 /* 678 * do not care about the result if we have the nexthop 679 * explicitly specified. 680 */ 681 if (opts && opts->ip6po_nexthop) 682 goto done; 683 684 if (ro->ro_rt) { 685 ifp = ro->ro_rt->rt_ifp; 686 687 if (ifp == NULL) { /* can this really happen? */ 688 RTFREE(ro->ro_rt); 689 ro->ro_rt = NULL; 690 } 691 } 692 if (ro->ro_rt == NULL) 693 error = EHOSTUNREACH; 694 rt = ro->ro_rt; 695 696 /* 697 * Check if the outgoing interface conflicts with 698 * the interface specified by ipi6_ifindex (if specified). 699 * Note that loopback interface is always okay. 700 * (this may happen when we are sending a packet to one of 701 * our own addresses.) 702 */ 703 if (ifp && opts && opts->ip6po_pktinfo && 704 opts->ip6po_pktinfo->ipi6_ifindex) { 705 if (!(ifp->if_flags & IFF_LOOPBACK) && 706 ifp->if_index != 707 opts->ip6po_pktinfo->ipi6_ifindex) { 708 error = EHOSTUNREACH; 709 goto done; 710 } 711 } 712 } 713 714 done: 715 if (ifp == NULL && rt == NULL) { 716 /* 717 * This can happen if the caller did not pass a cached route 718 * nor any other hints. We treat this case an error. 719 */ 720 error = EHOSTUNREACH; 721 } 722 if (error == EHOSTUNREACH) 723 V_ip6stat.ip6s_noroute++; 724 725 if (retifp != NULL) { 726 *retifp = ifp; 727 728 /* 729 * Adjust the "outgoing" interface. If we're going to loop 730 * the packet back to ourselves, the ifp would be the loopback 731 * interface. However, we'd rather know the interface associated 732 * to the destination address (which should probably be one of 733 * our own addresses.) 734 */ 735 if (rt) { 736 if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && 737 (rt->rt_gateway->sa_family == AF_LINK)) 738 *retifp = 739 ifnet_byindex(((struct sockaddr_dl *) 740 rt->rt_gateway)->sdl_index); 741 } 742 } 743 744 if (retrt != NULL) 745 *retrt = rt; /* rt may be NULL */ 746 747 return (error); 748 } 749 750 static int 751 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 752 struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp, 753 int fibnum) 754 { 755 int error; 756 struct route_in6 sro; 757 struct rtentry *rt = NULL; 758 759 if (ro == NULL) { 760 bzero(&sro, sizeof(sro)); 761 ro = &sro; 762 } 763 764 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 765 &rt, 1, fibnum)) != 0) { 766 if (ro == &sro && rt && rt == sro.ro_rt) 767 RTFREE(rt); 768 return (error); 769 } 770 771 /* 772 * do not use a rejected or black hole route. 773 * XXX: this check should be done in the L2 output routine. 774 * However, if we skipped this check here, we'd see the following 775 * scenario: 776 * - install a rejected route for a scoped address prefix 777 * (like fe80::/10) 778 * - send a packet to a destination that matches the scoped prefix, 779 * with ambiguity about the scope zone. 780 * - pick the outgoing interface from the route, and disambiguate the 781 * scope zone with the interface. 782 * - ip6_output() would try to get another route with the "new" 783 * destination, which may be valid. 784 * - we'd see no error on output. 785 * Although this may not be very harmful, it should still be confusing. 786 * We thus reject the case here. 787 */ 788 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 789 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 790 791 if (ro == &sro && rt && rt == sro.ro_rt) 792 RTFREE(rt); 793 return (flags); 794 } 795 796 if (ro == &sro && rt && rt == sro.ro_rt) 797 RTFREE(rt); 798 return (0); 799 } 800 801 /* 802 * Public wrapper function to selectroute(). 803 * 804 * XXX-BZ in6_selectroute() should and will grow the FIB argument. The 805 * in6_selectroute_fib() function is only there for backward compat on stable. 806 */ 807 int 808 in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 809 struct ip6_moptions *mopts, struct route_in6 *ro, 810 struct ifnet **retifp, struct rtentry **retrt) 811 { 812 813 return (selectroute(dstsock, opts, mopts, ro, retifp, 814 retrt, 0, RT_DEFAULT_FIB)); 815 } 816 817 #ifndef BURN_BRIDGES 818 int 819 in6_selectroute_fib(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 820 struct ip6_moptions *mopts, struct route_in6 *ro, 821 struct ifnet **retifp, struct rtentry **retrt, int fibnum) 822 { 823 824 return (selectroute(dstsock, opts, mopts, ro, retifp, 825 retrt, 0, fibnum)); 826 } 827 #endif 828 829 /* 830 * Default hop limit selection. The precedence is as follows: 831 * 1. Hoplimit value specified via ioctl. 832 * 2. (If the outgoing interface is detected) the current 833 * hop limit of the interface specified by router advertisement. 834 * 3. The system default hoplimit. 835 */ 836 int 837 in6_selecthlim(struct inpcb *in6p, struct ifnet *ifp) 838 { 839 840 if (in6p && in6p->in6p_hops >= 0) 841 return (in6p->in6p_hops); 842 else if (ifp) 843 return (ND_IFINFO(ifp)->chlim); 844 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 845 struct route_in6 ro6; 846 struct ifnet *lifp; 847 848 bzero(&ro6, sizeof(ro6)); 849 ro6.ro_dst.sin6_family = AF_INET6; 850 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 851 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 852 in6_rtalloc(&ro6, in6p ? in6p->inp_inc.inc_fibnum : 853 RT_DEFAULT_FIB); 854 if (ro6.ro_rt) { 855 lifp = ro6.ro_rt->rt_ifp; 856 RTFREE(ro6.ro_rt); 857 if (lifp) 858 return (ND_IFINFO(lifp)->chlim); 859 } else 860 return (V_ip6_defhlim); 861 } 862 return (V_ip6_defhlim); 863 } 864 865 /* 866 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 867 * share this function by all *bsd*... 868 */ 869 int 870 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) 871 { 872 struct socket *so = inp->inp_socket; 873 u_int16_t lport = 0; 874 int error, lookupflags = 0; 875 #ifdef INVARIANTS 876 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 877 #endif 878 879 INP_WLOCK_ASSERT(inp); 880 INP_HASH_WLOCK_ASSERT(pcbinfo); 881 882 error = prison_local_ip6(cred, laddr, 883 ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)); 884 if (error) 885 return(error); 886 887 /* XXX: this is redundant when called from in6_pcbbind */ 888 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 889 lookupflags = INPLOOKUP_WILDCARD; 890 891 inp->inp_flags |= INP_ANONPORT; 892 893 error = in_pcb_lport(inp, NULL, &lport, cred, lookupflags); 894 if (error != 0) 895 return (error); 896 897 inp->inp_lport = lport; 898 if (in_pcbinshash(inp) != 0) { 899 inp->in6p_laddr = in6addr_any; 900 inp->inp_lport = 0; 901 return (EAGAIN); 902 } 903 904 return (0); 905 } 906 907 void 908 addrsel_policy_init(void) 909 { 910 911 init_policy_queue(); 912 913 /* initialize the "last resort" policy */ 914 bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); 915 V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 916 917 if (!IS_DEFAULT_VNET(curvnet)) 918 return; 919 920 ADDRSEL_LOCK_INIT(); 921 ADDRSEL_SXLOCK_INIT(); 922 } 923 924 static struct in6_addrpolicy * 925 lookup_addrsel_policy(struct sockaddr_in6 *key) 926 { 927 struct in6_addrpolicy *match = NULL; 928 929 ADDRSEL_LOCK(); 930 match = match_addrsel_policy(key); 931 932 if (match == NULL) 933 match = &V_defaultaddrpolicy; 934 else 935 match->use++; 936 ADDRSEL_UNLOCK(); 937 938 return (match); 939 } 940 941 /* 942 * Subroutines to manage the address selection policy table via sysctl. 943 */ 944 struct walkarg { 945 struct sysctl_req *w_req; 946 }; 947 948 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 949 SYSCTL_DECL(_net_inet6_ip6); 950 static SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 951 CTLFLAG_RD, in6_src_sysctl, ""); 952 953 static int 954 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 955 { 956 struct walkarg w; 957 958 if (req->newptr) 959 return EPERM; 960 961 bzero(&w, sizeof(w)); 962 w.w_req = req; 963 964 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 965 } 966 967 int 968 in6_src_ioctl(u_long cmd, caddr_t data) 969 { 970 int i; 971 struct in6_addrpolicy ent0; 972 973 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 974 return (EOPNOTSUPP); /* check for safety */ 975 976 ent0 = *(struct in6_addrpolicy *)data; 977 978 if (ent0.label == ADDR_LABEL_NOTAPP) 979 return (EINVAL); 980 /* check if the prefix mask is consecutive. */ 981 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 982 return (EINVAL); 983 /* clear trailing garbages (if any) of the prefix address. */ 984 for (i = 0; i < 4; i++) { 985 ent0.addr.sin6_addr.s6_addr32[i] &= 986 ent0.addrmask.sin6_addr.s6_addr32[i]; 987 } 988 ent0.use = 0; 989 990 switch (cmd) { 991 case SIOCAADDRCTL_POLICY: 992 return (add_addrsel_policyent(&ent0)); 993 case SIOCDADDRCTL_POLICY: 994 return (delete_addrsel_policyent(&ent0)); 995 } 996 997 return (0); /* XXX: compromise compilers */ 998 } 999 1000 /* 1001 * The followings are implementation of the policy table using a 1002 * simple tail queue. 1003 * XXX such details should be hidden. 1004 * XXX implementation using binary tree should be more efficient. 1005 */ 1006 struct addrsel_policyent { 1007 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1008 struct in6_addrpolicy ape_policy; 1009 }; 1010 1011 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1012 1013 static VNET_DEFINE(struct addrsel_policyhead, addrsel_policytab); 1014 #define V_addrsel_policytab VNET(addrsel_policytab) 1015 1016 static void 1017 init_policy_queue(void) 1018 { 1019 1020 TAILQ_INIT(&V_addrsel_policytab); 1021 } 1022 1023 static int 1024 add_addrsel_policyent(struct in6_addrpolicy *newpolicy) 1025 { 1026 struct addrsel_policyent *new, *pol; 1027 1028 new = malloc(sizeof(*new), M_IFADDR, 1029 M_WAITOK); 1030 ADDRSEL_XLOCK(); 1031 ADDRSEL_LOCK(); 1032 1033 /* duplication check */ 1034 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1035 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 1036 &pol->ape_policy.addr.sin6_addr) && 1037 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 1038 &pol->ape_policy.addrmask.sin6_addr)) { 1039 ADDRSEL_UNLOCK(); 1040 ADDRSEL_XUNLOCK(); 1041 free(new, M_IFADDR); 1042 return (EEXIST); /* or override it? */ 1043 } 1044 } 1045 1046 bzero(new, sizeof(*new)); 1047 1048 /* XXX: should validate entry */ 1049 new->ape_policy = *newpolicy; 1050 1051 TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); 1052 ADDRSEL_UNLOCK(); 1053 ADDRSEL_XUNLOCK(); 1054 1055 return (0); 1056 } 1057 1058 static int 1059 delete_addrsel_policyent(struct in6_addrpolicy *key) 1060 { 1061 struct addrsel_policyent *pol; 1062 1063 ADDRSEL_XLOCK(); 1064 ADDRSEL_LOCK(); 1065 1066 /* search for the entry in the table */ 1067 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1068 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1069 &pol->ape_policy.addr.sin6_addr) && 1070 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1071 &pol->ape_policy.addrmask.sin6_addr)) { 1072 break; 1073 } 1074 } 1075 if (pol == NULL) { 1076 ADDRSEL_UNLOCK(); 1077 ADDRSEL_XUNLOCK(); 1078 return (ESRCH); 1079 } 1080 1081 TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); 1082 ADDRSEL_UNLOCK(); 1083 ADDRSEL_XUNLOCK(); 1084 1085 return (0); 1086 } 1087 1088 static int 1089 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), 1090 void *w) 1091 { 1092 struct addrsel_policyent *pol; 1093 int error = 0; 1094 1095 ADDRSEL_SLOCK(); 1096 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1097 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1098 ADDRSEL_SUNLOCK(); 1099 return (error); 1100 } 1101 } 1102 ADDRSEL_SUNLOCK(); 1103 return (error); 1104 } 1105 1106 static int 1107 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) 1108 { 1109 int error = 0; 1110 struct walkarg *w = arg; 1111 1112 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1113 1114 return (error); 1115 } 1116 1117 static struct in6_addrpolicy * 1118 match_addrsel_policy(struct sockaddr_in6 *key) 1119 { 1120 struct addrsel_policyent *pent; 1121 struct in6_addrpolicy *bestpol = NULL, *pol; 1122 int matchlen, bestmatchlen = -1; 1123 u_char *mp, *ep, *k, *p, m; 1124 1125 TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { 1126 matchlen = 0; 1127 1128 pol = &pent->ape_policy; 1129 mp = (u_char *)&pol->addrmask.sin6_addr; 1130 ep = mp + 16; /* XXX: scope field? */ 1131 k = (u_char *)&key->sin6_addr; 1132 p = (u_char *)&pol->addr.sin6_addr; 1133 for (; mp < ep && *mp; mp++, k++, p++) { 1134 m = *mp; 1135 if ((*k & m) != *p) 1136 goto next; /* not match */ 1137 if (m == 0xff) /* short cut for a typical case */ 1138 matchlen += 8; 1139 else { 1140 while (m >= 0x80) { 1141 matchlen++; 1142 m <<= 1; 1143 } 1144 } 1145 } 1146 1147 /* matched. check if this is better than the current best. */ 1148 if (bestpol == NULL || 1149 matchlen > bestmatchlen) { 1150 bestpol = pol; 1151 bestmatchlen = matchlen; 1152 } 1153 1154 next: 1155 continue; 1156 } 1157 1158 return (bestpol); 1159 } 1160