1 /*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the project nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ 30 */ 31 32 /*- 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 59 * 60 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 61 */ 62 63 #include <sys/cdefs.h> 64 __FBSDID("$FreeBSD$"); 65 66 #include "opt_inet.h" 67 #include "opt_inet6.h" 68 69 #include <sys/param.h> 70 #include <sys/systm.h> 71 #include <sys/lock.h> 72 #include <sys/malloc.h> 73 #include <sys/mbuf.h> 74 #include <sys/priv.h> 75 #include <sys/protosw.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/sockio.h> 79 #include <sys/sysctl.h> 80 #include <sys/errno.h> 81 #include <sys/time.h> 82 #include <sys/kernel.h> 83 #include <sys/sx.h> 84 85 #include <net/if.h> 86 #include <net/route.h> 87 88 #include <netinet/in.h> 89 #include <netinet/in_var.h> 90 #include <netinet/in_systm.h> 91 #include <netinet/ip.h> 92 #include <netinet/in_pcb.h> 93 #include <netinet6/in6_var.h> 94 #include <netinet/ip6.h> 95 #include <netinet6/in6_pcb.h> 96 #include <netinet6/ip6_var.h> 97 #include <netinet6/scope6_var.h> 98 #include <netinet6/nd6.h> 99 100 static struct mtx addrsel_lock; 101 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 102 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 103 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 104 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 105 106 static struct sx addrsel_sxlock; 107 #define ADDRSEL_SXLOCK_INIT() sx_init(&addrsel_sxlock, "addrsel_sxlock") 108 #define ADDRSEL_SLOCK() sx_slock(&addrsel_sxlock) 109 #define ADDRSEL_SUNLOCK() sx_sunlock(&addrsel_sxlock) 110 #define ADDRSEL_XLOCK() sx_xlock(&addrsel_sxlock) 111 #define ADDRSEL_XUNLOCK() sx_xunlock(&addrsel_sxlock) 112 113 #define ADDR_LABEL_NOTAPP (-1) 114 struct in6_addrpolicy defaultaddrpolicy; 115 116 int ip6_prefer_tempaddr = 0; 117 118 static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *, 119 struct ip6_moptions *, struct route_in6 *, struct ifnet **, 120 struct rtentry **, int, int)); 121 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 122 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); 123 124 static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *); 125 126 static void init_policy_queue(void); 127 static int add_addrsel_policyent(struct in6_addrpolicy *); 128 static int delete_addrsel_policyent(struct in6_addrpolicy *); 129 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 130 void *)); 131 static int dump_addrsel_policyent(struct in6_addrpolicy *, void *); 132 static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *); 133 134 /* 135 * Return an IPv6 address, which is the most appropriate for a given 136 * destination and user specified options. 137 * If necessary, this function lookups the routing table and returns 138 * an entry to the caller for later use. 139 */ 140 #define REPLACE(r) do {\ 141 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 142 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 143 ip6stat.ip6s_sources_rule[(r)]++; \ 144 /* { \ 145 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 146 printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 147 } */ \ 148 goto replace; \ 149 } while(0) 150 #define NEXT(r) do {\ 151 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 152 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 153 ip6stat.ip6s_sources_rule[(r)]++; \ 154 /* { \ 155 char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \ 156 printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \ 157 } */ \ 158 goto next; /* XXX: we can't use 'continue' here */ \ 159 } while(0) 160 #define BREAK(r) do { \ 161 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 162 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 163 ip6stat.ip6s_sources_rule[(r)]++; \ 164 goto out; /* XXX: we can't use 'break' here */ \ 165 } while(0) 166 167 struct in6_addr * 168 in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 169 struct ip6_moptions *mopts, struct route_in6 *ro, 170 struct in6_addr *laddr, struct ifnet **ifpp, int *errorp) 171 { 172 struct in6_addr dst; 173 struct ifnet *ifp = NULL; 174 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 175 struct in6_pktinfo *pi = NULL; 176 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 177 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 178 u_int32_t odstzone; 179 int prefer_tempaddr; 180 181 dst = dstsock->sin6_addr; /* make a copy for local operation */ 182 *errorp = 0; 183 if (ifpp) 184 *ifpp = NULL; 185 186 /* 187 * If the source address is explicitly specified by the caller, 188 * check if the requested source address is indeed a unicast address 189 * assigned to the node, and can be used as the packet's source 190 * address. If everything is okay, use the address as source. 191 */ 192 if (opts && (pi = opts->ip6po_pktinfo) && 193 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 194 struct sockaddr_in6 srcsock; 195 struct in6_ifaddr *ia6; 196 197 /* get the outgoing interface */ 198 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 199 != 0) { 200 return (NULL); 201 } 202 203 /* 204 * determine the appropriate zone id of the source based on 205 * the zone of the destination and the outgoing interface. 206 * If the specified address is ambiguous wrt the scope zone, 207 * the interface must be specified; otherwise, ifa_ifwithaddr() 208 * will fail matching the address. 209 */ 210 bzero(&srcsock, sizeof(srcsock)); 211 srcsock.sin6_family = AF_INET6; 212 srcsock.sin6_len = sizeof(srcsock); 213 srcsock.sin6_addr = pi->ipi6_addr; 214 if (ifp) { 215 *errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL); 216 if (*errorp != 0) 217 return (NULL); 218 } 219 220 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 221 if (ia6 == NULL || 222 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 223 *errorp = EADDRNOTAVAIL; 224 return (NULL); 225 } 226 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 227 if (ifpp) 228 *ifpp = ifp; 229 return (&ia6->ia_addr.sin6_addr); 230 } 231 232 /* 233 * Otherwise, if the socket has already bound the source, just use it. 234 */ 235 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 236 return (laddr); 237 238 /* 239 * If the address is not specified, choose the best one based on 240 * the outgoing interface and the destination address. 241 */ 242 /* get the outgoing interface */ 243 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 244 return (NULL); 245 246 #ifdef DIAGNOSTIC 247 if (ifp == NULL) /* this should not happen */ 248 panic("in6_selectsrc: NULL ifp"); 249 #endif 250 *errorp = in6_setscope(&dst, ifp, &odstzone); 251 if (*errorp != 0) 252 return (NULL); 253 254 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 255 int new_scope = -1, new_matchlen = -1; 256 struct in6_addrpolicy *new_policy = NULL; 257 u_int32_t srczone, osrczone, dstzone; 258 struct in6_addr src; 259 struct ifnet *ifp1 = ia->ia_ifp; 260 261 /* 262 * We'll never take an address that breaks the scope zone 263 * of the destination. We also skip an address if its zone 264 * does not contain the outgoing interface. 265 * XXX: we should probably use sin6_scope_id here. 266 */ 267 if (in6_setscope(&dst, ifp1, &dstzone) || 268 odstzone != dstzone) { 269 continue; 270 } 271 src = ia->ia_addr.sin6_addr; 272 if (in6_setscope(&src, ifp, &osrczone) || 273 in6_setscope(&src, ifp1, &srczone) || 274 osrczone != srczone) { 275 continue; 276 } 277 278 /* avoid unusable addresses */ 279 if ((ia->ia6_flags & 280 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 281 continue; 282 } 283 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 284 continue; 285 286 /* Rule 1: Prefer same address */ 287 if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) { 288 ia_best = ia; 289 BREAK(1); /* there should be no better candidate */ 290 } 291 292 if (ia_best == NULL) 293 REPLACE(0); 294 295 /* Rule 2: Prefer appropriate scope */ 296 if (dst_scope < 0) 297 dst_scope = in6_addrscope(&dst); 298 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 299 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 300 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 301 REPLACE(2); 302 NEXT(2); 303 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 304 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 305 NEXT(2); 306 REPLACE(2); 307 } 308 309 /* 310 * Rule 3: Avoid deprecated addresses. Note that the case of 311 * !ip6_use_deprecated is already rejected above. 312 */ 313 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 314 NEXT(3); 315 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 316 REPLACE(3); 317 318 /* Rule 4: Prefer home addresses */ 319 /* 320 * XXX: This is a TODO. We should probably merge the MIP6 321 * case above. 322 */ 323 324 /* Rule 5: Prefer outgoing interface */ 325 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 326 NEXT(5); 327 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 328 REPLACE(5); 329 330 /* 331 * Rule 6: Prefer matching label 332 * Note that best_policy should be non-NULL here. 333 */ 334 if (dst_policy == NULL) 335 dst_policy = lookup_addrsel_policy(dstsock); 336 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 337 new_policy = lookup_addrsel_policy(&ia->ia_addr); 338 if (dst_policy->label == best_policy->label && 339 dst_policy->label != new_policy->label) 340 NEXT(6); 341 if (dst_policy->label != best_policy->label && 342 dst_policy->label == new_policy->label) 343 REPLACE(6); 344 } 345 346 /* 347 * Rule 7: Prefer public addresses. 348 * We allow users to reverse the logic by configuring 349 * a sysctl variable, so that privacy conscious users can 350 * always prefer temporary addresses. 351 */ 352 if (opts == NULL || 353 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 354 prefer_tempaddr = ip6_prefer_tempaddr; 355 } else if (opts->ip6po_prefer_tempaddr == 356 IP6PO_TEMPADDR_NOTPREFER) { 357 prefer_tempaddr = 0; 358 } else 359 prefer_tempaddr = 1; 360 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 361 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 362 if (prefer_tempaddr) 363 REPLACE(7); 364 else 365 NEXT(7); 366 } 367 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 368 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 369 if (prefer_tempaddr) 370 NEXT(7); 371 else 372 REPLACE(7); 373 } 374 375 /* 376 * Rule 8: prefer addresses on alive interfaces. 377 * This is a KAME specific rule. 378 */ 379 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 380 !(ia->ia_ifp->if_flags & IFF_UP)) 381 NEXT(8); 382 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 383 (ia->ia_ifp->if_flags & IFF_UP)) 384 REPLACE(8); 385 386 /* 387 * Rule 14: Use longest matching prefix. 388 * Note: in the address selection draft, this rule is 389 * documented as "Rule 8". However, since it is also 390 * documented that this rule can be overridden, we assign 391 * a large number so that it is easy to assign smaller numbers 392 * to more preferred rules. 393 */ 394 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst); 395 if (best_matchlen < new_matchlen) 396 REPLACE(14); 397 if (new_matchlen < best_matchlen) 398 NEXT(14); 399 400 /* Rule 15 is reserved. */ 401 402 /* 403 * Last resort: just keep the current candidate. 404 * Or, do we need more rules? 405 */ 406 continue; 407 408 replace: 409 ia_best = ia; 410 best_scope = (new_scope >= 0 ? new_scope : 411 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 412 best_policy = (new_policy ? new_policy : 413 lookup_addrsel_policy(&ia_best->ia_addr)); 414 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 415 in6_matchlen(&ia_best->ia_addr.sin6_addr, 416 &dst)); 417 418 next: 419 continue; 420 421 out: 422 break; 423 } 424 425 if ((ia = ia_best) == NULL) { 426 *errorp = EADDRNOTAVAIL; 427 return (NULL); 428 } 429 430 if (ifpp) 431 *ifpp = ifp; 432 433 return (&ia->ia_addr.sin6_addr); 434 } 435 436 /* 437 * clone - meaningful only for bsdi and freebsd 438 */ 439 static int 440 selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 441 struct ip6_moptions *mopts, struct route_in6 *ro, 442 struct ifnet **retifp, struct rtentry **retrt, int clone, 443 int norouteok) 444 { 445 int error = 0; 446 struct ifnet *ifp = NULL; 447 struct rtentry *rt = NULL; 448 struct sockaddr_in6 *sin6_next; 449 struct in6_pktinfo *pi = NULL; 450 struct in6_addr *dst = &dstsock->sin6_addr; 451 #if 0 452 char ip6buf[INET6_ADDRSTRLEN]; 453 454 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 455 dstsock->sin6_addr.s6_addr32[1] == 0 && 456 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 457 printf("in6_selectroute: strange destination %s\n", 458 ip6_sprintf(ip6buf, &dstsock->sin6_addr)); 459 } else { 460 printf("in6_selectroute: destination = %s%%%d\n", 461 ip6_sprintf(ip6buf, &dstsock->sin6_addr), 462 dstsock->sin6_scope_id); /* for debug */ 463 } 464 #endif 465 466 /* If the caller specify the outgoing interface explicitly, use it. */ 467 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 468 /* XXX boundary check is assumed to be already done. */ 469 ifp = ifnet_byindex(pi->ipi6_ifindex); 470 if (ifp != NULL && 471 (norouteok || retrt == NULL || 472 IN6_IS_ADDR_MULTICAST(dst))) { 473 /* 474 * we do not have to check or get the route for 475 * multicast. 476 */ 477 goto done; 478 } else 479 goto getroute; 480 } 481 482 /* 483 * If the destination address is a multicast address and the outgoing 484 * interface for the address is specified by the caller, use it. 485 */ 486 if (IN6_IS_ADDR_MULTICAST(dst) && 487 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 488 goto done; /* we do not need a route for multicast. */ 489 } 490 491 getroute: 492 /* 493 * If the next hop address for the packet is specified by the caller, 494 * use it as the gateway. 495 */ 496 if (opts && opts->ip6po_nexthop) { 497 struct route_in6 *ron; 498 499 sin6_next = satosin6(opts->ip6po_nexthop); 500 501 /* at this moment, we only support AF_INET6 next hops */ 502 if (sin6_next->sin6_family != AF_INET6) { 503 error = EAFNOSUPPORT; /* or should we proceed? */ 504 goto done; 505 } 506 507 /* 508 * If the next hop is an IPv6 address, then the node identified 509 * by that address must be a neighbor of the sending host. 510 */ 511 ron = &opts->ip6po_nextroute; 512 if ((ron->ro_rt && 513 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 514 (RTF_UP | RTF_LLINFO)) || 515 !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr, 516 &sin6_next->sin6_addr)) { 517 if (ron->ro_rt) { 518 RTFREE(ron->ro_rt); 519 ron->ro_rt = NULL; 520 } 521 *satosin6(&ron->ro_dst) = *sin6_next; 522 } 523 if (ron->ro_rt == NULL) { 524 rtalloc((struct route *)ron); /* multi path case? */ 525 if (ron->ro_rt == NULL || 526 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 527 if (ron->ro_rt) { 528 RTFREE(ron->ro_rt); 529 ron->ro_rt = NULL; 530 } 531 error = EHOSTUNREACH; 532 goto done; 533 } 534 } 535 rt = ron->ro_rt; 536 ifp = rt->rt_ifp; 537 538 /* 539 * When cloning is required, try to allocate a route to the 540 * destination so that the caller can store path MTU 541 * information. 542 */ 543 if (!clone) 544 goto done; 545 } 546 547 /* 548 * Use a cached route if it exists and is valid, else try to allocate 549 * a new one. Note that we should check the address family of the 550 * cached destination, in case of sharing the cache with IPv4. 551 */ 552 if (ro) { 553 if (ro->ro_rt && 554 (!(ro->ro_rt->rt_flags & RTF_UP) || 555 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 556 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 557 dst))) { 558 RTFREE(ro->ro_rt); 559 ro->ro_rt = (struct rtentry *)NULL; 560 } 561 if (ro->ro_rt == (struct rtentry *)NULL) { 562 struct sockaddr_in6 *sa6; 563 564 /* No route yet, so try to acquire one */ 565 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 566 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 567 *sa6 = *dstsock; 568 sa6->sin6_scope_id = 0; 569 570 if (clone) { 571 rtalloc((struct route *)ro); 572 } else { 573 ro->ro_rt = rtalloc1(&((struct route *)ro) 574 ->ro_dst, 0, 0UL); 575 if (ro->ro_rt) 576 RT_UNLOCK(ro->ro_rt); 577 } 578 } 579 580 /* 581 * do not care about the result if we have the nexthop 582 * explicitly specified. 583 */ 584 if (opts && opts->ip6po_nexthop) 585 goto done; 586 587 if (ro->ro_rt) { 588 ifp = ro->ro_rt->rt_ifp; 589 590 if (ifp == NULL) { /* can this really happen? */ 591 RTFREE(ro->ro_rt); 592 ro->ro_rt = NULL; 593 } 594 } 595 if (ro->ro_rt == NULL) 596 error = EHOSTUNREACH; 597 rt = ro->ro_rt; 598 599 /* 600 * Check if the outgoing interface conflicts with 601 * the interface specified by ipi6_ifindex (if specified). 602 * Note that loopback interface is always okay. 603 * (this may happen when we are sending a packet to one of 604 * our own addresses.) 605 */ 606 if (ifp && opts && opts->ip6po_pktinfo && 607 opts->ip6po_pktinfo->ipi6_ifindex) { 608 if (!(ifp->if_flags & IFF_LOOPBACK) && 609 ifp->if_index != 610 opts->ip6po_pktinfo->ipi6_ifindex) { 611 error = EHOSTUNREACH; 612 goto done; 613 } 614 } 615 } 616 617 done: 618 if (ifp == NULL && rt == NULL) { 619 /* 620 * This can happen if the caller did not pass a cached route 621 * nor any other hints. We treat this case an error. 622 */ 623 error = EHOSTUNREACH; 624 } 625 if (error == EHOSTUNREACH) 626 ip6stat.ip6s_noroute++; 627 628 if (retifp != NULL) 629 *retifp = ifp; 630 if (retrt != NULL) 631 *retrt = rt; /* rt may be NULL */ 632 633 return (error); 634 } 635 636 static int 637 in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 638 struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp) 639 { 640 int error; 641 struct route_in6 sro; 642 struct rtentry *rt = NULL; 643 644 if (ro == NULL) { 645 bzero(&sro, sizeof(sro)); 646 ro = &sro; 647 } 648 649 if ((error = selectroute(dstsock, opts, mopts, ro, retifp, 650 &rt, 0, 1)) != 0) { 651 if (ro == &sro && rt && rt == sro.ro_rt) 652 RTFREE(rt); 653 return (error); 654 } 655 656 /* 657 * do not use a rejected or black hole route. 658 * XXX: this check should be done in the L2 output routine. 659 * However, if we skipped this check here, we'd see the following 660 * scenario: 661 * - install a rejected route for a scoped address prefix 662 * (like fe80::/10) 663 * - send a packet to a destination that matches the scoped prefix, 664 * with ambiguity about the scope zone. 665 * - pick the outgoing interface from the route, and disambiguate the 666 * scope zone with the interface. 667 * - ip6_output() would try to get another route with the "new" 668 * destination, which may be valid. 669 * - we'd see no error on output. 670 * Although this may not be very harmful, it should still be confusing. 671 * We thus reject the case here. 672 */ 673 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 674 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 675 676 if (ro == &sro && rt && rt == sro.ro_rt) 677 RTFREE(rt); 678 return (flags); 679 } 680 681 /* 682 * Adjust the "outgoing" interface. If we're going to loop the packet 683 * back to ourselves, the ifp would be the loopback interface. 684 * However, we'd rather know the interface associated to the 685 * destination address (which should probably be one of our own 686 * addresses.) 687 */ 688 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 689 *retifp = rt->rt_ifa->ifa_ifp; 690 691 if (ro == &sro && rt && rt == sro.ro_rt) 692 RTFREE(rt); 693 return (0); 694 } 695 696 /* 697 * clone - meaningful only for bsdi and freebsd 698 */ 699 int 700 in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, 701 struct ip6_moptions *mopts, struct route_in6 *ro, 702 struct ifnet **retifp, struct rtentry **retrt, int clone) 703 { 704 705 return (selectroute(dstsock, opts, mopts, ro, retifp, 706 retrt, clone, 0)); 707 } 708 709 /* 710 * Default hop limit selection. The precedence is as follows: 711 * 1. Hoplimit value specified via ioctl. 712 * 2. (If the outgoing interface is detected) the current 713 * hop limit of the interface specified by router advertisement. 714 * 3. The system default hoplimit. 715 */ 716 int 717 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp) 718 { 719 720 if (in6p && in6p->in6p_hops >= 0) 721 return (in6p->in6p_hops); 722 else if (ifp) 723 return (ND_IFINFO(ifp)->chlim); 724 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 725 struct route_in6 ro6; 726 struct ifnet *lifp; 727 728 bzero(&ro6, sizeof(ro6)); 729 ro6.ro_dst.sin6_family = AF_INET6; 730 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 731 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 732 rtalloc((struct route *)&ro6); 733 if (ro6.ro_rt) { 734 lifp = ro6.ro_rt->rt_ifp; 735 RTFREE(ro6.ro_rt); 736 if (lifp) 737 return (ND_IFINFO(lifp)->chlim); 738 } else 739 return (ip6_defhlim); 740 } 741 return (ip6_defhlim); 742 } 743 744 /* 745 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 746 * share this function by all *bsd*... 747 */ 748 int 749 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) 750 { 751 struct socket *so = inp->inp_socket; 752 u_int16_t lport = 0, first, last, *lastport; 753 int count, error = 0, wild = 0; 754 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 755 756 INP_INFO_WLOCK_ASSERT(pcbinfo); 757 INP_LOCK_ASSERT(inp); 758 759 /* XXX: this is redundant when called from in6_pcbbind */ 760 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 761 wild = INPLOOKUP_WILDCARD; 762 763 inp->inp_flags |= INP_ANONPORT; 764 765 if (inp->inp_flags & INP_HIGHPORT) { 766 first = ipport_hifirstauto; /* sysctl */ 767 last = ipport_hilastauto; 768 lastport = &pcbinfo->ipi_lasthi; 769 } else if (inp->inp_flags & INP_LOWPORT) { 770 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 771 if (error) 772 return error; 773 first = ipport_lowfirstauto; /* 1023 */ 774 last = ipport_lowlastauto; /* 600 */ 775 lastport = &pcbinfo->ipi_lastlow; 776 } else { 777 first = ipport_firstauto; /* sysctl */ 778 last = ipport_lastauto; 779 lastport = &pcbinfo->ipi_lastport; 780 } 781 /* 782 * Simple check to ensure all ports are not used up causing 783 * a deadlock here. 784 * 785 * We split the two cases (up and down) so that the direction 786 * is not being tested on each round of the loop. 787 */ 788 if (first > last) { 789 /* 790 * counting down 791 */ 792 count = first - last; 793 794 do { 795 if (count-- < 0) { /* completely used? */ 796 /* 797 * Undo any address bind that may have 798 * occurred above. 799 */ 800 inp->in6p_laddr = in6addr_any; 801 return (EAGAIN); 802 } 803 --*lastport; 804 if (*lastport > first || *lastport < last) 805 *lastport = first; 806 lport = htons(*lastport); 807 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 808 lport, wild)); 809 } else { 810 /* 811 * counting up 812 */ 813 count = last - first; 814 815 do { 816 if (count-- < 0) { /* completely used? */ 817 /* 818 * Undo any address bind that may have 819 * occurred above. 820 */ 821 inp->in6p_laddr = in6addr_any; 822 return (EAGAIN); 823 } 824 ++*lastport; 825 if (*lastport < first || *lastport > last) 826 *lastport = first; 827 lport = htons(*lastport); 828 } while (in6_pcblookup_local(pcbinfo, 829 &inp->in6p_laddr, lport, wild)); 830 } 831 832 inp->inp_lport = lport; 833 if (in_pcbinshash(inp) != 0) { 834 inp->in6p_laddr = in6addr_any; 835 inp->inp_lport = 0; 836 return (EAGAIN); 837 } 838 839 return (0); 840 } 841 842 void 843 addrsel_policy_init(void) 844 { 845 ADDRSEL_LOCK_INIT(); 846 ADDRSEL_SXLOCK_INIT(); 847 848 init_policy_queue(); 849 850 /* initialize the "last resort" policy */ 851 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 852 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 853 } 854 855 static struct in6_addrpolicy * 856 lookup_addrsel_policy(struct sockaddr_in6 *key) 857 { 858 struct in6_addrpolicy *match = NULL; 859 860 ADDRSEL_LOCK(); 861 match = match_addrsel_policy(key); 862 863 if (match == NULL) 864 match = &defaultaddrpolicy; 865 else 866 match->use++; 867 ADDRSEL_UNLOCK(); 868 869 return (match); 870 } 871 872 /* 873 * Subroutines to manage the address selection policy table via sysctl. 874 */ 875 struct walkarg { 876 struct sysctl_req *w_req; 877 }; 878 879 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 880 SYSCTL_DECL(_net_inet6_ip6); 881 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 882 CTLFLAG_RD, in6_src_sysctl, ""); 883 884 static int 885 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 886 { 887 struct walkarg w; 888 889 if (req->newptr) 890 return EPERM; 891 892 bzero(&w, sizeof(w)); 893 w.w_req = req; 894 895 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 896 } 897 898 int 899 in6_src_ioctl(u_long cmd, caddr_t data) 900 { 901 int i; 902 struct in6_addrpolicy ent0; 903 904 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 905 return (EOPNOTSUPP); /* check for safety */ 906 907 ent0 = *(struct in6_addrpolicy *)data; 908 909 if (ent0.label == ADDR_LABEL_NOTAPP) 910 return (EINVAL); 911 /* check if the prefix mask is consecutive. */ 912 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 913 return (EINVAL); 914 /* clear trailing garbages (if any) of the prefix address. */ 915 for (i = 0; i < 4; i++) { 916 ent0.addr.sin6_addr.s6_addr32[i] &= 917 ent0.addrmask.sin6_addr.s6_addr32[i]; 918 } 919 ent0.use = 0; 920 921 switch (cmd) { 922 case SIOCAADDRCTL_POLICY: 923 return (add_addrsel_policyent(&ent0)); 924 case SIOCDADDRCTL_POLICY: 925 return (delete_addrsel_policyent(&ent0)); 926 } 927 928 return (0); /* XXX: compromise compilers */ 929 } 930 931 /* 932 * The followings are implementation of the policy table using a 933 * simple tail queue. 934 * XXX such details should be hidden. 935 * XXX implementation using binary tree should be more efficient. 936 */ 937 struct addrsel_policyent { 938 TAILQ_ENTRY(addrsel_policyent) ape_entry; 939 struct in6_addrpolicy ape_policy; 940 }; 941 942 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 943 944 struct addrsel_policyhead addrsel_policytab; 945 946 static void 947 init_policy_queue(void) 948 { 949 950 TAILQ_INIT(&addrsel_policytab); 951 } 952 953 static int 954 add_addrsel_policyent(struct in6_addrpolicy *newpolicy) 955 { 956 struct addrsel_policyent *new, *pol; 957 958 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 959 M_WAITOK); 960 ADDRSEL_XLOCK(); 961 ADDRSEL_LOCK(); 962 963 /* duplication check */ 964 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 965 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 966 &pol->ape_policy.addr.sin6_addr) && 967 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 968 &pol->ape_policy.addrmask.sin6_addr)) { 969 ADDRSEL_UNLOCK(); 970 ADDRSEL_XUNLOCK(); 971 FREE(new, M_IFADDR); 972 return (EEXIST); /* or override it? */ 973 } 974 } 975 976 bzero(new, sizeof(*new)); 977 978 /* XXX: should validate entry */ 979 new->ape_policy = *newpolicy; 980 981 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 982 ADDRSEL_UNLOCK(); 983 ADDRSEL_XUNLOCK(); 984 985 return (0); 986 } 987 988 static int 989 delete_addrsel_policyent(struct in6_addrpolicy *key) 990 { 991 struct addrsel_policyent *pol; 992 993 ADDRSEL_XLOCK(); 994 ADDRSEL_LOCK(); 995 996 /* search for the entry in the table */ 997 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 998 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 999 &pol->ape_policy.addr.sin6_addr) && 1000 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1001 &pol->ape_policy.addrmask.sin6_addr)) { 1002 break; 1003 } 1004 } 1005 if (pol == NULL) { 1006 ADDRSEL_UNLOCK(); 1007 ADDRSEL_XUNLOCK(); 1008 return (ESRCH); 1009 } 1010 1011 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1012 ADDRSEL_UNLOCK(); 1013 ADDRSEL_XUNLOCK(); 1014 1015 return (0); 1016 } 1017 1018 static int 1019 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), 1020 void *w) 1021 { 1022 struct addrsel_policyent *pol; 1023 int error = 0; 1024 1025 ADDRSEL_SLOCK(); 1026 TAILQ_FOREACH(pol, &addrsel_policytab, ape_entry) { 1027 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1028 ADDRSEL_SUNLOCK(); 1029 return (error); 1030 } 1031 } 1032 ADDRSEL_SUNLOCK(); 1033 return (error); 1034 } 1035 1036 static int 1037 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) 1038 { 1039 int error = 0; 1040 struct walkarg *w = arg; 1041 1042 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1043 1044 return (error); 1045 } 1046 1047 static struct in6_addrpolicy * 1048 match_addrsel_policy(struct sockaddr_in6 *key) 1049 { 1050 struct addrsel_policyent *pent; 1051 struct in6_addrpolicy *bestpol = NULL, *pol; 1052 int matchlen, bestmatchlen = -1; 1053 u_char *mp, *ep, *k, *p, m; 1054 1055 TAILQ_FOREACH(pent, &addrsel_policytab, ape_entry) { 1056 matchlen = 0; 1057 1058 pol = &pent->ape_policy; 1059 mp = (u_char *)&pol->addrmask.sin6_addr; 1060 ep = mp + 16; /* XXX: scope field? */ 1061 k = (u_char *)&key->sin6_addr; 1062 p = (u_char *)&pol->addr.sin6_addr; 1063 for (; mp < ep && *mp; mp++, k++, p++) { 1064 m = *mp; 1065 if ((*k & m) != *p) 1066 goto next; /* not match */ 1067 if (m == 0xff) /* short cut for a typical case */ 1068 matchlen += 8; 1069 else { 1070 while (m >= 0x80) { 1071 matchlen++; 1072 m <<= 1; 1073 } 1074 } 1075 } 1076 1077 /* matched. check if this is better than the current best. */ 1078 if (bestpol == NULL || 1079 matchlen > bestmatchlen) { 1080 bestpol = pol; 1081 bestmatchlen = matchlen; 1082 } 1083 1084 next: 1085 continue; 1086 } 1087 1088 return (bestpol); 1089 } 1090