1 /* $FreeBSD$ */ 2 /* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 4. Neither the name of the University nor the names of its contributors 46 * may be used to endorse or promote products derived from this software 47 * without specific prior written permission. 48 * 49 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 50 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 52 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 53 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 54 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 55 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 56 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 57 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 58 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 59 * SUCH DAMAGE. 60 * 61 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 62 */ 63 64 #include "opt_inet.h" 65 #include "opt_inet6.h" 66 67 #include <sys/param.h> 68 #include <sys/systm.h> 69 #include <sys/malloc.h> 70 #include <sys/mbuf.h> 71 #include <sys/protosw.h> 72 #include <sys/socket.h> 73 #include <sys/socketvar.h> 74 #include <sys/sockio.h> 75 #include <sys/sysctl.h> 76 #include <sys/errno.h> 77 #include <sys/time.h> 78 #include <sys/kernel.h> 79 80 #include <net/if.h> 81 #include <net/route.h> 82 83 #include <netinet/in.h> 84 #include <netinet/in_var.h> 85 #include <netinet/in_systm.h> 86 #include <netinet/ip.h> 87 #include <netinet/in_pcb.h> 88 #include <netinet6/in6_var.h> 89 #include <netinet/ip6.h> 90 #include <netinet6/in6_pcb.h> 91 #include <netinet6/ip6_var.h> 92 #include <netinet6/nd6.h> 93 #ifdef ENABLE_DEFAULT_SCOPE 94 #include <netinet6/scope6_var.h> 95 #endif 96 97 #include <net/net_osdep.h> 98 99 static struct mtx addrsel_lock; 100 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 101 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 102 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 103 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 104 105 #define ADDR_LABEL_NOTAPP (-1) 106 struct in6_addrpolicy defaultaddrpolicy; 107 108 int ip6_prefer_tempaddr = 0; 109 110 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 111 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); 112 113 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 114 115 static void init_policy_queue __P((void)); 116 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 117 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 118 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 119 void *)); 120 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 121 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 122 123 /* 124 * Return an IPv6 address, which is the most appropriate for a given 125 * destination and user specified options. 126 * If necessary, this function lookups the routing table and returns 127 * an entry to the caller for later use. 128 */ 129 #define REPLACE(r) do {\ 130 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 131 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 132 ip6stat.ip6s_sources_rule[(r)]++; \ 133 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 134 goto replace; \ 135 } while(0) 136 #define NEXT(r) do {\ 137 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 138 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 139 ip6stat.ip6s_sources_rule[(r)]++; \ 140 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 141 goto next; /* XXX: we can't use 'continue' here */ \ 142 } while(0) 143 #define BREAK(r) do { \ 144 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 145 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 146 ip6stat.ip6s_sources_rule[(r)]++; \ 147 goto out; /* XXX: we can't use 'break' here */ \ 148 } while(0) 149 150 struct in6_addr * 151 in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) 152 struct sockaddr_in6 *dstsock; 153 struct ip6_pktopts *opts; 154 struct ip6_moptions *mopts; 155 struct route_in6 *ro; 156 struct in6_addr *laddr; 157 int *errorp; 158 { 159 struct in6_addr *dst; 160 struct ifnet *ifp = NULL; 161 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 162 struct in6_pktinfo *pi = NULL; 163 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 164 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 165 u_int32_t odstzone; 166 int prefer_tempaddr; 167 struct sockaddr_in6 dstsock0; 168 169 dstsock0 = *dstsock; 170 if (IN6_IS_SCOPE_LINKLOCAL(&dstsock0.sin6_addr) || 171 IN6_IS_ADDR_MC_INTFACELOCAL(&dstsock0.sin6_addr)) { 172 /* KAME assumption: link id == interface id */ 173 if (opts && opts->ip6po_pktinfo && 174 opts->ip6po_pktinfo->ipi6_ifindex) { 175 ifp = ifnet_byindex(opts->ip6po_pktinfo->ipi6_ifindex); 176 dstsock0.sin6_addr.s6_addr16[1] = 177 htons(opts->ip6po_pktinfo->ipi6_ifindex); 178 } else if (mopts && 179 IN6_IS_ADDR_MULTICAST(&dstsock0.sin6_addr) && 180 mopts->im6o_multicast_ifp) { 181 ifp = mopts->im6o_multicast_ifp; 182 dstsock0.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 183 } else if ((*errorp = in6_embedscope(&dstsock0.sin6_addr, 184 &dstsock0, NULL, NULL)) != 0) 185 return (NULL); 186 } 187 dstsock = &dstsock0; 188 189 dst = &dstsock->sin6_addr; 190 *errorp = 0; 191 192 /* 193 * If the source address is explicitly specified by the caller, 194 * check if the requested source address is indeed a unicast address 195 * assigned to the node, and can be used as the packet's source 196 * address. If everything is okay, use the address as source. 197 */ 198 if (opts && (pi = opts->ip6po_pktinfo) && 199 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 200 struct sockaddr_in6 srcsock; 201 struct in6_ifaddr *ia6; 202 203 /* get the outgoing interface */ 204 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 205 != 0) { 206 return (NULL); 207 } 208 209 /* 210 * determine the appropriate zone id of the source based on 211 * the zone of the destination and the outgoing interface. 212 */ 213 bzero(&srcsock, sizeof(srcsock)); 214 srcsock.sin6_family = AF_INET6; 215 srcsock.sin6_len = sizeof(srcsock); 216 srcsock.sin6_addr = pi->ipi6_addr; 217 if (ifp) { 218 if (in6_addr2zoneid(ifp, &pi->ipi6_addr, 219 &srcsock.sin6_scope_id)) { 220 *errorp = EINVAL; /* XXX */ 221 return (NULL); 222 } 223 } 224 if ((*errorp = in6_embedscope(&srcsock.sin6_addr, &srcsock, 225 NULL, NULL)) != 0) { 226 return (NULL); 227 } 228 srcsock.sin6_scope_id = 0; /* XXX: ifa_ifwithaddr expects 0 */ 229 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 230 if (ia6 == NULL || 231 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 232 *errorp = EADDRNOTAVAIL; 233 return (NULL); 234 } 235 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 236 return (&ia6->ia_addr.sin6_addr); 237 } 238 239 /* 240 * Otherwise, if the socket has already bound the source, just use it. 241 */ 242 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 243 return (laddr); 244 245 /* 246 * If the address is not specified, choose the best one based on 247 * the outgoing interface and the destination address. 248 */ 249 /* get the outgoing interface */ 250 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 251 return (NULL); 252 253 #ifdef DIAGNOSTIC 254 if (ifp == NULL) /* this should not happen */ 255 panic("in6_selectsrc: NULL ifp"); 256 #endif 257 if (in6_addr2zoneid(ifp, dst, &odstzone)) { /* impossible */ 258 *errorp = EIO; /* XXX */ 259 return (NULL); 260 } 261 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 262 int new_scope = -1, new_matchlen = -1; 263 struct in6_addrpolicy *new_policy = NULL; 264 u_int32_t srczone, osrczone, dstzone; 265 struct ifnet *ifp1 = ia->ia_ifp; 266 267 /* 268 * We'll never take an address that breaks the scope zone 269 * of the destination. We also skip an address if its zone 270 * does not contain the outgoing interface. 271 * XXX: we should probably use sin6_scope_id here. 272 */ 273 if (in6_addr2zoneid(ifp1, dst, &dstzone) || 274 odstzone != dstzone) { 275 continue; 276 } 277 if (in6_addr2zoneid(ifp, &ia->ia_addr.sin6_addr, &osrczone) || 278 in6_addr2zoneid(ifp1, &ia->ia_addr.sin6_addr, &srczone) || 279 osrczone != srczone) { 280 continue; 281 } 282 283 /* avoid unusable addresses */ 284 if ((ia->ia6_flags & 285 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 286 continue; 287 } 288 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 289 continue; 290 291 /* Rule 1: Prefer same address */ 292 if (IN6_ARE_ADDR_EQUAL(dst, &ia->ia_addr.sin6_addr)) { 293 ia_best = ia; 294 BREAK(1); /* there should be no better candidate */ 295 } 296 297 if (ia_best == NULL) 298 REPLACE(0); 299 300 /* Rule 2: Prefer appropriate scope */ 301 if (dst_scope < 0) 302 dst_scope = in6_addrscope(dst); 303 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 304 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 305 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 306 REPLACE(2); 307 NEXT(2); 308 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 309 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 310 NEXT(2); 311 REPLACE(2); 312 } 313 314 /* 315 * Rule 3: Avoid deprecated addresses. Note that the case of 316 * !ip6_use_deprecated is already rejected above. 317 */ 318 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 319 NEXT(3); 320 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 321 REPLACE(3); 322 323 /* Rule 4: Prefer home addresses */ 324 /* 325 * XXX: This is a TODO. We should probably merge the MIP6 326 * case above. 327 */ 328 329 /* Rule 5: Prefer outgoing interface */ 330 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 331 NEXT(5); 332 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 333 REPLACE(5); 334 335 /* 336 * Rule 6: Prefer matching label 337 * Note that best_policy should be non-NULL here. 338 */ 339 if (dst_policy == NULL) 340 dst_policy = lookup_addrsel_policy(dstsock); 341 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 342 new_policy = lookup_addrsel_policy(&ia->ia_addr); 343 if (dst_policy->label == best_policy->label && 344 dst_policy->label != new_policy->label) 345 NEXT(6); 346 if (dst_policy->label != best_policy->label && 347 dst_policy->label == new_policy->label) 348 REPLACE(6); 349 } 350 351 /* 352 * Rule 7: Prefer public addresses. 353 * We allow users to reverse the logic by configuring 354 * a sysctl variable, so that privacy conscious users can 355 * always prefer temporary addresses. 356 */ 357 if (opts == NULL || 358 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 359 prefer_tempaddr = ip6_prefer_tempaddr; 360 } else if (opts->ip6po_prefer_tempaddr == 361 IP6PO_TEMPADDR_NOTPREFER) { 362 prefer_tempaddr = 0; 363 } else 364 prefer_tempaddr = 1; 365 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 366 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 367 if (prefer_tempaddr) 368 REPLACE(7); 369 else 370 NEXT(7); 371 } 372 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 373 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 374 if (prefer_tempaddr) 375 NEXT(7); 376 else 377 REPLACE(7); 378 } 379 380 /* 381 * Rule 8: prefer addresses on alive interfaces. 382 * This is a KAME specific rule. 383 */ 384 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 385 !(ia->ia_ifp->if_flags & IFF_UP)) 386 NEXT(8); 387 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 388 (ia->ia_ifp->if_flags & IFF_UP)) 389 REPLACE(8); 390 391 /* 392 * Rule 14: Use longest matching prefix. 393 * Note: in the address selection draft, this rule is 394 * documented as "Rule 8". However, since it is also 395 * documented that this rule can be overridden, we assign 396 * a large number so that it is easy to assign smaller numbers 397 * to more preferred rules. 398 */ 399 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, dst); 400 if (best_matchlen < new_matchlen) 401 REPLACE(14); 402 if (new_matchlen < best_matchlen) 403 NEXT(14); 404 405 /* Rule 15 is reserved. */ 406 407 /* 408 * Last resort: just keep the current candidate. 409 * Or, do we need more rules? 410 */ 411 continue; 412 413 replace: 414 ia_best = ia; 415 best_scope = (new_scope >= 0 ? new_scope : 416 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 417 best_policy = (new_policy ? new_policy : 418 lookup_addrsel_policy(&ia_best->ia_addr)); 419 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 420 in6_matchlen(&ia_best->ia_addr.sin6_addr, 421 dst)); 422 423 next: 424 continue; 425 426 out: 427 break; 428 } 429 430 if ((ia = ia_best) == NULL) { 431 *errorp = EADDRNOTAVAIL; 432 return (NULL); 433 } 434 435 return (&ia->ia_addr.sin6_addr); 436 } 437 438 static int 439 in6_selectif(dstsock, opts, mopts, ro, retifp) 440 struct sockaddr_in6 *dstsock; 441 struct ip6_pktopts *opts; 442 struct ip6_moptions *mopts; 443 struct route_in6 *ro; 444 struct ifnet **retifp; 445 { 446 int error; 447 struct route_in6 sro; 448 struct rtentry *rt = NULL; 449 450 if (ro == NULL) { 451 bzero(&sro, sizeof(sro)); 452 ro = &sro; 453 } 454 455 if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp, 456 &rt, 0)) != 0) { 457 if (rt && rt == sro.ro_rt) 458 RTFREE(rt); 459 return (error); 460 } 461 462 /* 463 * do not use a rejected or black hole route. 464 * XXX: this check should be done in the L2 output routine. 465 * However, if we skipped this check here, we'd see the following 466 * scenario: 467 * - install a rejected route for a scoped address prefix 468 * (like fe80::/10) 469 * - send a packet to a destination that matches the scoped prefix, 470 * with ambiguity about the scope zone. 471 * - pick the outgoing interface from the route, and disambiguate the 472 * scope zone with the interface. 473 * - ip6_output() would try to get another route with the "new" 474 * destination, which may be valid. 475 * - we'd see no error on output. 476 * Although this may not be very harmful, it should still be confusing. 477 * We thus reject the case here. 478 */ 479 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 480 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 481 482 if (rt && rt == sro.ro_rt) 483 RTFREE(rt); 484 return (flags); 485 } 486 487 /* 488 * Adjust the "outgoing" interface. If we're going to loop the packet 489 * back to ourselves, the ifp would be the loopback interface. 490 * However, we'd rather know the interface associated to the 491 * destination address (which should probably be one of our own 492 * addresses.) 493 */ 494 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 495 *retifp = rt->rt_ifa->ifa_ifp; 496 497 if (rt && rt == sro.ro_rt) 498 RTFREE(rt); 499 return (0); 500 } 501 502 int 503 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 504 struct sockaddr_in6 *dstsock; 505 struct ip6_pktopts *opts; 506 struct ip6_moptions *mopts; 507 struct route_in6 *ro; 508 struct ifnet **retifp; 509 struct rtentry **retrt; 510 int clone; /* meaningful only for bsdi and freebsd. */ 511 { 512 int error = 0; 513 struct ifnet *ifp = NULL; 514 struct rtentry *rt = NULL; 515 struct sockaddr_in6 *sin6_next; 516 struct in6_pktinfo *pi = NULL; 517 struct in6_addr *dst = &dstsock->sin6_addr; 518 519 #if 0 520 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 521 dstsock->sin6_addr.s6_addr32[1] == 0 && 522 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 523 printf("in6_selectroute: strange destination %s\n", 524 ip6_sprintf(&dstsock->sin6_addr)); 525 } else { 526 printf("in6_selectroute: destination = %s%%%d\n", 527 ip6_sprintf(&dstsock->sin6_addr), 528 dstsock->sin6_scope_id); /* for debug */ 529 } 530 #endif 531 532 /* If the caller specify the outgoing interface explicitly, use it. */ 533 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 534 /* XXX boundary check is assumed to be already done. */ 535 ifp = ifnet_byindex(pi->ipi6_ifindex); 536 if (ifp != NULL && 537 (retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { 538 /* 539 * we do not have to check nor get the route for 540 * multicast. 541 */ 542 goto done; 543 } else 544 goto getroute; 545 } 546 547 /* 548 * If the destination address is a multicast address and the outgoing 549 * interface for the address is specified by the caller, use it. 550 */ 551 if (IN6_IS_ADDR_MULTICAST(dst) && 552 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 553 goto done; /* we do not need a route for multicast. */ 554 } 555 556 getroute: 557 /* 558 * If the next hop address for the packet is specified by the caller, 559 * use it as the gateway. 560 */ 561 if (opts && opts->ip6po_nexthop) { 562 struct route_in6 *ron; 563 564 sin6_next = satosin6(opts->ip6po_nexthop); 565 566 /* at this moment, we only support AF_INET6 next hops */ 567 if (sin6_next->sin6_family != AF_INET6) { 568 error = EAFNOSUPPORT; /* or should we proceed? */ 569 goto done; 570 } 571 572 /* 573 * If the next hop is an IPv6 address, then the node identified 574 * by that address must be a neighbor of the sending host. 575 */ 576 ron = &opts->ip6po_nextroute; 577 if ((ron->ro_rt && 578 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 579 (RTF_UP | RTF_LLINFO)) || 580 !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { 581 if (ron->ro_rt) { 582 RTFREE(ron->ro_rt); 583 ron->ro_rt = NULL; 584 } 585 *satosin6(&ron->ro_dst) = *sin6_next; 586 } 587 if (ron->ro_rt == NULL) { 588 rtalloc((struct route *)ron); /* multi path case? */ 589 if (ron->ro_rt == NULL || 590 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 591 if (ron->ro_rt) { 592 RTFREE(ron->ro_rt); 593 ron->ro_rt = NULL; 594 } 595 error = EHOSTUNREACH; 596 goto done; 597 } 598 } 599 rt = ron->ro_rt; 600 ifp = rt->rt_ifp; 601 602 /* 603 * When cloning is required, try to allocate a route to the 604 * destination so that the caller can store path MTU 605 * information. 606 */ 607 if (!clone) 608 goto done; 609 } 610 611 /* 612 * Use a cached route if it exists and is valid, else try to allocate 613 * a new one. Note that we should check the address family of the 614 * cached destination, in case of sharing the cache with IPv4. 615 */ 616 if (ro) { 617 if (ro->ro_rt && 618 (!(ro->ro_rt->rt_flags & RTF_UP) || 619 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 620 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 621 dst))) { 622 RTFREE(ro->ro_rt); 623 ro->ro_rt = (struct rtentry *)NULL; 624 } 625 if (ro->ro_rt == (struct rtentry *)NULL) { 626 struct sockaddr_in6 *sa6; 627 628 /* No route yet, so try to acquire one */ 629 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 630 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 631 *sa6 = *dstsock; 632 sa6->sin6_scope_id = 0; 633 634 if (clone) { 635 rtalloc((struct route *)ro); 636 } else { 637 ro->ro_rt = rtalloc1(&((struct route *)ro) 638 ->ro_dst, 0, 0UL); 639 if (ro->ro_rt) 640 RT_UNLOCK(ro->ro_rt); 641 } 642 } 643 644 /* 645 * do not care about the result if we have the nexthop 646 * explicitly specified. 647 */ 648 if (opts && opts->ip6po_nexthop) 649 goto done; 650 651 if (ro->ro_rt) { 652 ifp = ro->ro_rt->rt_ifp; 653 654 if (ifp == NULL) { /* can this really happen? */ 655 RTFREE(ro->ro_rt); 656 ro->ro_rt = NULL; 657 } 658 } 659 if (ro->ro_rt == NULL) 660 error = EHOSTUNREACH; 661 rt = ro->ro_rt; 662 663 /* 664 * Check if the outgoing interface conflicts with 665 * the interface specified by ipi6_ifindex (if specified). 666 * Note that loopback interface is always okay. 667 * (this may happen when we are sending a packet to one of 668 * our own addresses.) 669 */ 670 if (opts && opts->ip6po_pktinfo && 671 opts->ip6po_pktinfo->ipi6_ifindex) { 672 if (!(ifp->if_flags & IFF_LOOPBACK) && 673 ifp->if_index != 674 opts->ip6po_pktinfo->ipi6_ifindex) { 675 error = EHOSTUNREACH; 676 goto done; 677 } 678 } 679 } 680 681 done: 682 if (ifp == NULL && rt == NULL) { 683 /* 684 * This can happen if the caller did not pass a cached route 685 * nor any other hints. We treat this case an error. 686 */ 687 error = EHOSTUNREACH; 688 } 689 if (error == EHOSTUNREACH) 690 ip6stat.ip6s_noroute++; 691 692 if (retifp != NULL) 693 *retifp = ifp; 694 if (retrt != NULL) 695 *retrt = rt; /* rt may be NULL */ 696 697 return (error); 698 } 699 700 /* 701 * Default hop limit selection. The precedence is as follows: 702 * 1. Hoplimit value specified via ioctl. 703 * 2. (If the outgoing interface is detected) the current 704 * hop limit of the interface specified by router advertisement. 705 * 3. The system default hoplimit. 706 */ 707 int 708 in6_selecthlim(in6p, ifp) 709 struct in6pcb *in6p; 710 struct ifnet *ifp; 711 { 712 if (in6p && in6p->in6p_hops >= 0) 713 return (in6p->in6p_hops); 714 else if (ifp) 715 return (ND_IFINFO(ifp)->chlim); 716 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 717 struct route_in6 ro6; 718 struct ifnet *lifp; 719 720 bzero(&ro6, sizeof(ro6)); 721 ro6.ro_dst.sin6_family = AF_INET6; 722 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 723 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 724 rtalloc((struct route *)&ro6); 725 if (ro6.ro_rt) { 726 lifp = ro6.ro_rt->rt_ifp; 727 RTFREE(ro6.ro_rt); 728 if (lifp) 729 return (ND_IFINFO(lifp)->chlim); 730 } else 731 return (ip6_defhlim); 732 } 733 return (ip6_defhlim); 734 } 735 736 /* 737 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 738 * share this function by all *bsd*... 739 */ 740 int 741 in6_pcbsetport(laddr, inp, cred) 742 struct in6_addr *laddr; 743 struct inpcb *inp; 744 struct ucred *cred; 745 { 746 struct socket *so = inp->inp_socket; 747 u_int16_t lport = 0, first, last, *lastport; 748 int count, error = 0, wild = 0; 749 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 750 751 /* XXX: this is redundant when called from in6_pcbbind */ 752 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 753 wild = INPLOOKUP_WILDCARD; 754 755 inp->inp_flags |= INP_ANONPORT; 756 757 if (inp->inp_flags & INP_HIGHPORT) { 758 first = ipport_hifirstauto; /* sysctl */ 759 last = ipport_hilastauto; 760 lastport = &pcbinfo->lasthi; 761 } else if (inp->inp_flags & INP_LOWPORT) { 762 if ((error = suser_cred(cred, 0))) 763 return error; 764 first = ipport_lowfirstauto; /* 1023 */ 765 last = ipport_lowlastauto; /* 600 */ 766 lastport = &pcbinfo->lastlow; 767 } else { 768 first = ipport_firstauto; /* sysctl */ 769 last = ipport_lastauto; 770 lastport = &pcbinfo->lastport; 771 } 772 /* 773 * Simple check to ensure all ports are not used up causing 774 * a deadlock here. 775 * 776 * We split the two cases (up and down) so that the direction 777 * is not being tested on each round of the loop. 778 */ 779 if (first > last) { 780 /* 781 * counting down 782 */ 783 count = first - last; 784 785 do { 786 if (count-- < 0) { /* completely used? */ 787 /* 788 * Undo any address bind that may have 789 * occurred above. 790 */ 791 inp->in6p_laddr = in6addr_any; 792 return (EAGAIN); 793 } 794 --*lastport; 795 if (*lastport > first || *lastport < last) 796 *lastport = first; 797 lport = htons(*lastport); 798 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 799 lport, wild)); 800 } else { 801 /* 802 * counting up 803 */ 804 count = last - first; 805 806 do { 807 if (count-- < 0) { /* completely used? */ 808 /* 809 * Undo any address bind that may have 810 * occurred above. 811 */ 812 inp->in6p_laddr = in6addr_any; 813 return (EAGAIN); 814 } 815 ++*lastport; 816 if (*lastport < first || *lastport > last) 817 *lastport = first; 818 lport = htons(*lastport); 819 } while (in6_pcblookup_local(pcbinfo, 820 &inp->in6p_laddr, lport, wild)); 821 } 822 823 inp->inp_lport = lport; 824 if (in_pcbinshash(inp) != 0) { 825 inp->in6p_laddr = in6addr_any; 826 inp->inp_lport = 0; 827 return (EAGAIN); 828 } 829 830 return (0); 831 } 832 833 /* 834 * Generate kernel-internal form (scopeid embedded into s6_addr16[1]). 835 * If the address scope of is link-local, embed the interface index in the 836 * address. The routine determines our precedence 837 * between advanced API scope/interface specification and basic API 838 * specification. 839 * 840 * This function should be nuked in the future, when we get rid of embedded 841 * scopeid thing. 842 * 843 * XXX actually, it is over-specification to return ifp against sin6_scope_id. 844 * there can be multiple interfaces that belong to a particular scope zone 845 * (in specification, we have 1:N mapping between a scope zone and interfaces). 846 * we may want to change the function to return something other than ifp. 847 */ 848 int 849 in6_embedscope(in6, sin6, in6p, ifpp) 850 struct in6_addr *in6; 851 const struct sockaddr_in6 *sin6; 852 struct in6pcb *in6p; 853 struct ifnet **ifpp; 854 { 855 struct ifnet *ifp = NULL; 856 u_int32_t zoneid = sin6->sin6_scope_id; 857 858 *in6 = sin6->sin6_addr; 859 if (ifpp) 860 *ifpp = NULL; 861 862 /* 863 * don't try to read sin6->sin6_addr beyond here, since the caller may 864 * ask us to overwrite existing sockaddr_in6 865 */ 866 867 #ifdef ENABLE_DEFAULT_SCOPE 868 if (zoneid == 0) 869 zoneid = scope6_addr2default(in6); 870 #endif 871 872 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 873 struct in6_pktinfo *pi; 874 875 /* KAME assumption: link id == interface id */ 876 if (in6p && in6p->in6p_outputopts && 877 (pi = in6p->in6p_outputopts->ip6po_pktinfo) && 878 pi->ipi6_ifindex) { 879 ifp = ifnet_byindex(pi->ipi6_ifindex); 880 in6->s6_addr16[1] = htons(pi->ipi6_ifindex); 881 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && 882 in6p->in6p_moptions && 883 in6p->in6p_moptions->im6o_multicast_ifp) { 884 ifp = in6p->in6p_moptions->im6o_multicast_ifp; 885 in6->s6_addr16[1] = htons(ifp->if_index); 886 } else if (zoneid) { 887 if (if_index < zoneid) 888 return (ENXIO); /* XXX EINVAL? */ 889 ifp = ifnet_byindex(zoneid); 890 891 /* XXX assignment to 16bit from 32bit variable */ 892 in6->s6_addr16[1] = htons(zoneid & 0xffff); 893 } 894 895 if (ifpp) 896 *ifpp = ifp; 897 } 898 899 return 0; 900 } 901 902 /* 903 * generate standard sockaddr_in6 from embedded form. 904 * touches sin6_addr and sin6_scope_id only. 905 * 906 * this function should be nuked in the future, when we get rid of 907 * embedded scopeid thing. 908 */ 909 int 910 in6_recoverscope(sin6, in6, ifp) 911 struct sockaddr_in6 *sin6; 912 const struct in6_addr *in6; 913 struct ifnet *ifp; 914 { 915 u_int32_t zoneid; 916 917 sin6->sin6_addr = *in6; 918 919 /* 920 * don't try to read *in6 beyond here, since the caller may 921 * ask us to overwrite existing sockaddr_in6 922 */ 923 924 sin6->sin6_scope_id = 0; 925 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 926 /* 927 * KAME assumption: link id == interface id 928 */ 929 zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); 930 if (zoneid) { 931 /* sanity check */ 932 if (zoneid < 0 || if_index < zoneid) 933 return ENXIO; 934 if (ifp && ifp->if_index != zoneid) 935 return ENXIO; 936 sin6->sin6_addr.s6_addr16[1] = 0; 937 sin6->sin6_scope_id = zoneid; 938 } 939 } 940 941 return 0; 942 } 943 944 /* 945 * just clear the embedded scope identifier. 946 */ 947 void 948 in6_clearscope(addr) 949 struct in6_addr *addr; 950 { 951 if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) 952 addr->s6_addr16[1] = 0; 953 } 954 955 void 956 addrsel_policy_init() 957 { 958 ADDRSEL_LOCK_INIT(); 959 960 init_policy_queue(); 961 962 /* initialize the "last resort" policy */ 963 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 964 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 965 } 966 967 static struct in6_addrpolicy * 968 lookup_addrsel_policy(key) 969 struct sockaddr_in6 *key; 970 { 971 struct in6_addrpolicy *match = NULL; 972 973 ADDRSEL_LOCK(); 974 match = match_addrsel_policy(key); 975 976 if (match == NULL) 977 match = &defaultaddrpolicy; 978 else 979 match->use++; 980 ADDRSEL_UNLOCK(); 981 982 return (match); 983 } 984 985 /* 986 * Subroutines to manage the address selection policy table via sysctl. 987 */ 988 struct walkarg { 989 struct sysctl_req *w_req; 990 }; 991 992 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 993 SYSCTL_DECL(_net_inet6_ip6); 994 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 995 CTLFLAG_RD, in6_src_sysctl, ""); 996 997 static int 998 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 999 { 1000 struct walkarg w; 1001 1002 if (req->newptr) 1003 return EPERM; 1004 1005 bzero(&w, sizeof(w)); 1006 w.w_req = req; 1007 1008 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 1009 } 1010 1011 int 1012 in6_src_ioctl(cmd, data) 1013 u_long cmd; 1014 caddr_t data; 1015 { 1016 int i; 1017 struct in6_addrpolicy ent0; 1018 1019 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1020 return (EOPNOTSUPP); /* check for safety */ 1021 1022 ent0 = *(struct in6_addrpolicy *)data; 1023 1024 if (ent0.label == ADDR_LABEL_NOTAPP) 1025 return (EINVAL); 1026 /* check if the prefix mask is consecutive. */ 1027 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1028 return (EINVAL); 1029 /* clear trailing garbages (if any) of the prefix address. */ 1030 for (i = 0; i < 4; i++) { 1031 ent0.addr.sin6_addr.s6_addr32[i] &= 1032 ent0.addrmask.sin6_addr.s6_addr32[i]; 1033 } 1034 ent0.use = 0; 1035 1036 switch (cmd) { 1037 case SIOCAADDRCTL_POLICY: 1038 return (add_addrsel_policyent(&ent0)); 1039 case SIOCDADDRCTL_POLICY: 1040 return (delete_addrsel_policyent(&ent0)); 1041 } 1042 1043 return (0); /* XXX: compromise compilers */ 1044 } 1045 1046 /* 1047 * The followings are implementation of the policy table using a 1048 * simple tail queue. 1049 * XXX such details should be hidden. 1050 * XXX implementation using binary tree should be more efficient. 1051 */ 1052 struct addrsel_policyent { 1053 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1054 struct in6_addrpolicy ape_policy; 1055 }; 1056 1057 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1058 1059 struct addrsel_policyhead addrsel_policytab; 1060 1061 static void 1062 init_policy_queue() 1063 { 1064 TAILQ_INIT(&addrsel_policytab); 1065 } 1066 1067 static int 1068 add_addrsel_policyent(newpolicy) 1069 struct in6_addrpolicy *newpolicy; 1070 { 1071 struct addrsel_policyent *new, *pol; 1072 1073 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1074 M_WAITOK); 1075 ADDRSEL_LOCK(); 1076 1077 /* duplication check */ 1078 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1079 pol = TAILQ_NEXT(pol, ape_entry)) { 1080 if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, 1081 &pol->ape_policy.addr) && 1082 SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, 1083 &pol->ape_policy.addrmask)) { 1084 ADDRSEL_UNLOCK(); 1085 FREE(new, M_IFADDR); 1086 return (EEXIST); /* or override it? */ 1087 } 1088 } 1089 1090 bzero(new, sizeof(*new)); 1091 1092 /* XXX: should validate entry */ 1093 new->ape_policy = *newpolicy; 1094 1095 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1096 ADDRSEL_UNLOCK(); 1097 1098 return (0); 1099 } 1100 1101 static int 1102 delete_addrsel_policyent(key) 1103 struct in6_addrpolicy *key; 1104 { 1105 struct addrsel_policyent *pol; 1106 1107 ADDRSEL_LOCK(); 1108 1109 /* search for the entry in the table */ 1110 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1111 pol = TAILQ_NEXT(pol, ape_entry)) { 1112 if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && 1113 SA6_ARE_ADDR_EQUAL(&key->addrmask, 1114 &pol->ape_policy.addrmask)) { 1115 break; 1116 } 1117 } 1118 if (pol == NULL) { 1119 ADDRSEL_UNLOCK(); 1120 return (ESRCH); 1121 } 1122 1123 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1124 ADDRSEL_UNLOCK(); 1125 1126 return (0); 1127 } 1128 1129 static int 1130 walk_addrsel_policy(callback, w) 1131 int (*callback) __P((struct in6_addrpolicy *, void *)); 1132 void *w; 1133 { 1134 struct addrsel_policyent *pol; 1135 int error = 0; 1136 1137 ADDRSEL_LOCK(); 1138 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1139 pol = TAILQ_NEXT(pol, ape_entry)) { 1140 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1141 ADDRSEL_UNLOCK(); 1142 return (error); 1143 } 1144 } 1145 ADDRSEL_UNLOCK(); 1146 1147 return (error); 1148 } 1149 1150 static int 1151 dump_addrsel_policyent(pol, arg) 1152 struct in6_addrpolicy *pol; 1153 void *arg; 1154 { 1155 int error = 0; 1156 struct walkarg *w = arg; 1157 1158 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1159 1160 return (error); 1161 } 1162 1163 static struct in6_addrpolicy * 1164 match_addrsel_policy(key) 1165 struct sockaddr_in6 *key; 1166 { 1167 struct addrsel_policyent *pent; 1168 struct in6_addrpolicy *bestpol = NULL, *pol; 1169 int matchlen, bestmatchlen = -1; 1170 u_char *mp, *ep, *k, *p, m; 1171 1172 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1173 pent = TAILQ_NEXT(pent, ape_entry)) { 1174 matchlen = 0; 1175 1176 pol = &pent->ape_policy; 1177 mp = (u_char *)&pol->addrmask.sin6_addr; 1178 ep = mp + 16; /* XXX: scope field? */ 1179 k = (u_char *)&key->sin6_addr; 1180 p = (u_char *)&pol->addr.sin6_addr; 1181 for (; mp < ep && *mp; mp++, k++, p++) { 1182 m = *mp; 1183 if ((*k & m) != *p) 1184 goto next; /* not match */ 1185 if (m == 0xff) /* short cut for a typical case */ 1186 matchlen += 8; 1187 else { 1188 while (m >= 0x80) { 1189 matchlen++; 1190 m <<= 1; 1191 } 1192 } 1193 } 1194 1195 /* matched. check if this is better than the current best. */ 1196 if (bestpol == NULL || 1197 matchlen > bestmatchlen) { 1198 bestpol = pol; 1199 bestmatchlen = matchlen; 1200 } 1201 1202 next: 1203 continue; 1204 } 1205 1206 return (bestpol); 1207 } 1208