1 /* $FreeBSD$ */ 2 /* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 66 */ 67 68 #include "opt_inet.h" 69 #include "opt_inet6.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/protosw.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/sockio.h> 79 #include <sys/sysctl.h> 80 #include <sys/errno.h> 81 #include <sys/time.h> 82 #include <sys/kernel.h> 83 84 #include <net/if.h> 85 #include <net/route.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_var.h> 89 #include <netinet/in_systm.h> 90 #include <netinet/ip.h> 91 #include <netinet/in_pcb.h> 92 #include <netinet6/in6_var.h> 93 #include <netinet/ip6.h> 94 #include <netinet6/in6_pcb.h> 95 #include <netinet6/ip6_var.h> 96 #include <netinet6/nd6.h> 97 #ifdef ENABLE_DEFAULT_SCOPE 98 #include <netinet6/scope6_var.h> 99 #endif 100 101 #include <net/net_osdep.h> 102 103 static struct mtx addrsel_lock; 104 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 105 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 106 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 107 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 108 109 #define ADDR_LABEL_NOTAPP (-1) 110 struct in6_addrpolicy defaultaddrpolicy; 111 112 int ip6_prefer_tempaddr = 0; 113 114 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 115 struct ip6_moptions *, 116 struct route_in6 *ro, 117 struct ifnet **)); 118 119 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 120 121 static void init_policy_queue __P((void)); 122 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 123 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 124 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 125 void *)); 126 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 127 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 128 129 /* 130 * Return an IPv6 address, which is the most appropriate for a given 131 * destination and user specified options. 132 * If necessary, this function lookups the routing table and returns 133 * an entry to the caller for later use. 134 */ 135 #define REPLACE(r) do {\ 136 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 137 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 138 ip6stat.ip6s_sources_rule[(r)]++; \ 139 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 140 goto replace; \ 141 } while(0) 142 #define NEXT(r) do {\ 143 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 144 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 145 ip6stat.ip6s_sources_rule[(r)]++; \ 146 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 147 goto next; /* XXX: we can't use 'continue' here */ \ 148 } while(0) 149 #define BREAK(r) do { \ 150 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 151 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 152 ip6stat.ip6s_sources_rule[(r)]++; \ 153 goto out; /* XXX: we can't use 'break' here */ \ 154 } while(0) 155 156 struct in6_addr * 157 in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) 158 struct sockaddr_in6 *dstsock; 159 struct ip6_pktopts *opts; 160 struct ip6_moptions *mopts; 161 struct route_in6 *ro; 162 struct in6_addr *laddr; 163 int *errorp; 164 { 165 struct in6_addr *dst; 166 struct ifnet *ifp = NULL; 167 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 168 struct in6_pktinfo *pi = NULL; 169 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 170 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 171 u_int32_t odstzone; 172 int prefer_tempaddr; 173 struct sockaddr_in6 dstsock0; 174 175 dstsock0 = *dstsock; 176 if (IN6_IS_SCOPE_LINKLOCAL(&dstsock0.sin6_addr) || 177 IN6_IS_ADDR_MC_INTFACELOCAL(&dstsock0.sin6_addr)) { 178 /* KAME assumption: link id == interface id */ 179 if (opts && opts->ip6po_pktinfo && 180 opts->ip6po_pktinfo->ipi6_ifindex) { 181 ifp = ifnet_byindex(opts->ip6po_pktinfo->ipi6_ifindex); 182 dstsock0.sin6_addr.s6_addr16[1] = 183 htons(opts->ip6po_pktinfo->ipi6_ifindex); 184 } else if (mopts && 185 IN6_IS_ADDR_MULTICAST(&dstsock0.sin6_addr) && 186 mopts->im6o_multicast_ifp) { 187 ifp = mopts->im6o_multicast_ifp; 188 dstsock0.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 189 } else if ((*errorp = in6_embedscope(&dstsock0.sin6_addr, 190 &dstsock0, NULL, NULL)) != 0) 191 return (NULL); 192 } 193 dstsock = &dstsock0; 194 195 dst = &dstsock->sin6_addr; 196 *errorp = 0; 197 198 /* 199 * If the source address is explicitly specified by the caller, 200 * check if the requested source address is indeed a unicast address 201 * assigned to the node, and can be used as the packet's source 202 * address. If everything is okay, use the address as source. 203 */ 204 if (opts && (pi = opts->ip6po_pktinfo) && 205 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 206 struct sockaddr_in6 srcsock; 207 struct in6_ifaddr *ia6; 208 209 /* get the outgoing interface */ 210 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 211 != 0) { 212 return (NULL); 213 } 214 /* 215 * determine the appropriate zone id of the source based on 216 * the zone of the destination and the outgoing interface. 217 */ 218 bzero(&srcsock, sizeof(srcsock)); 219 srcsock.sin6_family = AF_INET6; 220 srcsock.sin6_len = sizeof(srcsock); 221 srcsock.sin6_addr = pi->ipi6_addr; 222 if (ifp) { 223 if (in6_addr2zoneid(ifp, &pi->ipi6_addr, 224 &srcsock.sin6_scope_id)) { 225 *errorp = EINVAL; /* XXX */ 226 return (NULL); 227 } 228 } 229 if ((*errorp = in6_embedscope(&srcsock.sin6_addr, &srcsock, 230 NULL, NULL)) != 0) { 231 return (NULL); 232 } 233 srcsock.sin6_scope_id = 0; /* XXX: ifa_ifwithaddr expects 0 */ 234 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 235 if (ia6 == NULL || 236 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 237 *errorp = EADDRNOTAVAIL; 238 return (NULL); 239 } 240 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 241 return (&ia6->ia_addr.sin6_addr); 242 } 243 244 /* 245 * Otherwise, if the socket has already bound the source, just use it. 246 */ 247 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 248 return (laddr); 249 250 /* 251 * If the address is not specified, choose the best one based on 252 * the outgoing interface and the destination address. 253 */ 254 /* get the outgoing interface */ 255 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 256 return (NULL); 257 258 #ifdef DIAGNOSTIC 259 if (ifp == NULL) /* this should not happen */ 260 panic("in6_selectsrc: NULL ifp"); 261 #endif 262 if (in6_addr2zoneid(ifp, dst, &odstzone)) { /* impossible */ 263 *errorp = EIO; /* XXX */ 264 return (NULL); 265 } 266 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 267 int new_scope = -1, new_matchlen = -1; 268 struct in6_addrpolicy *new_policy = NULL; 269 u_int32_t srczone, osrczone, dstzone; 270 struct ifnet *ifp1 = ia->ia_ifp; 271 272 /* 273 * We'll never take an address that breaks the scope zone 274 * of the destination. We also skip an address if its zone 275 * does not contain the outgoing interface. 276 * XXX: we should probably use sin6_scope_id here. 277 */ 278 if (in6_addr2zoneid(ifp1, dst, &dstzone) || 279 odstzone != dstzone) { 280 continue; 281 } 282 if (in6_addr2zoneid(ifp, &ia->ia_addr.sin6_addr, &osrczone) || 283 in6_addr2zoneid(ifp1, &ia->ia_addr.sin6_addr, &srczone) || 284 osrczone != srczone) { 285 continue; 286 } 287 288 /* avoid unusable addresses */ 289 if ((ia->ia6_flags & 290 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 291 continue; 292 } 293 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 294 continue; 295 296 /* Rule 1: Prefer same address */ 297 if (IN6_ARE_ADDR_EQUAL(dst, &ia->ia_addr.sin6_addr)) { 298 ia_best = ia; 299 BREAK(1); /* there should be no better candidate */ 300 } 301 302 if (ia_best == NULL) 303 REPLACE(0); 304 305 /* Rule 2: Prefer appropriate scope */ 306 if (dst_scope < 0) 307 dst_scope = in6_addrscope(dst); 308 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 309 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 310 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 311 REPLACE(2); 312 NEXT(2); 313 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 314 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 315 NEXT(2); 316 REPLACE(2); 317 } 318 319 /* 320 * Rule 3: Avoid deprecated addresses. Note that the case of 321 * !ip6_use_deprecated is already rejected above. 322 */ 323 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 324 NEXT(3); 325 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 326 REPLACE(3); 327 328 /* Rule 4: Prefer home addresses */ 329 /* 330 * XXX: This is a TODO. We should probably merge the MIP6 331 * case above. 332 */ 333 334 /* Rule 5: Prefer outgoing interface */ 335 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 336 NEXT(5); 337 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 338 REPLACE(5); 339 340 /* 341 * Rule 6: Prefer matching label 342 * Note that best_policy should be non-NULL here. 343 */ 344 if (dst_policy == NULL) 345 dst_policy = lookup_addrsel_policy(dstsock); 346 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 347 new_policy = lookup_addrsel_policy(&ia->ia_addr); 348 if (dst_policy->label == best_policy->label && 349 dst_policy->label != new_policy->label) 350 NEXT(6); 351 if (dst_policy->label != best_policy->label && 352 dst_policy->label == new_policy->label) 353 REPLACE(6); 354 } 355 356 /* 357 * Rule 7: Prefer public addresses. 358 * We allow users to reverse the logic by configuring 359 * a sysctl variable, so that privacy conscious users can 360 * always prefer temporary addresses. 361 */ 362 if (opts == NULL || 363 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 364 prefer_tempaddr = ip6_prefer_tempaddr; 365 } else if (opts->ip6po_prefer_tempaddr == 366 IP6PO_TEMPADDR_NOTPREFER) { 367 prefer_tempaddr = 0; 368 } else 369 prefer_tempaddr = 1; 370 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 371 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 372 if (prefer_tempaddr) 373 REPLACE(7); 374 else 375 NEXT(7); 376 } 377 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 378 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 379 if (prefer_tempaddr) 380 NEXT(7); 381 else 382 REPLACE(7); 383 } 384 385 /* 386 * Rule 8: prefer addresses on alive interfaces. 387 * This is a KAME specific rule. 388 */ 389 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 390 !(ia->ia_ifp->if_flags & IFF_UP)) 391 NEXT(8); 392 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 393 (ia->ia_ifp->if_flags & IFF_UP)) 394 REPLACE(8); 395 396 /* 397 * Rule 14: Use longest matching prefix. 398 * Note: in the address selection draft, this rule is 399 * documented as "Rule 8". However, since it is also 400 * documented that this rule can be overridden, we assign 401 * a large number so that it is easy to assign smaller numbers 402 * to more preferred rules. 403 */ 404 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, dst); 405 if (best_matchlen < new_matchlen) 406 REPLACE(14); 407 if (new_matchlen < best_matchlen) 408 NEXT(14); 409 410 /* Rule 15 is reserved. */ 411 412 /* 413 * Last resort: just keep the current candidate. 414 * Or, do we need more rules? 415 */ 416 continue; 417 418 replace: 419 ia_best = ia; 420 best_scope = (new_scope >= 0 ? new_scope : 421 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 422 best_policy = (new_policy ? new_policy : 423 lookup_addrsel_policy(&ia_best->ia_addr)); 424 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 425 in6_matchlen(&ia_best->ia_addr.sin6_addr, 426 dst)); 427 428 next: 429 continue; 430 431 out: 432 break; 433 } 434 435 if ((ia = ia_best) == NULL) { 436 *errorp = EADDRNOTAVAIL; 437 return (NULL); 438 } 439 440 return (&ia->ia_addr.sin6_addr); 441 } 442 443 static int 444 in6_selectif(dstsock, opts, mopts, ro, retifp) 445 struct sockaddr_in6 *dstsock; 446 struct ip6_pktopts *opts; 447 struct ip6_moptions *mopts; 448 struct route_in6 *ro; 449 struct ifnet **retifp; 450 { 451 int error; 452 struct route_in6 sro; 453 struct rtentry *rt = NULL; 454 455 if (ro == NULL) { 456 bzero(&sro, sizeof(sro)); 457 ro = &sro; 458 } 459 460 if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp, 461 &rt, 0)) != 0) { 462 if (rt && rt == sro.ro_rt) 463 RTFREE(rt); 464 return (error); 465 } 466 467 /* 468 * do not use a rejected or black hole route. 469 * XXX: this check should be done in the L2 output routine. 470 * However, if we skipped this check here, we'd see the following 471 * scenario: 472 * - install a rejected route for a scoped address prefix 473 * (like fe80::/10) 474 * - send a packet to a destination that matches the scoped prefix, 475 * with ambiguity about the scope zone. 476 * - pick the outgoing interface from the route, and disambiguate the 477 * scope zone with the interface. 478 * - ip6_output() would try to get another route with the "new" 479 * destination, which may be valid. 480 * - we'd see no error on output. 481 * Although this may not be very harmful, it should still be confusing. 482 * We thus reject the case here. 483 */ 484 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 485 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 486 487 if (rt && rt == sro.ro_rt) 488 RTFREE(rt); 489 return (flags); 490 } 491 492 /* 493 * Adjust the "outgoing" interface. If we're going to loop the packet 494 * back to ourselves, the ifp would be the loopback interface. 495 * However, we'd rather know the interface associated to the 496 * destination address (which should probably be one of our own 497 * addresses.) 498 */ 499 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 500 *retifp = rt->rt_ifa->ifa_ifp; 501 502 if (rt && rt == sro.ro_rt) 503 RTFREE(rt); 504 return (0); 505 } 506 507 int 508 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 509 struct sockaddr_in6 *dstsock; 510 struct ip6_pktopts *opts; 511 struct ip6_moptions *mopts; 512 struct route_in6 *ro; 513 struct ifnet **retifp; 514 struct rtentry **retrt; 515 int clone; /* meaningful only for bsdi and freebsd. */ 516 { 517 int error = 0; 518 struct ifnet *ifp = NULL; 519 struct rtentry *rt = NULL; 520 struct sockaddr_in6 *sin6_next; 521 struct in6_pktinfo *pi = NULL; 522 struct in6_addr *dst = &dstsock->sin6_addr; 523 524 #if 0 525 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 526 dstsock->sin6_addr.s6_addr32[1] == 0 && 527 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 528 printf("in6_selectroute: strange destination %s\n", 529 ip6_sprintf(&dstsock->sin6_addr)); 530 } else { 531 printf("in6_selectroute: destination = %s%%%d\n", 532 ip6_sprintf(&dstsock->sin6_addr), 533 dstsock->sin6_scope_id); /* for debug */ 534 } 535 #endif 536 537 /* If the caller specify the outgoing interface explicitly, use it. */ 538 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 539 /* XXX boundary check is assumed to be already done. */ 540 ifp = ifnet_byindex(pi->ipi6_ifindex); 541 if (ifp != NULL && 542 (retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { 543 /* 544 * we do not have to check nor get the route for 545 * multicast. 546 */ 547 goto done; 548 } else 549 goto getroute; 550 } 551 552 /* 553 * If the destination address is a multicast address and the outgoing 554 * interface for the address is specified by the caller, use it. 555 */ 556 if (IN6_IS_ADDR_MULTICAST(dst) && 557 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 558 goto done; /* we do not need a route for multicast. */ 559 } 560 561 getroute: 562 /* 563 * If the next hop address for the packet is specified by the caller, 564 * use it as the gateway. 565 */ 566 if (opts && opts->ip6po_nexthop) { 567 struct route_in6 *ron; 568 569 sin6_next = satosin6(opts->ip6po_nexthop); 570 571 /* at this moment, we only support AF_INET6 next hops */ 572 if (sin6_next->sin6_family != AF_INET6) { 573 error = EAFNOSUPPORT; /* or should we proceed? */ 574 goto done; 575 } 576 577 /* 578 * If the next hop is an IPv6 address, then the node identified 579 * by that address must be a neighbor of the sending host. 580 */ 581 ron = &opts->ip6po_nextroute; 582 if ((ron->ro_rt && 583 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 584 (RTF_UP | RTF_LLINFO)) || 585 !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { 586 if (ron->ro_rt) { 587 RTFREE(ron->ro_rt); 588 ron->ro_rt = NULL; 589 } 590 *satosin6(&ron->ro_dst) = *sin6_next; 591 } 592 if (ron->ro_rt == NULL) { 593 rtalloc((struct route *)ron); /* multi path case? */ 594 if (ron->ro_rt == NULL || 595 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 596 if (ron->ro_rt) { 597 RTFREE(ron->ro_rt); 598 ron->ro_rt = NULL; 599 } 600 error = EHOSTUNREACH; 601 goto done; 602 } 603 } 604 rt = ron->ro_rt; 605 ifp = rt->rt_ifp; 606 607 /* 608 * When cloning is required, try to allocate a route to the 609 * destination so that the caller can store path MTU 610 * information. 611 */ 612 if (!clone) 613 goto done; 614 } 615 616 /* 617 * Use a cached route if it exists and is valid, else try to allocate 618 * a new one. Note that we should check the address family of the 619 * cached destination, in case of sharing the cache with IPv4. 620 */ 621 if (ro) { 622 if (ro->ro_rt && 623 (!(ro->ro_rt->rt_flags & RTF_UP) || 624 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 625 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 626 dst))) { 627 RTFREE(ro->ro_rt); 628 ro->ro_rt = (struct rtentry *)NULL; 629 } 630 if (ro->ro_rt == (struct rtentry *)NULL) { 631 struct sockaddr_in6 *sa6; 632 633 /* No route yet, so try to acquire one */ 634 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 635 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 636 *sa6 = *dstsock; 637 sa6->sin6_scope_id = 0; 638 639 if (clone) { 640 rtalloc((struct route *)ro); 641 } else { 642 ro->ro_rt = rtalloc1(&((struct route *)ro) 643 ->ro_dst, 0, 0UL); 644 if (ro->ro_rt) 645 RT_UNLOCK(ro->ro_rt); 646 } 647 } 648 649 /* 650 * do not care about the result if we have the nexthop 651 * explicitly specified. 652 */ 653 if (opts && opts->ip6po_nexthop) 654 goto done; 655 656 if (ro->ro_rt) { 657 ifp = ro->ro_rt->rt_ifp; 658 659 if (ifp == NULL) { /* can this really happen? */ 660 RTFREE(ro->ro_rt); 661 ro->ro_rt = NULL; 662 } 663 } 664 if (ro->ro_rt == NULL) 665 error = EHOSTUNREACH; 666 rt = ro->ro_rt; 667 668 /* 669 * Check if the outgoing interface conflicts with 670 * the interface specified by ipi6_ifindex (if specified). 671 * Note that loopback interface is always okay. 672 * (this may happen when we are sending a packet to one of 673 * our own addresses.) 674 */ 675 if (opts && opts->ip6po_pktinfo 676 && opts->ip6po_pktinfo->ipi6_ifindex) { 677 if (!(ifp->if_flags & IFF_LOOPBACK) && 678 ifp->if_index != 679 opts->ip6po_pktinfo->ipi6_ifindex) { 680 error = EHOSTUNREACH; 681 goto done; 682 } 683 } 684 } 685 686 done: 687 if (ifp == NULL && rt == NULL) { 688 /* 689 * This can happen if the caller did not pass a cached route 690 * nor any other hints. We treat this case an error. 691 */ 692 error = EHOSTUNREACH; 693 } 694 if (error == EHOSTUNREACH) 695 ip6stat.ip6s_noroute++; 696 697 if (retifp != NULL) 698 *retifp = ifp; 699 if (retrt != NULL) 700 *retrt = rt; /* rt may be NULL */ 701 702 return (error); 703 } 704 705 /* 706 * Default hop limit selection. The precedence is as follows: 707 * 1. Hoplimit value specified via ioctl. 708 * 2. (If the outgoing interface is detected) the current 709 * hop limit of the interface specified by router advertisement. 710 * 3. The system default hoplimit. 711 */ 712 int 713 in6_selecthlim(in6p, ifp) 714 struct in6pcb *in6p; 715 struct ifnet *ifp; 716 { 717 if (in6p && in6p->in6p_hops >= 0) 718 return (in6p->in6p_hops); 719 else if (ifp) 720 return (ND_IFINFO(ifp)->chlim); 721 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 722 struct route_in6 ro6; 723 struct ifnet *lifp; 724 725 bzero(&ro6, sizeof(ro6)); 726 ro6.ro_dst.sin6_family = AF_INET6; 727 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 728 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 729 rtalloc((struct route *)&ro6); 730 if (ro6.ro_rt) { 731 lifp = ro6.ro_rt->rt_ifp; 732 RTFREE(ro6.ro_rt); 733 if (lifp) 734 return (ND_IFINFO(lifp)->chlim); 735 } else 736 return (ip6_defhlim); 737 } 738 return (ip6_defhlim); 739 } 740 741 /* 742 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 743 * share this function by all *bsd*... 744 */ 745 int 746 in6_pcbsetport(laddr, inp, td) 747 struct in6_addr *laddr; 748 struct inpcb *inp; 749 struct thread *td; 750 { 751 struct socket *so = inp->inp_socket; 752 u_int16_t lport = 0, first, last, *lastport; 753 int count, error = 0, wild = 0; 754 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 755 756 /* XXX: this is redundant when called from in6_pcbbind */ 757 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 758 wild = INPLOOKUP_WILDCARD; 759 760 inp->inp_flags |= INP_ANONPORT; 761 762 if (inp->inp_flags & INP_HIGHPORT) { 763 first = ipport_hifirstauto; /* sysctl */ 764 last = ipport_hilastauto; 765 lastport = &pcbinfo->lasthi; 766 } else if (inp->inp_flags & INP_LOWPORT) { 767 if (td && (error = suser(td))) 768 return error; 769 first = ipport_lowfirstauto; /* 1023 */ 770 last = ipport_lowlastauto; /* 600 */ 771 lastport = &pcbinfo->lastlow; 772 } else { 773 first = ipport_firstauto; /* sysctl */ 774 last = ipport_lastauto; 775 lastport = &pcbinfo->lastport; 776 } 777 /* 778 * Simple check to ensure all ports are not used up causing 779 * a deadlock here. 780 * 781 * We split the two cases (up and down) so that the direction 782 * is not being tested on each round of the loop. 783 */ 784 if (first > last) { 785 /* 786 * counting down 787 */ 788 count = first - last; 789 790 do { 791 if (count-- < 0) { /* completely used? */ 792 /* 793 * Undo any address bind that may have 794 * occurred above. 795 */ 796 inp->in6p_laddr = in6addr_any; 797 return (EAGAIN); 798 } 799 --*lastport; 800 if (*lastport > first || *lastport < last) 801 *lastport = first; 802 lport = htons(*lastport); 803 } while (in6_pcblookup_local(pcbinfo, 804 &inp->in6p_laddr, lport, wild)); 805 } else { 806 /* 807 * counting up 808 */ 809 count = last - first; 810 811 do { 812 if (count-- < 0) { /* completely used? */ 813 /* 814 * Undo any address bind that may have 815 * occurred above. 816 */ 817 inp->in6p_laddr = in6addr_any; 818 return (EAGAIN); 819 } 820 ++*lastport; 821 if (*lastport < first || *lastport > last) 822 *lastport = first; 823 lport = htons(*lastport); 824 } while (in6_pcblookup_local(pcbinfo, 825 &inp->in6p_laddr, lport, wild)); 826 } 827 828 inp->inp_lport = lport; 829 if (in_pcbinshash(inp) != 0) { 830 inp->in6p_laddr = in6addr_any; 831 inp->inp_lport = 0; 832 return (EAGAIN); 833 } 834 835 return (0); 836 } 837 838 /* 839 * Generate kernel-internal form (scopeid embedded into s6_addr16[1]). 840 * If the address scope of is link-local, embed the interface index in the 841 * address. The routine determines our precedence 842 * between advanced API scope/interface specification and basic API 843 * specification. 844 * 845 * This function should be nuked in the future, when we get rid of embedded 846 * scopeid thing. 847 * 848 * XXX actually, it is over-specification to return ifp against sin6_scope_id. 849 * there can be multiple interfaces that belong to a particular scope zone 850 * (in specification, we have 1:N mapping between a scope zone and interfaces). 851 * we may want to change the function to return something other than ifp. 852 */ 853 int 854 in6_embedscope(in6, sin6, in6p, ifpp) 855 struct in6_addr *in6; 856 const struct sockaddr_in6 *sin6; 857 struct in6pcb *in6p; 858 struct ifnet **ifpp; 859 { 860 struct ifnet *ifp = NULL; 861 u_int32_t zoneid = sin6->sin6_scope_id; 862 863 *in6 = sin6->sin6_addr; 864 if (ifpp) 865 *ifpp = NULL; 866 867 /* 868 * don't try to read sin6->sin6_addr beyond here, since the caller may 869 * ask us to overwrite existing sockaddr_in6 870 */ 871 872 #ifdef ENABLE_DEFAULT_SCOPE 873 if (zoneid == 0) 874 zoneid = scope6_addr2default(in6); 875 #endif 876 877 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 878 struct in6_pktinfo *pi; 879 880 /* KAME assumption: link id == interface id */ 881 if (in6p && in6p->in6p_outputopts && 882 (pi = in6p->in6p_outputopts->ip6po_pktinfo) && 883 pi->ipi6_ifindex) { 884 ifp = ifnet_byindex(pi->ipi6_ifindex); 885 in6->s6_addr16[1] = htons(pi->ipi6_ifindex); 886 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && 887 in6p->in6p_moptions && 888 in6p->in6p_moptions->im6o_multicast_ifp) { 889 ifp = in6p->in6p_moptions->im6o_multicast_ifp; 890 in6->s6_addr16[1] = htons(ifp->if_index); 891 } else if (zoneid) { 892 if (if_index < zoneid) 893 return (ENXIO); /* XXX EINVAL? */ 894 ifp = ifnet_byindex(zoneid); 895 896 /* XXX assignment to 16bit from 32bit variable */ 897 in6->s6_addr16[1] = htons(zoneid & 0xffff); 898 } 899 900 if (ifpp) 901 *ifpp = ifp; 902 } 903 904 return 0; 905 } 906 907 /* 908 * generate standard sockaddr_in6 from embedded form. 909 * touches sin6_addr and sin6_scope_id only. 910 * 911 * this function should be nuked in the future, when we get rid of 912 * embedded scopeid thing. 913 */ 914 int 915 in6_recoverscope(sin6, in6, ifp) 916 struct sockaddr_in6 *sin6; 917 const struct in6_addr *in6; 918 struct ifnet *ifp; 919 { 920 u_int32_t zoneid; 921 922 sin6->sin6_addr = *in6; 923 924 /* 925 * don't try to read *in6 beyond here, since the caller may 926 * ask us to overwrite existing sockaddr_in6 927 */ 928 929 sin6->sin6_scope_id = 0; 930 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 931 /* 932 * KAME assumption: link id == interface id 933 */ 934 zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); 935 if (zoneid) { 936 /* sanity check */ 937 if (zoneid < 0 || if_index < zoneid) 938 return ENXIO; 939 if (ifp && ifp->if_index != zoneid) 940 return ENXIO; 941 sin6->sin6_addr.s6_addr16[1] = 0; 942 sin6->sin6_scope_id = zoneid; 943 } 944 } 945 946 return 0; 947 } 948 949 /* 950 * just clear the embedded scope identifier. 951 */ 952 void 953 in6_clearscope(addr) 954 struct in6_addr *addr; 955 { 956 if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) 957 addr->s6_addr16[1] = 0; 958 } 959 960 void 961 addrsel_policy_init() 962 { 963 ADDRSEL_LOCK_INIT(); 964 965 init_policy_queue(); 966 967 /* initialize the "last resort" policy */ 968 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 969 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 970 } 971 972 static struct in6_addrpolicy * 973 lookup_addrsel_policy(key) 974 struct sockaddr_in6 *key; 975 { 976 struct in6_addrpolicy *match = NULL; 977 978 ADDRSEL_LOCK(); 979 match = match_addrsel_policy(key); 980 981 if (match == NULL) 982 match = &defaultaddrpolicy; 983 else 984 match->use++; 985 ADDRSEL_UNLOCK(); 986 987 return (match); 988 } 989 990 /* 991 * Subroutines to manage the address selection policy table via sysctl. 992 */ 993 struct walkarg { 994 struct sysctl_req *w_req; 995 }; 996 997 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 998 SYSCTL_DECL(_net_inet6_ip6); 999 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 1000 CTLFLAG_RD, in6_src_sysctl, ""); 1001 1002 static int 1003 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 1004 { 1005 struct walkarg w; 1006 1007 if (req->newptr) 1008 return EPERM; 1009 1010 bzero(&w, sizeof(w)); 1011 w.w_req = req; 1012 1013 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 1014 } 1015 1016 int 1017 in6_src_ioctl(cmd, data) 1018 u_long cmd; 1019 caddr_t data; 1020 { 1021 int i; 1022 struct in6_addrpolicy ent0; 1023 1024 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1025 return (EOPNOTSUPP); /* check for safety */ 1026 1027 ent0 = *(struct in6_addrpolicy *)data; 1028 1029 if (ent0.label == ADDR_LABEL_NOTAPP) 1030 return (EINVAL); 1031 /* check if the prefix mask is consecutive. */ 1032 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1033 return (EINVAL); 1034 /* clear trailing garbages (if any) of the prefix address. */ 1035 for (i = 0; i < 4; i++) { 1036 ent0.addr.sin6_addr.s6_addr32[i] &= 1037 ent0.addrmask.sin6_addr.s6_addr32[i]; 1038 } 1039 ent0.use = 0; 1040 1041 switch (cmd) { 1042 case SIOCAADDRCTL_POLICY: 1043 return (add_addrsel_policyent(&ent0)); 1044 case SIOCDADDRCTL_POLICY: 1045 return (delete_addrsel_policyent(&ent0)); 1046 } 1047 1048 return (0); /* XXX: compromise compilers */ 1049 } 1050 1051 /* 1052 * The followings are implementation of the policy table using a 1053 * simple tail queue. 1054 * XXX such details should be hidden. 1055 * XXX implementation using binary tree should be more efficient. 1056 */ 1057 struct addrsel_policyent { 1058 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1059 struct in6_addrpolicy ape_policy; 1060 }; 1061 1062 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1063 1064 struct addrsel_policyhead addrsel_policytab; 1065 1066 static void 1067 init_policy_queue() 1068 { 1069 TAILQ_INIT(&addrsel_policytab); 1070 } 1071 1072 static int 1073 add_addrsel_policyent(newpolicy) 1074 struct in6_addrpolicy *newpolicy; 1075 { 1076 struct addrsel_policyent *new, *pol; 1077 1078 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1079 M_WAITOK); 1080 ADDRSEL_LOCK(); 1081 1082 /* duplication check */ 1083 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1084 pol = TAILQ_NEXT(pol, ape_entry)) { 1085 if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, 1086 &pol->ape_policy.addr) && 1087 SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, 1088 &pol->ape_policy.addrmask)) { 1089 ADDRSEL_UNLOCK(); 1090 FREE(new, M_IFADDR); 1091 return (EEXIST); /* or override it? */ 1092 } 1093 } 1094 1095 bzero(new, sizeof(*new)); 1096 1097 /* XXX: should validate entry */ 1098 new->ape_policy = *newpolicy; 1099 1100 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1101 ADDRSEL_UNLOCK(); 1102 1103 return (0); 1104 } 1105 1106 static int 1107 delete_addrsel_policyent(key) 1108 struct in6_addrpolicy *key; 1109 { 1110 struct addrsel_policyent *pol; 1111 1112 ADDRSEL_LOCK(); 1113 1114 /* search for the entry in the table */ 1115 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1116 pol = TAILQ_NEXT(pol, ape_entry)) { 1117 if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && 1118 SA6_ARE_ADDR_EQUAL(&key->addrmask, 1119 &pol->ape_policy.addrmask)) { 1120 break; 1121 } 1122 } 1123 if (pol == NULL) { 1124 ADDRSEL_UNLOCK(); 1125 return (ESRCH); 1126 } 1127 1128 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1129 ADDRSEL_UNLOCK(); 1130 1131 return (0); 1132 } 1133 1134 static int 1135 walk_addrsel_policy(callback, w) 1136 int (*callback) __P((struct in6_addrpolicy *, void *)); 1137 void *w; 1138 { 1139 struct addrsel_policyent *pol; 1140 int error = 0; 1141 1142 ADDRSEL_LOCK(); 1143 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1144 pol = TAILQ_NEXT(pol, ape_entry)) { 1145 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1146 ADDRSEL_UNLOCK(); 1147 return (error); 1148 } 1149 } 1150 ADDRSEL_UNLOCK(); 1151 1152 return (error); 1153 } 1154 1155 static int 1156 dump_addrsel_policyent(pol, arg) 1157 struct in6_addrpolicy *pol; 1158 void *arg; 1159 { 1160 int error = 0; 1161 struct walkarg *w = arg; 1162 1163 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1164 1165 return (error); 1166 } 1167 1168 static struct in6_addrpolicy * 1169 match_addrsel_policy(key) 1170 struct sockaddr_in6 *key; 1171 { 1172 struct addrsel_policyent *pent; 1173 struct in6_addrpolicy *bestpol = NULL, *pol; 1174 int matchlen, bestmatchlen = -1; 1175 u_char *mp, *ep, *k, *p, m; 1176 1177 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1178 pent = TAILQ_NEXT(pent, ape_entry)) { 1179 matchlen = 0; 1180 1181 pol = &pent->ape_policy; 1182 mp = (u_char *)&pol->addrmask.sin6_addr; 1183 ep = mp + 16; /* XXX: scope field? */ 1184 k = (u_char *)&key->sin6_addr; 1185 p = (u_char *)&pol->addr.sin6_addr; 1186 for (; mp < ep && *mp; mp++, k++, p++) { 1187 m = *mp; 1188 if ((*k & m) != *p) 1189 goto next; /* not match */ 1190 if (m == 0xff) /* short cut for a typical case */ 1191 matchlen += 8; 1192 else { 1193 while (m >= 0x80) { 1194 matchlen++; 1195 m <<= 1; 1196 } 1197 } 1198 } 1199 1200 /* matched. check if this is better than the current best. */ 1201 if (bestpol == NULL || 1202 matchlen > bestmatchlen) { 1203 bestpol = pol; 1204 bestmatchlen = matchlen; 1205 } 1206 1207 next: 1208 continue; 1209 } 1210 1211 return (bestpol); 1212 } 1213