1 /* $FreeBSD$ */ 2 /* $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ */ 3 4 /* 5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. Neither the name of the project nor the names of its contributors 17 * may be used to endorse or promote products derived from this software 18 * without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * Copyright (c) 1982, 1986, 1991, 1993 35 * The Regents of the University of California. All rights reserved. 36 * 37 * Redistribution and use in source and binary forms, with or without 38 * modification, are permitted provided that the following conditions 39 * are met: 40 * 1. Redistributions of source code must retain the above copyright 41 * notice, this list of conditions and the following disclaimer. 42 * 2. Redistributions in binary form must reproduce the above copyright 43 * notice, this list of conditions and the following disclaimer in the 44 * documentation and/or other materials provided with the distribution. 45 * 3. All advertising materials mentioning features or use of this software 46 * must display the following acknowledgement: 47 * This product includes software developed by the University of 48 * California, Berkeley and its contributors. 49 * 4. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 64 * 65 * @(#)in_pcb.c 8.2 (Berkeley) 1/4/94 66 */ 67 68 #include "opt_inet.h" 69 #include "opt_inet6.h" 70 71 #include <sys/param.h> 72 #include <sys/systm.h> 73 #include <sys/malloc.h> 74 #include <sys/mbuf.h> 75 #include <sys/protosw.h> 76 #include <sys/socket.h> 77 #include <sys/socketvar.h> 78 #include <sys/sockio.h> 79 #include <sys/sysctl.h> 80 #include <sys/errno.h> 81 #include <sys/time.h> 82 #include <sys/kernel.h> 83 84 #include <net/if.h> 85 #include <net/route.h> 86 87 #include <netinet/in.h> 88 #include <netinet/in_var.h> 89 #include <netinet/in_systm.h> 90 #include <netinet/ip.h> 91 #include <netinet/in_pcb.h> 92 #include <netinet6/in6_var.h> 93 #include <netinet/ip6.h> 94 #include <netinet6/in6_pcb.h> 95 #include <netinet6/ip6_var.h> 96 #include <netinet6/nd6.h> 97 #ifdef ENABLE_DEFAULT_SCOPE 98 #include <netinet6/scope6_var.h> 99 #endif 100 101 #include <net/net_osdep.h> 102 103 static struct mtx addrsel_lock; 104 #define ADDRSEL_LOCK_INIT() mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF) 105 #define ADDRSEL_LOCK() mtx_lock(&addrsel_lock) 106 #define ADDRSEL_UNLOCK() mtx_unlock(&addrsel_lock) 107 #define ADDRSEL_LOCK_ASSERT() mtx_assert(&addrsel_lock, MA_OWNED) 108 109 #define ADDR_LABEL_NOTAPP (-1) 110 struct in6_addrpolicy defaultaddrpolicy; 111 112 int ip6_prefer_tempaddr = 0; 113 114 static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *, 115 struct ip6_moptions *, struct route_in6 *ro, struct ifnet **)); 116 117 static struct in6_addrpolicy *lookup_addrsel_policy __P((struct sockaddr_in6 *)); 118 119 static void init_policy_queue __P((void)); 120 static int add_addrsel_policyent __P((struct in6_addrpolicy *)); 121 static int delete_addrsel_policyent __P((struct in6_addrpolicy *)); 122 static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *), 123 void *)); 124 static int dump_addrsel_policyent __P((struct in6_addrpolicy *, void *)); 125 static struct in6_addrpolicy *match_addrsel_policy __P((struct sockaddr_in6 *)); 126 127 /* 128 * Return an IPv6 address, which is the most appropriate for a given 129 * destination and user specified options. 130 * If necessary, this function lookups the routing table and returns 131 * an entry to the caller for later use. 132 */ 133 #define REPLACE(r) do {\ 134 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 135 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 136 ip6stat.ip6s_sources_rule[(r)]++; \ 137 /* printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 138 goto replace; \ 139 } while(0) 140 #define NEXT(r) do {\ 141 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 142 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 143 ip6stat.ip6s_sources_rule[(r)]++; \ 144 /* printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(&ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(&ia->ia_addr.sin6_addr), (r)); */ \ 145 goto next; /* XXX: we can't use 'continue' here */ \ 146 } while(0) 147 #define BREAK(r) do { \ 148 if ((r) < sizeof(ip6stat.ip6s_sources_rule) / \ 149 sizeof(ip6stat.ip6s_sources_rule[0])) /* check for safety */ \ 150 ip6stat.ip6s_sources_rule[(r)]++; \ 151 goto out; /* XXX: we can't use 'break' here */ \ 152 } while(0) 153 154 struct in6_addr * 155 in6_selectsrc(dstsock, opts, mopts, ro, laddr, errorp) 156 struct sockaddr_in6 *dstsock; 157 struct ip6_pktopts *opts; 158 struct ip6_moptions *mopts; 159 struct route_in6 *ro; 160 struct in6_addr *laddr; 161 int *errorp; 162 { 163 struct in6_addr *dst; 164 struct ifnet *ifp = NULL; 165 struct in6_ifaddr *ia = NULL, *ia_best = NULL; 166 struct in6_pktinfo *pi = NULL; 167 int dst_scope = -1, best_scope = -1, best_matchlen = -1; 168 struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL; 169 u_int32_t odstzone; 170 int prefer_tempaddr; 171 struct sockaddr_in6 dstsock0; 172 173 dstsock0 = *dstsock; 174 if (IN6_IS_SCOPE_LINKLOCAL(&dstsock0.sin6_addr) || 175 IN6_IS_ADDR_MC_INTFACELOCAL(&dstsock0.sin6_addr)) { 176 /* KAME assumption: link id == interface id */ 177 if (opts && opts->ip6po_pktinfo && 178 opts->ip6po_pktinfo->ipi6_ifindex) { 179 ifp = ifnet_byindex(opts->ip6po_pktinfo->ipi6_ifindex); 180 dstsock0.sin6_addr.s6_addr16[1] = 181 htons(opts->ip6po_pktinfo->ipi6_ifindex); 182 } else if (mopts && 183 IN6_IS_ADDR_MULTICAST(&dstsock0.sin6_addr) && 184 mopts->im6o_multicast_ifp) { 185 ifp = mopts->im6o_multicast_ifp; 186 dstsock0.sin6_addr.s6_addr16[1] = htons(ifp->if_index); 187 } else if ((*errorp = in6_embedscope(&dstsock0.sin6_addr, 188 &dstsock0, NULL, NULL)) != 0) 189 return (NULL); 190 } 191 dstsock = &dstsock0; 192 193 dst = &dstsock->sin6_addr; 194 *errorp = 0; 195 196 /* 197 * If the source address is explicitly specified by the caller, 198 * check if the requested source address is indeed a unicast address 199 * assigned to the node, and can be used as the packet's source 200 * address. If everything is okay, use the address as source. 201 */ 202 if (opts && (pi = opts->ip6po_pktinfo) && 203 !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) { 204 struct sockaddr_in6 srcsock; 205 struct in6_ifaddr *ia6; 206 207 /* get the outgoing interface */ 208 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) 209 != 0) { 210 return (NULL); 211 } 212 213 /* 214 * determine the appropriate zone id of the source based on 215 * the zone of the destination and the outgoing interface. 216 */ 217 bzero(&srcsock, sizeof(srcsock)); 218 srcsock.sin6_family = AF_INET6; 219 srcsock.sin6_len = sizeof(srcsock); 220 srcsock.sin6_addr = pi->ipi6_addr; 221 if (ifp) { 222 if (in6_addr2zoneid(ifp, &pi->ipi6_addr, 223 &srcsock.sin6_scope_id)) { 224 *errorp = EINVAL; /* XXX */ 225 return (NULL); 226 } 227 } 228 if ((*errorp = in6_embedscope(&srcsock.sin6_addr, &srcsock, 229 NULL, NULL)) != 0) { 230 return (NULL); 231 } 232 srcsock.sin6_scope_id = 0; /* XXX: ifa_ifwithaddr expects 0 */ 233 ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock)); 234 if (ia6 == NULL || 235 (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) { 236 *errorp = EADDRNOTAVAIL; 237 return (NULL); 238 } 239 pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */ 240 return (&ia6->ia_addr.sin6_addr); 241 } 242 243 /* 244 * Otherwise, if the socket has already bound the source, just use it. 245 */ 246 if (laddr && !IN6_IS_ADDR_UNSPECIFIED(laddr)) 247 return (laddr); 248 249 /* 250 * If the address is not specified, choose the best one based on 251 * the outgoing interface and the destination address. 252 */ 253 /* get the outgoing interface */ 254 if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0) 255 return (NULL); 256 257 #ifdef DIAGNOSTIC 258 if (ifp == NULL) /* this should not happen */ 259 panic("in6_selectsrc: NULL ifp"); 260 #endif 261 if (in6_addr2zoneid(ifp, dst, &odstzone)) { /* impossible */ 262 *errorp = EIO; /* XXX */ 263 return (NULL); 264 } 265 for (ia = in6_ifaddr; ia; ia = ia->ia_next) { 266 int new_scope = -1, new_matchlen = -1; 267 struct in6_addrpolicy *new_policy = NULL; 268 u_int32_t srczone, osrczone, dstzone; 269 struct ifnet *ifp1 = ia->ia_ifp; 270 271 /* 272 * We'll never take an address that breaks the scope zone 273 * of the destination. We also skip an address if its zone 274 * does not contain the outgoing interface. 275 * XXX: we should probably use sin6_scope_id here. 276 */ 277 if (in6_addr2zoneid(ifp1, dst, &dstzone) || 278 odstzone != dstzone) { 279 continue; 280 } 281 if (in6_addr2zoneid(ifp, &ia->ia_addr.sin6_addr, &osrczone) || 282 in6_addr2zoneid(ifp1, &ia->ia_addr.sin6_addr, &srczone) || 283 osrczone != srczone) { 284 continue; 285 } 286 287 /* avoid unusable addresses */ 288 if ((ia->ia6_flags & 289 (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) { 290 continue; 291 } 292 if (!ip6_use_deprecated && IFA6_IS_DEPRECATED(ia)) 293 continue; 294 295 /* Rule 1: Prefer same address */ 296 if (IN6_ARE_ADDR_EQUAL(dst, &ia->ia_addr.sin6_addr)) { 297 ia_best = ia; 298 BREAK(1); /* there should be no better candidate */ 299 } 300 301 if (ia_best == NULL) 302 REPLACE(0); 303 304 /* Rule 2: Prefer appropriate scope */ 305 if (dst_scope < 0) 306 dst_scope = in6_addrscope(dst); 307 new_scope = in6_addrscope(&ia->ia_addr.sin6_addr); 308 if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) { 309 if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0) 310 REPLACE(2); 311 NEXT(2); 312 } else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) { 313 if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0) 314 NEXT(2); 315 REPLACE(2); 316 } 317 318 /* 319 * Rule 3: Avoid deprecated addresses. Note that the case of 320 * !ip6_use_deprecated is already rejected above. 321 */ 322 if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia)) 323 NEXT(3); 324 if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia)) 325 REPLACE(3); 326 327 /* Rule 4: Prefer home addresses */ 328 /* 329 * XXX: This is a TODO. We should probably merge the MIP6 330 * case above. 331 */ 332 333 /* Rule 5: Prefer outgoing interface */ 334 if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp) 335 NEXT(5); 336 if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp) 337 REPLACE(5); 338 339 /* 340 * Rule 6: Prefer matching label 341 * Note that best_policy should be non-NULL here. 342 */ 343 if (dst_policy == NULL) 344 dst_policy = lookup_addrsel_policy(dstsock); 345 if (dst_policy->label != ADDR_LABEL_NOTAPP) { 346 new_policy = lookup_addrsel_policy(&ia->ia_addr); 347 if (dst_policy->label == best_policy->label && 348 dst_policy->label != new_policy->label) 349 NEXT(6); 350 if (dst_policy->label != best_policy->label && 351 dst_policy->label == new_policy->label) 352 REPLACE(6); 353 } 354 355 /* 356 * Rule 7: Prefer public addresses. 357 * We allow users to reverse the logic by configuring 358 * a sysctl variable, so that privacy conscious users can 359 * always prefer temporary addresses. 360 */ 361 if (opts == NULL || 362 opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) { 363 prefer_tempaddr = ip6_prefer_tempaddr; 364 } else if (opts->ip6po_prefer_tempaddr == 365 IP6PO_TEMPADDR_NOTPREFER) { 366 prefer_tempaddr = 0; 367 } else 368 prefer_tempaddr = 1; 369 if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 370 (ia->ia6_flags & IN6_IFF_TEMPORARY)) { 371 if (prefer_tempaddr) 372 REPLACE(7); 373 else 374 NEXT(7); 375 } 376 if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) && 377 !(ia->ia6_flags & IN6_IFF_TEMPORARY)) { 378 if (prefer_tempaddr) 379 NEXT(7); 380 else 381 REPLACE(7); 382 } 383 384 /* 385 * Rule 8: prefer addresses on alive interfaces. 386 * This is a KAME specific rule. 387 */ 388 if ((ia_best->ia_ifp->if_flags & IFF_UP) && 389 !(ia->ia_ifp->if_flags & IFF_UP)) 390 NEXT(8); 391 if (!(ia_best->ia_ifp->if_flags & IFF_UP) && 392 (ia->ia_ifp->if_flags & IFF_UP)) 393 REPLACE(8); 394 395 /* 396 * Rule 14: Use longest matching prefix. 397 * Note: in the address selection draft, this rule is 398 * documented as "Rule 8". However, since it is also 399 * documented that this rule can be overridden, we assign 400 * a large number so that it is easy to assign smaller numbers 401 * to more preferred rules. 402 */ 403 new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, dst); 404 if (best_matchlen < new_matchlen) 405 REPLACE(14); 406 if (new_matchlen < best_matchlen) 407 NEXT(14); 408 409 /* Rule 15 is reserved. */ 410 411 /* 412 * Last resort: just keep the current candidate. 413 * Or, do we need more rules? 414 */ 415 continue; 416 417 replace: 418 ia_best = ia; 419 best_scope = (new_scope >= 0 ? new_scope : 420 in6_addrscope(&ia_best->ia_addr.sin6_addr)); 421 best_policy = (new_policy ? new_policy : 422 lookup_addrsel_policy(&ia_best->ia_addr)); 423 best_matchlen = (new_matchlen >= 0 ? new_matchlen : 424 in6_matchlen(&ia_best->ia_addr.sin6_addr, 425 dst)); 426 427 next: 428 continue; 429 430 out: 431 break; 432 } 433 434 if ((ia = ia_best) == NULL) { 435 *errorp = EADDRNOTAVAIL; 436 return (NULL); 437 } 438 439 return (&ia->ia_addr.sin6_addr); 440 } 441 442 static int 443 in6_selectif(dstsock, opts, mopts, ro, retifp) 444 struct sockaddr_in6 *dstsock; 445 struct ip6_pktopts *opts; 446 struct ip6_moptions *mopts; 447 struct route_in6 *ro; 448 struct ifnet **retifp; 449 { 450 int error; 451 struct route_in6 sro; 452 struct rtentry *rt = NULL; 453 454 if (ro == NULL) { 455 bzero(&sro, sizeof(sro)); 456 ro = &sro; 457 } 458 459 if ((error = in6_selectroute(dstsock, opts, mopts, ro, retifp, 460 &rt, 0)) != 0) { 461 if (rt && rt == sro.ro_rt) 462 RTFREE(rt); 463 return (error); 464 } 465 466 /* 467 * do not use a rejected or black hole route. 468 * XXX: this check should be done in the L2 output routine. 469 * However, if we skipped this check here, we'd see the following 470 * scenario: 471 * - install a rejected route for a scoped address prefix 472 * (like fe80::/10) 473 * - send a packet to a destination that matches the scoped prefix, 474 * with ambiguity about the scope zone. 475 * - pick the outgoing interface from the route, and disambiguate the 476 * scope zone with the interface. 477 * - ip6_output() would try to get another route with the "new" 478 * destination, which may be valid. 479 * - we'd see no error on output. 480 * Although this may not be very harmful, it should still be confusing. 481 * We thus reject the case here. 482 */ 483 if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) { 484 int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); 485 486 if (rt && rt == sro.ro_rt) 487 RTFREE(rt); 488 return (flags); 489 } 490 491 /* 492 * Adjust the "outgoing" interface. If we're going to loop the packet 493 * back to ourselves, the ifp would be the loopback interface. 494 * However, we'd rather know the interface associated to the 495 * destination address (which should probably be one of our own 496 * addresses.) 497 */ 498 if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp) 499 *retifp = rt->rt_ifa->ifa_ifp; 500 501 if (rt && rt == sro.ro_rt) 502 RTFREE(rt); 503 return (0); 504 } 505 506 int 507 in6_selectroute(dstsock, opts, mopts, ro, retifp, retrt, clone) 508 struct sockaddr_in6 *dstsock; 509 struct ip6_pktopts *opts; 510 struct ip6_moptions *mopts; 511 struct route_in6 *ro; 512 struct ifnet **retifp; 513 struct rtentry **retrt; 514 int clone; /* meaningful only for bsdi and freebsd. */ 515 { 516 int error = 0; 517 struct ifnet *ifp = NULL; 518 struct rtentry *rt = NULL; 519 struct sockaddr_in6 *sin6_next; 520 struct in6_pktinfo *pi = NULL; 521 struct in6_addr *dst = &dstsock->sin6_addr; 522 523 #if 0 524 if (dstsock->sin6_addr.s6_addr32[0] == 0 && 525 dstsock->sin6_addr.s6_addr32[1] == 0 && 526 !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) { 527 printf("in6_selectroute: strange destination %s\n", 528 ip6_sprintf(&dstsock->sin6_addr)); 529 } else { 530 printf("in6_selectroute: destination = %s%%%d\n", 531 ip6_sprintf(&dstsock->sin6_addr), 532 dstsock->sin6_scope_id); /* for debug */ 533 } 534 #endif 535 536 /* If the caller specify the outgoing interface explicitly, use it. */ 537 if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) { 538 /* XXX boundary check is assumed to be already done. */ 539 ifp = ifnet_byindex(pi->ipi6_ifindex); 540 if (ifp != NULL && 541 (retrt == NULL || IN6_IS_ADDR_MULTICAST(dst))) { 542 /* 543 * we do not have to check nor get the route for 544 * multicast. 545 */ 546 goto done; 547 } else 548 goto getroute; 549 } 550 551 /* 552 * If the destination address is a multicast address and the outgoing 553 * interface for the address is specified by the caller, use it. 554 */ 555 if (IN6_IS_ADDR_MULTICAST(dst) && 556 mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) { 557 goto done; /* we do not need a route for multicast. */ 558 } 559 560 getroute: 561 /* 562 * If the next hop address for the packet is specified by the caller, 563 * use it as the gateway. 564 */ 565 if (opts && opts->ip6po_nexthop) { 566 struct route_in6 *ron; 567 568 sin6_next = satosin6(opts->ip6po_nexthop); 569 570 /* at this moment, we only support AF_INET6 next hops */ 571 if (sin6_next->sin6_family != AF_INET6) { 572 error = EAFNOSUPPORT; /* or should we proceed? */ 573 goto done; 574 } 575 576 /* 577 * If the next hop is an IPv6 address, then the node identified 578 * by that address must be a neighbor of the sending host. 579 */ 580 ron = &opts->ip6po_nextroute; 581 if ((ron->ro_rt && 582 (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) != 583 (RTF_UP | RTF_LLINFO)) || 584 !SA6_ARE_ADDR_EQUAL(satosin6(&ron->ro_dst), sin6_next)) { 585 if (ron->ro_rt) { 586 RTFREE(ron->ro_rt); 587 ron->ro_rt = NULL; 588 } 589 *satosin6(&ron->ro_dst) = *sin6_next; 590 } 591 if (ron->ro_rt == NULL) { 592 rtalloc((struct route *)ron); /* multi path case? */ 593 if (ron->ro_rt == NULL || 594 !(ron->ro_rt->rt_flags & RTF_LLINFO)) { 595 if (ron->ro_rt) { 596 RTFREE(ron->ro_rt); 597 ron->ro_rt = NULL; 598 } 599 error = EHOSTUNREACH; 600 goto done; 601 } 602 } 603 rt = ron->ro_rt; 604 ifp = rt->rt_ifp; 605 606 /* 607 * When cloning is required, try to allocate a route to the 608 * destination so that the caller can store path MTU 609 * information. 610 */ 611 if (!clone) 612 goto done; 613 } 614 615 /* 616 * Use a cached route if it exists and is valid, else try to allocate 617 * a new one. Note that we should check the address family of the 618 * cached destination, in case of sharing the cache with IPv4. 619 */ 620 if (ro) { 621 if (ro->ro_rt && 622 (!(ro->ro_rt->rt_flags & RTF_UP) || 623 ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || 624 !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, 625 dst))) { 626 RTFREE(ro->ro_rt); 627 ro->ro_rt = (struct rtentry *)NULL; 628 } 629 if (ro->ro_rt == (struct rtentry *)NULL) { 630 struct sockaddr_in6 *sa6; 631 632 /* No route yet, so try to acquire one */ 633 bzero(&ro->ro_dst, sizeof(struct sockaddr_in6)); 634 sa6 = (struct sockaddr_in6 *)&ro->ro_dst; 635 *sa6 = *dstsock; 636 sa6->sin6_scope_id = 0; 637 638 if (clone) { 639 rtalloc((struct route *)ro); 640 } else { 641 ro->ro_rt = rtalloc1(&((struct route *)ro) 642 ->ro_dst, 0, 0UL); 643 if (ro->ro_rt) 644 RT_UNLOCK(ro->ro_rt); 645 } 646 } 647 648 /* 649 * do not care about the result if we have the nexthop 650 * explicitly specified. 651 */ 652 if (opts && opts->ip6po_nexthop) 653 goto done; 654 655 if (ro->ro_rt) { 656 ifp = ro->ro_rt->rt_ifp; 657 658 if (ifp == NULL) { /* can this really happen? */ 659 RTFREE(ro->ro_rt); 660 ro->ro_rt = NULL; 661 } 662 } 663 if (ro->ro_rt == NULL) 664 error = EHOSTUNREACH; 665 rt = ro->ro_rt; 666 667 /* 668 * Check if the outgoing interface conflicts with 669 * the interface specified by ipi6_ifindex (if specified). 670 * Note that loopback interface is always okay. 671 * (this may happen when we are sending a packet to one of 672 * our own addresses.) 673 */ 674 if (opts && opts->ip6po_pktinfo && 675 opts->ip6po_pktinfo->ipi6_ifindex) { 676 if (!(ifp->if_flags & IFF_LOOPBACK) && 677 ifp->if_index != 678 opts->ip6po_pktinfo->ipi6_ifindex) { 679 error = EHOSTUNREACH; 680 goto done; 681 } 682 } 683 } 684 685 done: 686 if (ifp == NULL && rt == NULL) { 687 /* 688 * This can happen if the caller did not pass a cached route 689 * nor any other hints. We treat this case an error. 690 */ 691 error = EHOSTUNREACH; 692 } 693 if (error == EHOSTUNREACH) 694 ip6stat.ip6s_noroute++; 695 696 if (retifp != NULL) 697 *retifp = ifp; 698 if (retrt != NULL) 699 *retrt = rt; /* rt may be NULL */ 700 701 return (error); 702 } 703 704 /* 705 * Default hop limit selection. The precedence is as follows: 706 * 1. Hoplimit value specified via ioctl. 707 * 2. (If the outgoing interface is detected) the current 708 * hop limit of the interface specified by router advertisement. 709 * 3. The system default hoplimit. 710 */ 711 int 712 in6_selecthlim(in6p, ifp) 713 struct in6pcb *in6p; 714 struct ifnet *ifp; 715 { 716 if (in6p && in6p->in6p_hops >= 0) 717 return (in6p->in6p_hops); 718 else if (ifp) 719 return (ND_IFINFO(ifp)->chlim); 720 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 721 struct route_in6 ro6; 722 struct ifnet *lifp; 723 724 bzero(&ro6, sizeof(ro6)); 725 ro6.ro_dst.sin6_family = AF_INET6; 726 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 727 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 728 rtalloc((struct route *)&ro6); 729 if (ro6.ro_rt) { 730 lifp = ro6.ro_rt->rt_ifp; 731 RTFREE(ro6.ro_rt); 732 if (lifp) 733 return (ND_IFINFO(lifp)->chlim); 734 } else 735 return (ip6_defhlim); 736 } 737 return (ip6_defhlim); 738 } 739 740 /* 741 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 742 * share this function by all *bsd*... 743 */ 744 int 745 in6_pcbsetport(laddr, inp, cred) 746 struct in6_addr *laddr; 747 struct inpcb *inp; 748 struct ucred *cred; 749 { 750 struct socket *so = inp->inp_socket; 751 u_int16_t lport = 0, first, last, *lastport; 752 int count, error = 0, wild = 0; 753 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 754 755 /* XXX: this is redundant when called from in6_pcbbind */ 756 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 757 wild = INPLOOKUP_WILDCARD; 758 759 inp->inp_flags |= INP_ANONPORT; 760 761 if (inp->inp_flags & INP_HIGHPORT) { 762 first = ipport_hifirstauto; /* sysctl */ 763 last = ipport_hilastauto; 764 lastport = &pcbinfo->lasthi; 765 } else if (inp->inp_flags & INP_LOWPORT) { 766 if ((error = suser_cred(cred, 0))) 767 return error; 768 first = ipport_lowfirstauto; /* 1023 */ 769 last = ipport_lowlastauto; /* 600 */ 770 lastport = &pcbinfo->lastlow; 771 } else { 772 first = ipport_firstauto; /* sysctl */ 773 last = ipport_lastauto; 774 lastport = &pcbinfo->lastport; 775 } 776 /* 777 * Simple check to ensure all ports are not used up causing 778 * a deadlock here. 779 * 780 * We split the two cases (up and down) so that the direction 781 * is not being tested on each round of the loop. 782 */ 783 if (first > last) { 784 /* 785 * counting down 786 */ 787 count = first - last; 788 789 do { 790 if (count-- < 0) { /* completely used? */ 791 /* 792 * Undo any address bind that may have 793 * occurred above. 794 */ 795 inp->in6p_laddr = in6addr_any; 796 return (EAGAIN); 797 } 798 --*lastport; 799 if (*lastport > first || *lastport < last) 800 *lastport = first; 801 lport = htons(*lastport); 802 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 803 lport, wild)); 804 } else { 805 /* 806 * counting up 807 */ 808 count = last - first; 809 810 do { 811 if (count-- < 0) { /* completely used? */ 812 /* 813 * Undo any address bind that may have 814 * occurred above. 815 */ 816 inp->in6p_laddr = in6addr_any; 817 return (EAGAIN); 818 } 819 ++*lastport; 820 if (*lastport < first || *lastport > last) 821 *lastport = first; 822 lport = htons(*lastport); 823 } while (in6_pcblookup_local(pcbinfo, 824 &inp->in6p_laddr, lport, wild)); 825 } 826 827 inp->inp_lport = lport; 828 if (in_pcbinshash(inp) != 0) { 829 inp->in6p_laddr = in6addr_any; 830 inp->inp_lport = 0; 831 return (EAGAIN); 832 } 833 834 return (0); 835 } 836 837 /* 838 * Generate kernel-internal form (scopeid embedded into s6_addr16[1]). 839 * If the address scope of is link-local, embed the interface index in the 840 * address. The routine determines our precedence 841 * between advanced API scope/interface specification and basic API 842 * specification. 843 * 844 * This function should be nuked in the future, when we get rid of embedded 845 * scopeid thing. 846 * 847 * XXX actually, it is over-specification to return ifp against sin6_scope_id. 848 * there can be multiple interfaces that belong to a particular scope zone 849 * (in specification, we have 1:N mapping between a scope zone and interfaces). 850 * we may want to change the function to return something other than ifp. 851 */ 852 int 853 in6_embedscope(in6, sin6, in6p, ifpp) 854 struct in6_addr *in6; 855 const struct sockaddr_in6 *sin6; 856 struct in6pcb *in6p; 857 struct ifnet **ifpp; 858 { 859 struct ifnet *ifp = NULL; 860 u_int32_t zoneid = sin6->sin6_scope_id; 861 862 *in6 = sin6->sin6_addr; 863 if (ifpp) 864 *ifpp = NULL; 865 866 /* 867 * don't try to read sin6->sin6_addr beyond here, since the caller may 868 * ask us to overwrite existing sockaddr_in6 869 */ 870 871 #ifdef ENABLE_DEFAULT_SCOPE 872 if (zoneid == 0) 873 zoneid = scope6_addr2default(in6); 874 #endif 875 876 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 877 struct in6_pktinfo *pi; 878 879 /* KAME assumption: link id == interface id */ 880 if (in6p && in6p->in6p_outputopts && 881 (pi = in6p->in6p_outputopts->ip6po_pktinfo) && 882 pi->ipi6_ifindex) { 883 ifp = ifnet_byindex(pi->ipi6_ifindex); 884 in6->s6_addr16[1] = htons(pi->ipi6_ifindex); 885 } else if (in6p && IN6_IS_ADDR_MULTICAST(in6) && 886 in6p->in6p_moptions && 887 in6p->in6p_moptions->im6o_multicast_ifp) { 888 ifp = in6p->in6p_moptions->im6o_multicast_ifp; 889 in6->s6_addr16[1] = htons(ifp->if_index); 890 } else if (zoneid) { 891 if (if_index < zoneid) 892 return (ENXIO); /* XXX EINVAL? */ 893 ifp = ifnet_byindex(zoneid); 894 895 /* XXX assignment to 16bit from 32bit variable */ 896 in6->s6_addr16[1] = htons(zoneid & 0xffff); 897 } 898 899 if (ifpp) 900 *ifpp = ifp; 901 } 902 903 return 0; 904 } 905 906 /* 907 * generate standard sockaddr_in6 from embedded form. 908 * touches sin6_addr and sin6_scope_id only. 909 * 910 * this function should be nuked in the future, when we get rid of 911 * embedded scopeid thing. 912 */ 913 int 914 in6_recoverscope(sin6, in6, ifp) 915 struct sockaddr_in6 *sin6; 916 const struct in6_addr *in6; 917 struct ifnet *ifp; 918 { 919 u_int32_t zoneid; 920 921 sin6->sin6_addr = *in6; 922 923 /* 924 * don't try to read *in6 beyond here, since the caller may 925 * ask us to overwrite existing sockaddr_in6 926 */ 927 928 sin6->sin6_scope_id = 0; 929 if (IN6_IS_SCOPE_LINKLOCAL(in6) || IN6_IS_ADDR_MC_INTFACELOCAL(in6)) { 930 /* 931 * KAME assumption: link id == interface id 932 */ 933 zoneid = ntohs(sin6->sin6_addr.s6_addr16[1]); 934 if (zoneid) { 935 /* sanity check */ 936 if (zoneid < 0 || if_index < zoneid) 937 return ENXIO; 938 if (ifp && ifp->if_index != zoneid) 939 return ENXIO; 940 sin6->sin6_addr.s6_addr16[1] = 0; 941 sin6->sin6_scope_id = zoneid; 942 } 943 } 944 945 return 0; 946 } 947 948 /* 949 * just clear the embedded scope identifier. 950 */ 951 void 952 in6_clearscope(addr) 953 struct in6_addr *addr; 954 { 955 if (IN6_IS_SCOPE_LINKLOCAL(addr) || IN6_IS_ADDR_MC_INTFACELOCAL(addr)) 956 addr->s6_addr16[1] = 0; 957 } 958 959 void 960 addrsel_policy_init() 961 { 962 ADDRSEL_LOCK_INIT(); 963 964 init_policy_queue(); 965 966 /* initialize the "last resort" policy */ 967 bzero(&defaultaddrpolicy, sizeof(defaultaddrpolicy)); 968 defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 969 } 970 971 static struct in6_addrpolicy * 972 lookup_addrsel_policy(key) 973 struct sockaddr_in6 *key; 974 { 975 struct in6_addrpolicy *match = NULL; 976 977 ADDRSEL_LOCK(); 978 match = match_addrsel_policy(key); 979 980 if (match == NULL) 981 match = &defaultaddrpolicy; 982 else 983 match->use++; 984 ADDRSEL_UNLOCK(); 985 986 return (match); 987 } 988 989 /* 990 * Subroutines to manage the address selection policy table via sysctl. 991 */ 992 struct walkarg { 993 struct sysctl_req *w_req; 994 }; 995 996 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 997 SYSCTL_DECL(_net_inet6_ip6); 998 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 999 CTLFLAG_RD, in6_src_sysctl, ""); 1000 1001 static int 1002 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 1003 { 1004 struct walkarg w; 1005 1006 if (req->newptr) 1007 return EPERM; 1008 1009 bzero(&w, sizeof(w)); 1010 w.w_req = req; 1011 1012 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 1013 } 1014 1015 int 1016 in6_src_ioctl(cmd, data) 1017 u_long cmd; 1018 caddr_t data; 1019 { 1020 int i; 1021 struct in6_addrpolicy ent0; 1022 1023 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 1024 return (EOPNOTSUPP); /* check for safety */ 1025 1026 ent0 = *(struct in6_addrpolicy *)data; 1027 1028 if (ent0.label == ADDR_LABEL_NOTAPP) 1029 return (EINVAL); 1030 /* check if the prefix mask is consecutive. */ 1031 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 1032 return (EINVAL); 1033 /* clear trailing garbages (if any) of the prefix address. */ 1034 for (i = 0; i < 4; i++) { 1035 ent0.addr.sin6_addr.s6_addr32[i] &= 1036 ent0.addrmask.sin6_addr.s6_addr32[i]; 1037 } 1038 ent0.use = 0; 1039 1040 switch (cmd) { 1041 case SIOCAADDRCTL_POLICY: 1042 return (add_addrsel_policyent(&ent0)); 1043 case SIOCDADDRCTL_POLICY: 1044 return (delete_addrsel_policyent(&ent0)); 1045 } 1046 1047 return (0); /* XXX: compromise compilers */ 1048 } 1049 1050 /* 1051 * The followings are implementation of the policy table using a 1052 * simple tail queue. 1053 * XXX such details should be hidden. 1054 * XXX implementation using binary tree should be more efficient. 1055 */ 1056 struct addrsel_policyent { 1057 TAILQ_ENTRY(addrsel_policyent) ape_entry; 1058 struct in6_addrpolicy ape_policy; 1059 }; 1060 1061 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 1062 1063 struct addrsel_policyhead addrsel_policytab; 1064 1065 static void 1066 init_policy_queue() 1067 { 1068 TAILQ_INIT(&addrsel_policytab); 1069 } 1070 1071 static int 1072 add_addrsel_policyent(newpolicy) 1073 struct in6_addrpolicy *newpolicy; 1074 { 1075 struct addrsel_policyent *new, *pol; 1076 1077 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 1078 M_WAITOK); 1079 ADDRSEL_LOCK(); 1080 1081 /* duplication check */ 1082 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1083 pol = TAILQ_NEXT(pol, ape_entry)) { 1084 if (SA6_ARE_ADDR_EQUAL(&newpolicy->addr, 1085 &pol->ape_policy.addr) && 1086 SA6_ARE_ADDR_EQUAL(&newpolicy->addrmask, 1087 &pol->ape_policy.addrmask)) { 1088 ADDRSEL_UNLOCK(); 1089 FREE(new, M_IFADDR); 1090 return (EEXIST); /* or override it? */ 1091 } 1092 } 1093 1094 bzero(new, sizeof(*new)); 1095 1096 /* XXX: should validate entry */ 1097 new->ape_policy = *newpolicy; 1098 1099 TAILQ_INSERT_TAIL(&addrsel_policytab, new, ape_entry); 1100 ADDRSEL_UNLOCK(); 1101 1102 return (0); 1103 } 1104 1105 static int 1106 delete_addrsel_policyent(key) 1107 struct in6_addrpolicy *key; 1108 { 1109 struct addrsel_policyent *pol; 1110 1111 ADDRSEL_LOCK(); 1112 1113 /* search for the entry in the table */ 1114 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1115 pol = TAILQ_NEXT(pol, ape_entry)) { 1116 if (SA6_ARE_ADDR_EQUAL(&key->addr, &pol->ape_policy.addr) && 1117 SA6_ARE_ADDR_EQUAL(&key->addrmask, 1118 &pol->ape_policy.addrmask)) { 1119 break; 1120 } 1121 } 1122 if (pol == NULL) { 1123 ADDRSEL_UNLOCK(); 1124 return (ESRCH); 1125 } 1126 1127 TAILQ_REMOVE(&addrsel_policytab, pol, ape_entry); 1128 ADDRSEL_UNLOCK(); 1129 1130 return (0); 1131 } 1132 1133 static int 1134 walk_addrsel_policy(callback, w) 1135 int (*callback) __P((struct in6_addrpolicy *, void *)); 1136 void *w; 1137 { 1138 struct addrsel_policyent *pol; 1139 int error = 0; 1140 1141 ADDRSEL_LOCK(); 1142 for (pol = TAILQ_FIRST(&addrsel_policytab); pol; 1143 pol = TAILQ_NEXT(pol, ape_entry)) { 1144 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1145 ADDRSEL_UNLOCK(); 1146 return (error); 1147 } 1148 } 1149 ADDRSEL_UNLOCK(); 1150 1151 return (error); 1152 } 1153 1154 static int 1155 dump_addrsel_policyent(pol, arg) 1156 struct in6_addrpolicy *pol; 1157 void *arg; 1158 { 1159 int error = 0; 1160 struct walkarg *w = arg; 1161 1162 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1163 1164 return (error); 1165 } 1166 1167 static struct in6_addrpolicy * 1168 match_addrsel_policy(key) 1169 struct sockaddr_in6 *key; 1170 { 1171 struct addrsel_policyent *pent; 1172 struct in6_addrpolicy *bestpol = NULL, *pol; 1173 int matchlen, bestmatchlen = -1; 1174 u_char *mp, *ep, *k, *p, m; 1175 1176 for (pent = TAILQ_FIRST(&addrsel_policytab); pent; 1177 pent = TAILQ_NEXT(pent, ape_entry)) { 1178 matchlen = 0; 1179 1180 pol = &pent->ape_policy; 1181 mp = (u_char *)&pol->addrmask.sin6_addr; 1182 ep = mp + 16; /* XXX: scope field? */ 1183 k = (u_char *)&key->sin6_addr; 1184 p = (u_char *)&pol->addr.sin6_addr; 1185 for (; mp < ep && *mp; mp++, k++, p++) { 1186 m = *mp; 1187 if ((*k & m) != *p) 1188 goto next; /* not match */ 1189 if (m == 0xff) /* short cut for a typical case */ 1190 matchlen += 8; 1191 else { 1192 while (m >= 0x80) { 1193 matchlen++; 1194 m <<= 1; 1195 } 1196 } 1197 } 1198 1199 /* matched. check if this is better than the current best. */ 1200 if (bestpol == NULL || 1201 matchlen > bestmatchlen) { 1202 bestpol = pol; 1203 bestmatchlen = matchlen; 1204 } 1205 1206 next: 1207 continue; 1208 } 1209 1210 return (bestpol); 1211 } 1212