1 /*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the project nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $KAME: in6_src.c,v 1.132 2003/08/26 04:42:27 keiichi Exp $ 30 */ 31 32 /*- 33 * Copyright (c) 1982, 1986, 1991, 1993 34 * The Regents of the University of California. All rights reserved. 35 * 36 * Redistribution and use in source and binary forms, with or without 37 * modification, are permitted provided that the following conditions 38 * are met: 39 * 1. 
Redistributions of source code must retain the above copyright 40 * notice, this list of conditions and the following disclaimer. 41 * 2. Redistributions in binary form must reproduce the above copyright 42 * notice, this list of conditions and the following disclaimer in the 43 * documentation and/or other materials provided with the distribution. 44 * 4. Neither the name of the University nor the names of its contributors 45 * may be used to endorse or promote products derived from this software 46 * without specific prior written permission. 47 * 48 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 50 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 51 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 52 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 53 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 54 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 55 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 56 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 57 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 58 * SUCH DAMAGE. 
 *
 *	@(#)in_pcb.c	8.2 (Berkeley) 1/4/94
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mpath.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/priv.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sockio.h>
#include <sys/sysctl.h>
#include <sys/errno.h>
#include <sys/time.h>
#include <sys/kernel.h>
#include <sys/sx.h>
#include <sys/vimage.h>

#include <net/if.h>
#include <net/route.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet6/nd6.h>

/*
 * Mutex taken around policy-table lookups and modifications in this file
 * (see lookup_addrsel_policy(), add_addrsel_policyent() and
 * delete_addrsel_policyent()).
 */
static struct mtx addrsel_lock;
#define	ADDRSEL_LOCK_INIT()	mtx_init(&addrsel_lock, "addrsel_lock", NULL, MTX_DEF)
#define	ADDRSEL_LOCK()		mtx_lock(&addrsel_lock)
#define	ADDRSEL_UNLOCK()	mtx_unlock(&addrsel_lock)
#define	ADDRSEL_LOCK_ASSERT()	mtx_assert(&addrsel_lock, MA_OWNED)

/*
 * Shared/exclusive lock: held shared while walking the policy table for
 * the sysctl dump, and exclusive (together with addrsel_lock) while the
 * table is being modified.
 */
static struct sx addrsel_sxlock;
#define	ADDRSEL_SXLOCK_INIT()	sx_init(&addrsel_sxlock, "addrsel_sxlock")
#define	ADDRSEL_SLOCK()		sx_slock(&addrsel_sxlock)
#define	ADDRSEL_SUNLOCK()	sx_sunlock(&addrsel_sxlock)
#define	ADDRSEL_XLOCK()		sx_xlock(&addrsel_sxlock)
#define	ADDRSEL_XUNLOCK()	sx_xunlock(&addrsel_sxlock)

/*
 * Label carried by the "last resort" policy; in6_src_ioctl() rejects
 * user-supplied entries that use it.
 */
#define ADDR_LABEL_NOTAPP (-1)
struct in6_addrpolicy defaultaddrpolicy;

int ip6_prefer_tempaddr = 0;

static int selectroute __P((struct sockaddr_in6 *, struct ip6_pktopts *,
	struct ip6_moptions *, struct route_in6 *, struct ifnet **,
	struct rtentry **, int, int));
static int in6_selectif __P((struct sockaddr_in6 *, struct ip6_pktopts *,
	struct ip6_moptions *, struct route_in6 *ro, struct ifnet **));

static struct in6_addrpolicy *lookup_addrsel_policy(struct sockaddr_in6 *);

static void init_policy_queue(void);
static int add_addrsel_policyent(struct in6_addrpolicy *);
static int delete_addrsel_policyent(struct in6_addrpolicy *);
static int walk_addrsel_policy __P((int (*)(struct in6_addrpolicy *, void *),
				    void *));
static int dump_addrsel_policyent(struct in6_addrpolicy *, void *);
static struct in6_addrpolicy *match_addrsel_policy(struct sockaddr_in6 *);

/*
 * Return an IPv6 address, which is the most appropriate for a given
 * destination and user specified options.
 * If necessary, this function looks up the routing table and returns
 * an entry to the caller for later use.
 *
 * The three helper macros below are only usable inside the candidate loop
 * of in6_selectsrc(): each bumps the per-rule counter
 * V_ip6stat.ip6s_sources_rule[r] (when r is within the array) and then
 * jumps to the loop-local label "replace", "next" or "out" respectively.
 */
#define REPLACE(r) do {\
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
		sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		V_ip6stat.ip6s_sources_rule[(r)]++; \
	/* { \
	char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
	printf("in6_selectsrc: replace %s with %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
	} */ \
	goto replace; \
} while(0)
#define NEXT(r) do {\
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
		sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		V_ip6stat.ip6s_sources_rule[(r)]++; \
	/* { \
	char ip6buf[INET6_ADDRSTRLEN], ip6b[INET6_ADDRSTRLEN]; \
	printf("in6_selectsrc: keep %s against %s by %d\n", ia_best ? ip6_sprintf(ip6buf, &ia_best->ia_addr.sin6_addr) : "none", ip6_sprintf(ip6b, &ia->ia_addr.sin6_addr), (r)); \
	} */ \
	goto next;		/* XXX: we can't use 'continue' here */ \
} while(0)
#define BREAK(r) do { \
	if ((r) < sizeof(V_ip6stat.ip6s_sources_rule) / \
		sizeof(V_ip6stat.ip6s_sources_rule[0])) /* check for safety */ \
		V_ip6stat.ip6s_sources_rule[(r)]++; \
	goto out;		/* XXX: we can't use 'break' here */ \
} while(0)

/*
 * Choose the source address for a packet to dstsock.
 *
 *  dstsock  destination; its sin6_addr is copied locally before scoping.
 *  opts     optional packet options (pktinfo, next hop, temp-addr
 *           preference); may be NULL.
 *  inp      optional PCB; supplies multicast options and a bound local
 *           address.  Lock ownership is asserted when non-NULL.
 *  ro       optional route cache handed through to in6_selectif().
 *  cred     not referenced in this function body.
 *  ifpp     if non-NULL, cleared on entry and set to the outgoing interface
 *           on the pktinfo and rule-based paths (left NULL when the PCB's
 *           bound address is returned).
 *  errorp   always written: 0 on success, otherwise an errno value.
 *
 * Returns a pointer to the chosen address, or NULL with *errorp set.
 */
struct in6_addr *
in6_selectsrc(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct inpcb *inp, struct route_in6 *ro, struct ucred *cred,
    struct ifnet **ifpp, int *errorp)
{
	struct in6_addr dst;
	struct ifnet *ifp = NULL;
	struct in6_ifaddr *ia = NULL, *ia_best = NULL;
	struct in6_pktinfo *pi = NULL;
	int dst_scope = -1, best_scope = -1, best_matchlen = -1;
	struct in6_addrpolicy *dst_policy = NULL, *best_policy = NULL;
	u_int32_t odstzone;
	int prefer_tempaddr;
	struct ip6_moptions *mopts;

	dst = dstsock->sin6_addr; /* make a copy for local operation */
	*errorp = 0;
	if (ifpp)
		*ifpp = NULL;

	if (inp != NULL) {
		INP_LOCK_ASSERT(inp);
		mopts = inp->in6p_moptions;
	} else {
		mopts = NULL;
	}

	/*
	 * If the source address is explicitly specified by the caller,
	 * check if the requested source address is indeed a unicast address
	 * assigned to the node, and can be used as the packet's source
	 * address.  If everything is okay, use the address as source.
	 */
	if (opts && (pi = opts->ip6po_pktinfo) &&
	    !IN6_IS_ADDR_UNSPECIFIED(&pi->ipi6_addr)) {
		struct sockaddr_in6 srcsock;
		struct in6_ifaddr *ia6;

		/* get the outgoing interface */
		if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp))
		    != 0) {
			return (NULL);
		}

		/*
		 * determine the appropriate zone id of the source based on
		 * the zone of the destination and the outgoing interface.
		 * If the specified address is ambiguous wrt the scope zone,
		 * the interface must be specified; otherwise, ifa_ifwithaddr()
		 * will fail matching the address.
		 */
		bzero(&srcsock, sizeof(srcsock));
		srcsock.sin6_family = AF_INET6;
		srcsock.sin6_len = sizeof(srcsock);
		srcsock.sin6_addr = pi->ipi6_addr;
		if (ifp) {
			*errorp = in6_setscope(&srcsock.sin6_addr, ifp, NULL);
			if (*errorp != 0)
				return (NULL);
		}

		ia6 = (struct in6_ifaddr *)ifa_ifwithaddr((struct sockaddr *)(&srcsock));
		if (ia6 == NULL ||
		    (ia6->ia6_flags & (IN6_IFF_ANYCAST | IN6_IFF_NOTREADY))) {
			*errorp = EADDRNOTAVAIL;
			return (NULL);
		}
		/* Write the scoped form back into the caller's pktinfo. */
		pi->ipi6_addr = srcsock.sin6_addr; /* XXX: this overrides pi */
		if (ifpp)
			*ifpp = ifp;
		return (&ia6->ia_addr.sin6_addr);
	}

	/*
	 * Otherwise, if the socket has already bound the source, just use it.
	 */
	if (inp != NULL && !IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
		return (&inp->in6p_laddr);
	}

	/*
	 * If the address is not specified, choose the best one based on
	 * the outgoing interface and the destination address.
	 */
	/* get the outgoing interface */
	if ((*errorp = in6_selectif(dstsock, opts, mopts, ro, &ifp)) != 0)
		return (NULL);

#ifdef DIAGNOSTIC
	if (ifp == NULL)	/* this should not happen */
		panic("in6_selectsrc: NULL ifp");
#endif
	/* Embed the outgoing interface's zone in dst; remember it in odstzone. */
	*errorp = in6_setscope(&dst, ifp, &odstzone);
	if (*errorp != 0)
		return (NULL);

	/*
	 * Walk every configured address.  ia_best is the current winner; each
	 * rule either keeps it (NEXT), replaces it with ia (REPLACE), or ends
	 * the search (BREAK).  best_scope/best_policy/best_matchlen cache the
	 * winner's attributes and are refreshed at the "replace" label.
	 */
	for (ia = V_in6_ifaddr; ia; ia = ia->ia_next) {
		int new_scope = -1, new_matchlen = -1;
		struct in6_addrpolicy *new_policy = NULL;
		u_int32_t srczone, osrczone, dstzone;
		struct in6_addr src;
		struct ifnet *ifp1 = ia->ia_ifp;

		/*
		 * We'll never take an address that breaks the scope zone
		 * of the destination.  We also skip an address if its zone
		 * does not contain the outgoing interface.
		 * XXX: we should probably use sin6_scope_id here.
		 */
		if (in6_setscope(&dst, ifp1, &dstzone) ||
		    odstzone != dstzone) {
			continue;
		}
		src = ia->ia_addr.sin6_addr;
		if (in6_setscope(&src, ifp, &osrczone) ||
		    in6_setscope(&src, ifp1, &srczone) ||
		    osrczone != srczone) {
			continue;
		}

		/* avoid unusable addresses */
		if ((ia->ia6_flags &
		     (IN6_IFF_NOTREADY | IN6_IFF_ANYCAST | IN6_IFF_DETACHED))) {
				continue;
		}
		if (!V_ip6_use_deprecated && IFA6_IS_DEPRECATED(ia))
			continue;

		/* Rule 1: Prefer same address */
		if (IN6_ARE_ADDR_EQUAL(&dst, &ia->ia_addr.sin6_addr)) {
			ia_best = ia;
			BREAK(1); /* there should be no better candidate */
		}

		/* First usable candidate: take it unconditionally. */
		if (ia_best == NULL)
			REPLACE(0);

		/* Rule 2: Prefer appropriate scope */
		if (dst_scope < 0)
			dst_scope = in6_addrscope(&dst);
		new_scope = in6_addrscope(&ia->ia_addr.sin6_addr);
		if (IN6_ARE_SCOPE_CMP(best_scope, new_scope) < 0) {
			if (IN6_ARE_SCOPE_CMP(best_scope, dst_scope) < 0)
				REPLACE(2);
			NEXT(2);
		} else if (IN6_ARE_SCOPE_CMP(new_scope, best_scope) < 0) {
			if (IN6_ARE_SCOPE_CMP(new_scope, dst_scope) < 0)
				NEXT(2);
			REPLACE(2);
		}

		/*
		 * Rule 3: Avoid deprecated addresses.  Note that the case of
		 * !ip6_use_deprecated is already rejected above.
		 */
		if (!IFA6_IS_DEPRECATED(ia_best) && IFA6_IS_DEPRECATED(ia))
			NEXT(3);
		if (IFA6_IS_DEPRECATED(ia_best) && !IFA6_IS_DEPRECATED(ia))
			REPLACE(3);

		/* Rule 4: Prefer home addresses */
		/*
		 * XXX: This is a TODO.  We should probably merge the MIP6
		 * case above.
		 */

		/* Rule 5: Prefer outgoing interface */
		if (ia_best->ia_ifp == ifp && ia->ia_ifp != ifp)
			NEXT(5);
		if (ia_best->ia_ifp != ifp && ia->ia_ifp == ifp)
			REPLACE(5);

		/*
		 * Rule 6: Prefer matching label
		 * Note that best_policy should be non-NULL here.
		 */
		if (dst_policy == NULL)
			dst_policy = lookup_addrsel_policy(dstsock);
		if (dst_policy->label != ADDR_LABEL_NOTAPP) {
			new_policy = lookup_addrsel_policy(&ia->ia_addr);
			if (dst_policy->label == best_policy->label &&
			    dst_policy->label != new_policy->label)
				NEXT(6);
			if (dst_policy->label != best_policy->label &&
			    dst_policy->label == new_policy->label)
				REPLACE(6);
		}

		/*
		 * Rule 7: Prefer public addresses.
		 * We allow users to reverse the logic by configuring
		 * a sysctl variable, so that privacy conscious users can
		 * always prefer temporary addresses.
		 */
		if (opts == NULL ||
		    opts->ip6po_prefer_tempaddr == IP6PO_TEMPADDR_SYSTEM) {
			prefer_tempaddr = V_ip6_prefer_tempaddr;
		} else if (opts->ip6po_prefer_tempaddr ==
		    IP6PO_TEMPADDR_NOTPREFER) {
			prefer_tempaddr = 0;
		} else
			prefer_tempaddr = 1;
		if (!(ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
		    (ia->ia6_flags & IN6_IFF_TEMPORARY)) {
			if (prefer_tempaddr)
				REPLACE(7);
			else
				NEXT(7);
		}
		if ((ia_best->ia6_flags & IN6_IFF_TEMPORARY) &&
		    !(ia->ia6_flags & IN6_IFF_TEMPORARY)) {
			if (prefer_tempaddr)
				NEXT(7);
			else
				REPLACE(7);
		}

		/*
		 * Rule 8: prefer addresses on alive interfaces.
		 * This is a KAME specific rule.
		 */
		if ((ia_best->ia_ifp->if_flags & IFF_UP) &&
		    !(ia->ia_ifp->if_flags & IFF_UP))
			NEXT(8);
		if (!(ia_best->ia_ifp->if_flags & IFF_UP) &&
		    (ia->ia_ifp->if_flags & IFF_UP))
			REPLACE(8);

		/*
		 * Rule 14: Use longest matching prefix.
		 * Note: in the address selection draft, this rule is
		 * documented as "Rule 8".  However, since it is also
		 * documented that this rule can be overridden, we assign
		 * a large number so that it is easy to assign smaller numbers
		 * to more preferred rules.
		 */
		new_matchlen = in6_matchlen(&ia->ia_addr.sin6_addr, &dst);
		if (best_matchlen < new_matchlen)
			REPLACE(14);
		if (new_matchlen < best_matchlen)
			NEXT(14);

		/* Rule 15 is reserved. */

		/*
		 * Last resort: just keep the current candidate.
		 * Or, do we need more rules?
		 */
		continue;

	  replace:
		/*
		 * Adopt ia as the winner.  Reuse attributes already computed
		 * for it in this iteration; compute the rest now.
		 */
		ia_best = ia;
		best_scope = (new_scope >= 0 ? new_scope :
			      in6_addrscope(&ia_best->ia_addr.sin6_addr));
		best_policy = (new_policy ? new_policy :
			       lookup_addrsel_policy(&ia_best->ia_addr));
		best_matchlen = (new_matchlen >= 0 ? new_matchlen :
				 in6_matchlen(&ia_best->ia_addr.sin6_addr,
					      &dst));

	  next:
		continue;

	  out:
		break;
	}

	if ((ia = ia_best) == NULL) {
		*errorp = EADDRNOTAVAIL;
		return (NULL);
	}

	if (ifpp)
		*ifpp = ifp;

	return (&ia->ia_addr.sin6_addr);
}

/*
 * Common worker behind in6_selectif() and in6_selectroute(): determine the
 * outgoing interface and/or route for dstsock.
 *
 *  opts       optional packet options; pktinfo ifindex and next hop are
 *             honoured.
 *  mopts      optional multicast options; im6o_multicast_ifp is used for
 *             multicast destinations.
 *  ro         optional route cache; validated, and (re)filled when empty.
 *  retifp     if non-NULL, receives the selected interface (may be NULL).
 *  retrt      if non-NULL, receives the selected route (may be NULL).
 *  clone      selects rtalloc()/rtalloc_mpath() (non-zero) versus
 *             rtalloc1(..., 0, 0UL) (zero) when filling ro.
 *             (meaningful only for bsdi and freebsd)
 *  norouteok  when non-zero, an interface named by pktinfo is accepted
 *             without looking up a route.
 *
 * Returns 0 or an errno value; EHOSTUNREACH also bumps ip6s_noroute.
 * *retifp and *retrt are written on every return path.
 */
static int
selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone,
    int norouteok)
{
	int error = 0;
	struct ifnet *ifp = NULL;
	struct rtentry *rt = NULL;
	struct sockaddr_in6 *sin6_next;
	struct in6_pktinfo *pi = NULL;
	struct in6_addr *dst = &dstsock->sin6_addr;
#if 0
	char ip6buf[INET6_ADDRSTRLEN];

	if (dstsock->sin6_addr.s6_addr32[0] == 0 &&
	    dstsock->sin6_addr.s6_addr32[1] == 0 &&
	    !IN6_IS_ADDR_LOOPBACK(&dstsock->sin6_addr)) {
		printf("in6_selectroute: strange destination %s\n",
		       ip6_sprintf(ip6buf, &dstsock->sin6_addr));
	} else {
		printf("in6_selectroute: destination = %s%%%d\n",
		       ip6_sprintf(ip6buf, &dstsock->sin6_addr),
		       dstsock->sin6_scope_id); /* for debug */
	}
#endif

	/* If the caller specifies the outgoing interface explicitly, use it. */
	if (opts && (pi = opts->ip6po_pktinfo) != NULL && pi->ipi6_ifindex) {
		/* XXX boundary check is assumed to be already done. */
		ifp = ifnet_byindex(pi->ipi6_ifindex);
		if (ifp != NULL &&
		    (norouteok || retrt == NULL ||
		    IN6_IS_ADDR_MULTICAST(dst))) {
			/*
			 * we do not have to check or get the route for
			 * multicast.
			 */
			goto done;
		} else
			goto getroute;
	}

	/*
	 * If the destination address is a multicast address and the outgoing
	 * interface for the address is specified by the caller, use it.
	 */
	if (IN6_IS_ADDR_MULTICAST(dst) &&
	    mopts != NULL && (ifp = mopts->im6o_multicast_ifp) != NULL) {
		goto done; /* we do not need a route for multicast. */
	}

  getroute:
	/*
	 * If the next hop address for the packet is specified by the caller,
	 * use it as the gateway.
	 */
	if (opts && opts->ip6po_nexthop) {
		struct route_in6 *ron;

		sin6_next = satosin6(opts->ip6po_nexthop);

		/* at this moment, we only support AF_INET6 next hops */
		if (sin6_next->sin6_family != AF_INET6) {
			error = EAFNOSUPPORT; /* or should we proceed? */
			goto done;
		}

		/*
		 * If the next hop is an IPv6 address, then the node identified
		 * by that address must be a neighbor of the sending host.
		 */
		ron = &opts->ip6po_nextroute;
		/*
		 * Drop the cached next-hop route if it is no longer an up
		 * link-layer route, or if it was cached for another address.
		 */
		if ((ron->ro_rt &&
		     (ron->ro_rt->rt_flags & (RTF_UP | RTF_LLINFO)) !=
		     (RTF_UP | RTF_LLINFO)) ||
		    !IN6_ARE_ADDR_EQUAL(&satosin6(&ron->ro_dst)->sin6_addr,
		    &sin6_next->sin6_addr)) {
			if (ron->ro_rt) {
				RTFREE(ron->ro_rt);
				ron->ro_rt = NULL;
			}
			*satosin6(&ron->ro_dst) = *sin6_next;
		}
		if (ron->ro_rt == NULL) {
			rtalloc((struct route *)ron); /* multi path case? */
			if (ron->ro_rt == NULL ||
			    !(ron->ro_rt->rt_flags & RTF_LLINFO)) {
				if (ron->ro_rt) {
					RTFREE(ron->ro_rt);
					ron->ro_rt = NULL;
				}
				error = EHOSTUNREACH;
				goto done;
			}
		}
		rt = ron->ro_rt;
		ifp = rt->rt_ifp;

		/*
		 * When cloning is required, try to allocate a route to the
		 * destination so that the caller can store path MTU
		 * information.
		 */
		if (!clone)
			goto done;
	}

	/*
	 * Use a cached route if it exists and is valid, else try to allocate
	 * a new one.  Note that we should check the address family of the
	 * cached destination, in case of sharing the cache with IPv4.
	 */
	if (ro) {
		if (ro->ro_rt &&
		    (!(ro->ro_rt->rt_flags & RTF_UP) ||
		     ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
		     !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
		     dst))) {
			RTFREE(ro->ro_rt);
			ro->ro_rt = (struct rtentry *)NULL;
		}
		if (ro->ro_rt == (struct rtentry *)NULL) {
			struct sockaddr_in6 *sa6;

			/* No route yet, so try to acquire one */
			bzero(&ro->ro_dst, sizeof(struct sockaddr_in6));
			sa6 = (struct sockaddr_in6 *)&ro->ro_dst;
			*sa6 = *dstsock;
			sa6->sin6_scope_id = 0;

			if (clone) {
#ifdef RADIX_MPATH
				rtalloc_mpath((struct route *)ro,
				    ntohl(sa6->sin6_addr.s6_addr32[3]));
#else
				rtalloc((struct route *)ro);
#endif
			} else {
				ro->ro_rt = rtalloc1(&((struct route *)ro)
						     ->ro_dst, 0, 0UL);
				if (ro->ro_rt)
					RT_UNLOCK(ro->ro_rt);
			}
		}

		/*
		 * do not care about the result if we have the nexthop
		 * explicitly specified.
		 */
		if (opts && opts->ip6po_nexthop)
			goto done;

		if (ro->ro_rt) {
			ifp = ro->ro_rt->rt_ifp;

			if (ifp == NULL) { /* can this really happen? */
				RTFREE(ro->ro_rt);
				ro->ro_rt = NULL;
			}
		}
		if (ro->ro_rt == NULL)
			error = EHOSTUNREACH;
		rt = ro->ro_rt;

		/*
		 * Check if the outgoing interface conflicts with
		 * the interface specified by ipi6_ifindex (if specified).
		 * Note that loopback interface is always okay.
		 * (this may happen when we are sending a packet to one of
		 *  our own addresses.)
		 */
		if (ifp && opts && opts->ip6po_pktinfo &&
		    opts->ip6po_pktinfo->ipi6_ifindex) {
			if (!(ifp->if_flags & IFF_LOOPBACK) &&
			    ifp->if_index !=
			    opts->ip6po_pktinfo->ipi6_ifindex) {
				error = EHOSTUNREACH;
				goto done;
			}
		}
	}

  done:
	if (ifp == NULL && rt == NULL) {
		/*
		 * This can happen if the caller did not pass a cached route
		 * nor any other hints.  We treat this case as an error.
		 */
		error = EHOSTUNREACH;
	}
	if (error == EHOSTUNREACH)
		V_ip6stat.ip6s_noroute++;

	if (retifp != NULL)
		*retifp = ifp;
	if (retrt != NULL)
		*retrt = rt;	/* rt may be NULL */

	return (error);
}

/*
 * Pick only the outgoing interface for dstsock.
 *
 * Calls selectroute() with clone == 0 and norouteok == 1.  When the caller
 * passes ro == NULL a zeroed on-stack route cache is used instead, and any
 * route left in it is released before returning.  retifp is dereferenced
 * unconditionally on the success path, so it must be non-NULL.
 *
 * Returns 0, the error from selectroute(), or EHOSTUNREACH/ENETUNREACH when
 * the chosen route is flagged RTF_REJECT or RTF_BLACKHOLE.
 */
static int
in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro, struct ifnet **retifp)
{
	int error;
	struct route_in6 sro;
	struct rtentry *rt = NULL;

	if (ro == NULL) {
		bzero(&sro, sizeof(sro));
		ro = &sro;
	}

	if ((error = selectroute(dstsock, opts, mopts, ro, retifp,
				     &rt, 0, 1)) != 0) {
		/* Release the route only if it lives in our private cache. */
		if (ro == &sro && rt && rt == sro.ro_rt)
			RTFREE(rt);
		return (error);
	}

	/*
	 * do not use a rejected or black hole route.
	 * XXX: this check should be done in the L2 output routine.
	 * However, if we skipped this check here, we'd see the following
	 * scenario:
	 * - install a rejected route for a scoped address prefix
	 *   (like fe80::/10)
	 * - send a packet to a destination that matches the scoped prefix,
	 *   with ambiguity about the scope zone.
	 * - pick the outgoing interface from the route, and disambiguate the
	 *   scope zone with the interface.
	 * - ip6_output() would try to get another route with the "new"
	 *   destination, which may be valid.
	 * - we'd see no error on output.
	 * Although this may not be very harmful, it should still be confusing.
	 * We thus reject the case here.
	 */
	if (rt && (rt->rt_flags & (RTF_REJECT | RTF_BLACKHOLE))) {
		/* "flags" holds the errno to return, not route flags. */
		int flags = (rt->rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);

		if (ro == &sro && rt && rt == sro.ro_rt)
			RTFREE(rt);
		return (flags);
	}

	/*
	 * Adjust the "outgoing" interface.  If we're going to loop the packet
	 * back to ourselves, the ifp would be the loopback interface.
	 * However, we'd rather know the interface associated to the
	 * destination address (which should probably be one of our own
	 * addresses.)
	 */
	if (rt && rt->rt_ifa && rt->rt_ifa->ifa_ifp)
		*retifp = rt->rt_ifa->ifa_ifp;

	if (ro == &sro && rt && rt == sro.ro_rt)
		RTFREE(rt);
	return (0);
}

/*
 * Public entry point: selectroute() with norouteok fixed to 0, so an
 * interface named via pktinfo is accepted without a route lookup only when
 * retrt is NULL or the destination is multicast.
 * clone - meaningful only for bsdi and freebsd
 */
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
    struct ip6_moptions *mopts, struct route_in6 *ro,
    struct ifnet **retifp, struct rtentry **retrt, int clone)
{

	return (selectroute(dstsock, opts, mopts, ro, retifp,
	    retrt, clone, 0));
}

/*
 * Default hop limit selection.  The precedence is as follows:
 * 1. Hoplimit value specified via ioctl.
 * 2. (If the outgoing interface is detected) the current
 *     hop limit of the interface specified by router advertisement.
 * 3. The system default hoplimit.
734 */ 735 int 736 in6_selecthlim(struct in6pcb *in6p, struct ifnet *ifp) 737 { 738 739 if (in6p && in6p->in6p_hops >= 0) 740 return (in6p->in6p_hops); 741 else if (ifp) 742 return (ND_IFINFO(ifp)->chlim); 743 else if (in6p && !IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr)) { 744 struct route_in6 ro6; 745 struct ifnet *lifp; 746 747 bzero(&ro6, sizeof(ro6)); 748 ro6.ro_dst.sin6_family = AF_INET6; 749 ro6.ro_dst.sin6_len = sizeof(struct sockaddr_in6); 750 ro6.ro_dst.sin6_addr = in6p->in6p_faddr; 751 rtalloc((struct route *)&ro6); 752 if (ro6.ro_rt) { 753 lifp = ro6.ro_rt->rt_ifp; 754 RTFREE(ro6.ro_rt); 755 if (lifp) 756 return (ND_IFINFO(lifp)->chlim); 757 } else 758 return (V_ip6_defhlim); 759 } 760 return (V_ip6_defhlim); 761 } 762 763 /* 764 * XXX: this is borrowed from in6_pcbbind(). If possible, we should 765 * share this function by all *bsd*... 766 */ 767 int 768 in6_pcbsetport(struct in6_addr *laddr, struct inpcb *inp, struct ucred *cred) 769 { 770 struct socket *so = inp->inp_socket; 771 u_int16_t lport = 0, first, last, *lastport; 772 int count, error = 0, wild = 0; 773 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; 774 775 INP_INFO_WLOCK_ASSERT(pcbinfo); 776 INP_WLOCK_ASSERT(inp); 777 778 /* XXX: this is redundant when called from in6_pcbbind */ 779 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0) 780 wild = INPLOOKUP_WILDCARD; 781 782 inp->inp_flags |= INP_ANONPORT; 783 784 if (inp->inp_flags & INP_HIGHPORT) { 785 first = V_ipport_hifirstauto; /* sysctl */ 786 last = V_ipport_hilastauto; 787 lastport = &pcbinfo->ipi_lasthi; 788 } else if (inp->inp_flags & INP_LOWPORT) { 789 error = priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT, 0); 790 if (error) 791 return error; 792 first = V_ipport_lowfirstauto; /* 1023 */ 793 last = V_ipport_lowlastauto; /* 600 */ 794 lastport = &pcbinfo->ipi_lastlow; 795 } else { 796 first = V_ipport_firstauto; /* sysctl */ 797 last = V_ipport_lastauto; 798 lastport = &pcbinfo->ipi_lastport; 799 } 800 /* 801 * Simple check to 
ensure all ports are not used up causing 802 * a deadlock here. 803 * 804 * We split the two cases (up and down) so that the direction 805 * is not being tested on each round of the loop. 806 */ 807 if (first > last) { 808 /* 809 * counting down 810 */ 811 count = first - last; 812 813 do { 814 if (count-- < 0) { /* completely used? */ 815 /* 816 * Undo any address bind that may have 817 * occurred above. 818 */ 819 inp->in6p_laddr = in6addr_any; 820 return (EAGAIN); 821 } 822 --*lastport; 823 if (*lastport > first || *lastport < last) 824 *lastport = first; 825 lport = htons(*lastport); 826 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 827 lport, wild, cred)); 828 } else { 829 /* 830 * counting up 831 */ 832 count = last - first; 833 834 do { 835 if (count-- < 0) { /* completely used? */ 836 /* 837 * Undo any address bind that may have 838 * occurred above. 839 */ 840 inp->in6p_laddr = in6addr_any; 841 return (EAGAIN); 842 } 843 ++*lastport; 844 if (*lastport < first || *lastport > last) 845 *lastport = first; 846 lport = htons(*lastport); 847 } while (in6_pcblookup_local(pcbinfo, &inp->in6p_laddr, 848 lport, wild, cred)); 849 } 850 851 inp->inp_lport = lport; 852 if (in_pcbinshash(inp) != 0) { 853 inp->in6p_laddr = in6addr_any; 854 inp->inp_lport = 0; 855 return (EAGAIN); 856 } 857 858 return (0); 859 } 860 861 void 862 addrsel_policy_init(void) 863 { 864 ADDRSEL_LOCK_INIT(); 865 ADDRSEL_SXLOCK_INIT(); 866 867 init_policy_queue(); 868 869 /* initialize the "last resort" policy */ 870 bzero(&V_defaultaddrpolicy, sizeof(V_defaultaddrpolicy)); 871 V_defaultaddrpolicy.label = ADDR_LABEL_NOTAPP; 872 } 873 874 static struct in6_addrpolicy * 875 lookup_addrsel_policy(struct sockaddr_in6 *key) 876 { 877 struct in6_addrpolicy *match = NULL; 878 879 ADDRSEL_LOCK(); 880 match = match_addrsel_policy(key); 881 882 if (match == NULL) 883 match = &V_defaultaddrpolicy; 884 else 885 match->use++; 886 ADDRSEL_UNLOCK(); 887 888 return (match); 889 } 890 891 /* 892 * 
Subroutines to manage the address selection policy table via sysctl. 893 */ 894 struct walkarg { 895 struct sysctl_req *w_req; 896 }; 897 898 static int in6_src_sysctl(SYSCTL_HANDLER_ARGS); 899 SYSCTL_DECL(_net_inet6_ip6); 900 SYSCTL_NODE(_net_inet6_ip6, IPV6CTL_ADDRCTLPOLICY, addrctlpolicy, 901 CTLFLAG_RD, in6_src_sysctl, ""); 902 903 static int 904 in6_src_sysctl(SYSCTL_HANDLER_ARGS) 905 { 906 struct walkarg w; 907 908 if (req->newptr) 909 return EPERM; 910 911 bzero(&w, sizeof(w)); 912 w.w_req = req; 913 914 return (walk_addrsel_policy(dump_addrsel_policyent, &w)); 915 } 916 917 int 918 in6_src_ioctl(u_long cmd, caddr_t data) 919 { 920 int i; 921 struct in6_addrpolicy ent0; 922 923 if (cmd != SIOCAADDRCTL_POLICY && cmd != SIOCDADDRCTL_POLICY) 924 return (EOPNOTSUPP); /* check for safety */ 925 926 ent0 = *(struct in6_addrpolicy *)data; 927 928 if (ent0.label == ADDR_LABEL_NOTAPP) 929 return (EINVAL); 930 /* check if the prefix mask is consecutive. */ 931 if (in6_mask2len(&ent0.addrmask.sin6_addr, NULL) < 0) 932 return (EINVAL); 933 /* clear trailing garbages (if any) of the prefix address. */ 934 for (i = 0; i < 4; i++) { 935 ent0.addr.sin6_addr.s6_addr32[i] &= 936 ent0.addrmask.sin6_addr.s6_addr32[i]; 937 } 938 ent0.use = 0; 939 940 switch (cmd) { 941 case SIOCAADDRCTL_POLICY: 942 return (add_addrsel_policyent(&ent0)); 943 case SIOCDADDRCTL_POLICY: 944 return (delete_addrsel_policyent(&ent0)); 945 } 946 947 return (0); /* XXX: compromise compilers */ 948 } 949 950 /* 951 * The followings are implementation of the policy table using a 952 * simple tail queue. 953 * XXX such details should be hidden. 954 * XXX implementation using binary tree should be more efficient. 
955 */ 956 struct addrsel_policyent { 957 TAILQ_ENTRY(addrsel_policyent) ape_entry; 958 struct in6_addrpolicy ape_policy; 959 }; 960 961 TAILQ_HEAD(addrsel_policyhead, addrsel_policyent); 962 963 struct addrsel_policyhead addrsel_policytab; 964 965 static void 966 init_policy_queue(void) 967 { 968 969 TAILQ_INIT(&V_addrsel_policytab); 970 } 971 972 static int 973 add_addrsel_policyent(struct in6_addrpolicy *newpolicy) 974 { 975 struct addrsel_policyent *new, *pol; 976 977 MALLOC(new, struct addrsel_policyent *, sizeof(*new), M_IFADDR, 978 M_WAITOK); 979 ADDRSEL_XLOCK(); 980 ADDRSEL_LOCK(); 981 982 /* duplication check */ 983 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 984 if (IN6_ARE_ADDR_EQUAL(&newpolicy->addr.sin6_addr, 985 &pol->ape_policy.addr.sin6_addr) && 986 IN6_ARE_ADDR_EQUAL(&newpolicy->addrmask.sin6_addr, 987 &pol->ape_policy.addrmask.sin6_addr)) { 988 ADDRSEL_UNLOCK(); 989 ADDRSEL_XUNLOCK(); 990 FREE(new, M_IFADDR); 991 return (EEXIST); /* or override it? */ 992 } 993 } 994 995 bzero(new, sizeof(*new)); 996 997 /* XXX: should validate entry */ 998 new->ape_policy = *newpolicy; 999 1000 TAILQ_INSERT_TAIL(&V_addrsel_policytab, new, ape_entry); 1001 ADDRSEL_UNLOCK(); 1002 ADDRSEL_XUNLOCK(); 1003 1004 return (0); 1005 } 1006 1007 static int 1008 delete_addrsel_policyent(struct in6_addrpolicy *key) 1009 { 1010 struct addrsel_policyent *pol; 1011 1012 ADDRSEL_XLOCK(); 1013 ADDRSEL_LOCK(); 1014 1015 /* search for the entry in the table */ 1016 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1017 if (IN6_ARE_ADDR_EQUAL(&key->addr.sin6_addr, 1018 &pol->ape_policy.addr.sin6_addr) && 1019 IN6_ARE_ADDR_EQUAL(&key->addrmask.sin6_addr, 1020 &pol->ape_policy.addrmask.sin6_addr)) { 1021 break; 1022 } 1023 } 1024 if (pol == NULL) { 1025 ADDRSEL_UNLOCK(); 1026 ADDRSEL_XUNLOCK(); 1027 return (ESRCH); 1028 } 1029 1030 TAILQ_REMOVE(&V_addrsel_policytab, pol, ape_entry); 1031 ADDRSEL_UNLOCK(); 1032 ADDRSEL_XUNLOCK(); 1033 1034 return (0); 1035 } 1036 1037 
static int 1038 walk_addrsel_policy(int (*callback)(struct in6_addrpolicy *, void *), 1039 void *w) 1040 { 1041 struct addrsel_policyent *pol; 1042 int error = 0; 1043 1044 ADDRSEL_SLOCK(); 1045 TAILQ_FOREACH(pol, &V_addrsel_policytab, ape_entry) { 1046 if ((error = (*callback)(&pol->ape_policy, w)) != 0) { 1047 ADDRSEL_SUNLOCK(); 1048 return (error); 1049 } 1050 } 1051 ADDRSEL_SUNLOCK(); 1052 return (error); 1053 } 1054 1055 static int 1056 dump_addrsel_policyent(struct in6_addrpolicy *pol, void *arg) 1057 { 1058 int error = 0; 1059 struct walkarg *w = arg; 1060 1061 error = SYSCTL_OUT(w->w_req, pol, sizeof(*pol)); 1062 1063 return (error); 1064 } 1065 1066 static struct in6_addrpolicy * 1067 match_addrsel_policy(struct sockaddr_in6 *key) 1068 { 1069 struct addrsel_policyent *pent; 1070 struct in6_addrpolicy *bestpol = NULL, *pol; 1071 int matchlen, bestmatchlen = -1; 1072 u_char *mp, *ep, *k, *p, m; 1073 1074 TAILQ_FOREACH(pent, &V_addrsel_policytab, ape_entry) { 1075 matchlen = 0; 1076 1077 pol = &pent->ape_policy; 1078 mp = (u_char *)&pol->addrmask.sin6_addr; 1079 ep = mp + 16; /* XXX: scope field? */ 1080 k = (u_char *)&key->sin6_addr; 1081 p = (u_char *)&pol->addr.sin6_addr; 1082 for (; mp < ep && *mp; mp++, k++, p++) { 1083 m = *mp; 1084 if ((*k & m) != *p) 1085 goto next; /* not match */ 1086 if (m == 0xff) /* short cut for a typical case */ 1087 matchlen += 8; 1088 else { 1089 while (m >= 0x80) { 1090 matchlen++; 1091 m <<= 1; 1092 } 1093 } 1094 } 1095 1096 /* matched. check if this is better than the current best. */ 1097 if (bestpol == NULL || 1098 matchlen > bestmatchlen) { 1099 bestpol = pol; 1100 bestmatchlen = matchlen; 1101 } 1102 1103 next: 1104 continue; 1105 } 1106 1107 return (bestpol); 1108 } 1109