1 /*- 2 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. Neither the name of the project nor the names of its contributors 14 * may be used to endorse or promote products derived from this software 15 * without specific prior written permission. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 * 29 * $KAME: nd6.c,v 1.144 2001/05/24 07:44:00 itojun Exp $ 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_inet.h" 36 #include "opt_inet6.h" 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/callout.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/socket.h> 44 #include <sys/sockio.h> 45 #include <sys/time.h> 46 #include <sys/kernel.h> 47 #include <sys/protosw.h> 48 #include <sys/errno.h> 49 #include <sys/syslog.h> 50 #include <sys/lock.h> 51 #include <sys/rwlock.h> 52 #include <sys/queue.h> 53 #include <sys/sdt.h> 54 #include <sys/sysctl.h> 55 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_arc.h> 59 #include <net/if_dl.h> 60 #include <net/if_types.h> 61 #include <net/iso88025.h> 62 #include <net/fddi.h> 63 #include <net/route.h> 64 #include <net/vnet.h> 65 66 #include <netinet/in.h> 67 #include <netinet/in_kdtrace.h> 68 #include <net/if_llatbl.h> 69 #define L3_ADDR_SIN6(le) ((struct sockaddr_in6 *) L3_ADDR(le)) 70 #include <netinet/if_ether.h> 71 #include <netinet6/in6_var.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet6/scope6_var.h> 75 #include <netinet6/nd6.h> 76 #include <netinet6/in6_ifattach.h> 77 #include <netinet/icmp6.h> 78 #include <netinet6/send.h> 79 80 #include <sys/limits.h> 81 82 #include <security/mac/mac_framework.h> 83 84 #define ND6_SLOWTIMER_INTERVAL (60 * 60) /* 1 hour */ 85 #define ND6_RECALC_REACHTM_INTERVAL (60 * 120) /* 2 hours */ 86 87 #define SIN6(s) ((const struct sockaddr_in6 *)(s)) 88 89 /* timer values */ 90 VNET_DEFINE(int, nd6_prune) = 1; /* walk list every 1 seconds */ 91 VNET_DEFINE(int, nd6_delay) = 5; /* delay first probe time 5 second */ 92 VNET_DEFINE(int, nd6_umaxtries) = 3; /* maximum unicast query */ 93 VNET_DEFINE(int, nd6_mmaxtries) = 3; /* maximum multicast query */ 94 VNET_DEFINE(int, nd6_useloopback) = 1; /* use loopback interface for 95 * local traffic */ 96 VNET_DEFINE(int, nd6_gctimer) = (60 * 60 * 24); /* 1 day: garbage 97 * collection timer */ 98 99 /* preventing too many loops in ND option parsing */ 100 static VNET_DEFINE(int, nd6_maxndopt) = 10; /* max # of ND options allowed */ 101 102 VNET_DEFINE(int, nd6_maxnudhint) = 0; /* max # of subsequent upper 103 * layer hints */ 104 static VNET_DEFINE(int, nd6_maxqueuelen) = 1; /* max pkts cached in unresolved 105 * ND entries */ 106 #define V_nd6_maxndopt VNET(nd6_maxndopt) 107 #define V_nd6_maxqueuelen VNET(nd6_maxqueuelen) 108 109 #ifdef ND6_DEBUG 110 VNET_DEFINE(int, nd6_debug) = 1; 111 #else 112 VNET_DEFINE(int, nd6_debug) = 0; 113 #endif 114 115 /* for debugging? */ 116 #if 0 117 static int nd6_inuse, nd6_allocated; 118 #endif 119 120 VNET_DEFINE(struct nd_drhead, nd_defrouter); 121 VNET_DEFINE(struct nd_prhead, nd_prefix); 122 123 VNET_DEFINE(int, nd6_recalc_reachtm_interval) = ND6_RECALC_REACHTM_INTERVAL; 124 #define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval) 125 126 int (*send_sendso_input_hook)(struct mbuf *, struct ifnet *, int, int); 127 128 static int nd6_is_new_addr_neighbor(struct sockaddr_in6 *, 129 struct ifnet *); 130 static void nd6_setmtu0(struct ifnet *, struct nd_ifinfo *); 131 static void nd6_slowtimo(void *); 132 static int regen_tmpaddr(struct in6_ifaddr *); 133 static struct llentry *nd6_free(struct llentry *, int); 134 static void nd6_llinfo_timer(void *); 135 static void clear_llinfo_pqueue(struct llentry *); 136 static void nd6_rtrequest(int, struct rtentry *, struct rt_addrinfo *); 137 138 static VNET_DEFINE(struct callout, nd6_slowtimo_ch); 139 #define V_nd6_slowtimo_ch VNET(nd6_slowtimo_ch) 140 141 VNET_DEFINE(struct callout, nd6_timer_ch); 142 143 void 144 nd6_init(void) 145 { 146 147 LIST_INIT(&V_nd_prefix); 148 149 /* initialization of the default router list */ 150 TAILQ_INIT(&V_nd_defrouter); 151 152 /* start timer */ 153 callout_init(&V_nd6_slowtimo_ch, 0); 154 callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, 155 nd6_slowtimo, curvnet); 156 157 nd6_dad_init(); 158 } 159 160 #ifdef VIMAGE 161 void 162 nd6_destroy() 163 { 164 165 callout_drain(&V_nd6_slowtimo_ch); 166 callout_drain(&V_nd6_timer_ch); 167 } 168 #endif 169 170 struct nd_ifinfo * 171 nd6_ifattach(struct ifnet *ifp) 172 { 173 struct nd_ifinfo *nd; 174 175 nd = (struct nd_ifinfo *)malloc(sizeof(*nd), M_IP6NDP, M_WAITOK|M_ZERO); 176 nd->initialized = 1; 177 178 nd->chlim = IPV6_DEFHLIM; 179 nd->basereachable = REACHABLE_TIME; 180 nd->reachable = ND_COMPUTE_RTIME(nd->basereachable); 181 nd->retrans = RETRANS_TIMER; 182 183 nd->flags = ND6_IFF_PERFORMNUD; 184 185 /* A loopback interface always has ND6_IFF_AUTO_LINKLOCAL. 186 * XXXHRS: Clear ND6_IFF_AUTO_LINKLOCAL on an IFT_BRIDGE interface by 187 * default regardless of the V_ip6_auto_linklocal configuration to 188 * give a reasonable default behavior. 189 */ 190 if ((V_ip6_auto_linklocal && ifp->if_type != IFT_BRIDGE) || 191 (ifp->if_flags & IFF_LOOPBACK)) 192 nd->flags |= ND6_IFF_AUTO_LINKLOCAL; 193 /* 194 * A loopback interface does not need to accept RTADV. 195 * XXXHRS: Clear ND6_IFF_ACCEPT_RTADV on an IFT_BRIDGE interface by 196 * default regardless of the V_ip6_accept_rtadv configuration to 197 * prevent the interface from accepting RA messages arrived 198 * on one of the member interfaces with ND6_IFF_ACCEPT_RTADV. 199 */ 200 if (V_ip6_accept_rtadv && 201 !(ifp->if_flags & IFF_LOOPBACK) && 202 (ifp->if_type != IFT_BRIDGE)) 203 nd->flags |= ND6_IFF_ACCEPT_RTADV; 204 if (V_ip6_no_radr && !(ifp->if_flags & IFF_LOOPBACK)) 205 nd->flags |= ND6_IFF_NO_RADR; 206 207 /* XXX: we cannot call nd6_setmtu since ifp is not fully initialized */ 208 nd6_setmtu0(ifp, nd); 209 210 return nd; 211 } 212 213 void 214 nd6_ifdetach(struct nd_ifinfo *nd) 215 { 216 217 free(nd, M_IP6NDP); 218 } 219 220 /* 221 * Reset ND level link MTU. This function is called when the physical MTU 222 * changes, which means we might have to adjust the ND level MTU. 223 */ 224 void 225 nd6_setmtu(struct ifnet *ifp) 226 { 227 228 nd6_setmtu0(ifp, ND_IFINFO(ifp)); 229 } 230 231 /* XXX todo: do not maintain copy of ifp->if_mtu in ndi->maxmtu */ 232 void 233 nd6_setmtu0(struct ifnet *ifp, struct nd_ifinfo *ndi) 234 { 235 u_int32_t omaxmtu; 236 237 omaxmtu = ndi->maxmtu; 238 239 switch (ifp->if_type) { 240 case IFT_ARCNET: 241 ndi->maxmtu = MIN(ARC_PHDS_MAXMTU, ifp->if_mtu); /* RFC2497 */ 242 break; 243 case IFT_FDDI: 244 ndi->maxmtu = MIN(FDDIIPMTU, ifp->if_mtu); /* RFC2467 */ 245 break; 246 case IFT_ISO88025: 247 ndi->maxmtu = MIN(ISO88025_MAX_MTU, ifp->if_mtu); 248 break; 249 default: 250 ndi->maxmtu = ifp->if_mtu; 251 break; 252 } 253 254 /* 255 * Decreasing the interface MTU under IPV6 minimum MTU may cause 256 * undesirable situation. We thus notify the operator of the change 257 * explicitly. The check for omaxmtu is necessary to restrict the 258 * log to the case of changing the MTU, not initializing it. 259 */ 260 if (omaxmtu >= IPV6_MMTU && ndi->maxmtu < IPV6_MMTU) { 261 log(LOG_NOTICE, "nd6_setmtu0: " 262 "new link MTU on %s (%lu) is too small for IPv6\n", 263 if_name(ifp), (unsigned long)ndi->maxmtu); 264 } 265 266 if (ndi->maxmtu > V_in6_maxmtu) 267 in6_setmaxmtu(); /* check all interfaces just in case */ 268 269 } 270 271 void 272 nd6_option_init(void *opt, int icmp6len, union nd_opts *ndopts) 273 { 274 275 bzero(ndopts, sizeof(*ndopts)); 276 ndopts->nd_opts_search = (struct nd_opt_hdr *)opt; 277 ndopts->nd_opts_last 278 = (struct nd_opt_hdr *)(((u_char *)opt) + icmp6len); 279 280 if (icmp6len == 0) { 281 ndopts->nd_opts_done = 1; 282 ndopts->nd_opts_search = NULL; 283 } 284 } 285 286 /* 287 * Take one ND option. 288 */ 289 struct nd_opt_hdr * 290 nd6_option(union nd_opts *ndopts) 291 { 292 struct nd_opt_hdr *nd_opt; 293 int olen; 294 295 KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); 296 KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", 297 __func__)); 298 if (ndopts->nd_opts_search == NULL) 299 return NULL; 300 if (ndopts->nd_opts_done) 301 return NULL; 302 303 nd_opt = ndopts->nd_opts_search; 304 305 /* make sure nd_opt_len is inside the buffer */ 306 if ((caddr_t)&nd_opt->nd_opt_len >= (caddr_t)ndopts->nd_opts_last) { 307 bzero(ndopts, sizeof(*ndopts)); 308 return NULL; 309 } 310 311 olen = nd_opt->nd_opt_len << 3; 312 if (olen == 0) { 313 /* 314 * Message validation requires that all included 315 * options have a length that is greater than zero. 316 */ 317 bzero(ndopts, sizeof(*ndopts)); 318 return NULL; 319 } 320 321 ndopts->nd_opts_search = (struct nd_opt_hdr *)((caddr_t)nd_opt + olen); 322 if (ndopts->nd_opts_search > ndopts->nd_opts_last) { 323 /* option overruns the end of buffer, invalid */ 324 bzero(ndopts, sizeof(*ndopts)); 325 return NULL; 326 } else if (ndopts->nd_opts_search == ndopts->nd_opts_last) { 327 /* reached the end of options chain */ 328 ndopts->nd_opts_done = 1; 329 ndopts->nd_opts_search = NULL; 330 } 331 return nd_opt; 332 } 333 334 /* 335 * Parse multiple ND options. 336 * This function is much easier to use, for ND routines that do not need 337 * multiple options of the same type. 338 */ 339 int 340 nd6_options(union nd_opts *ndopts) 341 { 342 struct nd_opt_hdr *nd_opt; 343 int i = 0; 344 345 KASSERT(ndopts != NULL, ("%s: ndopts == NULL", __func__)); 346 KASSERT(ndopts->nd_opts_last != NULL, ("%s: uninitialized ndopts", 347 __func__)); 348 if (ndopts->nd_opts_search == NULL) 349 return 0; 350 351 while (1) { 352 nd_opt = nd6_option(ndopts); 353 if (nd_opt == NULL && ndopts->nd_opts_last == NULL) { 354 /* 355 * Message validation requires that all included 356 * options have a length that is greater than zero. 357 */ 358 ICMP6STAT_INC(icp6s_nd_badopt); 359 bzero(ndopts, sizeof(*ndopts)); 360 return -1; 361 } 362 363 if (nd_opt == NULL) 364 goto skip1; 365 366 switch (nd_opt->nd_opt_type) { 367 case ND_OPT_SOURCE_LINKADDR: 368 case ND_OPT_TARGET_LINKADDR: 369 case ND_OPT_MTU: 370 case ND_OPT_REDIRECTED_HEADER: 371 if (ndopts->nd_opt_array[nd_opt->nd_opt_type]) { 372 nd6log((LOG_INFO, 373 "duplicated ND6 option found (type=%d)\n", 374 nd_opt->nd_opt_type)); 375 /* XXX bark? */ 376 } else { 377 ndopts->nd_opt_array[nd_opt->nd_opt_type] 378 = nd_opt; 379 } 380 break; 381 case ND_OPT_PREFIX_INFORMATION: 382 if (ndopts->nd_opt_array[nd_opt->nd_opt_type] == 0) { 383 ndopts->nd_opt_array[nd_opt->nd_opt_type] 384 = nd_opt; 385 } 386 ndopts->nd_opts_pi_end = 387 (struct nd_opt_prefix_info *)nd_opt; 388 break; 389 /* What about ND_OPT_ROUTE_INFO? RFC 4191 */ 390 case ND_OPT_RDNSS: /* RFC 6106 */ 391 case ND_OPT_DNSSL: /* RFC 6106 */ 392 /* 393 * Silently ignore options we know and do not care about 394 * in the kernel. 395 */ 396 break; 397 default: 398 /* 399 * Unknown options must be silently ignored, 400 * to accomodate future extension to the protocol. 401 */ 402 nd6log((LOG_DEBUG, 403 "nd6_options: unsupported option %d - " 404 "option ignored\n", nd_opt->nd_opt_type)); 405 } 406 407 skip1: 408 i++; 409 if (i > V_nd6_maxndopt) { 410 ICMP6STAT_INC(icp6s_nd_toomanyopt); 411 nd6log((LOG_INFO, "too many loop in nd opt\n")); 412 break; 413 } 414 415 if (ndopts->nd_opts_done) 416 break; 417 } 418 419 return 0; 420 } 421 422 /* 423 * ND6 timer routine to handle ND6 entries 424 */ 425 void 426 nd6_llinfo_settimer_locked(struct llentry *ln, long tick) 427 { 428 int canceled; 429 430 LLE_WLOCK_ASSERT(ln); 431 432 if (tick < 0) { 433 ln->la_expire = 0; 434 ln->ln_ntick = 0; 435 canceled = callout_stop(&ln->ln_timer_ch); 436 } else { 437 ln->la_expire = time_uptime + tick / hz; 438 LLE_ADDREF(ln); 439 if (tick > INT_MAX) { 440 ln->ln_ntick = tick - INT_MAX; 441 canceled = callout_reset(&ln->ln_timer_ch, INT_MAX, 442 nd6_llinfo_timer, ln); 443 } else { 444 ln->ln_ntick = 0; 445 canceled = callout_reset(&ln->ln_timer_ch, tick, 446 nd6_llinfo_timer, ln); 447 } 448 } 449 if (canceled) 450 LLE_REMREF(ln); 451 } 452 453 void 454 nd6_llinfo_settimer(struct llentry *ln, long tick) 455 { 456 457 LLE_WLOCK(ln); 458 nd6_llinfo_settimer_locked(ln, tick); 459 LLE_WUNLOCK(ln); 460 } 461 462 static void 463 nd6_llinfo_timer(void *arg) 464 { 465 struct llentry *ln; 466 struct in6_addr *dst; 467 struct ifnet *ifp; 468 struct nd_ifinfo *ndi = NULL; 469 470 KASSERT(arg != NULL, ("%s: arg NULL", __func__)); 471 ln = (struct llentry *)arg; 472 LLE_WLOCK_ASSERT(ln); 473 ifp = ln->lle_tbl->llt_ifp; 474 475 CURVNET_SET(ifp->if_vnet); 476 477 if (ln->ln_ntick > 0) { 478 if (ln->ln_ntick > INT_MAX) { 479 ln->ln_ntick -= INT_MAX; 480 nd6_llinfo_settimer_locked(ln, INT_MAX); 481 } else { 482 ln->ln_ntick = 0; 483 nd6_llinfo_settimer_locked(ln, ln->ln_ntick); 484 } 485 goto done; 486 } 487 488 ndi = ND_IFINFO(ifp); 489 dst = &L3_ADDR_SIN6(ln)->sin6_addr; 490 if (ln->la_flags & LLE_STATIC) { 491 goto done; 492 } 493 494 if (ln->la_flags & LLE_DELETED) { 495 (void)nd6_free(ln, 0); 496 ln = NULL; 497 goto done; 498 } 499 500 switch (ln->ln_state) { 501 case ND6_LLINFO_INCOMPLETE: 502 if (ln->la_asked < V_nd6_mmaxtries) { 503 ln->la_asked++; 504 nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); 505 LLE_WUNLOCK(ln); 506 nd6_ns_output(ifp, NULL, dst, ln, 0); 507 LLE_WLOCK(ln); 508 } else { 509 struct mbuf *m = ln->la_hold; 510 if (m) { 511 struct mbuf *m0; 512 513 /* 514 * assuming every packet in la_hold has the 515 * same IP header. Send error after unlock. 516 */ 517 m0 = m->m_nextpkt; 518 m->m_nextpkt = NULL; 519 ln->la_hold = m0; 520 clear_llinfo_pqueue(ln); 521 } 522 EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_TIMEDOUT); 523 (void)nd6_free(ln, 0); 524 ln = NULL; 525 if (m != NULL) 526 icmp6_error2(m, ICMP6_DST_UNREACH, 527 ICMP6_DST_UNREACH_ADDR, 0, ifp); 528 } 529 break; 530 case ND6_LLINFO_REACHABLE: 531 if (!ND6_LLINFO_PERMANENT(ln)) { 532 ln->ln_state = ND6_LLINFO_STALE; 533 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); 534 } 535 break; 536 537 case ND6_LLINFO_STALE: 538 /* Garbage Collection(RFC 2461 5.3) */ 539 if (!ND6_LLINFO_PERMANENT(ln)) { 540 EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); 541 (void)nd6_free(ln, 1); 542 ln = NULL; 543 } 544 break; 545 546 case ND6_LLINFO_DELAY: 547 if (ndi && (ndi->flags & ND6_IFF_PERFORMNUD) != 0) { 548 /* We need NUD */ 549 ln->la_asked = 1; 550 ln->ln_state = ND6_LLINFO_PROBE; 551 nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); 552 LLE_WUNLOCK(ln); 553 nd6_ns_output(ifp, dst, dst, ln, 0); 554 LLE_WLOCK(ln); 555 } else { 556 ln->ln_state = ND6_LLINFO_STALE; /* XXX */ 557 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); 558 } 559 break; 560 case ND6_LLINFO_PROBE: 561 if (ln->la_asked < V_nd6_umaxtries) { 562 ln->la_asked++; 563 nd6_llinfo_settimer_locked(ln, (long)ndi->retrans * hz / 1000); 564 LLE_WUNLOCK(ln); 565 nd6_ns_output(ifp, dst, dst, ln, 0); 566 LLE_WLOCK(ln); 567 } else { 568 EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_EXPIRED); 569 (void)nd6_free(ln, 0); 570 ln = NULL; 571 } 572 break; 573 default: 574 panic("%s: paths in a dark night can be confusing: %d", 575 __func__, ln->ln_state); 576 } 577 done: 578 if (ln != NULL) 579 LLE_FREE_LOCKED(ln); 580 CURVNET_RESTORE(); 581 } 582 583 584 /* 585 * ND6 timer routine to expire default route list and prefix list 586 */ 587 void 588 nd6_timer(void *arg) 589 { 590 CURVNET_SET((struct vnet *) arg); 591 struct nd_defrouter *dr, *ndr; 592 struct nd_prefix *pr, *npr; 593 struct in6_ifaddr *ia6, *nia6; 594 595 callout_reset(&V_nd6_timer_ch, V_nd6_prune * hz, 596 nd6_timer, curvnet); 597 598 /* expire default router list */ 599 TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { 600 if (dr->expire && dr->expire < time_uptime) 601 defrtrlist_del(dr); 602 } 603 604 /* 605 * expire interface addresses. 606 * in the past the loop was inside prefix expiry processing. 607 * However, from a stricter speci-confrmance standpoint, we should 608 * rather separate address lifetimes and prefix lifetimes. 609 * 610 * XXXRW: in6_ifaddrhead locking. 611 */ 612 addrloop: 613 TAILQ_FOREACH_SAFE(ia6, &V_in6_ifaddrhead, ia_link, nia6) { 614 /* check address lifetime */ 615 if (IFA6_IS_INVALID(ia6)) { 616 int regen = 0; 617 618 /* 619 * If the expiring address is temporary, try 620 * regenerating a new one. This would be useful when 621 * we suspended a laptop PC, then turned it on after a 622 * period that could invalidate all temporary 623 * addresses. Although we may have to restart the 624 * loop (see below), it must be after purging the 625 * address. Otherwise, we'd see an infinite loop of 626 * regeneration. 627 */ 628 if (V_ip6_use_tempaddr && 629 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0) { 630 if (regen_tmpaddr(ia6) == 0) 631 regen = 1; 632 } 633 634 in6_purgeaddr(&ia6->ia_ifa); 635 636 if (regen) 637 goto addrloop; /* XXX: see below */ 638 } else if (IFA6_IS_DEPRECATED(ia6)) { 639 int oldflags = ia6->ia6_flags; 640 641 ia6->ia6_flags |= IN6_IFF_DEPRECATED; 642 643 /* 644 * If a temporary address has just become deprecated, 645 * regenerate a new one if possible. 646 */ 647 if (V_ip6_use_tempaddr && 648 (ia6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && 649 (oldflags & IN6_IFF_DEPRECATED) == 0) { 650 651 if (regen_tmpaddr(ia6) == 0) { 652 /* 653 * A new temporary address is 654 * generated. 655 * XXX: this means the address chain 656 * has changed while we are still in 657 * the loop. Although the change 658 * would not cause disaster (because 659 * it's not a deletion, but an 660 * addition,) we'd rather restart the 661 * loop just for safety. Or does this 662 * significantly reduce performance?? 663 */ 664 goto addrloop; 665 } 666 } 667 } else { 668 /* 669 * A new RA might have made a deprecated address 670 * preferred. 671 */ 672 ia6->ia6_flags &= ~IN6_IFF_DEPRECATED; 673 } 674 } 675 676 /* expire prefix list */ 677 LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { 678 /* 679 * check prefix lifetime. 680 * since pltime is just for autoconf, pltime processing for 681 * prefix is not necessary. 682 */ 683 if (pr->ndpr_vltime != ND6_INFINITE_LIFETIME && 684 time_uptime - pr->ndpr_lastupdate > pr->ndpr_vltime) { 685 686 /* 687 * address expiration and prefix expiration are 688 * separate. NEVER perform in6_purgeaddr here. 689 */ 690 prelist_remove(pr); 691 } 692 } 693 CURVNET_RESTORE(); 694 } 695 696 /* 697 * ia6 - deprecated/invalidated temporary address 698 */ 699 static int 700 regen_tmpaddr(struct in6_ifaddr *ia6) 701 { 702 struct ifaddr *ifa; 703 struct ifnet *ifp; 704 struct in6_ifaddr *public_ifa6 = NULL; 705 706 ifp = ia6->ia_ifa.ifa_ifp; 707 IF_ADDR_RLOCK(ifp); 708 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 709 struct in6_ifaddr *it6; 710 711 if (ifa->ifa_addr->sa_family != AF_INET6) 712 continue; 713 714 it6 = (struct in6_ifaddr *)ifa; 715 716 /* ignore no autoconf addresses. */ 717 if ((it6->ia6_flags & IN6_IFF_AUTOCONF) == 0) 718 continue; 719 720 /* ignore autoconf addresses with different prefixes. */ 721 if (it6->ia6_ndpr == NULL || it6->ia6_ndpr != ia6->ia6_ndpr) 722 continue; 723 724 /* 725 * Now we are looking at an autoconf address with the same 726 * prefix as ours. If the address is temporary and is still 727 * preferred, do not create another one. It would be rare, but 728 * could happen, for example, when we resume a laptop PC after 729 * a long period. 730 */ 731 if ((it6->ia6_flags & IN6_IFF_TEMPORARY) != 0 && 732 !IFA6_IS_DEPRECATED(it6)) { 733 public_ifa6 = NULL; 734 break; 735 } 736 737 /* 738 * This is a public autoconf address that has the same prefix 739 * as ours. If it is preferred, keep it. We can't break the 740 * loop here, because there may be a still-preferred temporary 741 * address with the prefix. 742 */ 743 if (!IFA6_IS_DEPRECATED(it6)) 744 public_ifa6 = it6; 745 746 if (public_ifa6 != NULL) 747 ifa_ref(&public_ifa6->ia_ifa); 748 } 749 IF_ADDR_RUNLOCK(ifp); 750 751 if (public_ifa6 != NULL) { 752 int e; 753 754 if ((e = in6_tmpifadd(public_ifa6, 0, 0)) != 0) { 755 ifa_free(&public_ifa6->ia_ifa); 756 log(LOG_NOTICE, "regen_tmpaddr: failed to create a new" 757 " tmp addr,errno=%d\n", e); 758 return (-1); 759 } 760 ifa_free(&public_ifa6->ia_ifa); 761 return (0); 762 } 763 764 return (-1); 765 } 766 767 /* 768 * Nuke neighbor cache/prefix/default router management table, right before 769 * ifp goes away. 770 */ 771 void 772 nd6_purge(struct ifnet *ifp) 773 { 774 struct nd_defrouter *dr, *ndr; 775 struct nd_prefix *pr, *npr; 776 777 /* 778 * Nuke default router list entries toward ifp. 779 * We defer removal of default router list entries that is installed 780 * in the routing table, in order to keep additional side effects as 781 * small as possible. 782 */ 783 TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { 784 if (dr->installed) 785 continue; 786 787 if (dr->ifp == ifp) 788 defrtrlist_del(dr); 789 } 790 791 TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, ndr) { 792 if (!dr->installed) 793 continue; 794 795 if (dr->ifp == ifp) 796 defrtrlist_del(dr); 797 } 798 799 /* Nuke prefix list entries toward ifp */ 800 LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, npr) { 801 if (pr->ndpr_ifp == ifp) { 802 /* 803 * Because if_detach() does *not* release prefixes 804 * while purging addresses the reference count will 805 * still be above zero. We therefore reset it to 806 * make sure that the prefix really gets purged. 807 */ 808 pr->ndpr_refcnt = 0; 809 810 /* 811 * Previously, pr->ndpr_addr is removed as well, 812 * but I strongly believe we don't have to do it. 813 * nd6_purge() is only called from in6_ifdetach(), 814 * which removes all the associated interface addresses 815 * by itself. 816 * (jinmei@kame.net 20010129) 817 */ 818 prelist_remove(pr); 819 } 820 } 821 822 /* cancel default outgoing interface setting */ 823 if (V_nd6_defifindex == ifp->if_index) 824 nd6_setdefaultiface(0); 825 826 if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { 827 /* Refresh default router list. */ 828 defrouter_select(); 829 } 830 831 /* XXXXX 832 * We do not nuke the neighbor cache entries here any more 833 * because the neighbor cache is kept in if_afdata[AF_INET6]. 834 * nd6_purge() is invoked by in6_ifdetach() which is called 835 * from if_detach() where everything gets purged. So let 836 * in6_domifdetach() do the actual L2 table purging work. 837 */ 838 } 839 840 /* 841 * the caller acquires and releases the lock on the lltbls 842 * Returns the llentry locked 843 */ 844 struct llentry * 845 nd6_lookup(struct in6_addr *addr6, int flags, struct ifnet *ifp) 846 { 847 struct sockaddr_in6 sin6; 848 struct llentry *ln; 849 int llflags; 850 851 bzero(&sin6, sizeof(sin6)); 852 sin6.sin6_len = sizeof(struct sockaddr_in6); 853 sin6.sin6_family = AF_INET6; 854 sin6.sin6_addr = *addr6; 855 856 IF_AFDATA_LOCK_ASSERT(ifp); 857 858 llflags = 0; 859 if (flags & ND6_CREATE) 860 llflags |= LLE_CREATE; 861 if (flags & ND6_EXCLUSIVE) 862 llflags |= LLE_EXCLUSIVE; 863 864 ln = lla_lookup(LLTABLE6(ifp), llflags, (struct sockaddr *)&sin6); 865 if ((ln != NULL) && (llflags & LLE_CREATE)) 866 ln->ln_state = ND6_LLINFO_NOSTATE; 867 868 return (ln); 869 } 870 871 /* 872 * Test whether a given IPv6 address is a neighbor or not, ignoring 873 * the actual neighbor cache. The neighbor cache is ignored in order 874 * to not reenter the routing code from within itself. 875 */ 876 static int 877 nd6_is_new_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) 878 { 879 struct nd_prefix *pr; 880 struct ifaddr *dstaddr; 881 882 /* 883 * A link-local address is always a neighbor. 884 * XXX: a link does not necessarily specify a single interface. 885 */ 886 if (IN6_IS_ADDR_LINKLOCAL(&addr->sin6_addr)) { 887 struct sockaddr_in6 sin6_copy; 888 u_int32_t zone; 889 890 /* 891 * We need sin6_copy since sa6_recoverscope() may modify the 892 * content (XXX). 893 */ 894 sin6_copy = *addr; 895 if (sa6_recoverscope(&sin6_copy)) 896 return (0); /* XXX: should be impossible */ 897 if (in6_setscope(&sin6_copy.sin6_addr, ifp, &zone)) 898 return (0); 899 if (sin6_copy.sin6_scope_id == zone) 900 return (1); 901 else 902 return (0); 903 } 904 905 /* 906 * If the address matches one of our addresses, 907 * it should be a neighbor. 908 * If the address matches one of our on-link prefixes, it should be a 909 * neighbor. 910 */ 911 LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { 912 if (pr->ndpr_ifp != ifp) 913 continue; 914 915 if (!(pr->ndpr_stateflags & NDPRF_ONLINK)) { 916 struct rtentry *rt; 917 918 /* Always use the default FIB here. */ 919 rt = in6_rtalloc1((struct sockaddr *)&pr->ndpr_prefix, 920 0, 0, RT_DEFAULT_FIB); 921 if (rt == NULL) 922 continue; 923 /* 924 * This is the case where multiple interfaces 925 * have the same prefix, but only one is installed 926 * into the routing table and that prefix entry 927 * is not the one being examined here. In the case 928 * where RADIX_MPATH is enabled, multiple route 929 * entries (of the same rt_key value) will be 930 * installed because the interface addresses all 931 * differ. 932 */ 933 if (!IN6_ARE_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, 934 &((struct sockaddr_in6 *)rt_key(rt))->sin6_addr)) { 935 RTFREE_LOCKED(rt); 936 continue; 937 } 938 RTFREE_LOCKED(rt); 939 } 940 941 if (IN6_ARE_MASKED_ADDR_EQUAL(&pr->ndpr_prefix.sin6_addr, 942 &addr->sin6_addr, &pr->ndpr_mask)) 943 return (1); 944 } 945 946 /* 947 * If the address is assigned on the node of the other side of 948 * a p2p interface, the address should be a neighbor. 949 */ 950 dstaddr = ifa_ifwithdstaddr((struct sockaddr *)addr, RT_ALL_FIBS); 951 if (dstaddr != NULL) { 952 if (dstaddr->ifa_ifp == ifp) { 953 ifa_free(dstaddr); 954 return (1); 955 } 956 ifa_free(dstaddr); 957 } 958 959 /* 960 * If the default router list is empty, all addresses are regarded 961 * as on-link, and thus, as a neighbor. 962 */ 963 if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV && 964 TAILQ_EMPTY(&V_nd_defrouter) && 965 V_nd6_defifindex == ifp->if_index) { 966 return (1); 967 } 968 969 return (0); 970 } 971 972 973 /* 974 * Detect if a given IPv6 address identifies a neighbor on a given link. 975 * XXX: should take care of the destination of a p2p link? 976 */ 977 int 978 nd6_is_addr_neighbor(struct sockaddr_in6 *addr, struct ifnet *ifp) 979 { 980 struct llentry *lle; 981 int rc = 0; 982 983 IF_AFDATA_UNLOCK_ASSERT(ifp); 984 if (nd6_is_new_addr_neighbor(addr, ifp)) 985 return (1); 986 987 /* 988 * Even if the address matches none of our addresses, it might be 989 * in the neighbor cache. 990 */ 991 IF_AFDATA_RLOCK(ifp); 992 if ((lle = nd6_lookup(&addr->sin6_addr, 0, ifp)) != NULL) { 993 LLE_RUNLOCK(lle); 994 rc = 1; 995 } 996 IF_AFDATA_RUNLOCK(ifp); 997 return (rc); 998 } 999 1000 /* 1001 * Free an nd6 llinfo entry. 1002 * Since the function would cause significant changes in the kernel, DO NOT 1003 * make it global, unless you have a strong reason for the change, and are sure 1004 * that the change is safe. 1005 */ 1006 static struct llentry * 1007 nd6_free(struct llentry *ln, int gc) 1008 { 1009 struct llentry *next; 1010 struct nd_defrouter *dr; 1011 struct ifnet *ifp; 1012 1013 LLE_WLOCK_ASSERT(ln); 1014 1015 /* 1016 * we used to have pfctlinput(PRC_HOSTDEAD) here. 1017 * even though it is not harmful, it was not really necessary. 1018 */ 1019 1020 /* cancel timer */ 1021 nd6_llinfo_settimer_locked(ln, -1); 1022 1023 ifp = ln->lle_tbl->llt_ifp; 1024 1025 if (ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { 1026 dr = defrouter_lookup(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); 1027 1028 if (dr != NULL && dr->expire && 1029 ln->ln_state == ND6_LLINFO_STALE && gc) { 1030 /* 1031 * If the reason for the deletion is just garbage 1032 * collection, and the neighbor is an active default 1033 * router, do not delete it. Instead, reset the GC 1034 * timer using the router's lifetime. 1035 * Simply deleting the entry would affect default 1036 * router selection, which is not necessarily a good 1037 * thing, especially when we're using router preference 1038 * values. 1039 * XXX: the check for ln_state would be redundant, 1040 * but we intentionally keep it just in case. 1041 */ 1042 if (dr->expire > time_uptime) 1043 nd6_llinfo_settimer_locked(ln, 1044 (dr->expire - time_uptime) * hz); 1045 else 1046 nd6_llinfo_settimer_locked(ln, 1047 (long)V_nd6_gctimer * hz); 1048 1049 next = LIST_NEXT(ln, lle_next); 1050 LLE_REMREF(ln); 1051 LLE_WUNLOCK(ln); 1052 return (next); 1053 } 1054 1055 if (dr) { 1056 /* 1057 * Unreachablity of a router might affect the default 1058 * router selection and on-link detection of advertised 1059 * prefixes. 1060 */ 1061 1062 /* 1063 * Temporarily fake the state to choose a new default 1064 * router and to perform on-link determination of 1065 * prefixes correctly. 1066 * Below the state will be set correctly, 1067 * or the entry itself will be deleted. 1068 */ 1069 ln->ln_state = ND6_LLINFO_INCOMPLETE; 1070 } 1071 1072 if (ln->ln_router || dr) { 1073 1074 /* 1075 * We need to unlock to avoid a LOR with rt6_flush() with the 1076 * rnh and for the calls to pfxlist_onlink_check() and 1077 * defrouter_select() in the block further down for calls 1078 * into nd6_lookup(). We still hold a ref. 1079 */ 1080 LLE_WUNLOCK(ln); 1081 1082 /* 1083 * rt6_flush must be called whether or not the neighbor 1084 * is in the Default Router List. 1085 * See a corresponding comment in nd6_na_input(). 1086 */ 1087 rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); 1088 } 1089 1090 if (dr) { 1091 /* 1092 * Since defrouter_select() does not affect the 1093 * on-link determination and MIP6 needs the check 1094 * before the default router selection, we perform 1095 * the check now. 1096 */ 1097 pfxlist_onlink_check(); 1098 1099 /* 1100 * Refresh default router list. 1101 */ 1102 defrouter_select(); 1103 } 1104 1105 if (ln->ln_router || dr) 1106 LLE_WLOCK(ln); 1107 } 1108 1109 /* 1110 * Before deleting the entry, remember the next entry as the 1111 * return value. We need this because pfxlist_onlink_check() above 1112 * might have freed other entries (particularly the old next entry) as 1113 * a side effect (XXX). 1114 */ 1115 next = LIST_NEXT(ln, lle_next); 1116 1117 /* 1118 * Save to unlock. We still hold an extra reference and will not 1119 * free(9) in llentry_free() if someone else holds one as well. 1120 */ 1121 LLE_WUNLOCK(ln); 1122 IF_AFDATA_LOCK(ifp); 1123 LLE_WLOCK(ln); 1124 1125 /* Guard against race with other llentry_free(). */ 1126 if (ln->la_flags & LLE_LINKED) { 1127 LLE_REMREF(ln); 1128 llentry_free(ln); 1129 } else 1130 LLE_FREE_LOCKED(ln); 1131 1132 IF_AFDATA_UNLOCK(ifp); 1133 1134 return (next); 1135 } 1136 1137 /* 1138 * Upper-layer reachability hint for Neighbor Unreachability Detection. 1139 * 1140 * XXX cost-effective methods? 1141 */ 1142 void 1143 nd6_nud_hint(struct rtentry *rt, struct in6_addr *dst6, int force) 1144 { 1145 struct llentry *ln; 1146 struct ifnet *ifp; 1147 1148 if ((dst6 == NULL) || (rt == NULL)) 1149 return; 1150 1151 ifp = rt->rt_ifp; 1152 IF_AFDATA_RLOCK(ifp); 1153 ln = nd6_lookup(dst6, ND6_EXCLUSIVE, NULL); 1154 IF_AFDATA_RUNLOCK(ifp); 1155 if (ln == NULL) 1156 return; 1157 1158 if (ln->ln_state < ND6_LLINFO_REACHABLE) 1159 goto done; 1160 1161 /* 1162 * if we get upper-layer reachability confirmation many times, 1163 * it is possible we have false information. 1164 */ 1165 if (!force) { 1166 ln->ln_byhint++; 1167 if (ln->ln_byhint > V_nd6_maxnudhint) { 1168 goto done; 1169 } 1170 } 1171 1172 ln->ln_state = ND6_LLINFO_REACHABLE; 1173 if (!ND6_LLINFO_PERMANENT(ln)) { 1174 nd6_llinfo_settimer_locked(ln, 1175 (long)ND_IFINFO(rt->rt_ifp)->reachable * hz); 1176 } 1177 done: 1178 LLE_WUNLOCK(ln); 1179 } 1180 1181 1182 /* 1183 * Rejuvenate this function for routing operations related 1184 * processing. 1185 */ 1186 void 1187 nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info) 1188 { 1189 struct sockaddr_in6 *gateway; 1190 struct nd_defrouter *dr; 1191 struct ifnet *ifp; 1192 1193 gateway = (struct sockaddr_in6 *)rt->rt_gateway; 1194 ifp = rt->rt_ifp; 1195 1196 switch (req) { 1197 case RTM_ADD: 1198 break; 1199 1200 case RTM_DELETE: 1201 if (!ifp) 1202 return; 1203 /* 1204 * Only indirect routes are interesting. 1205 */ 1206 if ((rt->rt_flags & RTF_GATEWAY) == 0) 1207 return; 1208 /* 1209 * check for default route 1210 */ 1211 if (IN6_ARE_ADDR_EQUAL(&in6addr_any, 1212 &SIN6(rt_key(rt))->sin6_addr)) { 1213 1214 dr = defrouter_lookup(&gateway->sin6_addr, ifp); 1215 if (dr != NULL) 1216 dr->installed = 0; 1217 } 1218 break; 1219 } 1220 } 1221 1222 1223 int 1224 nd6_ioctl(u_long cmd, caddr_t data, struct ifnet *ifp) 1225 { 1226 struct in6_drlist *drl = (struct in6_drlist *)data; 1227 struct in6_oprlist *oprl = (struct in6_oprlist *)data; 1228 struct in6_ndireq *ndi = (struct in6_ndireq *)data; 1229 struct in6_nbrinfo *nbi = (struct in6_nbrinfo *)data; 1230 struct in6_ndifreq *ndif = (struct in6_ndifreq *)data; 1231 struct nd_defrouter *dr; 1232 struct nd_prefix *pr; 1233 int i = 0, error = 0; 1234 1235 if (ifp->if_afdata[AF_INET6] == NULL) 1236 return (EPFNOSUPPORT); 1237 switch (cmd) { 1238 case SIOCGDRLST_IN6: 1239 /* 1240 * obsolete API, use sysctl under net.inet6.icmp6 1241 */ 1242 bzero(drl, sizeof(*drl)); 1243 TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { 1244 if (i >= DRLSTSIZ) 1245 break; 1246 drl->defrouter[i].rtaddr = dr->rtaddr; 1247 in6_clearscope(&drl->defrouter[i].rtaddr); 1248 1249 drl->defrouter[i].flags = dr->flags; 1250 drl->defrouter[i].rtlifetime = dr->rtlifetime; 1251 drl->defrouter[i].expire = dr->expire + 1252 (time_second - time_uptime); 1253 drl->defrouter[i].if_index = dr->ifp->if_index; 1254 i++; 1255 } 1256 break; 1257 case SIOCGPRLST_IN6: 1258 /* 1259 * obsolete API, use sysctl under net.inet6.icmp6 1260 * 1261 * XXX the structure in6_prlist was changed in backward- 1262 * incompatible manner. in6_oprlist is used for SIOCGPRLST_IN6, 1263 * in6_prlist is used for nd6_sysctl() - fill_prlist(). 1264 */ 1265 /* 1266 * XXX meaning of fields, especialy "raflags", is very 1267 * differnet between RA prefix list and RR/static prefix list. 1268 * how about separating ioctls into two? 1269 */ 1270 bzero(oprl, sizeof(*oprl)); 1271 LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { 1272 struct nd_pfxrouter *pfr; 1273 int j; 1274 1275 if (i >= PRLSTSIZ) 1276 break; 1277 oprl->prefix[i].prefix = pr->ndpr_prefix.sin6_addr; 1278 oprl->prefix[i].raflags = pr->ndpr_raf; 1279 oprl->prefix[i].prefixlen = pr->ndpr_plen; 1280 oprl->prefix[i].vltime = pr->ndpr_vltime; 1281 oprl->prefix[i].pltime = pr->ndpr_pltime; 1282 oprl->prefix[i].if_index = pr->ndpr_ifp->if_index; 1283 if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) 1284 oprl->prefix[i].expire = 0; 1285 else { 1286 time_t maxexpire; 1287 1288 /* XXX: we assume time_t is signed. */ 1289 maxexpire = (-1) & 1290 ~((time_t)1 << 1291 ((sizeof(maxexpire) * 8) - 1)); 1292 if (pr->ndpr_vltime < 1293 maxexpire - pr->ndpr_lastupdate) { 1294 oprl->prefix[i].expire = 1295 pr->ndpr_lastupdate + 1296 pr->ndpr_vltime + 1297 (time_second - time_uptime); 1298 } else 1299 oprl->prefix[i].expire = maxexpire; 1300 } 1301 1302 j = 0; 1303 LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { 1304 if (j < DRLSTSIZ) { 1305 #define RTRADDR oprl->prefix[i].advrtr[j] 1306 RTRADDR = pfr->router->rtaddr; 1307 in6_clearscope(&RTRADDR); 1308 #undef RTRADDR 1309 } 1310 j++; 1311 } 1312 oprl->prefix[i].advrtrs = j; 1313 oprl->prefix[i].origin = PR_ORIG_RA; 1314 1315 i++; 1316 } 1317 1318 break; 1319 case OSIOCGIFINFO_IN6: 1320 #define ND ndi->ndi 1321 /* XXX: old ndp(8) assumes a positive value for linkmtu. */ 1322 bzero(&ND, sizeof(ND)); 1323 ND.linkmtu = IN6_LINKMTU(ifp); 1324 ND.maxmtu = ND_IFINFO(ifp)->maxmtu; 1325 ND.basereachable = ND_IFINFO(ifp)->basereachable; 1326 ND.reachable = ND_IFINFO(ifp)->reachable; 1327 ND.retrans = ND_IFINFO(ifp)->retrans; 1328 ND.flags = ND_IFINFO(ifp)->flags; 1329 ND.recalctm = ND_IFINFO(ifp)->recalctm; 1330 ND.chlim = ND_IFINFO(ifp)->chlim; 1331 break; 1332 case SIOCGIFINFO_IN6: 1333 ND = *ND_IFINFO(ifp); 1334 break; 1335 case SIOCSIFINFO_IN6: 1336 /* 1337 * used to change host variables from userland. 1338 * intented for a use on router to reflect RA configurations. 1339 */ 1340 /* 0 means 'unspecified' */ 1341 if (ND.linkmtu != 0) { 1342 if (ND.linkmtu < IPV6_MMTU || 1343 ND.linkmtu > IN6_LINKMTU(ifp)) { 1344 error = EINVAL; 1345 break; 1346 } 1347 ND_IFINFO(ifp)->linkmtu = ND.linkmtu; 1348 } 1349 1350 if (ND.basereachable != 0) { 1351 int obasereachable = ND_IFINFO(ifp)->basereachable; 1352 1353 ND_IFINFO(ifp)->basereachable = ND.basereachable; 1354 if (ND.basereachable != obasereachable) 1355 ND_IFINFO(ifp)->reachable = 1356 ND_COMPUTE_RTIME(ND.basereachable); 1357 } 1358 if (ND.retrans != 0) 1359 ND_IFINFO(ifp)->retrans = ND.retrans; 1360 if (ND.chlim != 0) 1361 ND_IFINFO(ifp)->chlim = ND.chlim; 1362 /* FALLTHROUGH */ 1363 case SIOCSIFINFO_FLAGS: 1364 { 1365 struct ifaddr *ifa; 1366 struct in6_ifaddr *ia; 1367 1368 if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && 1369 !(ND.flags & ND6_IFF_IFDISABLED)) { 1370 /* ifdisabled 1->0 transision */ 1371 1372 /* 1373 * If the interface is marked as ND6_IFF_IFDISABLED and 1374 * has an link-local address with IN6_IFF_DUPLICATED, 1375 * do not clear ND6_IFF_IFDISABLED. 1376 * See RFC 4862, Section 5.4.5. 1377 */ 1378 int duplicated_linklocal = 0; 1379 1380 IF_ADDR_RLOCK(ifp); 1381 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1382 if (ifa->ifa_addr->sa_family != AF_INET6) 1383 continue; 1384 ia = (struct in6_ifaddr *)ifa; 1385 if ((ia->ia6_flags & IN6_IFF_DUPLICATED) && 1386 IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { 1387 duplicated_linklocal = 1; 1388 break; 1389 } 1390 } 1391 IF_ADDR_RUNLOCK(ifp); 1392 1393 if (duplicated_linklocal) { 1394 ND.flags |= ND6_IFF_IFDISABLED; 1395 log(LOG_ERR, "Cannot enable an interface" 1396 " with a link-local address marked" 1397 " duplicate.\n"); 1398 } else { 1399 ND_IFINFO(ifp)->flags &= ~ND6_IFF_IFDISABLED; 1400 if (ifp->if_flags & IFF_UP) 1401 in6_if_up(ifp); 1402 } 1403 } else if (!(ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED) && 1404 (ND.flags & ND6_IFF_IFDISABLED)) { 1405 /* ifdisabled 0->1 transision */ 1406 /* Mark all IPv6 address as tentative. */ 1407 1408 ND_IFINFO(ifp)->flags |= ND6_IFF_IFDISABLED; 1409 IF_ADDR_RLOCK(ifp); 1410 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1411 if (ifa->ifa_addr->sa_family != AF_INET6) 1412 continue; 1413 ia = (struct in6_ifaddr *)ifa; 1414 ia->ia6_flags |= IN6_IFF_TENTATIVE; 1415 } 1416 IF_ADDR_RUNLOCK(ifp); 1417 } 1418 1419 if (ND.flags & ND6_IFF_AUTO_LINKLOCAL) { 1420 if (!(ND_IFINFO(ifp)->flags & ND6_IFF_AUTO_LINKLOCAL)) { 1421 /* auto_linklocal 0->1 transision */ 1422 1423 /* If no link-local address on ifp, configure */ 1424 ND_IFINFO(ifp)->flags |= ND6_IFF_AUTO_LINKLOCAL; 1425 in6_ifattach(ifp, NULL); 1426 } else if (!(ND.flags & ND6_IFF_IFDISABLED) && 1427 ifp->if_flags & IFF_UP) { 1428 /* 1429 * When the IF already has 1430 * ND6_IFF_AUTO_LINKLOCAL, no link-local 1431 * address is assigned, and IFF_UP, try to 1432 * assign one. 1433 */ 1434 int haslinklocal = 0; 1435 1436 IF_ADDR_RLOCK(ifp); 1437 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 1438 if (ifa->ifa_addr->sa_family != AF_INET6) 1439 continue; 1440 ia = (struct in6_ifaddr *)ifa; 1441 if (IN6_IS_ADDR_LINKLOCAL(IA6_IN6(ia))) { 1442 haslinklocal = 1; 1443 break; 1444 } 1445 } 1446 IF_ADDR_RUNLOCK(ifp); 1447 if (!haslinklocal) 1448 in6_ifattach(ifp, NULL); 1449 } 1450 } 1451 } 1452 ND_IFINFO(ifp)->flags = ND.flags; 1453 break; 1454 #undef ND 1455 case SIOCSNDFLUSH_IN6: /* XXX: the ioctl name is confusing... */ 1456 /* sync kernel routing table with the default router list */ 1457 defrouter_reset(); 1458 defrouter_select(); 1459 break; 1460 case SIOCSPFXFLUSH_IN6: 1461 { 1462 /* flush all the prefix advertised by routers */ 1463 struct nd_prefix *pr, *next; 1464 1465 LIST_FOREACH_SAFE(pr, &V_nd_prefix, ndpr_entry, next) { 1466 struct in6_ifaddr *ia, *ia_next; 1467 1468 if (IN6_IS_ADDR_LINKLOCAL(&pr->ndpr_prefix.sin6_addr)) 1469 continue; /* XXX */ 1470 1471 /* do we really have to remove addresses as well? */ 1472 /* XXXRW: in6_ifaddrhead locking. */ 1473 TAILQ_FOREACH_SAFE(ia, &V_in6_ifaddrhead, ia_link, 1474 ia_next) { 1475 if ((ia->ia6_flags & IN6_IFF_AUTOCONF) == 0) 1476 continue; 1477 1478 if (ia->ia6_ndpr == pr) 1479 in6_purgeaddr(&ia->ia_ifa); 1480 } 1481 prelist_remove(pr); 1482 } 1483 break; 1484 } 1485 case SIOCSRTRFLUSH_IN6: 1486 { 1487 /* flush all the default routers */ 1488 struct nd_defrouter *dr, *next; 1489 1490 defrouter_reset(); 1491 TAILQ_FOREACH_SAFE(dr, &V_nd_defrouter, dr_entry, next) { 1492 defrtrlist_del(dr); 1493 } 1494 defrouter_select(); 1495 break; 1496 } 1497 case SIOCGNBRINFO_IN6: 1498 { 1499 struct llentry *ln; 1500 struct in6_addr nb_addr = nbi->addr; /* make local for safety */ 1501 1502 if ((error = in6_setscope(&nb_addr, ifp, NULL)) != 0) 1503 return (error); 1504 1505 IF_AFDATA_RLOCK(ifp); 1506 ln = nd6_lookup(&nb_addr, 0, ifp); 1507 IF_AFDATA_RUNLOCK(ifp); 1508 1509 if (ln == NULL) { 1510 error = EINVAL; 1511 break; 1512 } 1513 nbi->state = ln->ln_state; 1514 nbi->asked = ln->la_asked; 1515 nbi->isrouter = ln->ln_router; 1516 if (ln->la_expire == 0) 1517 nbi->expire = 0; 1518 else 1519 nbi->expire = ln->la_expire + 1520 (time_second - time_uptime); 1521 LLE_RUNLOCK(ln); 1522 break; 1523 } 1524 case SIOCGDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ 1525 ndif->ifindex = V_nd6_defifindex; 1526 break; 1527 case SIOCSDEFIFACE_IN6: /* XXX: should be implemented as a sysctl? */ 1528 return (nd6_setdefaultiface(ndif->ifindex)); 1529 } 1530 return (error); 1531 } 1532 1533 /* 1534 * Create neighbor cache entry and cache link-layer address, 1535 * on reception of inbound ND6 packets. (RS/RA/NS/redirect) 1536 * 1537 * type - ICMP6 type 1538 * code - type dependent information 1539 * 1540 * XXXXX 1541 * The caller of this function already acquired the ndp 1542 * cache table lock because the cache entry is returned. 1543 */ 1544 struct llentry * 1545 nd6_cache_lladdr(struct ifnet *ifp, struct in6_addr *from, char *lladdr, 1546 int lladdrlen, int type, int code) 1547 { 1548 struct llentry *ln = NULL; 1549 int is_newentry; 1550 int do_update; 1551 int olladdr; 1552 int llchange; 1553 int flags; 1554 int newstate = 0; 1555 uint16_t router = 0; 1556 struct sockaddr_in6 sin6; 1557 struct mbuf *chain = NULL; 1558 int static_route = 0; 1559 1560 IF_AFDATA_UNLOCK_ASSERT(ifp); 1561 1562 KASSERT(ifp != NULL, ("%s: ifp == NULL", __func__)); 1563 KASSERT(from != NULL, ("%s: from == NULL", __func__)); 1564 1565 /* nothing must be updated for unspecified address */ 1566 if (IN6_IS_ADDR_UNSPECIFIED(from)) 1567 return NULL; 1568 1569 /* 1570 * Validation about ifp->if_addrlen and lladdrlen must be done in 1571 * the caller. 1572 * 1573 * XXX If the link does not have link-layer adderss, what should 1574 * we do? (ifp->if_addrlen == 0) 1575 * Spec says nothing in sections for RA, RS and NA. There's small 1576 * description on it in NS section (RFC 2461 7.2.3). 1577 */ 1578 flags = lladdr ? ND6_EXCLUSIVE : 0; 1579 IF_AFDATA_RLOCK(ifp); 1580 ln = nd6_lookup(from, flags, ifp); 1581 IF_AFDATA_RUNLOCK(ifp); 1582 if (ln == NULL) { 1583 flags |= ND6_EXCLUSIVE; 1584 IF_AFDATA_LOCK(ifp); 1585 ln = nd6_lookup(from, flags | ND6_CREATE, ifp); 1586 IF_AFDATA_UNLOCK(ifp); 1587 is_newentry = 1; 1588 } else { 1589 /* do nothing if static ndp is set */ 1590 if (ln->la_flags & LLE_STATIC) { 1591 static_route = 1; 1592 goto done; 1593 } 1594 is_newentry = 0; 1595 } 1596 if (ln == NULL) 1597 return (NULL); 1598 1599 olladdr = (ln->la_flags & LLE_VALID) ? 1 : 0; 1600 if (olladdr && lladdr) { 1601 llchange = bcmp(lladdr, &ln->ll_addr, 1602 ifp->if_addrlen); 1603 } else 1604 llchange = 0; 1605 1606 /* 1607 * newentry olladdr lladdr llchange (*=record) 1608 * 0 n n -- (1) 1609 * 0 y n -- (2) 1610 * 0 n y -- (3) * STALE 1611 * 0 y y n (4) * 1612 * 0 y y y (5) * STALE 1613 * 1 -- n -- (6) NOSTATE(= PASSIVE) 1614 * 1 -- y -- (7) * STALE 1615 */ 1616 1617 if (lladdr) { /* (3-5) and (7) */ 1618 /* 1619 * Record source link-layer address 1620 * XXX is it dependent to ifp->if_type? 1621 */ 1622 bcopy(lladdr, &ln->ll_addr, ifp->if_addrlen); 1623 ln->la_flags |= LLE_VALID; 1624 EVENTHANDLER_INVOKE(lle_event, ln, LLENTRY_RESOLVED); 1625 } 1626 1627 if (!is_newentry) { 1628 if ((!olladdr && lladdr != NULL) || /* (3) */ 1629 (olladdr && lladdr != NULL && llchange)) { /* (5) */ 1630 do_update = 1; 1631 newstate = ND6_LLINFO_STALE; 1632 } else /* (1-2,4) */ 1633 do_update = 0; 1634 } else { 1635 do_update = 1; 1636 if (lladdr == NULL) /* (6) */ 1637 newstate = ND6_LLINFO_NOSTATE; 1638 else /* (7) */ 1639 newstate = ND6_LLINFO_STALE; 1640 } 1641 1642 if (do_update) { 1643 /* 1644 * Update the state of the neighbor cache. 1645 */ 1646 ln->ln_state = newstate; 1647 1648 if (ln->ln_state == ND6_LLINFO_STALE) { 1649 /* 1650 * XXX: since nd6_output() below will cause 1651 * state tansition to DELAY and reset the timer, 1652 * we must set the timer now, although it is actually 1653 * meaningless. 1654 */ 1655 nd6_llinfo_settimer_locked(ln, (long)V_nd6_gctimer * hz); 1656 1657 if (ln->la_hold) { 1658 struct mbuf *m_hold, *m_hold_next; 1659 1660 /* 1661 * reset the la_hold in advance, to explicitly 1662 * prevent a la_hold lookup in nd6_output() 1663 * (wouldn't happen, though...) 1664 */ 1665 for (m_hold = ln->la_hold, ln->la_hold = NULL; 1666 m_hold; m_hold = m_hold_next) { 1667 m_hold_next = m_hold->m_nextpkt; 1668 m_hold->m_nextpkt = NULL; 1669 1670 /* 1671 * we assume ifp is not a p2p here, so 1672 * just set the 2nd argument as the 1673 * 1st one. 1674 */ 1675 nd6_output_lle(ifp, ifp, m_hold, L3_ADDR_SIN6(ln), NULL, ln, &chain); 1676 } 1677 /* 1678 * If we have mbufs in the chain we need to do 1679 * deferred transmit. Copy the address from the 1680 * llentry before dropping the lock down below. 1681 */ 1682 if (chain != NULL) 1683 memcpy(&sin6, L3_ADDR_SIN6(ln), sizeof(sin6)); 1684 } 1685 } else if (ln->ln_state == ND6_LLINFO_INCOMPLETE) { 1686 /* probe right away */ 1687 nd6_llinfo_settimer_locked((void *)ln, 0); 1688 } 1689 } 1690 1691 /* 1692 * ICMP6 type dependent behavior. 1693 * 1694 * NS: clear IsRouter if new entry 1695 * RS: clear IsRouter 1696 * RA: set IsRouter if there's lladdr 1697 * redir: clear IsRouter if new entry 1698 * 1699 * RA case, (1): 1700 * The spec says that we must set IsRouter in the following cases: 1701 * - If lladdr exist, set IsRouter. This means (1-5). 1702 * - If it is old entry (!newentry), set IsRouter. This means (7). 1703 * So, based on the spec, in (1-5) and (7) cases we must set IsRouter. 1704 * A quetion arises for (1) case. (1) case has no lladdr in the 1705 * neighbor cache, this is similar to (6). 1706 * This case is rare but we figured that we MUST NOT set IsRouter. 1707 * 1708 * newentry olladdr lladdr llchange NS RS RA redir 1709 * D R 1710 * 0 n n -- (1) c ? s 1711 * 0 y n -- (2) c s s 1712 * 0 n y -- (3) c s s 1713 * 0 y y n (4) c s s 1714 * 0 y y y (5) c s s 1715 * 1 -- n -- (6) c c c s 1716 * 1 -- y -- (7) c c s c s 1717 * 1718 * (c=clear s=set) 1719 */ 1720 switch (type & 0xff) { 1721 case ND_NEIGHBOR_SOLICIT: 1722 /* 1723 * New entry must have is_router flag cleared. 1724 */ 1725 if (is_newentry) /* (6-7) */ 1726 ln->ln_router = 0; 1727 break; 1728 case ND_REDIRECT: 1729 /* 1730 * If the icmp is a redirect to a better router, always set the 1731 * is_router flag. Otherwise, if the entry is newly created, 1732 * clear the flag. [RFC 2461, sec 8.3] 1733 */ 1734 if (code == ND_REDIRECT_ROUTER) 1735 ln->ln_router = 1; 1736 else if (is_newentry) /* (6-7) */ 1737 ln->ln_router = 0; 1738 break; 1739 case ND_ROUTER_SOLICIT: 1740 /* 1741 * is_router flag must always be cleared. 1742 */ 1743 ln->ln_router = 0; 1744 break; 1745 case ND_ROUTER_ADVERT: 1746 /* 1747 * Mark an entry with lladdr as a router. 1748 */ 1749 if ((!is_newentry && (olladdr || lladdr)) || /* (2-5) */ 1750 (is_newentry && lladdr)) { /* (7) */ 1751 ln->ln_router = 1; 1752 } 1753 break; 1754 } 1755 1756 if (ln != NULL) { 1757 static_route = (ln->la_flags & LLE_STATIC); 1758 router = ln->ln_router; 1759 1760 if (flags & ND6_EXCLUSIVE) 1761 LLE_WUNLOCK(ln); 1762 else 1763 LLE_RUNLOCK(ln); 1764 if (static_route) 1765 ln = NULL; 1766 } 1767 if (chain) 1768 nd6_output_flush(ifp, ifp, chain, &sin6); 1769 1770 /* 1771 * When the link-layer address of a router changes, select the 1772 * best router again. In particular, when the neighbor entry is newly 1773 * created, it might affect the selection policy. 1774 * Question: can we restrict the first condition to the "is_newentry" 1775 * case? 1776 * XXX: when we hear an RA from a new router with the link-layer 1777 * address option, defrouter_select() is called twice, since 1778 * defrtrlist_update called the function as well. However, I believe 1779 * we can compromise the overhead, since it only happens the first 1780 * time. 1781 * XXX: although defrouter_select() should not have a bad effect 1782 * for those are not autoconfigured hosts, we explicitly avoid such 1783 * cases for safety. 1784 */ 1785 if (do_update && router && 1786 ND_IFINFO(ifp)->flags & ND6_IFF_ACCEPT_RTADV) { 1787 /* 1788 * guaranteed recursion 1789 */ 1790 defrouter_select(); 1791 } 1792 1793 return (ln); 1794 done: 1795 if (ln != NULL) { 1796 if (flags & ND6_EXCLUSIVE) 1797 LLE_WUNLOCK(ln); 1798 else 1799 LLE_RUNLOCK(ln); 1800 if (static_route) 1801 ln = NULL; 1802 } 1803 return (ln); 1804 } 1805 1806 static void 1807 nd6_slowtimo(void *arg) 1808 { 1809 CURVNET_SET((struct vnet *) arg); 1810 struct nd_ifinfo *nd6if; 1811 struct ifnet *ifp; 1812 1813 callout_reset(&V_nd6_slowtimo_ch, ND6_SLOWTIMER_INTERVAL * hz, 1814 nd6_slowtimo, curvnet); 1815 IFNET_RLOCK_NOSLEEP(); 1816 TAILQ_FOREACH(ifp, &V_ifnet, if_link) { 1817 if (ifp->if_afdata[AF_INET6] == NULL) 1818 continue; 1819 nd6if = ND_IFINFO(ifp); 1820 if (nd6if->basereachable && /* already initialized */ 1821 (nd6if->recalctm -= ND6_SLOWTIMER_INTERVAL) <= 0) { 1822 /* 1823 * Since reachable time rarely changes by router 1824 * advertisements, we SHOULD insure that a new random 1825 * value gets recomputed at least once every few hours. 1826 * (RFC 2461, 6.3.4) 1827 */ 1828 nd6if->recalctm = V_nd6_recalc_reachtm_interval; 1829 nd6if->reachable = ND_COMPUTE_RTIME(nd6if->basereachable); 1830 } 1831 } 1832 IFNET_RUNLOCK_NOSLEEP(); 1833 CURVNET_RESTORE(); 1834 } 1835 1836 /* 1837 * IPv6 packet output - light version. 1838 * Checks if destination LLE exists and is in proper state 1839 * (e.g no modification required). If not true, fall back to 1840 * "heavy" version. 1841 */ 1842 int 1843 nd6_output(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, 1844 struct sockaddr_in6 *dst, struct rtentry *rt0) 1845 { 1846 struct llentry *ln = NULL; 1847 int error = 0; 1848 1849 /* discard the packet if IPv6 operation is disabled on the interface */ 1850 if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { 1851 m_freem(m); 1852 return (ENETDOWN); /* better error? */ 1853 } 1854 1855 if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) 1856 goto sendpkt; 1857 1858 if (nd6_need_cache(ifp) == 0) 1859 goto sendpkt; 1860 1861 IF_AFDATA_RLOCK(ifp); 1862 ln = nd6_lookup(&dst->sin6_addr, 0, ifp); 1863 IF_AFDATA_RUNLOCK(ifp); 1864 1865 /* 1866 * Perform fast path for the following cases: 1867 * 1) lle state is REACHABLE 1868 * 2) lle state is DELAY (NS message sentNS message sent) 1869 * 1870 * Every other case involves lle modification, so we handle 1871 * them separately. 1872 */ 1873 if (ln == NULL || (ln->ln_state != ND6_LLINFO_REACHABLE && 1874 ln->ln_state != ND6_LLINFO_DELAY)) { 1875 /* Fall back to slow processing path */ 1876 if (ln != NULL) 1877 LLE_RUNLOCK(ln); 1878 return (nd6_output_lle(ifp, origifp, m, dst, rt0, NULL, NULL)); 1879 } 1880 1881 sendpkt: 1882 if (ln != NULL) 1883 LLE_RUNLOCK(ln); 1884 1885 #ifdef MAC 1886 mac_netinet6_nd6_send(ifp, m); 1887 #endif 1888 1889 /* 1890 * If called from nd6_ns_output() (NS), nd6_na_output() (NA), 1891 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA 1892 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND 1893 * to be diverted to user space. When re-injected into the kernel, 1894 * send_output() will directly dispatch them to the outgoing interface. 1895 */ 1896 if (send_sendso_input_hook != NULL) { 1897 struct m_tag *mtag; 1898 struct ip6_hdr *ip6; 1899 int ip6len; 1900 mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); 1901 if (mtag != NULL) { 1902 ip6 = mtod(m, struct ip6_hdr *); 1903 ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); 1904 /* Use the SEND socket */ 1905 error = send_sendso_input_hook(m, ifp, SND_OUT, 1906 ip6len); 1907 /* -1 == no app on SEND socket */ 1908 if (error == 0 || error != -1) 1909 return (error); 1910 } 1911 } 1912 1913 m_clrprotoflags(m); /* Avoid confusing lower layers. */ 1914 IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, 1915 mtod(m, struct ip6_hdr *)); 1916 1917 if ((ifp->if_flags & IFF_LOOPBACK) == 0) 1918 origifp = ifp; 1919 1920 error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL); 1921 return (error); 1922 } 1923 1924 1925 /* 1926 * Output IPv6 packet - heavy version. 1927 * Function assume that either 1928 * 1) destination LLE does not exist, is invalid or stale, so 1929 * ND6_EXCLUSIVE lock needs to be acquired 1930 * 2) destination lle is provided (with ND6_EXCLUSIVE lock), 1931 * in that case packets are queued in &chain. 1932 * 1933 */ 1934 int 1935 nd6_output_lle(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *m, 1936 struct sockaddr_in6 *dst, struct rtentry *rt0, struct llentry *lle, 1937 struct mbuf **chain) 1938 { 1939 struct m_tag *mtag; 1940 struct ip6_hdr *ip6; 1941 int error = 0; 1942 int flags = 0; 1943 int has_lle = 0; 1944 int ip6len; 1945 1946 #ifdef INVARIANTS 1947 if (lle != NULL) { 1948 1949 LLE_WLOCK_ASSERT(lle); 1950 1951 KASSERT(chain != NULL, (" lle locked but no mbuf chain pointer passed")); 1952 } 1953 #endif 1954 KASSERT(m != NULL, ("NULL mbuf, nothing to send")); 1955 /* discard the packet if IPv6 operation is disabled on the interface */ 1956 if ((ND_IFINFO(ifp)->flags & ND6_IFF_IFDISABLED)) { 1957 m_freem(m); 1958 return (ENETDOWN); /* better error? */ 1959 } 1960 1961 if (lle != NULL) 1962 has_lle = 1; 1963 1964 if (IN6_IS_ADDR_MULTICAST(&dst->sin6_addr)) 1965 goto sendpkt; 1966 1967 if (nd6_need_cache(ifp) == 0) 1968 goto sendpkt; 1969 1970 /* 1971 * Address resolution or Neighbor Unreachability Detection 1972 * for the next hop. 1973 * At this point, the destination of the packet must be a unicast 1974 * or an anycast address(i.e. not a multicast). 1975 */ 1976 if (lle == NULL) { 1977 IF_AFDATA_RLOCK(ifp); 1978 lle = nd6_lookup(&dst->sin6_addr, ND6_EXCLUSIVE, ifp); 1979 IF_AFDATA_RUNLOCK(ifp); 1980 if ((lle == NULL) && nd6_is_addr_neighbor(dst, ifp)) { 1981 /* 1982 * Since nd6_is_addr_neighbor() internally calls nd6_lookup(), 1983 * the condition below is not very efficient. But we believe 1984 * it is tolerable, because this should be a rare case. 1985 */ 1986 flags = ND6_CREATE | ND6_EXCLUSIVE; 1987 IF_AFDATA_LOCK(ifp); 1988 lle = nd6_lookup(&dst->sin6_addr, flags, ifp); 1989 IF_AFDATA_UNLOCK(ifp); 1990 } 1991 } 1992 if (lle == NULL) { 1993 if ((ifp->if_flags & IFF_POINTOPOINT) == 0 && 1994 !(ND_IFINFO(ifp)->flags & ND6_IFF_PERFORMNUD)) { 1995 char ip6buf[INET6_ADDRSTRLEN]; 1996 log(LOG_DEBUG, 1997 "nd6_output: can't allocate llinfo for %s " 1998 "(ln=%p)\n", 1999 ip6_sprintf(ip6buf, &dst->sin6_addr), lle); 2000 m_freem(m); 2001 return (ENOBUFS); 2002 } 2003 goto sendpkt; /* send anyway */ 2004 } 2005 2006 LLE_WLOCK_ASSERT(lle); 2007 2008 /* We don't have to do link-layer address resolution on a p2p link. */ 2009 if ((ifp->if_flags & IFF_POINTOPOINT) != 0 && 2010 lle->ln_state < ND6_LLINFO_REACHABLE) { 2011 lle->ln_state = ND6_LLINFO_STALE; 2012 nd6_llinfo_settimer_locked(lle, (long)V_nd6_gctimer * hz); 2013 } 2014 2015 /* 2016 * The first time we send a packet to a neighbor whose entry is 2017 * STALE, we have to change the state to DELAY and a sets a timer to 2018 * expire in DELAY_FIRST_PROBE_TIME seconds to ensure do 2019 * neighbor unreachability detection on expiration. 2020 * (RFC 2461 7.3.3) 2021 */ 2022 if (lle->ln_state == ND6_LLINFO_STALE) { 2023 lle->la_asked = 0; 2024 lle->ln_state = ND6_LLINFO_DELAY; 2025 nd6_llinfo_settimer_locked(lle, (long)V_nd6_delay * hz); 2026 } 2027 2028 /* 2029 * If the neighbor cache entry has a state other than INCOMPLETE 2030 * (i.e. its link-layer address is already resolved), just 2031 * send the packet. 2032 */ 2033 if (lle->ln_state > ND6_LLINFO_INCOMPLETE) 2034 goto sendpkt; 2035 2036 /* 2037 * There is a neighbor cache entry, but no ethernet address 2038 * response yet. Append this latest packet to the end of the 2039 * packet queue in the mbuf, unless the number of the packet 2040 * does not exceed nd6_maxqueuelen. When it exceeds nd6_maxqueuelen, 2041 * the oldest packet in the queue will be removed. 2042 */ 2043 if (lle->ln_state == ND6_LLINFO_NOSTATE) 2044 lle->ln_state = ND6_LLINFO_INCOMPLETE; 2045 2046 if (lle->la_hold != NULL) { 2047 struct mbuf *m_hold; 2048 int i; 2049 2050 i = 0; 2051 for (m_hold = lle->la_hold; m_hold; m_hold = m_hold->m_nextpkt){ 2052 i++; 2053 if (m_hold->m_nextpkt == NULL) { 2054 m_hold->m_nextpkt = m; 2055 break; 2056 } 2057 } 2058 while (i >= V_nd6_maxqueuelen) { 2059 m_hold = lle->la_hold; 2060 lle->la_hold = lle->la_hold->m_nextpkt; 2061 m_freem(m_hold); 2062 i--; 2063 } 2064 } else { 2065 lle->la_hold = m; 2066 } 2067 2068 /* 2069 * If there has been no NS for the neighbor after entering the 2070 * INCOMPLETE state, send the first solicitation. 2071 */ 2072 if (!ND6_LLINFO_PERMANENT(lle) && lle->la_asked == 0) { 2073 lle->la_asked++; 2074 2075 nd6_llinfo_settimer_locked(lle, 2076 (long)ND_IFINFO(ifp)->retrans * hz / 1000); 2077 LLE_WUNLOCK(lle); 2078 nd6_ns_output(ifp, NULL, &dst->sin6_addr, lle, 0); 2079 if (has_lle != 0) 2080 LLE_WLOCK(lle); 2081 } else if (has_lle == 0) { 2082 /* 2083 * We did the lookup (no lle arg) so we 2084 * need to do the unlock here. 2085 */ 2086 LLE_WUNLOCK(lle); 2087 } 2088 2089 return (0); 2090 2091 sendpkt: 2092 /* 2093 * ln is valid and the caller did not pass in 2094 * an llentry 2095 */ 2096 if (lle != NULL && has_lle == 0) 2097 LLE_WUNLOCK(lle); 2098 2099 #ifdef MAC 2100 mac_netinet6_nd6_send(ifp, m); 2101 #endif 2102 2103 /* 2104 * If called from nd6_ns_output() (NS), nd6_na_output() (NA), 2105 * icmp6_redirect_output() (REDIRECT) or from rip6_output() (RS, RA 2106 * as handled by rtsol and rtadvd), mbufs will be tagged for SeND 2107 * to be diverted to user space. When re-injected into the kernel, 2108 * send_output() will directly dispatch them to the outgoing interface. 2109 */ 2110 if (send_sendso_input_hook != NULL) { 2111 mtag = m_tag_find(m, PACKET_TAG_ND_OUTGOING, NULL); 2112 if (mtag != NULL) { 2113 ip6 = mtod(m, struct ip6_hdr *); 2114 ip6len = sizeof(struct ip6_hdr) + ntohs(ip6->ip6_plen); 2115 /* Use the SEND socket */ 2116 error = send_sendso_input_hook(m, ifp, SND_OUT, 2117 ip6len); 2118 /* -1 == no app on SEND socket */ 2119 if (error == 0 || error != -1) 2120 return (error); 2121 } 2122 } 2123 2124 /* 2125 * We were passed in a pointer to an lle with the lock held 2126 * this means that we can't call if_output as we will 2127 * recurse on the lle lock - so what we do is we create 2128 * a list of mbufs to send and transmit them in the caller 2129 * after the lock is dropped 2130 */ 2131 if (has_lle != 0) { 2132 if (*chain == NULL) 2133 *chain = m; 2134 else { 2135 struct mbuf *mb; 2136 2137 /* 2138 * append mbuf to end of deferred chain 2139 */ 2140 mb = *chain; 2141 while (mb->m_nextpkt != NULL) 2142 mb = mb->m_nextpkt; 2143 mb->m_nextpkt = m; 2144 } 2145 return (error); 2146 } 2147 m_clrprotoflags(m); /* Avoid confusing lower layers. */ 2148 IP_PROBE(send, NULL, NULL, mtod(m, struct ip6_hdr *), ifp, NULL, 2149 mtod(m, struct ip6_hdr *)); 2150 2151 if ((ifp->if_flags & IFF_LOOPBACK) == 0) 2152 origifp = ifp; 2153 2154 error = (*ifp->if_output)(origifp, m, (struct sockaddr *)dst, NULL); 2155 return (error); 2156 } 2157 2158 2159 int 2160 nd6_output_flush(struct ifnet *ifp, struct ifnet *origifp, struct mbuf *chain, 2161 struct sockaddr_in6 *dst) 2162 { 2163 struct mbuf *m, *m_head; 2164 struct ifnet *outifp; 2165 int error = 0; 2166 2167 m_head = chain; 2168 if ((ifp->if_flags & IFF_LOOPBACK) != 0) 2169 outifp = origifp; 2170 else 2171 outifp = ifp; 2172 2173 while (m_head) { 2174 m = m_head; 2175 m_head = m_head->m_nextpkt; 2176 error = (*ifp->if_output)(ifp, m, (struct sockaddr *)dst, NULL); 2177 } 2178 2179 /* 2180 * XXX 2181 * note that intermediate errors are blindly ignored - but this is 2182 * the same convention as used with nd6_output when called by 2183 * nd6_cache_lladdr 2184 */ 2185 return (error); 2186 } 2187 2188 2189 int 2190 nd6_need_cache(struct ifnet *ifp) 2191 { 2192 /* 2193 * XXX: we currently do not make neighbor cache on any interface 2194 * other than ARCnet, Ethernet, FDDI and GIF. 2195 * 2196 * RFC2893 says: 2197 * - unidirectional tunnels needs no ND 2198 */ 2199 switch (ifp->if_type) { 2200 case IFT_ARCNET: 2201 case IFT_ETHER: 2202 case IFT_FDDI: 2203 case IFT_IEEE1394: 2204 #ifdef IFT_L2VLAN 2205 case IFT_L2VLAN: 2206 #endif 2207 #ifdef IFT_IEEE80211 2208 case IFT_IEEE80211: 2209 #endif 2210 case IFT_INFINIBAND: 2211 case IFT_BRIDGE: 2212 case IFT_PROPVIRTUAL: 2213 return (1); 2214 default: 2215 return (0); 2216 } 2217 } 2218 2219 /* 2220 * Add pernament ND6 link-layer record for given 2221 * interface address. 2222 * 2223 * Very similar to IPv4 arp_ifinit(), but: 2224 * 1) IPv6 DAD is performed in different place 2225 * 2) It is called by IPv6 protocol stack in contrast to 2226 * arp_ifinit() which is typically called in SIOCSIFADDR 2227 * driver ioctl handler. 2228 * 2229 */ 2230 int 2231 nd6_add_ifa_lle(struct in6_ifaddr *ia) 2232 { 2233 struct ifnet *ifp; 2234 struct llentry *ln; 2235 2236 ifp = ia->ia_ifa.ifa_ifp; 2237 if (nd6_need_cache(ifp) == 0) 2238 return (0); 2239 IF_AFDATA_LOCK(ifp); 2240 ia->ia_ifa.ifa_rtrequest = nd6_rtrequest; 2241 ln = lla_lookup(LLTABLE6(ifp), (LLE_CREATE | LLE_IFADDR | 2242 LLE_EXCLUSIVE), (struct sockaddr *)&ia->ia_addr); 2243 IF_AFDATA_UNLOCK(ifp); 2244 if (ln != NULL) { 2245 ln->la_expire = 0; /* for IPv6 this means permanent */ 2246 ln->ln_state = ND6_LLINFO_REACHABLE; 2247 LLE_WUNLOCK(ln); 2248 in6_newaddrmsg(ia, RTM_ADD); 2249 return (0); 2250 } 2251 2252 return (ENOBUFS); 2253 } 2254 2255 /* 2256 * Removes ALL lle records for interface address prefix. 2257 * XXXME: That's probably not we really want to do, we need 2258 * to remove address record only and keep other records 2259 * until we determine if given prefix is really going 2260 * to be removed. 2261 */ 2262 void 2263 nd6_rem_ifa_lle(struct in6_ifaddr *ia) 2264 { 2265 struct sockaddr_in6 mask, addr; 2266 struct ifnet *ifp; 2267 2268 in6_newaddrmsg(ia, RTM_DELETE); 2269 2270 ifp = ia->ia_ifa.ifa_ifp; 2271 memcpy(&addr, &ia->ia_addr, sizeof(ia->ia_addr)); 2272 memcpy(&mask, &ia->ia_prefixmask, sizeof(ia->ia_prefixmask)); 2273 lltable_prefix_free(AF_INET6, (struct sockaddr *)&addr, 2274 (struct sockaddr *)&mask, LLE_STATIC); 2275 } 2276 2277 /* 2278 * the callers of this function need to be re-worked to drop 2279 * the lle lock, drop here for now 2280 */ 2281 int 2282 nd6_storelladdr(struct ifnet *ifp, struct mbuf *m, 2283 const struct sockaddr *dst, u_char *desten, uint32_t *pflags) 2284 { 2285 struct llentry *ln; 2286 2287 if (pflags != NULL) 2288 *pflags = 0; 2289 IF_AFDATA_UNLOCK_ASSERT(ifp); 2290 if (m != NULL && m->m_flags & M_MCAST) { 2291 int i; 2292 2293 switch (ifp->if_type) { 2294 case IFT_ETHER: 2295 case IFT_FDDI: 2296 #ifdef IFT_L2VLAN 2297 case IFT_L2VLAN: 2298 #endif 2299 #ifdef IFT_IEEE80211 2300 case IFT_IEEE80211: 2301 #endif 2302 case IFT_BRIDGE: 2303 case IFT_ISO88025: 2304 ETHER_MAP_IPV6_MULTICAST(&SIN6(dst)->sin6_addr, 2305 desten); 2306 return (0); 2307 case IFT_IEEE1394: 2308 /* 2309 * netbsd can use if_broadcastaddr, but we don't do so 2310 * to reduce # of ifdef. 2311 */ 2312 for (i = 0; i < ifp->if_addrlen; i++) 2313 desten[i] = ~0; 2314 return (0); 2315 case IFT_ARCNET: 2316 *desten = 0; 2317 return (0); 2318 default: 2319 m_freem(m); 2320 return (EAFNOSUPPORT); 2321 } 2322 } 2323 2324 2325 /* 2326 * the entry should have been created in nd6_store_lladdr 2327 */ 2328 IF_AFDATA_RLOCK(ifp); 2329 ln = lla_lookup(LLTABLE6(ifp), 0, dst); 2330 IF_AFDATA_RUNLOCK(ifp); 2331 if ((ln == NULL) || !(ln->la_flags & LLE_VALID)) { 2332 if (ln != NULL) 2333 LLE_RUNLOCK(ln); 2334 /* this could happen, if we could not allocate memory */ 2335 m_freem(m); 2336 return (1); 2337 } 2338 2339 bcopy(&ln->ll_addr, desten, ifp->if_addrlen); 2340 if (pflags != NULL) 2341 *pflags = ln->la_flags; 2342 LLE_RUNLOCK(ln); 2343 /* 2344 * A *small* use after free race exists here 2345 */ 2346 return (0); 2347 } 2348 2349 static void 2350 clear_llinfo_pqueue(struct llentry *ln) 2351 { 2352 struct mbuf *m_hold, *m_hold_next; 2353 2354 for (m_hold = ln->la_hold; m_hold; m_hold = m_hold_next) { 2355 m_hold_next = m_hold->m_nextpkt; 2356 m_freem(m_hold); 2357 } 2358 2359 ln->la_hold = NULL; 2360 return; 2361 } 2362 2363 static int nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS); 2364 static int nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS); 2365 #ifdef SYSCTL_DECL 2366 SYSCTL_DECL(_net_inet6_icmp6); 2367 #endif 2368 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_DRLIST, nd6_drlist, 2369 CTLFLAG_RD, nd6_sysctl_drlist, ""); 2370 SYSCTL_NODE(_net_inet6_icmp6, ICMPV6CTL_ND6_PRLIST, nd6_prlist, 2371 CTLFLAG_RD, nd6_sysctl_prlist, ""); 2372 SYSCTL_INT(_net_inet6_icmp6, ICMPV6CTL_ND6_MAXQLEN, nd6_maxqueuelen, 2373 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_maxqueuelen), 1, ""); 2374 SYSCTL_INT(_net_inet6_icmp6, OID_AUTO, nd6_gctimer, 2375 CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(nd6_gctimer), (60 * 60 * 24), ""); 2376 2377 static int 2378 nd6_sysctl_drlist(SYSCTL_HANDLER_ARGS) 2379 { 2380 struct in6_defrouter d; 2381 struct nd_defrouter *dr; 2382 int error; 2383 2384 if (req->newptr) 2385 return (EPERM); 2386 2387 bzero(&d, sizeof(d)); 2388 d.rtaddr.sin6_family = AF_INET6; 2389 d.rtaddr.sin6_len = sizeof(d.rtaddr); 2390 2391 /* 2392 * XXX locking 2393 */ 2394 TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) { 2395 d.rtaddr.sin6_addr = dr->rtaddr; 2396 error = sa6_recoverscope(&d.rtaddr); 2397 if (error != 0) 2398 return (error); 2399 d.flags = dr->flags; 2400 d.rtlifetime = dr->rtlifetime; 2401 d.expire = dr->expire + (time_second - time_uptime); 2402 d.if_index = dr->ifp->if_index; 2403 error = SYSCTL_OUT(req, &d, sizeof(d)); 2404 if (error != 0) 2405 return (error); 2406 } 2407 return (0); 2408 } 2409 2410 static int 2411 nd6_sysctl_prlist(SYSCTL_HANDLER_ARGS) 2412 { 2413 struct in6_prefix p; 2414 struct sockaddr_in6 s6; 2415 struct nd_prefix *pr; 2416 struct nd_pfxrouter *pfr; 2417 time_t maxexpire; 2418 int error; 2419 char ip6buf[INET6_ADDRSTRLEN]; 2420 2421 if (req->newptr) 2422 return (EPERM); 2423 2424 bzero(&p, sizeof(p)); 2425 p.origin = PR_ORIG_RA; 2426 bzero(&s6, sizeof(s6)); 2427 s6.sin6_family = AF_INET6; 2428 s6.sin6_len = sizeof(s6); 2429 2430 /* 2431 * XXX locking 2432 */ 2433 LIST_FOREACH(pr, &V_nd_prefix, ndpr_entry) { 2434 p.prefix = pr->ndpr_prefix; 2435 if (sa6_recoverscope(&p.prefix)) { 2436 log(LOG_ERR, "scope error in prefix list (%s)\n", 2437 ip6_sprintf(ip6buf, &p.prefix.sin6_addr)); 2438 /* XXX: press on... */ 2439 } 2440 p.raflags = pr->ndpr_raf; 2441 p.prefixlen = pr->ndpr_plen; 2442 p.vltime = pr->ndpr_vltime; 2443 p.pltime = pr->ndpr_pltime; 2444 p.if_index = pr->ndpr_ifp->if_index; 2445 if (pr->ndpr_vltime == ND6_INFINITE_LIFETIME) 2446 p.expire = 0; 2447 else { 2448 /* XXX: we assume time_t is signed. */ 2449 maxexpire = (-1) & 2450 ~((time_t)1 << ((sizeof(maxexpire) * 8) - 1)); 2451 if (pr->ndpr_vltime < maxexpire - pr->ndpr_lastupdate) 2452 p.expire = pr->ndpr_lastupdate + 2453 pr->ndpr_vltime + 2454 (time_second - time_uptime); 2455 else 2456 p.expire = maxexpire; 2457 } 2458 p.refcnt = pr->ndpr_refcnt; 2459 p.flags = pr->ndpr_stateflags; 2460 p.advrtrs = 0; 2461 LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) 2462 p.advrtrs++; 2463 error = SYSCTL_OUT(req, &p, sizeof(p)); 2464 if (error != 0) 2465 return (error); 2466 LIST_FOREACH(pfr, &pr->ndpr_advrtrs, pfr_entry) { 2467 s6.sin6_addr = pfr->router->rtaddr; 2468 if (sa6_recoverscope(&s6)) 2469 log(LOG_ERR, 2470 "scope error in prefix list (%s)\n", 2471 ip6_sprintf(ip6buf, &pfr->router->rtaddr)); 2472 error = SYSCTL_OUT(req, &s6, sizeof(s6)); 2473 if (error != 0) 2474 return (error); 2475 } 2476 } 2477 return (0); 2478 } 2479