/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2009 Bruce Simpson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
 */

/*-
 * Copyright (c) 1988 Stephen Deering.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/ktr.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_private.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/mld6.h>
#include <netinet6/mld6_var.h>

#include <security/mac/mac_framework.h>

#ifndef KTR_MLD
#define KTR_MLD KTR_INET6
#endif

static void	mli_delete_locked(struct ifnet *);
static void	mld_dispatch_packet(struct mbuf *);
static void	mld_dispatch_queue(struct mbufq *, int);
static void	mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
static void	mld_fasttimo_vnet(struct in6_multi_head *inmh);
static int	mld_handle_state_change(struct in6_multi *,
		    struct mld_ifsoftc *);
static int	mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
		    const int);
#ifdef KTR
static char *	mld_rec_type_to_str(const int);
#endif
static void	mld_set_version(struct mld_ifsoftc *, const int);
static void	mld_slowtimo_vnet(void);
static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
		    /*const*/ struct mld_hdr *);
static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
		    /*const*/ struct mld_hdr *);
static void	mld_v1_process_group_timer(struct in6_multi_head *,
		    struct in6_multi *);
static void	mld_v1_process_querier_timers(struct mld_ifsoftc *);
static int	mld_v1_transmit_report(struct in6_multi *, const int);
static void	mld_v1_update_group(struct in6_multi *, const int);
static void	mld_v2_cancel_link_timers(struct mld_ifsoftc *);
static void	mld_v2_dispatch_general_query(struct mld_ifsoftc *);
static struct mbuf *
		mld_v2_encap_report(struct ifnet *, struct mbuf *);
static int	mld_v2_enqueue_filter_change(struct mbufq *,
		    struct in6_multi *);
static int	mld_v2_enqueue_group_record(struct mbufq *,
		    struct in6_multi *, const int, const int, const int,
		    const int);
static int	mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
		    struct mbuf *, struct mldv2_query *, const int, const int);
static int	mld_v2_merge_state_changes(struct in6_multi *,
		    struct mbufq *);
static void	mld_v2_process_group_timers(struct in6_multi_head *,
		    struct mbufq *, struct mbufq *,
		    struct in6_multi *, const int);
static int	mld_v2_process_group_query(struct in6_multi *,
		    struct mld_ifsoftc *mli, int, struct mbuf *,
		    struct mldv2_query *, const int);
static int	sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
static int	sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);

/*
 * Normative references: RFC 2710, RFC 3590, RFC 3810.
 *
 * Locking:
 *  * The MLD subsystem lock ends up being system-wide for the moment,
 *    but could be per-VIMAGE later on.
 *  * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
 *    Any may be taken independently; if any are held at the same
 *    time, the above lock order must be followed.
 *  * IN6_MULTI_LOCK covers in_multi.
 *  * MLD_LOCK covers per-link state and any global variables in this file.
 *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
 *    per-link state iterators.
 *
 * XXX LOR PREVENTION
 * A special case for IPv6 is the in6_setscope() routine. ip6_output()
 * will not accept an ifp; it wants an embedded scope ID, unlike
 * ip_output(), which happily takes the ifp given to it. The embedded
 * scope ID is only used by MLD to select the outgoing interface.
 *
 * During interface attach and detach, MLD will take MLD_LOCK *after*
 * the IF_AFDATA_LOCK.
 * As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call
 * it with MLD_LOCK held without triggering an LOR. A netisr with indirect
 * dispatch could work around this, but we'd rather not do that, as it
 * can introduce other races.
 *
 * As such, we exploit the fact that the scope ID is just the interface
 * index, and embed it in the IPv6 destination address accordingly.
 * This is potentially NOT VALID for MLDv1 reports, as they
 * are always sent to the multicast group itself; as MLDv2
 * reports are always sent to ff02::16, this is not an issue
 * when MLDv2 is in use.
 *
 * This does not however eliminate the LOR when ip6_output() itself
 * calls in6_setscope() internally whilst MLD_LOCK is held. This will
 * trigger a LOR warning in WITNESS when the ifnet is detached.
 *
 * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given
 * how it's used across the network stack. Here we're simply exploiting
 * the fact that MLD runs at a similar layer in the stack to scope6.c.
 *
 * VIMAGE:
 *  * Each in6_multi corresponds to an ifp, and each ifp corresponds
 *    to a vnet in ifp->if_vnet.
 */
static struct mtx mld_mtx;
static MALLOC_DEFINE(M_MLD, "mld", "mld state");

#define	MLD_EMBEDSCOPE(pin6, zoneid)					\
	if (IN6_IS_SCOPE_LINKLOCAL(pin6) ||				\
	    IN6_IS_ADDR_MC_INTFACELOCAL(pin6))				\
		(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)		\

/*
 * VIMAGE-wide globals.
 */
VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
VNET_DEFINE_STATIC(int, interface_timers_running6);
VNET_DEFINE_STATIC(int, state_change_timers_running6);
VNET_DEFINE_STATIC(int, current_state_timers_running6);

#define	V_mld_gsrdelay			VNET(mld_gsrdelay)
#define	V_mli_head			VNET(mli_head)
#define	V_interface_timers_running6	VNET(interface_timers_running6)
#define	V_state_change_timers_running6	VNET(state_change_timers_running6)
#define	V_current_state_timers_running6	VNET(current_state_timers_running6)

SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */

SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv6 Multicast Listener Discovery");

/*
 * Virtualized sysctls.
 */
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");
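
/*
 * Usage sketch (not part of the build): the rate limit above is exposed as
 * net.inet6.mld.gsrdelay and can be read or tuned from userland, e.g.
 *
 *   sysctl net.inet6.mld.gsrdelay      # show the current value (seconds)
 *   sysctl net.inet6.mld.gsrdelay=10   # allow at most one G-S-R response
 *                                      # per group every 10 seconds
 *
 * sysctl_mld_gsr() below accepts values in the range -1..59.
 */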

/*
 * Non-virtualized sysctls.
 */
static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
    "Per-interface MLDv2 state");

static int	mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

static int	mld_v2enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
    &mld_v2enable, 0, "Enable MLDv2");

static int	mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

/*
 * Packed Router Alert option structure declaration.
 */
struct mld_raopt {
	struct ip6_hbh		hbh;
	struct ip6_opt		pad;
	struct ip6_opt_router	ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 */
static struct mld_raopt mld_ra = {
	.hbh = { 0, 0 },
	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
	.ra = {
	    .ip6or_type = IP6OPT_ROUTER_ALERT,
	    .ip6or_len = IP6OPT_RTALERT_LEN - 2,
	    .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
	    .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
	}
};
static struct ip6_pktopts mld_po;

static __inline void
mld_save_context(struct mbuf *m, struct ifnet *ifp)
{

#ifdef VIMAGE
	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.flowid = ifp->if_index;
}

static __inline void
mld_scrub_context(struct mbuf *m)
{

	m->m_pkthdr.PH_loc.ptr = NULL;
	m->m_pkthdr.flowid = 0;
}

/*
 * Restore context from a queued output chain.
 * Return saved ifindex.
 *
 * VIMAGE: The assertion is there to make sure that we
 * actually called CURVNET_SET() with what's in the mbuf chain.
 */
static __inline uint32_t
mld_restore_context(struct mbuf *m)
{

#if defined(VIMAGE) && defined(INVARIANTS)
	KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
	    ("%s: called when curvnet was not restored: curvnet %p m ptr %p",
	    __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
#endif
	return (m->m_pkthdr.flowid);
}

/*
 * Retrieve or set threshold between group-source queries in seconds.
 *
 * VIMAGE: Assume curvnet set by caller.
 * SMPng: NOTE: Serialized by MLD lock.
 */
static int
sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
{
	int error;
	int i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error)
		return (error);

	MLD_LOCK();

	i = V_mld_gsrdelay.tv_sec;

	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr)
		goto out_locked;

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
	    V_mld_gsrdelay.tv_sec, i);
	V_mld_gsrdelay.tv_sec = i;

out_locked:
	MLD_UNLOCK();
	return (error);
}

/*
 * Expose struct mld_ifsoftc to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 *
 * VIMAGE: Assume curvnet set by caller. The node handler itself
 * is not directly virtualized.
 */
static int
sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
{
	struct epoch_tracker et;
	int *name;
	int error;
	u_int namelen;
	struct ifnet *ifp;
	struct mld_ifsoftc *mli;

	name = (int *)arg1;
	namelen = arg2;

	if (req->newptr != NULL)
		return (EPERM);

	if (namelen != 1)
		return (EINVAL);

	error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo));
	if (error)
		return (error);

	IN6_MULTI_LOCK();
	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();
	NET_EPOCH_ENTER(et);

	error = ENOENT;
	ifp = ifnet_byindex(name[0]);
	if (ifp == NULL)
		goto out_locked;

	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		if (ifp == mli->mli_ifp) {
			struct mld_ifinfo info;

			info.mli_version = mli->mli_version;
			info.mli_v1_timer = mli->mli_v1_timer;
			info.mli_v2_timer = mli->mli_v2_timer;
			info.mli_flags = mli->mli_flags;
			info.mli_rv = mli->mli_rv;
			info.mli_qi = mli->mli_qi;
			info.mli_qri = mli->mli_qri;
			info.mli_uri = mli->mli_uri;
			error = SYSCTL_OUT(req, &info, sizeof(info));
			break;
		}
	}

out_locked:
	NET_EPOCH_EXIT(et);
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();
	IN6_MULTI_UNLOCK();
	return (error);
}

/*
 * Dispatch an entire queue of pending packet chains.
 * VIMAGE: Assumes the vnet pointer has been set.
 */
static void
mld_dispatch_queue(struct mbufq *mq, int limit)
{
	struct mbuf *m;

	while ((m = mbufq_dequeue(mq)) != NULL) {
		CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m);
		mld_dispatch_packet(m);
		if (--limit == 0)
			break;
	}
}

/*
 * Filter outgoing MLD report state by group.
 *
 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 * and node-local addresses. However, kernel and socket consumers
 * always embed the KAME scope ID in the address provided, so strip it
 * when performing comparison.
 * Note: This is not the same as the *multicast* scope.
 *
 * Return zero if the given group is one for which MLD reports
 * should be suppressed, or non-zero if reports should be issued.
 */
static __inline int
mld_is_addr_reported(const struct in6_addr *addr)
{

	KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
		return (0);

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
		struct in6_addr tmp = *addr;
		in6_clearscope(&tmp);
		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
			return (0);
	}

	return (1);
}
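
/*
 * For illustration (behaviour follows directly from the checks above):
 * a node-local group such as ff01::2 and the link-scope all-nodes group
 * ff02::1 are never reported, whereas a solicited-node group such as
 * ff02::1:ff00:1234, or any higher-scoped group, is reported as usual.
 */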

/*
 * Attach MLD when PF_INET6 is attached to an interface.  Assumes that the
 * current VNET is set by the caller.
 */
struct mld_ifsoftc *
mld_domifattach(struct ifnet *ifp)
{
	struct mld_ifsoftc *mli;

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp));

	mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_WAITOK | M_ZERO);
	mli->mli_ifp = ifp;
	mli->mli_version = MLD_VERSION_2;
	mli->mli_flags = 0;
	mli->mli_rv = MLD_RV_INIT;
	mli->mli_qi = MLD_QI_INIT;
	mli->mli_qri = MLD_QRI_INIT;
	mli->mli_uri = MLD_URI_INIT;
	mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);
	if ((ifp->if_flags & IFF_MULTICAST) == 0)
		mli->mli_flags |= MLIF_SILENT;
	if (mld_use_allow)
		mli->mli_flags |= MLIF_USEALLOW;

	MLD_LOCK();
	LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);
	MLD_UNLOCK();

	return (mli);
}

/*
 * Hook for ifdetach.
 *
 * NOTE: Some finalization tasks need to run before the protocol domain
 * is detached, but also before the link layer does its cleanup.
 * Run before link-layer cleanup; cleanup groups, but do not free MLD state.
 *
 * SMPng: Caller must hold IN6_MULTI_LOCK().
 * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator.
 * XXX This routine is also bitten by unlocked ifma_protospec access.
 */
void
mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
{
	struct epoch_tracker et;
	struct mld_ifsoftc *mli;
	struct ifmultiaddr *ifma;
	struct in6_multi *inm;

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
	    if_name(ifp));

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	IF_ADDR_WLOCK(ifp);
	/*
	 * Extract list of in6_multi associated with the detaching ifp
	 * which the PF_INET6 layer is about to release.
	 */
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		if (inm == NULL)
			continue;
		in6m_disconnect_locked(inmh, inm);

		if (mli->mli_version == MLD_VERSION_2) {
			in6m_clear_recorded(inm);

			/*
			 * We need to release the final reference held
			 * for issuing the INCLUDE {}.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER) {
				inm->in6m_state = MLD_NOT_MEMBER;
				in6m_rele_locked(inmh, inm);
			}
		}
	}
	NET_EPOCH_EXIT(et);
	IF_ADDR_WUNLOCK(ifp);
	MLD_UNLOCK();
}

/*
 * Hook for domifdetach.
 * Runs after link-layer cleanup; free MLD state.
 *
 * SMPng: Normally called with IF_AFDATA_LOCK held.
 */
void
mld_domifdetach(struct ifnet *ifp)
{

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK();
	mli_delete_locked(ifp);
	MLD_UNLOCK();
}

static void
mli_delete_locked(struct ifnet *ifp)
{
	struct mld_ifsoftc *mli, *tmli;

	CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK_ASSERT();

	LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
		if (mli->mli_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			mbufq_drain(&mli->mli_gq);

			LIST_REMOVE(mli, mli_link);

			free(mli, M_MLD);
			return;
		}
	}
}
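
/*
 * Worked example for the timer conversion performed in
 * mld_v1_input_query() below (a sketch; it assumes the MLD_FASTHZ == 5
 * and MLD_TIMER_SCALE == 1000 definitions from mld6_var.h): an MLDv1
 * query advertising a Maximum Response Delay of 10000 ms yields
 * 10000 * 5 / 1000 = 50 fast-timeout ticks, i.e. the report is scheduled
 * within the next 10 seconds of mld_fasttimo() runs.
 */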

/*
 * Process a received MLDv1 general or address-specific query.
 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct ifmultiaddr *ifma;
	struct mld_ifsoftc *mli;
	struct in6_multi *inm;
	int is_general_query;
	uint16_t timer;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	is_general_query = 0;

	if (!mld_v1enable) {
		CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * Do address field validation upfront before we accept
	 * the query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * MLDv1 General Query.
		 * If this was not sent to the all-nodes group, ignore it.
		 */
		struct in6_addr dst;

		dst = ip6->ip6_dst;
		in6_clearscope(&dst);
		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * Switch to MLDv1 host compatibility mode.
	 */
	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
	mld_set_version(mli, MLD_VERSION_1);

	timer = (ntohs(mld->mld_maxdelay) * MLD_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	if (is_general_query) {
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
		    ifp, if_name(ifp));
		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			inm = in6m_ifmultiaddr_get_inm(ifma);
			if (inm == NULL)
				continue;
			mld_v1_update_group(inm, timer);
		}
	} else {
		/*
		 * MLDv1 Group-Specific Query.
		 * If this is a group-specific MLDv1 query, we need only
		 * look up the single group to process it.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm != NULL) {
			CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
			    ifp, if_name(ifp));
			mld_v1_update_group(inm, timer);
		}
		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}

	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	return (0);
}

/*
 * Update the report timer on a group in response to an MLDv1 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to MLDv2. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike MLDv2, the delay per group should be jittered
 * to avoid bursts of MLDv1 reports.
 */
static void
mld_v1_update_group(struct in6_multi *inm, const int timer)
{
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp), timer);

	IN6_MULTI_LIST_LOCK_ASSERT();

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (inm->in6m_timer != 0 &&
		    inm->in6m_timer <= timer) {
			CTR1(KTR_MLD, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			break;
		}
		/* FALLTHROUGH */
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
		CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
		inm->in6m_state = MLD_REPORTING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		break;
	case MLD_SLEEPING_MEMBER:
		CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
		inm->in6m_state = MLD_AWAKENING_MEMBER;
		break;
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Process a received MLDv2 general, group-specific or
 * group-and-source-specific query.
 *
 * Assumes that mld points to a struct mldv2_query which is stored in
 * contiguous memory.
 *
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
{
	struct mld_ifsoftc *mli;
	struct in6_multi *inm;
	uint32_t maxdelay, nsrc, qqi;
	int is_general_query;
	uint16_t timer;
	uint8_t qrv;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	if (!mld_v2enable) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	is_general_query = 0;

	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));

	maxdelay = ntohs(mld->mld_maxdelay);	/* in milliseconds */
	if (maxdelay >= 32768) {
		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
		    (MLD_MRC_EXP(maxdelay) + 3);
	}
	timer = (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	qrv = MLD_QRV(mld->mld_misc);
	if (qrv < 2) {
		CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
		    qrv, MLD_RV_INIT);
		qrv = MLD_RV_INIT;
	}

	qqi = mld->mld_qqi;
	if (qqi >= 128) {
		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
		    (MLD_QQIC_EXP(mld->mld_qqi) + 3);
	}

	nsrc = ntohs(mld->mld_numsrc);
	if (nsrc > MLD_MAX_GS_SOURCES)
		return (EMSGSIZE);
	if (icmp6len < sizeof(struct mldv2_query) +
	    (nsrc * sizeof(struct in6_addr)))
		return (EMSGSIZE);

	/*
	 * Do further input validation upfront to avoid resetting timers
	 * should we need to discard this query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * A general query with a source list has undefined
		 * behaviour; discard it.
		 */
		if (nsrc > 0)
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks (due to KAME
		 * locking lameness). We own this mbuf chain just now.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));

	/*
	 * Discard the v2 query if we're in Compatibility Mode.
	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
	 * until the Old Version Querier Present timer expires.
	 */
	if (mli->mli_version != MLD_VERSION_2)
		goto out_locked;

	mld_set_version(mli, MLD_VERSION_2);
	mli->mli_rv = qrv;
	mli->mli_qi = qqi;
	mli->mli_qri = maxdelay;

	CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
	    maxdelay);

	if (is_general_query) {
		/*
		 * MLDv2 General Query.
		 *
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 *
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
		    ifp, if_name(ifp));
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
			V_interface_timers_running6 = 1;
		}
	} else {
		/*
		 * MLDv2 Group-specific or Group-and-source-specific Query.
		 *
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm == NULL)
			goto out_locked;
		if (nsrc > 0) {
			if (!ratecheck(&inm->in6m_lastgsrtv,
			    &V_mld_gsrdelay)) {
				CTR1(KTR_MLD, "%s: GS query throttled.",
				    __func__);
				goto out_locked;
			}
		}
		CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
		    ifp, if_name(ifp));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
			mld_v2_process_group_query(inm, mli, timer, m, mld, off);

		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	return (0);
}
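
/*
 * Worked example for the Maximum Response Code decoding in
 * mld_v2_input_query() above (a sketch; field layout per RFC 3810,
 * Section 5.1.3, as extracted by the MLD_MRC_MANT()/MLD_MRC_EXP()
 * macros): codes below 32768 are the delay in milliseconds directly,
 * while e.g. 0x8000 (mant 0, exp 0) decodes to
 * (0 | 0x1000) << (0 + 3) = 32768 ms, and larger codes scale up
 * exponentially from there before being converted to fast-timeout ticks.
 */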

/*
 * Process a received MLDv2 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
    int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
{
	int retval;
	uint16_t nsrc;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	retval = 0;

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LEAVING_MEMBER:
		return (retval);
		break;
	case MLD_REPORTING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(mld->mld_numsrc);

	/* Length should be checked by calling function. */
	KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
	    m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr),
	    ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
	    m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr), m0));

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
			in6m_clear_recorded(inm);
			timer = min(inm->in6m_timer, timer);
		}
		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->in6m_timer, timer);
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * to schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 */
	if (inm->in6m_nsrc > 0) {
		struct in6_addr srcaddr;
		int i, nrecorded;
		int soff;

		soff = off + sizeof(struct mldv2_query);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++) {
			m_copydata(m0, soff, sizeof(struct in6_addr),
			    (caddr_t)&srcaddr);
			retval = in6m_record_source(inm, &srcaddr);
			if (retval < 0)
				break;
			nrecorded += retval;
			soff += sizeof(struct in6_addr);
		}
		if (nrecorded > 0) {
			CTR1(KTR_MLD,
			    "%s: schedule response to SG query", __func__);
			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
			V_current_state_timers_running6 = 1;
		}
	}

	return (retval);
}

/*
 * Process a received MLDv1 host membership report.
 * Assumes mld points to mld_hdr in pulled up mbuf chain.
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct in6_addr src, dst;
	struct in6_ifaddr *ia;
	struct in6_multi *inm;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	if (!mld_v1enable) {
		CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
		    ifp, if_name(ifp));
		return (0);
	}

	if (ifp->if_flags & IFF_LOOPBACK)
		return (0);

	/*
	 * MLDv1 reports must originate from a host's link-local address,
	 * or the unspecified address (when booting).
	 */
	src = ip6->ip6_src;
	in6_clearscope(&src);
	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
		CTR3(KTR_MLD, "ignore v1 report src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (EINVAL);
	}

	/*
	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
	 * group, and must be directed to the group itself.
	 */
	dst = ip6->ip6_dst;
	in6_clearscope(&dst);
	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
		CTR3(KTR_MLD, "ignore v1 report dst %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
		    ifp, if_name(ifp));
		return (EINVAL);
	}

	/*
	 * Make sure we don't hear our own membership report, as fast
	 * leave requires knowing that we are the only member of a
	 * group. Assume we used the link-local address if available,
	 * otherwise look for ::.
	 *
	 * XXX Note that scope ID comparison is needed for the address
	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
	 * performed for the on-wire address.
	 */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) ||
	    (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (0);
	}
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);

	CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
	    ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));

	/*
	 * Embed scope ID of receiving interface in MLD query for lookup
	 * whilst we don't hold other locks (due to KAME locking lameness).
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
		in6_setscope(&mld->mld_addr, ifp, NULL);

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * MLDv1 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	inm = in6m_lookup_locked(ifp, &mld->mld_addr);
	if (inm != NULL) {
		struct mld_ifsoftc *mli;

		mli = inm->in6m_mli;
		KASSERT(mli != NULL,
		    ("%s: no mli for ifp %p", __func__, ifp));

		/*
		 * If we are in MLDv2 host mode, do not allow the
		 * other host's MLDv1 report to suppress our reports.
		 */
		if (mli->mli_version == MLD_VERSION_2)
			goto out_locked;

		inm->in6m_timer = 0;

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_SLEEPING_MEMBER:
			break;
		case MLD_REPORTING_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_AWAKENING_MEMBER:
			CTR3(KTR_MLD,
			    "report suppressed for %s on ifp %p(%s)",
			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
			    ifp, if_name(ifp));
			/* FALLTHROUGH */
		case MLD_LAZY_MEMBER:
			inm->in6m_state = MLD_LAZY_MEMBER;
			break;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
		case MLD_LEAVING_MEMBER:
			break;
		}
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	/* XXX Clear embedded scope ID as userland won't expect it. */
	in6_clearscope(&mld->mld_addr);

	return (0);
}

/*
 * MLD input path.
 *
 * Assume query messages which fit in a single ICMPv6 message header
 * have been pulled up.
 * Assume that userland will want to see the message, even if it
 * otherwise fails kernel input validation; do not free it.
 * Pullup may however free the mbuf chain m if it fails.
 *
 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
 */
int
mld_input(struct mbuf **mp, int off, int icmp6len)
{
	struct ifnet *ifp;
	struct ip6_hdr *ip6;
	struct mbuf *m;
	struct mld_hdr *mld;
	int mldlen;

	m = *mp;
	CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);

	ifp = m->m_pkthdr.rcvif;

	/* Pullup to appropriate size. */
	if (m->m_len < off + sizeof(*mld)) {
		m = m_pullup(m, off + sizeof(*mld));
		if (m == NULL) {
			ICMP6STAT_INC(icp6s_badlen);
			return (IPPROTO_DONE);
		}
	}
	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
	if (mld->mld_type == MLD_LISTENER_QUERY &&
	    icmp6len >= sizeof(struct mldv2_query)) {
		mldlen = sizeof(struct mldv2_query);
	} else {
		mldlen = sizeof(struct mld_hdr);
	}
	if (m->m_len < off + mldlen) {
		m = m_pullup(m, off + mldlen);
		if (m == NULL) {
			ICMP6STAT_INC(icp6s_badlen);
			return (IPPROTO_DONE);
		}
	}
	*mp = m;
	ip6 = mtod(m, struct ip6_hdr *);
	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);

	/*
	 * Userland needs to see all of this traffic for implementing
	 * the endpoint discovery portion of multicast routing.
	 */
	switch (mld->mld_type) {
	case MLD_LISTENER_QUERY:
		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
		if (icmp6len == sizeof(struct mld_hdr)) {
			if (mld_v1_input_query(ifp, ip6, mld) != 0)
				return (0);
		} else if (icmp6len >= sizeof(struct mldv2_query)) {
			if (mld_v2_input_query(ifp, ip6, m,
			    (struct mldv2_query *)mld, off, icmp6len) != 0)
				return (0);
		}
		break;
	case MLD_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		if (mld_v1_input_report(ifp, ip6, mld) != 0)
			return (0);
		break;
	case MLDV2_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		break;
	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
		break;
	default:
		break;
	}

	return (0);
}

/*
 * Fast timeout handler (global).
 * VIMAGE: Timeout handlers are expected to service all vimages.
 */
static struct callout mldfast_callout;
static void
mld_fasttimo(void *arg __unused)
{
	struct epoch_tracker et;
	struct in6_multi_head inmh;
	VNET_ITERATOR_DECL(vnet_iter);

	SLIST_INIT(&inmh);

	NET_EPOCH_ENTER(et);
	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		mld_fasttimo_vnet(&inmh);
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	NET_EPOCH_EXIT(et);
	in6m_release_list_deferred(&inmh);

	callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL);
}

/*
 * Fast timeout handler (per-vnet).
 *
 * VIMAGE: Assume caller has set up our curvnet.
 */
static void
mld_fasttimo_vnet(struct in6_multi_head *inmh)
{
	struct mbufq scq;	/* State-change packets */
	struct mbufq qrq;	/* Query response packets */
	struct ifnet *ifp;
	struct mld_ifsoftc *mli;
	struct ifmultiaddr *ifma;
	struct in6_multi *inm;
	int uri_fasthz;

	uri_fasthz = 0;

	/*
	 * Quick check to see if any work needs to be done, in order to
	 * minimize the overhead of fasttimo processing.
	 * SMPng: XXX Unlocked reads.
	 */
	if (!V_current_state_timers_running6 &&
	    !V_interface_timers_running6 &&
	    !V_state_change_timers_running6)
		return;

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * MLDv2 General Query response timer processing.
	 */
	if (V_interface_timers_running6) {
		CTR1(KTR_MLD, "%s: interface timers running", __func__);

		V_interface_timers_running6 = 0;
		LIST_FOREACH(mli, &V_mli_head, mli_link) {
			if (mli->mli_v2_timer == 0) {
				/* Do nothing. */
			} else if (--mli->mli_v2_timer == 0) {
				mld_v2_dispatch_general_query(mli);
			} else {
				V_interface_timers_running6 = 1;
			}
		}
	}

	if (!V_current_state_timers_running6 &&
	    !V_state_change_timers_running6)
		goto out_locked;

	V_current_state_timers_running6 = 0;
	V_state_change_timers_running6 = 0;

	CTR1(KTR_MLD, "%s: state change timers running", __func__);

	/*
	 * MLD host report and state-change timer processing.
	 * Note: Processing a v2 group timer may remove a node.
	 */
	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		ifp = mli->mli_ifp;

		if (mli->mli_version == MLD_VERSION_2) {
			uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
			    MLD_FASTHZ);
			mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
			mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
		}

		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			inm = in6m_ifmultiaddr_get_inm(ifma);
			if (inm == NULL)
				continue;
			switch (mli->mli_version) {
			case MLD_VERSION_1:
				mld_v1_process_group_timer(inmh, inm);
				break;
			case MLD_VERSION_2:
				mld_v2_process_group_timers(inmh, &qrq,
				    &scq, inm, uri_fasthz);
				break;
			}
		}
		IF_ADDR_WUNLOCK(ifp);

		switch (mli->mli_version) {
		case MLD_VERSION_1:
			/*
			 * Transmit reports for this lifecycle.  This
			 * is done while not holding IF_ADDR_LOCK
			 * since this can call
			 * in6ifa_ifpforlinklocal() which locks
			 * IF_ADDR_LOCK internally as well as
			 * ip6_output() to transmit a packet.
			 */
			while ((inm = SLIST_FIRST(inmh)) != NULL) {
				SLIST_REMOVE_HEAD(inmh, in6m_defer);
				(void)mld_v1_transmit_report(inm,
				    MLD_LISTENER_REPORT);
			}
			break;
		case MLD_VERSION_2:
			mld_dispatch_queue(&qrq, 0);
			mld_dispatch_queue(&scq, 0);
			break;
		}
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();
}

/*
 * Update host report group timer.
 * Will update the global pending timer flags.
 */
static void
mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
{
	int report_timer_expired;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	if (inm->in6m_timer == 0) {
		report_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		report_timer_expired = 1;
	} else {
		V_current_state_timers_running6 = 1;
		return;
	}

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_AWAKENING_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (report_timer_expired) {
			inm->in6m_state = MLD_IDLE_MEMBER;
			SLIST_INSERT_HEAD(inmh, inm, in6m_defer);
		}
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Update a group's timers for MLDv2.
 * Will update the global pending timer flags.
 * Note: Unlocked read from mli.
 */
static void
mld_v2_process_group_timers(struct in6_multi_head *inmh,
    struct mbufq *qrq, struct mbufq *scq,
    struct in6_multi *inm, const int uri_fasthz)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from compatibility mode back to MLDv2,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the slow-timeout path.
	 */
	if (inm->in6m_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		V_current_state_timers_running6 = 1;
	}

	if (inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		V_state_change_timers_running6 = 1;
	}

	/* We are in fasttimo, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired)
		return;

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval __unused;

			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
			    0);
			CTR2(KTR_MLD, "%s: enqueue record = %d",
			    __func__, retval);
			inm->in6m_state = MLD_REPORTING_MEMBER;
			in6m_clear_recorded(inm);
		}
		/* FALLTHROUGH */
	case MLD_REPORTING_MEMBER:
	case MLD_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->in6m_scrv > 0) {
				inm->in6m_sctimer = uri_fasthz;
				V_state_change_timers_running6 = 1;
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void)mld_v2_merge_state_changes(inm, scq);

			in6m_commit(inm);
			CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
			    if_name(inm->in6m_ifp));

			/*
			 * If we are leaving the group for good, make sure
			 * we release MLD's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_ifmultiaddr TAILQ.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
			    inm->in6m_scrv == 0) {
				inm->in6m_state = MLD_NOT_MEMBER;
				in6m_disconnect_locked(inmh, inm);
				in6m_rele_locked(inmh, inm);
			}
		}
		break;
	}
}

/*
 * Switch to a different version on the given interface,
 * as per Section 9.12.
 */
static void
mld_set_version(struct mld_ifsoftc *mli, const int version)
{
	int old_version_timer;

	MLD_LOCK_ASSERT();

	CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
	    version, mli->mli_ifp, if_name(mli->mli_ifp));

	if (version == MLD_VERSION_1) {
		/*
		 * Compute the "Older Version Querier Present" timer as per
		 * Section 9.12.
		 */
		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
		old_version_timer *= MLD_SLOWHZ;
		mli->mli_v1_timer = old_version_timer;
	}

	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
		mli->mli_version = MLD_VERSION_1;
		mld_v2_cancel_link_timers(mli);
	}
}

/*
 * Cancel pending MLDv2 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 */
static void
mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
{
	struct epoch_tracker et;
	struct in6_multi_head inmh;
	struct ifmultiaddr *ifma;
	struct ifnet *ifp;
	struct in6_multi *inm;

	CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
	    mli->mli_ifp, if_name(mli->mli_ifp));

	SLIST_INIT(&inmh);
	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	/*
	 * Fast-track this potentially expensive operation
	 * by checking all the global 'timer pending' flags.
	 */
	if (!V_interface_timers_running6 &&
	    !V_state_change_timers_running6 &&
	    !V_current_state_timers_running6)
		return;

	mli->mli_v2_timer = 0;

	ifp = mli->mli_ifp;

	IF_ADDR_WLOCK(ifp);
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		if (inm == NULL)
			continue;
		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_LAZY_MEMBER:
		case MLD_SLEEPING_MEMBER:
		case MLD_AWAKENING_MEMBER:
			break;
		case MLD_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching
			 * version, we need to release the final
			 * reference held for issuing the INCLUDE {}.
			 */
			if (inm->in6m_refcount == 1)
				in6m_disconnect_locked(&inmh, inm);
			in6m_rele_locked(&inmh, inm);
			/* FALLTHROUGH */
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
			in6m_clear_recorded(inm);
			/* FALLTHROUGH */
		case MLD_REPORTING_MEMBER:
			inm->in6m_sctimer = 0;
			inm->in6m_timer = 0;
			inm->in6m_state = MLD_REPORTING_MEMBER;
			/*
			 * Free any pending MLDv2 state-change records.
			 */
			mbufq_drain(&inm->in6m_scq);
			break;
		}
	}
	NET_EPOCH_EXIT(et);
	IF_ADDR_WUNLOCK(ifp);
	in6m_release_list_deferred(&inmh);
}

/*
 * Global slowtimo handler.
 * VIMAGE: Timeout handlers are expected to service all vimages.
 */
static struct callout mldslow_callout;
static void
mld_slowtimo(void *arg __unused)
{
	VNET_ITERATOR_DECL(vnet_iter);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		mld_slowtimo_vnet();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();

	callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL);
}

/*
 * Per-vnet slowtimo handler.
 */
static void
mld_slowtimo_vnet(void)
{
	struct mld_ifsoftc *mli;

	MLD_LOCK();

	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		mld_v1_process_querier_timers(mli);
	}

	MLD_UNLOCK();
}

/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 9.12 of RFC 3810.
 */
static void
mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
{

	MLD_LOCK_ASSERT();

	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
		/*
		 * MLDv1 Querier Present timer expired; revert to MLDv2.
		 */
		CTR5(KTR_MLD,
		    "%s: transition from v%d -> v%d on %p(%s)",
		    __func__, mli->mli_version, MLD_VERSION_2,
		    mli->mli_ifp, if_name(mli->mli_ifp));
		mli->mli_version = MLD_VERSION_2;
	}
}

/*
 * Transmit an MLDv1 report immediately.
 */
static int
mld_v1_transmit_report(struct in6_multi *in6m, const int type)
{
	struct ifnet *ifp;
	struct in6_ifaddr *ia;
	struct ip6_hdr *ip6;
	struct mbuf *mh, *md;
	struct mld_hdr *mld;

	NET_EPOCH_ASSERT();
	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	ifp = in6m->in6m_ifp;
	/* in process of being freed */
	if (ifp == NULL)
		return (0);
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	/* ia may be NULL if link-local address is tentative. */

	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	md = m_get(M_NOWAIT, MT_DATA);
	if (md == NULL) {
		m_free(mh);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	mh->m_next = md;

	/*
	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	 * that ether_output() does not need to allocate another mbuf
	 * for the header in the most common case.
	 */
	M_ALIGN(mh, sizeof(struct ip6_hdr));
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	mh->m_len = sizeof(struct ip6_hdr);

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6->ip6_dst = in6m->in6m_addr;

	md->m_len = sizeof(struct mld_hdr);
	mld = mtod(md, struct mld_hdr *);
	mld->mld_type = type;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_maxdelay = 0;
	mld->mld_reserved = 0;
	mld->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mld->mld_addr);
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));

	mld_save_context(mh, ifp);
	mh->m_flags |= M_MLDV1;

	mld_dispatch_packet(mh);

	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv6 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state, it is now up to MLD to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the MLDv2 state machine at group level. The MLD module
 * however makes the decision as to which MLD protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
 *
 * If delay is non-zero, and the state change is an initial multicast
 * join, the state change report will be delayed by 'delay' ticks
 * in units of MLD_FASTHZ if MLDv1 is active on the link; otherwise
 * the initial MLDv2 state change report will be delayed by whichever
 * is sooner, a pending state-change timer or delay itself.
 *
 * VIMAGE: curvnet should have been set by caller, as this routine
 * is called from the socket option handlers.
 */
int
mld_change_state(struct in6_multi *inm, const int delay)
{
	struct mld_ifsoftc *mli;
	struct ifnet *ifp;
	int error;

	IN6_MULTI_LIST_LOCK_ASSERT();

	error = 0;

	/*
	 * Check if the in6_multi has already been disconnected.
	 */
	if (inm->in6m_ifp == NULL) {
		CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
		return (0);
	}

	/*
	 * Try to detect if the upper layer just asked us to change state
	 * for an interface which has now gone away.
	 */
	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->in6m_ifma->ifma_ifp;
	if (ifp == NULL)
		return (0);
	/*
	 * Sanity check that netinet6's notion of ifp is the
	 * same as net's.
1895 */ 1896 KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); 1897 1898 MLD_LOCK(); 1899 mli = MLD_IFINFO(ifp); 1900 KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); 1901 1902 /* 1903 * If we detect a state transition to or from MCAST_UNDEFINED 1904 * for this group, then we are starting or finishing an MLD 1905 * life cycle for this group. 1906 */ 1907 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) { 1908 CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__, 1909 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode); 1910 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { 1911 CTR1(KTR_MLD, "%s: initial join", __func__); 1912 error = mld_initial_join(inm, mli, delay); 1913 goto out_locked; 1914 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { 1915 CTR1(KTR_MLD, "%s: final leave", __func__); 1916 mld_final_leave(inm, mli); 1917 goto out_locked; 1918 } 1919 } else { 1920 CTR1(KTR_MLD, "%s: filter set change", __func__); 1921 } 1922 1923 error = mld_handle_state_change(inm, mli); 1924 1925 out_locked: 1926 MLD_UNLOCK(); 1927 return (error); 1928 } 1929 1930 /* 1931 * Perform the initial join for an MLD group. 1932 * 1933 * When joining a group: 1934 * If the group should have its MLD traffic suppressed, do nothing. 1935 * MLDv1 starts sending MLDv1 host membership reports. 1936 * MLDv2 will schedule an MLDv2 state-change report containing the 1937 * initial state of the membership. 1938 * 1939 * If the delay argument is non-zero, then we must delay sending the 1940 * initial state change for delay ticks (in units of MLD_FASTHZ). 1941 */ 1942 static int 1943 mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, 1944 const int delay) 1945 { 1946 struct epoch_tracker et; 1947 struct ifnet *ifp; 1948 struct mbufq *mq; 1949 int error, retval, syncstates; 1950 int odelay; 1951 #ifdef KTR 1952 char ip6tbuf[INET6_ADDRSTRLEN]; 1953 #endif 1954 1955 CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)", 1956 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 1957 inm->in6m_ifp, if_name(inm->in6m_ifp)); 1958 1959 error = 0; 1960 syncstates = 1; 1961 1962 ifp = inm->in6m_ifp; 1963 1964 IN6_MULTI_LIST_LOCK_ASSERT(); 1965 MLD_LOCK_ASSERT(); 1966 1967 KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__)); 1968 1969 /* 1970 * Groups joined on loopback or marked as 'not reported', 1971 * enter the MLD_SILENT_MEMBER state and 1972 * are never reported in any protocol exchanges. 1973 * All other groups enter the appropriate state machine 1974 * for the version in use on this link. 1975 * A link marked as MLIF_SILENT causes MLD to be completely 1976 * disabled for the link. 1977 */ 1978 if ((ifp->if_flags & IFF_LOOPBACK) || 1979 (mli->mli_flags & MLIF_SILENT) || 1980 !mld_is_addr_reported(&inm->in6m_addr)) { 1981 CTR1(KTR_MLD, 1982 "%s: not kicking state machine for silent group", __func__); 1983 inm->in6m_state = MLD_SILENT_MEMBER; 1984 inm->in6m_timer = 0; 1985 } else { 1986 /* 1987 * Deal with overlapping in_multi lifecycle. 1988 * If this group was LEAVING, then make sure 1989 * we drop the reference we picked up to keep the 1990 * group around for the final INCLUDE {} enqueue. 
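 * That reference was taken via in6m_acquire_locked() in
 * mld_final_leave() when the pending state-change record was
 * enqueued; re-joining before it has been transmitted supersedes
 * the pending leave.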
1991 */ 1992 if (mli->mli_version == MLD_VERSION_2 && 1993 inm->in6m_state == MLD_LEAVING_MEMBER) { 1994 inm->in6m_refcount--; 1995 MPASS(inm->in6m_refcount > 0); 1996 } 1997 inm->in6m_state = MLD_REPORTING_MEMBER; 1998 1999 switch (mli->mli_version) { 2000 case MLD_VERSION_1: 2001 /* 2002 * If a delay was provided, only use it if 2003 * it is greater than the delay normally 2004 * used for an MLDv1 state change report, 2005 * and delay sending the initial MLDv1 report 2006 * by not transitioning to the IDLE state. 2007 */ 2008 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * MLD_FASTHZ); 2009 if (delay) { 2010 inm->in6m_timer = max(delay, odelay); 2011 V_current_state_timers_running6 = 1; 2012 } else { 2013 inm->in6m_state = MLD_IDLE_MEMBER; 2014 NET_EPOCH_ENTER(et); 2015 error = mld_v1_transmit_report(inm, 2016 MLD_LISTENER_REPORT); 2017 NET_EPOCH_EXIT(et); 2018 if (error == 0) { 2019 inm->in6m_timer = odelay; 2020 V_current_state_timers_running6 = 1; 2021 } 2022 } 2023 break; 2024 2025 case MLD_VERSION_2: 2026 /* 2027 * Defer update of T0 to T1, until the first copy 2028 * of the state change has been transmitted. 2029 */ 2030 syncstates = 0; 2031 2032 /* 2033 * Immediately enqueue a State-Change Report for 2034 * this interface, freeing any previous reports. 2035 * Don't kick the timers if there is nothing to do, 2036 * or if an error occurred. 2037 */ 2038 mq = &inm->in6m_scq; 2039 mbufq_drain(mq); 2040 retval = mld_v2_enqueue_group_record(mq, inm, 1, 2041 0, 0, (mli->mli_flags & MLIF_USEALLOW)); 2042 CTR2(KTR_MLD, "%s: enqueue record = %d", 2043 __func__, retval); 2044 if (retval <= 0) { 2045 error = retval * -1; 2046 break; 2047 } 2048 2049 /* 2050 * Schedule transmission of pending state-change 2051 * report up to RV times for this link. The timer 2052 * will fire at the next mld_fasttimo (~200ms), 2053 * giving us an opportunity to merge the reports. 2054 * 2055 * If a delay was provided to this function, only 2056 * use this delay if sooner than the existing one. 2057 */ 2058 KASSERT(mli->mli_rv > 1, 2059 ("%s: invalid robustness %d", __func__, 2060 mli->mli_rv)); 2061 inm->in6m_scrv = mli->mli_rv; 2062 if (delay) { 2063 if (inm->in6m_sctimer > 1) { 2064 inm->in6m_sctimer = 2065 min(inm->in6m_sctimer, delay); 2066 } else 2067 inm->in6m_sctimer = delay; 2068 } else 2069 inm->in6m_sctimer = 1; 2070 V_state_change_timers_running6 = 1; 2071 2072 error = 0; 2073 break; 2074 } 2075 } 2076 2077 /* 2078 * Only update the T0 state if state change is atomic, 2079 * i.e. we don't need to wait for a timer to fire before we 2080 * can consider the state change to have been communicated. 2081 */ 2082 if (syncstates) { 2083 in6m_commit(inm); 2084 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2085 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2086 if_name(inm->in6m_ifp)); 2087 } 2088 2089 return (error); 2090 } 2091 2092 /* 2093 * Issue an intermediate state change during the life-cycle. 
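 * This covers filter set changes which are neither an initial join
 * nor a final leave; on MLDv1 links and for silent groups the new
 * state is simply committed without sending a report.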
2094 */ 2095 static int 2096 mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli) 2097 { 2098 struct ifnet *ifp; 2099 int retval; 2100 #ifdef KTR 2101 char ip6tbuf[INET6_ADDRSTRLEN]; 2102 #endif 2103 2104 CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)", 2105 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2106 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2107 2108 ifp = inm->in6m_ifp; 2109 2110 IN6_MULTI_LIST_LOCK_ASSERT(); 2111 MLD_LOCK_ASSERT(); 2112 2113 KASSERT(mli && mli->mli_ifp == ifp, 2114 ("%s: inconsistent ifp", __func__)); 2115 2116 if ((ifp->if_flags & IFF_LOOPBACK) || 2117 (mli->mli_flags & MLIF_SILENT) || 2118 !mld_is_addr_reported(&inm->in6m_addr) || 2119 (mli->mli_version != MLD_VERSION_2)) { 2120 if (!mld_is_addr_reported(&inm->in6m_addr)) { 2121 CTR1(KTR_MLD, 2122 "%s: not kicking state machine for silent group", __func__); 2123 } 2124 CTR1(KTR_MLD, "%s: nothing to do", __func__); 2125 in6m_commit(inm); 2126 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2127 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2128 if_name(inm->in6m_ifp)); 2129 return (0); 2130 } 2131 2132 mbufq_drain(&inm->in6m_scq); 2133 2134 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, 2135 (mli->mli_flags & MLIF_USEALLOW)); 2136 CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); 2137 if (retval <= 0) 2138 return (-retval); 2139 2140 /* 2141 * If record(s) were enqueued, start the state-change 2142 * report timer for this group. 2143 */ 2144 inm->in6m_scrv = mli->mli_rv; 2145 inm->in6m_sctimer = 1; 2146 V_state_change_timers_running6 = 1; 2147 2148 return (0); 2149 } 2150 2151 /* 2152 * Perform the final leave for a multicast address. 2153 * 2154 * When leaving a group: 2155 * MLDv1 sends a DONE message, if and only if we are the reporter. 2156 * MLDv2 enqueues a state-change report containing a transition 2157 * to INCLUDE {} for immediate transmission. 2158 */ 2159 static void 2160 mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) 2161 { 2162 struct epoch_tracker et; 2163 #ifdef KTR 2164 char ip6tbuf[INET6_ADDRSTRLEN]; 2165 #endif 2166 2167 CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", 2168 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2169 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2170 2171 IN6_MULTI_LIST_LOCK_ASSERT(); 2172 MLD_LOCK_ASSERT(); 2173 2174 switch (inm->in6m_state) { 2175 case MLD_NOT_MEMBER: 2176 case MLD_SILENT_MEMBER: 2177 case MLD_LEAVING_MEMBER: 2178 /* Already leaving or left; do nothing. */ 2179 CTR1(KTR_MLD, 2180 "%s: not kicking state machine for silent group", __func__); 2181 break; 2182 case MLD_REPORTING_MEMBER: 2183 case MLD_IDLE_MEMBER: 2184 case MLD_G_QUERY_PENDING_MEMBER: 2185 case MLD_SG_QUERY_PENDING_MEMBER: 2186 if (mli->mli_version == MLD_VERSION_1) { 2187 #ifdef INVARIANTS 2188 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || 2189 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) 2190 panic("%s: MLDv2 state reached, not MLDv2 mode", 2191 __func__); 2192 #endif 2193 NET_EPOCH_ENTER(et); 2194 mld_v1_transmit_report(inm, MLD_LISTENER_DONE); 2195 NET_EPOCH_EXIT(et); 2196 inm->in6m_state = MLD_NOT_MEMBER; 2197 V_current_state_timers_running6 = 1; 2198 } else if (mli->mli_version == MLD_VERSION_2) { 2199 /* 2200 * Stop group timer and all pending reports. 2201 * Immediately enqueue a state-change report 2202 * TO_IN {} to be sent on the next fast timeout, 2203 * giving us an opportunity to merge reports. 
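 * The record will be retransmitted up to Robustness Variable
 * (mli_rv) times; an extra reference holds the in6_multi until the
 * last retransmission has been sent.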
2204 */ 2205 mbufq_drain(&inm->in6m_scq); 2206 inm->in6m_timer = 0; 2207 inm->in6m_scrv = mli->mli_rv; 2208 CTR4(KTR_MLD, "%s: Leaving %s/%s with %d " 2209 "pending retransmissions.", __func__, 2210 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2211 if_name(inm->in6m_ifp), inm->in6m_scrv); 2212 if (inm->in6m_scrv == 0) { 2213 inm->in6m_state = MLD_NOT_MEMBER; 2214 inm->in6m_sctimer = 0; 2215 } else { 2216 int retval __diagused; 2217 2218 in6m_acquire_locked(inm); 2219 2220 retval = mld_v2_enqueue_group_record( 2221 &inm->in6m_scq, inm, 1, 0, 0, 2222 (mli->mli_flags & MLIF_USEALLOW)); 2223 KASSERT(retval != 0, 2224 ("%s: enqueue record = %d", __func__, 2225 retval)); 2226 2227 inm->in6m_state = MLD_LEAVING_MEMBER; 2228 inm->in6m_sctimer = 1; 2229 V_state_change_timers_running6 = 1; 2230 } 2231 break; 2232 } 2233 break; 2234 case MLD_LAZY_MEMBER: 2235 case MLD_SLEEPING_MEMBER: 2236 case MLD_AWAKENING_MEMBER: 2237 /* Our reports are suppressed; do nothing. */ 2238 break; 2239 } 2240 2241 in6m_commit(inm); 2242 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2243 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2244 if_name(inm->in6m_ifp)); 2245 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; 2246 CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s", 2247 __func__, &inm->in6m_addr, if_name(inm->in6m_ifp)); 2248 } 2249 2250 /* 2251 * Enqueue an MLDv2 group record to the given output queue. 2252 * 2253 * If is_state_change is zero, a current-state record is appended. 2254 * If is_state_change is non-zero, a state-change report is appended. 2255 * 2256 * If is_group_query is non-zero, an mbuf packet chain is allocated. 2257 * If is_group_query is zero, and if there is a packet with free space 2258 * at the tail of the queue, it will be appended to providing there 2259 * is enough free space. 2260 * Otherwise a new mbuf packet chain is allocated. 2261 * 2262 * If is_source_query is non-zero, each source is checked to see if 2263 * it was recorded for a Group-Source query, and will be omitted if 2264 * it is not both in-mode and recorded. 2265 * 2266 * If use_block_allow is non-zero, state change reports for initial join 2267 * and final leave, on an inclusive mode group with a source list, will be 2268 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively. 2269 * 2270 * The function will attempt to allocate leading space in the packet 2271 * for the IPv6+ICMP headers to be prepended without fragmenting the chain. 2272 * 2273 * If successful the size of all data appended to the queue is returned, 2274 * otherwise an error code less than zero is returned, or zero if 2275 * no record(s) were appended. 
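 * Callers therefore treat a negative return value as -errno
 * (typically -ENOMEM) and a positive value as the number of bytes
 * queued.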
2276 */ 2277 static int 2278 mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm, 2279 const int is_state_change, const int is_group_query, 2280 const int is_source_query, const int use_block_allow) 2281 { 2282 struct mldv2_record mr; 2283 struct mldv2_record *pmr; 2284 struct ifnet *ifp; 2285 struct ip6_msource *ims, *nims; 2286 struct mbuf *m0, *m, *md; 2287 int is_filter_list_change; 2288 int minrec0len, m0srcs, msrcs, nbytes, off; 2289 int record_has_sources; 2290 int now; 2291 int type; 2292 uint8_t mode; 2293 #ifdef KTR 2294 char ip6tbuf[INET6_ADDRSTRLEN]; 2295 #endif 2296 2297 IN6_MULTI_LIST_LOCK_ASSERT(); 2298 2299 ifp = inm->in6m_ifp; 2300 is_filter_list_change = 0; 2301 m = NULL; 2302 m0 = NULL; 2303 m0srcs = 0; 2304 msrcs = 0; 2305 nbytes = 0; 2306 nims = NULL; 2307 record_has_sources = 1; 2308 pmr = NULL; 2309 type = MLD_DO_NOTHING; 2310 mode = inm->in6m_st[1].iss_fmode; 2311 2312 /* 2313 * If we did not transition out of ASM mode during t0->t1, 2314 * and there are no source nodes to process, we can skip 2315 * the generation of source records. 2316 */ 2317 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 && 2318 inm->in6m_nsrc == 0) 2319 record_has_sources = 0; 2320 2321 if (is_state_change) { 2322 /* 2323 * Queue a state change record. 2324 * If the mode did not change, and there are non-ASM 2325 * listeners or source filters present, 2326 * we potentially need to issue two records for the group. 2327 * If there are ASM listeners, and there was no filter 2328 * mode transition of any kind, do nothing. 2329 * 2330 * If we are transitioning to MCAST_UNDEFINED, we need 2331 * not send any sources. A transition to/from this state is 2332 * considered inclusive with some special treatment. 2333 * 2334 * If we are rewriting initial joins/leaves to use 2335 * ALLOW/BLOCK, and the group's membership is inclusive, 2336 * we need to send sources in all cases. 2337 */ 2338 if (mode != inm->in6m_st[0].iss_fmode) { 2339 if (mode == MCAST_EXCLUDE) { 2340 CTR1(KTR_MLD, "%s: change to EXCLUDE", 2341 __func__); 2342 type = MLD_CHANGE_TO_EXCLUDE_MODE; 2343 } else { 2344 CTR1(KTR_MLD, "%s: change to INCLUDE", 2345 __func__); 2346 if (use_block_allow) { 2347 /* 2348 * XXX 2349 * Here we're interested in state 2350 * edges either direction between 2351 * MCAST_UNDEFINED and MCAST_INCLUDE. 2352 * Perhaps we should just check 2353 * the group state, rather than 2354 * the filter mode. 2355 */ 2356 if (mode == MCAST_UNDEFINED) { 2357 type = MLD_BLOCK_OLD_SOURCES; 2358 } else { 2359 type = MLD_ALLOW_NEW_SOURCES; 2360 } 2361 } else { 2362 type = MLD_CHANGE_TO_INCLUDE_MODE; 2363 if (mode == MCAST_UNDEFINED) 2364 record_has_sources = 0; 2365 } 2366 } 2367 } else { 2368 if (record_has_sources) { 2369 is_filter_list_change = 1; 2370 } else { 2371 type = MLD_DO_NOTHING; 2372 } 2373 } 2374 } else { 2375 /* 2376 * Queue a current state record. 2377 */ 2378 if (mode == MCAST_EXCLUDE) { 2379 type = MLD_MODE_IS_EXCLUDE; 2380 } else if (mode == MCAST_INCLUDE) { 2381 type = MLD_MODE_IS_INCLUDE; 2382 KASSERT(inm->in6m_st[1].iss_asm == 0, 2383 ("%s: inm %p is INCLUDE but ASM count is %d", 2384 __func__, inm, inm->in6m_st[1].iss_asm)); 2385 } 2386 } 2387 2388 /* 2389 * Generate the filter list changes using a separate function. 
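 * mld_v2_enqueue_filter_change() walks the source tree once per
 * record type, emitting ALLOW_NEW and BLOCK_OLD records as needed.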
2390 */ 2391 if (is_filter_list_change) 2392 return (mld_v2_enqueue_filter_change(mq, inm)); 2393 2394 if (type == MLD_DO_NOTHING) { 2395 CTR3(KTR_MLD, "%s: nothing to do for %s/%s", 2396 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2397 if_name(inm->in6m_ifp)); 2398 return (0); 2399 } 2400 2401 /* 2402 * If any sources are present, we must be able to fit at least 2403 * one in the trailing space of the tail packet's mbuf, 2404 * ideally more. 2405 */ 2406 minrec0len = sizeof(struct mldv2_record); 2407 if (record_has_sources) 2408 minrec0len += sizeof(struct in6_addr); 2409 2410 CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, 2411 mld_rec_type_to_str(type), 2412 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2413 if_name(inm->in6m_ifp)); 2414 2415 /* 2416 * Check if we have a packet in the tail of the queue for this 2417 * group into which the first group record for this group will fit. 2418 * Otherwise allocate a new packet. 2419 * Always allocate leading space for IP6+RA+ICMPV6+REPORT. 2420 * Note: Group records for G/GSR query responses MUST be sent 2421 * in their own packet. 2422 */ 2423 m0 = mbufq_last(mq); 2424 if (!is_group_query && 2425 m0 != NULL && 2426 (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && 2427 (m0->m_pkthdr.len + minrec0len) < 2428 (ifp->if_mtu - MLD_MTUSPACE)) { 2429 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2430 sizeof(struct mldv2_record)) / 2431 sizeof(struct in6_addr); 2432 m = m0; 2433 CTR1(KTR_MLD, "%s: use existing packet", __func__); 2434 } else { 2435 if (mbufq_full(mq)) { 2436 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2437 return (-ENOMEM); 2438 } 2439 m = NULL; 2440 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2441 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2442 if (!is_state_change && !is_group_query) 2443 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2444 if (m == NULL) 2445 m = m_gethdr(M_NOWAIT, MT_DATA); 2446 if (m == NULL) 2447 return (-ENOMEM); 2448 2449 mld_save_context(m, ifp); 2450 2451 CTR1(KTR_MLD, "%s: allocated first packet", __func__); 2452 } 2453 2454 /* 2455 * Append group record. 2456 * If we have sources, we don't know how many yet. 2457 */ 2458 mr.mr_type = type; 2459 mr.mr_datalen = 0; 2460 mr.mr_numsrc = 0; 2461 mr.mr_addr = inm->in6m_addr; 2462 in6_clearscope(&mr.mr_addr); 2463 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2464 if (m != m0) 2465 m_freem(m); 2466 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2467 return (-ENOMEM); 2468 } 2469 nbytes += sizeof(struct mldv2_record); 2470 2471 /* 2472 * Append as many sources as will fit in the first packet. 2473 * If we are appending to a new packet, the chain allocation 2474 * may potentially use clusters; use m_getptr() in this case. 2475 * If we are appending to an existing packet, we need to obtain 2476 * a pointer to the group record after m_append(), in case a new 2477 * mbuf was allocated. 2478 * 2479 * Only append sources which are in-mode at t1. If we are 2480 * transitioning to MCAST_UNDEFINED state on the group, and 2481 * use_block_allow is zero, do not include source entries. 2482 * Otherwise, we need to include this source in the report. 2483 * 2484 * Only report recorded sources in our filter set when responding 2485 * to a group-source query. 
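 * A source counts as recorded when its im6s_stp flag was set while
 * the group-and-source query was being processed.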
2486 */ 2487 if (record_has_sources) { 2488 if (m == m0) { 2489 md = m_last(m); 2490 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2491 md->m_len - nbytes); 2492 } else { 2493 md = m_getptr(m, 0, &off); 2494 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2495 off); 2496 } 2497 msrcs = 0; 2498 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, 2499 nims) { 2500 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2501 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2502 now = im6s_get_mode(inm, ims, 1); 2503 CTR2(KTR_MLD, "%s: node is %d", __func__, now); 2504 if ((now != mode) || 2505 (now == mode && 2506 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2507 CTR1(KTR_MLD, "%s: skip node", __func__); 2508 continue; 2509 } 2510 if (is_source_query && ims->im6s_stp == 0) { 2511 CTR1(KTR_MLD, "%s: skip unrecorded node", 2512 __func__); 2513 continue; 2514 } 2515 CTR1(KTR_MLD, "%s: append node", __func__); 2516 if (!m_append(m, sizeof(struct in6_addr), 2517 (void *)&ims->im6s_addr)) { 2518 if (m != m0) 2519 m_freem(m); 2520 CTR1(KTR_MLD, "%s: m_append() failed.", 2521 __func__); 2522 return (-ENOMEM); 2523 } 2524 nbytes += sizeof(struct in6_addr); 2525 ++msrcs; 2526 if (msrcs == m0srcs) 2527 break; 2528 } 2529 CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__, 2530 msrcs); 2531 pmr->mr_numsrc = htons(msrcs); 2532 nbytes += (msrcs * sizeof(struct in6_addr)); 2533 } 2534 2535 if (is_source_query && msrcs == 0) { 2536 CTR1(KTR_MLD, "%s: no recorded sources to report", __func__); 2537 if (m != m0) 2538 m_freem(m); 2539 return (0); 2540 } 2541 2542 /* 2543 * We are good to go with first packet. 2544 */ 2545 if (m != m0) { 2546 CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); 2547 m->m_pkthdr.vt_nrecs = 1; 2548 mbufq_enqueue(mq, m); 2549 } else 2550 m->m_pkthdr.vt_nrecs++; 2551 2552 /* 2553 * No further work needed if no source list in packet(s). 2554 */ 2555 if (!record_has_sources) 2556 return (nbytes); 2557 2558 /* 2559 * Whilst sources remain to be announced, we need to allocate 2560 * a new packet and fill out as many sources as will fit. 2561 * Always try for a cluster first. 
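 * Each follow-up packet carries its own copy of the group record
 * header; nims preserves the tree position so the walk resumes
 * where the previous packet left off.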
2562 */ 2563 while (nims != NULL) { 2564 if (mbufq_full(mq)) { 2565 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2566 return (-ENOMEM); 2567 } 2568 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2569 if (m == NULL) 2570 m = m_gethdr(M_NOWAIT, MT_DATA); 2571 if (m == NULL) 2572 return (-ENOMEM); 2573 mld_save_context(m, ifp); 2574 md = m_getptr(m, 0, &off); 2575 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); 2576 CTR1(KTR_MLD, "%s: allocated next packet", __func__); 2577 2578 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2579 if (m != m0) 2580 m_freem(m); 2581 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2582 return (-ENOMEM); 2583 } 2584 m->m_pkthdr.vt_nrecs = 1; 2585 nbytes += sizeof(struct mldv2_record); 2586 2587 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2588 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2589 2590 msrcs = 0; 2591 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2592 CTR2(KTR_MLD, "%s: visit node %s", 2593 __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2594 now = im6s_get_mode(inm, ims, 1); 2595 if ((now != mode) || 2596 (now == mode && 2597 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2598 CTR1(KTR_MLD, "%s: skip node", __func__); 2599 continue; 2600 } 2601 if (is_source_query && ims->im6s_stp == 0) { 2602 CTR1(KTR_MLD, "%s: skip unrecorded node", 2603 __func__); 2604 continue; 2605 } 2606 CTR1(KTR_MLD, "%s: append node", __func__); 2607 if (!m_append(m, sizeof(struct in6_addr), 2608 (void *)&ims->im6s_addr)) { 2609 if (m != m0) 2610 m_freem(m); 2611 CTR1(KTR_MLD, "%s: m_append() failed.", 2612 __func__); 2613 return (-ENOMEM); 2614 } 2615 ++msrcs; 2616 if (msrcs == m0srcs) 2617 break; 2618 } 2619 pmr->mr_numsrc = htons(msrcs); 2620 nbytes += (msrcs * sizeof(struct in6_addr)); 2621 2622 CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); 2623 mbufq_enqueue(mq, m); 2624 } 2625 2626 return (nbytes); 2627 } 2628 2629 /* 2630 * Type used to mark record pass completion. 2631 * We exploit the fact we can cast to this easily from the 2632 * current filter modes on each ip_msource node. 2633 */ 2634 typedef enum { 2635 REC_NONE = 0x00, /* MCAST_UNDEFINED */ 2636 REC_ALLOW = 0x01, /* MCAST_INCLUDE */ 2637 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ 2638 REC_FULL = REC_ALLOW | REC_BLOCK 2639 } rectype_t; 2640 2641 /* 2642 * Enqueue an MLDv2 filter list change to the given output queue. 2643 * 2644 * Source list filter state is held in an RB-tree. When the filter list 2645 * for a group is changed without changing its mode, we need to compute 2646 * the deltas between T0 and T1 for each source in the filter set, 2647 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. 2648 * 2649 * As we may potentially queue two record types, and the entire R-B tree 2650 * needs to be walked at once, we break this out into its own function 2651 * so we can generate a tightly packed queue of packets. 2652 * 2653 * XXX This could be written to only use one tree walk, although that makes 2654 * serializing into the mbuf chains a bit harder. For now we do two walks 2655 * which makes things easier on us, and it may or may not be harder on 2656 * the L2 cache. 2657 * 2658 * If successful the size of all data appended to the queue is returned, 2659 * otherwise an error code less than zero is returned, or zero if 2660 * no record(s) were appended. 
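 * The two passes are driven by the rectype_t values defined above;
 * the outer loop finishes once both the ALLOW and BLOCK passes have
 * completed (drt == REC_FULL).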
2661 */ 2662 static int 2663 mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm) 2664 { 2665 static const int MINRECLEN = 2666 sizeof(struct mldv2_record) + sizeof(struct in6_addr); 2667 struct ifnet *ifp; 2668 struct mldv2_record mr; 2669 struct mldv2_record *pmr; 2670 struct ip6_msource *ims, *nims; 2671 struct mbuf *m, *m0, *md; 2672 int m0srcs, nbytes, npbytes, off, rsrcs, schanged; 2673 uint8_t mode, now, then; 2674 rectype_t crt, drt, nrt; 2675 #ifdef KTR 2676 int nallow, nblock; 2677 char ip6tbuf[INET6_ADDRSTRLEN]; 2678 #endif 2679 2680 IN6_MULTI_LIST_LOCK_ASSERT(); 2681 2682 if (inm->in6m_nsrc == 0 || 2683 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) 2684 return (0); 2685 2686 ifp = inm->in6m_ifp; /* interface */ 2687 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */ 2688 crt = REC_NONE; /* current group record type */ 2689 drt = REC_NONE; /* mask of completed group record types */ 2690 nrt = REC_NONE; /* record type for current node */ 2691 m0srcs = 0; /* # source which will fit in current mbuf chain */ 2692 npbytes = 0; /* # of bytes appended this packet */ 2693 nbytes = 0; /* # of bytes appended to group's state-change queue */ 2694 rsrcs = 0; /* # sources encoded in current record */ 2695 schanged = 0; /* # nodes encoded in overall filter change */ 2696 #ifdef KTR 2697 nallow = 0; /* # of source entries in ALLOW_NEW */ 2698 nblock = 0; /* # of source entries in BLOCK_OLD */ 2699 #endif 2700 nims = NULL; /* next tree node pointer */ 2701 2702 /* 2703 * For each possible filter record mode. 2704 * The first kind of source we encounter tells us which 2705 * is the first kind of record we start appending. 2706 * If a node transitioned to UNDEFINED at t1, its mode is treated 2707 * as the inverse of the group's filter mode. 2708 */ 2709 while (drt != REC_FULL) { 2710 do { 2711 m0 = mbufq_last(mq); 2712 if (m0 != NULL && 2713 (m0->m_pkthdr.vt_nrecs + 1 <= 2714 MLD_V2_REPORT_MAXRECS) && 2715 (m0->m_pkthdr.len + MINRECLEN) < 2716 (ifp->if_mtu - MLD_MTUSPACE)) { 2717 m = m0; 2718 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2719 sizeof(struct mldv2_record)) / 2720 sizeof(struct in6_addr); 2721 CTR1(KTR_MLD, 2722 "%s: use previous packet", __func__); 2723 } else { 2724 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2725 if (m == NULL) 2726 m = m_gethdr(M_NOWAIT, MT_DATA); 2727 if (m == NULL) { 2728 CTR1(KTR_MLD, 2729 "%s: m_get*() failed", __func__); 2730 return (-ENOMEM); 2731 } 2732 m->m_pkthdr.vt_nrecs = 0; 2733 mld_save_context(m, ifp); 2734 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2735 sizeof(struct mldv2_record)) / 2736 sizeof(struct in6_addr); 2737 npbytes = 0; 2738 CTR1(KTR_MLD, 2739 "%s: allocated new packet", __func__); 2740 } 2741 /* 2742 * Append the MLD group record header to the 2743 * current packet's data area. 2744 * Recalculate pointer to free space for next 2745 * group record, in case m_append() allocated 2746 * a new mbuf or cluster. 
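 * The record header is appended with its type and source count
 * still zero; pmr is then used to patch those fields once the
 * sources for this record are known.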
2747 */ 2748 memset(&mr, 0, sizeof(mr)); 2749 mr.mr_addr = inm->in6m_addr; 2750 in6_clearscope(&mr.mr_addr); 2751 if (!m_append(m, sizeof(mr), (void *)&mr)) { 2752 if (m != m0) 2753 m_freem(m); 2754 CTR1(KTR_MLD, 2755 "%s: m_append() failed", __func__); 2756 return (-ENOMEM); 2757 } 2758 npbytes += sizeof(struct mldv2_record); 2759 if (m != m0) { 2760 /* new packet; offset in chain */ 2761 md = m_getptr(m, npbytes - 2762 sizeof(struct mldv2_record), &off); 2763 pmr = (struct mldv2_record *)(mtod(md, 2764 uint8_t *) + off); 2765 } else { 2766 /* current packet; offset from last append */ 2767 md = m_last(m); 2768 pmr = (struct mldv2_record *)(mtod(md, 2769 uint8_t *) + md->m_len - 2770 sizeof(struct mldv2_record)); 2771 } 2772 /* 2773 * Begin walking the tree for this record type 2774 * pass, or continue from where we left off 2775 * previously if we had to allocate a new packet. 2776 * Only report deltas in-mode at t1. 2777 * We need not report included sources as allowed 2778 * if we are in inclusive mode on the group, 2779 * however the converse is not true. 2780 */ 2781 rsrcs = 0; 2782 if (nims == NULL) { 2783 nims = RB_MIN(ip6_msource_tree, 2784 &inm->in6m_srcs); 2785 } 2786 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2787 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2788 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2789 now = im6s_get_mode(inm, ims, 1); 2790 then = im6s_get_mode(inm, ims, 0); 2791 CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d", 2792 __func__, then, now); 2793 if (now == then) { 2794 CTR1(KTR_MLD, 2795 "%s: skip unchanged", __func__); 2796 continue; 2797 } 2798 if (mode == MCAST_EXCLUDE && 2799 now == MCAST_INCLUDE) { 2800 CTR1(KTR_MLD, 2801 "%s: skip IN src on EX group", 2802 __func__); 2803 continue; 2804 } 2805 nrt = (rectype_t)now; 2806 if (nrt == REC_NONE) 2807 nrt = (rectype_t)(~mode & REC_FULL); 2808 if (schanged++ == 0) { 2809 crt = nrt; 2810 } else if (crt != nrt) 2811 continue; 2812 if (!m_append(m, sizeof(struct in6_addr), 2813 (void *)&ims->im6s_addr)) { 2814 if (m != m0) 2815 m_freem(m); 2816 CTR1(KTR_MLD, 2817 "%s: m_append() failed", __func__); 2818 return (-ENOMEM); 2819 } 2820 #ifdef KTR 2821 nallow += !!(crt == REC_ALLOW); 2822 nblock += !!(crt == REC_BLOCK); 2823 #endif 2824 if (++rsrcs == m0srcs) 2825 break; 2826 } 2827 /* 2828 * If we did not append any tree nodes on this 2829 * pass, back out of allocations. 2830 */ 2831 if (rsrcs == 0) { 2832 npbytes -= sizeof(struct mldv2_record); 2833 if (m != m0) { 2834 CTR1(KTR_MLD, 2835 "%s: m_free(m)", __func__); 2836 m_freem(m); 2837 } else { 2838 CTR1(KTR_MLD, 2839 "%s: m_adj(m, -mr)", __func__); 2840 m_adj(m, -((int)sizeof( 2841 struct mldv2_record))); 2842 } 2843 continue; 2844 } 2845 npbytes += (rsrcs * sizeof(struct in6_addr)); 2846 if (crt == REC_ALLOW) 2847 pmr->mr_type = MLD_ALLOW_NEW_SOURCES; 2848 else if (crt == REC_BLOCK) 2849 pmr->mr_type = MLD_BLOCK_OLD_SOURCES; 2850 pmr->mr_numsrc = htons(rsrcs); 2851 /* 2852 * Count the new group record, and enqueue this 2853 * packet if it wasn't already queued. 
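 * A packet reused from the tail of the queue (m == m0) is already
 * linked in, so only newly allocated packets are enqueued here.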
2854 */
2855 m->m_pkthdr.vt_nrecs++;
2856 if (m != m0)
2857 mbufq_enqueue(mq, m);
2858 nbytes += npbytes;
2859 } while (nims != NULL);
2860 drt |= crt;
2861 crt = (~crt & REC_FULL);
2862 }
2863
2864 CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__,
2865 nallow, nblock);
2866
2867 return (nbytes);
2868 }
2869
2870 static int
2871 mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq)
2872 {
2873 struct mbufq *gq;
2874 struct mbuf *m; /* pending state-change */
2875 struct mbuf *m0; /* copy of pending state-change */
2876 struct mbuf *mt; /* last state-change in packet */
2877 int docopy, domerge;
2878 u_int recslen;
2879
2880 docopy = 0;
2881 domerge = 0;
2882 recslen = 0;
2883
2884 IN6_MULTI_LIST_LOCK_ASSERT();
2885 MLD_LOCK_ASSERT();
2886
2887 /*
2888 * If there are further pending retransmissions, make a writable
2889 * copy of each queued state-change message before merging.
2890 */
2891 if (inm->in6m_scrv > 0)
2892 docopy = 1;
2893
2894 gq = &inm->in6m_scq;
2895 #ifdef KTR
2896 if (mbufq_first(gq) == NULL) {
2897 CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty",
2898 __func__, inm);
2899 }
2900 #endif
2901
2902 m = mbufq_first(gq);
2903 while (m != NULL) {
2904 /*
2905 * Only merge the report into the current packet if
2906 * there is sufficient space to do so; an MLDv2 report
2907 * packet may only contain 65,535 group records.
2908 * Always use a simple mbuf chain concatenation to do this,
2909 * as large state changes for single groups may have
2910 * allocated clusters.
2911 */
2912 domerge = 0;
2913 mt = mbufq_last(scq);
2914 if (mt != NULL) {
2915 recslen = m_length(m, NULL);
2916
2917 if ((mt->m_pkthdr.vt_nrecs +
2918 m->m_pkthdr.vt_nrecs <=
2919 MLD_V2_REPORT_MAXRECS) &&
2920 (mt->m_pkthdr.len + recslen <=
2921 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE)))
2922 domerge = 1;
2923 }
2924
2925 if (!domerge && mbufq_full(gq)) {
2926 CTR2(KTR_MLD,
2927 "%s: outbound queue full, skipping whole packet %p",
2928 __func__, m);
2929 mt = m->m_nextpkt;
2930 if (!docopy)
2931 m_freem(m);
2932 m = mt;
2933 continue;
2934 }
2935
2936 if (!docopy) {
2937 CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m);
2938 m0 = mbufq_dequeue(gq);
2939 m = m0->m_nextpkt;
2940 } else {
2941 CTR2(KTR_MLD, "%s: copying %p", __func__, m);
2942 m0 = m_dup(m, M_NOWAIT);
2943 if (m0 == NULL)
2944 return (ENOMEM);
2945 m0->m_nextpkt = NULL;
2946 m = m->m_nextpkt;
2947 }
2948
2949 if (!domerge) {
2950 CTR3(KTR_MLD, "%s: queueing %p to scq %p",
2951 __func__, m0, scq);
2952 mbufq_enqueue(scq, m0);
2953 } else {
2954 struct mbuf *mtl; /* last mbuf of packet mt */
2955
2956 CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p",
2957 __func__, m0, mt);
2958
2959 mtl = m_last(mt);
2960 m0->m_flags &= ~M_PKTHDR;
2961 mt->m_pkthdr.len += recslen;
2962 mt->m_pkthdr.vt_nrecs +=
2963 m0->m_pkthdr.vt_nrecs;
2964
2965 mtl->m_next = m0;
2966 }
2967 }
2968
2969 return (0);
2970 }
2971
2972 /*
2973 * Respond to a pending MLDv2 General Query.
2974 */
2975 static void
2976 mld_v2_dispatch_general_query(struct mld_ifsoftc *mli)
2977 {
2978 struct ifmultiaddr *ifma;
2979 struct ifnet *ifp;
2980 struct in6_multi *inm;
2981 int retval __unused;
2982
2983 NET_EPOCH_ASSERT();
2984 IN6_MULTI_LIST_LOCK_ASSERT();
2985 MLD_LOCK_ASSERT();
2986
2987 KASSERT(mli->mli_version == MLD_VERSION_2,
2988 ("%s: called when version %d", __func__, mli->mli_version));
2989
2990 /*
2991 * Check that there are some packets queued. If so, send them first.
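 * Transmission is capped at MLD_MAX_RESPONSE_BURST packets per call.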
2992 * For a large number of groups, the reply to a general query can take
2993 * many packets; we should finish sending them before starting to
2994 * queue the new reply.
2995 */
2996 if (!mbufq_empty(&mli->mli_gq))
2997 goto send;
2998
2999 ifp = mli->mli_ifp;
3000
3001 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
3002 inm = in6m_ifmultiaddr_get_inm(ifma);
3003 if (inm == NULL)
3004 continue;
3005 KASSERT(ifp == inm->in6m_ifp,
3006 ("%s: inconsistent ifp", __func__));
3007
3008 switch (inm->in6m_state) {
3009 case MLD_NOT_MEMBER:
3010 case MLD_SILENT_MEMBER:
3011 break;
3012 case MLD_REPORTING_MEMBER:
3013 case MLD_IDLE_MEMBER:
3014 case MLD_LAZY_MEMBER:
3015 case MLD_SLEEPING_MEMBER:
3016 case MLD_AWAKENING_MEMBER:
3017 inm->in6m_state = MLD_REPORTING_MEMBER;
3018 retval = mld_v2_enqueue_group_record(&mli->mli_gq,
3019 inm, 0, 0, 0, 0);
3020 CTR2(KTR_MLD, "%s: enqueue record = %d",
3021 __func__, retval);
3022 break;
3023 case MLD_G_QUERY_PENDING_MEMBER:
3024 case MLD_SG_QUERY_PENDING_MEMBER:
3025 case MLD_LEAVING_MEMBER:
3026 break;
3027 }
3028 }
3029
3030 send:
3031 mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST);
3032
3033 /*
3034 * Slew transmission of bursts over 500ms intervals.
3035 */
3036 if (mbufq_first(&mli->mli_gq) != NULL) {
3037 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY(
3038 MLD_RESPONSE_BURST_INTERVAL);
3039 V_interface_timers_running6 = 1;
3040 }
3041 }
3042
3043 /*
3044 * Transmit the next pending message in the output queue.
3045 *
3046 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis.
3047 * MRT: Nothing needs to be done, as MLD traffic is always local to
3048 * a link and uses a link-scope multicast address.
3049 */
3050 static void
3051 mld_dispatch_packet(struct mbuf *m)
3052 {
3053 struct ip6_moptions im6o;
3054 struct ifnet *ifp;
3055 struct ifnet *oifp;
3056 struct mbuf *m0;
3057 struct mbuf *md;
3058 struct ip6_hdr *ip6;
3059 struct mld_hdr *mld;
3060 int error;
3061 int off;
3062 int type;
3063 uint32_t ifindex;
3064
3065 CTR2(KTR_MLD, "%s: transmit %p", __func__, m);
3066 NET_EPOCH_ASSERT();
3067
3068 /*
3069 * Set VNET image pointer from enqueued mbuf chain
3070 * before doing anything else. Whilst we use interface
3071 * indexes to guard against interface detach, they are
3072 * unique to each VIMAGE and must be retrieved.
3073 */
3074 ifindex = mld_restore_context(m);
3075
3076 /*
3077 * Check if the ifnet still exists. This limits the scope of
3078 * any race in the absence of a global ifp lock for low cost
3079 * (an array lookup).
3080 */
3081 ifp = ifnet_byindex(ifindex);
3082 if (ifp == NULL) {
3083 CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.",
3084 __func__, m, ifindex);
3085 m_freem(m);
3086 IP6STAT_INC(ip6s_noroute);
3087 goto out;
3088 }
3089
3090 im6o.im6o_multicast_hlim = 1;
3091 im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL);
3092 im6o.im6o_multicast_ifp = ifp;
3093
3094 if (m->m_flags & M_MLDV1) {
3095 m0 = m;
3096 } else {
3097 m0 = mld_v2_encap_report(ifp, m);
3098 if (m0 == NULL) {
3099 CTR2(KTR_MLD, "%s: dropped %p", __func__, m);
3100 IP6STAT_INC(ip6s_odropped);
3101 goto out;
3102 }
3103 }
3104
3105 mld_scrub_context(m0);
3106 m_clrprotoflags(m);
3107 m0->m_pkthdr.rcvif = V_loif;
3108
3109 ip6 = mtod(m0, struct ip6_hdr *);
3110 #if 0
3111 (void)in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */
3112 #else
3113 /*
3114 * XXX XXX Break some KPI rules to prevent an LOR which would
3115 * occur if we called in6_setscope() at transmission.
3116 * See comments at top of file.
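 * MLD_EMBEDSCOPE() writes the interface index into the embedded
 * scope zone of the link-scope destination address instead, which
 * avoids the locking that in6_setscope() would require.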
3117 */ 3118 MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index); 3119 #endif 3120 3121 /* 3122 * Retrieve the ICMPv6 type before handoff to ip6_output(), 3123 * so we can bump the stats. 3124 */ 3125 md = m_getptr(m0, sizeof(struct ip6_hdr), &off); 3126 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); 3127 type = mld->mld_type; 3128 3129 oifp = NULL; 3130 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, 3131 &oifp, NULL); 3132 if (error) { 3133 CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); 3134 goto out; 3135 } 3136 ICMP6STAT_INC(icp6s_outhist[type]); 3137 if (oifp != NULL) { 3138 icmp6_ifstat_inc(oifp, ifs6_out_msg); 3139 switch (type) { 3140 case MLD_LISTENER_REPORT: 3141 case MLDV2_LISTENER_REPORT: 3142 icmp6_ifstat_inc(oifp, ifs6_out_mldreport); 3143 break; 3144 case MLD_LISTENER_DONE: 3145 icmp6_ifstat_inc(oifp, ifs6_out_mlddone); 3146 break; 3147 } 3148 } 3149 out: 3150 return; 3151 } 3152 3153 /* 3154 * Encapsulate an MLDv2 report. 3155 * 3156 * KAME IPv6 requires that hop-by-hop options be passed separately, 3157 * and that the IPv6 header be prepended in a separate mbuf. 3158 * 3159 * Returns a pointer to the new mbuf chain head, or NULL if the 3160 * allocation failed. 3161 */ 3162 static struct mbuf * 3163 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) 3164 { 3165 struct mbuf *mh; 3166 struct mldv2_report *mld; 3167 struct ip6_hdr *ip6; 3168 struct in6_ifaddr *ia; 3169 int mldreclen; 3170 3171 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 3172 KASSERT((m->m_flags & M_PKTHDR), 3173 ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); 3174 3175 /* 3176 * RFC3590: OK to send as :: or tentative during DAD. 3177 */ 3178 NET_EPOCH_ASSERT(); 3179 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); 3180 if (ia == NULL) 3181 CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__); 3182 3183 mh = m_gethdr(M_NOWAIT, MT_DATA); 3184 if (mh == NULL) { 3185 if (ia != NULL) 3186 ifa_free(&ia->ia_ifa); 3187 m_freem(m); 3188 return (NULL); 3189 } 3190 M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); 3191 3192 mldreclen = m_length(m, NULL); 3193 CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen); 3194 3195 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report); 3196 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + 3197 sizeof(struct mldv2_report) + mldreclen; 3198 3199 ip6 = mtod(mh, struct ip6_hdr *); 3200 ip6->ip6_flow = 0; 3201 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 3202 ip6->ip6_vfc |= IPV6_VERSION; 3203 ip6->ip6_nxt = IPPROTO_ICMPV6; 3204 ip6->ip6_src = ia ? 
ia->ia_addr.sin6_addr : in6addr_any; 3205 if (ia != NULL) 3206 ifa_free(&ia->ia_ifa); 3207 ip6->ip6_dst = in6addr_linklocal_allv2routers; 3208 /* scope ID will be set in netisr */ 3209 3210 mld = (struct mldv2_report *)(ip6 + 1); 3211 mld->mld_type = MLDV2_LISTENER_REPORT; 3212 mld->mld_code = 0; 3213 mld->mld_cksum = 0; 3214 mld->mld_v2_reserved = 0; 3215 mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs); 3216 m->m_pkthdr.vt_nrecs = 0; 3217 3218 mh->m_next = m; 3219 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, 3220 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen); 3221 return (mh); 3222 } 3223 3224 #ifdef KTR 3225 static char * 3226 mld_rec_type_to_str(const int type) 3227 { 3228 3229 switch (type) { 3230 case MLD_CHANGE_TO_EXCLUDE_MODE: 3231 return "TO_EX"; 3232 break; 3233 case MLD_CHANGE_TO_INCLUDE_MODE: 3234 return "TO_IN"; 3235 break; 3236 case MLD_MODE_IS_EXCLUDE: 3237 return "MODE_EX"; 3238 break; 3239 case MLD_MODE_IS_INCLUDE: 3240 return "MODE_IN"; 3241 break; 3242 case MLD_ALLOW_NEW_SOURCES: 3243 return "ALLOW_NEW"; 3244 break; 3245 case MLD_BLOCK_OLD_SOURCES: 3246 return "BLOCK_OLD"; 3247 break; 3248 default: 3249 break; 3250 } 3251 return "unknown"; 3252 } 3253 #endif 3254 3255 static void 3256 mld_init(void *unused __unused) 3257 { 3258 3259 CTR1(KTR_MLD, "%s: initializing", __func__); 3260 MLD_LOCK_INIT(); 3261 3262 ip6_initpktopts(&mld_po); 3263 mld_po.ip6po_hlim = 1; 3264 mld_po.ip6po_hbh = &mld_ra.hbh; 3265 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; 3266 mld_po.ip6po_flags = IP6PO_DONTFRAG; 3267 3268 callout_init(&mldslow_callout, 1); 3269 callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL); 3270 callout_init(&mldfast_callout, 1); 3271 callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL); 3272 } 3273 SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL); 3274 3275 static void 3276 mld_uninit(void *unused __unused) 3277 { 3278 3279 CTR1(KTR_MLD, "%s: tearing down", __func__); 3280 callout_drain(&mldslow_callout); 3281 callout_drain(&mldfast_callout); 3282 MLD_LOCK_DESTROY(); 3283 } 3284 SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL); 3285 3286 static void 3287 vnet_mld_init(const void *unused __unused) 3288 { 3289 3290 CTR1(KTR_MLD, "%s: initializing", __func__); 3291 3292 LIST_INIT(&V_mli_head); 3293 } 3294 VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init, 3295 NULL); 3296 3297 static void 3298 vnet_mld_uninit(const void *unused __unused) 3299 { 3300 3301 /* This can happen if we shutdown the network stack. */ 3302 CTR1(KTR_MLD, "%s: tearing down", __func__); 3303 } 3304 VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit, 3305 NULL); 3306 3307 static int 3308 mld_modevent(module_t mod, int type, void *unused __unused) 3309 { 3310 3311 switch (type) { 3312 case MOD_LOAD: 3313 case MOD_UNLOAD: 3314 break; 3315 default: 3316 return (EOPNOTSUPP); 3317 } 3318 return (0); 3319 } 3320 3321 static moduledata_t mld_mod = { 3322 "mld", 3323 mld_modevent, 3324 0 3325 }; 3326 DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY); 3327