/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2009 Bruce Simpson.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	$KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $
 */

/*-
 * Copyright (c) 1988 Stephen Deering.
 * Copyright (c) 1992, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * Stephen Deering of Stanford University.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)igmp.c	8.1 (Berkeley) 7/19/93
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/protosw.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/callout.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/ktr.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet6/in6_var.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet6/scope6_var.h>
#include <netinet/icmp6.h>
#include <netinet6/mld6.h>
#include <netinet6/mld6_var.h>

#include <security/mac/mac_framework.h>

#ifndef KTR_MLD
#define KTR_MLD KTR_INET6
#endif

static struct mld_ifsoftc *
		mli_alloc_locked(struct ifnet *);
static void	mli_delete_locked(const struct ifnet *);
static void	mld_dispatch_packet(struct mbuf *);
static void	mld_dispatch_queue(struct mbufq *, int);
static void	mld_final_leave(struct in6_multi *, struct mld_ifsoftc *);
static void	mld_fasttimo_vnet(struct in6_multi_head *inmh);
static int	mld_handle_state_change(struct in6_multi *,
		    struct mld_ifsoftc *);
static int	mld_initial_join(struct in6_multi *, struct mld_ifsoftc *,
		    const int);
#ifdef KTR
static char *	mld_rec_type_to_str(const int);
#endif
static void	mld_set_version(struct mld_ifsoftc *, const int);
static void	mld_slowtimo_vnet(void);
static int	mld_v1_input_query(struct ifnet *, const struct ip6_hdr *,
		    /*const*/ struct mld_hdr *);
static int	mld_v1_input_report(struct ifnet *, const struct ip6_hdr *,
		    /*const*/ struct mld_hdr *);
static void	mld_v1_process_group_timer(struct in6_multi_head *,
		    struct in6_multi *);
static void	mld_v1_process_querier_timers(struct mld_ifsoftc *);
static int	mld_v1_transmit_report(struct in6_multi *, const int);
static void	mld_v1_update_group(struct in6_multi *, const int);
static void	mld_v2_cancel_link_timers(struct mld_ifsoftc *);
static void	mld_v2_dispatch_general_query(struct mld_ifsoftc *);
static struct mbuf *
		mld_v2_encap_report(struct ifnet *, struct mbuf *);
static int	mld_v2_enqueue_filter_change(struct mbufq *,
		    struct in6_multi *);
static int	mld_v2_enqueue_group_record(struct mbufq *,
		    struct in6_multi *, const int, const int, const int,
		    const int);
static int	mld_v2_input_query(struct ifnet *, const struct ip6_hdr *,
		    struct mbuf *, struct mldv2_query *, const int, const int);
static int	mld_v2_merge_state_changes(struct in6_multi *,
		    struct mbufq *);
static void	mld_v2_process_group_timers(struct in6_multi_head *,
		    struct mbufq *, struct mbufq *,
		    struct in6_multi *, const int);
static int	mld_v2_process_group_query(struct in6_multi *,
		    struct mld_ifsoftc *mli, int, struct mbuf *,
		    struct mldv2_query *, const int);
static int	sysctl_mld_gsr(SYSCTL_HANDLER_ARGS);
static int	sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS);

/*
 * Normative references: RFC 2710, RFC 3590, RFC 3810.
 *
 * Locking:
 *  * The MLD subsystem lock ends up being system-wide for the moment,
 *    but could be per-VIMAGE later on.
 *  * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK.
 *    Any may be taken independently; if any are held at the same
 *    time, the above lock order must be followed.
 *  * IN6_MULTI_LOCK covers in_multi.
 *  * MLD_LOCK covers per-link state and any global variables in this file.
 *  * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of
 *    per-link state iterators.
 *
 * XXX LOR PREVENTION
 * A special case for IPv6 is the in6_setscope() routine. ip6_output()
 * will not accept an ifp; it wants an embedded scope ID, unlike
 * ip_output(), which happily takes the ifp given to it. The embedded
 * scope ID is only used by MLD to select the outgoing interface.
 *
 * During interface attach and detach, MLD will take MLD_LOCK *after*
 * the IF_AFDATA_LOCK.
 * As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call
 * it with MLD_LOCK held without triggering an LOR. A netisr with indirect
 * dispatch could work around this, but we'd rather not do that, as it
 * can introduce other races.
 *
 * As such, we exploit the fact that the scope ID is just the interface
 * index, and embed it in the IPv6 destination address accordingly.
 * This is potentially NOT VALID for MLDv1 reports, as they
 * are always sent to the multicast group itself; as MLDv2
 * reports are always sent to ff02::16, this is not an issue
 * when MLDv2 is in use.
 *
 * This does not however eliminate the LOR when ip6_output() itself
 * calls in6_setscope() internally whilst MLD_LOCK is held. This will
 * trigger a LOR warning in WITNESS when the ifnet is detached.
 *
 * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given
 * how it's used across the network stack. Here we're simply exploiting
 * the fact that MLD runs at a similar layer in the stack to scope6.c.
 *
 * VIMAGE:
 *  * Each in6_multi corresponds to an ifp, and each ifp corresponds
 *    to a vnet in ifp->if_vnet.
 */
static struct mtx		 mld_mtx;
static MALLOC_DEFINE(M_MLD, "mld", "mld state");

#define	MLD_EMBEDSCOPE(pin6, zoneid)					\
	if (IN6_IS_SCOPE_LINKLOCAL(pin6) ||				\
	    IN6_IS_ADDR_MC_INTFACELOCAL(pin6))				\
		(pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF)		\

/*
 * VIMAGE-wide globals.
 */
VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0};
VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head);
VNET_DEFINE_STATIC(int, interface_timers_running6);
VNET_DEFINE_STATIC(int, state_change_timers_running6);
VNET_DEFINE_STATIC(int, current_state_timers_running6);

#define	V_mld_gsrdelay			VNET(mld_gsrdelay)
#define	V_mli_head			VNET(mli_head)
#define	V_interface_timers_running6	VNET(interface_timers_running6)
#define	V_state_change_timers_running6	VNET(state_change_timers_running6)
#define	V_current_state_timers_running6	VNET(current_state_timers_running6)

SYSCTL_DECL(_net_inet6);	/* Note: Not in any common header. */

SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
    "IPv6 Multicast Listener Discovery");

/*
 * Virtualized sysctls.
 */
SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay,
    CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
    &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I",
    "Rate limit for MLDv2 Group-and-Source queries in seconds");

/*
 * Non-virtualized sysctls.
 */
static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo,
    "Per-interface MLDv2 state");

static int	mld_v1enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN,
    &mld_v1enable, 0, "Enable fallback to MLDv1");

static int	mld_v2enable = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN,
    &mld_v2enable, 0, "Enable MLDv2");

static int	mld_use_allow = 1;
SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN,
    &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves");

/*
 * Packed Router Alert option structure declaration.
 */
struct mld_raopt {
	struct ip6_hbh		hbh;
	struct ip6_opt		pad;
	struct ip6_opt_router	ra;
} __packed;

/*
 * Router Alert hop-by-hop option header.
 */
static struct mld_raopt		 mld_ra = {
	.hbh = { 0, 0 },
	.pad = { .ip6o_type = IP6OPT_PADN, 0 },
	.ra = {
	    .ip6or_type = IP6OPT_ROUTER_ALERT,
	    .ip6or_len = IP6OPT_RTALERT_LEN - 2,
	    .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF),
	    .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF)
	}
};
static struct ip6_pktopts	 mld_po;

static __inline void
mld_save_context(struct mbuf *m, struct ifnet *ifp)
{

#ifdef VIMAGE
	m->m_pkthdr.PH_loc.ptr = ifp->if_vnet;
#endif /* VIMAGE */
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.flowid = ifp->if_index;
}

static __inline void
mld_scrub_context(struct mbuf *m)
{

	m->m_pkthdr.PH_loc.ptr = NULL;
	m->m_pkthdr.flowid = 0;
}

/*
 * Restore context from a queued output chain.
 * Return saved ifindex.
 *
 * VIMAGE: The assertion is there to make sure that we
 * actually called CURVNET_SET() with what's in the mbuf chain.
 */
static __inline uint32_t
mld_restore_context(struct mbuf *m)
{

#if defined(VIMAGE) && defined(INVARIANTS)
	KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr,
	    ("%s: called when curvnet was not restored: curvnet %p m ptr %p",
	    __func__, curvnet, m->m_pkthdr.PH_loc.ptr));
#endif
	return (m->m_pkthdr.flowid);
}

/*
 * Retrieve or set threshold between group-source queries in seconds.
 *
 * VIMAGE: Assume curvnet set by caller.
 * SMPng: NOTE: Serialized by MLD lock.
 */
static int
sysctl_mld_gsr(SYSCTL_HANDLER_ARGS)
{
	int error;
	int i;

	error = sysctl_wire_old_buffer(req, sizeof(int));
	if (error)
		return (error);

	MLD_LOCK();

	i = V_mld_gsrdelay.tv_sec;

	error = sysctl_handle_int(oidp, &i, 0, req);
	if (error || !req->newptr)
		goto out_locked;

	if (i < -1 || i >= 60) {
		error = EINVAL;
		goto out_locked;
	}

	CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d",
	     V_mld_gsrdelay.tv_sec, i);
	V_mld_gsrdelay.tv_sec = i;

out_locked:
	MLD_UNLOCK();
	return (error);
}

/*
 * Expose struct mld_ifsoftc to userland, keyed by ifindex.
 * For use by ifmcstat(8).
 *
 * SMPng: NOTE: Does an unlocked ifindex space read.
 * VIMAGE: Assume curvnet set by caller. The node handler itself
 * is not directly virtualized.
 */
static int
sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS)
{
	int			*name;
	int			 error;
	u_int			 namelen;
	struct ifnet		*ifp;
	struct mld_ifsoftc	*mli;

	name = (int *)arg1;
	namelen = arg2;

	if (req->newptr != NULL)
		return (EPERM);

	if (namelen != 1)
		return (EINVAL);

	error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo));
	if (error)
		return (error);

	IN6_MULTI_LOCK();
	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	if (name[0] <= 0 || name[0] > V_if_index) {
		error = ENOENT;
		goto out_locked;
	}

	error = ENOENT;

	ifp = ifnet_byindex(name[0]);
	if (ifp == NULL)
		goto out_locked;

	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		if (ifp == mli->mli_ifp) {
			struct mld_ifinfo info;

			info.mli_version = mli->mli_version;
			info.mli_v1_timer = mli->mli_v1_timer;
			info.mli_v2_timer = mli->mli_v2_timer;
			info.mli_flags = mli->mli_flags;
			info.mli_rv = mli->mli_rv;
			info.mli_qi = mli->mli_qi;
			info.mli_qri = mli->mli_qri;
			info.mli_uri = mli->mli_uri;
			error = SYSCTL_OUT(req, &info, sizeof(info));
			break;
		}
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();
	IN6_MULTI_UNLOCK();
	return (error);
}

/*
 * Dispatch an entire queue of pending packet chains.
 * VIMAGE: Assumes the vnet pointer has been set.
 */
static void
mld_dispatch_queue(struct mbufq *mq, int limit)
{
	struct mbuf *m;

	while ((m = mbufq_dequeue(mq)) != NULL) {
		CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, m, mq);
		mld_dispatch_packet(m);
		if (--limit == 0)
			break;
	}
}

/*
 * Filter outgoing MLD report state by group.
 *
 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1)
 * and node-local addresses. However, kernel and socket consumers
 * always embed the KAME scope ID in the address provided, so strip it
 * when performing comparison.
 * Note: This is not the same as the *multicast* scope.
 *
 * Return zero if the given group is one for which MLD reports
 * should be suppressed, or non-zero if reports should be issued.
 */
static __inline int
mld_is_addr_reported(const struct in6_addr *addr)
{

	KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__));

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL)
		return (0);

	if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) {
		struct in6_addr tmp = *addr;
		in6_clearscope(&tmp);
		if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes))
			return (0);
	}

	return (1);
}

/*
 * Attach MLD when PF_INET6 is attached to an interface.
 *
 * SMPng: Normally called with IF_AFDATA_LOCK held.
 */
struct mld_ifsoftc *
mld_domifattach(struct ifnet *ifp)
{
	struct mld_ifsoftc *mli;

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK();

	mli = mli_alloc_locked(ifp);
	if (!(ifp->if_flags & IFF_MULTICAST))
		mli->mli_flags |= MLIF_SILENT;
	if (mld_use_allow)
		mli->mli_flags |= MLIF_USEALLOW;

	MLD_UNLOCK();

	return (mli);
}

/*
 * VIMAGE: assume curvnet set by caller.
 */
static struct mld_ifsoftc *
mli_alloc_locked(/*const*/ struct ifnet *ifp)
{
	struct mld_ifsoftc *mli;

	MLD_LOCK_ASSERT();

	mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_NOWAIT|M_ZERO);
	if (mli == NULL)
		goto out;

	mli->mli_ifp = ifp;
	mli->mli_version = MLD_VERSION_2;
	mli->mli_flags = 0;
	mli->mli_rv = MLD_RV_INIT;
	mli->mli_qi = MLD_QI_INIT;
	mli->mli_qri = MLD_QRI_INIT;
	mli->mli_uri = MLD_URI_INIT;
	mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS);

	LIST_INSERT_HEAD(&V_mli_head, mli, mli_link);

	CTR2(KTR_MLD, "allocate mld_ifsoftc for ifp %p(%s)",
	     ifp, if_name(ifp));

out:
	return (mli);
}

/*
 * Hook for ifdetach.
 *
 * NOTE: Some finalization tasks need to run before the protocol domain
 * is detached, but also before the link layer does its cleanup.
 * Run before link-layer cleanup; clean up groups, but do not free MLD state.
 *
 * SMPng: Caller must hold IN6_MULTI_LOCK().
 *	Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator.
 *	XXX This routine is also bitten by unlocked ifma_protospec access.
 */
void
mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh)
{
	struct epoch_tracker	 et;
	struct mld_ifsoftc	*mli;
	struct ifmultiaddr	*ifma;
	struct in6_multi	*inm;

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp,
	    if_name(ifp));

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	IF_ADDR_WLOCK(ifp);
	/*
	 * Extract list of in6_multi associated with the detaching ifp
	 * which the PF_INET6 layer is about to release.
	 */
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		if (inm == NULL)
			continue;
		in6m_disconnect_locked(inmh, inm);

		if (mli->mli_version == MLD_VERSION_2) {
			in6m_clear_recorded(inm);

			/*
			 * We need to release the final reference held
			 * for issuing the INCLUDE {}.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER) {
				inm->in6m_state = MLD_NOT_MEMBER;
				in6m_rele_locked(inmh, inm);
			}
		}
	}
	NET_EPOCH_EXIT(et);
	IF_ADDR_WUNLOCK(ifp);
	MLD_UNLOCK();
}

/*
 * Hook for domifdetach.
 * Runs after link-layer cleanup; free MLD state.
 *
 * SMPng: Normally called with IF_AFDATA_LOCK held.
 */
void
mld_domifdetach(struct ifnet *ifp)
{

	CTR3(KTR_MLD, "%s: called for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK();
	mli_delete_locked(ifp);
	MLD_UNLOCK();
}

static void
mli_delete_locked(const struct ifnet *ifp)
{
	struct mld_ifsoftc *mli, *tmli;

	CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)",
	    __func__, ifp, if_name(ifp));

	MLD_LOCK_ASSERT();

	LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) {
		if (mli->mli_ifp == ifp) {
			/*
			 * Free deferred General Query responses.
			 */
			mbufq_drain(&mli->mli_gq);

			LIST_REMOVE(mli, mli_link);

			free(mli, M_MLD);
			return;
		}
	}
}

/*
 * Process a received MLDv1 general or address-specific query.
 * Assumes that the query header has been pulled up to sizeof(mld_hdr).
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
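 *
 * For example (illustrative addresses only): a group-specific query for
 * ff02::1:ff03:4 received on an interface with ifindex 2 is looked up
 * internally as if it were ff02:2::1:ff03:4, since the KAME scope ID is
 * embedded in the second 16-bit word; the scope word is cleared again
 * before the address is visible outside this file.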
 */
static int
mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct ifmultiaddr	*ifma;
	struct mld_ifsoftc	*mli;
	struct in6_multi	*inm;
	int			 is_general_query;
	uint16_t		 timer;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	is_general_query = 0;

	if (!mld_v1enable) {
		CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * Do address field validation upfront before we accept
	 * the query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * MLDv1 General Query.
		 * If this was not sent to the all-nodes group, ignore it.
		 */
		struct in6_addr		 dst;

		dst = ip6->ip6_dst;
		in6_clearscope(&dst);
		if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes))
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * Switch to MLDv1 host compatibility mode.
	 */
	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));
	mld_set_version(mli, MLD_VERSION_1);

	timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	if (is_general_query) {
		/*
		 * For each reporting group joined on this
		 * interface, kick the report timer.
		 */
		CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)",
		    ifp, if_name(ifp));
		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			inm = in6m_ifmultiaddr_get_inm(ifma);
			if (inm == NULL)
				continue;
			mld_v1_update_group(inm, timer);
		}
	} else {
		/*
		 * MLDv1 Group-Specific Query.
		 * If this is a group-specific MLDv1 query, we need only
		 * look up the single group to process it.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm != NULL) {
			CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)",
			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
			    ifp, if_name(ifp));
			mld_v1_update_group(inm, timer);
		}
		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}

	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	return (0);
}

/*
 * Update the report timer on a group in response to an MLDv1 query.
 *
 * If we are becoming the reporting member for this group, start the timer.
 * If we already are the reporting member for this group, and timer is
 * below the threshold, reset it.
 *
 * We may be updating the group for the first time since we switched
 * to MLDv2. If we are, then we must clear any recorded source lists,
 * and transition to REPORTING state; the group timer is overloaded
 * for group and group-source query responses.
 *
 * Unlike MLDv2, the delay per group should be jittered
 * to avoid bursts of MLDv1 reports.
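 *
 * For example, with the usual values PR_FASTHZ = 5 and MLD_TIMER_SCALE =
 * 1000 (assumed here), a query Maximum Response Delay of 10000 ms yields
 * timer = (10000 * 5) / 1000 = 50 fast-timeout ticks, and
 * MLD_RANDOM_DELAY() then picks a value uniformly in [1, 50] so that
 * listeners on the link do not all report at the same instant.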
 */
static void
mld_v1_update_group(struct in6_multi *inm, const int timer)
{
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp), timer);

	IN6_MULTI_LIST_LOCK_ASSERT();

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (inm->in6m_timer != 0 &&
		    inm->in6m_timer <= timer) {
			CTR1(KTR_MLD, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			break;
		}
		/* FALLTHROUGH */
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
		CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
		inm->in6m_state = MLD_REPORTING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		break;
	case MLD_SLEEPING_MEMBER:
		CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
		inm->in6m_state = MLD_AWAKENING_MEMBER;
		break;
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Process a received MLDv2 general, group-specific or
 * group-and-source-specific query.
 *
 * Assumes that mld points to a struct mldv2_query which is stored in
 * contiguous memory.
 *
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
{
	struct mld_ifsoftc	*mli;
	struct in6_multi	*inm;
	uint32_t		 maxdelay, nsrc, qqi;
	int			 is_general_query;
	uint16_t		 timer;
	uint8_t			 qrv;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	if (!mld_v2enable) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	is_general_query = 0;

	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));

	maxdelay = ntohs(mld->mld_maxdelay);	/* in milliseconds */
	if (maxdelay >= 32768) {
		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
			   (MLD_MRC_EXP(maxdelay) + 3);
	}
	timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	qrv = MLD_QRV(mld->mld_misc);
	if (qrv < 2) {
		CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
		    qrv, MLD_RV_INIT);
		qrv = MLD_RV_INIT;
	}

	qqi = mld->mld_qqi;
	if (qqi >= 128) {
		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
		      (MLD_QQIC_EXP(mld->mld_qqi) + 3);
	}

	nsrc = ntohs(mld->mld_numsrc);
	if (nsrc > MLD_MAX_GS_SOURCES)
		return (EMSGSIZE);
	if (icmp6len < sizeof(struct mldv2_query) +
	    (nsrc * sizeof(struct in6_addr)))
		return (EMSGSIZE);

	/*
	 * Do further input validation upfront to avoid resetting timers
	 * should we need to discard this query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * A general query with a source list has undefined
		 * behaviour; discard it.
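		 * (In RFC 3810 a General Query carries a zero Multicast
		 * Address and a zero Number of Sources, so a non-zero
		 * source count here is malformed.)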
		 */
		if (nsrc > 0)
			return (EINVAL);
		is_general_query = 1;
	} else {
		/*
		 * Embed scope ID of receiving interface in MLD query for
		 * lookup whilst we don't hold other locks (due to KAME
		 * locking lameness). We own this mbuf chain just now.
		 */
		in6_setscope(&mld->mld_addr, ifp, NULL);
	}

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	mli = MLD_IFINFO(ifp);
	KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp));

	/*
	 * Discard the v2 query if we're in Compatibility Mode.
	 * The RFC is pretty clear that hosts need to stay in MLDv1 mode
	 * until the Old Version Querier Present timer expires.
	 */
	if (mli->mli_version != MLD_VERSION_2)
		goto out_locked;

	mld_set_version(mli, MLD_VERSION_2);
	mli->mli_rv = qrv;
	mli->mli_qi = qqi;
	mli->mli_qri = maxdelay;

	CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi,
	    maxdelay);

	if (is_general_query) {
		/*
		 * MLDv2 General Query.
		 *
		 * Schedule a current-state report on this ifp for
		 * all groups, possibly containing source lists.
		 *
		 * If there is a pending General Query response
		 * scheduled earlier than the selected delay, do
		 * not schedule any other reports.
		 * Otherwise, reset the interface timer.
		 */
		CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)",
		    ifp, if_name(ifp));
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) {
			mli->mli_v2_timer = MLD_RANDOM_DELAY(timer);
			V_interface_timers_running6 = 1;
		}
	} else {
		/*
		 * MLDv2 Group-specific or Group-and-source-specific Query.
		 *
		 * Group-source-specific queries are throttled on
		 * a per-group basis to defeat denial-of-service attempts.
		 * Queries for groups we are not a member of on this
		 * link are simply ignored.
		 */
		inm = in6m_lookup_locked(ifp, &mld->mld_addr);
		if (inm == NULL)
			goto out_locked;
		if (nsrc > 0) {
			if (!ratecheck(&inm->in6m_lastgsrtv,
			    &V_mld_gsrdelay)) {
				CTR1(KTR_MLD, "%s: GS query throttled.",
				    __func__);
				goto out_locked;
			}
		}
		CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)",
		     ifp, if_name(ifp));
		/*
		 * If there is a pending General Query response
		 * scheduled sooner than the selected delay, no
		 * further report need be scheduled.
		 * Otherwise, prepare to respond to the
		 * group-specific or group-and-source query.
		 */
		if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer)
			mld_v2_process_group_query(inm, mli, timer, m, mld, off);

		/* XXX Clear embedded scope ID as userland won't expect it. */
		in6_clearscope(&mld->mld_addr);
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	return (0);
}

/*
 * Process a received MLDv2 group-specific or group-and-source-specific
 * query.
 * Return <0 if any error occurred. Currently this is ignored.
 */
static int
mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
    int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
{
	int			 retval;
	uint16_t		 nsrc;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	retval = 0;

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LEAVING_MEMBER:
		return (retval);
		break;
	case MLD_REPORTING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(mld->mld_numsrc);

	/* Length should be checked by calling function. */
	KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
	    m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr),
	    ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
	    m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr), m0));

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
			in6m_clear_recorded(inm);
			timer = min(inm->in6m_timer, timer);
		}
		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->in6m_timer, timer);
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * to schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 */
	if (inm->in6m_nsrc > 0) {
		struct in6_addr		 srcaddr;
		int			 i, nrecorded;
		int			 soff;

		soff = off + sizeof(struct mldv2_query);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++) {
			m_copydata(m0, soff, sizeof(struct in6_addr),
			    (caddr_t)&srcaddr);
			retval = in6m_record_source(inm, &srcaddr);
			if (retval < 0)
				break;
			nrecorded += retval;
			soff += sizeof(struct in6_addr);
		}
		if (nrecorded > 0) {
			CTR1(KTR_MLD,
			    "%s: schedule response to SG query", __func__);
			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
			V_current_state_timers_running6 = 1;
		}
	}

	return (retval);
}

/*
 * Process a received MLDv1 host membership report.
 * Assumes mld points to mld_hdr in pulled up mbuf chain.
 *
 * NOTE: Can't be fully const correct as we temporarily embed scope ID in
 * mld_addr. This is OK as we own the mbuf chain.
 */
static int
mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6,
    /*const*/ struct mld_hdr *mld)
{
	struct in6_addr		 src, dst;
	struct in6_ifaddr	*ia;
	struct in6_multi	*inm;
#ifdef KTR
	char			 ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	if (!mld_v1enable) {
		CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &mld->mld_addr),
		    ifp, if_name(ifp));
		return (0);
	}

	if (ifp->if_flags & IFF_LOOPBACK)
		return (0);

	/*
	 * MLDv1 reports must originate from a host's link-local address,
	 * or the unspecified address (when booting).
	 */
	src = ip6->ip6_src;
	in6_clearscope(&src);
	if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) {
		CTR3(KTR_MLD, "ignore v1 report src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (EINVAL);
	}

	/*
	 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast
	 * group, and must be directed to the group itself.
	 */
	dst = ip6->ip6_dst;
	in6_clearscope(&dst);
	if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) ||
	    !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) {
		CTR3(KTR_MLD, "ignore v1 report dst %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_dst),
		    ifp, if_name(ifp));
		return (EINVAL);
	}

	/*
	 * Make sure we don't hear our own membership report, as fast
	 * leave requires knowing that we are the only member of a
	 * group. Assume we used the link-local address if available,
	 * otherwise look for ::.
	 *
	 * XXX Note that scope ID comparison is needed for the address
	 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be
	 * performed for the on-wire address.
	 */
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) ||
	    (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (0);
	}
	if (ia != NULL)
		ifa_free(&ia->ia_ifa);

	CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)",
	    ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp));

	/*
	 * Embed scope ID of receiving interface in MLD query for lookup
	 * whilst we don't hold other locks (due to KAME locking lameness).
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr))
		in6_setscope(&mld->mld_addr, ifp, NULL);

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * MLDv1 report suppression.
	 * If we are a member of this group, and our membership should be
	 * reported, and our group timer is pending or about to be reset,
	 * stop our group timer by transitioning to the 'lazy' state.
	 */
	inm = in6m_lookup_locked(ifp, &mld->mld_addr);
	if (inm != NULL) {
		struct mld_ifsoftc *mli;

		mli = inm->in6m_mli;
		KASSERT(mli != NULL,
		    ("%s: no mli for ifp %p", __func__, ifp));

		/*
		 * If we are in MLDv2 host mode, do not allow the
		 * other host's MLDv1 report to suppress our reports.
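		 * (MLDv2 itself has no listener report suppression; the
		 * mechanism was removed in RFC 3810.)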
		 */
		if (mli->mli_version == MLD_VERSION_2)
			goto out_locked;

		inm->in6m_timer = 0;

		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_SLEEPING_MEMBER:
			break;
		case MLD_REPORTING_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_AWAKENING_MEMBER:
			CTR3(KTR_MLD,
			    "report suppressed for %s on ifp %p(%s)",
			    ip6_sprintf(ip6tbuf, &mld->mld_addr),
			    ifp, if_name(ifp));
		case MLD_LAZY_MEMBER:
			inm->in6m_state = MLD_LAZY_MEMBER;
			break;
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
		case MLD_LEAVING_MEMBER:
			break;
		}
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();

	/* XXX Clear embedded scope ID as userland won't expect it. */
	in6_clearscope(&mld->mld_addr);

	return (0);
}

/*
 * MLD input path.
 *
 * Assume query messages which fit in a single ICMPv6 message header
 * have been pulled up.
 * Assume that userland will want to see the message, even if it
 * otherwise fails kernel input validation; do not free it.
 * Pullup may however free the mbuf chain m if it fails.
 *
 * Return IPPROTO_DONE if we freed m. Otherwise, return 0.
 */
int
mld_input(struct mbuf **mp, int off, int icmp6len)
{
	struct ifnet	*ifp;
	struct ip6_hdr	*ip6;
	struct mbuf	*m;
	struct mld_hdr	*mld;
	int		 mldlen;

	m = *mp;
	CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off);

	ifp = m->m_pkthdr.rcvif;

	/* Pullup to appropriate size. */
	if (m->m_len < off + sizeof(*mld)) {
		m = m_pullup(m, off + sizeof(*mld));
		if (m == NULL) {
			ICMP6STAT_INC(icp6s_badlen);
			return (IPPROTO_DONE);
		}
	}
	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);
	if (mld->mld_type == MLD_LISTENER_QUERY &&
	    icmp6len >= sizeof(struct mldv2_query)) {
		mldlen = sizeof(struct mldv2_query);
	} else {
		mldlen = sizeof(struct mld_hdr);
	}
	if (m->m_len < off + mldlen) {
		m = m_pullup(m, off + mldlen);
		if (m == NULL) {
			ICMP6STAT_INC(icp6s_badlen);
			return (IPPROTO_DONE);
		}
	}
	*mp = m;
	ip6 = mtod(m, struct ip6_hdr *);
	mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off);

	/*
	 * Userland needs to see all of this traffic for implementing
	 * the endpoint discovery portion of multicast routing.
	 */
	switch (mld->mld_type) {
	case MLD_LISTENER_QUERY:
		icmp6_ifstat_inc(ifp, ifs6_in_mldquery);
		if (icmp6len == sizeof(struct mld_hdr)) {
			if (mld_v1_input_query(ifp, ip6, mld) != 0)
				return (0);
		} else if (icmp6len >= sizeof(struct mldv2_query)) {
			if (mld_v2_input_query(ifp, ip6, m,
			    (struct mldv2_query *)mld, off, icmp6len) != 0)
				return (0);
		}
		break;
	case MLD_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		if (mld_v1_input_report(ifp, ip6, mld) != 0)
			return (0);
		break;
	case MLDV2_LISTENER_REPORT:
		icmp6_ifstat_inc(ifp, ifs6_in_mldreport);
		break;
	case MLD_LISTENER_DONE:
		icmp6_ifstat_inc(ifp, ifs6_in_mlddone);
		break;
	default:
		break;
	}

	return (0);
}

/*
 * Fast timeout handler (global).
 * VIMAGE: Timeout handlers are expected to service all vimages.
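 * (Assumption: this is driven from the protocol fast-timeout path, i.e.
 * PR_FASTHZ times per second, roughly every 200 ms; the per-group and
 * per-link timers below count in these ticks.)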
 */
void
mld_fasttimo(void)
{
	struct in6_multi_head inmh;
	VNET_ITERATOR_DECL(vnet_iter);

	SLIST_INIT(&inmh);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		mld_fasttimo_vnet(&inmh);
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
	in6m_release_list_deferred(&inmh);
}

/*
 * Fast timeout handler (per-vnet).
 *
 * VIMAGE: Assume caller has set up our curvnet.
 */
static void
mld_fasttimo_vnet(struct in6_multi_head *inmh)
{
	struct epoch_tracker	 et;
	struct mbufq		 scq;	/* State-change packets */
	struct mbufq		 qrq;	/* Query response packets */
	struct ifnet		*ifp;
	struct mld_ifsoftc	*mli;
	struct ifmultiaddr	*ifma;
	struct in6_multi	*inm;
	int			 uri_fasthz;

	uri_fasthz = 0;

	/*
	 * Quick check to see if any work needs to be done, in order to
	 * minimize the overhead of fasttimo processing.
	 * SMPng: XXX Unlocked reads.
	 */
	if (!V_current_state_timers_running6 &&
	    !V_interface_timers_running6 &&
	    !V_state_change_timers_running6)
		return;

	IN6_MULTI_LIST_LOCK();
	MLD_LOCK();

	/*
	 * MLDv2 General Query response timer processing.
	 */
	if (V_interface_timers_running6) {
		CTR1(KTR_MLD, "%s: interface timers running", __func__);

		V_interface_timers_running6 = 0;
		LIST_FOREACH(mli, &V_mli_head, mli_link) {
			if (mli->mli_v2_timer == 0) {
				/* Do nothing. */
			} else if (--mli->mli_v2_timer == 0) {
				mld_v2_dispatch_general_query(mli);
			} else {
				V_interface_timers_running6 = 1;
			}
		}
	}

	if (!V_current_state_timers_running6 &&
	    !V_state_change_timers_running6)
		goto out_locked;

	V_current_state_timers_running6 = 0;
	V_state_change_timers_running6 = 0;

	CTR1(KTR_MLD, "%s: state change timers running", __func__);

	/*
	 * MLD host report and state-change timer processing.
	 * Note: Processing a v2 group timer may remove a node.
	 */
	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		ifp = mli->mli_ifp;

		if (mli->mli_version == MLD_VERSION_2) {
			uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri *
			    PR_FASTHZ);
			mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS);
			mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS);
		}

		NET_EPOCH_ENTER(et);
		IF_ADDR_WLOCK(ifp);
		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
			inm = in6m_ifmultiaddr_get_inm(ifma);
			if (inm == NULL)
				continue;
			switch (mli->mli_version) {
			case MLD_VERSION_1:
				mld_v1_process_group_timer(inmh, inm);
				break;
			case MLD_VERSION_2:
				mld_v2_process_group_timers(inmh, &qrq,
				    &scq, inm, uri_fasthz);
				break;
			}
		}
		IF_ADDR_WUNLOCK(ifp);

		switch (mli->mli_version) {
		case MLD_VERSION_1:
			/*
			 * Transmit reports for this lifecycle.  This
			 * is done while not holding IF_ADDR_LOCK
			 * since this can call
			 * in6ifa_ifpforlinklocal() which locks
			 * IF_ADDR_LOCK internally as well as
			 * ip6_output() to transmit a packet.
			 */
			while ((inm = SLIST_FIRST(inmh)) != NULL) {
				SLIST_REMOVE_HEAD(inmh, in6m_defer);
				(void)mld_v1_transmit_report(inm,
				    MLD_LISTENER_REPORT);
			}
			break;
		case MLD_VERSION_2:
			mld_dispatch_queue(&qrq, 0);
			mld_dispatch_queue(&scq, 0);
			break;
		}
		NET_EPOCH_EXIT(et);
	}

out_locked:
	MLD_UNLOCK();
	IN6_MULTI_LIST_UNLOCK();
}

/*
 * Update host report group timer.
 * Will update the global pending timer flags.
 */
static void
mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm)
{
	int report_timer_expired;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	if (inm->in6m_timer == 0) {
		report_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		report_timer_expired = 1;
	} else {
		V_current_state_timers_running6 = 1;
		return;
	}

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_AWAKENING_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (report_timer_expired) {
			inm->in6m_state = MLD_IDLE_MEMBER;
			SLIST_INSERT_HEAD(inmh, inm, in6m_defer);
		}
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Update a group's timers for MLDv2.
 * Will update the global pending timer flags.
 * Note: Unlocked read from mli.
 */
static void
mld_v2_process_group_timers(struct in6_multi_head *inmh,
    struct mbufq *qrq, struct mbufq *scq,
    struct in6_multi *inm, const int uri_fasthz)
{
	int query_response_timer_expired;
	int state_change_retransmit_timer_expired;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	query_response_timer_expired = 0;
	state_change_retransmit_timer_expired = 0;

	/*
	 * During a transition from compatibility mode back to MLDv2,
	 * a group record in REPORTING state may still have its group
	 * timer active. This is a no-op in this function; it is easier
	 * to deal with it here than to complicate the slow-timeout path.
	 */
	if (inm->in6m_timer == 0) {
		query_response_timer_expired = 0;
	} else if (--inm->in6m_timer == 0) {
		query_response_timer_expired = 1;
	} else {
		V_current_state_timers_running6 = 1;
	}

	if (inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 0;
	} else if (--inm->in6m_sctimer == 0) {
		state_change_retransmit_timer_expired = 1;
	} else {
		V_state_change_timers_running6 = 1;
	}

	/* We are in fasttimo, so be quick about it. */
	if (!state_change_retransmit_timer_expired &&
	    !query_response_timer_expired)
		return;

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
		break;
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		/*
		 * Respond to a previously pending Group-Specific
		 * or Group-and-Source-Specific query by enqueueing
		 * the appropriate Current-State report for
		 * immediate transmission.
		 */
		if (query_response_timer_expired) {
			int retval;

			retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1,
			    (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER),
			    0);
			CTR2(KTR_MLD, "%s: enqueue record = %d",
			    __func__, retval);
			inm->in6m_state = MLD_REPORTING_MEMBER;
			in6m_clear_recorded(inm);
		}
		/* FALLTHROUGH */
	case MLD_REPORTING_MEMBER:
	case MLD_LEAVING_MEMBER:
		if (state_change_retransmit_timer_expired) {
			/*
			 * State-change retransmission timer fired.
			 * If there are any further pending retransmissions,
			 * set the global pending state-change flag, and
			 * reset the timer.
			 */
			if (--inm->in6m_scrv > 0) {
				inm->in6m_sctimer = uri_fasthz;
				V_state_change_timers_running6 = 1;
			}
			/*
			 * Retransmit the previously computed state-change
			 * report. If there are no further pending
			 * retransmissions, the mbuf queue will be consumed.
			 * Update T0 state to T1 as we have now sent
			 * a state-change.
			 */
			(void)mld_v2_merge_state_changes(inm, scq);

			in6m_commit(inm);
			CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__,
			    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
			    if_name(inm->in6m_ifp));

			/*
			 * If we are leaving the group for good, make sure
			 * we release MLD's reference to it.
			 * This release must be deferred using a SLIST,
			 * as we are called from a loop which traverses
			 * the in_ifmultiaddr TAILQ.
			 */
			if (inm->in6m_state == MLD_LEAVING_MEMBER &&
			    inm->in6m_scrv == 0) {
				inm->in6m_state = MLD_NOT_MEMBER;
				in6m_disconnect_locked(inmh, inm);
				in6m_rele_locked(inmh, inm);
			}
		}
		break;
	}
}

/*
 * Switch to a different version on the given interface,
 * as per RFC 3810, Section 9.12.
 */
static void
mld_set_version(struct mld_ifsoftc *mli, const int version)
{
	int old_version_timer;

	MLD_LOCK_ASSERT();

	CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__,
	    version, mli->mli_ifp, if_name(mli->mli_ifp));

	if (version == MLD_VERSION_1) {
		/*
		 * Compute the "Older Version Querier Present" timer as per
		 * RFC 3810, Section 9.12.
		 */
		old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri;
		old_version_timer *= PR_SLOWHZ;
		mli->mli_v1_timer = old_version_timer;
	}

	if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) {
		mli->mli_version = MLD_VERSION_1;
		mld_v2_cancel_link_timers(mli);
	}
}

/*
 * Cancel pending MLDv2 timers for the given link and all groups
 * joined on it; state-change, general-query, and group-query timers.
 */
static void
mld_v2_cancel_link_timers(struct mld_ifsoftc *mli)
{
	struct epoch_tracker	 et;
	struct in6_multi_head	 inmh;
	struct ifmultiaddr	*ifma;
	struct ifnet		*ifp;
	struct in6_multi	*inm;

	CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__,
	    mli->mli_ifp, if_name(mli->mli_ifp));

	SLIST_INIT(&inmh);
	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	/*
	 * Fast-track this potentially expensive operation
	 * by checking all the global 'timer pending' flags.
	 */
	if (!V_interface_timers_running6 &&
	    !V_state_change_timers_running6 &&
	    !V_current_state_timers_running6)
		return;

	mli->mli_v2_timer = 0;

	ifp = mli->mli_ifp;

	IF_ADDR_WLOCK(ifp);
	NET_EPOCH_ENTER(et);
	CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		inm = in6m_ifmultiaddr_get_inm(ifma);
		if (inm == NULL)
			continue;
		switch (inm->in6m_state) {
		case MLD_NOT_MEMBER:
		case MLD_SILENT_MEMBER:
		case MLD_IDLE_MEMBER:
		case MLD_LAZY_MEMBER:
		case MLD_SLEEPING_MEMBER:
		case MLD_AWAKENING_MEMBER:
			break;
		case MLD_LEAVING_MEMBER:
			/*
			 * If we are leaving the group and switching
			 * version, we need to release the final
			 * reference held for issuing the INCLUDE {}.
			 */
			if (inm->in6m_refcount == 1)
				in6m_disconnect_locked(&inmh, inm);
			in6m_rele_locked(&inmh, inm);
			/* FALLTHROUGH */
		case MLD_G_QUERY_PENDING_MEMBER:
		case MLD_SG_QUERY_PENDING_MEMBER:
			in6m_clear_recorded(inm);
			/* FALLTHROUGH */
		case MLD_REPORTING_MEMBER:
			inm->in6m_sctimer = 0;
			inm->in6m_timer = 0;
			inm->in6m_state = MLD_REPORTING_MEMBER;
			/*
			 * Free any pending MLDv2 state-change records.
			 */
			mbufq_drain(&inm->in6m_scq);
			break;
		}
	}
	NET_EPOCH_EXIT(et);
	IF_ADDR_WUNLOCK(ifp);
	in6m_release_list_deferred(&inmh);
}

/*
 * Global slowtimo handler.
 * VIMAGE: Timeout handlers are expected to service all vimages.
 */
void
mld_slowtimo(void)
{
	VNET_ITERATOR_DECL(vnet_iter);

	VNET_LIST_RLOCK_NOSLEEP();
	VNET_FOREACH(vnet_iter) {
		CURVNET_SET(vnet_iter);
		mld_slowtimo_vnet();
		CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK_NOSLEEP();
}

/*
 * Per-vnet slowtimo handler.
 */
static void
mld_slowtimo_vnet(void)
{
	struct mld_ifsoftc *mli;

	MLD_LOCK();

	LIST_FOREACH(mli, &V_mli_head, mli_link) {
		mld_v1_process_querier_timers(mli);
	}

	MLD_UNLOCK();
}

/*
 * Update the Older Version Querier Present timers for a link.
 * See Section 9.12 of RFC 3810.
 */
static void
mld_v1_process_querier_timers(struct mld_ifsoftc *mli)
{

	MLD_LOCK_ASSERT();

	if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) {
		/*
		 * MLDv1 Querier Present timer expired; revert to MLDv2.
		 */
		CTR5(KTR_MLD,
		    "%s: transition from v%d -> v%d on %p(%s)",
		    __func__, mli->mli_version, MLD_VERSION_2,
		    mli->mli_ifp, if_name(mli->mli_ifp));
		mli->mli_version = MLD_VERSION_2;
	}
}

/*
 * Transmit an MLDv1 report immediately.
 */
static int
mld_v1_transmit_report(struct in6_multi *in6m, const int type)
{
	struct ifnet		*ifp;
	struct in6_ifaddr	*ia;
	struct ip6_hdr		*ip6;
	struct mbuf		*mh, *md;
	struct mld_hdr		*mld;

	NET_EPOCH_ASSERT();
	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	ifp = in6m->in6m_ifp;
	/* in process of being freed */
	if (ifp == NULL)
		return (0);
	ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST);
	/* ia may be NULL if link-local address is tentative. */
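	/*
	 * If no usable link-local address exists, the report is sent
	 * below with the unspecified source address (::); RFC 3590
	 * permits this for MLD messages sent before address
	 * configuration has completed.
	 */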

	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	md = m_get(M_NOWAIT, MT_DATA);
	if (md == NULL) {
		m_free(mh);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	mh->m_next = md;

	/*
	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	 * that ether_output() does not need to allocate another mbuf
	 * for the header in the most common case.
	 */
	M_ALIGN(mh, sizeof(struct ip6_hdr));
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	mh->m_len = sizeof(struct ip6_hdr);

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6->ip6_dst = in6m->in6m_addr;

	md->m_len = sizeof(struct mld_hdr);
	mld = mtod(md, struct mld_hdr *);
	mld->mld_type = type;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_maxdelay = 0;
	mld->mld_reserved = 0;
	mld->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mld->mld_addr);
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));

	mld_save_context(mh, ifp);
	mh->m_flags |= M_MLDV1;

	mld_dispatch_packet(mh);

	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv6 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state; it is now up to MLD to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the MLDv2 state machine at group level. The MLD module
 * however makes the decision as to which MLD protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
 *
 * If delay is non-zero, and the state change is an initial multicast
 * join, the state change report will be delayed by 'delay' ticks
 * in units of PR_FASTHZ if MLDv1 is active on the link; otherwise
 * the initial MLDv2 state change report will be delayed by whichever
 * is sooner, a pending state-change timer or delay itself.
 *
 * VIMAGE: curvnet should have been set by caller, as this routine
 * is called from the socket option handlers.
 */
int
mld_change_state(struct in6_multi *inm, const int delay)
{
	struct mld_ifsoftc *mli;
	struct ifnet *ifp;
	int error;

	IN6_MULTI_LIST_LOCK_ASSERT();

	error = 0;

	/*
	 * Check if the in6_multi has already been disconnected.
	 */
	if (inm->in6m_ifp == NULL) {
		CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
		return (0);
	}

	/*
	 * Try to detect if the upper layer just asked us to change state
	 * for an interface which has now gone away.
	 */
	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->in6m_ifma->ifma_ifp;
	if (ifp == NULL)
		return (0);
	/*
	 * Sanity check that netinet6's notion of ifp is the
	 * same as net's.
1926 */ 1927 KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); 1928 1929 MLD_LOCK(); 1930 mli = MLD_IFINFO(ifp); 1931 KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); 1932 1933 /* 1934 * If we detect a state transition to or from MCAST_UNDEFINED 1935 * for this group, then we are starting or finishing an MLD 1936 * life cycle for this group. 1937 */ 1938 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) { 1939 CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__, 1940 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode); 1941 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { 1942 CTR1(KTR_MLD, "%s: initial join", __func__); 1943 error = mld_initial_join(inm, mli, delay); 1944 goto out_locked; 1945 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { 1946 CTR1(KTR_MLD, "%s: final leave", __func__); 1947 mld_final_leave(inm, mli); 1948 goto out_locked; 1949 } 1950 } else { 1951 CTR1(KTR_MLD, "%s: filter set change", __func__); 1952 } 1953 1954 error = mld_handle_state_change(inm, mli); 1955 1956 out_locked: 1957 MLD_UNLOCK(); 1958 return (error); 1959 } 1960 1961 /* 1962 * Perform the initial join for an MLD group. 1963 * 1964 * When joining a group: 1965 * If the group should have its MLD traffic suppressed, do nothing. 1966 * MLDv1 starts sending MLDv1 host membership reports. 1967 * MLDv2 will schedule an MLDv2 state-change report containing the 1968 * initial state of the membership. 1969 * 1970 * If the delay argument is non-zero, then we must delay sending the 1971 * initial state change for delay ticks (in units of PR_FASTHZ). 1972 */ 1973 static int 1974 mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, 1975 const int delay) 1976 { 1977 struct epoch_tracker et; 1978 struct ifnet *ifp; 1979 struct mbufq *mq; 1980 int error, retval, syncstates; 1981 int odelay; 1982 #ifdef KTR 1983 char ip6tbuf[INET6_ADDRSTRLEN]; 1984 #endif 1985 1986 CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)", 1987 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 1988 inm->in6m_ifp, if_name(inm->in6m_ifp)); 1989 1990 error = 0; 1991 syncstates = 1; 1992 1993 ifp = inm->in6m_ifp; 1994 1995 IN6_MULTI_LIST_LOCK_ASSERT(); 1996 MLD_LOCK_ASSERT(); 1997 1998 KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__)); 1999 2000 /* 2001 * Groups joined on loopback or marked as 'not reported', 2002 * enter the MLD_SILENT_MEMBER state and 2003 * are never reported in any protocol exchanges. 2004 * All other groups enter the appropriate state machine 2005 * for the version in use on this link. 2006 * A link marked as MLIF_SILENT causes MLD to be completely 2007 * disabled for the link. 2008 */ 2009 if ((ifp->if_flags & IFF_LOOPBACK) || 2010 (mli->mli_flags & MLIF_SILENT) || 2011 !mld_is_addr_reported(&inm->in6m_addr)) { 2012 CTR1(KTR_MLD, 2013 "%s: not kicking state machine for silent group", __func__); 2014 inm->in6m_state = MLD_SILENT_MEMBER; 2015 inm->in6m_timer = 0; 2016 } else { 2017 /* 2018 * Deal with overlapping in_multi lifecycle. 2019 * If this group was LEAVING, then make sure 2020 * we drop the reference we picked up to keep the 2021 * group around for the final INCLUDE {} enqueue. 
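* That reference was taken in mld_final_leave() when the pending TO_IN {} record was enqueued.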
2022 */ 2023 if (mli->mli_version == MLD_VERSION_2 && 2024 inm->in6m_state == MLD_LEAVING_MEMBER) { 2025 inm->in6m_refcount--; 2026 MPASS(inm->in6m_refcount > 0); 2027 } 2028 inm->in6m_state = MLD_REPORTING_MEMBER; 2029 2030 switch (mli->mli_version) { 2031 case MLD_VERSION_1: 2032 /* 2033 * If a delay was provided, only use it if 2034 * it is greater than the delay normally 2035 * used for an MLDv1 state change report, 2036 * and delay sending the initial MLDv1 report 2037 * by not transitioning to the IDLE state. 2038 */ 2039 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * PR_FASTHZ); 2040 if (delay) { 2041 inm->in6m_timer = max(delay, odelay); 2042 V_current_state_timers_running6 = 1; 2043 } else { 2044 inm->in6m_state = MLD_IDLE_MEMBER; 2045 NET_EPOCH_ENTER(et); 2046 error = mld_v1_transmit_report(inm, 2047 MLD_LISTENER_REPORT); 2048 NET_EPOCH_EXIT(et); 2049 if (error == 0) { 2050 inm->in6m_timer = odelay; 2051 V_current_state_timers_running6 = 1; 2052 } 2053 } 2054 break; 2055 2056 case MLD_VERSION_2: 2057 /* 2058 * Defer update of T0 to T1, until the first copy 2059 * of the state change has been transmitted. 2060 */ 2061 syncstates = 0; 2062 2063 /* 2064 * Immediately enqueue a State-Change Report for 2065 * this interface, freeing any previous reports. 2066 * Don't kick the timers if there is nothing to do, 2067 * or if an error occurred. 2068 */ 2069 mq = &inm->in6m_scq; 2070 mbufq_drain(mq); 2071 retval = mld_v2_enqueue_group_record(mq, inm, 1, 2072 0, 0, (mli->mli_flags & MLIF_USEALLOW)); 2073 CTR2(KTR_MLD, "%s: enqueue record = %d", 2074 __func__, retval); 2075 if (retval <= 0) { 2076 error = retval * -1; 2077 break; 2078 } 2079 2080 /* 2081 * Schedule transmission of pending state-change 2082 * report up to RV times for this link. The timer 2083 * will fire at the next mld_fasttimo (~200ms), 2084 * giving us an opportunity to merge the reports. 2085 * 2086 * If a delay was provided to this function, only 2087 * use this delay if sooner than the existing one. 2088 */ 2089 KASSERT(mli->mli_rv > 1, 2090 ("%s: invalid robustness %d", __func__, 2091 mli->mli_rv)); 2092 inm->in6m_scrv = mli->mli_rv; 2093 if (delay) { 2094 if (inm->in6m_sctimer > 1) { 2095 inm->in6m_sctimer = 2096 min(inm->in6m_sctimer, delay); 2097 } else 2098 inm->in6m_sctimer = delay; 2099 } else 2100 inm->in6m_sctimer = 1; 2101 V_state_change_timers_running6 = 1; 2102 2103 error = 0; 2104 break; 2105 } 2106 } 2107 2108 /* 2109 * Only update the T0 state if state change is atomic, 2110 * i.e. we don't need to wait for a timer to fire before we 2111 * can consider the state change to have been communicated. 2112 */ 2113 if (syncstates) { 2114 in6m_commit(inm); 2115 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2116 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2117 if_name(inm->in6m_ifp)); 2118 } 2119 2120 return (error); 2121 } 2122 2123 /* 2124 * Issue an intermediate state change during the life-cycle. 
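* That is, the group is already joined and only its filter mode or source list has changed; on MLDv1 links and for silent groups the new state is simply committed without sending a report.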
2125 */ 2126 static int 2127 mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli) 2128 { 2129 struct ifnet *ifp; 2130 int retval; 2131 #ifdef KTR 2132 char ip6tbuf[INET6_ADDRSTRLEN]; 2133 #endif 2134 2135 CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)", 2136 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2137 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2138 2139 ifp = inm->in6m_ifp; 2140 2141 IN6_MULTI_LIST_LOCK_ASSERT(); 2142 MLD_LOCK_ASSERT(); 2143 2144 KASSERT(mli && mli->mli_ifp == ifp, 2145 ("%s: inconsistent ifp", __func__)); 2146 2147 if ((ifp->if_flags & IFF_LOOPBACK) || 2148 (mli->mli_flags & MLIF_SILENT) || 2149 !mld_is_addr_reported(&inm->in6m_addr) || 2150 (mli->mli_version != MLD_VERSION_2)) { 2151 if (!mld_is_addr_reported(&inm->in6m_addr)) { 2152 CTR1(KTR_MLD, 2153 "%s: not kicking state machine for silent group", __func__); 2154 } 2155 CTR1(KTR_MLD, "%s: nothing to do", __func__); 2156 in6m_commit(inm); 2157 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2158 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2159 if_name(inm->in6m_ifp)); 2160 return (0); 2161 } 2162 2163 mbufq_drain(&inm->in6m_scq); 2164 2165 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, 2166 (mli->mli_flags & MLIF_USEALLOW)); 2167 CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); 2168 if (retval <= 0) 2169 return (-retval); 2170 2171 /* 2172 * If record(s) were enqueued, start the state-change 2173 * report timer for this group. 2174 */ 2175 inm->in6m_scrv = mli->mli_rv; 2176 inm->in6m_sctimer = 1; 2177 V_state_change_timers_running6 = 1; 2178 2179 return (0); 2180 } 2181 2182 /* 2183 * Perform the final leave for a multicast address. 2184 * 2185 * When leaving a group: 2186 * MLDv1 sends a DONE message, if and only if we are the reporter. 2187 * MLDv2 enqueues a state-change report containing a transition 2188 * to INCLUDE {} for immediate transmission. 2189 */ 2190 static void 2191 mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) 2192 { 2193 struct epoch_tracker et; 2194 int syncstates; 2195 #ifdef KTR 2196 char ip6tbuf[INET6_ADDRSTRLEN]; 2197 #endif 2198 2199 syncstates = 1; 2200 2201 CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", 2202 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2203 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2204 2205 IN6_MULTI_LIST_LOCK_ASSERT(); 2206 MLD_LOCK_ASSERT(); 2207 2208 switch (inm->in6m_state) { 2209 case MLD_NOT_MEMBER: 2210 case MLD_SILENT_MEMBER: 2211 case MLD_LEAVING_MEMBER: 2212 /* Already leaving or left; do nothing. */ 2213 CTR1(KTR_MLD, 2214 "%s: not kicking state machine for silent group", __func__); 2215 break; 2216 case MLD_REPORTING_MEMBER: 2217 case MLD_IDLE_MEMBER: 2218 case MLD_G_QUERY_PENDING_MEMBER: 2219 case MLD_SG_QUERY_PENDING_MEMBER: 2220 if (mli->mli_version == MLD_VERSION_1) { 2221 #ifdef INVARIANTS 2222 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || 2223 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) 2224 panic("%s: MLDv2 state reached, not MLDv2 mode", 2225 __func__); 2226 #endif 2227 NET_EPOCH_ENTER(et); 2228 mld_v1_transmit_report(inm, MLD_LISTENER_DONE); 2229 NET_EPOCH_EXIT(et); 2230 inm->in6m_state = MLD_NOT_MEMBER; 2231 V_current_state_timers_running6 = 1; 2232 } else if (mli->mli_version == MLD_VERSION_2) { 2233 /* 2234 * Stop group timer and all pending reports. 2235 * Immediately enqueue a state-change report 2236 * TO_IN {} to be sent on the next fast timeout, 2237 * giving us an opportunity to merge reports. 
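* The report will be retransmitted up to mli_rv times; the reference taken on the group below keeps it alive while those retransmissions are pending.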
2238 */ 2239 mbufq_drain(&inm->in6m_scq); 2240 inm->in6m_timer = 0; 2241 inm->in6m_scrv = mli->mli_rv; 2242 CTR4(KTR_MLD, "%s: Leaving %s/%s with %d " 2243 "pending retransmissions.", __func__, 2244 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2245 if_name(inm->in6m_ifp), inm->in6m_scrv); 2246 if (inm->in6m_scrv == 0) { 2247 inm->in6m_state = MLD_NOT_MEMBER; 2248 inm->in6m_sctimer = 0; 2249 } else { 2250 int retval; 2251 2252 in6m_acquire_locked(inm); 2253 2254 retval = mld_v2_enqueue_group_record( 2255 &inm->in6m_scq, inm, 1, 0, 0, 2256 (mli->mli_flags & MLIF_USEALLOW)); 2257 KASSERT(retval != 0, 2258 ("%s: enqueue record = %d", __func__, 2259 retval)); 2260 2261 inm->in6m_state = MLD_LEAVING_MEMBER; 2262 inm->in6m_sctimer = 1; 2263 V_state_change_timers_running6 = 1; 2264 syncstates = 0; 2265 } 2266 break; 2267 } 2268 break; 2269 case MLD_LAZY_MEMBER: 2270 case MLD_SLEEPING_MEMBER: 2271 case MLD_AWAKENING_MEMBER: 2272 /* Our reports are suppressed; do nothing. */ 2273 break; 2274 } 2275 2276 if (syncstates) { 2277 in6m_commit(inm); 2278 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2279 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2280 if_name(inm->in6m_ifp)); 2281 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; 2282 CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s", 2283 __func__, &inm->in6m_addr, if_name(inm->in6m_ifp)); 2284 } 2285 } 2286 2287 /* 2288 * Enqueue an MLDv2 group record to the given output queue. 2289 * 2290 * If is_state_change is zero, a current-state record is appended. 2291 * If is_state_change is non-zero, a state-change report is appended. 2292 * 2293 * If is_group_query is non-zero, an mbuf packet chain is allocated. 2294 * If is_group_query is zero, and if there is a packet with free space 2295 * at the tail of the queue, it will be appended to providing there 2296 * is enough free space. 2297 * Otherwise a new mbuf packet chain is allocated. 2298 * 2299 * If is_source_query is non-zero, each source is checked to see if 2300 * it was recorded for a Group-Source query, and will be omitted if 2301 * it is not both in-mode and recorded. 2302 * 2303 * If use_block_allow is non-zero, state change reports for initial join 2304 * and final leave, on an inclusive mode group with a source list, will be 2305 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively. 2306 * 2307 * The function will attempt to allocate leading space in the packet 2308 * for the IPv6+ICMP headers to be prepended without fragmenting the chain. 2309 * 2310 * If successful the size of all data appended to the queue is returned, 2311 * otherwise an error code less than zero is returned, or zero if 2312 * no record(s) were appended. 
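* A negative return is the negated errno (currently only -ENOMEM).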
2313 */ 2314 static int 2315 mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm, 2316 const int is_state_change, const int is_group_query, 2317 const int is_source_query, const int use_block_allow) 2318 { 2319 struct mldv2_record mr; 2320 struct mldv2_record *pmr; 2321 struct ifnet *ifp; 2322 struct ip6_msource *ims, *nims; 2323 struct mbuf *m0, *m, *md; 2324 int is_filter_list_change; 2325 int minrec0len, m0srcs, msrcs, nbytes, off; 2326 int record_has_sources; 2327 int now; 2328 int type; 2329 uint8_t mode; 2330 #ifdef KTR 2331 char ip6tbuf[INET6_ADDRSTRLEN]; 2332 #endif 2333 2334 IN6_MULTI_LIST_LOCK_ASSERT(); 2335 2336 ifp = inm->in6m_ifp; 2337 is_filter_list_change = 0; 2338 m = NULL; 2339 m0 = NULL; 2340 m0srcs = 0; 2341 msrcs = 0; 2342 nbytes = 0; 2343 nims = NULL; 2344 record_has_sources = 1; 2345 pmr = NULL; 2346 type = MLD_DO_NOTHING; 2347 mode = inm->in6m_st[1].iss_fmode; 2348 2349 /* 2350 * If we did not transition out of ASM mode during t0->t1, 2351 * and there are no source nodes to process, we can skip 2352 * the generation of source records. 2353 */ 2354 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 && 2355 inm->in6m_nsrc == 0) 2356 record_has_sources = 0; 2357 2358 if (is_state_change) { 2359 /* 2360 * Queue a state change record. 2361 * If the mode did not change, and there are non-ASM 2362 * listeners or source filters present, 2363 * we potentially need to issue two records for the group. 2364 * If there are ASM listeners, and there was no filter 2365 * mode transition of any kind, do nothing. 2366 * 2367 * If we are transitioning to MCAST_UNDEFINED, we need 2368 * not send any sources. A transition to/from this state is 2369 * considered inclusive with some special treatment. 2370 * 2371 * If we are rewriting initial joins/leaves to use 2372 * ALLOW/BLOCK, and the group's membership is inclusive, 2373 * we need to send sources in all cases. 2374 */ 2375 if (mode != inm->in6m_st[0].iss_fmode) { 2376 if (mode == MCAST_EXCLUDE) { 2377 CTR1(KTR_MLD, "%s: change to EXCLUDE", 2378 __func__); 2379 type = MLD_CHANGE_TO_EXCLUDE_MODE; 2380 } else { 2381 CTR1(KTR_MLD, "%s: change to INCLUDE", 2382 __func__); 2383 if (use_block_allow) { 2384 /* 2385 * XXX 2386 * Here we're interested in state 2387 * edges either direction between 2388 * MCAST_UNDEFINED and MCAST_INCLUDE. 2389 * Perhaps we should just check 2390 * the group state, rather than 2391 * the filter mode. 2392 */ 2393 if (mode == MCAST_UNDEFINED) { 2394 type = MLD_BLOCK_OLD_SOURCES; 2395 } else { 2396 type = MLD_ALLOW_NEW_SOURCES; 2397 } 2398 } else { 2399 type = MLD_CHANGE_TO_INCLUDE_MODE; 2400 if (mode == MCAST_UNDEFINED) 2401 record_has_sources = 0; 2402 } 2403 } 2404 } else { 2405 if (record_has_sources) { 2406 is_filter_list_change = 1; 2407 } else { 2408 type = MLD_DO_NOTHING; 2409 } 2410 } 2411 } else { 2412 /* 2413 * Queue a current state record. 2414 */ 2415 if (mode == MCAST_EXCLUDE) { 2416 type = MLD_MODE_IS_EXCLUDE; 2417 } else if (mode == MCAST_INCLUDE) { 2418 type = MLD_MODE_IS_INCLUDE; 2419 KASSERT(inm->in6m_st[1].iss_asm == 0, 2420 ("%s: inm %p is INCLUDE but ASM count is %d", 2421 __func__, inm, inm->in6m_st[1].iss_asm)); 2422 } 2423 } 2424 2425 /* 2426 * Generate the filter list changes using a separate function. 
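* (mld_v2_enqueue_filter_change() walks the source tree and emits the ALLOW_NEW/BLOCK_OLD deltas.)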
2427 */ 2428 if (is_filter_list_change) 2429 return (mld_v2_enqueue_filter_change(mq, inm)); 2430 2431 if (type == MLD_DO_NOTHING) { 2432 CTR3(KTR_MLD, "%s: nothing to do for %s/%s", 2433 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2434 if_name(inm->in6m_ifp)); 2435 return (0); 2436 } 2437 2438 /* 2439 * If any sources are present, we must be able to fit at least 2440 * one in the trailing space of the tail packet's mbuf, 2441 * ideally more. 2442 */ 2443 minrec0len = sizeof(struct mldv2_record); 2444 if (record_has_sources) 2445 minrec0len += sizeof(struct in6_addr); 2446 2447 CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, 2448 mld_rec_type_to_str(type), 2449 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2450 if_name(inm->in6m_ifp)); 2451 2452 /* 2453 * Check if we have a packet in the tail of the queue for this 2454 * group into which the first group record for this group will fit. 2455 * Otherwise allocate a new packet. 2456 * Always allocate leading space for IP6+RA+ICMPV6+REPORT. 2457 * Note: Group records for G/GSR query responses MUST be sent 2458 * in their own packet. 2459 */ 2460 m0 = mbufq_last(mq); 2461 if (!is_group_query && 2462 m0 != NULL && 2463 (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && 2464 (m0->m_pkthdr.len + minrec0len) < 2465 (ifp->if_mtu - MLD_MTUSPACE)) { 2466 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2467 sizeof(struct mldv2_record)) / 2468 sizeof(struct in6_addr); 2469 m = m0; 2470 CTR1(KTR_MLD, "%s: use existing packet", __func__); 2471 } else { 2472 if (mbufq_full(mq)) { 2473 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2474 return (-ENOMEM); 2475 } 2476 m = NULL; 2477 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2478 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2479 if (!is_state_change && !is_group_query) 2480 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2481 if (m == NULL) 2482 m = m_gethdr(M_NOWAIT, MT_DATA); 2483 if (m == NULL) 2484 return (-ENOMEM); 2485 2486 mld_save_context(m, ifp); 2487 2488 CTR1(KTR_MLD, "%s: allocated first packet", __func__); 2489 } 2490 2491 /* 2492 * Append group record. 2493 * If we have sources, we don't know how many yet. 2494 */ 2495 mr.mr_type = type; 2496 mr.mr_datalen = 0; 2497 mr.mr_numsrc = 0; 2498 mr.mr_addr = inm->in6m_addr; 2499 in6_clearscope(&mr.mr_addr); 2500 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2501 if (m != m0) 2502 m_freem(m); 2503 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2504 return (-ENOMEM); 2505 } 2506 nbytes += sizeof(struct mldv2_record); 2507 2508 /* 2509 * Append as many sources as will fit in the first packet. 2510 * If we are appending to a new packet, the chain allocation 2511 * may potentially use clusters; use m_getptr() in this case. 2512 * If we are appending to an existing packet, we need to obtain 2513 * a pointer to the group record after m_append(), in case a new 2514 * mbuf was allocated. 2515 * 2516 * Only append sources which are in-mode at t1. If we are 2517 * transitioning to MCAST_UNDEFINED state on the group, and 2518 * use_block_allow is zero, do not include source entries. 2519 * Otherwise, we need to include this source in the report. 2520 * 2521 * Only report recorded sources in our filter set when responding 2522 * to a group-source query. 
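* (A source whose im6s_stp flag is clear was not recorded for the pending query and is skipped.)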
2523 */ 2524 if (record_has_sources) { 2525 if (m == m0) { 2526 md = m_last(m); 2527 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2528 md->m_len - nbytes); 2529 } else { 2530 md = m_getptr(m, 0, &off); 2531 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2532 off); 2533 } 2534 msrcs = 0; 2535 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, 2536 nims) { 2537 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2538 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2539 now = im6s_get_mode(inm, ims, 1); 2540 CTR2(KTR_MLD, "%s: node is %d", __func__, now); 2541 if ((now != mode) || 2542 (now == mode && 2543 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2544 CTR1(KTR_MLD, "%s: skip node", __func__); 2545 continue; 2546 } 2547 if (is_source_query && ims->im6s_stp == 0) { 2548 CTR1(KTR_MLD, "%s: skip unrecorded node", 2549 __func__); 2550 continue; 2551 } 2552 CTR1(KTR_MLD, "%s: append node", __func__); 2553 if (!m_append(m, sizeof(struct in6_addr), 2554 (void *)&ims->im6s_addr)) { 2555 if (m != m0) 2556 m_freem(m); 2557 CTR1(KTR_MLD, "%s: m_append() failed.", 2558 __func__); 2559 return (-ENOMEM); 2560 } 2561 nbytes += sizeof(struct in6_addr); 2562 ++msrcs; 2563 if (msrcs == m0srcs) 2564 break; 2565 } 2566 CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__, 2567 msrcs); 2568 pmr->mr_numsrc = htons(msrcs); 2569 nbytes += (msrcs * sizeof(struct in6_addr)); 2570 } 2571 2572 if (is_source_query && msrcs == 0) { 2573 CTR1(KTR_MLD, "%s: no recorded sources to report", __func__); 2574 if (m != m0) 2575 m_freem(m); 2576 return (0); 2577 } 2578 2579 /* 2580 * We are good to go with first packet. 2581 */ 2582 if (m != m0) { 2583 CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); 2584 m->m_pkthdr.PH_vt.vt_nrecs = 1; 2585 mbufq_enqueue(mq, m); 2586 } else 2587 m->m_pkthdr.PH_vt.vt_nrecs++; 2588 2589 /* 2590 * No further work needed if no source list in packet(s). 2591 */ 2592 if (!record_has_sources) 2593 return (nbytes); 2594 2595 /* 2596 * Whilst sources remain to be announced, we need to allocate 2597 * a new packet and fill out as many sources as will fit. 2598 * Always try for a cluster first. 
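* nims carries the next unvisited node over from the tree walk above.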
2599 */ 2600 while (nims != NULL) { 2601 if (mbufq_full(mq)) { 2602 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2603 return (-ENOMEM); 2604 } 2605 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2606 if (m == NULL) 2607 m = m_gethdr(M_NOWAIT, MT_DATA); 2608 if (m == NULL) 2609 return (-ENOMEM); 2610 mld_save_context(m, ifp); 2611 md = m_getptr(m, 0, &off); 2612 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); 2613 CTR1(KTR_MLD, "%s: allocated next packet", __func__); 2614 2615 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2616 if (m != m0) 2617 m_freem(m); 2618 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2619 return (-ENOMEM); 2620 } 2621 m->m_pkthdr.PH_vt.vt_nrecs = 1; 2622 nbytes += sizeof(struct mldv2_record); 2623 2624 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2625 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2626 2627 msrcs = 0; 2628 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2629 CTR2(KTR_MLD, "%s: visit node %s", 2630 __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2631 now = im6s_get_mode(inm, ims, 1); 2632 if ((now != mode) || 2633 (now == mode && 2634 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2635 CTR1(KTR_MLD, "%s: skip node", __func__); 2636 continue; 2637 } 2638 if (is_source_query && ims->im6s_stp == 0) { 2639 CTR1(KTR_MLD, "%s: skip unrecorded node", 2640 __func__); 2641 continue; 2642 } 2643 CTR1(KTR_MLD, "%s: append node", __func__); 2644 if (!m_append(m, sizeof(struct in6_addr), 2645 (void *)&ims->im6s_addr)) { 2646 if (m != m0) 2647 m_freem(m); 2648 CTR1(KTR_MLD, "%s: m_append() failed.", 2649 __func__); 2650 return (-ENOMEM); 2651 } 2652 ++msrcs; 2653 if (msrcs == m0srcs) 2654 break; 2655 } 2656 pmr->mr_numsrc = htons(msrcs); 2657 nbytes += (msrcs * sizeof(struct in6_addr)); 2658 2659 CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); 2660 mbufq_enqueue(mq, m); 2661 } 2662 2663 return (nbytes); 2664 } 2665 2666 /* 2667 * Type used to mark record pass completion. 2668 * We exploit the fact we can cast to this easily from the 2669 * current filter modes on each ip_msource node. 2670 */ 2671 typedef enum { 2672 REC_NONE = 0x00, /* MCAST_UNDEFINED */ 2673 REC_ALLOW = 0x01, /* MCAST_INCLUDE */ 2674 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ 2675 REC_FULL = REC_ALLOW | REC_BLOCK 2676 } rectype_t; 2677 2678 /* 2679 * Enqueue an MLDv2 filter list change to the given output queue. 2680 * 2681 * Source list filter state is held in an RB-tree. When the filter list 2682 * for a group is changed without changing its mode, we need to compute 2683 * the deltas between T0 and T1 for each source in the filter set, 2684 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. 2685 * 2686 * As we may potentially queue two record types, and the entire R-B tree 2687 * needs to be walked at once, we break this out into its own function 2688 * so we can generate a tightly packed queue of packets. 2689 * 2690 * XXX This could be written to only use one tree walk, although that makes 2691 * serializing into the mbuf chains a bit harder. For now we do two walks 2692 * which makes things easier on us, and it may or may not be harder on 2693 * the L2 cache. 2694 * 2695 * If successful the size of all data appended to the queue is returned, 2696 * otherwise an error code less than zero is returned, or zero if 2697 * no record(s) were appended. 
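* A group with no sources, or one which stayed in ASM mode across t0->t1, has no deltas and returns zero immediately.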
2698 */ 2699 static int 2700 mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm) 2701 { 2702 static const int MINRECLEN = 2703 sizeof(struct mldv2_record) + sizeof(struct in6_addr); 2704 struct ifnet *ifp; 2705 struct mldv2_record mr; 2706 struct mldv2_record *pmr; 2707 struct ip6_msource *ims, *nims; 2708 struct mbuf *m, *m0, *md; 2709 int m0srcs, nbytes, npbytes, off, rsrcs, schanged; 2710 int nallow, nblock; 2711 uint8_t mode, now, then; 2712 rectype_t crt, drt, nrt; 2713 #ifdef KTR 2714 char ip6tbuf[INET6_ADDRSTRLEN]; 2715 #endif 2716 2717 IN6_MULTI_LIST_LOCK_ASSERT(); 2718 2719 if (inm->in6m_nsrc == 0 || 2720 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) 2721 return (0); 2722 2723 ifp = inm->in6m_ifp; /* interface */ 2724 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */ 2725 crt = REC_NONE; /* current group record type */ 2726 drt = REC_NONE; /* mask of completed group record types */ 2727 nrt = REC_NONE; /* record type for current node */ 2728 m0srcs = 0; /* # source which will fit in current mbuf chain */ 2729 npbytes = 0; /* # of bytes appended this packet */ 2730 nbytes = 0; /* # of bytes appended to group's state-change queue */ 2731 rsrcs = 0; /* # sources encoded in current record */ 2732 schanged = 0; /* # nodes encoded in overall filter change */ 2733 nallow = 0; /* # of source entries in ALLOW_NEW */ 2734 nblock = 0; /* # of source entries in BLOCK_OLD */ 2735 nims = NULL; /* next tree node pointer */ 2736 2737 /* 2738 * For each possible filter record mode. 2739 * The first kind of source we encounter tells us which 2740 * is the first kind of record we start appending. 2741 * If a node transitioned to UNDEFINED at t1, its mode is treated 2742 * as the inverse of the group's filter mode. 2743 */ 2744 while (drt != REC_FULL) { 2745 do { 2746 m0 = mbufq_last(mq); 2747 if (m0 != NULL && 2748 (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= 2749 MLD_V2_REPORT_MAXRECS) && 2750 (m0->m_pkthdr.len + MINRECLEN) < 2751 (ifp->if_mtu - MLD_MTUSPACE)) { 2752 m = m0; 2753 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2754 sizeof(struct mldv2_record)) / 2755 sizeof(struct in6_addr); 2756 CTR1(KTR_MLD, 2757 "%s: use previous packet", __func__); 2758 } else { 2759 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2760 if (m == NULL) 2761 m = m_gethdr(M_NOWAIT, MT_DATA); 2762 if (m == NULL) { 2763 CTR1(KTR_MLD, 2764 "%s: m_get*() failed", __func__); 2765 return (-ENOMEM); 2766 } 2767 m->m_pkthdr.PH_vt.vt_nrecs = 0; 2768 mld_save_context(m, ifp); 2769 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2770 sizeof(struct mldv2_record)) / 2771 sizeof(struct in6_addr); 2772 npbytes = 0; 2773 CTR1(KTR_MLD, 2774 "%s: allocated new packet", __func__); 2775 } 2776 /* 2777 * Append the MLD group record header to the 2778 * current packet's data area. 2779 * Recalculate pointer to free space for next 2780 * group record, in case m_append() allocated 2781 * a new mbuf or cluster. 
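* pmr must point into whichever mbuf actually holds the record header, hence the m_getptr()/m_last() handling below.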
2782 */ 2783 memset(&mr, 0, sizeof(mr)); 2784 mr.mr_addr = inm->in6m_addr; 2785 in6_clearscope(&mr.mr_addr); 2786 if (!m_append(m, sizeof(mr), (void *)&mr)) { 2787 if (m != m0) 2788 m_freem(m); 2789 CTR1(KTR_MLD, 2790 "%s: m_append() failed", __func__); 2791 return (-ENOMEM); 2792 } 2793 npbytes += sizeof(struct mldv2_record); 2794 if (m != m0) { 2795 /* new packet; offset in chain */ 2796 md = m_getptr(m, npbytes - 2797 sizeof(struct mldv2_record), &off); 2798 pmr = (struct mldv2_record *)(mtod(md, 2799 uint8_t *) + off); 2800 } else { 2801 /* current packet; offset from last append */ 2802 md = m_last(m); 2803 pmr = (struct mldv2_record *)(mtod(md, 2804 uint8_t *) + md->m_len - 2805 sizeof(struct mldv2_record)); 2806 } 2807 /* 2808 * Begin walking the tree for this record type 2809 * pass, or continue from where we left off 2810 * previously if we had to allocate a new packet. 2811 * Only report deltas in-mode at t1. 2812 * We need not report included sources as allowed 2813 * if we are in inclusive mode on the group, 2814 * however the converse is not true. 2815 */ 2816 rsrcs = 0; 2817 if (nims == NULL) { 2818 nims = RB_MIN(ip6_msource_tree, 2819 &inm->in6m_srcs); 2820 } 2821 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2822 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2823 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2824 now = im6s_get_mode(inm, ims, 1); 2825 then = im6s_get_mode(inm, ims, 0); 2826 CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d", 2827 __func__, then, now); 2828 if (now == then) { 2829 CTR1(KTR_MLD, 2830 "%s: skip unchanged", __func__); 2831 continue; 2832 } 2833 if (mode == MCAST_EXCLUDE && 2834 now == MCAST_INCLUDE) { 2835 CTR1(KTR_MLD, 2836 "%s: skip IN src on EX group", 2837 __func__); 2838 continue; 2839 } 2840 nrt = (rectype_t)now; 2841 if (nrt == REC_NONE) 2842 nrt = (rectype_t)(~mode & REC_FULL); 2843 if (schanged++ == 0) { 2844 crt = nrt; 2845 } else if (crt != nrt) 2846 continue; 2847 if (!m_append(m, sizeof(struct in6_addr), 2848 (void *)&ims->im6s_addr)) { 2849 if (m != m0) 2850 m_freem(m); 2851 CTR1(KTR_MLD, 2852 "%s: m_append() failed", __func__); 2853 return (-ENOMEM); 2854 } 2855 nallow += !!(crt == REC_ALLOW); 2856 nblock += !!(crt == REC_BLOCK); 2857 if (++rsrcs == m0srcs) 2858 break; 2859 } 2860 /* 2861 * If we did not append any tree nodes on this 2862 * pass, back out of allocations. 2863 */ 2864 if (rsrcs == 0) { 2865 npbytes -= sizeof(struct mldv2_record); 2866 if (m != m0) { 2867 CTR1(KTR_MLD, 2868 "%s: m_free(m)", __func__); 2869 m_freem(m); 2870 } else { 2871 CTR1(KTR_MLD, 2872 "%s: m_adj(m, -mr)", __func__); 2873 m_adj(m, -((int)sizeof( 2874 struct mldv2_record))); 2875 } 2876 continue; 2877 } 2878 npbytes += (rsrcs * sizeof(struct in6_addr)); 2879 if (crt == REC_ALLOW) 2880 pmr->mr_type = MLD_ALLOW_NEW_SOURCES; 2881 else if (crt == REC_BLOCK) 2882 pmr->mr_type = MLD_BLOCK_OLD_SOURCES; 2883 pmr->mr_numsrc = htons(rsrcs); 2884 /* 2885 * Count the new group record, and enqueue this 2886 * packet if it wasn't already queued. 
2887 */ 2888 m->m_pkthdr.PH_vt.vt_nrecs++; 2889 if (m != m0) 2890 mbufq_enqueue(mq, m); 2891 nbytes += npbytes; 2892 } while (nims != NULL); 2893 drt |= crt; 2894 crt = (~crt & REC_FULL); 2895 } 2896 2897 CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__, 2898 nallow, nblock); 2899 2900 return (nbytes); 2901 } 2902 2903 static int 2904 mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq) 2905 { 2906 struct mbufq *gq; 2907 struct mbuf *m; /* pending state-change */ 2908 struct mbuf *m0; /* copy of pending state-change */ 2909 struct mbuf *mt; /* last state-change in packet */ 2910 int docopy, domerge; 2911 u_int recslen; 2912 2913 docopy = 0; 2914 domerge = 0; 2915 recslen = 0; 2916 2917 IN6_MULTI_LIST_LOCK_ASSERT(); 2918 MLD_LOCK_ASSERT(); 2919 2920 /* 2921 * If there are further pending retransmissions, make a writable 2922 * copy of each queued state-change message before merging. 2923 */ 2924 if (inm->in6m_scrv > 0) 2925 docopy = 1; 2926 2927 gq = &inm->in6m_scq; 2928 #ifdef KTR 2929 if (mbufq_first(gq) == NULL) { 2930 CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty", 2931 __func__, inm); 2932 } 2933 #endif 2934 2935 m = mbufq_first(gq); 2936 while (m != NULL) { 2937 /* 2938 * Only merge the report into the current packet if 2939 * there is sufficient space to do so; an MLDv2 report 2940 * packet may only contain 65,535 group records. 2941 * Always use a simple mbuf chain concatentation to do this, 2942 * as large state changes for single groups may have 2943 * allocated clusters. 2944 */ 2945 domerge = 0; 2946 mt = mbufq_last(scq); 2947 if (mt != NULL) { 2948 recslen = m_length(m, NULL); 2949 2950 if ((mt->m_pkthdr.PH_vt.vt_nrecs + 2951 m->m_pkthdr.PH_vt.vt_nrecs <= 2952 MLD_V2_REPORT_MAXRECS) && 2953 (mt->m_pkthdr.len + recslen <= 2954 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) 2955 domerge = 1; 2956 } 2957 2958 if (!domerge && mbufq_full(gq)) { 2959 CTR2(KTR_MLD, 2960 "%s: outbound queue full, skipping whole packet %p", 2961 __func__, m); 2962 mt = m->m_nextpkt; 2963 if (!docopy) 2964 m_freem(m); 2965 m = mt; 2966 continue; 2967 } 2968 2969 if (!docopy) { 2970 CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m); 2971 m0 = mbufq_dequeue(gq); 2972 m = m0->m_nextpkt; 2973 } else { 2974 CTR2(KTR_MLD, "%s: copying %p", __func__, m); 2975 m0 = m_dup(m, M_NOWAIT); 2976 if (m0 == NULL) 2977 return (ENOMEM); 2978 m0->m_nextpkt = NULL; 2979 m = m->m_nextpkt; 2980 } 2981 2982 if (!domerge) { 2983 CTR3(KTR_MLD, "%s: queueing %p to scq %p)", 2984 __func__, m0, scq); 2985 mbufq_enqueue(scq, m0); 2986 } else { 2987 struct mbuf *mtl; /* last mbuf of packet mt */ 2988 2989 CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)", 2990 __func__, m0, mt); 2991 2992 mtl = m_last(mt); 2993 m0->m_flags &= ~M_PKTHDR; 2994 mt->m_pkthdr.len += recslen; 2995 mt->m_pkthdr.PH_vt.vt_nrecs += 2996 m0->m_pkthdr.PH_vt.vt_nrecs; 2997 2998 mtl->m_next = m0; 2999 } 3000 } 3001 3002 return (0); 3003 } 3004 3005 /* 3006 * Respond to a pending MLDv2 General Query. 3007 */ 3008 static void 3009 mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) 3010 { 3011 struct ifmultiaddr *ifma; 3012 struct ifnet *ifp; 3013 struct in6_multi *inm; 3014 int retval; 3015 3016 NET_EPOCH_ASSERT(); 3017 IN6_MULTI_LIST_LOCK_ASSERT(); 3018 MLD_LOCK_ASSERT(); 3019 3020 KASSERT(mli->mli_version == MLD_VERSION_2, 3021 ("%s: called when version %d", __func__, mli->mli_version)); 3022 3023 /* 3024 * Check that there are some packets queued. If so, send them first. 
3025 * For large number of groups the reply to general query can take 3026 * many packets, we should finish sending them before starting of 3027 * queuing the new reply. 3028 */ 3029 if (mbufq_len(&mli->mli_gq) != 0) 3030 goto send; 3031 3032 ifp = mli->mli_ifp; 3033 3034 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3035 inm = in6m_ifmultiaddr_get_inm(ifma); 3036 if (inm == NULL) 3037 continue; 3038 KASSERT(ifp == inm->in6m_ifp, 3039 ("%s: inconsistent ifp", __func__)); 3040 3041 switch (inm->in6m_state) { 3042 case MLD_NOT_MEMBER: 3043 case MLD_SILENT_MEMBER: 3044 break; 3045 case MLD_REPORTING_MEMBER: 3046 case MLD_IDLE_MEMBER: 3047 case MLD_LAZY_MEMBER: 3048 case MLD_SLEEPING_MEMBER: 3049 case MLD_AWAKENING_MEMBER: 3050 inm->in6m_state = MLD_REPORTING_MEMBER; 3051 retval = mld_v2_enqueue_group_record(&mli->mli_gq, 3052 inm, 0, 0, 0, 0); 3053 CTR2(KTR_MLD, "%s: enqueue record = %d", 3054 __func__, retval); 3055 break; 3056 case MLD_G_QUERY_PENDING_MEMBER: 3057 case MLD_SG_QUERY_PENDING_MEMBER: 3058 case MLD_LEAVING_MEMBER: 3059 break; 3060 } 3061 } 3062 3063 send: 3064 mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST); 3065 3066 /* 3067 * Slew transmission of bursts over 500ms intervals. 3068 */ 3069 if (mbufq_first(&mli->mli_gq) != NULL) { 3070 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY( 3071 MLD_RESPONSE_BURST_INTERVAL); 3072 V_interface_timers_running6 = 1; 3073 } 3074 } 3075 3076 /* 3077 * Transmit the next pending message in the output queue. 3078 * 3079 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis. 3080 * MRT: Nothing needs to be done, as MLD traffic is always local to 3081 * a link and uses a link-scope multicast address. 3082 */ 3083 static void 3084 mld_dispatch_packet(struct mbuf *m) 3085 { 3086 struct ip6_moptions im6o; 3087 struct ifnet *ifp; 3088 struct ifnet *oifp; 3089 struct mbuf *m0; 3090 struct mbuf *md; 3091 struct ip6_hdr *ip6; 3092 struct mld_hdr *mld; 3093 int error; 3094 int off; 3095 int type; 3096 uint32_t ifindex; 3097 3098 CTR2(KTR_MLD, "%s: transmit %p", __func__, m); 3099 NET_EPOCH_ASSERT(); 3100 3101 /* 3102 * Set VNET image pointer from enqueued mbuf chain 3103 * before doing anything else. Whilst we use interface 3104 * indexes to guard against interface detach, they are 3105 * unique to each VIMAGE and must be retrieved. 3106 */ 3107 ifindex = mld_restore_context(m); 3108 3109 /* 3110 * Check if the ifnet still exists. This limits the scope of 3111 * any race in the absence of a global ifp lock for low cost 3112 * (an array lookup). 3113 */ 3114 ifp = ifnet_byindex(ifindex); 3115 if (ifp == NULL) { 3116 CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.", 3117 __func__, m, ifindex); 3118 m_freem(m); 3119 IP6STAT_INC(ip6s_noroute); 3120 goto out; 3121 } 3122 3123 im6o.im6o_multicast_hlim = 1; 3124 im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL); 3125 im6o.im6o_multicast_ifp = ifp; 3126 3127 if (m->m_flags & M_MLDV1) { 3128 m0 = m; 3129 } else { 3130 m0 = mld_v2_encap_report(ifp, m); 3131 if (m0 == NULL) { 3132 CTR2(KTR_MLD, "%s: dropped %p", __func__, m); 3133 IP6STAT_INC(ip6s_odropped); 3134 goto out; 3135 } 3136 } 3137 3138 mld_scrub_context(m0); 3139 m_clrprotoflags(m); 3140 m0->m_pkthdr.rcvif = V_loif; 3141 3142 ip6 = mtod(m0, struct ip6_hdr *); 3143 #if 0 3144 (void)in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */ 3145 #else 3146 /* 3147 * XXX XXX Break some KPI rules to prevent an LOR which would 3148 * occur if we called in6_setscope() at transmission. 3149 * See comments at top of file. 
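* MLD_EMBEDSCOPE() embeds the interface index as the scope zone of the link-local destination address.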
3150 */ 3151 MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index); 3152 #endif 3153 3154 /* 3155 * Retrieve the ICMPv6 type before handoff to ip6_output(), 3156 * so we can bump the stats. 3157 */ 3158 md = m_getptr(m0, sizeof(struct ip6_hdr), &off); 3159 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); 3160 type = mld->mld_type; 3161 3162 oifp = NULL; 3163 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, 3164 &oifp, NULL); 3165 if (error) { 3166 CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); 3167 goto out; 3168 } 3169 ICMP6STAT_INC(icp6s_outhist[type]); 3170 if (oifp != NULL) { 3171 icmp6_ifstat_inc(oifp, ifs6_out_msg); 3172 switch (type) { 3173 case MLD_LISTENER_REPORT: 3174 case MLDV2_LISTENER_REPORT: 3175 icmp6_ifstat_inc(oifp, ifs6_out_mldreport); 3176 break; 3177 case MLD_LISTENER_DONE: 3178 icmp6_ifstat_inc(oifp, ifs6_out_mlddone); 3179 break; 3180 } 3181 } 3182 out: 3183 return; 3184 } 3185 3186 /* 3187 * Encapsulate an MLDv2 report. 3188 * 3189 * KAME IPv6 requires that hop-by-hop options be passed separately, 3190 * and that the IPv6 header be prepended in a separate mbuf. 3191 * 3192 * Returns a pointer to the new mbuf chain head, or NULL if the 3193 * allocation failed. 3194 */ 3195 static struct mbuf * 3196 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) 3197 { 3198 struct mbuf *mh; 3199 struct mldv2_report *mld; 3200 struct ip6_hdr *ip6; 3201 struct in6_ifaddr *ia; 3202 int mldreclen; 3203 3204 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 3205 KASSERT((m->m_flags & M_PKTHDR), 3206 ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); 3207 3208 /* 3209 * RFC3590: OK to send as :: or tentative during DAD. 3210 */ 3211 NET_EPOCH_ASSERT(); 3212 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); 3213 if (ia == NULL) 3214 CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__); 3215 3216 mh = m_gethdr(M_NOWAIT, MT_DATA); 3217 if (mh == NULL) { 3218 if (ia != NULL) 3219 ifa_free(&ia->ia_ifa); 3220 m_freem(m); 3221 return (NULL); 3222 } 3223 M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); 3224 3225 mldreclen = m_length(m, NULL); 3226 CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen); 3227 3228 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report); 3229 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + 3230 sizeof(struct mldv2_report) + mldreclen; 3231 3232 ip6 = mtod(mh, struct ip6_hdr *); 3233 ip6->ip6_flow = 0; 3234 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 3235 ip6->ip6_vfc |= IPV6_VERSION; 3236 ip6->ip6_nxt = IPPROTO_ICMPV6; 3237 ip6->ip6_src = ia ? 
ia->ia_addr.sin6_addr : in6addr_any; 3238 if (ia != NULL) 3239 ifa_free(&ia->ia_ifa); 3240 ip6->ip6_dst = in6addr_linklocal_allv2routers; 3241 /* scope ID will be set in netisr */ 3242 3243 mld = (struct mldv2_report *)(ip6 + 1); 3244 mld->mld_type = MLDV2_LISTENER_REPORT; 3245 mld->mld_code = 0; 3246 mld->mld_cksum = 0; 3247 mld->mld_v2_reserved = 0; 3248 mld->mld_v2_numrecs = htons(m->m_pkthdr.PH_vt.vt_nrecs); 3249 m->m_pkthdr.PH_vt.vt_nrecs = 0; 3250 3251 mh->m_next = m; 3252 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, 3253 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen); 3254 return (mh); 3255 } 3256 3257 #ifdef KTR 3258 static char * 3259 mld_rec_type_to_str(const int type) 3260 { 3261 3262 switch (type) { 3263 case MLD_CHANGE_TO_EXCLUDE_MODE: 3264 return "TO_EX"; 3265 break; 3266 case MLD_CHANGE_TO_INCLUDE_MODE: 3267 return "TO_IN"; 3268 break; 3269 case MLD_MODE_IS_EXCLUDE: 3270 return "MODE_EX"; 3271 break; 3272 case MLD_MODE_IS_INCLUDE: 3273 return "MODE_IN"; 3274 break; 3275 case MLD_ALLOW_NEW_SOURCES: 3276 return "ALLOW_NEW"; 3277 break; 3278 case MLD_BLOCK_OLD_SOURCES: 3279 return "BLOCK_OLD"; 3280 break; 3281 default: 3282 break; 3283 } 3284 return "unknown"; 3285 } 3286 #endif 3287 3288 static void 3289 mld_init(void *unused __unused) 3290 { 3291 3292 CTR1(KTR_MLD, "%s: initializing", __func__); 3293 MLD_LOCK_INIT(); 3294 3295 ip6_initpktopts(&mld_po); 3296 mld_po.ip6po_hlim = 1; 3297 mld_po.ip6po_hbh = &mld_ra.hbh; 3298 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; 3299 mld_po.ip6po_flags = IP6PO_DONTFRAG; 3300 } 3301 SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL); 3302 3303 static void 3304 mld_uninit(void *unused __unused) 3305 { 3306 3307 CTR1(KTR_MLD, "%s: tearing down", __func__); 3308 MLD_LOCK_DESTROY(); 3309 } 3310 SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL); 3311 3312 static void 3313 vnet_mld_init(const void *unused __unused) 3314 { 3315 3316 CTR1(KTR_MLD, "%s: initializing", __func__); 3317 3318 LIST_INIT(&V_mli_head); 3319 } 3320 VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init, 3321 NULL); 3322 3323 static void 3324 vnet_mld_uninit(const void *unused __unused) 3325 { 3326 3327 /* This can happen if we shutdown the network stack. */ 3328 CTR1(KTR_MLD, "%s: tearing down", __func__); 3329 } 3330 VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit, 3331 NULL); 3332 3333 static int 3334 mld_modevent(module_t mod, int type, void *unused __unused) 3335 { 3336 3337 switch (type) { 3338 case MOD_LOAD: 3339 case MOD_UNLOAD: 3340 break; 3341 default: 3342 return (EOPNOTSUPP); 3343 } 3344 return (0); 3345 } 3346 3347 static moduledata_t mld_mod = { 3348 "mld", 3349 mld_modevent, 3350 0 3351 }; 3352 DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY); 3353