1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2009 Bruce Simpson. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * $KAME: mld6.c,v 1.27 2001/04/04 05:17:30 itojun Exp $ 31 */ 32 33 /*- 34 * Copyright (c) 1988 Stephen Deering. 35 * Copyright (c) 1992, 1993 36 * The Regents of the University of California. All rights reserved. 37 * 38 * This code is derived from software contributed to Berkeley by 39 * Stephen Deering of Stanford University. 40 * 41 * Redistribution and use in source and binary forms, with or without 42 * modification, are permitted provided that the following conditions 43 * are met: 44 * 1. Redistributions of source code must retain the above copyright 45 * notice, this list of conditions and the following disclaimer. 46 * 2. Redistributions in binary form must reproduce the above copyright 47 * notice, this list of conditions and the following disclaimer in the 48 * documentation and/or other materials provided with the distribution. 49 * 3. Neither the name of the University nor the names of its contributors 50 * may be used to endorse or promote products derived from this software 51 * without specific prior written permission. 52 * 53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 63 * SUCH DAMAGE. 
64 * 65 * @(#)igmp.c 8.1 (Berkeley) 7/19/93 66 */ 67 68 #include <sys/cdefs.h> 69 #include "opt_inet.h" 70 #include "opt_inet6.h" 71 72 #include <sys/param.h> 73 #include <sys/systm.h> 74 #include <sys/mbuf.h> 75 #include <sys/socket.h> 76 #include <sys/sysctl.h> 77 #include <sys/kernel.h> 78 #include <sys/callout.h> 79 #include <sys/malloc.h> 80 #include <sys/module.h> 81 #include <sys/ktr.h> 82 83 #include <net/if.h> 84 #include <net/if_var.h> 85 #include <net/if_private.h> 86 #include <net/route.h> 87 #include <net/vnet.h> 88 89 #include <netinet/in.h> 90 #include <netinet/in_var.h> 91 #include <netinet6/in6_var.h> 92 #include <netinet/ip6.h> 93 #include <netinet6/ip6_var.h> 94 #include <netinet6/scope6_var.h> 95 #include <netinet/icmp6.h> 96 #include <netinet6/mld6.h> 97 #include <netinet6/mld6_var.h> 98 99 #include <security/mac/mac_framework.h> 100 101 #ifndef KTR_MLD 102 #define KTR_MLD KTR_INET6 103 #endif 104 105 static void mli_delete_locked(struct ifnet *); 106 static void mld_dispatch_packet(struct mbuf *); 107 static void mld_dispatch_queue(struct mbufq *, int); 108 static void mld_final_leave(struct in6_multi *, struct mld_ifsoftc *); 109 static void mld_fasttimo_vnet(struct in6_multi_head *inmh); 110 static int mld_handle_state_change(struct in6_multi *, 111 struct mld_ifsoftc *); 112 static int mld_initial_join(struct in6_multi *, struct mld_ifsoftc *, 113 const int); 114 #ifdef KTR 115 static char * mld_rec_type_to_str(const int); 116 #endif 117 static void mld_set_version(struct mld_ifsoftc *, const int); 118 static void mld_slowtimo_vnet(void); 119 static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, 120 /*const*/ struct mld_hdr *); 121 static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, 122 /*const*/ struct mld_hdr *); 123 static void mld_v1_process_group_timer(struct in6_multi_head *, 124 struct in6_multi *); 125 static void mld_v1_process_querier_timers(struct mld_ifsoftc *); 126 static int mld_v1_transmit_report(struct in6_multi *, const int); 127 static void mld_v1_update_group(struct in6_multi *, const int); 128 static void mld_v2_cancel_link_timers(struct mld_ifsoftc *); 129 static void mld_v2_dispatch_general_query(struct mld_ifsoftc *); 130 static struct mbuf * 131 mld_v2_encap_report(struct ifnet *, struct mbuf *); 132 static int mld_v2_enqueue_filter_change(struct mbufq *, 133 struct in6_multi *); 134 static int mld_v2_enqueue_group_record(struct mbufq *, 135 struct in6_multi *, const int, const int, const int, 136 const int); 137 static int mld_v2_input_query(struct ifnet *, const struct ip6_hdr *, 138 struct mbuf *, struct mldv2_query *, const int, const int); 139 static int mld_v2_merge_state_changes(struct in6_multi *, 140 struct mbufq *); 141 static void mld_v2_process_group_timers(struct in6_multi_head *, 142 struct mbufq *, struct mbufq *, 143 struct in6_multi *, const int); 144 static int mld_v2_process_group_query(struct in6_multi *, 145 struct mld_ifsoftc *mli, int, struct mbuf *, 146 struct mldv2_query *, const int); 147 static int sysctl_mld_gsr(SYSCTL_HANDLER_ARGS); 148 static int sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS); 149 150 /* 151 * Normative references: RFC 2710, RFC 3590, RFC 3810. 152 * 153 * Locking: 154 * * The MLD subsystem lock ends up being system-wide for the moment, 155 * but could be per-VIMAGE later on. 156 * * The permitted lock order is: IN6_MULTI_LOCK, MLD_LOCK, IF_ADDR_LOCK. 157 * Any may be taken independently; if any are held at the same 158 * time, the above lock order must be followed. 
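 *   For illustration only, the acquisition order mirrors what
 *   sysctl_mld_ifinfo() below already does:
 *
 *     IN6_MULTI_LOCK();
 *     IN6_MULTI_LIST_LOCK();
 *     MLD_LOCK();
 *     ... inspect or update per-link and per-group state ...
 *     MLD_UNLOCK();
 *     IN6_MULTI_LIST_UNLOCK();
 *     IN6_MULTI_UNLOCK();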
159 * * IN6_MULTI_LOCK covers in_multi. 160 * * MLD_LOCK covers per-link state and any global variables in this file. 161 * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of 162 * per-link state iterators. 163 * 164 * XXX LOR PREVENTION 165 * A special case for IPv6 is the in6_setscope() routine. ip6_output() 166 * will not accept an ifp; it wants an embedded scope ID, unlike 167 * ip_output(), which happily takes the ifp given to it. The embedded 168 * scope ID is only used by MLD to select the outgoing interface. 169 * 170 * During interface attach and detach, MLD will take MLD_LOCK *after* 171 * the IF_AFDATA_LOCK. 172 * As in6_setscope() takes IF_AFDATA_LOCK then SCOPE_LOCK, we can't call 173 * it with MLD_LOCK held without triggering an LOR. A netisr with indirect 174 * dispatch could work around this, but we'd rather not do that, as it 175 * can introduce other races. 176 * 177 * As such, we exploit the fact that the scope ID is just the interface 178 * index, and embed it in the IPv6 destination address accordingly. 179 * This is potentially NOT VALID for MLDv1 reports, as they 180 * are always sent to the multicast group itself; as MLDv2 181 * reports are always sent to ff02::16, this is not an issue 182 * when MLDv2 is in use. 183 * 184 * This does not however eliminate the LOR when ip6_output() itself 185 * calls in6_setscope() internally whilst MLD_LOCK is held. This will 186 * trigger a LOR warning in WITNESS when the ifnet is detached. 187 * 188 * The right answer is probably to make IF_AFDATA_LOCK an rwlock, given 189 * how it's used across the network stack. Here we're simply exploiting 190 * the fact that MLD runs at a similar layer in the stack to scope6.c. 191 * 192 * VIMAGE: 193 * * Each in6_multi corresponds to an ifp, and each ifp corresponds 194 * to a vnet in ifp->if_vnet. 195 */ 196 static struct mtx mld_mtx; 197 static MALLOC_DEFINE(M_MLD, "mld", "mld state"); 198 199 #define MLD_EMBEDSCOPE(pin6, zoneid) \ 200 if (IN6_IS_SCOPE_LINKLOCAL(pin6) || \ 201 IN6_IS_ADDR_MC_INTFACELOCAL(pin6)) \ 202 (pin6)->s6_addr16[1] = htons((zoneid) & 0xFFFF) \ 203 204 /* 205 * VIMAGE-wide globals. 206 */ 207 VNET_DEFINE_STATIC(struct timeval, mld_gsrdelay) = {10, 0}; 208 VNET_DEFINE_STATIC(LIST_HEAD(, mld_ifsoftc), mli_head); 209 VNET_DEFINE_STATIC(int, interface_timers_running6); 210 VNET_DEFINE_STATIC(int, state_change_timers_running6); 211 VNET_DEFINE_STATIC(int, current_state_timers_running6); 212 213 #define V_mld_gsrdelay VNET(mld_gsrdelay) 214 #define V_mli_head VNET(mli_head) 215 #define V_interface_timers_running6 VNET(interface_timers_running6) 216 #define V_state_change_timers_running6 VNET(state_change_timers_running6) 217 #define V_current_state_timers_running6 VNET(current_state_timers_running6) 218 219 SYSCTL_DECL(_net_inet6); /* Note: Not in any common header. */ 220 221 SYSCTL_NODE(_net_inet6, OID_AUTO, mld, CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 222 "IPv6 Multicast Listener Discovery"); 223 224 /* 225 * Virtualized sysctls. 226 */ 227 SYSCTL_PROC(_net_inet6_mld, OID_AUTO, gsrdelay, 228 CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 229 &VNET_NAME(mld_gsrdelay.tv_sec), 0, sysctl_mld_gsr, "I", 230 "Rate limit for MLDv2 Group-and-Source queries in seconds"); 231 232 /* 233 * Non-virtualized sysctls. 
234 */ 235 static SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, 236 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo, 237 "Per-interface MLDv2 state"); 238 239 static int mld_v1enable = 1; 240 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RWTUN, 241 &mld_v1enable, 0, "Enable fallback to MLDv1"); 242 243 static int mld_v2enable = 1; 244 SYSCTL_INT(_net_inet6_mld, OID_AUTO, v2enable, CTLFLAG_RWTUN, 245 &mld_v2enable, 0, "Enable MLDv2"); 246 247 static int mld_use_allow = 1; 248 SYSCTL_INT(_net_inet6_mld, OID_AUTO, use_allow, CTLFLAG_RWTUN, 249 &mld_use_allow, 0, "Use ALLOW/BLOCK for RFC 4604 SSM joins/leaves"); 250 251 /* 252 * Packed Router Alert option structure declaration. 253 */ 254 struct mld_raopt { 255 struct ip6_hbh hbh; 256 struct ip6_opt pad; 257 struct ip6_opt_router ra; 258 } __packed; 259 260 /* 261 * Router Alert hop-by-hop option header. 262 */ 263 static struct mld_raopt mld_ra = { 264 .hbh = { 0, 0 }, 265 .pad = { .ip6o_type = IP6OPT_PADN, 0 }, 266 .ra = { 267 .ip6or_type = IP6OPT_ROUTER_ALERT, 268 .ip6or_len = IP6OPT_RTALERT_LEN - 2, 269 .ip6or_value[0] = ((IP6OPT_RTALERT_MLD >> 8) & 0xFF), 270 .ip6or_value[1] = (IP6OPT_RTALERT_MLD & 0xFF) 271 } 272 }; 273 static struct ip6_pktopts mld_po; 274 275 static __inline void 276 mld_save_context(struct mbuf *m, struct ifnet *ifp) 277 { 278 279 #ifdef VIMAGE 280 m->m_pkthdr.PH_loc.ptr = ifp->if_vnet; 281 #endif /* VIMAGE */ 282 m->m_pkthdr.rcvif = ifp; 283 m->m_pkthdr.flowid = ifp->if_index; 284 } 285 286 static __inline void 287 mld_scrub_context(struct mbuf *m) 288 { 289 290 m->m_pkthdr.PH_loc.ptr = NULL; 291 m->m_pkthdr.flowid = 0; 292 } 293 294 /* 295 * Restore context from a queued output chain. 296 * Return saved ifindex. 297 * 298 * VIMAGE: The assertion is there to make sure that we 299 * actually called CURVNET_SET() with what's in the mbuf chain. 300 */ 301 static __inline uint32_t 302 mld_restore_context(struct mbuf *m) 303 { 304 305 #if defined(VIMAGE) && defined(INVARIANTS) 306 KASSERT(curvnet == m->m_pkthdr.PH_loc.ptr, 307 ("%s: called when curvnet was not restored: cuvnet %p m ptr %p", 308 __func__, curvnet, m->m_pkthdr.PH_loc.ptr)); 309 #endif 310 return (m->m_pkthdr.flowid); 311 } 312 313 /* 314 * Retrieve or set threshold between group-source queries in seconds. 315 * 316 * VIMAGE: Assume curvnet set by caller. 317 * SMPng: NOTE: Serialized by MLD lock. 318 */ 319 static int 320 sysctl_mld_gsr(SYSCTL_HANDLER_ARGS) 321 { 322 int error; 323 int i; 324 325 error = sysctl_wire_old_buffer(req, sizeof(int)); 326 if (error) 327 return (error); 328 329 MLD_LOCK(); 330 331 i = V_mld_gsrdelay.tv_sec; 332 333 error = sysctl_handle_int(oidp, &i, 0, req); 334 if (error || !req->newptr) 335 goto out_locked; 336 337 if (i < -1 || i >= 60) { 338 error = EINVAL; 339 goto out_locked; 340 } 341 342 CTR2(KTR_MLD, "change mld_gsrdelay from %d to %d", 343 V_mld_gsrdelay.tv_sec, i); 344 V_mld_gsrdelay.tv_sec = i; 345 346 out_locked: 347 MLD_UNLOCK(); 348 return (error); 349 } 350 351 /* 352 * Expose struct mld_ifsoftc to userland, keyed by ifindex. 353 * For use by ifmcstat(8). 354 * 355 * VIMAGE: Assume curvnet set by caller. The node handler itself 356 * is not directly virtualized. 
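 *
 * A hypothetical userland read of this node (a sketch only, with error
 * checking omitted; ifmcstat(8) is the canonical consumer) resolves the
 * OID by name and appends the interface index as the final component:
 *
 *	size_t miblen = 4, len = sizeof(struct mld_ifinfo);
 *	int mib[5];
 *	struct mld_ifinfo info;
 *
 *	sysctlnametomib("net.inet6.mld.ifinfo", mib, &miblen);
 *	mib[miblen] = if_nametoindex("em0");     (hypothetical ifname)
 *	sysctl(mib, miblen + 1, &info, &len, NULL, 0);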
357 */ 358 static int 359 sysctl_mld_ifinfo(SYSCTL_HANDLER_ARGS) 360 { 361 struct epoch_tracker et; 362 int *name; 363 int error; 364 u_int namelen; 365 struct ifnet *ifp; 366 struct mld_ifsoftc *mli; 367 368 name = (int *)arg1; 369 namelen = arg2; 370 371 if (req->newptr != NULL) 372 return (EPERM); 373 374 if (namelen != 1) 375 return (EINVAL); 376 377 error = sysctl_wire_old_buffer(req, sizeof(struct mld_ifinfo)); 378 if (error) 379 return (error); 380 381 IN6_MULTI_LOCK(); 382 IN6_MULTI_LIST_LOCK(); 383 MLD_LOCK(); 384 NET_EPOCH_ENTER(et); 385 386 error = ENOENT; 387 ifp = ifnet_byindex(name[0]); 388 if (ifp == NULL) 389 goto out_locked; 390 391 LIST_FOREACH(mli, &V_mli_head, mli_link) { 392 if (ifp == mli->mli_ifp) { 393 struct mld_ifinfo info; 394 395 info.mli_version = mli->mli_version; 396 info.mli_v1_timer = mli->mli_v1_timer; 397 info.mli_v2_timer = mli->mli_v2_timer; 398 info.mli_flags = mli->mli_flags; 399 info.mli_rv = mli->mli_rv; 400 info.mli_qi = mli->mli_qi; 401 info.mli_qri = mli->mli_qri; 402 info.mli_uri = mli->mli_uri; 403 error = SYSCTL_OUT(req, &info, sizeof(info)); 404 break; 405 } 406 } 407 408 out_locked: 409 NET_EPOCH_EXIT(et); 410 MLD_UNLOCK(); 411 IN6_MULTI_LIST_UNLOCK(); 412 IN6_MULTI_UNLOCK(); 413 return (error); 414 } 415 416 /* 417 * Dispatch an entire queue of pending packet chains. 418 * VIMAGE: Assumes the vnet pointer has been set. 419 */ 420 static void 421 mld_dispatch_queue(struct mbufq *mq, int limit) 422 { 423 struct mbuf *m; 424 425 while ((m = mbufq_dequeue(mq)) != NULL) { 426 CTR3(KTR_MLD, "%s: dispatch %p from %p", __func__, mq, m); 427 mld_dispatch_packet(m); 428 if (--limit == 0) 429 break; 430 } 431 } 432 433 /* 434 * Filter outgoing MLD report state by group. 435 * 436 * Reports are ALWAYS suppressed for ALL-HOSTS (ff02::1) 437 * and node-local addresses. However, kernel and socket consumers 438 * always embed the KAME scope ID in the address provided, so strip it 439 * when performing comparison. 440 * Note: This is not the same as the *multicast* scope. 441 * 442 * Return zero if the given group is one for which MLD reports 443 * should be suppressed, or non-zero if reports should be issued. 444 */ 445 static __inline int 446 mld_is_addr_reported(const struct in6_addr *addr) 447 { 448 449 KASSERT(IN6_IS_ADDR_MULTICAST(addr), ("%s: not multicast", __func__)); 450 451 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_NODELOCAL) 452 return (0); 453 454 if (IPV6_ADDR_MC_SCOPE(addr) == IPV6_ADDR_SCOPE_LINKLOCAL) { 455 struct in6_addr tmp = *addr; 456 in6_clearscope(&tmp); 457 if (IN6_ARE_ADDR_EQUAL(&tmp, &in6addr_linklocal_allnodes)) 458 return (0); 459 } 460 461 return (1); 462 } 463 464 /* 465 * Attach MLD when PF_INET6 is attached to an interface. Assumes that the 466 * current VNET is set by the caller. 
467 */ 468 struct mld_ifsoftc * 469 mld_domifattach(struct ifnet *ifp) 470 { 471 struct mld_ifsoftc *mli; 472 473 CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, if_name(ifp)); 474 475 mli = malloc(sizeof(struct mld_ifsoftc), M_MLD, M_WAITOK | M_ZERO); 476 mli->mli_ifp = ifp; 477 mli->mli_version = MLD_VERSION_2; 478 mli->mli_flags = 0; 479 mli->mli_rv = MLD_RV_INIT; 480 mli->mli_qi = MLD_QI_INIT; 481 mli->mli_qri = MLD_QRI_INIT; 482 mli->mli_uri = MLD_URI_INIT; 483 mbufq_init(&mli->mli_gq, MLD_MAX_RESPONSE_PACKETS); 484 if ((ifp->if_flags & IFF_MULTICAST) == 0) 485 mli->mli_flags |= MLIF_SILENT; 486 if (mld_use_allow) 487 mli->mli_flags |= MLIF_USEALLOW; 488 489 MLD_LOCK(); 490 LIST_INSERT_HEAD(&V_mli_head, mli, mli_link); 491 MLD_UNLOCK(); 492 493 return (mli); 494 } 495 496 /* 497 * Hook for ifdetach. 498 * 499 * NOTE: Some finalization tasks need to run before the protocol domain 500 * is detached, but also before the link layer does its cleanup. 501 * Run before link-layer cleanup; cleanup groups, but do not free MLD state. 502 * 503 * SMPng: Caller must hold IN6_MULTI_LOCK(). 504 * Must take IF_ADDR_LOCK() to cover if_multiaddrs iterator. 505 * XXX This routine is also bitten by unlocked ifma_protospec access. 506 */ 507 void 508 mld_ifdetach(struct ifnet *ifp, struct in6_multi_head *inmh) 509 { 510 struct epoch_tracker et; 511 struct mld_ifsoftc *mli; 512 struct ifmultiaddr *ifma; 513 struct in6_multi *inm; 514 515 CTR3(KTR_MLD, "%s: called for ifp %p(%s)", __func__, ifp, 516 if_name(ifp)); 517 518 IN6_MULTI_LIST_LOCK_ASSERT(); 519 MLD_LOCK(); 520 521 mli = MLD_IFINFO(ifp); 522 IF_ADDR_WLOCK(ifp); 523 /* 524 * Extract list of in6_multi associated with the detaching ifp 525 * which the PF_INET6 layer is about to release. 526 */ 527 NET_EPOCH_ENTER(et); 528 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 529 inm = in6m_ifmultiaddr_get_inm(ifma); 530 if (inm == NULL) 531 continue; 532 in6m_disconnect_locked(inmh, inm); 533 534 if (mli->mli_version == MLD_VERSION_2) { 535 in6m_clear_recorded(inm); 536 537 /* 538 * We need to release the final reference held 539 * for issuing the INCLUDE {}. 540 */ 541 if (inm->in6m_state == MLD_LEAVING_MEMBER) { 542 inm->in6m_state = MLD_NOT_MEMBER; 543 in6m_rele_locked(inmh, inm); 544 } 545 } 546 } 547 NET_EPOCH_EXIT(et); 548 IF_ADDR_WUNLOCK(ifp); 549 MLD_UNLOCK(); 550 } 551 552 /* 553 * Hook for domifdetach. 554 * Runs after link-layer cleanup; free MLD state. 555 * 556 * SMPng: Normally called with IF_AFDATA_LOCK held. 557 */ 558 void 559 mld_domifdetach(struct ifnet *ifp) 560 { 561 562 CTR3(KTR_MLD, "%s: called for ifp %p(%s)", 563 __func__, ifp, if_name(ifp)); 564 565 MLD_LOCK(); 566 mli_delete_locked(ifp); 567 MLD_UNLOCK(); 568 } 569 570 static void 571 mli_delete_locked(struct ifnet *ifp) 572 { 573 struct mld_ifsoftc *mli, *tmli; 574 575 CTR3(KTR_MLD, "%s: freeing mld_ifsoftc for ifp %p(%s)", 576 __func__, ifp, if_name(ifp)); 577 578 MLD_LOCK_ASSERT(); 579 580 LIST_FOREACH_SAFE(mli, &V_mli_head, mli_link, tmli) { 581 if (mli->mli_ifp == ifp) { 582 /* 583 * Free deferred General Query responses. 584 */ 585 mbufq_drain(&mli->mli_gq); 586 587 LIST_REMOVE(mli, mli_link); 588 589 free(mli, M_MLD); 590 return; 591 } 592 } 593 } 594 595 /* 596 * Process a received MLDv1 general or address-specific query. 597 * Assumes that the query header has been pulled up to sizeof(mld_hdr). 598 * 599 * NOTE: Can't be fully const correct as we temporarily embed scope ID in 600 * mld_addr. This is OK as we own the mbuf chain. 
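 *
 * For reference: the MLDv1 Maximum Response Delay is carried in
 * milliseconds and is converted below to fast-timeout ticks as
 * (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE. Assuming the usual
 * constants from mld6_var.h (MLD_FASTHZ = 5, MLD_TIMER_SCALE = 1000),
 * a query advertising 10000 ms becomes 10000 * 5 / 1000 = 50 ticks,
 * i.e. a 10 second response window.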
601 */ 602 static int 603 mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, 604 /*const*/ struct mld_hdr *mld) 605 { 606 struct ifmultiaddr *ifma; 607 struct mld_ifsoftc *mli; 608 struct in6_multi *inm; 609 int is_general_query; 610 uint16_t timer; 611 #ifdef KTR 612 char ip6tbuf[INET6_ADDRSTRLEN]; 613 #endif 614 615 NET_EPOCH_ASSERT(); 616 617 is_general_query = 0; 618 619 if (!mld_v1enable) { 620 CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)", 621 ip6_sprintf(ip6tbuf, &mld->mld_addr), 622 ifp, if_name(ifp)); 623 return (0); 624 } 625 626 /* 627 * RFC3810 Section 6.2: MLD queries must originate from 628 * a router's link-local address. 629 */ 630 if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { 631 CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", 632 ip6_sprintf(ip6tbuf, &ip6->ip6_src), 633 ifp, if_name(ifp)); 634 return (0); 635 } 636 637 /* 638 * Do address field validation upfront before we accept 639 * the query. 640 */ 641 if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { 642 /* 643 * MLDv1 General Query. 644 * If this was not sent to the all-nodes group, ignore it. 645 */ 646 struct in6_addr dst; 647 648 dst = ip6->ip6_dst; 649 in6_clearscope(&dst); 650 if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) 651 return (EINVAL); 652 is_general_query = 1; 653 } else { 654 /* 655 * Embed scope ID of receiving interface in MLD query for 656 * lookup whilst we don't hold other locks. 657 */ 658 in6_setscope(&mld->mld_addr, ifp, NULL); 659 } 660 661 IN6_MULTI_LIST_LOCK(); 662 MLD_LOCK(); 663 664 /* 665 * Switch to MLDv1 host compatibility mode. 666 */ 667 mli = MLD_IFINFO(ifp); 668 KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); 669 mld_set_version(mli, MLD_VERSION_1); 670 671 timer = (ntohs(mld->mld_maxdelay) * MLD_FASTHZ) / MLD_TIMER_SCALE; 672 if (timer == 0) 673 timer = 1; 674 675 if (is_general_query) { 676 /* 677 * For each reporting group joined on this 678 * interface, kick the report timer. 679 */ 680 CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", 681 ifp, if_name(ifp)); 682 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 683 inm = in6m_ifmultiaddr_get_inm(ifma); 684 if (inm == NULL) 685 continue; 686 mld_v1_update_group(inm, timer); 687 } 688 } else { 689 /* 690 * MLDv1 Group-Specific Query. 691 * If this is a group-specific MLDv1 query, we need only 692 * look up the single group to process it. 693 */ 694 inm = in6m_lookup_locked(ifp, &mld->mld_addr); 695 if (inm != NULL) { 696 CTR3(KTR_MLD, "process v1 query %s on ifp %p(%s)", 697 ip6_sprintf(ip6tbuf, &mld->mld_addr), 698 ifp, if_name(ifp)); 699 mld_v1_update_group(inm, timer); 700 } 701 /* XXX Clear embedded scope ID as userland won't expect it. */ 702 in6_clearscope(&mld->mld_addr); 703 } 704 705 MLD_UNLOCK(); 706 IN6_MULTI_LIST_UNLOCK(); 707 708 return (0); 709 } 710 711 /* 712 * Update the report timer on a group in response to an MLDv1 query. 713 * 714 * If we are becoming the reporting member for this group, start the timer. 715 * If we already are the reporting member for this group, and timer is 716 * below the threshold, reset it. 717 * 718 * We may be updating the group for the first time since we switched 719 * to MLDv2. If we are, then we must clear any recorded source lists, 720 * and transition to REPORTING state; the group timer is overloaded 721 * for group and group-source query responses. 722 * 723 * Unlike MLDv2, the delay per group should be jittered 724 * to avoid bursts of MLDv1 reports. 
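 *
 * (The jitter is applied below via MLD_RANDOM_DELAY(); assuming its
 * usual definition in mld6_var.h, that yields a uniformly random value
 * in [1, timer] fast-timeout ticks, so each listener picks an
 * independent delay within the advertised response window.)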
 */
static void
mld_v1_update_group(struct in6_multi *inm, const int timer)
{
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	CTR4(KTR_MLD, "%s: %s/%s timer=%d", __func__,
	    ip6_sprintf(ip6tbuf, &inm->in6m_addr),
	    if_name(inm->in6m_ifp), timer);

	IN6_MULTI_LIST_LOCK_ASSERT();

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
		break;
	case MLD_REPORTING_MEMBER:
		if (inm->in6m_timer != 0 &&
		    inm->in6m_timer <= timer) {
			CTR1(KTR_MLD, "%s: REPORTING and timer running, "
			    "skipping.", __func__);
			break;
		}
		/* FALLTHROUGH */
	case MLD_SG_QUERY_PENDING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
		CTR1(KTR_MLD, "%s: ->REPORTING", __func__);
		inm->in6m_state = MLD_REPORTING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		break;
	case MLD_SLEEPING_MEMBER:
		CTR1(KTR_MLD, "%s: ->AWAKENING", __func__);
		inm->in6m_state = MLD_AWAKENING_MEMBER;
		break;
	case MLD_LEAVING_MEMBER:
		break;
	}
}

/*
 * Process a received MLDv2 general, group-specific or
 * group-and-source-specific query.
 *
 * Assumes that mld points to a struct mldv2_query which is stored in
 * contiguous memory.
 *
 * Return 0 if successful, otherwise an appropriate error code is returned.
 */
static int
mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6,
    struct mbuf *m, struct mldv2_query *mld, const int off, const int icmp6len)
{
	struct mld_ifsoftc *mli;
	struct in6_multi *inm;
	uint32_t maxdelay, nsrc, qqi;
	int is_general_query;
	uint16_t timer;
	uint8_t qrv;
#ifdef KTR
	char ip6tbuf[INET6_ADDRSTRLEN];
#endif

	NET_EPOCH_ASSERT();

	if (!mld_v2enable) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	/*
	 * RFC3810 Section 6.2: MLD queries must originate from
	 * a router's link-local address.
	 */
	if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) {
		CTR3(KTR_MLD, "ignore v2 query src %s on ifp %p(%s)",
		    ip6_sprintf(ip6tbuf, &ip6->ip6_src),
		    ifp, if_name(ifp));
		return (0);
	}

	is_general_query = 0;

	CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, if_name(ifp));

	maxdelay = ntohs(mld->mld_maxdelay);	/* in milliseconds */
	if (maxdelay >= 32768) {
		maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) <<
		    (MLD_MRC_EXP(maxdelay) + 3);
	}
	timer = (maxdelay * MLD_FASTHZ) / MLD_TIMER_SCALE;
	if (timer == 0)
		timer = 1;

	qrv = MLD_QRV(mld->mld_misc);
	if (qrv < 2) {
		CTR3(KTR_MLD, "%s: clamping qrv %d to %d", __func__,
		    qrv, MLD_RV_INIT);
		qrv = MLD_RV_INIT;
	}

	qqi = mld->mld_qqi;
	if (qqi >= 128) {
		qqi = MLD_QQIC_MANT(mld->mld_qqi) <<
		    (MLD_QQIC_EXP(mld->mld_qqi) + 3);
	}

	nsrc = ntohs(mld->mld_numsrc);
	if (nsrc > MLD_MAX_GS_SOURCES)
		return (EMSGSIZE);
	if (icmp6len < sizeof(struct mldv2_query) +
	    (nsrc * sizeof(struct in6_addr)))
		return (EMSGSIZE);

	/*
	 * Do further input validation upfront to avoid resetting timers
	 * should we need to discard this query.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) {
		/*
		 * A general query with a source list has undefined
		 * behaviour; discard it.
854 */ 855 if (nsrc > 0) 856 return (EINVAL); 857 is_general_query = 1; 858 } else { 859 /* 860 * Embed scope ID of receiving interface in MLD query for 861 * lookup whilst we don't hold other locks (due to KAME 862 * locking lameness). We own this mbuf chain just now. 863 */ 864 in6_setscope(&mld->mld_addr, ifp, NULL); 865 } 866 867 IN6_MULTI_LIST_LOCK(); 868 MLD_LOCK(); 869 870 mli = MLD_IFINFO(ifp); 871 KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); 872 873 /* 874 * Discard the v2 query if we're in Compatibility Mode. 875 * The RFC is pretty clear that hosts need to stay in MLDv1 mode 876 * until the Old Version Querier Present timer expires. 877 */ 878 if (mli->mli_version != MLD_VERSION_2) 879 goto out_locked; 880 881 mld_set_version(mli, MLD_VERSION_2); 882 mli->mli_rv = qrv; 883 mli->mli_qi = qqi; 884 mli->mli_qri = maxdelay; 885 886 CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi, 887 maxdelay); 888 889 if (is_general_query) { 890 /* 891 * MLDv2 General Query. 892 * 893 * Schedule a current-state report on this ifp for 894 * all groups, possibly containing source lists. 895 * 896 * If there is a pending General Query response 897 * scheduled earlier than the selected delay, do 898 * not schedule any other reports. 899 * Otherwise, reset the interface timer. 900 */ 901 CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", 902 ifp, if_name(ifp)); 903 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { 904 mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); 905 V_interface_timers_running6 = 1; 906 } 907 } else { 908 /* 909 * MLDv2 Group-specific or Group-and-source-specific Query. 910 * 911 * Group-source-specific queries are throttled on 912 * a per-group basis to defeat denial-of-service attempts. 913 * Queries for groups we are not a member of on this 914 * link are simply ignored. 915 */ 916 inm = in6m_lookup_locked(ifp, &mld->mld_addr); 917 if (inm == NULL) 918 goto out_locked; 919 if (nsrc > 0) { 920 if (!ratecheck(&inm->in6m_lastgsrtv, 921 &V_mld_gsrdelay)) { 922 CTR1(KTR_MLD, "%s: GS query throttled.", 923 __func__); 924 goto out_locked; 925 } 926 } 927 CTR2(KTR_MLD, "process v2 group query on ifp %p(%s)", 928 ifp, if_name(ifp)); 929 /* 930 * If there is a pending General Query response 931 * scheduled sooner than the selected delay, no 932 * further report need be scheduled. 933 * Otherwise, prepare to respond to the 934 * group-specific or group-and-source query. 935 */ 936 if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) 937 mld_v2_process_group_query(inm, mli, timer, m, mld, off); 938 939 /* XXX Clear embedded scope ID as userland won't expect it. */ 940 in6_clearscope(&mld->mld_addr); 941 } 942 943 out_locked: 944 MLD_UNLOCK(); 945 IN6_MULTI_LIST_UNLOCK(); 946 947 return (0); 948 } 949 950 /* 951 * Process a received MLDv2 group-specific or group-and-source-specific 952 * query. 953 * Return <0 if any error occurred. Currently this is ignored. 
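 *
 * The sources in a group-and-source query follow the fixed query
 * header in the mbuf chain; the loop below copies them out one
 * struct in6_addr at a time, starting at offset
 * off + sizeof(struct mldv2_query), and records each one with
 * in6m_record_source().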
 */
static int
mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifsoftc *mli,
    int timer, struct mbuf *m0, struct mldv2_query *mld, const int off)
{
	int retval;
	uint16_t nsrc;

	IN6_MULTI_LIST_LOCK_ASSERT();
	MLD_LOCK_ASSERT();

	retval = 0;

	switch (inm->in6m_state) {
	case MLD_NOT_MEMBER:
	case MLD_SILENT_MEMBER:
	case MLD_SLEEPING_MEMBER:
	case MLD_LAZY_MEMBER:
	case MLD_AWAKENING_MEMBER:
	case MLD_IDLE_MEMBER:
	case MLD_LEAVING_MEMBER:
		return (retval);
		break;
	case MLD_REPORTING_MEMBER:
	case MLD_G_QUERY_PENDING_MEMBER:
	case MLD_SG_QUERY_PENDING_MEMBER:
		break;
	}

	nsrc = ntohs(mld->mld_numsrc);

	/* Length should be checked by calling function. */
	KASSERT((m0->m_flags & M_PKTHDR) == 0 ||
	    m0->m_pkthdr.len >= off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr),
	    ("mldv2 packet is too short: (%d bytes < %zd bytes, m=%p)",
	    m0->m_pkthdr.len, off + sizeof(struct mldv2_query) +
	    nsrc * sizeof(struct in6_addr), m0));

	/*
	 * Deal with group-specific queries upfront.
	 * If any group query is already pending, purge any recorded
	 * source-list state if it exists, and schedule a query response
	 * for this group-specific query.
	 */
	if (nsrc == 0) {
		if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER ||
		    inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) {
			in6m_clear_recorded(inm);
			timer = min(inm->in6m_timer, timer);
		}
		inm->in6m_state = MLD_G_QUERY_PENDING_MEMBER;
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Deal with the case where a group-and-source-specific query has
	 * been received but a group-specific query is already pending.
	 */
	if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER) {
		timer = min(inm->in6m_timer, timer);
		inm->in6m_timer = MLD_RANDOM_DELAY(timer);
		V_current_state_timers_running6 = 1;
		return (retval);
	}

	/*
	 * Finally, deal with the case where a group-and-source-specific
	 * query has been received, where a response to a previous g-s-r
	 * query exists, or none exists.
	 * In this case, we need to parse the source-list which the Querier
	 * has provided us with and check if we have any source list filter
	 * entries at T1 for these sources. If we do not, there is no need
	 * to schedule a report and the query may be dropped.
	 * If we do, we must record them and schedule a current-state
	 * report for those sources.
	 */
	if (inm->in6m_nsrc > 0) {
		struct in6_addr srcaddr;
		int i, nrecorded;
		int soff;

		soff = off + sizeof(struct mldv2_query);
		nrecorded = 0;
		for (i = 0; i < nsrc; i++) {
			m_copydata(m0, soff, sizeof(struct in6_addr),
			    (caddr_t)&srcaddr);
			retval = in6m_record_source(inm, &srcaddr);
			if (retval < 0)
				break;
			nrecorded += retval;
			soff += sizeof(struct in6_addr);
		}
		if (nrecorded > 0) {
			CTR1(KTR_MLD,
			    "%s: schedule response to SG query", __func__);
			inm->in6m_state = MLD_SG_QUERY_PENDING_MEMBER;
			inm->in6m_timer = MLD_RANDOM_DELAY(timer);
			V_current_state_timers_running6 = 1;
		}
	}

	return (retval);
}

/*
 * Process a received MLDv1 host membership report.
 * Assumes mld points to mld_hdr in pulled up mbuf chain.
1064 * 1065 * NOTE: Can't be fully const correct as we temporarily embed scope ID in 1066 * mld_addr. This is OK as we own the mbuf chain. 1067 */ 1068 static int 1069 mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, 1070 /*const*/ struct mld_hdr *mld) 1071 { 1072 struct in6_addr src, dst; 1073 struct in6_ifaddr *ia; 1074 struct in6_multi *inm; 1075 #ifdef KTR 1076 char ip6tbuf[INET6_ADDRSTRLEN]; 1077 #endif 1078 1079 NET_EPOCH_ASSERT(); 1080 1081 if (!mld_v1enable) { 1082 CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", 1083 ip6_sprintf(ip6tbuf, &mld->mld_addr), 1084 ifp, if_name(ifp)); 1085 return (0); 1086 } 1087 1088 if (ifp->if_flags & IFF_LOOPBACK) 1089 return (0); 1090 1091 /* 1092 * MLDv1 reports must originate from a host's link-local address, 1093 * or the unspecified address (when booting). 1094 */ 1095 src = ip6->ip6_src; 1096 in6_clearscope(&src); 1097 if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { 1098 CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", 1099 ip6_sprintf(ip6tbuf, &ip6->ip6_src), 1100 ifp, if_name(ifp)); 1101 return (EINVAL); 1102 } 1103 1104 /* 1105 * RFC2710 Section 4: MLDv1 reports must pertain to a multicast 1106 * group, and must be directed to the group itself. 1107 */ 1108 dst = ip6->ip6_dst; 1109 in6_clearscope(&dst); 1110 if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || 1111 !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { 1112 CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", 1113 ip6_sprintf(ip6tbuf, &ip6->ip6_dst), 1114 ifp, if_name(ifp)); 1115 return (EINVAL); 1116 } 1117 1118 /* 1119 * Make sure we don't hear our own membership report, as fast 1120 * leave requires knowing that we are the only member of a 1121 * group. Assume we used the link-local address if available, 1122 * otherwise look for ::. 1123 * 1124 * XXX Note that scope ID comparison is needed for the address 1125 * returned by in6ifa_ifpforlinklocal(), but SHOULD NOT be 1126 * performed for the on-wire address. 1127 */ 1128 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); 1129 if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) || 1130 (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) { 1131 if (ia != NULL) 1132 ifa_free(&ia->ia_ifa); 1133 return (0); 1134 } 1135 if (ia != NULL) 1136 ifa_free(&ia->ia_ifa); 1137 1138 CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", 1139 ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, if_name(ifp)); 1140 1141 /* 1142 * Embed scope ID of receiving interface in MLD query for lookup 1143 * whilst we don't hold other locks (due to KAME locking lameness). 1144 */ 1145 if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) 1146 in6_setscope(&mld->mld_addr, ifp, NULL); 1147 1148 IN6_MULTI_LIST_LOCK(); 1149 MLD_LOCK(); 1150 1151 /* 1152 * MLDv1 report suppression. 1153 * If we are a member of this group, and our membership should be 1154 * reported, and our group timer is pending or about to be reset, 1155 * stop our group timer by transitioning to the 'lazy' state. 1156 */ 1157 inm = in6m_lookup_locked(ifp, &mld->mld_addr); 1158 if (inm != NULL) { 1159 struct mld_ifsoftc *mli; 1160 1161 mli = inm->in6m_mli; 1162 KASSERT(mli != NULL, 1163 ("%s: no mli for ifp %p", __func__, ifp)); 1164 1165 /* 1166 * If we are in MLDv2 host mode, do not allow the 1167 * other host's MLDv1 report to suppress our reports. 
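		 *
		 * (Report suppression is the RFC 2710, Section 4 rule:
		 * hearing another listener's Report for this group lets
		 * us stop our own pending report timer and drop into the
		 * LAZY state below.)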
1168 */ 1169 if (mli->mli_version == MLD_VERSION_2) 1170 goto out_locked; 1171 1172 inm->in6m_timer = 0; 1173 1174 switch (inm->in6m_state) { 1175 case MLD_NOT_MEMBER: 1176 case MLD_SILENT_MEMBER: 1177 case MLD_SLEEPING_MEMBER: 1178 break; 1179 case MLD_REPORTING_MEMBER: 1180 case MLD_IDLE_MEMBER: 1181 case MLD_AWAKENING_MEMBER: 1182 CTR3(KTR_MLD, 1183 "report suppressed for %s on ifp %p(%s)", 1184 ip6_sprintf(ip6tbuf, &mld->mld_addr), 1185 ifp, if_name(ifp)); 1186 case MLD_LAZY_MEMBER: 1187 inm->in6m_state = MLD_LAZY_MEMBER; 1188 break; 1189 case MLD_G_QUERY_PENDING_MEMBER: 1190 case MLD_SG_QUERY_PENDING_MEMBER: 1191 case MLD_LEAVING_MEMBER: 1192 break; 1193 } 1194 } 1195 1196 out_locked: 1197 MLD_UNLOCK(); 1198 IN6_MULTI_LIST_UNLOCK(); 1199 1200 /* XXX Clear embedded scope ID as userland won't expect it. */ 1201 in6_clearscope(&mld->mld_addr); 1202 1203 return (0); 1204 } 1205 1206 /* 1207 * MLD input path. 1208 * 1209 * Assume query messages which fit in a single ICMPv6 message header 1210 * have been pulled up. 1211 * Assume that userland will want to see the message, even if it 1212 * otherwise fails kernel input validation; do not free it. 1213 * Pullup may however free the mbuf chain m if it fails. 1214 * 1215 * Return IPPROTO_DONE if we freed m. Otherwise, return 0. 1216 */ 1217 int 1218 mld_input(struct mbuf **mp, int off, int icmp6len) 1219 { 1220 struct ifnet *ifp; 1221 struct ip6_hdr *ip6; 1222 struct mbuf *m; 1223 struct mld_hdr *mld; 1224 int mldlen; 1225 1226 m = *mp; 1227 CTR3(KTR_MLD, "%s: called w/mbuf (%p,%d)", __func__, m, off); 1228 1229 ifp = m->m_pkthdr.rcvif; 1230 1231 /* Pullup to appropriate size. */ 1232 if (m->m_len < off + sizeof(*mld)) { 1233 m = m_pullup(m, off + sizeof(*mld)); 1234 if (m == NULL) { 1235 ICMP6STAT_INC(icp6s_badlen); 1236 return (IPPROTO_DONE); 1237 } 1238 } 1239 mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); 1240 if (mld->mld_type == MLD_LISTENER_QUERY && 1241 icmp6len >= sizeof(struct mldv2_query)) { 1242 mldlen = sizeof(struct mldv2_query); 1243 } else { 1244 mldlen = sizeof(struct mld_hdr); 1245 } 1246 if (m->m_len < off + mldlen) { 1247 m = m_pullup(m, off + mldlen); 1248 if (m == NULL) { 1249 ICMP6STAT_INC(icp6s_badlen); 1250 return (IPPROTO_DONE); 1251 } 1252 } 1253 *mp = m; 1254 ip6 = mtod(m, struct ip6_hdr *); 1255 mld = (struct mld_hdr *)(mtod(m, uint8_t *) + off); 1256 1257 /* 1258 * Userland needs to see all of this traffic for implementing 1259 * the endpoint discovery portion of multicast routing. 1260 */ 1261 switch (mld->mld_type) { 1262 case MLD_LISTENER_QUERY: 1263 icmp6_ifstat_inc(ifp, ifs6_in_mldquery); 1264 if (icmp6len == sizeof(struct mld_hdr)) { 1265 if (mld_v1_input_query(ifp, ip6, mld) != 0) 1266 return (0); 1267 } else if (icmp6len >= sizeof(struct mldv2_query)) { 1268 if (mld_v2_input_query(ifp, ip6, m, 1269 (struct mldv2_query *)mld, off, icmp6len) != 0) 1270 return (0); 1271 } 1272 break; 1273 case MLD_LISTENER_REPORT: 1274 icmp6_ifstat_inc(ifp, ifs6_in_mldreport); 1275 if (mld_v1_input_report(ifp, ip6, mld) != 0) 1276 return (0); 1277 break; 1278 case MLDV2_LISTENER_REPORT: 1279 icmp6_ifstat_inc(ifp, ifs6_in_mldreport); 1280 break; 1281 case MLD_LISTENER_DONE: 1282 icmp6_ifstat_inc(ifp, ifs6_in_mlddone); 1283 break; 1284 default: 1285 break; 1286 } 1287 1288 return (0); 1289 } 1290 1291 /* 1292 * Fast timeout handler (global). 1293 * VIMAGE: Timeout handlers are expected to service all vimages. 
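 *
 * The callout below re-arms itself every hz / MLD_FASTHZ ticks
 * (nominally several times per second), and after walking all vnets
 * it releases any in6_multi references the per-vnet handlers deferred
 * onto the local list.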
1294 */ 1295 static struct callout mldfast_callout; 1296 static void 1297 mld_fasttimo(void *arg __unused) 1298 { 1299 struct epoch_tracker et; 1300 struct in6_multi_head inmh; 1301 VNET_ITERATOR_DECL(vnet_iter); 1302 1303 SLIST_INIT(&inmh); 1304 1305 NET_EPOCH_ENTER(et); 1306 VNET_LIST_RLOCK_NOSLEEP(); 1307 VNET_FOREACH(vnet_iter) { 1308 CURVNET_SET(vnet_iter); 1309 mld_fasttimo_vnet(&inmh); 1310 CURVNET_RESTORE(); 1311 } 1312 VNET_LIST_RUNLOCK_NOSLEEP(); 1313 NET_EPOCH_EXIT(et); 1314 in6m_release_list_deferred(&inmh); 1315 1316 callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL); 1317 } 1318 1319 /* 1320 * Fast timeout handler (per-vnet). 1321 * 1322 * VIMAGE: Assume caller has set up our curvnet. 1323 */ 1324 static void 1325 mld_fasttimo_vnet(struct in6_multi_head *inmh) 1326 { 1327 struct mbufq scq; /* State-change packets */ 1328 struct mbufq qrq; /* Query response packets */ 1329 struct ifnet *ifp; 1330 struct mld_ifsoftc *mli; 1331 struct ifmultiaddr *ifma; 1332 struct in6_multi *inm; 1333 int uri_fasthz; 1334 1335 uri_fasthz = 0; 1336 1337 /* 1338 * Quick check to see if any work needs to be done, in order to 1339 * minimize the overhead of fasttimo processing. 1340 * SMPng: XXX Unlocked reads. 1341 */ 1342 if (!V_current_state_timers_running6 && 1343 !V_interface_timers_running6 && 1344 !V_state_change_timers_running6) 1345 return; 1346 1347 IN6_MULTI_LIST_LOCK(); 1348 MLD_LOCK(); 1349 1350 /* 1351 * MLDv2 General Query response timer processing. 1352 */ 1353 if (V_interface_timers_running6) { 1354 CTR1(KTR_MLD, "%s: interface timers running", __func__); 1355 1356 V_interface_timers_running6 = 0; 1357 LIST_FOREACH(mli, &V_mli_head, mli_link) { 1358 if (mli->mli_v2_timer == 0) { 1359 /* Do nothing. */ 1360 } else if (--mli->mli_v2_timer == 0) { 1361 mld_v2_dispatch_general_query(mli); 1362 } else { 1363 V_interface_timers_running6 = 1; 1364 } 1365 } 1366 } 1367 1368 if (!V_current_state_timers_running6 && 1369 !V_state_change_timers_running6) 1370 goto out_locked; 1371 1372 V_current_state_timers_running6 = 0; 1373 V_state_change_timers_running6 = 0; 1374 1375 CTR1(KTR_MLD, "%s: state change timers running", __func__); 1376 1377 /* 1378 * MLD host report and state-change timer processing. 1379 * Note: Processing a v2 group timer may remove a node. 1380 */ 1381 LIST_FOREACH(mli, &V_mli_head, mli_link) { 1382 ifp = mli->mli_ifp; 1383 1384 if (mli->mli_version == MLD_VERSION_2) { 1385 uri_fasthz = MLD_RANDOM_DELAY(mli->mli_uri * 1386 MLD_FASTHZ); 1387 mbufq_init(&qrq, MLD_MAX_G_GS_PACKETS); 1388 mbufq_init(&scq, MLD_MAX_STATE_CHANGE_PACKETS); 1389 } 1390 1391 IF_ADDR_WLOCK(ifp); 1392 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1393 inm = in6m_ifmultiaddr_get_inm(ifma); 1394 if (inm == NULL) 1395 continue; 1396 switch (mli->mli_version) { 1397 case MLD_VERSION_1: 1398 mld_v1_process_group_timer(inmh, inm); 1399 break; 1400 case MLD_VERSION_2: 1401 mld_v2_process_group_timers(inmh, &qrq, 1402 &scq, inm, uri_fasthz); 1403 break; 1404 } 1405 } 1406 IF_ADDR_WUNLOCK(ifp); 1407 1408 switch (mli->mli_version) { 1409 case MLD_VERSION_1: 1410 /* 1411 * Transmit reports for this lifecycle. This 1412 * is done while not holding IF_ADDR_LOCK 1413 * since this can call 1414 * in6ifa_ifpforlinklocal() which locks 1415 * IF_ADDR_LOCK internally as well as 1416 * ip6_output() to transmit a packet. 
1417 */ 1418 while ((inm = SLIST_FIRST(inmh)) != NULL) { 1419 SLIST_REMOVE_HEAD(inmh, in6m_defer); 1420 (void)mld_v1_transmit_report(inm, 1421 MLD_LISTENER_REPORT); 1422 } 1423 break; 1424 case MLD_VERSION_2: 1425 mld_dispatch_queue(&qrq, 0); 1426 mld_dispatch_queue(&scq, 0); 1427 break; 1428 } 1429 } 1430 1431 out_locked: 1432 MLD_UNLOCK(); 1433 IN6_MULTI_LIST_UNLOCK(); 1434 } 1435 1436 /* 1437 * Update host report group timer. 1438 * Will update the global pending timer flags. 1439 */ 1440 static void 1441 mld_v1_process_group_timer(struct in6_multi_head *inmh, struct in6_multi *inm) 1442 { 1443 int report_timer_expired; 1444 1445 IN6_MULTI_LIST_LOCK_ASSERT(); 1446 MLD_LOCK_ASSERT(); 1447 1448 if (inm->in6m_timer == 0) { 1449 report_timer_expired = 0; 1450 } else if (--inm->in6m_timer == 0) { 1451 report_timer_expired = 1; 1452 } else { 1453 V_current_state_timers_running6 = 1; 1454 return; 1455 } 1456 1457 switch (inm->in6m_state) { 1458 case MLD_NOT_MEMBER: 1459 case MLD_SILENT_MEMBER: 1460 case MLD_IDLE_MEMBER: 1461 case MLD_LAZY_MEMBER: 1462 case MLD_SLEEPING_MEMBER: 1463 case MLD_AWAKENING_MEMBER: 1464 break; 1465 case MLD_REPORTING_MEMBER: 1466 if (report_timer_expired) { 1467 inm->in6m_state = MLD_IDLE_MEMBER; 1468 SLIST_INSERT_HEAD(inmh, inm, in6m_defer); 1469 } 1470 break; 1471 case MLD_G_QUERY_PENDING_MEMBER: 1472 case MLD_SG_QUERY_PENDING_MEMBER: 1473 case MLD_LEAVING_MEMBER: 1474 break; 1475 } 1476 } 1477 1478 /* 1479 * Update a group's timers for MLDv2. 1480 * Will update the global pending timer flags. 1481 * Note: Unlocked read from mli. 1482 */ 1483 static void 1484 mld_v2_process_group_timers(struct in6_multi_head *inmh, 1485 struct mbufq *qrq, struct mbufq *scq, 1486 struct in6_multi *inm, const int uri_fasthz) 1487 { 1488 int query_response_timer_expired; 1489 int state_change_retransmit_timer_expired; 1490 #ifdef KTR 1491 char ip6tbuf[INET6_ADDRSTRLEN]; 1492 #endif 1493 1494 IN6_MULTI_LIST_LOCK_ASSERT(); 1495 MLD_LOCK_ASSERT(); 1496 1497 query_response_timer_expired = 0; 1498 state_change_retransmit_timer_expired = 0; 1499 1500 /* 1501 * During a transition from compatibility mode back to MLDv2, 1502 * a group record in REPORTING state may still have its group 1503 * timer active. This is a no-op in this function; it is easier 1504 * to deal with it here than to complicate the slow-timeout path. 1505 */ 1506 if (inm->in6m_timer == 0) { 1507 query_response_timer_expired = 0; 1508 } else if (--inm->in6m_timer == 0) { 1509 query_response_timer_expired = 1; 1510 } else { 1511 V_current_state_timers_running6 = 1; 1512 } 1513 1514 if (inm->in6m_sctimer == 0) { 1515 state_change_retransmit_timer_expired = 0; 1516 } else if (--inm->in6m_sctimer == 0) { 1517 state_change_retransmit_timer_expired = 1; 1518 } else { 1519 V_state_change_timers_running6 = 1; 1520 } 1521 1522 /* We are in fasttimo, so be quick about it. */ 1523 if (!state_change_retransmit_timer_expired && 1524 !query_response_timer_expired) 1525 return; 1526 1527 switch (inm->in6m_state) { 1528 case MLD_NOT_MEMBER: 1529 case MLD_SILENT_MEMBER: 1530 case MLD_SLEEPING_MEMBER: 1531 case MLD_LAZY_MEMBER: 1532 case MLD_AWAKENING_MEMBER: 1533 case MLD_IDLE_MEMBER: 1534 break; 1535 case MLD_G_QUERY_PENDING_MEMBER: 1536 case MLD_SG_QUERY_PENDING_MEMBER: 1537 /* 1538 * Respond to a previously pending Group-Specific 1539 * or Group-and-Source-Specific query by enqueueing 1540 * the appropriate Current-State report for 1541 * immediate transmission. 
1542 */ 1543 if (query_response_timer_expired) { 1544 int retval __unused; 1545 1546 retval = mld_v2_enqueue_group_record(qrq, inm, 0, 1, 1547 (inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER), 1548 0); 1549 CTR2(KTR_MLD, "%s: enqueue record = %d", 1550 __func__, retval); 1551 inm->in6m_state = MLD_REPORTING_MEMBER; 1552 in6m_clear_recorded(inm); 1553 } 1554 /* FALLTHROUGH */ 1555 case MLD_REPORTING_MEMBER: 1556 case MLD_LEAVING_MEMBER: 1557 if (state_change_retransmit_timer_expired) { 1558 /* 1559 * State-change retransmission timer fired. 1560 * If there are any further pending retransmissions, 1561 * set the global pending state-change flag, and 1562 * reset the timer. 1563 */ 1564 if (--inm->in6m_scrv > 0) { 1565 inm->in6m_sctimer = uri_fasthz; 1566 V_state_change_timers_running6 = 1; 1567 } 1568 /* 1569 * Retransmit the previously computed state-change 1570 * report. If there are no further pending 1571 * retransmissions, the mbuf queue will be consumed. 1572 * Update T0 state to T1 as we have now sent 1573 * a state-change. 1574 */ 1575 (void)mld_v2_merge_state_changes(inm, scq); 1576 1577 in6m_commit(inm); 1578 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 1579 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 1580 if_name(inm->in6m_ifp)); 1581 1582 /* 1583 * If we are leaving the group for good, make sure 1584 * we release MLD's reference to it. 1585 * This release must be deferred using a SLIST, 1586 * as we are called from a loop which traverses 1587 * the in_ifmultiaddr TAILQ. 1588 */ 1589 if (inm->in6m_state == MLD_LEAVING_MEMBER && 1590 inm->in6m_scrv == 0) { 1591 inm->in6m_state = MLD_NOT_MEMBER; 1592 in6m_disconnect_locked(inmh, inm); 1593 in6m_rele_locked(inmh, inm); 1594 } 1595 } 1596 break; 1597 } 1598 } 1599 1600 /* 1601 * Switch to a different version on the given interface, 1602 * as per Section 9.12. 1603 */ 1604 static void 1605 mld_set_version(struct mld_ifsoftc *mli, const int version) 1606 { 1607 int old_version_timer; 1608 1609 MLD_LOCK_ASSERT(); 1610 1611 CTR4(KTR_MLD, "%s: switching to v%d on ifp %p(%s)", __func__, 1612 version, mli->mli_ifp, if_name(mli->mli_ifp)); 1613 1614 if (version == MLD_VERSION_1) { 1615 /* 1616 * Compute the "Older Version Querier Present" timer as per 1617 * Section 9.12. 1618 */ 1619 old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; 1620 old_version_timer *= MLD_SLOWHZ; 1621 mli->mli_v1_timer = old_version_timer; 1622 } 1623 1624 if (mli->mli_v1_timer > 0 && mli->mli_version != MLD_VERSION_1) { 1625 mli->mli_version = MLD_VERSION_1; 1626 mld_v2_cancel_link_timers(mli); 1627 } 1628 } 1629 1630 /* 1631 * Cancel pending MLDv2 timers for the given link and all groups 1632 * joined on it; state-change, general-query, and group-query timers. 1633 */ 1634 static void 1635 mld_v2_cancel_link_timers(struct mld_ifsoftc *mli) 1636 { 1637 struct epoch_tracker et; 1638 struct in6_multi_head inmh; 1639 struct ifmultiaddr *ifma; 1640 struct ifnet *ifp; 1641 struct in6_multi *inm; 1642 1643 CTR3(KTR_MLD, "%s: cancel v2 timers on ifp %p(%s)", __func__, 1644 mli->mli_ifp, if_name(mli->mli_ifp)); 1645 1646 SLIST_INIT(&inmh); 1647 IN6_MULTI_LIST_LOCK_ASSERT(); 1648 MLD_LOCK_ASSERT(); 1649 1650 /* 1651 * Fast-track this potentially expensive operation 1652 * by checking all the global 'timer pending' flags. 
1653 */ 1654 if (!V_interface_timers_running6 && 1655 !V_state_change_timers_running6 && 1656 !V_current_state_timers_running6) 1657 return; 1658 1659 mli->mli_v2_timer = 0; 1660 1661 ifp = mli->mli_ifp; 1662 1663 IF_ADDR_WLOCK(ifp); 1664 NET_EPOCH_ENTER(et); 1665 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1666 inm = in6m_ifmultiaddr_get_inm(ifma); 1667 if (inm == NULL) 1668 continue; 1669 switch (inm->in6m_state) { 1670 case MLD_NOT_MEMBER: 1671 case MLD_SILENT_MEMBER: 1672 case MLD_IDLE_MEMBER: 1673 case MLD_LAZY_MEMBER: 1674 case MLD_SLEEPING_MEMBER: 1675 case MLD_AWAKENING_MEMBER: 1676 break; 1677 case MLD_LEAVING_MEMBER: 1678 /* 1679 * If we are leaving the group and switching 1680 * version, we need to release the final 1681 * reference held for issuing the INCLUDE {}. 1682 */ 1683 if (inm->in6m_refcount == 1) 1684 in6m_disconnect_locked(&inmh, inm); 1685 in6m_rele_locked(&inmh, inm); 1686 /* FALLTHROUGH */ 1687 case MLD_G_QUERY_PENDING_MEMBER: 1688 case MLD_SG_QUERY_PENDING_MEMBER: 1689 in6m_clear_recorded(inm); 1690 /* FALLTHROUGH */ 1691 case MLD_REPORTING_MEMBER: 1692 inm->in6m_sctimer = 0; 1693 inm->in6m_timer = 0; 1694 inm->in6m_state = MLD_REPORTING_MEMBER; 1695 /* 1696 * Free any pending MLDv2 state-change records. 1697 */ 1698 mbufq_drain(&inm->in6m_scq); 1699 break; 1700 } 1701 } 1702 NET_EPOCH_EXIT(et); 1703 IF_ADDR_WUNLOCK(ifp); 1704 in6m_release_list_deferred(&inmh); 1705 } 1706 1707 /* 1708 * Global slowtimo handler. 1709 * VIMAGE: Timeout handlers are expected to service all vimages. 1710 */ 1711 static struct callout mldslow_callout; 1712 static void 1713 mld_slowtimo(void *arg __unused) 1714 { 1715 VNET_ITERATOR_DECL(vnet_iter); 1716 1717 VNET_LIST_RLOCK_NOSLEEP(); 1718 VNET_FOREACH(vnet_iter) { 1719 CURVNET_SET(vnet_iter); 1720 mld_slowtimo_vnet(); 1721 CURVNET_RESTORE(); 1722 } 1723 VNET_LIST_RUNLOCK_NOSLEEP(); 1724 1725 callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL); 1726 } 1727 1728 /* 1729 * Per-vnet slowtimo handler. 1730 */ 1731 static void 1732 mld_slowtimo_vnet(void) 1733 { 1734 struct mld_ifsoftc *mli; 1735 1736 MLD_LOCK(); 1737 1738 LIST_FOREACH(mli, &V_mli_head, mli_link) { 1739 mld_v1_process_querier_timers(mli); 1740 } 1741 1742 MLD_UNLOCK(); 1743 } 1744 1745 /* 1746 * Update the Older Version Querier Present timers for a link. 1747 * See Section 9.12 of RFC 3810. 1748 */ 1749 static void 1750 mld_v1_process_querier_timers(struct mld_ifsoftc *mli) 1751 { 1752 1753 MLD_LOCK_ASSERT(); 1754 1755 if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) { 1756 /* 1757 * MLDv1 Querier Present timer expired; revert to MLDv2. 1758 */ 1759 CTR5(KTR_MLD, 1760 "%s: transition from v%d -> v%d on %p(%s)", 1761 __func__, mli->mli_version, MLD_VERSION_2, 1762 mli->mli_ifp, if_name(mli->mli_ifp)); 1763 mli->mli_version = MLD_VERSION_2; 1764 } 1765 } 1766 1767 /* 1768 * Transmit an MLDv1 report immediately. 1769 */ 1770 static int 1771 mld_v1_transmit_report(struct in6_multi *in6m, const int type) 1772 { 1773 struct ifnet *ifp; 1774 struct in6_ifaddr *ia; 1775 struct ip6_hdr *ip6; 1776 struct mbuf *mh, *md; 1777 struct mld_hdr *mld; 1778 1779 NET_EPOCH_ASSERT(); 1780 IN6_MULTI_LIST_LOCK_ASSERT(); 1781 MLD_LOCK_ASSERT(); 1782 1783 ifp = in6m->in6m_ifp; 1784 /* in process of being freed */ 1785 if (ifp == NULL) 1786 return (0); 1787 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); 1788 /* ia may be NULL if link-local address is tentative. 
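	 * In that case the report is sent from the unspecified address
	 * (::) below, which RFC 3590 permits while no valid link-local
	 * source address is available.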
	 */

	mh = m_gethdr(M_NOWAIT, MT_DATA);
	if (mh == NULL) {
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	md = m_get(M_NOWAIT, MT_DATA);
	if (md == NULL) {
		m_free(mh);
		if (ia != NULL)
			ifa_free(&ia->ia_ifa);
		return (ENOMEM);
	}
	mh->m_next = md;

	/*
	 * FUTURE: Consider increasing alignment by ETHER_HDR_LEN, so
	 * that ether_output() does not need to allocate another mbuf
	 * for the header in the most common case.
	 */
	M_ALIGN(mh, sizeof(struct ip6_hdr));
	mh->m_pkthdr.len = sizeof(struct ip6_hdr) + sizeof(struct mld_hdr);
	mh->m_len = sizeof(struct ip6_hdr);

	ip6 = mtod(mh, struct ip6_hdr *);
	ip6->ip6_flow = 0;
	ip6->ip6_vfc &= ~IPV6_VERSION_MASK;
	ip6->ip6_vfc |= IPV6_VERSION;
	ip6->ip6_nxt = IPPROTO_ICMPV6;
	ip6->ip6_src = ia ? ia->ia_addr.sin6_addr : in6addr_any;
	ip6->ip6_dst = in6m->in6m_addr;

	md->m_len = sizeof(struct mld_hdr);
	mld = mtod(md, struct mld_hdr *);
	mld->mld_type = type;
	mld->mld_code = 0;
	mld->mld_cksum = 0;
	mld->mld_maxdelay = 0;
	mld->mld_reserved = 0;
	mld->mld_addr = in6m->in6m_addr;
	in6_clearscope(&mld->mld_addr);
	mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6,
	    sizeof(struct ip6_hdr), sizeof(struct mld_hdr));

	mld_save_context(mh, ifp);
	mh->m_flags |= M_MLDV1;

	mld_dispatch_packet(mh);

	if (ia != NULL)
		ifa_free(&ia->ia_ifa);
	return (0);
}

/*
 * Process a state change from the upper layer for the given IPv6 group.
 *
 * Each socket holds a reference on the in_multi in its own ip_moptions.
 * The socket layer will have made the necessary updates to the group
 * state; it is now up to MLD to issue a state change report if there
 * has been any change between T0 (when the last state-change was issued)
 * and T1 (now).
 *
 * We use the MLDv2 state machine at group level. The MLD module
 * however makes the decision as to which MLD protocol version to speak.
 * A state change *from* INCLUDE {} always means an initial join.
 * A state change *to* INCLUDE {} always means a final leave.
 *
 * If delay is non-zero, and the state change is an initial multicast
 * join, the state change report will be delayed by 'delay' ticks
 * in units of MLD_FASTHZ if MLDv1 is active on the link; otherwise
 * the initial MLDv2 state change report will be delayed by whichever
 * is sooner, a pending state-change timer or delay itself.
 *
 * VIMAGE: curvnet should have been set by caller, as this routine
 * is called from the socket option handlers.
 */
int
mld_change_state(struct in6_multi *inm, const int delay)
{
	struct mld_ifsoftc *mli;
	struct ifnet *ifp;
	int error;

	IN6_MULTI_LIST_LOCK_ASSERT();

	error = 0;

	/*
	 * Check if the in6_multi has already been disconnected.
	 */
	if (inm->in6m_ifp == NULL) {
		CTR1(KTR_MLD, "%s: inm is disconnected", __func__);
		return (0);
	}

	/*
	 * Try to detect if the upper layer just asked us to change state
	 * for an interface which has now gone away.
	 */
	KASSERT(inm->in6m_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->in6m_ifma->ifma_ifp;
	if (ifp == NULL)
		return (0);
	/*
	 * Sanity check that netinet6's notion of ifp is the
	 * same as net's.
1897 */ 1898 KASSERT(inm->in6m_ifp == ifp, ("%s: bad ifp", __func__)); 1899 1900 MLD_LOCK(); 1901 mli = MLD_IFINFO(ifp); 1902 KASSERT(mli != NULL, ("%s: no mld_ifsoftc for ifp %p", __func__, ifp)); 1903 1904 /* 1905 * If we detect a state transition to or from MCAST_UNDEFINED 1906 * for this group, then we are starting or finishing an MLD 1907 * life cycle for this group. 1908 */ 1909 if (inm->in6m_st[1].iss_fmode != inm->in6m_st[0].iss_fmode) { 1910 CTR3(KTR_MLD, "%s: inm transition %d -> %d", __func__, 1911 inm->in6m_st[0].iss_fmode, inm->in6m_st[1].iss_fmode); 1912 if (inm->in6m_st[0].iss_fmode == MCAST_UNDEFINED) { 1913 CTR1(KTR_MLD, "%s: initial join", __func__); 1914 error = mld_initial_join(inm, mli, delay); 1915 goto out_locked; 1916 } else if (inm->in6m_st[1].iss_fmode == MCAST_UNDEFINED) { 1917 CTR1(KTR_MLD, "%s: final leave", __func__); 1918 mld_final_leave(inm, mli); 1919 goto out_locked; 1920 } 1921 } else { 1922 CTR1(KTR_MLD, "%s: filter set change", __func__); 1923 } 1924 1925 error = mld_handle_state_change(inm, mli); 1926 1927 out_locked: 1928 MLD_UNLOCK(); 1929 return (error); 1930 } 1931 1932 /* 1933 * Perform the initial join for an MLD group. 1934 * 1935 * When joining a group: 1936 * If the group should have its MLD traffic suppressed, do nothing. 1937 * MLDv1 starts sending MLDv1 host membership reports. 1938 * MLDv2 will schedule an MLDv2 state-change report containing the 1939 * initial state of the membership. 1940 * 1941 * If the delay argument is non-zero, then we must delay sending the 1942 * initial state change for delay ticks (in units of MLD_FASTHZ). 1943 */ 1944 static int 1945 mld_initial_join(struct in6_multi *inm, struct mld_ifsoftc *mli, 1946 const int delay) 1947 { 1948 struct epoch_tracker et; 1949 struct ifnet *ifp; 1950 struct mbufq *mq; 1951 int error, retval, syncstates; 1952 int odelay; 1953 #ifdef KTR 1954 char ip6tbuf[INET6_ADDRSTRLEN]; 1955 #endif 1956 1957 CTR4(KTR_MLD, "%s: initial join %s on ifp %p(%s)", 1958 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 1959 inm->in6m_ifp, if_name(inm->in6m_ifp)); 1960 1961 error = 0; 1962 syncstates = 1; 1963 1964 ifp = inm->in6m_ifp; 1965 1966 IN6_MULTI_LIST_LOCK_ASSERT(); 1967 MLD_LOCK_ASSERT(); 1968 1969 KASSERT(mli && mli->mli_ifp == ifp, ("%s: inconsistent ifp", __func__)); 1970 1971 /* 1972 * Groups joined on loopback or marked as 'not reported', 1973 * enter the MLD_SILENT_MEMBER state and 1974 * are never reported in any protocol exchanges. 1975 * All other groups enter the appropriate state machine 1976 * for the version in use on this link. 1977 * A link marked as MLIF_SILENT causes MLD to be completely 1978 * disabled for the link. 1979 */ 1980 if ((ifp->if_flags & IFF_LOOPBACK) || 1981 (mli->mli_flags & MLIF_SILENT) || 1982 !mld_is_addr_reported(&inm->in6m_addr)) { 1983 CTR1(KTR_MLD, 1984 "%s: not kicking state machine for silent group", __func__); 1985 inm->in6m_state = MLD_SILENT_MEMBER; 1986 inm->in6m_timer = 0; 1987 } else { 1988 /* 1989 * Deal with overlapping in_multi lifecycle. 1990 * If this group was LEAVING, then make sure 1991 * we drop the reference we picked up to keep the 1992 * group around for the final INCLUDE {} enqueue. 
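 * (That reference is taken via in6m_acquire_locked() in
 * mld_final_leave() below, when the group enters MLD_LEAVING_MEMBER.)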
1993 */ 1994 if (mli->mli_version == MLD_VERSION_2 && 1995 inm->in6m_state == MLD_LEAVING_MEMBER) { 1996 inm->in6m_refcount--; 1997 MPASS(inm->in6m_refcount > 0); 1998 } 1999 inm->in6m_state = MLD_REPORTING_MEMBER; 2000 2001 switch (mli->mli_version) { 2002 case MLD_VERSION_1: 2003 /* 2004 * If a delay was provided, only use it if 2005 * it is greater than the delay normally 2006 * used for an MLDv1 state change report, 2007 * and delay sending the initial MLDv1 report 2008 * by not transitioning to the IDLE state. 2009 */ 2010 odelay = MLD_RANDOM_DELAY(MLD_V1_MAX_RI * MLD_FASTHZ); 2011 if (delay) { 2012 inm->in6m_timer = max(delay, odelay); 2013 V_current_state_timers_running6 = 1; 2014 } else { 2015 inm->in6m_state = MLD_IDLE_MEMBER; 2016 NET_EPOCH_ENTER(et); 2017 error = mld_v1_transmit_report(inm, 2018 MLD_LISTENER_REPORT); 2019 NET_EPOCH_EXIT(et); 2020 if (error == 0) { 2021 inm->in6m_timer = odelay; 2022 V_current_state_timers_running6 = 1; 2023 } 2024 } 2025 break; 2026 2027 case MLD_VERSION_2: 2028 /* 2029 * Defer update of T0 to T1, until the first copy 2030 * of the state change has been transmitted. 2031 */ 2032 syncstates = 0; 2033 2034 /* 2035 * Immediately enqueue a State-Change Report for 2036 * this interface, freeing any previous reports. 2037 * Don't kick the timers if there is nothing to do, 2038 * or if an error occurred. 2039 */ 2040 mq = &inm->in6m_scq; 2041 mbufq_drain(mq); 2042 retval = mld_v2_enqueue_group_record(mq, inm, 1, 2043 0, 0, (mli->mli_flags & MLIF_USEALLOW)); 2044 CTR2(KTR_MLD, "%s: enqueue record = %d", 2045 __func__, retval); 2046 if (retval <= 0) { 2047 error = retval * -1; 2048 break; 2049 } 2050 2051 /* 2052 * Schedule transmission of pending state-change 2053 * report up to RV times for this link. The timer 2054 * will fire at the next mld_fasttimo (~200ms), 2055 * giving us an opportunity to merge the reports. 2056 * 2057 * If a delay was provided to this function, only 2058 * use this delay if sooner than the existing one. 2059 */ 2060 KASSERT(mli->mli_rv > 1, 2061 ("%s: invalid robustness %d", __func__, 2062 mli->mli_rv)); 2063 inm->in6m_scrv = mli->mli_rv; 2064 if (delay) { 2065 if (inm->in6m_sctimer > 1) { 2066 inm->in6m_sctimer = 2067 min(inm->in6m_sctimer, delay); 2068 } else 2069 inm->in6m_sctimer = delay; 2070 } else 2071 inm->in6m_sctimer = 1; 2072 V_state_change_timers_running6 = 1; 2073 2074 error = 0; 2075 break; 2076 } 2077 } 2078 2079 /* 2080 * Only update the T0 state if state change is atomic, 2081 * i.e. we don't need to wait for a timer to fire before we 2082 * can consider the state change to have been communicated. 2083 */ 2084 if (syncstates) { 2085 in6m_commit(inm); 2086 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2087 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2088 if_name(inm->in6m_ifp)); 2089 } 2090 2091 return (error); 2092 } 2093 2094 /* 2095 * Issue an intermediate state change during the life-cycle. 
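 * That is, any change which is neither an initial join nor a final
 * leave: a source filter list change, or a switch between INCLUDE and
 * EXCLUDE mode. Only MLDv2 can report such a delta; on MLDv1 links
 * (and for silent or unreported groups) the change is simply committed
 * without transmitting anything.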
2096 */ 2097 static int 2098 mld_handle_state_change(struct in6_multi *inm, struct mld_ifsoftc *mli) 2099 { 2100 struct ifnet *ifp; 2101 int retval; 2102 #ifdef KTR 2103 char ip6tbuf[INET6_ADDRSTRLEN]; 2104 #endif 2105 2106 CTR4(KTR_MLD, "%s: state change for %s on ifp %p(%s)", 2107 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2108 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2109 2110 ifp = inm->in6m_ifp; 2111 2112 IN6_MULTI_LIST_LOCK_ASSERT(); 2113 MLD_LOCK_ASSERT(); 2114 2115 KASSERT(mli && mli->mli_ifp == ifp, 2116 ("%s: inconsistent ifp", __func__)); 2117 2118 if ((ifp->if_flags & IFF_LOOPBACK) || 2119 (mli->mli_flags & MLIF_SILENT) || 2120 !mld_is_addr_reported(&inm->in6m_addr) || 2121 (mli->mli_version != MLD_VERSION_2)) { 2122 if (!mld_is_addr_reported(&inm->in6m_addr)) { 2123 CTR1(KTR_MLD, 2124 "%s: not kicking state machine for silent group", __func__); 2125 } 2126 CTR1(KTR_MLD, "%s: nothing to do", __func__); 2127 in6m_commit(inm); 2128 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2129 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2130 if_name(inm->in6m_ifp)); 2131 return (0); 2132 } 2133 2134 mbufq_drain(&inm->in6m_scq); 2135 2136 retval = mld_v2_enqueue_group_record(&inm->in6m_scq, inm, 1, 0, 0, 2137 (mli->mli_flags & MLIF_USEALLOW)); 2138 CTR2(KTR_MLD, "%s: enqueue record = %d", __func__, retval); 2139 if (retval <= 0) 2140 return (-retval); 2141 2142 /* 2143 * If record(s) were enqueued, start the state-change 2144 * report timer for this group. 2145 */ 2146 inm->in6m_scrv = mli->mli_rv; 2147 inm->in6m_sctimer = 1; 2148 V_state_change_timers_running6 = 1; 2149 2150 return (0); 2151 } 2152 2153 /* 2154 * Perform the final leave for a multicast address. 2155 * 2156 * When leaving a group: 2157 * MLDv1 sends a DONE message, if and only if we are the reporter. 2158 * MLDv2 enqueues a state-change report containing a transition 2159 * to INCLUDE {} for immediate transmission. 2160 */ 2161 static void 2162 mld_final_leave(struct in6_multi *inm, struct mld_ifsoftc *mli) 2163 { 2164 struct epoch_tracker et; 2165 #ifdef KTR 2166 char ip6tbuf[INET6_ADDRSTRLEN]; 2167 #endif 2168 2169 CTR4(KTR_MLD, "%s: final leave %s on ifp %p(%s)", 2170 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2171 inm->in6m_ifp, if_name(inm->in6m_ifp)); 2172 2173 IN6_MULTI_LIST_LOCK_ASSERT(); 2174 MLD_LOCK_ASSERT(); 2175 2176 switch (inm->in6m_state) { 2177 case MLD_NOT_MEMBER: 2178 case MLD_SILENT_MEMBER: 2179 case MLD_LEAVING_MEMBER: 2180 /* Already leaving or left; do nothing. */ 2181 CTR1(KTR_MLD, 2182 "%s: not kicking state machine for silent group", __func__); 2183 break; 2184 case MLD_REPORTING_MEMBER: 2185 case MLD_IDLE_MEMBER: 2186 case MLD_G_QUERY_PENDING_MEMBER: 2187 case MLD_SG_QUERY_PENDING_MEMBER: 2188 if (mli->mli_version == MLD_VERSION_1) { 2189 #ifdef INVARIANTS 2190 if (inm->in6m_state == MLD_G_QUERY_PENDING_MEMBER || 2191 inm->in6m_state == MLD_SG_QUERY_PENDING_MEMBER) 2192 panic("%s: MLDv2 state reached, not MLDv2 mode", 2193 __func__); 2194 #endif 2195 NET_EPOCH_ENTER(et); 2196 mld_v1_transmit_report(inm, MLD_LISTENER_DONE); 2197 NET_EPOCH_EXIT(et); 2198 inm->in6m_state = MLD_NOT_MEMBER; 2199 V_current_state_timers_running6 = 1; 2200 } else if (mli->mli_version == MLD_VERSION_2) { 2201 /* 2202 * Stop group timer and all pending reports. 2203 * Immediately enqueue a state-change report 2204 * TO_IN {} to be sent on the next fast timeout, 2205 * giving us an opportunity to merge reports. 
2206 */ 2207 mbufq_drain(&inm->in6m_scq); 2208 inm->in6m_timer = 0; 2209 inm->in6m_scrv = mli->mli_rv; 2210 CTR4(KTR_MLD, "%s: Leaving %s/%s with %d " 2211 "pending retransmissions.", __func__, 2212 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2213 if_name(inm->in6m_ifp), inm->in6m_scrv); 2214 if (inm->in6m_scrv == 0) { 2215 inm->in6m_state = MLD_NOT_MEMBER; 2216 inm->in6m_sctimer = 0; 2217 } else { 2218 int retval __diagused; 2219 2220 in6m_acquire_locked(inm); 2221 2222 retval = mld_v2_enqueue_group_record( 2223 &inm->in6m_scq, inm, 1, 0, 0, 2224 (mli->mli_flags & MLIF_USEALLOW)); 2225 KASSERT(retval != 0, 2226 ("%s: enqueue record = %d", __func__, 2227 retval)); 2228 2229 inm->in6m_state = MLD_LEAVING_MEMBER; 2230 inm->in6m_sctimer = 1; 2231 V_state_change_timers_running6 = 1; 2232 } 2233 break; 2234 } 2235 break; 2236 case MLD_LAZY_MEMBER: 2237 case MLD_SLEEPING_MEMBER: 2238 case MLD_AWAKENING_MEMBER: 2239 /* Our reports are suppressed; do nothing. */ 2240 break; 2241 } 2242 2243 in6m_commit(inm); 2244 CTR3(KTR_MLD, "%s: T1 -> T0 for %s/%s", __func__, 2245 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2246 if_name(inm->in6m_ifp)); 2247 inm->in6m_st[1].iss_fmode = MCAST_UNDEFINED; 2248 CTR3(KTR_MLD, "%s: T1 now MCAST_UNDEFINED for %p/%s", 2249 __func__, &inm->in6m_addr, if_name(inm->in6m_ifp)); 2250 } 2251 2252 /* 2253 * Enqueue an MLDv2 group record to the given output queue. 2254 * 2255 * If is_state_change is zero, a current-state record is appended. 2256 * If is_state_change is non-zero, a state-change report is appended. 2257 * 2258 * If is_group_query is non-zero, an mbuf packet chain is allocated. 2259 * If is_group_query is zero, and if there is a packet with free space 2260 * at the tail of the queue, it will be appended to providing there 2261 * is enough free space. 2262 * Otherwise a new mbuf packet chain is allocated. 2263 * 2264 * If is_source_query is non-zero, each source is checked to see if 2265 * it was recorded for a Group-Source query, and will be omitted if 2266 * it is not both in-mode and recorded. 2267 * 2268 * If use_block_allow is non-zero, state change reports for initial join 2269 * and final leave, on an inclusive mode group with a source list, will be 2270 * rewritten to use the ALLOW_NEW and BLOCK_OLD record types, respectively. 2271 * 2272 * The function will attempt to allocate leading space in the packet 2273 * for the IPv6+ICMP headers to be prepended without fragmenting the chain. 2274 * 2275 * If successful the size of all data appended to the queue is returned, 2276 * otherwise an error code less than zero is returned, or zero if 2277 * no record(s) were appended. 
2278 */ 2279 static int 2280 mld_v2_enqueue_group_record(struct mbufq *mq, struct in6_multi *inm, 2281 const int is_state_change, const int is_group_query, 2282 const int is_source_query, const int use_block_allow) 2283 { 2284 struct mldv2_record mr; 2285 struct mldv2_record *pmr; 2286 struct ifnet *ifp; 2287 struct ip6_msource *ims, *nims; 2288 struct mbuf *m0, *m, *md; 2289 int is_filter_list_change; 2290 int minrec0len, m0srcs, msrcs, nbytes, off; 2291 int record_has_sources; 2292 int now; 2293 int type; 2294 uint8_t mode; 2295 #ifdef KTR 2296 char ip6tbuf[INET6_ADDRSTRLEN]; 2297 #endif 2298 2299 IN6_MULTI_LIST_LOCK_ASSERT(); 2300 2301 ifp = inm->in6m_ifp; 2302 is_filter_list_change = 0; 2303 m = NULL; 2304 m0 = NULL; 2305 m0srcs = 0; 2306 msrcs = 0; 2307 nbytes = 0; 2308 nims = NULL; 2309 record_has_sources = 1; 2310 pmr = NULL; 2311 type = MLD_DO_NOTHING; 2312 mode = inm->in6m_st[1].iss_fmode; 2313 2314 /* 2315 * If we did not transition out of ASM mode during t0->t1, 2316 * and there are no source nodes to process, we can skip 2317 * the generation of source records. 2318 */ 2319 if (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0 && 2320 inm->in6m_nsrc == 0) 2321 record_has_sources = 0; 2322 2323 if (is_state_change) { 2324 /* 2325 * Queue a state change record. 2326 * If the mode did not change, and there are non-ASM 2327 * listeners or source filters present, 2328 * we potentially need to issue two records for the group. 2329 * If there are ASM listeners, and there was no filter 2330 * mode transition of any kind, do nothing. 2331 * 2332 * If we are transitioning to MCAST_UNDEFINED, we need 2333 * not send any sources. A transition to/from this state is 2334 * considered inclusive with some special treatment. 2335 * 2336 * If we are rewriting initial joins/leaves to use 2337 * ALLOW/BLOCK, and the group's membership is inclusive, 2338 * we need to send sources in all cases. 2339 */ 2340 if (mode != inm->in6m_st[0].iss_fmode) { 2341 if (mode == MCAST_EXCLUDE) { 2342 CTR1(KTR_MLD, "%s: change to EXCLUDE", 2343 __func__); 2344 type = MLD_CHANGE_TO_EXCLUDE_MODE; 2345 } else { 2346 CTR1(KTR_MLD, "%s: change to INCLUDE", 2347 __func__); 2348 if (use_block_allow) { 2349 /* 2350 * XXX 2351 * Here we're interested in state 2352 * edges either direction between 2353 * MCAST_UNDEFINED and MCAST_INCLUDE. 2354 * Perhaps we should just check 2355 * the group state, rather than 2356 * the filter mode. 2357 */ 2358 if (mode == MCAST_UNDEFINED) { 2359 type = MLD_BLOCK_OLD_SOURCES; 2360 } else { 2361 type = MLD_ALLOW_NEW_SOURCES; 2362 } 2363 } else { 2364 type = MLD_CHANGE_TO_INCLUDE_MODE; 2365 if (mode == MCAST_UNDEFINED) 2366 record_has_sources = 0; 2367 } 2368 } 2369 } else { 2370 if (record_has_sources) { 2371 is_filter_list_change = 1; 2372 } else { 2373 type = MLD_DO_NOTHING; 2374 } 2375 } 2376 } else { 2377 /* 2378 * Queue a current state record. 2379 */ 2380 if (mode == MCAST_EXCLUDE) { 2381 type = MLD_MODE_IS_EXCLUDE; 2382 } else if (mode == MCAST_INCLUDE) { 2383 type = MLD_MODE_IS_INCLUDE; 2384 KASSERT(inm->in6m_st[1].iss_asm == 0, 2385 ("%s: inm %p is INCLUDE but ASM count is %d", 2386 __func__, inm, inm->in6m_st[1].iss_asm)); 2387 } 2388 } 2389 2390 /* 2391 * Generate the filter list changes using a separate function. 
2392 */ 2393 if (is_filter_list_change) 2394 return (mld_v2_enqueue_filter_change(mq, inm)); 2395 2396 if (type == MLD_DO_NOTHING) { 2397 CTR3(KTR_MLD, "%s: nothing to do for %s/%s", 2398 __func__, ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2399 if_name(inm->in6m_ifp)); 2400 return (0); 2401 } 2402 2403 /* 2404 * If any sources are present, we must be able to fit at least 2405 * one in the trailing space of the tail packet's mbuf, 2406 * ideally more. 2407 */ 2408 minrec0len = sizeof(struct mldv2_record); 2409 if (record_has_sources) 2410 minrec0len += sizeof(struct in6_addr); 2411 2412 CTR4(KTR_MLD, "%s: queueing %s for %s/%s", __func__, 2413 mld_rec_type_to_str(type), 2414 ip6_sprintf(ip6tbuf, &inm->in6m_addr), 2415 if_name(inm->in6m_ifp)); 2416 2417 /* 2418 * Check if we have a packet in the tail of the queue for this 2419 * group into which the first group record for this group will fit. 2420 * Otherwise allocate a new packet. 2421 * Always allocate leading space for IP6+RA+ICMPV6+REPORT. 2422 * Note: Group records for G/GSR query responses MUST be sent 2423 * in their own packet. 2424 */ 2425 m0 = mbufq_last(mq); 2426 if (!is_group_query && 2427 m0 != NULL && 2428 (m0->m_pkthdr.vt_nrecs + 1 <= MLD_V2_REPORT_MAXRECS) && 2429 (m0->m_pkthdr.len + minrec0len) < 2430 (ifp->if_mtu - MLD_MTUSPACE)) { 2431 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2432 sizeof(struct mldv2_record)) / 2433 sizeof(struct in6_addr); 2434 m = m0; 2435 CTR1(KTR_MLD, "%s: use existing packet", __func__); 2436 } else { 2437 if (mbufq_full(mq)) { 2438 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2439 return (-ENOMEM); 2440 } 2441 m = NULL; 2442 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2443 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2444 if (!is_state_change && !is_group_query) 2445 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2446 if (m == NULL) 2447 m = m_gethdr(M_NOWAIT, MT_DATA); 2448 if (m == NULL) 2449 return (-ENOMEM); 2450 2451 mld_save_context(m, ifp); 2452 2453 CTR1(KTR_MLD, "%s: allocated first packet", __func__); 2454 } 2455 2456 /* 2457 * Append group record. 2458 * If we have sources, we don't know how many yet. 2459 */ 2460 mr.mr_type = type; 2461 mr.mr_datalen = 0; 2462 mr.mr_numsrc = 0; 2463 mr.mr_addr = inm->in6m_addr; 2464 in6_clearscope(&mr.mr_addr); 2465 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2466 if (m != m0) 2467 m_freem(m); 2468 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2469 return (-ENOMEM); 2470 } 2471 nbytes += sizeof(struct mldv2_record); 2472 2473 /* 2474 * Append as many sources as will fit in the first packet. 2475 * If we are appending to a new packet, the chain allocation 2476 * may potentially use clusters; use m_getptr() in this case. 2477 * If we are appending to an existing packet, we need to obtain 2478 * a pointer to the group record after m_append(), in case a new 2479 * mbuf was allocated. 2480 * 2481 * Only append sources which are in-mode at t1. If we are 2482 * transitioning to MCAST_UNDEFINED state on the group, and 2483 * use_block_allow is zero, do not include source entries. 2484 * Otherwise, we need to include this source in the report. 2485 * 2486 * Only report recorded sources in our filter set when responding 2487 * to a group-source query. 
2488 */ 2489 if (record_has_sources) { 2490 if (m == m0) { 2491 md = m_last(m); 2492 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2493 md->m_len - nbytes); 2494 } else { 2495 md = m_getptr(m, 0, &off); 2496 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + 2497 off); 2498 } 2499 msrcs = 0; 2500 RB_FOREACH_SAFE(ims, ip6_msource_tree, &inm->in6m_srcs, 2501 nims) { 2502 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2503 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2504 now = im6s_get_mode(inm, ims, 1); 2505 CTR2(KTR_MLD, "%s: node is %d", __func__, now); 2506 if ((now != mode) || 2507 (now == mode && 2508 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2509 CTR1(KTR_MLD, "%s: skip node", __func__); 2510 continue; 2511 } 2512 if (is_source_query && ims->im6s_stp == 0) { 2513 CTR1(KTR_MLD, "%s: skip unrecorded node", 2514 __func__); 2515 continue; 2516 } 2517 CTR1(KTR_MLD, "%s: append node", __func__); 2518 if (!m_append(m, sizeof(struct in6_addr), 2519 (void *)&ims->im6s_addr)) { 2520 if (m != m0) 2521 m_freem(m); 2522 CTR1(KTR_MLD, "%s: m_append() failed.", 2523 __func__); 2524 return (-ENOMEM); 2525 } 2526 nbytes += sizeof(struct in6_addr); 2527 ++msrcs; 2528 if (msrcs == m0srcs) 2529 break; 2530 } 2531 CTR2(KTR_MLD, "%s: msrcs is %d this packet", __func__, 2532 msrcs); 2533 pmr->mr_numsrc = htons(msrcs); 2534 nbytes += (msrcs * sizeof(struct in6_addr)); 2535 } 2536 2537 if (is_source_query && msrcs == 0) { 2538 CTR1(KTR_MLD, "%s: no recorded sources to report", __func__); 2539 if (m != m0) 2540 m_freem(m); 2541 return (0); 2542 } 2543 2544 /* 2545 * We are good to go with first packet. 2546 */ 2547 if (m != m0) { 2548 CTR1(KTR_MLD, "%s: enqueueing first packet", __func__); 2549 m->m_pkthdr.vt_nrecs = 1; 2550 mbufq_enqueue(mq, m); 2551 } else 2552 m->m_pkthdr.vt_nrecs++; 2553 2554 /* 2555 * No further work needed if no source list in packet(s). 2556 */ 2557 if (!record_has_sources) 2558 return (nbytes); 2559 2560 /* 2561 * Whilst sources remain to be announced, we need to allocate 2562 * a new packet and fill out as many sources as will fit. 2563 * Always try for a cluster first. 
2564 */ 2565 while (nims != NULL) { 2566 if (mbufq_full(mq)) { 2567 CTR1(KTR_MLD, "%s: outbound queue full", __func__); 2568 return (-ENOMEM); 2569 } 2570 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2571 if (m == NULL) 2572 m = m_gethdr(M_NOWAIT, MT_DATA); 2573 if (m == NULL) 2574 return (-ENOMEM); 2575 mld_save_context(m, ifp); 2576 md = m_getptr(m, 0, &off); 2577 pmr = (struct mldv2_record *)(mtod(md, uint8_t *) + off); 2578 CTR1(KTR_MLD, "%s: allocated next packet", __func__); 2579 2580 if (!m_append(m, sizeof(struct mldv2_record), (void *)&mr)) { 2581 if (m != m0) 2582 m_freem(m); 2583 CTR1(KTR_MLD, "%s: m_append() failed.", __func__); 2584 return (-ENOMEM); 2585 } 2586 m->m_pkthdr.vt_nrecs = 1; 2587 nbytes += sizeof(struct mldv2_record); 2588 2589 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2590 sizeof(struct mldv2_record)) / sizeof(struct in6_addr); 2591 2592 msrcs = 0; 2593 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2594 CTR2(KTR_MLD, "%s: visit node %s", 2595 __func__, ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2596 now = im6s_get_mode(inm, ims, 1); 2597 if ((now != mode) || 2598 (now == mode && 2599 (!use_block_allow && mode == MCAST_UNDEFINED))) { 2600 CTR1(KTR_MLD, "%s: skip node", __func__); 2601 continue; 2602 } 2603 if (is_source_query && ims->im6s_stp == 0) { 2604 CTR1(KTR_MLD, "%s: skip unrecorded node", 2605 __func__); 2606 continue; 2607 } 2608 CTR1(KTR_MLD, "%s: append node", __func__); 2609 if (!m_append(m, sizeof(struct in6_addr), 2610 (void *)&ims->im6s_addr)) { 2611 if (m != m0) 2612 m_freem(m); 2613 CTR1(KTR_MLD, "%s: m_append() failed.", 2614 __func__); 2615 return (-ENOMEM); 2616 } 2617 ++msrcs; 2618 if (msrcs == m0srcs) 2619 break; 2620 } 2621 pmr->mr_numsrc = htons(msrcs); 2622 nbytes += (msrcs * sizeof(struct in6_addr)); 2623 2624 CTR1(KTR_MLD, "%s: enqueueing next packet", __func__); 2625 mbufq_enqueue(mq, m); 2626 } 2627 2628 return (nbytes); 2629 } 2630 2631 /* 2632 * Type used to mark record pass completion. 2633 * We exploit the fact we can cast to this easily from the 2634 * current filter modes on each ip_msource node. 2635 */ 2636 typedef enum { 2637 REC_NONE = 0x00, /* MCAST_UNDEFINED */ 2638 REC_ALLOW = 0x01, /* MCAST_INCLUDE */ 2639 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ 2640 REC_FULL = REC_ALLOW | REC_BLOCK 2641 } rectype_t; 2642 2643 /* 2644 * Enqueue an MLDv2 filter list change to the given output queue. 2645 * 2646 * Source list filter state is held in an RB-tree. When the filter list 2647 * for a group is changed without changing its mode, we need to compute 2648 * the deltas between T0 and T1 for each source in the filter set, 2649 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. 2650 * 2651 * As we may potentially queue two record types, and the entire R-B tree 2652 * needs to be walked at once, we break this out into its own function 2653 * so we can generate a tightly packed queue of packets. 2654 * 2655 * XXX This could be written to only use one tree walk, although that makes 2656 * serializing into the mbuf chains a bit harder. For now we do two walks 2657 * which makes things easier on us, and it may or may not be harder on 2658 * the L2 cache. 2659 * 2660 * If successful the size of all data appended to the queue is returned, 2661 * otherwise an error code less than zero is returned, or zero if 2662 * no record(s) were appended. 
2663 */ 2664 static int 2665 mld_v2_enqueue_filter_change(struct mbufq *mq, struct in6_multi *inm) 2666 { 2667 static const int MINRECLEN = 2668 sizeof(struct mldv2_record) + sizeof(struct in6_addr); 2669 struct ifnet *ifp; 2670 struct mldv2_record mr; 2671 struct mldv2_record *pmr; 2672 struct ip6_msource *ims, *nims; 2673 struct mbuf *m, *m0, *md; 2674 int m0srcs, nbytes, npbytes, off, rsrcs, schanged; 2675 uint8_t mode, now, then; 2676 rectype_t crt, drt, nrt; 2677 #ifdef KTR 2678 int nallow, nblock; 2679 char ip6tbuf[INET6_ADDRSTRLEN]; 2680 #endif 2681 2682 IN6_MULTI_LIST_LOCK_ASSERT(); 2683 2684 if (inm->in6m_nsrc == 0 || 2685 (inm->in6m_st[0].iss_asm > 0 && inm->in6m_st[1].iss_asm > 0)) 2686 return (0); 2687 2688 ifp = inm->in6m_ifp; /* interface */ 2689 mode = inm->in6m_st[1].iss_fmode; /* filter mode at t1 */ 2690 crt = REC_NONE; /* current group record type */ 2691 drt = REC_NONE; /* mask of completed group record types */ 2692 nrt = REC_NONE; /* record type for current node */ 2693 m0srcs = 0; /* # source which will fit in current mbuf chain */ 2694 npbytes = 0; /* # of bytes appended this packet */ 2695 nbytes = 0; /* # of bytes appended to group's state-change queue */ 2696 rsrcs = 0; /* # sources encoded in current record */ 2697 schanged = 0; /* # nodes encoded in overall filter change */ 2698 #ifdef KTR 2699 nallow = 0; /* # of source entries in ALLOW_NEW */ 2700 nblock = 0; /* # of source entries in BLOCK_OLD */ 2701 #endif 2702 nims = NULL; /* next tree node pointer */ 2703 2704 /* 2705 * For each possible filter record mode. 2706 * The first kind of source we encounter tells us which 2707 * is the first kind of record we start appending. 2708 * If a node transitioned to UNDEFINED at t1, its mode is treated 2709 * as the inverse of the group's filter mode. 2710 */ 2711 while (drt != REC_FULL) { 2712 do { 2713 m0 = mbufq_last(mq); 2714 if (m0 != NULL && 2715 (m0->m_pkthdr.vt_nrecs + 1 <= 2716 MLD_V2_REPORT_MAXRECS) && 2717 (m0->m_pkthdr.len + MINRECLEN) < 2718 (ifp->if_mtu - MLD_MTUSPACE)) { 2719 m = m0; 2720 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2721 sizeof(struct mldv2_record)) / 2722 sizeof(struct in6_addr); 2723 CTR1(KTR_MLD, 2724 "%s: use previous packet", __func__); 2725 } else { 2726 m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); 2727 if (m == NULL) 2728 m = m_gethdr(M_NOWAIT, MT_DATA); 2729 if (m == NULL) { 2730 CTR1(KTR_MLD, 2731 "%s: m_get*() failed", __func__); 2732 return (-ENOMEM); 2733 } 2734 m->m_pkthdr.vt_nrecs = 0; 2735 mld_save_context(m, ifp); 2736 m0srcs = (ifp->if_mtu - MLD_MTUSPACE - 2737 sizeof(struct mldv2_record)) / 2738 sizeof(struct in6_addr); 2739 npbytes = 0; 2740 CTR1(KTR_MLD, 2741 "%s: allocated new packet", __func__); 2742 } 2743 /* 2744 * Append the MLD group record header to the 2745 * current packet's data area. 2746 * Recalculate pointer to free space for next 2747 * group record, in case m_append() allocated 2748 * a new mbuf or cluster. 
2749 */ 2750 memset(&mr, 0, sizeof(mr)); 2751 mr.mr_addr = inm->in6m_addr; 2752 in6_clearscope(&mr.mr_addr); 2753 if (!m_append(m, sizeof(mr), (void *)&mr)) { 2754 if (m != m0) 2755 m_freem(m); 2756 CTR1(KTR_MLD, 2757 "%s: m_append() failed", __func__); 2758 return (-ENOMEM); 2759 } 2760 npbytes += sizeof(struct mldv2_record); 2761 if (m != m0) { 2762 /* new packet; offset in chain */ 2763 md = m_getptr(m, npbytes - 2764 sizeof(struct mldv2_record), &off); 2765 pmr = (struct mldv2_record *)(mtod(md, 2766 uint8_t *) + off); 2767 } else { 2768 /* current packet; offset from last append */ 2769 md = m_last(m); 2770 pmr = (struct mldv2_record *)(mtod(md, 2771 uint8_t *) + md->m_len - 2772 sizeof(struct mldv2_record)); 2773 } 2774 /* 2775 * Begin walking the tree for this record type 2776 * pass, or continue from where we left off 2777 * previously if we had to allocate a new packet. 2778 * Only report deltas in-mode at t1. 2779 * We need not report included sources as allowed 2780 * if we are in inclusive mode on the group, 2781 * however the converse is not true. 2782 */ 2783 rsrcs = 0; 2784 if (nims == NULL) { 2785 nims = RB_MIN(ip6_msource_tree, 2786 &inm->in6m_srcs); 2787 } 2788 RB_FOREACH_FROM(ims, ip6_msource_tree, nims) { 2789 CTR2(KTR_MLD, "%s: visit node %s", __func__, 2790 ip6_sprintf(ip6tbuf, &ims->im6s_addr)); 2791 now = im6s_get_mode(inm, ims, 1); 2792 then = im6s_get_mode(inm, ims, 0); 2793 CTR3(KTR_MLD, "%s: mode: t0 %d, t1 %d", 2794 __func__, then, now); 2795 if (now == then) { 2796 CTR1(KTR_MLD, 2797 "%s: skip unchanged", __func__); 2798 continue; 2799 } 2800 if (mode == MCAST_EXCLUDE && 2801 now == MCAST_INCLUDE) { 2802 CTR1(KTR_MLD, 2803 "%s: skip IN src on EX group", 2804 __func__); 2805 continue; 2806 } 2807 nrt = (rectype_t)now; 2808 if (nrt == REC_NONE) 2809 nrt = (rectype_t)(~mode & REC_FULL); 2810 if (schanged++ == 0) { 2811 crt = nrt; 2812 } else if (crt != nrt) 2813 continue; 2814 if (!m_append(m, sizeof(struct in6_addr), 2815 (void *)&ims->im6s_addr)) { 2816 if (m != m0) 2817 m_freem(m); 2818 CTR1(KTR_MLD, 2819 "%s: m_append() failed", __func__); 2820 return (-ENOMEM); 2821 } 2822 #ifdef KTR 2823 nallow += !!(crt == REC_ALLOW); 2824 nblock += !!(crt == REC_BLOCK); 2825 #endif 2826 if (++rsrcs == m0srcs) 2827 break; 2828 } 2829 /* 2830 * If we did not append any tree nodes on this 2831 * pass, back out of allocations. 2832 */ 2833 if (rsrcs == 0) { 2834 npbytes -= sizeof(struct mldv2_record); 2835 if (m != m0) { 2836 CTR1(KTR_MLD, 2837 "%s: m_free(m)", __func__); 2838 m_freem(m); 2839 } else { 2840 CTR1(KTR_MLD, 2841 "%s: m_adj(m, -mr)", __func__); 2842 m_adj(m, -((int)sizeof( 2843 struct mldv2_record))); 2844 } 2845 continue; 2846 } 2847 npbytes += (rsrcs * sizeof(struct in6_addr)); 2848 if (crt == REC_ALLOW) 2849 pmr->mr_type = MLD_ALLOW_NEW_SOURCES; 2850 else if (crt == REC_BLOCK) 2851 pmr->mr_type = MLD_BLOCK_OLD_SOURCES; 2852 pmr->mr_numsrc = htons(rsrcs); 2853 /* 2854 * Count the new group record, and enqueue this 2855 * packet if it wasn't already queued. 
2856 */ 2857 m->m_pkthdr.vt_nrecs++; 2858 if (m != m0) 2859 mbufq_enqueue(mq, m); 2860 nbytes += npbytes; 2861 } while (nims != NULL); 2862 drt |= crt; 2863 crt = (~crt & REC_FULL); 2864 } 2865 2866 CTR3(KTR_MLD, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__, 2867 nallow, nblock); 2868 2869 return (nbytes); 2870 } 2871 2872 static int 2873 mld_v2_merge_state_changes(struct in6_multi *inm, struct mbufq *scq) 2874 { 2875 struct mbufq *gq; 2876 struct mbuf *m; /* pending state-change */ 2877 struct mbuf *m0; /* copy of pending state-change */ 2878 struct mbuf *mt; /* last state-change in packet */ 2879 int docopy, domerge; 2880 u_int recslen; 2881 2882 docopy = 0; 2883 domerge = 0; 2884 recslen = 0; 2885 2886 IN6_MULTI_LIST_LOCK_ASSERT(); 2887 MLD_LOCK_ASSERT(); 2888 2889 /* 2890 * If there are further pending retransmissions, make a writable 2891 * copy of each queued state-change message before merging. 2892 */ 2893 if (inm->in6m_scrv > 0) 2894 docopy = 1; 2895 2896 gq = &inm->in6m_scq; 2897 #ifdef KTR 2898 if (mbufq_first(gq) == NULL) { 2899 CTR2(KTR_MLD, "%s: WARNING: queue for inm %p is empty", 2900 __func__, inm); 2901 } 2902 #endif 2903 2904 m = mbufq_first(gq); 2905 while (m != NULL) { 2906 /* 2907 * Only merge the report into the current packet if 2908 * there is sufficient space to do so; an MLDv2 report 2909 * packet may only contain 65,535 group records. 2910 * Always use a simple mbuf chain concatentation to do this, 2911 * as large state changes for single groups may have 2912 * allocated clusters. 2913 */ 2914 domerge = 0; 2915 mt = mbufq_last(scq); 2916 if (mt != NULL) { 2917 recslen = m_length(m, NULL); 2918 2919 if ((mt->m_pkthdr.vt_nrecs + 2920 m->m_pkthdr.vt_nrecs <= 2921 MLD_V2_REPORT_MAXRECS) && 2922 (mt->m_pkthdr.len + recslen <= 2923 (inm->in6m_ifp->if_mtu - MLD_MTUSPACE))) 2924 domerge = 1; 2925 } 2926 2927 if (!domerge && mbufq_full(gq)) { 2928 CTR2(KTR_MLD, 2929 "%s: outbound queue full, skipping whole packet %p", 2930 __func__, m); 2931 mt = m->m_nextpkt; 2932 if (!docopy) 2933 m_freem(m); 2934 m = mt; 2935 continue; 2936 } 2937 2938 if (!docopy) { 2939 CTR2(KTR_MLD, "%s: dequeueing %p", __func__, m); 2940 m0 = mbufq_dequeue(gq); 2941 m = m0->m_nextpkt; 2942 } else { 2943 CTR2(KTR_MLD, "%s: copying %p", __func__, m); 2944 m0 = m_dup(m, M_NOWAIT); 2945 if (m0 == NULL) 2946 return (ENOMEM); 2947 m0->m_nextpkt = NULL; 2948 m = m->m_nextpkt; 2949 } 2950 2951 if (!domerge) { 2952 CTR3(KTR_MLD, "%s: queueing %p to scq %p)", 2953 __func__, m0, scq); 2954 mbufq_enqueue(scq, m0); 2955 } else { 2956 struct mbuf *mtl; /* last mbuf of packet mt */ 2957 2958 CTR3(KTR_MLD, "%s: merging %p with ifscq tail %p)", 2959 __func__, m0, mt); 2960 2961 mtl = m_last(mt); 2962 m0->m_flags &= ~M_PKTHDR; 2963 mt->m_pkthdr.len += recslen; 2964 mt->m_pkthdr.vt_nrecs += 2965 m0->m_pkthdr.vt_nrecs; 2966 2967 mtl->m_next = m0; 2968 } 2969 } 2970 2971 return (0); 2972 } 2973 2974 /* 2975 * Respond to a pending MLDv2 General Query. 2976 */ 2977 static void 2978 mld_v2_dispatch_general_query(struct mld_ifsoftc *mli) 2979 { 2980 struct ifmultiaddr *ifma; 2981 struct ifnet *ifp; 2982 struct in6_multi *inm; 2983 int retval __unused; 2984 2985 NET_EPOCH_ASSERT(); 2986 IN6_MULTI_LIST_LOCK_ASSERT(); 2987 MLD_LOCK_ASSERT(); 2988 2989 KASSERT(mli->mli_version == MLD_VERSION_2, 2990 ("%s: called when version %d", __func__, mli->mli_version)); 2991 2992 /* 2993 * Check that there are some packets queued. If so, send them first. 
2994 * For a large number of groups the reply to a general query can take 2995 * many packets; we should finish sending them before we start 2996 * queuing the new reply. 2997 */ 2998 if (mbufq_len(&mli->mli_gq) != 0) 2999 goto send; 3000 3001 ifp = mli->mli_ifp; 3002 3003 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3004 inm = in6m_ifmultiaddr_get_inm(ifma); 3005 if (inm == NULL) 3006 continue; 3007 KASSERT(ifp == inm->in6m_ifp, 3008 ("%s: inconsistent ifp", __func__)); 3009 3010 switch (inm->in6m_state) { 3011 case MLD_NOT_MEMBER: 3012 case MLD_SILENT_MEMBER: 3013 break; 3014 case MLD_REPORTING_MEMBER: 3015 case MLD_IDLE_MEMBER: 3016 case MLD_LAZY_MEMBER: 3017 case MLD_SLEEPING_MEMBER: 3018 case MLD_AWAKENING_MEMBER: 3019 inm->in6m_state = MLD_REPORTING_MEMBER; 3020 retval = mld_v2_enqueue_group_record(&mli->mli_gq, 3021 inm, 0, 0, 0, 0); 3022 CTR2(KTR_MLD, "%s: enqueue record = %d", 3023 __func__, retval); 3024 break; 3025 case MLD_G_QUERY_PENDING_MEMBER: 3026 case MLD_SG_QUERY_PENDING_MEMBER: 3027 case MLD_LEAVING_MEMBER: 3028 break; 3029 } 3030 } 3031 3032 send: 3033 mld_dispatch_queue(&mli->mli_gq, MLD_MAX_RESPONSE_BURST); 3034 3035 /* 3036 * Slew transmission of bursts over 500ms intervals. 3037 */ 3038 if (mbufq_first(&mli->mli_gq) != NULL) { 3039 mli->mli_v2_timer = 1 + MLD_RANDOM_DELAY( 3040 MLD_RESPONSE_BURST_INTERVAL); 3041 V_interface_timers_running6 = 1; 3042 } 3043 } 3044 3045 /* 3046 * Transmit the next pending message in the output queue. 3047 * 3048 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis. 3049 * MRT: Nothing needs to be done, as MLD traffic is always local to 3050 * a link and uses a link-scope multicast address. 3051 */ 3052 static void 3053 mld_dispatch_packet(struct mbuf *m) 3054 { 3055 struct ip6_moptions im6o; 3056 struct ifnet *ifp; 3057 struct ifnet *oifp; 3058 struct mbuf *m0; 3059 struct mbuf *md; 3060 struct ip6_hdr *ip6; 3061 struct mld_hdr *mld; 3062 int error; 3063 int off; 3064 int type; 3065 uint32_t ifindex; 3066 3067 CTR2(KTR_MLD, "%s: transmit %p", __func__, m); 3068 NET_EPOCH_ASSERT(); 3069 3070 /* 3071 * Set VNET image pointer from enqueued mbuf chain 3072 * before doing anything else. Whilst we use interface 3073 * indexes to guard against interface detach, they are 3074 * unique to each VIMAGE and must be retrieved. 3075 */ 3076 ifindex = mld_restore_context(m); 3077 3078 /* 3079 * Check if the ifnet still exists. This limits the scope of 3080 * any race in the absence of a global ifp lock for low cost 3081 * (an array lookup). 3082 */ 3083 ifp = ifnet_byindex(ifindex); 3084 if (ifp == NULL) { 3085 CTR3(KTR_MLD, "%s: dropped %p as ifindex %u went away.", 3086 __func__, m, ifindex); 3087 m_freem(m); 3088 IP6STAT_INC(ip6s_noroute); 3089 goto out; 3090 } 3091 3092 im6o.im6o_multicast_hlim = 1; 3093 im6o.im6o_multicast_loop = (V_ip6_mrouter != NULL); 3094 im6o.im6o_multicast_ifp = ifp; 3095 3096 if (m->m_flags & M_MLDV1) { 3097 m0 = m; 3098 } else { 3099 m0 = mld_v2_encap_report(ifp, m); 3100 if (m0 == NULL) { 3101 CTR2(KTR_MLD, "%s: dropped %p", __func__, m); 3102 IP6STAT_INC(ip6s_odropped); 3103 goto out; 3104 } 3105 } 3106 3107 mld_scrub_context(m0); 3108 m_clrprotoflags(m); 3109 m0->m_pkthdr.rcvif = V_loif; 3110 3111 ip6 = mtod(m0, struct ip6_hdr *); 3112 #if 0 3113 (void)in6_setscope(&ip6->ip6_dst, ifp, NULL); /* XXX LOR */ 3114 #else 3115 /* 3116 * XXX XXX Break some KPI rules to prevent an LOR which would 3117 * occur if we called in6_setscope() at transmission. 3118 * See comments at top of file.
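 * MLD_EMBEDSCOPE() stamps the interface index directly into the
 * embedded scope zone of the link-scope destination address, which is
 * what in6_setscope() would otherwise do while taking the scope locks
 * (hence the LOR).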
3119 */ 3120 MLD_EMBEDSCOPE(&ip6->ip6_dst, ifp->if_index); 3121 #endif 3122 3123 /* 3124 * Retrieve the ICMPv6 type before handoff to ip6_output(), 3125 * so we can bump the stats. 3126 */ 3127 md = m_getptr(m0, sizeof(struct ip6_hdr), &off); 3128 mld = (struct mld_hdr *)(mtod(md, uint8_t *) + off); 3129 type = mld->mld_type; 3130 3131 oifp = NULL; 3132 error = ip6_output(m0, &mld_po, NULL, IPV6_UNSPECSRC, &im6o, 3133 &oifp, NULL); 3134 if (error) { 3135 CTR3(KTR_MLD, "%s: ip6_output(%p) = %d", __func__, m0, error); 3136 goto out; 3137 } 3138 ICMP6STAT_INC(icp6s_outhist[type]); 3139 if (oifp != NULL) { 3140 icmp6_ifstat_inc(oifp, ifs6_out_msg); 3141 switch (type) { 3142 case MLD_LISTENER_REPORT: 3143 case MLDV2_LISTENER_REPORT: 3144 icmp6_ifstat_inc(oifp, ifs6_out_mldreport); 3145 break; 3146 case MLD_LISTENER_DONE: 3147 icmp6_ifstat_inc(oifp, ifs6_out_mlddone); 3148 break; 3149 } 3150 } 3151 out: 3152 return; 3153 } 3154 3155 /* 3156 * Encapsulate an MLDv2 report. 3157 * 3158 * KAME IPv6 requires that hop-by-hop options be passed separately, 3159 * and that the IPv6 header be prepended in a separate mbuf. 3160 * 3161 * Returns a pointer to the new mbuf chain head, or NULL if the 3162 * allocation failed. 3163 */ 3164 static struct mbuf * 3165 mld_v2_encap_report(struct ifnet *ifp, struct mbuf *m) 3166 { 3167 struct mbuf *mh; 3168 struct mldv2_report *mld; 3169 struct ip6_hdr *ip6; 3170 struct in6_ifaddr *ia; 3171 int mldreclen; 3172 3173 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 3174 KASSERT((m->m_flags & M_PKTHDR), 3175 ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); 3176 3177 /* 3178 * RFC3590: OK to send as :: or tentative during DAD. 3179 */ 3180 NET_EPOCH_ASSERT(); 3181 ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); 3182 if (ia == NULL) 3183 CTR1(KTR_MLD, "%s: warning: ia is NULL", __func__); 3184 3185 mh = m_gethdr(M_NOWAIT, MT_DATA); 3186 if (mh == NULL) { 3187 if (ia != NULL) 3188 ifa_free(&ia->ia_ifa); 3189 m_freem(m); 3190 return (NULL); 3191 } 3192 M_ALIGN(mh, sizeof(struct ip6_hdr) + sizeof(struct mldv2_report)); 3193 3194 mldreclen = m_length(m, NULL); 3195 CTR2(KTR_MLD, "%s: mldreclen is %d", __func__, mldreclen); 3196 3197 mh->m_len = sizeof(struct ip6_hdr) + sizeof(struct mldv2_report); 3198 mh->m_pkthdr.len = sizeof(struct ip6_hdr) + 3199 sizeof(struct mldv2_report) + mldreclen; 3200 3201 ip6 = mtod(mh, struct ip6_hdr *); 3202 ip6->ip6_flow = 0; 3203 ip6->ip6_vfc &= ~IPV6_VERSION_MASK; 3204 ip6->ip6_vfc |= IPV6_VERSION; 3205 ip6->ip6_nxt = IPPROTO_ICMPV6; 3206 ip6->ip6_src = ia ? 
ia->ia_addr.sin6_addr : in6addr_any; 3207 if (ia != NULL) 3208 ifa_free(&ia->ia_ifa); 3209 ip6->ip6_dst = in6addr_linklocal_allv2routers; 3210 /* scope ID will be set in netisr */ 3211 3212 mld = (struct mldv2_report *)(ip6 + 1); 3213 mld->mld_type = MLDV2_LISTENER_REPORT; 3214 mld->mld_code = 0; 3215 mld->mld_cksum = 0; 3216 mld->mld_v2_reserved = 0; 3217 mld->mld_v2_numrecs = htons(m->m_pkthdr.vt_nrecs); 3218 m->m_pkthdr.vt_nrecs = 0; 3219 3220 mh->m_next = m; 3221 mld->mld_cksum = in6_cksum(mh, IPPROTO_ICMPV6, 3222 sizeof(struct ip6_hdr), sizeof(struct mldv2_report) + mldreclen); 3223 return (mh); 3224 } 3225 3226 #ifdef KTR 3227 static char * 3228 mld_rec_type_to_str(const int type) 3229 { 3230 3231 switch (type) { 3232 case MLD_CHANGE_TO_EXCLUDE_MODE: 3233 return "TO_EX"; 3234 break; 3235 case MLD_CHANGE_TO_INCLUDE_MODE: 3236 return "TO_IN"; 3237 break; 3238 case MLD_MODE_IS_EXCLUDE: 3239 return "MODE_EX"; 3240 break; 3241 case MLD_MODE_IS_INCLUDE: 3242 return "MODE_IN"; 3243 break; 3244 case MLD_ALLOW_NEW_SOURCES: 3245 return "ALLOW_NEW"; 3246 break; 3247 case MLD_BLOCK_OLD_SOURCES: 3248 return "BLOCK_OLD"; 3249 break; 3250 default: 3251 break; 3252 } 3253 return "unknown"; 3254 } 3255 #endif 3256 3257 static void 3258 mld_init(void *unused __unused) 3259 { 3260 3261 CTR1(KTR_MLD, "%s: initializing", __func__); 3262 MLD_LOCK_INIT(); 3263 3264 ip6_initpktopts(&mld_po); 3265 mld_po.ip6po_hlim = 1; 3266 mld_po.ip6po_hbh = &mld_ra.hbh; 3267 mld_po.ip6po_prefer_tempaddr = IP6PO_TEMPADDR_NOTPREFER; 3268 mld_po.ip6po_flags = IP6PO_DONTFRAG; 3269 3270 callout_init(&mldslow_callout, 1); 3271 callout_reset(&mldslow_callout, hz / MLD_SLOWHZ, mld_slowtimo, NULL); 3272 callout_init(&mldfast_callout, 1); 3273 callout_reset(&mldfast_callout, hz / MLD_FASTHZ, mld_fasttimo, NULL); 3274 } 3275 SYSINIT(mld_init, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_init, NULL); 3276 3277 static void 3278 mld_uninit(void *unused __unused) 3279 { 3280 3281 CTR1(KTR_MLD, "%s: tearing down", __func__); 3282 callout_drain(&mldslow_callout); 3283 callout_drain(&mldfast_callout); 3284 MLD_LOCK_DESTROY(); 3285 } 3286 SYSUNINIT(mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_MIDDLE, mld_uninit, NULL); 3287 3288 static void 3289 vnet_mld_init(const void *unused __unused) 3290 { 3291 3292 CTR1(KTR_MLD, "%s: initializing", __func__); 3293 3294 LIST_INIT(&V_mli_head); 3295 } 3296 VNET_SYSINIT(vnet_mld_init, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_init, 3297 NULL); 3298 3299 static void 3300 vnet_mld_uninit(const void *unused __unused) 3301 { 3302 3303 /* This can happen if we shutdown the network stack. */ 3304 CTR1(KTR_MLD, "%s: tearing down", __func__); 3305 } 3306 VNET_SYSUNINIT(vnet_mld_uninit, SI_SUB_PROTO_MC, SI_ORDER_ANY, vnet_mld_uninit, 3307 NULL); 3308 3309 static int 3310 mld_modevent(module_t mod, int type, void *unused __unused) 3311 { 3312 3313 switch (type) { 3314 case MOD_LOAD: 3315 case MOD_UNLOAD: 3316 break; 3317 default: 3318 return (EOPNOTSUPP); 3319 } 3320 return (0); 3321 } 3322 3323 static moduledata_t mld_mod = { 3324 "mld", 3325 mld_modevent, 3326 0 3327 }; 3328 DECLARE_MODULE(mld, mld_mod, SI_SUB_PROTO_MC, SI_ORDER_ANY); 3329