1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 1988 Stephen Deering. 4 * Copyright (c) 1992, 1993 5 * The Regents of the University of California. All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Stephen Deering of Stanford University. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)igmp.c 8.1 (Berkeley) 7/19/93 35 */ 36 37 /* 38 * Internet Group Management Protocol (IGMP) routines. 39 * [RFC1112, RFC2236, RFC3376] 40 * 41 * Written by Steve Deering, Stanford, May 1988. 42 * Modified by Rosen Sharma, Stanford, Aug 1994. 43 * Modified by Bill Fenner, Xerox PARC, Feb 1995. 44 * Modified to fully comply to IGMPv2 by Bill Fenner, Oct 1995. 45 * Significantly rewritten for IGMPv3, VIMAGE, and SMP by Bruce Simpson. 46 * 47 * MULTICAST Revision: 3.5.1.4 48 */ 49 50 #include <sys/cdefs.h> 51 __FBSDID("$FreeBSD$"); 52 53 #include "opt_mac.h" 54 #include "opt_route.h" 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/module.h> 59 #include <sys/malloc.h> 60 #include <sys/mbuf.h> 61 #include <sys/socket.h> 62 #include <sys/protosw.h> 63 #include <sys/kernel.h> 64 #include <sys/sysctl.h> 65 #include <sys/vimage.h> 66 #include <sys/ktr.h> 67 #include <sys/condvar.h> 68 69 #include <net/if.h> 70 #include <net/netisr.h> 71 #include <net/route.h> 72 #include <net/vnet.h> 73 74 #include <netinet/in.h> 75 #include <netinet/in_var.h> 76 #include <netinet/in_systm.h> 77 #include <netinet/ip.h> 78 #include <netinet/ip_var.h> 79 #include <netinet/ip_options.h> 80 #include <netinet/igmp.h> 81 #include <netinet/igmp_var.h> 82 #include <netinet/vinet.h> 83 84 #include <machine/in_cksum.h> 85 86 #include <security/mac/mac_framework.h> 87 88 #ifndef KTR_IGMPV3 89 #define KTR_IGMPV3 KTR_INET 90 #endif 91 92 static struct igmp_ifinfo * 93 igi_alloc_locked(struct ifnet *); 94 static void igi_delete_locked(const struct ifnet *); 95 static void igmp_dispatch_queue(struct ifqueue *, int, const int); 96 static void igmp_fasttimo_vnet(void); 97 static void igmp_final_leave(struct in_multi *, struct igmp_ifinfo *); 98 static int igmp_handle_state_change(struct in_multi *, 99 struct igmp_ifinfo *); 100 static int igmp_initial_join(struct in_multi *, struct igmp_ifinfo *); 101 static int igmp_input_v1_query(struct ifnet *, const struct ip *); 102 static int igmp_input_v2_query(struct ifnet *, const struct ip *, 103 const struct igmp *); 104 static int igmp_input_v3_query(struct ifnet *, const struct ip *, 105 /*const*/ struct igmpv3 *); 106 static int igmp_input_v3_group_query(struct in_multi *, 107 struct igmp_ifinfo *, int, /*const*/ struct igmpv3 *); 108 static int igmp_input_v1_report(struct ifnet *, /*const*/ struct ip *, 109 /*const*/ struct igmp *); 110 static int igmp_input_v2_report(struct ifnet *, /*const*/ struct ip *, 111 /*const*/ struct igmp *); 112 static void igmp_intr(struct mbuf *); 113 static int igmp_isgroupreported(const struct in_addr); 114 static struct mbuf * 115 igmp_ra_alloc(void); 116 #ifdef KTR 117 static char * igmp_rec_type_to_str(const int); 118 #endif 119 static void igmp_set_version(struct igmp_ifinfo *, const int); 120 static void igmp_slowtimo_vnet(void); 121 static void igmp_sysinit(void); 122 static int igmp_v1v2_queue_report(struct in_multi *, const int); 123 static void igmp_v1v2_process_group_timer(struct in_multi *, const int); 124 static void igmp_v1v2_process_querier_timers(struct igmp_ifinfo *); 125 static void igmp_v2_update_group(struct in_multi *, const int); 126 static void igmp_v3_cancel_link_timers(struct igmp_ifinfo *); 127 static void igmp_v3_dispatch_general_query(struct igmp_ifinfo *); 128 static struct mbuf * 129 igmp_v3_encap_report(struct ifnet *, struct mbuf *); 130 static int igmp_v3_enqueue_group_record(struct ifqueue *, 131 struct in_multi *, const int, const int, const int); 132 static int igmp_v3_enqueue_filter_change(struct ifqueue *, 133 struct in_multi *); 134 static void igmp_v3_process_group_timers(struct igmp_ifinfo *, 135 struct ifqueue *, struct ifqueue *, struct in_multi *, 136 const int); 137 static int igmp_v3_merge_state_changes(struct in_multi *, 138 struct ifqueue *); 139 static void igmp_v3_suppress_group_record(struct in_multi *); 140 static int sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS); 141 static int sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS); 142 static int sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS); 143 144 static vnet_attach_fn vnet_igmp_iattach; 145 static vnet_detach_fn vnet_igmp_idetach; 146 147 /* 148 * System-wide globals. 149 * 150 * Unlocked access to these is OK, except for the global IGMP output 151 * queue. The IGMP subsystem lock ends up being system-wide for the moment, 152 * because all VIMAGEs have to share a global output queue, as netisrs 153 * themselves are not virtualized. 154 * 155 * Locking: 156 * * The permitted lock order is: IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 157 * Any may be taken independently; if any are held at the same 158 * time, the above lock order must be followed. 159 * * All output is delegated to the netisr. 160 * Now that Giant has been eliminated, the netisr may be inlined. 161 * * IN_MULTI_LOCK covers in_multi. 162 * * IGMP_LOCK covers igmp_ifinfo and any global variables in this file, 163 * including the output queue. 164 * * IF_ADDR_LOCK covers if_multiaddrs, which is used for a variety of 165 * per-link state iterators. 166 * * igmp_ifinfo is valid as long as PF_INET is attached to the interface, 167 * therefore it is not refcounted. 168 * We allow unlocked reads of igmp_ifinfo when accessed via in_multi. 169 * 170 * Reference counting 171 * * IGMP acquires its own reference every time an in_multi is passed to 172 * it and the group is being joined for the first time. 173 * * IGMP releases its reference(s) on in_multi in a deferred way, 174 * because the operations which process the release run as part of 175 * a loop whose control variables are directly affected by the release 176 * (that, and not recursing on the IF_ADDR_LOCK). 177 * 178 * VIMAGE: Each in_multi corresponds to an ifp, and each ifp corresponds 179 * to a vnet in ifp->if_vnet. 180 * 181 * SMPng: XXX We may potentially race operations on ifma_protospec. 182 * The problem is that we currently lack a clean way of taking the 183 * IF_ADDR_LOCK() between the ifnet and in layers w/o recursing, 184 * as anything which modifies ifma needs to be covered by that lock. 185 * So check for ifma_protospec being NULL before proceeding. 186 */ 187 struct mtx igmp_mtx; 188 189 struct mbuf *m_raopt; /* Router Alert option */ 190 MALLOC_DEFINE(M_IGMP, "igmp", "igmp state"); 191 192 /* 193 * Global netisr output queue. 194 */ 195 struct ifqueue igmpoq; 196 197 /* 198 * VIMAGE-wide globals. 199 * 200 * The IGMPv3 timers themselves need to run per-image, however, 201 * protosw timers run globally (see tcp). 202 * An ifnet can only be in one vimage at a time, and the loopback 203 * ifnet, loif, is itself virtualized. 204 * It would otherwise be possible to seriously hose IGMP state, 205 * and create inconsistencies in upstream multicast routing, if you have 206 * multiple VIMAGEs running on the same link joining different multicast 207 * groups, UNLESS the "primary IP address" is different. This is because 208 * IGMP for IPv4 does not force link-local addresses to be used for each 209 * node, unlike MLD for IPv6. 210 * Obviously the IGMPv3 per-interface state has per-vimage granularity 211 * also as a result. 212 * 213 * FUTURE: Stop using IFP_TO_IA/INADDR_ANY, and use source address selection 214 * policy to control the address used by IGMP on the link. 215 */ 216 #ifdef VIMAGE_GLOBALS 217 int interface_timers_running; /* IGMPv3 general query response */ 218 int state_change_timers_running; /* IGMPv3 state-change retransmit */ 219 int current_state_timers_running; /* IGMPv1/v2 host report; 220 * IGMPv3 g/sg query response */ 221 222 LIST_HEAD(, igmp_ifinfo) igi_head; 223 struct igmpstat igmpstat; 224 struct timeval igmp_gsrdelay; 225 226 int igmp_recvifkludge; 227 int igmp_sendra; 228 int igmp_sendlocal; 229 int igmp_v1enable; 230 int igmp_v2enable; 231 int igmp_legacysupp; 232 int igmp_default_version; 233 #endif /* VIMAGE_GLOBALS */ 234 235 /* 236 * Virtualized sysctls. 237 */ 238 SYSCTL_V_STRUCT(V_NET, vnet_inet, _net_inet_igmp, IGMPCTL_STATS, stats, 239 CTLFLAG_RW, igmpstat, igmpstat, ""); 240 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, recvifkludge, 241 CTLFLAG_RW, igmp_recvifkludge, 0, 242 "Rewrite IGMPv1/v2 reports from 0.0.0.0 to contain subnet address"); 243 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendra, 244 CTLFLAG_RW, igmp_sendra, 0, 245 "Send IP Router Alert option in IGMPv2/v3 messages"); 246 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, sendlocal, 247 CTLFLAG_RW, igmp_sendlocal, 0, 248 "Send IGMP membership reports for 224.0.0.0/24 groups"); 249 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v1enable, 250 CTLFLAG_RW, igmp_v1enable, 0, 251 "Enable backwards compatibility with IGMPv1"); 252 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, v2enable, 253 CTLFLAG_RW, igmp_v2enable, 0, 254 "Enable backwards compatibility with IGMPv2"); 255 SYSCTL_V_INT(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, legacysupp, 256 CTLFLAG_RW, igmp_legacysupp, 0, 257 "Allow v1/v2 reports to suppress v3 group responses"); 258 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, default_version, 259 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_default_version, 0, 260 sysctl_igmp_default_version, "I", 261 "Default version of IGMP to run on each interface"); 262 SYSCTL_V_PROC(V_NET, vnet_inet, _net_inet_igmp, OID_AUTO, gsrdelay, 263 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, igmp_gsrdelay.tv_sec, 0, 264 sysctl_igmp_gsr, "I", 265 "Rate limit for IGMPv3 Group-and-Source queries in seconds"); 266 267 /* 268 * Non-virtualized sysctls. 269 */ 270 SYSCTL_NODE(_net_inet_igmp, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, 271 sysctl_igmp_ifinfo, "Per-interface IGMPv3 state"); 272 273 static __inline void 274 igmp_save_context(struct mbuf *m, struct ifnet *ifp) 275 { 276 277 #ifdef VIMAGE 278 m->m_pkthdr.header = ifp->if_vnet; 279 #endif /* VIMAGE */ 280 m->m_pkthdr.flowid = ifp->if_index; 281 } 282 283 static __inline void 284 igmp_scrub_context(struct mbuf *m) 285 { 286 287 m->m_pkthdr.header = NULL; 288 m->m_pkthdr.flowid = 0; 289 } 290 291 #ifdef KTR 292 static __inline char * 293 inet_ntoa_haddr(in_addr_t haddr) 294 { 295 struct in_addr ia; 296 297 ia.s_addr = htonl(haddr); 298 return (inet_ntoa(ia)); 299 } 300 #endif 301 302 /* 303 * Restore context from a queued IGMP output chain. 304 * Return saved ifindex. 305 * 306 * VIMAGE: The assertion is there to make sure that we 307 * actually called CURVNET_SET() with what's in the mbuf chain. 308 */ 309 static __inline uint32_t 310 igmp_restore_context(struct mbuf *m) 311 { 312 313 #ifdef notyet 314 #if defined(VIMAGE) && defined(INVARIANTS) 315 KASSERT(curvnet == (m->m_pkthdr.header), 316 ("%s: called when curvnet was not restored", __func__)); 317 #endif 318 #endif 319 return (m->m_pkthdr.flowid); 320 } 321 322 /* 323 * Retrieve or set default IGMP version. 324 * 325 * VIMAGE: Assume curvnet set by caller. 326 * SMPng: NOTE: Serialized by IGMP lock. 327 */ 328 static int 329 sysctl_igmp_default_version(SYSCTL_HANDLER_ARGS) 330 { 331 INIT_VNET_INET(curvnet); 332 int error; 333 int new; 334 335 error = sysctl_wire_old_buffer(req, sizeof(int)); 336 if (error) 337 return (error); 338 339 IGMP_LOCK(); 340 341 new = V_igmp_default_version; 342 343 error = sysctl_handle_int(oidp, &new, 0, req); 344 if (error || !req->newptr) 345 goto out_locked; 346 347 if (new < IGMP_VERSION_1 || new > IGMP_VERSION_3) { 348 error = EINVAL; 349 goto out_locked; 350 } 351 352 CTR2(KTR_IGMPV3, "change igmp_default_version from %d to %d", 353 V_igmp_default_version, new); 354 355 V_igmp_default_version = new; 356 357 out_locked: 358 IGMP_UNLOCK(); 359 return (error); 360 } 361 362 /* 363 * Retrieve or set threshold between group-source queries in seconds. 364 * 365 * VIMAGE: Assume curvnet set by caller. 366 * SMPng: NOTE: Serialized by IGMP lock. 367 */ 368 static int 369 sysctl_igmp_gsr(SYSCTL_HANDLER_ARGS) 370 { 371 INIT_VNET_INET(curvnet); 372 int error; 373 int i; 374 375 error = sysctl_wire_old_buffer(req, sizeof(int)); 376 if (error) 377 return (error); 378 379 IGMP_LOCK(); 380 381 i = V_igmp_gsrdelay.tv_sec; 382 383 error = sysctl_handle_int(oidp, &i, 0, req); 384 if (error || !req->newptr) 385 goto out_locked; 386 387 if (i < -1 || i >= 60) { 388 error = EINVAL; 389 goto out_locked; 390 } 391 392 CTR2(KTR_IGMPV3, "change igmp_gsrdelay from %d to %d", 393 V_igmp_gsrdelay.tv_sec, i); 394 V_igmp_gsrdelay.tv_sec = i; 395 396 out_locked: 397 IGMP_UNLOCK(); 398 return (error); 399 } 400 401 /* 402 * Expose struct igmp_ifinfo to userland, keyed by ifindex. 403 * For use by ifmcstat(8). 404 * 405 * SMPng: NOTE: Does an unlocked ifindex space read. 406 * VIMAGE: Assume curvnet set by caller. The node handler itself 407 * is not directly virtualized. 408 */ 409 static int 410 sysctl_igmp_ifinfo(SYSCTL_HANDLER_ARGS) 411 { 412 INIT_VNET_NET(curvnet); 413 INIT_VNET_INET(curvnet); 414 int *name; 415 int error; 416 u_int namelen; 417 struct ifnet *ifp; 418 struct igmp_ifinfo *igi; 419 420 name = (int *)arg1; 421 namelen = arg2; 422 423 if (req->newptr != NULL) 424 return (EPERM); 425 426 if (namelen != 1) 427 return (EINVAL); 428 429 error = sysctl_wire_old_buffer(req, sizeof(struct igmp_ifinfo)); 430 if (error) 431 return (error); 432 433 IN_MULTI_LOCK(); 434 IGMP_LOCK(); 435 436 if (name[0] <= 0 || name[0] > V_if_index) { 437 error = ENOENT; 438 goto out_locked; 439 } 440 441 error = ENOENT; 442 443 ifp = ifnet_byindex(name[0]); 444 if (ifp == NULL) 445 goto out_locked; 446 447 LIST_FOREACH(igi, &V_igi_head, igi_link) { 448 if (ifp == igi->igi_ifp) { 449 error = SYSCTL_OUT(req, igi, 450 sizeof(struct igmp_ifinfo)); 451 break; 452 } 453 } 454 455 out_locked: 456 IGMP_UNLOCK(); 457 IN_MULTI_UNLOCK(); 458 return (error); 459 } 460 461 /* 462 * Dispatch an entire queue of pending packet chains 463 * using the netisr. 464 * VIMAGE: Assumes the vnet pointer has been set. 465 */ 466 static void 467 igmp_dispatch_queue(struct ifqueue *ifq, int limit, const int loop) 468 { 469 struct mbuf *m; 470 471 for (;;) { 472 _IF_DEQUEUE(ifq, m); 473 if (m == NULL) 474 break; 475 CTR3(KTR_IGMPV3, "%s: dispatch %p from %p", __func__, ifq, m); 476 if (loop) 477 m->m_flags |= M_IGMP_LOOP; 478 netisr_dispatch(NETISR_IGMP, m); 479 if (--limit == 0) 480 break; 481 } 482 } 483 484 /* 485 * Filter outgoing IGMP report state by group. 486 * 487 * Reports are ALWAYS suppressed for ALL-HOSTS (224.0.0.1). 488 * If the net.inet.igmp.sendlocal sysctl is 0, then IGMP reports are 489 * disabled for all groups in the 224.0.0.0/24 link-local scope. However, 490 * this may break certain IGMP snooping switches which rely on the old 491 * report behaviour. 492 * 493 * Return zero if the given group is one for which IGMP reports 494 * should be suppressed, or non-zero if reports should be issued. 495 */ 496 static __inline int 497 igmp_isgroupreported(const struct in_addr addr) 498 { 499 INIT_VNET_INET(curvnet); 500 501 if (in_allhosts(addr) || 502 ((!V_igmp_sendlocal && IN_LOCAL_GROUP(ntohl(addr.s_addr))))) 503 return (0); 504 505 return (1); 506 } 507 508 /* 509 * Construct a Router Alert option to use in outgoing packets. 510 */ 511 static struct mbuf * 512 igmp_ra_alloc(void) 513 { 514 struct mbuf *m; 515 struct ipoption *p; 516 517 MGET(m, M_DONTWAIT, MT_DATA); 518 p = mtod(m, struct ipoption *); 519 p->ipopt_dst.s_addr = INADDR_ANY; 520 p->ipopt_list[0] = IPOPT_RA; /* Router Alert Option */ 521 p->ipopt_list[1] = 0x04; /* 4 bytes long */ 522 p->ipopt_list[2] = IPOPT_EOL; /* End of IP option list */ 523 p->ipopt_list[3] = 0x00; /* pad byte */ 524 m->m_len = sizeof(p->ipopt_dst) + p->ipopt_list[1]; 525 526 return (m); 527 } 528 529 /* 530 * Attach IGMP when PF_INET is attached to an interface. 531 */ 532 struct igmp_ifinfo * 533 igmp_domifattach(struct ifnet *ifp) 534 { 535 struct igmp_ifinfo *igi; 536 537 CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", 538 __func__, ifp, ifp->if_xname); 539 540 IGMP_LOCK(); 541 542 igi = igi_alloc_locked(ifp); 543 if (!(ifp->if_flags & IFF_MULTICAST)) 544 igi->igi_flags |= IGIF_SILENT; 545 546 IGMP_UNLOCK(); 547 548 return (igi); 549 } 550 551 /* 552 * VIMAGE: assume curvnet set by caller. 553 */ 554 static struct igmp_ifinfo * 555 igi_alloc_locked(/*const*/ struct ifnet *ifp) 556 { 557 INIT_VNET_INET(ifp->if_vnet); 558 struct igmp_ifinfo *igi; 559 560 IGMP_LOCK_ASSERT(); 561 562 igi = malloc(sizeof(struct igmp_ifinfo), M_IGMP, M_NOWAIT|M_ZERO); 563 if (igi == NULL) 564 goto out; 565 566 igi->igi_ifp = ifp; 567 igi->igi_version = V_igmp_default_version; 568 igi->igi_flags = 0; 569 igi->igi_rv = IGMP_RV_INIT; 570 igi->igi_qi = IGMP_QI_INIT; 571 igi->igi_qri = IGMP_QRI_INIT; 572 igi->igi_uri = IGMP_URI_INIT; 573 574 SLIST_INIT(&igi->igi_relinmhead); 575 576 /* 577 * Responses to general queries are subject to bounds. 578 */ 579 IFQ_SET_MAXLEN(&igi->igi_gq, IGMP_MAX_RESPONSE_PACKETS); 580 581 LIST_INSERT_HEAD(&V_igi_head, igi, igi_link); 582 583 CTR2(KTR_IGMPV3, "allocate igmp_ifinfo for ifp %p(%s)", 584 ifp, ifp->if_xname); 585 586 out: 587 return (igi); 588 } 589 590 /* 591 * Hook for ifdetach. 592 * 593 * NOTE: Some finalization tasks need to run before the protocol domain 594 * is detached, but also before the link layer does its cleanup. 595 * 596 * SMPNG: igmp_ifdetach() needs to take IF_ADDR_LOCK(). 597 * XXX This is also bitten by unlocked ifma_protospec access. 598 */ 599 void 600 igmp_ifdetach(struct ifnet *ifp) 601 { 602 struct igmp_ifinfo *igi; 603 struct ifmultiaddr *ifma; 604 struct in_multi *inm, *tinm; 605 606 CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", __func__, ifp, 607 ifp->if_xname); 608 609 IGMP_LOCK(); 610 611 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 612 if (igi->igi_version == IGMP_VERSION_3) { 613 IF_ADDR_LOCK(ifp); 614 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 615 if (ifma->ifma_addr->sa_family != AF_INET || 616 ifma->ifma_protospec == NULL) 617 continue; 618 #if 0 619 KASSERT(ifma->ifma_protospec != NULL, 620 ("%s: ifma_protospec is NULL", __func__)); 621 #endif 622 inm = (struct in_multi *)ifma->ifma_protospec; 623 if (inm->inm_state == IGMP_LEAVING_MEMBER) { 624 SLIST_INSERT_HEAD(&igi->igi_relinmhead, 625 inm, inm_nrele); 626 } 627 inm_clear_recorded(inm); 628 } 629 IF_ADDR_UNLOCK(ifp); 630 /* 631 * Free the in_multi reference(s) for this IGMP lifecycle. 632 */ 633 SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, inm_nrele, 634 tinm) { 635 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, inm_nrele); 636 inm_release_locked(inm); 637 } 638 } 639 640 IGMP_UNLOCK(); 641 } 642 643 /* 644 * Hook for domifdetach. 645 */ 646 void 647 igmp_domifdetach(struct ifnet *ifp) 648 { 649 struct igmp_ifinfo *igi; 650 651 CTR3(KTR_IGMPV3, "%s: called for ifp %p(%s)", 652 __func__, ifp, ifp->if_xname); 653 654 IGMP_LOCK(); 655 656 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 657 igi_delete_locked(ifp); 658 659 IGMP_UNLOCK(); 660 } 661 662 static void 663 igi_delete_locked(const struct ifnet *ifp) 664 { 665 INIT_VNET_INET(ifp->if_vnet); 666 struct igmp_ifinfo *igi, *tigi; 667 668 CTR3(KTR_IGMPV3, "%s: freeing igmp_ifinfo for ifp %p(%s)", 669 __func__, ifp, ifp->if_xname); 670 671 IGMP_LOCK_ASSERT(); 672 673 LIST_FOREACH_SAFE(igi, &V_igi_head, igi_link, tigi) { 674 if (igi->igi_ifp == ifp) { 675 /* 676 * Free deferred General Query responses. 677 */ 678 _IF_DRAIN(&igi->igi_gq); 679 680 LIST_REMOVE(igi, igi_link); 681 682 KASSERT(SLIST_EMPTY(&igi->igi_relinmhead), 683 ("%s: there are dangling in_multi references", 684 __func__)); 685 686 free(igi, M_IGMP); 687 return; 688 } 689 } 690 691 #ifdef INVARIANTS 692 panic("%s: igmp_ifinfo not found for ifp %p\n", __func__, ifp); 693 #endif 694 } 695 696 /* 697 * Process a received IGMPv1 query. 698 * Return non-zero if the message should be dropped. 699 * 700 * VIMAGE: The curvnet pointer is derived from the input ifp. 701 */ 702 static int 703 igmp_input_v1_query(struct ifnet *ifp, const struct ip *ip) 704 { 705 INIT_VNET_INET(ifp->if_vnet); 706 struct ifmultiaddr *ifma; 707 struct igmp_ifinfo *igi; 708 struct in_multi *inm; 709 710 /* 711 * IGMPv1 General Queries SHOULD always addressed to 224.0.0.1. 712 * igmp_group is always ignored. Do not drop it as a userland 713 * daemon may wish to see it. 714 */ 715 if (!in_allhosts(ip->ip_dst)) { 716 IGMPSTAT_INC(igps_rcv_badqueries); 717 return (0); 718 } 719 720 IGMPSTAT_INC(igps_rcv_gen_queries); 721 722 /* 723 * Switch to IGMPv1 host compatibility mode. 724 */ 725 IN_MULTI_LOCK(); 726 IGMP_LOCK(); 727 728 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 729 KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); 730 731 if (igi->igi_flags & IGIF_LOOPBACK) { 732 CTR2(KTR_IGMPV3, "ignore v1 query on IGIF_LOOPBACK ifp %p(%s)", 733 ifp, ifp->if_xname); 734 goto out_locked; 735 } 736 737 igmp_set_version(igi, IGMP_VERSION_1); 738 739 CTR2(KTR_IGMPV3, "process v1 query on ifp %p(%s)", ifp, ifp->if_xname); 740 741 /* 742 * Start the timers in all of our group records 743 * for the interface on which the query arrived, 744 * except those which are already running. 745 */ 746 IF_ADDR_LOCK(ifp); 747 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 748 if (ifma->ifma_addr->sa_family != AF_INET || 749 ifma->ifma_protospec == NULL) 750 continue; 751 inm = (struct in_multi *)ifma->ifma_protospec; 752 if (inm->inm_timer != 0) 753 continue; 754 switch (inm->inm_state) { 755 case IGMP_NOT_MEMBER: 756 case IGMP_SILENT_MEMBER: 757 break; 758 case IGMP_G_QUERY_PENDING_MEMBER: 759 case IGMP_SG_QUERY_PENDING_MEMBER: 760 case IGMP_REPORTING_MEMBER: 761 case IGMP_IDLE_MEMBER: 762 case IGMP_LAZY_MEMBER: 763 case IGMP_SLEEPING_MEMBER: 764 case IGMP_AWAKENING_MEMBER: 765 inm->inm_state = IGMP_REPORTING_MEMBER; 766 inm->inm_timer = IGMP_RANDOM_DELAY( 767 IGMP_V1V2_MAX_RI * PR_FASTHZ); 768 V_current_state_timers_running = 1; 769 break; 770 case IGMP_LEAVING_MEMBER: 771 break; 772 } 773 } 774 IF_ADDR_UNLOCK(ifp); 775 776 out_locked: 777 IGMP_UNLOCK(); 778 IN_MULTI_UNLOCK(); 779 780 return (0); 781 } 782 783 /* 784 * Process a received IGMPv2 general or group-specific query. 785 */ 786 static int 787 igmp_input_v2_query(struct ifnet *ifp, const struct ip *ip, 788 const struct igmp *igmp) 789 { 790 INIT_VNET_INET(ifp->if_vnet); 791 struct ifmultiaddr *ifma; 792 struct igmp_ifinfo *igi; 793 struct in_multi *inm; 794 uint16_t timer; 795 796 /* 797 * Perform lazy allocation of IGMP link info if required, 798 * and switch to IGMPv2 host compatibility mode. 799 */ 800 IN_MULTI_LOCK(); 801 IGMP_LOCK(); 802 803 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 804 KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); 805 806 if (igi->igi_flags & IGIF_LOOPBACK) { 807 CTR2(KTR_IGMPV3, "ignore v2 query on IGIF_LOOPBACK ifp %p(%s)", 808 ifp, ifp->if_xname); 809 goto out_locked; 810 } 811 812 igmp_set_version(igi, IGMP_VERSION_2); 813 814 timer = igmp->igmp_code * PR_FASTHZ / IGMP_TIMER_SCALE; 815 if (timer == 0) 816 timer = 1; 817 818 if (!in_nullhost(igmp->igmp_group)) { 819 /* 820 * IGMPv2 Group-Specific Query. 821 * If this is a group-specific IGMPv2 query, we need only 822 * look up the single group to process it. 823 */ 824 inm = inm_lookup(ifp, igmp->igmp_group); 825 if (inm != NULL) { 826 CTR3(KTR_IGMPV3, "process v2 query %s on ifp %p(%s)", 827 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 828 igmp_v2_update_group(inm, timer); 829 } 830 IGMPSTAT_INC(igps_rcv_group_queries); 831 } else { 832 /* 833 * IGMPv2 General Query. 834 * If this was not sent to the all-hosts group, ignore it. 835 */ 836 if (in_allhosts(ip->ip_dst)) { 837 /* 838 * For each reporting group joined on this 839 * interface, kick the report timer. 840 */ 841 CTR2(KTR_IGMPV3, 842 "process v2 general query on ifp %p(%s)", 843 ifp, ifp->if_xname); 844 845 IF_ADDR_LOCK(ifp); 846 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 847 if (ifma->ifma_addr->sa_family != AF_INET || 848 ifma->ifma_protospec == NULL) 849 continue; 850 inm = (struct in_multi *)ifma->ifma_protospec; 851 igmp_v2_update_group(inm, timer); 852 } 853 IF_ADDR_UNLOCK(ifp); 854 } 855 IGMPSTAT_INC(igps_rcv_gen_queries); 856 } 857 858 out_locked: 859 IGMP_UNLOCK(); 860 IN_MULTI_UNLOCK(); 861 862 return (0); 863 } 864 865 /* 866 * Update the report timer on a group in response to an IGMPv2 query. 867 * 868 * If we are becoming the reporting member for this group, start the timer. 869 * If we already are the reporting member for this group, and timer is 870 * below the threshold, reset it. 871 * 872 * We may be updating the group for the first time since we switched 873 * to IGMPv3. If we are, then we must clear any recorded source lists, 874 * and transition to REPORTING state; the group timer is overloaded 875 * for group and group-source query responses. 876 * 877 * Unlike IGMPv3, the delay per group should be jittered 878 * to avoid bursts of IGMPv2 reports. 879 */ 880 static void 881 igmp_v2_update_group(struct in_multi *inm, const int timer) 882 { 883 INIT_VNET_INET(curvnet); 884 885 CTR4(KTR_IGMPV3, "%s: %s/%s timer=%d", __func__, 886 inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname, timer); 887 888 IN_MULTI_LOCK_ASSERT(); 889 890 switch (inm->inm_state) { 891 case IGMP_NOT_MEMBER: 892 case IGMP_SILENT_MEMBER: 893 break; 894 case IGMP_REPORTING_MEMBER: 895 if (inm->inm_timer != 0 && 896 inm->inm_timer <= timer) { 897 CTR1(KTR_IGMPV3, "%s: REPORTING and timer running, " 898 "skipping.", __func__); 899 break; 900 } 901 /* FALLTHROUGH */ 902 case IGMP_SG_QUERY_PENDING_MEMBER: 903 case IGMP_G_QUERY_PENDING_MEMBER: 904 case IGMP_IDLE_MEMBER: 905 case IGMP_LAZY_MEMBER: 906 case IGMP_AWAKENING_MEMBER: 907 CTR1(KTR_IGMPV3, "%s: ->REPORTING", __func__); 908 inm->inm_state = IGMP_REPORTING_MEMBER; 909 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 910 V_current_state_timers_running = 1; 911 break; 912 case IGMP_SLEEPING_MEMBER: 913 CTR1(KTR_IGMPV3, "%s: ->AWAKENING", __func__); 914 inm->inm_state = IGMP_AWAKENING_MEMBER; 915 break; 916 case IGMP_LEAVING_MEMBER: 917 break; 918 } 919 } 920 921 /* 922 * Process a received IGMPv3 general, group-specific or 923 * group-and-source-specific query. 924 * Assumes m has already been pulled up to the full IGMP message length. 925 * Return 0 if successful, otherwise an appropriate error code is returned. 926 */ 927 static int 928 igmp_input_v3_query(struct ifnet *ifp, const struct ip *ip, 929 /*const*/ struct igmpv3 *igmpv3) 930 { 931 INIT_VNET_INET(ifp->if_vnet); 932 struct igmp_ifinfo *igi; 933 struct in_multi *inm; 934 uint32_t maxresp, nsrc, qqi; 935 uint16_t timer; 936 uint8_t qrv; 937 938 CTR2(KTR_IGMPV3, "process v3 query on ifp %p(%s)", ifp, ifp->if_xname); 939 940 maxresp = igmpv3->igmp_code; /* in 1/10ths of a second */ 941 if (maxresp >= 128) { 942 maxresp = IGMP_MANT(igmpv3->igmp_code) << 943 (IGMP_EXP(igmpv3->igmp_code) + 3); 944 } 945 946 /* 947 * Robustness must never be less than 2 for on-wire IGMPv3. 948 * FIXME: Check if ifp has IGIF_LOOPBACK set, as we make 949 * an exception for interfaces whose IGMPv3 state changes 950 * are redirected to loopback (e.g. MANET). 951 */ 952 qrv = IGMP_QRV(igmpv3->igmp_misc); 953 if (qrv < 2) { 954 CTR3(KTR_IGMPV3, "%s: clamping qrv %d to %d", __func__, 955 qrv, IGMP_RV_INIT); 956 qrv = IGMP_RV_INIT; 957 } 958 959 qqi = igmpv3->igmp_qqi; 960 if (qqi >= 128) { 961 qqi = IGMP_MANT(igmpv3->igmp_qqi) << 962 (IGMP_EXP(igmpv3->igmp_qqi) + 3); 963 } 964 965 timer = maxresp * PR_FASTHZ / IGMP_TIMER_SCALE; 966 if (timer == 0) 967 timer = 1; 968 969 nsrc = ntohs(igmpv3->igmp_numsrc); 970 971 IN_MULTI_LOCK(); 972 IGMP_LOCK(); 973 974 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 975 KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); 976 977 if (igi->igi_flags & IGIF_LOOPBACK) { 978 CTR2(KTR_IGMPV3, "ignore v3 query on IGIF_LOOPBACK ifp %p(%s)", 979 ifp, ifp->if_xname); 980 goto out_locked; 981 } 982 983 igmp_set_version(igi, IGMP_VERSION_3); 984 985 igi->igi_rv = qrv; 986 igi->igi_qi = qqi; 987 igi->igi_qri = maxresp; 988 989 CTR4(KTR_IGMPV3, "%s: qrv %d qi %d qri %d", __func__, qrv, qqi, 990 maxresp); 991 992 if (in_nullhost(igmpv3->igmp_group)) { 993 /* 994 * IGMPv3 General Query. 995 * Schedule a current-state report on this ifp for 996 * all groups, possibly containing source lists. 997 */ 998 IGMPSTAT_INC(igps_rcv_gen_queries); 999 1000 if (!in_allhosts(ip->ip_dst) || nsrc > 0) { 1001 /* 1002 * General Queries SHOULD be directed to 224.0.0.1. 1003 * A general query with a source list has undefined 1004 * behaviour; discard it. 1005 */ 1006 IGMPSTAT_INC(igps_rcv_badqueries); 1007 goto out_locked; 1008 } 1009 1010 CTR2(KTR_IGMPV3, "process v3 general query on ifp %p(%s)", 1011 ifp, ifp->if_xname); 1012 1013 /* 1014 * If there is a pending General Query response 1015 * scheduled earlier than the selected delay, do 1016 * not schedule any other reports. 1017 * Otherwise, reset the interface timer. 1018 */ 1019 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) { 1020 igi->igi_v3_timer = IGMP_RANDOM_DELAY(timer); 1021 V_interface_timers_running = 1; 1022 } 1023 } else { 1024 /* 1025 * IGMPv3 Group-specific or Group-and-source-specific Query. 1026 * 1027 * Group-source-specific queries are throttled on 1028 * a per-group basis to defeat denial-of-service attempts. 1029 * Queries for groups we are not a member of on this 1030 * link are simply ignored. 1031 */ 1032 inm = inm_lookup(ifp, igmpv3->igmp_group); 1033 if (inm == NULL) 1034 goto out_locked; 1035 if (nsrc > 0) { 1036 IGMPSTAT_INC(igps_rcv_gsr_queries); 1037 if (!ratecheck(&inm->inm_lastgsrtv, 1038 &V_igmp_gsrdelay)) { 1039 CTR1(KTR_IGMPV3, "%s: GS query throttled.", 1040 __func__); 1041 IGMPSTAT_INC(igps_drop_gsr_queries); 1042 goto out_locked; 1043 } 1044 } else { 1045 IGMPSTAT_INC(igps_rcv_group_queries); 1046 } 1047 CTR3(KTR_IGMPV3, "process v3 %s query on ifp %p(%s)", 1048 inet_ntoa(igmpv3->igmp_group), ifp, ifp->if_xname); 1049 /* 1050 * If there is a pending General Query response 1051 * scheduled sooner than the selected delay, no 1052 * further report need be scheduled. 1053 * Otherwise, prepare to respond to the 1054 * group-specific or group-and-source query. 1055 */ 1056 if (igi->igi_v3_timer == 0 || igi->igi_v3_timer >= timer) 1057 igmp_input_v3_group_query(inm, igi, timer, igmpv3); 1058 } 1059 1060 out_locked: 1061 IGMP_UNLOCK(); 1062 IN_MULTI_UNLOCK(); 1063 1064 return (0); 1065 } 1066 1067 /* 1068 * Process a recieved IGMPv3 group-specific or group-and-source-specific 1069 * query. 1070 * Return <0 if any error occured. Currently this is ignored. 1071 */ 1072 static int 1073 igmp_input_v3_group_query(struct in_multi *inm, struct igmp_ifinfo *igi, 1074 int timer, /*const*/ struct igmpv3 *igmpv3) 1075 { 1076 INIT_VNET_INET(curvnet); 1077 int retval; 1078 uint16_t nsrc; 1079 1080 IN_MULTI_LOCK_ASSERT(); 1081 IGMP_LOCK_ASSERT(); 1082 1083 retval = 0; 1084 1085 switch (inm->inm_state) { 1086 case IGMP_NOT_MEMBER: 1087 case IGMP_SILENT_MEMBER: 1088 case IGMP_SLEEPING_MEMBER: 1089 case IGMP_LAZY_MEMBER: 1090 case IGMP_AWAKENING_MEMBER: 1091 case IGMP_IDLE_MEMBER: 1092 case IGMP_LEAVING_MEMBER: 1093 return (retval); 1094 break; 1095 case IGMP_REPORTING_MEMBER: 1096 case IGMP_G_QUERY_PENDING_MEMBER: 1097 case IGMP_SG_QUERY_PENDING_MEMBER: 1098 break; 1099 } 1100 1101 nsrc = ntohs(igmpv3->igmp_numsrc); 1102 1103 /* 1104 * Deal with group-specific queries upfront. 1105 * If any group query is already pending, purge any recorded 1106 * source-list state if it exists, and schedule a query response 1107 * for this group-specific query. 1108 */ 1109 if (nsrc == 0) { 1110 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || 1111 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) { 1112 inm_clear_recorded(inm); 1113 timer = min(inm->inm_timer, timer); 1114 } 1115 inm->inm_state = IGMP_G_QUERY_PENDING_MEMBER; 1116 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1117 V_current_state_timers_running = 1; 1118 return (retval); 1119 } 1120 1121 /* 1122 * Deal with the case where a group-and-source-specific query has 1123 * been received but a group-specific query is already pending. 1124 */ 1125 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER) { 1126 timer = min(inm->inm_timer, timer); 1127 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1128 V_current_state_timers_running = 1; 1129 return (retval); 1130 } 1131 1132 /* 1133 * Finally, deal with the case where a group-and-source-specific 1134 * query has been received, where a response to a previous g-s-r 1135 * query exists, or none exists. 1136 * In this case, we need to parse the source-list which the Querier 1137 * has provided us with and check if we have any source list filter 1138 * entries at T1 for these sources. If we do not, there is no need 1139 * schedule a report and the query may be dropped. 1140 * If we do, we must record them and schedule a current-state 1141 * report for those sources. 1142 * FIXME: Handling source lists larger than 1 mbuf requires that 1143 * we pass the mbuf chain pointer down to this function, and use 1144 * m_getptr() to walk the chain. 1145 */ 1146 if (inm->inm_nsrc > 0) { 1147 const struct in_addr *ap; 1148 int i, nrecorded; 1149 1150 ap = (const struct in_addr *)(igmpv3 + 1); 1151 nrecorded = 0; 1152 for (i = 0; i < nsrc; i++, ap++) { 1153 retval = inm_record_source(inm, ap->s_addr); 1154 if (retval < 0) 1155 break; 1156 nrecorded += retval; 1157 } 1158 if (nrecorded > 0) { 1159 CTR1(KTR_IGMPV3, 1160 "%s: schedule response to SG query", __func__); 1161 inm->inm_state = IGMP_SG_QUERY_PENDING_MEMBER; 1162 inm->inm_timer = IGMP_RANDOM_DELAY(timer); 1163 V_current_state_timers_running = 1; 1164 } 1165 } 1166 1167 return (retval); 1168 } 1169 1170 /* 1171 * Process a received IGMPv1 host membership report. 1172 * 1173 * NOTE: 0.0.0.0 workaround breaks const correctness. 1174 */ 1175 static int 1176 igmp_input_v1_report(struct ifnet *ifp, /*const*/ struct ip *ip, 1177 /*const*/ struct igmp *igmp) 1178 { 1179 INIT_VNET_INET(ifp->if_vnet); 1180 struct in_ifaddr *ia; 1181 struct in_multi *inm; 1182 1183 IGMPSTAT_INC(igps_rcv_reports); 1184 1185 if (ifp->if_flags & IFF_LOOPBACK) 1186 return (0); 1187 1188 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr) || 1189 !in_hosteq(igmp->igmp_group, ip->ip_dst))) { 1190 IGMPSTAT_INC(igps_rcv_badreports); 1191 return (EINVAL); 1192 } 1193 1194 /* 1195 * RFC 3376, Section 4.2.13, 9.2, 9.3: 1196 * Booting clients may use the source address 0.0.0.0. Some 1197 * IGMP daemons may not know how to use IP_RECVIF to determine 1198 * the interface upon which this message was received. 1199 * Replace 0.0.0.0 with the subnet address if told to do so. 1200 */ 1201 if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { 1202 IFP_TO_IA(ifp, ia); 1203 if (ia != NULL) 1204 ip->ip_src.s_addr = htonl(ia->ia_subnet); 1205 } 1206 1207 CTR3(KTR_IGMPV3, "process v1 report %s on ifp %p(%s)", 1208 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 1209 1210 /* 1211 * IGMPv1 report suppression. 1212 * If we are a member of this group, and our membership should be 1213 * reported, stop our group timer and transition to the 'lazy' state. 1214 */ 1215 IN_MULTI_LOCK(); 1216 inm = inm_lookup(ifp, igmp->igmp_group); 1217 if (inm != NULL) { 1218 struct igmp_ifinfo *igi; 1219 1220 igi = inm->inm_igi; 1221 if (igi == NULL) { 1222 KASSERT(igi != NULL, 1223 ("%s: no igi for ifp %p", __func__, ifp)); 1224 goto out_locked; 1225 } 1226 1227 IGMPSTAT_INC(igps_rcv_ourreports); 1228 1229 /* 1230 * If we are in IGMPv3 host mode, do not allow the 1231 * other host's IGMPv1 report to suppress our reports 1232 * unless explicitly configured to do so. 1233 */ 1234 if (igi->igi_version == IGMP_VERSION_3) { 1235 if (V_igmp_legacysupp) 1236 igmp_v3_suppress_group_record(inm); 1237 goto out_locked; 1238 } 1239 1240 inm->inm_timer = 0; 1241 1242 switch (inm->inm_state) { 1243 case IGMP_NOT_MEMBER: 1244 case IGMP_SILENT_MEMBER: 1245 break; 1246 case IGMP_IDLE_MEMBER: 1247 case IGMP_LAZY_MEMBER: 1248 case IGMP_AWAKENING_MEMBER: 1249 CTR3(KTR_IGMPV3, 1250 "report suppressed for %s on ifp %p(%s)", 1251 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 1252 case IGMP_SLEEPING_MEMBER: 1253 inm->inm_state = IGMP_SLEEPING_MEMBER; 1254 break; 1255 case IGMP_REPORTING_MEMBER: 1256 CTR3(KTR_IGMPV3, 1257 "report suppressed for %s on ifp %p(%s)", 1258 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 1259 if (igi->igi_version == IGMP_VERSION_1) 1260 inm->inm_state = IGMP_LAZY_MEMBER; 1261 else if (igi->igi_version == IGMP_VERSION_2) 1262 inm->inm_state = IGMP_SLEEPING_MEMBER; 1263 break; 1264 case IGMP_G_QUERY_PENDING_MEMBER: 1265 case IGMP_SG_QUERY_PENDING_MEMBER: 1266 case IGMP_LEAVING_MEMBER: 1267 break; 1268 } 1269 } 1270 1271 out_locked: 1272 IN_MULTI_UNLOCK(); 1273 1274 return (0); 1275 } 1276 1277 /* 1278 * Process a received IGMPv2 host membership report. 1279 * 1280 * NOTE: 0.0.0.0 workaround breaks const correctness. 1281 */ 1282 static int 1283 igmp_input_v2_report(struct ifnet *ifp, /*const*/ struct ip *ip, 1284 /*const*/ struct igmp *igmp) 1285 { 1286 INIT_VNET_INET(ifp->if_vnet); 1287 struct in_ifaddr *ia; 1288 struct in_multi *inm; 1289 1290 /* 1291 * Make sure we don't hear our own membership report. Fast 1292 * leave requires knowing that we are the only member of a 1293 * group. 1294 */ 1295 IFP_TO_IA(ifp, ia); 1296 if (ia != NULL && in_hosteq(ip->ip_src, IA_SIN(ia)->sin_addr)) 1297 return (0); 1298 1299 IGMPSTAT_INC(igps_rcv_reports); 1300 1301 if (ifp->if_flags & IFF_LOOPBACK) 1302 return (0); 1303 1304 if (!IN_MULTICAST(ntohl(igmp->igmp_group.s_addr)) || 1305 !in_hosteq(igmp->igmp_group, ip->ip_dst)) { 1306 IGMPSTAT_INC(igps_rcv_badreports); 1307 return (EINVAL); 1308 } 1309 1310 /* 1311 * RFC 3376, Section 4.2.13, 9.2, 9.3: 1312 * Booting clients may use the source address 0.0.0.0. Some 1313 * IGMP daemons may not know how to use IP_RECVIF to determine 1314 * the interface upon which this message was received. 1315 * Replace 0.0.0.0 with the subnet address if told to do so. 1316 */ 1317 if (V_igmp_recvifkludge && in_nullhost(ip->ip_src)) { 1318 if (ia != NULL) 1319 ip->ip_src.s_addr = htonl(ia->ia_subnet); 1320 } 1321 1322 CTR3(KTR_IGMPV3, "process v2 report %s on ifp %p(%s)", 1323 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 1324 1325 /* 1326 * IGMPv2 report suppression. 1327 * If we are a member of this group, and our membership should be 1328 * reported, and our group timer is pending or about to be reset, 1329 * stop our group timer by transitioning to the 'lazy' state. 1330 */ 1331 IN_MULTI_LOCK(); 1332 inm = inm_lookup(ifp, igmp->igmp_group); 1333 if (inm != NULL) { 1334 struct igmp_ifinfo *igi; 1335 1336 igi = inm->inm_igi; 1337 KASSERT(igi != NULL, ("%s: no igi for ifp %p", __func__, ifp)); 1338 1339 IGMPSTAT_INC(igps_rcv_ourreports); 1340 1341 /* 1342 * If we are in IGMPv3 host mode, do not allow the 1343 * other host's IGMPv1 report to suppress our reports 1344 * unless explicitly configured to do so. 1345 */ 1346 if (igi->igi_version == IGMP_VERSION_3) { 1347 if (V_igmp_legacysupp) 1348 igmp_v3_suppress_group_record(inm); 1349 goto out_locked; 1350 } 1351 1352 inm->inm_timer = 0; 1353 1354 switch (inm->inm_state) { 1355 case IGMP_NOT_MEMBER: 1356 case IGMP_SILENT_MEMBER: 1357 case IGMP_SLEEPING_MEMBER: 1358 break; 1359 case IGMP_REPORTING_MEMBER: 1360 case IGMP_IDLE_MEMBER: 1361 case IGMP_AWAKENING_MEMBER: 1362 CTR3(KTR_IGMPV3, 1363 "report suppressed for %s on ifp %p(%s)", 1364 inet_ntoa(igmp->igmp_group), ifp, ifp->if_xname); 1365 case IGMP_LAZY_MEMBER: 1366 inm->inm_state = IGMP_LAZY_MEMBER; 1367 break; 1368 case IGMP_G_QUERY_PENDING_MEMBER: 1369 case IGMP_SG_QUERY_PENDING_MEMBER: 1370 case IGMP_LEAVING_MEMBER: 1371 break; 1372 } 1373 } 1374 1375 out_locked: 1376 IN_MULTI_UNLOCK(); 1377 1378 return (0); 1379 } 1380 1381 void 1382 igmp_input(struct mbuf *m, int off) 1383 { 1384 int iphlen; 1385 struct ifnet *ifp; 1386 struct igmp *igmp; 1387 struct ip *ip; 1388 int igmplen; 1389 int minlen; 1390 int queryver; 1391 1392 CTR3(KTR_IGMPV3, "%s: called w/mbuf (%p,%d)", __func__, m, off); 1393 1394 ifp = m->m_pkthdr.rcvif; 1395 INIT_VNET_INET(ifp->if_vnet); 1396 1397 IGMPSTAT_INC(igps_rcv_total); 1398 1399 ip = mtod(m, struct ip *); 1400 iphlen = off; 1401 igmplen = ip->ip_len; 1402 1403 /* 1404 * Validate lengths. 1405 */ 1406 if (igmplen < IGMP_MINLEN) { 1407 IGMPSTAT_INC(igps_rcv_tooshort); 1408 m_freem(m); 1409 return; 1410 } 1411 1412 /* 1413 * Always pullup to the minimum size for v1/v2 or v3 1414 * to amortize calls to m_pullup(). 1415 */ 1416 minlen = iphlen; 1417 if (igmplen >= IGMP_V3_QUERY_MINLEN) 1418 minlen += IGMP_V3_QUERY_MINLEN; 1419 else 1420 minlen += IGMP_MINLEN; 1421 if ((m->m_flags & M_EXT || m->m_len < minlen) && 1422 (m = m_pullup(m, minlen)) == 0) { 1423 IGMPSTAT_INC(igps_rcv_tooshort); 1424 return; 1425 } 1426 ip = mtod(m, struct ip *); 1427 1428 if (ip->ip_ttl != 1) { 1429 IGMPSTAT_INC(igps_rcv_badttl); 1430 m_freem(m); 1431 return; 1432 } 1433 1434 /* 1435 * Validate checksum. 1436 */ 1437 m->m_data += iphlen; 1438 m->m_len -= iphlen; 1439 igmp = mtod(m, struct igmp *); 1440 if (in_cksum(m, igmplen)) { 1441 IGMPSTAT_INC(igps_rcv_badsum); 1442 m_freem(m); 1443 return; 1444 } 1445 m->m_data -= iphlen; 1446 m->m_len += iphlen; 1447 1448 switch (igmp->igmp_type) { 1449 case IGMP_HOST_MEMBERSHIP_QUERY: 1450 if (igmplen == IGMP_MINLEN) { 1451 if (igmp->igmp_code == 0) 1452 queryver = IGMP_VERSION_1; 1453 else 1454 queryver = IGMP_VERSION_2; 1455 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 1456 queryver = IGMP_VERSION_3; 1457 } else { 1458 IGMPSTAT_INC(igps_rcv_tooshort); 1459 m_freem(m); 1460 return; 1461 } 1462 1463 switch (queryver) { 1464 case IGMP_VERSION_1: 1465 IGMPSTAT_INC(igps_rcv_v1v2_queries); 1466 if (!V_igmp_v1enable) 1467 break; 1468 if (igmp_input_v1_query(ifp, ip) != 0) { 1469 m_freem(m); 1470 return; 1471 } 1472 break; 1473 1474 case IGMP_VERSION_2: 1475 IGMPSTAT_INC(igps_rcv_v1v2_queries); 1476 if (!V_igmp_v2enable) 1477 break; 1478 if (igmp_input_v2_query(ifp, ip, igmp) != 0) { 1479 m_freem(m); 1480 return; 1481 } 1482 break; 1483 1484 case IGMP_VERSION_3: { 1485 struct igmpv3 *igmpv3; 1486 uint16_t igmpv3len; 1487 uint16_t srclen; 1488 int nsrc; 1489 1490 IGMPSTAT_INC(igps_rcv_v3_queries); 1491 igmpv3 = (struct igmpv3 *)igmp; 1492 /* 1493 * Validate length based on source count. 1494 */ 1495 nsrc = ntohs(igmpv3->igmp_numsrc); 1496 srclen = sizeof(struct in_addr) * nsrc; 1497 if (nsrc * sizeof(in_addr_t) > srclen) { 1498 IGMPSTAT_INC(igps_rcv_tooshort); 1499 return; 1500 } 1501 /* 1502 * m_pullup() may modify m, so pullup in 1503 * this scope. 1504 */ 1505 igmpv3len = iphlen + IGMP_V3_QUERY_MINLEN + 1506 srclen; 1507 if ((m->m_flags & M_EXT || 1508 m->m_len < igmpv3len) && 1509 (m = m_pullup(m, igmpv3len)) == NULL) { 1510 IGMPSTAT_INC(igps_rcv_tooshort); 1511 return; 1512 } 1513 igmpv3 = (struct igmpv3 *)(mtod(m, uint8_t *) 1514 + iphlen); 1515 if (igmp_input_v3_query(ifp, ip, igmpv3) != 0) { 1516 m_freem(m); 1517 return; 1518 } 1519 } 1520 break; 1521 } 1522 break; 1523 1524 case IGMP_v1_HOST_MEMBERSHIP_REPORT: 1525 if (!V_igmp_v1enable) 1526 break; 1527 if (igmp_input_v1_report(ifp, ip, igmp) != 0) { 1528 m_freem(m); 1529 return; 1530 } 1531 break; 1532 1533 case IGMP_v2_HOST_MEMBERSHIP_REPORT: 1534 if (!V_igmp_v2enable) 1535 break; 1536 if (!ip_checkrouteralert(m)) 1537 IGMPSTAT_INC(igps_rcv_nora); 1538 if (igmp_input_v2_report(ifp, ip, igmp) != 0) { 1539 m_freem(m); 1540 return; 1541 } 1542 break; 1543 1544 case IGMP_v3_HOST_MEMBERSHIP_REPORT: 1545 /* 1546 * Hosts do not need to process IGMPv3 membership reports, 1547 * as report suppression is no longer required. 1548 */ 1549 if (!ip_checkrouteralert(m)) 1550 IGMPSTAT_INC(igps_rcv_nora); 1551 break; 1552 1553 default: 1554 break; 1555 } 1556 1557 /* 1558 * Pass all valid IGMP packets up to any process(es) listening on a 1559 * raw IGMP socket. 1560 */ 1561 rip_input(m, off); 1562 } 1563 1564 1565 /* 1566 * Fast timeout handler (global). 1567 * VIMAGE: Timeout handlers are expected to service all vimages. 1568 */ 1569 void 1570 igmp_fasttimo(void) 1571 { 1572 VNET_ITERATOR_DECL(vnet_iter); 1573 1574 VNET_LIST_RLOCK(); 1575 VNET_FOREACH(vnet_iter) { 1576 CURVNET_SET(vnet_iter); 1577 igmp_fasttimo_vnet(); 1578 CURVNET_RESTORE(); 1579 } 1580 VNET_LIST_RUNLOCK(); 1581 } 1582 1583 /* 1584 * Fast timeout handler (per-vnet). 1585 * Sends are shuffled off to a netisr to deal with Giant. 1586 * 1587 * VIMAGE: Assume caller has set up our curvnet. 1588 */ 1589 static void 1590 igmp_fasttimo_vnet(void) 1591 { 1592 INIT_VNET_INET(curvnet); 1593 struct ifqueue scq; /* State-change packets */ 1594 struct ifqueue qrq; /* Query response packets */ 1595 struct ifnet *ifp; 1596 struct igmp_ifinfo *igi; 1597 struct ifmultiaddr *ifma, *tifma; 1598 struct in_multi *inm; 1599 int loop, uri_fasthz; 1600 1601 loop = 0; 1602 uri_fasthz = 0; 1603 1604 /* 1605 * Quick check to see if any work needs to be done, in order to 1606 * minimize the overhead of fasttimo processing. 1607 * SMPng: XXX Unlocked reads. 1608 */ 1609 if (!V_current_state_timers_running && 1610 !V_interface_timers_running && 1611 !V_state_change_timers_running) 1612 return; 1613 1614 IN_MULTI_LOCK(); 1615 IGMP_LOCK(); 1616 1617 /* 1618 * IGMPv3 General Query response timer processing. 1619 */ 1620 if (V_interface_timers_running) { 1621 CTR1(KTR_IGMPV3, "%s: interface timers running", __func__); 1622 1623 V_interface_timers_running = 0; 1624 LIST_FOREACH(igi, &V_igi_head, igi_link) { 1625 if (igi->igi_v3_timer == 0) { 1626 /* Do nothing. */ 1627 } else if (--igi->igi_v3_timer == 0) { 1628 igmp_v3_dispatch_general_query(igi); 1629 } else { 1630 V_interface_timers_running = 1; 1631 } 1632 } 1633 } 1634 1635 if (!V_current_state_timers_running && 1636 !V_state_change_timers_running) 1637 goto out_locked; 1638 1639 V_current_state_timers_running = 0; 1640 V_state_change_timers_running = 0; 1641 1642 CTR1(KTR_IGMPV3, "%s: state change timers running", __func__); 1643 1644 /* 1645 * IGMPv1/v2/v3 host report and state-change timer processing. 1646 * Note: Processing a v3 group timer may remove a node. 1647 */ 1648 LIST_FOREACH(igi, &V_igi_head, igi_link) { 1649 ifp = igi->igi_ifp; 1650 1651 if (igi->igi_version == IGMP_VERSION_3) { 1652 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; 1653 uri_fasthz = IGMP_RANDOM_DELAY(igi->igi_uri * 1654 PR_FASTHZ); 1655 1656 memset(&qrq, 0, sizeof(struct ifqueue)); 1657 IFQ_SET_MAXLEN(&qrq, IGMP_MAX_G_GS_PACKETS); 1658 1659 memset(&scq, 0, sizeof(struct ifqueue)); 1660 IFQ_SET_MAXLEN(&scq, IGMP_MAX_STATE_CHANGE_PACKETS); 1661 } 1662 1663 IF_ADDR_LOCK(ifp); 1664 TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, 1665 tifma) { 1666 if (ifma->ifma_addr->sa_family != AF_INET || 1667 ifma->ifma_protospec == NULL) 1668 continue; 1669 inm = (struct in_multi *)ifma->ifma_protospec; 1670 switch (igi->igi_version) { 1671 case IGMP_VERSION_1: 1672 case IGMP_VERSION_2: 1673 igmp_v1v2_process_group_timer(inm, 1674 igi->igi_version); 1675 break; 1676 case IGMP_VERSION_3: 1677 igmp_v3_process_group_timers(igi, &qrq, 1678 &scq, inm, uri_fasthz); 1679 break; 1680 } 1681 } 1682 IF_ADDR_UNLOCK(ifp); 1683 1684 if (igi->igi_version == IGMP_VERSION_3) { 1685 struct in_multi *tinm; 1686 1687 igmp_dispatch_queue(&qrq, 0, loop); 1688 igmp_dispatch_queue(&scq, 0, loop); 1689 1690 /* 1691 * Free the in_multi reference(s) for this 1692 * IGMP lifecycle. 1693 */ 1694 SLIST_FOREACH_SAFE(inm, &igi->igi_relinmhead, 1695 inm_nrele, tinm) { 1696 SLIST_REMOVE_HEAD(&igi->igi_relinmhead, 1697 inm_nrele); 1698 inm_release_locked(inm); 1699 } 1700 } 1701 } 1702 1703 out_locked: 1704 IGMP_UNLOCK(); 1705 IN_MULTI_UNLOCK(); 1706 } 1707 1708 /* 1709 * Update host report group timer for IGMPv1/v2. 1710 * Will update the global pending timer flags. 1711 */ 1712 static void 1713 igmp_v1v2_process_group_timer(struct in_multi *inm, const int version) 1714 { 1715 INIT_VNET_INET(curvnet); 1716 int report_timer_expired; 1717 1718 IN_MULTI_LOCK_ASSERT(); 1719 IGMP_LOCK_ASSERT(); 1720 1721 if (inm->inm_timer == 0) { 1722 report_timer_expired = 0; 1723 } else if (--inm->inm_timer == 0) { 1724 report_timer_expired = 1; 1725 } else { 1726 V_current_state_timers_running = 1; 1727 return; 1728 } 1729 1730 switch (inm->inm_state) { 1731 case IGMP_NOT_MEMBER: 1732 case IGMP_SILENT_MEMBER: 1733 case IGMP_IDLE_MEMBER: 1734 case IGMP_LAZY_MEMBER: 1735 case IGMP_SLEEPING_MEMBER: 1736 case IGMP_AWAKENING_MEMBER: 1737 break; 1738 case IGMP_REPORTING_MEMBER: 1739 if (report_timer_expired) { 1740 inm->inm_state = IGMP_IDLE_MEMBER; 1741 (void)igmp_v1v2_queue_report(inm, 1742 (version == IGMP_VERSION_2) ? 1743 IGMP_v2_HOST_MEMBERSHIP_REPORT : 1744 IGMP_v1_HOST_MEMBERSHIP_REPORT); 1745 } 1746 break; 1747 case IGMP_G_QUERY_PENDING_MEMBER: 1748 case IGMP_SG_QUERY_PENDING_MEMBER: 1749 case IGMP_LEAVING_MEMBER: 1750 break; 1751 } 1752 } 1753 1754 /* 1755 * Update a group's timers for IGMPv3. 1756 * Will update the global pending timer flags. 1757 * Note: Unlocked read from igi. 1758 */ 1759 static void 1760 igmp_v3_process_group_timers(struct igmp_ifinfo *igi, 1761 struct ifqueue *qrq, struct ifqueue *scq, 1762 struct in_multi *inm, const int uri_fasthz) 1763 { 1764 INIT_VNET_INET(curvnet); 1765 int query_response_timer_expired; 1766 int state_change_retransmit_timer_expired; 1767 1768 IN_MULTI_LOCK_ASSERT(); 1769 IGMP_LOCK_ASSERT(); 1770 1771 query_response_timer_expired = 0; 1772 state_change_retransmit_timer_expired = 0; 1773 1774 /* 1775 * During a transition from v1/v2 compatibility mode back to v3, 1776 * a group record in REPORTING state may still have its group 1777 * timer active. This is a no-op in this function; it is easier 1778 * to deal with it here than to complicate the slow-timeout path. 1779 */ 1780 if (inm->inm_timer == 0) { 1781 query_response_timer_expired = 0; 1782 } else if (--inm->inm_timer == 0) { 1783 query_response_timer_expired = 1; 1784 } else { 1785 V_current_state_timers_running = 1; 1786 } 1787 1788 if (inm->inm_sctimer == 0) { 1789 state_change_retransmit_timer_expired = 0; 1790 } else if (--inm->inm_sctimer == 0) { 1791 state_change_retransmit_timer_expired = 1; 1792 } else { 1793 V_state_change_timers_running = 1; 1794 } 1795 1796 /* We are in fasttimo, so be quick about it. */ 1797 if (!state_change_retransmit_timer_expired && 1798 !query_response_timer_expired) 1799 return; 1800 1801 switch (inm->inm_state) { 1802 case IGMP_NOT_MEMBER: 1803 case IGMP_SILENT_MEMBER: 1804 case IGMP_SLEEPING_MEMBER: 1805 case IGMP_LAZY_MEMBER: 1806 case IGMP_AWAKENING_MEMBER: 1807 case IGMP_IDLE_MEMBER: 1808 break; 1809 case IGMP_G_QUERY_PENDING_MEMBER: 1810 case IGMP_SG_QUERY_PENDING_MEMBER: 1811 /* 1812 * Respond to a previously pending Group-Specific 1813 * or Group-and-Source-Specific query by enqueueing 1814 * the appropriate Current-State report for 1815 * immediate transmission. 1816 */ 1817 if (query_response_timer_expired) { 1818 int retval; 1819 1820 retval = igmp_v3_enqueue_group_record(qrq, inm, 0, 1, 1821 (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER)); 1822 CTR2(KTR_IGMPV3, "%s: enqueue record = %d", 1823 __func__, retval); 1824 inm->inm_state = IGMP_REPORTING_MEMBER; 1825 /* XXX Clear recorded sources for next time. */ 1826 inm_clear_recorded(inm); 1827 } 1828 /* FALLTHROUGH */ 1829 case IGMP_REPORTING_MEMBER: 1830 case IGMP_LEAVING_MEMBER: 1831 if (state_change_retransmit_timer_expired) { 1832 /* 1833 * State-change retransmission timer fired. 1834 * If there are any further pending retransmissions, 1835 * set the global pending state-change flag, and 1836 * reset the timer. 1837 */ 1838 if (--inm->inm_scrv > 0) { 1839 inm->inm_sctimer = uri_fasthz; 1840 V_state_change_timers_running = 1; 1841 } 1842 /* 1843 * Retransmit the previously computed state-change 1844 * report. If there are no further pending 1845 * retransmissions, the mbuf queue will be consumed. 1846 * Update T0 state to T1 as we have now sent 1847 * a state-change. 1848 */ 1849 (void)igmp_v3_merge_state_changes(inm, scq); 1850 1851 inm_commit(inm); 1852 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, 1853 inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); 1854 1855 /* 1856 * If we are leaving the group for good, make sure 1857 * we release IGMP's reference to it. 1858 * This release must be deferred using a SLIST, 1859 * as we are called from a loop which traverses 1860 * the in_ifmultiaddr TAILQ. 1861 */ 1862 if (inm->inm_state == IGMP_LEAVING_MEMBER && 1863 inm->inm_scrv == 0) { 1864 inm->inm_state = IGMP_NOT_MEMBER; 1865 SLIST_INSERT_HEAD(&igi->igi_relinmhead, 1866 inm, inm_nrele); 1867 } 1868 } 1869 break; 1870 } 1871 } 1872 1873 1874 /* 1875 * Suppress a group's pending response to a group or source/group query. 1876 * 1877 * Do NOT suppress state changes. This leads to IGMPv3 inconsistency. 1878 * Do NOT update ST1/ST0 as this operation merely suppresses 1879 * the currently pending group record. 1880 * Do NOT suppress the response to a general query. It is possible but 1881 * it would require adding another state or flag. 1882 */ 1883 static void 1884 igmp_v3_suppress_group_record(struct in_multi *inm) 1885 { 1886 1887 IN_MULTI_LOCK_ASSERT(); 1888 1889 KASSERT(inm->inm_igi->igi_version == IGMP_VERSION_3, 1890 ("%s: not IGMPv3 mode on link", __func__)); 1891 1892 if (inm->inm_state != IGMP_G_QUERY_PENDING_MEMBER || 1893 inm->inm_state != IGMP_SG_QUERY_PENDING_MEMBER) 1894 return; 1895 1896 if (inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) 1897 inm_clear_recorded(inm); 1898 1899 inm->inm_timer = 0; 1900 inm->inm_state = IGMP_REPORTING_MEMBER; 1901 } 1902 1903 /* 1904 * Switch to a different IGMP version on the given interface, 1905 * as per Section 7.2.1. 1906 */ 1907 static void 1908 igmp_set_version(struct igmp_ifinfo *igi, const int version) 1909 { 1910 1911 IGMP_LOCK_ASSERT(); 1912 1913 CTR4(KTR_IGMPV3, "%s: switching to v%d on ifp %p(%s)", __func__, 1914 version, igi->igi_ifp, igi->igi_ifp->if_xname); 1915 1916 if (version == IGMP_VERSION_1 || version == IGMP_VERSION_2) { 1917 int old_version_timer; 1918 /* 1919 * Compute the "Older Version Querier Present" timer as per 1920 * Section 8.12. 1921 */ 1922 old_version_timer = igi->igi_rv * igi->igi_qi + igi->igi_qri; 1923 old_version_timer *= PR_SLOWHZ; 1924 1925 if (version == IGMP_VERSION_1) { 1926 igi->igi_v1_timer = old_version_timer; 1927 igi->igi_v2_timer = 0; 1928 } else if (version == IGMP_VERSION_2) { 1929 igi->igi_v1_timer = 0; 1930 igi->igi_v2_timer = old_version_timer; 1931 } 1932 } 1933 1934 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { 1935 if (igi->igi_version != IGMP_VERSION_2) { 1936 igi->igi_version = IGMP_VERSION_2; 1937 igmp_v3_cancel_link_timers(igi); 1938 } 1939 } else if (igi->igi_v1_timer > 0) { 1940 if (igi->igi_version != IGMP_VERSION_1) { 1941 igi->igi_version = IGMP_VERSION_1; 1942 igmp_v3_cancel_link_timers(igi); 1943 } 1944 } 1945 } 1946 1947 /* 1948 * Cancel pending IGMPv3 timers for the given link and all groups 1949 * joined on it; state-change, general-query, and group-query timers. 1950 */ 1951 static void 1952 igmp_v3_cancel_link_timers(struct igmp_ifinfo *igi) 1953 { 1954 INIT_VNET_INET(curvnet); 1955 struct ifmultiaddr *ifma; 1956 struct ifnet *ifp; 1957 struct in_multi *inm; 1958 1959 CTR3(KTR_IGMPV3, "%s: cancel v3 timers on ifp %p(%s)", __func__, 1960 igi->igi_ifp, igi->igi_ifp->if_xname); 1961 1962 IN_MULTI_LOCK_ASSERT(); 1963 IGMP_LOCK_ASSERT(); 1964 1965 /* 1966 * Fast-track this potentially expensive operation 1967 * by checking all the global 'timer pending' flags. 1968 */ 1969 if (!V_interface_timers_running && 1970 !V_state_change_timers_running && 1971 !V_current_state_timers_running) 1972 return; 1973 1974 igi->igi_v3_timer = 0; 1975 1976 ifp = igi->igi_ifp; 1977 1978 IF_ADDR_LOCK(ifp); 1979 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 1980 if (ifma->ifma_addr->sa_family != AF_INET) 1981 continue; 1982 inm = (struct in_multi *)ifma->ifma_protospec; 1983 switch (inm->inm_state) { 1984 case IGMP_NOT_MEMBER: 1985 case IGMP_SILENT_MEMBER: 1986 case IGMP_IDLE_MEMBER: 1987 case IGMP_LAZY_MEMBER: 1988 case IGMP_SLEEPING_MEMBER: 1989 case IGMP_AWAKENING_MEMBER: 1990 break; 1991 case IGMP_LEAVING_MEMBER: 1992 /* 1993 * If we are leaving the group and switching 1994 * IGMP version, we need to release the final 1995 * reference held for issuing the INCLUDE {}. 1996 * 1997 * SMPNG: Must drop and re-acquire IF_ADDR_LOCK 1998 * around inm_release_locked(), as it is not 1999 * a recursive mutex. 2000 */ 2001 IF_ADDR_UNLOCK(ifp); 2002 inm_release_locked(inm); 2003 IF_ADDR_LOCK(ifp); 2004 /* FALLTHROUGH */ 2005 case IGMP_G_QUERY_PENDING_MEMBER: 2006 case IGMP_SG_QUERY_PENDING_MEMBER: 2007 inm_clear_recorded(inm); 2008 /* FALLTHROUGH */ 2009 case IGMP_REPORTING_MEMBER: 2010 inm->inm_sctimer = 0; 2011 inm->inm_timer = 0; 2012 inm->inm_state = IGMP_REPORTING_MEMBER; 2013 /* 2014 * Free any pending IGMPv3 state-change records. 2015 */ 2016 _IF_DRAIN(&inm->inm_scq); 2017 break; 2018 } 2019 } 2020 IF_ADDR_UNLOCK(ifp); 2021 } 2022 2023 /* 2024 * Update the Older Version Querier Present timers for a link. 2025 * See Section 7.2.1 of RFC 3376. 2026 */ 2027 static void 2028 igmp_v1v2_process_querier_timers(struct igmp_ifinfo *igi) 2029 { 2030 INIT_VNET_INET(curvnet); 2031 2032 IGMP_LOCK_ASSERT(); 2033 2034 if (igi->igi_v1_timer == 0 && igi->igi_v2_timer == 0) { 2035 /* 2036 * IGMPv1 and IGMPv2 Querier Present timers expired. 2037 * 2038 * Revert to IGMPv3. 2039 */ 2040 if (igi->igi_version != IGMP_VERSION_3) { 2041 CTR5(KTR_IGMPV3, 2042 "%s: transition from v%d -> v%d on %p(%s)", 2043 __func__, igi->igi_version, IGMP_VERSION_3, 2044 igi->igi_ifp, igi->igi_ifp->if_xname); 2045 igi->igi_version = IGMP_VERSION_3; 2046 } 2047 } else if (igi->igi_v1_timer == 0 && igi->igi_v2_timer > 0) { 2048 /* 2049 * IGMPv1 Querier Present timer expired, 2050 * IGMPv2 Querier Present timer running. 2051 * If IGMPv2 was disabled since last timeout, 2052 * revert to IGMPv3. 2053 * If IGMPv2 is enabled, revert to IGMPv2. 2054 */ 2055 if (!V_igmp_v2enable) { 2056 CTR5(KTR_IGMPV3, 2057 "%s: transition from v%d -> v%d on %p(%s)", 2058 __func__, igi->igi_version, IGMP_VERSION_3, 2059 igi->igi_ifp, igi->igi_ifp->if_xname); 2060 igi->igi_v2_timer = 0; 2061 igi->igi_version = IGMP_VERSION_3; 2062 } else { 2063 --igi->igi_v2_timer; 2064 if (igi->igi_version != IGMP_VERSION_2) { 2065 CTR5(KTR_IGMPV3, 2066 "%s: transition from v%d -> v%d on %p(%s)", 2067 __func__, igi->igi_version, IGMP_VERSION_2, 2068 igi->igi_ifp, igi->igi_ifp->if_xname); 2069 igi->igi_version = IGMP_VERSION_2; 2070 } 2071 } 2072 } else if (igi->igi_v1_timer > 0) { 2073 /* 2074 * IGMPv1 Querier Present timer running. 2075 * Stop IGMPv2 timer if running. 2076 * 2077 * If IGMPv1 was disabled since last timeout, 2078 * revert to IGMPv3. 2079 * If IGMPv1 is enabled, reset IGMPv2 timer if running. 2080 */ 2081 if (!V_igmp_v1enable) { 2082 CTR5(KTR_IGMPV3, 2083 "%s: transition from v%d -> v%d on %p(%s)", 2084 __func__, igi->igi_version, IGMP_VERSION_3, 2085 igi->igi_ifp, igi->igi_ifp->if_xname); 2086 igi->igi_v1_timer = 0; 2087 igi->igi_version = IGMP_VERSION_3; 2088 } else { 2089 --igi->igi_v1_timer; 2090 } 2091 if (igi->igi_v2_timer > 0) { 2092 CTR3(KTR_IGMPV3, 2093 "%s: cancel v2 timer on %p(%s)", 2094 __func__, igi->igi_ifp, igi->igi_ifp->if_xname); 2095 igi->igi_v2_timer = 0; 2096 } 2097 } 2098 } 2099 2100 /* 2101 * Global slowtimo handler. 2102 * VIMAGE: Timeout handlers are expected to service all vimages. 2103 */ 2104 void 2105 igmp_slowtimo(void) 2106 { 2107 VNET_ITERATOR_DECL(vnet_iter); 2108 2109 VNET_LIST_RLOCK(); 2110 VNET_FOREACH(vnet_iter) { 2111 CURVNET_SET(vnet_iter); 2112 igmp_slowtimo_vnet(); 2113 CURVNET_RESTORE(); 2114 } 2115 VNET_LIST_RUNLOCK(); 2116 } 2117 2118 /* 2119 * Per-vnet slowtimo handler. 2120 */ 2121 static void 2122 igmp_slowtimo_vnet(void) 2123 { 2124 INIT_VNET_INET(curvnet); 2125 struct igmp_ifinfo *igi; 2126 2127 IGMP_LOCK(); 2128 2129 LIST_FOREACH(igi, &V_igi_head, igi_link) { 2130 igmp_v1v2_process_querier_timers(igi); 2131 } 2132 2133 IGMP_UNLOCK(); 2134 } 2135 2136 /* 2137 * Dispatch an IGMPv1/v2 host report or leave message. 2138 * These are always small enough to fit inside a single mbuf. 2139 */ 2140 static int 2141 igmp_v1v2_queue_report(struct in_multi *inm, const int type) 2142 { 2143 struct ifnet *ifp; 2144 struct igmp *igmp; 2145 struct ip *ip; 2146 struct mbuf *m; 2147 2148 IN_MULTI_LOCK_ASSERT(); 2149 IGMP_LOCK_ASSERT(); 2150 2151 ifp = inm->inm_ifp; 2152 2153 MGETHDR(m, M_DONTWAIT, MT_DATA); 2154 if (m == NULL) 2155 return (ENOMEM); 2156 MH_ALIGN(m, sizeof(struct ip) + sizeof(struct igmp)); 2157 2158 m->m_pkthdr.len = sizeof(struct ip) + sizeof(struct igmp); 2159 2160 m->m_data += sizeof(struct ip); 2161 m->m_len = sizeof(struct igmp); 2162 2163 igmp = mtod(m, struct igmp *); 2164 igmp->igmp_type = type; 2165 igmp->igmp_code = 0; 2166 igmp->igmp_group = inm->inm_addr; 2167 igmp->igmp_cksum = 0; 2168 igmp->igmp_cksum = in_cksum(m, sizeof(struct igmp)); 2169 2170 m->m_data -= sizeof(struct ip); 2171 m->m_len += sizeof(struct ip); 2172 2173 ip = mtod(m, struct ip *); 2174 ip->ip_tos = 0; 2175 ip->ip_len = sizeof(struct ip) + sizeof(struct igmp); 2176 ip->ip_off = 0; 2177 ip->ip_p = IPPROTO_IGMP; 2178 ip->ip_src.s_addr = INADDR_ANY; 2179 2180 if (type == IGMP_HOST_LEAVE_MESSAGE) 2181 ip->ip_dst.s_addr = htonl(INADDR_ALLRTRS_GROUP); 2182 else 2183 ip->ip_dst = inm->inm_addr; 2184 2185 igmp_save_context(m, ifp); 2186 2187 m->m_flags |= M_IGMPV2; 2188 if (inm->inm_igi->igi_flags & IGIF_LOOPBACK) 2189 m->m_flags |= M_IGMP_LOOP; 2190 2191 CTR2(KTR_IGMPV3, "%s: netisr_dispatch(NETISR_IGMP, %p)", __func__, m); 2192 netisr_dispatch(NETISR_IGMP, m); 2193 2194 return (0); 2195 } 2196 2197 /* 2198 * Process a state change from the upper layer for the given IPv4 group. 2199 * 2200 * Each socket holds a reference on the in_multi in its own ip_moptions. 2201 * The socket layer will have made the necessary updates to.the group 2202 * state, it is now up to IGMP to issue a state change report if there 2203 * has been any change between T0 (when the last state-change was issued) 2204 * and T1 (now). 2205 * 2206 * We use the IGMPv3 state machine at group level. The IGMP module 2207 * however makes the decision as to which IGMP protocol version to speak. 2208 * A state change *from* INCLUDE {} always means an initial join. 2209 * A state change *to* INCLUDE {} always means a final leave. 2210 * 2211 * FUTURE: If IGIF_V3LITE is enabled for this interface, then we can 2212 * save ourselves a bunch of work; any exclusive mode groups need not 2213 * compute source filter lists. 2214 * 2215 * VIMAGE: curvnet should have been set by caller, as this routine 2216 * is called from the socket option handlers. 2217 */ 2218 int 2219 igmp_change_state(struct in_multi *inm) 2220 { 2221 struct igmp_ifinfo *igi; 2222 struct ifnet *ifp; 2223 int error; 2224 2225 IN_MULTI_LOCK_ASSERT(); 2226 2227 error = 0; 2228 2229 /* 2230 * Try to detect if the upper layer just asked us to change state 2231 * for an interface which has now gone away. 2232 */ 2233 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 2234 ifp = inm->inm_ifma->ifma_ifp; 2235 if (ifp != NULL) { 2236 /* 2237 * Sanity check that netinet's notion of ifp is the 2238 * same as net's. 2239 */ 2240 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 2241 } 2242 2243 IGMP_LOCK(); 2244 2245 igi = ((struct in_ifinfo *)ifp->if_afdata[AF_INET])->ii_igmp; 2246 KASSERT(igi != NULL, ("%s: no igmp_ifinfo for ifp %p", __func__, ifp)); 2247 2248 /* 2249 * If we detect a state transition to or from MCAST_UNDEFINED 2250 * for this group, then we are starting or finishing an IGMP 2251 * life cycle for this group. 2252 */ 2253 if (inm->inm_st[1].iss_fmode != inm->inm_st[0].iss_fmode) { 2254 CTR3(KTR_IGMPV3, "%s: inm transition %d -> %d", __func__, 2255 inm->inm_st[0].iss_fmode, inm->inm_st[1].iss_fmode); 2256 if (inm->inm_st[0].iss_fmode == MCAST_UNDEFINED) { 2257 CTR1(KTR_IGMPV3, "%s: initial join", __func__); 2258 error = igmp_initial_join(inm, igi); 2259 goto out_locked; 2260 } else if (inm->inm_st[1].iss_fmode == MCAST_UNDEFINED) { 2261 CTR1(KTR_IGMPV3, "%s: final leave", __func__); 2262 igmp_final_leave(inm, igi); 2263 goto out_locked; 2264 } 2265 } else { 2266 CTR1(KTR_IGMPV3, "%s: filter set change", __func__); 2267 } 2268 2269 error = igmp_handle_state_change(inm, igi); 2270 2271 out_locked: 2272 IGMP_UNLOCK(); 2273 return (error); 2274 } 2275 2276 /* 2277 * Perform the initial join for an IGMP group. 2278 * 2279 * When joining a group: 2280 * If the group should have its IGMP traffic suppressed, do nothing. 2281 * IGMPv1 starts sending IGMPv1 host membership reports. 2282 * IGMPv2 starts sending IGMPv2 host membership reports. 2283 * IGMPv3 will schedule an IGMPv3 state-change report containing the 2284 * initial state of the membership. 2285 */ 2286 static int 2287 igmp_initial_join(struct in_multi *inm, struct igmp_ifinfo *igi) 2288 { 2289 INIT_VNET_INET(curvnet); 2290 struct ifnet *ifp; 2291 struct ifqueue *ifq; 2292 int error, retval, syncstates; 2293 2294 CTR4(KTR_IGMPV3, "%s: initial join %s on ifp %p(%s)", 2295 __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, 2296 inm->inm_ifp->if_xname); 2297 2298 error = 0; 2299 syncstates = 1; 2300 2301 ifp = inm->inm_ifp; 2302 2303 IN_MULTI_LOCK_ASSERT(); 2304 IGMP_LOCK_ASSERT(); 2305 2306 KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__)); 2307 2308 /* 2309 * Groups joined on loopback or marked as 'not reported', 2310 * e.g. 224.0.0.1, enter the IGMP_SILENT_MEMBER state and 2311 * are never reported in any IGMP protocol exchanges. 2312 * All other groups enter the appropriate IGMP state machine 2313 * for the version in use on this link. 2314 * A link marked as IGIF_SILENT causes IGMP to be completely 2315 * disabled for the link. 2316 */ 2317 if ((ifp->if_flags & IFF_LOOPBACK) || 2318 (igi->igi_flags & IGIF_SILENT) || 2319 !igmp_isgroupreported(inm->inm_addr)) { 2320 CTR1(KTR_IGMPV3, 2321 "%s: not kicking state machine for silent group", __func__); 2322 inm->inm_state = IGMP_SILENT_MEMBER; 2323 inm->inm_timer = 0; 2324 } else { 2325 /* 2326 * Deal with overlapping in_multi lifecycle. 2327 * If this group was LEAVING, then make sure 2328 * we drop the reference we picked up to keep the 2329 * group around for the final INCLUDE {} enqueue. 2330 */ 2331 if (igi->igi_version == IGMP_VERSION_3 && 2332 inm->inm_state == IGMP_LEAVING_MEMBER) 2333 inm_release_locked(inm); 2334 2335 inm->inm_state = IGMP_REPORTING_MEMBER; 2336 2337 switch (igi->igi_version) { 2338 case IGMP_VERSION_1: 2339 case IGMP_VERSION_2: 2340 inm->inm_state = IGMP_IDLE_MEMBER; 2341 error = igmp_v1v2_queue_report(inm, 2342 (igi->igi_version == IGMP_VERSION_2) ? 2343 IGMP_v2_HOST_MEMBERSHIP_REPORT : 2344 IGMP_v1_HOST_MEMBERSHIP_REPORT); 2345 if (error == 0) { 2346 inm->inm_timer = IGMP_RANDOM_DELAY( 2347 IGMP_V1V2_MAX_RI * PR_FASTHZ); 2348 V_current_state_timers_running = 1; 2349 } 2350 break; 2351 2352 case IGMP_VERSION_3: 2353 /* 2354 * Defer update of T0 to T1, until the first copy 2355 * of the state change has been transmitted. 2356 */ 2357 syncstates = 0; 2358 2359 /* 2360 * Immediately enqueue a State-Change Report for 2361 * this interface, freeing any previous reports. 2362 * Don't kick the timers if there is nothing to do, 2363 * or if an error occurred. 2364 */ 2365 ifq = &inm->inm_scq; 2366 _IF_DRAIN(ifq); 2367 retval = igmp_v3_enqueue_group_record(ifq, inm, 1, 2368 0, 0); 2369 CTR2(KTR_IGMPV3, "%s: enqueue record = %d", 2370 __func__, retval); 2371 if (retval <= 0) { 2372 error = retval * -1; 2373 break; 2374 } 2375 2376 /* 2377 * Schedule transmission of pending state-change 2378 * report up to RV times for this link. The timer 2379 * will fire at the next igmp_fasttimo (~200ms), 2380 * giving us an opportunity to merge the reports. 2381 */ 2382 if (igi->igi_flags & IGIF_LOOPBACK) { 2383 inm->inm_scrv = 1; 2384 } else { 2385 KASSERT(igi->igi_rv > 1, 2386 ("%s: invalid robustness %d", __func__, 2387 igi->igi_rv)); 2388 inm->inm_scrv = igi->igi_rv; 2389 } 2390 inm->inm_sctimer = 1; 2391 V_state_change_timers_running = 1; 2392 2393 error = 0; 2394 break; 2395 } 2396 } 2397 2398 /* 2399 * Only update the T0 state if state change is atomic, 2400 * i.e. we don't need to wait for a timer to fire before we 2401 * can consider the state change to have been communicated. 2402 */ 2403 if (syncstates) { 2404 inm_commit(inm); 2405 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, 2406 inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); 2407 } 2408 2409 return (error); 2410 } 2411 2412 /* 2413 * Issue an intermediate state change during the IGMP life-cycle. 2414 */ 2415 static int 2416 igmp_handle_state_change(struct in_multi *inm, struct igmp_ifinfo *igi) 2417 { 2418 INIT_VNET_INET(curvnet); 2419 struct ifnet *ifp; 2420 int retval; 2421 2422 CTR4(KTR_IGMPV3, "%s: state change for %s on ifp %p(%s)", 2423 __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, 2424 inm->inm_ifp->if_xname); 2425 2426 ifp = inm->inm_ifp; 2427 2428 IN_MULTI_LOCK_ASSERT(); 2429 IGMP_LOCK_ASSERT(); 2430 2431 KASSERT(igi && igi->igi_ifp == ifp, ("%s: inconsistent ifp", __func__)); 2432 2433 if ((ifp->if_flags & IFF_LOOPBACK) || 2434 (igi->igi_flags & IGIF_SILENT) || 2435 !igmp_isgroupreported(inm->inm_addr) || 2436 (igi->igi_version != IGMP_VERSION_3)) { 2437 if (!igmp_isgroupreported(inm->inm_addr)) { 2438 CTR1(KTR_IGMPV3, 2439 "%s: not kicking state machine for silent group", __func__); 2440 } 2441 CTR1(KTR_IGMPV3, "%s: nothing to do", __func__); 2442 inm_commit(inm); 2443 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, 2444 inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); 2445 return (0); 2446 } 2447 2448 _IF_DRAIN(&inm->inm_scq); 2449 2450 retval = igmp_v3_enqueue_group_record(&inm->inm_scq, inm, 1, 0, 0); 2451 CTR2(KTR_IGMPV3, "%s: enqueue record = %d", __func__, retval); 2452 if (retval <= 0) 2453 return (-retval); 2454 2455 /* 2456 * If record(s) were enqueued, start the state-change 2457 * report timer for this group. 2458 */ 2459 inm->inm_scrv = ((igi->igi_flags & IGIF_LOOPBACK) ? 1 : igi->igi_rv); 2460 inm->inm_sctimer = 1; 2461 V_state_change_timers_running = 1; 2462 2463 return (0); 2464 } 2465 2466 /* 2467 * Perform the final leave for an IGMP group. 2468 * 2469 * When leaving a group: 2470 * IGMPv1 does nothing. 2471 * IGMPv2 sends a host leave message, if and only if we are the reporter. 2472 * IGMPv3 enqueues a state-change report containing a transition 2473 * to INCLUDE {} for immediate transmission. 2474 */ 2475 static void 2476 igmp_final_leave(struct in_multi *inm, struct igmp_ifinfo *igi) 2477 { 2478 INIT_VNET_INET(curvnet); 2479 int syncstates; 2480 2481 syncstates = 1; 2482 2483 CTR4(KTR_IGMPV3, "%s: final leave %s on ifp %p(%s)", 2484 __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp, 2485 inm->inm_ifp->if_xname); 2486 2487 IN_MULTI_LOCK_ASSERT(); 2488 IGMP_LOCK_ASSERT(); 2489 2490 switch (inm->inm_state) { 2491 case IGMP_NOT_MEMBER: 2492 case IGMP_SILENT_MEMBER: 2493 case IGMP_LEAVING_MEMBER: 2494 /* Already leaving or left; do nothing. */ 2495 CTR1(KTR_IGMPV3, 2496 "%s: not kicking state machine for silent group", __func__); 2497 break; 2498 case IGMP_REPORTING_MEMBER: 2499 case IGMP_IDLE_MEMBER: 2500 case IGMP_G_QUERY_PENDING_MEMBER: 2501 case IGMP_SG_QUERY_PENDING_MEMBER: 2502 if (igi->igi_version == IGMP_VERSION_2) { 2503 #ifdef INVARIANTS 2504 if (inm->inm_state == IGMP_G_QUERY_PENDING_MEMBER || 2505 inm->inm_state == IGMP_SG_QUERY_PENDING_MEMBER) 2506 panic("%s: IGMPv3 state reached, not IGMPv3 mode", 2507 __func__); 2508 #endif 2509 igmp_v1v2_queue_report(inm, IGMP_HOST_LEAVE_MESSAGE); 2510 inm->inm_state = IGMP_NOT_MEMBER; 2511 } else if (igi->igi_version == IGMP_VERSION_3) { 2512 /* 2513 * Stop group timer and all pending reports. 2514 * Immediately enqueue a state-change report 2515 * TO_IN {} to be sent on the next fast timeout, 2516 * giving us an opportunity to merge reports. 2517 */ 2518 _IF_DRAIN(&inm->inm_scq); 2519 inm->inm_timer = 0; 2520 if (igi->igi_flags & IGIF_LOOPBACK) { 2521 inm->inm_scrv = 1; 2522 } else { 2523 inm->inm_scrv = igi->igi_rv; 2524 } 2525 CTR4(KTR_IGMPV3, "%s: Leaving %s/%s with %d " 2526 "pending retransmissions.", __func__, 2527 inet_ntoa(inm->inm_addr), 2528 inm->inm_ifp->if_xname, inm->inm_scrv); 2529 if (inm->inm_scrv == 0) { 2530 inm->inm_state = IGMP_NOT_MEMBER; 2531 inm->inm_sctimer = 0; 2532 } else { 2533 int retval; 2534 2535 inm_acquire_locked(inm); 2536 2537 retval = igmp_v3_enqueue_group_record( 2538 &inm->inm_scq, inm, 1, 0, 0); 2539 KASSERT(retval != 0, 2540 ("%s: enqueue record = %d", __func__, 2541 retval)); 2542 2543 inm->inm_state = IGMP_LEAVING_MEMBER; 2544 inm->inm_sctimer = 1; 2545 V_state_change_timers_running = 1; 2546 syncstates = 0; 2547 } 2548 break; 2549 } 2550 break; 2551 case IGMP_LAZY_MEMBER: 2552 case IGMP_SLEEPING_MEMBER: 2553 case IGMP_AWAKENING_MEMBER: 2554 /* Our reports are suppressed; do nothing. */ 2555 break; 2556 } 2557 2558 if (syncstates) { 2559 inm_commit(inm); 2560 CTR3(KTR_IGMPV3, "%s: T1 -> T0 for %s/%s", __func__, 2561 inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); 2562 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 2563 CTR3(KTR_IGMPV3, "%s: T1 now MCAST_UNDEFINED for %s/%s", 2564 __func__, inet_ntoa(inm->inm_addr), inm->inm_ifp->if_xname); 2565 } 2566 } 2567 2568 /* 2569 * Enqueue an IGMPv3 group record to the given output queue. 2570 * 2571 * XXX This function could do with having the allocation code 2572 * split out, and the multiple-tree-walks coalesced into a single 2573 * routine as has been done in igmp_v3_enqueue_filter_change(). 2574 * 2575 * If is_state_change is zero, a current-state record is appended. 2576 * If is_state_change is non-zero, a state-change report is appended. 2577 * 2578 * If is_group_query is non-zero, an mbuf packet chain is allocated. 2579 * If is_group_query is zero, and if there is a packet with free space 2580 * at the tail of the queue, it will be appended to providing there 2581 * is enough free space. 2582 * Otherwise a new mbuf packet chain is allocated. 2583 * 2584 * If is_source_query is non-zero, each source is checked to see if 2585 * it was recorded for a Group-Source query, and will be omitted if 2586 * it is not both in-mode and recorded. 2587 * 2588 * The function will attempt to allocate leading space in the packet 2589 * for the IP/IGMP header to be prepended without fragmenting the chain. 2590 * 2591 * If successful the size of all data appended to the queue is returned, 2592 * otherwise an error code less than zero is returned, or zero if 2593 * no record(s) were appended. 2594 */ 2595 static int 2596 igmp_v3_enqueue_group_record(struct ifqueue *ifq, struct in_multi *inm, 2597 const int is_state_change, const int is_group_query, 2598 const int is_source_query) 2599 { 2600 struct igmp_grouprec ig; 2601 struct igmp_grouprec *pig; 2602 struct ifnet *ifp; 2603 struct ip_msource *ims, *nims; 2604 struct mbuf *m0, *m, *md; 2605 int error, is_filter_list_change; 2606 int minrec0len, m0srcs, msrcs, nbytes, off; 2607 int record_has_sources; 2608 int now; 2609 int type; 2610 in_addr_t naddr; 2611 uint8_t mode; 2612 2613 IN_MULTI_LOCK_ASSERT(); 2614 2615 error = 0; 2616 ifp = inm->inm_ifp; 2617 is_filter_list_change = 0; 2618 m = NULL; 2619 m0 = NULL; 2620 m0srcs = 0; 2621 msrcs = 0; 2622 nbytes = 0; 2623 nims = NULL; 2624 record_has_sources = 1; 2625 pig = NULL; 2626 type = IGMP_DO_NOTHING; 2627 mode = inm->inm_st[1].iss_fmode; 2628 2629 /* 2630 * If we did not transition out of ASM mode during t0->t1, 2631 * and there are no source nodes to process, we can skip 2632 * the generation of source records. 2633 */ 2634 if (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0 && 2635 inm->inm_nsrc == 0) 2636 record_has_sources = 0; 2637 2638 if (is_state_change) { 2639 /* 2640 * Queue a state change record. 2641 * If the mode did not change, and there are non-ASM 2642 * listeners or source filters present, 2643 * we potentially need to issue two records for the group. 2644 * If we are transitioning to MCAST_UNDEFINED, we need 2645 * not send any sources. 2646 * If there are ASM listeners, and there was no filter 2647 * mode transition of any kind, do nothing. 2648 */ 2649 if (mode != inm->inm_st[0].iss_fmode) { 2650 if (mode == MCAST_EXCLUDE) { 2651 CTR1(KTR_IGMPV3, "%s: change to EXCLUDE", 2652 __func__); 2653 type = IGMP_CHANGE_TO_EXCLUDE_MODE; 2654 } else { 2655 CTR1(KTR_IGMPV3, "%s: change to INCLUDE", 2656 __func__); 2657 type = IGMP_CHANGE_TO_INCLUDE_MODE; 2658 if (mode == MCAST_UNDEFINED) 2659 record_has_sources = 0; 2660 } 2661 } else { 2662 if (record_has_sources) { 2663 is_filter_list_change = 1; 2664 } else { 2665 type = IGMP_DO_NOTHING; 2666 } 2667 } 2668 } else { 2669 /* 2670 * Queue a current state record. 2671 */ 2672 if (mode == MCAST_EXCLUDE) { 2673 type = IGMP_MODE_IS_EXCLUDE; 2674 } else if (mode == MCAST_INCLUDE) { 2675 type = IGMP_MODE_IS_INCLUDE; 2676 KASSERT(inm->inm_st[1].iss_asm == 0, 2677 ("%s: inm %p is INCLUDE but ASM count is %d", 2678 __func__, inm, inm->inm_st[1].iss_asm)); 2679 } 2680 } 2681 2682 /* 2683 * Generate the filter list changes using a separate function. 2684 */ 2685 if (is_filter_list_change) 2686 return (igmp_v3_enqueue_filter_change(ifq, inm)); 2687 2688 if (type == IGMP_DO_NOTHING) { 2689 CTR3(KTR_IGMPV3, "%s: nothing to do for %s/%s", 2690 __func__, inet_ntoa(inm->inm_addr), 2691 inm->inm_ifp->if_xname); 2692 return (0); 2693 } 2694 2695 /* 2696 * If any sources are present, we must be able to fit at least 2697 * one in the trailing space of the tail packet's mbuf, 2698 * ideally more. 2699 */ 2700 minrec0len = sizeof(struct igmp_grouprec); 2701 if (record_has_sources) 2702 minrec0len += sizeof(in_addr_t); 2703 2704 CTR4(KTR_IGMPV3, "%s: queueing %s for %s/%s", __func__, 2705 igmp_rec_type_to_str(type), inet_ntoa(inm->inm_addr), 2706 inm->inm_ifp->if_xname); 2707 2708 /* 2709 * Check if we have a packet in the tail of the queue for this 2710 * group into which the first group record for this group will fit. 2711 * Otherwise allocate a new packet. 2712 * Always allocate leading space for IP+RA_OPT+IGMP+REPORT. 2713 * Note: Group records for G/GSR query responses MUST be sent 2714 * in their own packet. 2715 */ 2716 m0 = ifq->ifq_tail; 2717 if (!is_group_query && 2718 m0 != NULL && 2719 (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= IGMP_V3_REPORT_MAXRECS) && 2720 (m0->m_pkthdr.len + minrec0len) < 2721 (ifp->if_mtu - IGMP_LEADINGSPACE)) { 2722 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 2723 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 2724 m = m0; 2725 CTR1(KTR_IGMPV3, "%s: use existing packet", __func__); 2726 } else { 2727 if (_IF_QFULL(ifq)) { 2728 CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__); 2729 return (-ENOMEM); 2730 } 2731 m = NULL; 2732 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 2733 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 2734 if (!is_state_change && !is_group_query) { 2735 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2736 if (m) 2737 m->m_data += IGMP_LEADINGSPACE; 2738 } 2739 if (m == NULL) { 2740 m = m_gethdr(M_DONTWAIT, MT_DATA); 2741 if (m) 2742 MH_ALIGN(m, IGMP_LEADINGSPACE); 2743 } 2744 if (m == NULL) 2745 return (-ENOMEM); 2746 2747 igmp_save_context(m, ifp); 2748 2749 CTR1(KTR_IGMPV3, "%s: allocated first packet", __func__); 2750 } 2751 2752 /* 2753 * Append group record. 2754 * If we have sources, we don't know how many yet. 2755 */ 2756 ig.ig_type = type; 2757 ig.ig_datalen = 0; 2758 ig.ig_numsrc = 0; 2759 ig.ig_group = inm->inm_addr; 2760 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { 2761 if (m != m0) 2762 m_freem(m); 2763 CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); 2764 return (-ENOMEM); 2765 } 2766 nbytes += sizeof(struct igmp_grouprec); 2767 2768 /* 2769 * Append as many sources as will fit in the first packet. 2770 * If we are appending to a new packet, the chain allocation 2771 * may potentially use clusters; use m_getptr() in this case. 2772 * If we are appending to an existing packet, we need to obtain 2773 * a pointer to the group record after m_append(), in case a new 2774 * mbuf was allocated. 2775 * Only append sources which are in-mode at t1. If we are 2776 * transitioning to MCAST_UNDEFINED state on the group, do not 2777 * include source entries. 2778 * Only report recorded sources in our filter set when responding 2779 * to a group-source query. 2780 */ 2781 if (record_has_sources) { 2782 if (m == m0) { 2783 md = m_last(m); 2784 pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + 2785 md->m_len - nbytes); 2786 } else { 2787 md = m_getptr(m, 0, &off); 2788 pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + 2789 off); 2790 } 2791 msrcs = 0; 2792 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, nims) { 2793 CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, 2794 inet_ntoa_haddr(ims->ims_haddr)); 2795 now = ims_get_mode(inm, ims, 1); 2796 CTR2(KTR_IGMPV3, "%s: node is %d", __func__, now); 2797 if ((now != mode) || 2798 (now == mode && mode == MCAST_UNDEFINED)) { 2799 CTR1(KTR_IGMPV3, "%s: skip node", __func__); 2800 continue; 2801 } 2802 if (is_source_query && ims->ims_stp == 0) { 2803 CTR1(KTR_IGMPV3, "%s: skip unrecorded node", 2804 __func__); 2805 continue; 2806 } 2807 CTR1(KTR_IGMPV3, "%s: append node", __func__); 2808 naddr = htonl(ims->ims_haddr); 2809 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { 2810 if (m != m0) 2811 m_freem(m); 2812 CTR1(KTR_IGMPV3, "%s: m_append() failed.", 2813 __func__); 2814 return (-ENOMEM); 2815 } 2816 nbytes += sizeof(in_addr_t); 2817 ++msrcs; 2818 if (msrcs == m0srcs) 2819 break; 2820 } 2821 CTR2(KTR_IGMPV3, "%s: msrcs is %d this packet", __func__, 2822 msrcs); 2823 pig->ig_numsrc = htons(msrcs); 2824 nbytes += (msrcs * sizeof(in_addr_t)); 2825 } 2826 2827 if (is_source_query && msrcs == 0) { 2828 CTR1(KTR_IGMPV3, "%s: no recorded sources to report", __func__); 2829 if (m != m0) 2830 m_freem(m); 2831 return (0); 2832 } 2833 2834 /* 2835 * We are good to go with first packet. 2836 */ 2837 if (m != m0) { 2838 CTR1(KTR_IGMPV3, "%s: enqueueing first packet", __func__); 2839 m->m_pkthdr.PH_vt.vt_nrecs = 1; 2840 _IF_ENQUEUE(ifq, m); 2841 } else 2842 m->m_pkthdr.PH_vt.vt_nrecs++; 2843 2844 /* 2845 * No further work needed if no source list in packet(s). 2846 */ 2847 if (!record_has_sources) 2848 return (nbytes); 2849 2850 /* 2851 * Whilst sources remain to be announced, we need to allocate 2852 * a new packet and fill out as many sources as will fit. 2853 * Always try for a cluster first. 2854 */ 2855 while (nims != NULL) { 2856 if (_IF_QFULL(ifq)) { 2857 CTR1(KTR_IGMPV3, "%s: outbound queue full", __func__); 2858 return (-ENOMEM); 2859 } 2860 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 2861 if (m) 2862 m->m_data += IGMP_LEADINGSPACE; 2863 if (m == NULL) { 2864 m = m_gethdr(M_DONTWAIT, MT_DATA); 2865 if (m) 2866 MH_ALIGN(m, IGMP_LEADINGSPACE); 2867 } 2868 if (m == NULL) 2869 return (-ENOMEM); 2870 igmp_save_context(m, ifp); 2871 md = m_getptr(m, 0, &off); 2872 pig = (struct igmp_grouprec *)(mtod(md, uint8_t *) + off); 2873 CTR1(KTR_IGMPV3, "%s: allocated next packet", __func__); 2874 2875 if (!m_append(m, sizeof(struct igmp_grouprec), (void *)&ig)) { 2876 if (m != m0) 2877 m_freem(m); 2878 CTR1(KTR_IGMPV3, "%s: m_append() failed.", __func__); 2879 return (-ENOMEM); 2880 } 2881 m->m_pkthdr.PH_vt.vt_nrecs = 1; 2882 nbytes += sizeof(struct igmp_grouprec); 2883 2884 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 2885 sizeof(struct igmp_grouprec)) / sizeof(in_addr_t); 2886 2887 msrcs = 0; 2888 RB_FOREACH_FROM(ims, ip_msource_tree, nims) { 2889 CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, 2890 inet_ntoa_haddr(ims->ims_haddr)); 2891 now = ims_get_mode(inm, ims, 1); 2892 if ((now != mode) || 2893 (now == mode && mode == MCAST_UNDEFINED)) { 2894 CTR1(KTR_IGMPV3, "%s: skip node", __func__); 2895 continue; 2896 } 2897 if (is_source_query && ims->ims_stp == 0) { 2898 CTR1(KTR_IGMPV3, "%s: skip unrecorded node", 2899 __func__); 2900 continue; 2901 } 2902 CTR1(KTR_IGMPV3, "%s: append node", __func__); 2903 naddr = htonl(ims->ims_haddr); 2904 if (!m_append(m, sizeof(in_addr_t), (void *)&naddr)) { 2905 if (m != m0) 2906 m_freem(m); 2907 CTR1(KTR_IGMPV3, "%s: m_append() failed.", 2908 __func__); 2909 return (-ENOMEM); 2910 } 2911 ++msrcs; 2912 if (msrcs == m0srcs) 2913 break; 2914 } 2915 pig->ig_numsrc = htons(msrcs); 2916 nbytes += (msrcs * sizeof(in_addr_t)); 2917 2918 CTR1(KTR_IGMPV3, "%s: enqueueing next packet", __func__); 2919 _IF_ENQUEUE(ifq, m); 2920 } 2921 2922 return (nbytes); 2923 } 2924 2925 /* 2926 * Type used to mark record pass completion. 2927 * We exploit the fact we can cast to this easily from the 2928 * current filter modes on each ip_msource node. 2929 */ 2930 typedef enum { 2931 REC_NONE = 0x00, /* MCAST_UNDEFINED */ 2932 REC_ALLOW = 0x01, /* MCAST_INCLUDE */ 2933 REC_BLOCK = 0x02, /* MCAST_EXCLUDE */ 2934 REC_FULL = REC_ALLOW | REC_BLOCK 2935 } rectype_t; 2936 2937 /* 2938 * Enqueue an IGMPv3 filter list change to the given output queue. 2939 * 2940 * Source list filter state is held in an RB-tree. When the filter list 2941 * for a group is changed without changing its mode, we need to compute 2942 * the deltas between T0 and T1 for each source in the filter set, 2943 * and enqueue the appropriate ALLOW_NEW/BLOCK_OLD records. 2944 * 2945 * As we may potentially queue two record types, and the entire R-B tree 2946 * needs to be walked at once, we break this out into its own function 2947 * so we can generate a tightly packed queue of packets. 2948 * 2949 * XXX This could be written to only use one tree walk, although that makes 2950 * serializing into the mbuf chains a bit harder. For now we do two walks 2951 * which makes things easier on us, and it may or may not be harder on 2952 * the L2 cache. 2953 * 2954 * If successful the size of all data appended to the queue is returned, 2955 * otherwise an error code less than zero is returned, or zero if 2956 * no record(s) were appended. 2957 */ 2958 static int 2959 igmp_v3_enqueue_filter_change(struct ifqueue *ifq, struct in_multi *inm) 2960 { 2961 static const int MINRECLEN = 2962 sizeof(struct igmp_grouprec) + sizeof(in_addr_t); 2963 struct ifnet *ifp; 2964 struct igmp_grouprec ig; 2965 struct igmp_grouprec *pig; 2966 struct ip_msource *ims, *nims; 2967 struct mbuf *m, *m0, *md; 2968 in_addr_t naddr; 2969 int m0srcs, nbytes, npbytes, off, rsrcs, schanged; 2970 int nallow, nblock; 2971 uint8_t mode, now, then; 2972 rectype_t crt, drt, nrt; 2973 2974 IN_MULTI_LOCK_ASSERT(); 2975 2976 if (inm->inm_nsrc == 0 || 2977 (inm->inm_st[0].iss_asm > 0 && inm->inm_st[1].iss_asm > 0)) 2978 return (0); 2979 2980 ifp = inm->inm_ifp; /* interface */ 2981 mode = inm->inm_st[1].iss_fmode; /* filter mode at t1 */ 2982 crt = REC_NONE; /* current group record type */ 2983 drt = REC_NONE; /* mask of completed group record types */ 2984 nrt = REC_NONE; /* record type for current node */ 2985 m0srcs = 0; /* # source which will fit in current mbuf chain */ 2986 nbytes = 0; /* # of bytes appended to group's state-change queue */ 2987 npbytes = 0; /* # of bytes appended this packet */ 2988 rsrcs = 0; /* # sources encoded in current record */ 2989 schanged = 0; /* # nodes encoded in overall filter change */ 2990 nallow = 0; /* # of source entries in ALLOW_NEW */ 2991 nblock = 0; /* # of source entries in BLOCK_OLD */ 2992 nims = NULL; /* next tree node pointer */ 2993 2994 /* 2995 * For each possible filter record mode. 2996 * The first kind of source we encounter tells us which 2997 * is the first kind of record we start appending. 2998 * If a node transitioned to UNDEFINED at t1, its mode is treated 2999 * as the inverse of the group's filter mode. 3000 */ 3001 while (drt != REC_FULL) { 3002 do { 3003 m0 = ifq->ifq_tail; 3004 if (m0 != NULL && 3005 (m0->m_pkthdr.PH_vt.vt_nrecs + 1 <= 3006 IGMP_V3_REPORT_MAXRECS) && 3007 (m0->m_pkthdr.len + MINRECLEN) < 3008 (ifp->if_mtu - IGMP_LEADINGSPACE)) { 3009 m = m0; 3010 m0srcs = (ifp->if_mtu - m0->m_pkthdr.len - 3011 sizeof(struct igmp_grouprec)) / 3012 sizeof(in_addr_t); 3013 CTR1(KTR_IGMPV3, 3014 "%s: use previous packet", __func__); 3015 } else { 3016 m = m_getcl(M_DONTWAIT, MT_DATA, M_PKTHDR); 3017 if (m) 3018 m->m_data += IGMP_LEADINGSPACE; 3019 if (m == NULL) { 3020 m = m_gethdr(M_DONTWAIT, MT_DATA); 3021 if (m) 3022 MH_ALIGN(m, IGMP_LEADINGSPACE); 3023 } 3024 if (m == NULL) { 3025 CTR1(KTR_IGMPV3, 3026 "%s: m_get*() failed", __func__); 3027 return (-ENOMEM); 3028 } 3029 m->m_pkthdr.PH_vt.vt_nrecs = 0; 3030 igmp_save_context(m, ifp); 3031 m0srcs = (ifp->if_mtu - IGMP_LEADINGSPACE - 3032 sizeof(struct igmp_grouprec)) / 3033 sizeof(in_addr_t); 3034 npbytes = 0; 3035 CTR1(KTR_IGMPV3, 3036 "%s: allocated new packet", __func__); 3037 } 3038 /* 3039 * Append the IGMP group record header to the 3040 * current packet's data area. 3041 * Recalculate pointer to free space for next 3042 * group record, in case m_append() allocated 3043 * a new mbuf or cluster. 3044 */ 3045 memset(&ig, 0, sizeof(ig)); 3046 ig.ig_group = inm->inm_addr; 3047 if (!m_append(m, sizeof(ig), (void *)&ig)) { 3048 if (m != m0) 3049 m_freem(m); 3050 CTR1(KTR_IGMPV3, 3051 "%s: m_append() failed", __func__); 3052 return (-ENOMEM); 3053 } 3054 npbytes += sizeof(struct igmp_grouprec); 3055 if (m != m0) { 3056 /* new packet; offset in c hain */ 3057 md = m_getptr(m, npbytes - 3058 sizeof(struct igmp_grouprec), &off); 3059 pig = (struct igmp_grouprec *)(mtod(md, 3060 uint8_t *) + off); 3061 } else { 3062 /* current packet; offset from last append */ 3063 md = m_last(m); 3064 pig = (struct igmp_grouprec *)(mtod(md, 3065 uint8_t *) + md->m_len - 3066 sizeof(struct igmp_grouprec)); 3067 } 3068 /* 3069 * Begin walking the tree for this record type 3070 * pass, or continue from where we left off 3071 * previously if we had to allocate a new packet. 3072 * Only report deltas in-mode at t1. 3073 * We need not report included sources as allowed 3074 * if we are in inclusive mode on the group, 3075 * however the converse is not true. 3076 */ 3077 rsrcs = 0; 3078 if (nims == NULL) 3079 nims = RB_MIN(ip_msource_tree, &inm->inm_srcs); 3080 RB_FOREACH_FROM(ims, ip_msource_tree, nims) { 3081 CTR2(KTR_IGMPV3, "%s: visit node %s", 3082 __func__, inet_ntoa_haddr(ims->ims_haddr)); 3083 now = ims_get_mode(inm, ims, 1); 3084 then = ims_get_mode(inm, ims, 0); 3085 CTR3(KTR_IGMPV3, "%s: mode: t0 %d, t1 %d", 3086 __func__, then, now); 3087 if (now == then) { 3088 CTR1(KTR_IGMPV3, 3089 "%s: skip unchanged", __func__); 3090 continue; 3091 } 3092 if (mode == MCAST_EXCLUDE && 3093 now == MCAST_INCLUDE) { 3094 CTR1(KTR_IGMPV3, 3095 "%s: skip IN src on EX group", 3096 __func__); 3097 continue; 3098 } 3099 nrt = (rectype_t)now; 3100 if (nrt == REC_NONE) 3101 nrt = (rectype_t)(~mode & REC_FULL); 3102 if (schanged++ == 0) { 3103 crt = nrt; 3104 } else if (crt != nrt) 3105 continue; 3106 naddr = htonl(ims->ims_haddr); 3107 if (!m_append(m, sizeof(in_addr_t), 3108 (void *)&naddr)) { 3109 if (m != m0) 3110 m_freem(m); 3111 CTR1(KTR_IGMPV3, 3112 "%s: m_append() failed", __func__); 3113 return (-ENOMEM); 3114 } 3115 nallow += !!(crt == REC_ALLOW); 3116 nblock += !!(crt == REC_BLOCK); 3117 if (++rsrcs == m0srcs) 3118 break; 3119 } 3120 /* 3121 * If we did not append any tree nodes on this 3122 * pass, back out of allocations. 3123 */ 3124 if (rsrcs == 0) { 3125 npbytes -= sizeof(struct igmp_grouprec); 3126 if (m != m0) { 3127 CTR1(KTR_IGMPV3, 3128 "%s: m_free(m)", __func__); 3129 m_freem(m); 3130 } else { 3131 CTR1(KTR_IGMPV3, 3132 "%s: m_adj(m, -ig)", __func__); 3133 m_adj(m, -((int)sizeof( 3134 struct igmp_grouprec))); 3135 } 3136 continue; 3137 } 3138 npbytes += (rsrcs * sizeof(in_addr_t)); 3139 if (crt == REC_ALLOW) 3140 pig->ig_type = IGMP_ALLOW_NEW_SOURCES; 3141 else if (crt == REC_BLOCK) 3142 pig->ig_type = IGMP_BLOCK_OLD_SOURCES; 3143 pig->ig_numsrc = htons(rsrcs); 3144 /* 3145 * Count the new group record, and enqueue this 3146 * packet if it wasn't already queued. 3147 */ 3148 m->m_pkthdr.PH_vt.vt_nrecs++; 3149 if (m != m0) 3150 _IF_ENQUEUE(ifq, m); 3151 nbytes += npbytes; 3152 } while (nims != NULL); 3153 drt |= crt; 3154 crt = (~crt & REC_FULL); 3155 } 3156 3157 CTR3(KTR_IGMPV3, "%s: queued %d ALLOW_NEW, %d BLOCK_OLD", __func__, 3158 nallow, nblock); 3159 3160 return (nbytes); 3161 } 3162 3163 static int 3164 igmp_v3_merge_state_changes(struct in_multi *inm, struct ifqueue *ifscq) 3165 { 3166 struct ifqueue *gq; 3167 struct mbuf *m; /* pending state-change */ 3168 struct mbuf *m0; /* copy of pending state-change */ 3169 struct mbuf *mt; /* last state-change in packet */ 3170 int docopy, domerge; 3171 u_int recslen; 3172 3173 docopy = 0; 3174 domerge = 0; 3175 recslen = 0; 3176 3177 IN_MULTI_LOCK_ASSERT(); 3178 IGMP_LOCK_ASSERT(); 3179 3180 /* 3181 * If there are further pending retransmissions, make a writable 3182 * copy of each queued state-change message before merging. 3183 */ 3184 if (inm->inm_scrv > 0) 3185 docopy = 1; 3186 3187 gq = &inm->inm_scq; 3188 #ifdef KTR 3189 if (gq->ifq_head == NULL) { 3190 CTR2(KTR_IGMPV3, "%s: WARNING: queue for inm %p is empty", 3191 __func__, inm); 3192 } 3193 #endif 3194 3195 m = gq->ifq_head; 3196 while (m != NULL) { 3197 /* 3198 * Only merge the report into the current packet if 3199 * there is sufficient space to do so; an IGMPv3 report 3200 * packet may only contain 65,535 group records. 3201 * Always use a simple mbuf chain concatentation to do this, 3202 * as large state changes for single groups may have 3203 * allocated clusters. 3204 */ 3205 domerge = 0; 3206 mt = ifscq->ifq_tail; 3207 if (mt != NULL) { 3208 recslen = m_length(m, NULL); 3209 3210 if ((mt->m_pkthdr.PH_vt.vt_nrecs + 3211 m->m_pkthdr.PH_vt.vt_nrecs <= 3212 IGMP_V3_REPORT_MAXRECS) && 3213 (mt->m_pkthdr.len + recslen <= 3214 (inm->inm_ifp->if_mtu - IGMP_LEADINGSPACE))) 3215 domerge = 1; 3216 } 3217 3218 if (!domerge && _IF_QFULL(gq)) { 3219 CTR2(KTR_IGMPV3, 3220 "%s: outbound queue full, skipping whole packet %p", 3221 __func__, m); 3222 mt = m->m_nextpkt; 3223 if (!docopy) 3224 m_freem(m); 3225 m = mt; 3226 continue; 3227 } 3228 3229 if (!docopy) { 3230 CTR2(KTR_IGMPV3, "%s: dequeueing %p", __func__, m); 3231 _IF_DEQUEUE(gq, m0); 3232 m = m0->m_nextpkt; 3233 } else { 3234 CTR2(KTR_IGMPV3, "%s: copying %p", __func__, m); 3235 m0 = m_dup(m, M_NOWAIT); 3236 if (m0 == NULL) 3237 return (ENOMEM); 3238 m0->m_nextpkt = NULL; 3239 m = m->m_nextpkt; 3240 } 3241 3242 if (!domerge) { 3243 CTR3(KTR_IGMPV3, "%s: queueing %p to ifscq %p)", 3244 __func__, m0, ifscq); 3245 _IF_ENQUEUE(ifscq, m0); 3246 } else { 3247 struct mbuf *mtl; /* last mbuf of packet mt */ 3248 3249 CTR3(KTR_IGMPV3, "%s: merging %p with ifscq tail %p)", 3250 __func__, m0, mt); 3251 3252 mtl = m_last(mt); 3253 m0->m_flags &= ~M_PKTHDR; 3254 mt->m_pkthdr.len += recslen; 3255 mt->m_pkthdr.PH_vt.vt_nrecs += 3256 m0->m_pkthdr.PH_vt.vt_nrecs; 3257 3258 mtl->m_next = m0; 3259 } 3260 } 3261 3262 return (0); 3263 } 3264 3265 /* 3266 * Respond to a pending IGMPv3 General Query. 3267 */ 3268 static void 3269 igmp_v3_dispatch_general_query(struct igmp_ifinfo *igi) 3270 { 3271 INIT_VNET_INET(curvnet); 3272 struct ifmultiaddr *ifma, *tifma; 3273 struct ifnet *ifp; 3274 struct in_multi *inm; 3275 int retval, loop; 3276 3277 IN_MULTI_LOCK_ASSERT(); 3278 IGMP_LOCK_ASSERT(); 3279 3280 KASSERT(igi->igi_version == IGMP_VERSION_3, 3281 ("%s: called when version %d", __func__, igi->igi_version)); 3282 3283 ifp = igi->igi_ifp; 3284 3285 IF_ADDR_LOCK(ifp); 3286 TAILQ_FOREACH_SAFE(ifma, &ifp->if_multiaddrs, ifma_link, tifma) { 3287 if (ifma->ifma_addr->sa_family != AF_INET || 3288 ifma->ifma_protospec == NULL) 3289 continue; 3290 3291 inm = (struct in_multi *)ifma->ifma_protospec; 3292 KASSERT(ifp == inm->inm_ifp, 3293 ("%s: inconsistent ifp", __func__)); 3294 3295 switch (inm->inm_state) { 3296 case IGMP_NOT_MEMBER: 3297 case IGMP_SILENT_MEMBER: 3298 break; 3299 case IGMP_REPORTING_MEMBER: 3300 case IGMP_IDLE_MEMBER: 3301 case IGMP_LAZY_MEMBER: 3302 case IGMP_SLEEPING_MEMBER: 3303 case IGMP_AWAKENING_MEMBER: 3304 inm->inm_state = IGMP_REPORTING_MEMBER; 3305 retval = igmp_v3_enqueue_group_record(&igi->igi_gq, 3306 inm, 0, 0, 0); 3307 CTR2(KTR_IGMPV3, "%s: enqueue record = %d", 3308 __func__, retval); 3309 break; 3310 case IGMP_G_QUERY_PENDING_MEMBER: 3311 case IGMP_SG_QUERY_PENDING_MEMBER: 3312 case IGMP_LEAVING_MEMBER: 3313 break; 3314 } 3315 } 3316 IF_ADDR_UNLOCK(ifp); 3317 3318 loop = (igi->igi_flags & IGIF_LOOPBACK) ? 1 : 0; 3319 igmp_dispatch_queue(&igi->igi_gq, IGMP_MAX_RESPONSE_BURST, loop); 3320 3321 /* 3322 * Slew transmission of bursts over 500ms intervals. 3323 */ 3324 if (igi->igi_gq.ifq_head != NULL) { 3325 igi->igi_v3_timer = 1 + IGMP_RANDOM_DELAY( 3326 IGMP_RESPONSE_BURST_INTERVAL); 3327 V_interface_timers_running = 1; 3328 } 3329 } 3330 3331 /* 3332 * Transmit the next pending IGMP message in the output queue. 3333 * 3334 * We get called from netisr_processqueue(). A mutex private to igmpoq 3335 * will be acquired and released around this routine. 3336 * 3337 * VIMAGE: Needs to store/restore vnet pointer on a per-mbuf-chain basis. 3338 * MRT: Nothing needs to be done, as IGMP traffic is always local to 3339 * a link and uses a link-scope multicast address. 3340 */ 3341 static void 3342 igmp_intr(struct mbuf *m) 3343 { 3344 struct ip_moptions imo; 3345 struct ifnet *ifp; 3346 struct mbuf *ipopts, *m0; 3347 int error; 3348 uint32_t ifindex; 3349 3350 CTR2(KTR_IGMPV3, "%s: transmit %p", __func__, m); 3351 3352 /* 3353 * Set VNET image pointer from enqueued mbuf chain 3354 * before doing anything else. Whilst we use interface 3355 * indexes to guard against interface detach, they are 3356 * unique to each VIMAGE and must be retrieved. 3357 */ 3358 CURVNET_SET((struct vnet *)(m->m_pkthdr.header)); 3359 INIT_VNET_NET(curvnet); 3360 INIT_VNET_INET(curvnet); 3361 ifindex = igmp_restore_context(m); 3362 3363 /* 3364 * Check if the ifnet still exists. This limits the scope of 3365 * any race in the absence of a global ifp lock for low cost 3366 * (an array lookup). 3367 */ 3368 ifp = ifnet_byindex(ifindex); 3369 if (ifp == NULL) { 3370 CTR3(KTR_IGMPV3, "%s: dropped %p as ifindex %u went away.", 3371 __func__, m, ifindex); 3372 m_freem(m); 3373 IPSTAT_INC(ips_noroute); 3374 goto out; 3375 } 3376 3377 ipopts = V_igmp_sendra ? m_raopt : NULL; 3378 3379 imo.imo_multicast_ttl = 1; 3380 imo.imo_multicast_vif = -1; 3381 imo.imo_multicast_loop = (V_ip_mrouter != NULL); 3382 3383 /* 3384 * If the user requested that IGMP traffic be explicitly 3385 * redirected to the loopback interface (e.g. they are running a 3386 * MANET interface and the routing protocol needs to see the 3387 * updates), handle this now. 3388 */ 3389 if (m->m_flags & M_IGMP_LOOP) 3390 imo.imo_multicast_ifp = V_loif; 3391 else 3392 imo.imo_multicast_ifp = ifp; 3393 3394 if (m->m_flags & M_IGMPV2) { 3395 m0 = m; 3396 } else { 3397 m0 = igmp_v3_encap_report(ifp, m); 3398 if (m0 == NULL) { 3399 CTR2(KTR_IGMPV3, "%s: dropped %p", __func__, m); 3400 m_freem(m); 3401 IPSTAT_INC(ips_odropped); 3402 goto out; 3403 } 3404 } 3405 3406 igmp_scrub_context(m0); 3407 m->m_flags &= ~(M_PROTOFLAGS); 3408 m0->m_pkthdr.rcvif = V_loif; 3409 #ifdef MAC 3410 mac_netinet_igmp_send(ifp, m0); 3411 #endif 3412 error = ip_output(m0, ipopts, NULL, 0, &imo, NULL); 3413 if (error) { 3414 CTR3(KTR_IGMPV3, "%s: ip_output(%p) = %d", __func__, m0, error); 3415 goto out; 3416 } 3417 3418 IGMPSTAT_INC(igps_snd_reports); 3419 3420 out: 3421 /* 3422 * We must restore the existing vnet pointer before 3423 * continuing as we are run from netisr context. 3424 */ 3425 CURVNET_RESTORE(); 3426 } 3427 3428 /* 3429 * Encapsulate an IGMPv3 report. 3430 * 3431 * The internal mbuf flag M_IGMPV3_HDR is used to indicate that the mbuf 3432 * chain has already had its IP/IGMPv3 header prepended. In this case 3433 * the function will not attempt to prepend; the lengths and checksums 3434 * will however be re-computed. 3435 * 3436 * Returns a pointer to the new mbuf chain head, or NULL if the 3437 * allocation failed. 3438 */ 3439 static struct mbuf * 3440 igmp_v3_encap_report(struct ifnet *ifp, struct mbuf *m) 3441 { 3442 INIT_VNET_INET(curvnet); 3443 struct igmp_report *igmp; 3444 struct ip *ip; 3445 int hdrlen, igmpreclen; 3446 3447 KASSERT((m->m_flags & M_PKTHDR), 3448 ("%s: mbuf chain %p is !M_PKTHDR", __func__, m)); 3449 3450 igmpreclen = m_length(m, NULL); 3451 hdrlen = sizeof(struct ip) + sizeof(struct igmp_report); 3452 3453 if (m->m_flags & M_IGMPV3_HDR) { 3454 igmpreclen -= hdrlen; 3455 } else { 3456 M_PREPEND(m, hdrlen, M_DONTWAIT); 3457 if (m == NULL) 3458 return (NULL); 3459 m->m_flags |= M_IGMPV3_HDR; 3460 } 3461 3462 CTR2(KTR_IGMPV3, "%s: igmpreclen is %d", __func__, igmpreclen); 3463 3464 m->m_data += sizeof(struct ip); 3465 m->m_len -= sizeof(struct ip); 3466 3467 igmp = mtod(m, struct igmp_report *); 3468 igmp->ir_type = IGMP_v3_HOST_MEMBERSHIP_REPORT; 3469 igmp->ir_rsv1 = 0; 3470 igmp->ir_rsv2 = 0; 3471 igmp->ir_numgrps = htons(m->m_pkthdr.PH_vt.vt_nrecs); 3472 igmp->ir_cksum = 0; 3473 igmp->ir_cksum = in_cksum(m, sizeof(struct igmp_report) + igmpreclen); 3474 m->m_pkthdr.PH_vt.vt_nrecs = 0; 3475 3476 m->m_data -= sizeof(struct ip); 3477 m->m_len += sizeof(struct ip); 3478 3479 ip = mtod(m, struct ip *); 3480 ip->ip_tos = IPTOS_PREC_INTERNETCONTROL; 3481 ip->ip_len = hdrlen + igmpreclen; 3482 ip->ip_off = IP_DF; 3483 ip->ip_p = IPPROTO_IGMP; 3484 ip->ip_sum = 0; 3485 3486 ip->ip_src.s_addr = INADDR_ANY; 3487 3488 if (m->m_flags & M_IGMP_LOOP) { 3489 struct in_ifaddr *ia; 3490 3491 IFP_TO_IA(ifp, ia); 3492 if (ia != NULL) 3493 ip->ip_src = ia->ia_addr.sin_addr; 3494 } 3495 3496 ip->ip_dst.s_addr = htonl(INADDR_ALLRPTS_GROUP); 3497 3498 return (m); 3499 } 3500 3501 #ifdef KTR 3502 static char * 3503 igmp_rec_type_to_str(const int type) 3504 { 3505 3506 switch (type) { 3507 case IGMP_CHANGE_TO_EXCLUDE_MODE: 3508 return "TO_EX"; 3509 break; 3510 case IGMP_CHANGE_TO_INCLUDE_MODE: 3511 return "TO_IN"; 3512 break; 3513 case IGMP_MODE_IS_EXCLUDE: 3514 return "MODE_EX"; 3515 break; 3516 case IGMP_MODE_IS_INCLUDE: 3517 return "MODE_IN"; 3518 break; 3519 case IGMP_ALLOW_NEW_SOURCES: 3520 return "ALLOW_NEW"; 3521 break; 3522 case IGMP_BLOCK_OLD_SOURCES: 3523 return "BLOCK_OLD"; 3524 break; 3525 default: 3526 break; 3527 } 3528 return "unknown"; 3529 } 3530 #endif 3531 3532 static void 3533 igmp_sysinit(void) 3534 { 3535 3536 CTR1(KTR_IGMPV3, "%s: initializing", __func__); 3537 3538 IGMP_LOCK_INIT(); 3539 3540 mtx_init(&igmpoq.ifq_mtx, "igmpoq_mtx", NULL, MTX_DEF); 3541 IFQ_SET_MAXLEN(&igmpoq, IFQ_MAXLEN); 3542 3543 m_raopt = igmp_ra_alloc(); 3544 3545 netisr_register(NETISR_IGMP, igmp_intr, &igmpoq, 0); 3546 } 3547 3548 static void 3549 igmp_sysuninit(void) 3550 { 3551 3552 CTR1(KTR_IGMPV3, "%s: tearing down", __func__); 3553 3554 netisr_unregister(NETISR_IGMP); 3555 mtx_destroy(&igmpoq.ifq_mtx); 3556 3557 m_free(m_raopt); 3558 m_raopt = NULL; 3559 3560 IGMP_LOCK_DESTROY(); 3561 } 3562 3563 /* 3564 * Initialize an IGMPv3 instance. 3565 * VIMAGE: Assumes curvnet set by caller and called per vimage. 3566 */ 3567 static int 3568 vnet_igmp_iattach(const void *unused __unused) 3569 { 3570 INIT_VNET_INET(curvnet); 3571 3572 CTR1(KTR_IGMPV3, "%s: initializing", __func__); 3573 3574 LIST_INIT(&V_igi_head); 3575 3576 V_current_state_timers_running = 0; 3577 V_state_change_timers_running = 0; 3578 V_interface_timers_running = 0; 3579 3580 /* 3581 * Initialize sysctls to default values. 3582 */ 3583 V_igmp_recvifkludge = 1; 3584 V_igmp_sendra = 1; 3585 V_igmp_sendlocal = 1; 3586 V_igmp_v1enable = 1; 3587 V_igmp_v2enable = 1; 3588 V_igmp_legacysupp = 0; 3589 V_igmp_default_version = IGMP_VERSION_3; 3590 V_igmp_gsrdelay.tv_sec = 10; 3591 V_igmp_gsrdelay.tv_usec = 0; 3592 3593 memset(&V_igmpstat, 0, sizeof(struct igmpstat)); 3594 V_igmpstat.igps_version = IGPS_VERSION_3; 3595 V_igmpstat.igps_len = sizeof(struct igmpstat); 3596 3597 return (0); 3598 } 3599 3600 static int 3601 vnet_igmp_idetach(const void *unused __unused) 3602 { 3603 #ifdef INVARIANTS 3604 INIT_VNET_INET(curvnet); 3605 #endif 3606 3607 CTR1(KTR_IGMPV3, "%s: tearing down", __func__); 3608 3609 KASSERT(LIST_EMPTY(&V_igi_head), 3610 ("%s: igi list not empty; ifnets not detached?", __func__)); 3611 3612 return (0); 3613 } 3614 3615 #ifndef VIMAGE_GLOBALS 3616 static vnet_modinfo_t vnet_igmp_modinfo = { 3617 .vmi_id = VNET_MOD_IGMP, 3618 .vmi_name = "igmp", 3619 .vmi_dependson = VNET_MOD_INET, 3620 .vmi_iattach = vnet_igmp_iattach, 3621 .vmi_idetach = vnet_igmp_idetach 3622 }; 3623 #endif 3624 3625 static int 3626 igmp_modevent(module_t mod, int type, void *unused __unused) 3627 { 3628 3629 switch (type) { 3630 case MOD_LOAD: 3631 igmp_sysinit(); 3632 #ifndef VIMAGE_GLOBALS 3633 vnet_mod_register(&vnet_igmp_modinfo); 3634 #else 3635 vnet_igmp_iattach(NULL); 3636 #endif 3637 break; 3638 case MOD_UNLOAD: 3639 #ifndef VIMAGE_GLOBALS 3640 vnet_mod_deregister(&vnet_igmp_modinfo); 3641 #else 3642 vnet_igmp_idetach(NULL); 3643 #endif 3644 igmp_sysuninit(); 3645 break; 3646 default: 3647 return (EOPNOTSUPP); 3648 } 3649 return (0); 3650 } 3651 3652 static moduledata_t igmp_mod = { 3653 "igmp", 3654 igmp_modevent, 3655 0 3656 }; 3657 DECLARE_MODULE(igmp, igmp_mod, SI_SUB_PSEUDO, SI_ORDER_ANY); 3658