1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 /* Following protected by igmp_timer_lock */ 90 static int igmp_time_to_next; /* Time since last timeout */ 91 static int igmp_timer_fired_last; 92 uint_t igmp_deferred_next = INFINITY; 93 timeout_id_t igmp_timeout_id = 0; 94 kmutex_t igmp_timer_lock; 
/* Protected by igmp_slowtimeout_lock */
timeout_id_t igmp_slowtimeout_id = 0;
kmutex_t igmp_slowtimeout_lock;

/* Following protected by mld_timer_lock */
static int mld_time_to_next;	/* Time since last timeout */
static int mld_timer_fired_last;
uint_t mld_deferred_next = INFINITY;
timeout_id_t mld_timeout_id = 0;
kmutex_t mld_timer_lock;

/* Protected by mld_slowtimeout_lock */
timeout_id_t mld_slowtimeout_id = 0;
kmutex_t mld_slowtimeout_lock;

/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * igmp_start_timers:
 * Ensure the global IGMP timeout will fire within 'next' milliseconds,
 * (re)scheduling the timeout() if the currently pending one would fire
 * too late.  The first multicast join triggers the igmp timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next)
{
	int	time_left;
	/* Protected by igmp_timer_lock */
	static  boolean_t igmp_timer_setter_active;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&igmp_timer_lock);

	if (igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		mutex_exit(&igmp_timer_lock);
		return;
	} else {
		igmp_timer_setter_active = B_TRUE;
	}
	if (igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		igmp_time_to_next = next;
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = igmp_timer_fired_last +
	    MSEC_TO_TICK(igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Pending timeout already fires soon enough; keep it. */
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/* Drop the lock across the (potentially blocking) untimeout(). */
	mutex_exit(&igmp_timer_lock);
	ret = untimeout(igmp_timeout_id);
	mutex_enter(&igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(igmp_timeout_id == 0);
	} else {
		ASSERT(igmp_timeout_id != 0);
		igmp_timeout_id = 0;
	}
	if (igmp_time_to_next != 0) {
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
	}
	igmp_timer_setter_active = B_FALSE;
	mutex_exit(&igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next)
{
	int	time_left;
	/* Protected by mld_timer_lock */
	static  boolean_t mld_timer_setter_active;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&mld_timer_lock);
	if (mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		mld_time_to_next = MIN(mld_time_to_next, next);
		mutex_exit(&mld_timer_lock);
		return;
	} else {
		mld_timer_setter_active = B_TRUE;
	}
	if (mld_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		mld_time_to_next = next;
		mld_timeout_id = timeout(mld_timeout_handler, NULL,
		    MSEC_TO_TICK(mld_time_to_next));
		mld_timer_setter_active = B_FALSE;
		mutex_exit(&mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = mld_timer_fired_last +
	    MSEC_TO_TICK(mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Pending timeout already fires soon enough; keep it. */
		mld_timer_setter_active = B_FALSE;
		mutex_exit(&mld_timer_lock);
		return;
	}

	/* Drop the lock across the (potentially blocking) untimeout(). */
	mutex_exit(&mld_timer_lock);
	ret = untimeout(mld_timeout_id);
	mutex_enter(&mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(mld_timeout_id == 0);
	} else {
		ASSERT(mld_timeout_id != 0);
		mld_timeout_id = 0;
	}
	if (mld_time_to_next != 0) {
		mld_time_to_next = MIN(mld_time_to_next, next);
		mld_timeout_id = timeout(mld_timeout_handler, NULL,
		    MSEC_TO_TICK(mld_time_to_next));
	}
	mld_timer_setter_active = B_FALSE;
	mutex_exit(&mld_timer_lock);
}

/*
 * igmp_input:
 * Return 0 if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
/* ARGSUSED */
int
igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	igmpa_t 	*igmpa;
	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
	int		iphlen, igmplen, mblklen;
	ilm_t 		*ilm;
	uint32_t	src, dst;
	uint32_t 	group;
	uint_t		next;
	ipif_t 		*ipif;

	ASSERT(ill != NULL);
	ASSERT(!ill->ill_isv6);
	++igmpstat.igps_rcv_total;

	mblklen = MBLKL(mp);
	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
		++igmpstat.igps_rcv_tooshort;
		freemsg(mp);
		return (-1);
	}
	igmplen = ntohs(ipha->ipha_length) - iphlen;
	/*
	 * Since msg sizes are more variable with v3, just pullup the
	 * whole thing now.
	 */
	if (MBLKL(mp) < (igmplen + iphlen)) {
		mblk_t *mp1;
		if ((mp1 = msgpullup(mp, -1)) == NULL) {
			++igmpstat.igps_rcv_tooshort;
			freemsg(mp);
			return (-1);
		}
		freemsg(mp);
		mp = mp1;
		ipha = (ipha_t *)(mp->b_rptr);
	}

	/*
	 * Validate lengths
	 */
	if (igmplen < IGMP_MINLEN) {
		++igmpstat.igps_rcv_tooshort;
		freemsg(mp);
		return (-1);
	}
	/*
	 * Validate checksum
	 */
	if (IP_CSUM(mp, iphlen, 0)) {
		++igmpstat.igps_rcv_badsum;
		freemsg(mp);
		return (-1);
	}

	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
	src = ipha->ipha_src;
	dst = ipha->ipha_dst;
	if (ip_debug > 1)
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
		    (int)ntohl(src), (int)ntohl(dst),
		    ill->ill_name);

	switch (igmpa->igmpa_type) {
	case IGMP_MEMBERSHIP_QUERY:
		/*
		 * packet length differentiates between v1/v2 and v3
		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
		 */
		if (igmplen == IGMP_MINLEN) {
			next = igmp_query_in(ipha, igmpa, ill);
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
			    igmplen);
		} else {
			++igmpstat.igps_rcv_tooshort;
			freemsg(mp);
			return (-1);
		}
		if (next == 0) {
			/* Query was invalid; drop it. */
			freemsg(mp);
			return (-1);
		}

		if (next != INFINITY)
			igmp_start_timers(next);

		break;

	case IGMP_V1_MEMBERSHIP_REPORT:
	case IGMP_V2_MEMBERSHIP_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == src) {
				if (ip_debug > 1) {
					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "igmp_input: we are only "
					    "member src 0x%x ipif_local 0x%x",
					    (int)ntohl(src),
					    (int)
					    ntohl(ipif->ipif_lcl_addr));
				}
				mutex_exit(&ill->ill_lock);
				return (0);
			}
		}
		mutex_exit(&ill->ill_lock);

		++igmpstat.igps_rcv_reports;
		group = igmpa->igmpa_group;
		if (!CLASSD(group)) {
			++igmpstat.igps_rcv_badreports;
			freemsg(mp);
			return (-1);
		}

		/*
		 * KLUDGE: if the IP source address of the report has an
		 * unspecified (i.e., zero) subnet number, as is allowed for
		 * a booting host, replace it with the correct subnet number
		 * so that a process-level multicast routing demon can
		 * determine which subnet it arrived from.  This is necessary
		 * to compensate for the lack of any way for a process to
		 * determine the arrival interface of an incoming packet.
		 *
		 * Requires that a copy of *this* message is passed up
		 * to the raw interface which is done by our caller.
		 */
		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
			/* Pick the first ipif on this ill */
			mutex_enter(&ill->ill_lock);
			src = ill->ill_ipif->ipif_subnet;
			mutex_exit(&ill->ill_lock);
			ip1dbg(("igmp_input: changed src to 0x%x\n",
			    (int)ntohl(src)));
			ipha->ipha_src = src;
		}

		/*
		 * If we belong to the group being reported, and
		 * we are a 'Delaying member' in the RFC terminology,
		 * stop our timer for that group and 'clear flag' i.e.
		 * mark as IGMP_OTHERMEMBER. Do this for all logical
		 * interfaces on the given physical interface.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			ilm = ilm_lookup_ipif(ipif, group);
			if (ilm != NULL) {
				++igmpstat.igps_rcv_ourreports;
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_OTHERMEMBER;
			}
		} /* for */
		mutex_exit(&ill->ill_lock);
		break;

	case IGMP_V3_MEMBERSHIP_REPORT:
		/*
		 * Currently nothing to do here; IGMP router is not
		 * implemented in ip, and v3 hosts don't pay attention
		 * to membership reports.
		 */
		break;
	}
	/*
	 * Pass all valid IGMP packets up to any process(es) listening
	 * on a raw IGMP socket. Do not free the packet.
	 */
	return (0);
}

/*
 * igmp_query_in:
 * Process an IGMPv1/v2 membership query.  Returns 0 if the query was
 * invalid, otherwise the soonest timer value (ms) that was armed in any
 * of this interface's membership records (INFINITY if none).
 */
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next;

	++igmpstat.igps_rcv_queries;

	/*
	 * In the IGMPv2 specification, there are 3 states and a flag.
	 *
	 * In Non-Member state, we simply don't have a membership record.
	 * In Delaying Member state, our timer is running (ilm->ilm_timer
	 * < INFINITY). In Idle Member state, our timer is not running
	 * (ilm->ilm_timer == INFINITY).
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	if (igmpa->igmpa_code == 0) {
		/*
		 * Query from an old router.
		 * Remember that the querier on this interface is old,
		 * and set the timer to the value in RFC 1112.
		 */

		mutex_enter(&ill->ill_lock);
		ill->ill_mcast_v1_time = 0;
		ill->ill_mcast_v1_tset = 1;
		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
			    "to IGMP_V1_ROUTER\n", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
			ill->ill_mcast_type = IGMP_V1_ROUTER;
		}
		mutex_exit(&ill->ill_lock);

		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

		/* v1 queries must go to 224.0.0.1 with a zero group field */
		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
		    igmpa->igmpa_group != 0) {
			++igmpstat.igps_rcv_badqueries;
			return (0);
		}

	} else {
		in_addr_t group;

		/*
		 * Query from a new router
		 * Simply do a validity check
		 */
		group = igmpa->igmpa_group;
		if (group != 0 && (!CLASSD(group))) {
			++igmpstat.igps_rcv_badqueries;
			return (0);
		}

		/*
		 * Switch interface state to v2 on receipt of a v2 query
		 * ONLY IF current state is v3.  Let things be if current
		 * state is v1 but do reset the v2-querier-present timer.
		 */
		mutex_enter(&ill->ill_lock);
		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
			    "to IGMP_V2_ROUTER", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
			ill->ill_mcast_type = IGMP_V2_ROUTER;
		}
		ill->ill_mcast_v2_time = 0;
		ill->ill_mcast_v2_tset = 1;
		mutex_exit(&ill->ill_lock);

		/* v2 max response time arrives in tenths of a second */
		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
	}

	if (ip_debug > 1) {
		mutex_enter(&ill->ill_lock);
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
		    (int)ntohs(igmpa->igmpa_code),
		    (int)ntohs(igmpa->igmpa_type));
		mutex_exit(&ill->ill_lock);
	}

	/*
	 * -Start the timers in all of our membership records
	 * for the physical interface on which the query
	 * arrived, excluding those that belong to the "all
	 * hosts" group (224.0.0.1).
572 * 573 * -Restart any timer that is already running but has 574 * a value longer than the requested timeout. 575 * 576 * -Use the value specified in the query message as 577 * the maximum timeout. 578 */ 579 next = (unsigned)INFINITY; 580 mutex_enter(&ill->ill_lock); 581 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 582 583 /* 584 * A multicast router joins INADDR_ANY address 585 * to enable promiscuous reception of all 586 * mcasts from the interface. This INADDR_ANY 587 * is stored in the ilm_v6addr as V6 unspec addr 588 */ 589 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 590 continue; 591 if (ilm->ilm_addr == htonl(INADDR_ANY)) 592 continue; 593 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 594 (igmpa->igmpa_group == 0) || 595 (igmpa->igmpa_group == ilm->ilm_addr)) { 596 if (ilm->ilm_timer > timer) { 597 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 598 if (ilm->ilm_timer < next) 599 next = ilm->ilm_timer; 600 } 601 } 602 } 603 mutex_exit(&ill->ill_lock); 604 605 return (next); 606 } 607 608 static uint_t 609 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 610 { 611 uint_t i, next, mrd, qqi, timer, delay, numsrc; 612 ilm_t *ilm; 613 ipaddr_t *src_array; 614 uint8_t qrv; 615 616 /* make sure numsrc matches packet size */ 617 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 618 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 619 ++igmpstat.igps_rcv_tooshort; 620 return (0); 621 } 622 src_array = (ipaddr_t *)&igmp3qa[1]; 623 624 ++igmpstat.igps_rcv_queries; 625 626 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 627 uint_t hdrval, mant, exp; 628 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 629 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 630 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 631 mrd = (mant | 0x10) << (exp + 3); 632 } 633 if (mrd == 0) 634 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 635 timer = DSEC_TO_MSEC(mrd); 636 MCAST_RANDOM_DELAY(delay, timer); 637 next = (unsigned)INFINITY; 638 639 if ((qrv = 
igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 640 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 641 else 642 ill->ill_mcast_rv = qrv; 643 644 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 645 uint_t hdrval, mant, exp; 646 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 647 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 648 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 649 qqi = (mant | 0x10) << (exp + 3); 650 } 651 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 652 653 /* 654 * If we have a pending general query response that's scheduled 655 * sooner than the delay we calculated for this response, then 656 * no action is required (RFC3376 section 5.2 rule 1) 657 */ 658 mutex_enter(&ill->ill_lock); 659 if (ill->ill_global_timer < delay) { 660 mutex_exit(&ill->ill_lock); 661 return (next); 662 } 663 mutex_exit(&ill->ill_lock); 664 665 /* 666 * Now take action depending upon query type: 667 * general, group specific, or group/source specific. 668 */ 669 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 670 /* 671 * general query 672 * We know global timer is either not running or is 673 * greater than our calculated delay, so reset it to 674 * our delay (random value in range [0, response time]). 675 */ 676 mutex_enter(&ill->ill_lock); 677 ill->ill_global_timer = delay; 678 next = ill->ill_global_timer; 679 mutex_exit(&ill->ill_lock); 680 681 } else { 682 /* group or group/source specific query */ 683 mutex_enter(&ill->ill_lock); 684 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 685 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 686 (ilm->ilm_addr == htonl(INADDR_ANY)) || 687 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 688 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 689 continue; 690 /* 691 * If the query is group specific or we have a 692 * pending group specific query, the response is 693 * group specific (pending sources list should be 694 * empty). 
			 * Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					IN6_IPADDR_TO_V4MAPPED(src_array[i],
					    &(pktl->sl_addr[i]));
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* choose soonest timer */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * igmp_joingroup:
 * Send the initial membership report for a newly joined IPv4 group,
 * using the report format matching the querier version recorded on the
 * interface, and arm the ilm report (retransmit) timer.  The all-hosts
 * group (224.0.0.1) is never reported.  Caller must be the ipsq writer
 * for the ill.
 */
void
igmp_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		ip1dbg(("Querier mode %d, sending report, group %x\n",
		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ilm->ilm_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state. Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call igmp_start_timers from
		 * here. igmp_start_timers needs to call untimeout, and we
		 * can't hold the ipsq across untimeout since
		 * igmp_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit.
		 */
		mutex_enter(&igmp_timer_lock);
		igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    igmp_deferred_next);
		mutex_exit(&igmp_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * mld_joingroup:
 * IPv6 counterpart of igmp_joingroup(): send the initial listener
 * report for a newly joined group (the all-hosts address is never
 * reported) and arm the ilm report timer.  Caller must be the ipsq
 * writer for the ill.
 */
void
mld_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			mldv2_sendrpt(ill, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state. Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call mld_start_timers from
		 * here. mld_start_timers needs to call untimeout, and we
		 * can't hold the ipsq (i.e. the lock) across untimeout
		 * since mld_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit
		 */
		mutex_enter(&mld_timer_lock);
		mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    mld_deferred_next);
		mutex_exit(&mld_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * igmp_leavegroup:
 * Send the appropriate leave notification for an IPv4 group: a v2
 * Leave Group message if we sent the last report and the querier is
 * v2, or a v3 state-change report if the querier is v3.  Nothing is
 * sent for the all-hosts group.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * IPv6 counterpart of igmp_leavegroup(): send an MLDv1 Listener Done
 * if we sent the last report and the querier is v1, or an MLDv2
 * state-change report if the querier is v2.  Nothing is sent for the
 * all-hosts address.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * Build and send an IGMPv3 State Change Report describing the
 * transition from the ilm's current filter mode/source list to
 * (fmode, flist), and merge the change into the ilm's retransmission
 * state.  Only relevant when the querier on the interface is v3.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the diff lists; fall back to a
			 * (less specific but still correct) TO_EX/TO_IN
			 * report.  b_minus_a is NULL here, so only
			 * a_minus_b may need freeing.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
1060 */ 1061 rp = mcast_merge_rtx(ilm, rp, flist); 1062 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1063 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1064 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 1065 mutex_enter(&igmp_timer_lock); 1066 igmp_deferred_next = MIN(igmp_deferred_next, 1067 ilm->ilm_rtx.rtx_timer); 1068 mutex_exit(&igmp_timer_lock); 1069 } 1070 1071 mutex_exit(&ill->ill_lock); 1072 igmpv3_sendrpt(ilm->ilm_ipif, rp); 1073 } 1074 1075 void 1076 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) 1077 { 1078 ill_t *ill; 1079 mrec_t *rp = NULL; 1080 1081 ASSERT(ilm != NULL); 1082 1083 ill = ilm->ilm_ill; 1084 1085 /* only need to send if we have an mldv2-capable router */ 1086 mutex_enter(&ill->ill_lock); 1087 if (ill->ill_mcast_type != MLD_V2_ROUTER) { 1088 mutex_exit(&ill->ill_lock); 1089 return; 1090 } 1091 1092 /* 1093 * Compare existing (old) state with the new state passed in 1094 * and send appropriate MLDv2 State Change Report. 1095 * 1096 * Old State New State State Change Report 1097 * 1098 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 1099 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 1100 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 1101 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 1102 */ 1103 if (ilm->ilm_fmode == fmode) { 1104 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1105 slist_t *allow, *block; 1106 if (((a_minus_b = l_alloc()) == NULL) || 1107 ((b_minus_a = l_alloc()) == NULL)) { 1108 l_free(a_minus_b); 1109 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1110 goto send_to_ex; 1111 else 1112 goto send_to_in; 1113 } 1114 l_difference(ilm->ilm_filter, flist, a_minus_b); 1115 l_difference(flist, ilm->ilm_filter, b_minus_a); 1116 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1117 allow = b_minus_a; 1118 block = a_minus_b; 1119 } else { 1120 allow = a_minus_b; 1121 block = b_minus_a; 1122 } 1123 if (!SLIST_IS_EMPTY(allow)) 1124 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1125 allow, rp); 1126 if (!SLIST_IS_EMPTY(block)) 1127 rp = 
mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1128 block, rp); 1129 l_free(a_minus_b); 1130 l_free(b_minus_a); 1131 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1132 send_to_ex: 1133 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1134 NULL); 1135 } else { 1136 send_to_in: 1137 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1138 NULL); 1139 } 1140 1141 /* 1142 * Need to set up retransmission state; merge the new info with the 1143 * current state (which may be null). If the timer is not currently 1144 * running, start it (need to do a deferred start of the timer as 1145 * we're currently in the sq). 1146 */ 1147 rp = mcast_merge_rtx(ilm, rp, flist); 1148 ASSERT(ilm->ilm_rtx.rtx_cnt > 0); 1149 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1150 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1151 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 1152 mutex_enter(&mld_timer_lock); 1153 mld_deferred_next = 1154 MIN(mld_deferred_next, ilm->ilm_rtx.rtx_timer); 1155 mutex_exit(&mld_timer_lock); 1156 } 1157 1158 mutex_exit(&ill->ill_lock); 1159 mldv2_sendrpt(ill, rp); 1160 } 1161 1162 uint_t 1163 igmp_timeout_handler_per_ill(ill_t *ill, int elapsed) 1164 { 1165 uint_t next = INFINITY; 1166 ilm_t *ilm; 1167 ipif_t *ipif; 1168 mrec_t *rp = NULL; 1169 mrec_t *rtxrp = NULL; 1170 rtx_state_t *rtxp; 1171 mcast_record_t rtype; 1172 1173 ASSERT(IAM_WRITER_ILL(ill)); 1174 1175 mutex_enter(&ill->ill_lock); 1176 1177 /* First check the global timer on this interface */ 1178 if (ill->ill_global_timer == INFINITY) 1179 goto per_ilm_timer; 1180 if (ill->ill_global_timer <= elapsed) { 1181 ill->ill_global_timer = INFINITY; 1182 /* 1183 * Send report for each group on this interface. 1184 * Since we just set the global timer (received a v3 general 1185 * query), need to skip the all hosts addr (224.0.0.1), per 1186 * RFC 3376 section 5. 
1187 */ 1188 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1189 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) 1190 continue; 1191 ASSERT(ilm->ilm_ipif != NULL); 1192 ilm->ilm_ipif->ipif_igmp_rpt = 1193 mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr, 1194 ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt); 1195 /* 1196 * Since we're sending a report on this group, okay 1197 * to delete pending group-specific timers. Note 1198 * that group-specific retransmit timers still need 1199 * to be checked in the per_ilm_timer for-loop. 1200 */ 1201 ilm->ilm_timer = INFINITY; 1202 ilm->ilm_state = IGMP_IREPORTEDLAST; 1203 FREE_SLIST(ilm->ilm_pendsrcs); 1204 ilm->ilm_pendsrcs = NULL; 1205 } 1206 /* 1207 * We've built per-ipif mrec lists; walk the ill's ipif list 1208 * and send a report for each ipif that has an mrec list. 1209 */ 1210 for (ipif = ill->ill_ipif; ipif != NULL; 1211 ipif = ipif->ipif_next) { 1212 if (ipif->ipif_igmp_rpt == NULL) 1213 continue; 1214 mutex_exit(&ill->ill_lock); 1215 igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt); 1216 mutex_enter(&ill->ill_lock); 1217 /* mrec list was freed by igmpv3_sendrpt() */ 1218 ipif->ipif_igmp_rpt = NULL; 1219 } 1220 } else { 1221 ill->ill_global_timer -= elapsed; 1222 if (ill->ill_global_timer < next) 1223 next = ill->ill_global_timer; 1224 } 1225 1226 per_ilm_timer: 1227 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1228 if (ilm->ilm_timer == INFINITY) 1229 goto per_ilm_rtxtimer; 1230 1231 if (ilm->ilm_timer > elapsed) { 1232 ilm->ilm_timer -= elapsed; 1233 if (ilm->ilm_timer < next) 1234 next = ilm->ilm_timer; 1235 1236 if (ip_debug > 1) { 1237 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1238 "igmp_timo_hlr 2: ilm_timr %d elap %d " 1239 "typ %d nxt %d", 1240 (int)ntohl(ilm->ilm_timer), elapsed, 1241 (ill->ill_mcast_type), next); 1242 } 1243 1244 goto per_ilm_rtxtimer; 1245 } 1246 1247 /* the timer has expired, need to take action */ 1248 ilm->ilm_timer = INFINITY; 1249 ilm->ilm_state = 
IGMP_IREPORTEDLAST; 1250 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1251 mutex_exit(&ill->ill_lock); 1252 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1253 mutex_enter(&ill->ill_lock); 1254 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1255 mutex_exit(&ill->ill_lock); 1256 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1257 mutex_enter(&ill->ill_lock); 1258 } else { 1259 slist_t *rsp; 1260 if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) && 1261 (rsp = l_alloc()) != NULL) { 1262 /* 1263 * Contents of reply depend on pending 1264 * requested source list. 1265 */ 1266 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1267 l_intersection(ilm->ilm_filter, 1268 ilm->ilm_pendsrcs, rsp); 1269 } else { 1270 l_difference(ilm->ilm_pendsrcs, 1271 ilm->ilm_filter, rsp); 1272 } 1273 FREE_SLIST(ilm->ilm_pendsrcs); 1274 ilm->ilm_pendsrcs = NULL; 1275 if (!SLIST_IS_EMPTY(rsp)) 1276 rp = mcast_bldmrec(MODE_IS_INCLUDE, 1277 &ilm->ilm_v6addr, rsp, rp); 1278 FREE_SLIST(rsp); 1279 } else { 1280 /* 1281 * Either the pending request is just group- 1282 * specific, or we couldn't get the resources 1283 * (rsp) to build a source-specific reply. 
1284 */ 1285 rp = mcast_bldmrec(ilm->ilm_fmode, 1286 &ilm->ilm_v6addr, ilm->ilm_filter, rp); 1287 } 1288 mutex_exit(&ill->ill_lock); 1289 igmpv3_sendrpt(ill->ill_ipif, rp); 1290 mutex_enter(&ill->ill_lock); 1291 rp = NULL; 1292 } 1293 1294 if (ip_debug > 1) { 1295 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1296 "igmp_timo_hlr 1: ilm_timr %d elap %d " 1297 "typ %d nxt %d", 1298 (int)ntohl(ilm->ilm_timer), elapsed, 1299 (ill->ill_mcast_type), next); 1300 } 1301 1302 per_ilm_rtxtimer: 1303 rtxp = &ilm->ilm_rtx; 1304 1305 if (rtxp->rtx_timer == INFINITY) 1306 continue; 1307 if (rtxp->rtx_timer > elapsed) { 1308 rtxp->rtx_timer -= elapsed; 1309 if (rtxp->rtx_timer < next) 1310 next = rtxp->rtx_timer; 1311 continue; 1312 } 1313 1314 rtxp->rtx_timer = INFINITY; 1315 ilm->ilm_state = IGMP_IREPORTEDLAST; 1316 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1317 mutex_exit(&ill->ill_lock); 1318 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1319 mutex_enter(&ill->ill_lock); 1320 continue; 1321 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1322 mutex_exit(&ill->ill_lock); 1323 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1324 mutex_enter(&ill->ill_lock); 1325 continue; 1326 } 1327 1328 /* 1329 * The retransmit timer has popped, and our router is 1330 * IGMPv3. We have to delve into the retransmit state 1331 * stored in the ilm. 1332 * 1333 * Decrement the retransmit count. If the fmode rtx 1334 * count is active, decrement it, and send a filter 1335 * mode change report with the ilm's source list. 1336 * Otherwise, send a source list change report with 1337 * the current retransmit lists. 1338 */ 1339 ASSERT(rtxp->rtx_cnt > 0); 1340 ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt); 1341 rtxp->rtx_cnt--; 1342 if (rtxp->rtx_fmode_cnt > 0) { 1343 rtxp->rtx_fmode_cnt--; 1344 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 
1345 CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE; 1346 rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 1347 ilm->ilm_filter, rtxrp); 1348 } else { 1349 rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES, 1350 &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp); 1351 rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES, 1352 &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp); 1353 } 1354 if (rtxp->rtx_cnt > 0) { 1355 MCAST_RANDOM_DELAY(rtxp->rtx_timer, 1356 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 1357 if (rtxp->rtx_timer < next) 1358 next = rtxp->rtx_timer; 1359 } else { 1360 CLEAR_SLIST(rtxp->rtx_allow); 1361 CLEAR_SLIST(rtxp->rtx_block); 1362 } 1363 mutex_exit(&ill->ill_lock); 1364 igmpv3_sendrpt(ilm->ilm_ipif, rtxrp); 1365 mutex_enter(&ill->ill_lock); 1366 rtxrp = NULL; 1367 } 1368 1369 mutex_exit(&ill->ill_lock); 1370 1371 return (next); 1372 } 1373 1374 /* 1375 * igmp_timeout_handler: 1376 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick). 1377 * Returns number of ticks to next event (or 0 if none). 1378 * 1379 * As part of multicast join and leave igmp we may need to send out an 1380 * igmp request. The igmp related state variables in the ilm are protected 1381 * by ill_lock. A single global igmp timer is used to track igmp timeouts. 1382 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers 1383 * starts the igmp timer if needed. It serializes multiple threads trying to 1384 * simultaneously start the timer using the igmp_timer_setter_active flag. 1385 * 1386 * igmp_input() receives igmp queries and responds to the queries 1387 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers(). 1388 * Later the igmp_timer fires, the timeout handler igmp_timerout_handler() 1389 * performs the action exclusively after entering each ill's ipsq as writer. 
1390 * The actual igmp timeout handler needs to run in the ipsq since it has to 1391 * access the ilm's and we don't want another exclusive operation like 1392 * say an IPMP failover to be simultaneously moving the ilms from one ill to 1393 * another. 1394 * 1395 * The igmp_slowtimeo() function is called thru another timer. 1396 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id 1397 */ 1398 1399 /* ARGSUSED */ 1400 void 1401 igmp_timeout_handler(void *arg) 1402 { 1403 ill_t *ill; 1404 int elapsed; /* Since last call */ 1405 uint_t global_next = INFINITY; 1406 uint_t next; 1407 ill_walk_context_t ctx; 1408 boolean_t success; 1409 1410 mutex_enter(&igmp_timer_lock); 1411 ASSERT(igmp_timeout_id != 0); 1412 igmp_timer_fired_last = ddi_get_lbolt(); 1413 elapsed = igmp_time_to_next; 1414 igmp_time_to_next = 0; 1415 mutex_exit(&igmp_timer_lock); 1416 1417 rw_enter(&ill_g_lock, RW_READER); 1418 ill = ILL_START_WALK_V4(&ctx); 1419 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1420 ASSERT(!ill->ill_isv6); 1421 /* 1422 * We may not be able to refhold the ill if the ill/ipif 1423 * is changing. But we need to make sure that the ill will 1424 * not vanish. So we just bump up the ill_waiter count. 1425 */ 1426 if (!ill_waiter_inc(ill)) 1427 continue; 1428 rw_exit(&ill_g_lock); 1429 success = ipsq_enter(ill, B_TRUE); 1430 if (success) { 1431 next = igmp_timeout_handler_per_ill(ill, elapsed); 1432 if (next < global_next) 1433 global_next = next; 1434 ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE, 1435 B_TRUE); 1436 } 1437 rw_enter(&ill_g_lock, RW_READER); 1438 ill_waiter_dcr(ill); 1439 } 1440 rw_exit(&ill_g_lock); 1441 1442 mutex_enter(&igmp_timer_lock); 1443 ASSERT(igmp_timeout_id != 0); 1444 igmp_timeout_id = 0; 1445 mutex_exit(&igmp_timer_lock); 1446 1447 if (global_next != INFINITY) 1448 igmp_start_timers(global_next); 1449 } 1450 1451 /* 1452 * mld_timeout_handler: 1453 * Called when there are timeout events, every next (tick). 
 * Returns number of ticks to next event (or 0 if none).
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	/* must be ipsq writer; ill_lock is dropped around each send */
	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

per_ilm_timer:
	/*
	 * Unlike the IGMP handler, v2 records are accumulated across the
	 * whole ilm walk and sent in one batch after the loop.
	 */
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			/* not yet expired; age it and track the minimum */
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d elap %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			/* MLDv2 router: queue a current-state record */
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Group-specific request, or no memory for
				 * a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* send the batched v2 current-state and retransmit reports */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
/* ARGSUSED */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t  global_next = INFINITY;
	uint_t  next;
	ill_walk_context_t ctx;
	boolean_t success;

	/* snapshot and reset the global elapsed-time accumulator */
	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timer_fired_last = ddi_get_lbolt();
	elapsed = mld_time_to_next;
	mld_time_to_next = 0;
	mutex_exit(&mld_timer_lock);

	rw_enter(&ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = mld_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
			    B_FALSE);
		}
		rw_enter(&ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ill_g_lock);

	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timeout_id = 0;
	mutex_exit(&mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didnt we hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 */
/* ARGSUSED */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST; ifp != (ill_if_t *)&IP_V4_ILL_G_LIST;
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			/* age the "heard an older-version querier" clocks */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/*
					 * V1 querier is gone; fall back to
					 * v2 if one has been heard, else v3.
					 */
					if (ill->ill_mcast_v2_tset > 0) {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V2\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V2_ROUTER;
					} else {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V3\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V3_ROUTER;
					}
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
				if (ill->ill_mcast_v2_time >= OVQP(ill)) {
					ip1dbg(("V2 query timer expired on "
					    "%s; switching mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type = IGMP_V3_ROUTER;
					ill->ill_mcast_v2_time = 0;
					ill->ill_mcast_v2_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v2, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}

	}
	rw_exit(&ill_g_lock);
	/* rearm ourselves for the next slowtimo interval */
	mutex_enter(&igmp_slowtimeout_lock);
	igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t *ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST; ifp != (ill_if_t *)&IP_V6_ILL_G_LIST;
	    ifp = ifp->illif_next) {

		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_type == MLD_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/* MLDv1 querier aged out; go to v2 */
					ip1dbg(("MLD query timer expired on"
					    " %s; switching mode to MLD_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type = MLD_V2_ROUTER;
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ill_g_lock);
	/* rearm ourselves for the next slowtimo interval */
	mutex_enter(&mld_slowtimeout_lock);
	mld_slowtimeout_id = timeout(mld_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * Build and transmit an IGMPv1/v2 message of the given type for the
 * membership described by ilm.  If addr is 0 the packet is addressed
 * to the group itself.  This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size  = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill  = ipif->ipif_ill;	/* Will be the "lower" ill */
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 *
	 * We also need to make sure that this does not get load balanced
	 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
	 * here. If it gets load balanced, switches supporting igmp snooping
	 * will send the packet that it receives for this multicast group
	 * to the interface that we are sending on. As we have joined the
	 * multicast group on this ill, by sending the packet out on this
	 * ill, we receive all the packets back on this ill.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;

	/* M_CTL ipsec_out header precedes the IP datagram proper */
	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* layout: [ipha][router-alert option][igmp header] */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type   = type;
	igmpa->igmpa_code   = 0;
	igmpa->igmpa_group  = ilm->ilm_addr;
	igmpa->igmpa_cksum  = 0;
	igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);
	/* a computed checksum of zero is transmitted as all-ones */
	if (igmpa->igmpa_cksum == 0)
		igmpa->igmpa_cksum = 0xffff;

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service	= 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl		= IGMP_TTL;
	ipha->ipha_protocol	= IPPROTO_IGMP;
	ipha->ipha_hdr_checksum	= 0;
	ipha->ipha_dst		= addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src		= ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing demon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 *
	 * NOTE(review): first_mp (the full M_CTL chain) is handed to both
	 * the loopback and the send below; presumably ip_multicast_loopback
	 * copies rather than consumes the chain -- confirm.  Compare
	 * igmpv3_sendrpt(), which loops back the data mblk only.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *first_mp, *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	ill_t *ill = ipif->ipif_ill;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;
	zoneid_t zoneid;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass: walk the remaining records to decide how many fit in
	 * this packet (numrec) and whether another packet will be needed.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/* layout: [ipha][router-alert][igmp3 report hdr][group records] */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* second pass: fill in the numrec group records chosen above */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources to the front of the
			 * record before building the next packet.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* free every record handed in, whether or not it was sent */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 * Handle an inbound MLD message on ill.  Validates the packet (source
 * must be link-local, hop limit 1, length at least MLD_MINLEN) and
 * bumps the relevant ICMPv6 MIB counters; mp is consumed on the error
 * paths shown here.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
	uint_t		next;
	int		mldlen;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	/* MLD messages must be sent with a hop limit of 1 */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t
*)(&mp->b_rptr[hdr_length]); 2212 2213 switch (mldh->mld_type) { 2214 case MLD_LISTENER_QUERY: 2215 /* 2216 * packet length differentiates between v1 and v2. v1 2217 * query should be exactly 24 octets long; v2 is >= 28. 2218 */ 2219 if (mldlen == MLD_MINLEN) { 2220 next = mld_query_in(mldh, ill); 2221 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2222 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2223 } else { 2224 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2225 freemsg(mp); 2226 return; 2227 } 2228 if (next == 0) { 2229 freemsg(mp); 2230 return; 2231 } 2232 2233 if (next != INFINITY) 2234 mld_start_timers(next); 2235 break; 2236 2237 case MLD_LISTENER_REPORT: { 2238 2239 ASSERT(ill->ill_ipif != NULL); 2240 /* 2241 * For fast leave to work, we have to know that we are the 2242 * last person to send a report for this group. Reports 2243 * generated by us are looped back since we could potentially 2244 * be a multicast router, so discard reports sourced by me. 2245 */ 2246 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2247 mutex_enter(&ill->ill_lock); 2248 for (ipif = ill->ill_ipif; ipif != NULL; 2249 ipif = ipif->ipif_next) { 2250 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2251 lcladdr_ptr)) { 2252 if (ip_debug > 1) { 2253 char buf1[INET6_ADDRSTRLEN]; 2254 char buf2[INET6_ADDRSTRLEN]; 2255 2256 (void) mi_strlog(ill->ill_rq, 2257 1, 2258 SL_TRACE, 2259 "mld_input: we are only " 2260 "member src %s ipif_local %s", 2261 inet_ntop(AF_INET6, lcladdr_ptr, 2262 buf1, sizeof (buf1)), 2263 inet_ntop(AF_INET6, 2264 &ipif->ipif_v6lcl_addr, 2265 buf2, sizeof (buf2))); 2266 } 2267 mutex_exit(&ill->ill_lock); 2268 freemsg(mp); 2269 return; 2270 } 2271 } 2272 mutex_exit(&ill->ill_lock); 2273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2274 2275 v6group_ptr = &mldh->mld_addr; 2276 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2277 BUMP_MIB(ill->ill_icmp6_mib, 2278 ipv6IfIcmpInGroupMembBadReports); 2279 freemsg(mp); 2280 return; 2281 } 2282 2283 2284 
/* 2285 * If we belong to the group being reported, and we are a 2286 * 'Delaying member' per the RFC terminology, stop our timer 2287 * for that group and 'clear flag' i.e. mark ilm_state as 2288 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2289 * membership entries for the same group address (one per zone) 2290 * so we need to walk the ill_ilm list. 2291 */ 2292 mutex_enter(&ill->ill_lock); 2293 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2294 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2295 continue; 2296 BUMP_MIB(ill->ill_icmp6_mib, 2297 ipv6IfIcmpInGroupMembOurReports); 2298 2299 ilm->ilm_timer = INFINITY; 2300 ilm->ilm_state = IGMP_OTHERMEMBER; 2301 } 2302 mutex_exit(&ill->ill_lock); 2303 break; 2304 } 2305 case MLD_LISTENER_REDUCTION: 2306 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2307 break; 2308 } 2309 /* 2310 * All MLD packets have already been passed up to any 2311 * process(es) listening on a ICMP6 raw socket. This 2312 * has been accomplished in ip_deliver_local_v6 prior to 2313 * this function call. It is assumed that the multicast daemon 2314 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2315 * ICMP6_FILTER socket option to only receive the MLD messages) 2316 * Thus we can free the MLD message block here 2317 */ 2318 freemsg(mp); 2319 } 2320 2321 /* 2322 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2323 * (non-zero, unsigned) timer value to be set on success. 2324 */ 2325 static uint_t 2326 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2327 { 2328 ilm_t *ilm; 2329 int timer; 2330 uint_t next; 2331 in6_addr_t *v6group; 2332 2333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2334 2335 /* 2336 * In the MLD specification, there are 3 states and a flag. 2337 * 2338 * In Non-Listener state, we simply don't have a membership record. 
2339 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2340 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2341 * INFINITY) 2342 * 2343 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2344 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2345 * if I sent the last report. 2346 */ 2347 v6group = &mldh->mld_addr; 2348 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2349 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2351 return (0); 2352 } 2353 2354 /* Need to do compatibility mode checking */ 2355 mutex_enter(&ill->ill_lock); 2356 ill->ill_mcast_v1_time = 0; 2357 ill->ill_mcast_v1_tset = 1; 2358 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2359 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2360 "MLD_V1_ROUTER\n", ill->ill_name)); 2361 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2362 ill->ill_mcast_type = MLD_V1_ROUTER; 2363 } 2364 mutex_exit(&ill->ill_lock); 2365 2366 timer = (int)ntohs(mldh->mld_maxdelay); 2367 if (ip_debug > 1) { 2368 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2369 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2370 timer, (int)mldh->mld_type); 2371 } 2372 2373 /* 2374 * -Start the timers in all of our membership records for 2375 * the physical interface on which the query arrived, 2376 * excl: 2377 * 1. those that belong to the "all hosts" group, 2378 * 2. those with 0 scope, or 1 node-local scope. 2379 * 2380 * -Restart any timer that is already running but has a value 2381 * longer that the requested timeout. 2382 * -Use the value specified in the query message as the 2383 * maximum timeout. 
2384 */ 2385 next = INFINITY; 2386 mutex_enter(&ill->ill_lock); 2387 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2388 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2389 2390 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2391 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2392 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2393 continue; 2394 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2395 &ipv6_all_hosts_mcast)) && 2396 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2397 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2398 if (timer == 0) { 2399 /* Respond immediately */ 2400 ilm->ilm_timer = INFINITY; 2401 ilm->ilm_state = IGMP_IREPORTEDLAST; 2402 mutex_exit(&ill->ill_lock); 2403 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2404 mutex_enter(&ill->ill_lock); 2405 break; 2406 } 2407 if (ilm->ilm_timer > timer) { 2408 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2409 if (ilm->ilm_timer < next) 2410 next = ilm->ilm_timer; 2411 } 2412 break; 2413 } 2414 } 2415 mutex_exit(&ill->ill_lock); 2416 2417 return (next); 2418 } 2419 2420 /* 2421 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2422 * returns the appropriate (non-zero, unsigned) timer value (which may 2423 * be INFINITY) to be set. 
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t *ilm;
	in6_addr_t *v6group, *src_array;
	uint_t next, numsrc, i, mrd, delay, qqi;
	uint8_t qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* the source addresses immediately follow the fixed query header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * extract Maximum Response Delay from code in header; values at
	 * or above MLD_V2_MAXRT_FPMIN are floating-point encoded as
	 * mantissa/exponent fields and must be decoded.
	 */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;

	/* a Querier's Robustness Variable of 0 means "use the default" */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* QQIC uses the same style of floating-point encoding as above */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < delay) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = delay;
		next = ill->ill_global_timer;
		mutex_exit(&ill->ill_lock);

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			break;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t		*mp;
	mld_hdr_t	*mldh;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t		*ill = ilm->ilm_ill;	/* Will be the "lower" ill */
	ipif_t		*ipif;
	ip6i_t		*ip6i;

	/*
	 * We need to place a router alert option in this packet.  The length
	 * of the options must be a multiple of 8.  The hbh option header is 2
	 * bytes followed by the 4 byte router alert option.  That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int	router_alert_length = 8;

	ASSERT(ill->ill_isv6);

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
	 * ip_newroute_ipif_v6 knows how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on. As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t) + router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	ip6h = (ip6_t *)&ip6i[1];
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1.  The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	/* reports go to the group itself; done messages take an override */
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* Otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_max_frag,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	ip6i_t		*ip6i;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	ipif_t		*ipif;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes.  We assume here that optlen <= 8, so the total option
	 * length will be 8.  Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	/*
	 * Accumulate records into this packet until the next record
	 * would push the packet past ill_max_frag.
	 */
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag -
				    (size + sizeof (mld2mar_t));
				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
	 * ip_newroute_ipif_v6 know how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on. As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t);
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	/*
	 * Packet layout: IPv6 header, hop-by-hop header with router
	 * alert (padded to 8 bytes), MLDv2 report header, then the
	 * multicast address records.
	 */
	ip6h = (ip6_t *)&(ip6i[1]);
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources to the front of the
			 * list before building the next packet.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * Allocate and fill in an mrec_t of the given record type for group
 * grp with source list srclist, linked ahead of next.  On allocation
 * failure — or for ALLOW/BLOCK records with an empty source list, which
 * carry no information — simply returns next unchanged.
 */
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
	mrec_t *rp;
	int i;

	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
	    SLIST_IS_EMPTY(srclist))
		return (next);

	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
	if (rp == NULL)
		return (next);

	rp->mrec_next = next;
	rp->mrec_type = type;
	rp->mrec_auxlen = 0;
	rp->mrec_group = *grp;
	if (srclist == NULL) {
		rp->mrec_srcs.sl_numsrc = 0;
	} else {
		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
		for (i = 0; i < srclist->sl_numsrc; i++)
			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
	}

	return (rp);
}

/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
	/*
	 * There are only three possibilities for rtype:
	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
	 *	  => rtype is ALLOW_NEW_SOURCES
	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
	 *	  => rtype is CHANGE_TO_EXCLUDE
	 *	State change that involves a filter mode change
	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
	 */
	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
	    rtype == ALLOW_NEW_SOURCES);

	/* robustness variable determines how many times we retransmit */
	rtxp->rtx_cnt = ill->ill_mcast_rv;

	switch (rtype) {
	case CHANGE_TO_EXCLUDE:
		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_allow);
		COPY_SLIST(flist, rtxp->rtx_block);
		break;
	case ALLOW_NEW_SOURCES:
	case CHANGE_TO_INCLUDE:
		/*
		 * ALLOW_NEW_SOURCES is not a filter mode change, so no
		 * filter-mode retransmissions are scheduled for it.
		 */
		rtxp->rtx_fmode_cnt =
		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_block);
		COPY_SLIST(flist, rtxp->rtx_allow);
		break;
	}
}

/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	/* an ilm is hung off either an ill directly or via its ipif */
	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		/* mreclist holds at most one ALLOW and one BLOCK record */
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/*
			 * No BLOCK record in the caller's list; build one
			 * from the saved rtx_block state if it is non-empty.
			 */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* likewise for a missing ALLOW record */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}