1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 /* Following protected by igmp_timer_lock */ 90 static int igmp_time_to_next; /* Time since last timeout */ 91 static int igmp_timer_fired_last; 92 uint_t igmp_deferred_next = INFINITY; 93 timeout_id_t igmp_timeout_id = 0; 94 kmutex_t igmp_timer_lock; 
/* Protected by igmp_slowtimeout_lock */
timeout_id_t	igmp_slowtimeout_id = 0;
kmutex_t	igmp_slowtimeout_lock;

/* Following protected by mld_timer_lock */
static int	mld_time_to_next;	/* Time since last timeout */
static int	mld_timer_fired_last;
uint_t		mld_deferred_next = INFINITY;
timeout_id_t	mld_timeout_id = 0;
kmutex_t	mld_timer_lock;

/* Protected by mld_slowtimeout_lock */
timeout_id_t	mld_slowtimeout_id = 0;
kmutex_t	mld_slowtimeout_lock;

/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * igmp_start_timers:
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for 'next' is milliseconds.
 *
 * Schedules (or reschedules) the global IGMP timeout so that
 * igmp_timeout_handler fires no later than 'next' ms from now.
 * Concurrent callers are serialized via the static flag
 * igmp_timer_setter_active (protected by igmp_timer_lock); a caller
 * that loses the race just folds its deadline into igmp_time_to_next
 * and lets the active setter do the work.
 */
void
igmp_start_timers(unsigned next)
{
	int	time_left;
	/* Protected by igmp_timer_lock */
	static boolean_t igmp_timer_setter_active;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&igmp_timer_lock);

	if (igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		mutex_exit(&igmp_timer_lock);
		return;
	} else {
		igmp_timer_setter_active = B_TRUE;
	}
	if (igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		igmp_time_to_next = next;
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = igmp_timer_fired_last +
	    MSEC_TO_TICK(igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timer fires sooner than 'next'; nothing to do. */
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/*
	 * Drop the lock across untimeout(): the handler may be running
	 * and blocked on igmp_timer_lock, so holding it here would
	 * deadlock.
	 */
	mutex_exit(&igmp_timer_lock);
	ret = untimeout(igmp_timeout_id);
	mutex_enter(&igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(igmp_timeout_id == 0);
	} else {
		ASSERT(igmp_timeout_id != 0);
		igmp_timeout_id = 0;
	}
	if (igmp_time_to_next != 0) {
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
	}
	igmp_timer_setter_active = B_FALSE;
	mutex_exit(&igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 * MLD counterpart of igmp_start_timers(): schedule (or reschedule)
 * the global MLD timeout so mld_timeout_handler fires no later than
 * 'next' ms from now.  Setters are serialized via
 * mld_timer_setter_active under mld_timer_lock.
 */
void
mld_start_timers(unsigned next)
{
	int	time_left;
	/* Protected by mld_timer_lock */
	static boolean_t mld_timer_setter_active;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&mld_timer_lock);
	if (mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		mld_time_to_next = MIN(mld_time_to_next, next);
		mutex_exit(&mld_timer_lock);
		return;
	} else {
		mld_timer_setter_active = B_TRUE;
	}
	if (mld_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		mld_time_to_next = next;
		mld_timeout_id = timeout(mld_timeout_handler, NULL,
		    MSEC_TO_TICK(mld_time_to_next));
		mld_timer_setter_active = B_FALSE;
		mutex_exit(&mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = mld_timer_fired_last +
	    MSEC_TO_TICK(mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timer fires sooner than 'next'; nothing to do. */
		mld_timer_setter_active = B_FALSE;
		mutex_exit(&mld_timer_lock);
		return;
	}

	/* Drop the lock across untimeout() to avoid deadlocking the handler */
	mutex_exit(&mld_timer_lock);
	ret = untimeout(mld_timeout_id);
	mutex_enter(&mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(mld_timeout_id == 0);
	} else {
		ASSERT(mld_timeout_id != 0);
		mld_timeout_id = 0;
	}
	if (mld_time_to_next != 0) {
		mld_time_to_next = MIN(mld_time_to_next, next);
		mld_timeout_id = timeout(mld_timeout_handler, NULL,
		    MSEC_TO_TICK(mld_time_to_next));
	}
	mld_timer_setter_active = B_FALSE;
	mutex_exit(&mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
/* ARGSUSED */
mblk_t *
igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	igmpa_t		*igmpa;
	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
	int		iphlen, igmplen, mblklen;
	ilm_t		*ilm;
	uint32_t	src, dst;
	uint32_t	group;
	uint_t		next;
	ipif_t		*ipif;

	ASSERT(ill != NULL);
	ASSERT(!ill->ill_isv6);
	++igmpstat.igps_rcv_total;

	mblklen = MBLKL(mp);
	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
		++igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	igmplen = ntohs(ipha->ipha_length) - iphlen;
	/*
	 * Since msg sizes are more variable with v3, just pullup the
	 * whole thing now.
	 */
	if (MBLKL(mp) < (igmplen + iphlen)) {
		mblk_t *mp1;
		if ((mp1 = msgpullup(mp, -1)) == NULL) {
			++igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		/* msgpullup copies; free the original and re-point ipha */
		freemsg(mp);
		mp = mp1;
		ipha = (ipha_t *)(mp->b_rptr);
	}

	/*
	 * Validate lengths
	 */
	if (igmplen < IGMP_MINLEN) {
		++igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	/*
	 * Validate checksum
	 */
	if (IP_CSUM(mp, iphlen, 0)) {
		++igmpstat.igps_rcv_badsum;
		goto bad_pkt;
	}

	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
	src = ipha->ipha_src;
	dst = ipha->ipha_dst;
	if (ip_debug > 1)
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
		    (int)ntohl(src), (int)ntohl(dst),
		    ill->ill_name);

	switch (igmpa->igmpa_type) {
	case IGMP_MEMBERSHIP_QUERY:
		/*
		 * packet length differentiates between v1/v2 and v3
		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
		 */
		if (igmplen == IGMP_MINLEN) {
			next = igmp_query_in(ipha, igmpa, ill);
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
			    igmplen);
		} else {
			++igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		/* next == 0 means the query was malformed; drop it */
		if (next == 0)
			goto bad_pkt;

		if (next != INFINITY)
			igmp_start_timers(next);

		break;

	case IGMP_V1_MEMBERSHIP_REPORT:
	case IGMP_V2_MEMBERSHIP_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == src) {
				if (ip_debug > 1) {
					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "igmp_input: we are only "
					    "member src 0x%x ipif_local 0x%x",
					    (int)ntohl(src),
					    (int)
					    ntohl(ipif->ipif_lcl_addr));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);

		++igmpstat.igps_rcv_reports;
		group = igmpa->igmpa_group;
		if (!CLASSD(group)) {
			++igmpstat.igps_rcv_badreports;
			goto bad_pkt;
		}

		/*
		 * KLUDGE: if the IP source address of the report has an
		 * unspecified (i.e., zero) subnet number, as is allowed for
		 * a booting host, replace it with the correct subnet number
		 * so that a process-level multicast routing demon can
		 * determine which subnet it arrived from. This is necessary
		 * to compensate for the lack of any way for a process to
		 * determine the arrival interface of an incoming packet.
		 *
		 * Requires that a copy of *this* message is passed up
		 * to the raw interface which is done by our caller.
		 */
		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
			/* Pick the first ipif on this ill */
			mutex_enter(&ill->ill_lock);
			src = ill->ill_ipif->ipif_subnet;
			mutex_exit(&ill->ill_lock);
			ip1dbg(("igmp_input: changed src to 0x%x\n",
			    (int)ntohl(src)));
			ipha->ipha_src = src;
		}

		/*
		 * If we belong to the group being reported, and
		 * we are a 'Delaying member' in the RFC terminology,
		 * stop our timer for that group and 'clear flag' i.e.
		 * mark as IGMP_OTHERMEMBER. Do this for all logical
		 * interfaces on the given physical interface.
	 */
	mutex_enter(&ill->ill_lock);
	for (ipif = ill->ill_ipif; ipif != NULL;
	    ipif = ipif->ipif_next) {
		ilm = ilm_lookup_ipif(ipif, group);
		if (ilm != NULL) {
			++igmpstat.igps_rcv_ourreports;
			/* Another member reported; cancel our pending one */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
	} /* for */
	mutex_exit(&ill->ill_lock);
	break;

	case IGMP_V3_MEMBERSHIP_REPORT:
		/*
		 * Currently nothing to do here; IGMP router is not
		 * implemented in ip, and v3 hosts don't pay attention
		 * to membership reports.
		 */
		break;
	}
	/*
	 * Pass all valid IGMP packets up to any process(es) listening
	 * on a raw IGMP socket. Do not free the packet.
	 */
	return (mp);

bad_pkt:
	freemsg(mp);
	return (NULL);
}

/*
 * Process an IGMPv1/v2 membership query.  Returns the soonest timer
 * value (ms) that the caller should arm, INFINITY if no timer needs
 * to run, or 0 if the query was invalid and the packet should be
 * dropped.
 */
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next;

	++igmpstat.igps_rcv_queries;

	/*
	 * In the IGMPv2 specification, there are 3 states and a flag.
	 *
	 * In Non-Member state, we simply don't have a membership record.
	 * In Delaying Member state, our timer is running (ilm->ilm_timer
	 * < INFINITY).  In Idle Member state, our timer is not running
	 * (ilm->ilm_timer == INFINITY).
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	if (igmpa->igmpa_code == 0) {
		/*
		 * Query from an old (v1) router: max-response code is 0.
		 * Remember that the querier on this interface is old,
		 * and set the timer to the value in RFC 1112.
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_mcast_v1_time = 0;
		ill->ill_mcast_v1_tset = 1;
		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
			    "to IGMP_V1_ROUTER\n", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
			ill->ill_mcast_type = IGMP_V1_ROUTER;
		}
		mutex_exit(&ill->ill_lock);

		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

		/* v1 queries must be general queries sent to 224.0.0.1 */
		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
		    igmpa->igmpa_group != 0) {
			++igmpstat.igps_rcv_badqueries;
			return (0);
		}

	} else {
		in_addr_t group;

		/*
		 * Query from a new router
		 * Simply do a validity check
		 */
		group = igmpa->igmpa_group;
		if (group != 0 && (!CLASSD(group))) {
			++igmpstat.igps_rcv_badqueries;
			return (0);
		}

		/*
		 * Switch interface state to v2 on receipt of a v2 query
		 * ONLY IF current state is v3.  Let things be if current
		 * state is v1 but do reset the v2-querier-present timer.
		 */
		mutex_enter(&ill->ill_lock);
		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
			    "to IGMP_V2_ROUTER", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
			ill->ill_mcast_type = IGMP_V2_ROUTER;
		}
		ill->ill_mcast_v2_time = 0;
		ill->ill_mcast_v2_tset = 1;
		mutex_exit(&ill->ill_lock);

		/* Wire value is in tenths of a second */
		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
	}

	if (ip_debug > 1) {
		mutex_enter(&ill->ill_lock);
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
		    (int)ntohs(igmpa->igmpa_code),
		    (int)ntohs(igmpa->igmpa_type));
		mutex_exit(&ill->ill_lock);
	}

	/*
	 * -Start the timers in all of our membership records
	 *  for the physical interface on which the query
	 *  arrived, excluding those that belong to the "all
	 *  hosts" group (224.0.0.1).
569 * 570 * -Restart any timer that is already running but has 571 * a value longer than the requested timeout. 572 * 573 * -Use the value specified in the query message as 574 * the maximum timeout. 575 */ 576 next = (unsigned)INFINITY; 577 mutex_enter(&ill->ill_lock); 578 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 579 580 /* 581 * A multicast router joins INADDR_ANY address 582 * to enable promiscuous reception of all 583 * mcasts from the interface. This INADDR_ANY 584 * is stored in the ilm_v6addr as V6 unspec addr 585 */ 586 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 587 continue; 588 if (ilm->ilm_addr == htonl(INADDR_ANY)) 589 continue; 590 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 591 (igmpa->igmpa_group == 0) || 592 (igmpa->igmpa_group == ilm->ilm_addr)) { 593 if (ilm->ilm_timer > timer) { 594 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 595 if (ilm->ilm_timer < next) 596 next = ilm->ilm_timer; 597 } 598 } 599 } 600 mutex_exit(&ill->ill_lock); 601 602 return (next); 603 } 604 605 static uint_t 606 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 607 { 608 uint_t i, next, mrd, qqi, timer, delay, numsrc; 609 ilm_t *ilm; 610 ipaddr_t *src_array; 611 uint8_t qrv; 612 613 /* make sure numsrc matches packet size */ 614 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 615 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 616 ++igmpstat.igps_rcv_tooshort; 617 return (0); 618 } 619 src_array = (ipaddr_t *)&igmp3qa[1]; 620 621 ++igmpstat.igps_rcv_queries; 622 623 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 624 uint_t hdrval, mant, exp; 625 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 626 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 627 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 628 mrd = (mant | 0x10) << (exp + 3); 629 } 630 if (mrd == 0) 631 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 632 timer = DSEC_TO_MSEC(mrd); 633 MCAST_RANDOM_DELAY(delay, timer); 634 next = (unsigned)INFINITY; 635 636 if ((qrv = 
	    igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/*
	 * QQIC >= 128 is floating point, decoded the same way as the
	 * max response code above.
	 */
	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (RFC3376 section 5.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < delay) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending upon query type:
	 * general, group specific, or group/source specific.
	 */
	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time]).
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = delay;
		next = ill->ill_global_timer;
		mutex_exit(&ill->ill_lock);

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
				continue;
			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					IN6_IPADDR_TO_V4MAPPED(src_array[i],
					    &(pktl->sl_addr[i]));
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* choose soonest timer */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * Called when an IPv4 multicast group is joined on the interface;
 * sends the initial membership report appropriate to the querier
 * version detected on the ill, and arms the report-retransmit timer.
 * Caller must be the ipsq writer for the ill.
 */
void
igmp_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
		/* Membership in 224.0.0.1 is never reported */
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		ip1dbg(("Querier mode %d, sending report, group %x\n",
		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm,
			    IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			/* drop ill_lock while sending the report */
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ilm->ilm_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call igmp_start_timers from
		 * here. igmp_start_timers needs to call untimeout, and we
		 * can't hold the ipsq across untimeout since
		 * igmp_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit.
		 */
		mutex_enter(&igmp_timer_lock);
		igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    igmp_deferred_next);
		mutex_exit(&igmp_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * IPv6 counterpart of igmp_joingroup(): send the initial MLD listener
 * report for a newly-joined group and arm the retransmit timer.
 * Caller must be the ipsq writer for the ill.
 */
void
mld_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		/* Membership in the all-hosts group is never reported */
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			mldv2_sendrpt(ill, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call mld_start_timers from
		 * here. mld_start_timers needs to call untimeout, and we
		 * can't hold the ipsq (i.e. the lock) across untimeout
		 * since mld_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit
		 */
		mutex_enter(&mld_timer_lock);
		mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    mld_deferred_next);
		mutex_exit(&mld_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * Called when an IPv4 multicast group is left; sends a leave message
 * or state-change report if required by the querier version, per the
 * "last reporter" rule (only if we sent the last report).
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * IPv6 counterpart of igmp_leavegroup(): send an MLD "done" message
 * or MLDv2 state-change report when a group is left.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * Send an IGMPv3 state-change report reflecting a change of the
 * group's filter mode/source list to (fmode, flist).  No-op unless
 * the interface has detected an IGMPv3 querier.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Allocation failed; fall back to a mode-change
			 * report, which needs no difference lists.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&igmp_timer_lock);
		igmp_deferred_next = MIN(igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		mutex_exit(&igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * IPv6 counterpart of igmp_statechange(): send an MLDv2 state-change
 * report for a filter mode/source list change.  No-op unless the
 * interface has detected an MLDv2 querier.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t	*ill;
	mrec_t	*rp = NULL;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/* Allocation failed; fall back to mode-change report */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp =
mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1125 block, rp); 1126 l_free(a_minus_b); 1127 l_free(b_minus_a); 1128 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1129 send_to_ex: 1130 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1131 NULL); 1132 } else { 1133 send_to_in: 1134 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1135 NULL); 1136 } 1137 1138 /* 1139 * Need to set up retransmission state; merge the new info with the 1140 * current state (which may be null). If the timer is not currently 1141 * running, start it (need to do a deferred start of the timer as 1142 * we're currently in the sq). 1143 */ 1144 rp = mcast_merge_rtx(ilm, rp, flist); 1145 ASSERT(ilm->ilm_rtx.rtx_cnt > 0); 1146 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1147 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1148 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 1149 mutex_enter(&mld_timer_lock); 1150 mld_deferred_next = 1151 MIN(mld_deferred_next, ilm->ilm_rtx.rtx_timer); 1152 mutex_exit(&mld_timer_lock); 1153 } 1154 1155 mutex_exit(&ill->ill_lock); 1156 mldv2_sendrpt(ill, rp); 1157 } 1158 1159 uint_t 1160 igmp_timeout_handler_per_ill(ill_t *ill, int elapsed) 1161 { 1162 uint_t next = INFINITY; 1163 ilm_t *ilm; 1164 ipif_t *ipif; 1165 mrec_t *rp = NULL; 1166 mrec_t *rtxrp = NULL; 1167 rtx_state_t *rtxp; 1168 mcast_record_t rtype; 1169 1170 ASSERT(IAM_WRITER_ILL(ill)); 1171 1172 mutex_enter(&ill->ill_lock); 1173 1174 /* First check the global timer on this interface */ 1175 if (ill->ill_global_timer == INFINITY) 1176 goto per_ilm_timer; 1177 if (ill->ill_global_timer <= elapsed) { 1178 ill->ill_global_timer = INFINITY; 1179 /* 1180 * Send report for each group on this interface. 1181 * Since we just set the global timer (received a v3 general 1182 * query), need to skip the all hosts addr (224.0.0.1), per 1183 * RFC 3376 section 5. 
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			/* drop ill_lock across the actual send */
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

	/*
	 * Second pass: per-group (query response) timers and state-change
	 * retransmit timers for every ilm on the ill.
	 */
per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			/* not expired yet; just age it and track minimum */
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d elap %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/* mrec list was consumed by igmpv3_sendrpt() */
			rp = NULL;
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions to go; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimeo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */

/* ARGSUSED */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;

	/* Consume the elapsed time recorded by igmp_start_timers() */
	mutex_enter(&igmp_timer_lock);
	ASSERT(igmp_timeout_id != 0);
	igmp_timer_fired_last = ddi_get_lbolt();
	elapsed = igmp_time_to_next;
	igmp_time_to_next = 0;
	mutex_exit(&igmp_timer_lock);

	rw_enter(&ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE,
			    B_TRUE);
		}
		rw_enter(&ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ill_g_lock);

	mutex_enter(&igmp_timer_lock);
	ASSERT(igmp_timeout_id != 0);
	igmp_timeout_id = 0;
	mutex_exit(&igmp_timer_lock);

	/* re-arm for the earliest pending event found on any ill */
	if (global_next != INFINITY)
		igmp_start_timers(global_next);
}

/*
 * mld_timeout_handler_per_ill:
 * Called when there are timeout events, every next (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MLD analogue of igmp_timeout_handler_per_ill(); runs the global,
 * per-ilm, and retransmit timers for a single v6 ill.
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/* drop ill_lock across the actual send */
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

	/*
	 * Second pass: per-group (query response) timers and state-change
	 * retransmit timers.  Unlike the IGMP handler, MLDv2 reports are
	 * accumulated in rp/rtxrp and sent in one batch after the loop.
	 */
per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			/* not expired yet; just age it and track minimum */
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d elap %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions to go; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* If the router is (still) MLDv2, send the accumulated reports. */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
/* ARGSUSED */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;

	/* Consume the elapsed time recorded by mld_start_timers() */
	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timer_fired_last = ddi_get_lbolt();
	elapsed = mld_time_to_next;
	mld_time_to_next = 0;
	mutex_exit(&mld_timer_lock);

	rw_enter(&ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = mld_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
			    B_FALSE);
		}
		rw_enter(&ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ill_g_lock);

	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timeout_id = 0;
	mutex_exit(&mld_timer_lock);

	/* re-arm for the earliest pending event found on any ill */
	if (global_next != INFINITY)
		mld_start_timers(global_next);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didn't hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 */
/* ARGSUSED */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST; ifp != (ill_if_t *)&IP_V4_ILL_G_LIST;
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			/* age the "older version querier present" clocks */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/*
					 * The V1 querier aged out; fall back
					 * to V2 if a V2 querier is still
					 * present, else all the way to V3.
					 */
					if (ill->ill_mcast_v2_tset > 0) {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V2\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V2_ROUTER;
					} else {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V3\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V3_ROUTER;
					}
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
				if (ill->ill_mcast_v2_time >= OVQP(ill)) {
					ip1dbg(("V2 query timer expired on "
					    "%s; switching mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type = IGMP_V3_ROUTER;
					ill->ill_mcast_v2_time = 0;
					ill->ill_mcast_v2_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v2, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}

	}
	rw_exit(&ill_g_lock);
	/* reschedule ourselves */
	mutex_enter(&igmp_slowtimeout_lock);
	igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST; ifp != (ill_if_t *)&IP_V6_ILL_G_LIST;
	    ifp = ifp->illif_next) {

		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_type == MLD_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/* MLDv1 querier aged out; go to v2 */
					ip1dbg(("MLD query timer expired on"
					    " %s; switching mode to MLD_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type = MLD_V2_ROUTER;
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ill_g_lock);
	/* reschedule ourselves */
	mutex_enter(&mld_slowtimeout_lock);
	mld_slowtimeout_id = timeout(mld_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill = ipif->ipif_ill;	/* Will be the "lower" ill */
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 *
	 * We also need to make sure that this does not get load balanced
	 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
	 * here. If it gets load balanced, switches supporting igmp snooping
	 * will send the packet that it receives for this multicast group
	 * to the interface that we are sending on. As we have joined the
	 * multicast group on this ill, by sending the packet out on this
	 * ill, we receive all the packets back on this ill.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* Layout: IPv4 header, router-alert option, IGMP message */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
	if (igmpa->igmpa_cksum == 0)
		igmpa->igmpa_cksum = 0xffff;

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_hdr_checksum = 0;
	/* V1/V2 reports go to the group itself unless addr overrides */
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t	*io;
	igmp3ra_t	*igmp3ra;
	grphdra_t	*grphdr;
	mblk_t		*first_mp, *mp;
	ipha_t		*ipha;
	uint8_t		*rtralert;
	ipaddr_t	*src_array;
	int		i, j, numrec, more_src_cnt;
	size_t		hdrsize, size, rsize;
	ill_t		*ill = ipif->ipif_ill;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;
	zoneid_t	zoneid;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass over the remaining records: figure out how many group
	 * records (numrec) fit in one ill_max_frag-sized packet, and where
	 * to resume (next_reclist) if they don't all fit.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/* Layout: IPv4 header, router-alert option, v3 report, records */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* Fill in the group records counted in the first pass. */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	/* v3 reports always go to 224.0.0.22 */
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	/*
	 * NOTE(review): unlike igmp_sendpkt(), this loops back mp (the
	 * report without the leading M_CTL ipsec_info block) rather than
	 * first_mp -- confirm the asymmetry is intentional.
	 */
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources of the split mrec down
			 * to the front of its list before looping back.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* this function owns reclist; free every record */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 * Validate and process an incoming MLD packet on 'ill'.  The packet must
 * come from a link-local source, carry a hop limit of 1, and be at least
 * MLD_MINLEN octets long; anything else is counted as an error and dropped.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
	uint_t		next;
	int		mldlen;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t
*)(&mp->b_rptr[hdr_length]); 2209 2210 switch (mldh->mld_type) { 2211 case MLD_LISTENER_QUERY: 2212 /* 2213 * packet length differentiates between v1 and v2. v1 2214 * query should be exactly 24 octets long; v2 is >= 28. 2215 */ 2216 if (mldlen == MLD_MINLEN) { 2217 next = mld_query_in(mldh, ill); 2218 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2219 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2220 } else { 2221 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2222 freemsg(mp); 2223 return; 2224 } 2225 if (next == 0) { 2226 freemsg(mp); 2227 return; 2228 } 2229 2230 if (next != INFINITY) 2231 mld_start_timers(next); 2232 break; 2233 2234 case MLD_LISTENER_REPORT: { 2235 2236 ASSERT(ill->ill_ipif != NULL); 2237 /* 2238 * For fast leave to work, we have to know that we are the 2239 * last person to send a report for this group. Reports 2240 * generated by us are looped back since we could potentially 2241 * be a multicast router, so discard reports sourced by me. 2242 */ 2243 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2244 mutex_enter(&ill->ill_lock); 2245 for (ipif = ill->ill_ipif; ipif != NULL; 2246 ipif = ipif->ipif_next) { 2247 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2248 lcladdr_ptr)) { 2249 if (ip_debug > 1) { 2250 char buf1[INET6_ADDRSTRLEN]; 2251 char buf2[INET6_ADDRSTRLEN]; 2252 2253 (void) mi_strlog(ill->ill_rq, 2254 1, 2255 SL_TRACE, 2256 "mld_input: we are only " 2257 "member src %s ipif_local %s", 2258 inet_ntop(AF_INET6, lcladdr_ptr, 2259 buf1, sizeof (buf1)), 2260 inet_ntop(AF_INET6, 2261 &ipif->ipif_v6lcl_addr, 2262 buf2, sizeof (buf2))); 2263 } 2264 mutex_exit(&ill->ill_lock); 2265 freemsg(mp); 2266 return; 2267 } 2268 } 2269 mutex_exit(&ill->ill_lock); 2270 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2271 2272 v6group_ptr = &mldh->mld_addr; 2273 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2274 BUMP_MIB(ill->ill_icmp6_mib, 2275 ipv6IfIcmpInGroupMembBadReports); 2276 freemsg(mp); 2277 return; 2278 } 2279 2280 2281 
/* 2282 * If we belong to the group being reported, and we are a 2283 * 'Delaying member' per the RFC terminology, stop our timer 2284 * for that group and 'clear flag' i.e. mark ilm_state as 2285 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2286 * membership entries for the same group address (one per zone) 2287 * so we need to walk the ill_ilm list. 2288 */ 2289 mutex_enter(&ill->ill_lock); 2290 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2291 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2292 continue; 2293 BUMP_MIB(ill->ill_icmp6_mib, 2294 ipv6IfIcmpInGroupMembOurReports); 2295 2296 ilm->ilm_timer = INFINITY; 2297 ilm->ilm_state = IGMP_OTHERMEMBER; 2298 } 2299 mutex_exit(&ill->ill_lock); 2300 break; 2301 } 2302 case MLD_LISTENER_REDUCTION: 2303 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2304 break; 2305 } 2306 /* 2307 * All MLD packets have already been passed up to any 2308 * process(es) listening on a ICMP6 raw socket. This 2309 * has been accomplished in ip_deliver_local_v6 prior to 2310 * this function call. It is assumed that the multicast daemon 2311 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2312 * ICMP6_FILTER socket option to only receive the MLD messages) 2313 * Thus we can free the MLD message block here 2314 */ 2315 freemsg(mp); 2316 } 2317 2318 /* 2319 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2320 * (non-zero, unsigned) timer value to be set on success. 2321 */ 2322 static uint_t 2323 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2324 { 2325 ilm_t *ilm; 2326 int timer; 2327 uint_t next; 2328 in6_addr_t *v6group; 2329 2330 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2331 2332 /* 2333 * In the MLD specification, there are 3 states and a flag. 2334 * 2335 * In Non-Listener state, we simply don't have a membership record. 
2336 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2337 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2338 * INFINITY) 2339 * 2340 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2341 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2342 * if I sent the last report. 2343 */ 2344 v6group = &mldh->mld_addr; 2345 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2346 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2347 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2348 return (0); 2349 } 2350 2351 /* Need to do compatibility mode checking */ 2352 mutex_enter(&ill->ill_lock); 2353 ill->ill_mcast_v1_time = 0; 2354 ill->ill_mcast_v1_tset = 1; 2355 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2356 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2357 "MLD_V1_ROUTER\n", ill->ill_name)); 2358 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2359 ill->ill_mcast_type = MLD_V1_ROUTER; 2360 } 2361 mutex_exit(&ill->ill_lock); 2362 2363 timer = (int)ntohs(mldh->mld_maxdelay); 2364 if (ip_debug > 1) { 2365 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2366 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2367 timer, (int)mldh->mld_type); 2368 } 2369 2370 /* 2371 * -Start the timers in all of our membership records for 2372 * the physical interface on which the query arrived, 2373 * excl: 2374 * 1. those that belong to the "all hosts" group, 2375 * 2. those with 0 scope, or 1 node-local scope. 2376 * 2377 * -Restart any timer that is already running but has a value 2378 * longer that the requested timeout. 2379 * -Use the value specified in the query message as the 2380 * maximum timeout. 
2381 */ 2382 next = INFINITY; 2383 mutex_enter(&ill->ill_lock); 2384 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2385 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2386 2387 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2388 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2389 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2390 continue; 2391 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2392 &ipv6_all_hosts_mcast)) && 2393 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2394 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2395 if (timer == 0) { 2396 /* Respond immediately */ 2397 ilm->ilm_timer = INFINITY; 2398 ilm->ilm_state = IGMP_IREPORTEDLAST; 2399 mutex_exit(&ill->ill_lock); 2400 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2401 mutex_enter(&ill->ill_lock); 2402 break; 2403 } 2404 if (ilm->ilm_timer > timer) { 2405 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2406 if (ilm->ilm_timer < next) 2407 next = ilm->ilm_timer; 2408 } 2409 break; 2410 } 2411 } 2412 mutex_exit(&ill->ill_lock); 2413 2414 return (next); 2415 } 2416 2417 /* 2418 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2419 * returns the appropriate (non-zero, unsigned) timer value (which may 2420 * be INFINITY) to be set. 
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
    ilm_t *ilm;
    in6_addr_t *v6group, *src_array;
    uint_t next, numsrc, i, mrd, delay, qqi;
    uint8_t qrv;

    v6group = &mld2q->mld2q_addr;
    numsrc = ntohs(mld2q->mld2q_numsrc);

    /* make sure numsrc matches packet size */
    if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
        return (0);
    }
    /* sources immediately follow the fixed mld2q header */
    src_array = (in6_addr_t *)&mld2q[1];

    BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

    /* extract Maximum Response Delay from code in header */
    mrd = ntohs(mld2q->mld2q_mxrc);
    if (mrd >= MLD_V2_MAXRT_FPMIN) {
        /* value is floating-point encoded: mantissa + exponent */
        uint_t hdrval, mant, exp;
        hdrval = mrd;
        mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
        exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
        mrd = (mant | 0x1000) << (exp + 3);
    }
    MCAST_RANDOM_DELAY(delay, mrd);
    next = (unsigned)INFINITY;

    /* robustness variable of 0 from the querier means "use the default" */
    if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
        ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
    else
        ill->ill_mcast_rv = qrv;

    /* QQIC field may also be floating-point encoded */
    if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
        uint_t mant, exp;
        mant = qqi & MLD_V2_QQI_MANT_MASK;
        exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
        qqi = (mant | 0x10) << (exp + 3);
    }
    ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

    /*
     * If we have a pending general query response that's scheduled
     * sooner than the delay we calculated for this response, then
     * no action is required (MLDv2 draft section 6.2 rule 1)
     */
    mutex_enter(&ill->ill_lock);
    if (ill->ill_global_timer < delay) {
        mutex_exit(&ill->ill_lock);
        return (next);
    }
    mutex_exit(&ill->ill_lock);

    /*
     * Now take action depending on query type: general,
     * group specific, or group/source specific.
     */
    if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
        /*
         * general query
         * We know global timer is either not running or is
         * greater than our calculated delay, so reset it to
         * our delay (random value in range [0, response time])
         */
        mutex_enter(&ill->ill_lock);
        ill->ill_global_timer = delay;
        next = ill->ill_global_timer;
        mutex_exit(&ill->ill_lock);

    } else {
        /* group or group/source specific query */
        mutex_enter(&ill->ill_lock);
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
                IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
                IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
                !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
                continue;

            /*
             * If the query is group specific or we have a
             * pending group specific query, the response is
             * group specific (pending sources list should be
             * empty).  Otherwise, need to update the pending
             * sources list for the group and source specific
             * response.
             */
            if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
            } else {
                boolean_t overflow;
                slist_t *pktl;
                if (numsrc > MAX_FILTER_SIZE ||
                    (ilm->ilm_pendsrcs == NULL &&
                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                    /*
                     * We've been sent more sources than
                     * we can deal with; or we can't deal
                     * with a source list at all. Revert
                     * to a group specific query.
                     */
                    goto group_query;
                }
                if ((pktl = l_alloc()) == NULL)
                    goto group_query;
                pktl->sl_numsrc = numsrc;
                for (i = 0; i < numsrc; i++)
                    pktl->sl_addr[i] = src_array[i];
                /* merge the query's sources into the pending set */
                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                    &overflow);
                l_free(pktl);
                if (overflow)
                    goto group_query;
            }
            /* set timer to soonest value */
            ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;
            break;
        }
        mutex_exit(&ill->ill_lock);
    }

    return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
    mblk_t *mp;
    mld_hdr_t *mldh;
    ip6_t *ip6h;
    ip6_hbh_t *ip6hbh;
    struct ip6_opt_router *ip6router;
    size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
    ill_t *ill = ilm->ilm_ill;   /* Will be the "lower" ill */
    ipif_t *ipif;
    ip6i_t *ip6i;

    /*
     * We need to place a router alert option in this packet.  The length
     * of the options must be a multiple of 8.  The hbh option header is 2
     * bytes followed by the 4 byte router alert option.  That leaves
     * 2 bytes of pad for a total of 8 bytes.
     */
    const int router_alert_length = 8;

    ASSERT(ill->ill_isv6);

    /*
     * We need to make sure that this packet does not get load balanced.
     * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
     * ip_newroute_ipif_v6 knows how to handle such packets.
     * If it gets load balanced, switches supporting MLD snooping
     * (in the future) will send the packet that it receives for this
     * multicast group to the interface that we are sending on. As we have
     * joined the multicast group on this ill, by sending the packet out
     * on this ill, we receive all the packets back on this ill.
     */
    size += sizeof (ip6i_t) + router_alert_length;
    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        return;
    bzero(mp->b_rptr, size);
    mp->b_wptr = mp->b_rptr + size;

    /* ip6i is an internal header consumed by ip_wput_v6, not sent */
    ip6i = (ip6i_t *)mp->b_rptr;
    ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6i->ip6i_nxt = IPPROTO_RAW;
    ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
    ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

    ip6h = (ip6_t *)&ip6i[1];
    ip6hbh = (struct ip6_hbh *)&ip6h[1];
    ip6router = (struct ip6_opt_router *)&ip6hbh[1];
    /*
     * A zero is a pad option of length 1. The bzero of the whole packet
     * above will pad between ip6router and mld.
     */
    mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

    mldh->mld_type = type;
    mldh->mld_addr = ilm->ilm_v6addr;

    ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
    ip6router->ip6or_len = 2;
    ip6router->ip6or_value[0] = 0;
    ip6router->ip6or_value[1] = IP6_ALERT_MLD;

    ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
    ip6hbh->ip6h_len = 0;

    ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
    ip6h->ip6_nxt = IPPROTO_HOPOPTS;
    ip6h->ip6_hops = MLD_HOP_LIMIT;
    if (v6addr == NULL)
        ip6h->ip6_dst = ilm->ilm_v6addr;
    else
        ip6h->ip6_dst = *v6addr;

    /* ipif returned by ipif_lookup_zoneid is link-local (if present) */
    if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
        ip6h->ip6_src = ipif->ipif_v6src_addr;
        ipif_refrele(ipif);
    } else {
        /* Otherwise, use IPv6 default address selection. */
        ip6h->ip6_src = ipv6_all_zeros;
    }

    /*
     * Prepare for checksum by putting icmp length in the icmp
     * checksum field. The checksum is calculated in ip_wput_v6.
     */
    mldh->mld_cksum = htons(sizeof (*mldh));

    /*
     * ip_wput will automatically loopback the multicast packet to
     * the conn if multicast loopback is enabled.
     * The MIB stats corresponding to this outgoing MLD packet
     * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
     * ->icmp_update_out_mib_v6 function call.
     */
    (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_max_frag,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
    mblk_t *mp;
    mld2r_t *mld2r;
    mld2mar_t *mld2mar;
    in6_addr_t *srcarray;
    ip6_t *ip6h;
    ip6_hbh_t *ip6hbh;
    ip6i_t *ip6i;
    struct ip6_opt_router *ip6router;
    size_t size, optlen, padlen, icmpsize, rsize;
    ipif_t *ipif;
    int i, numrec, more_src_cnt;
    mrec_t *rp, *cur_reclist;
    mrec_t *next_reclist = reclist;
    boolean_t morepkts;

    /* If there aren't any records, there's nothing to send */
    if (reclist == NULL)
        return;

    ASSERT(ill->ill_isv6);

    /*
     * Total option length (optlen + padlen) must be a multiple of
     * 8 bytes.  We assume here that optlen <= 8, so the total option
     * length will be 8.  Assert this in case anything ever changes.
     */
    optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
    ASSERT(optlen <= 8);
    padlen = 8 - optlen;
nextpkt:
    /*
     * Accumulate records until the next would exceed ill_max_frag;
     * remember the resume point (next_reclist) and loop back here.
     */
    icmpsize = sizeof (mld2r_t);
    size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
    morepkts = B_FALSE;
    more_src_cnt = 0;
    for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
        rp = rp->mrec_next, numrec++) {
        rsize = sizeof (mld2mar_t) +
            (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
        if (size + rsize > ill->ill_max_frag) {
            if (rp == cur_reclist) {
                /*
                 * If the first mrec we looked at is too big
                 * to fit in a single packet (i.e the source
                 * list is too big), we must either truncate
                 * the list (if TO_EX or IS_EX), or send
                 * multiple reports for the same group (all
                 * other types).
                 */
                int srcspace, srcsperpkt;
                srcspace = ill->ill_max_frag -
                    (size + sizeof (mld2mar_t));
                srcsperpkt = srcspace / sizeof (in6_addr_t);
                /*
                 * Increment icmpsize and size, because we will
                 * be sending a record for the mrec we're
                 * looking at now.
                 */
                rsize = sizeof (mld2mar_t) +
                    (srcsperpkt * sizeof (in6_addr_t));
                icmpsize += rsize;
                size += rsize;
                if (rp->mrec_type == MODE_IS_EXCLUDE ||
                    rp->mrec_type == CHANGE_TO_EXCLUDE) {
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    if (rp->mrec_next == NULL) {
                        /* no more packets to send */
                        break;
                    } else {
                        /*
                         * more packets, but we're
                         * done with this mrec.
                         */
                        next_reclist = rp->mrec_next;
                    }
                } else {
                    more_src_cnt = rp->mrec_srcs.sl_numsrc
                        - srcsperpkt;
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    /*
                     * We'll fix up this mrec (remove the
                     * srcs we've already sent) before
                     * returning to nextpkt above.
                     */
                    next_reclist = rp;
                }
            } else {
                next_reclist = rp;
            }
            morepkts = B_TRUE;
            break;
        }
        icmpsize += rsize;
        size += rsize;
    }

    /*
     * We need to make sure that this packet does not get load balanced.
     * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
     * ip_newroute_ipif_v6 know how to handle such packets.
     * If it gets load balanced, switches supporting MLD snooping
     * (in the future) will send the packet that it receives for this
     * multicast group to the interface that we are sending on. As we have
     * joined the multicast group on this ill, by sending the packet out
     * on this ill, we receive all the packets back on this ill.
     */
    size += sizeof (ip6i_t);
    mp = allocb(size, BPRI_HI);
    if (mp == NULL)
        goto free_reclist;
    bzero(mp->b_rptr, size);
    mp->b_wptr = mp->b_rptr + size;

    ip6i = (ip6i_t *)mp->b_rptr;
    ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6i->ip6i_nxt = IPPROTO_RAW;
    ip6i->ip6i_flags = IP6I_ATTACH_IF;
    ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

    /* Lay out IPv6 header, hop-by-hop router alert, report, records */
    ip6h = (ip6_t *)&(ip6i[1]);
    ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
    ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
    mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
    mld2mar = (mld2mar_t *)&(mld2r[1]);

    ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
    ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
    ip6h->ip6_nxt = IPPROTO_HOPOPTS;
    ip6h->ip6_hops = MLD_HOP_LIMIT;
    ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
    /* ipif returned by ipif_lookup_zoneid is link-local (if present) */
    if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
        ip6h->ip6_src = ipif->ipif_v6src_addr;
        ipif_refrele(ipif);
    } else {
        /* otherwise, use IPv6 default address selection. */
        ip6h->ip6_src = ipv6_all_zeros;
    }

    ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
    /*
     * ip6h_len is the number of 8-byte words, not including the first
     * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
     */
    ip6hbh->ip6h_len = 0;

    ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
    ip6router->ip6or_len = 2;
    ip6router->ip6or_value[0] = 0;
    ip6router->ip6or_value[1] = IP6_ALERT_MLD;

    mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
    mld2r->mld2r_nummar = htons(numrec);
    /*
     * Prepare for the checksum by putting icmp length in the icmp
     * checksum field. The checksum is calculated in ip_wput_v6.
     */
    mld2r->mld2r_cksum = htons(icmpsize);

    for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
        mld2mar->mld2mar_type = rp->mrec_type;
        mld2mar->mld2mar_auxlen = 0;
        mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
        mld2mar->mld2mar_group = rp->mrec_group;
        srcarray = (in6_addr_t *)&(mld2mar[1]);

        for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
            srcarray[i] = rp->mrec_srcs.sl_addr[i];

        /* next record begins immediately after this record's sources */
        mld2mar = (mld2mar_t *)&(srcarray[i]);
    }

    /*
     * ip_wput will automatically loopback the multicast packet to
     * the conn if multicast loopback is enabled.
     * The MIB stats corresponding to this outgoing MLD packet
     * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
     * ->icmp_update_out_mib_v6 function call.
     */
    (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);

    if (morepkts) {
        if (more_src_cnt > 0) {
            /* shift the unsent sources to the front of the list */
            int index, mvsize;
            slist_t *sl = &next_reclist->mrec_srcs;
            index = sl->sl_numsrc;
            mvsize = more_src_cnt * sizeof (in6_addr_t);
            (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
                mvsize);
            sl->sl_numsrc = more_src_cnt;
        }
        goto nextpkt;
    }

free_reclist:
    while (reclist != NULL) {
        rp = reclist->mrec_next;
        mi_free(reclist);
        reclist = rp;
    }
}

/*
 * Allocate a new mrec_t of the given type/group/source list and prepend
 * it to 'next'.  An ALLOW/BLOCK record with an empty source list is a
 * no-op, and on allocation failure 'next' is returned unchanged — the
 * caller's list is never corrupted.
 */
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
    mrec_t *rp;
    int i;

    if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
        SLIST_IS_EMPTY(srclist))
        return (next);

    rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
    if (rp == NULL)
        return (next);

    rp->mrec_next = next;
    rp->mrec_type = type;
    rp->mrec_auxlen = 0;
    rp->mrec_group = *grp;
    if (srclist == NULL) {
        rp->mrec_srcs.sl_numsrc = 0;
    } else {
        rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
        for (i = 0; i < srclist->sl_numsrc; i++)
            rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
    }

    return (rp);
}

/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
2906 */ 2907 static void 2908 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2909 slist_t *flist) 2910 { 2911 /* 2912 * There are only three possibilities for rtype: 2913 * New join, transition from INCLUDE {} to INCLUDE {flist} 2914 * => rtype is ALLOW_NEW_SOURCES 2915 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2916 * => rtype is CHANGE_TO_EXCLUDE 2917 * State change that involves a filter mode change 2918 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2919 */ 2920 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2921 rtype == ALLOW_NEW_SOURCES); 2922 2923 rtxp->rtx_cnt = ill->ill_mcast_rv; 2924 2925 switch (rtype) { 2926 case CHANGE_TO_EXCLUDE: 2927 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2928 CLEAR_SLIST(rtxp->rtx_allow); 2929 COPY_SLIST(flist, rtxp->rtx_block); 2930 break; 2931 case ALLOW_NEW_SOURCES: 2932 case CHANGE_TO_INCLUDE: 2933 rtxp->rtx_fmode_cnt = 2934 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2935 CLEAR_SLIST(rtxp->rtx_block); 2936 COPY_SLIST(flist, rtxp->rtx_allow); 2937 break; 2938 } 2939 } 2940 2941 /* 2942 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2943 * RFC 3376 section 5.1, covers three cases: 2944 * * The current state change is a filter mode change 2945 * Set filter mode retransmit counter; set retransmit allow or 2946 * block list to new source list as appropriate, and clear the 2947 * retransmit list that was not set; send TO_IN or TO_EX with 2948 * new source list. 2949 * * The current state change is a source list change, but the filter 2950 * mode retransmit counter is > 0 2951 * Decrement filter mode retransmit counter; set retransmit 2952 * allow or block list to new source list as appropriate, 2953 * and clear the retransmit list that was not set; send TO_IN 2954 * or TO_EX with new source list. 2955 * * The current state change is a source list change, and the filter 2956 * mode retransmit counter is 0. 
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
    ill_t *ill;
    rtx_state_t *rtxp = &ilm->ilm_rtx;
    mcast_record_t txtype;
    mrec_t *rp, *rpnext, *rtnmrec;
    boolean_t ovf;

    /* ilm is attached to either an ill directly or via an ipif */
    ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

    if (mreclist == NULL)
        return (mreclist);

    /*
     * A filter mode change is indicated by a single mrec, which is
     * either TO_IN or TO_EX.  In this case, we just need to set new
     * retransmit state as if this were an initial join.  There is
     * no change to the mrec list.
     */
    if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
        mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
        mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
            &mreclist->mrec_srcs);
        return (mreclist);
    }

    /*
     * Only the source list has changed
     */
    rtxp->rtx_cnt = ill->ill_mcast_rv;
    if (rtxp->rtx_fmode_cnt > 0) {
        /* but we're still sending filter mode change reports */
        rtxp->rtx_fmode_cnt--;
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            CLEAR_SLIST(rtxp->rtx_block);
            COPY_SLIST(flist, rtxp->rtx_allow);
            txtype = CHANGE_TO_INCLUDE;
        } else {
            CLEAR_SLIST(rtxp->rtx_allow);
            COPY_SLIST(flist, rtxp->rtx_block);
            txtype = CHANGE_TO_EXCLUDE;
        }
        /* overwrite first mrec with new info */
        mreclist->mrec_type = txtype;
        l_copy(flist, &mreclist->mrec_srcs);
        /* then free any remaining mrecs */
        for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
            rpnext = rp->mrec_next;
            mi_free(rp);
        }
        mreclist->mrec_next = NULL;
        rtnmrec = mreclist;
    } else {
        mrec_t *allow_mrec, *block_mrec;
        /*
         * Just send the source change reports; but we need to
         * recalculate the ALLOW and BLOCK lists based on previous
         * state and new changes.
         */
        rtnmrec = mreclist;
        allow_mrec = block_mrec = NULL;
        /* the list holds at most one ALLOW and one BLOCK record here */
        for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
            ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
                rp->mrec_type == BLOCK_OLD_SOURCES);
            if (rp->mrec_type == ALLOW_NEW_SOURCES)
                allow_mrec = rp;
            else
                block_mrec = rp;
        }
        /*
         * Perform calculations:
         *   new_allow = mrec_allow + (rtx_allow - mrec_block)
         *   new_block = mrec_block + (rtx_block - mrec_allow)
         *
         * Each calc requires two steps, for example:
         *   rtx_allow = rtx_allow - mrec_block;
         *   new_allow = mrec_allow + rtx_allow;
         *
         * Store results in mrec lists, and then copy into rtx lists.
         * We do it in this order in case the rtx list hasn't been
         * alloc'd yet; if it hasn't and our alloc fails, that's okay,
         * Overflows are also okay.
         */
        if (block_mrec != NULL) {
            l_difference_in_a(rtxp->rtx_allow,
                &block_mrec->mrec_srcs);
        }
        if (allow_mrec != NULL) {
            l_difference_in_a(rtxp->rtx_block,
                &allow_mrec->mrec_srcs);
            l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
                &ovf);
        }
        if (block_mrec != NULL) {
            l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
                &ovf);
            COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
        } else {
            /* no BLOCK mrec in the list; build one from rtx state */
            rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
        }
        if (allow_mrec != NULL) {
            COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
        } else {
            /* no ALLOW mrec in the list; build one from rtx state */
            rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
        }
    }

    return (rtnmrec);
}