1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 /* 31 * Internet Group Management Protocol (IGMP) routines. 32 * Multicast Listener Discovery Protocol (MLD) routines. 33 * 34 * Written by Steve Deering, Stanford, May 1988. 35 * Modified by Rosen Sharma, Stanford, Aug 1994. 36 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
37 * 38 * MULTICAST 3.5.1.1 39 */ 40 41 42 #include <sys/types.h> 43 #include <sys/stream.h> 44 #include <sys/dlpi.h> 45 #include <sys/stropts.h> 46 #include <sys/strlog.h> 47 #include <sys/strsun.h> 48 #include <sys/systm.h> 49 #include <sys/ddi.h> 50 #include <sys/sunddi.h> 51 #include <sys/cmn_err.h> 52 #include <sys/atomic.h> 53 #include <sys/zone.h> 54 55 #include <sys/param.h> 56 #include <sys/socket.h> 57 #define _SUN_TPI_VERSION 2 58 #include <sys/tihdr.h> 59 #include <inet/ipclassifier.h> 60 #include <net/if.h> 61 #include <net/if_arp.h> 62 #include <sys/sockio.h> 63 #include <net/route.h> 64 #include <netinet/in.h> 65 #include <netinet/igmp_var.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 69 #include <inet/common.h> 70 #include <inet/mi.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 #include <inet/ip.h> 74 #include <inet/ip6.h> 75 #include <inet/ip_multi.h> 76 #include <inet/ip_listutils.h> 77 78 #include <netinet/igmp.h> 79 #include <inet/ip_if.h> 80 #include <net/pfkeyv2.h> 81 #include <inet/ipsec_info.h> 82 83 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 84 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 85 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 86 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 87 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 88 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 89 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 90 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 91 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 92 slist_t *srclist, mrec_t *next); 93 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 94 mcast_record_t rtype, slist_t *flist); 95 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 96 97 /* Following protected by igmp_timer_lock */ 98 static int igmp_time_to_next; /* Time 
since last timeout */
static int	igmp_timer_fired_last;
uint_t		igmp_deferred_next = INFINITY;
timeout_id_t	igmp_timeout_id = 0;
kmutex_t	igmp_timer_lock;

/* Protected by igmp_slowtimeout_lock */
timeout_id_t	igmp_slowtimeout_id = 0;
kmutex_t	igmp_slowtimeout_lock;

/* Following protected by mld_timer_lock */
static int	mld_time_to_next;	/* Time since last timeout */
static int	mld_timer_fired_last;
uint_t		mld_deferred_next = INFINITY;
timeout_id_t	mld_timeout_id = 0;
kmutex_t	mld_timer_lock;

/* Protected by mld_slowtimeout_lock */
timeout_id_t	mld_slowtimeout_id = 0;
kmutex_t	mld_slowtimeout_lock;

/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * igmp_start_timers:
 * Arm (or re-arm) the global IGMP report timer so that it fires no later
 * than 'next' milliseconds from now.  The first multicast join triggers
 * the igmp timers / mld timers.  The unit for next is milliseconds.
 *
 * Concurrent callers are serialized through igmp_timer_setter_active; a
 * caller that finds another setter in progress just folds its deadline
 * into igmp_time_to_next and returns, relying on the active setter.
 */
void
igmp_start_timers(unsigned next)
{
	int	time_left;
	/* Protected by igmp_timer_lock */
	static boolean_t igmp_timer_setter_active;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&igmp_timer_lock);

	if (igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		mutex_exit(&igmp_timer_lock);
		return;
	} else {
		igmp_timer_setter_active = B_TRUE;
	}
	if (igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		igmp_time_to_next = next;
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	time_left = igmp_timer_fired_last +
	    MSEC_TO_TICK(igmp_time_to_next) - ddi_get_lbolt();
	/* time_left is in ticks; so is MSEC_TO_TICK(next) */
	if (time_left < MSEC_TO_TICK(next)) {
		/* Pending timeout already fires sooner; nothing to do. */
		igmp_timer_setter_active = B_FALSE;
		mutex_exit(&igmp_timer_lock);
		return;
	}

	/*
	 * Must drop the lock across untimeout(): the handler may be
	 * running and blocked on igmp_timer_lock.
	 */
	mutex_exit(&igmp_timer_lock);
	ret = untimeout(igmp_timeout_id);
	mutex_enter(&igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(igmp_timeout_id == 0);
	} else {
		ASSERT(igmp_timeout_id != 0);
		igmp_timeout_id = 0;
	}
	if (igmp_time_to_next != 0) {
		igmp_time_to_next = MIN(igmp_time_to_next, next);
		igmp_timeout_id = timeout(igmp_timeout_handler, NULL,
		    MSEC_TO_TICK(igmp_time_to_next));
		igmp_timer_setter_active = B_FALSE;
	}
	/*
	 * NOTE(review): if igmp_time_to_next == 0 here, the setter-active
	 * flag is never cleared on this path — confirm the timeout handler
	 * guarantees a nonzero igmp_time_to_next before this can happen.
	 */
	mutex_exit(&igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
216 */ 217 void 218 mld_start_timers(unsigned next) 219 { 220 int time_left; 221 /* Protedted by mld_timer_lock */ 222 static boolean_t mld_timer_setter_active; 223 int ret; 224 225 ASSERT(next != 0 && next != INFINITY); 226 227 mutex_enter(&mld_timer_lock); 228 if (mld_timer_setter_active) { 229 /* 230 * Serialize timer setters, one at a time. If the 231 * timer is currently being set by someone, 232 * just record the next time when it has to be 233 * invoked and return. The current setter will 234 * take care. 235 */ 236 mld_time_to_next = MIN(mld_time_to_next, next); 237 mutex_exit(&mld_timer_lock); 238 return; 239 } else { 240 mld_timer_setter_active = B_TRUE; 241 } 242 if (mld_timeout_id == 0) { 243 /* 244 * The timer is inactive. We need to start a timer 245 */ 246 mld_time_to_next = next; 247 mld_timeout_id = timeout(mld_timeout_handler, NULL, 248 MSEC_TO_TICK(mld_time_to_next)); 249 mld_timer_setter_active = B_FALSE; 250 mutex_exit(&mld_timer_lock); 251 return; 252 } 253 254 /* 255 * The timer was scheduled sometime back for firing in 256 * 'igmp_time_to_next' ms and is active. We need to 257 * reschedule the timeout if the new 'next' will happen 258 * earlier than the currently scheduled timeout 259 */ 260 time_left = mld_timer_fired_last + 261 MSEC_TO_TICK(mld_time_to_next) - ddi_get_lbolt(); 262 if (time_left < MSEC_TO_TICK(next)) { 263 mld_timer_setter_active = B_FALSE; 264 mutex_exit(&mld_timer_lock); 265 return; 266 } 267 268 mutex_exit(&mld_timer_lock); 269 ret = untimeout(mld_timeout_id); 270 mutex_enter(&mld_timer_lock); 271 /* 272 * The timeout was cancelled, or the timeout handler 273 * completed, while we were blocked in the untimeout. 274 * No other thread could have set the timer meanwhile 275 * since we serialized all the timer setters. Thus 276 * no timer is currently active nor executing nor will 277 * any timer fire in the future. We start the timer now 278 * if needed. 
279 */ 280 if (ret == -1) { 281 ASSERT(mld_timeout_id == 0); 282 } else { 283 ASSERT(mld_timeout_id != 0); 284 mld_timeout_id = 0; 285 } 286 if (mld_time_to_next != 0) { 287 mld_time_to_next = MIN(mld_time_to_next, next); 288 mld_timeout_id = timeout(mld_timeout_handler, NULL, 289 MSEC_TO_TICK(mld_time_to_next)); 290 mld_timer_setter_active = B_FALSE; 291 } 292 mutex_exit(&mld_timer_lock); 293 } 294 295 /* 296 * igmp_input: 297 * Return 0 if the message is OK and should be handed to "raw" receivers. 298 * Callers of igmp_input() may need to reinitialize variables that were copied 299 * from the mblk as this calls pullupmsg(). 300 */ 301 /* ARGSUSED */ 302 int 303 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 304 { 305 igmpa_t *igmpa; 306 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 307 int iphlen, igmplen, mblklen; 308 ilm_t *ilm; 309 uint32_t src, dst; 310 uint32_t group; 311 uint_t next; 312 ipif_t *ipif; 313 314 ASSERT(ill != NULL); 315 ASSERT(!ill->ill_isv6); 316 ++igmpstat.igps_rcv_total; 317 318 mblklen = MBLKL(mp); 319 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 320 ++igmpstat.igps_rcv_tooshort; 321 freemsg(mp); 322 return (-1); 323 } 324 igmplen = ntohs(ipha->ipha_length) - iphlen; 325 /* 326 * Since msg sizes are more variable with v3, just pullup the 327 * whole thing now. 
328 */ 329 if (MBLKL(mp) < (igmplen + iphlen)) { 330 mblk_t *mp1; 331 if ((mp1 = msgpullup(mp, -1)) == NULL) { 332 ++igmpstat.igps_rcv_tooshort; 333 freemsg(mp); 334 return (-1); 335 } 336 freemsg(mp); 337 mp = mp1; 338 ipha = (ipha_t *)(mp->b_rptr); 339 } 340 341 /* 342 * Validate lengths 343 */ 344 if (igmplen < IGMP_MINLEN) { 345 ++igmpstat.igps_rcv_tooshort; 346 freemsg(mp); 347 return (-1); 348 } 349 /* 350 * Validate checksum 351 */ 352 if (IP_CSUM(mp, iphlen, 0)) { 353 ++igmpstat.igps_rcv_badsum; 354 freemsg(mp); 355 return (-1); 356 } 357 358 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 359 src = ipha->ipha_src; 360 dst = ipha->ipha_dst; 361 if (ip_debug > 1) 362 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 363 "igmp_input: src 0x%x, dst 0x%x on %s\n", 364 (int)ntohl(src), (int)ntohl(dst), 365 ill->ill_name); 366 367 switch (igmpa->igmpa_type) { 368 case IGMP_MEMBERSHIP_QUERY: 369 /* 370 * packet length differentiates between v1/v2 and v3 371 * v1/v2 should be exactly 8 octets long; v3 is >= 12 372 */ 373 if (igmplen == IGMP_MINLEN) { 374 next = igmp_query_in(ipha, igmpa, ill); 375 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 376 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 377 igmplen); 378 } else { 379 ++igmpstat.igps_rcv_tooshort; 380 freemsg(mp); 381 return (-1); 382 } 383 if (next == 0) { 384 freemsg(mp); 385 return (-1); 386 } 387 388 if (next != INFINITY) 389 igmp_start_timers(next); 390 391 break; 392 393 case IGMP_V1_MEMBERSHIP_REPORT: 394 case IGMP_V2_MEMBERSHIP_REPORT: 395 /* 396 * For fast leave to work, we have to know that we are the 397 * last person to send a report for this group. Reports 398 * generated by us are looped back since we could potentially 399 * be a multicast router, so discard reports sourced by me. 
400 */ 401 mutex_enter(&ill->ill_lock); 402 for (ipif = ill->ill_ipif; ipif != NULL; 403 ipif = ipif->ipif_next) { 404 if (ipif->ipif_lcl_addr == src) { 405 if (ip_debug > 1) { 406 (void) mi_strlog(ill->ill_rq, 407 1, 408 SL_TRACE, 409 "igmp_input: we are only " 410 "member src 0x%x ipif_local 0x%x", 411 (int)ntohl(src), 412 (int) 413 ntohl(ipif->ipif_lcl_addr)); 414 } 415 mutex_exit(&ill->ill_lock); 416 return (0); 417 } 418 } 419 mutex_exit(&ill->ill_lock); 420 421 ++igmpstat.igps_rcv_reports; 422 group = igmpa->igmpa_group; 423 if (!CLASSD(group)) { 424 ++igmpstat.igps_rcv_badreports; 425 freemsg(mp); 426 return (-1); 427 } 428 429 /* 430 * KLUDGE: if the IP source address of the report has an 431 * unspecified (i.e., zero) subnet number, as is allowed for 432 * a booting host, replace it with the correct subnet number 433 * so that a process-level multicast routing demon can 434 * determine which subnet it arrived from. This is necessary 435 * to compensate for the lack of any way for a process to 436 * determine the arrival interface of an incoming packet. 437 * 438 * Requires that a copy of *this* message it passed up 439 * to the raw interface which is done by our caller. 440 */ 441 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 442 /* Pick the first ipif on this ill */ 443 mutex_enter(&ill->ill_lock); 444 src = ill->ill_ipif->ipif_subnet; 445 mutex_exit(&ill->ill_lock); 446 ip1dbg(("igmp_input: changed src to 0x%x\n", 447 (int)ntohl(src))); 448 ipha->ipha_src = src; 449 } 450 451 /* 452 * If we belong to the group being reported, and 453 * we are a 'Delaying member' in the RFC terminology, 454 * stop our timer for that group and 'clear flag' i.e. 455 * mark as IGMP_OTHERMEMBER. Do this for all logical 456 * interfaces on the given physical interface. 
457 */ 458 mutex_enter(&ill->ill_lock); 459 for (ipif = ill->ill_ipif; ipif != NULL; 460 ipif = ipif->ipif_next) { 461 ilm = ilm_lookup_ipif(ipif, group); 462 if (ilm != NULL) { 463 ++igmpstat.igps_rcv_ourreports; 464 ilm->ilm_timer = INFINITY; 465 ilm->ilm_state = IGMP_OTHERMEMBER; 466 } 467 } /* for */ 468 mutex_exit(&ill->ill_lock); 469 break; 470 471 case IGMP_V3_MEMBERSHIP_REPORT: 472 /* 473 * Currently nothing to do here; IGMP router is not 474 * implemented in ip, and v3 hosts don't pay attention 475 * to membership reports. 476 */ 477 break; 478 } 479 /* 480 * Pass all valid IGMP packets up to any process(es) listening 481 * on a raw IGMP socket. Do not free the packet. 482 */ 483 return (0); 484 } 485 486 static uint_t 487 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 488 { 489 ilm_t *ilm; 490 int timer; 491 uint_t next; 492 493 ++igmpstat.igps_rcv_queries; 494 495 /* 496 * In the IGMPv2 specification, there are 3 states and a flag. 497 * 498 * In Non-Member state, we simply don't have a membership record. 499 * In Delaying Member state, our timer is running (ilm->ilm_timer 500 * < INFINITY). In Idle Member state, our timer is not running 501 * (ilm->ilm_timer == INFINITY). 502 * 503 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 504 * we have heard a report from another member, or IGMP_IREPORTEDLAST 505 * if I sent the last report. 506 */ 507 if (igmpa->igmpa_code == 0) { 508 /* 509 * Query from an old router. 510 * Remember that the querier on this interface is old, 511 * and set the timer to the value in RFC 1112. 
512 */ 513 514 515 mutex_enter(&ill->ill_lock); 516 ill->ill_mcast_v1_time = 0; 517 ill->ill_mcast_v1_tset = 1; 518 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 519 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 520 "to IGMP_V1_ROUTER\n", ill->ill_name)); 521 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 522 ill->ill_mcast_type = IGMP_V1_ROUTER; 523 } 524 mutex_exit(&ill->ill_lock); 525 526 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 527 528 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 529 igmpa->igmpa_group != 0) { 530 ++igmpstat.igps_rcv_badqueries; 531 return (0); 532 } 533 534 } else { 535 in_addr_t group; 536 537 /* 538 * Query from a new router 539 * Simply do a validity check 540 */ 541 group = igmpa->igmpa_group; 542 if (group != 0 && (!CLASSD(group))) { 543 ++igmpstat.igps_rcv_badqueries; 544 return (0); 545 } 546 547 /* 548 * Switch interface state to v2 on receipt of a v2 query 549 * ONLY IF current state is v3. Let things be if current 550 * state if v1 but do reset the v2-querier-present timer. 551 */ 552 mutex_enter(&ill->ill_lock); 553 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 554 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 555 "to IGMP_V2_ROUTER", ill->ill_name)); 556 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 557 ill->ill_mcast_type = IGMP_V2_ROUTER; 558 } 559 ill->ill_mcast_v2_time = 0; 560 ill->ill_mcast_v2_tset = 1; 561 mutex_exit(&ill->ill_lock); 562 563 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 564 } 565 566 if (ip_debug > 1) { 567 mutex_enter(&ill->ill_lock); 568 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 569 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 570 (int)ntohs(igmpa->igmpa_code), 571 (int)ntohs(igmpa->igmpa_type)); 572 mutex_exit(&ill->ill_lock); 573 } 574 575 /* 576 * -Start the timers in all of our membership records 577 * for the physical interface on which the query 578 * arrived, excluding those that belong to the "all 579 * hosts" group (224.0.0.1). 
580 * 581 * -Restart any timer that is already running but has 582 * a value longer than the requested timeout. 583 * 584 * -Use the value specified in the query message as 585 * the maximum timeout. 586 */ 587 next = (unsigned)INFINITY; 588 mutex_enter(&ill->ill_lock); 589 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 590 591 /* 592 * A multicast router joins INADDR_ANY address 593 * to enable promiscuous reception of all 594 * mcasts from the interface. This INADDR_ANY 595 * is stored in the ilm_v6addr as V6 unspec addr 596 */ 597 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 598 continue; 599 if (ilm->ilm_addr == htonl(INADDR_ANY)) 600 continue; 601 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 602 (igmpa->igmpa_group == 0) || 603 (igmpa->igmpa_group == ilm->ilm_addr)) { 604 if (ilm->ilm_timer > timer) { 605 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 606 if (ilm->ilm_timer < next) 607 next = ilm->ilm_timer; 608 } 609 } 610 } 611 mutex_exit(&ill->ill_lock); 612 613 return (next); 614 } 615 616 static uint_t 617 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 618 { 619 uint_t i, next, mrd, qqi, timer, delay, numsrc; 620 ilm_t *ilm; 621 ipaddr_t *src_array; 622 uint8_t qrv; 623 624 /* make sure numsrc matches packet size */ 625 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 626 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 627 ++igmpstat.igps_rcv_tooshort; 628 return (0); 629 } 630 src_array = (ipaddr_t *)&igmp3qa[1]; 631 632 ++igmpstat.igps_rcv_queries; 633 634 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 635 uint_t hdrval, mant, exp; 636 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 637 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 638 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 639 mrd = (mant | 0x10) << (exp + 3); 640 } 641 if (mrd == 0) 642 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 643 timer = DSEC_TO_MSEC(mrd); 644 MCAST_RANDOM_DELAY(delay, timer); 645 next = (unsigned)INFINITY; 646 647 if ((qrv = 
igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* Decode QQIC: fixed-point encoding if >= FPMIN */
	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (RFC3376 section 5.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < delay) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending upon query type:
	 * general, group specific, or group/source specific.
	 */
	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time]).
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = delay;
		next = ill->ill_global_timer;
		mutex_exit(&ill->ill_lock);

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
			/* skip non-v4, promiscuous, all-hosts, other groups */
			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
				continue;
			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					IN6_IPADDR_TO_V4MAPPED(src_array[i],
					    &(pktl->sl_addr[i]));
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* choose soonest timer */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * igmp_joingroup:
 * Called on a new (IPv4) group join.  All-hosts joins are silent
 * (IGMP_OTHERMEMBER); otherwise send the report appropriate to the
 * current querier version and schedule the report retransmit timer.
 * Caller must be the writer on ill's ipsq.
 */
void
igmp_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
		/* All-hosts membership is never reported. */
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		ip1dbg(("Querier mode %d, sending report, group %x\n",
		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
		/* Drop ill_lock around the sends; sendpkt may block. */
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm,
			    IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ilm->ilm_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call igmp_start_timers from
		 * here. igmp_start_timers needs to call untimeout, and we
		 * can't hold the ipsq across untimeout since
		 * igmp_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit.
		 */
		mutex_enter(&igmp_timer_lock);
		igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    igmp_deferred_next);
		mutex_exit(&igmp_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * mld_joingroup:
 * IPv6 counterpart of igmp_joingroup(): report a newly joined group
 * using MLDv1 or MLDv2 as appropriate and schedule the retransmit
 * timer.  Caller must be the writer on ill's ipsq.
 */
void
mld_joingroup(ilm_t *ilm)
{
	ill_t	*ill;

	ill = ilm->ilm_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		/* All-hosts membership is never reported. */
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			mldv2_sendrpt(ill, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call mld_start_timers from
		 * here. mld_start_timers needs to call untimeout, and we
		 * can't hold the ipsq (i.e. the lock) across untimeout
		 * since mld_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit
		 */
		mutex_enter(&mld_timer_lock);
		mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    mld_deferred_next);
		mutex_exit(&mld_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * igmp_leavegroup:
 * Called when the last member of a group leaves.  Sends a Leave (v2)
 * or the appropriate v3 state-change record if we were the last
 * reporter; silent for v1 routers and for the all-hosts group.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * IPv6 counterpart of igmp_leavegroup(): send a Listener Done (MLDv1)
 * or the appropriate MLDv2 state-change record when leaving a group;
 * silent for the all-hosts group.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * Build and send an IGMPv3 State Change Report reflecting a filter
 * mode/source-list change on ilm (fmode/flist are the new state).
 * No-op unless the querier on the interface is an IGMPv3 router.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/* on alloc failure fall back to a TO_EX/TO_IN report */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&igmp_timer_lock);
		igmp_deferred_next = MIN(igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		mutex_exit(&igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange:
 * IPv6 counterpart of igmp_statechange(): build and send an MLDv2
 * State Change Report for a filter mode/source-list change on ilm.
 * No-op unless the querier on the interface is an MLDv2 router.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/* on alloc failure fall back to a TO_EX/TO_IN report */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp =
			    mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a deferred start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&mld_timer_lock);
		mld_deferred_next =
		    MIN(mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		mutex_exit(&mld_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	mldv2_sendrpt(ill, rp);
}

/*
 * Process expiry of the IGMP timers on a single ill: the interface-wide
 * (general query) timer, each ilm's group/group-and-source timer, and
 * each ilm's retransmit timer.  elapsed is the time consumed since the
 * last run.  Returns the delay until the next pending event on this ill,
 * or INFINITY if none remain.  Caller must be the ipsq writer for the
 * ill; ill_lock is dropped and reacquired around each report transmit.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	uint_t next = INFINITY;
	ilm_t *ilm;
	ipif_t *ipif;
	mrec_t *rp = NULL;
	mrec_t *rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			/* not yet expired; just age it and track 'next' */
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d elap %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			rp = NULL;
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmits pending; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timerout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimeo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */

/* ARGSUSED */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t  global_next = INFINITY;
	uint_t  next;
	ill_walk_context_t ctx;
	boolean_t success;

	/* consume the elapsed time under igmp_timer_lock */
	mutex_enter(&igmp_timer_lock);
	ASSERT(igmp_timeout_id != 0);
	igmp_timer_fired_last = ddi_get_lbolt();
	elapsed = igmp_time_to_next;
	igmp_time_to_next = 0;
	mutex_exit(&igmp_timer_lock);

	rw_enter(&ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE,
			    B_TRUE);
		}
		rw_enter(&ill_g_lock, RW_READER);
		/* pairs with the ill_waiter_inc() above */
		ill_waiter_dcr(ill);
	}
	rw_exit(&ill_g_lock);

	mutex_enter(&igmp_timer_lock);
	ASSERT(igmp_timeout_id != 0);
	igmp_timeout_id = 0;
	mutex_exit(&igmp_timer_lock);

	if (global_next != INFINITY)
		igmp_start_timers(global_next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next (tick).
 * Returns number of ticks to next event (or 0 if none).
 */
/* ARGSUSED */
/*
 * MLD analogue of igmp_timeout_handler_per_ill(): process expiry of the
 * interface-wide timer, each ilm's group timer, and each ilm's
 * retransmit timer on a single (IPv6) ill.  Returns the delay until the
 * next pending event, or INFINITY.  Caller must be the ipsq writer.
 * Unlike the IGMP version, v2 reports are accumulated in rp/rtxrp and
 * transmitted after the loop.
 */
uint_t
mld_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d elap %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/* group-specific request, or no memory
				 * for a source-specific reply */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/*
	 * NOTE(review): rp/rtxrp are handed to mldv2_sendrpt() (which also
	 * frees them) only when the ill is still in MLD_V2_ROUTER mode --
	 * confirm the lists cannot be left unfreed if the mode changed
	 * while the lock was dropped above.
	 */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
/* ARGSUSED */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;

	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timer_fired_last = ddi_get_lbolt();
	elapsed = mld_time_to_next;
	mld_time_to_next = 0;
	mutex_exit(&mld_timer_lock);

	rw_enter(&ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = mld_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
			    B_FALSE);
		}
		rw_enter(&ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ill_g_lock);

	mutex_enter(&mld_timer_lock);
	ASSERT(mld_timeout_id != 0);
	mld_timeout_id = 0;
	mutex_exit(&mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didn't hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 */
/* ARGSUSED */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST; ifp != (ill_if_t *)&IP_V4_ILL_G_LIST;
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/* a V2 querier may still be present */
					if (ill->ill_mcast_v2_tset > 0) {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V2\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V2_ROUTER;
					} else {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V3\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V3_ROUTER;
					}
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
				if (ill->ill_mcast_v2_time >= OVQP(ill)) {
					ip1dbg(("V2 query timer expired on "
					    "%s; switching mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type = IGMP_V3_ROUTER;
					ill->ill_mcast_v2_time = 0;
					ill->ill_mcast_v2_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v2, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}

	}
	rw_exit(&ill_g_lock);
	/* re-arm ourselves; igmp_slowtimeout_lock protects the timeout id */
	mutex_enter(&igmp_slowtimeout_lock);
	igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t *ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;

	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST; ifp != (ill_if_t *)&IP_V6_ILL_G_LIST;
	    ifp = ifp->illif_next) {

		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_type == MLD_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					ip1dbg(("MLD query timer expired on"
					    " %s; switching mode to MLD_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type = MLD_V2_ROUTER;
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ill_g_lock);
	mutex_enter(&mld_slowtimeout_lock);
	mld_slowtimeout_id = timeout(mld_slowtimo, NULL,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size  = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill  = ipif->ipif_ill;	/* Will be the "lower" ill */
	mblk_t	*first_mp;
	ipsec_out_t *io;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 *
	 * We also need to make sure that this does not get load balanced
	 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
	 * here. If it gets load balanced, switches supporting igmp snooping
	 * will send the packet that it receives for this multicast group
	 * to the interface that we are sending on. As we have joined the
	 * multicast group on this ill, by sending the packet out on this
	 * ill, we receive all the packets back on this ill.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	io->ipsec_out_zoneid = ilm->ilm_zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* layout: IPv4 header, router-alert option, IGMP header */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type   = type;
	igmpa->igmpa_code   = 0;
	igmpa->igmpa_group  = ilm->ilm_addr;
	igmpa->igmpa_cksum  = 0;
	igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);

	rtralert[0] = IPOPT_COPY & IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service	= 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl		= IGMP_TTL;
	ipha->ipha_protocol	= IPPROTO_IGMP;
	ipha->ipha_hdr_checksum	= 0;
	ipha->ipha_dst		= addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src		= ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif);

	++igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *first_mp, *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	ill_t *ill = ipif->ipif_ill;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * Walk the record list deciding how many records (numrec) fit in
	 * this packet; on overflow, arrange next_reclist/more_src_cnt so
	 * we loop back to nextpkt for the remainder.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	io->ipsec_out_zoneid = ipif->ipif_zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/* layout: IPv4 header, router-alert, v3 report hdr, group records */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY & IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 *
	 * NOTE(review): unlike igmp_sendpkt(), the loopback copy here is
	 * made from mp (without the M_CTL ipsec_out block) rather than
	 * first_mp -- confirm this asymmetry is intentional.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif);

	++igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/* shift the unsent sources to the list head */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
	uint_t		next;
	int		mldlen;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t
*)(&mp->b_rptr[hdr_length]); 2212 2213 switch (mldh->mld_type) { 2214 case MLD_LISTENER_QUERY: 2215 /* 2216 * packet length differentiates between v1 and v2. v1 2217 * query should be exactly 24 octets long; v2 is >= 28. 2218 */ 2219 if (mldlen == MLD_MINLEN) { 2220 next = mld_query_in(mldh, ill); 2221 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2222 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2223 } else { 2224 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2225 freemsg(mp); 2226 return; 2227 } 2228 if (next == 0) { 2229 freemsg(mp); 2230 return; 2231 } 2232 2233 if (next != INFINITY) 2234 mld_start_timers(next); 2235 break; 2236 2237 case MLD_LISTENER_REPORT: { 2238 2239 ASSERT(ill->ill_ipif != NULL); 2240 /* 2241 * For fast leave to work, we have to know that we are the 2242 * last person to send a report for this group. Reports 2243 * generated by us are looped back since we could potentially 2244 * be a multicast router, so discard reports sourced by me. 2245 */ 2246 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2247 mutex_enter(&ill->ill_lock); 2248 for (ipif = ill->ill_ipif; ipif != NULL; 2249 ipif = ipif->ipif_next) { 2250 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2251 lcladdr_ptr)) { 2252 if (ip_debug > 1) { 2253 char buf1[INET6_ADDRSTRLEN]; 2254 char buf2[INET6_ADDRSTRLEN]; 2255 2256 (void) mi_strlog(ill->ill_rq, 2257 1, 2258 SL_TRACE, 2259 "mld_input: we are only " 2260 "member src %s ipif_local %s", 2261 inet_ntop(AF_INET6, lcladdr_ptr, 2262 buf1, sizeof (buf1)), 2263 inet_ntop(AF_INET6, 2264 &ipif->ipif_v6lcl_addr, 2265 buf2, sizeof (buf2))); 2266 } 2267 mutex_exit(&ill->ill_lock); 2268 freemsg(mp); 2269 return; 2270 } 2271 } 2272 mutex_exit(&ill->ill_lock); 2273 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2274 2275 v6group_ptr = &mldh->mld_addr; 2276 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2277 BUMP_MIB(ill->ill_icmp6_mib, 2278 ipv6IfIcmpInGroupMembBadReports); 2279 freemsg(mp); 2280 return; 2281 } 2282 2283 2284 
/* 2285 * If we belong to the group being reported, and we are a 2286 * 'Delaying member' per the RFC terminology, stop our timer 2287 * for that group and 'clear flag' i.e. mark ilm_state as 2288 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2289 * membership entries for the same group address (one per zone) 2290 * so we need to walk the ill_ilm list. 2291 */ 2292 mutex_enter(&ill->ill_lock); 2293 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2294 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2295 continue; 2296 BUMP_MIB(ill->ill_icmp6_mib, 2297 ipv6IfIcmpInGroupMembOurReports); 2298 2299 ilm->ilm_timer = INFINITY; 2300 ilm->ilm_state = IGMP_OTHERMEMBER; 2301 } 2302 mutex_exit(&ill->ill_lock); 2303 break; 2304 } 2305 case MLD_LISTENER_REDUCTION: 2306 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2307 break; 2308 } 2309 /* 2310 * All MLD packets have already been passed up to any 2311 * process(es) listening on a ICMP6 raw socket. This 2312 * has been accomplished in ip_deliver_local_v6 prior to 2313 * this function call. It is assumed that the multicast daemon 2314 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2315 * ICMP6_FILTER socket option to only receive the MLD messages) 2316 * Thus we can free the MLD message block here 2317 */ 2318 freemsg(mp); 2319 } 2320 2321 /* 2322 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2323 * (non-zero, unsigned) timer value to be set on success. 2324 */ 2325 static uint_t 2326 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2327 { 2328 ilm_t *ilm; 2329 int timer; 2330 uint_t next; 2331 in6_addr_t *v6group; 2332 2333 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2334 2335 /* 2336 * In the MLD specification, there are 3 states and a flag. 2337 * 2338 * In Non-Listener state, we simply don't have a membership record. 
2339 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2340 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2341 * INFINITY) 2342 * 2343 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2344 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2345 * if I sent the last report. 2346 */ 2347 v6group = &mldh->mld_addr; 2348 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2349 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2350 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2351 return (0); 2352 } 2353 2354 /* Need to do compatibility mode checking */ 2355 mutex_enter(&ill->ill_lock); 2356 ill->ill_mcast_v1_time = 0; 2357 ill->ill_mcast_v1_tset = 1; 2358 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2359 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2360 "MLD_V1_ROUTER\n", ill->ill_name)); 2361 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2362 ill->ill_mcast_type = MLD_V1_ROUTER; 2363 } 2364 mutex_exit(&ill->ill_lock); 2365 2366 timer = (int)ntohs(mldh->mld_maxdelay); 2367 if (ip_debug > 1) { 2368 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2369 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2370 timer, (int)mldh->mld_type); 2371 } 2372 2373 /* 2374 * -Start the timers in all of our membership records for 2375 * the physical interface on which the query arrived, 2376 * excl: 2377 * 1. those that belong to the "all hosts" group, 2378 * 2. those with 0 scope, or 1 node-local scope. 2379 * 2380 * -Restart any timer that is already running but has a value 2381 * longer that the requested timeout. 2382 * -Use the value specified in the query message as the 2383 * maximum timeout. 
2384 */ 2385 next = INFINITY; 2386 mutex_enter(&ill->ill_lock); 2387 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2388 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2389 2390 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2391 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2392 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2393 continue; 2394 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2395 &ipv6_all_hosts_mcast)) && 2396 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2397 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2398 if (timer == 0) { 2399 /* Respond immediately */ 2400 ilm->ilm_timer = INFINITY; 2401 ilm->ilm_state = IGMP_IREPORTEDLAST; 2402 mutex_exit(&ill->ill_lock); 2403 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2404 mutex_enter(&ill->ill_lock); 2405 break; 2406 } 2407 if (ilm->ilm_timer > timer) { 2408 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2409 if (ilm->ilm_timer < next) 2410 next = ilm->ilm_timer; 2411 } 2412 break; 2413 } 2414 } 2415 mutex_exit(&ill->ill_lock); 2416 2417 return (next); 2418 } 2419 2420 /* 2421 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2422 * returns the appropriate (non-zero, unsigned) timer value (which may 2423 * be INFINITY) to be set. 
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi;
	uint8_t	qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* sources immediately follow the fixed query header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * extract Maximum Response Delay from code in header; values
	 * >= MLD_V2_MAXRT_FPMIN are encoded floating-point style as
	 * a 12-bit mantissa and 3-bit exponent.
	 */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;

	/* update robustness variable from the querier (0 means default) */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* QQIC uses the same mantissa/exponent encoding as max response */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < delay) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = delay;
		next = ill->ill_global_timer;
		mutex_exit(&ill->ill_lock);

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty). Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			break;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1.  type is the MLD message
 * type; v6addr, if non-NULL, overrides the group address as the IPv6
 * destination (used e.g. for Done messages sent to all-routers).
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t		*mp;
	mld_hdr_t	*mldh;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t		*ill = ilm->ilm_ill;	/* Will be the "lower" ill */
	ipif_t		*ipif;
	ip6i_t		*ip6i;

	/*
	 * We need to place a router alert option in this packet. The length
	 * of the options must be a multiple of 8. The hbh option header is 2
	 * bytes followed by the 4 byte router alert option. That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int	router_alert_length = 8;

	ASSERT(ill->ill_isv6);

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and
	 * ip_newroute_ipif_v6 knows how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on. As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t) + router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	/* zero-fill: provides the hbh pad bytes and a clean header */
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	/* layout: ip6i | ip6 | hbh + router alert + pad | mld */
	ip6h = (ip6_t *)&ip6i[1];
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1. The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* Otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The
 * report will contain one multicast address record for each element of
 * reclist. If this causes packet length to exceed ill->ill_max_frag,
 * multiple reports are sent. reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	ip6i_t		*ip6i;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	ipif_t		*ipif;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes. We assume here that optlen <= 8, so the total option
	 * length will be 8. Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	/*
	 * Size this packet: walk the record list accumulating record
	 * sizes until the next record would exceed ill_max_frag, in
	 * which case the remainder is deferred to another pass through
	 * nextpkt (morepkts/next_reclist track the continuation point).
	 */
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag -
				    (size + sizeof (mld2mar_t));
				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and
	 * ip_newroute_ipif_v6 know how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on. As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t);
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	/* layout: ip6i | ip6 | hbh + router alert + pad | mld2r | records */
	ip6h = (ip6_t *)&(ip6i[1]);
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	/* fill in one multicast address record per mrec in this packet */
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		/* next record starts right after this one's sources */
		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
2848 */ 2849 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2850 2851 if (morepkts) { 2852 if (more_src_cnt > 0) { 2853 int index, mvsize; 2854 slist_t *sl = &next_reclist->mrec_srcs; 2855 index = sl->sl_numsrc; 2856 mvsize = more_src_cnt * sizeof (in6_addr_t); 2857 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2858 mvsize); 2859 sl->sl_numsrc = more_src_cnt; 2860 } 2861 goto nextpkt; 2862 } 2863 2864 free_reclist: 2865 while (reclist != NULL) { 2866 rp = reclist->mrec_next; 2867 mi_free(reclist); 2868 reclist = rp; 2869 } 2870 } 2871 2872 static mrec_t * 2873 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2874 mrec_t *next) 2875 { 2876 mrec_t *rp; 2877 int i; 2878 2879 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2880 SLIST_IS_EMPTY(srclist)) 2881 return (next); 2882 2883 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2884 if (rp == NULL) 2885 return (next); 2886 2887 rp->mrec_next = next; 2888 rp->mrec_type = type; 2889 rp->mrec_auxlen = 0; 2890 rp->mrec_group = *grp; 2891 if (srclist == NULL) { 2892 rp->mrec_srcs.sl_numsrc = 0; 2893 } else { 2894 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2895 for (i = 0; i < srclist->sl_numsrc; i++) 2896 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2897 } 2898 2899 return (rp); 2900 } 2901 2902 /* 2903 * Set up initial retransmit state. If memory cannot be allocated for 2904 * the source lists, simply create as much state as is possible; memory 2905 * allocation failures are considered one type of transient error that 2906 * the retransmissions are designed to overcome (and if they aren't 2907 * transient, there are bigger problems than failing to notify the 2908 * router about multicast group membership state changes). 
2909 */ 2910 static void 2911 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2912 slist_t *flist) 2913 { 2914 /* 2915 * There are only three possibilities for rtype: 2916 * New join, transition from INCLUDE {} to INCLUDE {flist} 2917 * => rtype is ALLOW_NEW_SOURCES 2918 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2919 * => rtype is CHANGE_TO_EXCLUDE 2920 * State change that involves a filter mode change 2921 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2922 */ 2923 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2924 rtype == ALLOW_NEW_SOURCES); 2925 2926 rtxp->rtx_cnt = ill->ill_mcast_rv; 2927 2928 switch (rtype) { 2929 case CHANGE_TO_EXCLUDE: 2930 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2931 CLEAR_SLIST(rtxp->rtx_allow); 2932 COPY_SLIST(flist, rtxp->rtx_block); 2933 break; 2934 case ALLOW_NEW_SOURCES: 2935 case CHANGE_TO_INCLUDE: 2936 rtxp->rtx_fmode_cnt = 2937 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2938 CLEAR_SLIST(rtxp->rtx_block); 2939 COPY_SLIST(flist, rtxp->rtx_allow); 2940 break; 2941 } 2942 } 2943 2944 /* 2945 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2946 * RFC 3376 section 5.1, covers three cases: 2947 * * The current state change is a filter mode change 2948 * Set filter mode retransmit counter; set retransmit allow or 2949 * block list to new source list as appropriate, and clear the 2950 * retransmit list that was not set; send TO_IN or TO_EX with 2951 * new source list. 2952 * * The current state change is a source list change, but the filter 2953 * mode retransmit counter is > 0 2954 * Decrement filter mode retransmit counter; set retransmit 2955 * allow or block list to new source list as appropriate, 2956 * and clear the retransmit list that was not set; send TO_IN 2957 * or TO_EX with new source list. 2958 * * The current state change is a source list change, and the filter 2959 * mode retransmit counter is 0. 
 *	Merge existing rtx allow and block lists with new state:
 *		rtx_allow = (new allow + rtx_allow) - new block
 *		rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t		*ill;
	rtx_state_t	*rtxp = &ilm->ilm_rtx;
	mcast_record_t	txtype;
	mrec_t		*rp, *rpnext, *rtnmrec;
	boolean_t	ovf;

	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	/* nothing to merge against an empty change list */
	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX. In this case, we just need to set new
	 * retransmit state as if this were an initial join. There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			/* ovf deliberately ignored; see comment above */
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/* no BLOCK mrec came in; build one from rtx state */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* no ALLOW mrec came in; build one from rtx state */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}