1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 90 /* 91 * Macros used to do timer len conversions. Timer values are always 92 * stored and passed to the timer functions as milliseconds; but the 93 * default values and values from the wire may not be. 
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * The first multicast join will trigger the igmp timers / mld timers
 * The unit for next is milliseconds.
 *
 * Arrange for igmp_timeout_handler to run no later than 'next' ms from
 * now.  Concurrent callers are serialized via
 * ips_igmp_timer_setter_active; a caller that finds another setter
 * active just folds its deadline into ips_igmp_time_to_next and
 * returns, trusting the active setter to (re)arm the timeout.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	/*
	 * Ticks remaining before the pending timeout fires; may be
	 * negative if the handler is already overdue.
	 */
	time_left = ipst->ips_igmp_timer_fired_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timeout already fires soon enough; nothing to do */
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * NOTE(review): the lock is dropped across untimeout() —
	 * presumably because igmp_timeout_handler itself takes
	 * ips_igmp_timer_lock and untimeout() waits for a running
	 * handler; confirm against the handler's locking.
	 */
	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(ipst->ips_igmp_timeout_id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 *
 * IPv6 (MLD) twin of igmp_start_timers() above; identical
 * serialization and rescheduling logic, operating on the
 * ips_mld_* timer state.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout
	 */
	/*
	 * Ticks remaining before the pending timeout fires; may be
	 * negative if the handler is already overdue.
	 */
	time_left = ipst->ips_mld_timer_fired_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timeout already fires soon enough; nothing to do */
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(ipst->ips_mld_timeout_id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
/* ARGSUSED */
mblk_t *
igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	igmpa_t		*igmpa;
	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
	int		iphlen, igmplen, mblklen;
	ilm_t		*ilm;
	uint32_t	src, dst;
	uint32_t	group;
	uint_t		next;
	ipif_t		*ipif;
	ip_stack_t	*ipst;

	ASSERT(ill != NULL);
	ASSERT(!ill->ill_isv6);
	ipst = ill->ill_ipst;
	++ipst->ips_igmpstat.igps_rcv_total;

	mblklen = MBLKL(mp);
	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	/* IGMP payload length = total IP length minus the IP header */
	igmplen = ntohs(ipha->ipha_length) - iphlen;
	/*
	 * Since msg sizes are more variable with v3, just pullup the
	 * whole thing now.
	 */
	if (MBLKL(mp) < (igmplen + iphlen)) {
		mblk_t *mp1;
		if ((mp1 = msgpullup(mp, -1)) == NULL) {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		/* Work on the linearized copy from here on */
		freemsg(mp);
		mp = mp1;
		ipha = (ipha_t *)(mp->b_rptr);
	}

	/*
	 * Validate lengths
	 */
	if (igmplen < IGMP_MINLEN) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	/*
	 * Validate checksum
	 */
	if (IP_CSUM(mp, iphlen, 0)) {
		++ipst->ips_igmpstat.igps_rcv_badsum;
		goto bad_pkt;
	}

	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
	src = ipha->ipha_src;
	dst = ipha->ipha_dst;
	if (ip_debug > 1)
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
		    (int)ntohl(src), (int)ntohl(dst),
		    ill->ill_name);

	/*
	 * Message types with no case below (including unrecognized
	 * types) fall through the switch and are passed up to raw
	 * listeners via the return (mp) at the bottom.
	 */
	switch (igmpa->igmpa_type) {
	case IGMP_MEMBERSHIP_QUERY:
		/*
		 * packet length differentiates between v1/v2 and v3
		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
		 */
		if (igmplen == IGMP_MINLEN) {
			next = igmp_query_in(ipha, igmpa, ill);
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
			    igmplen);
		} else {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		/* next == 0 means the query itself was malformed */
		if (next == 0)
			goto bad_pkt;

		if (next != INFINITY)
			igmp_start_timers(next, ipst);

		break;

	case IGMP_V1_MEMBERSHIP_REPORT:
	case IGMP_V2_MEMBERSHIP_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == src) {
				if (ip_debug > 1) {
					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "igmp_input: we are only "
					    "member src 0x%x ipif_local 0x%x",
					    (int)ntohl(src),
					    (int)
					    ntohl(ipif->ipif_lcl_addr));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);

		++ipst->ips_igmpstat.igps_rcv_reports;
		group = igmpa->igmpa_group;
		if (!CLASSD(group)) {
			++ipst->ips_igmpstat.igps_rcv_badreports;
			goto bad_pkt;
		}

		/*
		 * KLUDGE: if the IP source address of the report has an
		 * unspecified (i.e., zero) subnet number, as is allowed for
		 * a booting host, replace it with the correct subnet number
		 * so that a process-level multicast routing demon can
		 * determine which subnet it arrived from. This is necessary
		 * to compensate for the lack of any way for a process to
		 * determine the arrival interface of an incoming packet.
		 *
		 * Requires that a copy of *this* message is passed up
		 * to the raw interface which is done by our caller.
		 */
		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
			/* Pick the first ipif on this ill */
			mutex_enter(&ill->ill_lock);
			src = ill->ill_ipif->ipif_subnet;
			mutex_exit(&ill->ill_lock);
			ip1dbg(("igmp_input: changed src to 0x%x\n",
			    (int)ntohl(src)));
			ipha->ipha_src = src;
		}

		/*
		 * If we belong to the group being reported, and
		 * we are a 'Delaying member' in the RFC terminology,
		 * stop our timer for that group and 'clear flag' i.e.
		 * mark as IGMP_OTHERMEMBER. Do this for all logical
		 * interfaces on the given physical interface.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			ilm = ilm_lookup_ipif(ipif, group);
			if (ilm != NULL) {
				++ipst->ips_igmpstat.igps_rcv_ourreports;
				/* Another member reported; cancel our timer */
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_OTHERMEMBER;
			}
		} /* for */
		mutex_exit(&ill->ill_lock);
		break;

	case IGMP_V3_MEMBERSHIP_REPORT:
		/*
		 * Currently nothing to do here; IGMP router is not
		 * implemented in ip, and v3 hosts don't pay attention
		 * to membership reports.
		 */
		break;
	}
	/*
	 * Pass all valid IGMP packets up to any process(es) listening
	 * on a raw IGMP socket. Do not free the packet.
	 */
	return (mp);

bad_pkt:
	freemsg(mp);
	return (NULL);
}

/*
 * Process an IGMPv1/v2 membership query.  Returns 0 if the query is
 * malformed (caller discards the packet), otherwise the soonest of the
 * membership-report timers scheduled here, in milliseconds (INFINITY
 * if none were scheduled).
 */
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next;
	ip_stack_t	*ipst;

	ipst = ill->ill_ipst;
	++ipst->ips_igmpstat.igps_rcv_queries;

	/*
	 * In the IGMPv2 specification, there are 3 states and a flag.
	 *
	 * In Non-Member state, we simply don't have a membership record.
	 * In Delaying Member state, our timer is running (ilm->ilm_timer
	 * < INFINITY). In Idle Member state, our timer is not running
	 * (ilm->ilm_timer == INFINITY).
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	if (igmpa->igmpa_code == 0) {
		/*
		 * Query from an old router.
		 * Remember that the querier on this interface is old,
		 * and set the timer to the value in RFC 1112.
		 */


		mutex_enter(&ill->ill_lock);
		ill->ill_mcast_v1_time = 0;
		ill->ill_mcast_v1_tset = 1;
		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
			    "to IGMP_V1_ROUTER\n", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
			ill->ill_mcast_type = IGMP_V1_ROUTER;
		}
		mutex_exit(&ill->ill_lock);

		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

		/* A v1 query must be general (to 224.0.0.1, group 0) */
		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
		    igmpa->igmpa_group != 0) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			return (0);
		}

	} else {
		in_addr_t group;

		/*
		 * Query from a new router
		 * Simply do a validity check
		 */
		group = igmpa->igmpa_group;
		if (group != 0 && (!CLASSD(group))) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			return (0);
		}

		/*
		 * Switch interface state to v2 on receipt of a v2 query
		 * ONLY IF current state is v3. Let things be if current
		 * state is v1 but do reset the v2-querier-present timer.
		 */
		mutex_enter(&ill->ill_lock);
		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
			    "to IGMP_V2_ROUTER", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
			ill->ill_mcast_type = IGMP_V2_ROUTER;
		}
		ill->ill_mcast_v2_time = 0;
		ill->ill_mcast_v2_tset = 1;
		mutex_exit(&ill->ill_lock);

		/* v2 max response time is carried in tenths of a second */
		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
	}

	if (ip_debug > 1) {
		mutex_enter(&ill->ill_lock);
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
		    (int)ntohs(igmpa->igmpa_code),
		    (int)ntohs(igmpa->igmpa_type));
		mutex_exit(&ill->ill_lock);
	}

	/*
	 * -Start the timers in all of our membership records
	 * for the physical interface on which the query
	 * arrived, excluding those that belong to the "all
	 * hosts" group (224.0.0.1).
552 * 553 * -Restart any timer that is already running but has 554 * a value longer than the requested timeout. 555 * 556 * -Use the value specified in the query message as 557 * the maximum timeout. 558 */ 559 next = (unsigned)INFINITY; 560 mutex_enter(&ill->ill_lock); 561 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 562 563 /* 564 * A multicast router joins INADDR_ANY address 565 * to enable promiscuous reception of all 566 * mcasts from the interface. This INADDR_ANY 567 * is stored in the ilm_v6addr as V6 unspec addr 568 */ 569 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 570 continue; 571 if (ilm->ilm_addr == htonl(INADDR_ANY)) 572 continue; 573 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 574 (igmpa->igmpa_group == 0) || 575 (igmpa->igmpa_group == ilm->ilm_addr)) { 576 if (ilm->ilm_timer > timer) { 577 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 578 if (ilm->ilm_timer < next) 579 next = ilm->ilm_timer; 580 } 581 } 582 } 583 mutex_exit(&ill->ill_lock); 584 585 return (next); 586 } 587 588 static uint_t 589 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 590 { 591 uint_t i, next, mrd, qqi, timer, delay, numsrc; 592 ilm_t *ilm; 593 ipaddr_t *src_array; 594 uint8_t qrv; 595 ip_stack_t *ipst; 596 597 ipst = ill->ill_ipst; 598 /* make sure numsrc matches packet size */ 599 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 600 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 601 ++ipst->ips_igmpstat.igps_rcv_tooshort; 602 return (0); 603 } 604 src_array = (ipaddr_t *)&igmp3qa[1]; 605 606 ++ipst->ips_igmpstat.igps_rcv_queries; 607 608 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 609 uint_t hdrval, mant, exp; 610 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 611 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 612 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 613 mrd = (mant | 0x10) << (exp + 3); 614 } 615 if (mrd == 0) 616 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 617 timer = DSEC_TO_MSEC(mrd); 618 MCAST_RANDOM_DELAY(delay, 
timer); 619 next = (unsigned)INFINITY; 620 621 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 622 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 623 else 624 ill->ill_mcast_rv = qrv; 625 626 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 627 uint_t hdrval, mant, exp; 628 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 629 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 630 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 631 qqi = (mant | 0x10) << (exp + 3); 632 } 633 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 634 635 /* 636 * If we have a pending general query response that's scheduled 637 * sooner than the delay we calculated for this response, then 638 * no action is required (RFC3376 section 5.2 rule 1) 639 */ 640 mutex_enter(&ill->ill_lock); 641 if (ill->ill_global_timer < delay) { 642 mutex_exit(&ill->ill_lock); 643 return (next); 644 } 645 mutex_exit(&ill->ill_lock); 646 647 /* 648 * Now take action depending upon query type: 649 * general, group specific, or group/source specific. 650 */ 651 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 652 /* 653 * general query 654 * We know global timer is either not running or is 655 * greater than our calculated delay, so reset it to 656 * our delay (random value in range [0, response time]). 657 */ 658 mutex_enter(&ill->ill_lock); 659 ill->ill_global_timer = delay; 660 next = ill->ill_global_timer; 661 mutex_exit(&ill->ill_lock); 662 663 } else { 664 /* group or group/source specific query */ 665 mutex_enter(&ill->ill_lock); 666 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 667 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 668 (ilm->ilm_addr == htonl(INADDR_ANY)) || 669 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 670 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 671 continue; 672 /* 673 * If the query is group specific or we have a 674 * pending group specific query, the response is 675 * group specific (pending sources list should be 676 * empty). 
Otherwise, need to update the pending 677 * sources list for the group and source specific 678 * response. 679 */ 680 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 681 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 682 group_query: 683 FREE_SLIST(ilm->ilm_pendsrcs); 684 ilm->ilm_pendsrcs = NULL; 685 } else { 686 boolean_t overflow; 687 slist_t *pktl; 688 if (numsrc > MAX_FILTER_SIZE || 689 (ilm->ilm_pendsrcs == NULL && 690 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 691 /* 692 * We've been sent more sources than 693 * we can deal with; or we can't deal 694 * with a source list at all. Revert 695 * to a group specific query. 696 */ 697 goto group_query; 698 } 699 if ((pktl = l_alloc()) == NULL) 700 goto group_query; 701 pktl->sl_numsrc = numsrc; 702 for (i = 0; i < numsrc; i++) 703 IN6_IPADDR_TO_V4MAPPED(src_array[i], 704 &(pktl->sl_addr[i])); 705 l_union_in_a(ilm->ilm_pendsrcs, pktl, 706 &overflow); 707 l_free(pktl); 708 if (overflow) 709 goto group_query; 710 } 711 /* choose soonest timer */ 712 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 713 if (ilm->ilm_timer < next) 714 next = ilm->ilm_timer; 715 } 716 mutex_exit(&ill->ill_lock); 717 } 718 719 return (next); 720 } 721 722 void 723 igmp_joingroup(ilm_t *ilm) 724 { 725 ill_t *ill; 726 ip_stack_t *ipst = ilm->ilm_ipst; 727 728 ill = ilm->ilm_ipif->ipif_ill; 729 730 ASSERT(IAM_WRITER_ILL(ill)); 731 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 732 733 mutex_enter(&ill->ill_lock); 734 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 735 ilm->ilm_rtx.rtx_timer = INFINITY; 736 ilm->ilm_state = IGMP_OTHERMEMBER; 737 mutex_exit(&ill->ill_lock); 738 } else { 739 ip1dbg(("Querier mode %d, sending report, group %x\n", 740 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 741 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 742 mutex_exit(&ill->ill_lock); 743 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 744 mutex_enter(&ill->ill_lock); 745 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 746 
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			/* Drop the lock while sending the report */
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ilm->ilm_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state. Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call igmp_start_timers from
		 * here. igmp_start_timers needs to call untimeout, and we
		 * can't hold the ipsq across untimeout since
		 * igmp_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit.
		 */
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    ipst->ips_igmp_deferred_next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	if (ip_debug > 1) {
		/*
		 * NOTE(review): ntohl() on rtx_timer looks dubious --
		 * the timer is kept in host byte order everywhere else.
		 * Harmless here since it's debug-log only, but confirm.
		 */
		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * IPv6 (MLD) counterpart of igmp_joingroup(): send the initial
 * listener report for a newly joined group and arm the
 * retransmission timer.  Runs in writer (ipsq) context.
 */
void
mld_joingroup(ilm_t *ilm)
{
	ill_t	*ill;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ill = ilm->ilm_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		/* Never send reports for the all-hosts group */
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			mldv2_sendrpt(ill, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state. Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call mld_start_timers from
		 * here. mld_start_timers needs to call untimeout, and we
		 * can't hold the ipsq (i.e. the lock) across untimeout
		 * since mld_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit
		 */
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer,
		    ipst->ips_mld_deferred_next);
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	if (ip_debug > 1) {
		/*
		 * NOTE(review): ntohl() on the host-order rtx_timer is
		 * dubious; debug-log only, but confirm (same pattern as
		 * igmp_joingroup).
		 */
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(ilm->ilm_rtx.rtx_timer));
	}
}

/*
 * Leave an IPv4 group: if we were the last host to report this group
 * (IGMP_IREPORTEDLAST) under a v2 querier, send a Leave Group message;
 * under a v3 querier, send the appropriate state-change record.  The
 * all-hosts group is never reported or left on the wire.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * IPv6 (MLD) counterpart of igmp_leavegroup(): send a Listener Done
 * under an MLDv1 querier if we reported last, or a state-change
 * record under an MLDv2 querier.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * Build and send an IGMPv3 State Change Report reflecting a source
 * filter change on 'ilm' from its current (fmode, filter) state to
 * (fmode, flist).  No-op unless the querier on the interface is v3.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill =
		    ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the set difference; fall back
			 * to the coarser TO_EX/TO_IN form of the report.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		/*
		 * rp may legitimately remain NULL if both sets are
		 * empty (no effective change).
		 */
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * IPv6 (MLD) counterpart of igmp_statechange(): build and send an
 * MLDv2 State Change Report for a source filter change on 'ilm'.
 * No-op unless the querier on the interface is MLDv2.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t	*ill;
	mrec_t	*rp = NULL;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
1081 * 1082 * Old State New State State Change Report 1083 * 1084 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 1085 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 1086 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 1087 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 1088 */ 1089 if (ilm->ilm_fmode == fmode) { 1090 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1091 slist_t *allow, *block; 1092 if (((a_minus_b = l_alloc()) == NULL) || 1093 ((b_minus_a = l_alloc()) == NULL)) { 1094 l_free(a_minus_b); 1095 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1096 goto send_to_ex; 1097 else 1098 goto send_to_in; 1099 } 1100 l_difference(ilm->ilm_filter, flist, a_minus_b); 1101 l_difference(flist, ilm->ilm_filter, b_minus_a); 1102 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1103 allow = b_minus_a; 1104 block = a_minus_b; 1105 } else { 1106 allow = a_minus_b; 1107 block = b_minus_a; 1108 } 1109 if (!SLIST_IS_EMPTY(allow)) 1110 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1111 allow, rp); 1112 if (!SLIST_IS_EMPTY(block)) 1113 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1114 block, rp); 1115 l_free(a_minus_b); 1116 l_free(b_minus_a); 1117 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1118 send_to_ex: 1119 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1120 NULL); 1121 } else { 1122 send_to_in: 1123 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1124 NULL); 1125 } 1126 1127 /* 1128 * Need to set up retransmission state; merge the new info with the 1129 * current state (which may be null). If the timer is not currently 1130 * running, start it (need to do a deferred start of the timer as 1131 * we're currently in the sq). 
1132 */ 1133 rp = mcast_merge_rtx(ilm, rp, flist); 1134 ASSERT(ilm->ilm_rtx.rtx_cnt > 0); 1135 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1136 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1137 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 1138 mutex_enter(&ipst->ips_mld_timer_lock); 1139 ipst->ips_mld_deferred_next = 1140 MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer); 1141 mutex_exit(&ipst->ips_mld_timer_lock); 1142 } 1143 1144 mutex_exit(&ill->ill_lock); 1145 mldv2_sendrpt(ill, rp); 1146 } 1147 1148 uint_t 1149 igmp_timeout_handler_per_ill(ill_t *ill, int elapsed) 1150 { 1151 uint_t next = INFINITY; 1152 ilm_t *ilm; 1153 ipif_t *ipif; 1154 mrec_t *rp = NULL; 1155 mrec_t *rtxrp = NULL; 1156 rtx_state_t *rtxp; 1157 mcast_record_t rtype; 1158 1159 ASSERT(IAM_WRITER_ILL(ill)); 1160 1161 mutex_enter(&ill->ill_lock); 1162 1163 /* First check the global timer on this interface */ 1164 if (ill->ill_global_timer == INFINITY) 1165 goto per_ilm_timer; 1166 if (ill->ill_global_timer <= elapsed) { 1167 ill->ill_global_timer = INFINITY; 1168 /* 1169 * Send report for each group on this interface. 1170 * Since we just set the global timer (received a v3 general 1171 * query), need to skip the all hosts addr (224.0.0.1), per 1172 * RFC 3376 section 5. 1173 */ 1174 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1175 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) 1176 continue; 1177 ASSERT(ilm->ilm_ipif != NULL); 1178 ilm->ilm_ipif->ipif_igmp_rpt = 1179 mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr, 1180 ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt); 1181 /* 1182 * Since we're sending a report on this group, okay 1183 * to delete pending group-specific timers. Note 1184 * that group-specific retransmit timers still need 1185 * to be checked in the per_ilm_timer for-loop. 
1186 */ 1187 ilm->ilm_timer = INFINITY; 1188 ilm->ilm_state = IGMP_IREPORTEDLAST; 1189 FREE_SLIST(ilm->ilm_pendsrcs); 1190 ilm->ilm_pendsrcs = NULL; 1191 } 1192 /* 1193 * We've built per-ipif mrec lists; walk the ill's ipif list 1194 * and send a report for each ipif that has an mrec list. 1195 */ 1196 for (ipif = ill->ill_ipif; ipif != NULL; 1197 ipif = ipif->ipif_next) { 1198 if (ipif->ipif_igmp_rpt == NULL) 1199 continue; 1200 mutex_exit(&ill->ill_lock); 1201 igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt); 1202 mutex_enter(&ill->ill_lock); 1203 /* mrec list was freed by igmpv3_sendrpt() */ 1204 ipif->ipif_igmp_rpt = NULL; 1205 } 1206 } else { 1207 ill->ill_global_timer -= elapsed; 1208 if (ill->ill_global_timer < next) 1209 next = ill->ill_global_timer; 1210 } 1211 1212 per_ilm_timer: 1213 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1214 if (ilm->ilm_timer == INFINITY) 1215 goto per_ilm_rtxtimer; 1216 1217 if (ilm->ilm_timer > elapsed) { 1218 ilm->ilm_timer -= elapsed; 1219 if (ilm->ilm_timer < next) 1220 next = ilm->ilm_timer; 1221 1222 if (ip_debug > 1) { 1223 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1224 "igmp_timo_hlr 2: ilm_timr %d elap %d " 1225 "typ %d nxt %d", 1226 (int)ntohl(ilm->ilm_timer), elapsed, 1227 (ill->ill_mcast_type), next); 1228 } 1229 1230 goto per_ilm_rtxtimer; 1231 } 1232 1233 /* the timer has expired, need to take action */ 1234 ilm->ilm_timer = INFINITY; 1235 ilm->ilm_state = IGMP_IREPORTEDLAST; 1236 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1237 mutex_exit(&ill->ill_lock); 1238 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1239 mutex_enter(&ill->ill_lock); 1240 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1241 mutex_exit(&ill->ill_lock); 1242 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1243 mutex_enter(&ill->ill_lock); 1244 } else { 1245 slist_t *rsp; 1246 if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) && 1247 (rsp = l_alloc()) != NULL) { 1248 /* 1249 * Contents of reply depend on pending 1250 * requested source 
list. 1251 */ 1252 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1253 l_intersection(ilm->ilm_filter, 1254 ilm->ilm_pendsrcs, rsp); 1255 } else { 1256 l_difference(ilm->ilm_pendsrcs, 1257 ilm->ilm_filter, rsp); 1258 } 1259 FREE_SLIST(ilm->ilm_pendsrcs); 1260 ilm->ilm_pendsrcs = NULL; 1261 if (!SLIST_IS_EMPTY(rsp)) 1262 rp = mcast_bldmrec(MODE_IS_INCLUDE, 1263 &ilm->ilm_v6addr, rsp, rp); 1264 FREE_SLIST(rsp); 1265 } else { 1266 /* 1267 * Either the pending request is just group- 1268 * specific, or we couldn't get the resources 1269 * (rsp) to build a source-specific reply. 1270 */ 1271 rp = mcast_bldmrec(ilm->ilm_fmode, 1272 &ilm->ilm_v6addr, ilm->ilm_filter, rp); 1273 } 1274 mutex_exit(&ill->ill_lock); 1275 igmpv3_sendrpt(ill->ill_ipif, rp); 1276 mutex_enter(&ill->ill_lock); 1277 rp = NULL; 1278 } 1279 1280 if (ip_debug > 1) { 1281 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1282 "igmp_timo_hlr 1: ilm_timr %d elap %d " 1283 "typ %d nxt %d", 1284 (int)ntohl(ilm->ilm_timer), elapsed, 1285 (ill->ill_mcast_type), next); 1286 } 1287 1288 per_ilm_rtxtimer: 1289 rtxp = &ilm->ilm_rtx; 1290 1291 if (rtxp->rtx_timer == INFINITY) 1292 continue; 1293 if (rtxp->rtx_timer > elapsed) { 1294 rtxp->rtx_timer -= elapsed; 1295 if (rtxp->rtx_timer < next) 1296 next = rtxp->rtx_timer; 1297 continue; 1298 } 1299 1300 rtxp->rtx_timer = INFINITY; 1301 ilm->ilm_state = IGMP_IREPORTEDLAST; 1302 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1303 mutex_exit(&ill->ill_lock); 1304 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1305 mutex_enter(&ill->ill_lock); 1306 continue; 1307 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1308 mutex_exit(&ill->ill_lock); 1309 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1310 mutex_enter(&ill->ill_lock); 1311 continue; 1312 } 1313 1314 /* 1315 * The retransmit timer has popped, and our router is 1316 * IGMPv3. We have to delve into the retransmit state 1317 * stored in the ilm. 1318 * 1319 * Decrement the retransmit count. 
If the fmode rtx 1320 * count is active, decrement it, and send a filter 1321 * mode change report with the ilm's source list. 1322 * Otherwise, send a source list change report with 1323 * the current retransmit lists. 1324 */ 1325 ASSERT(rtxp->rtx_cnt > 0); 1326 ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt); 1327 rtxp->rtx_cnt--; 1328 if (rtxp->rtx_fmode_cnt > 0) { 1329 rtxp->rtx_fmode_cnt--; 1330 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 1331 CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE; 1332 rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 1333 ilm->ilm_filter, rtxrp); 1334 } else { 1335 rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES, 1336 &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp); 1337 rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES, 1338 &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp); 1339 } 1340 if (rtxp->rtx_cnt > 0) { 1341 MCAST_RANDOM_DELAY(rtxp->rtx_timer, 1342 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 1343 if (rtxp->rtx_timer < next) 1344 next = rtxp->rtx_timer; 1345 } else { 1346 CLEAR_SLIST(rtxp->rtx_allow); 1347 CLEAR_SLIST(rtxp->rtx_block); 1348 } 1349 mutex_exit(&ill->ill_lock); 1350 igmpv3_sendrpt(ilm->ilm_ipif, rtxrp); 1351 mutex_enter(&ill->ill_lock); 1352 rtxrp = NULL; 1353 } 1354 1355 mutex_exit(&ill->ill_lock); 1356 1357 return (next); 1358 } 1359 1360 /* 1361 * igmp_timeout_handler: 1362 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick). 1363 * Returns number of ticks to next event (or 0 if none). 1364 * 1365 * As part of multicast join and leave igmp we may need to send out an 1366 * igmp request. The igmp related state variables in the ilm are protected 1367 * by ill_lock. A single global igmp timer is used to track igmp timeouts. 1368 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers 1369 * starts the igmp timer if needed. It serializes multiple threads trying to 1370 * simultaneously start the timer using the igmp_timer_setter_active flag. 
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimeo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
    ill_t *ill;
    int elapsed;    /* Since last call */
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    boolean_t success;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* consume the accumulated elapsed time under the timer lock */
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timer_fired_last = ddi_get_lbolt();
    elapsed = ipst->ips_igmp_time_to_next;
    ipst->ips_igmp_time_to_next = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V4(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(!ill->ill_isv6);
        /*
         * We may not be able to refhold the ill if the ill/ipif
         * is changing.  But we need to make sure that the ill will
         * not vanish.  So we just bump up the ill_waiter count.
         */
        if (!ill_waiter_inc(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        success = ipsq_enter(ill, B_TRUE);
        if (success) {
            next = igmp_timeout_handler_per_ill(ill, elapsed);
            if (next < global_next)
                global_next = next;
            ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE,
                B_TRUE);
        }
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        ill_waiter_dcr(ill);
    }
    rw_exit(&ipst->ips_ill_g_lock);

    /* mark the timer as no longer outstanding */
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timeout_id = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    /* re-arm for the earliest pending timer across all ills */
    if (global_next != INFINITY)
        igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * MLD analogue of igmp_timeout_handler_per_ill(): age and act on the
 * per-ill general-query timer, each ilm's listener timer, and each
 * ilm's retransmit timer, with elapsed in milliseconds.  Unlike the
 * IGMP version, all v2 reports for the ill are accumulated and sent in
 * one batch at the end (MLD ilms hang off the ill, not ipifs).
 * Returns ms until the earliest remaining timer (INFINITY if none).
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
    ilm_t *ilm;
    uint_t next = INFINITY;
    mrec_t *rp, *rtxrp;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    ASSERT(IAM_WRITER_ILL(ill));

    mutex_enter(&ill->ill_lock);

    /*
     * First check the global timer on this interface; the global timer
     * is not used for MLDv1, so if it's set we can assume we're v2.
     */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= elapsed) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v2 general
         * query), need to skip the all hosts addr (ff02::1), per
         * RFC 3810 section 6.
         */
        rp = NULL;
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
                &ipv6_all_hosts_mcast))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        mutex_exit(&ill->ill_lock);
        mldv2_sendrpt(ill, rp);
        mutex_enter(&ill->ill_lock);
    } else {
        ill->ill_global_timer -= elapsed;
        if (ill->ill_global_timer < next)
            next = ill->ill_global_timer;
    }

per_ilm_timer:
    rp = rtxrp = NULL;
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > elapsed) {
            /* not expired; age and track minimum */
            ilm->ilm_timer -= elapsed;
            if (ilm->ilm_timer < next)
                next = ilm->ilm_timer;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr"
                    " %d elap %d typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer), elapsed,
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            mutex_enter(&ill->ill_lock);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Group-specific request, or no memory for a
                 * source-specific reply; report current state.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
        }

        if (ip_debug > 1) {
            (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                "igmp_timo_hlr 1: ilm_timr %d elap %d "
                "typ %d nxt %d",
                (int)ntohl(ilm->ilm_timer), elapsed,
                (ill->ill_mcast_type), next);
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > elapsed) {
            rtxp->rtx_timer -= elapsed;
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            mutex_enter(&ill->ill_lock);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * MLDv2.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
        } else {
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
    }

    /* batched v2 reports go out in one shot; mldv2_sendrpt frees them */
    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        mutex_exit(&ill->ill_lock);
        mldv2_sendrpt(ill, rp);
        mldv2_sendrpt(ill, rtxrp);
        return (next);
    }

    mutex_exit(&ill->ill_lock);

    return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
    ill_t *ill;
    int elapsed;    /* Since last call */
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    boolean_t success;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timer_fired_last = ddi_get_lbolt();
    elapsed = ipst->ips_mld_time_to_next;
    ipst->ips_mld_time_to_next = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V6(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(ill->ill_isv6);
        /*
         * We may not be able to refhold the ill if the ill/ipif
         * is changing.  But we need to make sure that the ill will
         * not vanish.  So we just bump up the ill_waiter count.
         */
        if (!ill_waiter_inc(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        success = ipsq_enter(ill, B_TRUE);
        if (success) {
            next = mld_timeout_handler_per_ill(ill, elapsed);
            if (next < global_next)
                global_next = next;
            /*
             * NOTE(review): the two boolean args here are
             * (B_TRUE, B_FALSE), the reverse of the
             * igmp_timeout_handler() call — confirm intended.
             */
            ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
                B_FALSE);
        }
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        ill_waiter_dcr(ill);
    }
    rw_exit(&ipst->ips_ill_g_lock);

    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timeout_id = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    if (global_next != INFINITY)
        mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
    ((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
    + MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didn't hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 */
void
igmp_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* Hold the ill_g_lock so that we can safely walk the ill list */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);

    /*
     * The ill_if_t list is circular, hence the odd loop parameters.
     *
     * We can't use the ILL_START_WALK and ill_next() wrappers for this
     * walk, as we need to check the illif_mcast_* fields in the ill_if_t
     * structure (allowing us to skip if none of the instances have timers
     * running).
     */
    for (ifp = IP_V4_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        /*
         * illif_mcast_v[12] are set using atomics.  If an ill hears
         * a V1 or V2 query now and we miss seeing the count now,
         * we will see it the next time igmp_slowtimo is called.
         */
        if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            mutex_enter(&ill->ill_lock);
            /* age the "older version querier present" clocks */
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_v2_tset == 1)
                ill->ill_mcast_v2_time++;
            if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
                if (ill->ill_mcast_v1_time >= OVQP(ill)) {
                    /*
                     * V1 querier aged out; fall back to V2
                     * if one is still present, else V3.
                     */
                    if (ill->ill_mcast_v2_tset > 0) {
                        ip1dbg(("V1 query timer "
                            "expired on %s; switching "
                            "mode to IGMP_V2\n",
                            ill->ill_name));
                        ill->ill_mcast_type =
                            IGMP_V2_ROUTER;
                    } else {
                        ip1dbg(("V1 query timer "
                            "expired on %s; switching "
                            "mode to IGMP_V3\n",
                            ill->ill_name));
                        ill->ill_mcast_type =
                            IGMP_V3_ROUTER;
                    }
                    ill->ill_mcast_v1_time = 0;
                    ill->ill_mcast_v1_tset = 0;
                    atomic_add_16(&ifp->illif_mcast_v1, -1);
                }
            }
            if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
                if (ill->ill_mcast_v2_time >= OVQP(ill)) {
                    ip1dbg(("V2 query timer expired on "
                        "%s; switching mode to IGMP_V3\n",
                        ill->ill_name));
                    ill->ill_mcast_type = IGMP_V3_ROUTER;
                    ill->ill_mcast_v2_time = 0;
                    ill->ill_mcast_v2_tset = 0;
                    atomic_add_16(&ifp->illif_mcast_v2, -1);
                }
            }
            mutex_exit(&ill->ill_lock);
        }

    }
    rw_exit(&ipst->ips_ill_g_lock);
    /* reschedule ourselves */
    mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
    ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
        MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* See comments in igmp_slowtimo() above... */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V6_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        if (ifp->illif_mcast_v1 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            mutex_enter(&ill->ill_lock);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_type == MLD_V1_ROUTER) {
                if (ill->ill_mcast_v1_time >= OVQP(ill)) {
                    ip1dbg(("MLD query timer expired on"
                        " %s; switching mode to MLD_V2\n",
                        ill->ill_name));
                    ill->ill_mcast_type = MLD_V2_ROUTER;
                    ill->ill_mcast_v1_time = 0;
                    ill->ill_mcast_v1_tset = 0;
                    atomic_add_16(&ifp->illif_mcast_v1, -1);
                }
            }
            mutex_exit(&ill->ill_lock);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    mutex_enter(&ipst->ips_mld_slowtimeout_lock);
    ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
        MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
    mblk_t *mp;
    igmpa_t *igmpa;
    uint8_t *rtralert;
    ipha_t *ipha;
    int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
    size_t size = hdrlen + sizeof (igmpa_t);
    ipif_t *ipif = ilm->ilm_ipif;
    ill_t *ill = ipif->ipif_ill;	/* Will be the "lower" ill */
    mblk_t *first_mp;
    ipsec_out_t *io;
    zoneid_t zoneid;
    ip_stack_t *ipst = ill->ill_ipst;

    /*
     * We need to make sure this packet goes out on an ipif.  If
     * there is some global policy match in ip_wput_ire, we need
     * to get to the right interface after IPSEC processing.
     * To make sure this multicast packet goes out on the right
     * interface, we attach an ipsec_out and initialize ill_index
     * like we did in ip_wput.  To make sure that this packet does
     * not get forwarded on other interfaces or looped back, we
     * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
     * to B_FALSE.
     *
     * We also need to make sure that this does not get load balanced
     * if it hits ip_newroute_ipif.  So, we initialize ipsec_out_attach_if
     * here.  If it gets load balanced, switches supporting igmp snooping
     * will send the packet that it receives for this multicast group
     * to the interface that we are sending on.  As we have joined the
     * multicast group on this ill, by sending the packet out on this
     * ill, we receive all the packets back on this ill.
     */
    first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
    if (first_mp == NULL)
        return;		/* best-effort: silently drop on allocation failure */

    /* M_CTL carrying the ipsec_out_t precedes the IP datagram */
    first_mp->b_datap->db_type = M_CTL;
    first_mp->b_wptr += sizeof (ipsec_info_t);
    bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
    /* ipsec_out_secure is B_FALSE now */
    io = (ipsec_out_t *)first_mp->b_rptr;
    io->ipsec_out_type = IPSEC_OUT;
    io->ipsec_out_len = sizeof (ipsec_out_t);
    io->ipsec_out_use_global_policy = B_TRUE;
    io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
    io->ipsec_out_attach_if = B_TRUE;
    io->ipsec_out_multicast_loop = B_FALSE;
    io->ipsec_out_dontroute = B_TRUE;
    if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
        zoneid = GLOBAL_ZONEID;
    io->ipsec_out_zoneid = zoneid;
    io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */

    mp = allocb(size, BPRI_HI);
    if (mp == NULL) {
        freemsg(first_mp);
        return;
    }
    mp->b_wptr = mp->b_rptr + size;
    first_mp->b_cont = mp;

    /* layout: IP header | router-alert option | IGMP message */
    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
    igmpa->igmpa_type = type;
    igmpa->igmpa_code = 0;
    igmpa->igmpa_group = ilm->ilm_addr;
    igmpa->igmpa_cksum = 0;
    igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = 0;
    ipha->ipha_length = htons(size);
    ipha->ipha_ident = 0;
    ipha->ipha_fragment_offset_and_flags = 0;
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_hdr_checksum = 0;
    /* addr == 0 means "send to the group being reported" */
    ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
    ipha->ipha_src = ipif->ipif_src_addr;
    /*
     * Request loopback of the report if we are acting as a multicast
     * router, so that the process-level routing daemon can hear it.
     */
    /*
     * This will run multiple times for the same group if there are members
     * on the same group for multiple ipif's on the same ill.  The
     * igmp_input code will suppress this due to the loopback thus we
     * always loopback membership report.
     */
    ASSERT(ill->ill_rq != NULL);
    ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

    ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

    ++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
/*
 * Build and transmit one or more IGMPv3 Membership Reports out the ill
 * underlying `ipif', one group record per mrec in `reclist'.  If the
 * records do not fit in a single packet (ill_max_frag), the list is
 * split across multiple reports via the nextpkt loop below.  The mrec
 * buffers in `reclist' are always freed before returning, even on
 * allocation failure.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *first_mp, *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	ill_t *ill = ipif->ipif_ill;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	/* IPv4 header plus the 4-byte router alert option */
	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass: walk the remaining records, accumulating the wire
	 * size (size) and record count (numrec) that fit in this packet.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					/*
					 * Truncation is allowed for EXCLUDE
					 * mode records; just drop the extra
					 * sources.
					 */
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	/* Allocate and zero the packet itself; chained after the M_CTL. */
	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/* Lay out: IPv4 header, router alert, report header, records. */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* Second pass: fill in exactly numrec group records. */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	/* Checksum covers everything after the IP header + options. */
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources of the split mrec down
			 * to the front of its list before building the
			 * next packet.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* Release the caller's record list; we always consume it. */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}
2126 */ 2127 ASSERT(ill->ill_rq != NULL); 2128 ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid); 2129 2130 ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid); 2131 2132 ++ipst->ips_igmpstat.igps_snd_reports; 2133 2134 if (morepkts) { 2135 if (more_src_cnt > 0) { 2136 int index, mvsize; 2137 slist_t *sl = &next_reclist->mrec_srcs; 2138 index = sl->sl_numsrc; 2139 mvsize = more_src_cnt * sizeof (in6_addr_t); 2140 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2141 mvsize); 2142 sl->sl_numsrc = more_src_cnt; 2143 } 2144 goto nextpkt; 2145 } 2146 2147 free_reclist: 2148 while (reclist != NULL) { 2149 rp = reclist->mrec_next; 2150 mi_free(reclist); 2151 reclist = rp; 2152 } 2153 } 2154 2155 /* 2156 * mld_input: 2157 */ 2158 /* ARGSUSED */ 2159 void 2160 mld_input(queue_t *q, mblk_t *mp, ill_t *ill) 2161 { 2162 ip6_t *ip6h = (ip6_t *)(mp->b_rptr); 2163 mld_hdr_t *mldh; 2164 ilm_t *ilm; 2165 ipif_t *ipif; 2166 uint16_t hdr_length, exthdr_length; 2167 in6_addr_t *v6group_ptr, *lcladdr_ptr; 2168 uint_t next; 2169 int mldlen; 2170 ip_stack_t *ipst = ill->ill_ipst; 2171 2172 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal); 2173 2174 /* Make sure the src address of the packet is link-local */ 2175 if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) { 2176 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2177 freemsg(mp); 2178 return; 2179 } 2180 2181 if (ip6h->ip6_hlim != 1) { 2182 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit); 2183 freemsg(mp); 2184 return; 2185 } 2186 2187 /* Get to the icmp header part */ 2188 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2189 hdr_length = ip_hdr_length_v6(mp, ip6h); 2190 exthdr_length = hdr_length - IPV6_HDR_LEN; 2191 } else { 2192 hdr_length = IPV6_HDR_LEN; 2193 exthdr_length = 0; 2194 } 2195 mldlen = ntohs(ip6h->ip6_plen) - exthdr_length; 2196 2197 /* An MLD packet must at least be 24 octets to be valid */ 2198 if (mldlen < MLD_MINLEN) { 2199 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2200 
freemsg(mp); 2201 return; 2202 } 2203 2204 mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]); 2205 2206 switch (mldh->mld_type) { 2207 case MLD_LISTENER_QUERY: 2208 /* 2209 * packet length differentiates between v1 and v2. v1 2210 * query should be exactly 24 octets long; v2 is >= 28. 2211 */ 2212 if (mldlen == MLD_MINLEN) { 2213 next = mld_query_in(mldh, ill); 2214 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2215 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2216 } else { 2217 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2218 freemsg(mp); 2219 return; 2220 } 2221 if (next == 0) { 2222 freemsg(mp); 2223 return; 2224 } 2225 2226 if (next != INFINITY) 2227 mld_start_timers(next, ipst); 2228 break; 2229 2230 case MLD_LISTENER_REPORT: { 2231 2232 ASSERT(ill->ill_ipif != NULL); 2233 /* 2234 * For fast leave to work, we have to know that we are the 2235 * last person to send a report for this group. Reports 2236 * generated by us are looped back since we could potentially 2237 * be a multicast router, so discard reports sourced by me. 
2238 */ 2239 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2240 mutex_enter(&ill->ill_lock); 2241 for (ipif = ill->ill_ipif; ipif != NULL; 2242 ipif = ipif->ipif_next) { 2243 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2244 lcladdr_ptr)) { 2245 if (ip_debug > 1) { 2246 char buf1[INET6_ADDRSTRLEN]; 2247 char buf2[INET6_ADDRSTRLEN]; 2248 2249 (void) mi_strlog(ill->ill_rq, 2250 1, 2251 SL_TRACE, 2252 "mld_input: we are only " 2253 "member src %s ipif_local %s", 2254 inet_ntop(AF_INET6, lcladdr_ptr, 2255 buf1, sizeof (buf1)), 2256 inet_ntop(AF_INET6, 2257 &ipif->ipif_v6lcl_addr, 2258 buf2, sizeof (buf2))); 2259 } 2260 mutex_exit(&ill->ill_lock); 2261 freemsg(mp); 2262 return; 2263 } 2264 } 2265 mutex_exit(&ill->ill_lock); 2266 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2267 2268 v6group_ptr = &mldh->mld_addr; 2269 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2270 BUMP_MIB(ill->ill_icmp6_mib, 2271 ipv6IfIcmpInGroupMembBadReports); 2272 freemsg(mp); 2273 return; 2274 } 2275 2276 2277 /* 2278 * If we belong to the group being reported, and we are a 2279 * 'Delaying member' per the RFC terminology, stop our timer 2280 * for that group and 'clear flag' i.e. mark ilm_state as 2281 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2282 * membership entries for the same group address (one per zone) 2283 * so we need to walk the ill_ilm list. 
2284 */ 2285 mutex_enter(&ill->ill_lock); 2286 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2287 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2288 continue; 2289 BUMP_MIB(ill->ill_icmp6_mib, 2290 ipv6IfIcmpInGroupMembOurReports); 2291 2292 ilm->ilm_timer = INFINITY; 2293 ilm->ilm_state = IGMP_OTHERMEMBER; 2294 } 2295 mutex_exit(&ill->ill_lock); 2296 break; 2297 } 2298 case MLD_LISTENER_REDUCTION: 2299 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2300 break; 2301 } 2302 /* 2303 * All MLD packets have already been passed up to any 2304 * process(es) listening on a ICMP6 raw socket. This 2305 * has been accomplished in ip_deliver_local_v6 prior to 2306 * this function call. It is assumed that the multicast daemon 2307 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2308 * ICMP6_FILTER socket option to only receive the MLD messages) 2309 * Thus we can free the MLD message block here 2310 */ 2311 freemsg(mp); 2312 } 2313 2314 /* 2315 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2316 * (non-zero, unsigned) timer value to be set on success. 2317 */ 2318 static uint_t 2319 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2320 { 2321 ilm_t *ilm; 2322 int timer; 2323 uint_t next; 2324 in6_addr_t *v6group; 2325 2326 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2327 2328 /* 2329 * In the MLD specification, there are 3 states and a flag. 2330 * 2331 * In Non-Listener state, we simply don't have a membership record. 2332 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2333 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2334 * INFINITY) 2335 * 2336 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2337 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2338 * if I sent the last report. 
2339 */ 2340 v6group = &mldh->mld_addr; 2341 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2342 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2343 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2344 return (0); 2345 } 2346 2347 /* Need to do compatibility mode checking */ 2348 mutex_enter(&ill->ill_lock); 2349 ill->ill_mcast_v1_time = 0; 2350 ill->ill_mcast_v1_tset = 1; 2351 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2352 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2353 "MLD_V1_ROUTER\n", ill->ill_name)); 2354 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2355 ill->ill_mcast_type = MLD_V1_ROUTER; 2356 } 2357 mutex_exit(&ill->ill_lock); 2358 2359 timer = (int)ntohs(mldh->mld_maxdelay); 2360 if (ip_debug > 1) { 2361 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2362 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2363 timer, (int)mldh->mld_type); 2364 } 2365 2366 /* 2367 * -Start the timers in all of our membership records for 2368 * the physical interface on which the query arrived, 2369 * excl: 2370 * 1. those that belong to the "all hosts" group, 2371 * 2. those with 0 scope, or 1 node-local scope. 2372 * 2373 * -Restart any timer that is already running but has a value 2374 * longer that the requested timeout. 2375 * -Use the value specified in the query message as the 2376 * maximum timeout. 
2377 */ 2378 next = INFINITY; 2379 mutex_enter(&ill->ill_lock); 2380 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2381 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2382 2383 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2384 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2385 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2386 continue; 2387 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2388 &ipv6_all_hosts_mcast)) && 2389 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2390 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2391 if (timer == 0) { 2392 /* Respond immediately */ 2393 ilm->ilm_timer = INFINITY; 2394 ilm->ilm_state = IGMP_IREPORTEDLAST; 2395 mutex_exit(&ill->ill_lock); 2396 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2397 mutex_enter(&ill->ill_lock); 2398 break; 2399 } 2400 if (ilm->ilm_timer > timer) { 2401 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2402 if (ilm->ilm_timer < next) 2403 next = ilm->ilm_timer; 2404 } 2405 break; 2406 } 2407 } 2408 mutex_exit(&ill->ill_lock); 2409 2410 return (next); 2411 } 2412 2413 /* 2414 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2415 * returns the appropriate (non-zero, unsigned) timer value (which may 2416 * be INFINITY) to be set. 
2417 */ 2418 static uint_t 2419 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen) 2420 { 2421 ilm_t *ilm; 2422 in6_addr_t *v6group, *src_array; 2423 uint_t next, numsrc, i, mrd, delay, qqi; 2424 uint8_t qrv; 2425 2426 v6group = &mld2q->mld2q_addr; 2427 numsrc = ntohs(mld2q->mld2q_numsrc); 2428 2429 /* make sure numsrc matches packet size */ 2430 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) { 2431 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2432 return (0); 2433 } 2434 src_array = (in6_addr_t *)&mld2q[1]; 2435 2436 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2437 2438 /* extract Maximum Response Delay from code in header */ 2439 mrd = ntohs(mld2q->mld2q_mxrc); 2440 if (mrd >= MLD_V2_MAXRT_FPMIN) { 2441 uint_t hdrval, mant, exp; 2442 hdrval = mrd; 2443 mant = hdrval & MLD_V2_MAXRT_MANT_MASK; 2444 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12; 2445 mrd = (mant | 0x1000) << (exp + 3); 2446 } 2447 MCAST_RANDOM_DELAY(delay, mrd); 2448 next = (unsigned)INFINITY; 2449 2450 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0) 2451 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 2452 else 2453 ill->ill_mcast_rv = qrv; 2454 2455 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) { 2456 uint_t mant, exp; 2457 mant = qqi & MLD_V2_QQI_MANT_MASK; 2458 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12; 2459 qqi = (mant | 0x10) << (exp + 3); 2460 } 2461 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 2462 2463 /* 2464 * If we have a pending general query response that's scheduled 2465 * sooner than the delay we calculated for this response, then 2466 * no action is required (MLDv2 draft section 6.2 rule 1) 2467 */ 2468 mutex_enter(&ill->ill_lock); 2469 if (ill->ill_global_timer < delay) { 2470 mutex_exit(&ill->ill_lock); 2471 return (next); 2472 } 2473 mutex_exit(&ill->ill_lock); 2474 2475 /* 2476 * Now take action depending on query type: general, 2477 * group specific, or group/source specific. 
2478 */ 2479 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) { 2480 /* 2481 * general query 2482 * We know global timer is either not running or is 2483 * greater than our calculated delay, so reset it to 2484 * our delay (random value in range [0, response time]) 2485 */ 2486 mutex_enter(&ill->ill_lock); 2487 ill->ill_global_timer = delay; 2488 next = ill->ill_global_timer; 2489 mutex_exit(&ill->ill_lock); 2490 2491 } else { 2492 /* group or group/source specific query */ 2493 mutex_enter(&ill->ill_lock); 2494 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2495 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2496 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2497 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) || 2498 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr)) 2499 continue; 2500 2501 /* 2502 * If the query is group specific or we have a 2503 * pending group specific query, the response is 2504 * group specific (pending sources list should be 2505 * empty). Otherwise, need to update the pending 2506 * sources list for the group and source specific 2507 * response. 2508 */ 2509 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2510 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2511 group_query: 2512 FREE_SLIST(ilm->ilm_pendsrcs); 2513 ilm->ilm_pendsrcs = NULL; 2514 } else { 2515 boolean_t overflow; 2516 slist_t *pktl; 2517 if (numsrc > MAX_FILTER_SIZE || 2518 (ilm->ilm_pendsrcs == NULL && 2519 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2520 /* 2521 * We've been sent more sources than 2522 * we can deal with; or we can't deal 2523 * with a source list at all. Revert 2524 * to a group specific query. 
2525 */ 2526 goto group_query; 2527 } 2528 if ((pktl = l_alloc()) == NULL) 2529 goto group_query; 2530 pktl->sl_numsrc = numsrc; 2531 for (i = 0; i < numsrc; i++) 2532 pktl->sl_addr[i] = src_array[i]; 2533 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2534 &overflow); 2535 l_free(pktl); 2536 if (overflow) 2537 goto group_query; 2538 } 2539 /* set timer to soonest value */ 2540 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2541 if (ilm->ilm_timer < next) 2542 next = ilm->ilm_timer; 2543 break; 2544 } 2545 mutex_exit(&ill->ill_lock); 2546 } 2547 2548 return (next); 2549 } 2550 2551 /* 2552 * Send MLDv1 response packet with hoplimit 1 2553 */ 2554 static void 2555 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2556 { 2557 mblk_t *mp; 2558 mld_hdr_t *mldh; 2559 ip6_t *ip6h; 2560 ip6_hbh_t *ip6hbh; 2561 struct ip6_opt_router *ip6router; 2562 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2563 ill_t *ill = ilm->ilm_ill; /* Will be the "lower" ill */ 2564 ipif_t *ipif; 2565 ip6i_t *ip6i; 2566 2567 /* 2568 * We need to place a router alert option in this packet. The length 2569 * of the options must be a multiple of 8. The hbh option header is 2 2570 * bytes followed by the 4 byte router alert option. That leaves 2571 * 2 bytes of pad for a total of 8 bytes. 2572 */ 2573 const int router_alert_length = 8; 2574 2575 ASSERT(ill->ill_isv6); 2576 2577 /* 2578 * We need to make sure that this packet does not get load balanced. 2579 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2580 * ip_newroute_ipif_v6 knows how to handle such packets. 2581 * If it gets load balanced, switches supporting MLD snooping 2582 * (in the future) will send the packet that it receives for this 2583 * multicast group to the interface that we are sending on. As we have 2584 * joined the multicast group on this ill, by sending the packet out 2585 * on this ill, we receive all the packets back on this ill. 
2586 */ 2587 size += sizeof (ip6i_t) + router_alert_length; 2588 mp = allocb(size, BPRI_HI); 2589 if (mp == NULL) 2590 return; 2591 bzero(mp->b_rptr, size); 2592 mp->b_wptr = mp->b_rptr + size; 2593 2594 ip6i = (ip6i_t *)mp->b_rptr; 2595 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2596 ip6i->ip6i_nxt = IPPROTO_RAW; 2597 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2598 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2599 2600 ip6h = (ip6_t *)&ip6i[1]; 2601 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2602 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2603 /* 2604 * A zero is a pad option of length 1. The bzero of the whole packet 2605 * above will pad between ip6router and mld. 2606 */ 2607 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2608 2609 mldh->mld_type = type; 2610 mldh->mld_addr = ilm->ilm_v6addr; 2611 2612 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2613 ip6router->ip6or_len = 2; 2614 ip6router->ip6or_value[0] = 0; 2615 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2616 2617 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2618 ip6hbh->ip6h_len = 0; 2619 2620 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2621 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2622 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2623 ip6h->ip6_hops = MLD_HOP_LIMIT; 2624 if (v6addr == NULL) 2625 ip6h->ip6_dst = ilm->ilm_v6addr; 2626 else 2627 ip6h->ip6_dst = *v6addr; 2628 2629 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2630 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2631 ip6h->ip6_src = ipif->ipif_v6src_addr; 2632 ipif_refrele(ipif); 2633 } else { 2634 /* Otherwise, use IPv6 default address selection. */ 2635 ip6h->ip6_src = ipv6_all_zeros; 2636 } 2637 2638 /* 2639 * Prepare for checksum by putting icmp length in the icmp 2640 * checksum field. The checksum is calculated in ip_wput_v6. 
2641 */ 2642 mldh->mld_cksum = htons(sizeof (*mldh)); 2643 2644 /* 2645 * ip_wput will automatically loopback the multicast packet to 2646 * the conn if multicast loopback is enabled. 2647 * The MIB stats corresponding to this outgoing MLD packet 2648 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2649 * ->icmp_update_out_mib_v6 function call. 2650 */ 2651 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2652 } 2653 2654 /* 2655 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2656 * report will contain one multicast address record for each element of 2657 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2658 * multiple reports are sent. reclist is assumed to be made up of 2659 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 2660 */ 2661 static void 2662 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2663 { 2664 mblk_t *mp; 2665 mld2r_t *mld2r; 2666 mld2mar_t *mld2mar; 2667 in6_addr_t *srcarray; 2668 ip6_t *ip6h; 2669 ip6_hbh_t *ip6hbh; 2670 ip6i_t *ip6i; 2671 struct ip6_opt_router *ip6router; 2672 size_t size, optlen, padlen, icmpsize, rsize; 2673 ipif_t *ipif; 2674 int i, numrec, more_src_cnt; 2675 mrec_t *rp, *cur_reclist; 2676 mrec_t *next_reclist = reclist; 2677 boolean_t morepkts; 2678 2679 /* If there aren't any records, there's nothing to send */ 2680 if (reclist == NULL) 2681 return; 2682 2683 ASSERT(ill->ill_isv6); 2684 2685 /* 2686 * Total option length (optlen + padlen) must be a multiple of 2687 * 8 bytes. We assume here that optlen <= 8, so the total option 2688 * length will be 8. Assert this in case anything ever changes. 
2689 */ 2690 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2691 ASSERT(optlen <= 8); 2692 padlen = 8 - optlen; 2693 nextpkt: 2694 icmpsize = sizeof (mld2r_t); 2695 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2696 morepkts = B_FALSE; 2697 more_src_cnt = 0; 2698 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2699 rp = rp->mrec_next, numrec++) { 2700 rsize = sizeof (mld2mar_t) + 2701 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2702 if (size + rsize > ill->ill_max_frag) { 2703 if (rp == cur_reclist) { 2704 /* 2705 * If the first mrec we looked at is too big 2706 * to fit in a single packet (i.e the source 2707 * list is too big), we must either truncate 2708 * the list (if TO_EX or IS_EX), or send 2709 * multiple reports for the same group (all 2710 * other types). 2711 */ 2712 int srcspace, srcsperpkt; 2713 srcspace = ill->ill_max_frag - 2714 (size + sizeof (mld2mar_t)); 2715 srcsperpkt = srcspace / sizeof (in6_addr_t); 2716 /* 2717 * Increment icmpsize and size, because we will 2718 * be sending a record for the mrec we're 2719 * looking at now. 2720 */ 2721 rsize = sizeof (mld2mar_t) + 2722 (srcsperpkt * sizeof (in6_addr_t)); 2723 icmpsize += rsize; 2724 size += rsize; 2725 if (rp->mrec_type == MODE_IS_EXCLUDE || 2726 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2727 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2728 if (rp->mrec_next == NULL) { 2729 /* no more packets to send */ 2730 break; 2731 } else { 2732 /* 2733 * more packets, but we're 2734 * done with this mrec. 2735 */ 2736 next_reclist = rp->mrec_next; 2737 } 2738 } else { 2739 more_src_cnt = rp->mrec_srcs.sl_numsrc 2740 - srcsperpkt; 2741 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2742 /* 2743 * We'll fix up this mrec (remove the 2744 * srcs we've already sent) before 2745 * returning to nextpkt above. 
2746 */ 2747 next_reclist = rp; 2748 } 2749 } else { 2750 next_reclist = rp; 2751 } 2752 morepkts = B_TRUE; 2753 break; 2754 } 2755 icmpsize += rsize; 2756 size += rsize; 2757 } 2758 2759 /* 2760 * We need to make sure that this packet does not get load balanced. 2761 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2762 * ip_newroute_ipif_v6 know how to handle such packets. 2763 * If it gets load balanced, switches supporting MLD snooping 2764 * (in the future) will send the packet that it receives for this 2765 * multicast group to the interface that we are sending on. As we have 2766 * joined the multicast group on this ill, by sending the packet out 2767 * on this ill, we receive all the packets back on this ill. 2768 */ 2769 size += sizeof (ip6i_t); 2770 mp = allocb(size, BPRI_HI); 2771 if (mp == NULL) 2772 goto free_reclist; 2773 bzero(mp->b_rptr, size); 2774 mp->b_wptr = mp->b_rptr + size; 2775 2776 ip6i = (ip6i_t *)mp->b_rptr; 2777 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2778 ip6i->ip6i_nxt = IPPROTO_RAW; 2779 ip6i->ip6i_flags = IP6I_ATTACH_IF; 2780 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2781 2782 ip6h = (ip6_t *)&(ip6i[1]); 2783 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2784 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2785 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2786 mld2mar = (mld2mar_t *)&(mld2r[1]); 2787 2788 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2789 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2790 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2791 ip6h->ip6_hops = MLD_HOP_LIMIT; 2792 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2793 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2794 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2795 ip6h->ip6_src = ipif->ipif_v6src_addr; 2796 ipif_refrele(ipif); 2797 } else { 2798 /* otherwise, use IPv6 default address selection. 
*/ 2799 ip6h->ip6_src = ipv6_all_zeros; 2800 } 2801 2802 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2803 /* 2804 * ip6h_len is the number of 8-byte words, not including the first 2805 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2806 */ 2807 ip6hbh->ip6h_len = 0; 2808 2809 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2810 ip6router->ip6or_len = 2; 2811 ip6router->ip6or_value[0] = 0; 2812 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2813 2814 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2815 mld2r->mld2r_nummar = htons(numrec); 2816 /* 2817 * Prepare for the checksum by putting icmp length in the icmp 2818 * checksum field. The checksum is calculated in ip_wput_v6. 2819 */ 2820 mld2r->mld2r_cksum = htons(icmpsize); 2821 2822 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2823 mld2mar->mld2mar_type = rp->mrec_type; 2824 mld2mar->mld2mar_auxlen = 0; 2825 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2826 mld2mar->mld2mar_group = rp->mrec_group; 2827 srcarray = (in6_addr_t *)&(mld2mar[1]); 2828 2829 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2830 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2831 2832 mld2mar = (mld2mar_t *)&(srcarray[i]); 2833 } 2834 2835 /* 2836 * ip_wput will automatically loopback the multicast packet to 2837 * the conn if multicast loopback is enabled. 2838 * The MIB stats corresponding to this outgoing MLD packet 2839 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2840 * ->icmp_update_out_mib_v6 function call. 
2841 */ 2842 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2843 2844 if (morepkts) { 2845 if (more_src_cnt > 0) { 2846 int index, mvsize; 2847 slist_t *sl = &next_reclist->mrec_srcs; 2848 index = sl->sl_numsrc; 2849 mvsize = more_src_cnt * sizeof (in6_addr_t); 2850 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2851 mvsize); 2852 sl->sl_numsrc = more_src_cnt; 2853 } 2854 goto nextpkt; 2855 } 2856 2857 free_reclist: 2858 while (reclist != NULL) { 2859 rp = reclist->mrec_next; 2860 mi_free(reclist); 2861 reclist = rp; 2862 } 2863 } 2864 2865 static mrec_t * 2866 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2867 mrec_t *next) 2868 { 2869 mrec_t *rp; 2870 int i; 2871 2872 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2873 SLIST_IS_EMPTY(srclist)) 2874 return (next); 2875 2876 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2877 if (rp == NULL) 2878 return (next); 2879 2880 rp->mrec_next = next; 2881 rp->mrec_type = type; 2882 rp->mrec_auxlen = 0; 2883 rp->mrec_group = *grp; 2884 if (srclist == NULL) { 2885 rp->mrec_srcs.sl_numsrc = 0; 2886 } else { 2887 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2888 for (i = 0; i < srclist->sl_numsrc; i++) 2889 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2890 } 2891 2892 return (rp); 2893 } 2894 2895 /* 2896 * Set up initial retransmit state. If memory cannot be allocated for 2897 * the source lists, simply create as much state as is possible; memory 2898 * allocation failures are considered one type of transient error that 2899 * the retransmissions are designed to overcome (and if they aren't 2900 * transient, there are bigger problems than failing to notify the 2901 * router about multicast group membership state changes). 
2902 */ 2903 static void 2904 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2905 slist_t *flist) 2906 { 2907 /* 2908 * There are only three possibilities for rtype: 2909 * New join, transition from INCLUDE {} to INCLUDE {flist} 2910 * => rtype is ALLOW_NEW_SOURCES 2911 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2912 * => rtype is CHANGE_TO_EXCLUDE 2913 * State change that involves a filter mode change 2914 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2915 */ 2916 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2917 rtype == ALLOW_NEW_SOURCES); 2918 2919 rtxp->rtx_cnt = ill->ill_mcast_rv; 2920 2921 switch (rtype) { 2922 case CHANGE_TO_EXCLUDE: 2923 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2924 CLEAR_SLIST(rtxp->rtx_allow); 2925 COPY_SLIST(flist, rtxp->rtx_block); 2926 break; 2927 case ALLOW_NEW_SOURCES: 2928 case CHANGE_TO_INCLUDE: 2929 rtxp->rtx_fmode_cnt = 2930 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2931 CLEAR_SLIST(rtxp->rtx_block); 2932 COPY_SLIST(flist, rtxp->rtx_allow); 2933 break; 2934 } 2935 } 2936 2937 /* 2938 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2939 * RFC 3376 section 5.1, covers three cases: 2940 * * The current state change is a filter mode change 2941 * Set filter mode retransmit counter; set retransmit allow or 2942 * block list to new source list as appropriate, and clear the 2943 * retransmit list that was not set; send TO_IN or TO_EX with 2944 * new source list. 2945 * * The current state change is a source list change, but the filter 2946 * mode retransmit counter is > 0 2947 * Decrement filter mode retransmit counter; set retransmit 2948 * allow or block list to new source list as appropriate, 2949 * and clear the retransmit list that was not set; send TO_IN 2950 * or TO_EX with new source list. 2951 * * The current state change is a source list change, and the filter 2952 * mode retransmit counter is 0. 
/*
 * Merge the pending retransmit state in ilm->ilm_rtx with the new state
 * change described by `mreclist' (the records about to be sent) and
 * `flist' (the new source filter list).  Returns the (possibly
 * modified or extended) record list to transmit.  See the strategy
 * comment above for the three cases handled.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		/*
		 * Locate the (at most one each) ALLOW and BLOCK records
		 * in the caller's list.
		 */
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/*
			 * No BLOCK record in the new list: emit one built
			 * from the pending rtx_block state (mcast_bldmrec
			 * returns allow_mrec unchanged if rtx_block is
			 * empty or allocation fails).
			 */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* Likewise for a missing ALLOW record. */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}