1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 90 /* 91 * Macros used to do timer len conversions. Timer values are always 92 * stored and passed to the timer functions as milliseconds; but the 93 * default values and values from the wire may not be. 
94 * 95 * And yes, it's obscure, but decisecond is easier to abbreviate than 96 * "tenths of a second". 97 */ 98 #define DSEC_TO_MSEC(dsec) ((dsec) * 100) 99 #define SEC_TO_MSEC(sec) ((sec) * 1000) 100 101 /* 102 * The first multicast join will trigger the igmp timers / mld timers 103 * The unit for next is milliseconds. 104 */ 105 void 106 igmp_start_timers(unsigned next, ip_stack_t *ipst) 107 { 108 int time_left; 109 int ret; 110 111 ASSERT(next != 0 && next != INFINITY); 112 113 mutex_enter(&ipst->ips_igmp_timer_lock); 114 115 if (ipst->ips_igmp_timer_setter_active) { 116 /* 117 * Serialize timer setters, one at a time. If the 118 * timer is currently being set by someone, 119 * just record the next time when it has to be 120 * invoked and return. The current setter will 121 * take care. 122 */ 123 ipst->ips_igmp_time_to_next = 124 MIN(ipst->ips_igmp_time_to_next, next); 125 mutex_exit(&ipst->ips_igmp_timer_lock); 126 return; 127 } else { 128 ipst->ips_igmp_timer_setter_active = B_TRUE; 129 } 130 if (ipst->ips_igmp_timeout_id == 0) { 131 /* 132 * The timer is inactive. We need to start a timer 133 */ 134 ipst->ips_igmp_time_to_next = next; 135 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, 136 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); 137 ipst->ips_igmp_timer_setter_active = B_FALSE; 138 mutex_exit(&ipst->ips_igmp_timer_lock); 139 return; 140 } 141 142 /* 143 * The timer was scheduled sometime back for firing in 144 * 'igmp_time_to_next' ms and is active. 
We need to 145 * reschedule the timeout if the new 'next' will happen 146 * earlier than the currently scheduled timeout 147 */ 148 time_left = ipst->ips_igmp_timer_fired_last + 149 MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt(); 150 if (time_left < MSEC_TO_TICK(next)) { 151 ipst->ips_igmp_timer_setter_active = B_FALSE; 152 mutex_exit(&ipst->ips_igmp_timer_lock); 153 return; 154 } 155 156 mutex_exit(&ipst->ips_igmp_timer_lock); 157 ret = untimeout(ipst->ips_igmp_timeout_id); 158 mutex_enter(&ipst->ips_igmp_timer_lock); 159 /* 160 * The timeout was cancelled, or the timeout handler 161 * completed, while we were blocked in the untimeout. 162 * No other thread could have set the timer meanwhile 163 * since we serialized all the timer setters. Thus 164 * no timer is currently active nor executing nor will 165 * any timer fire in the future. We start the timer now 166 * if needed. 167 */ 168 if (ret == -1) { 169 ASSERT(ipst->ips_igmp_timeout_id == 0); 170 } else { 171 ASSERT(ipst->ips_igmp_timeout_id != 0); 172 ipst->ips_igmp_timeout_id = 0; 173 } 174 if (ipst->ips_igmp_time_to_next != 0) { 175 ipst->ips_igmp_time_to_next = 176 MIN(ipst->ips_igmp_time_to_next, next); 177 ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler, 178 (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next)); 179 } 180 ipst->ips_igmp_timer_setter_active = B_FALSE; 181 mutex_exit(&ipst->ips_igmp_timer_lock); 182 } 183 184 /* 185 * mld_start_timers: 186 * The unit for next is milliseconds. 187 */ 188 void 189 mld_start_timers(unsigned next, ip_stack_t *ipst) 190 { 191 int time_left; 192 int ret; 193 194 ASSERT(next != 0 && next != INFINITY); 195 196 mutex_enter(&ipst->ips_mld_timer_lock); 197 if (ipst->ips_mld_timer_setter_active) { 198 /* 199 * Serialize timer setters, one at a time. If the 200 * timer is currently being set by someone, 201 * just record the next time when it has to be 202 * invoked and return. The current setter will 203 * take care. 
204 */ 205 ipst->ips_mld_time_to_next = 206 MIN(ipst->ips_mld_time_to_next, next); 207 mutex_exit(&ipst->ips_mld_timer_lock); 208 return; 209 } else { 210 ipst->ips_mld_timer_setter_active = B_TRUE; 211 } 212 if (ipst->ips_mld_timeout_id == 0) { 213 /* 214 * The timer is inactive. We need to start a timer 215 */ 216 ipst->ips_mld_time_to_next = next; 217 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, 218 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); 219 ipst->ips_mld_timer_setter_active = B_FALSE; 220 mutex_exit(&ipst->ips_mld_timer_lock); 221 return; 222 } 223 224 /* 225 * The timer was scheduled sometime back for firing in 226 * 'igmp_time_to_next' ms and is active. We need to 227 * reschedule the timeout if the new 'next' will happen 228 * earlier than the currently scheduled timeout 229 */ 230 time_left = ipst->ips_mld_timer_fired_last + 231 MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt(); 232 if (time_left < MSEC_TO_TICK(next)) { 233 ipst->ips_mld_timer_setter_active = B_FALSE; 234 mutex_exit(&ipst->ips_mld_timer_lock); 235 return; 236 } 237 238 mutex_exit(&ipst->ips_mld_timer_lock); 239 ret = untimeout(ipst->ips_mld_timeout_id); 240 mutex_enter(&ipst->ips_mld_timer_lock); 241 /* 242 * The timeout was cancelled, or the timeout handler 243 * completed, while we were blocked in the untimeout. 244 * No other thread could have set the timer meanwhile 245 * since we serialized all the timer setters. Thus 246 * no timer is currently active nor executing nor will 247 * any timer fire in the future. We start the timer now 248 * if needed. 
249 */ 250 if (ret == -1) { 251 ASSERT(ipst->ips_mld_timeout_id == 0); 252 } else { 253 ASSERT(ipst->ips_mld_timeout_id != 0); 254 ipst->ips_mld_timeout_id = 0; 255 } 256 if (ipst->ips_mld_time_to_next != 0) { 257 ipst->ips_mld_time_to_next = 258 MIN(ipst->ips_mld_time_to_next, next); 259 ipst->ips_mld_timeout_id = timeout(mld_timeout_handler, 260 (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next)); 261 } 262 ipst->ips_mld_timer_setter_active = B_FALSE; 263 mutex_exit(&ipst->ips_mld_timer_lock); 264 } 265 266 /* 267 * igmp_input: 268 * Return NULL for a bad packet that is discarded here. 269 * Return mp if the message is OK and should be handed to "raw" receivers. 270 * Callers of igmp_input() may need to reinitialize variables that were copied 271 * from the mblk as this calls pullupmsg(). 272 */ 273 /* ARGSUSED */ 274 mblk_t * 275 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 276 { 277 igmpa_t *igmpa; 278 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 279 int iphlen, igmplen, mblklen; 280 ilm_t *ilm; 281 uint32_t src, dst; 282 uint32_t group; 283 uint_t next; 284 ipif_t *ipif; 285 ip_stack_t *ipst; 286 287 ASSERT(ill != NULL); 288 ASSERT(!ill->ill_isv6); 289 ipst = ill->ill_ipst; 290 ++ipst->ips_igmpstat.igps_rcv_total; 291 292 mblklen = MBLKL(mp); 293 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 294 ++ipst->ips_igmpstat.igps_rcv_tooshort; 295 goto bad_pkt; 296 } 297 igmplen = ntohs(ipha->ipha_length) - iphlen; 298 /* 299 * Since msg sizes are more variable with v3, just pullup the 300 * whole thing now. 
301 */ 302 if (MBLKL(mp) < (igmplen + iphlen)) { 303 mblk_t *mp1; 304 if ((mp1 = msgpullup(mp, -1)) == NULL) { 305 ++ipst->ips_igmpstat.igps_rcv_tooshort; 306 goto bad_pkt; 307 } 308 freemsg(mp); 309 mp = mp1; 310 ipha = (ipha_t *)(mp->b_rptr); 311 } 312 313 /* 314 * Validate lengths 315 */ 316 if (igmplen < IGMP_MINLEN) { 317 ++ipst->ips_igmpstat.igps_rcv_tooshort; 318 goto bad_pkt; 319 } 320 /* 321 * Validate checksum 322 */ 323 if (IP_CSUM(mp, iphlen, 0)) { 324 ++ipst->ips_igmpstat.igps_rcv_badsum; 325 goto bad_pkt; 326 } 327 328 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 329 src = ipha->ipha_src; 330 dst = ipha->ipha_dst; 331 if (ip_debug > 1) 332 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 333 "igmp_input: src 0x%x, dst 0x%x on %s\n", 334 (int)ntohl(src), (int)ntohl(dst), 335 ill->ill_name); 336 337 switch (igmpa->igmpa_type) { 338 case IGMP_MEMBERSHIP_QUERY: 339 /* 340 * packet length differentiates between v1/v2 and v3 341 * v1/v2 should be exactly 8 octets long; v3 is >= 12 342 */ 343 if (igmplen == IGMP_MINLEN) { 344 next = igmp_query_in(ipha, igmpa, ill); 345 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 346 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 347 igmplen); 348 } else { 349 ++ipst->ips_igmpstat.igps_rcv_tooshort; 350 goto bad_pkt; 351 } 352 if (next == 0) 353 goto bad_pkt; 354 355 if (next != INFINITY) 356 igmp_start_timers(next, ipst); 357 358 break; 359 360 case IGMP_V1_MEMBERSHIP_REPORT: 361 case IGMP_V2_MEMBERSHIP_REPORT: 362 /* 363 * For fast leave to work, we have to know that we are the 364 * last person to send a report for this group. Reports 365 * generated by us are looped back since we could potentially 366 * be a multicast router, so discard reports sourced by me. 
367 */ 368 mutex_enter(&ill->ill_lock); 369 for (ipif = ill->ill_ipif; ipif != NULL; 370 ipif = ipif->ipif_next) { 371 if (ipif->ipif_lcl_addr == src) { 372 if (ip_debug > 1) { 373 (void) mi_strlog(ill->ill_rq, 374 1, 375 SL_TRACE, 376 "igmp_input: we are only " 377 "member src 0x%x ipif_local 0x%x", 378 (int)ntohl(src), 379 (int) 380 ntohl(ipif->ipif_lcl_addr)); 381 } 382 mutex_exit(&ill->ill_lock); 383 return (mp); 384 } 385 } 386 mutex_exit(&ill->ill_lock); 387 388 ++ipst->ips_igmpstat.igps_rcv_reports; 389 group = igmpa->igmpa_group; 390 if (!CLASSD(group)) { 391 ++ipst->ips_igmpstat.igps_rcv_badreports; 392 goto bad_pkt; 393 } 394 395 /* 396 * KLUDGE: if the IP source address of the report has an 397 * unspecified (i.e., zero) subnet number, as is allowed for 398 * a booting host, replace it with the correct subnet number 399 * so that a process-level multicast routing demon can 400 * determine which subnet it arrived from. This is necessary 401 * to compensate for the lack of any way for a process to 402 * determine the arrival interface of an incoming packet. 403 * 404 * Requires that a copy of *this* message it passed up 405 * to the raw interface which is done by our caller. 406 */ 407 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 408 /* Pick the first ipif on this ill */ 409 mutex_enter(&ill->ill_lock); 410 src = ill->ill_ipif->ipif_subnet; 411 mutex_exit(&ill->ill_lock); 412 ip1dbg(("igmp_input: changed src to 0x%x\n", 413 (int)ntohl(src))); 414 ipha->ipha_src = src; 415 } 416 417 /* 418 * If we belong to the group being reported, and 419 * we are a 'Delaying member' in the RFC terminology, 420 * stop our timer for that group and 'clear flag' i.e. 421 * mark as IGMP_OTHERMEMBER. Do this for all logical 422 * interfaces on the given physical interface. 
423 */ 424 mutex_enter(&ill->ill_lock); 425 for (ipif = ill->ill_ipif; ipif != NULL; 426 ipif = ipif->ipif_next) { 427 ilm = ilm_lookup_ipif(ipif, group); 428 if (ilm != NULL) { 429 ++ipst->ips_igmpstat.igps_rcv_ourreports; 430 ilm->ilm_timer = INFINITY; 431 ilm->ilm_state = IGMP_OTHERMEMBER; 432 } 433 } /* for */ 434 mutex_exit(&ill->ill_lock); 435 break; 436 437 case IGMP_V3_MEMBERSHIP_REPORT: 438 /* 439 * Currently nothing to do here; IGMP router is not 440 * implemented in ip, and v3 hosts don't pay attention 441 * to membership reports. 442 */ 443 break; 444 } 445 /* 446 * Pass all valid IGMP packets up to any process(es) listening 447 * on a raw IGMP socket. Do not free the packet. 448 */ 449 return (mp); 450 451 bad_pkt: 452 freemsg(mp); 453 return (NULL); 454 } 455 456 static uint_t 457 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 458 { 459 ilm_t *ilm; 460 int timer; 461 uint_t next; 462 ip_stack_t *ipst; 463 464 ipst = ill->ill_ipst; 465 ++ipst->ips_igmpstat.igps_rcv_queries; 466 467 /* 468 * In the IGMPv2 specification, there are 3 states and a flag. 469 * 470 * In Non-Member state, we simply don't have a membership record. 471 * In Delaying Member state, our timer is running (ilm->ilm_timer 472 * < INFINITY). In Idle Member state, our timer is not running 473 * (ilm->ilm_timer == INFINITY). 474 * 475 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 476 * we have heard a report from another member, or IGMP_IREPORTEDLAST 477 * if I sent the last report. 478 */ 479 if (igmpa->igmpa_code == 0) { 480 /* 481 * Query from an old router. 482 * Remember that the querier on this interface is old, 483 * and set the timer to the value in RFC 1112. 
484 */ 485 486 487 mutex_enter(&ill->ill_lock); 488 ill->ill_mcast_v1_time = 0; 489 ill->ill_mcast_v1_tset = 1; 490 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 491 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 492 "to IGMP_V1_ROUTER\n", ill->ill_name)); 493 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 494 ill->ill_mcast_type = IGMP_V1_ROUTER; 495 } 496 mutex_exit(&ill->ill_lock); 497 498 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 499 500 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 501 igmpa->igmpa_group != 0) { 502 ++ipst->ips_igmpstat.igps_rcv_badqueries; 503 return (0); 504 } 505 506 } else { 507 in_addr_t group; 508 509 /* 510 * Query from a new router 511 * Simply do a validity check 512 */ 513 group = igmpa->igmpa_group; 514 if (group != 0 && (!CLASSD(group))) { 515 ++ipst->ips_igmpstat.igps_rcv_badqueries; 516 return (0); 517 } 518 519 /* 520 * Switch interface state to v2 on receipt of a v2 query 521 * ONLY IF current state is v3. Let things be if current 522 * state if v1 but do reset the v2-querier-present timer. 523 */ 524 mutex_enter(&ill->ill_lock); 525 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 526 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 527 "to IGMP_V2_ROUTER", ill->ill_name)); 528 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 529 ill->ill_mcast_type = IGMP_V2_ROUTER; 530 } 531 ill->ill_mcast_v2_time = 0; 532 ill->ill_mcast_v2_tset = 1; 533 mutex_exit(&ill->ill_lock); 534 535 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 536 } 537 538 if (ip_debug > 1) { 539 mutex_enter(&ill->ill_lock); 540 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 541 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 542 (int)ntohs(igmpa->igmpa_code), 543 (int)ntohs(igmpa->igmpa_type)); 544 mutex_exit(&ill->ill_lock); 545 } 546 547 /* 548 * -Start the timers in all of our membership records 549 * for the physical interface on which the query 550 * arrived, excluding those that belong to the "all 551 * hosts" group (224.0.0.1). 
552 * 553 * -Restart any timer that is already running but has 554 * a value longer than the requested timeout. 555 * 556 * -Use the value specified in the query message as 557 * the maximum timeout. 558 */ 559 next = (unsigned)INFINITY; 560 mutex_enter(&ill->ill_lock); 561 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 562 563 /* 564 * A multicast router joins INADDR_ANY address 565 * to enable promiscuous reception of all 566 * mcasts from the interface. This INADDR_ANY 567 * is stored in the ilm_v6addr as V6 unspec addr 568 */ 569 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 570 continue; 571 if (ilm->ilm_addr == htonl(INADDR_ANY)) 572 continue; 573 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 574 (igmpa->igmpa_group == 0) || 575 (igmpa->igmpa_group == ilm->ilm_addr)) { 576 if (ilm->ilm_timer > timer) { 577 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 578 if (ilm->ilm_timer < next) 579 next = ilm->ilm_timer; 580 } 581 } 582 } 583 mutex_exit(&ill->ill_lock); 584 585 return (next); 586 } 587 588 static uint_t 589 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 590 { 591 uint_t i, next, mrd, qqi, timer, delay, numsrc; 592 ilm_t *ilm; 593 ipaddr_t *src_array; 594 uint8_t qrv; 595 ip_stack_t *ipst; 596 597 ipst = ill->ill_ipst; 598 /* make sure numsrc matches packet size */ 599 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 600 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 601 ++ipst->ips_igmpstat.igps_rcv_tooshort; 602 return (0); 603 } 604 src_array = (ipaddr_t *)&igmp3qa[1]; 605 606 ++ipst->ips_igmpstat.igps_rcv_queries; 607 608 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 609 uint_t hdrval, mant, exp; 610 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 611 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 612 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 613 mrd = (mant | 0x10) << (exp + 3); 614 } 615 if (mrd == 0) 616 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 617 timer = DSEC_TO_MSEC(mrd); 618 MCAST_RANDOM_DELAY(delay, 
timer); 619 next = (unsigned)INFINITY; 620 621 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 622 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 623 else 624 ill->ill_mcast_rv = qrv; 625 626 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 627 uint_t hdrval, mant, exp; 628 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 629 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 630 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 631 qqi = (mant | 0x10) << (exp + 3); 632 } 633 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 634 635 /* 636 * If we have a pending general query response that's scheduled 637 * sooner than the delay we calculated for this response, then 638 * no action is required (RFC3376 section 5.2 rule 1) 639 */ 640 mutex_enter(&ill->ill_lock); 641 if (ill->ill_global_timer < delay) { 642 mutex_exit(&ill->ill_lock); 643 return (next); 644 } 645 mutex_exit(&ill->ill_lock); 646 647 /* 648 * Now take action depending upon query type: 649 * general, group specific, or group/source specific. 650 */ 651 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 652 /* 653 * general query 654 * We know global timer is either not running or is 655 * greater than our calculated delay, so reset it to 656 * our delay (random value in range [0, response time]). 657 */ 658 mutex_enter(&ill->ill_lock); 659 ill->ill_global_timer = delay; 660 next = ill->ill_global_timer; 661 mutex_exit(&ill->ill_lock); 662 663 } else { 664 /* group or group/source specific query */ 665 mutex_enter(&ill->ill_lock); 666 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 667 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 668 (ilm->ilm_addr == htonl(INADDR_ANY)) || 669 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 670 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 671 continue; 672 /* 673 * If the query is group specific or we have a 674 * pending group specific query, the response is 675 * group specific (pending sources list should be 676 * empty). 
Otherwise, need to update the pending 677 * sources list for the group and source specific 678 * response. 679 */ 680 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 681 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 682 group_query: 683 FREE_SLIST(ilm->ilm_pendsrcs); 684 ilm->ilm_pendsrcs = NULL; 685 } else { 686 boolean_t overflow; 687 slist_t *pktl; 688 if (numsrc > MAX_FILTER_SIZE || 689 (ilm->ilm_pendsrcs == NULL && 690 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 691 /* 692 * We've been sent more sources than 693 * we can deal with; or we can't deal 694 * with a source list at all. Revert 695 * to a group specific query. 696 */ 697 goto group_query; 698 } 699 if ((pktl = l_alloc()) == NULL) 700 goto group_query; 701 pktl->sl_numsrc = numsrc; 702 for (i = 0; i < numsrc; i++) 703 IN6_IPADDR_TO_V4MAPPED(src_array[i], 704 &(pktl->sl_addr[i])); 705 l_union_in_a(ilm->ilm_pendsrcs, pktl, 706 &overflow); 707 l_free(pktl); 708 if (overflow) 709 goto group_query; 710 } 711 /* choose soonest timer */ 712 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 713 if (ilm->ilm_timer < next) 714 next = ilm->ilm_timer; 715 } 716 mutex_exit(&ill->ill_lock); 717 } 718 719 return (next); 720 } 721 722 void 723 igmp_joingroup(ilm_t *ilm) 724 { 725 ill_t *ill; 726 ip_stack_t *ipst = ilm->ilm_ipst; 727 728 ill = ilm->ilm_ipif->ipif_ill; 729 730 ASSERT(IAM_WRITER_ILL(ill)); 731 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 732 733 mutex_enter(&ill->ill_lock); 734 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 735 ilm->ilm_rtx.rtx_timer = INFINITY; 736 ilm->ilm_state = IGMP_OTHERMEMBER; 737 mutex_exit(&ill->ill_lock); 738 } else { 739 ip1dbg(("Querier mode %d, sending report, group %x\n", 740 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 741 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 742 mutex_exit(&ill->ill_lock); 743 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 744 mutex_enter(&ill->ill_lock); 745 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 746 
mutex_exit(&ill->ill_lock); 747 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 748 mutex_enter(&ill->ill_lock); 749 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 750 mrec_t *rp; 751 mcast_record_t rtype; 752 /* 753 * The possible state changes we need to handle here: 754 * Old State New State Report 755 * 756 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 757 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 758 * 759 * No need to send the BLOCK(0) report; ALLOW(X) 760 * is enough. 761 */ 762 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 763 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 764 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 765 ilm->ilm_filter, NULL); 766 mutex_exit(&ill->ill_lock); 767 igmpv3_sendrpt(ilm->ilm_ipif, rp); 768 mutex_enter(&ill->ill_lock); 769 /* 770 * Set up retransmission state. Timer is set below, 771 * for both v3 and older versions. 772 */ 773 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 774 ilm->ilm_filter); 775 } 776 777 /* Set the ilm timer value */ 778 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 779 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 780 ilm->ilm_state = IGMP_IREPORTEDLAST; 781 mutex_exit(&ill->ill_lock); 782 783 /* 784 * To avoid deadlock, we don't call igmp_start_timers from 785 * here. igmp_start_timers needs to call untimeout, and we 786 * can't hold the ipsq across untimeout since 787 * igmp_timeout_handler could be blocking trying to 788 * acquire the ipsq. Instead we start the timer after we get 789 * out of the ipsq in ipsq_exit. 
790 */ 791 mutex_enter(&ipst->ips_igmp_timer_lock); 792 ipst->ips_igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, 793 ipst->ips_igmp_deferred_next); 794 mutex_exit(&ipst->ips_igmp_timer_lock); 795 } 796 797 if (ip_debug > 1) { 798 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 799 "igmp_joingroup: multicast_type %d timer %d", 800 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 801 (int)ntohl(ilm->ilm_rtx.rtx_timer)); 802 } 803 } 804 805 void 806 mld_joingroup(ilm_t *ilm) 807 { 808 ill_t *ill; 809 ip_stack_t *ipst = ilm->ilm_ipst; 810 811 ill = ilm->ilm_ill; 812 813 ASSERT(IAM_WRITER_ILL(ill)); 814 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 815 816 mutex_enter(&ill->ill_lock); 817 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 818 ilm->ilm_rtx.rtx_timer = INFINITY; 819 ilm->ilm_state = IGMP_OTHERMEMBER; 820 mutex_exit(&ill->ill_lock); 821 } else { 822 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 823 mutex_exit(&ill->ill_lock); 824 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 825 mutex_enter(&ill->ill_lock); 826 } else { 827 mrec_t *rp; 828 mcast_record_t rtype; 829 /* 830 * The possible state changes we need to handle here: 831 * Old State New State Report 832 * 833 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 834 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 835 * 836 * No need to send the BLOCK(0) report; ALLOW(X) 837 * is enough 838 */ 839 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 840 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 841 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 842 ilm->ilm_filter, NULL); 843 mutex_exit(&ill->ill_lock); 844 mldv2_sendrpt(ill, rp); 845 mutex_enter(&ill->ill_lock); 846 /* 847 * Set up retransmission state. Timer is set below, 848 * for both v2 and v1. 
849 */ 850 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 851 ilm->ilm_filter); 852 } 853 854 /* Set the ilm timer value */ 855 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER || 856 ilm->ilm_rtx.rtx_cnt > 0); 857 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 858 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 859 ilm->ilm_state = IGMP_IREPORTEDLAST; 860 mutex_exit(&ill->ill_lock); 861 862 /* 863 * To avoid deadlock, we don't call mld_start_timers from 864 * here. mld_start_timers needs to call untimeout, and we 865 * can't hold the ipsq (i.e. the lock) across untimeout 866 * since mld_timeout_handler could be blocking trying to 867 * acquire the ipsq. Instead we start the timer after we get 868 * out of the ipsq in ipsq_exit 869 */ 870 mutex_enter(&ipst->ips_mld_timer_lock); 871 ipst->ips_mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, 872 ipst->ips_mld_deferred_next); 873 mutex_exit(&ipst->ips_mld_timer_lock); 874 } 875 876 if (ip_debug > 1) { 877 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE, 878 "mld_joingroup: multicast_type %d timer %d", 879 (ilm->ilm_ill->ill_mcast_type), 880 (int)ntohl(ilm->ilm_rtx.rtx_timer)); 881 } 882 } 883 884 void 885 igmp_leavegroup(ilm_t *ilm) 886 { 887 ill_t *ill = ilm->ilm_ipif->ipif_ill; 888 889 ASSERT(ilm->ilm_ill == NULL); 890 ASSERT(!ill->ill_isv6); 891 892 mutex_enter(&ill->ill_lock); 893 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 894 ill->ill_mcast_type == IGMP_V2_ROUTER && 895 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 896 mutex_exit(&ill->ill_lock); 897 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP, 898 (htonl(INADDR_ALLRTRS_GROUP))); 899 return; 900 } else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) && 901 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 902 mrec_t *rp; 903 /* 904 * The possible state changes we need to handle here: 905 * Old State New State Report 906 * 907 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 908 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 909 * 910 * No need to send the ALLOW(0) report; BLOCK(X) is enough 911 */ 912 
if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 913 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 914 ilm->ilm_filter, NULL); 915 } else { 916 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 917 NULL, NULL); 918 } 919 mutex_exit(&ill->ill_lock); 920 igmpv3_sendrpt(ilm->ilm_ipif, rp); 921 return; 922 } 923 mutex_exit(&ill->ill_lock); 924 } 925 926 void 927 mld_leavegroup(ilm_t *ilm) 928 { 929 ill_t *ill = ilm->ilm_ill; 930 931 ASSERT(ilm->ilm_ipif == NULL); 932 ASSERT(ill->ill_isv6); 933 934 mutex_enter(&ill->ill_lock); 935 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 936 ill->ill_mcast_type == MLD_V1_ROUTER && 937 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 938 mutex_exit(&ill->ill_lock); 939 mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast); 940 return; 941 } else if ((ill->ill_mcast_type == MLD_V2_ROUTER) && 942 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 943 mrec_t *rp; 944 /* 945 * The possible state changes we need to handle here: 946 * Old State New State Report 947 * 948 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 949 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 950 * 951 * No need to send the ALLOW(0) report; BLOCK(X) is enough 952 */ 953 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 954 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 955 ilm->ilm_filter, NULL); 956 } else { 957 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 958 NULL, NULL); 959 } 960 mutex_exit(&ill->ill_lock); 961 mldv2_sendrpt(ill, rp); 962 return; 963 } 964 mutex_exit(&ill->ill_lock); 965 } 966 967 void 968 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) 969 { 970 ill_t *ill; 971 mrec_t *rp; 972 ip_stack_t *ipst = ilm->ilm_ipst; 973 974 ASSERT(ilm != NULL); 975 976 /* state change reports should only be sent if the router is v3 */ 977 if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER) 978 return; 979 980 if (ilm->ilm_ill == NULL) { 981 ASSERT(ilm->ilm_ipif != NULL); 982 ill = 
ilm->ilm_ipif->ipif_ill; 983 } else { 984 ill = ilm->ilm_ill; 985 } 986 987 mutex_enter(&ill->ill_lock); 988 989 /* 990 * Compare existing(old) state with the new state and prepare 991 * State Change Report, according to the rules in RFC 3376: 992 * 993 * Old State New State State Change Report 994 * 995 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 996 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 997 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 998 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 999 */ 1000 1001 if (ilm->ilm_fmode == fmode) { 1002 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1003 slist_t *allow, *block; 1004 if (((a_minus_b = l_alloc()) == NULL) || 1005 ((b_minus_a = l_alloc()) == NULL)) { 1006 l_free(a_minus_b); 1007 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1008 goto send_to_ex; 1009 else 1010 goto send_to_in; 1011 } 1012 l_difference(ilm->ilm_filter, flist, a_minus_b); 1013 l_difference(flist, ilm->ilm_filter, b_minus_a); 1014 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1015 allow = b_minus_a; 1016 block = a_minus_b; 1017 } else { 1018 allow = a_minus_b; 1019 block = b_minus_a; 1020 } 1021 rp = NULL; 1022 if (!SLIST_IS_EMPTY(allow)) 1023 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1024 allow, rp); 1025 if (!SLIST_IS_EMPTY(block)) 1026 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1027 block, rp); 1028 l_free(a_minus_b); 1029 l_free(b_minus_a); 1030 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1031 send_to_ex: 1032 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1033 NULL); 1034 } else { 1035 send_to_in: 1036 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1037 NULL); 1038 } 1039 1040 /* 1041 * Need to set up retransmission state; merge the new info with the 1042 * current state (which may be null). If the timer is not currently 1043 * running, start it (need to do a delayed start of the timer as 1044 * we're currently in the sq). 
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		/* pick a random delay up to the max host report delay (ms) */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	/* drop ill_lock before transmitting */
	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange:
 * MLDv2 analogue of igmp_statechange(): send a State Change Report for
 * the transition of this (v6) membership to (fmode, flist), merge the
 * record into the ilm's retransmission state, and schedule a deferred
 * retransmit-timer start if the timer is idle.  No-op unless the router
 * on this ill is MLDv2.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		/* on allocation failure, fall back to a mode-change report */
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a deferred start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	/* mcast_merge_rtx is expected to leave at least one rtx pending */
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	/* drop ill_lock before transmitting */
	mutex_exit(&ill->ill_lock);
	mldv2_sendrpt(ill, rp);
}

/*
 * igmp_timeout_handler_per_ill:
 * Process all pending IGMP timers (general-query/global, per-group, and
 * per-group retransmit) on one ill, given that 'elapsed' milliseconds
 * have passed since the last run.  Returns the number of milliseconds
 * until the next pending event on this ill, or INFINITY if none.
 * Must be called as the exclusive (ipsq) writer for the ill; ill_lock
 * is dropped around every transmission and re-acquired afterwards.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	uint_t	next = INFINITY;
	ilm_t	*ilm;
	ipif_t	*ipif;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			/* accumulate one record per group on its ipif */
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			/* not yet expired; just age it and track the min */
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d elap %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			/* IGMPv3: reply with a current-state record */
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			rp = NULL;
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		/* retransmit timer expired */
		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions pending; rearm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timerout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t  global_next = INFINITY;
	uint_t  next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = (ip_stack_t *)arg;	/* per-netstack IP state */

	ASSERT(arg != NULL);
	/* snapshot and clear the elapsed-time accounting under the lock */
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timer_fired_last = ddi_get_lbolt();
	elapsed = ipst->ips_igmp_time_to_next;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	/* walk every v4 ill, running its per-ill handler as ipsq writer */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE,
			    B_TRUE);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	/* mark the timeout as no longer outstanding */
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	/* re-arm for the soonest event found across all ills */
	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Process all pending MLD timers (global, per-group, and per-group
 * retransmit) on one (v6) ill, given 'elapsed' milliseconds since the
 * last run.  Returns milliseconds until the next pending event on this
 * ill, or INFINITY if none.  Must be called as ipsq writer.
 * (The original header here was mislabeled "mld_timeout_handler".)
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill, int elapsed)
{
	ilm_t 	*ilm;
	uint_t	next = INFINITY;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= elapsed) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/* drop ill_lock around the transmit */
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		ill->ill_global_timer -= elapsed;
		if (ill->ill_global_timer < next)
			next = ill->ill_global_timer;
	}

per_ilm_timer:
	/*
	 * Unlike the IGMP path, v2 records are accumulated in rp/rtxrp
	 * across the whole loop and transmitted once at the end.
	 */
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > elapsed) {
			ilm->ilm_timer -= elapsed;
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d elap %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer), elapsed,
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			/* MLDv2: build a current-state record */
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

		if (ip_debug > 1) {
			(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
			    "igmp_timo_hlr 1: ilm_timr %d elap %d "
			    "typ %d nxt %d",
			    (int)ntohl(ilm->ilm_timer), elapsed,
			    (ill->ill_mcast_type), next);
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > elapsed) {
			rtxp->rtx_timer -= elapsed;
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			continue;
		}

		/* retransmit timer expired */
		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* send all accumulated v2 records in (at most) two reports */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	int	elapsed;	/* Since last call */
	uint_t  global_next = INFINITY;
	uint_t  next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* snapshot and clear the elapsed-time accounting under the lock */
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timer_fired_last = ddi_get_lbolt();
	elapsed = ipst->ips_mld_time_to_next;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	/* walk every v6 ill, running its per-ill handler as ipsq writer */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish.
		 * So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = mld_timeout_handler_per_ill(ill, elapsed);
			if (next < global_next)
				global_next = next;
			/*
			 * NOTE(review): the (B_TRUE, B_FALSE) argument order
			 * here differs from igmp_timeout_handler's
			 * (B_FALSE, B_TRUE) call — confirm which is intended
			 * against the ipsq_exit() prototype.
			 */
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
			    B_FALSE);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	/* mark the timeout as no longer outstanding */
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	/* re-arm for the soonest event found across all ills */
	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didnt we hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Ages the v1/v2 "older version querier present" timers on every v4
 * ill and upgrades the ill's IGMP mode (v1 -> v2/v3, v2 -> v3) once
 * the OVQP interval passes without hearing an older-version query.
 * Reschedules itself every MCAST_SLOWTIMO_INTERVAL ms.
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					/*
					 * If a v2 querier is also present,
					 * fall back only to v2; otherwise
					 * go straight to v3.
					 */
					if (ill->ill_mcast_v2_tset > 0) {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V2\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V2_ROUTER;
					} else {
						ip1dbg(("V1 query timer "
						    "expired on %s; switching "
						    "mode to IGMP_V3\n",
						    ill->ill_name));
						ill->ill_mcast_type =
						    IGMP_V3_ROUTER;
					}
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
				if (ill->ill_mcast_v2_time >= OVQP(ill)) {
					ip1dbg(("V2 query timer expired on "
					    "%s; switching mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type = IGMP_V3_ROUTER;
					ill->ill_mcast_v2_time = 0;
					ill->ill_mcast_v2_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v2, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}

	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* reschedule ourselves */
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * MLD analogue of igmp_slowtimo(): only a single older version (MLDv1)
 * exists, so only the v1 timer is aged.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_type == MLD_V1_ROUTER) {
				if (ill->ill_mcast_v1_time >= OVQP(ill)) {
					ip1dbg(("MLD query timer expired on"
					    " %s; switching mode to MLD_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type = MLD_V2_ROUTER;
					ill->ill_mcast_v1_time = 0;
					ill->ill_mcast_v1_tset = 0;
					atomic_add_16(&ifp->illif_mcast_v1, -1);
				}
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* reschedule ourselves */
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size  = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill  = ipif->ipif_ill;	/* Will be the "lower" ill */
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 *
	 * We also need to make sure that this does not get load balanced
	 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
	 * here. If it gets load balanced, switches supporting igmp snooping
	 * will send the packet that it receives for this multicast group
	 * to the interface that we are sending on. As we have joined the
	 * multicast group on this ill, by sending the packet out on this
	 * ill, we receive all the packets back on this ill.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;		/* silently drop on allocation failure */

	/* M_CTL carries the ipsec_out_t ahead of the data mblk */
	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;
	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* layout: [ipha][router-alert option][igmp header] */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
	/* a computed checksum of 0 is transmitted as all-ones */
	if (igmpa->igmpa_cksum == 0)
		igmpa->igmpa_cksum = 0xffff;

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	/*
	 * NOTE(review): ipha_hdr_checksum left 0 here — presumably it is
	 * filled in downstream by the IP output path; confirm.
	 */
	ipha->ipha_hdr_checksum = 0;
	/* default destination is the group itself (reports); leaves override */
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing demon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *first_mp, *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	ill_t *ill = ipif->ipif_ill;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass: walk the remaining records, tallying how many fit
	 * in this packet (numrec) and the resulting packet size.  A record
	 * whose source list can't fit is either truncated (EXCLUDE modes)
	 * or split across packets (all other modes).
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/* layout: [ipha][router alert][igmp3 report hdr][group records...] */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* second pass: serialize numrec group records and their sources */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	/* v3 reports always go to the all-IGMPv3-routers address (224.0.0.22) */
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources of the split record to
			 * the front of its list before the next iteration.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* per contract, the caller's mcast_bldmrec() list is freed here */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 * Validate an inbound MLD packet (link-local source, hop limit 1,
 * minimum length) before dispatching on its type.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
	uint_t		next;
	int		mldlen;
	ip_stack_t	*ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	/* MLD packets must arrive with a hop limit of 1 (RFC 2710) */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(mp); 2203 return; 2204 } 2205 2206 mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]); 2207 2208 switch (mldh->mld_type) { 2209 case MLD_LISTENER_QUERY: 2210 /* 2211 * packet length differentiates between v1 and v2. v1 2212 * query should be exactly 24 octets long; v2 is >= 28. 2213 */ 2214 if (mldlen == MLD_MINLEN) { 2215 next = mld_query_in(mldh, ill); 2216 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2217 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2218 } else { 2219 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2220 freemsg(mp); 2221 return; 2222 } 2223 if (next == 0) { 2224 freemsg(mp); 2225 return; 2226 } 2227 2228 if (next != INFINITY) 2229 mld_start_timers(next, ipst); 2230 break; 2231 2232 case MLD_LISTENER_REPORT: { 2233 2234 ASSERT(ill->ill_ipif != NULL); 2235 /* 2236 * For fast leave to work, we have to know that we are the 2237 * last person to send a report for this group. Reports 2238 * generated by us are looped back since we could potentially 2239 * be a multicast router, so discard reports sourced by me. 
2240 */ 2241 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2242 mutex_enter(&ill->ill_lock); 2243 for (ipif = ill->ill_ipif; ipif != NULL; 2244 ipif = ipif->ipif_next) { 2245 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2246 lcladdr_ptr)) { 2247 if (ip_debug > 1) { 2248 char buf1[INET6_ADDRSTRLEN]; 2249 char buf2[INET6_ADDRSTRLEN]; 2250 2251 (void) mi_strlog(ill->ill_rq, 2252 1, 2253 SL_TRACE, 2254 "mld_input: we are only " 2255 "member src %s ipif_local %s", 2256 inet_ntop(AF_INET6, lcladdr_ptr, 2257 buf1, sizeof (buf1)), 2258 inet_ntop(AF_INET6, 2259 &ipif->ipif_v6lcl_addr, 2260 buf2, sizeof (buf2))); 2261 } 2262 mutex_exit(&ill->ill_lock); 2263 freemsg(mp); 2264 return; 2265 } 2266 } 2267 mutex_exit(&ill->ill_lock); 2268 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2269 2270 v6group_ptr = &mldh->mld_addr; 2271 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2272 BUMP_MIB(ill->ill_icmp6_mib, 2273 ipv6IfIcmpInGroupMembBadReports); 2274 freemsg(mp); 2275 return; 2276 } 2277 2278 2279 /* 2280 * If we belong to the group being reported, and we are a 2281 * 'Delaying member' per the RFC terminology, stop our timer 2282 * for that group and 'clear flag' i.e. mark ilm_state as 2283 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2284 * membership entries for the same group address (one per zone) 2285 * so we need to walk the ill_ilm list. 
2286 */ 2287 mutex_enter(&ill->ill_lock); 2288 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2289 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2290 continue; 2291 BUMP_MIB(ill->ill_icmp6_mib, 2292 ipv6IfIcmpInGroupMembOurReports); 2293 2294 ilm->ilm_timer = INFINITY; 2295 ilm->ilm_state = IGMP_OTHERMEMBER; 2296 } 2297 mutex_exit(&ill->ill_lock); 2298 break; 2299 } 2300 case MLD_LISTENER_REDUCTION: 2301 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2302 break; 2303 } 2304 /* 2305 * All MLD packets have already been passed up to any 2306 * process(es) listening on a ICMP6 raw socket. This 2307 * has been accomplished in ip_deliver_local_v6 prior to 2308 * this function call. It is assumed that the multicast daemon 2309 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2310 * ICMP6_FILTER socket option to only receive the MLD messages) 2311 * Thus we can free the MLD message block here 2312 */ 2313 freemsg(mp); 2314 } 2315 2316 /* 2317 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2318 * (non-zero, unsigned) timer value to be set on success. 2319 */ 2320 static uint_t 2321 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2322 { 2323 ilm_t *ilm; 2324 int timer; 2325 uint_t next; 2326 in6_addr_t *v6group; 2327 2328 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2329 2330 /* 2331 * In the MLD specification, there are 3 states and a flag. 2332 * 2333 * In Non-Listener state, we simply don't have a membership record. 2334 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2335 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2336 * INFINITY) 2337 * 2338 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2339 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2340 * if I sent the last report. 
2341 */ 2342 v6group = &mldh->mld_addr; 2343 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2344 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2345 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2346 return (0); 2347 } 2348 2349 /* Need to do compatibility mode checking */ 2350 mutex_enter(&ill->ill_lock); 2351 ill->ill_mcast_v1_time = 0; 2352 ill->ill_mcast_v1_tset = 1; 2353 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2354 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2355 "MLD_V1_ROUTER\n", ill->ill_name)); 2356 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2357 ill->ill_mcast_type = MLD_V1_ROUTER; 2358 } 2359 mutex_exit(&ill->ill_lock); 2360 2361 timer = (int)ntohs(mldh->mld_maxdelay); 2362 if (ip_debug > 1) { 2363 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2364 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2365 timer, (int)mldh->mld_type); 2366 } 2367 2368 /* 2369 * -Start the timers in all of our membership records for 2370 * the physical interface on which the query arrived, 2371 * excl: 2372 * 1. those that belong to the "all hosts" group, 2373 * 2. those with 0 scope, or 1 node-local scope. 2374 * 2375 * -Restart any timer that is already running but has a value 2376 * longer that the requested timeout. 2377 * -Use the value specified in the query message as the 2378 * maximum timeout. 
2379 */ 2380 next = INFINITY; 2381 mutex_enter(&ill->ill_lock); 2382 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2383 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2384 2385 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2386 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2387 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2388 continue; 2389 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2390 &ipv6_all_hosts_mcast)) && 2391 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2392 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2393 if (timer == 0) { 2394 /* Respond immediately */ 2395 ilm->ilm_timer = INFINITY; 2396 ilm->ilm_state = IGMP_IREPORTEDLAST; 2397 mutex_exit(&ill->ill_lock); 2398 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2399 mutex_enter(&ill->ill_lock); 2400 break; 2401 } 2402 if (ilm->ilm_timer > timer) { 2403 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2404 if (ilm->ilm_timer < next) 2405 next = ilm->ilm_timer; 2406 } 2407 break; 2408 } 2409 } 2410 mutex_exit(&ill->ill_lock); 2411 2412 return (next); 2413 } 2414 2415 /* 2416 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2417 * returns the appropriate (non-zero, unsigned) timer value (which may 2418 * be INFINITY) to be set. 
2419 */ 2420 static uint_t 2421 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen) 2422 { 2423 ilm_t *ilm; 2424 in6_addr_t *v6group, *src_array; 2425 uint_t next, numsrc, i, mrd, delay, qqi; 2426 uint8_t qrv; 2427 2428 v6group = &mld2q->mld2q_addr; 2429 numsrc = ntohs(mld2q->mld2q_numsrc); 2430 2431 /* make sure numsrc matches packet size */ 2432 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) { 2433 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2434 return (0); 2435 } 2436 src_array = (in6_addr_t *)&mld2q[1]; 2437 2438 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2439 2440 /* extract Maximum Response Delay from code in header */ 2441 mrd = ntohs(mld2q->mld2q_mxrc); 2442 if (mrd >= MLD_V2_MAXRT_FPMIN) { 2443 uint_t hdrval, mant, exp; 2444 hdrval = mrd; 2445 mant = hdrval & MLD_V2_MAXRT_MANT_MASK; 2446 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12; 2447 mrd = (mant | 0x1000) << (exp + 3); 2448 } 2449 MCAST_RANDOM_DELAY(delay, mrd); 2450 next = (unsigned)INFINITY; 2451 2452 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0) 2453 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 2454 else 2455 ill->ill_mcast_rv = qrv; 2456 2457 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) { 2458 uint_t mant, exp; 2459 mant = qqi & MLD_V2_QQI_MANT_MASK; 2460 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12; 2461 qqi = (mant | 0x10) << (exp + 3); 2462 } 2463 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 2464 2465 /* 2466 * If we have a pending general query response that's scheduled 2467 * sooner than the delay we calculated for this response, then 2468 * no action is required (MLDv2 draft section 6.2 rule 1) 2469 */ 2470 mutex_enter(&ill->ill_lock); 2471 if (ill->ill_global_timer < delay) { 2472 mutex_exit(&ill->ill_lock); 2473 return (next); 2474 } 2475 mutex_exit(&ill->ill_lock); 2476 2477 /* 2478 * Now take action depending on query type: general, 2479 * group specific, or group/source specific. 
2480 */ 2481 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) { 2482 /* 2483 * general query 2484 * We know global timer is either not running or is 2485 * greater than our calculated delay, so reset it to 2486 * our delay (random value in range [0, response time]) 2487 */ 2488 mutex_enter(&ill->ill_lock); 2489 ill->ill_global_timer = delay; 2490 next = ill->ill_global_timer; 2491 mutex_exit(&ill->ill_lock); 2492 2493 } else { 2494 /* group or group/source specific query */ 2495 mutex_enter(&ill->ill_lock); 2496 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2497 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2498 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2499 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) || 2500 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr)) 2501 continue; 2502 2503 /* 2504 * If the query is group specific or we have a 2505 * pending group specific query, the response is 2506 * group specific (pending sources list should be 2507 * empty). Otherwise, need to update the pending 2508 * sources list for the group and source specific 2509 * response. 2510 */ 2511 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2512 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2513 group_query: 2514 FREE_SLIST(ilm->ilm_pendsrcs); 2515 ilm->ilm_pendsrcs = NULL; 2516 } else { 2517 boolean_t overflow; 2518 slist_t *pktl; 2519 if (numsrc > MAX_FILTER_SIZE || 2520 (ilm->ilm_pendsrcs == NULL && 2521 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2522 /* 2523 * We've been sent more sources than 2524 * we can deal with; or we can't deal 2525 * with a source list at all. Revert 2526 * to a group specific query. 
2527 */ 2528 goto group_query; 2529 } 2530 if ((pktl = l_alloc()) == NULL) 2531 goto group_query; 2532 pktl->sl_numsrc = numsrc; 2533 for (i = 0; i < numsrc; i++) 2534 pktl->sl_addr[i] = src_array[i]; 2535 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2536 &overflow); 2537 l_free(pktl); 2538 if (overflow) 2539 goto group_query; 2540 } 2541 /* set timer to soonest value */ 2542 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2543 if (ilm->ilm_timer < next) 2544 next = ilm->ilm_timer; 2545 break; 2546 } 2547 mutex_exit(&ill->ill_lock); 2548 } 2549 2550 return (next); 2551 } 2552 2553 /* 2554 * Send MLDv1 response packet with hoplimit 1 2555 */ 2556 static void 2557 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2558 { 2559 mblk_t *mp; 2560 mld_hdr_t *mldh; 2561 ip6_t *ip6h; 2562 ip6_hbh_t *ip6hbh; 2563 struct ip6_opt_router *ip6router; 2564 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2565 ill_t *ill = ilm->ilm_ill; /* Will be the "lower" ill */ 2566 ipif_t *ipif; 2567 ip6i_t *ip6i; 2568 2569 /* 2570 * We need to place a router alert option in this packet. The length 2571 * of the options must be a multiple of 8. The hbh option header is 2 2572 * bytes followed by the 4 byte router alert option. That leaves 2573 * 2 bytes of pad for a total of 8 bytes. 2574 */ 2575 const int router_alert_length = 8; 2576 2577 ASSERT(ill->ill_isv6); 2578 2579 /* 2580 * We need to make sure that this packet does not get load balanced. 2581 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2582 * ip_newroute_ipif_v6 knows how to handle such packets. 2583 * If it gets load balanced, switches supporting MLD snooping 2584 * (in the future) will send the packet that it receives for this 2585 * multicast group to the interface that we are sending on. As we have 2586 * joined the multicast group on this ill, by sending the packet out 2587 * on this ill, we receive all the packets back on this ill. 
2588 */ 2589 size += sizeof (ip6i_t) + router_alert_length; 2590 mp = allocb(size, BPRI_HI); 2591 if (mp == NULL) 2592 return; 2593 bzero(mp->b_rptr, size); 2594 mp->b_wptr = mp->b_rptr + size; 2595 2596 ip6i = (ip6i_t *)mp->b_rptr; 2597 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2598 ip6i->ip6i_nxt = IPPROTO_RAW; 2599 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2600 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2601 2602 ip6h = (ip6_t *)&ip6i[1]; 2603 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2604 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2605 /* 2606 * A zero is a pad option of length 1. The bzero of the whole packet 2607 * above will pad between ip6router and mld. 2608 */ 2609 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2610 2611 mldh->mld_type = type; 2612 mldh->mld_addr = ilm->ilm_v6addr; 2613 2614 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2615 ip6router->ip6or_len = 2; 2616 ip6router->ip6or_value[0] = 0; 2617 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2618 2619 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2620 ip6hbh->ip6h_len = 0; 2621 2622 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2623 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2624 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2625 ip6h->ip6_hops = MLD_HOP_LIMIT; 2626 if (v6addr == NULL) 2627 ip6h->ip6_dst = ilm->ilm_v6addr; 2628 else 2629 ip6h->ip6_dst = *v6addr; 2630 2631 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2632 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2633 ip6h->ip6_src = ipif->ipif_v6src_addr; 2634 ipif_refrele(ipif); 2635 } else { 2636 /* Otherwise, use IPv6 default address selection. */ 2637 ip6h->ip6_src = ipv6_all_zeros; 2638 } 2639 2640 /* 2641 * Prepare for checksum by putting icmp length in the icmp 2642 * checksum field. The checksum is calculated in ip_wput_v6. 
2643 */ 2644 mldh->mld_cksum = htons(sizeof (*mldh)); 2645 2646 /* 2647 * ip_wput will automatically loopback the multicast packet to 2648 * the conn if multicast loopback is enabled. 2649 * The MIB stats corresponding to this outgoing MLD packet 2650 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2651 * ->icmp_update_out_mib_v6 function call. 2652 */ 2653 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2654 } 2655 2656 /* 2657 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2658 * report will contain one multicast address record for each element of 2659 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2660 * multiple reports are sent. reclist is assumed to be made up of 2661 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 2662 */ 2663 static void 2664 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2665 { 2666 mblk_t *mp; 2667 mld2r_t *mld2r; 2668 mld2mar_t *mld2mar; 2669 in6_addr_t *srcarray; 2670 ip6_t *ip6h; 2671 ip6_hbh_t *ip6hbh; 2672 ip6i_t *ip6i; 2673 struct ip6_opt_router *ip6router; 2674 size_t size, optlen, padlen, icmpsize, rsize; 2675 ipif_t *ipif; 2676 int i, numrec, more_src_cnt; 2677 mrec_t *rp, *cur_reclist; 2678 mrec_t *next_reclist = reclist; 2679 boolean_t morepkts; 2680 2681 /* If there aren't any records, there's nothing to send */ 2682 if (reclist == NULL) 2683 return; 2684 2685 ASSERT(ill->ill_isv6); 2686 2687 /* 2688 * Total option length (optlen + padlen) must be a multiple of 2689 * 8 bytes. We assume here that optlen <= 8, so the total option 2690 * length will be 8. Assert this in case anything ever changes. 
2691 */ 2692 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2693 ASSERT(optlen <= 8); 2694 padlen = 8 - optlen; 2695 nextpkt: 2696 icmpsize = sizeof (mld2r_t); 2697 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2698 morepkts = B_FALSE; 2699 more_src_cnt = 0; 2700 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2701 rp = rp->mrec_next, numrec++) { 2702 rsize = sizeof (mld2mar_t) + 2703 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2704 if (size + rsize > ill->ill_max_frag) { 2705 if (rp == cur_reclist) { 2706 /* 2707 * If the first mrec we looked at is too big 2708 * to fit in a single packet (i.e the source 2709 * list is too big), we must either truncate 2710 * the list (if TO_EX or IS_EX), or send 2711 * multiple reports for the same group (all 2712 * other types). 2713 */ 2714 int srcspace, srcsperpkt; 2715 srcspace = ill->ill_max_frag - 2716 (size + sizeof (mld2mar_t)); 2717 srcsperpkt = srcspace / sizeof (in6_addr_t); 2718 /* 2719 * Increment icmpsize and size, because we will 2720 * be sending a record for the mrec we're 2721 * looking at now. 2722 */ 2723 rsize = sizeof (mld2mar_t) + 2724 (srcsperpkt * sizeof (in6_addr_t)); 2725 icmpsize += rsize; 2726 size += rsize; 2727 if (rp->mrec_type == MODE_IS_EXCLUDE || 2728 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2729 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2730 if (rp->mrec_next == NULL) { 2731 /* no more packets to send */ 2732 break; 2733 } else { 2734 /* 2735 * more packets, but we're 2736 * done with this mrec. 2737 */ 2738 next_reclist = rp->mrec_next; 2739 } 2740 } else { 2741 more_src_cnt = rp->mrec_srcs.sl_numsrc 2742 - srcsperpkt; 2743 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2744 /* 2745 * We'll fix up this mrec (remove the 2746 * srcs we've already sent) before 2747 * returning to nextpkt above. 
2748 */ 2749 next_reclist = rp; 2750 } 2751 } else { 2752 next_reclist = rp; 2753 } 2754 morepkts = B_TRUE; 2755 break; 2756 } 2757 icmpsize += rsize; 2758 size += rsize; 2759 } 2760 2761 /* 2762 * We need to make sure that this packet does not get load balanced. 2763 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2764 * ip_newroute_ipif_v6 know how to handle such packets. 2765 * If it gets load balanced, switches supporting MLD snooping 2766 * (in the future) will send the packet that it receives for this 2767 * multicast group to the interface that we are sending on. As we have 2768 * joined the multicast group on this ill, by sending the packet out 2769 * on this ill, we receive all the packets back on this ill. 2770 */ 2771 size += sizeof (ip6i_t); 2772 mp = allocb(size, BPRI_HI); 2773 if (mp == NULL) 2774 goto free_reclist; 2775 bzero(mp->b_rptr, size); 2776 mp->b_wptr = mp->b_rptr + size; 2777 2778 ip6i = (ip6i_t *)mp->b_rptr; 2779 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2780 ip6i->ip6i_nxt = IPPROTO_RAW; 2781 ip6i->ip6i_flags = IP6I_ATTACH_IF; 2782 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2783 2784 ip6h = (ip6_t *)&(ip6i[1]); 2785 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2786 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2787 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2788 mld2mar = (mld2mar_t *)&(mld2r[1]); 2789 2790 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2791 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2792 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2793 ip6h->ip6_hops = MLD_HOP_LIMIT; 2794 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2795 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2796 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2797 ip6h->ip6_src = ipif->ipif_v6src_addr; 2798 ipif_refrele(ipif); 2799 } else { 2800 /* otherwise, use IPv6 default address selection. 
*/ 2801 ip6h->ip6_src = ipv6_all_zeros; 2802 } 2803 2804 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2805 /* 2806 * ip6h_len is the number of 8-byte words, not including the first 2807 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2808 */ 2809 ip6hbh->ip6h_len = 0; 2810 2811 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2812 ip6router->ip6or_len = 2; 2813 ip6router->ip6or_value[0] = 0; 2814 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2815 2816 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2817 mld2r->mld2r_nummar = htons(numrec); 2818 /* 2819 * Prepare for the checksum by putting icmp length in the icmp 2820 * checksum field. The checksum is calculated in ip_wput_v6. 2821 */ 2822 mld2r->mld2r_cksum = htons(icmpsize); 2823 2824 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2825 mld2mar->mld2mar_type = rp->mrec_type; 2826 mld2mar->mld2mar_auxlen = 0; 2827 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2828 mld2mar->mld2mar_group = rp->mrec_group; 2829 srcarray = (in6_addr_t *)&(mld2mar[1]); 2830 2831 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2832 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2833 2834 mld2mar = (mld2mar_t *)&(srcarray[i]); 2835 } 2836 2837 /* 2838 * ip_wput will automatically loopback the multicast packet to 2839 * the conn if multicast loopback is enabled. 2840 * The MIB stats corresponding to this outgoing MLD packet 2841 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2842 * ->icmp_update_out_mib_v6 function call. 
2843 */ 2844 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2845 2846 if (morepkts) { 2847 if (more_src_cnt > 0) { 2848 int index, mvsize; 2849 slist_t *sl = &next_reclist->mrec_srcs; 2850 index = sl->sl_numsrc; 2851 mvsize = more_src_cnt * sizeof (in6_addr_t); 2852 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2853 mvsize); 2854 sl->sl_numsrc = more_src_cnt; 2855 } 2856 goto nextpkt; 2857 } 2858 2859 free_reclist: 2860 while (reclist != NULL) { 2861 rp = reclist->mrec_next; 2862 mi_free(reclist); 2863 reclist = rp; 2864 } 2865 } 2866 2867 static mrec_t * 2868 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2869 mrec_t *next) 2870 { 2871 mrec_t *rp; 2872 int i; 2873 2874 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2875 SLIST_IS_EMPTY(srclist)) 2876 return (next); 2877 2878 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2879 if (rp == NULL) 2880 return (next); 2881 2882 rp->mrec_next = next; 2883 rp->mrec_type = type; 2884 rp->mrec_auxlen = 0; 2885 rp->mrec_group = *grp; 2886 if (srclist == NULL) { 2887 rp->mrec_srcs.sl_numsrc = 0; 2888 } else { 2889 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2890 for (i = 0; i < srclist->sl_numsrc; i++) 2891 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2892 } 2893 2894 return (rp); 2895 } 2896 2897 /* 2898 * Set up initial retransmit state. If memory cannot be allocated for 2899 * the source lists, simply create as much state as is possible; memory 2900 * allocation failures are considered one type of transient error that 2901 * the retransmissions are designed to overcome (and if they aren't 2902 * transient, there are bigger problems than failing to notify the 2903 * router about multicast group membership state changes). 
2904 */ 2905 static void 2906 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2907 slist_t *flist) 2908 { 2909 /* 2910 * There are only three possibilities for rtype: 2911 * New join, transition from INCLUDE {} to INCLUDE {flist} 2912 * => rtype is ALLOW_NEW_SOURCES 2913 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2914 * => rtype is CHANGE_TO_EXCLUDE 2915 * State change that involves a filter mode change 2916 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2917 */ 2918 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2919 rtype == ALLOW_NEW_SOURCES); 2920 2921 rtxp->rtx_cnt = ill->ill_mcast_rv; 2922 2923 switch (rtype) { 2924 case CHANGE_TO_EXCLUDE: 2925 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2926 CLEAR_SLIST(rtxp->rtx_allow); 2927 COPY_SLIST(flist, rtxp->rtx_block); 2928 break; 2929 case ALLOW_NEW_SOURCES: 2930 case CHANGE_TO_INCLUDE: 2931 rtxp->rtx_fmode_cnt = 2932 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2933 CLEAR_SLIST(rtxp->rtx_block); 2934 COPY_SLIST(flist, rtxp->rtx_allow); 2935 break; 2936 } 2937 } 2938 2939 /* 2940 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2941 * RFC 3376 section 5.1, covers three cases: 2942 * * The current state change is a filter mode change 2943 * Set filter mode retransmit counter; set retransmit allow or 2944 * block list to new source list as appropriate, and clear the 2945 * retransmit list that was not set; send TO_IN or TO_EX with 2946 * new source list. 2947 * * The current state change is a source list change, but the filter 2948 * mode retransmit counter is > 0 2949 * Decrement filter mode retransmit counter; set retransmit 2950 * allow or block list to new source list as appropriate, 2951 * and clear the retransmit list that was not set; send TO_IN 2952 * or TO_EX with new source list. 2953 * * The current state change is a source list change, and the filter 2954 * mode retransmit counter is 0. 
 *	Merge existing rtx allow and block lists with new state:
 *	    rtx_allow = (new allow + rtx_allow) - new block
 *	    rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 *
 * Returns the mrec list to transmit: either mreclist itself (possibly
 * rewritten in place) or a freshly-built list head chained in front of
 * the surviving mrecs.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t		*ill;
	rtx_state_t	*rtxp = &ilm->ilm_rtx;
	mcast_record_t	txtype;
	mrec_t		*rp, *rpnext, *rtnmrec;
	boolean_t	ovf;

	/* ilm may hang off either an ill (v6) or an ipif (v4) */
	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		/*
		 * Restate the current filter mode: the pending mode-change
		 * retransmission supersedes the individual source-change
		 * records, so the whole report collapses to one TO_IN or
		 * TO_EX record carrying the new filter list.
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t	*allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		/*
		 * Find the (at most one each) ALLOW and BLOCK records in
		 * the list; the ASSERT guarantees no other types appear
		 * on this path.
		 */
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		/*
		 * If the incoming list lacked an ALLOW or BLOCK record but
		 * retransmit state exists for that type, build a fresh
		 * record for it and make it the new list head (chaining
		 * the existing record, which may be NULL, behind it).
		 * mcast_bldmrec() simply returns its 'next' argument when
		 * the retransmit source list is empty, so no empty records
		 * are created.
		 */
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}