1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* 28 * Internet Group Management Protocol (IGMP) routines. 29 * Multicast Listener Discovery Protocol (MLD) routines. 30 * 31 * Written by Steve Deering, Stanford, May 1988. 32 * Modified by Rosen Sharma, Stanford, Aug 1994. 33 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
34 * 35 * MULTICAST 3.5.1.1 36 */ 37 38 #include <sys/types.h> 39 #include <sys/stream.h> 40 #include <sys/stropts.h> 41 #include <sys/strlog.h> 42 #include <sys/strsun.h> 43 #include <sys/systm.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/cmn_err.h> 47 #include <sys/atomic.h> 48 #include <sys/zone.h> 49 #include <sys/callb.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <inet/ipclassifier.h> 53 #include <net/if.h> 54 #include <net/route.h> 55 #include <netinet/in.h> 56 #include <netinet/igmp_var.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 60 #include <inet/common.h> 61 #include <inet/mi.h> 62 #include <inet/nd.h> 63 #include <inet/ip.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_multi.h> 66 #include <inet/ip_listutils.h> 67 68 #include <netinet/igmp.h> 69 #include <inet/ip_if.h> 70 #include <net/pfkeyv2.h> 71 #include <inet/ipsec_info.h> 72 73 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 74 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 75 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 76 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 77 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 78 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 79 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 80 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 81 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 82 slist_t *srclist, mrec_t *next); 83 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 84 mcast_record_t rtype, slist_t *flist); 85 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 86 static void mcast_signal_restart_thread(ip_stack_t *ipst); 87 88 /* 89 * Macros used to do timer len conversions. 
Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is the time
 * interval between time scheduled to time cancelled.  This can cause
 * delays in sending out multicast membership reports.  To resolve this
 * problem, wallclock time (absolute time) is used instead of deltas
 * (relative time) to track timers.
 *
 * The MACRO below gets the lbolt value, used for proper timer scheduling
 * and firing.  Therefore multicast membership reports are sent on time.
 * The timer does not exactly fire at the time it was scheduled to fire,
 * there is a difference of a few milliseconds observed.  An offset is used
 * to take care of the difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)

/*
 * igmp_start_timers:
 * The first multicast join will trigger the igmp timers / mld timers.
 * 'next' is the requested delay until the IGMP timeout handler should
 * next run; the unit for next is milliseconds.
 *
 * Ensures that the per-stack IGMP timeout fires no later than 'next' ms
 * from now, (re)scheduling or shortening the pending timeout as needed.
 * Concurrent callers are serialized via ips_igmp_timer_setter_active so
 * that only one thread at a time manipulates the timeout.
 */
static void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_igmp_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timeout already fires sooner; nothing to do. */
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * Drop the lock across untimeout(): it may block waiting for a
	 * running igmp_timeout_handler() to finish, and that handler
	 * takes this same lock.
	 */
	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(ipst->ips_igmp_timeout_id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		/* Handler already ran (or timer was idle) and cleared it. */
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		/* We cancelled a pending timeout; clear the stale id. */
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * MLD counterpart of igmp_start_timers(); identical structure operating
 * on the per-stack MLD timer state.  The unit for next is milliseconds.
 */
static void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_mld_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timeout already fires sooner; nothing to do. */
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/* Drop the lock across untimeout(); see igmp_start_timers(). */
	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(ipst->ips_mld_timeout_id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
296 */ 297 /* ARGSUSED */ 298 mblk_t * 299 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 300 { 301 igmpa_t *igmpa; 302 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 303 int iphlen, igmplen, mblklen; 304 ilm_t *ilm; 305 uint32_t src, dst; 306 uint32_t group; 307 uint_t next; 308 ipif_t *ipif; 309 ip_stack_t *ipst; 310 ilm_walker_t ilw; 311 312 ASSERT(ill != NULL); 313 ASSERT(!ill->ill_isv6); 314 ipst = ill->ill_ipst; 315 ++ipst->ips_igmpstat.igps_rcv_total; 316 317 mblklen = MBLKL(mp); 318 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 319 ++ipst->ips_igmpstat.igps_rcv_tooshort; 320 goto bad_pkt; 321 } 322 igmplen = ntohs(ipha->ipha_length) - iphlen; 323 /* 324 * Since msg sizes are more variable with v3, just pullup the 325 * whole thing now. 326 */ 327 if (MBLKL(mp) < (igmplen + iphlen)) { 328 mblk_t *mp1; 329 if ((mp1 = msgpullup(mp, -1)) == NULL) { 330 ++ipst->ips_igmpstat.igps_rcv_tooshort; 331 goto bad_pkt; 332 } 333 freemsg(mp); 334 mp = mp1; 335 ipha = (ipha_t *)(mp->b_rptr); 336 } 337 338 /* 339 * Validate lengths 340 */ 341 if (igmplen < IGMP_MINLEN) { 342 ++ipst->ips_igmpstat.igps_rcv_tooshort; 343 goto bad_pkt; 344 } 345 /* 346 * Validate checksum 347 */ 348 if (IP_CSUM(mp, iphlen, 0)) { 349 ++ipst->ips_igmpstat.igps_rcv_badsum; 350 goto bad_pkt; 351 } 352 353 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 354 src = ipha->ipha_src; 355 dst = ipha->ipha_dst; 356 if (ip_debug > 1) 357 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 358 "igmp_input: src 0x%x, dst 0x%x on %s\n", 359 (int)ntohl(src), (int)ntohl(dst), 360 ill->ill_name); 361 362 switch (igmpa->igmpa_type) { 363 case IGMP_MEMBERSHIP_QUERY: 364 /* 365 * packet length differentiates between v1/v2 and v3 366 * v1/v2 should be exactly 8 octets long; v3 is >= 12 367 */ 368 if ((igmplen == IGMP_MINLEN) || 369 (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) { 370 next = igmp_query_in(ipha, igmpa, ill); 371 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 372 next = igmpv3_query_in((igmp3qa_t *)igmpa, 
ill, 373 igmplen); 374 } else { 375 ++ipst->ips_igmpstat.igps_rcv_tooshort; 376 goto bad_pkt; 377 } 378 if (next == 0) 379 goto bad_pkt; 380 381 if (next != INFINITY) 382 igmp_start_timers(next, ipst); 383 384 break; 385 386 case IGMP_V1_MEMBERSHIP_REPORT: 387 case IGMP_V2_MEMBERSHIP_REPORT: 388 /* 389 * For fast leave to work, we have to know that we are the 390 * last person to send a report for this group. Reports 391 * generated by us are looped back since we could potentially 392 * be a multicast router, so discard reports sourced by me. 393 */ 394 mutex_enter(&ill->ill_lock); 395 for (ipif = ill->ill_ipif; ipif != NULL; 396 ipif = ipif->ipif_next) { 397 if (ipif->ipif_lcl_addr == src) { 398 if (ip_debug > 1) { 399 (void) mi_strlog(ill->ill_rq, 400 1, 401 SL_TRACE, 402 "igmp_input: we are only " 403 "member src 0x%x ipif_local 0x%x", 404 (int)ntohl(src), 405 (int)ntohl(ipif->ipif_lcl_addr)); 406 } 407 mutex_exit(&ill->ill_lock); 408 return (mp); 409 } 410 } 411 mutex_exit(&ill->ill_lock); 412 413 ++ipst->ips_igmpstat.igps_rcv_reports; 414 group = igmpa->igmpa_group; 415 if (!CLASSD(group)) { 416 ++ipst->ips_igmpstat.igps_rcv_badreports; 417 goto bad_pkt; 418 } 419 420 /* 421 * KLUDGE: if the IP source address of the report has an 422 * unspecified (i.e., zero) subnet number, as is allowed for 423 * a booting host, replace it with the correct subnet number 424 * so that a process-level multicast routing demon can 425 * determine which subnet it arrived from. This is necessary 426 * to compensate for the lack of any way for a process to 427 * determine the arrival interface of an incoming packet. 428 * 429 * Requires that a copy of *this* message it passed up 430 * to the raw interface which is done by our caller. 
431 */ 432 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 433 /* Pick the first ipif on this ill */ 434 mutex_enter(&ill->ill_lock); 435 src = ill->ill_ipif->ipif_subnet; 436 mutex_exit(&ill->ill_lock); 437 ip1dbg(("igmp_input: changed src to 0x%x\n", 438 (int)ntohl(src))); 439 ipha->ipha_src = src; 440 } 441 442 /* 443 * If our ill has ILMs that belong to the group being 444 * reported, and we are a 'Delaying Member' in the RFC 445 * terminology, stop our timer for that group and 'clear 446 * flag' i.e. mark as IGMP_OTHERMEMBER. 447 */ 448 ilm = ilm_walker_start(&ilw, ill); 449 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 450 if (ilm->ilm_addr == group) { 451 ++ipst->ips_igmpstat.igps_rcv_ourreports; 452 ilm->ilm_timer = INFINITY; 453 ilm->ilm_state = IGMP_OTHERMEMBER; 454 } 455 } 456 ilm_walker_finish(&ilw); 457 break; 458 459 case IGMP_V3_MEMBERSHIP_REPORT: 460 /* 461 * Currently nothing to do here; IGMP router is not 462 * implemented in ip, and v3 hosts don't pay attention 463 * to membership reports. 464 */ 465 break; 466 } 467 /* 468 * Pass all valid IGMP packets up to any process(es) listening 469 * on a raw IGMP socket. Do not free the packet. 470 */ 471 return (mp); 472 473 bad_pkt: 474 freemsg(mp); 475 return (NULL); 476 } 477 478 static uint_t 479 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 480 { 481 ilm_t *ilm; 482 int timer; 483 uint_t next, current; 484 ip_stack_t *ipst; 485 ilm_walker_t ilw; 486 487 ipst = ill->ill_ipst; 488 ++ipst->ips_igmpstat.igps_rcv_queries; 489 490 /* 491 * In the IGMPv2 specification, there are 3 states and a flag. 492 * 493 * In Non-Member state, we simply don't have a membership record. 494 * In Delaying Member state, our timer is running (ilm->ilm_timer 495 * < INFINITY). In Idle Member state, our timer is not running 496 * (ilm->ilm_timer == INFINITY). 
497 * 498 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 499 * we have heard a report from another member, or IGMP_IREPORTEDLAST 500 * if I sent the last report. 501 */ 502 if ((igmpa->igmpa_code == 0) || 503 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) { 504 /* 505 * Query from an old router. 506 * Remember that the querier on this interface is old, 507 * and set the timer to the value in RFC 1112. 508 */ 509 510 511 mutex_enter(&ill->ill_lock); 512 ill->ill_mcast_v1_time = 0; 513 ill->ill_mcast_v1_tset = 1; 514 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 515 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 516 "to IGMP_V1_ROUTER\n", ill->ill_name)); 517 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 518 ill->ill_mcast_type = IGMP_V1_ROUTER; 519 } 520 mutex_exit(&ill->ill_lock); 521 522 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 523 524 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 525 igmpa->igmpa_group != 0) { 526 ++ipst->ips_igmpstat.igps_rcv_badqueries; 527 return (0); 528 } 529 530 } else { 531 in_addr_t group; 532 533 /* 534 * Query from a new router 535 * Simply do a validity check 536 */ 537 group = igmpa->igmpa_group; 538 if (group != 0 && (!CLASSD(group))) { 539 ++ipst->ips_igmpstat.igps_rcv_badqueries; 540 return (0); 541 } 542 543 /* 544 * Switch interface state to v2 on receipt of a v2 query 545 * ONLY IF current state is v3. Let things be if current 546 * state if v1 but do reset the v2-querier-present timer. 
547 */ 548 mutex_enter(&ill->ill_lock); 549 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 550 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 551 "to IGMP_V2_ROUTER", ill->ill_name)); 552 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 553 ill->ill_mcast_type = IGMP_V2_ROUTER; 554 } 555 ill->ill_mcast_v2_time = 0; 556 ill->ill_mcast_v2_tset = 1; 557 mutex_exit(&ill->ill_lock); 558 559 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 560 } 561 562 if (ip_debug > 1) { 563 mutex_enter(&ill->ill_lock); 564 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 565 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 566 (int)ntohs(igmpa->igmpa_code), 567 (int)ntohs(igmpa->igmpa_type)); 568 mutex_exit(&ill->ill_lock); 569 } 570 571 /* 572 * -Start the timers in all of our membership records 573 * for the physical interface on which the query 574 * arrived, excluding those that belong to the "all 575 * hosts" group (224.0.0.1). 576 * 577 * -Restart any timer that is already running but has 578 * a value longer than the requested timeout. 579 * 580 * -Use the value specified in the query message as 581 * the maximum timeout. 582 */ 583 next = (unsigned)INFINITY; 584 585 ilm = ilm_walker_start(&ilw, ill); 586 mutex_enter(&ill->ill_lock); 587 current = CURRENT_MSTIME; 588 589 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 590 /* 591 * A multicast router joins INADDR_ANY address 592 * to enable promiscuous reception of all 593 * mcasts from the interface. 
This INADDR_ANY 594 * is stored in the ilm_v6addr as V6 unspec addr 595 */ 596 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 597 continue; 598 if (ilm->ilm_addr == htonl(INADDR_ANY)) 599 continue; 600 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 601 (igmpa->igmpa_group == 0) || 602 (igmpa->igmpa_group == ilm->ilm_addr)) { 603 if (ilm->ilm_timer > timer) { 604 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 605 if (ilm->ilm_timer < next) 606 next = ilm->ilm_timer; 607 ilm->ilm_timer += current; 608 } 609 } 610 } 611 mutex_exit(&ill->ill_lock); 612 ilm_walker_finish(&ilw); 613 614 return (next); 615 } 616 617 static uint_t 618 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 619 { 620 uint_t i, next, mrd, qqi, timer, delay, numsrc; 621 uint_t current; 622 ilm_t *ilm; 623 ipaddr_t *src_array; 624 uint8_t qrv; 625 ip_stack_t *ipst; 626 ilm_walker_t ilw; 627 628 ipst = ill->ill_ipst; 629 /* make sure numsrc matches packet size */ 630 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 631 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 632 ++ipst->ips_igmpstat.igps_rcv_tooshort; 633 return (0); 634 } 635 src_array = (ipaddr_t *)&igmp3qa[1]; 636 637 ++ipst->ips_igmpstat.igps_rcv_queries; 638 639 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 640 uint_t hdrval, mant, exp; 641 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 642 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 643 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 644 mrd = (mant | 0x10) << (exp + 3); 645 } 646 if (mrd == 0) 647 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 648 timer = DSEC_TO_MSEC(mrd); 649 MCAST_RANDOM_DELAY(delay, timer); 650 next = (unsigned)INFINITY; 651 current = CURRENT_MSTIME; 652 653 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 654 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 655 else 656 ill->ill_mcast_rv = qrv; 657 658 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 659 uint_t hdrval, mant, exp; 660 hdrval = 
(uint_t)igmp3qa->igmp3qa_qqic; 661 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 662 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 663 qqi = (mant | 0x10) << (exp + 3); 664 } 665 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 666 667 /* 668 * If we have a pending general query response that's scheduled 669 * sooner than the delay we calculated for this response, then 670 * no action is required (RFC3376 section 5.2 rule 1) 671 */ 672 mutex_enter(&ill->ill_lock); 673 if (ill->ill_global_timer < (current + delay)) { 674 mutex_exit(&ill->ill_lock); 675 return (next); 676 } 677 mutex_exit(&ill->ill_lock); 678 679 /* 680 * Now take action depending upon query type: 681 * general, group specific, or group/source specific. 682 */ 683 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 684 /* 685 * general query 686 * We know global timer is either not running or is 687 * greater than our calculated delay, so reset it to 688 * our delay (random value in range [0, response time]). 689 */ 690 mutex_enter(&ill->ill_lock); 691 ill->ill_global_timer = current + delay; 692 mutex_exit(&ill->ill_lock); 693 next = delay; 694 695 } else { 696 /* group or group/source specific query */ 697 ilm = ilm_walker_start(&ilw, ill); 698 mutex_enter(&ill->ill_lock); 699 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 700 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 701 (ilm->ilm_addr == htonl(INADDR_ANY)) || 702 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 703 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 704 continue; 705 /* 706 * If the query is group specific or we have a 707 * pending group specific query, the response is 708 * group specific (pending sources list should be 709 * empty). Otherwise, need to update the pending 710 * sources list for the group and source specific 711 * response. 
712 */ 713 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 714 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 715 group_query: 716 FREE_SLIST(ilm->ilm_pendsrcs); 717 ilm->ilm_pendsrcs = NULL; 718 } else { 719 boolean_t overflow; 720 slist_t *pktl; 721 if (numsrc > MAX_FILTER_SIZE || 722 (ilm->ilm_pendsrcs == NULL && 723 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 724 /* 725 * We've been sent more sources than 726 * we can deal with; or we can't deal 727 * with a source list at all. Revert 728 * to a group specific query. 729 */ 730 goto group_query; 731 } 732 if ((pktl = l_alloc()) == NULL) 733 goto group_query; 734 pktl->sl_numsrc = numsrc; 735 for (i = 0; i < numsrc; i++) 736 IN6_IPADDR_TO_V4MAPPED(src_array[i], 737 &(pktl->sl_addr[i])); 738 l_union_in_a(ilm->ilm_pendsrcs, pktl, 739 &overflow); 740 l_free(pktl); 741 if (overflow) 742 goto group_query; 743 } 744 745 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 746 INFINITY : (ilm->ilm_timer - current); 747 /* choose soonest timer */ 748 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 749 if (ilm->ilm_timer < next) 750 next = ilm->ilm_timer; 751 ilm->ilm_timer += current; 752 } 753 mutex_exit(&ill->ill_lock); 754 ilm_walker_finish(&ilw); 755 } 756 757 return (next); 758 } 759 760 void 761 igmp_joingroup(ilm_t *ilm) 762 { 763 uint_t timer; 764 ill_t *ill; 765 ip_stack_t *ipst = ilm->ilm_ipst; 766 767 ill = ilm->ilm_ipif->ipif_ill; 768 769 ASSERT(IAM_WRITER_ILL(ill)); 770 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 771 772 mutex_enter(&ill->ill_lock); 773 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 774 ilm->ilm_rtx.rtx_timer = INFINITY; 775 ilm->ilm_state = IGMP_OTHERMEMBER; 776 mutex_exit(&ill->ill_lock); 777 } else { 778 ip1dbg(("Querier mode %d, sending report, group %x\n", 779 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 780 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 781 mutex_exit(&ill->ill_lock); 782 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 783 mutex_enter(&ill->ill_lock); 784 } else 
if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 785 mutex_exit(&ill->ill_lock); 786 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 787 mutex_enter(&ill->ill_lock); 788 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 789 mrec_t *rp; 790 mcast_record_t rtype; 791 /* 792 * The possible state changes we need to handle here: 793 * Old State New State Report 794 * 795 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 796 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 797 * 798 * No need to send the BLOCK(0) report; ALLOW(X) 799 * is enough. 800 */ 801 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 802 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 803 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 804 ilm->ilm_filter, NULL); 805 mutex_exit(&ill->ill_lock); 806 igmpv3_sendrpt(ilm->ilm_ipif, rp); 807 mutex_enter(&ill->ill_lock); 808 /* 809 * Set up retransmission state. Timer is set below, 810 * for both v3 and older versions. 811 */ 812 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 813 ilm->ilm_filter); 814 } 815 816 /* Set the ilm timer value */ 817 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 818 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 819 timer = ilm->ilm_rtx.rtx_timer; 820 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 821 ilm->ilm_state = IGMP_IREPORTEDLAST; 822 mutex_exit(&ill->ill_lock); 823 824 /* 825 * We need to restart the IGMP timers, but we can't do it here 826 * since we're inside the IPSQ and thus igmp_start_timers() -> 827 * untimeout() (inside the IPSQ, waiting for a running timeout 828 * to finish) could deadlock with igmp_timeout_handler() -> 829 * ipsq_enter() (running the timeout, waiting to get inside 830 * the IPSQ). We also can't just delay it until after we 831 * ipsq_exit() since we could be inside more than one IPSQ and 832 * thus still have the other IPSQs pinned after we exit -- and 833 * igmp_start_timers() may be trying to enter one of those. 834 * Instead, signal a dedicated thread that will do it for us. 
835 */ 836 mutex_enter(&ipst->ips_igmp_timer_lock); 837 ipst->ips_igmp_deferred_next = MIN(timer, 838 ipst->ips_igmp_deferred_next); 839 mutex_exit(&ipst->ips_igmp_timer_lock); 840 mcast_signal_restart_thread(ipst); 841 } 842 843 if (ip_debug > 1) { 844 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 845 "igmp_joingroup: multicast_type %d timer %d", 846 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 847 (int)ntohl(timer)); 848 } 849 } 850 851 void 852 mld_joingroup(ilm_t *ilm) 853 { 854 uint_t timer; 855 ill_t *ill; 856 ip_stack_t *ipst = ilm->ilm_ipst; 857 858 ill = ilm->ilm_ill; 859 860 ASSERT(IAM_WRITER_ILL(ill)); 861 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 862 863 mutex_enter(&ill->ill_lock); 864 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 865 ilm->ilm_rtx.rtx_timer = INFINITY; 866 ilm->ilm_state = IGMP_OTHERMEMBER; 867 mutex_exit(&ill->ill_lock); 868 } else { 869 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 870 mutex_exit(&ill->ill_lock); 871 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 872 mutex_enter(&ill->ill_lock); 873 } else { 874 mrec_t *rp; 875 mcast_record_t rtype; 876 /* 877 * The possible state changes we need to handle here: 878 * Old State New State Report 879 * 880 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 881 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 882 * 883 * No need to send the BLOCK(0) report; ALLOW(X) 884 * is enough 885 */ 886 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 887 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 888 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 889 ilm->ilm_filter, NULL); 890 mutex_exit(&ill->ill_lock); 891 mldv2_sendrpt(ill, rp); 892 mutex_enter(&ill->ill_lock); 893 /* 894 * Set up retransmission state. Timer is set below, 895 * for both v2 and v1. 
		 */
		mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
		    ilm->ilm_filter);
	}

	/* Set the ilm timer value */
	ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
	    ilm->ilm_rtx.rtx_cnt > 0);
	MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
	    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
	timer = ilm->ilm_rtx.rtx_timer;
	/* rtx_timer is kept as an absolute deadline in milliseconds */
	ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
	ilm->ilm_state = IGMP_IREPORTEDLAST;
	mutex_exit(&ill->ill_lock);

	/*
	 * Signal another thread to restart the timers.  See the
	 * comment in igmp_joingroup() for details.
	 */
	mutex_enter(&ipst->ips_mld_timer_lock);
	ipst->ips_mld_deferred_next = MIN(timer,
	    ipst->ips_mld_deferred_next);
	mutex_exit(&ipst->ips_mld_timer_lock);
	mcast_signal_restart_thread(ipst);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

/*
 * igmp_leavegroup:
 * Tell the querier we are leaving an IPv4 group.  If we were the last
 * host to report membership and the querier is IGMPv2, send an explicit
 * V2 leave to the all-routers group; if the querier is IGMPv3, send the
 * appropriate state-change record (BLOCK or TO_IN) instead.  The
 * all-hosts group (224.0.0.1) is never reported or left.
 * Acquires and releases ill_lock; drops it before transmitting.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	/* v4 memberships hang off an ipif, never directly off the ill */
	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t	*rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * IPv6 analogue of igmp_leavegroup().  Sends an MLDv1 Done (listener
 * reduction) to the all-routers group if we reported last and the
 * querier is MLDv1, or an MLDv2 state-change record if it is MLDv2.
 * The all-hosts group (ff02::1) is never reported or left.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	/* v6 memberships hang directly off the ill, never off an ipif */
	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t	*rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * Build and send an IGMPv3 State Change Report describing the transition
 * from the ilm's current (fmode, filter) to the new (fmode, flist), per
 * RFC 3376.  Also merges the change into the ilm's retransmit state and
 * arms the retransmit timer if it is not already running.
 * No-op unless the querier on this interface is IGMPv3.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/*
	 * state change reports should only be sent if the router is v3
	 *
	 * NOTE(review): ilm->ilm_ipif is dereferenced here before the
	 * ilm_ill == NULL check below decides which ill to use -- this
	 * presumably relies on v4 ilms always having ilm_ipif set; confirm.
	 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the set differences; fall back
			 * to a (larger but correct) filter-mode-change
			 * record instead.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null).  If the timer is not currently
	 * running, signal a thread to restart it -- see the comment in
	 * igmp_joingroup() for details.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		mcast_signal_restart_thread(ipst);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange:
 * IPv6 analogue of igmp_statechange(): build and send an MLDv2 State
 * Change Report for the (fmode, flist) transition, merge it into the
 * ilm's retransmit state, and arm the retransmit timer if idle.
 * No-op unless the querier on this interface is MLDv2.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/* no memory for set differences; send mode change */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null).  If the timer is not currently
	 * running, signal a thread to restart it -- see the comment in
	 * igmp_joingroup() for details.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
		mcast_signal_restart_thread(ipst);
	}

	mutex_exit(&ill->ill_lock);
	mldv2_sendrpt(ill, rp);
}

/*
 * igmp_timeout_handler_per_ill:
 * Service all expired IGMP timers (the per-ill general-query timer, the
 * per-ilm group timers, and the per-ilm retransmit timers) on one ill.
 * Returns the number of milliseconds until the next pending event on
 * this ill, or INFINITY if none.  Caller must be the ipsq writer for
 * the ill (asserted).  ill_lock is dropped around every transmit and
 * re-acquired afterwards.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	ipif_t	*ipif;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions pending; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request.  The igmp related state variables in the ilm are protected
 * by ill_lock.  A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id.  igmp_start_timers
 * starts the igmp timer if needed.  It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * (The need to enter the IPSQ is largely historical but there are still some
 * fields like ilm_filter that rely on it.)
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		/* ill_g_lock must be dropped before entering the ipsq */
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	/* reschedule ourselves if any ill still has a pending event */
	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Service all expired MLD timers on one ill (the per-ill general-query
 * timer, the per-ilm group timers, and the per-ilm retransmit timers).
 * Returns number of milliseconds to the next event (or INFINITY if none).
 * Unlike the IGMP version, the v2 reports accumulated in the per-ilm loop
 * are sent in one batch after the loop completes.
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY, current;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions pending; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* batched v2 reports are sent once, after the ilm walk */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = mld_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didn't hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics.  If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v2, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* re-arm ourselves; slowtimo runs for the life of the stack */
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like
 * icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size  = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill  = ipif->ipif_ill;
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * We need to make sure this packet goes out on an ipif.  If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput.  To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;
	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* layout: IP header, router-alert option, then the IGMP message */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_hdr_checksum = 0;
	/* caller may override the destination (e.g. all-routers for leave) */
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing demon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill.  The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t	*io;
	igmp3ra_t	*igmp3ra;
	grphdra_t	*grphdr;
	mblk_t		*first_mp, *mp;
	ipha_t		*ipha;
	uint8_t		*rtralert;
	ipaddr_t	*src_array;
	int		i, j, numrec, more_src_cnt;
	size_t		hdrsize, size, rsize;
	ill_t		*ill = ipif->ipif_ill;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;
	zoneid_t	zoneid;
	ip_stack_t	*ipst = ill->ill_ipst;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * Walk the record list, accumulating records into this packet until
	 * adding another would exceed ill_max_frag; if so, remember where to
	 * resume (next_reclist) and loop back here for another packet.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 *
				 * NOTE(review): srcspace is a signed int
				 * compared against sizeof (size_t); if
				 * srcspace could ever be negative the usual
				 * arithmetic conversions would make this test
				 * false — presumably ill_max_frag is always
				 * large enough that it can't be; confirm.
				 */
				if (srcspace <= sizeof (ipaddr_t))
					continue;

				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					/* EXCLUDE-type lists are truncated */
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/*
	 * Lay out the packet: IPv4 header, 4-byte router alert option,
	 * IGMPv3 report header, then numrec group records (each a grphdra_t
	 * followed by its source address array).
	 */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		/* next record starts just past this record's source array */
		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	/* checksum covers the ICMP/IGMP portion only (skip hdrsize bytes) */
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the not-yet-sent sources of the partially
			 * sent mrec down to the front of its slist before
			 * building the next packet.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* this function owns reclist; free every mrec before returning */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input: validate and dispatch an incoming MLD packet (query, report,
 * or done/reduction).  The mblk is always consumed (freed) here.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t	*mldh;
	ilm_t		*ilm;
	ipif_t		*ipif;
	uint16_t	hdr_length, exthdr_length;
	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
	uint_t		next;
	int		mldlen;
	ip_stack_t	*ipst = ill->ill_ipst;
	ilm_walker_t	ilw;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	/* MLD packets must arrive with hop limit 1 (link-local scope) */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2. v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return;
		}
		if (next == 0) {
			/* query handler rejected the packet */
			freemsg(mp);
			return;
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT: {

		ASSERT(ill->ill_ipif != NULL);
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 *
		 * NOTE(review): lcladdr_ptr points at the first ipif's
		 * ipif_v6subnet, yet each ipif's ipif_v6lcl_addr is compared
		 * against it; it looks like this intends to match the
		 * report's source address against our own addresses —
		 * confirm against the v4 igmp_input() equivalent.
		 */
		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    lcladdr_ptr)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];
					char buf2[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s ipif_local %s",
					    inet_ntop(AF_INET6, lcladdr_ptr,
					    buf1, sizeof (buf1)),
					    inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    buf2, sizeof (buf2)));
				}
				mutex_exit(&ill->ill_lock);
				freemsg(mp);
				return;
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return;
		}

		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		ilm = ilm_walker_start(&ilw, ill);
		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			/* another member reported; cancel our pending report */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		ilm_walker_finish(&ilw);
		break;
	}
	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	/*
	 * All MLD packets have already been passed up to any
	 * process(es) listening on a ICMP6 raw socket. This
	 * has been accomplished in ip_deliver_local_v6 prior to
	 * this function call. It is assumed that the multicast daemon
	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the
	 * ICMP6_FILTER socket option to only receive the MLD messages)
	 * Thus we can free the MLD message block here
	 */
	freemsg(mp);
}

/*
 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
 * (non-zero, unsigned) timer value to be set on success.
 */
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next, current;
	in6_addr_t *v6group;
	ilm_walker_t ilw;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
	 * INFINITY)
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	v6group = &mldh->mld_addr;
	/* address must be unspecified (general query) or a multicast group */
	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
		return (0);
	}

	/*
	 * Need to do compatibility mode checking: hearing an MLDv1 query
	 * drops this ill back to MLDv1 operation.
	 */
	mutex_enter(&ill->ill_lock);
	ill->ill_mcast_v1_time = 0;
	ill->ill_mcast_v1_tset = 1;
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
		    "MLD_V1_ROUTER\n", ill->ill_name));
		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
		ill->ill_mcast_type = MLD_V1_ROUTER;
	}
	mutex_exit(&ill->ill_lock);

	timer = (int)ntohs(mldh->mld_maxdelay);
	if (ip_debug > 1) {
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
		    timer, (int)mldh->mld_type);
	}

	/*
	 * -Start the timers in all of our membership records for
	 * the physical interface on which the query arrived,
	 * excl:
	 *	1.  those that belong to the "all hosts" group,
	 *	2.  those with 0 scope, or 1 node-local scope.
	 *
	 * -Restart any timer that is already running but has a value
	 * longer that the requested timeout.
	 * -Use the value specified in the query message as the
	 * maximum timeout.
	 */
	next = INFINITY;

	ilm = ilm_walker_start(&ilw, ill);
	mutex_enter(&ill->ill_lock);
	current = CURRENT_MSTIME;

	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
			continue;
		/*
		 * NOTE(review): && binds tighter than ||, so as written this
		 * reads (not-all-hosts AND general-query) OR group-matches.
		 * The comment above suggests the intent is
		 * not-all-hosts AND (general-query OR group-matches); as
		 * written, a group-specific query for the all-hosts group
		 * would be answered.  Confirm intended grouping before
		 * changing.
		 */
		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
		    &ipv6_all_hosts_mcast)) &&
		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
			if (timer == 0) {
				/* Respond immediately */
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_IREPORTEDLAST;
				mutex_exit(&ill->ill_lock);
				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
				mutex_enter(&ill->ill_lock);
				break;
			}
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				/* timers are stored as absolute times */
				ilm->ilm_timer += current;
			}
			break;
		}
	}
	mutex_exit(&ill->ill_lock);
	ilm_walker_finish(&ilw);

	return (next);
}

/*
 * Handles an MLDv2 Listener Query. On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t	*ilm;
	in6_addr_t *v6group, *src_array;
	uint_t	next, numsrc, i, mrd, delay, qqi, current;
	uint8_t	qrv;
	ilm_walker_t ilw;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* the source address array immediately follows the fixed header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * Extract Maximum Response Delay from code in header; values at
	 * or above MLD_V2_MAXRT_FPMIN are a floating-point encoding
	 * (mantissa/exponent) per the MLDv2 spec.
	 */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	/* adopt the querier's robustness variable (0 means "use default") */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* QQI field uses the same floating-point encoding as max-resp */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < (current + delay)) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = current + delay;
		mutex_exit(&ill->ill_lock);
		next = delay;

	} else {
		/* group or group/source specific query */
		ilm = ilm_walker_start(&ilw, ill);
		mutex_enter(&ill->ill_lock);
		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty). Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/*
			 * Convert the stored absolute timer back to a
			 * relative value, clamp to the new delay, then
			 * re-absolutize.
			 */
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
			break;
		}
		mutex_exit(&ill->ill_lock);
		ilm_walker_finish(&ilw);
	}

	return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t		*mp;
	mld_hdr_t	*mldh;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t		*ill = ilm->ilm_ill;
	ipif_t		*ipif;

	/*
	 * We need to place a router alert option in this packet.  The length
	 * of the options must be a multiple of 8.  The hbh option header is 2
	 * bytes followed by the 4 byte router alert option.  That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int	router_alert_length = 8;

	ASSERT(ill->ill_isv6);

	size += router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1.  The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	/* NULL v6addr means "send to the group being reported" */
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* Otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_max_frag,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t		*mp;
	mld2r_t		*mld2r;
	mld2mar_t	*mld2mar;
	in6_addr_t	*srcarray;
	ip6_t		*ip6h;
	ip6_hbh_t	*ip6hbh;
	struct ip6_opt_router	*ip6router;
	size_t		size, optlen, padlen, icmpsize, rsize;
	ipif_t		*ipif;
	int		i, numrec, more_src_cnt;
	mrec_t		*rp, *cur_reclist;
	mrec_t		*next_reclist = reclist;
	boolean_t	morepkts;

	ASSERT(IAM_WRITER_ILL(ill));

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes.  We assume here that optlen <= 8, so the total option
	 * length will be 8.  Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	/*
	 * Accumulate records until the next one would exceed ill_max_frag;
	 * remember the resume point (next_reclist) and loop back here for
	 * the next packet.  Same structure as igmpv3_sendrpt().
	 */
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag -
				    (size + sizeof (mld2mar_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 */
				if (srcspace <= sizeof (in6_addr_t))
					continue;

				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					/* EXCLUDE-type lists are truncated */
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	/*
	 * Lay out the packet: IPv6 header, hop-by-hop router-alert option
	 * (padded to 8 bytes), MLDv2 report header, then the address records.
	 */
	ip6h = (ip6_t *)mp->b_rptr;
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field. The checksum is calculated in ip_wput_v6.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		/* next record starts just past this record's source array */
		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * Shift the unsent sources of the partially sent
			 * mrec down to the front before the next packet.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* this function owns reclist; free every mrec before returning */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * Allocate and fill in an mrec_t describing one report record (type,
 * group, source list) and prepend it to 'next'.  On allocation failure,
 * or for an ALLOW/BLOCK record with an empty source list (which would be
 * meaningless), simply return 'next' unchanged — record building is
 * best-effort.
 */
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
	mrec_t	*rp;
	int	i;

	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
	    SLIST_IS_EMPTY(srclist))
		return (next);

	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
	if (rp == NULL)
		return (next);

	rp->mrec_next = next;
	rp->mrec_type = type;
	rp->mrec_auxlen = 0;
	rp->mrec_group = *grp;
	if (srclist == NULL) {
		rp->mrec_srcs.sl_numsrc = 0;
	} else {
		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
		for (i = 0; i < srclist->sl_numsrc; i++)
			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
	}

	return (rp);
}

/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
	/*
	 * There are only three possibilities for rtype:
	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
	 *	  => rtype is ALLOW_NEW_SOURCES
	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
	 *	  => rtype is CHANGE_TO_EXCLUDE
	 *	State change that involves a filter mode change
	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
	 */
	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
	    rtype == ALLOW_NEW_SOURCES);

	/* retransmit [robustness variable] times in total */
	rtxp->rtx_cnt = ill->ill_mcast_rv;

	switch (rtype) {
	case CHANGE_TO_EXCLUDE:
		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_allow);
		COPY_SLIST(flist, rtxp->rtx_block);
		break;
	case ALLOW_NEW_SOURCES:
	case CHANGE_TO_INCLUDE:
		/* ALLOW is not a filter mode change, so no fmode retransmit */
		rtxp->rtx_fmode_cnt =
		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_block);
		COPY_SLIST(flist, rtxp->rtx_allow);
		break;
	}
}

/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t	*ill;
	rtx_state_t	*rtxp = &ilm->ilm_rtx;
	mcast_record_t	txtype;
	mrec_t	*rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/* no BLOCK mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* no ALLOW mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}

/*
 * Convenience routine to signal the restart-timer thread.
 */
static void
mcast_signal_restart_thread(ip_stack_t *ipst)
{
	mutex_enter(&ipst->ips_mrt_lock);
	ipst->ips_mrt_flags |= IP_MRT_RUN;
	cv_signal(&ipst->ips_mrt_cv);
	mutex_exit(&ipst->ips_mrt_lock);
}

/*
 * Thread to restart IGMP/MLD timers.  See the comment in igmp_joingroup() for
 * the story behind this unfortunate thread.
 *
 * Loops forever: waits (CPR-safe) on ips_mrt_cv until signalled with
 * IP_MRT_RUN or IP_MRT_STOP, then restarts any deferred IGMP and MLD
 * timers.  On IP_MRT_STOP it sets IP_MRT_DONE, signals the waiter, and
 * exits.
 */
void
mcast_restart_timers_thread(ip_stack_t *ipst)
{
	int	next;
	char	name[64];
	callb_cpr_t cprinfo;

	(void) snprintf(name, sizeof (name), "mcast_restart_timers_thread_%d",
	    ipst->ips_netstack->netstack_stackid);
	CALLB_CPR_INIT(&cprinfo, &ipst->ips_mrt_lock, callb_generic_cpr, name);

	for (;;) {
		mutex_enter(&ipst->ips_mrt_lock);
		while (!(ipst->ips_mrt_flags & (IP_MRT_STOP|IP_MRT_RUN))) {
			CALLB_CPR_SAFE_BEGIN(&cprinfo);
			cv_wait(&ipst->ips_mrt_cv, &ipst->ips_mrt_lock);
			CALLB_CPR_SAFE_END(&cprinfo, &ipst->ips_mrt_lock);
		}
		/* note: on STOP we break out still holding ips_mrt_lock */
		if (ipst->ips_mrt_flags & IP_MRT_STOP)
			break;
		ipst->ips_mrt_flags &= ~IP_MRT_RUN;
		mutex_exit(&ipst->ips_mrt_lock);

		/* consume and restart any deferred IGMP timer */
		mutex_enter(&ipst->ips_igmp_timer_lock);
		next = ipst->ips_igmp_deferred_next;
		ipst->ips_igmp_deferred_next = INFINITY;
		mutex_exit(&ipst->ips_igmp_timer_lock);

		if (next != INFINITY)
			igmp_start_timers(next, ipst);

		/* consume and restart any deferred MLD timer */
		mutex_enter(&ipst->ips_mld_timer_lock);
		next = ipst->ips_mld_deferred_next;
		ipst->ips_mld_deferred_next = INFINITY;
		mutex_exit(&ipst->ips_mld_timer_lock);
		if (next != INFINITY)
			mld_start_timers(next, ipst);
	}

	/* still holding ips_mrt_lock here, as CALLB_CPR_EXIT requires */
	ipst->ips_mrt_flags |= IP_MRT_DONE;
	cv_signal(&ipst->ips_mrt_done_cv);
	CALLB_CPR_EXIT(&cprinfo);	/* drops ips_mrt_lock */
	thread_exit();
}