1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 /* 28 * Internet Group Management Protocol (IGMP) routines. 29 * Multicast Listener Discovery Protocol (MLD) routines. 30 * 31 * Written by Steve Deering, Stanford, May 1988. 32 * Modified by Rosen Sharma, Stanford, Aug 1994. 33 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
34 * 35 * MULTICAST 3.5.1.1 36 */ 37 38 #include <sys/types.h> 39 #include <sys/stream.h> 40 #include <sys/stropts.h> 41 #include <sys/strlog.h> 42 #include <sys/strsun.h> 43 #include <sys/systm.h> 44 #include <sys/ddi.h> 45 #include <sys/sunddi.h> 46 #include <sys/cmn_err.h> 47 #include <sys/atomic.h> 48 #include <sys/zone.h> 49 #include <sys/callb.h> 50 #include <sys/param.h> 51 #include <sys/socket.h> 52 #include <inet/ipclassifier.h> 53 #include <net/if.h> 54 #include <net/route.h> 55 #include <netinet/in.h> 56 #include <netinet/igmp_var.h> 57 #include <netinet/ip6.h> 58 #include <netinet/icmp6.h> 59 60 #include <inet/common.h> 61 #include <inet/mi.h> 62 #include <inet/nd.h> 63 #include <inet/ip.h> 64 #include <inet/ip6.h> 65 #include <inet/ip_multi.h> 66 #include <inet/ip_listutils.h> 67 68 #include <netinet/igmp.h> 69 #include <inet/ip_if.h> 70 #include <net/pfkeyv2.h> 71 #include <inet/ipsec_info.h> 72 73 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 74 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 75 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 76 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 77 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 78 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 79 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 80 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 81 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 82 slist_t *srclist, mrec_t *next); 83 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 84 mcast_record_t rtype, slist_t *flist); 85 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 86 static void mcast_signal_restart_thread(ip_stack_t *ipst); 87 88 /* 89 * Macros used to do timer len conversions. 
Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define DSEC_TO_MSEC(dsec)      ((dsec) * 100)
#define SEC_TO_MSEC(sec)        ((sec) * 1000)

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is the time
 * interval between time scheduled to time cancelled.  This can cause
 * delays in sending out multicast membership reports.  To resolve this
 * problem, wallclock time (absolute time) is used instead of deltas
 * (relative time) to track timers.
 *
 * The MACRO below gets the lbolt value, used for proper timer scheduling
 * and firing.  Therefore multicast membership reports are sent on time.
 * The timer does not exactly fire at the time it was scheduled to fire,
 * there is a difference of a few milliseconds observed.  An offset is used
 * to take care of the difference.
 */

#define CURRENT_MSTIME  ((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define CURRENT_OFFSET  (999)

/*
 * The first multicast join will trigger the igmp timers / mld timers
 * The unit for next is milliseconds.
 *
 * igmp_start_timers: guarantee that igmp_timeout_handler() will fire no
 * later than 'next' ms from now, starting or rescheduling the per-stack
 * IGMP timeout as needed.  Concurrent callers are serialized via the
 * ips_igmp_timer_setter_active flag so at most one thread manipulates
 * the timeout at a time.
 */
static void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
        int     time_left;
        int     ret;

        /* Callers must pass a real deadline; INFINITY means "no timer". */
        ASSERT(next != 0 && next != INFINITY);

        mutex_enter(&ipst->ips_igmp_timer_lock);

        if (ipst->ips_igmp_timer_setter_active) {
                /*
                 * Serialize timer setters, one at a time.  If the
                 * timer is currently being set by someone,
                 * just record the next time when it has to be
                 * invoked and return.  The current setter will
                 * take care.
                 */
                ipst->ips_igmp_time_to_next =
                    MIN(ipst->ips_igmp_time_to_next, next);
                mutex_exit(&ipst->ips_igmp_timer_lock);
                return;
        } else {
                ipst->ips_igmp_timer_setter_active = B_TRUE;
        }
        if (ipst->ips_igmp_timeout_id == 0) {
                /*
                 * The timer is inactive.  We need to start a timer
                 */
                ipst->ips_igmp_time_to_next = next;
                ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
                    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
                ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
                ipst->ips_igmp_timer_setter_active = B_FALSE;
                mutex_exit(&ipst->ips_igmp_timer_lock);
                return;
        }

        /*
         * The timer was scheduled sometime back for firing in
         * 'igmp_time_to_next' ms and is active.  We need to
         * reschedule the timeout if the new 'next' will happen
         * earlier than the currently scheduled timeout.
         * time_left is the remaining ticks on the outstanding timeout;
         * if it is already shorter than 'next' there is nothing to do.
         */
        time_left = ipst->ips_igmp_timer_scheduled_last +
            MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
        if (time_left < MSEC_TO_TICK(next)) {
                ipst->ips_igmp_timer_setter_active = B_FALSE;
                mutex_exit(&ipst->ips_igmp_timer_lock);
                return;
        }

        /*
         * Drop the lock across untimeout(): untimeout() can block waiting
         * for a running igmp_timeout_handler(), which itself takes
         * ips_igmp_timer_lock — holding it here would deadlock.
         */
        mutex_exit(&ipst->ips_igmp_timer_lock);
        ret = untimeout(ipst->ips_igmp_timeout_id);
        mutex_enter(&ipst->ips_igmp_timer_lock);
        /*
         * The timeout was cancelled, or the timeout handler
         * completed, while we were blocked in the untimeout.
         * No other thread could have set the timer meanwhile
         * since we serialized all the timer setters.  Thus
         * no timer is currently active nor executing nor will
         * any timer fire in the future.  We start the timer now
         * if needed.
         */
        if (ret == -1) {
                /* Handler ran to completion and cleared the id itself. */
                ASSERT(ipst->ips_igmp_timeout_id == 0);
        } else {
                /* We cancelled it before it fired. */
                ASSERT(ipst->ips_igmp_timeout_id != 0);
                ipst->ips_igmp_timeout_id = 0;
        }
        if (ipst->ips_igmp_time_to_next != 0) {
                ipst->ips_igmp_time_to_next =
                    MIN(ipst->ips_igmp_time_to_next, next);
                ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
                    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
                ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_igmp_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 * Exact MLD counterpart of igmp_start_timers(); see the comments there
 * for the serialization and untimeout-race reasoning.
 */
static void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
        int     time_left;
        int     ret;

        ASSERT(next != 0 && next != INFINITY);

        mutex_enter(&ipst->ips_mld_timer_lock);
        if (ipst->ips_mld_timer_setter_active) {
                /*
                 * Serialize timer setters, one at a time.  If the
                 * timer is currently being set by someone,
                 * just record the next time when it has to be
                 * invoked and return.  The current setter will
                 * take care.
                 */
                ipst->ips_mld_time_to_next =
                    MIN(ipst->ips_mld_time_to_next, next);
                mutex_exit(&ipst->ips_mld_timer_lock);
                return;
        } else {
                ipst->ips_mld_timer_setter_active = B_TRUE;
        }
        if (ipst->ips_mld_timeout_id == 0) {
                /*
                 * The timer is inactive.  We need to start a timer
                 */
                ipst->ips_mld_time_to_next = next;
                ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
                    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
                ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
                ipst->ips_mld_timer_setter_active = B_FALSE;
                mutex_exit(&ipst->ips_mld_timer_lock);
                return;
        }

        /*
         * The timer was scheduled sometime back for firing in
         * 'mld_time_to_next' ms and is active.  We need to
         * reschedule the timeout if the new 'next' will happen
         * earlier than the currently scheduled timeout
         */
        time_left = ipst->ips_mld_timer_scheduled_last +
            MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
        if (time_left < MSEC_TO_TICK(next)) {
                ipst->ips_mld_timer_setter_active = B_FALSE;
                mutex_exit(&ipst->ips_mld_timer_lock);
                return;
        }

        /* Lock dropped across untimeout() — see igmp_start_timers(). */
        mutex_exit(&ipst->ips_mld_timer_lock);
        ret = untimeout(ipst->ips_mld_timeout_id);
        mutex_enter(&ipst->ips_mld_timer_lock);
        /*
         * The timeout was cancelled, or the timeout handler
         * completed, while we were blocked in the untimeout.
         * No other thread could have set the timer meanwhile
         * since we serialized all the timer setters.  Thus
         * no timer is currently active nor executing nor will
         * any timer fire in the future.  We start the timer now
         * if needed.
         */
        if (ret == -1) {
                ASSERT(ipst->ips_mld_timeout_id == 0);
        } else {
                ASSERT(ipst->ips_mld_timeout_id != 0);
                ipst->ips_mld_timeout_id = 0;
        }
        if (ipst->ips_mld_time_to_next != 0) {
                ipst->ips_mld_time_to_next =
                    MIN(ipst->ips_mld_time_to_next, next);
                ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
                    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
                ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
        }
        ipst->ips_mld_timer_setter_active = B_FALSE;
        mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
 */
/* ARGSUSED */
mblk_t *
igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
        igmpa_t         *igmpa;
        ipha_t          *ipha = (ipha_t *)(mp->b_rptr);
        int             iphlen, igmplen, mblklen;
        ilm_t           *ilm;
        uint32_t        src, dst;
        uint32_t        group;
        uint_t          next;
        ipif_t          *ipif;
        ip_stack_t      *ipst;
        ilm_walker_t    ilw;

        ASSERT(ill != NULL);
        ASSERT(!ill->ill_isv6);
        ipst = ill->ill_ipst;
        ++ipst->ips_igmpstat.igps_rcv_total;

        /* First mblk must at least cover the IP header. */
        mblklen = MBLKL(mp);
        if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
                ++ipst->ips_igmpstat.igps_rcv_tooshort;
                goto bad_pkt;
        }
        igmplen = ntohs(ipha->ipha_length) - iphlen;
        /*
         * Since msg sizes are more variable with v3, just pullup the
         * whole thing now.
         * (NOTE(review): despite the block comment above this function
         * saying pullupmsg(), this uses msgpullup() and replaces mp, so
         * the original mblk is freed — callers must not cache pointers
         * into the old mblk.)
         */
        if (MBLKL(mp) < (igmplen + iphlen)) {
                mblk_t *mp1;
                if ((mp1 = msgpullup(mp, -1)) == NULL) {
                        ++ipst->ips_igmpstat.igps_rcv_tooshort;
                        goto bad_pkt;
                }
                freemsg(mp);
                mp = mp1;
                ipha = (ipha_t *)(mp->b_rptr);
        }

        /*
         * Validate lengths
         */
        if (igmplen < IGMP_MINLEN) {
                ++ipst->ips_igmpstat.igps_rcv_tooshort;
                goto bad_pkt;
        }
        /*
         * Validate checksum over the IGMP payload (offset iphlen onward).
         */
        if (IP_CSUM(mp, iphlen, 0)) {
                ++ipst->ips_igmpstat.igps_rcv_badsum;
                goto bad_pkt;
        }

        igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
        src = ipha->ipha_src;
        dst = ipha->ipha_dst;
        if (ip_debug > 1)
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_input: src 0x%x, dst 0x%x on %s\n",
                    (int)ntohl(src), (int)ntohl(dst),
                    ill->ill_name);

        switch (igmpa->igmpa_type) {
        case IGMP_MEMBERSHIP_QUERY:
                /*
                 * packet length differentiates between v1/v2 and v3
                 * v1/v2 should be exactly 8 octets long; v3 is >= 12
                 */
                if ((igmplen == IGMP_MINLEN) ||
                    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
                        next = igmp_query_in(ipha, igmpa, ill);
                } else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
                        next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
                            igmplen);
                } else {
                        ++ipst->ips_igmpstat.igps_rcv_tooshort;
                        goto bad_pkt;
                }
                /* 0 means the query was invalid; INFINITY means no timer. */
                if (next == 0)
                        goto bad_pkt;

                if (next != INFINITY)
                        igmp_start_timers(next, ipst);

                break;

        case IGMP_V1_MEMBERSHIP_REPORT:
        case IGMP_V2_MEMBERSHIP_REPORT:
                /*
                 * For fast leave to work, we have to know that we are the
                 * last person to send a report for this group.  Reports
                 * generated by us are looped back since we could potentially
                 * be a multicast router, so discard reports sourced by me.
                 */
                mutex_enter(&ill->ill_lock);
                for (ipif = ill->ill_ipif; ipif != NULL;
                    ipif = ipif->ipif_next) {
                        if (ipif->ipif_lcl_addr == src) {
                                if (ip_debug > 1) {
                                        (void) mi_strlog(ill->ill_rq,
                                            1,
                                            SL_TRACE,
                                            "igmp_input: we are only "
                                            "member src 0x%x ipif_local 0x%x",
                                            (int)ntohl(src),
                                            (int)ntohl(ipif->ipif_lcl_addr));
                                }
                                mutex_exit(&ill->ill_lock);
                                return (mp);
                        }
                }
                mutex_exit(&ill->ill_lock);

                ++ipst->ips_igmpstat.igps_rcv_reports;
                group = igmpa->igmpa_group;
                if (!CLASSD(group)) {
                        ++ipst->ips_igmpstat.igps_rcv_badreports;
                        goto bad_pkt;
                }

                /*
                 * KLUDGE: if the IP source address of the report has an
                 * unspecified (i.e., zero) subnet number, as is allowed for
                 * a booting host, replace it with the correct subnet number
                 * so that a process-level multicast routing demon can
                 * determine which subnet it arrived from.  This is necessary
                 * to compensate for the lack of any way for a process to
                 * determine the arrival interface of an incoming packet.
                 *
                 * Requires that a copy of *this* message it passed up
                 * to the raw interface which is done by our caller.
                 */
                if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */
                        /* Pick the first ipif on this ill */
                        mutex_enter(&ill->ill_lock);
                        src = ill->ill_ipif->ipif_subnet;
                        mutex_exit(&ill->ill_lock);
                        ip1dbg(("igmp_input: changed src to 0x%x\n",
                            (int)ntohl(src)));
                        ipha->ipha_src = src;
                }

                /*
                 * If our ill has ILMs that belong to the group being
                 * reported, and we are a 'Delaying Member' in the RFC
                 * terminology, stop our timer for that group and 'clear
                 * flag' i.e. mark as IGMP_OTHERMEMBER.
                 */
                ilm = ilm_walker_start(&ilw, ill);
                for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
                        if (ilm->ilm_addr == group) {
                                ++ipst->ips_igmpstat.igps_rcv_ourreports;
                                ilm->ilm_timer = INFINITY;
                                ilm->ilm_state = IGMP_OTHERMEMBER;
                        }
                }
                ilm_walker_finish(&ilw);
                break;

        case IGMP_V3_MEMBERSHIP_REPORT:
                /*
                 * Currently nothing to do here; IGMP router is not
                 * implemented in ip, and v3 hosts don't pay attention
                 * to membership reports.
                 */
                break;
        }
        /*
         * Pass all valid IGMP packets up to any process(es) listening
         * on a raw IGMP socket.  Do not free the packet.
         */
        return (mp);

bad_pkt:
        freemsg(mp);
        return (NULL);
}

/*
 * igmp_query_in:
 * Process a received IGMPv1/v2 membership query on 'ill'.  Returns 0 for
 * an invalid query, otherwise the number of milliseconds until the
 * earliest membership-report timer set here (INFINITY if none was set).
 */
static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
        ilm_t           *ilm;
        int             timer;
        uint_t          next, current;
        ip_stack_t      *ipst;
        ilm_walker_t    ilw;

        ipst = ill->ill_ipst;
        ++ipst->ips_igmpstat.igps_rcv_queries;

        /*
         * In the IGMPv2 specification, there are 3 states and a flag.
         *
         * In Non-Member state, we simply don't have a membership record.
         * In Delaying Member state, our timer is running (ilm->ilm_timer
         * < INFINITY).  In Idle Member state, our timer is not running
         * (ilm->ilm_timer == INFINITY).
497 * 498 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 499 * we have heard a report from another member, or IGMP_IREPORTEDLAST 500 * if I sent the last report. 501 */ 502 if ((igmpa->igmpa_code == 0) || 503 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) { 504 /* 505 * Query from an old router. 506 * Remember that the querier on this interface is old, 507 * and set the timer to the value in RFC 1112. 508 */ 509 510 511 mutex_enter(&ill->ill_lock); 512 ill->ill_mcast_v1_time = 0; 513 ill->ill_mcast_v1_tset = 1; 514 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 515 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 516 "to IGMP_V1_ROUTER\n", ill->ill_name)); 517 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 518 ill->ill_mcast_type = IGMP_V1_ROUTER; 519 } 520 mutex_exit(&ill->ill_lock); 521 522 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 523 524 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 525 igmpa->igmpa_group != 0) { 526 ++ipst->ips_igmpstat.igps_rcv_badqueries; 527 return (0); 528 } 529 530 } else { 531 in_addr_t group; 532 533 /* 534 * Query from a new router 535 * Simply do a validity check 536 */ 537 group = igmpa->igmpa_group; 538 if (group != 0 && (!CLASSD(group))) { 539 ++ipst->ips_igmpstat.igps_rcv_badqueries; 540 return (0); 541 } 542 543 /* 544 * Switch interface state to v2 on receipt of a v2 query 545 * ONLY IF current state is v3. Let things be if current 546 * state if v1 but do reset the v2-querier-present timer. 
547 */ 548 mutex_enter(&ill->ill_lock); 549 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 550 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 551 "to IGMP_V2_ROUTER", ill->ill_name)); 552 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 553 ill->ill_mcast_type = IGMP_V2_ROUTER; 554 } 555 ill->ill_mcast_v2_time = 0; 556 ill->ill_mcast_v2_tset = 1; 557 mutex_exit(&ill->ill_lock); 558 559 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 560 } 561 562 if (ip_debug > 1) { 563 mutex_enter(&ill->ill_lock); 564 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 565 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 566 (int)ntohs(igmpa->igmpa_code), 567 (int)ntohs(igmpa->igmpa_type)); 568 mutex_exit(&ill->ill_lock); 569 } 570 571 /* 572 * -Start the timers in all of our membership records 573 * for the physical interface on which the query 574 * arrived, excluding those that belong to the "all 575 * hosts" group (224.0.0.1). 576 * 577 * -Restart any timer that is already running but has 578 * a value longer than the requested timeout. 579 * 580 * -Use the value specified in the query message as 581 * the maximum timeout. 582 */ 583 next = (unsigned)INFINITY; 584 585 ilm = ilm_walker_start(&ilw, ill); 586 mutex_enter(&ill->ill_lock); 587 current = CURRENT_MSTIME; 588 589 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 590 /* 591 * A multicast router joins INADDR_ANY address 592 * to enable promiscuous reception of all 593 * mcasts from the interface. 
This INADDR_ANY 594 * is stored in the ilm_v6addr as V6 unspec addr 595 */ 596 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 597 continue; 598 if (ilm->ilm_addr == htonl(INADDR_ANY)) 599 continue; 600 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 601 (igmpa->igmpa_group == 0) || 602 (igmpa->igmpa_group == ilm->ilm_addr)) { 603 if (ilm->ilm_timer > timer) { 604 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 605 if (ilm->ilm_timer < next) 606 next = ilm->ilm_timer; 607 ilm->ilm_timer += current; 608 } 609 } 610 } 611 mutex_exit(&ill->ill_lock); 612 ilm_walker_finish(&ilw); 613 614 return (next); 615 } 616 617 static uint_t 618 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 619 { 620 uint_t i, next, mrd, qqi, timer, delay, numsrc; 621 uint_t current; 622 ilm_t *ilm; 623 ipaddr_t *src_array; 624 uint8_t qrv; 625 ip_stack_t *ipst; 626 ilm_walker_t ilw; 627 628 ipst = ill->ill_ipst; 629 /* make sure numsrc matches packet size */ 630 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 631 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 632 ++ipst->ips_igmpstat.igps_rcv_tooshort; 633 return (0); 634 } 635 src_array = (ipaddr_t *)&igmp3qa[1]; 636 637 ++ipst->ips_igmpstat.igps_rcv_queries; 638 639 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 640 uint_t hdrval, mant, exp; 641 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 642 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 643 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 644 mrd = (mant | 0x10) << (exp + 3); 645 } 646 if (mrd == 0) 647 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 648 timer = DSEC_TO_MSEC(mrd); 649 MCAST_RANDOM_DELAY(delay, timer); 650 next = (unsigned)INFINITY; 651 current = CURRENT_MSTIME; 652 653 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 654 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 655 else 656 ill->ill_mcast_rv = qrv; 657 658 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 659 uint_t hdrval, mant, exp; 660 hdrval = 
(uint_t)igmp3qa->igmp3qa_qqic;
                mant = hdrval & IGMP_V3_QQI_MANT_MASK;
                exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
                qqi = (mant | 0x10) << (exp + 3);
        }
        /* Adopt the querier's query interval (default if zero). */
        ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

        /*
         * If we have a pending general query response that's scheduled
         * sooner than the delay we calculated for this response, then
         * no action is required (RFC3376 section 5.2 rule 1)
         */
        mutex_enter(&ill->ill_lock);
        if (ill->ill_global_timer < (current + delay)) {
                mutex_exit(&ill->ill_lock);
                return (next);
        }
        mutex_exit(&ill->ill_lock);

        /*
         * Now take action depending upon query type:
         * general, group specific, or group/source specific.
         */
        if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
                /*
                 * general query
                 * We know global timer is either not running or is
                 * greater than our calculated delay, so reset it to
                 * our delay (random value in range [0, response time]).
                 */
                mutex_enter(&ill->ill_lock);
                ill->ill_global_timer = current + delay;
                mutex_exit(&ill->ill_lock);
                next = delay;

        } else {
                /* group or group/source specific query */
                ilm = ilm_walker_start(&ilw, ill);
                mutex_enter(&ill->ill_lock);
                for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
                        /* Skip non-v4, unspecified, and all-hosts ilms,
                         * and any group other than the one queried. */
                        if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
                            (ilm->ilm_addr == htonl(INADDR_ANY)) ||
                            (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
                            (igmp3qa->igmp3qa_group != ilm->ilm_addr))
                                continue;
                        /*
                         * If the query is group specific or we have a
                         * pending group specific query, the response is
                         * group specific (pending sources list should be
                         * empty).  Otherwise, need to update the pending
                         * sources list for the group and source specific
                         * response.
                         */
                        if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
                            SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
                                FREE_SLIST(ilm->ilm_pendsrcs);
                                ilm->ilm_pendsrcs = NULL;
                        } else {
                                boolean_t overflow;
                                slist_t *pktl;
                                if (numsrc > MAX_FILTER_SIZE ||
                                    (ilm->ilm_pendsrcs == NULL &&
                                    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
                                        /*
                                         * We've been sent more sources than
                                         * we can deal with; or we can't deal
                                         * with a source list at all.  Revert
                                         * to a group specific query.
                                         */
                                        goto group_query;
                                }
                                if ((pktl = l_alloc()) == NULL)
                                        goto group_query;
                                pktl->sl_numsrc = numsrc;
                                for (i = 0; i < numsrc; i++)
                                        IN6_IPADDR_TO_V4MAPPED(src_array[i],
                                            &(pktl->sl_addr[i]));
                                /* Merge this query's sources into the
                                 * pending set; on overflow fall back to a
                                 * group-specific response. */
                                l_union_in_a(ilm->ilm_pendsrcs, pktl,
                                    &overflow);
                                l_free(pktl);
                                if (overflow)
                                        goto group_query;
                        }

                        /* Rebase the stored absolute timer to a relative
                         * value before comparing against 'delay'. */
                        ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
                            INFINITY : (ilm->ilm_timer - current);
                        /* choose soonest timer */
                        ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
                        if (ilm->ilm_timer < next)
                                next = ilm->ilm_timer;
                        /* back to absolute (wallclock) time */
                        ilm->ilm_timer += current;
                }
                mutex_exit(&ill->ill_lock);
                ilm_walker_finish(&ilw);
        }

        return (next);
}

/*
 * igmp_joingroup:
 * Called when an IPv4 multicast group is joined on the ilm's ipif.
 * Sends the initial unsolicited membership report appropriate to the
 * querier version on the interface and arms the retransmission timer.
 */
void
igmp_joingroup(ilm_t *ilm)
{
        uint_t  timer;
        ill_t   *ill;
        ip_stack_t      *ipst = ilm->ilm_ipst;

        ill = ilm->ilm_ipif->ipif_ill;

        ASSERT(IAM_WRITER_ILL(ill));
        ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);

        mutex_enter(&ill->ill_lock);
        if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
                /* The all-hosts group is never reported. */
                ilm->ilm_rtx.rtx_timer = INFINITY;
                ilm->ilm_state = IGMP_OTHERMEMBER;
                mutex_exit(&ill->ill_lock);
        } else {
                ip1dbg(("Querier mode %d, sending report, group %x\n",
                    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
                if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
                        /* Drop the lock across the wire send. */
                        mutex_exit(&ill->ill_lock);
                        igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
                        mutex_enter(&ill->ill_lock);
                } else
if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
                        mutex_exit(&ill->ill_lock);
                        igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
                        mutex_enter(&ill->ill_lock);
                } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
                        mrec_t *rp;
                        mcast_record_t rtype;
                        /*
                         * The possible state changes we need to handle here:
                         *   Old State   New State     Report
                         *
                         *   INCLUDE(0)  INCLUDE(X)    ALLOW(X),BLOCK(0)
                         *   INCLUDE(0)  EXCLUDE(X)    TO_EX(X)
                         *
                         * No need to send the BLOCK(0) report; ALLOW(X)
                         * is enough.
                         */
                        rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                            ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
                        rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                            ilm->ilm_filter, NULL);
                        mutex_exit(&ill->ill_lock);
                        igmpv3_sendrpt(ilm->ilm_ipif, rp);
                        mutex_enter(&ill->ill_lock);
                        /*
                         * Set up retransmission state.  Timer is set below,
                         * for both v3 and older versions.
                         */
                        mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                            ilm->ilm_filter);
                }

                /* Set the ilm timer value */
                ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
                MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
                    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
                /* keep the relative delay for the deferred-next bookkeeping */
                timer = ilm->ilm_rtx.rtx_timer;
                ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
                ilm->ilm_state = IGMP_IREPORTEDLAST;
                mutex_exit(&ill->ill_lock);

                /*
                 * We need to restart the IGMP timers, but we can't do it here
                 * since we're inside the IPSQ and thus igmp_start_timers() ->
                 * untimeout() (inside the IPSQ, waiting for a running timeout
                 * to finish) could deadlock with igmp_timeout_handler() ->
                 * ipsq_enter() (running the timeout, waiting to get inside
                 * the IPSQ).  We also can't just delay it until after we
                 * ipsq_exit() since we could be inside more than one IPSQ and
                 * thus still have the other IPSQs pinned after we exit -- and
                 * igmp_start_timers() may be trying to enter one of those.
                 * Instead, signal a dedicated thread that will do it for us.
                 */
                mutex_enter(&ipst->ips_igmp_timer_lock);
                ipst->ips_igmp_deferred_next = MIN(timer,
                    ipst->ips_igmp_deferred_next);
                mutex_exit(&ipst->ips_igmp_timer_lock);
                mcast_signal_restart_thread(ipst);
        }

        if (ip_debug > 1) {
                /*
                 * NOTE(review): if the all-hosts branch above was taken,
                 * 'timer' is never assigned before this read; also
                 * ntohl() on a host-order ms value looks dubious.
                 * Debug-only path — confirm before changing.
                 */
                (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
                    "igmp_joingroup: multicast_type %d timer %d",
                    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
                    (int)ntohl(timer));
        }
}

/*
 * mld_joingroup:
 * IPv6 counterpart of igmp_joingroup(): send the initial unsolicited
 * MLD listener report for the ilm's group and arm the retransmission
 * timer.
 */
void
mld_joingroup(ilm_t *ilm)
{
        uint_t  timer;
        ill_t   *ill;
        ip_stack_t      *ipst = ilm->ilm_ipst;

        ill = ilm->ilm_ill;

        ASSERT(IAM_WRITER_ILL(ill));
        ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

        mutex_enter(&ill->ill_lock);
        if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
                /* The all-hosts group is never reported. */
                ilm->ilm_rtx.rtx_timer = INFINITY;
                ilm->ilm_state = IGMP_OTHERMEMBER;
                mutex_exit(&ill->ill_lock);
        } else {
                if (ill->ill_mcast_type == MLD_V1_ROUTER) {
                        mutex_exit(&ill->ill_lock);
                        mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
                        mutex_enter(&ill->ill_lock);
                } else {
                        mrec_t *rp;
                        mcast_record_t rtype;
                        /*
                         * The possible state changes we need to handle here:
                         *   Old State   New State     Report
                         *
                         *   INCLUDE(0)  INCLUDE(X)    ALLOW(X),BLOCK(0)
                         *   INCLUDE(0)  EXCLUDE(X)    TO_EX(X)
                         *
                         * No need to send the BLOCK(0) report; ALLOW(X)
                         * is enough
                         */
                        rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                            ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
                        rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                            ilm->ilm_filter, NULL);
                        mutex_exit(&ill->ill_lock);
                        mldv2_sendrpt(ill, rp);
                        mutex_enter(&ill->ill_lock);
                        /*
                         * Set up retransmission state.  Timer is set below,
                         * for both v2 and v1.
                         */
                        mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
                            ilm->ilm_filter);
                }

                /* Set the ilm timer value */
                /*
                 * NOTE(review): this ASSERT checks rtx_cnt *before* the
                 * assignment two lines below sets it — verify whether it
                 * was meant to follow the assignment.
                 */
                ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
                    ilm->ilm_rtx.rtx_cnt > 0);

                ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
                MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
                    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
                /* keep the relative delay for the deferred-next bookkeeping */
                timer = ilm->ilm_rtx.rtx_timer;
                ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
                ilm->ilm_state = IGMP_IREPORTEDLAST;
                mutex_exit(&ill->ill_lock);

                /*
                 * Signal another thread to restart the timers.  See the
                 * comment in igmp_joingroup() for details.
                 */
                mutex_enter(&ipst->ips_mld_timer_lock);
                ipst->ips_mld_deferred_next = MIN(timer,
                    ipst->ips_mld_deferred_next);
                mutex_exit(&ipst->ips_mld_timer_lock);
                mcast_signal_restart_thread(ipst);
        }

        if (ip_debug > 1) {
                (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
                    "mld_joingroup: multicast_type %d timer %d",
                    (ilm->ilm_ill->ill_mcast_type),
                    (int)ntohl(timer));
        }
}

/*
 * igmp_leavegroup:
 * Called when an IPv4 multicast group is left.  Sends a v2 Leave or a
 * v3 state-change (BLOCK/TO_IN) report where the protocol calls for one;
 * v1 routers get nothing, and the all-hosts group is never reported.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
        ill_t *ill = ilm->ilm_ipif->ipif_ill;

        ASSERT(ilm->ilm_ill == NULL);
        ASSERT(!ill->ill_isv6);

        mutex_enter(&ill->ill_lock);
        /* v2: only send Leave if we were the last to report this group. */
        if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
            ill->ill_mcast_type == IGMP_V2_ROUTER &&
            (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
                mutex_exit(&ill->ill_lock);
                igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
                    (htonl(INADDR_ALLRTRS_GROUP)));
                return;
        } else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
            (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
                mrec_t *rp;
                /*
                 * The possible state changes we need to handle here:
                 *   Old State   New State     Report
                 *
                 *   INCLUDE(X)  INCLUDE(0)    ALLOW(0),BLOCK(X)
                 *   EXCLUDE(X)  INCLUDE(0)    TO_IN(0)
                 *
                 * No need to send the ALLOW(0) report; BLOCK(X) is enough
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                        rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                            ilm->ilm_filter, NULL);
                } else {
                        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                            NULL, NULL);
                }
                mutex_exit(&ill->ill_lock);
                igmpv3_sendrpt(ilm->ilm_ipif, rp);
                return;
        }
        mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * IPv6 counterpart of igmp_leavegroup(): send a Listener Done (MLDv1)
 * or a v2 state-change report when leaving a group.
 */
void
mld_leavegroup(ilm_t *ilm)
{
        ill_t *ill = ilm->ilm_ill;

        ASSERT(ilm->ilm_ipif == NULL);
        ASSERT(ill->ill_isv6);

        mutex_enter(&ill->ill_lock);
        /* v1: only send Done if we were the last to report this group. */
        if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
            ill->ill_mcast_type == MLD_V1_ROUTER &&
            (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
                mutex_exit(&ill->ill_lock);
                mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
                return;
        } else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
            (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
                mrec_t *rp;
                /*
                 * The possible state changes we need to handle here:
                 *   Old State   New State     Report
                 *
                 *   INCLUDE(X)  INCLUDE(0)    ALLOW(0),BLOCK(X)
                 *   EXCLUDE(X)  INCLUDE(0)    TO_IN(0)
                 *
                 * No need to send the ALLOW(0) report; BLOCK(X) is enough
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                        rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                            ilm->ilm_filter, NULL);
                } else {
                        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
                            NULL, NULL);
                }
                mutex_exit(&ill->ill_lock);
                mldv2_sendrpt(ill, rp);
                return;
        }
        mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * The ilm's source-filter state is changing to (fmode, flist); build and
 * send the corresponding IGMPv3 State Change Report and set up
 * retransmission state.  No-op unless the interface sees a v3 querier.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
        ill_t *ill;
        mrec_t *rp;
        ip_stack_t      *ipst = ilm->ilm_ipst;

        ASSERT(ilm != NULL);

        /* state change reports should only be sent if the router is v3 */
        if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
                return;

        if (ilm->ilm_ill == NULL) {
                ASSERT(ilm->ilm_ipif != NULL);
                ill = ilm->ilm_ipif->ipif_ill;
        } else {
                ill = ilm->ilm_ill;
1034 } 1035 1036 mutex_enter(&ill->ill_lock); 1037 1038 /* 1039 * Compare existing(old) state with the new state and prepare 1040 * State Change Report, according to the rules in RFC 3376: 1041 * 1042 * Old State New State State Change Report 1043 * 1044 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 1045 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 1046 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 1047 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 1048 */ 1049 1050 if (ilm->ilm_fmode == fmode) { 1051 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1052 slist_t *allow, *block; 1053 if (((a_minus_b = l_alloc()) == NULL) || 1054 ((b_minus_a = l_alloc()) == NULL)) { 1055 l_free(a_minus_b); 1056 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1057 goto send_to_ex; 1058 else 1059 goto send_to_in; 1060 } 1061 l_difference(ilm->ilm_filter, flist, a_minus_b); 1062 l_difference(flist, ilm->ilm_filter, b_minus_a); 1063 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1064 allow = b_minus_a; 1065 block = a_minus_b; 1066 } else { 1067 allow = a_minus_b; 1068 block = b_minus_a; 1069 } 1070 rp = NULL; 1071 if (!SLIST_IS_EMPTY(allow)) 1072 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1073 allow, rp); 1074 if (!SLIST_IS_EMPTY(block)) 1075 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1076 block, rp); 1077 l_free(a_minus_b); 1078 l_free(b_minus_a); 1079 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1080 send_to_ex: 1081 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1082 NULL); 1083 } else { 1084 send_to_in: 1085 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1086 NULL); 1087 } 1088 1089 /* 1090 * Need to set up retransmission state; merge the new info with the 1091 * current state (which may be null). If the timer is not currently 1092 * running, signal a thread to restart it -- see the comment in 1093 * igmp_joingroup() for details. 
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		mcast_signal_restart_thread(ipst);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange: MLDv2 analogue of igmp_statechange().  The filter
 * state for ilm is changing to fmode/flist; build and send the
 * corresponding MLDv2 State Change Report and set up retransmission
 * state.  No-op unless the router on the ill is MLDv2-capable.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t	*ill;
	mrec_t	*rp = NULL;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		/*
		 * Same filter mode: report only the source-list deltas.
		 * On allocation failure fall back to a full filter-mode
		 * change report (redundant but harmless).
		 */
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
	send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
	send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, signal a thread to restart it -- see the comment in
	 * igmp_joingroup() for details.
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
		mcast_signal_restart_thread(ipst);
	}

	mutex_exit(&ill->ill_lock);
	mldv2_sendrpt(ill, rp);
}

/*
 * igmp_timeout_handler_per_ill: process all expired IGMP timers (global,
 * per-ilm delayed-report, and per-ilm retransmit) on one ill, sending
 * whatever reports are due.  Returns the number of milliseconds until the
 * next timer on this ill fires, or INFINITY if none are pending.
 * Caller must be the ipsq writer for the ill; ill_lock is dropped and
 * reacquired around every transmit.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	ipif_t	*ipif;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		/* global timer still pending; note time remaining */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				/*
				 * NOTE(review): the timer delta is already
				 * in host order; ntohl() here looks spurious
				 * (debug-trace output only).
				 */
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmits due; rearm with a random delay */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request.  The igmp related state variables in the ilm are protected
 * by ill_lock.  A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id.  igmp_start_timers
 * starts the igmp timer if needed.  It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * (The need to enter the IPSQ is largely historical but there are still some
 * fields like ilm_filter that rely on it.)
 *
 * The igmp_slowtimeo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	/* timer has fired; clear the id so a new one can be scheduled */
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Process all expired MLD timers (global, per-ilm delayed-report, and
 * per-ilm retransmit) on one ill, sending whatever reports are due.
 * Returns the number of milliseconds to the next event (or INFINITY if
 * none).  Caller must be the ipsq writer for the ill.
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY, current;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		/* global timer still pending; note time remaining */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				/*
				 * NOTE(review): the timer delta is already
				 * in host order; ntohl() here looks spurious
				 * (debug-trace output only).
				 */
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmits due; rearm with a random delay */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			ASSERT(rtxp->rtx_timer == INFINITY);
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/* for MLDv2, reports were accumulated above; send them all now */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = mld_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	/* timer has fired; clear the id so a new one can be scheduled */
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to newer router version if we didn't hear from the older
 *   version router in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				/*
				 * V1 querier aged out: fall back only as
				 * far as a V2 querier (if one was heard, or
				 * if v2 is the configured maximum),
				 * otherwise go straight to V3.
				 */
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v2, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);

	/* reschedule ourselves for the next slowtimo interval */
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	/* reschedule ourselves for the next slowtimo interval */
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like
icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size = hdrlen + sizeof (igmpa_t);
	ipif_t	*ipif = ilm->ilm_ipif;
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;
	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */

	/* data mblk: IP header + router-alert option + IGMP header */
	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_hdr_checksum = 0;
	/* destination defaults to the group itself (reports) */
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t	*first_mp, *mp;
	ipha_t	*ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int	i, j, numrec, more_src_cnt;
	size_t	hdrsize, size, rsize;
	ill_t	*ill = ipif->ipif_ill;
	mrec_t	*rp, *cur_reclist;
	mrec_t	*next_reclist = reclist;
	boolean_t morepkts;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * First pass: walk the record list and figure out how many
	 * records (numrec) fit in one packet of at most ill_max_frag
	 * bytes; set up continuation state (morepkts/next_reclist) if
	 * the rest must go in a subsequent packet.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));

				/*
				 * Skip if there's not even enough room in
				 * a single packet to send something useful.
				 *
				 * NOTE(review): srcspace is signed but
				 * sizeof is unsigned, so a negative
				 * srcspace is promoted to a huge unsigned
				 * value and this test would be false --
				 * verify ill_max_frag's lower bound makes
				 * that impossible.
				 */
				if (srcspace <= sizeof (ipaddr_t))
					continue;

				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* second pass: lay the group records into the packet */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	/* v3 reports go to the all-IGMPv3-routers group, 224.0.0.22 */
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
/*
 * mld_input: validate and dispatch an incoming MLD packet received on
 * 'ill'.  Queries may cause pending report timers to be (re)started;
 * reports from other listeners move our matching memberships to the
 * IGMP_OTHERMEMBER state.  The message is always freed here; raw-socket
 * listeners have already received their copy upstream.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t *mldh;
	ilm_t *ilm;
	ipif_t *ipif;
	uint16_t hdr_length, exthdr_length;
	in6_addr_t *v6group_ptr, *lcladdr_ptr;
	uint_t next;
	int mldlen;
	ip_stack_t *ipst = ill->ill_ipst;
	ilm_walker_t ilw;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	/* Valid MLD packets must arrive with a hop limit of exactly 1 */
	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		/* extension headers present (e.g. hop-by-hop router alert) */
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	/* length of the MLD portion = payload minus extension headers */
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2. v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 * An administratively capped mld_max_version forces v1
		 * processing regardless of length.
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			/* length between 24 and 28 octets: malformed */
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return;
		}
		/* 0 means the query was rejected by the handler */
		if (next == 0) {
			freemsg(mp);
			return;
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT: {

		ASSERT(ill->ill_ipif != NULL);
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 *
		 * NOTE(review): lcladdr_ptr points at the first ipif's
		 * ipif_v6subnet, yet it is compared against each ipif's
		 * ipif_v6lcl_addr below — presumably intended to match our
		 * own local address; verify this is the intended field.
		 */
		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    lcladdr_ptr)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];
					char buf2[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s ipif_local %s",
					    inet_ntop(AF_INET6, lcladdr_ptr,
					    buf1, sizeof (buf1)),
					    inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    buf2, sizeof (buf2)));
				}
				mutex_exit(&ill->ill_lock);
				freemsg(mp);
				return;
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return;
		}

		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		ilm = ilm_walker_start(&ilw, ill);
		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			/* cancel our pending report; someone else spoke up */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		ilm_walker_finish(&ilw);
		break;
	}
	case MLD_LISTENER_REDUCTION:
		/* Done messages only matter to routers; just count them */
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	/*
	 * All MLD packets have already been passed up to any
	 * process(es) listening on a ICMP6 raw socket. This
	 * has been accomplished in ip_deliver_local_v6 prior to
	 * this function call. It is assumed that the multicast daemon
	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumably use the
	 * ICMP6_FILTER socket option to only receive the MLD messages)
	 * Thus we can free the MLD message block here
	 */
	freemsg(mp);
}
2321 */ 2322 ilm = ilm_walker_start(&ilw, ill); 2323 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 2324 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2325 continue; 2326 BUMP_MIB(ill->ill_icmp6_mib, 2327 ipv6IfIcmpInGroupMembOurReports); 2328 2329 ilm->ilm_timer = INFINITY; 2330 ilm->ilm_state = IGMP_OTHERMEMBER; 2331 } 2332 ilm_walker_finish(&ilw); 2333 break; 2334 } 2335 case MLD_LISTENER_REDUCTION: 2336 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2337 break; 2338 } 2339 /* 2340 * All MLD packets have already been passed up to any 2341 * process(es) listening on a ICMP6 raw socket. This 2342 * has been accomplished in ip_deliver_local_v6 prior to 2343 * this function call. It is assumed that the multicast daemon 2344 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2345 * ICMP6_FILTER socket option to only receive the MLD messages) 2346 * Thus we can free the MLD message block here 2347 */ 2348 freemsg(mp); 2349 } 2350 2351 /* 2352 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2353 * (non-zero, unsigned) timer value to be set on success. 2354 */ 2355 static uint_t 2356 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2357 { 2358 ilm_t *ilm; 2359 int timer; 2360 uint_t next, current; 2361 in6_addr_t *v6group; 2362 ilm_walker_t ilw; 2363 2364 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2365 2366 /* 2367 * In the MLD specification, there are 3 states and a flag. 2368 * 2369 * In Non-Listener state, we simply don't have a membership record. 2370 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2371 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2372 * INFINITY) 2373 * 2374 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2375 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2376 * if I sent the last report. 
2377 */ 2378 v6group = &mldh->mld_addr; 2379 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2380 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2381 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2382 return (0); 2383 } 2384 2385 /* Need to do compatibility mode checking */ 2386 mutex_enter(&ill->ill_lock); 2387 ill->ill_mcast_v1_time = 0; 2388 ill->ill_mcast_v1_tset = 1; 2389 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2390 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2391 "MLD_V1_ROUTER\n", ill->ill_name)); 2392 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2393 ill->ill_mcast_type = MLD_V1_ROUTER; 2394 } 2395 mutex_exit(&ill->ill_lock); 2396 2397 timer = (int)ntohs(mldh->mld_maxdelay); 2398 if (ip_debug > 1) { 2399 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2400 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2401 timer, (int)mldh->mld_type); 2402 } 2403 2404 /* 2405 * -Start the timers in all of our membership records for 2406 * the physical interface on which the query arrived, 2407 * excl: 2408 * 1. those that belong to the "all hosts" group, 2409 * 2. those with 0 scope, or 1 node-local scope. 2410 * 2411 * -Restart any timer that is already running but has a value 2412 * longer that the requested timeout. 2413 * -Use the value specified in the query message as the 2414 * maximum timeout. 
/*
 * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t *ilm;
	in6_addr_t *v6group, *src_array;
	uint_t next, numsrc, i, mrd, delay, qqi, current;
	uint8_t qrv;
	ilm_walker_t ilw;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* the source addresses immediately follow the fixed query header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * extract Maximum Response Delay from code in header; values at
	 * or above MLD_V2_MAXRT_FPMIN are floating-point encoded with a
	 * 3-bit exponent (bits 14-12) and 12-bit mantissa.
	 */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

	/* our response is sent after a random delay in [0, mrd] */
	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	/* Querier's Robustness Variable: 0 means "use the default" */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/*
	 * Querier's Query Interval Code, also floating-point encoded
	 * above MLD_V2_QQI_FPMIN.
	 *
	 * NOTE(review): the ">> 12" shift matches the 16-bit MAXRT
	 * layout above, but QQIC is an 8-bit field whose exponent
	 * normally occupies bits 6-4 (shift of 4) — verify against the
	 * definition of MLD_V2_QQI_EXP_MASK.
	 */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < (current + delay)) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = current + delay;
		mutex_exit(&ill->ill_lock);
		next = delay;

	} else {
		/* group or group/source specific query */
		ilm = ilm_walker_start(&ilw, ill);
		mutex_enter(&ill->ill_lock);
		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				/* respond for the whole group */
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				/* merge the queried sources into pendsrcs */
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* convert the absolute deadline back to a delta */
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
			break;
		}
		mutex_exit(&ill->ill_lock);
		ilm_walker_finish(&ilw);
	}

	return (next);
}
2558 */ 2559 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2560 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2561 group_query: 2562 FREE_SLIST(ilm->ilm_pendsrcs); 2563 ilm->ilm_pendsrcs = NULL; 2564 } else { 2565 boolean_t overflow; 2566 slist_t *pktl; 2567 if (numsrc > MAX_FILTER_SIZE || 2568 (ilm->ilm_pendsrcs == NULL && 2569 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2570 /* 2571 * We've been sent more sources than 2572 * we can deal with; or we can't deal 2573 * with a source list at all. Revert 2574 * to a group specific query. 2575 */ 2576 goto group_query; 2577 } 2578 if ((pktl = l_alloc()) == NULL) 2579 goto group_query; 2580 pktl->sl_numsrc = numsrc; 2581 for (i = 0; i < numsrc; i++) 2582 pktl->sl_addr[i] = src_array[i]; 2583 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2584 &overflow); 2585 l_free(pktl); 2586 if (overflow) 2587 goto group_query; 2588 } 2589 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 2590 INFINITY : (ilm->ilm_timer - current); 2591 /* set timer to soonest value */ 2592 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2593 if (ilm->ilm_timer < next) 2594 next = ilm->ilm_timer; 2595 ilm->ilm_timer += current; 2596 break; 2597 } 2598 mutex_exit(&ill->ill_lock); 2599 ilm_walker_finish(&ilw); 2600 } 2601 2602 return (next); 2603 } 2604 2605 /* 2606 * Send MLDv1 response packet with hoplimit 1 2607 */ 2608 static void 2609 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2610 { 2611 mblk_t *mp; 2612 mld_hdr_t *mldh; 2613 ip6_t *ip6h; 2614 ip6_hbh_t *ip6hbh; 2615 struct ip6_opt_router *ip6router; 2616 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2617 ill_t *ill = ilm->ilm_ill; 2618 ipif_t *ipif; 2619 2620 /* 2621 * We need to place a router alert option in this packet. The length 2622 * of the options must be a multiple of 8. The hbh option header is 2 2623 * bytes followed by the 4 byte router alert option. That leaves 2624 * 2 bytes of pad for a total of 8 bytes. 
2625 */ 2626 const int router_alert_length = 8; 2627 2628 ASSERT(ill->ill_isv6); 2629 2630 size += router_alert_length; 2631 mp = allocb(size, BPRI_HI); 2632 if (mp == NULL) 2633 return; 2634 bzero(mp->b_rptr, size); 2635 mp->b_wptr = mp->b_rptr + size; 2636 2637 ip6h = (ip6_t *)mp->b_rptr; 2638 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2639 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2640 /* 2641 * A zero is a pad option of length 1. The bzero of the whole packet 2642 * above will pad between ip6router and mld. 2643 */ 2644 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2645 2646 mldh->mld_type = type; 2647 mldh->mld_addr = ilm->ilm_v6addr; 2648 2649 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2650 ip6router->ip6or_len = 2; 2651 ip6router->ip6or_value[0] = 0; 2652 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2653 2654 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2655 ip6hbh->ip6h_len = 0; 2656 2657 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2658 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2659 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2660 ip6h->ip6_hops = MLD_HOP_LIMIT; 2661 if (v6addr == NULL) 2662 ip6h->ip6_dst = ilm->ilm_v6addr; 2663 else 2664 ip6h->ip6_dst = *v6addr; 2665 2666 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2667 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2668 ip6h->ip6_src = ipif->ipif_v6src_addr; 2669 ipif_refrele(ipif); 2670 } else { 2671 /* Otherwise, use IPv6 default address selection. */ 2672 ip6h->ip6_src = ipv6_all_zeros; 2673 } 2674 2675 /* 2676 * Prepare for checksum by putting icmp length in the icmp 2677 * checksum field. The checksum is calculated in ip_wput_v6. 2678 */ 2679 mldh->mld_cksum = htons(sizeof (*mldh)); 2680 2681 /* 2682 * ip_wput will automatically loopback the multicast packet to 2683 * the conn if multicast loopback is enabled. 
2684 * The MIB stats corresponding to this outgoing MLD packet 2685 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2686 * ->icmp_update_out_mib_v6 function call. 2687 */ 2688 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2689 } 2690 2691 /* 2692 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2693 * report will contain one multicast address record for each element of 2694 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2695 * multiple reports are sent. reclist is assumed to be made up of 2696 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 2697 */ 2698 static void 2699 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2700 { 2701 mblk_t *mp; 2702 mld2r_t *mld2r; 2703 mld2mar_t *mld2mar; 2704 in6_addr_t *srcarray; 2705 ip6_t *ip6h; 2706 ip6_hbh_t *ip6hbh; 2707 struct ip6_opt_router *ip6router; 2708 size_t size, optlen, padlen, icmpsize, rsize; 2709 ipif_t *ipif; 2710 int i, numrec, more_src_cnt; 2711 mrec_t *rp, *cur_reclist; 2712 mrec_t *next_reclist = reclist; 2713 boolean_t morepkts; 2714 2715 ASSERT(IAM_WRITER_ILL(ill)); 2716 2717 /* If there aren't any records, there's nothing to send */ 2718 if (reclist == NULL) 2719 return; 2720 2721 ASSERT(ill->ill_isv6); 2722 2723 /* 2724 * Total option length (optlen + padlen) must be a multiple of 2725 * 8 bytes. We assume here that optlen <= 8, so the total option 2726 * length will be 8. Assert this in case anything ever changes. 
2727 */ 2728 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2729 ASSERT(optlen <= 8); 2730 padlen = 8 - optlen; 2731 nextpkt: 2732 icmpsize = sizeof (mld2r_t); 2733 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2734 morepkts = B_FALSE; 2735 more_src_cnt = 0; 2736 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2737 rp = rp->mrec_next, numrec++) { 2738 rsize = sizeof (mld2mar_t) + 2739 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2740 if (size + rsize > ill->ill_max_frag) { 2741 if (rp == cur_reclist) { 2742 /* 2743 * If the first mrec we looked at is too big 2744 * to fit in a single packet (i.e the source 2745 * list is too big), we must either truncate 2746 * the list (if TO_EX or IS_EX), or send 2747 * multiple reports for the same group (all 2748 * other types). 2749 */ 2750 int srcspace, srcsperpkt; 2751 srcspace = ill->ill_max_frag - 2752 (size + sizeof (mld2mar_t)); 2753 2754 /* 2755 * Skip if there's not even enough room in 2756 * a single packet to send something useful. 2757 */ 2758 if (srcspace <= sizeof (in6_addr_t)) 2759 continue; 2760 2761 srcsperpkt = srcspace / sizeof (in6_addr_t); 2762 /* 2763 * Increment icmpsize and size, because we will 2764 * be sending a record for the mrec we're 2765 * looking at now. 2766 */ 2767 rsize = sizeof (mld2mar_t) + 2768 (srcsperpkt * sizeof (in6_addr_t)); 2769 icmpsize += rsize; 2770 size += rsize; 2771 if (rp->mrec_type == MODE_IS_EXCLUDE || 2772 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2773 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2774 if (rp->mrec_next == NULL) { 2775 /* no more packets to send */ 2776 break; 2777 } else { 2778 /* 2779 * more packets, but we're 2780 * done with this mrec. 2781 */ 2782 next_reclist = rp->mrec_next; 2783 } 2784 } else { 2785 more_src_cnt = rp->mrec_srcs.sl_numsrc 2786 - srcsperpkt; 2787 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2788 /* 2789 * We'll fix up this mrec (remove the 2790 * srcs we've already sent) before 2791 * returning to nextpkt above. 
2792 */ 2793 next_reclist = rp; 2794 } 2795 } else { 2796 next_reclist = rp; 2797 } 2798 morepkts = B_TRUE; 2799 break; 2800 } 2801 icmpsize += rsize; 2802 size += rsize; 2803 } 2804 2805 mp = allocb(size, BPRI_HI); 2806 if (mp == NULL) 2807 goto free_reclist; 2808 bzero(mp->b_rptr, size); 2809 mp->b_wptr = mp->b_rptr + size; 2810 2811 ip6h = (ip6_t *)mp->b_rptr; 2812 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2813 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2814 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2815 mld2mar = (mld2mar_t *)&(mld2r[1]); 2816 2817 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2818 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2819 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2820 ip6h->ip6_hops = MLD_HOP_LIMIT; 2821 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2822 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2823 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2824 ip6h->ip6_src = ipif->ipif_v6src_addr; 2825 ipif_refrele(ipif); 2826 } else { 2827 /* otherwise, use IPv6 default address selection. */ 2828 ip6h->ip6_src = ipv6_all_zeros; 2829 } 2830 2831 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2832 /* 2833 * ip6h_len is the number of 8-byte words, not including the first 2834 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2835 */ 2836 ip6hbh->ip6h_len = 0; 2837 2838 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2839 ip6router->ip6or_len = 2; 2840 ip6router->ip6or_value[0] = 0; 2841 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2842 2843 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2844 mld2r->mld2r_nummar = htons(numrec); 2845 /* 2846 * Prepare for the checksum by putting icmp length in the icmp 2847 * checksum field. The checksum is calculated in ip_wput_v6. 
2848 */ 2849 mld2r->mld2r_cksum = htons(icmpsize); 2850 2851 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2852 mld2mar->mld2mar_type = rp->mrec_type; 2853 mld2mar->mld2mar_auxlen = 0; 2854 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2855 mld2mar->mld2mar_group = rp->mrec_group; 2856 srcarray = (in6_addr_t *)&(mld2mar[1]); 2857 2858 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2859 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2860 2861 mld2mar = (mld2mar_t *)&(srcarray[i]); 2862 } 2863 2864 /* 2865 * ip_wput will automatically loopback the multicast packet to 2866 * the conn if multicast loopback is enabled. 2867 * The MIB stats corresponding to this outgoing MLD packet 2868 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2869 * ->icmp_update_out_mib_v6 function call. 2870 */ 2871 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2872 2873 if (morepkts) { 2874 if (more_src_cnt > 0) { 2875 int index, mvsize; 2876 slist_t *sl = &next_reclist->mrec_srcs; 2877 index = sl->sl_numsrc; 2878 mvsize = more_src_cnt * sizeof (in6_addr_t); 2879 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2880 mvsize); 2881 sl->sl_numsrc = more_src_cnt; 2882 } 2883 goto nextpkt; 2884 } 2885 2886 free_reclist: 2887 while (reclist != NULL) { 2888 rp = reclist->mrec_next; 2889 mi_free(reclist); 2890 reclist = rp; 2891 } 2892 } 2893 2894 static mrec_t * 2895 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2896 mrec_t *next) 2897 { 2898 mrec_t *rp; 2899 int i; 2900 2901 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2902 SLIST_IS_EMPTY(srclist)) 2903 return (next); 2904 2905 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2906 if (rp == NULL) 2907 return (next); 2908 2909 rp->mrec_next = next; 2910 rp->mrec_type = type; 2911 rp->mrec_auxlen = 0; 2912 rp->mrec_group = *grp; 2913 if (srclist == NULL) { 2914 rp->mrec_srcs.sl_numsrc = 0; 2915 } else { 2916 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2917 for (i = 
0; i < srclist->sl_numsrc; i++) 2918 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2919 } 2920 2921 return (rp); 2922 } 2923 2924 /* 2925 * Set up initial retransmit state. If memory cannot be allocated for 2926 * the source lists, simply create as much state as is possible; memory 2927 * allocation failures are considered one type of transient error that 2928 * the retransmissions are designed to overcome (and if they aren't 2929 * transient, there are bigger problems than failing to notify the 2930 * router about multicast group membership state changes). 2931 */ 2932 static void 2933 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2934 slist_t *flist) 2935 { 2936 /* 2937 * There are only three possibilities for rtype: 2938 * New join, transition from INCLUDE {} to INCLUDE {flist} 2939 * => rtype is ALLOW_NEW_SOURCES 2940 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2941 * => rtype is CHANGE_TO_EXCLUDE 2942 * State change that involves a filter mode change 2943 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2944 */ 2945 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2946 rtype == ALLOW_NEW_SOURCES); 2947 2948 rtxp->rtx_cnt = ill->ill_mcast_rv; 2949 2950 switch (rtype) { 2951 case CHANGE_TO_EXCLUDE: 2952 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2953 CLEAR_SLIST(rtxp->rtx_allow); 2954 COPY_SLIST(flist, rtxp->rtx_block); 2955 break; 2956 case ALLOW_NEW_SOURCES: 2957 case CHANGE_TO_INCLUDE: 2958 rtxp->rtx_fmode_cnt = 2959 rtype == ALLOW_NEW_SOURCES ? 
/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 *
 * Returns the (possibly modified) record list to transmit; the caller
 * retains ownership of the returned list.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	/* v4 memberships hang off an ipif; v6 memberships off the ill */
	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		/* the list carries at most one ALLOW and one BLOCK mrec */
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/* no BLOCK mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* no ALLOW mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}
3106 */ 3107 static void 3108 mcast_signal_restart_thread(ip_stack_t *ipst) 3109 { 3110 mutex_enter(&ipst->ips_mrt_lock); 3111 ipst->ips_mrt_flags |= IP_MRT_RUN; 3112 cv_signal(&ipst->ips_mrt_cv); 3113 mutex_exit(&ipst->ips_mrt_lock); 3114 } 3115 3116 /* 3117 * Thread to restart IGMP/MLD timers. See the comment in igmp_joingroup() for 3118 * the story behind this unfortunate thread. 3119 */ 3120 void 3121 mcast_restart_timers_thread(ip_stack_t *ipst) 3122 { 3123 int next; 3124 char name[64]; 3125 callb_cpr_t cprinfo; 3126 3127 (void) snprintf(name, sizeof (name), "mcast_restart_timers_thread_%d", 3128 ipst->ips_netstack->netstack_stackid); 3129 CALLB_CPR_INIT(&cprinfo, &ipst->ips_mrt_lock, callb_generic_cpr, name); 3130 3131 for (;;) { 3132 mutex_enter(&ipst->ips_mrt_lock); 3133 while (!(ipst->ips_mrt_flags & (IP_MRT_STOP|IP_MRT_RUN))) { 3134 CALLB_CPR_SAFE_BEGIN(&cprinfo); 3135 cv_wait(&ipst->ips_mrt_cv, &ipst->ips_mrt_lock); 3136 CALLB_CPR_SAFE_END(&cprinfo, &ipst->ips_mrt_lock); 3137 } 3138 if (ipst->ips_mrt_flags & IP_MRT_STOP) 3139 break; 3140 ipst->ips_mrt_flags &= ~IP_MRT_RUN; 3141 mutex_exit(&ipst->ips_mrt_lock); 3142 3143 mutex_enter(&ipst->ips_igmp_timer_lock); 3144 next = ipst->ips_igmp_deferred_next; 3145 ipst->ips_igmp_deferred_next = INFINITY; 3146 mutex_exit(&ipst->ips_igmp_timer_lock); 3147 3148 if (next != INFINITY) 3149 igmp_start_timers(next, ipst); 3150 3151 mutex_enter(&ipst->ips_mld_timer_lock); 3152 next = ipst->ips_mld_deferred_next; 3153 ipst->ips_mld_deferred_next = INFINITY; 3154 mutex_exit(&ipst->ips_mld_timer_lock); 3155 if (next != INFINITY) 3156 mld_start_timers(next, ipst); 3157 } 3158 3159 ipst->ips_mrt_flags |= IP_MRT_DONE; 3160 cv_signal(&ipst->ips_mrt_done_cv); 3161 CALLB_CPR_EXIT(&cprinfo); /* drops ips_mrt_lock */ 3162 thread_exit(); 3163 } 3164