1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 90 /* 91 * Macros used to do timer len conversions. Timer values are always 92 * stored and passed to the timer functions as milliseconds; but the 93 * default values and values from the wire may not be. 
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out. When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is the time
 * interval between time scheduled to time cancelled. This can cause
 * delays in sending out multicast membership reports. To resolve this
 * problem, wallclock time (absolute time) is used instead of deltas
 * (relative time) to track timers.
 *
 * The MACRO below gets the lbolt value, used for proper timer scheduling
 * and firing. Therefore multicast membership reports are sent on time.
 * The timer does not exactly fire at the time it was scheduled to fire;
 * there is a difference of a few milliseconds observed. An offset is used
 * to take care of the difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)

/*
 * igmp_start_timers:
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer.
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_igmp_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timer fires sooner than 'next'; leave it alone. */
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/* Drop the lock across untimeout(); it may block on the handler. */
	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(ipst->ips_igmp_timeout_id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * MLD counterpart of igmp_start_timers() (same serialization scheme).
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time. If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return. The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive. We need to start a timer.
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active. We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_mld_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Existing timer fires sooner than 'next'; leave it alone. */
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/* Drop the lock across untimeout(); it may block on the handler. */
	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(ipst->ips_mld_timeout_id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters. Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future. We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
298 */ 299 /* ARGSUSED */ 300 mblk_t * 301 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 302 { 303 igmpa_t *igmpa; 304 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 305 int iphlen, igmplen, mblklen; 306 ilm_t *ilm; 307 uint32_t src, dst; 308 uint32_t group; 309 uint_t next; 310 ipif_t *ipif; 311 ip_stack_t *ipst; 312 313 ASSERT(ill != NULL); 314 ASSERT(!ill->ill_isv6); 315 ipst = ill->ill_ipst; 316 ++ipst->ips_igmpstat.igps_rcv_total; 317 318 mblklen = MBLKL(mp); 319 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 320 ++ipst->ips_igmpstat.igps_rcv_tooshort; 321 goto bad_pkt; 322 } 323 igmplen = ntohs(ipha->ipha_length) - iphlen; 324 /* 325 * Since msg sizes are more variable with v3, just pullup the 326 * whole thing now. 327 */ 328 if (MBLKL(mp) < (igmplen + iphlen)) { 329 mblk_t *mp1; 330 if ((mp1 = msgpullup(mp, -1)) == NULL) { 331 ++ipst->ips_igmpstat.igps_rcv_tooshort; 332 goto bad_pkt; 333 } 334 freemsg(mp); 335 mp = mp1; 336 ipha = (ipha_t *)(mp->b_rptr); 337 } 338 339 /* 340 * Validate lengths 341 */ 342 if (igmplen < IGMP_MINLEN) { 343 ++ipst->ips_igmpstat.igps_rcv_tooshort; 344 goto bad_pkt; 345 } 346 /* 347 * Validate checksum 348 */ 349 if (IP_CSUM(mp, iphlen, 0)) { 350 ++ipst->ips_igmpstat.igps_rcv_badsum; 351 goto bad_pkt; 352 } 353 354 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 355 src = ipha->ipha_src; 356 dst = ipha->ipha_dst; 357 if (ip_debug > 1) 358 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 359 "igmp_input: src 0x%x, dst 0x%x on %s\n", 360 (int)ntohl(src), (int)ntohl(dst), 361 ill->ill_name); 362 363 switch (igmpa->igmpa_type) { 364 case IGMP_MEMBERSHIP_QUERY: 365 /* 366 * packet length differentiates between v1/v2 and v3 367 * v1/v2 should be exactly 8 octets long; v3 is >= 12 368 */ 369 if ((igmplen == IGMP_MINLEN) || 370 (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) { 371 next = igmp_query_in(ipha, igmpa, ill); 372 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 373 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 374 igmplen); 375 } 
else { 376 ++ipst->ips_igmpstat.igps_rcv_tooshort; 377 goto bad_pkt; 378 } 379 if (next == 0) 380 goto bad_pkt; 381 382 if (next != INFINITY) 383 igmp_start_timers(next, ipst); 384 385 break; 386 387 case IGMP_V1_MEMBERSHIP_REPORT: 388 case IGMP_V2_MEMBERSHIP_REPORT: 389 /* 390 * For fast leave to work, we have to know that we are the 391 * last person to send a report for this group. Reports 392 * generated by us are looped back since we could potentially 393 * be a multicast router, so discard reports sourced by me. 394 */ 395 mutex_enter(&ill->ill_lock); 396 for (ipif = ill->ill_ipif; ipif != NULL; 397 ipif = ipif->ipif_next) { 398 if (ipif->ipif_lcl_addr == src) { 399 if (ip_debug > 1) { 400 (void) mi_strlog(ill->ill_rq, 401 1, 402 SL_TRACE, 403 "igmp_input: we are only " 404 "member src 0x%x ipif_local 0x%x", 405 (int)ntohl(src), 406 (int) 407 ntohl(ipif->ipif_lcl_addr)); 408 } 409 mutex_exit(&ill->ill_lock); 410 return (mp); 411 } 412 } 413 mutex_exit(&ill->ill_lock); 414 415 ++ipst->ips_igmpstat.igps_rcv_reports; 416 group = igmpa->igmpa_group; 417 if (!CLASSD(group)) { 418 ++ipst->ips_igmpstat.igps_rcv_badreports; 419 goto bad_pkt; 420 } 421 422 /* 423 * KLUDGE: if the IP source address of the report has an 424 * unspecified (i.e., zero) subnet number, as is allowed for 425 * a booting host, replace it with the correct subnet number 426 * so that a process-level multicast routing demon can 427 * determine which subnet it arrived from. This is necessary 428 * to compensate for the lack of any way for a process to 429 * determine the arrival interface of an incoming packet. 430 * 431 * Requires that a copy of *this* message it passed up 432 * to the raw interface which is done by our caller. 
433 */ 434 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 435 /* Pick the first ipif on this ill */ 436 mutex_enter(&ill->ill_lock); 437 src = ill->ill_ipif->ipif_subnet; 438 mutex_exit(&ill->ill_lock); 439 ip1dbg(("igmp_input: changed src to 0x%x\n", 440 (int)ntohl(src))); 441 ipha->ipha_src = src; 442 } 443 444 /* 445 * If we belong to the group being reported, and 446 * we are a 'Delaying member' in the RFC terminology, 447 * stop our timer for that group and 'clear flag' i.e. 448 * mark as IGMP_OTHERMEMBER. Do this for all logical 449 * interfaces on the given physical interface. 450 */ 451 mutex_enter(&ill->ill_lock); 452 for (ipif = ill->ill_ipif; ipif != NULL; 453 ipif = ipif->ipif_next) { 454 ilm = ilm_lookup_ipif(ipif, group); 455 if (ilm != NULL) { 456 ++ipst->ips_igmpstat.igps_rcv_ourreports; 457 ilm->ilm_timer = INFINITY; 458 ilm->ilm_state = IGMP_OTHERMEMBER; 459 } 460 } /* for */ 461 mutex_exit(&ill->ill_lock); 462 break; 463 464 case IGMP_V3_MEMBERSHIP_REPORT: 465 /* 466 * Currently nothing to do here; IGMP router is not 467 * implemented in ip, and v3 hosts don't pay attention 468 * to membership reports. 469 */ 470 break; 471 } 472 /* 473 * Pass all valid IGMP packets up to any process(es) listening 474 * on a raw IGMP socket. Do not free the packet. 475 */ 476 return (mp); 477 478 bad_pkt: 479 freemsg(mp); 480 return (NULL); 481 } 482 483 static uint_t 484 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 485 { 486 ilm_t *ilm; 487 int timer; 488 uint_t next, current; 489 ip_stack_t *ipst; 490 491 ipst = ill->ill_ipst; 492 ++ipst->ips_igmpstat.igps_rcv_queries; 493 494 /* 495 * In the IGMPv2 specification, there are 3 states and a flag. 496 * 497 * In Non-Member state, we simply don't have a membership record. 498 * In Delaying Member state, our timer is running (ilm->ilm_timer 499 * < INFINITY). In Idle Member state, our timer is not running 500 * (ilm->ilm_timer == INFINITY). 
501 * 502 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 503 * we have heard a report from another member, or IGMP_IREPORTEDLAST 504 * if I sent the last report. 505 */ 506 if ((igmpa->igmpa_code == 0) || 507 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) { 508 /* 509 * Query from an old router. 510 * Remember that the querier on this interface is old, 511 * and set the timer to the value in RFC 1112. 512 */ 513 514 515 mutex_enter(&ill->ill_lock); 516 ill->ill_mcast_v1_time = 0; 517 ill->ill_mcast_v1_tset = 1; 518 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 519 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 520 "to IGMP_V1_ROUTER\n", ill->ill_name)); 521 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 522 ill->ill_mcast_type = IGMP_V1_ROUTER; 523 } 524 mutex_exit(&ill->ill_lock); 525 526 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 527 528 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 529 igmpa->igmpa_group != 0) { 530 ++ipst->ips_igmpstat.igps_rcv_badqueries; 531 return (0); 532 } 533 534 } else { 535 in_addr_t group; 536 537 /* 538 * Query from a new router 539 * Simply do a validity check 540 */ 541 group = igmpa->igmpa_group; 542 if (group != 0 && (!CLASSD(group))) { 543 ++ipst->ips_igmpstat.igps_rcv_badqueries; 544 return (0); 545 } 546 547 /* 548 * Switch interface state to v2 on receipt of a v2 query 549 * ONLY IF current state is v3. Let things be if current 550 * state if v1 but do reset the v2-querier-present timer. 
551 */ 552 mutex_enter(&ill->ill_lock); 553 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 554 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 555 "to IGMP_V2_ROUTER", ill->ill_name)); 556 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 557 ill->ill_mcast_type = IGMP_V2_ROUTER; 558 } 559 ill->ill_mcast_v2_time = 0; 560 ill->ill_mcast_v2_tset = 1; 561 mutex_exit(&ill->ill_lock); 562 563 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 564 } 565 566 if (ip_debug > 1) { 567 mutex_enter(&ill->ill_lock); 568 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 569 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 570 (int)ntohs(igmpa->igmpa_code), 571 (int)ntohs(igmpa->igmpa_type)); 572 mutex_exit(&ill->ill_lock); 573 } 574 575 /* 576 * -Start the timers in all of our membership records 577 * for the physical interface on which the query 578 * arrived, excluding those that belong to the "all 579 * hosts" group (224.0.0.1). 580 * 581 * -Restart any timer that is already running but has 582 * a value longer than the requested timeout. 583 * 584 * -Use the value specified in the query message as 585 * the maximum timeout. 586 */ 587 next = (unsigned)INFINITY; 588 mutex_enter(&ill->ill_lock); 589 590 current = CURRENT_MSTIME; 591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 592 593 /* 594 * A multicast router joins INADDR_ANY address 595 * to enable promiscuous reception of all 596 * mcasts from the interface. 
This INADDR_ANY 597 * is stored in the ilm_v6addr as V6 unspec addr 598 */ 599 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 600 continue; 601 if (ilm->ilm_addr == htonl(INADDR_ANY)) 602 continue; 603 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 604 (igmpa->igmpa_group == 0) || 605 (igmpa->igmpa_group == ilm->ilm_addr)) { 606 if (ilm->ilm_timer > timer) { 607 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 608 if (ilm->ilm_timer < next) 609 next = ilm->ilm_timer; 610 ilm->ilm_timer += current; 611 } 612 } 613 } 614 mutex_exit(&ill->ill_lock); 615 616 return (next); 617 } 618 619 static uint_t 620 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 621 { 622 uint_t i, next, mrd, qqi, timer, delay, numsrc; 623 uint_t current; 624 ilm_t *ilm; 625 ipaddr_t *src_array; 626 uint8_t qrv; 627 ip_stack_t *ipst; 628 629 ipst = ill->ill_ipst; 630 /* make sure numsrc matches packet size */ 631 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 632 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 633 ++ipst->ips_igmpstat.igps_rcv_tooshort; 634 return (0); 635 } 636 src_array = (ipaddr_t *)&igmp3qa[1]; 637 638 ++ipst->ips_igmpstat.igps_rcv_queries; 639 640 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 641 uint_t hdrval, mant, exp; 642 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 643 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 644 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 645 mrd = (mant | 0x10) << (exp + 3); 646 } 647 if (mrd == 0) 648 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 649 timer = DSEC_TO_MSEC(mrd); 650 MCAST_RANDOM_DELAY(delay, timer); 651 next = (unsigned)INFINITY; 652 current = CURRENT_MSTIME; 653 654 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 655 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 656 else 657 ill->ill_mcast_rv = qrv; 658 659 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 660 uint_t hdrval, mant, exp; 661 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 662 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 663 
exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 664 qqi = (mant | 0x10) << (exp + 3); 665 } 666 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 667 668 /* 669 * If we have a pending general query response that's scheduled 670 * sooner than the delay we calculated for this response, then 671 * no action is required (RFC3376 section 5.2 rule 1) 672 */ 673 mutex_enter(&ill->ill_lock); 674 if (ill->ill_global_timer < (current + delay)) { 675 mutex_exit(&ill->ill_lock); 676 return (next); 677 } 678 mutex_exit(&ill->ill_lock); 679 680 /* 681 * Now take action depending upon query type: 682 * general, group specific, or group/source specific. 683 */ 684 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 685 /* 686 * general query 687 * We know global timer is either not running or is 688 * greater than our calculated delay, so reset it to 689 * our delay (random value in range [0, response time]). 690 */ 691 mutex_enter(&ill->ill_lock); 692 ill->ill_global_timer = current + delay; 693 mutex_exit(&ill->ill_lock); 694 next = delay; 695 696 } else { 697 /* group or group/source specific query */ 698 mutex_enter(&ill->ill_lock); 699 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 700 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 701 (ilm->ilm_addr == htonl(INADDR_ANY)) || 702 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 703 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 704 continue; 705 /* 706 * If the query is group specific or we have a 707 * pending group specific query, the response is 708 * group specific (pending sources list should be 709 * empty). Otherwise, need to update the pending 710 * sources list for the group and source specific 711 * response. 
712 */ 713 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 714 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 715 group_query: 716 FREE_SLIST(ilm->ilm_pendsrcs); 717 ilm->ilm_pendsrcs = NULL; 718 } else { 719 boolean_t overflow; 720 slist_t *pktl; 721 if (numsrc > MAX_FILTER_SIZE || 722 (ilm->ilm_pendsrcs == NULL && 723 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 724 /* 725 * We've been sent more sources than 726 * we can deal with; or we can't deal 727 * with a source list at all. Revert 728 * to a group specific query. 729 */ 730 goto group_query; 731 } 732 if ((pktl = l_alloc()) == NULL) 733 goto group_query; 734 pktl->sl_numsrc = numsrc; 735 for (i = 0; i < numsrc; i++) 736 IN6_IPADDR_TO_V4MAPPED(src_array[i], 737 &(pktl->sl_addr[i])); 738 l_union_in_a(ilm->ilm_pendsrcs, pktl, 739 &overflow); 740 l_free(pktl); 741 if (overflow) 742 goto group_query; 743 } 744 745 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 746 INFINITY : (ilm->ilm_timer - current); 747 /* choose soonest timer */ 748 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 749 if (ilm->ilm_timer < next) 750 next = ilm->ilm_timer; 751 ilm->ilm_timer += current; 752 } 753 mutex_exit(&ill->ill_lock); 754 } 755 756 return (next); 757 } 758 759 void 760 igmp_joingroup(ilm_t *ilm) 761 { 762 uint_t timer; 763 ill_t *ill; 764 ip_stack_t *ipst = ilm->ilm_ipst; 765 766 ill = ilm->ilm_ipif->ipif_ill; 767 768 ASSERT(IAM_WRITER_ILL(ill)); 769 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 770 771 mutex_enter(&ill->ill_lock); 772 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 773 ilm->ilm_rtx.rtx_timer = INFINITY; 774 ilm->ilm_state = IGMP_OTHERMEMBER; 775 mutex_exit(&ill->ill_lock); 776 } else { 777 ip1dbg(("Querier mode %d, sending report, group %x\n", 778 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 779 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 780 mutex_exit(&ill->ill_lock); 781 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 782 mutex_enter(&ill->ill_lock); 783 } else if (ill->ill_mcast_type == 
IGMP_V2_ROUTER) { 784 mutex_exit(&ill->ill_lock); 785 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 786 mutex_enter(&ill->ill_lock); 787 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 788 mrec_t *rp; 789 mcast_record_t rtype; 790 /* 791 * The possible state changes we need to handle here: 792 * Old State New State Report 793 * 794 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 795 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 796 * 797 * No need to send the BLOCK(0) report; ALLOW(X) 798 * is enough. 799 */ 800 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 801 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 802 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 803 ilm->ilm_filter, NULL); 804 mutex_exit(&ill->ill_lock); 805 igmpv3_sendrpt(ilm->ilm_ipif, rp); 806 mutex_enter(&ill->ill_lock); 807 /* 808 * Set up retransmission state. Timer is set below, 809 * for both v3 and older versions. 810 */ 811 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 812 ilm->ilm_filter); 813 } 814 815 /* Set the ilm timer value */ 816 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 817 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 818 timer = ilm->ilm_rtx.rtx_timer; 819 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 820 ilm->ilm_state = IGMP_IREPORTEDLAST; 821 mutex_exit(&ill->ill_lock); 822 823 /* 824 * To avoid deadlock, we defer igmp_start_timers() to 825 * ipsq_exit(). See the comment in ipsq_exit() for details. 
826 */ 827 mutex_enter(&ipst->ips_igmp_timer_lock); 828 ipst->ips_igmp_deferred_next = MIN(timer, 829 ipst->ips_igmp_deferred_next); 830 mutex_exit(&ipst->ips_igmp_timer_lock); 831 } 832 833 if (ip_debug > 1) { 834 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 835 "igmp_joingroup: multicast_type %d timer %d", 836 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 837 (int)ntohl(timer)); 838 } 839 } 840 841 void 842 mld_joingroup(ilm_t *ilm) 843 { 844 uint_t timer; 845 ill_t *ill; 846 ip_stack_t *ipst = ilm->ilm_ipst; 847 848 ill = ilm->ilm_ill; 849 850 ASSERT(IAM_WRITER_ILL(ill)); 851 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 852 853 mutex_enter(&ill->ill_lock); 854 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 855 ilm->ilm_rtx.rtx_timer = INFINITY; 856 ilm->ilm_state = IGMP_OTHERMEMBER; 857 mutex_exit(&ill->ill_lock); 858 } else { 859 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 860 mutex_exit(&ill->ill_lock); 861 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 862 mutex_enter(&ill->ill_lock); 863 } else { 864 mrec_t *rp; 865 mcast_record_t rtype; 866 /* 867 * The possible state changes we need to handle here: 868 * Old State New State Report 869 * 870 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 871 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 872 * 873 * No need to send the BLOCK(0) report; ALLOW(X) 874 * is enough 875 */ 876 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 877 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 878 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 879 ilm->ilm_filter, NULL); 880 mutex_exit(&ill->ill_lock); 881 mldv2_sendrpt(ill, rp); 882 mutex_enter(&ill->ill_lock); 883 /* 884 * Set up retransmission state. Timer is set below, 885 * for both v2 and v1. 
886 */ 887 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 888 ilm->ilm_filter); 889 } 890 891 /* Set the ilm timer value */ 892 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER || 893 ilm->ilm_rtx.rtx_cnt > 0); 894 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 895 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 896 timer = ilm->ilm_rtx.rtx_timer; 897 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 898 ilm->ilm_state = IGMP_IREPORTEDLAST; 899 mutex_exit(&ill->ill_lock); 900 901 /* 902 * To avoid deadlock, we defer mld_start_timers() to 903 * ipsq_exit(). See the comment in ipsq_exit() for details. 904 */ 905 mutex_enter(&ipst->ips_mld_timer_lock); 906 ipst->ips_mld_deferred_next = MIN(timer, 907 ipst->ips_mld_deferred_next); 908 mutex_exit(&ipst->ips_mld_timer_lock); 909 } 910 911 if (ip_debug > 1) { 912 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE, 913 "mld_joingroup: multicast_type %d timer %d", 914 (ilm->ilm_ill->ill_mcast_type), 915 (int)ntohl(timer)); 916 } 917 } 918 919 void 920 igmp_leavegroup(ilm_t *ilm) 921 { 922 ill_t *ill = ilm->ilm_ipif->ipif_ill; 923 924 ASSERT(ilm->ilm_ill == NULL); 925 ASSERT(!ill->ill_isv6); 926 927 mutex_enter(&ill->ill_lock); 928 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 929 ill->ill_mcast_type == IGMP_V2_ROUTER && 930 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 931 mutex_exit(&ill->ill_lock); 932 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP, 933 (htonl(INADDR_ALLRTRS_GROUP))); 934 return; 935 } else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) && 936 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 937 mrec_t *rp; 938 /* 939 * The possible state changes we need to handle here: 940 * Old State New State Report 941 * 942 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 943 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 944 * 945 * No need to send the ALLOW(0) report; BLOCK(X) is enough 946 */ 947 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 948 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 949 ilm->ilm_filter, NULL); 950 } else { 951 rp = 
			    mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * MLD (IPv6) counterpart of igmp_leavegroup(): send a Listener
 * Reduction (v1) or a TO_IN/BLOCK state-change record (v2) when
 * leaving a group other than all-hosts.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * Build and send an IGMPv3 State Change Report reflecting the change of
 * this membership's filter mode/source list to (fmode, flist), and merge
 * the change into the ilm's retransmission state.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the set differences; fall back
			 * to a (larger but always expressible) TO_EX/TO_IN
			 * record. b_minus_a is NULL here, so only
			 * a_minus_b needs freeing.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange:
 * MLDv2 (IPv6) counterpart of igmp_statechange().
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
     *
     *	Old State	New State	State Change Report
     *
     *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
     *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
     *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
     *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
     */
    if (ilm->ilm_fmode == fmode) {
        slist_t *a_minus_b = NULL, *b_minus_a = NULL;
        slist_t *allow, *block;
        /*
         * If we can't allocate the scratch lists, fall back to a
         * full filter-mode-change report (TO_EX/TO_IN) below.
         */
        if (((a_minus_b = l_alloc()) == NULL) ||
            ((b_minus_a = l_alloc()) == NULL)) {
            l_free(a_minus_b);
            if (ilm->ilm_fmode == MODE_IS_INCLUDE)
                goto send_to_ex;
            else
                goto send_to_in;
        }
        l_difference(ilm->ilm_filter, flist, a_minus_b);
        l_difference(flist, ilm->ilm_filter, b_minus_a);
        if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
            allow = b_minus_a;
            block = a_minus_b;
        } else {
            allow = a_minus_b;
            block = b_minus_a;
        }
        if (!SLIST_IS_EMPTY(allow))
            rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
                allow, rp);
        if (!SLIST_IS_EMPTY(block))
            rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
                block, rp);
        l_free(a_minus_b);
        l_free(b_minus_a);
    } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
        rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    } else {
send_to_in:
        rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
            NULL);
    }

    /*
     * Need to set up retransmission state; merge the new info with the
     * current state (which may be null). If the timer is not currently
     * running, start it (need to do a deferred start of the timer as
     * we're currently in the sq).
     */
    rp = mcast_merge_rtx(ilm, rp, flist);
    ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
    if (ilm->ilm_rtx.rtx_timer == INFINITY) {
        MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
            SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
        mutex_enter(&ipst->ips_mld_timer_lock);
        ipst->ips_mld_deferred_next =
            MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
        /* convert relative delay to an absolute ms deadline */
        ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
        mutex_exit(&ipst->ips_mld_timer_lock);
    }

    mutex_exit(&ill->ill_lock);
    mldv2_sendrpt(ill, rp);
}

/*
 * Per-ill IGMP timer processing: run any expired general (global),
 * group, and retransmit timers on this ill, sending the reports they
 * call for.  Returns the number of milliseconds until the next pending
 * timer on this ill (INFINITY if none).  Caller must be the exclusive
 * writer on the ill's ipsq.  ill_lock is dropped around each call into
 * the send path and re-acquired afterwards.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
    uint_t next = INFINITY, current;
    ilm_t *ilm;
    ipif_t *ipif;
    mrec_t *rp = NULL;
    mrec_t *rtxrp = NULL;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    ASSERT(IAM_WRITER_ILL(ill));

    mutex_enter(&ill->ill_lock);

    current = CURRENT_MSTIME;
    /* First check the global timer on this interface */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v3 general
         * query), need to skip the all hosts addr (224.0.0.1), per
         * RFC 3376 section 5.
         */
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
                continue;
            ASSERT(ilm->ilm_ipif != NULL);
            ilm->ilm_ipif->ipif_igmp_rpt =
                mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        /*
         * We've built per-ipif mrec lists; walk the ill's ipif list
         * and send a report for each ipif that has an mrec list.
         */
        for (ipif = ill->ill_ipif; ipif != NULL;
            ipif = ipif->ipif_next) {
            if (ipif->ipif_igmp_rpt == NULL)
                continue;
            /* drop ill_lock across the send, then re-acquire */
            mutex_exit(&ill->ill_lock);
            igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
            mutex_enter(&ill->ill_lock);
            /* mrec list was freed by igmpv3_sendrpt() */
            ipif->ipif_igmp_rpt = NULL;
        }
    } else {
        /* global timer still pending; note the time remaining */
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr %d "
                    "typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
            mutex_enter(&ill->ill_lock);
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            mutex_exit(&ill->ill_lock);
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
            mutex_enter(&ill->ill_lock);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Either the pending request is just group-
                 * specific, or we couldn't get the resources
                 * (rsp) to build a source-specific reply.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
            mutex_exit(&ill->ill_lock);
            igmpv3_sendrpt(ill->ill_ipif, rp);
            mutex_enter(&ill->ill_lock);
            rp = NULL;
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
            mutex_enter(&ill->ill_lock);
            continue;
        } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
            mutex_exit(&ill->ill_lock);
            igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
            mutex_enter(&ill->ill_lock);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * IGMPv3.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            /* more retransmissions remain; re-arm the timer */
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
        mutex_exit(&ill->ill_lock);
        igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
        mutex_enter(&ill->ill_lock);
        rtxrp = NULL;
    }

    mutex_exit(&ill->ill_lock);

    return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    boolean_t success;
    ip_stack_t *ipst = arg;    /* the IP stack instance whose timer fired */

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timer_thread = curthread;
    ipst->ips_igmp_timer_scheduled_last = 0;
    ipst->ips_igmp_time_to_next = 0;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    /* Walk every IPv4 ill and run its per-ill timer processing. */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V4(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(!ill->ill_isv6);
        /*
         * We may not be able to refhold the ill if the ill/ipif
         * is changing. But we need to make sure that the ill will
         * not vanish. So we just bump up the ill_waiter count.
         */
        if (!ill_waiter_inc(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        success = ipsq_enter(ill, B_TRUE);
        if (success) {
            next = igmp_timeout_handler_per_ill(ill);
            if (next < global_next)
                global_next = next;
            ipsq_exit(ill->ill_phyint->phyint_ipsq);
        }
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        ill_waiter_dcr(ill);
    }
    rw_exit(&ipst->ips_ill_g_lock);

    mutex_enter(&ipst->ips_igmp_timer_lock);
    ASSERT(ipst->ips_igmp_timeout_id != 0);
    ipst->ips_igmp_timeout_id = 0;
    ipst->ips_igmp_timer_thread = NULL;
    mutex_exit(&ipst->ips_igmp_timer_lock);

    /* reschedule for the soonest pending event, if any */
    if (global_next != INFINITY)
        igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Per-ill MLD timer processing: run any expired general (global),
 * group, and retransmit timers on this ill, sending the reports they
 * call for.  Returns the number of milliseconds until the next pending
 * timer on this ill (INFINITY if none).  Caller must be the exclusive
 * writer on the ill's ipsq.
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
    ilm_t *ilm;
    uint_t next = INFINITY, current;
    mrec_t *rp, *rtxrp;
    rtx_state_t *rtxp;
    mcast_record_t rtype;

    ASSERT(IAM_WRITER_ILL(ill));

    mutex_enter(&ill->ill_lock);

    current = CURRENT_MSTIME;
    /*
     * First check the global timer on this interface; the global timer
     * is not used for MLDv1, so if it's set we can assume we're v2.
     */
    if (ill->ill_global_timer == INFINITY)
        goto per_ilm_timer;
    if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
        ill->ill_global_timer = INFINITY;
        /*
         * Send report for each group on this interface.
         * Since we just set the global timer (received a v2 general
         * query), need to skip the all hosts addr (ff02::1), per
         * RFC 3810 section 6.
         */
        rp = NULL;
        for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
            if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
                &ipv6_all_hosts_mcast))
                continue;
            rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
                ilm->ilm_filter, rp);
            /*
             * Since we're sending a report on this group, okay
             * to delete pending group-specific timers.  Note
             * that group-specific retransmit timers still need
             * to be checked in the per_ilm_timer for-loop.
             */
            ilm->ilm_timer = INFINITY;
            ilm->ilm_state = IGMP_IREPORTEDLAST;
            FREE_SLIST(ilm->ilm_pendsrcs);
            ilm->ilm_pendsrcs = NULL;
        }
        /* drop ill_lock across the send, then re-acquire */
        mutex_exit(&ill->ill_lock);
        mldv2_sendrpt(ill, rp);
        mutex_enter(&ill->ill_lock);
    } else {
        /* global timer still pending; note the time remaining */
        if ((ill->ill_global_timer - current) < next)
            next = ill->ill_global_timer - current;
    }

per_ilm_timer:
    rp = rtxrp = NULL;
    for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
        if (ilm->ilm_timer == INFINITY)
            goto per_ilm_rtxtimer;

        if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
            if ((ilm->ilm_timer - current) < next)
                next = ilm->ilm_timer - current;

            if (ip_debug > 1) {
                /*
                 * NOTE(review): trace tag still says "igmp";
                 * copied from the IGMP handler.  Left as-is
                 * since it is a runtime string.
                 */
                (void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
                    "igmp_timo_hlr 2: ilm_timr"
                    " %d typ %d nxt %d",
                    (int)ntohl(ilm->ilm_timer - current),
                    (ill->ill_mcast_type), next);
            }

            goto per_ilm_rtxtimer;
        }

        /* the timer has expired, need to take action */
        ilm->ilm_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            mutex_enter(&ill->ill_lock);
        } else {
            slist_t *rsp;
            if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
                (rsp = l_alloc()) != NULL) {
                /*
                 * Contents of reply depend on pending
                 * requested source list.
                 */
                if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
                    l_intersection(ilm->ilm_filter,
                        ilm->ilm_pendsrcs, rsp);
                } else {
                    l_difference(ilm->ilm_pendsrcs,
                        ilm->ilm_filter, rsp);
                }
                FREE_SLIST(ilm->ilm_pendsrcs);
                ilm->ilm_pendsrcs = NULL;
                if (!SLIST_IS_EMPTY(rsp))
                    rp = mcast_bldmrec(MODE_IS_INCLUDE,
                        &ilm->ilm_v6addr, rsp, rp);
                FREE_SLIST(rsp);
            } else {
                /*
                 * Either the pending request is just group-
                 * specific, or we couldn't get the resources
                 * (rsp) to build a source-specific reply.
                 */
                rp = mcast_bldmrec(ilm->ilm_fmode,
                    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
            }
        }

per_ilm_rtxtimer:
        rtxp = &ilm->ilm_rtx;

        if (rtxp->rtx_timer == INFINITY)
            continue;
        if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
            if ((rtxp->rtx_timer - current) < next)
                next = rtxp->rtx_timer - current;
            continue;
        }

        rtxp->rtx_timer = INFINITY;
        ilm->ilm_state = IGMP_IREPORTEDLAST;
        if (ill->ill_mcast_type == MLD_V1_ROUTER) {
            mutex_exit(&ill->ill_lock);
            mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
            mutex_enter(&ill->ill_lock);
            continue;
        }

        /*
         * The retransmit timer has popped, and our router is
         * MLDv2.  We have to delve into the retransmit state
         * stored in the ilm.
         *
         * Decrement the retransmit count.  If the fmode rtx
         * count is active, decrement it, and send a filter
         * mode change report with the ilm's source list.
         * Otherwise, send a source list change report with
         * the current retransmit lists.
         */
        ASSERT(rtxp->rtx_cnt > 0);
        ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
        rtxp->rtx_cnt--;
        if (rtxp->rtx_fmode_cnt > 0) {
            rtxp->rtx_fmode_cnt--;
            rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
                CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
            rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
                ilm->ilm_filter, rtxrp);
        } else {
            rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
            rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
                &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
        }
        if (rtxp->rtx_cnt > 0) {
            /* more retransmissions remain; re-arm the timer */
            MCAST_RANDOM_DELAY(rtxp->rtx_timer,
                SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
            if (rtxp->rtx_timer < next)
                next = rtxp->rtx_timer;
            rtxp->rtx_timer += current;
        } else {
            CLEAR_SLIST(rtxp->rtx_allow);
            CLEAR_SLIST(rtxp->rtx_block);
        }
    }

    /* Unlike IGMP, the v2 reports built above are sent in one batch. */
    if (ill->ill_mcast_type == MLD_V2_ROUTER) {
        mutex_exit(&ill->ill_lock);
        mldv2_sendrpt(ill, rp);
        mldv2_sendrpt(ill, rtxrp);
        return (next);
    }

    mutex_exit(&ill->ill_lock);

    return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
    ill_t *ill;
    uint_t global_next = INFINITY;
    uint_t next;
    ill_walk_context_t ctx;
    boolean_t success;
    ip_stack_t *ipst = arg;    /* the IP stack instance whose timer fired */

    ASSERT(arg != NULL);
    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timer_thread = curthread;
    ipst->ips_mld_timer_scheduled_last = 0;
    ipst->ips_mld_time_to_next = 0;
    mutex_exit(&ipst->ips_mld_timer_lock);

    /* Walk every IPv6 ill and run its per-ill timer processing. */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    ill = ILL_START_WALK_V6(&ctx, ipst);
    for (; ill != NULL; ill = ill_next(&ctx, ill)) {
        ASSERT(ill->ill_isv6);
        /*
         * We may not be able to refhold the ill if the ill/ipif
         * is changing. But we need to make sure that the ill will
         * not vanish.
         * So we just bump up the ill_waiter count.
         */
        if (!ill_waiter_inc(ill))
            continue;
        rw_exit(&ipst->ips_ill_g_lock);
        success = ipsq_enter(ill, B_TRUE);
        if (success) {
            next = mld_timeout_handler_per_ill(ill);
            if (next < global_next)
                global_next = next;
            ipsq_exit(ill->ill_phyint->phyint_ipsq);
        }
        rw_enter(&ipst->ips_ill_g_lock, RW_READER);
        ill_waiter_dcr(ill);
    }
    rw_exit(&ipst->ips_ill_g_lock);

    mutex_enter(&ipst->ips_mld_timer_lock);
    ASSERT(ipst->ips_mld_timeout_id != 0);
    ipst->ips_mld_timeout_id = 0;
    ipst->ips_mld_timer_thread = NULL;
    mutex_exit(&ipst->ips_mld_timer_lock);

    /* reschedule for the soonest pending event, if any */
    if (global_next != INFINITY)
        mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to new router if we didn't hear from the router
 *   in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* Hold the ill_g_lock so that we can safely walk the ill list */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);

    /*
     * The ill_if_t list is circular, hence the odd loop parameters.
     *
     * We can't use the ILL_START_WALK and ill_next() wrappers for this
     * walk, as we need to check the illif_mcast_* fields in the ill_if_t
     * structure (allowing us to skip if none of the instances have timers
     * running).
     */
    for (ifp = IP_V4_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        /*
         * illif_mcast_v[12] are set using atomics. If an ill hears
         * a V1 or V2 query now and we miss seeing the count now,
         * we will see it the next time igmp_slowtimo is called.
         */
        if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            mutex_enter(&ill->ill_lock);
            /* age the "older-version querier present" timers */
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if (ill->ill_mcast_v2_tset == 1)
                ill->ill_mcast_v2_time++;
            if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                if ((ill->ill_mcast_v2_tset > 0) ||
                    (ipst->ips_igmp_max_version ==
                    IGMP_V2_ROUTER)) {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V2\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V2_ROUTER;
                } else {
                    ip1dbg(("V1 query timer "
                        "expired on %s; switching "
                        "mode to IGMP_V3\n",
                        ill->ill_name));
                    ill->ill_mcast_type =
                        IGMP_V3_ROUTER;
                }
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_add_16(&ifp->illif_mcast_v1, -1);
            }
            if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
                (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
                (ill->ill_mcast_v2_time >= OVQP(ill))) {
                ip1dbg(("V2 query timer expired on "
                    "%s; switching mode to IGMP_V3\n",
                    ill->ill_name));
                ill->ill_mcast_type = IGMP_V3_ROUTER;
                ill->ill_mcast_v2_time = 0;
                ill->ill_mcast_v2_tset = 0;
                atomic_add_16(&ifp->illif_mcast_v2, -1);
            }
            mutex_exit(&ill->ill_lock);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    /* re-arm the slow timer for the next pass */
    mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
    ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
        MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
    ill_t *ill;
    ill_if_t *ifp;
    avl_tree_t *avl_tree;
    ip_stack_t *ipst = (ip_stack_t *)arg;

    ASSERT(arg != NULL);
    /* See comments in igmp_slowtimo() above... */
    rw_enter(&ipst->ips_ill_g_lock, RW_READER);
    for (ifp = IP_V6_ILL_G_LIST(ipst);
        ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
        ifp = ifp->illif_next) {
        if (ifp->illif_mcast_v1 == 0)
            continue;

        avl_tree = &ifp->illif_avl_by_ppa;
        for (ill = avl_first(avl_tree); ill != NULL;
            ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
            mutex_enter(&ill->ill_lock);
            if (ill->ill_mcast_v1_tset == 1)
                ill->ill_mcast_v1_time++;
            if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
                (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
                (ill->ill_mcast_v1_time >= OVQP(ill))) {
                ip1dbg(("MLD query timer expired on"
                    " %s; switching mode to MLD_V2\n",
                    ill->ill_name));
                ill->ill_mcast_type = MLD_V2_ROUTER;
                ill->ill_mcast_v1_time = 0;
                ill->ill_mcast_v1_tset = 0;
                atomic_add_16(&ifp->illif_mcast_v1, -1);
            }
            mutex_exit(&ill->ill_lock);
        }
    }
    rw_exit(&ipst->ips_ill_g_lock);
    /* re-arm the slow timer for the next pass */
    mutex_enter(&ipst->ips_mld_slowtimeout_lock);
    ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
        MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
    mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * This will send to ip_wput like
 * icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 *
 * Builds and transmits a single IGMPv1/v2 message of the given type for
 * ilm's group; addr overrides the IP destination when non-zero (otherwise
 * the message is addressed to the group itself).
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
    mblk_t *mp;
    igmpa_t *igmpa;
    uint8_t *rtralert;
    ipha_t *ipha;
    int hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
    size_t size = hdrlen + sizeof (igmpa_t);
    ipif_t *ipif = ilm->ilm_ipif;
    ill_t *ill = ipif->ipif_ill;    /* Will be the "lower" ill */
    mblk_t *first_mp;
    ipsec_out_t *io;
    zoneid_t zoneid;
    ip_stack_t *ipst = ill->ill_ipst;

    /*
     * We need to make sure this packet goes out on an ipif. If
     * there is some global policy match in ip_wput_ire, we need
     * to get to the right interface after IPSEC processing.
     * To make sure this multicast packet goes out on the right
     * interface, we attach an ipsec_out and initialize ill_index
     * like we did in ip_wput. To make sure that this packet does
     * not get forwarded on other interfaces or looped back, we
     * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
     * to B_FALSE.
     *
     * We also need to make sure that this does not get load balanced
     * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
     * here. If it gets load balanced, switches supporting igmp snooping
     * will send the packet that it receives for this multicast group
     * to the interface that we are sending on. As we have joined the
     * multicast group on this ill, by sending the packet out on this
     * ill, we receive all the packets back on this ill.
     */
    first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
    if (first_mp == NULL)
        return;

    first_mp->b_datap->db_type = M_CTL;
    first_mp->b_wptr += sizeof (ipsec_info_t);
    bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
    /* ipsec_out_secure is B_FALSE now */
    io = (ipsec_out_t *)first_mp->b_rptr;
    io->ipsec_out_type = IPSEC_OUT;
    io->ipsec_out_len = sizeof (ipsec_out_t);
    io->ipsec_out_use_global_policy = B_TRUE;
    io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
    io->ipsec_out_attach_if = B_TRUE;
    io->ipsec_out_multicast_loop = B_FALSE;
    io->ipsec_out_dontroute = B_TRUE;
    if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
        zoneid = GLOBAL_ZONEID;
    io->ipsec_out_zoneid = zoneid;
    io->ipsec_out_ns = ipst->ips_netstack;    /* No netstack_hold */

    mp = allocb(size, BPRI_HI);
    if (mp == NULL) {
        freemsg(first_mp);
        return;
    }
    mp->b_wptr = mp->b_rptr + size;
    first_mp->b_cont = mp;

    /* layout: IP header, router-alert option, then the IGMP message */
    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
    igmpa->igmpa_type = type;
    igmpa->igmpa_code = 0;
    igmpa->igmpa_group = ilm->ilm_addr;
    igmpa->igmpa_cksum = 0;
    igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = 0;
    ipha->ipha_length = htons(size);
    ipha->ipha_ident = 0;
    ipha->ipha_fragment_offset_and_flags = 0;
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_hdr_checksum = 0;
    ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
    ipha->ipha_src = ipif->ipif_src_addr;
    /*
     * Request loopback of the report if we are acting as a multicast
     * router, so that the process-level routing daemon can hear it.
     */
    /*
     * This will run multiple times for the same group if there are members
     * on the same group for multiple ipif's on the same ill. The
     * igmp_input code will suppress this due to the loopback thus we
     * always loopback membership report.
     */
    ASSERT(ill->ill_rq != NULL);
    ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

    ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

    ++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
    ipsec_out_t *io;
    igmp3ra_t *igmp3ra;
    grphdra_t *grphdr;
    mblk_t *first_mp, *mp;
    ipha_t *ipha;
    uint8_t *rtralert;
    ipaddr_t *src_array;
    int i, j, numrec, more_src_cnt;
    size_t hdrsize, size, rsize;
    ill_t *ill = ipif->ipif_ill;
    mrec_t *rp, *cur_reclist;
    mrec_t *next_reclist = reclist;
    boolean_t morepkts;
    zoneid_t zoneid;
    ip_stack_t *ipst = ill->ill_ipst;

    /* if there aren't any records, there's nothing to send */
    if (reclist == NULL)
        return;

    hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
    /*
     * Figure out how many of the remaining records fit in one packet,
     * splitting an oversized source list across packets if necessary.
     */
    size = hdrsize + sizeof (igmp3ra_t);
    morepkts = B_FALSE;
    more_src_cnt = 0;
    cur_reclist = next_reclist;
    numrec = 0;
    for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
        rsize = sizeof (grphdra_t) +
            (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
        if (size + rsize > ill->ill_max_frag) {
            if (rp == cur_reclist) {
                /*
                 * If the first mrec we looked at is too big
                 * to fit in a single packet (i.e the source
                 * list is too big), we must either truncate
                 * the list (if TO_EX or IS_EX), or send
                 * multiple reports for the same group (all
                 * other types).
                 */
                int srcspace, srcsperpkt;
                srcspace = ill->ill_max_frag - (size +
                    sizeof (grphdra_t));
                srcsperpkt = srcspace / sizeof (ipaddr_t);
                /*
                 * Increment size and numrec, because we will
                 * be sending a record for the mrec we're
                 * looking at now.
                 */
                size += sizeof (grphdra_t) +
                    (srcsperpkt * sizeof (ipaddr_t));
                numrec++;
                if (rp->mrec_type == MODE_IS_EXCLUDE ||
                    rp->mrec_type == CHANGE_TO_EXCLUDE) {
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    if (rp->mrec_next == NULL) {
                        /* no more packets to send */
                        break;
                    } else {
                        /*
                         * more packets, but we're
                         * done with this mrec.
                         */
                        next_reclist = rp->mrec_next;
                    }
                } else {
                    more_src_cnt = rp->mrec_srcs.sl_numsrc
                        - srcsperpkt;
                    rp->mrec_srcs.sl_numsrc = srcsperpkt;
                    /*
                     * We'll fix up this mrec (remove the
                     * srcs we've already sent) before
                     * returning to nextpkt above.
                     */
                    next_reclist = rp;
                }
            } else {
                next_reclist = rp;
            }
            morepkts = B_TRUE;
            break;
        }
        size += rsize;
        numrec++;
    }

    /*
     * See comments in igmp_sendpkt() about initializing for ipsec and
     * load balancing requirements.
     */
    first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
    if (first_mp == NULL)
        goto free_reclist;

    first_mp->b_datap->db_type = M_CTL;
    first_mp->b_wptr += sizeof (ipsec_info_t);
    bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
    /* ipsec_out_secure is B_FALSE now */
    io = (ipsec_out_t *)first_mp->b_rptr;
    io->ipsec_out_type = IPSEC_OUT;
    io->ipsec_out_len = sizeof (ipsec_out_t);
    io->ipsec_out_use_global_policy = B_TRUE;
    io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
    io->ipsec_out_attach_if = B_TRUE;
    io->ipsec_out_multicast_loop = B_FALSE;
    io->ipsec_out_dontroute = B_TRUE;
    if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
        zoneid = GLOBAL_ZONEID;
    io->ipsec_out_zoneid = zoneid;

    mp = allocb(size, BPRI_HI);
    if (mp == NULL) {
        freemsg(first_mp);
        goto free_reclist;
    }
    bzero((char *)mp->b_rptr, size);
    mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
    first_mp->b_cont = mp;

    /* layout: IP header, router-alert option, report header, records */
    ipha = (ipha_t *)mp->b_rptr;
    rtralert = (uint8_t *)&(ipha[1]);
    igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
    grphdr = (grphdra_t *)&(igmp3ra[1]);

    rp = cur_reclist;
    for (i = 0; i < numrec; i++) {
        grphdr->grphdra_type = rp->mrec_type;
        grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
        grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
        src_array = (ipaddr_t *)&(grphdr[1]);

        for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
            src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

        grphdr = (grphdra_t *)&(src_array[j]);
        rp = rp->mrec_next;
    }

    igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
    igmp3ra->igmp3ra_numrec = htons(numrec);
    igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

    rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
    rtralert[1] = RTRALERT_LEN;
    rtralert[2] = 0;
    rtralert[3] = 0;

    ipha->ipha_version_and_hdr_length = IP_VERSION << 4
        | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
    ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
    ipha->ipha_length = htons(size);
    ipha->ipha_ttl = IGMP_TTL;
    ipha->ipha_protocol = IPPROTO_IGMP;
    ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
    ipha->ipha_src = ipif->ipif_src_addr;

    /*
     * Request loopback of the report if we are acting as a multicast
     * router, so that the process-level routing daemon can hear it.
     *
     * This will run multiple times for the same group if there are
     * members on the same group for multiple ipifs on the same ill.
     * The igmp_input code will suppress this due to the loopback;
     * thus we always loopback membership report.
     */
    ASSERT(ill->ill_rq != NULL);
    ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

    ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

    ++ipst->ips_igmpstat.igps_snd_reports;

    if (morepkts) {
        if (more_src_cnt > 0) {
            /* shift the unsent sources to the list head */
            int index, mvsize;
            slist_t *sl = &next_reclist->mrec_srcs;
            index = sl->sl_numsrc;
            mvsize = more_src_cnt * sizeof (in6_addr_t);
            (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
                mvsize);
            sl->sl_numsrc = more_src_cnt;
        }
        goto nextpkt;
    }

free_reclist:
    while (reclist != NULL) {
        rp = reclist->mrec_next;
        mi_free(reclist);
        reclist = rp;
    }
}

/*
 * mld_input:
 * Input processing for received MLD packets: validates the packet
 * (link-local source, hop limit of 1, minimum length) before acting
 * on its contents.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
    ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
    mld_hdr_t *mldh;
    ilm_t *ilm;
    ipif_t *ipif;
    uint16_t hdr_length, exthdr_length;
    in6_addr_t *v6group_ptr, *lcladdr_ptr;
    uint_t next;
    int mldlen;
    ip_stack_t *ipst = ill->ill_ipst;

    BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

    /* Make sure the src address of the packet is link-local */
    if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
        freemsg(mp);
        return;
    }

    if (ip6h->ip6_hlim != 1) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
        freemsg(mp);
        return;
    }

    /* Get to the icmp header part */
    if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
        hdr_length = ip_hdr_length_v6(mp, ip6h);
        exthdr_length = hdr_length - IPV6_HDR_LEN;
    } else {
        hdr_length = IPV6_HDR_LEN;
        exthdr_length = 0;
    }
    mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

    /* An MLD packet must at least be 24 octets to be valid */
    if (mldlen < MLD_MINLEN) {
        BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
freemsg(mp); 2223 return; 2224 } 2225 2226 mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]); 2227 2228 switch (mldh->mld_type) { 2229 case MLD_LISTENER_QUERY: 2230 /* 2231 * packet length differentiates between v1 and v2. v1 2232 * query should be exactly 24 octets long; v2 is >= 28. 2233 */ 2234 if ((mldlen == MLD_MINLEN) || 2235 (ipst->ips_mld_max_version < MLD_V2_ROUTER)) { 2236 next = mld_query_in(mldh, ill); 2237 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2238 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2239 } else { 2240 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2241 freemsg(mp); 2242 return; 2243 } 2244 if (next == 0) { 2245 freemsg(mp); 2246 return; 2247 } 2248 2249 if (next != INFINITY) 2250 mld_start_timers(next, ipst); 2251 break; 2252 2253 case MLD_LISTENER_REPORT: { 2254 2255 ASSERT(ill->ill_ipif != NULL); 2256 /* 2257 * For fast leave to work, we have to know that we are the 2258 * last person to send a report for this group. Reports 2259 * generated by us are looped back since we could potentially 2260 * be a multicast router, so discard reports sourced by me. 
2261 */ 2262 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2263 mutex_enter(&ill->ill_lock); 2264 for (ipif = ill->ill_ipif; ipif != NULL; 2265 ipif = ipif->ipif_next) { 2266 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2267 lcladdr_ptr)) { 2268 if (ip_debug > 1) { 2269 char buf1[INET6_ADDRSTRLEN]; 2270 char buf2[INET6_ADDRSTRLEN]; 2271 2272 (void) mi_strlog(ill->ill_rq, 2273 1, 2274 SL_TRACE, 2275 "mld_input: we are only " 2276 "member src %s ipif_local %s", 2277 inet_ntop(AF_INET6, lcladdr_ptr, 2278 buf1, sizeof (buf1)), 2279 inet_ntop(AF_INET6, 2280 &ipif->ipif_v6lcl_addr, 2281 buf2, sizeof (buf2))); 2282 } 2283 mutex_exit(&ill->ill_lock); 2284 freemsg(mp); 2285 return; 2286 } 2287 } 2288 mutex_exit(&ill->ill_lock); 2289 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2290 2291 v6group_ptr = &mldh->mld_addr; 2292 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2293 BUMP_MIB(ill->ill_icmp6_mib, 2294 ipv6IfIcmpInGroupMembBadReports); 2295 freemsg(mp); 2296 return; 2297 } 2298 2299 2300 /* 2301 * If we belong to the group being reported, and we are a 2302 * 'Delaying member' per the RFC terminology, stop our timer 2303 * for that group and 'clear flag' i.e. mark ilm_state as 2304 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2305 * membership entries for the same group address (one per zone) 2306 * so we need to walk the ill_ilm list. 
2307 */ 2308 mutex_enter(&ill->ill_lock); 2309 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2310 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2311 continue; 2312 BUMP_MIB(ill->ill_icmp6_mib, 2313 ipv6IfIcmpInGroupMembOurReports); 2314 2315 ilm->ilm_timer = INFINITY; 2316 ilm->ilm_state = IGMP_OTHERMEMBER; 2317 } 2318 mutex_exit(&ill->ill_lock); 2319 break; 2320 } 2321 case MLD_LISTENER_REDUCTION: 2322 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2323 break; 2324 } 2325 /* 2326 * All MLD packets have already been passed up to any 2327 * process(es) listening on a ICMP6 raw socket. This 2328 * has been accomplished in ip_deliver_local_v6 prior to 2329 * this function call. It is assumed that the multicast daemon 2330 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2331 * ICMP6_FILTER socket option to only receive the MLD messages) 2332 * Thus we can free the MLD message block here 2333 */ 2334 freemsg(mp); 2335 } 2336 2337 /* 2338 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2339 * (non-zero, unsigned) timer value to be set on success. 2340 */ 2341 static uint_t 2342 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2343 { 2344 ilm_t *ilm; 2345 int timer; 2346 uint_t next, current; 2347 in6_addr_t *v6group; 2348 2349 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2350 2351 /* 2352 * In the MLD specification, there are 3 states and a flag. 2353 * 2354 * In Non-Listener state, we simply don't have a membership record. 2355 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2356 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2357 * INFINITY) 2358 * 2359 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2360 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2361 * if I sent the last report. 
2362 */ 2363 v6group = &mldh->mld_addr; 2364 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2365 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2366 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2367 return (0); 2368 } 2369 2370 /* Need to do compatibility mode checking */ 2371 mutex_enter(&ill->ill_lock); 2372 ill->ill_mcast_v1_time = 0; 2373 ill->ill_mcast_v1_tset = 1; 2374 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2375 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2376 "MLD_V1_ROUTER\n", ill->ill_name)); 2377 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2378 ill->ill_mcast_type = MLD_V1_ROUTER; 2379 } 2380 mutex_exit(&ill->ill_lock); 2381 2382 timer = (int)ntohs(mldh->mld_maxdelay); 2383 if (ip_debug > 1) { 2384 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2385 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2386 timer, (int)mldh->mld_type); 2387 } 2388 2389 /* 2390 * -Start the timers in all of our membership records for 2391 * the physical interface on which the query arrived, 2392 * excl: 2393 * 1. those that belong to the "all hosts" group, 2394 * 2. those with 0 scope, or 1 node-local scope. 2395 * 2396 * -Restart any timer that is already running but has a value 2397 * longer that the requested timeout. 2398 * -Use the value specified in the query message as the 2399 * maximum timeout. 
2400 */ 2401 next = INFINITY; 2402 mutex_enter(&ill->ill_lock); 2403 2404 current = CURRENT_MSTIME; 2405 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2406 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2407 2408 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2409 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2410 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2411 continue; 2412 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2413 &ipv6_all_hosts_mcast)) && 2414 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2415 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2416 if (timer == 0) { 2417 /* Respond immediately */ 2418 ilm->ilm_timer = INFINITY; 2419 ilm->ilm_state = IGMP_IREPORTEDLAST; 2420 mutex_exit(&ill->ill_lock); 2421 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2422 mutex_enter(&ill->ill_lock); 2423 break; 2424 } 2425 if (ilm->ilm_timer > timer) { 2426 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2427 if (ilm->ilm_timer < next) 2428 next = ilm->ilm_timer; 2429 ilm->ilm_timer += current; 2430 } 2431 break; 2432 } 2433 } 2434 mutex_exit(&ill->ill_lock); 2435 2436 return (next); 2437 } 2438 2439 /* 2440 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2441 * returns the appropriate (non-zero, unsigned) timer value (which may 2442 * be INFINITY) to be set. 
2443 */ 2444 static uint_t 2445 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen) 2446 { 2447 ilm_t *ilm; 2448 in6_addr_t *v6group, *src_array; 2449 uint_t next, numsrc, i, mrd, delay, qqi, current; 2450 uint8_t qrv; 2451 2452 v6group = &mld2q->mld2q_addr; 2453 numsrc = ntohs(mld2q->mld2q_numsrc); 2454 2455 /* make sure numsrc matches packet size */ 2456 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) { 2457 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2458 return (0); 2459 } 2460 src_array = (in6_addr_t *)&mld2q[1]; 2461 2462 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2463 2464 /* extract Maximum Response Delay from code in header */ 2465 mrd = ntohs(mld2q->mld2q_mxrc); 2466 if (mrd >= MLD_V2_MAXRT_FPMIN) { 2467 uint_t hdrval, mant, exp; 2468 hdrval = mrd; 2469 mant = hdrval & MLD_V2_MAXRT_MANT_MASK; 2470 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12; 2471 mrd = (mant | 0x1000) << (exp + 3); 2472 } 2473 if (mrd == 0) 2474 mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL); 2475 2476 MCAST_RANDOM_DELAY(delay, mrd); 2477 next = (unsigned)INFINITY; 2478 current = CURRENT_MSTIME; 2479 2480 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0) 2481 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 2482 else 2483 ill->ill_mcast_rv = qrv; 2484 2485 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) { 2486 uint_t mant, exp; 2487 mant = qqi & MLD_V2_QQI_MANT_MASK; 2488 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12; 2489 qqi = (mant | 0x10) << (exp + 3); 2490 } 2491 ill->ill_mcast_qi = (qqi == 0) ? 
MCAST_DEF_QUERY_INTERVAL : qqi; 2492 2493 /* 2494 * If we have a pending general query response that's scheduled 2495 * sooner than the delay we calculated for this response, then 2496 * no action is required (MLDv2 draft section 6.2 rule 1) 2497 */ 2498 mutex_enter(&ill->ill_lock); 2499 if (ill->ill_global_timer < (current + delay)) { 2500 mutex_exit(&ill->ill_lock); 2501 return (next); 2502 } 2503 mutex_exit(&ill->ill_lock); 2504 2505 /* 2506 * Now take action depending on query type: general, 2507 * group specific, or group/source specific. 2508 */ 2509 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) { 2510 /* 2511 * general query 2512 * We know global timer is either not running or is 2513 * greater than our calculated delay, so reset it to 2514 * our delay (random value in range [0, response time]) 2515 */ 2516 mutex_enter(&ill->ill_lock); 2517 ill->ill_global_timer = current + delay; 2518 mutex_exit(&ill->ill_lock); 2519 next = delay; 2520 2521 } else { 2522 /* group or group/source specific query */ 2523 mutex_enter(&ill->ill_lock); 2524 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2525 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2526 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2527 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) || 2528 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr)) 2529 continue; 2530 2531 /* 2532 * If the query is group specific or we have a 2533 * pending group specific query, the response is 2534 * group specific (pending sources list should be 2535 * empty). Otherwise, need to update the pending 2536 * sources list for the group and source specific 2537 * response. 
2538 */ 2539 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2540 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2541 group_query: 2542 FREE_SLIST(ilm->ilm_pendsrcs); 2543 ilm->ilm_pendsrcs = NULL; 2544 } else { 2545 boolean_t overflow; 2546 slist_t *pktl; 2547 if (numsrc > MAX_FILTER_SIZE || 2548 (ilm->ilm_pendsrcs == NULL && 2549 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2550 /* 2551 * We've been sent more sources than 2552 * we can deal with; or we can't deal 2553 * with a source list at all. Revert 2554 * to a group specific query. 2555 */ 2556 goto group_query; 2557 } 2558 if ((pktl = l_alloc()) == NULL) 2559 goto group_query; 2560 pktl->sl_numsrc = numsrc; 2561 for (i = 0; i < numsrc; i++) 2562 pktl->sl_addr[i] = src_array[i]; 2563 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2564 &overflow); 2565 l_free(pktl); 2566 if (overflow) 2567 goto group_query; 2568 } 2569 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 2570 INFINITY : (ilm->ilm_timer - current); 2571 /* set timer to soonest value */ 2572 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2573 if (ilm->ilm_timer < next) 2574 next = ilm->ilm_timer; 2575 ilm->ilm_timer += current; 2576 break; 2577 } 2578 mutex_exit(&ill->ill_lock); 2579 } 2580 2581 return (next); 2582 } 2583 2584 /* 2585 * Send MLDv1 response packet with hoplimit 1 2586 */ 2587 static void 2588 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2589 { 2590 mblk_t *mp; 2591 mld_hdr_t *mldh; 2592 ip6_t *ip6h; 2593 ip6_hbh_t *ip6hbh; 2594 struct ip6_opt_router *ip6router; 2595 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2596 ill_t *ill = ilm->ilm_ill; /* Will be the "lower" ill */ 2597 ipif_t *ipif; 2598 ip6i_t *ip6i; 2599 2600 /* 2601 * We need to place a router alert option in this packet. The length 2602 * of the options must be a multiple of 8. The hbh option header is 2 2603 * bytes followed by the 4 byte router alert option. That leaves 2604 * 2 bytes of pad for a total of 8 bytes. 
2605 */ 2606 const int router_alert_length = 8; 2607 2608 ASSERT(ill->ill_isv6); 2609 2610 /* 2611 * We need to make sure that this packet does not get load balanced. 2612 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2613 * ip_newroute_ipif_v6 knows how to handle such packets. 2614 * If it gets load balanced, switches supporting MLD snooping 2615 * (in the future) will send the packet that it receives for this 2616 * multicast group to the interface that we are sending on. As we have 2617 * joined the multicast group on this ill, by sending the packet out 2618 * on this ill, we receive all the packets back on this ill. 2619 */ 2620 size += sizeof (ip6i_t) + router_alert_length; 2621 mp = allocb(size, BPRI_HI); 2622 if (mp == NULL) 2623 return; 2624 bzero(mp->b_rptr, size); 2625 mp->b_wptr = mp->b_rptr + size; 2626 2627 ip6i = (ip6i_t *)mp->b_rptr; 2628 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2629 ip6i->ip6i_nxt = IPPROTO_RAW; 2630 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2631 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2632 2633 ip6h = (ip6_t *)&ip6i[1]; 2634 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2635 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2636 /* 2637 * A zero is a pad option of length 1. The bzero of the whole packet 2638 * above will pad between ip6router and mld. 
2639 */ 2640 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2641 2642 mldh->mld_type = type; 2643 mldh->mld_addr = ilm->ilm_v6addr; 2644 2645 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2646 ip6router->ip6or_len = 2; 2647 ip6router->ip6or_value[0] = 0; 2648 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2649 2650 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2651 ip6hbh->ip6h_len = 0; 2652 2653 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2654 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2655 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2656 ip6h->ip6_hops = MLD_HOP_LIMIT; 2657 if (v6addr == NULL) 2658 ip6h->ip6_dst = ilm->ilm_v6addr; 2659 else 2660 ip6h->ip6_dst = *v6addr; 2661 2662 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2663 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2664 ip6h->ip6_src = ipif->ipif_v6src_addr; 2665 ipif_refrele(ipif); 2666 } else { 2667 /* Otherwise, use IPv6 default address selection. */ 2668 ip6h->ip6_src = ipv6_all_zeros; 2669 } 2670 2671 /* 2672 * Prepare for checksum by putting icmp length in the icmp 2673 * checksum field. The checksum is calculated in ip_wput_v6. 2674 */ 2675 mldh->mld_cksum = htons(sizeof (*mldh)); 2676 2677 /* 2678 * ip_wput will automatically loopback the multicast packet to 2679 * the conn if multicast loopback is enabled. 2680 * The MIB stats corresponding to this outgoing MLD packet 2681 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2682 * ->icmp_update_out_mib_v6 function call. 2683 */ 2684 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2685 } 2686 2687 /* 2688 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2689 * report will contain one multicast address record for each element of 2690 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2691 * multiple reports are sent. reclist is assumed to be made up of 2692 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 
2693 */ 2694 static void 2695 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2696 { 2697 mblk_t *mp; 2698 mld2r_t *mld2r; 2699 mld2mar_t *mld2mar; 2700 in6_addr_t *srcarray; 2701 ip6_t *ip6h; 2702 ip6_hbh_t *ip6hbh; 2703 ip6i_t *ip6i; 2704 struct ip6_opt_router *ip6router; 2705 size_t size, optlen, padlen, icmpsize, rsize; 2706 ipif_t *ipif; 2707 int i, numrec, more_src_cnt; 2708 mrec_t *rp, *cur_reclist; 2709 mrec_t *next_reclist = reclist; 2710 boolean_t morepkts; 2711 2712 /* If there aren't any records, there's nothing to send */ 2713 if (reclist == NULL) 2714 return; 2715 2716 ASSERT(ill->ill_isv6); 2717 2718 /* 2719 * Total option length (optlen + padlen) must be a multiple of 2720 * 8 bytes. We assume here that optlen <= 8, so the total option 2721 * length will be 8. Assert this in case anything ever changes. 2722 */ 2723 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2724 ASSERT(optlen <= 8); 2725 padlen = 8 - optlen; 2726 nextpkt: 2727 icmpsize = sizeof (mld2r_t); 2728 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2729 morepkts = B_FALSE; 2730 more_src_cnt = 0; 2731 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2732 rp = rp->mrec_next, numrec++) { 2733 rsize = sizeof (mld2mar_t) + 2734 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2735 if (size + rsize > ill->ill_max_frag) { 2736 if (rp == cur_reclist) { 2737 /* 2738 * If the first mrec we looked at is too big 2739 * to fit in a single packet (i.e the source 2740 * list is too big), we must either truncate 2741 * the list (if TO_EX or IS_EX), or send 2742 * multiple reports for the same group (all 2743 * other types). 2744 */ 2745 int srcspace, srcsperpkt; 2746 srcspace = ill->ill_max_frag - 2747 (size + sizeof (mld2mar_t)); 2748 srcsperpkt = srcspace / sizeof (in6_addr_t); 2749 /* 2750 * Increment icmpsize and size, because we will 2751 * be sending a record for the mrec we're 2752 * looking at now. 
2753 */ 2754 rsize = sizeof (mld2mar_t) + 2755 (srcsperpkt * sizeof (in6_addr_t)); 2756 icmpsize += rsize; 2757 size += rsize; 2758 if (rp->mrec_type == MODE_IS_EXCLUDE || 2759 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2760 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2761 if (rp->mrec_next == NULL) { 2762 /* no more packets to send */ 2763 break; 2764 } else { 2765 /* 2766 * more packets, but we're 2767 * done with this mrec. 2768 */ 2769 next_reclist = rp->mrec_next; 2770 } 2771 } else { 2772 more_src_cnt = rp->mrec_srcs.sl_numsrc 2773 - srcsperpkt; 2774 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2775 /* 2776 * We'll fix up this mrec (remove the 2777 * srcs we've already sent) before 2778 * returning to nextpkt above. 2779 */ 2780 next_reclist = rp; 2781 } 2782 } else { 2783 next_reclist = rp; 2784 } 2785 morepkts = B_TRUE; 2786 break; 2787 } 2788 icmpsize += rsize; 2789 size += rsize; 2790 } 2791 2792 /* 2793 * We need to make sure that this packet does not get load balanced. 2794 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2795 * ip_newroute_ipif_v6 know how to handle such packets. 2796 * If it gets load balanced, switches supporting MLD snooping 2797 * (in the future) will send the packet that it receives for this 2798 * multicast group to the interface that we are sending on. As we have 2799 * joined the multicast group on this ill, by sending the packet out 2800 * on this ill, we receive all the packets back on this ill. 
2801 */ 2802 size += sizeof (ip6i_t); 2803 mp = allocb(size, BPRI_HI); 2804 if (mp == NULL) 2805 goto free_reclist; 2806 bzero(mp->b_rptr, size); 2807 mp->b_wptr = mp->b_rptr + size; 2808 2809 ip6i = (ip6i_t *)mp->b_rptr; 2810 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2811 ip6i->ip6i_nxt = IPPROTO_RAW; 2812 ip6i->ip6i_flags = IP6I_ATTACH_IF; 2813 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2814 2815 ip6h = (ip6_t *)&(ip6i[1]); 2816 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2817 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2818 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2819 mld2mar = (mld2mar_t *)&(mld2r[1]); 2820 2821 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2822 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2823 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2824 ip6h->ip6_hops = MLD_HOP_LIMIT; 2825 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2826 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2827 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2828 ip6h->ip6_src = ipif->ipif_v6src_addr; 2829 ipif_refrele(ipif); 2830 } else { 2831 /* otherwise, use IPv6 default address selection. */ 2832 ip6h->ip6_src = ipv6_all_zeros; 2833 } 2834 2835 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2836 /* 2837 * ip6h_len is the number of 8-byte words, not including the first 2838 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2839 */ 2840 ip6hbh->ip6h_len = 0; 2841 2842 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2843 ip6router->ip6or_len = 2; 2844 ip6router->ip6or_value[0] = 0; 2845 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2846 2847 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2848 mld2r->mld2r_nummar = htons(numrec); 2849 /* 2850 * Prepare for the checksum by putting icmp length in the icmp 2851 * checksum field. The checksum is calculated in ip_wput_v6. 
2852 */ 2853 mld2r->mld2r_cksum = htons(icmpsize); 2854 2855 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2856 mld2mar->mld2mar_type = rp->mrec_type; 2857 mld2mar->mld2mar_auxlen = 0; 2858 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2859 mld2mar->mld2mar_group = rp->mrec_group; 2860 srcarray = (in6_addr_t *)&(mld2mar[1]); 2861 2862 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2863 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2864 2865 mld2mar = (mld2mar_t *)&(srcarray[i]); 2866 } 2867 2868 /* 2869 * ip_wput will automatically loopback the multicast packet to 2870 * the conn if multicast loopback is enabled. 2871 * The MIB stats corresponding to this outgoing MLD packet 2872 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2873 * ->icmp_update_out_mib_v6 function call. 2874 */ 2875 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2876 2877 if (morepkts) { 2878 if (more_src_cnt > 0) { 2879 int index, mvsize; 2880 slist_t *sl = &next_reclist->mrec_srcs; 2881 index = sl->sl_numsrc; 2882 mvsize = more_src_cnt * sizeof (in6_addr_t); 2883 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2884 mvsize); 2885 sl->sl_numsrc = more_src_cnt; 2886 } 2887 goto nextpkt; 2888 } 2889 2890 free_reclist: 2891 while (reclist != NULL) { 2892 rp = reclist->mrec_next; 2893 mi_free(reclist); 2894 reclist = rp; 2895 } 2896 } 2897 2898 static mrec_t * 2899 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2900 mrec_t *next) 2901 { 2902 mrec_t *rp; 2903 int i; 2904 2905 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2906 SLIST_IS_EMPTY(srclist)) 2907 return (next); 2908 2909 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2910 if (rp == NULL) 2911 return (next); 2912 2913 rp->mrec_next = next; 2914 rp->mrec_type = type; 2915 rp->mrec_auxlen = 0; 2916 rp->mrec_group = *grp; 2917 if (srclist == NULL) { 2918 rp->mrec_srcs.sl_numsrc = 0; 2919 } else { 2920 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2921 for (i = 
0; i < srclist->sl_numsrc; i++) 2922 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2923 } 2924 2925 return (rp); 2926 } 2927 2928 /* 2929 * Set up initial retransmit state. If memory cannot be allocated for 2930 * the source lists, simply create as much state as is possible; memory 2931 * allocation failures are considered one type of transient error that 2932 * the retransmissions are designed to overcome (and if they aren't 2933 * transient, there are bigger problems than failing to notify the 2934 * router about multicast group membership state changes). 2935 */ 2936 static void 2937 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2938 slist_t *flist) 2939 { 2940 /* 2941 * There are only three possibilities for rtype: 2942 * New join, transition from INCLUDE {} to INCLUDE {flist} 2943 * => rtype is ALLOW_NEW_SOURCES 2944 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2945 * => rtype is CHANGE_TO_EXCLUDE 2946 * State change that involves a filter mode change 2947 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2948 */ 2949 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2950 rtype == ALLOW_NEW_SOURCES); 2951 2952 rtxp->rtx_cnt = ill->ill_mcast_rv; 2953 2954 switch (rtype) { 2955 case CHANGE_TO_EXCLUDE: 2956 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2957 CLEAR_SLIST(rtxp->rtx_allow); 2958 COPY_SLIST(flist, rtxp->rtx_block); 2959 break; 2960 case ALLOW_NEW_SOURCES: 2961 case CHANGE_TO_INCLUDE: 2962 rtxp->rtx_fmode_cnt = 2963 rtype == ALLOW_NEW_SOURCES ? 
0 : ill->ill_mcast_rv; 2964 CLEAR_SLIST(rtxp->rtx_block); 2965 COPY_SLIST(flist, rtxp->rtx_allow); 2966 break; 2967 } 2968 } 2969 2970 /* 2971 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2972 * RFC 3376 section 5.1, covers three cases: 2973 * * The current state change is a filter mode change 2974 * Set filter mode retransmit counter; set retransmit allow or 2975 * block list to new source list as appropriate, and clear the 2976 * retransmit list that was not set; send TO_IN or TO_EX with 2977 * new source list. 2978 * * The current state change is a source list change, but the filter 2979 * mode retransmit counter is > 0 2980 * Decrement filter mode retransmit counter; set retransmit 2981 * allow or block list to new source list as appropriate, 2982 * and clear the retransmit list that was not set; send TO_IN 2983 * or TO_EX with new source list. 2984 * * The current state change is a source list change, and the filter 2985 * mode retransmit counter is 0. 2986 * Merge existing rtx allow and block lists with new state: 2987 * rtx_allow = (new allow + rtx_allow) - new block 2988 * rtx_block = (new block + rtx_block) - new allow 2989 * Send ALLOW and BLOCK records for new retransmit lists; 2990 * decrement retransmit counter. 2991 * 2992 * As is the case for mcast_init_rtx(), memory allocation failures are 2993 * acceptable; we just create as much state as we can. 2994 */ 2995 static mrec_t * 2996 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist) 2997 { 2998 ill_t *ill; 2999 rtx_state_t *rtxp = &ilm->ilm_rtx; 3000 mcast_record_t txtype; 3001 mrec_t *rp, *rpnext, *rtnmrec; 3002 boolean_t ovf; 3003 3004 ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill); 3005 3006 if (mreclist == NULL) 3007 return (mreclist); 3008 3009 /* 3010 * A filter mode change is indicated by a single mrec, which is 3011 * either TO_IN or TO_EX. 
In this case, we just need to set new 3012 * retransmit state as if this were an initial join. There is 3013 * no change to the mrec list. 3014 */ 3015 if (mreclist->mrec_type == CHANGE_TO_INCLUDE || 3016 mreclist->mrec_type == CHANGE_TO_EXCLUDE) { 3017 mcast_init_rtx(ill, rtxp, mreclist->mrec_type, 3018 &mreclist->mrec_srcs); 3019 return (mreclist); 3020 } 3021 3022 /* 3023 * Only the source list has changed 3024 */ 3025 rtxp->rtx_cnt = ill->ill_mcast_rv; 3026 if (rtxp->rtx_fmode_cnt > 0) { 3027 /* but we're still sending filter mode change reports */ 3028 rtxp->rtx_fmode_cnt--; 3029 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 3030 CLEAR_SLIST(rtxp->rtx_block); 3031 COPY_SLIST(flist, rtxp->rtx_allow); 3032 txtype = CHANGE_TO_INCLUDE; 3033 } else { 3034 CLEAR_SLIST(rtxp->rtx_allow); 3035 COPY_SLIST(flist, rtxp->rtx_block); 3036 txtype = CHANGE_TO_EXCLUDE; 3037 } 3038 /* overwrite first mrec with new info */ 3039 mreclist->mrec_type = txtype; 3040 l_copy(flist, &mreclist->mrec_srcs); 3041 /* then free any remaining mrecs */ 3042 for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) { 3043 rpnext = rp->mrec_next; 3044 mi_free(rp); 3045 } 3046 mreclist->mrec_next = NULL; 3047 rtnmrec = mreclist; 3048 } else { 3049 mrec_t *allow_mrec, *block_mrec; 3050 /* 3051 * Just send the source change reports; but we need to 3052 * recalculate the ALLOW and BLOCK lists based on previous 3053 * state and new changes. 
3054 */ 3055 rtnmrec = mreclist; 3056 allow_mrec = block_mrec = NULL; 3057 for (rp = mreclist; rp != NULL; rp = rp->mrec_next) { 3058 ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES || 3059 rp->mrec_type == BLOCK_OLD_SOURCES); 3060 if (rp->mrec_type == ALLOW_NEW_SOURCES) 3061 allow_mrec = rp; 3062 else 3063 block_mrec = rp; 3064 } 3065 /* 3066 * Perform calculations: 3067 * new_allow = mrec_allow + (rtx_allow - mrec_block) 3068 * new_block = mrec_block + (rtx_block - mrec_allow) 3069 * 3070 * Each calc requires two steps, for example: 3071 * rtx_allow = rtx_allow - mrec_block; 3072 * new_allow = mrec_allow + rtx_allow; 3073 * 3074 * Store results in mrec lists, and then copy into rtx lists. 3075 * We do it in this order in case the rtx list hasn't been 3076 * alloc'd yet; if it hasn't and our alloc fails, that's okay, 3077 * Overflows are also okay. 3078 */ 3079 if (block_mrec != NULL) { 3080 l_difference_in_a(rtxp->rtx_allow, 3081 &block_mrec->mrec_srcs); 3082 } 3083 if (allow_mrec != NULL) { 3084 l_difference_in_a(rtxp->rtx_block, 3085 &allow_mrec->mrec_srcs); 3086 l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow, 3087 &ovf); 3088 } 3089 if (block_mrec != NULL) { 3090 l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block, 3091 &ovf); 3092 COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block); 3093 } else { 3094 rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES, 3095 &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec); 3096 } 3097 if (allow_mrec != NULL) { 3098 COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow); 3099 } else { 3100 rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES, 3101 &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec); 3102 } 3103 } 3104 3105 return (rtnmrec); 3106 } 3107