1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 /* 30 * Internet Group Management Protocol (IGMP) routines. 31 * Multicast Listener Discovery Protocol (MLD) routines. 32 * 33 * Written by Steve Deering, Stanford, May 1988. 34 * Modified by Rosen Sharma, Stanford, Aug 1994. 35 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
36 * 37 * MULTICAST 3.5.1.1 38 */ 39 40 #include <sys/types.h> 41 #include <sys/stream.h> 42 #include <sys/stropts.h> 43 #include <sys/strlog.h> 44 #include <sys/strsun.h> 45 #include <sys/systm.h> 46 #include <sys/ddi.h> 47 #include <sys/sunddi.h> 48 #include <sys/cmn_err.h> 49 #include <sys/atomic.h> 50 #include <sys/zone.h> 51 52 #include <sys/param.h> 53 #include <sys/socket.h> 54 #include <inet/ipclassifier.h> 55 #include <net/if.h> 56 #include <net/route.h> 57 #include <netinet/in.h> 58 #include <netinet/igmp_var.h> 59 #include <netinet/ip6.h> 60 #include <netinet/icmp6.h> 61 62 #include <inet/common.h> 63 #include <inet/mi.h> 64 #include <inet/nd.h> 65 #include <inet/ip.h> 66 #include <inet/ip6.h> 67 #include <inet/ip_multi.h> 68 #include <inet/ip_listutils.h> 69 70 #include <netinet/igmp.h> 71 #include <inet/ip_if.h> 72 #include <net/pfkeyv2.h> 73 #include <inet/ipsec_info.h> 74 75 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 76 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 77 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 78 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 79 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 80 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 81 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 82 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 83 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 84 slist_t *srclist, mrec_t *next); 85 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 86 mcast_record_t rtype, slist_t *flist); 87 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 88 89 90 /* 91 * Macros used to do timer len conversions. Timer values are always 92 * stored and passed to the timer functions as milliseconds; but the 93 * default values and values from the wire may not be. 
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is the time
 * interval between time scheduled to time cancelled.  This can cause
 * delays in sending out multicast membership reports.  To resolve this
 * problem, wallclock time (absolute time) is used instead of deltas
 * (relative time) to track timers.
 *
 * The MACRO below gets the lbolt value, used for proper timer scheduling
 * and firing.  Therefore multicast membership reports are sent on time.
 * The timer does not exactly fire at the time it was scheduled to fire,
 * there is a difference of a few milliseconds observed.  An offset is used
 * to take care of the difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)

/*
 * igmp_start_timers:
 * Arrange for igmp_timeout_handler() to run 'next' milliseconds from now,
 * unless an already-pending timeout would fire sooner.  The first
 * multicast join will trigger the igmp timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_igmp_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Pending timeout already fires sooner; leave it alone. */
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * Drop the lock across untimeout(); the handler may be blocked
	 * waiting for this same lock.
	 */
	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(ipst->ips_igmp_timeout_id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		/* Handler ran to completion; it cleared the id itself. */
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		/* We cancelled the pending timeout. */
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * MLD counterpart of igmp_start_timers(); identical serialization and
 * reschedule-if-sooner logic, operating on the ips_mld_* timer state.
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_mld_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		/* Pending timeout already fires sooner; leave it alone. */
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/* Drop the lock across untimeout(), as in igmp_start_timers(). */
	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(ipst->ips_mld_timeout_id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were copied
 * from the mblk as this calls pullupmsg().
298 */ 299 /* ARGSUSED */ 300 mblk_t * 301 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 302 { 303 igmpa_t *igmpa; 304 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 305 int iphlen, igmplen, mblklen; 306 ilm_t *ilm; 307 uint32_t src, dst; 308 uint32_t group; 309 uint_t next; 310 ipif_t *ipif; 311 ip_stack_t *ipst; 312 313 ASSERT(ill != NULL); 314 ASSERT(!ill->ill_isv6); 315 ipst = ill->ill_ipst; 316 ++ipst->ips_igmpstat.igps_rcv_total; 317 318 mblklen = MBLKL(mp); 319 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 320 ++ipst->ips_igmpstat.igps_rcv_tooshort; 321 goto bad_pkt; 322 } 323 igmplen = ntohs(ipha->ipha_length) - iphlen; 324 /* 325 * Since msg sizes are more variable with v3, just pullup the 326 * whole thing now. 327 */ 328 if (MBLKL(mp) < (igmplen + iphlen)) { 329 mblk_t *mp1; 330 if ((mp1 = msgpullup(mp, -1)) == NULL) { 331 ++ipst->ips_igmpstat.igps_rcv_tooshort; 332 goto bad_pkt; 333 } 334 freemsg(mp); 335 mp = mp1; 336 ipha = (ipha_t *)(mp->b_rptr); 337 } 338 339 /* 340 * Validate lengths 341 */ 342 if (igmplen < IGMP_MINLEN) { 343 ++ipst->ips_igmpstat.igps_rcv_tooshort; 344 goto bad_pkt; 345 } 346 /* 347 * Validate checksum 348 */ 349 if (IP_CSUM(mp, iphlen, 0)) { 350 ++ipst->ips_igmpstat.igps_rcv_badsum; 351 goto bad_pkt; 352 } 353 354 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 355 src = ipha->ipha_src; 356 dst = ipha->ipha_dst; 357 if (ip_debug > 1) 358 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 359 "igmp_input: src 0x%x, dst 0x%x on %s\n", 360 (int)ntohl(src), (int)ntohl(dst), 361 ill->ill_name); 362 363 switch (igmpa->igmpa_type) { 364 case IGMP_MEMBERSHIP_QUERY: 365 /* 366 * packet length differentiates between v1/v2 and v3 367 * v1/v2 should be exactly 8 octets long; v3 is >= 12 368 */ 369 if ((igmplen == IGMP_MINLEN) || 370 (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) { 371 next = igmp_query_in(ipha, igmpa, ill); 372 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 373 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 374 igmplen); 375 } 
else { 376 ++ipst->ips_igmpstat.igps_rcv_tooshort; 377 goto bad_pkt; 378 } 379 if (next == 0) 380 goto bad_pkt; 381 382 if (next != INFINITY) 383 igmp_start_timers(next, ipst); 384 385 break; 386 387 case IGMP_V1_MEMBERSHIP_REPORT: 388 case IGMP_V2_MEMBERSHIP_REPORT: 389 /* 390 * For fast leave to work, we have to know that we are the 391 * last person to send a report for this group. Reports 392 * generated by us are looped back since we could potentially 393 * be a multicast router, so discard reports sourced by me. 394 */ 395 mutex_enter(&ill->ill_lock); 396 for (ipif = ill->ill_ipif; ipif != NULL; 397 ipif = ipif->ipif_next) { 398 if (ipif->ipif_lcl_addr == src) { 399 if (ip_debug > 1) { 400 (void) mi_strlog(ill->ill_rq, 401 1, 402 SL_TRACE, 403 "igmp_input: we are only " 404 "member src 0x%x ipif_local 0x%x", 405 (int)ntohl(src), 406 (int) 407 ntohl(ipif->ipif_lcl_addr)); 408 } 409 mutex_exit(&ill->ill_lock); 410 return (mp); 411 } 412 } 413 mutex_exit(&ill->ill_lock); 414 415 ++ipst->ips_igmpstat.igps_rcv_reports; 416 group = igmpa->igmpa_group; 417 if (!CLASSD(group)) { 418 ++ipst->ips_igmpstat.igps_rcv_badreports; 419 goto bad_pkt; 420 } 421 422 /* 423 * KLUDGE: if the IP source address of the report has an 424 * unspecified (i.e., zero) subnet number, as is allowed for 425 * a booting host, replace it with the correct subnet number 426 * so that a process-level multicast routing demon can 427 * determine which subnet it arrived from. This is necessary 428 * to compensate for the lack of any way for a process to 429 * determine the arrival interface of an incoming packet. 430 * 431 * Requires that a copy of *this* message it passed up 432 * to the raw interface which is done by our caller. 
433 */ 434 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 435 /* Pick the first ipif on this ill */ 436 mutex_enter(&ill->ill_lock); 437 src = ill->ill_ipif->ipif_subnet; 438 mutex_exit(&ill->ill_lock); 439 ip1dbg(("igmp_input: changed src to 0x%x\n", 440 (int)ntohl(src))); 441 ipha->ipha_src = src; 442 } 443 444 /* 445 * If we belong to the group being reported, and 446 * we are a 'Delaying member' in the RFC terminology, 447 * stop our timer for that group and 'clear flag' i.e. 448 * mark as IGMP_OTHERMEMBER. Do this for all logical 449 * interfaces on the given physical interface. 450 */ 451 mutex_enter(&ill->ill_lock); 452 for (ipif = ill->ill_ipif; ipif != NULL; 453 ipif = ipif->ipif_next) { 454 ilm = ilm_lookup_ipif(ipif, group); 455 if (ilm != NULL) { 456 ++ipst->ips_igmpstat.igps_rcv_ourreports; 457 ilm->ilm_timer = INFINITY; 458 ilm->ilm_state = IGMP_OTHERMEMBER; 459 } 460 } /* for */ 461 mutex_exit(&ill->ill_lock); 462 break; 463 464 case IGMP_V3_MEMBERSHIP_REPORT: 465 /* 466 * Currently nothing to do here; IGMP router is not 467 * implemented in ip, and v3 hosts don't pay attention 468 * to membership reports. 469 */ 470 break; 471 } 472 /* 473 * Pass all valid IGMP packets up to any process(es) listening 474 * on a raw IGMP socket. Do not free the packet. 475 */ 476 return (mp); 477 478 bad_pkt: 479 freemsg(mp); 480 return (NULL); 481 } 482 483 static uint_t 484 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 485 { 486 ilm_t *ilm; 487 int timer; 488 uint_t next, current; 489 ip_stack_t *ipst; 490 491 ipst = ill->ill_ipst; 492 ++ipst->ips_igmpstat.igps_rcv_queries; 493 494 /* 495 * In the IGMPv2 specification, there are 3 states and a flag. 496 * 497 * In Non-Member state, we simply don't have a membership record. 498 * In Delaying Member state, our timer is running (ilm->ilm_timer 499 * < INFINITY). In Idle Member state, our timer is not running 500 * (ilm->ilm_timer == INFINITY). 
501 * 502 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 503 * we have heard a report from another member, or IGMP_IREPORTEDLAST 504 * if I sent the last report. 505 */ 506 if ((igmpa->igmpa_code == 0) || 507 (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) { 508 /* 509 * Query from an old router. 510 * Remember that the querier on this interface is old, 511 * and set the timer to the value in RFC 1112. 512 */ 513 514 515 mutex_enter(&ill->ill_lock); 516 ill->ill_mcast_v1_time = 0; 517 ill->ill_mcast_v1_tset = 1; 518 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 519 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 520 "to IGMP_V1_ROUTER\n", ill->ill_name)); 521 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 522 ill->ill_mcast_type = IGMP_V1_ROUTER; 523 } 524 mutex_exit(&ill->ill_lock); 525 526 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 527 528 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 529 igmpa->igmpa_group != 0) { 530 ++ipst->ips_igmpstat.igps_rcv_badqueries; 531 return (0); 532 } 533 534 } else { 535 in_addr_t group; 536 537 /* 538 * Query from a new router 539 * Simply do a validity check 540 */ 541 group = igmpa->igmpa_group; 542 if (group != 0 && (!CLASSD(group))) { 543 ++ipst->ips_igmpstat.igps_rcv_badqueries; 544 return (0); 545 } 546 547 /* 548 * Switch interface state to v2 on receipt of a v2 query 549 * ONLY IF current state is v3. Let things be if current 550 * state if v1 but do reset the v2-querier-present timer. 
551 */ 552 mutex_enter(&ill->ill_lock); 553 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 554 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 555 "to IGMP_V2_ROUTER", ill->ill_name)); 556 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 557 ill->ill_mcast_type = IGMP_V2_ROUTER; 558 } 559 ill->ill_mcast_v2_time = 0; 560 ill->ill_mcast_v2_tset = 1; 561 mutex_exit(&ill->ill_lock); 562 563 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 564 } 565 566 if (ip_debug > 1) { 567 mutex_enter(&ill->ill_lock); 568 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 569 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 570 (int)ntohs(igmpa->igmpa_code), 571 (int)ntohs(igmpa->igmpa_type)); 572 mutex_exit(&ill->ill_lock); 573 } 574 575 /* 576 * -Start the timers in all of our membership records 577 * for the physical interface on which the query 578 * arrived, excluding those that belong to the "all 579 * hosts" group (224.0.0.1). 580 * 581 * -Restart any timer that is already running but has 582 * a value longer than the requested timeout. 583 * 584 * -Use the value specified in the query message as 585 * the maximum timeout. 586 */ 587 next = (unsigned)INFINITY; 588 mutex_enter(&ill->ill_lock); 589 590 current = CURRENT_MSTIME; 591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 592 593 /* 594 * A multicast router joins INADDR_ANY address 595 * to enable promiscuous reception of all 596 * mcasts from the interface. 
This INADDR_ANY 597 * is stored in the ilm_v6addr as V6 unspec addr 598 */ 599 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 600 continue; 601 if (ilm->ilm_addr == htonl(INADDR_ANY)) 602 continue; 603 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 604 (igmpa->igmpa_group == 0) || 605 (igmpa->igmpa_group == ilm->ilm_addr)) { 606 if (ilm->ilm_timer > timer) { 607 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 608 if (ilm->ilm_timer < next) 609 next = ilm->ilm_timer; 610 ilm->ilm_timer += current; 611 } 612 } 613 } 614 mutex_exit(&ill->ill_lock); 615 616 return (next); 617 } 618 619 static uint_t 620 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 621 { 622 uint_t i, next, mrd, qqi, timer, delay, numsrc; 623 uint_t current; 624 ilm_t *ilm; 625 ipaddr_t *src_array; 626 uint8_t qrv; 627 ip_stack_t *ipst; 628 629 ipst = ill->ill_ipst; 630 /* make sure numsrc matches packet size */ 631 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 632 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 633 ++ipst->ips_igmpstat.igps_rcv_tooshort; 634 return (0); 635 } 636 src_array = (ipaddr_t *)&igmp3qa[1]; 637 638 ++ipst->ips_igmpstat.igps_rcv_queries; 639 640 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 641 uint_t hdrval, mant, exp; 642 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 643 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 644 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 645 mrd = (mant | 0x10) << (exp + 3); 646 } 647 if (mrd == 0) 648 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 649 timer = DSEC_TO_MSEC(mrd); 650 MCAST_RANDOM_DELAY(delay, timer); 651 next = (unsigned)INFINITY; 652 current = CURRENT_MSTIME; 653 654 if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 655 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 656 else 657 ill->ill_mcast_rv = qrv; 658 659 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 660 uint_t hdrval, mant, exp; 661 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 662 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 663 
exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 664 qqi = (mant | 0x10) << (exp + 3); 665 } 666 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 667 668 /* 669 * If we have a pending general query response that's scheduled 670 * sooner than the delay we calculated for this response, then 671 * no action is required (RFC3376 section 5.2 rule 1) 672 */ 673 mutex_enter(&ill->ill_lock); 674 if (ill->ill_global_timer < (current + delay)) { 675 mutex_exit(&ill->ill_lock); 676 return (next); 677 } 678 mutex_exit(&ill->ill_lock); 679 680 /* 681 * Now take action depending upon query type: 682 * general, group specific, or group/source specific. 683 */ 684 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 685 /* 686 * general query 687 * We know global timer is either not running or is 688 * greater than our calculated delay, so reset it to 689 * our delay (random value in range [0, response time]). 690 */ 691 mutex_enter(&ill->ill_lock); 692 ill->ill_global_timer = current + delay; 693 mutex_exit(&ill->ill_lock); 694 next = delay; 695 696 } else { 697 /* group or group/source specific query */ 698 mutex_enter(&ill->ill_lock); 699 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 700 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 701 (ilm->ilm_addr == htonl(INADDR_ANY)) || 702 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 703 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 704 continue; 705 /* 706 * If the query is group specific or we have a 707 * pending group specific query, the response is 708 * group specific (pending sources list should be 709 * empty). Otherwise, need to update the pending 710 * sources list for the group and source specific 711 * response. 
712 */ 713 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 714 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 715 group_query: 716 FREE_SLIST(ilm->ilm_pendsrcs); 717 ilm->ilm_pendsrcs = NULL; 718 } else { 719 boolean_t overflow; 720 slist_t *pktl; 721 if (numsrc > MAX_FILTER_SIZE || 722 (ilm->ilm_pendsrcs == NULL && 723 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 724 /* 725 * We've been sent more sources than 726 * we can deal with; or we can't deal 727 * with a source list at all. Revert 728 * to a group specific query. 729 */ 730 goto group_query; 731 } 732 if ((pktl = l_alloc()) == NULL) 733 goto group_query; 734 pktl->sl_numsrc = numsrc; 735 for (i = 0; i < numsrc; i++) 736 IN6_IPADDR_TO_V4MAPPED(src_array[i], 737 &(pktl->sl_addr[i])); 738 l_union_in_a(ilm->ilm_pendsrcs, pktl, 739 &overflow); 740 l_free(pktl); 741 if (overflow) 742 goto group_query; 743 } 744 745 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 746 INFINITY : (ilm->ilm_timer - current); 747 /* choose soonest timer */ 748 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 749 if (ilm->ilm_timer < next) 750 next = ilm->ilm_timer; 751 ilm->ilm_timer += current; 752 } 753 mutex_exit(&ill->ill_lock); 754 } 755 756 return (next); 757 } 758 759 void 760 igmp_joingroup(ilm_t *ilm) 761 { 762 uint_t timer; 763 ill_t *ill; 764 ip_stack_t *ipst = ilm->ilm_ipst; 765 766 ill = ilm->ilm_ipif->ipif_ill; 767 768 ASSERT(IAM_WRITER_ILL(ill)); 769 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 770 771 mutex_enter(&ill->ill_lock); 772 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 773 ilm->ilm_rtx.rtx_timer = INFINITY; 774 ilm->ilm_state = IGMP_OTHERMEMBER; 775 mutex_exit(&ill->ill_lock); 776 } else { 777 ip1dbg(("Querier mode %d, sending report, group %x\n", 778 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 779 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 780 mutex_exit(&ill->ill_lock); 781 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 782 mutex_enter(&ill->ill_lock); 783 } else if (ill->ill_mcast_type == 
IGMP_V2_ROUTER) { 784 mutex_exit(&ill->ill_lock); 785 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 786 mutex_enter(&ill->ill_lock); 787 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 788 mrec_t *rp; 789 mcast_record_t rtype; 790 /* 791 * The possible state changes we need to handle here: 792 * Old State New State Report 793 * 794 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 795 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 796 * 797 * No need to send the BLOCK(0) report; ALLOW(X) 798 * is enough. 799 */ 800 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 801 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 802 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 803 ilm->ilm_filter, NULL); 804 mutex_exit(&ill->ill_lock); 805 igmpv3_sendrpt(ilm->ilm_ipif, rp); 806 mutex_enter(&ill->ill_lock); 807 /* 808 * Set up retransmission state. Timer is set below, 809 * for both v3 and older versions. 810 */ 811 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 812 ilm->ilm_filter); 813 } 814 815 /* Set the ilm timer value */ 816 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 817 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 818 timer = ilm->ilm_rtx.rtx_timer; 819 ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME; 820 ilm->ilm_state = IGMP_IREPORTEDLAST; 821 mutex_exit(&ill->ill_lock); 822 823 /* 824 * To avoid deadlock, we don't call igmp_start_timers from 825 * here. igmp_start_timers needs to call untimeout, and we 826 * can't hold the ipsq across untimeout since 827 * igmp_timeout_handler could be blocking trying to 828 * acquire the ipsq. Instead we start the timer after we get 829 * out of the ipsq in ipsq_exit. 
830 */ 831 mutex_enter(&ipst->ips_igmp_timer_lock); 832 ipst->ips_igmp_deferred_next = MIN(timer, 833 ipst->ips_igmp_deferred_next); 834 mutex_exit(&ipst->ips_igmp_timer_lock); 835 } 836 837 if (ip_debug > 1) { 838 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 839 "igmp_joingroup: multicast_type %d timer %d", 840 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 841 (int)ntohl(timer)); 842 } 843 } 844 845 void 846 mld_joingroup(ilm_t *ilm) 847 { 848 uint_t timer; 849 ill_t *ill; 850 ip_stack_t *ipst = ilm->ilm_ipst; 851 852 ill = ilm->ilm_ill; 853 854 ASSERT(IAM_WRITER_ILL(ill)); 855 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 856 857 mutex_enter(&ill->ill_lock); 858 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 859 ilm->ilm_rtx.rtx_timer = INFINITY; 860 ilm->ilm_state = IGMP_OTHERMEMBER; 861 mutex_exit(&ill->ill_lock); 862 } else { 863 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 864 mutex_exit(&ill->ill_lock); 865 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 866 mutex_enter(&ill->ill_lock); 867 } else { 868 mrec_t *rp; 869 mcast_record_t rtype; 870 /* 871 * The possible state changes we need to handle here: 872 * Old State New State Report 873 * 874 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 875 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 876 * 877 * No need to send the BLOCK(0) report; ALLOW(X) 878 * is enough 879 */ 880 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 881 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 882 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 883 ilm->ilm_filter, NULL); 884 mutex_exit(&ill->ill_lock); 885 mldv2_sendrpt(ill, rp); 886 mutex_enter(&ill->ill_lock); 887 /* 888 * Set up retransmission state. Timer is set below, 889 * for both v2 and v1. 
	 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		timer = ilm->ilm_rtx.rtx_timer;
		/* store as absolute deadline; compared against CURRENT_MSTIME */
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we don't call mld_start_timers from
		 * here. mld_start_timers needs to call untimeout, and we
		 * can't hold the ipsq (i.e. the lock) across untimeout
		 * since mld_timeout_handler could be blocking trying to
		 * acquire the ipsq. Instead we start the timer after we get
		 * out of the ipsq in ipsq_exit.
		 */
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next = MIN(timer,
		    ipst->ips_mld_deferred_next);
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	if (ip_debug > 1) {
		/* NOTE(review): ntohl() on a host-order ms value looks odd;
		 * debug-only output, kept as-is. */
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

/*
 * igmp_leavegroup:
 * Tell the querier we are leaving the IPv4 group described by the ilm.
 * If we were the last host to report membership (IGMP_IREPORTEDLAST) and
 * the router is IGMPv2, send an IGMPv2 Leave to the all-routers group
 * (224.0.0.2).  If the router is IGMPv3, send the equivalent v3
 * state-change report (BLOCK of the old sources for INCLUDE mode, TO_IN(0)
 * for EXCLUDE mode).  The all-hosts group (224.0.0.1) is never left on the
 * wire, and for an IGMPv1 router no leave message exists, so nothing is
 * sent in those cases.
 */
void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	/* v4 ilms hang off an ipif, not directly off the ill */
	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		/* drop ill_lock before transmitting */
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		/* igmpv3_sendrpt() consumes (frees) the mrec list */
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * mld_leavegroup:
 * IPv6 counterpart of igmp_leavegroup().  If we reported last and the
 * router is MLDv1, send a Listener Reduction to the all-routers group;
 * if the router is MLDv2, send the equivalent v2 state-change report.
 * The all-hosts group (ff02::1) is never left on the wire.
 */
void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	/* v6 ilms hang directly off the ill */
	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		/* mldv2_sendrpt() consumes (frees) the mrec list */
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

/*
 * igmp_statechange:
 * Build and send an IGMPv3 State Change Report describing the transition
 * from the ilm's current filter state to (fmode, flist), then prime the
 * ilm's retransmit state.  No-op unless the router on the ill is IGMPv3.
 * The retransmit timer is started via the deferred-next mechanism rather
 * than directly, since we may be running inside the ipsq.
 */
void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing(old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */

	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/*
			 * Out of memory for the set differences; fall back
			 * to a (possibly redundant) filter-mode-change
			 * report.  If the second alloc failed, b_minus_a is
			 * NULL and only a_minus_b needs freeing; l_free(NULL)
			 * is harmless.
			 */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		/* deferred timer start; see comment block above */
		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
		    ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	/* igmpv3_sendrpt() consumes (frees) the mrec list */
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

/*
 * mld_statechange:
 * IPv6 counterpart of igmp_statechange(): build and send an MLDv2 State
 * Change Report for the transition from the ilm's current filter state to
 * (fmode, flist), then prime the ilm's retransmit state.  No-op unless the
 * router on the ill is MLDv2.  The retransmit timer is started via the
 * deferred-next mechanism since we may be running inside the ipsq.
 */
void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t *ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			/* alloc failure: fall back to a mode-change report */
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null). If the timer is not currently
	 * running, start it (need to do a deferred start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	/* mldv2_sendrpt() consumes (frees) the mrec list */
	mldv2_sendrpt(ill, rp);
}

/*
 * igmp_timeout_handler_per_ill:
 * Process expired IGMP timers on one IPv4 ill: the ill's global (general
 * query) timer, each ilm's group timer, and each ilm's retransmit timer,
 * sending whatever reports are due for the router version in effect.
 * Returns the number of milliseconds until the next pending event on this
 * ill, or INFINITY if there is none.  Caller must be writer on the ill's
 * ipsq; ill_lock is dropped and re-acquired around every transmission.
 */
uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	ipif_t	*ipif;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			/* accumulate one mrec list per ipif */
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			/* drop ill_lock across the send, as elsewhere */
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		/* global timer still pending; fold it into "next" */
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/* list was consumed by igmpv3_sendrpt() */
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions owed; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		/* list was consumed by igmpv3_sendrpt() */
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request. The igmp related state variables in the ilm are protected
 * by ill_lock. A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
 * starts the igmp timer if needed. It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
 * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
 * performs the action exclusively after entering each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like
 * say an IPMP failover to be simultaneously moving the ilms from one ill to
 * another.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t  global_next = INFINITY;
	uint_t  next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			/*
			 * NOTE(review): ipsq_exit() is passed (B_FALSE,
			 * B_TRUE) here vs (B_TRUE, B_FALSE) in
			 * mld_timeout_handler() -- presumably
			 * (start_igmp, start_mld) flags, with the current
			 * protocol's timer started by the
			 * igmp_start_timers() call below instead; confirm
			 * against ipsq_exit()'s definition.
			 */
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE,
			    B_TRUE);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * MLD counterpart of igmp_timeout_handler_per_ill(): process expired MLD
 * timers on one IPv6 ill and send the reports that are due.
 * Returns number of milliseconds to the next event (or INFINITY if none).
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t 	*ilm;
	uint_t	next = INFINITY, current;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		/* mldv2_sendrpt() consumes (frees) the mrec list */
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				/*
				 * NOTE(review): the log tag says "igmp" but
				 * this is the MLD path; runtime string kept
				 * as-is.
				 */
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Group-specific request, or we couldn't get
				 * resources for a source-specific reply.
				 */
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			/* more retransmissions owed; re-arm the timer */
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	/*
	 * Unlike the IGMP path, the v2 reports accumulated above are sent
	 * in one batch here, after the ilm walk.
	 */
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
 * Returns number of ticks to next event (or 0 if none).
 * MT issues are same as igmp_timeout_handler
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE);
		if (success) {
			next = mld_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE,
			    B_FALSE);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Resets to a newer router version if we didn't hear from the
 *   older-version router in IGMP_AGE_THRESHOLD seconds.
 * - Resets slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics. If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			/* age the "older version querier present" timers */
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				/*
				 * V1 querier aged out; fall back only as
				 * far as a still-present V2 querier (or the
				 * configured maximum) allows.
				 */
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v2, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* re-arm ourselves; this timer is self-perpetuating */
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	for (ifp = IP_V6_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		if (ifp->illif_mcast_v1 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				ip1dbg(("MLD query timer expired on"
				    " %s; switching mode to MLD_V2\n",
				    ill->ill_name));
				ill->ill_mcast_type = MLD_V2_ROUTER;
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	/* re-arm ourselves; this timer is self-perpetuating */
	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
}

/*
 * igmp_sendpkt:
 * Build and transmit a single IGMPv1/v2 message (report or leave) of the
 * given type for the ilm's group.  If addr is non-zero the message is sent
 * to addr, otherwise to the group itself.  The message carries the Router
 * Alert IP option and is also looped back locally so a routing daemon can
 * hear it.  This will send to ip_wput like icmp_inbound.
 * Note that the lower ill (on which the membership is kept) is used
 * as an upper ill to pass in the multicast parameters.
 */
static void
igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
{
	mblk_t	*mp;
	igmpa_t	*igmpa;
	uint8_t *rtralert;
	ipha_t	*ipha;
	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
	size_t	size  = hdrlen + sizeof (igmpa_t);
	ipif_t 	*ipif = ilm->ilm_ipif;
	ill_t 	*ill  = ipif->ipif_ill;	/* Will be the "lower" ill */
	mblk_t	*first_mp;
	ipsec_out_t *io;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * We need to make sure this packet goes out on an ipif. If
	 * there is some global policy match in ip_wput_ire, we need
	 * to get to the right interface after IPSEC processing.
	 * To make sure this multicast packet goes out on the right
	 * interface, we attach an ipsec_out and initialize ill_index
	 * like we did in ip_wput. To make sure that this packet does
	 * not get forwarded on other interfaces or looped back, we
	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
	 * to B_FALSE.
	 *
	 * We also need to make sure that this does not get load balanced
	 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if
	 * here. If it gets load balanced, switches supporting igmp snooping
	 * will send the packet that it receives for this multicast group
	 * to the interface that we are sending on. As we have joined the
	 * multicast group on this ill, by sending the packet out on this
	 * ill, we receive all the packets back on this ill.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		return;		/* best-effort: silently drop on allocb failure */

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;
	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		return;
	}
	mp->b_wptr = mp->b_rptr + size;
	first_mp->b_cont = mp;

	/* layout: IP header | 4-byte Router Alert option | IGMP message */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
	igmpa->igmpa_type = type;
	igmpa->igmpa_code = 0;
	igmpa->igmpa_group = ilm->ilm_addr;
	igmpa->igmpa_cksum = 0;
	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = 0;
	ipha->ipha_length = htons(size);
	ipha->ipha_ident = 0;
	ipha->ipha_fragment_offset_and_flags = 0;
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_hdr_checksum = 0;
	/* destination defaults to the group itself (v1/v2 reports) */
	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
	ipha->ipha_src = ipif->ipif_src_addr;
	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 */
	/*
	 * This will run multiple times for the same group if there are members
	 * on the same group for multiple ipif's on the same ill. The
	 * igmp_input code will suppress this due to the loopback thus we
	 * always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;
}

/*
 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
 * with the passed-in ipif.  The report will contain one group record
 * for each element of reclist.  If this causes packet length to
 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
 * and those buffers are freed here.
 */
static void
igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
{
	ipsec_out_t *io;
	igmp3ra_t *igmp3ra;
	grphdra_t *grphdr;
	mblk_t *first_mp, *mp;
	ipha_t *ipha;
	uint8_t *rtralert;
	ipaddr_t *src_array;
	int i, j, numrec, more_src_cnt;
	size_t hdrsize, size, rsize;
	ill_t *ill = ipif->ipif_ill;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;
	zoneid_t zoneid;
	ip_stack_t *ipst = ill->ill_ipst;

	/* if there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
nextpkt:
	/*
	 * Walk the record list, accumulating packet size, until either
	 * the list is exhausted or adding the next record would exceed
	 * ill_max_frag; in the latter case remember where to resume
	 * (next_reclist) and loop back here for another packet.
	 */
	size = hdrsize + sizeof (igmp3ra_t);
	morepkts = B_FALSE;
	more_src_cnt = 0;
	cur_reclist = next_reclist;
	numrec = 0;
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		rsize = sizeof (grphdra_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag - (size +
				    sizeof (grphdra_t));
				srcsperpkt = srcspace / sizeof (ipaddr_t);
				/*
				 * Increment size and numrec, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				size += sizeof (grphdra_t) +
				    (srcsperpkt * sizeof (ipaddr_t));
				numrec++;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		size += rsize;
		numrec++;
	}

	/*
	 * See comments in igmp_sendpkt() about initializing for ipsec and
	 * load balancing requirements.
	 */
	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
	if (first_mp == NULL)
		goto free_reclist;

	first_mp->b_datap->db_type = M_CTL;
	first_mp->b_wptr += sizeof (ipsec_info_t);
	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
	/* ipsec_out_secure is B_FALSE now */
	io = (ipsec_out_t *)first_mp->b_rptr;
	io->ipsec_out_type = IPSEC_OUT;
	io->ipsec_out_len = sizeof (ipsec_out_t);
	io->ipsec_out_use_global_policy = B_TRUE;
	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
	io->ipsec_out_attach_if = B_TRUE;
	io->ipsec_out_multicast_loop = B_FALSE;
	io->ipsec_out_dontroute = B_TRUE;
	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
		zoneid = GLOBAL_ZONEID;
	io->ipsec_out_zoneid = zoneid;

	mp = allocb(size, BPRI_HI);
	if (mp == NULL) {
		freemsg(first_mp);
		goto free_reclist;
	}
	bzero((char *)mp->b_rptr, size);
	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
	first_mp->b_cont = mp;

	/*
	 * Packet layout: IPv4 header, 4-byte router alert option,
	 * IGMPv3 report header, then the group records.
	 */
	ipha = (ipha_t *)mp->b_rptr;
	rtralert = (uint8_t *)&(ipha[1]);
	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
	grphdr = (grphdra_t *)&(igmp3ra[1]);

	/* fill in a group record (header plus source array) per mrec */
	rp = cur_reclist;
	for (i = 0; i < numrec; i++) {
		grphdr->grphdra_type = rp->mrec_type;
		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
		src_array = (ipaddr_t *)&(grphdr[1]);

		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);

		grphdr = (grphdra_t *)&(src_array[j]);
		rp = rp->mrec_next;
	}

	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
	igmp3ra->igmp3ra_numrec = htons(numrec);
	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);

	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
	rtralert[1] = RTRALERT_LEN;
	rtralert[2] = 0;
	rtralert[3] = 0;

	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
	ipha->ipha_length = htons(size);
	ipha->ipha_ttl = IGMP_TTL;
	ipha->ipha_protocol = IPPROTO_IGMP;
	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
	ipha->ipha_src = ipif->ipif_src_addr;

	/*
	 * Request loopback of the report if we are acting as a multicast
	 * router, so that the process-level routing daemon can hear it.
	 *
	 * This will run multiple times for the same group if there are
	 * members on the same group for multiple ipifs on the same ill.
	 * The igmp_input code will suppress this due to the loopback;
	 * thus we always loopback membership report.
	 */
	ASSERT(ill->ill_rq != NULL);
	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);

	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);

	++ipst->ips_igmpstat.igps_snd_reports;

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * A non-EX record was split across packets: shift
			 * the unsent sources to the front of the list so
			 * the next pass sends only those.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* reclist buffers were allocated by mcast_bldmrec(); free them all */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * mld_input:
 * Process an inbound MLD packet (query, report, or listener reduction)
 * received on 'ill'.  Validates that the IPv6 source is link-local and
 * that the hop limit is 1, locates the MLD header past any extension
 * headers, and dispatches on the message type.  The message block 'mp'
 * is always consumed (freed) before returning.
 */
/* ARGSUSED */
void
mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	ip6_t *ip6h = (ip6_t *)(mp->b_rptr);
	mld_hdr_t *mldh;
	ilm_t *ilm;
	ipif_t *ipif;
	uint16_t hdr_length, exthdr_length;
	in6_addr_t *v6group_ptr, *lcladdr_ptr;
	uint_t next;
	int mldlen;
	ip_stack_t *ipst = ill->ill_ipst;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);

	/* Make sure the src address of the packet is link-local */
	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	if (ip6h->ip6_hlim != 1) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
		freemsg(mp);
		return;
	}

	/* Get to the icmp header part */
	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
		hdr_length = ip_hdr_length_v6(mp, ip6h);
		exthdr_length = hdr_length - IPV6_HDR_LEN;
	} else {
		hdr_length = IPV6_HDR_LEN;
		exthdr_length = 0;
	}
	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;

	/* An MLD packet must at least be 24 octets to be valid */
	if (mldlen < MLD_MINLEN) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		freemsg(mp);
		return;
	}

	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);

	switch (mldh->mld_type) {
	case MLD_LISTENER_QUERY:
		/*
		 * packet length differentiates between v1 and v2.  v1
		 * query should be exactly 24 octets long; v2 is >= 28.
		 */
		if ((mldlen == MLD_MINLEN) ||
		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
			next = mld_query_in(mldh, ill);
		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
		} else {
			/* neither a valid v1 nor v2 query length */
			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
			freemsg(mp);
			return;
		}
		if (next == 0) {
			/* query handler rejected the packet */
			freemsg(mp);
			return;
		}

		if (next != INFINITY)
			mld_start_timers(next, ipst);
		break;

	case MLD_LISTENER_REPORT: {

		ASSERT(ill->ill_ipif != NULL);
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group. Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
			    lcladdr_ptr)) {
				if (ip_debug > 1) {
					char buf1[INET6_ADDRSTRLEN];
					char buf2[INET6_ADDRSTRLEN];

					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "mld_input: we are only "
					    "member src %s ipif_local %s",
					    inet_ntop(AF_INET6, lcladdr_ptr,
					    buf1, sizeof (buf1)),
					    inet_ntop(AF_INET6,
					    &ipif->ipif_v6lcl_addr,
					    buf2, sizeof (buf2)));
				}
				mutex_exit(&ill->ill_lock);
				freemsg(mp);
				return;
			}
		}
		mutex_exit(&ill->ill_lock);
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);

		v6group_ptr = &mldh->mld_addr;
		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembBadReports);
			freemsg(mp);
			return;
		}


		/*
		 * If we belong to the group being reported, and we are a
		 * 'Delaying member' per the RFC terminology, stop our timer
		 * for that group and 'clear flag' i.e. mark ilm_state as
		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
		 * membership entries for the same group address (one per zone)
		 * so we need to walk the ill_ilm list.
		 */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
				continue;
			BUMP_MIB(ill->ill_icmp6_mib,
			    ipv6IfIcmpInGroupMembOurReports);

			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_OTHERMEMBER;
		}
		mutex_exit(&ill->ill_lock);
		break;
	}
	case MLD_LISTENER_REDUCTION:
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
		break;
	}
	/*
	 * All MLD packets have already been passed up to any
	 * process(es) listening on a ICMP6 raw socket. This
	 * has been accomplished in ip_deliver_local_v6 prior to
	 * this function call. It is assumed that the multicast daemon
	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumably use the
	 * ICMP6_FILTER socket option to only receive the MLD messages)
	 * Thus we can free the MLD message block here
	 */
	freemsg(mp);
}

/*
 * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
 * (non-zero, unsigned) timer value to be set on success.
 */
static uint_t
mld_query_in(mld_hdr_t *mldh, ill_t *ill)
{
	ilm_t *ilm;
	int timer;
	uint_t next, current;
	in6_addr_t *v6group;

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * In the MLD specification, there are 3 states and a flag.
	 *
	 * In Non-Listener state, we simply don't have a membership record.
	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
	 * INFINITY)
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	v6group = &mldh->mld_addr;
	/* group must be unspecified (general query) or a multicast address */
	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
		return (0);
	}

	/* Need to do compatibility mode checking */
	mutex_enter(&ill->ill_lock);
	ill->ill_mcast_v1_time = 0;
	ill->ill_mcast_v1_tset = 1;
	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
		    "MLD_V1_ROUTER\n", ill->ill_name));
		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
		ill->ill_mcast_type = MLD_V1_ROUTER;
	}
	mutex_exit(&ill->ill_lock);

	/* max response delay, in milliseconds, straight from the header */
	timer = (int)ntohs(mldh->mld_maxdelay);
	if (ip_debug > 1) {
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
		    timer, (int)mldh->mld_type);
	}

	/*
	 * -Start the timers in all of our membership records for
	 * the physical interface on which the query arrived,
	 * excl:
	 *	1.  those that belong to the "all hosts" group,
	 *	2.  those with 0 scope, or 1 node-local scope.
	 *
	 * -Restart any timer that is already running but has a value
	 * longer that the requested timeout.
	 * -Use the value specified in the query message as the
	 * maximum timeout.
	 */
	next = INFINITY;
	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
			continue;
		/*
		 * NOTE(review): '&&' binds tighter than '||', so this
		 * condition parses as (not-all-hosts AND general-query)
		 * OR (group matches) -- the all-hosts exclusion does not
		 * apply to the group-specific arm.  Also, the
		 * unconditional break below means only the first eligible
		 * ilm is updated even for a general query.  Both look
		 * suspect but are long-standing behavior; confirm against
		 * RFC 2710 before changing.
		 */
		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
		    &ipv6_all_hosts_mcast)) &&
		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
			if (timer == 0) {
				/* Respond immediately */
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_IREPORTEDLAST;
				/* drop ill_lock across the send */
				mutex_exit(&ill->ill_lock);
				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
				mutex_enter(&ill->ill_lock);
				break;
			}
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				ilm->ilm_timer += current;
			}
			break;
		}
	}
	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
 * returns the appropriate (non-zero, unsigned) timer value (which may
 * be INFINITY) to be set.
 */
static uint_t
mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
{
	ilm_t *ilm;
	in6_addr_t *v6group, *src_array;
	uint_t next, numsrc, i, mrd, delay, qqi, current;
	uint8_t qrv;

	v6group = &mld2q->mld2q_addr;
	numsrc = ntohs(mld2q->mld2q_numsrc);

	/* make sure numsrc matches packet size */
	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
		return (0);
	}
	/* source addresses follow immediately after the fixed query header */
	src_array = (in6_addr_t *)&mld2q[1];

	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);

	/*
	 * extract Maximum Response Delay from code in header; values at
	 * or above MLD_V2_MAXRT_FPMIN are encoded as floating point
	 * (4-bit mantissa, 3-bit exponent) per the MLDv2 spec.
	 */
	mrd = ntohs(mld2q->mld2q_mxrc);
	if (mrd >= MLD_V2_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = mrd;
		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
		mrd = (mant | 0x1000) << (exp + 3);
	}
	if (mrd == 0)
		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);

	MCAST_RANDOM_DELAY(delay, mrd);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	/* Querier's Robustness Variable; 0 means use our default */
	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	/* Querier's Query Interval Code, same floating-point encoding */
	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
		uint_t mant, exp;
		mant = qqi & MLD_V2_QQI_MANT_MASK;
		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (MLDv2 draft section 6.2 rule 1)
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < (current + delay)) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending on query type: general,
	 * group specific, or group/source specific.
	 */
	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time])
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = current + delay;
		mutex_exit(&ill->ill_lock);
		next = delay;

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
				continue;

			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all. Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					pktl->sl_addr[i] = src_array[i];
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}
			/* convert absolute ilm_timer back to a relative one */
			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* set timer to soonest value */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
			break;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

/*
 * Send MLDv1 response packet with hoplimit 1
 */
static void
mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
{
	mblk_t *mp;
	mld_hdr_t *mldh;
	ip6_t *ip6h;
	ip6_hbh_t *ip6hbh;
	struct ip6_opt_router *ip6router;
	size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
	ill_t *ill = ilm->ilm_ill;	/* Will be the "lower" ill */
	ipif_t *ipif;
	ip6i_t *ip6i;

	/*
	 * We need to place a router alert option in this packet.  The length
	 * of the options must be a multiple of 8.  The hbh option header is 2
	 * bytes followed by the 4 byte router alert option.  That leaves
	 * 2 bytes of pad for a total of 8 bytes.
	 */
	const int router_alert_length = 8;

	ASSERT(ill->ill_isv6);

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
	 * ip_newroute_ipif_v6 knows how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on.  As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t) + router_alert_length;
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		return;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	/* ip6i_t is consumed by ip_wput_v6 and never goes on the wire */
	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	ip6h = (ip6_t *)&ip6i[1];
	ip6hbh = (struct ip6_hbh *)&ip6h[1];
	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
	/*
	 * A zero is a pad option of length 1.  The bzero of the whole packet
	 * above will pad between ip6router and mld.
	 */
	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);

	mldh->mld_type = type;
	mldh->mld_addr = ilm->ilm_v6addr;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	ip6hbh->ip6h_len = 0;

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	if (v6addr == NULL)
		ip6h->ip6_dst = ilm->ilm_v6addr;
	else
		ip6h->ip6_dst = *v6addr;

	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* Otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	/*
	 * Prepare for checksum by putting icmp length in the icmp
	 * checksum field.  The checksum is calculated in ip_wput_v6.
	 */
	mldh->mld_cksum = htons(sizeof (*mldh));

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
}

/*
 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
 * report will contain one multicast address record for each element of
 * reclist.  If this causes packet length to exceed ill->ill_max_frag,
 * multiple reports are sent.  reclist is assumed to be made up of
 * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
 */
static void
mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
{
	mblk_t *mp;
	mld2r_t *mld2r;
	mld2mar_t *mld2mar;
	in6_addr_t *srcarray;
	ip6_t *ip6h;
	ip6_hbh_t *ip6hbh;
	ip6i_t *ip6i;
	struct ip6_opt_router *ip6router;
	size_t size, optlen, padlen, icmpsize, rsize;
	ipif_t *ipif;
	int i, numrec, more_src_cnt;
	mrec_t *rp, *cur_reclist;
	mrec_t *next_reclist = reclist;
	boolean_t morepkts;

	/* If there aren't any records, there's nothing to send */
	if (reclist == NULL)
		return;

	ASSERT(ill->ill_isv6);

	/*
	 * Total option length (optlen + padlen) must be a multiple of
	 * 8 bytes.  We assume here that optlen <= 8, so the total option
	 * length will be 8.  Assert this in case anything ever changes.
	 */
	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
	ASSERT(optlen <= 8);
	padlen = 8 - optlen;
nextpkt:
	/*
	 * Walk the record list accumulating packet size until either the
	 * list is exhausted or the next record would push the packet past
	 * ill_max_frag; in the latter case remember where to resume
	 * (next_reclist) and loop back here for another packet.
	 */
	icmpsize = sizeof (mld2r_t);
	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
	morepkts = B_FALSE;
	more_src_cnt = 0;
	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
	    rp = rp->mrec_next, numrec++) {
		rsize = sizeof (mld2mar_t) +
		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
		if (size + rsize > ill->ill_max_frag) {
			if (rp == cur_reclist) {
				/*
				 * If the first mrec we looked at is too big
				 * to fit in a single packet (i.e the source
				 * list is too big), we must either truncate
				 * the list (if TO_EX or IS_EX), or send
				 * multiple reports for the same group (all
				 * other types).
				 */
				int srcspace, srcsperpkt;
				srcspace = ill->ill_max_frag -
				    (size + sizeof (mld2mar_t));
				srcsperpkt = srcspace / sizeof (in6_addr_t);
				/*
				 * Increment icmpsize and size, because we will
				 * be sending a record for the mrec we're
				 * looking at now.
				 */
				rsize = sizeof (mld2mar_t) +
				    (srcsperpkt * sizeof (in6_addr_t));
				icmpsize += rsize;
				size += rsize;
				if (rp->mrec_type == MODE_IS_EXCLUDE ||
				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					if (rp->mrec_next == NULL) {
						/* no more packets to send */
						break;
					} else {
						/*
						 * more packets, but we're
						 * done with this mrec.
						 */
						next_reclist = rp->mrec_next;
					}
				} else {
					more_src_cnt = rp->mrec_srcs.sl_numsrc
					    - srcsperpkt;
					rp->mrec_srcs.sl_numsrc = srcsperpkt;
					/*
					 * We'll fix up this mrec (remove the
					 * srcs we've already sent) before
					 * returning to nextpkt above.
					 */
					next_reclist = rp;
				}
			} else {
				next_reclist = rp;
			}
			morepkts = B_TRUE;
			break;
		}
		icmpsize += rsize;
		size += rsize;
	}

	/*
	 * We need to make sure that this packet does not get load balanced.
	 * So, we allocate an ip6i_t and set ATTACH_IF.  ip_wput_v6 and
	 * ip_newroute_ipif_v6 know how to handle such packets.
	 * If it gets load balanced, switches supporting MLD snooping
	 * (in the future) will send the packet that it receives for this
	 * multicast group to the interface that we are sending on.  As we have
	 * joined the multicast group on this ill, by sending the packet out
	 * on this ill, we receive all the packets back on this ill.
	 */
	size += sizeof (ip6i_t);
	mp = allocb(size, BPRI_HI);
	if (mp == NULL)
		goto free_reclist;
	bzero(mp->b_rptr, size);
	mp->b_wptr = mp->b_rptr + size;

	ip6i = (ip6i_t *)mp->b_rptr;
	ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6i->ip6i_nxt = IPPROTO_RAW;
	ip6i->ip6i_flags = IP6I_ATTACH_IF;
	ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex;

	/*
	 * Packet layout: IPv6 header, hop-by-hop header carrying the
	 * router alert option (plus pad), MLDv2 report header, then the
	 * multicast address records.
	 */
	ip6h = (ip6_t *)&(ip6i[1]);
	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
	mld2mar = (mld2mar_t *)&(mld2r[1]);

	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
	ip6h->ip6_hops = MLD_HOP_LIMIT;
	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
		ip6h->ip6_src = ipif->ipif_v6src_addr;
		ipif_refrele(ipif);
	} else {
		/* otherwise, use IPv6 default address selection. */
		ip6h->ip6_src = ipv6_all_zeros;
	}

	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
	/*
	 * ip6h_len is the number of 8-byte words, not including the first
	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
	 */
	ip6hbh->ip6h_len = 0;

	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
	ip6router->ip6or_len = 2;
	ip6router->ip6or_value[0] = 0;
	ip6router->ip6or_value[1] = IP6_ALERT_MLD;

	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
	mld2r->mld2r_nummar = htons(numrec);
	/*
	 * Prepare for the checksum by putting icmp length in the icmp
	 * checksum field.  The checksum is calculated in ip_wput_v6.
	 */
	mld2r->mld2r_cksum = htons(icmpsize);

	/* fill in one multicast address record per mrec */
	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
		mld2mar->mld2mar_type = rp->mrec_type;
		mld2mar->mld2mar_auxlen = 0;
		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
		mld2mar->mld2mar_group = rp->mrec_group;
		srcarray = (in6_addr_t *)&(mld2mar[1]);

		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
			srcarray[i] = rp->mrec_srcs.sl_addr[i];

		mld2mar = (mld2mar_t *)&(srcarray[i]);
	}

	/*
	 * ip_wput will automatically loopback the multicast packet to
	 * the conn if multicast loopback is enabled.
	 * The MIB stats corresponding to this outgoing MLD packet
	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
	 * ->icmp_update_out_mib_v6 function call.
	 */
	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);

	if (morepkts) {
		if (more_src_cnt > 0) {
			/*
			 * A non-EX record was split across packets: shift
			 * the unsent sources to the front so the next pass
			 * sends only those.
			 */
			int index, mvsize;
			slist_t *sl = &next_reclist->mrec_srcs;
			index = sl->sl_numsrc;
			mvsize = more_src_cnt * sizeof (in6_addr_t);
			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
			    mvsize);
			sl->sl_numsrc = more_src_cnt;
		}
		goto nextpkt;
	}

free_reclist:
	/* reclist buffers were allocated by mcast_bldmrec(); free them all */
	while (reclist != NULL) {
		rp = reclist->mrec_next;
		mi_free(reclist);
		reclist = rp;
	}
}

/*
 * Allocates a new mrec_t of the given record type for group 'grp' with
 * sources copied from 'srclist', and links it in front of 'next'.
 * Returns the new list head, or 'next' unchanged (i.e. no record is
 * built) when an ALLOW/BLOCK record would carry an empty source list,
 * or when allocation fails -- callers treat the latter as a tolerable
 * transient error.
 */
static mrec_t *
mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
    mrec_t *next)
{
	mrec_t *rp;
	int i;

	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
	    SLIST_IS_EMPTY(srclist))
		return (next);

	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
	if (rp == NULL)
		return (next);

	rp->mrec_next = next;
	rp->mrec_type = type;
	rp->mrec_auxlen = 0;
	rp->mrec_group = *grp;
	if (srclist == NULL) {
		rp->mrec_srcs.sl_numsrc = 0;
	} else {
		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
		for (i = 0; i < srclist->sl_numsrc; i++)
			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
	}

	return (rp);
}

/*
 * Set up initial retransmit state.  If memory cannot be allocated for
 * the source lists, simply create as much state as is possible; memory
 * allocation failures are considered one type of transient error that
 * the retransmissions are designed to overcome (and if they aren't
 * transient, there are bigger problems than failing to notify the
 * router about multicast group membership state changes).
 */
static void
mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
    slist_t *flist)
{
	/*
	 * There are only three possibilities for rtype:
	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
	 *	  => rtype is ALLOW_NEW_SOURCES
	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
	 *	  => rtype is CHANGE_TO_EXCLUDE
	 *	State change that involves a filter mode change
	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
	 */
	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
	    rtype == ALLOW_NEW_SOURCES);

	rtxp->rtx_cnt = ill->ill_mcast_rv;

	switch (rtype) {
	case CHANGE_TO_EXCLUDE:
		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_allow);
		COPY_SLIST(flist, rtxp->rtx_block);
		break;
	case ALLOW_NEW_SOURCES:
	case CHANGE_TO_INCLUDE:
		/* ALLOW_NEW_SOURCES is not a filter mode change */
		rtxp->rtx_fmode_cnt =
		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
		CLEAR_SLIST(rtxp->rtx_block);
		COPY_SLIST(flist, rtxp->rtx_allow);
		break;
	}
}

/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 * * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 * * The current state change is a source list change, but the filter
 *   mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 * * The current state change is a source list change, and the filter
 *   mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/* no BLOCK mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/* no ALLOW mrec in the list; build one if needed */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}