/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* Copyright (c) 1990 Mentat Inc. */

/*
 * Internet Group Management Protocol (IGMP) routines.
 * Multicast Listener Discovery Protocol (MLD) routines.
 *
 * Written by Steve Deering, Stanford, May 1988.
 * Modified by Rosen Sharma, Stanford, Aug 1994.
 * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
 *
 * MULTICAST 3.5.1.1
 */

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/stropts.h>
#include <sys/strlog.h>
#include <sys/strsun.h>
#include <sys/systm.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>
#include <sys/atomic.h>
#include <sys/zone.h>

#include <sys/param.h>
#include <sys/socket.h>
#include <inet/ipclassifier.h>
#include <net/if.h>
#include <net/route.h>
#include <netinet/in.h>
#include <netinet/igmp_var.h>
#include <netinet/ip6.h>
#include <netinet/icmp6.h>

#include <inet/common.h>
#include <inet/mi.h>
#include <inet/nd.h>
#include <inet/ip.h>
#include <inet/ip6.h>
#include <inet/ip_multi.h>
#include <inet/ip_listutils.h>

#include <netinet/igmp.h>
#include <inet/ip_if.h>
#include <net/pfkeyv2.h>
#include <inet/ipsec_info.h>

static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
static void	igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist);
static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
		    slist_t *srclist, mrec_t *next);
static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
		    mcast_record_t rtype, slist_t *flist);
static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);


/*
 * Macros used to do timer len conversions.  Timer values are always
 * stored and passed to the timer functions as milliseconds; but the
 * default values and values from the wire may not be.
 *
 * And yes, it's obscure, but decisecond is easier to abbreviate than
 * "tenths of a second".
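 *
 * For example, an IGMPv2 Max Response Code of 100 is 100 deciseconds,
 * so DSEC_TO_MSEC(100) yields 10000, i.e. the 10-second value in
 * milliseconds that the timer functions expect.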
 */
#define	DSEC_TO_MSEC(dsec)	((dsec) * 100)
#define	SEC_TO_MSEC(sec)	((sec) * 1000)

/*
 * A running timer (scheduled thru timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is, the
 * interval between the time it was scheduled and the time it was
 * cancelled.  This can cause delays in sending out multicast membership
 * reports.  To resolve this problem, wallclock time (absolute time) is
 * used instead of deltas (relative time) to track timers.
 *
 * The macro below gets the lbolt value, used for proper timer scheduling
 * and firing, so that multicast membership reports are sent on time.
 * The timer does not fire exactly at the time it was scheduled to fire;
 * a difference of a few milliseconds has been observed.  An offset is
 * used to take care of the difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)

/*
 * The first multicast join will trigger the igmp timers / mld timers.
 * The unit for next is milliseconds.
 */
void
igmp_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_igmp_timer_lock);

	if (ipst->ips_igmp_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	} else {
		ipst->ips_igmp_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_igmp_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_igmp_time_to_next = next;
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'igmp_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_igmp_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		ipst->ips_igmp_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_igmp_timer_lock);
		return;
	}

	mutex_exit(&ipst->ips_igmp_timer_lock);
	ret = untimeout(ipst->ips_igmp_timeout_id);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
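	/*
	 * (Per untimeout(9F), the return value is the time remaining on
	 * the timeout, or -1 if the id was not found, i.e. the handler
	 * had already run or the timeout had already been cancelled.)
	 */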
	if (ret == -1) {
		ASSERT(ipst->ips_igmp_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_igmp_timeout_id != 0);
		ipst->ips_igmp_timeout_id = 0;
	}
	if (ipst->ips_igmp_time_to_next != 0) {
		ipst->ips_igmp_time_to_next =
		    MIN(ipst->ips_igmp_time_to_next, next);
		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_igmp_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_igmp_timer_lock);
}

/*
 * mld_start_timers:
 * The unit for next is milliseconds.
 */
void
mld_start_timers(unsigned next, ip_stack_t *ipst)
{
	int	time_left;
	int	ret;

	ASSERT(next != 0 && next != INFINITY);

	mutex_enter(&ipst->ips_mld_timer_lock);
	if (ipst->ips_mld_timer_setter_active) {
		/*
		 * Serialize timer setters, one at a time.  If the
		 * timer is currently being set by someone,
		 * just record the next time when it has to be
		 * invoked and return.  The current setter will
		 * take care.
		 */
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	} else {
		ipst->ips_mld_timer_setter_active = B_TRUE;
	}
	if (ipst->ips_mld_timeout_id == 0) {
		/*
		 * The timer is inactive.  We need to start a timer.
		 */
		ipst->ips_mld_time_to_next = next;
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	/*
	 * The timer was scheduled sometime back for firing in
	 * 'mld_time_to_next' ms and is active.  We need to
	 * reschedule the timeout if the new 'next' will happen
	 * earlier than the currently scheduled timeout.
	 */
	time_left = ipst->ips_mld_timer_scheduled_last +
	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
	if (time_left < MSEC_TO_TICK(next)) {
		ipst->ips_mld_timer_setter_active = B_FALSE;
		mutex_exit(&ipst->ips_mld_timer_lock);
		return;
	}

	mutex_exit(&ipst->ips_mld_timer_lock);
	ret = untimeout(ipst->ips_mld_timeout_id);
	mutex_enter(&ipst->ips_mld_timer_lock);
	/*
	 * The timeout was cancelled, or the timeout handler
	 * completed, while we were blocked in the untimeout.
	 * No other thread could have set the timer meanwhile
	 * since we serialized all the timer setters.  Thus
	 * no timer is currently active nor executing nor will
	 * any timer fire in the future.  We start the timer now
	 * if needed.
	 */
	if (ret == -1) {
		ASSERT(ipst->ips_mld_timeout_id == 0);
	} else {
		ASSERT(ipst->ips_mld_timeout_id != 0);
		ipst->ips_mld_timeout_id = 0;
	}
	if (ipst->ips_mld_time_to_next != 0) {
		ipst->ips_mld_time_to_next =
		    MIN(ipst->ips_mld_time_to_next, next);
		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
	}
	ipst->ips_mld_timer_setter_active = B_FALSE;
	mutex_exit(&ipst->ips_mld_timer_lock);
}

/*
 * igmp_input:
 * Return NULL for a bad packet that is discarded here.
 * Return mp if the message is OK and should be handed to "raw" receivers.
 * Callers of igmp_input() may need to reinitialize variables that were
 * copied from the mblk as this calls pullupmsg().
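 * (msgpullup() allocates a new mblk and the original is freed, so
 * pointers into the old data block, such as 'ipha' below, are re-derived
 * after a successful pullup.)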
 */
/* ARGSUSED */
mblk_t *
igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
{
	igmpa_t		*igmpa;
	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
	int		iphlen, igmplen, mblklen;
	ilm_t		*ilm;
	uint32_t	src, dst;
	uint32_t	group;
	uint_t		next;
	ipif_t		*ipif;
	ip_stack_t	*ipst;

	ASSERT(ill != NULL);
	ASSERT(!ill->ill_isv6);
	ipst = ill->ill_ipst;
	++ipst->ips_igmpstat.igps_rcv_total;

	mblklen = MBLKL(mp);
	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	igmplen = ntohs(ipha->ipha_length) - iphlen;
	/*
	 * Since msg sizes are more variable with v3, just pullup the
	 * whole thing now.
	 */
	if (MBLKL(mp) < (igmplen + iphlen)) {
		mblk_t *mp1;
		if ((mp1 = msgpullup(mp, -1)) == NULL) {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		freemsg(mp);
		mp = mp1;
		ipha = (ipha_t *)(mp->b_rptr);
	}

	/*
	 * Validate lengths
	 */
	if (igmplen < IGMP_MINLEN) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		goto bad_pkt;
	}
	/*
	 * Validate checksum
	 */
	if (IP_CSUM(mp, iphlen, 0)) {
		++ipst->ips_igmpstat.igps_rcv_badsum;
		goto bad_pkt;
	}

	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
	src = ipha->ipha_src;
	dst = ipha->ipha_dst;
	if (ip_debug > 1)
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
		    (int)ntohl(src), (int)ntohl(dst),
		    ill->ill_name);

	switch (igmpa->igmpa_type) {
	case IGMP_MEMBERSHIP_QUERY:
		/*
		 * packet length differentiates between v1/v2 and v3
		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
		 */
		if ((igmplen == IGMP_MINLEN) ||
		    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
			next = igmp_query_in(ipha, igmpa, ill);
		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
			    igmplen);
		} else {
			++ipst->ips_igmpstat.igps_rcv_tooshort;
			goto bad_pkt;
		}
		if (next == 0)
			goto bad_pkt;

		if (next != INFINITY)
			igmp_start_timers(next, ipst);

		break;

	case IGMP_V1_MEMBERSHIP_REPORT:
	case IGMP_V2_MEMBERSHIP_REPORT:
		/*
		 * For fast leave to work, we have to know that we are the
		 * last person to send a report for this group.  Reports
		 * generated by us are looped back since we could potentially
		 * be a multicast router, so discard reports sourced by me.
		 */
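		/*
		 * (A report whose source address is one of our own is such
		 * a looped-back report: hand it up to raw listeners, but
		 * take no further protocol action on it.)
		 */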
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_lcl_addr == src) {
				if (ip_debug > 1) {
					(void) mi_strlog(ill->ill_rq,
					    1,
					    SL_TRACE,
					    "igmp_input: we are only "
					    "member src 0x%x ipif_local 0x%x",
					    (int)ntohl(src),
					    (int)
					    ntohl(ipif->ipif_lcl_addr));
				}
				mutex_exit(&ill->ill_lock);
				return (mp);
			}
		}
		mutex_exit(&ill->ill_lock);

		++ipst->ips_igmpstat.igps_rcv_reports;
		group = igmpa->igmpa_group;
		if (!CLASSD(group)) {
			++ipst->ips_igmpstat.igps_rcv_badreports;
			goto bad_pkt;
		}

		/*
		 * KLUDGE: if the IP source address of the report has an
		 * unspecified (i.e., zero) subnet number, as is allowed for
		 * a booting host, replace it with the correct subnet number
		 * so that a process-level multicast routing daemon can
		 * determine which subnet it arrived from.  This is necessary
		 * to compensate for the lack of any way for a process to
		 * determine the arrival interface of an incoming packet.
		 *
		 * Requires that a copy of *this* message is passed up
		 * to the raw interface, which is done by our caller.
		 */
		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
			/* Pick the first ipif on this ill */
			mutex_enter(&ill->ill_lock);
			src = ill->ill_ipif->ipif_subnet;
			mutex_exit(&ill->ill_lock);
			ip1dbg(("igmp_input: changed src to 0x%x\n",
			    (int)ntohl(src)));
			ipha->ipha_src = src;
		}

		/*
		 * If we belong to the group being reported, and
		 * we are a 'Delaying member' in the RFC terminology,
		 * stop our timer for that group and 'clear flag' i.e.
		 * mark as IGMP_OTHERMEMBER.  Do this for all logical
		 * interfaces on the given physical interface.
		 */
		mutex_enter(&ill->ill_lock);
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			ilm = ilm_lookup_ipif(ipif, group);
			if (ilm != NULL) {
				++ipst->ips_igmpstat.igps_rcv_ourreports;
				ilm->ilm_timer = INFINITY;
				ilm->ilm_state = IGMP_OTHERMEMBER;
			}
		} /* for */
		mutex_exit(&ill->ill_lock);
		break;

	case IGMP_V3_MEMBERSHIP_REPORT:
		/*
		 * Currently nothing to do here; IGMP router is not
		 * implemented in ip, and v3 hosts don't pay attention
		 * to membership reports.
		 */
		break;
	}
	/*
	 * Pass all valid IGMP packets up to any process(es) listening
	 * on a raw IGMP socket.  Do not free the packet.
	 */
	return (mp);

bad_pkt:
	freemsg(mp);
	return (NULL);
}

static uint_t
igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
{
	ilm_t	*ilm;
	int	timer;
	uint_t	next, current;
	ip_stack_t	*ipst;

	ipst = ill->ill_ipst;
	++ipst->ips_igmpstat.igps_rcv_queries;

	/*
	 * In the IGMPv2 specification, there are 3 states and a flag.
	 *
	 * In Non-Member state, we simply don't have a membership record.
	 * In Delaying Member state, our timer is running (ilm->ilm_timer
	 * < INFINITY).  In Idle Member state, our timer is not running
	 * (ilm->ilm_timer == INFINITY).
	 *
	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
	 * if I sent the last report.
	 */
	if ((igmpa->igmpa_code == 0) ||
	    (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
		/*
		 * Query from an old router.
		 * Remember that the querier on this interface is old,
		 * and set the timer to the value in RFC 1112.
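		 * (IGMP_MAX_HOST_REPORT_DELAY is RFC 1112's maximum host
		 * report delay of 10 seconds; SEC_TO_MSEC below converts
		 * it for the millisecond-based timers used here.)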
		 */

		mutex_enter(&ill->ill_lock);
		ill->ill_mcast_v1_time = 0;
		ill->ill_mcast_v1_tset = 1;
		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
			    "to IGMP_V1_ROUTER\n", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
			ill->ill_mcast_type = IGMP_V1_ROUTER;
		}
		mutex_exit(&ill->ill_lock);

		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);

		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
		    igmpa->igmpa_group != 0) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			return (0);
		}

	} else {
		in_addr_t group;

		/*
		 * Query from a new router
		 * Simply do a validity check
		 */
		group = igmpa->igmpa_group;
		if (group != 0 && (!CLASSD(group))) {
			++ipst->ips_igmpstat.igps_rcv_badqueries;
			return (0);
		}

		/*
		 * Switch interface state to v2 on receipt of a v2 query
		 * ONLY IF current state is v3.  Leave things be if the
		 * current state is v1, but do reset the v2-querier-present
		 * timer.
		 */
		mutex_enter(&ill->ill_lock);
		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
			    "to IGMP_V2_ROUTER", ill->ill_name));
			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
			ill->ill_mcast_type = IGMP_V2_ROUTER;
		}
		ill->ill_mcast_v2_time = 0;
		ill->ill_mcast_v2_tset = 1;
		mutex_exit(&ill->ill_lock);

		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
	}

	if (ip_debug > 1) {
		mutex_enter(&ill->ill_lock);
		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
		    (int)ntohs(igmpa->igmpa_code),
		    (int)ntohs(igmpa->igmpa_type));
		mutex_exit(&ill->ill_lock);
	}

	/*
	 * -Start the timers in all of our membership records
	 *  for the physical interface on which the query
	 *  arrived, excluding those that belong to the "all
	 *  hosts" group (224.0.0.1).
	 *
	 * -Restart any timer that is already running but has
	 *  a value longer than the requested timeout.
	 *
	 * -Use the value specified in the query message as
	 *  the maximum timeout.
	 */
	next = (unsigned)INFINITY;
	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {

		/*
		 * A multicast router joins INADDR_ANY address
		 * to enable promiscuous reception of all
		 * mcasts from the interface.  This INADDR_ANY
		 * is stored in the ilm_v6addr as V6 unspec addr.
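		 * (IPv4 groups are stored internally as IPv4-mapped IPv6
		 * addresses, which is why the V4MAPPED check below comes
		 * first.)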
		 */
		if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
			continue;
		if (ilm->ilm_addr == htonl(INADDR_ANY))
			continue;
		if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
		    ((igmpa->igmpa_group == 0) ||
		    (igmpa->igmpa_group == ilm->ilm_addr))) {
			if (ilm->ilm_timer > timer) {
				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
				if (ilm->ilm_timer < next)
					next = ilm->ilm_timer;
				ilm->ilm_timer += current;
			}
		}
	}
	mutex_exit(&ill->ill_lock);

	return (next);
}

static uint_t
igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
{
	uint_t		i, next, mrd, qqi, timer, delay, numsrc;
	uint_t		current;
	ilm_t		*ilm;
	ipaddr_t	*src_array;
	uint8_t		qrv;
	ip_stack_t	*ipst;

	ipst = ill->ill_ipst;
	/* make sure numsrc matches packet size */
	numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
	if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
		++ipst->ips_igmpstat.igps_rcv_tooshort;
		return (0);
	}
	src_array = (ipaddr_t *)&igmp3qa[1];

	++ipst->ips_igmpstat.igps_rcv_queries;

	if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
		mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
		exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
		mrd = (mant | 0x10) << (exp + 3);
	}
	if (mrd == 0)
		mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
	timer = DSEC_TO_MSEC(mrd);
	MCAST_RANDOM_DELAY(delay, timer);
	next = (unsigned)INFINITY;
	current = CURRENT_MSTIME;

	if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
	else
		ill->ill_mcast_rv = qrv;

	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
		uint_t hdrval, mant, exp;
		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
		qqi = (mant | 0x10) << (exp + 3);
	}
	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;

	/*
	 * If we have a pending general query response that's scheduled
	 * sooner than the delay we calculated for this response, then
	 * no action is required (RFC 3376 section 5.2 rule 1).
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_global_timer < (current + delay)) {
		mutex_exit(&ill->ill_lock);
		return (next);
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * Now take action depending upon query type:
	 * general, group specific, or group/source specific.
	 */
	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
		/*
		 * general query
		 * We know global timer is either not running or is
		 * greater than our calculated delay, so reset it to
		 * our delay (random value in range [0, response time]).
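		 * (This corresponds to rule 2 of RFC 3376 section 5.2.)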
		 */
		mutex_enter(&ill->ill_lock);
		ill->ill_global_timer = current + delay;
		mutex_exit(&ill->ill_lock);
		next = delay;

	} else {
		/* group or group/source specific query */
		mutex_enter(&ill->ill_lock);
		for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
				continue;
			/*
			 * If the query is group specific or we have a
			 * pending group specific query, the response is
			 * group specific (pending sources list should be
			 * empty).  Otherwise, need to update the pending
			 * sources list for the group and source specific
			 * response.
			 */
			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
group_query:
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
			} else {
				boolean_t overflow;
				slist_t *pktl;
				if (numsrc > MAX_FILTER_SIZE ||
				    (ilm->ilm_pendsrcs == NULL &&
				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
					/*
					 * We've been sent more sources than
					 * we can deal with; or we can't deal
					 * with a source list at all.  Revert
					 * to a group specific query.
					 */
					goto group_query;
				}
				if ((pktl = l_alloc()) == NULL)
					goto group_query;
				pktl->sl_numsrc = numsrc;
				for (i = 0; i < numsrc; i++)
					IN6_IPADDR_TO_V4MAPPED(src_array[i],
					    &(pktl->sl_addr[i]));
				l_union_in_a(ilm->ilm_pendsrcs, pktl,
				    &overflow);
				l_free(pktl);
				if (overflow)
					goto group_query;
			}

			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
			    INFINITY : (ilm->ilm_timer - current);
			/* choose soonest timer */
			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
			if (ilm->ilm_timer < next)
				next = ilm->ilm_timer;
			ilm->ilm_timer += current;
		}
		mutex_exit(&ill->ill_lock);
	}

	return (next);
}

void
igmp_joingroup(ilm_t *ilm)
{
	uint_t	timer;
	ill_t	*ill;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		ip1dbg(("Querier mode %d, sending report, group %x\n",
		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ilm->ilm_ipif, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v3 and older versions.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		timer = ilm->ilm_rtx.rtx_timer;
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we defer igmp_start_timers() to
		 * ipsq_exit().  See the comment in ipsq_exit() for details.
		 */
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next = MIN(timer,
		    ipst->ips_igmp_deferred_next);
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
		    "igmp_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

void
mld_joingroup(ilm_t *ilm)
{
	uint_t	timer;
	ill_t	*ill;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ill = ilm->ilm_ill;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
		ilm->ilm_rtx.rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_OTHERMEMBER;
		mutex_exit(&ill->ill_lock);
	} else {
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			mrec_t *rp;
			mcast_record_t rtype;
			/*
			 * The possible state changes we need to handle here:
			 *	Old State	New State	Report
			 *
			 *	INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
			 *	INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
			 *
			 * No need to send the BLOCK(0) report; ALLOW(X)
			 * is enough.
			 */
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
			mutex_exit(&ill->ill_lock);
			mldv2_sendrpt(ill, rp);
			mutex_enter(&ill->ill_lock);
			/*
			 * Set up retransmission state.  Timer is set below,
			 * for both v2 and v1.
			 */
			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
			    ilm->ilm_filter);
		}

		/* Set the ilm timer value */
		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
		    ilm->ilm_rtx.rtx_cnt > 0);
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		timer = ilm->ilm_rtx.rtx_timer;
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		mutex_exit(&ill->ill_lock);

		/*
		 * To avoid deadlock, we defer mld_start_timers() to
		 * ipsq_exit().  See the comment in ipsq_exit() for details.
		 */
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next = MIN(timer,
		    ipst->ips_mld_deferred_next);
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	if (ip_debug > 1) {
		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
		    "mld_joingroup: multicast_type %d timer %d",
		    (ilm->ilm_ill->ill_mcast_type),
		    (int)ntohl(timer));
	}
}

void
igmp_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ipif->ipif_ill;

	ASSERT(ilm->ilm_ill == NULL);
	ASSERT(!ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mutex_exit(&ill->ill_lock);
		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
		    (htonl(INADDR_ALLRTRS_GROUP)));
		return;
	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough.
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

void
mld_leavegroup(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;

	ASSERT(ilm->ilm_ipif == NULL);
	ASSERT(ill->ill_isv6);

	mutex_enter(&ill->ill_lock);
	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
	    ill->ill_mcast_type == MLD_V1_ROUTER &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mutex_exit(&ill->ill_lock);
		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
		return;
	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
		mrec_t *rp;
		/*
		 * The possible state changes we need to handle here:
		 *	Old State	New State	Report
		 *
		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
		 *
		 * No need to send the ALLOW(0) report; BLOCK(X) is enough.
		 */
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    ilm->ilm_filter, NULL);
		} else {
			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
			    NULL, NULL);
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		return;
	}
	mutex_exit(&ill->ill_lock);
}

void
igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	/* state change reports should only be sent if the router is v3 */
	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
		return;

	if (ilm->ilm_ill == NULL) {
		ASSERT(ilm->ilm_ipif != NULL);
		ill = ilm->ilm_ipif->ipif_ill;
	} else {
		ill = ilm->ilm_ill;
	}

	mutex_enter(&ill->ill_lock);

	/*
	 * Compare existing (old) state with the new state and prepare
	 * State Change Report, according to the rules in RFC 3376:
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
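	/*
	 * For example, in the unchanged-mode case handled first below,
	 * moving from INCLUDE{S1,S2} to INCLUDE{S2,S3} yields
	 * ALLOW{S3},BLOCK{S1}.
	 */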
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		rp = NULL;
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null).  If the timer is not currently
	 * running, start it (need to do a delayed start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_igmp_timer_lock);
		ipst->ips_igmp_deferred_next =
		    MIN(ipst->ips_igmp_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_igmp_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	igmpv3_sendrpt(ilm->ilm_ipif, rp);
}

void
mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
{
	ill_t *ill;
	mrec_t *rp = NULL;
	ip_stack_t	*ipst = ilm->ilm_ipst;

	ASSERT(ilm != NULL);

	ill = ilm->ilm_ill;

	/* only need to send if we have an mldv2-capable router */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Compare existing (old) state with the new state passed in
	 * and send appropriate MLDv2 State Change Report.
	 *
	 *	Old State	New State	State Change Report
	 *
	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
	 */
	if (ilm->ilm_fmode == fmode) {
		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
		slist_t	*allow, *block;
		if (((a_minus_b = l_alloc()) == NULL) ||
		    ((b_minus_a = l_alloc()) == NULL)) {
			l_free(a_minus_b);
			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
				goto send_to_ex;
			else
				goto send_to_in;
		}
		l_difference(ilm->ilm_filter, flist, a_minus_b);
		l_difference(flist, ilm->ilm_filter, b_minus_a);
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			allow = b_minus_a;
			block = a_minus_b;
		} else {
			allow = a_minus_b;
			block = b_minus_a;
		}
		if (!SLIST_IS_EMPTY(allow))
			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
			    allow, rp);
		if (!SLIST_IS_EMPTY(block))
			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
			    block, rp);
		l_free(a_minus_b);
		l_free(b_minus_a);
	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
send_to_ex:
		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	} else {
send_to_in:
		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
		    NULL);
	}

	/*
	 * Need to set up retransmission state; merge the new info with the
	 * current state (which may be null).  If the timer is not currently
	 * running, start it (need to do a deferred start of the timer as
	 * we're currently in the sq).
	 */
	rp = mcast_merge_rtx(ilm, rp, flist);
	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
		mutex_enter(&ipst->ips_mld_timer_lock);
		ipst->ips_mld_deferred_next =
		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
		mutex_exit(&ipst->ips_mld_timer_lock);
	}

	mutex_exit(&ill->ill_lock);
	mldv2_sendrpt(ill, rp);
}

uint_t
igmp_timeout_handler_per_ill(ill_t *ill)
{
	uint_t	next = INFINITY, current;
	ilm_t	*ilm;
	ipif_t	*ipif;
	mrec_t	*rp = NULL;
	mrec_t	*rtxrp = NULL;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/* First check the global timer on this interface */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v3 general
		 * query), need to skip the all hosts addr (224.0.0.1), per
		 * RFC 3376 section 5.
		 */
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
				continue;
			ASSERT(ilm->ilm_ipif != NULL);
			ilm->ilm_ipif->ipif_igmp_rpt =
			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		/*
		 * We've built per-ipif mrec lists; walk the ill's ipif list
		 * and send a report for each ipif that has an mrec list.
		 */
		for (ipif = ill->ill_ipif; ipif != NULL;
		    ipif = ipif->ipif_next) {
			if (ipif->ipif_igmp_rpt == NULL)
				continue;
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
			mutex_enter(&ill->ill_lock);
			/* mrec list was freed by igmpv3_sendrpt() */
			ipif->ipif_igmp_rpt = NULL;
		}
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr %d "
				    "typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				/*
				 * Either the pending request is just group-
				 * specific, or we couldn't get the resources
				 * (rsp) to build a source-specific reply.
				 */
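				/*
				 * Fall back to a current-state record
				 * covering the whole group.
				 */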
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
			mutex_exit(&ill->ill_lock);
			igmpv3_sendrpt(ill->ill_ipif, rp);
			mutex_enter(&ill->ill_lock);
			rp = NULL;
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
			mutex_exit(&ill->ill_lock);
			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * IGMPv3.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
		mutex_exit(&ill->ill_lock);
		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
		mutex_enter(&ill->ill_lock);
		rtxrp = NULL;
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * igmp_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 *
 * As part of multicast join and leave igmp we may need to send out an
 * igmp request.  The igmp related state variables in the ilm are protected
 * by ill_lock.  A single global igmp timer is used to track igmp timeouts.
 * igmp_timer_lock protects the global igmp_timeout_id.  igmp_start_timers
 * starts the igmp timer if needed.  It serializes multiple threads trying to
 * simultaneously start the timer using the igmp_timer_setter_active flag.
 *
 * igmp_input() receives igmp queries and responds to the queries
 * in a delayed fashion by posting a timer, i.e. it calls
 * igmp_start_timers().  Later the igmp timer fires, and the timeout handler
 * igmp_timeout_handler() performs the action exclusively after entering
 * each ill's ipsq as writer.
 * The actual igmp timeout handler needs to run in the ipsq since it has to
 * access the ilm's and we don't want another exclusive operation like,
 * say, an IPMP failover to be simultaneously moving the ilms from one ill
 * to another.
 *
 * The igmp_slowtimo() function is called thru another timer.
 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id.
 */
void
igmp_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timer_thread = curthread;
	ipst->ips_igmp_timer_scheduled_last = 0;
	ipst->ips_igmp_time_to_next = 0;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V4(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(!ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = igmp_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_igmp_timer_lock);
	ASSERT(ipst->ips_igmp_timeout_id != 0);
	ipst->ips_igmp_timeout_id = 0;
	ipst->ips_igmp_timer_thread = NULL;
	mutex_exit(&ipst->ips_igmp_timer_lock);

	if (global_next != INFINITY)
		igmp_start_timers(global_next, ipst);
}

/*
 * mld_timeout_handler_per_ill:
 * Called when there are timeout events, every next (tick).
 * Returns the number of milliseconds until the next event
 * (INFINITY if none).
 */
/* ARGSUSED */
uint_t
mld_timeout_handler_per_ill(ill_t *ill)
{
	ilm_t	*ilm;
	uint_t	next = INFINITY, current;
	mrec_t	*rp, *rtxrp;
	rtx_state_t *rtxp;
	mcast_record_t	rtype;

	ASSERT(IAM_WRITER_ILL(ill));

	mutex_enter(&ill->ill_lock);

	current = CURRENT_MSTIME;
	/*
	 * First check the global timer on this interface; the global timer
	 * is not used for MLDv1, so if it's set we can assume we're v2.
	 */
	if (ill->ill_global_timer == INFINITY)
		goto per_ilm_timer;
	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
		ill->ill_global_timer = INFINITY;
		/*
		 * Send report for each group on this interface.
		 * Since we just set the global timer (received a v2 general
		 * query), need to skip the all hosts addr (ff02::1), per
		 * RFC 3810 section 6.
		 */
		rp = NULL;
		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
			    &ipv6_all_hosts_mcast))
				continue;
			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rp);
			/*
			 * Since we're sending a report on this group, okay
			 * to delete pending group-specific timers.  Note
			 * that group-specific retransmit timers still need
			 * to be checked in the per_ilm_timer for-loop.
			 */
			ilm->ilm_timer = INFINITY;
			ilm->ilm_state = IGMP_IREPORTEDLAST;
			FREE_SLIST(ilm->ilm_pendsrcs);
			ilm->ilm_pendsrcs = NULL;
		}
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mutex_enter(&ill->ill_lock);
	} else {
		if ((ill->ill_global_timer - current) < next)
			next = ill->ill_global_timer - current;
	}

per_ilm_timer:
	rp = rtxrp = NULL;
	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_timer == INFINITY)
			goto per_ilm_rtxtimer;

		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
			if ((ilm->ilm_timer - current) < next)
				next = ilm->ilm_timer - current;

			if (ip_debug > 1) {
				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
				    "igmp_timo_hlr 2: ilm_timr"
				    " %d typ %d nxt %d",
				    (int)ntohl(ilm->ilm_timer - current),
				    (ill->ill_mcast_type), next);
			}

			goto per_ilm_rtxtimer;
		}

		/* the timer has expired, need to take action */
		ilm->ilm_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
		} else {
			slist_t *rsp;
			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
			    (rsp = l_alloc()) != NULL) {
				/*
				 * Contents of reply depend on pending
				 * requested source list.
				 */
				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
					l_intersection(ilm->ilm_filter,
					    ilm->ilm_pendsrcs, rsp);
				} else {
					l_difference(ilm->ilm_pendsrcs,
					    ilm->ilm_filter, rsp);
				}
				FREE_SLIST(ilm->ilm_pendsrcs);
				ilm->ilm_pendsrcs = NULL;
				if (!SLIST_IS_EMPTY(rsp))
					rp = mcast_bldmrec(MODE_IS_INCLUDE,
					    &ilm->ilm_v6addr, rsp, rp);
				FREE_SLIST(rsp);
			} else {
				rp = mcast_bldmrec(ilm->ilm_fmode,
				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
			}
		}

per_ilm_rtxtimer:
		rtxp = &ilm->ilm_rtx;

		if (rtxp->rtx_timer == INFINITY)
			continue;
		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
			if ((rtxp->rtx_timer - current) < next)
				next = rtxp->rtx_timer - current;
			continue;
		}

		rtxp->rtx_timer = INFINITY;
		ilm->ilm_state = IGMP_IREPORTEDLAST;
		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
			mutex_exit(&ill->ill_lock);
			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
			mutex_enter(&ill->ill_lock);
			continue;
		}

		/*
		 * The retransmit timer has popped, and our router is
		 * MLDv2.  We have to delve into the retransmit state
		 * stored in the ilm.
		 *
		 * Decrement the retransmit count.  If the fmode rtx
		 * count is active, decrement it, and send a filter
		 * mode change report with the ilm's source list.
		 * Otherwise, send a source list change report with
		 * the current retransmit lists.
		 */
		ASSERT(rtxp->rtx_cnt > 0);
		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
		rtxp->rtx_cnt--;
		if (rtxp->rtx_fmode_cnt > 0) {
			rtxp->rtx_fmode_cnt--;
			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
			    ilm->ilm_filter, rtxrp);
		} else {
			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
		}
		if (rtxp->rtx_cnt > 0) {
			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
			if (rtxp->rtx_timer < next)
				next = rtxp->rtx_timer;
			rtxp->rtx_timer += current;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			CLEAR_SLIST(rtxp->rtx_block);
		}
	}

	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
		mutex_exit(&ill->ill_lock);
		mldv2_sendrpt(ill, rp);
		mldv2_sendrpt(ill, rtxrp);
		return (next);
	}

	mutex_exit(&ill->ill_lock);

	return (next);
}

/*
 * mld_timeout_handler:
 * Called when there are timeout events, every next * TIMEOUT_INTERVAL (tick).
 * MT issues are same as igmp_timeout_handler.
 */
void
mld_timeout_handler(void *arg)
{
	ill_t	*ill;
	uint_t	global_next = INFINITY;
	uint_t	next;
	ill_walk_context_t ctx;
	boolean_t success;
	ip_stack_t *ipst = arg;

	ASSERT(arg != NULL);
	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timer_thread = curthread;
	ipst->ips_mld_timer_scheduled_last = 0;
	ipst->ips_mld_time_to_next = 0;
	mutex_exit(&ipst->ips_mld_timer_lock);

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	ill = ILL_START_WALK_V6(&ctx, ipst);
	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
		ASSERT(ill->ill_isv6);
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing.  But we need to make sure that the ill will
		 * not vanish.  So we just bump up the ill_waiter count.
		 */
		if (!ill_waiter_inc(ill))
			continue;
		rw_exit(&ipst->ips_ill_g_lock);
		success = ipsq_enter(ill, B_TRUE, NEW_OP);
		if (success) {
			next = mld_timeout_handler_per_ill(ill);
			if (next < global_next)
				global_next = next;
			ipsq_exit(ill->ill_phyint->phyint_ipsq);
		}
		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
		ill_waiter_dcr(ill);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	mutex_enter(&ipst->ips_mld_timer_lock);
	ASSERT(ipst->ips_mld_timeout_id != 0);
	ipst->ips_mld_timeout_id = 0;
	ipst->ips_mld_timer_thread = NULL;
	mutex_exit(&ipst->ips_mld_timer_lock);

	if (global_next != INFINITY)
		mld_start_timers(global_next, ipst);
}

/*
 * Calculate the Older Version Querier Present timeout value, in number
 * of slowtimo intervals, for the given ill.
 */
#define	OVQP(ill) \
	((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \
	+ MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)

/*
 * igmp_slowtimo:
 * - Reverts to a newer version if we didn't hear from the older version
 *   router in IGMP_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_igmp_max_version ensures that we don't revert to a higher
 * IGMP version than configured.
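 *
 * (OVQP above corresponds to RFC 3376's Older Version Querier Present
 * Timeout: with the RFC defaults of Robustness Variable 2, Query Interval
 * 125 s, and Query Response Interval 10 s, that is 2 * 125 + 10 = 260
 * seconds, converted to slowtimo intervals.)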
 */
void
igmp_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* Hold the ill_g_lock so that we can safely walk the ill list */
	rw_enter(&ipst->ips_ill_g_lock, RW_READER);

	/*
	 * The ill_if_t list is circular, hence the odd loop parameters.
	 *
	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
	 * structure (allowing us to skip if none of the instances have timers
	 * running).
	 */
	for (ifp = IP_V4_ILL_G_LIST(ipst);
	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
	    ifp = ifp->illif_next) {
		/*
		 * illif_mcast_v[12] are set using atomics.  If an ill hears
		 * a V1 or V2 query now and we miss seeing the count now,
		 * we will see it the next time igmp_slowtimo is called.
		 */
		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
			continue;

		avl_tree = &ifp->illif_avl_by_ppa;
		for (ill = avl_first(avl_tree); ill != NULL;
		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
			mutex_enter(&ill->ill_lock);
			if (ill->ill_mcast_v1_tset == 1)
				ill->ill_mcast_v1_time++;
			if (ill->ill_mcast_v2_tset == 1)
				ill->ill_mcast_v2_time++;
			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
				if ((ill->ill_mcast_v2_tset > 0) ||
				    (ipst->ips_igmp_max_version ==
				    IGMP_V2_ROUTER)) {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V2\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V2_ROUTER;
				} else {
					ip1dbg(("V1 query timer "
					    "expired on %s; switching "
					    "mode to IGMP_V3\n",
					    ill->ill_name));
					ill->ill_mcast_type =
					    IGMP_V3_ROUTER;
				}
				ill->ill_mcast_v1_time = 0;
				ill->ill_mcast_v1_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v1, -1);
			}
			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
				ip1dbg(("V2 query timer expired on "
				    "%s; switching mode to IGMP_V3\n",
				    ill->ill_name));
				ill->ill_mcast_type = IGMP_V3_ROUTER;
				ill->ill_mcast_v2_time = 0;
				ill->ill_mcast_v2_tset = 0;
				atomic_add_16(&ifp->illif_mcast_v2, -1);
			}
			mutex_exit(&ill->ill_lock);
		}
	}
	rw_exit(&ipst->ips_ill_g_lock);
	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
}

/*
 * mld_slowtimo:
 * - Resets to newer version if we didn't hear from the older version router
 *   in MLD_AGE_THRESHOLD seconds.
 * - Restarts slowtimeout.
 * Check for ips_mld_max_version ensures that we don't revert to a higher
 * MLD version than configured.
 */
/* ARGSUSED */
void
mld_slowtimo(void *arg)
{
	ill_t	*ill;
	ill_if_t *ifp;
	avl_tree_t *avl_tree;
	ip_stack_t *ipst = (ip_stack_t *)arg;

	ASSERT(arg != NULL);
	/* See comments in igmp_slowtimo() above... */
*/ 1824 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 1825 for (ifp = IP_V6_ILL_G_LIST(ipst); 1826 ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst); 1827 ifp = ifp->illif_next) { 1828 if (ifp->illif_mcast_v1 == 0) 1829 continue; 1830 1831 avl_tree = &ifp->illif_avl_by_ppa; 1832 for (ill = avl_first(avl_tree); ill != NULL; 1833 ill = avl_walk(avl_tree, ill, AVL_AFTER)) { 1834 mutex_enter(&ill->ill_lock); 1835 if (ill->ill_mcast_v1_tset == 1) 1836 ill->ill_mcast_v1_time++; 1837 if ((ill->ill_mcast_type == MLD_V1_ROUTER) && 1838 (ipst->ips_mld_max_version >= MLD_V2_ROUTER) && 1839 (ill->ill_mcast_v1_time >= OVQP(ill))) { 1840 ip1dbg(("MLD query timer expired on" 1841 " %s; switching mode to MLD_V2\n", 1842 ill->ill_name)); 1843 ill->ill_mcast_type = MLD_V2_ROUTER; 1844 ill->ill_mcast_v1_time = 0; 1845 ill->ill_mcast_v1_tset = 0; 1846 atomic_add_16(&ifp->illif_mcast_v1, -1); 1847 } 1848 mutex_exit(&ill->ill_lock); 1849 } 1850 } 1851 rw_exit(&ipst->ips_ill_g_lock); 1852 mutex_enter(&ipst->ips_mld_slowtimeout_lock); 1853 ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst, 1854 MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); 1855 mutex_exit(&ipst->ips_mld_slowtimeout_lock); 1856 } 1857 1858 /* 1859 * igmp_sendpkt: 1860 * This will send to ip_wput like icmp_inbound. 1861 * Note that the lower ill (on which the membership is kept) is used 1862 * as an upper ill to pass in the multicast parameters. 1863 */ 1864 static void 1865 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) 1866 { 1867 mblk_t *mp; 1868 igmpa_t *igmpa; 1869 uint8_t *rtralert; 1870 ipha_t *ipha; 1871 int hdrlen = sizeof (ipha_t) + RTRALERT_LEN; 1872 size_t size = hdrlen + sizeof (igmpa_t); 1873 ipif_t *ipif = ilm->ilm_ipif; 1874 ill_t *ill = ipif->ipif_ill; /* Will be the "lower" ill */ 1875 mblk_t *first_mp; 1876 ipsec_out_t *io; 1877 zoneid_t zoneid; 1878 ip_stack_t *ipst = ill->ill_ipst; 1879 1880 /* 1881 * We need to make sure this packet goes out on an ipif. If 1882 * there is some global policy match in ip_wput_ire, we need 1883 * to get to the right interface after IPSEC processing. 1884 * To make sure this multicast packet goes out on the right 1885 * interface, we attach an ipsec_out and initialize ill_index 1886 * like we did in ip_wput. To make sure that this packet does 1887 * not get forwarded on other interfaces or looped back, we 1888 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop 1889 * to B_FALSE. 1890 * 1891 * We also need to make sure that this does not get load balanced 1892 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if 1893 * here. If it gets load balanced, switches supporting igmp snooping 1894 * will send the packet that it receives for this multicast group 1895 * to the interface that we are sending on. As we have joined the 1896 * multicast group on this ill, by sending the packet out on this 1897 * ill, we receive all the packets back on this ill. 
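 *
 * In short, the M_CTL block prepended below carries no security
 * association state of its own (ipsec_out_secure stays B_FALSE); the
 * ipsec_out_t exists only to pin the outgoing interface and zone and to
 * request the no-route, no-multicast-loop behavior described above,
 * while global IPsec policy is still consulted via
 * ipsec_out_use_global_policy.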
1898	 */
1899	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1900	if (first_mp == NULL)
1901		return;
1902
1903	first_mp->b_datap->db_type = M_CTL;
1904	first_mp->b_wptr += sizeof (ipsec_info_t);
1905	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
1906	/* ipsec_out_secure is B_FALSE now */
1907	io = (ipsec_out_t *)first_mp->b_rptr;
1908	io->ipsec_out_type = IPSEC_OUT;
1909	io->ipsec_out_len = sizeof (ipsec_out_t);
1910	io->ipsec_out_use_global_policy = B_TRUE;
1911	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
1912	io->ipsec_out_attach_if = B_TRUE;
1913	io->ipsec_out_multicast_loop = B_FALSE;
1914	io->ipsec_out_dontroute = B_TRUE;
1915	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
1916		zoneid = GLOBAL_ZONEID;
1917	io->ipsec_out_zoneid = zoneid;
1918	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */
1919
1920	mp = allocb(size, BPRI_HI);
1921	if (mp == NULL) {
1922		freemsg(first_mp);
1923		return;
1924	}
1925	mp->b_wptr = mp->b_rptr + size;
1926	first_mp->b_cont = mp;
1927
1928	ipha = (ipha_t *)mp->b_rptr;
1929	rtralert = (uint8_t *)&(ipha[1]);
1930	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1931	igmpa->igmpa_type = type;
1932	igmpa->igmpa_code = 0;
1933	igmpa->igmpa_group = ilm->ilm_addr;
1934	igmpa->igmpa_cksum = 0;
1935	igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0);
1936
1937	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1938	rtralert[1] = RTRALERT_LEN;
1939	rtralert[2] = 0;
1940	rtralert[3] = 0;
1941
1942	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1943	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1944	ipha->ipha_type_of_service = 0;
1945	ipha->ipha_length = htons(size);
1946	ipha->ipha_ident = 0;
1947	ipha->ipha_fragment_offset_and_flags = 0;
1948	ipha->ipha_ttl = IGMP_TTL;
1949	ipha->ipha_protocol = IPPROTO_IGMP;
1950	ipha->ipha_hdr_checksum = 0;
1951	ipha->ipha_dst = addr ? addr : igmpa->igmpa_group;
1952	ipha->ipha_src = ipif->ipif_src_addr;
1953	/*
1954	 * Request loopback of the report if we are acting as a multicast
1955	 * router, so that the process-level routing daemon can hear it.
1956	 */
1957	/*
1958	 * This will run multiple times for the same group if there are
1959	 * members on the same group for multiple ipifs on the same ill.
1960	 * The igmp_input code will suppress this due to the loopback;
1961	 * thus we always loop back the membership report.
1962	 */
1963	ASSERT(ill->ill_rq != NULL);
1964	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);
1965
1966	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
1967
1968	++ipst->ips_igmpstat.igps_snd_reports;
1969 }
1970
1971 /*
1972  * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
1973  * with the passed-in ipif. The report will contain one group record
1974  * for each element of reclist. If this causes packet length to
1975  * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
1976  * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1977  * and those buffers are freed here.
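 *
 * As a sizing sketch (byte counts assumed from the standard IGMPv3 wire
 * format, not restated from this file): each report consumes hdrsize
 * (20 byte IPv4 header + 4 byte router alert option) plus an 8 byte
 * igmp3ra_t, and each group record adds an 8 byte grphdra_t plus 4 bytes
 * per source address; once that running total would exceed ill_max_frag,
 * the remaining records roll over into another report.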
1978 */ 1979 static void 1980 igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist) 1981 { 1982 ipsec_out_t *io; 1983 igmp3ra_t *igmp3ra; 1984 grphdra_t *grphdr; 1985 mblk_t *first_mp, *mp; 1986 ipha_t *ipha; 1987 uint8_t *rtralert; 1988 ipaddr_t *src_array; 1989 int i, j, numrec, more_src_cnt; 1990 size_t hdrsize, size, rsize; 1991 ill_t *ill = ipif->ipif_ill; 1992 mrec_t *rp, *cur_reclist; 1993 mrec_t *next_reclist = reclist; 1994 boolean_t morepkts; 1995 zoneid_t zoneid; 1996 ip_stack_t *ipst = ill->ill_ipst; 1997 1998 /* if there aren't any records, there's nothing to send */ 1999 if (reclist == NULL) 2000 return; 2001 2002 hdrsize = sizeof (ipha_t) + RTRALERT_LEN; 2003 nextpkt: 2004 size = hdrsize + sizeof (igmp3ra_t); 2005 morepkts = B_FALSE; 2006 more_src_cnt = 0; 2007 cur_reclist = next_reclist; 2008 numrec = 0; 2009 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2010 rsize = sizeof (grphdra_t) + 2011 (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t)); 2012 if (size + rsize > ill->ill_max_frag) { 2013 if (rp == cur_reclist) { 2014 /* 2015 * If the first mrec we looked at is too big 2016 * to fit in a single packet (i.e the source 2017 * list is too big), we must either truncate 2018 * the list (if TO_EX or IS_EX), or send 2019 * multiple reports for the same group (all 2020 * other types). 2021 */ 2022 int srcspace, srcsperpkt; 2023 srcspace = ill->ill_max_frag - (size + 2024 sizeof (grphdra_t)); 2025 srcsperpkt = srcspace / sizeof (ipaddr_t); 2026 /* 2027 * Increment size and numrec, because we will 2028 * be sending a record for the mrec we're 2029 * looking at now. 2030 */ 2031 size += sizeof (grphdra_t) + 2032 (srcsperpkt * sizeof (ipaddr_t)); 2033 numrec++; 2034 if (rp->mrec_type == MODE_IS_EXCLUDE || 2035 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2036 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2037 if (rp->mrec_next == NULL) { 2038 /* no more packets to send */ 2039 break; 2040 } else { 2041 /* 2042 * more packets, but we're 2043 * done with this mrec. 2044 */ 2045 next_reclist = rp->mrec_next; 2046 } 2047 } else { 2048 more_src_cnt = rp->mrec_srcs.sl_numsrc 2049 - srcsperpkt; 2050 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2051 /* 2052 * We'll fix up this mrec (remove the 2053 * srcs we've already sent) before 2054 * returning to nextpkt above. 2055 */ 2056 next_reclist = rp; 2057 } 2058 } else { 2059 next_reclist = rp; 2060 } 2061 morepkts = B_TRUE; 2062 break; 2063 } 2064 size += rsize; 2065 numrec++; 2066 } 2067 2068 /* 2069 * See comments in igmp_sendpkt() about initializing for ipsec and 2070 * load balancing requirements. 
2071 */ 2072 first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI); 2073 if (first_mp == NULL) 2074 goto free_reclist; 2075 2076 first_mp->b_datap->db_type = M_CTL; 2077 first_mp->b_wptr += sizeof (ipsec_info_t); 2078 bzero(first_mp->b_rptr, sizeof (ipsec_info_t)); 2079 /* ipsec_out_secure is B_FALSE now */ 2080 io = (ipsec_out_t *)first_mp->b_rptr; 2081 io->ipsec_out_type = IPSEC_OUT; 2082 io->ipsec_out_len = sizeof (ipsec_out_t); 2083 io->ipsec_out_use_global_policy = B_TRUE; 2084 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 2085 io->ipsec_out_attach_if = B_TRUE; 2086 io->ipsec_out_multicast_loop = B_FALSE; 2087 io->ipsec_out_dontroute = B_TRUE; 2088 if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES) 2089 zoneid = GLOBAL_ZONEID; 2090 io->ipsec_out_zoneid = zoneid; 2091 2092 mp = allocb(size, BPRI_HI); 2093 if (mp == NULL) { 2094 freemsg(first_mp); 2095 goto free_reclist; 2096 } 2097 bzero((char *)mp->b_rptr, size); 2098 mp->b_wptr = (uchar_t *)(mp->b_rptr + size); 2099 first_mp->b_cont = mp; 2100 2101 ipha = (ipha_t *)mp->b_rptr; 2102 rtralert = (uint8_t *)&(ipha[1]); 2103 igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]); 2104 grphdr = (grphdra_t *)&(igmp3ra[1]); 2105 2106 rp = cur_reclist; 2107 for (i = 0; i < numrec; i++) { 2108 grphdr->grphdra_type = rp->mrec_type; 2109 grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2110 grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group); 2111 src_array = (ipaddr_t *)&(grphdr[1]); 2112 2113 for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++) 2114 src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]); 2115 2116 grphdr = (grphdra_t *)&(src_array[j]); 2117 rp = rp->mrec_next; 2118 } 2119 2120 igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT; 2121 igmp3ra->igmp3ra_numrec = htons(numrec); 2122 igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0); 2123 2124 rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT; 2125 rtralert[1] = RTRALERT_LEN; 2126 rtralert[2] = 0; 2127 rtralert[3] = 0; 2128 2129 ipha->ipha_version_and_hdr_length = IP_VERSION << 4 2130 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS); 2131 ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL; 2132 ipha->ipha_length = htons(size); 2133 ipha->ipha_ttl = IGMP_TTL; 2134 ipha->ipha_protocol = IPPROTO_IGMP; 2135 ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP); 2136 ipha->ipha_src = ipif->ipif_src_addr; 2137 2138 /* 2139 * Request loopback of the report if we are acting as a multicast 2140 * router, so that the process-level routing daemon can hear it. 2141 * 2142 * This will run multiple times for the same group if there are 2143 * members on the same group for multiple ipifs on the same ill. 2144 * The igmp_input code will suppress this due to the loopback; 2145 * thus we always loopback membership report. 
2146	 */
2147	ASSERT(ill->ill_rq != NULL);
2148	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);
2149
2150	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
2151
2152	++ipst->ips_igmpstat.igps_snd_reports;
2153
2154	if (morepkts) {
2155		if (more_src_cnt > 0) {
2156			int index, mvsize;
2157			slist_t *sl = &next_reclist->mrec_srcs;
2158			index = sl->sl_numsrc;
2159			mvsize = more_src_cnt * sizeof (in6_addr_t);
2160			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2161			    mvsize);
2162			sl->sl_numsrc = more_src_cnt;
2163		}
2164		goto nextpkt;
2165	}
2166
2167 free_reclist:
2168	while (reclist != NULL) {
2169		rp = reclist->mrec_next;
2170		mi_free(reclist);
2171		reclist = rp;
2172	}
2173 }
2174
2175 /*
2176  * mld_input:
2177  */
2178 /* ARGSUSED */
2179 void
2180 mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
2181 {
2182	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
2183	mld_hdr_t	*mldh;
2184	ilm_t		*ilm;
2185	ipif_t		*ipif;
2186	uint16_t	hdr_length, exthdr_length;
2187	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
2188	uint_t		next;
2189	int		mldlen;
2190	ip_stack_t	*ipst = ill->ill_ipst;
2191
2192	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
2193
2194	/* Make sure the src address of the packet is link-local */
2195	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
2196		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2197		freemsg(mp);
2198		return;
2199	}
2200
2201	if (ip6h->ip6_hlim != 1) {
2202		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
2203		freemsg(mp);
2204		return;
2205	}
2206
2207	/* Get to the icmp header part */
2208	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
2209		hdr_length = ip_hdr_length_v6(mp, ip6h);
2210		exthdr_length = hdr_length - IPV6_HDR_LEN;
2211	} else {
2212		hdr_length = IPV6_HDR_LEN;
2213		exthdr_length = 0;
2214	}
2215	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
2216
2217	/* An MLD packet must at least be 24 octets to be valid */
2218	if (mldlen < MLD_MINLEN) {
2219		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2220		freemsg(mp);
2221		return;
2222	}
2223
2224	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
2225
2226	switch (mldh->mld_type) {
2227	case MLD_LISTENER_QUERY:
2228		/*
2229		 * Packet length differentiates between v1 and v2: a v1
2230		 * query is exactly 24 octets long; a v2 query is >= 28.
2231		 */
2232		if ((mldlen == MLD_MINLEN) ||
2233		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
2234			next = mld_query_in(mldh, ill);
2235		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
2236			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
2237		} else {
2238			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2239			freemsg(mp);
2240			return;
2241		}
2242		if (next == 0) {
2243			freemsg(mp);
2244			return;
2245		}
2246
2247		if (next != INFINITY)
2248			mld_start_timers(next, ipst);
2249		break;
2250
2251	case MLD_LISTENER_REPORT: {
2252
2253		ASSERT(ill->ill_ipif != NULL);
2254		/*
2255		 * For fast leave to work, we have to know that we are the
2256		 * last person to send a report for this group. Reports
2257		 * generated by us are looped back since we could potentially
2258		 * be a multicast router, so we discard reports we sent ourselves.
2259		 */
2260		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
2261		mutex_enter(&ill->ill_lock);
2262		for (ipif = ill->ill_ipif; ipif != NULL;
2263		    ipif = ipif->ipif_next) {
2264			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
2265			    lcladdr_ptr)) {
2266				if (ip_debug > 1) {
2267					char buf1[INET6_ADDRSTRLEN];
2268					char buf2[INET6_ADDRSTRLEN];
2269
2270					(void) mi_strlog(ill->ill_rq,
2271					    1,
2272					    SL_TRACE,
2273					    "mld_input: we are only "
2274					    "member src %s ipif_local %s",
2275					    inet_ntop(AF_INET6, lcladdr_ptr,
2276					    buf1, sizeof (buf1)),
2277					    inet_ntop(AF_INET6,
2278					    &ipif->ipif_v6lcl_addr,
2279					    buf2, sizeof (buf2)));
2280				}
2281				mutex_exit(&ill->ill_lock);
2282				freemsg(mp);
2283				return;
2284			}
2285		}
2286		mutex_exit(&ill->ill_lock);
2287		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
2288
2289		v6group_ptr = &mldh->mld_addr;
2290		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
2291			BUMP_MIB(ill->ill_icmp6_mib,
2292			    ipv6IfIcmpInGroupMembBadReports);
2293			freemsg(mp);
2294			return;
2295		}
2296
2297
2298		/*
2299		 * If we belong to the group being reported, and we are a
2300		 * 'Delaying member' per the RFC terminology, stop our timer
2301		 * for that group and 'clear flag' i.e. mark ilm_state as
2302		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
2303		 * membership entries for the same group address (one per zone)
2304		 * so we need to walk the ill_ilm list.
2305		 */
2306		mutex_enter(&ill->ill_lock);
2307		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2308			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
2309				continue;
2310			BUMP_MIB(ill->ill_icmp6_mib,
2311			    ipv6IfIcmpInGroupMembOurReports);
2312
2313			ilm->ilm_timer = INFINITY;
2314			ilm->ilm_state = IGMP_OTHERMEMBER;
2315		}
2316		mutex_exit(&ill->ill_lock);
2317		break;
2318	}
2319	case MLD_LISTENER_REDUCTION:
2320		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
2321		break;
2322	}
2323	/*
2324	 * All MLD packets have already been passed up to any
2325	 * process(es) listening on an ICMP6 raw socket. This
2326	 * was accomplished in ip_deliver_local_v6 prior to this
2327	 * function call. It is assumed that the multicast daemon
2328	 * will have a SOCK_RAW IPPROTO_ICMPV6 socket (and presumably
2329	 * use the ICMP6_FILTER socket option to receive only the MLD
2330	 * messages), so we can free the MLD message block here.
2331	 */
2332	freemsg(mp);
2333 }
2334
2335 /*
2336  * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate
2337  * (non-zero, unsigned) timer value to be set on success.
2338  */
2339 static uint_t
2340 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2341 {
2342	ilm_t	*ilm;
2343	int	timer;
2344	uint_t	next, current;
2345	in6_addr_t *v6group;
2346
2347	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2348
2349	/*
2350	 * In the MLD specification, there are 3 states and a flag.
2351	 *
2352	 * In Non-Listener state, we simply don't have a membership record.
2353	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2354	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2355	 * INFINITY)
2356	 *
2357	 * The flag is ilm->ilm_state; it is set to IGMP_OTHERMEMBER if
2358	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
2359	 * if we sent the last report.
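	 *
	 * In those terms, the code below moves a matching Delaying or Idle
	 * member back into Delaying state (random timer up to the advertised
	 * maximum delay), or reports immediately and becomes Idle with
	 * IGMP_IREPORTEDLAST when the advertised maximum delay is zero.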
2360	 */
2361	v6group = &mldh->mld_addr;
2362	if (!IN6_IS_ADDR_UNSPECIFIED(v6group) &&
2363	    !IN6_IS_ADDR_MULTICAST(v6group)) {
2364		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
2365		return (0);
2366	}
2367
2368	/* Need to do compatibility mode checking */
2369	mutex_enter(&ill->ill_lock);
2370	ill->ill_mcast_v1_time = 0;
2371	ill->ill_mcast_v1_tset = 1;
2372	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2373		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2374		    "MLD_V1_ROUTER\n", ill->ill_name));
2375		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
2376		ill->ill_mcast_type = MLD_V1_ROUTER;
2377	}
2378	mutex_exit(&ill->ill_lock);
2379
2380	timer = (int)ntohs(mldh->mld_maxdelay);
2381	if (ip_debug > 1) {
2382		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2383		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2384		    timer, (int)mldh->mld_type);
2385	}
2386
2387	/*
2388	 * - Start the timers in all of our membership records for
2389	 *   the physical interface on which the query arrived,
2390	 *   excluding:
2391	 *   1. those that belong to the "all hosts" group, and
2392	 *   2. those with 0 scope, or 1 node-local scope.
2393	 *
2394	 * - Restart any timer that is already running but has a value
2395	 *   longer than the requested timeout.
2396	 * - Use the value specified in the query message as the
2397	 *   maximum timeout.
2398	 */
2399	next = INFINITY;
2400	mutex_enter(&ill->ill_lock);
2401
2402	current = CURRENT_MSTIME;
2403	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
2404		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2405
2406		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2407		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2408		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2409			continue;
2410		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2411		    &ipv6_all_hosts_mcast) &&
2412		    (IN6_IS_ADDR_UNSPECIFIED(v6group) ||
2413		    IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2414			if (timer == 0) {
2415				/* Respond immediately */
2416				ilm->ilm_timer = INFINITY;
2417				ilm->ilm_state = IGMP_IREPORTEDLAST;
2418				mutex_exit(&ill->ill_lock);
2419				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2420				mutex_enter(&ill->ill_lock);
2421				break;
2422			}
2423			if (ilm->ilm_timer > timer) {
2424				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2425				if (ilm->ilm_timer < next)
2426					next = ilm->ilm_timer;
2427				ilm->ilm_timer += current;
2428			}
2429			break;
2430		}
2431	}
2432	mutex_exit(&ill->ill_lock);
2433
2434	return (next);
2435 }
2436
2437 /*
2438  * Handles an MLDv2 Listener Query. On error, returns 0; on success,
2439  * returns the appropriate (non-zero, unsigned) timer value (which may
2440  * be INFINITY) to be set.
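 *
 * For example (an illustrative value, not taken from this code): a
 * Maximum Response Code of 0x8250 is in the floating point range, giving
 * an exponent of 0 and a mantissa of 0x250, and so decodes below as
 *	(0x250 | 0x1000) << (0 + 3) = 37504 ms (about 37.5 seconds).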
2441 */ 2442 static uint_t 2443 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen) 2444 { 2445 ilm_t *ilm; 2446 in6_addr_t *v6group, *src_array; 2447 uint_t next, numsrc, i, mrd, delay, qqi, current; 2448 uint8_t qrv; 2449 2450 v6group = &mld2q->mld2q_addr; 2451 numsrc = ntohs(mld2q->mld2q_numsrc); 2452 2453 /* make sure numsrc matches packet size */ 2454 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) { 2455 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2456 return (0); 2457 } 2458 src_array = (in6_addr_t *)&mld2q[1]; 2459 2460 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2461 2462 /* extract Maximum Response Delay from code in header */ 2463 mrd = ntohs(mld2q->mld2q_mxrc); 2464 if (mrd >= MLD_V2_MAXRT_FPMIN) { 2465 uint_t hdrval, mant, exp; 2466 hdrval = mrd; 2467 mant = hdrval & MLD_V2_MAXRT_MANT_MASK; 2468 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12; 2469 mrd = (mant | 0x1000) << (exp + 3); 2470 } 2471 if (mrd == 0) 2472 mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL); 2473 2474 MCAST_RANDOM_DELAY(delay, mrd); 2475 next = (unsigned)INFINITY; 2476 current = CURRENT_MSTIME; 2477 2478 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0) 2479 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 2480 else 2481 ill->ill_mcast_rv = qrv; 2482 2483 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) { 2484 uint_t mant, exp; 2485 mant = qqi & MLD_V2_QQI_MANT_MASK; 2486 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12; 2487 qqi = (mant | 0x10) << (exp + 3); 2488 } 2489 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 2490 2491 /* 2492 * If we have a pending general query response that's scheduled 2493 * sooner than the delay we calculated for this response, then 2494 * no action is required (MLDv2 draft section 6.2 rule 1) 2495 */ 2496 mutex_enter(&ill->ill_lock); 2497 if (ill->ill_global_timer < (current + delay)) { 2498 mutex_exit(&ill->ill_lock); 2499 return (next); 2500 } 2501 mutex_exit(&ill->ill_lock); 2502 2503 /* 2504 * Now take action depending on query type: general, 2505 * group specific, or group/source specific. 2506 */ 2507 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) { 2508 /* 2509 * general query 2510 * We know global timer is either not running or is 2511 * greater than our calculated delay, so reset it to 2512 * our delay (random value in range [0, response time]) 2513 */ 2514 mutex_enter(&ill->ill_lock); 2515 ill->ill_global_timer = current + delay; 2516 mutex_exit(&ill->ill_lock); 2517 next = delay; 2518 2519 } else { 2520 /* group or group/source specific query */ 2521 mutex_enter(&ill->ill_lock); 2522 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2523 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2524 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2525 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) || 2526 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr)) 2527 continue; 2528 2529 /* 2530 * If the query is group specific or we have a 2531 * pending group specific query, the response is 2532 * group specific (pending sources list should be 2533 * empty). Otherwise, need to update the pending 2534 * sources list for the group and source specific 2535 * response. 
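			 *
			 * (Below, a group and source specific response also
			 * degenerates into a group specific one if the
			 * pending source list can't be allocated, the query
			 * carries more than MAX_FILTER_SIZE sources, or the
			 * merged list overflows.)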
2536 */ 2537 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2538 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2539 group_query: 2540 FREE_SLIST(ilm->ilm_pendsrcs); 2541 ilm->ilm_pendsrcs = NULL; 2542 } else { 2543 boolean_t overflow; 2544 slist_t *pktl; 2545 if (numsrc > MAX_FILTER_SIZE || 2546 (ilm->ilm_pendsrcs == NULL && 2547 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2548 /* 2549 * We've been sent more sources than 2550 * we can deal with; or we can't deal 2551 * with a source list at all. Revert 2552 * to a group specific query. 2553 */ 2554 goto group_query; 2555 } 2556 if ((pktl = l_alloc()) == NULL) 2557 goto group_query; 2558 pktl->sl_numsrc = numsrc; 2559 for (i = 0; i < numsrc; i++) 2560 pktl->sl_addr[i] = src_array[i]; 2561 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2562 &overflow); 2563 l_free(pktl); 2564 if (overflow) 2565 goto group_query; 2566 } 2567 ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ? 2568 INFINITY : (ilm->ilm_timer - current); 2569 /* set timer to soonest value */ 2570 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2571 if (ilm->ilm_timer < next) 2572 next = ilm->ilm_timer; 2573 ilm->ilm_timer += current; 2574 break; 2575 } 2576 mutex_exit(&ill->ill_lock); 2577 } 2578 2579 return (next); 2580 } 2581 2582 /* 2583 * Send MLDv1 response packet with hoplimit 1 2584 */ 2585 static void 2586 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2587 { 2588 mblk_t *mp; 2589 mld_hdr_t *mldh; 2590 ip6_t *ip6h; 2591 ip6_hbh_t *ip6hbh; 2592 struct ip6_opt_router *ip6router; 2593 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2594 ill_t *ill = ilm->ilm_ill; /* Will be the "lower" ill */ 2595 ipif_t *ipif; 2596 ip6i_t *ip6i; 2597 2598 /* 2599 * We need to place a router alert option in this packet. The length 2600 * of the options must be a multiple of 8. The hbh option header is 2 2601 * bytes followed by the 4 byte router alert option. That leaves 2602 * 2 bytes of pad for a total of 8 bytes. 2603 */ 2604 const int router_alert_length = 8; 2605 2606 ASSERT(ill->ill_isv6); 2607 2608 /* 2609 * We need to make sure that this packet does not get load balanced. 2610 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2611 * ip_newroute_ipif_v6 knows how to handle such packets. 2612 * If it gets load balanced, switches supporting MLD snooping 2613 * (in the future) will send the packet that it receives for this 2614 * multicast group to the interface that we are sending on. As we have 2615 * joined the multicast group on this ill, by sending the packet out 2616 * on this ill, we receive all the packets back on this ill. 2617 */ 2618 size += sizeof (ip6i_t) + router_alert_length; 2619 mp = allocb(size, BPRI_HI); 2620 if (mp == NULL) 2621 return; 2622 bzero(mp->b_rptr, size); 2623 mp->b_wptr = mp->b_rptr + size; 2624 2625 ip6i = (ip6i_t *)mp->b_rptr; 2626 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2627 ip6i->ip6i_nxt = IPPROTO_RAW; 2628 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2629 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2630 2631 ip6h = (ip6_t *)&ip6i[1]; 2632 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2633 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2634 /* 2635 * A zero is a pad option of length 1. The bzero of the whole packet 2636 * above will pad between ip6router and mld. 
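	 *
	 * A sketch of the resulting layout (lengths in bytes; the leading
	 * ip6i_t is consumed inside IP and never goes on the wire):
	 *	[ ip6i_t | ip6_t (40) | hbh hdr (2) | rtr alert (4) |
	 *	  pad (2) | mld_hdr_t (24) ]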
2637 */ 2638 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2639 2640 mldh->mld_type = type; 2641 mldh->mld_addr = ilm->ilm_v6addr; 2642 2643 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2644 ip6router->ip6or_len = 2; 2645 ip6router->ip6or_value[0] = 0; 2646 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2647 2648 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2649 ip6hbh->ip6h_len = 0; 2650 2651 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2652 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2653 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2654 ip6h->ip6_hops = MLD_HOP_LIMIT; 2655 if (v6addr == NULL) 2656 ip6h->ip6_dst = ilm->ilm_v6addr; 2657 else 2658 ip6h->ip6_dst = *v6addr; 2659 2660 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2661 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2662 ip6h->ip6_src = ipif->ipif_v6src_addr; 2663 ipif_refrele(ipif); 2664 } else { 2665 /* Otherwise, use IPv6 default address selection. */ 2666 ip6h->ip6_src = ipv6_all_zeros; 2667 } 2668 2669 /* 2670 * Prepare for checksum by putting icmp length in the icmp 2671 * checksum field. The checksum is calculated in ip_wput_v6. 2672 */ 2673 mldh->mld_cksum = htons(sizeof (*mldh)); 2674 2675 /* 2676 * ip_wput will automatically loopback the multicast packet to 2677 * the conn if multicast loopback is enabled. 2678 * The MIB stats corresponding to this outgoing MLD packet 2679 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2680 * ->icmp_update_out_mib_v6 function call. 2681 */ 2682 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2683 } 2684 2685 /* 2686 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2687 * report will contain one multicast address record for each element of 2688 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2689 * multiple reports are sent. reclist is assumed to be made up of 2690 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 2691 */ 2692 static void 2693 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2694 { 2695 mblk_t *mp; 2696 mld2r_t *mld2r; 2697 mld2mar_t *mld2mar; 2698 in6_addr_t *srcarray; 2699 ip6_t *ip6h; 2700 ip6_hbh_t *ip6hbh; 2701 ip6i_t *ip6i; 2702 struct ip6_opt_router *ip6router; 2703 size_t size, optlen, padlen, icmpsize, rsize; 2704 ipif_t *ipif; 2705 int i, numrec, more_src_cnt; 2706 mrec_t *rp, *cur_reclist; 2707 mrec_t *next_reclist = reclist; 2708 boolean_t morepkts; 2709 2710 /* If there aren't any records, there's nothing to send */ 2711 if (reclist == NULL) 2712 return; 2713 2714 ASSERT(ill->ill_isv6); 2715 2716 /* 2717 * Total option length (optlen + padlen) must be a multiple of 2718 * 8 bytes. We assume here that optlen <= 8, so the total option 2719 * length will be 8. Assert this in case anything ever changes. 
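 *
 * Concretely, sizeof (ip6_hbh_t) is 2 and sizeof (struct ip6_opt_router)
 * is 4, so optlen is 6 and padlen is 2; the bzero of the packet below
 * supplies the two one-byte pad options.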
2720 */ 2721 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2722 ASSERT(optlen <= 8); 2723 padlen = 8 - optlen; 2724 nextpkt: 2725 icmpsize = sizeof (mld2r_t); 2726 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2727 morepkts = B_FALSE; 2728 more_src_cnt = 0; 2729 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2730 rp = rp->mrec_next, numrec++) { 2731 rsize = sizeof (mld2mar_t) + 2732 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2733 if (size + rsize > ill->ill_max_frag) { 2734 if (rp == cur_reclist) { 2735 /* 2736 * If the first mrec we looked at is too big 2737 * to fit in a single packet (i.e the source 2738 * list is too big), we must either truncate 2739 * the list (if TO_EX or IS_EX), or send 2740 * multiple reports for the same group (all 2741 * other types). 2742 */ 2743 int srcspace, srcsperpkt; 2744 srcspace = ill->ill_max_frag - 2745 (size + sizeof (mld2mar_t)); 2746 srcsperpkt = srcspace / sizeof (in6_addr_t); 2747 /* 2748 * Increment icmpsize and size, because we will 2749 * be sending a record for the mrec we're 2750 * looking at now. 2751 */ 2752 rsize = sizeof (mld2mar_t) + 2753 (srcsperpkt * sizeof (in6_addr_t)); 2754 icmpsize += rsize; 2755 size += rsize; 2756 if (rp->mrec_type == MODE_IS_EXCLUDE || 2757 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2758 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2759 if (rp->mrec_next == NULL) { 2760 /* no more packets to send */ 2761 break; 2762 } else { 2763 /* 2764 * more packets, but we're 2765 * done with this mrec. 2766 */ 2767 next_reclist = rp->mrec_next; 2768 } 2769 } else { 2770 more_src_cnt = rp->mrec_srcs.sl_numsrc 2771 - srcsperpkt; 2772 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2773 /* 2774 * We'll fix up this mrec (remove the 2775 * srcs we've already sent) before 2776 * returning to nextpkt above. 2777 */ 2778 next_reclist = rp; 2779 } 2780 } else { 2781 next_reclist = rp; 2782 } 2783 morepkts = B_TRUE; 2784 break; 2785 } 2786 icmpsize += rsize; 2787 size += rsize; 2788 } 2789 2790 /* 2791 * We need to make sure that this packet does not get load balanced. 2792 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2793 * ip_newroute_ipif_v6 know how to handle such packets. 2794 * If it gets load balanced, switches supporting MLD snooping 2795 * (in the future) will send the packet that it receives for this 2796 * multicast group to the interface that we are sending on. As we have 2797 * joined the multicast group on this ill, by sending the packet out 2798 * on this ill, we receive all the packets back on this ill. 
2799 */ 2800 size += sizeof (ip6i_t); 2801 mp = allocb(size, BPRI_HI); 2802 if (mp == NULL) 2803 goto free_reclist; 2804 bzero(mp->b_rptr, size); 2805 mp->b_wptr = mp->b_rptr + size; 2806 2807 ip6i = (ip6i_t *)mp->b_rptr; 2808 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2809 ip6i->ip6i_nxt = IPPROTO_RAW; 2810 ip6i->ip6i_flags = IP6I_ATTACH_IF; 2811 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2812 2813 ip6h = (ip6_t *)&(ip6i[1]); 2814 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2815 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2816 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2817 mld2mar = (mld2mar_t *)&(mld2r[1]); 2818 2819 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2820 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2821 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2822 ip6h->ip6_hops = MLD_HOP_LIMIT; 2823 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2824 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2825 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2826 ip6h->ip6_src = ipif->ipif_v6src_addr; 2827 ipif_refrele(ipif); 2828 } else { 2829 /* otherwise, use IPv6 default address selection. */ 2830 ip6h->ip6_src = ipv6_all_zeros; 2831 } 2832 2833 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2834 /* 2835 * ip6h_len is the number of 8-byte words, not including the first 2836 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2837 */ 2838 ip6hbh->ip6h_len = 0; 2839 2840 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2841 ip6router->ip6or_len = 2; 2842 ip6router->ip6or_value[0] = 0; 2843 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2844 2845 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2846 mld2r->mld2r_nummar = htons(numrec); 2847 /* 2848 * Prepare for the checksum by putting icmp length in the icmp 2849 * checksum field. The checksum is calculated in ip_wput_v6. 2850 */ 2851 mld2r->mld2r_cksum = htons(icmpsize); 2852 2853 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2854 mld2mar->mld2mar_type = rp->mrec_type; 2855 mld2mar->mld2mar_auxlen = 0; 2856 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2857 mld2mar->mld2mar_group = rp->mrec_group; 2858 srcarray = (in6_addr_t *)&(mld2mar[1]); 2859 2860 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2861 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2862 2863 mld2mar = (mld2mar_t *)&(srcarray[i]); 2864 } 2865 2866 /* 2867 * ip_wput will automatically loopback the multicast packet to 2868 * the conn if multicast loopback is enabled. 2869 * The MIB stats corresponding to this outgoing MLD packet 2870 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2871 * ->icmp_update_out_mib_v6 function call. 
2872 */ 2873 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2874 2875 if (morepkts) { 2876 if (more_src_cnt > 0) { 2877 int index, mvsize; 2878 slist_t *sl = &next_reclist->mrec_srcs; 2879 index = sl->sl_numsrc; 2880 mvsize = more_src_cnt * sizeof (in6_addr_t); 2881 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2882 mvsize); 2883 sl->sl_numsrc = more_src_cnt; 2884 } 2885 goto nextpkt; 2886 } 2887 2888 free_reclist: 2889 while (reclist != NULL) { 2890 rp = reclist->mrec_next; 2891 mi_free(reclist); 2892 reclist = rp; 2893 } 2894 } 2895 2896 static mrec_t * 2897 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2898 mrec_t *next) 2899 { 2900 mrec_t *rp; 2901 int i; 2902 2903 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2904 SLIST_IS_EMPTY(srclist)) 2905 return (next); 2906 2907 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2908 if (rp == NULL) 2909 return (next); 2910 2911 rp->mrec_next = next; 2912 rp->mrec_type = type; 2913 rp->mrec_auxlen = 0; 2914 rp->mrec_group = *grp; 2915 if (srclist == NULL) { 2916 rp->mrec_srcs.sl_numsrc = 0; 2917 } else { 2918 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2919 for (i = 0; i < srclist->sl_numsrc; i++) 2920 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2921 } 2922 2923 return (rp); 2924 } 2925 2926 /* 2927 * Set up initial retransmit state. If memory cannot be allocated for 2928 * the source lists, simply create as much state as is possible; memory 2929 * allocation failures are considered one type of transient error that 2930 * the retransmissions are designed to overcome (and if they aren't 2931 * transient, there are bigger problems than failing to notify the 2932 * router about multicast group membership state changes). 2933 */ 2934 static void 2935 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2936 slist_t *flist) 2937 { 2938 /* 2939 * There are only three possibilities for rtype: 2940 * New join, transition from INCLUDE {} to INCLUDE {flist} 2941 * => rtype is ALLOW_NEW_SOURCES 2942 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2943 * => rtype is CHANGE_TO_EXCLUDE 2944 * State change that involves a filter mode change 2945 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2946 */ 2947 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2948 rtype == ALLOW_NEW_SOURCES); 2949 2950 rtxp->rtx_cnt = ill->ill_mcast_rv; 2951 2952 switch (rtype) { 2953 case CHANGE_TO_EXCLUDE: 2954 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2955 CLEAR_SLIST(rtxp->rtx_allow); 2956 COPY_SLIST(flist, rtxp->rtx_block); 2957 break; 2958 case ALLOW_NEW_SOURCES: 2959 case CHANGE_TO_INCLUDE: 2960 rtxp->rtx_fmode_cnt = 2961 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2962 CLEAR_SLIST(rtxp->rtx_block); 2963 COPY_SLIST(flist, rtxp->rtx_allow); 2964 break; 2965 } 2966 } 2967 2968 /* 2969 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2970 * RFC 3376 section 5.1, covers three cases: 2971 * * The current state change is a filter mode change 2972 * Set filter mode retransmit counter; set retransmit allow or 2973 * block list to new source list as appropriate, and clear the 2974 * retransmit list that was not set; send TO_IN or TO_EX with 2975 * new source list. 
2976 * * The current state change is a source list change, but the filter 2977 * mode retransmit counter is > 0 2978 * Decrement filter mode retransmit counter; set retransmit 2979 * allow or block list to new source list as appropriate, 2980 * and clear the retransmit list that was not set; send TO_IN 2981 * or TO_EX with new source list. 2982 * * The current state change is a source list change, and the filter 2983 * mode retransmit counter is 0. 2984 * Merge existing rtx allow and block lists with new state: 2985 * rtx_allow = (new allow + rtx_allow) - new block 2986 * rtx_block = (new block + rtx_block) - new allow 2987 * Send ALLOW and BLOCK records for new retransmit lists; 2988 * decrement retransmit counter. 2989 * 2990 * As is the case for mcast_init_rtx(), memory allocation failures are 2991 * acceptable; we just create as much state as we can. 2992 */ 2993 static mrec_t * 2994 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist) 2995 { 2996 ill_t *ill; 2997 rtx_state_t *rtxp = &ilm->ilm_rtx; 2998 mcast_record_t txtype; 2999 mrec_t *rp, *rpnext, *rtnmrec; 3000 boolean_t ovf; 3001 3002 ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill); 3003 3004 if (mreclist == NULL) 3005 return (mreclist); 3006 3007 /* 3008 * A filter mode change is indicated by a single mrec, which is 3009 * either TO_IN or TO_EX. In this case, we just need to set new 3010 * retransmit state as if this were an initial join. There is 3011 * no change to the mrec list. 3012 */ 3013 if (mreclist->mrec_type == CHANGE_TO_INCLUDE || 3014 mreclist->mrec_type == CHANGE_TO_EXCLUDE) { 3015 mcast_init_rtx(ill, rtxp, mreclist->mrec_type, 3016 &mreclist->mrec_srcs); 3017 return (mreclist); 3018 } 3019 3020 /* 3021 * Only the source list has changed 3022 */ 3023 rtxp->rtx_cnt = ill->ill_mcast_rv; 3024 if (rtxp->rtx_fmode_cnt > 0) { 3025 /* but we're still sending filter mode change reports */ 3026 rtxp->rtx_fmode_cnt--; 3027 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 3028 CLEAR_SLIST(rtxp->rtx_block); 3029 COPY_SLIST(flist, rtxp->rtx_allow); 3030 txtype = CHANGE_TO_INCLUDE; 3031 } else { 3032 CLEAR_SLIST(rtxp->rtx_allow); 3033 COPY_SLIST(flist, rtxp->rtx_block); 3034 txtype = CHANGE_TO_EXCLUDE; 3035 } 3036 /* overwrite first mrec with new info */ 3037 mreclist->mrec_type = txtype; 3038 l_copy(flist, &mreclist->mrec_srcs); 3039 /* then free any remaining mrecs */ 3040 for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) { 3041 rpnext = rp->mrec_next; 3042 mi_free(rp); 3043 } 3044 mreclist->mrec_next = NULL; 3045 rtnmrec = mreclist; 3046 } else { 3047 mrec_t *allow_mrec, *block_mrec; 3048 /* 3049 * Just send the source change reports; but we need to 3050 * recalculate the ALLOW and BLOCK lists based on previous 3051 * state and new changes. 3052 */ 3053 rtnmrec = mreclist; 3054 allow_mrec = block_mrec = NULL; 3055 for (rp = mreclist; rp != NULL; rp = rp->mrec_next) { 3056 ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES || 3057 rp->mrec_type == BLOCK_OLD_SOURCES); 3058 if (rp->mrec_type == ALLOW_NEW_SOURCES) 3059 allow_mrec = rp; 3060 else 3061 block_mrec = rp; 3062 } 3063 /* 3064 * Perform calculations: 3065 * new_allow = mrec_allow + (rtx_allow - mrec_block) 3066 * new_block = mrec_block + (rtx_block - mrec_allow) 3067 * 3068 * Each calc requires two steps, for example: 3069 * rtx_allow = rtx_allow - mrec_block; 3070 * new_allow = mrec_allow + rtx_allow; 3071 * 3072 * Store results in mrec lists, and then copy into rtx lists. 
3073		 * We do it in this order in case the rtx list hasn't been
3074		 * alloc'd yet; if it hasn't and our alloc fails, that's okay.
3075		 * Overflows are also okay.
3076		 */
3077		if (block_mrec != NULL) {
3078			l_difference_in_a(rtxp->rtx_allow,
3079			    &block_mrec->mrec_srcs);
3080		}
3081		if (allow_mrec != NULL) {
3082			l_difference_in_a(rtxp->rtx_block,
3083			    &allow_mrec->mrec_srcs);
3084			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
3085			    &ovf);
3086		}
3087		if (block_mrec != NULL) {
3088			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
3089			    &ovf);
3090			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
3091		} else {
3092			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
3093			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
3094		}
3095		if (allow_mrec != NULL) {
3096			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
3097		} else {
3098			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
3099			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
3100		}
3101	}
3102
3103	return (rtnmrec);
3104 }
3105
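/*
 * Worked example for mcast_merge_rtx()'s source list merge above
 * (addresses purely illustrative): with pending retransmit state
 * rtx_allow = {S1, S2} and rtx_block = {S3}, and a new change carrying
 * mrec_allow = {S3} and mrec_block = {S2}:
 *	new_allow = {S3} + ({S1, S2} - {S2}) = {S1, S3}
 *	new_block = {S2} + ({S3} - {S3})     = {S2}
 * and the ALLOW/BLOCK records transmitted carry exactly those lists.
 */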