1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 /* 31 * Internet Group Management Protocol (IGMP) routines. 32 * Multicast Listener Discovery Protocol (MLD) routines. 33 * 34 * Written by Steve Deering, Stanford, May 1988. 35 * Modified by Rosen Sharma, Stanford, Aug 1994. 36 * Modified by Bill Fenner, Xerox PARC, Feb. 1995. 
37 * 38 * MULTICAST 3.5.1.1 39 */ 40 41 42 #include <sys/types.h> 43 #include <sys/stream.h> 44 #include <sys/dlpi.h> 45 #include <sys/stropts.h> 46 #include <sys/strlog.h> 47 #include <sys/strsun.h> 48 #include <sys/systm.h> 49 #include <sys/ddi.h> 50 #include <sys/sunddi.h> 51 #include <sys/cmn_err.h> 52 #include <sys/atomic.h> 53 #include <sys/zone.h> 54 55 #include <sys/param.h> 56 #include <sys/socket.h> 57 #define _SUN_TPI_VERSION 2 58 #include <sys/tihdr.h> 59 #include <inet/ipclassifier.h> 60 #include <net/if.h> 61 #include <net/if_arp.h> 62 #include <sys/sockio.h> 63 #include <net/route.h> 64 #include <netinet/in.h> 65 #include <netinet/igmp_var.h> 66 #include <netinet/ip6.h> 67 #include <netinet/icmp6.h> 68 69 #include <inet/common.h> 70 #include <inet/mi.h> 71 #include <inet/nd.h> 72 #include <inet/arp.h> 73 #include <inet/ip.h> 74 #include <inet/ip6.h> 75 #include <inet/ip_multi.h> 76 #include <inet/ip_listutils.h> 77 78 #include <netinet/igmp.h> 79 #include <inet/ip_if.h> 80 #include <net/pfkeyv2.h> 81 #include <inet/ipsec_info.h> 82 83 static uint_t igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill); 84 static uint_t igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen); 85 static uint_t mld_query_in(mld_hdr_t *mldh, ill_t *ill); 86 static uint_t mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen); 87 static void igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr); 88 static void mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr); 89 static void igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist); 90 static void mldv2_sendrpt(ill_t *ill, mrec_t *reclist); 91 static mrec_t *mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, 92 slist_t *srclist, mrec_t *next); 93 static void mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, 94 mcast_record_t rtype, slist_t *flist); 95 static mrec_t *mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist); 96 97 /* Following protected by igmp_timer_lock */ 98 static int igmp_time_to_next; /* Time 
since last timeout */ 99 static int igmp_timer_fired_last; 100 uint_t igmp_deferred_next = INFINITY; 101 timeout_id_t igmp_timeout_id = 0; 102 kmutex_t igmp_timer_lock; 103 104 /* Protected by igmp_slowtimeout_lock */ 105 timeout_id_t igmp_slowtimeout_id = 0; 106 kmutex_t igmp_slowtimeout_lock; 107 108 /* Following protected by mld_timer_lock */ 109 static int mld_time_to_next; /* Time since last timeout */ 110 static int mld_timer_fired_last; 111 uint_t mld_deferred_next = INFINITY; 112 timeout_id_t mld_timeout_id = 0; 113 kmutex_t mld_timer_lock; 114 115 /* Protected by mld_slowtimeout_lock */ 116 timeout_id_t mld_slowtimeout_id = 0; 117 kmutex_t mld_slowtimeout_lock; 118 119 /* 120 * Macros used to do timer len conversions. Timer values are always 121 * stored and passed to the timer functions as milliseconds; but the 122 * default values and values from the wire may not be. 123 * 124 * And yes, it's obscure, but decisecond is easier to abbreviate than 125 * "tenths of a second". 126 */ 127 #define DSEC_TO_MSEC(dsec) ((dsec) * 100) 128 #define SEC_TO_MSEC(sec) ((sec) * 1000) 129 130 /* 131 * The first multicast join will trigger the igmp timers / mld timers 132 * The unit for next is milliseconds. 133 */ 134 void 135 igmp_start_timers(unsigned next) 136 { 137 int time_left; 138 /* Protected by igmp_timer_lock */ 139 static boolean_t igmp_timer_setter_active; 140 int ret; 141 142 ASSERT(next != 0 && next != INFINITY); 143 144 mutex_enter(&igmp_timer_lock); 145 146 if (igmp_timer_setter_active) { 147 /* 148 * Serialize timer setters, one at a time. If the 149 * timer is currently being set by someone, 150 * just record the next time when it has to be 151 * invoked and return. The current setter will 152 * take care. 153 */ 154 igmp_time_to_next = MIN(igmp_time_to_next, next); 155 mutex_exit(&igmp_timer_lock); 156 return; 157 } else { 158 igmp_timer_setter_active = B_TRUE; 159 } 160 if (igmp_timeout_id == 0) { 161 /* 162 * The timer is inactive. 
We need to start a timer 163 */ 164 igmp_time_to_next = next; 165 igmp_timeout_id = timeout(igmp_timeout_handler, NULL, 166 MSEC_TO_TICK(igmp_time_to_next)); 167 igmp_timer_setter_active = B_FALSE; 168 mutex_exit(&igmp_timer_lock); 169 return; 170 } 171 172 /* 173 * The timer was scheduled sometime back for firing in 174 * 'igmp_time_to_next' ms and is active. We need to 175 * reschedule the timeout if the new 'next' will happen 176 * earlier than the currently scheduled timeout 177 */ 178 time_left = igmp_timer_fired_last + 179 MSEC_TO_TICK(igmp_time_to_next) - ddi_get_lbolt(); 180 if (time_left < MSEC_TO_TICK(next)) { 181 igmp_timer_setter_active = B_FALSE; 182 mutex_exit(&igmp_timer_lock); 183 return; 184 } 185 186 mutex_exit(&igmp_timer_lock); 187 ret = untimeout(igmp_timeout_id); 188 mutex_enter(&igmp_timer_lock); 189 /* 190 * The timeout was cancelled, or the timeout handler 191 * completed, while we were blocked in the untimeout. 192 * No other thread could have set the timer meanwhile 193 * since we serialized all the timer setters. Thus 194 * no timer is currently active nor executing nor will 195 * any timer fire in the future. We start the timer now 196 * if needed. 197 */ 198 if (ret == -1) { 199 ASSERT(igmp_timeout_id == 0); 200 } else { 201 ASSERT(igmp_timeout_id != 0); 202 igmp_timeout_id = 0; 203 } 204 if (igmp_time_to_next != 0) { 205 igmp_time_to_next = MIN(igmp_time_to_next, next); 206 igmp_timeout_id = timeout(igmp_timeout_handler, NULL, 207 MSEC_TO_TICK(igmp_time_to_next)); 208 } 209 igmp_timer_setter_active = B_FALSE; 210 mutex_exit(&igmp_timer_lock); 211 } 212 213 /* 214 * mld_start_timers: 215 * The unit for next is milliseconds. 
216 */ 217 void 218 mld_start_timers(unsigned next) 219 { 220 int time_left; 221 /* Protedted by mld_timer_lock */ 222 static boolean_t mld_timer_setter_active; 223 int ret; 224 225 ASSERT(next != 0 && next != INFINITY); 226 227 mutex_enter(&mld_timer_lock); 228 if (mld_timer_setter_active) { 229 /* 230 * Serialize timer setters, one at a time. If the 231 * timer is currently being set by someone, 232 * just record the next time when it has to be 233 * invoked and return. The current setter will 234 * take care. 235 */ 236 mld_time_to_next = MIN(mld_time_to_next, next); 237 mutex_exit(&mld_timer_lock); 238 return; 239 } else { 240 mld_timer_setter_active = B_TRUE; 241 } 242 if (mld_timeout_id == 0) { 243 /* 244 * The timer is inactive. We need to start a timer 245 */ 246 mld_time_to_next = next; 247 mld_timeout_id = timeout(mld_timeout_handler, NULL, 248 MSEC_TO_TICK(mld_time_to_next)); 249 mld_timer_setter_active = B_FALSE; 250 mutex_exit(&mld_timer_lock); 251 return; 252 } 253 254 /* 255 * The timer was scheduled sometime back for firing in 256 * 'igmp_time_to_next' ms and is active. We need to 257 * reschedule the timeout if the new 'next' will happen 258 * earlier than the currently scheduled timeout 259 */ 260 time_left = mld_timer_fired_last + 261 MSEC_TO_TICK(mld_time_to_next) - ddi_get_lbolt(); 262 if (time_left < MSEC_TO_TICK(next)) { 263 mld_timer_setter_active = B_FALSE; 264 mutex_exit(&mld_timer_lock); 265 return; 266 } 267 268 mutex_exit(&mld_timer_lock); 269 ret = untimeout(mld_timeout_id); 270 mutex_enter(&mld_timer_lock); 271 /* 272 * The timeout was cancelled, or the timeout handler 273 * completed, while we were blocked in the untimeout. 274 * No other thread could have set the timer meanwhile 275 * since we serialized all the timer setters. Thus 276 * no timer is currently active nor executing nor will 277 * any timer fire in the future. We start the timer now 278 * if needed. 
279 */ 280 if (ret == -1) { 281 ASSERT(mld_timeout_id == 0); 282 } else { 283 ASSERT(mld_timeout_id != 0); 284 mld_timeout_id = 0; 285 } 286 if (mld_time_to_next != 0) { 287 mld_time_to_next = MIN(mld_time_to_next, next); 288 mld_timeout_id = timeout(mld_timeout_handler, NULL, 289 MSEC_TO_TICK(mld_time_to_next)); 290 } 291 mld_timer_setter_active = B_FALSE; 292 mutex_exit(&mld_timer_lock); 293 } 294 295 /* 296 * igmp_input: 297 * Return 0 if the message is OK and should be handed to "raw" receivers. 298 * Callers of igmp_input() may need to reinitialize variables that were copied 299 * from the mblk as this calls pullupmsg(). 300 */ 301 /* ARGSUSED */ 302 int 303 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill) 304 { 305 igmpa_t *igmpa; 306 ipha_t *ipha = (ipha_t *)(mp->b_rptr); 307 int iphlen, igmplen, mblklen; 308 ilm_t *ilm; 309 uint32_t src, dst; 310 uint32_t group; 311 uint_t next; 312 ipif_t *ipif; 313 314 ASSERT(ill != NULL); 315 ASSERT(!ill->ill_isv6); 316 ++igmpstat.igps_rcv_total; 317 318 mblklen = MBLKL(mp); 319 if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) { 320 ++igmpstat.igps_rcv_tooshort; 321 freemsg(mp); 322 return (-1); 323 } 324 igmplen = ntohs(ipha->ipha_length) - iphlen; 325 /* 326 * Since msg sizes are more variable with v3, just pullup the 327 * whole thing now. 
328 */ 329 if (MBLKL(mp) < (igmplen + iphlen)) { 330 mblk_t *mp1; 331 if ((mp1 = msgpullup(mp, -1)) == NULL) { 332 ++igmpstat.igps_rcv_tooshort; 333 freemsg(mp); 334 return (-1); 335 } 336 freemsg(mp); 337 mp = mp1; 338 ipha = (ipha_t *)(mp->b_rptr); 339 } 340 341 /* 342 * Validate lengths 343 */ 344 if (igmplen < IGMP_MINLEN) { 345 ++igmpstat.igps_rcv_tooshort; 346 freemsg(mp); 347 return (-1); 348 } 349 /* 350 * Validate checksum 351 */ 352 if (IP_CSUM(mp, iphlen, 0)) { 353 ++igmpstat.igps_rcv_badsum; 354 freemsg(mp); 355 return (-1); 356 } 357 358 igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]); 359 src = ipha->ipha_src; 360 dst = ipha->ipha_dst; 361 if (ip_debug > 1) 362 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 363 "igmp_input: src 0x%x, dst 0x%x on %s\n", 364 (int)ntohl(src), (int)ntohl(dst), 365 ill->ill_name); 366 367 switch (igmpa->igmpa_type) { 368 case IGMP_MEMBERSHIP_QUERY: 369 /* 370 * packet length differentiates between v1/v2 and v3 371 * v1/v2 should be exactly 8 octets long; v3 is >= 12 372 */ 373 if (igmplen == IGMP_MINLEN) { 374 next = igmp_query_in(ipha, igmpa, ill); 375 } else if (igmplen >= IGMP_V3_QUERY_MINLEN) { 376 next = igmpv3_query_in((igmp3qa_t *)igmpa, ill, 377 igmplen); 378 } else { 379 ++igmpstat.igps_rcv_tooshort; 380 freemsg(mp); 381 return (-1); 382 } 383 if (next == 0) { 384 freemsg(mp); 385 return (-1); 386 } 387 388 if (next != INFINITY) 389 igmp_start_timers(next); 390 391 break; 392 393 case IGMP_V1_MEMBERSHIP_REPORT: 394 case IGMP_V2_MEMBERSHIP_REPORT: 395 /* 396 * For fast leave to work, we have to know that we are the 397 * last person to send a report for this group. Reports 398 * generated by us are looped back since we could potentially 399 * be a multicast router, so discard reports sourced by me. 
400 */ 401 mutex_enter(&ill->ill_lock); 402 for (ipif = ill->ill_ipif; ipif != NULL; 403 ipif = ipif->ipif_next) { 404 if (ipif->ipif_lcl_addr == src) { 405 if (ip_debug > 1) { 406 (void) mi_strlog(ill->ill_rq, 407 1, 408 SL_TRACE, 409 "igmp_input: we are only " 410 "member src 0x%x ipif_local 0x%x", 411 (int)ntohl(src), 412 (int) 413 ntohl(ipif->ipif_lcl_addr)); 414 } 415 mutex_exit(&ill->ill_lock); 416 return (0); 417 } 418 } 419 mutex_exit(&ill->ill_lock); 420 421 ++igmpstat.igps_rcv_reports; 422 group = igmpa->igmpa_group; 423 if (!CLASSD(group)) { 424 ++igmpstat.igps_rcv_badreports; 425 freemsg(mp); 426 return (-1); 427 } 428 429 /* 430 * KLUDGE: if the IP source address of the report has an 431 * unspecified (i.e., zero) subnet number, as is allowed for 432 * a booting host, replace it with the correct subnet number 433 * so that a process-level multicast routing demon can 434 * determine which subnet it arrived from. This is necessary 435 * to compensate for the lack of any way for a process to 436 * determine the arrival interface of an incoming packet. 437 * 438 * Requires that a copy of *this* message it passed up 439 * to the raw interface which is done by our caller. 440 */ 441 if ((src & htonl(0xFF000000U)) == 0) { /* Minimum net mask */ 442 /* Pick the first ipif on this ill */ 443 mutex_enter(&ill->ill_lock); 444 src = ill->ill_ipif->ipif_subnet; 445 mutex_exit(&ill->ill_lock); 446 ip1dbg(("igmp_input: changed src to 0x%x\n", 447 (int)ntohl(src))); 448 ipha->ipha_src = src; 449 } 450 451 /* 452 * If we belong to the group being reported, and 453 * we are a 'Delaying member' in the RFC terminology, 454 * stop our timer for that group and 'clear flag' i.e. 455 * mark as IGMP_OTHERMEMBER. Do this for all logical 456 * interfaces on the given physical interface. 
457 */ 458 mutex_enter(&ill->ill_lock); 459 for (ipif = ill->ill_ipif; ipif != NULL; 460 ipif = ipif->ipif_next) { 461 ilm = ilm_lookup_ipif(ipif, group); 462 if (ilm != NULL) { 463 ++igmpstat.igps_rcv_ourreports; 464 ilm->ilm_timer = INFINITY; 465 ilm->ilm_state = IGMP_OTHERMEMBER; 466 } 467 } /* for */ 468 mutex_exit(&ill->ill_lock); 469 break; 470 471 case IGMP_V3_MEMBERSHIP_REPORT: 472 /* 473 * Currently nothing to do here; IGMP router is not 474 * implemented in ip, and v3 hosts don't pay attention 475 * to membership reports. 476 */ 477 break; 478 } 479 /* 480 * Pass all valid IGMP packets up to any process(es) listening 481 * on a raw IGMP socket. Do not free the packet. 482 */ 483 return (0); 484 } 485 486 static uint_t 487 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill) 488 { 489 ilm_t *ilm; 490 int timer; 491 uint_t next; 492 493 ++igmpstat.igps_rcv_queries; 494 495 /* 496 * In the IGMPv2 specification, there are 3 states and a flag. 497 * 498 * In Non-Member state, we simply don't have a membership record. 499 * In Delaying Member state, our timer is running (ilm->ilm_timer 500 * < INFINITY). In Idle Member state, our timer is not running 501 * (ilm->ilm_timer == INFINITY). 502 * 503 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 504 * we have heard a report from another member, or IGMP_IREPORTEDLAST 505 * if I sent the last report. 506 */ 507 if (igmpa->igmpa_code == 0) { 508 /* 509 * Query from an old router. 510 * Remember that the querier on this interface is old, 511 * and set the timer to the value in RFC 1112. 
512 */ 513 514 515 mutex_enter(&ill->ill_lock); 516 ill->ill_mcast_v1_time = 0; 517 ill->ill_mcast_v1_tset = 1; 518 if (ill->ill_mcast_type != IGMP_V1_ROUTER) { 519 ip1dbg(("Received IGMPv1 Query on %s, switching mode " 520 "to IGMP_V1_ROUTER\n", ill->ill_name)); 521 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 522 ill->ill_mcast_type = IGMP_V1_ROUTER; 523 } 524 mutex_exit(&ill->ill_lock); 525 526 timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY); 527 528 if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) || 529 igmpa->igmpa_group != 0) { 530 ++igmpstat.igps_rcv_badqueries; 531 return (0); 532 } 533 534 } else { 535 in_addr_t group; 536 537 /* 538 * Query from a new router 539 * Simply do a validity check 540 */ 541 group = igmpa->igmpa_group; 542 if (group != 0 && (!CLASSD(group))) { 543 ++igmpstat.igps_rcv_badqueries; 544 return (0); 545 } 546 547 /* 548 * Switch interface state to v2 on receipt of a v2 query 549 * ONLY IF current state is v3. Let things be if current 550 * state if v1 but do reset the v2-querier-present timer. 551 */ 552 mutex_enter(&ill->ill_lock); 553 if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 554 ip1dbg(("Received IGMPv2 Query on %s, switching mode " 555 "to IGMP_V2_ROUTER", ill->ill_name)); 556 atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1); 557 ill->ill_mcast_type = IGMP_V2_ROUTER; 558 } 559 ill->ill_mcast_v2_time = 0; 560 ill->ill_mcast_v2_tset = 1; 561 mutex_exit(&ill->ill_lock); 562 563 timer = DSEC_TO_MSEC((int)igmpa->igmpa_code); 564 } 565 566 if (ip_debug > 1) { 567 mutex_enter(&ill->ill_lock); 568 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 569 "igmp_input: TIMER = igmp_code %d igmp_type 0x%x", 570 (int)ntohs(igmpa->igmpa_code), 571 (int)ntohs(igmpa->igmpa_type)); 572 mutex_exit(&ill->ill_lock); 573 } 574 575 /* 576 * -Start the timers in all of our membership records 577 * for the physical interface on which the query 578 * arrived, excluding those that belong to the "all 579 * hosts" group (224.0.0.1). 
580 * 581 * -Restart any timer that is already running but has 582 * a value longer than the requested timeout. 583 * 584 * -Use the value specified in the query message as 585 * the maximum timeout. 586 */ 587 next = (unsigned)INFINITY; 588 mutex_enter(&ill->ill_lock); 589 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 590 591 /* 592 * A multicast router joins INADDR_ANY address 593 * to enable promiscuous reception of all 594 * mcasts from the interface. This INADDR_ANY 595 * is stored in the ilm_v6addr as V6 unspec addr 596 */ 597 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)) 598 continue; 599 if (ilm->ilm_addr == htonl(INADDR_ANY)) 600 continue; 601 if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) && 602 (igmpa->igmpa_group == 0) || 603 (igmpa->igmpa_group == ilm->ilm_addr)) { 604 if (ilm->ilm_timer > timer) { 605 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 606 if (ilm->ilm_timer < next) 607 next = ilm->ilm_timer; 608 } 609 } 610 } 611 mutex_exit(&ill->ill_lock); 612 613 return (next); 614 } 615 616 static uint_t 617 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen) 618 { 619 uint_t i, next, mrd, qqi, timer, delay, numsrc; 620 ilm_t *ilm; 621 ipaddr_t *src_array; 622 uint8_t qrv; 623 624 /* make sure numsrc matches packet size */ 625 numsrc = ntohs(igmp3qa->igmp3qa_numsrc); 626 if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) { 627 ++igmpstat.igps_rcv_tooshort; 628 return (0); 629 } 630 src_array = (ipaddr_t *)&igmp3qa[1]; 631 632 ++igmpstat.igps_rcv_queries; 633 634 if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) { 635 uint_t hdrval, mant, exp; 636 hdrval = (uint_t)igmp3qa->igmp3qa_mxrc; 637 mant = hdrval & IGMP_V3_MAXRT_MANT_MASK; 638 exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4; 639 mrd = (mant | 0x10) << (exp + 3); 640 } 641 if (mrd == 0) 642 mrd = MCAST_DEF_QUERY_RESP_INTERVAL; 643 timer = DSEC_TO_MSEC(mrd); 644 MCAST_RANDOM_DELAY(delay, timer); 645 next = (unsigned)INFINITY; 646 647 if ((qrv = 
igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0) 648 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 649 else 650 ill->ill_mcast_rv = qrv; 651 652 if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) { 653 uint_t hdrval, mant, exp; 654 hdrval = (uint_t)igmp3qa->igmp3qa_qqic; 655 mant = hdrval & IGMP_V3_QQI_MANT_MASK; 656 exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4; 657 qqi = (mant | 0x10) << (exp + 3); 658 } 659 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 660 661 /* 662 * If we have a pending general query response that's scheduled 663 * sooner than the delay we calculated for this response, then 664 * no action is required (RFC3376 section 5.2 rule 1) 665 */ 666 mutex_enter(&ill->ill_lock); 667 if (ill->ill_global_timer < delay) { 668 mutex_exit(&ill->ill_lock); 669 return (next); 670 } 671 mutex_exit(&ill->ill_lock); 672 673 /* 674 * Now take action depending upon query type: 675 * general, group specific, or group/source specific. 676 */ 677 if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) { 678 /* 679 * general query 680 * We know global timer is either not running or is 681 * greater than our calculated delay, so reset it to 682 * our delay (random value in range [0, response time]). 683 */ 684 mutex_enter(&ill->ill_lock); 685 ill->ill_global_timer = delay; 686 next = ill->ill_global_timer; 687 mutex_exit(&ill->ill_lock); 688 689 } else { 690 /* group or group/source specific query */ 691 mutex_enter(&ill->ill_lock); 692 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 693 if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) || 694 (ilm->ilm_addr == htonl(INADDR_ANY)) || 695 (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) || 696 (igmp3qa->igmp3qa_group != ilm->ilm_addr)) 697 continue; 698 /* 699 * If the query is group specific or we have a 700 * pending group specific query, the response is 701 * group specific (pending sources list should be 702 * empty). 
Otherwise, need to update the pending 703 * sources list for the group and source specific 704 * response. 705 */ 706 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 707 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 708 group_query: 709 FREE_SLIST(ilm->ilm_pendsrcs); 710 ilm->ilm_pendsrcs = NULL; 711 } else { 712 boolean_t overflow; 713 slist_t *pktl; 714 if (numsrc > MAX_FILTER_SIZE || 715 (ilm->ilm_pendsrcs == NULL && 716 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 717 /* 718 * We've been sent more sources than 719 * we can deal with; or we can't deal 720 * with a source list at all. Revert 721 * to a group specific query. 722 */ 723 goto group_query; 724 } 725 if ((pktl = l_alloc()) == NULL) 726 goto group_query; 727 pktl->sl_numsrc = numsrc; 728 for (i = 0; i < numsrc; i++) 729 IN6_IPADDR_TO_V4MAPPED(src_array[i], 730 &(pktl->sl_addr[i])); 731 l_union_in_a(ilm->ilm_pendsrcs, pktl, 732 &overflow); 733 l_free(pktl); 734 if (overflow) 735 goto group_query; 736 } 737 /* choose soonest timer */ 738 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 739 if (ilm->ilm_timer < next) 740 next = ilm->ilm_timer; 741 } 742 mutex_exit(&ill->ill_lock); 743 } 744 745 return (next); 746 } 747 748 void 749 igmp_joingroup(ilm_t *ilm) 750 { 751 ill_t *ill; 752 753 ill = ilm->ilm_ipif->ipif_ill; 754 755 ASSERT(IAM_WRITER_ILL(ill)); 756 ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6); 757 758 mutex_enter(&ill->ill_lock); 759 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) { 760 ilm->ilm_rtx.rtx_timer = INFINITY; 761 ilm->ilm_state = IGMP_OTHERMEMBER; 762 mutex_exit(&ill->ill_lock); 763 } else { 764 ip1dbg(("Querier mode %d, sending report, group %x\n", 765 ill->ill_mcast_type, htonl(ilm->ilm_addr))); 766 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 767 mutex_exit(&ill->ill_lock); 768 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 769 mutex_enter(&ill->ill_lock); 770 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 771 mutex_exit(&ill->ill_lock); 772 igmp_sendpkt(ilm, 
IGMP_V2_MEMBERSHIP_REPORT, 0); 773 mutex_enter(&ill->ill_lock); 774 } else if (ill->ill_mcast_type == IGMP_V3_ROUTER) { 775 mrec_t *rp; 776 mcast_record_t rtype; 777 /* 778 * The possible state changes we need to handle here: 779 * Old State New State Report 780 * 781 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 782 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 783 * 784 * No need to send the BLOCK(0) report; ALLOW(X) 785 * is enough. 786 */ 787 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 788 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 789 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 790 ilm->ilm_filter, NULL); 791 mutex_exit(&ill->ill_lock); 792 igmpv3_sendrpt(ilm->ilm_ipif, rp); 793 mutex_enter(&ill->ill_lock); 794 /* 795 * Set up retransmission state. Timer is set below, 796 * for both v3 and older versions. 797 */ 798 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 799 ilm->ilm_filter); 800 } 801 802 /* Set the ilm timer value */ 803 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 804 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 805 ilm->ilm_state = IGMP_IREPORTEDLAST; 806 mutex_exit(&ill->ill_lock); 807 808 /* 809 * To avoid deadlock, we don't call igmp_start_timers from 810 * here. igmp_start_timers needs to call untimeout, and we 811 * can't hold the ipsq across untimeout since 812 * igmp_timeout_handler could be blocking trying to 813 * acquire the ipsq. Instead we start the timer after we get 814 * out of the ipsq in ipsq_exit. 
815 */ 816 mutex_enter(&igmp_timer_lock); 817 igmp_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, 818 igmp_deferred_next); 819 mutex_exit(&igmp_timer_lock); 820 } 821 822 if (ip_debug > 1) { 823 (void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE, 824 "igmp_joingroup: multicast_type %d timer %d", 825 (ilm->ilm_ipif->ipif_ill->ill_mcast_type), 826 (int)ntohl(ilm->ilm_rtx.rtx_timer)); 827 } 828 } 829 830 void 831 mld_joingroup(ilm_t *ilm) 832 { 833 ill_t *ill; 834 835 ill = ilm->ilm_ill; 836 837 ASSERT(IAM_WRITER_ILL(ill)); 838 ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6); 839 840 mutex_enter(&ill->ill_lock); 841 if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) { 842 ilm->ilm_rtx.rtx_timer = INFINITY; 843 ilm->ilm_state = IGMP_OTHERMEMBER; 844 mutex_exit(&ill->ill_lock); 845 } else { 846 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 847 mutex_exit(&ill->ill_lock); 848 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 849 mutex_enter(&ill->ill_lock); 850 } else { 851 mrec_t *rp; 852 mcast_record_t rtype; 853 /* 854 * The possible state changes we need to handle here: 855 * Old State New State Report 856 * 857 * INCLUDE(0) INCLUDE(X) ALLOW(X),BLOCK(0) 858 * INCLUDE(0) EXCLUDE(X) TO_EX(X) 859 * 860 * No need to send the BLOCK(0) report; ALLOW(X) 861 * is enough 862 */ 863 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 864 ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE; 865 rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 866 ilm->ilm_filter, NULL); 867 mutex_exit(&ill->ill_lock); 868 mldv2_sendrpt(ill, rp); 869 mutex_enter(&ill->ill_lock); 870 /* 871 * Set up retransmission state. Timer is set below, 872 * for both v2 and v1. 
873 */ 874 mcast_init_rtx(ill, &ilm->ilm_rtx, rtype, 875 ilm->ilm_filter); 876 } 877 878 /* Set the ilm timer value */ 879 ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER || 880 ilm->ilm_rtx.rtx_cnt > 0); 881 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 882 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 883 ilm->ilm_state = IGMP_IREPORTEDLAST; 884 mutex_exit(&ill->ill_lock); 885 886 /* 887 * To avoid deadlock, we don't call mld_start_timers from 888 * here. mld_start_timers needs to call untimeout, and we 889 * can't hold the ipsq (i.e. the lock) across untimeout 890 * since mld_timeout_handler could be blocking trying to 891 * acquire the ipsq. Instead we start the timer after we get 892 * out of the ipsq in ipsq_exit 893 */ 894 mutex_enter(&mld_timer_lock); 895 mld_deferred_next = MIN(ilm->ilm_rtx.rtx_timer, 896 mld_deferred_next); 897 mutex_exit(&mld_timer_lock); 898 } 899 900 if (ip_debug > 1) { 901 (void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE, 902 "mld_joingroup: multicast_type %d timer %d", 903 (ilm->ilm_ill->ill_mcast_type), 904 (int)ntohl(ilm->ilm_rtx.rtx_timer)); 905 } 906 } 907 908 void 909 igmp_leavegroup(ilm_t *ilm) 910 { 911 ill_t *ill = ilm->ilm_ipif->ipif_ill; 912 913 ASSERT(ilm->ilm_ill == NULL); 914 ASSERT(!ill->ill_isv6); 915 916 mutex_enter(&ill->ill_lock); 917 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 918 ill->ill_mcast_type == IGMP_V2_ROUTER && 919 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 920 mutex_exit(&ill->ill_lock); 921 igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP, 922 (htonl(INADDR_ALLRTRS_GROUP))); 923 return; 924 } else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) && 925 (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) { 926 mrec_t *rp; 927 /* 928 * The possible state changes we need to handle here: 929 * Old State New State Report 930 * 931 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 932 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 933 * 934 * No need to send the ALLOW(0) report; BLOCK(X) is enough 935 */ 936 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 
937 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 938 ilm->ilm_filter, NULL); 939 } else { 940 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 941 NULL, NULL); 942 } 943 mutex_exit(&ill->ill_lock); 944 igmpv3_sendrpt(ilm->ilm_ipif, rp); 945 return; 946 } 947 mutex_exit(&ill->ill_lock); 948 } 949 950 void 951 mld_leavegroup(ilm_t *ilm) 952 { 953 ill_t *ill = ilm->ilm_ill; 954 955 ASSERT(ilm->ilm_ipif == NULL); 956 ASSERT(ill->ill_isv6); 957 958 mutex_enter(&ill->ill_lock); 959 if (ilm->ilm_state == IGMP_IREPORTEDLAST && 960 ill->ill_mcast_type == MLD_V1_ROUTER && 961 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 962 mutex_exit(&ill->ill_lock); 963 mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast); 964 return; 965 } else if ((ill->ill_mcast_type == MLD_V2_ROUTER) && 966 (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) { 967 mrec_t *rp; 968 /* 969 * The possible state changes we need to handle here: 970 * Old State New State Report 971 * 972 * INCLUDE(X) INCLUDE(0) ALLOW(0),BLOCK(X) 973 * EXCLUDE(X) INCLUDE(0) TO_IN(0) 974 * 975 * No need to send the ALLOW(0) report; BLOCK(X) is enough 976 */ 977 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 978 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 979 ilm->ilm_filter, NULL); 980 } else { 981 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, 982 NULL, NULL); 983 } 984 mutex_exit(&ill->ill_lock); 985 mldv2_sendrpt(ill, rp); 986 return; 987 } 988 mutex_exit(&ill->ill_lock); 989 } 990 991 void 992 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) 993 { 994 ill_t *ill; 995 mrec_t *rp; 996 997 ASSERT(ilm != NULL); 998 999 /* state change reports should only be sent if the router is v3 */ 1000 if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER) 1001 return; 1002 1003 if (ilm->ilm_ill == NULL) { 1004 ASSERT(ilm->ilm_ipif != NULL); 1005 ill = ilm->ilm_ipif->ipif_ill; 1006 } else { 1007 ill = ilm->ilm_ill; 1008 } 1009 1010 
mutex_enter(&ill->ill_lock); 1011 1012 /* 1013 * Compare existing(old) state with the new state and prepare 1014 * State Change Report, according to the rules in RFC 3376: 1015 * 1016 * Old State New State State Change Report 1017 * 1018 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 1019 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 1020 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 1021 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 1022 */ 1023 1024 if (ilm->ilm_fmode == fmode) { 1025 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1026 slist_t *allow, *block; 1027 if (((a_minus_b = l_alloc()) == NULL) || 1028 ((b_minus_a = l_alloc()) == NULL)) { 1029 l_free(a_minus_b); 1030 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1031 goto send_to_ex; 1032 else 1033 goto send_to_in; 1034 } 1035 l_difference(ilm->ilm_filter, flist, a_minus_b); 1036 l_difference(flist, ilm->ilm_filter, b_minus_a); 1037 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1038 allow = b_minus_a; 1039 block = a_minus_b; 1040 } else { 1041 allow = a_minus_b; 1042 block = b_minus_a; 1043 } 1044 rp = NULL; 1045 if (!SLIST_IS_EMPTY(allow)) 1046 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1047 allow, rp); 1048 if (!SLIST_IS_EMPTY(block)) 1049 rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1050 block, rp); 1051 l_free(a_minus_b); 1052 l_free(b_minus_a); 1053 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1054 send_to_ex: 1055 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1056 NULL); 1057 } else { 1058 send_to_in: 1059 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1060 NULL); 1061 } 1062 1063 /* 1064 * Need to set up retransmission state; merge the new info with the 1065 * current state (which may be null). If the timer is not currently 1066 * running, start it (need to do a delayed start of the timer as 1067 * we're currently in the sq). 
1068 */ 1069 rp = mcast_merge_rtx(ilm, rp, flist); 1070 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1071 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1072 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 1073 mutex_enter(&igmp_timer_lock); 1074 igmp_deferred_next = MIN(igmp_deferred_next, 1075 ilm->ilm_rtx.rtx_timer); 1076 mutex_exit(&igmp_timer_lock); 1077 } 1078 1079 mutex_exit(&ill->ill_lock); 1080 igmpv3_sendrpt(ilm->ilm_ipif, rp); 1081 } 1082 1083 void 1084 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist) 1085 { 1086 ill_t *ill; 1087 mrec_t *rp = NULL; 1088 1089 ASSERT(ilm != NULL); 1090 1091 ill = ilm->ilm_ill; 1092 1093 /* only need to send if we have an mldv2-capable router */ 1094 mutex_enter(&ill->ill_lock); 1095 if (ill->ill_mcast_type != MLD_V2_ROUTER) { 1096 mutex_exit(&ill->ill_lock); 1097 return; 1098 } 1099 1100 /* 1101 * Compare existing (old) state with the new state passed in 1102 * and send appropriate MLDv2 State Change Report. 1103 * 1104 * Old State New State State Change Report 1105 * 1106 * INCLUDE(A) INCLUDE(B) ALLOW(B-A),BLOCK(A-B) 1107 * EXCLUDE(A) EXCLUDE(B) ALLOW(A-B),BLOCK(B-A) 1108 * INCLUDE(A) EXCLUDE(B) TO_EX(B) 1109 * EXCLUDE(A) INCLUDE(B) TO_IN(B) 1110 */ 1111 if (ilm->ilm_fmode == fmode) { 1112 slist_t *a_minus_b = NULL, *b_minus_a = NULL; 1113 slist_t *allow, *block; 1114 if (((a_minus_b = l_alloc()) == NULL) || 1115 ((b_minus_a = l_alloc()) == NULL)) { 1116 l_free(a_minus_b); 1117 if (ilm->ilm_fmode == MODE_IS_INCLUDE) 1118 goto send_to_ex; 1119 else 1120 goto send_to_in; 1121 } 1122 l_difference(ilm->ilm_filter, flist, a_minus_b); 1123 l_difference(flist, ilm->ilm_filter, b_minus_a); 1124 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1125 allow = b_minus_a; 1126 block = a_minus_b; 1127 } else { 1128 allow = a_minus_b; 1129 block = b_minus_a; 1130 } 1131 if (!SLIST_IS_EMPTY(allow)) 1132 rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr, 1133 allow, rp); 1134 if (!SLIST_IS_EMPTY(block)) 1135 rp = 
mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr, 1136 block, rp); 1137 l_free(a_minus_b); 1138 l_free(b_minus_a); 1139 } else if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1140 send_to_ex: 1141 rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist, 1142 NULL); 1143 } else { 1144 send_to_in: 1145 rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist, 1146 NULL); 1147 } 1148 1149 /* 1150 * Need to set up retransmission state; merge the new info with the 1151 * current state (which may be null). If the timer is not currently 1152 * running, start it (need to do a deferred start of the timer as 1153 * we're currently in the sq). 1154 */ 1155 rp = mcast_merge_rtx(ilm, rp, flist); 1156 ASSERT(ilm->ilm_rtx.rtx_cnt > 0); 1157 if (ilm->ilm_rtx.rtx_timer == INFINITY) { 1158 MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer, 1159 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 1160 mutex_enter(&mld_timer_lock); 1161 mld_deferred_next = 1162 MIN(mld_deferred_next, ilm->ilm_rtx.rtx_timer); 1163 mutex_exit(&mld_timer_lock); 1164 } 1165 1166 mutex_exit(&ill->ill_lock); 1167 mldv2_sendrpt(ill, rp); 1168 } 1169 1170 uint_t 1171 igmp_timeout_handler_per_ill(ill_t *ill, int elapsed) 1172 { 1173 uint_t next = INFINITY; 1174 ilm_t *ilm; 1175 ipif_t *ipif; 1176 mrec_t *rp = NULL; 1177 mrec_t *rtxrp = NULL; 1178 rtx_state_t *rtxp; 1179 mcast_record_t rtype; 1180 1181 ASSERT(IAM_WRITER_ILL(ill)); 1182 1183 mutex_enter(&ill->ill_lock); 1184 1185 /* First check the global timer on this interface */ 1186 if (ill->ill_global_timer == INFINITY) 1187 goto per_ilm_timer; 1188 if (ill->ill_global_timer <= elapsed) { 1189 ill->ill_global_timer = INFINITY; 1190 /* 1191 * Send report for each group on this interface. 1192 * Since we just set the global timer (received a v3 general 1193 * query), need to skip the all hosts addr (224.0.0.1), per 1194 * RFC 3376 section 5. 
1195 */ 1196 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1197 if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) 1198 continue; 1199 ASSERT(ilm->ilm_ipif != NULL); 1200 ilm->ilm_ipif->ipif_igmp_rpt = 1201 mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr, 1202 ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt); 1203 /* 1204 * Since we're sending a report on this group, okay 1205 * to delete pending group-specific timers. Note 1206 * that group-specific retransmit timers still need 1207 * to be checked in the per_ilm_timer for-loop. 1208 */ 1209 ilm->ilm_timer = INFINITY; 1210 ilm->ilm_state = IGMP_IREPORTEDLAST; 1211 FREE_SLIST(ilm->ilm_pendsrcs); 1212 ilm->ilm_pendsrcs = NULL; 1213 } 1214 /* 1215 * We've built per-ipif mrec lists; walk the ill's ipif list 1216 * and send a report for each ipif that has an mrec list. 1217 */ 1218 for (ipif = ill->ill_ipif; ipif != NULL; 1219 ipif = ipif->ipif_next) { 1220 if (ipif->ipif_igmp_rpt == NULL) 1221 continue; 1222 mutex_exit(&ill->ill_lock); 1223 igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt); 1224 mutex_enter(&ill->ill_lock); 1225 /* mrec list was freed by igmpv3_sendrpt() */ 1226 ipif->ipif_igmp_rpt = NULL; 1227 } 1228 } else { 1229 ill->ill_global_timer -= elapsed; 1230 if (ill->ill_global_timer < next) 1231 next = ill->ill_global_timer; 1232 } 1233 1234 per_ilm_timer: 1235 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1236 if (ilm->ilm_timer == INFINITY) 1237 goto per_ilm_rtxtimer; 1238 1239 if (ilm->ilm_timer > elapsed) { 1240 ilm->ilm_timer -= elapsed; 1241 if (ilm->ilm_timer < next) 1242 next = ilm->ilm_timer; 1243 1244 if (ip_debug > 1) { 1245 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1246 "igmp_timo_hlr 2: ilm_timr %d elap %d " 1247 "typ %d nxt %d", 1248 (int)ntohl(ilm->ilm_timer), elapsed, 1249 (ill->ill_mcast_type), next); 1250 } 1251 1252 goto per_ilm_rtxtimer; 1253 } 1254 1255 /* the timer has expired, need to take action */ 1256 ilm->ilm_timer = INFINITY; 1257 ilm->ilm_state = 
IGMP_IREPORTEDLAST; 1258 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1259 mutex_exit(&ill->ill_lock); 1260 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1261 mutex_enter(&ill->ill_lock); 1262 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1263 mutex_exit(&ill->ill_lock); 1264 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1265 mutex_enter(&ill->ill_lock); 1266 } else { 1267 slist_t *rsp; 1268 if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) && 1269 (rsp = l_alloc()) != NULL) { 1270 /* 1271 * Contents of reply depend on pending 1272 * requested source list. 1273 */ 1274 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1275 l_intersection(ilm->ilm_filter, 1276 ilm->ilm_pendsrcs, rsp); 1277 } else { 1278 l_difference(ilm->ilm_pendsrcs, 1279 ilm->ilm_filter, rsp); 1280 } 1281 FREE_SLIST(ilm->ilm_pendsrcs); 1282 ilm->ilm_pendsrcs = NULL; 1283 if (!SLIST_IS_EMPTY(rsp)) 1284 rp = mcast_bldmrec(MODE_IS_INCLUDE, 1285 &ilm->ilm_v6addr, rsp, rp); 1286 FREE_SLIST(rsp); 1287 } else { 1288 /* 1289 * Either the pending request is just group- 1290 * specific, or we couldn't get the resources 1291 * (rsp) to build a source-specific reply. 
1292 */ 1293 rp = mcast_bldmrec(ilm->ilm_fmode, 1294 &ilm->ilm_v6addr, ilm->ilm_filter, rp); 1295 } 1296 mutex_exit(&ill->ill_lock); 1297 igmpv3_sendrpt(ill->ill_ipif, rp); 1298 mutex_enter(&ill->ill_lock); 1299 rp = NULL; 1300 } 1301 1302 if (ip_debug > 1) { 1303 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1304 "igmp_timo_hlr 1: ilm_timr %d elap %d " 1305 "typ %d nxt %d", 1306 (int)ntohl(ilm->ilm_timer), elapsed, 1307 (ill->ill_mcast_type), next); 1308 } 1309 1310 per_ilm_rtxtimer: 1311 rtxp = &ilm->ilm_rtx; 1312 1313 if (rtxp->rtx_timer == INFINITY) 1314 continue; 1315 if (rtxp->rtx_timer > elapsed) { 1316 rtxp->rtx_timer -= elapsed; 1317 if (rtxp->rtx_timer < next) 1318 next = rtxp->rtx_timer; 1319 continue; 1320 } 1321 1322 rtxp->rtx_timer = INFINITY; 1323 ilm->ilm_state = IGMP_IREPORTEDLAST; 1324 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1325 mutex_exit(&ill->ill_lock); 1326 igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0); 1327 mutex_enter(&ill->ill_lock); 1328 continue; 1329 } else if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1330 mutex_exit(&ill->ill_lock); 1331 igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0); 1332 mutex_enter(&ill->ill_lock); 1333 continue; 1334 } 1335 1336 /* 1337 * The retransmit timer has popped, and our router is 1338 * IGMPv3. We have to delve into the retransmit state 1339 * stored in the ilm. 1340 * 1341 * Decrement the retransmit count. If the fmode rtx 1342 * count is active, decrement it, and send a filter 1343 * mode change report with the ilm's source list. 1344 * Otherwise, send a source list change report with 1345 * the current retransmit lists. 1346 */ 1347 ASSERT(rtxp->rtx_cnt > 0); 1348 ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt); 1349 rtxp->rtx_cnt--; 1350 if (rtxp->rtx_fmode_cnt > 0) { 1351 rtxp->rtx_fmode_cnt--; 1352 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 
1353 CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE; 1354 rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 1355 ilm->ilm_filter, rtxrp); 1356 } else { 1357 rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES, 1358 &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp); 1359 rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES, 1360 &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp); 1361 } 1362 if (rtxp->rtx_cnt > 0) { 1363 MCAST_RANDOM_DELAY(rtxp->rtx_timer, 1364 SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY)); 1365 if (rtxp->rtx_timer < next) 1366 next = rtxp->rtx_timer; 1367 } else { 1368 CLEAR_SLIST(rtxp->rtx_allow); 1369 CLEAR_SLIST(rtxp->rtx_block); 1370 } 1371 mutex_exit(&ill->ill_lock); 1372 igmpv3_sendrpt(ilm->ilm_ipif, rtxrp); 1373 mutex_enter(&ill->ill_lock); 1374 rtxrp = NULL; 1375 } 1376 1377 mutex_exit(&ill->ill_lock); 1378 1379 return (next); 1380 } 1381 1382 /* 1383 * igmp_timeout_handler: 1384 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick). 1385 * Returns number of ticks to next event (or 0 if none). 1386 * 1387 * As part of multicast join and leave igmp we may need to send out an 1388 * igmp request. The igmp related state variables in the ilm are protected 1389 * by ill_lock. A single global igmp timer is used to track igmp timeouts. 1390 * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers 1391 * starts the igmp timer if needed. It serializes multiple threads trying to 1392 * simultaneously start the timer using the igmp_timer_setter_active flag. 1393 * 1394 * igmp_input() receives igmp queries and responds to the queries 1395 * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers(). 1396 * Later the igmp_timer fires, the timeout handler igmp_timerout_handler() 1397 * performs the action exclusively after entering each ill's ipsq as writer. 
1398 * The actual igmp timeout handler needs to run in the ipsq since it has to 1399 * access the ilm's and we don't want another exclusive operation like 1400 * say an IPMP failover to be simultaneously moving the ilms from one ill to 1401 * another. 1402 * 1403 * The igmp_slowtimeo() function is called thru another timer. 1404 * igmp_slowtimeout_lock protects the igmp_slowtimeout_id 1405 */ 1406 1407 /* ARGSUSED */ 1408 void 1409 igmp_timeout_handler(void *arg) 1410 { 1411 ill_t *ill; 1412 int elapsed; /* Since last call */ 1413 uint_t global_next = INFINITY; 1414 uint_t next; 1415 ill_walk_context_t ctx; 1416 boolean_t success; 1417 1418 mutex_enter(&igmp_timer_lock); 1419 ASSERT(igmp_timeout_id != 0); 1420 igmp_timer_fired_last = ddi_get_lbolt(); 1421 elapsed = igmp_time_to_next; 1422 igmp_time_to_next = 0; 1423 mutex_exit(&igmp_timer_lock); 1424 1425 rw_enter(&ill_g_lock, RW_READER); 1426 ill = ILL_START_WALK_V4(&ctx); 1427 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1428 ASSERT(!ill->ill_isv6); 1429 /* 1430 * We may not be able to refhold the ill if the ill/ipif 1431 * is changing. But we need to make sure that the ill will 1432 * not vanish. So we just bump up the ill_waiter count. 1433 */ 1434 if (!ill_waiter_inc(ill)) 1435 continue; 1436 rw_exit(&ill_g_lock); 1437 success = ipsq_enter(ill, B_TRUE); 1438 if (success) { 1439 next = igmp_timeout_handler_per_ill(ill, elapsed); 1440 if (next < global_next) 1441 global_next = next; 1442 ipsq_exit(ill->ill_phyint->phyint_ipsq, B_FALSE, 1443 B_TRUE); 1444 } 1445 rw_enter(&ill_g_lock, RW_READER); 1446 ill_waiter_dcr(ill); 1447 } 1448 rw_exit(&ill_g_lock); 1449 1450 mutex_enter(&igmp_timer_lock); 1451 ASSERT(igmp_timeout_id != 0); 1452 igmp_timeout_id = 0; 1453 mutex_exit(&igmp_timer_lock); 1454 1455 if (global_next != INFINITY) 1456 igmp_start_timers(global_next); 1457 } 1458 1459 /* 1460 * mld_timeout_handler: 1461 * Called when there are timeout events, every next (tick). 
1462 * Returns number of ticks to next event (or 0 if none). 1463 */ 1464 /* ARGSUSED */ 1465 uint_t 1466 mld_timeout_handler_per_ill(ill_t *ill, int elapsed) 1467 { 1468 ilm_t *ilm; 1469 uint_t next = INFINITY; 1470 mrec_t *rp, *rtxrp; 1471 rtx_state_t *rtxp; 1472 mcast_record_t rtype; 1473 1474 ASSERT(IAM_WRITER_ILL(ill)); 1475 1476 mutex_enter(&ill->ill_lock); 1477 1478 /* 1479 * First check the global timer on this interface; the global timer 1480 * is not used for MLDv1, so if it's set we can assume we're v2. 1481 */ 1482 if (ill->ill_global_timer == INFINITY) 1483 goto per_ilm_timer; 1484 if (ill->ill_global_timer <= elapsed) { 1485 ill->ill_global_timer = INFINITY; 1486 /* 1487 * Send report for each group on this interface. 1488 * Since we just set the global timer (received a v2 general 1489 * query), need to skip the all hosts addr (ff02::1), per 1490 * RFC 3810 section 6. 1491 */ 1492 rp = NULL; 1493 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1494 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 1495 &ipv6_all_hosts_mcast)) 1496 continue; 1497 rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr, 1498 ilm->ilm_filter, rp); 1499 /* 1500 * Since we're sending a report on this group, okay 1501 * to delete pending group-specific timers. Note 1502 * that group-specific retransmit timers still need 1503 * to be checked in the per_ilm_timer for-loop. 
1504 */ 1505 ilm->ilm_timer = INFINITY; 1506 ilm->ilm_state = IGMP_IREPORTEDLAST; 1507 FREE_SLIST(ilm->ilm_pendsrcs); 1508 ilm->ilm_pendsrcs = NULL; 1509 } 1510 mutex_exit(&ill->ill_lock); 1511 mldv2_sendrpt(ill, rp); 1512 mutex_enter(&ill->ill_lock); 1513 } else { 1514 ill->ill_global_timer -= elapsed; 1515 if (ill->ill_global_timer < next) 1516 next = ill->ill_global_timer; 1517 } 1518 1519 per_ilm_timer: 1520 rp = rtxrp = NULL; 1521 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1522 if (ilm->ilm_timer == INFINITY) 1523 goto per_ilm_rtxtimer; 1524 1525 if (ilm->ilm_timer > elapsed) { 1526 ilm->ilm_timer -= elapsed; 1527 if (ilm->ilm_timer < next) 1528 next = ilm->ilm_timer; 1529 1530 if (ip_debug > 1) { 1531 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1532 "igmp_timo_hlr 2: ilm_timr" 1533 " %d elap %d typ %d nxt %d", 1534 (int)ntohl(ilm->ilm_timer), elapsed, 1535 (ill->ill_mcast_type), next); 1536 } 1537 1538 goto per_ilm_rtxtimer; 1539 } 1540 1541 /* the timer has expired, need to take action */ 1542 ilm->ilm_timer = INFINITY; 1543 ilm->ilm_state = IGMP_IREPORTEDLAST; 1544 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 1545 mutex_exit(&ill->ill_lock); 1546 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 1547 mutex_enter(&ill->ill_lock); 1548 } else { 1549 slist_t *rsp; 1550 if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) && 1551 (rsp = l_alloc()) != NULL) { 1552 /* 1553 * Contents of reply depend on pending 1554 * requested source list. 
1555 */ 1556 if (ilm->ilm_fmode == MODE_IS_INCLUDE) { 1557 l_intersection(ilm->ilm_filter, 1558 ilm->ilm_pendsrcs, rsp); 1559 } else { 1560 l_difference(ilm->ilm_pendsrcs, 1561 ilm->ilm_filter, rsp); 1562 } 1563 FREE_SLIST(ilm->ilm_pendsrcs); 1564 ilm->ilm_pendsrcs = NULL; 1565 if (!SLIST_IS_EMPTY(rsp)) 1566 rp = mcast_bldmrec(MODE_IS_INCLUDE, 1567 &ilm->ilm_v6addr, rsp, rp); 1568 FREE_SLIST(rsp); 1569 } else { 1570 rp = mcast_bldmrec(ilm->ilm_fmode, 1571 &ilm->ilm_v6addr, ilm->ilm_filter, rp); 1572 } 1573 } 1574 1575 if (ip_debug > 1) { 1576 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 1577 "igmp_timo_hlr 1: ilm_timr %d elap %d " 1578 "typ %d nxt %d", 1579 (int)ntohl(ilm->ilm_timer), elapsed, 1580 (ill->ill_mcast_type), next); 1581 } 1582 1583 per_ilm_rtxtimer: 1584 rtxp = &ilm->ilm_rtx; 1585 1586 if (rtxp->rtx_timer == INFINITY) 1587 continue; 1588 if (rtxp->rtx_timer > elapsed) { 1589 rtxp->rtx_timer -= elapsed; 1590 if (rtxp->rtx_timer < next) 1591 next = rtxp->rtx_timer; 1592 continue; 1593 } 1594 1595 rtxp->rtx_timer = INFINITY; 1596 ilm->ilm_state = IGMP_IREPORTEDLAST; 1597 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 1598 mutex_exit(&ill->ill_lock); 1599 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 1600 mutex_enter(&ill->ill_lock); 1601 continue; 1602 } 1603 1604 /* 1605 * The retransmit timer has popped, and our router is 1606 * MLDv2. We have to delve into the retransmit state 1607 * stored in the ilm. 1608 * 1609 * Decrement the retransmit count. If the fmode rtx 1610 * count is active, decrement it, and send a filter 1611 * mode change report with the ilm's source list. 1612 * Otherwise, send a source list change report with 1613 * the current retransmit lists. 1614 */ 1615 ASSERT(rtxp->rtx_cnt > 0); 1616 ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt); 1617 rtxp->rtx_cnt--; 1618 if (rtxp->rtx_fmode_cnt > 0) { 1619 rtxp->rtx_fmode_cnt--; 1620 rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ? 
1621 CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE; 1622 rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr, 1623 ilm->ilm_filter, rtxrp); 1624 } else { 1625 rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES, 1626 &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp); 1627 rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES, 1628 &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp); 1629 } 1630 if (rtxp->rtx_cnt > 0) { 1631 MCAST_RANDOM_DELAY(rtxp->rtx_timer, 1632 SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY)); 1633 if (rtxp->rtx_timer < next) 1634 next = rtxp->rtx_timer; 1635 } else { 1636 CLEAR_SLIST(rtxp->rtx_allow); 1637 CLEAR_SLIST(rtxp->rtx_block); 1638 } 1639 } 1640 1641 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 1642 mutex_exit(&ill->ill_lock); 1643 mldv2_sendrpt(ill, rp); 1644 mldv2_sendrpt(ill, rtxrp); 1645 return (next); 1646 } 1647 1648 mutex_exit(&ill->ill_lock); 1649 1650 return (next); 1651 } 1652 1653 /* 1654 * mld_timeout_handler: 1655 * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick). 1656 * Returns number of ticks to next event (or 0 if none). 1657 * MT issues are same as igmp_timeout_handler 1658 */ 1659 /* ARGSUSED */ 1660 void 1661 mld_timeout_handler(void *arg) 1662 { 1663 ill_t *ill; 1664 int elapsed; /* Since last call */ 1665 uint_t global_next = INFINITY; 1666 uint_t next; 1667 ill_walk_context_t ctx; 1668 boolean_t success; 1669 1670 mutex_enter(&mld_timer_lock); 1671 ASSERT(mld_timeout_id != 0); 1672 mld_timer_fired_last = ddi_get_lbolt(); 1673 elapsed = mld_time_to_next; 1674 mld_time_to_next = 0; 1675 mutex_exit(&mld_timer_lock); 1676 1677 rw_enter(&ill_g_lock, RW_READER); 1678 ill = ILL_START_WALK_V6(&ctx); 1679 for (; ill != NULL; ill = ill_next(&ctx, ill)) { 1680 ASSERT(ill->ill_isv6); 1681 /* 1682 * We may not be able to refhold the ill if the ill/ipif 1683 * is changing. But we need to make sure that the ill will 1684 * not vanish. So we just bump up the ill_waiter count. 
1685 */ 1686 if (!ill_waiter_inc(ill)) 1687 continue; 1688 rw_exit(&ill_g_lock); 1689 success = ipsq_enter(ill, B_TRUE); 1690 if (success) { 1691 next = mld_timeout_handler_per_ill(ill, elapsed); 1692 if (next < global_next) 1693 global_next = next; 1694 ipsq_exit(ill->ill_phyint->phyint_ipsq, B_TRUE, 1695 B_FALSE); 1696 } 1697 rw_enter(&ill_g_lock, RW_READER); 1698 ill_waiter_dcr(ill); 1699 } 1700 rw_exit(&ill_g_lock); 1701 1702 mutex_enter(&mld_timer_lock); 1703 ASSERT(mld_timeout_id != 0); 1704 mld_timeout_id = 0; 1705 mutex_exit(&mld_timer_lock); 1706 1707 if (global_next != INFINITY) 1708 mld_start_timers(global_next); 1709 } 1710 1711 /* 1712 * Calculate the Older Version Querier Present timeout value, in number 1713 * of slowtimo intervals, for the given ill. 1714 */ 1715 #define OVQP(ill) \ 1716 ((1000 * (((ill)->ill_mcast_rv * (ill)->ill_mcast_qi) \ 1717 + MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL) 1718 1719 /* 1720 * igmp_slowtimo: 1721 * - Resets to new router if we didnt we hear from the router 1722 * in IGMP_AGE_THRESHOLD seconds. 1723 * - Resets slowtimeout. 1724 */ 1725 /* ARGSUSED */ 1726 void 1727 igmp_slowtimo(void *arg) 1728 { 1729 ill_t *ill; 1730 ill_if_t *ifp; 1731 avl_tree_t *avl_tree; 1732 1733 /* Hold the ill_g_lock so that we can safely walk the ill list */ 1734 rw_enter(&ill_g_lock, RW_READER); 1735 1736 /* 1737 * The ill_if_t list is circular, hence the odd loop parameters. 1738 * 1739 * We can't use the ILL_START_WALK and ill_next() wrappers for this 1740 * walk, as we need to check the illif_mcast_* fields in the ill_if_t 1741 * structure (allowing us to skip if none of the instances have timers 1742 * running). 1743 */ 1744 for (ifp = IP_V4_ILL_G_LIST; ifp != (ill_if_t *)&IP_V4_ILL_G_LIST; 1745 ifp = ifp->illif_next) { 1746 /* 1747 * illif_mcast_v[12] are set using atomics. If an ill hears 1748 * a V1 or V2 query now and we miss seeing the count now, 1749 * we will see it the next time igmp_slowtimo is called. 
1750 */ 1751 if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0) 1752 continue; 1753 1754 avl_tree = &ifp->illif_avl_by_ppa; 1755 for (ill = avl_first(avl_tree); ill != NULL; 1756 ill = avl_walk(avl_tree, ill, AVL_AFTER)) { 1757 mutex_enter(&ill->ill_lock); 1758 if (ill->ill_mcast_v1_tset == 1) 1759 ill->ill_mcast_v1_time++; 1760 if (ill->ill_mcast_v2_tset == 1) 1761 ill->ill_mcast_v2_time++; 1762 if (ill->ill_mcast_type == IGMP_V1_ROUTER) { 1763 if (ill->ill_mcast_v1_time >= OVQP(ill)) { 1764 if (ill->ill_mcast_v2_tset > 0) { 1765 ip1dbg(("V1 query timer " 1766 "expired on %s; switching " 1767 "mode to IGMP_V2\n", 1768 ill->ill_name)); 1769 ill->ill_mcast_type = 1770 IGMP_V2_ROUTER; 1771 } else { 1772 ip1dbg(("V1 query timer " 1773 "expired on %s; switching " 1774 "mode to IGMP_V3\n", 1775 ill->ill_name)); 1776 ill->ill_mcast_type = 1777 IGMP_V3_ROUTER; 1778 } 1779 ill->ill_mcast_v1_time = 0; 1780 ill->ill_mcast_v1_tset = 0; 1781 atomic_add_16(&ifp->illif_mcast_v1, -1); 1782 } 1783 } 1784 if (ill->ill_mcast_type == IGMP_V2_ROUTER) { 1785 if (ill->ill_mcast_v2_time >= OVQP(ill)) { 1786 ip1dbg(("V2 query timer expired on " 1787 "%s; switching mode to IGMP_V3\n", 1788 ill->ill_name)); 1789 ill->ill_mcast_type = IGMP_V3_ROUTER; 1790 ill->ill_mcast_v2_time = 0; 1791 ill->ill_mcast_v2_tset = 0; 1792 atomic_add_16(&ifp->illif_mcast_v2, -1); 1793 } 1794 } 1795 mutex_exit(&ill->ill_lock); 1796 } 1797 1798 } 1799 rw_exit(&ill_g_lock); 1800 mutex_enter(&igmp_slowtimeout_lock); 1801 igmp_slowtimeout_id = timeout(igmp_slowtimo, NULL, 1802 MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); 1803 mutex_exit(&igmp_slowtimeout_lock); 1804 } 1805 1806 /* 1807 * mld_slowtimo: 1808 * - Resets to newer version if we didn't hear from the older version router 1809 * in MLD_AGE_THRESHOLD seconds. 1810 * - Restarts slowtimeout. 
1811 */ 1812 /* ARGSUSED */ 1813 void 1814 mld_slowtimo(void *arg) 1815 { 1816 ill_t *ill; 1817 ill_if_t *ifp; 1818 avl_tree_t *avl_tree; 1819 1820 /* See comments in igmp_slowtimo() above... */ 1821 rw_enter(&ill_g_lock, RW_READER); 1822 for (ifp = IP_V6_ILL_G_LIST; ifp != (ill_if_t *)&IP_V6_ILL_G_LIST; 1823 ifp = ifp->illif_next) { 1824 1825 if (ifp->illif_mcast_v1 == 0) 1826 continue; 1827 1828 avl_tree = &ifp->illif_avl_by_ppa; 1829 for (ill = avl_first(avl_tree); ill != NULL; 1830 ill = avl_walk(avl_tree, ill, AVL_AFTER)) { 1831 mutex_enter(&ill->ill_lock); 1832 if (ill->ill_mcast_v1_tset == 1) 1833 ill->ill_mcast_v1_time++; 1834 if (ill->ill_mcast_type == MLD_V1_ROUTER) { 1835 if (ill->ill_mcast_v1_time >= OVQP(ill)) { 1836 ip1dbg(("MLD query timer expired on" 1837 " %s; switching mode to MLD_V2\n", 1838 ill->ill_name)); 1839 ill->ill_mcast_type = MLD_V2_ROUTER; 1840 ill->ill_mcast_v1_time = 0; 1841 ill->ill_mcast_v1_tset = 0; 1842 atomic_add_16(&ifp->illif_mcast_v1, -1); 1843 } 1844 } 1845 mutex_exit(&ill->ill_lock); 1846 } 1847 } 1848 rw_exit(&ill_g_lock); 1849 mutex_enter(&mld_slowtimeout_lock); 1850 mld_slowtimeout_id = timeout(mld_slowtimo, NULL, 1851 MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL)); 1852 mutex_exit(&mld_slowtimeout_lock); 1853 } 1854 1855 /* 1856 * igmp_sendpkt: 1857 * This will send to ip_wput like icmp_inbound. 1858 * Note that the lower ill (on which the membership is kept) is used 1859 * as an upper ill to pass in the multicast parameters. 1860 */ 1861 static void 1862 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr) 1863 { 1864 mblk_t *mp; 1865 igmpa_t *igmpa; 1866 uint8_t *rtralert; 1867 ipha_t *ipha; 1868 int hdrlen = sizeof (ipha_t) + RTRALERT_LEN; 1869 size_t size = hdrlen + sizeof (igmpa_t); 1870 ipif_t *ipif = ilm->ilm_ipif; 1871 ill_t *ill = ipif->ipif_ill; /* Will be the "lower" ill */ 1872 mblk_t *first_mp; 1873 ipsec_out_t *io; 1874 1875 /* 1876 * We need to make sure this packet goes out on an ipif. 
If 1877 * there is some global policy match in ip_wput_ire, we need 1878 * to get to the right interface after IPSEC processing. 1879 * To make sure this multicast packet goes out on the right 1880 * interface, we attach an ipsec_out and initialize ill_index 1881 * like we did in ip_wput. To make sure that this packet does 1882 * not get forwarded on other interfaces or looped back, we 1883 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop 1884 * to B_FALSE. 1885 * 1886 * We also need to make sure that this does not get load balanced 1887 * if it hits ip_newroute_ipif. So, we initialize ipsec_out_attach_if 1888 * here. If it gets load balanced, switches supporting igmp snooping 1889 * will send the packet that it receives for this multicast group 1890 * to the interface that we are sending on. As we have joined the 1891 * multicast group on this ill, by sending the packet out on this 1892 * ill, we receive all the packets back on this ill. 1893 */ 1894 first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI); 1895 if (first_mp == NULL) 1896 return; 1897 1898 first_mp->b_datap->db_type = M_CTL; 1899 first_mp->b_wptr += sizeof (ipsec_info_t); 1900 bzero(first_mp->b_rptr, sizeof (ipsec_info_t)); 1901 /* ipsec_out_secure is B_FALSE now */ 1902 io = (ipsec_out_t *)first_mp->b_rptr; 1903 io->ipsec_out_type = IPSEC_OUT; 1904 io->ipsec_out_len = sizeof (ipsec_out_t); 1905 io->ipsec_out_use_global_policy = B_TRUE; 1906 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 1907 io->ipsec_out_attach_if = B_TRUE; 1908 io->ipsec_out_multicast_loop = B_FALSE; 1909 io->ipsec_out_dontroute = B_TRUE; 1910 io->ipsec_out_zoneid = ilm->ilm_zoneid; 1911 1912 mp = allocb(size, BPRI_HI); 1913 if (mp == NULL) { 1914 freemsg(first_mp); 1915 return; 1916 } 1917 mp->b_wptr = mp->b_rptr + size; 1918 first_mp->b_cont = mp; 1919 1920 ipha = (ipha_t *)mp->b_rptr; 1921 rtralert = (uint8_t *)&(ipha[1]); 1922 igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]); 1923 igmpa->igmpa_type = type; 
1924 igmpa->igmpa_code = 0; 1925 igmpa->igmpa_group = ilm->ilm_addr; 1926 igmpa->igmpa_cksum = 0; 1927 igmpa->igmpa_cksum = IP_CSUM(mp, hdrlen, 0); 1928 if (igmpa->igmpa_cksum == 0) 1929 igmpa->igmpa_cksum = 0xffff; 1930 1931 rtralert[0] = IPOPT_COPY & IPOPT_RTRALERT; 1932 rtralert[1] = RTRALERT_LEN; 1933 rtralert[2] = 0; 1934 rtralert[3] = 0; 1935 1936 ipha->ipha_version_and_hdr_length = (IP_VERSION << 4) 1937 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS); 1938 ipha->ipha_type_of_service = 0; 1939 ipha->ipha_length = htons(size); 1940 ipha->ipha_ident = 0; 1941 ipha->ipha_fragment_offset_and_flags = 0; 1942 ipha->ipha_ttl = IGMP_TTL; 1943 ipha->ipha_protocol = IPPROTO_IGMP; 1944 ipha->ipha_hdr_checksum = 0; 1945 ipha->ipha_dst = addr ? addr : igmpa->igmpa_group; 1946 ipha->ipha_src = ipif->ipif_src_addr; 1947 /* 1948 * Request loopback of the report if we are acting as a multicast 1949 * router, so that the process-level routing demon can hear it. 1950 */ 1951 /* 1952 * This will run multiple times for the same group if there are members 1953 * on the same group for multiple ipif's on the same ill. The 1954 * igmp_input code will suppress this due to the loopback thus we 1955 * always loopback membership report. 1956 */ 1957 ASSERT(ill->ill_rq != NULL); 1958 ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid); 1959 1960 ip_wput_multicast(ill->ill_wq, first_mp, ipif); 1961 1962 ++igmpstat.igps_snd_reports; 1963 } 1964 1965 /* 1966 * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated 1967 * with the passed-in ipif. The report will contain one group record 1968 * for each element of reclist. If this causes packet length to 1969 * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent. 1970 * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(), 1971 * and those buffers are freed here. 
1972 */ 1973 static void 1974 igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist) 1975 { 1976 ipsec_out_t *io; 1977 igmp3ra_t *igmp3ra; 1978 grphdra_t *grphdr; 1979 mblk_t *first_mp, *mp; 1980 ipha_t *ipha; 1981 uint8_t *rtralert; 1982 ipaddr_t *src_array; 1983 int i, j, numrec, more_src_cnt; 1984 size_t hdrsize, size, rsize; 1985 ill_t *ill = ipif->ipif_ill; 1986 mrec_t *rp, *cur_reclist; 1987 mrec_t *next_reclist = reclist; 1988 boolean_t morepkts; 1989 1990 /* if there aren't any records, there's nothing to send */ 1991 if (reclist == NULL) 1992 return; 1993 1994 hdrsize = sizeof (ipha_t) + RTRALERT_LEN; 1995 nextpkt: 1996 size = hdrsize + sizeof (igmp3ra_t); 1997 morepkts = B_FALSE; 1998 more_src_cnt = 0; 1999 cur_reclist = next_reclist; 2000 numrec = 0; 2001 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2002 rsize = sizeof (grphdra_t) + 2003 (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t)); 2004 if (size + rsize > ill->ill_max_frag) { 2005 if (rp == cur_reclist) { 2006 /* 2007 * If the first mrec we looked at is too big 2008 * to fit in a single packet (i.e the source 2009 * list is too big), we must either truncate 2010 * the list (if TO_EX or IS_EX), or send 2011 * multiple reports for the same group (all 2012 * other types). 2013 */ 2014 int srcspace, srcsperpkt; 2015 srcspace = ill->ill_max_frag - (size + 2016 sizeof (grphdra_t)); 2017 srcsperpkt = srcspace / sizeof (ipaddr_t); 2018 /* 2019 * Increment size and numrec, because we will 2020 * be sending a record for the mrec we're 2021 * looking at now. 2022 */ 2023 size += sizeof (grphdra_t) + 2024 (srcsperpkt * sizeof (ipaddr_t)); 2025 numrec++; 2026 if (rp->mrec_type == MODE_IS_EXCLUDE || 2027 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2028 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2029 if (rp->mrec_next == NULL) { 2030 /* no more packets to send */ 2031 break; 2032 } else { 2033 /* 2034 * more packets, but we're 2035 * done with this mrec. 
2036 */ 2037 next_reclist = rp->mrec_next; 2038 } 2039 } else { 2040 more_src_cnt = rp->mrec_srcs.sl_numsrc 2041 - srcsperpkt; 2042 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2043 /* 2044 * We'll fix up this mrec (remove the 2045 * srcs we've already sent) before 2046 * returning to nextpkt above. 2047 */ 2048 next_reclist = rp; 2049 } 2050 } else { 2051 next_reclist = rp; 2052 } 2053 morepkts = B_TRUE; 2054 break; 2055 } 2056 size += rsize; 2057 numrec++; 2058 } 2059 2060 /* 2061 * See comments in igmp_sendpkt() about initializing for ipsec and 2062 * load balancing requirements. 2063 */ 2064 first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI); 2065 if (first_mp == NULL) 2066 goto free_reclist; 2067 2068 first_mp->b_datap->db_type = M_CTL; 2069 first_mp->b_wptr += sizeof (ipsec_info_t); 2070 bzero(first_mp->b_rptr, sizeof (ipsec_info_t)); 2071 /* ipsec_out_secure is B_FALSE now */ 2072 io = (ipsec_out_t *)first_mp->b_rptr; 2073 io->ipsec_out_type = IPSEC_OUT; 2074 io->ipsec_out_len = sizeof (ipsec_out_t); 2075 io->ipsec_out_use_global_policy = B_TRUE; 2076 io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex; 2077 io->ipsec_out_attach_if = B_TRUE; 2078 io->ipsec_out_multicast_loop = B_FALSE; 2079 io->ipsec_out_dontroute = B_TRUE; 2080 io->ipsec_out_zoneid = ipif->ipif_zoneid; 2081 2082 mp = allocb(size, BPRI_HI); 2083 if (mp == NULL) { 2084 freemsg(first_mp); 2085 goto free_reclist; 2086 } 2087 bzero((char *)mp->b_rptr, size); 2088 mp->b_wptr = (uchar_t *)(mp->b_rptr + size); 2089 first_mp->b_cont = mp; 2090 2091 ipha = (ipha_t *)mp->b_rptr; 2092 rtralert = (uint8_t *)&(ipha[1]); 2093 igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]); 2094 grphdr = (grphdra_t *)&(igmp3ra[1]); 2095 2096 rp = cur_reclist; 2097 for (i = 0; i < numrec; i++) { 2098 grphdr->grphdra_type = rp->mrec_type; 2099 grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2100 grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group); 2101 src_array = (ipaddr_t *)&(grphdr[1]); 2102 2103 for (j = 0; j 
< rp->mrec_srcs.sl_numsrc; j++) 2104 src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]); 2105 2106 grphdr = (grphdra_t *)&(src_array[j]); 2107 rp = rp->mrec_next; 2108 } 2109 2110 igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT; 2111 igmp3ra->igmp3ra_numrec = htons(numrec); 2112 igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0); 2113 2114 rtralert[0] = IPOPT_COPY & IPOPT_RTRALERT; 2115 rtralert[1] = RTRALERT_LEN; 2116 rtralert[2] = 0; 2117 rtralert[3] = 0; 2118 2119 ipha->ipha_version_and_hdr_length = IP_VERSION << 4 2120 | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS); 2121 ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL; 2122 ipha->ipha_length = htons(size); 2123 ipha->ipha_ttl = IGMP_TTL; 2124 ipha->ipha_protocol = IPPROTO_IGMP; 2125 ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP); 2126 ipha->ipha_src = ipif->ipif_src_addr; 2127 2128 /* 2129 * Request loopback of the report if we are acting as a multicast 2130 * router, so that the process-level routing daemon can hear it. 2131 * 2132 * This will run multiple times for the same group if there are 2133 * members on the same group for multiple ipifs on the same ill. 2134 * The igmp_input code will suppress this due to the loopback; 2135 * thus we always loopback membership report. 
2136 */ 2137 ASSERT(ill->ill_rq != NULL); 2138 ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid); 2139 2140 ip_wput_multicast(ill->ill_wq, first_mp, ipif); 2141 2142 ++igmpstat.igps_snd_reports; 2143 2144 if (morepkts) { 2145 if (more_src_cnt > 0) { 2146 int index, mvsize; 2147 slist_t *sl = &next_reclist->mrec_srcs; 2148 index = sl->sl_numsrc; 2149 mvsize = more_src_cnt * sizeof (in6_addr_t); 2150 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2151 mvsize); 2152 sl->sl_numsrc = more_src_cnt; 2153 } 2154 goto nextpkt; 2155 } 2156 2157 free_reclist: 2158 while (reclist != NULL) { 2159 rp = reclist->mrec_next; 2160 mi_free(reclist); 2161 reclist = rp; 2162 } 2163 } 2164 2165 /* 2166 * mld_input: 2167 */ 2168 /* ARGSUSED */ 2169 void 2170 mld_input(queue_t *q, mblk_t *mp, ill_t *ill) 2171 { 2172 ip6_t *ip6h = (ip6_t *)(mp->b_rptr); 2173 mld_hdr_t *mldh; 2174 ilm_t *ilm; 2175 ipif_t *ipif; 2176 uint16_t hdr_length, exthdr_length; 2177 in6_addr_t *v6group_ptr, *lcladdr_ptr; 2178 uint_t next; 2179 int mldlen; 2180 2181 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal); 2182 2183 /* Make sure the src address of the packet is link-local */ 2184 if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) { 2185 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2186 freemsg(mp); 2187 return; 2188 } 2189 2190 if (ip6h->ip6_hlim != 1) { 2191 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit); 2192 freemsg(mp); 2193 return; 2194 } 2195 2196 /* Get to the icmp header part */ 2197 if (ip6h->ip6_nxt != IPPROTO_ICMPV6) { 2198 hdr_length = ip_hdr_length_v6(mp, ip6h); 2199 exthdr_length = hdr_length - IPV6_HDR_LEN; 2200 } else { 2201 hdr_length = IPV6_HDR_LEN; 2202 exthdr_length = 0; 2203 } 2204 mldlen = ntohs(ip6h->ip6_plen) - exthdr_length; 2205 2206 /* An MLD packet must at least be 24 octets to be valid */ 2207 if (mldlen < MLD_MINLEN) { 2208 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2209 freemsg(mp); 2210 return; 2211 } 2212 2213 mldh = (mld_hdr_t 
*)(&mp->b_rptr[hdr_length]); 2214 2215 switch (mldh->mld_type) { 2216 case MLD_LISTENER_QUERY: 2217 /* 2218 * packet length differentiates between v1 and v2. v1 2219 * query should be exactly 24 octets long; v2 is >= 28. 2220 */ 2221 if (mldlen == MLD_MINLEN) { 2222 next = mld_query_in(mldh, ill); 2223 } else if (mldlen >= MLD_V2_QUERY_MINLEN) { 2224 next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen); 2225 } else { 2226 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2227 freemsg(mp); 2228 return; 2229 } 2230 if (next == 0) { 2231 freemsg(mp); 2232 return; 2233 } 2234 2235 if (next != INFINITY) 2236 mld_start_timers(next); 2237 break; 2238 2239 case MLD_LISTENER_REPORT: { 2240 2241 ASSERT(ill->ill_ipif != NULL); 2242 /* 2243 * For fast leave to work, we have to know that we are the 2244 * last person to send a report for this group. Reports 2245 * generated by us are looped back since we could potentially 2246 * be a multicast router, so discard reports sourced by me. 2247 */ 2248 lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet); 2249 mutex_enter(&ill->ill_lock); 2250 for (ipif = ill->ill_ipif; ipif != NULL; 2251 ipif = ipif->ipif_next) { 2252 if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr, 2253 lcladdr_ptr)) { 2254 if (ip_debug > 1) { 2255 char buf1[INET6_ADDRSTRLEN]; 2256 char buf2[INET6_ADDRSTRLEN]; 2257 2258 (void) mi_strlog(ill->ill_rq, 2259 1, 2260 SL_TRACE, 2261 "mld_input: we are only " 2262 "member src %s ipif_local %s", 2263 inet_ntop(AF_INET6, lcladdr_ptr, 2264 buf1, sizeof (buf1)), 2265 inet_ntop(AF_INET6, 2266 &ipif->ipif_v6lcl_addr, 2267 buf2, sizeof (buf2))); 2268 } 2269 mutex_exit(&ill->ill_lock); 2270 freemsg(mp); 2271 return; 2272 } 2273 } 2274 mutex_exit(&ill->ill_lock); 2275 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses); 2276 2277 v6group_ptr = &mldh->mld_addr; 2278 if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) { 2279 BUMP_MIB(ill->ill_icmp6_mib, 2280 ipv6IfIcmpInGroupMembBadReports); 2281 freemsg(mp); 2282 return; 2283 } 2284 2285 2286 
/* 2287 * If we belong to the group being reported, and we are a 2288 * 'Delaying member' per the RFC terminology, stop our timer 2289 * for that group and 'clear flag' i.e. mark ilm_state as 2290 * IGMP_OTHERMEMBER. With zones, there can be multiple group 2291 * membership entries for the same group address (one per zone) 2292 * so we need to walk the ill_ilm list. 2293 */ 2294 mutex_enter(&ill->ill_lock); 2295 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2296 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr)) 2297 continue; 2298 BUMP_MIB(ill->ill_icmp6_mib, 2299 ipv6IfIcmpInGroupMembOurReports); 2300 2301 ilm->ilm_timer = INFINITY; 2302 ilm->ilm_state = IGMP_OTHERMEMBER; 2303 } 2304 mutex_exit(&ill->ill_lock); 2305 break; 2306 } 2307 case MLD_LISTENER_REDUCTION: 2308 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions); 2309 break; 2310 } 2311 /* 2312 * All MLD packets have already been passed up to any 2313 * process(es) listening on a ICMP6 raw socket. This 2314 * has been accomplished in ip_deliver_local_v6 prior to 2315 * this function call. It is assumed that the multicast daemon 2316 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the 2317 * ICMP6_FILTER socket option to only receive the MLD messages) 2318 * Thus we can free the MLD message block here 2319 */ 2320 freemsg(mp); 2321 } 2322 2323 /* 2324 * Handles an MLDv1 Listener Query. Returns 0 on error, or the appropriate 2325 * (non-zero, unsigned) timer value to be set on success. 2326 */ 2327 static uint_t 2328 mld_query_in(mld_hdr_t *mldh, ill_t *ill) 2329 { 2330 ilm_t *ilm; 2331 int timer; 2332 uint_t next; 2333 in6_addr_t *v6group; 2334 2335 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2336 2337 /* 2338 * In the MLD specification, there are 3 states and a flag. 2339 * 2340 * In Non-Listener state, we simply don't have a membership record. 
2341 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY) 2342 * In Idle Member state, our timer is not running (ilm->ilm_timer == 2343 * INFINITY) 2344 * 2345 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if 2346 * we have heard a report from another member, or IGMP_IREPORTEDLAST 2347 * if I sent the last report. 2348 */ 2349 v6group = &mldh->mld_addr; 2350 if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) && 2351 ((!IN6_IS_ADDR_MULTICAST(v6group)))) { 2352 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries); 2353 return (0); 2354 } 2355 2356 /* Need to do compatibility mode checking */ 2357 mutex_enter(&ill->ill_lock); 2358 ill->ill_mcast_v1_time = 0; 2359 ill->ill_mcast_v1_tset = 1; 2360 if (ill->ill_mcast_type == MLD_V2_ROUTER) { 2361 ip1dbg(("Received MLDv1 Query on %s, switching mode to " 2362 "MLD_V1_ROUTER\n", ill->ill_name)); 2363 atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1); 2364 ill->ill_mcast_type = MLD_V1_ROUTER; 2365 } 2366 mutex_exit(&ill->ill_lock); 2367 2368 timer = (int)ntohs(mldh->mld_maxdelay); 2369 if (ip_debug > 1) { 2370 (void) mi_strlog(ill->ill_rq, 1, SL_TRACE, 2371 "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x", 2372 timer, (int)mldh->mld_type); 2373 } 2374 2375 /* 2376 * -Start the timers in all of our membership records for 2377 * the physical interface on which the query arrived, 2378 * excl: 2379 * 1. those that belong to the "all hosts" group, 2380 * 2. those with 0 scope, or 1 node-local scope. 2381 * 2382 * -Restart any timer that is already running but has a value 2383 * longer that the requested timeout. 2384 * -Use the value specified in the query message as the 2385 * maximum timeout. 
2386 */ 2387 next = INFINITY; 2388 mutex_enter(&ill->ill_lock); 2389 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2390 ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr)); 2391 2392 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2393 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2394 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr)) 2395 continue; 2396 if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, 2397 &ipv6_all_hosts_mcast)) && 2398 (IN6_IS_ADDR_UNSPECIFIED(v6group)) || 2399 (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) { 2400 if (timer == 0) { 2401 /* Respond immediately */ 2402 ilm->ilm_timer = INFINITY; 2403 ilm->ilm_state = IGMP_IREPORTEDLAST; 2404 mutex_exit(&ill->ill_lock); 2405 mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL); 2406 mutex_enter(&ill->ill_lock); 2407 break; 2408 } 2409 if (ilm->ilm_timer > timer) { 2410 MCAST_RANDOM_DELAY(ilm->ilm_timer, timer); 2411 if (ilm->ilm_timer < next) 2412 next = ilm->ilm_timer; 2413 } 2414 break; 2415 } 2416 } 2417 mutex_exit(&ill->ill_lock); 2418 2419 return (next); 2420 } 2421 2422 /* 2423 * Handles an MLDv2 Listener Query. On error, returns 0; on success, 2424 * returns the appropriate (non-zero, unsigned) timer value (which may 2425 * be INFINITY) to be set. 
2426 */ 2427 static uint_t 2428 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen) 2429 { 2430 ilm_t *ilm; 2431 in6_addr_t *v6group, *src_array; 2432 uint_t next, numsrc, i, mrd, delay, qqi; 2433 uint8_t qrv; 2434 2435 v6group = &mld2q->mld2q_addr; 2436 numsrc = ntohs(mld2q->mld2q_numsrc); 2437 2438 /* make sure numsrc matches packet size */ 2439 if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) { 2440 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors); 2441 return (0); 2442 } 2443 src_array = (in6_addr_t *)&mld2q[1]; 2444 2445 BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries); 2446 2447 /* extract Maximum Response Delay from code in header */ 2448 mrd = ntohs(mld2q->mld2q_mxrc); 2449 if (mrd >= MLD_V2_MAXRT_FPMIN) { 2450 uint_t hdrval, mant, exp; 2451 hdrval = mrd; 2452 mant = hdrval & MLD_V2_MAXRT_MANT_MASK; 2453 exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12; 2454 mrd = (mant | 0x1000) << (exp + 3); 2455 } 2456 MCAST_RANDOM_DELAY(delay, mrd); 2457 next = (unsigned)INFINITY; 2458 2459 if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0) 2460 ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS; 2461 else 2462 ill->ill_mcast_rv = qrv; 2463 2464 if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) { 2465 uint_t mant, exp; 2466 mant = qqi & MLD_V2_QQI_MANT_MASK; 2467 exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12; 2468 qqi = (mant | 0x10) << (exp + 3); 2469 } 2470 ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi; 2471 2472 /* 2473 * If we have a pending general query response that's scheduled 2474 * sooner than the delay we calculated for this response, then 2475 * no action is required (MLDv2 draft section 6.2 rule 1) 2476 */ 2477 mutex_enter(&ill->ill_lock); 2478 if (ill->ill_global_timer < delay) { 2479 mutex_exit(&ill->ill_lock); 2480 return (next); 2481 } 2482 mutex_exit(&ill->ill_lock); 2483 2484 /* 2485 * Now take action depending on query type: general, 2486 * group specific, or group/source specific. 
2487 */ 2488 if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) { 2489 /* 2490 * general query 2491 * We know global timer is either not running or is 2492 * greater than our calculated delay, so reset it to 2493 * our delay (random value in range [0, response time]) 2494 */ 2495 mutex_enter(&ill->ill_lock); 2496 ill->ill_global_timer = delay; 2497 next = ill->ill_global_timer; 2498 mutex_exit(&ill->ill_lock); 2499 2500 } else { 2501 /* group or group/source specific query */ 2502 mutex_enter(&ill->ill_lock); 2503 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 2504 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) || 2505 IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) || 2506 IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) || 2507 !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr)) 2508 continue; 2509 2510 /* 2511 * If the query is group specific or we have a 2512 * pending group specific query, the response is 2513 * group specific (pending sources list should be 2514 * empty). Otherwise, need to update the pending 2515 * sources list for the group and source specific 2516 * response. 2517 */ 2518 if (numsrc == 0 || (ilm->ilm_timer < INFINITY && 2519 SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) { 2520 group_query: 2521 FREE_SLIST(ilm->ilm_pendsrcs); 2522 ilm->ilm_pendsrcs = NULL; 2523 } else { 2524 boolean_t overflow; 2525 slist_t *pktl; 2526 if (numsrc > MAX_FILTER_SIZE || 2527 (ilm->ilm_pendsrcs == NULL && 2528 (ilm->ilm_pendsrcs = l_alloc()) == NULL)) { 2529 /* 2530 * We've been sent more sources than 2531 * we can deal with; or we can't deal 2532 * with a source list at all. Revert 2533 * to a group specific query. 
2534 */ 2535 goto group_query; 2536 } 2537 if ((pktl = l_alloc()) == NULL) 2538 goto group_query; 2539 pktl->sl_numsrc = numsrc; 2540 for (i = 0; i < numsrc; i++) 2541 pktl->sl_addr[i] = src_array[i]; 2542 l_union_in_a(ilm->ilm_pendsrcs, pktl, 2543 &overflow); 2544 l_free(pktl); 2545 if (overflow) 2546 goto group_query; 2547 } 2548 /* set timer to soonest value */ 2549 ilm->ilm_timer = MIN(ilm->ilm_timer, delay); 2550 if (ilm->ilm_timer < next) 2551 next = ilm->ilm_timer; 2552 break; 2553 } 2554 mutex_exit(&ill->ill_lock); 2555 } 2556 2557 return (next); 2558 } 2559 2560 /* 2561 * Send MLDv1 response packet with hoplimit 1 2562 */ 2563 static void 2564 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr) 2565 { 2566 mblk_t *mp; 2567 mld_hdr_t *mldh; 2568 ip6_t *ip6h; 2569 ip6_hbh_t *ip6hbh; 2570 struct ip6_opt_router *ip6router; 2571 size_t size = IPV6_HDR_LEN + sizeof (mld_hdr_t); 2572 ill_t *ill = ilm->ilm_ill; /* Will be the "lower" ill */ 2573 ipif_t *ipif; 2574 ip6i_t *ip6i; 2575 2576 /* 2577 * We need to place a router alert option in this packet. The length 2578 * of the options must be a multiple of 8. The hbh option header is 2 2579 * bytes followed by the 4 byte router alert option. That leaves 2580 * 2 bytes of pad for a total of 8 bytes. 2581 */ 2582 const int router_alert_length = 8; 2583 2584 ASSERT(ill->ill_isv6); 2585 2586 /* 2587 * We need to make sure that this packet does not get load balanced. 2588 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2589 * ip_newroute_ipif_v6 knows how to handle such packets. 2590 * If it gets load balanced, switches supporting MLD snooping 2591 * (in the future) will send the packet that it receives for this 2592 * multicast group to the interface that we are sending on. As we have 2593 * joined the multicast group on this ill, by sending the packet out 2594 * on this ill, we receive all the packets back on this ill. 
2595 */ 2596 size += sizeof (ip6i_t) + router_alert_length; 2597 mp = allocb(size, BPRI_HI); 2598 if (mp == NULL) 2599 return; 2600 bzero(mp->b_rptr, size); 2601 mp->b_wptr = mp->b_rptr + size; 2602 2603 ip6i = (ip6i_t *)mp->b_rptr; 2604 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2605 ip6i->ip6i_nxt = IPPROTO_RAW; 2606 ip6i->ip6i_flags = IP6I_ATTACH_IF | IP6I_HOPLIMIT; 2607 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2608 2609 ip6h = (ip6_t *)&ip6i[1]; 2610 ip6hbh = (struct ip6_hbh *)&ip6h[1]; 2611 ip6router = (struct ip6_opt_router *)&ip6hbh[1]; 2612 /* 2613 * A zero is a pad option of length 1. The bzero of the whole packet 2614 * above will pad between ip6router and mld. 2615 */ 2616 mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length); 2617 2618 mldh->mld_type = type; 2619 mldh->mld_addr = ilm->ilm_v6addr; 2620 2621 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2622 ip6router->ip6or_len = 2; 2623 ip6router->ip6or_value[0] = 0; 2624 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2625 2626 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2627 ip6hbh->ip6h_len = 0; 2628 2629 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2630 ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length); 2631 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2632 ip6h->ip6_hops = MLD_HOP_LIMIT; 2633 if (v6addr == NULL) 2634 ip6h->ip6_dst = ilm->ilm_v6addr; 2635 else 2636 ip6h->ip6_dst = *v6addr; 2637 2638 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2639 if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) { 2640 ip6h->ip6_src = ipif->ipif_v6src_addr; 2641 ipif_refrele(ipif); 2642 } else { 2643 /* Otherwise, use IPv6 default address selection. */ 2644 ip6h->ip6_src = ipv6_all_zeros; 2645 } 2646 2647 /* 2648 * Prepare for checksum by putting icmp length in the icmp 2649 * checksum field. The checksum is calculated in ip_wput_v6. 
2650 */ 2651 mldh->mld_cksum = htons(sizeof (*mldh)); 2652 2653 /* 2654 * ip_wput will automatically loopback the multicast packet to 2655 * the conn if multicast loopback is enabled. 2656 * The MIB stats corresponding to this outgoing MLD packet 2657 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2658 * ->icmp_update_out_mib_v6 function call. 2659 */ 2660 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2661 } 2662 2663 /* 2664 * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill. The 2665 * report will contain one multicast address record for each element of 2666 * reclist. If this causes packet length to exceed ill->ill_max_frag, 2667 * multiple reports are sent. reclist is assumed to be made up of 2668 * buffers allocated by mcast_bldmrec(), and those buffers are freed here. 2669 */ 2670 static void 2671 mldv2_sendrpt(ill_t *ill, mrec_t *reclist) 2672 { 2673 mblk_t *mp; 2674 mld2r_t *mld2r; 2675 mld2mar_t *mld2mar; 2676 in6_addr_t *srcarray; 2677 ip6_t *ip6h; 2678 ip6_hbh_t *ip6hbh; 2679 ip6i_t *ip6i; 2680 struct ip6_opt_router *ip6router; 2681 size_t size, optlen, padlen, icmpsize, rsize; 2682 ipif_t *ipif; 2683 int i, numrec, more_src_cnt; 2684 mrec_t *rp, *cur_reclist; 2685 mrec_t *next_reclist = reclist; 2686 boolean_t morepkts; 2687 2688 /* If there aren't any records, there's nothing to send */ 2689 if (reclist == NULL) 2690 return; 2691 2692 ASSERT(ill->ill_isv6); 2693 2694 /* 2695 * Total option length (optlen + padlen) must be a multiple of 2696 * 8 bytes. We assume here that optlen <= 8, so the total option 2697 * length will be 8. Assert this in case anything ever changes. 
2698 */ 2699 optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router); 2700 ASSERT(optlen <= 8); 2701 padlen = 8 - optlen; 2702 nextpkt: 2703 icmpsize = sizeof (mld2r_t); 2704 size = IPV6_HDR_LEN + optlen + padlen + icmpsize; 2705 morepkts = B_FALSE; 2706 more_src_cnt = 0; 2707 for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL; 2708 rp = rp->mrec_next, numrec++) { 2709 rsize = sizeof (mld2mar_t) + 2710 (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t)); 2711 if (size + rsize > ill->ill_max_frag) { 2712 if (rp == cur_reclist) { 2713 /* 2714 * If the first mrec we looked at is too big 2715 * to fit in a single packet (i.e the source 2716 * list is too big), we must either truncate 2717 * the list (if TO_EX or IS_EX), or send 2718 * multiple reports for the same group (all 2719 * other types). 2720 */ 2721 int srcspace, srcsperpkt; 2722 srcspace = ill->ill_max_frag - 2723 (size + sizeof (mld2mar_t)); 2724 srcsperpkt = srcspace / sizeof (in6_addr_t); 2725 /* 2726 * Increment icmpsize and size, because we will 2727 * be sending a record for the mrec we're 2728 * looking at now. 2729 */ 2730 rsize = sizeof (mld2mar_t) + 2731 (srcsperpkt * sizeof (in6_addr_t)); 2732 icmpsize += rsize; 2733 size += rsize; 2734 if (rp->mrec_type == MODE_IS_EXCLUDE || 2735 rp->mrec_type == CHANGE_TO_EXCLUDE) { 2736 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2737 if (rp->mrec_next == NULL) { 2738 /* no more packets to send */ 2739 break; 2740 } else { 2741 /* 2742 * more packets, but we're 2743 * done with this mrec. 2744 */ 2745 next_reclist = rp->mrec_next; 2746 } 2747 } else { 2748 more_src_cnt = rp->mrec_srcs.sl_numsrc 2749 - srcsperpkt; 2750 rp->mrec_srcs.sl_numsrc = srcsperpkt; 2751 /* 2752 * We'll fix up this mrec (remove the 2753 * srcs we've already sent) before 2754 * returning to nextpkt above. 
2755 */ 2756 next_reclist = rp; 2757 } 2758 } else { 2759 next_reclist = rp; 2760 } 2761 morepkts = B_TRUE; 2762 break; 2763 } 2764 icmpsize += rsize; 2765 size += rsize; 2766 } 2767 2768 /* 2769 * We need to make sure that this packet does not get load balanced. 2770 * So, we allocate an ip6i_t and set ATTACH_IF. ip_wput_v6 and 2771 * ip_newroute_ipif_v6 know how to handle such packets. 2772 * If it gets load balanced, switches supporting MLD snooping 2773 * (in the future) will send the packet that it receives for this 2774 * multicast group to the interface that we are sending on. As we have 2775 * joined the multicast group on this ill, by sending the packet out 2776 * on this ill, we receive all the packets back on this ill. 2777 */ 2778 size += sizeof (ip6i_t); 2779 mp = allocb(size, BPRI_HI); 2780 if (mp == NULL) 2781 goto free_reclist; 2782 bzero(mp->b_rptr, size); 2783 mp->b_wptr = mp->b_rptr + size; 2784 2785 ip6i = (ip6i_t *)mp->b_rptr; 2786 ip6i->ip6i_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2787 ip6i->ip6i_nxt = IPPROTO_RAW; 2788 ip6i->ip6i_flags = IP6I_ATTACH_IF; 2789 ip6i->ip6i_ifindex = ill->ill_phyint->phyint_ifindex; 2790 2791 ip6h = (ip6_t *)&(ip6i[1]); 2792 ip6hbh = (ip6_hbh_t *)&(ip6h[1]); 2793 ip6router = (struct ip6_opt_router *)&(ip6hbh[1]); 2794 mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen); 2795 mld2mar = (mld2mar_t *)&(mld2r[1]); 2796 2797 ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW; 2798 ip6h->ip6_plen = htons(optlen + padlen + icmpsize); 2799 ip6h->ip6_nxt = IPPROTO_HOPOPTS; 2800 ip6h->ip6_hops = MLD_HOP_LIMIT; 2801 ip6h->ip6_dst = ipv6_all_v2rtrs_mcast; 2802 /* ipif returned by ipif_lookup_zoneid is link-local (if present) */ 2803 if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) { 2804 ip6h->ip6_src = ipif->ipif_v6src_addr; 2805 ipif_refrele(ipif); 2806 } else { 2807 /* otherwise, use IPv6 default address selection. 
*/ 2808 ip6h->ip6_src = ipv6_all_zeros; 2809 } 2810 2811 ip6hbh->ip6h_nxt = IPPROTO_ICMPV6; 2812 /* 2813 * ip6h_len is the number of 8-byte words, not including the first 2814 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0. 2815 */ 2816 ip6hbh->ip6h_len = 0; 2817 2818 ip6router->ip6or_type = IP6OPT_ROUTER_ALERT; 2819 ip6router->ip6or_len = 2; 2820 ip6router->ip6or_value[0] = 0; 2821 ip6router->ip6or_value[1] = IP6_ALERT_MLD; 2822 2823 mld2r->mld2r_type = MLD_V2_LISTENER_REPORT; 2824 mld2r->mld2r_nummar = htons(numrec); 2825 /* 2826 * Prepare for the checksum by putting icmp length in the icmp 2827 * checksum field. The checksum is calculated in ip_wput_v6. 2828 */ 2829 mld2r->mld2r_cksum = htons(icmpsize); 2830 2831 for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) { 2832 mld2mar->mld2mar_type = rp->mrec_type; 2833 mld2mar->mld2mar_auxlen = 0; 2834 mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc); 2835 mld2mar->mld2mar_group = rp->mrec_group; 2836 srcarray = (in6_addr_t *)&(mld2mar[1]); 2837 2838 for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++) 2839 srcarray[i] = rp->mrec_srcs.sl_addr[i]; 2840 2841 mld2mar = (mld2mar_t *)&(srcarray[i]); 2842 } 2843 2844 /* 2845 * ip_wput will automatically loopback the multicast packet to 2846 * the conn if multicast loopback is enabled. 2847 * The MIB stats corresponding to this outgoing MLD packet 2848 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6 2849 * ->icmp_update_out_mib_v6 function call. 
2850 */ 2851 (void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT); 2852 2853 if (morepkts) { 2854 if (more_src_cnt > 0) { 2855 int index, mvsize; 2856 slist_t *sl = &next_reclist->mrec_srcs; 2857 index = sl->sl_numsrc; 2858 mvsize = more_src_cnt * sizeof (in6_addr_t); 2859 (void) memmove(&sl->sl_addr[0], &sl->sl_addr[index], 2860 mvsize); 2861 sl->sl_numsrc = more_src_cnt; 2862 } 2863 goto nextpkt; 2864 } 2865 2866 free_reclist: 2867 while (reclist != NULL) { 2868 rp = reclist->mrec_next; 2869 mi_free(reclist); 2870 reclist = rp; 2871 } 2872 } 2873 2874 static mrec_t * 2875 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist, 2876 mrec_t *next) 2877 { 2878 mrec_t *rp; 2879 int i; 2880 2881 if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) && 2882 SLIST_IS_EMPTY(srclist)) 2883 return (next); 2884 2885 rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI); 2886 if (rp == NULL) 2887 return (next); 2888 2889 rp->mrec_next = next; 2890 rp->mrec_type = type; 2891 rp->mrec_auxlen = 0; 2892 rp->mrec_group = *grp; 2893 if (srclist == NULL) { 2894 rp->mrec_srcs.sl_numsrc = 0; 2895 } else { 2896 rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc; 2897 for (i = 0; i < srclist->sl_numsrc; i++) 2898 rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i]; 2899 } 2900 2901 return (rp); 2902 } 2903 2904 /* 2905 * Set up initial retransmit state. If memory cannot be allocated for 2906 * the source lists, simply create as much state as is possible; memory 2907 * allocation failures are considered one type of transient error that 2908 * the retransmissions are designed to overcome (and if they aren't 2909 * transient, there are bigger problems than failing to notify the 2910 * router about multicast group membership state changes). 
2911 */ 2912 static void 2913 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype, 2914 slist_t *flist) 2915 { 2916 /* 2917 * There are only three possibilities for rtype: 2918 * New join, transition from INCLUDE {} to INCLUDE {flist} 2919 * => rtype is ALLOW_NEW_SOURCES 2920 * New join, transition from INCLUDE {} to EXCLUDE {flist} 2921 * => rtype is CHANGE_TO_EXCLUDE 2922 * State change that involves a filter mode change 2923 * => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE 2924 */ 2925 ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE || 2926 rtype == ALLOW_NEW_SOURCES); 2927 2928 rtxp->rtx_cnt = ill->ill_mcast_rv; 2929 2930 switch (rtype) { 2931 case CHANGE_TO_EXCLUDE: 2932 rtxp->rtx_fmode_cnt = ill->ill_mcast_rv; 2933 CLEAR_SLIST(rtxp->rtx_allow); 2934 COPY_SLIST(flist, rtxp->rtx_block); 2935 break; 2936 case ALLOW_NEW_SOURCES: 2937 case CHANGE_TO_INCLUDE: 2938 rtxp->rtx_fmode_cnt = 2939 rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv; 2940 CLEAR_SLIST(rtxp->rtx_block); 2941 COPY_SLIST(flist, rtxp->rtx_allow); 2942 break; 2943 } 2944 } 2945 2946 /* 2947 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and 2948 * RFC 3376 section 5.1, covers three cases: 2949 * * The current state change is a filter mode change 2950 * Set filter mode retransmit counter; set retransmit allow or 2951 * block list to new source list as appropriate, and clear the 2952 * retransmit list that was not set; send TO_IN or TO_EX with 2953 * new source list. 2954 * * The current state change is a source list change, but the filter 2955 * mode retransmit counter is > 0 2956 * Decrement filter mode retransmit counter; set retransmit 2957 * allow or block list to new source list as appropriate, 2958 * and clear the retransmit list that was not set; send TO_IN 2959 * or TO_EX with new source list. 2960 * * The current state change is a source list change, and the filter 2961 * mode retransmit counter is 0. 
/*
 * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
 * RFC 3376 section 5.1, covers three cases:
 *  * The current state change is a filter mode change
 *	Set filter mode retransmit counter; set retransmit allow or
 *	block list to new source list as appropriate, and clear the
 *	retransmit list that was not set; send TO_IN or TO_EX with
 *	new source list.
 *  * The current state change is a source list change, but the filter
 *    mode retransmit counter is > 0
 *	Decrement filter mode retransmit counter; set retransmit
 *	allow or block list to new source list as appropriate,
 *	and clear the retransmit list that was not set; send TO_IN
 *	or TO_EX with new source list.
 *  * The current state change is a source list change, and the filter
 *    mode retransmit counter is 0.
 *	Merge existing rtx allow and block lists with new state:
 *	  rtx_allow = (new allow + rtx_allow) - new block
 *	  rtx_block = (new block + rtx_block) - new allow
 *	Send ALLOW and BLOCK records for new retransmit lists;
 *	decrement retransmit counter.
 *
 * As is the case for mcast_init_rtx(), memory allocation failures are
 * acceptable; we just create as much state as we can.
 *
 * `ilm' supplies the retransmit state (ilm_rtx) that is updated, `mreclist'
 * is the list of records describing the current state change and `flist'
 * is the new source list.  Returns the record list that should be sent;
 * this is `mreclist' itself (possibly rewritten in place and shortened),
 * or a list with one record from mcast_bldmrec() linked in front of it.
 * A NULL `mreclist' is returned as is.
 */
static mrec_t *
mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
{
	ill_t *ill;
	rtx_state_t *rtxp = &ilm->ilm_rtx;
	mcast_record_t txtype;
	mrec_t *rp, *rpnext, *rtnmrec;
	boolean_t ovf;

	/* use the ilm's own ill if it has one, else the ill of its ipif */
	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);

	/* no state change to merge */
	if (mreclist == NULL)
		return (mreclist);

	/*
	 * A filter mode change is indicated by a single mrec, which is
	 * either TO_IN or TO_EX.  In this case, we just need to set new
	 * retransmit state as if this were an initial join.  There is
	 * no change to the mrec list.
	 */
	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
		    &mreclist->mrec_srcs);
		return (mreclist);
	}

	/*
	 * Only the source list has changed
	 */
	rtxp->rtx_cnt = ill->ill_mcast_rv;
	if (rtxp->rtx_fmode_cnt > 0) {
		/* but we're still sending filter mode change reports */
		rtxp->rtx_fmode_cnt--;
		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
			CLEAR_SLIST(rtxp->rtx_block);
			COPY_SLIST(flist, rtxp->rtx_allow);
			txtype = CHANGE_TO_INCLUDE;
		} else {
			CLEAR_SLIST(rtxp->rtx_allow);
			COPY_SLIST(flist, rtxp->rtx_block);
			txtype = CHANGE_TO_EXCLUDE;
		}
		/* overwrite first mrec with new info */
		mreclist->mrec_type = txtype;
		l_copy(flist, &mreclist->mrec_srcs);
		/* then free any remaining mrecs */
		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
			/* fetch the link before the record is freed */
			rpnext = rp->mrec_next;
			mi_free(rp);
		}
		mreclist->mrec_next = NULL;
		rtnmrec = mreclist;
	} else {
		mrec_t *allow_mrec, *block_mrec;
		/*
		 * Just send the source change reports; but we need to
		 * recalculate the ALLOW and BLOCK lists based on previous
		 * state and new changes.
		 */
		rtnmrec = mreclist;
		allow_mrec = block_mrec = NULL;
		/* locate the ALLOW and the BLOCK record, if present */
		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
			    rp->mrec_type == BLOCK_OLD_SOURCES);
			if (rp->mrec_type == ALLOW_NEW_SOURCES)
				allow_mrec = rp;
			else
				block_mrec = rp;
		}
		/*
		 * Perform calculations:
		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
		 *   new_block = mrec_block + (rtx_block - mrec_allow)
		 *
		 * Each calc requires two steps, for example:
		 *   rtx_allow = rtx_allow - mrec_block;
		 *   new_allow = mrec_allow + rtx_allow;
		 *
		 * Store results in mrec lists, and then copy into rtx lists.
		 * We do it in this order in case the rtx list hasn't been
		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
		 * Overflows are also okay.
		 */
		if (block_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_allow,
			    &block_mrec->mrec_srcs);
		}
		if (allow_mrec != NULL) {
			l_difference_in_a(rtxp->rtx_block,
			    &allow_mrec->mrec_srcs);
			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
			    &ovf);
		}
		if (block_mrec != NULL) {
			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
			    &ovf);
			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
		} else {
			/*
			 * No BLOCK record in this change: build one from
			 * the retransmit block list and put it in front of
			 * the ALLOW record.
			 */
			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
		}
		if (allow_mrec != NULL) {
			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
		} else {
			/*
			 * No ALLOW record in this change: build one from
			 * the retransmit allow list and put it in front of
			 * the BLOCK record.
			 */
			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
		}
	}

	return (rtnmrec);
}