xref: /titanic_52/usr/src/uts/common/inet/ip/igmp.c (revision 0a0e9771ca0211c15f3ac4466b661c145feeb9e4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 /*
28  * Internet Group Management Protocol (IGMP) routines.
29  * Multicast Listener Discovery Protocol (MLD) routines.
30  *
31  * Written by Steve Deering, Stanford, May 1988.
32  * Modified by Rosen Sharma, Stanford, Aug 1994.
33  * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
34  *
35  * MULTICAST 3.5.1.1
36  */
37 
38 #include <sys/types.h>
39 #include <sys/stream.h>
40 #include <sys/stropts.h>
41 #include <sys/strlog.h>
42 #include <sys/strsun.h>
43 #include <sys/systm.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cmn_err.h>
47 #include <sys/atomic.h>
48 #include <sys/zone.h>
49 #include <sys/callb.h>
50 #include <sys/param.h>
51 #include <sys/socket.h>
52 #include <inet/ipclassifier.h>
53 #include <net/if.h>
54 #include <net/route.h>
55 #include <netinet/in.h>
56 #include <netinet/igmp_var.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 
60 #include <inet/common.h>
61 #include <inet/mi.h>
62 #include <inet/nd.h>
63 #include <inet/ip.h>
64 #include <inet/ip6.h>
65 #include <inet/ip_multi.h>
66 #include <inet/ip_listutils.h>
67 
68 #include <netinet/igmp.h>
69 #include <inet/ip_if.h>
70 #include <net/pfkeyv2.h>
71 #include <inet/ipsec_info.h>
72 
73 static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
74 static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
75 static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
76 static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
77 static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
78 static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
79 static void	igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist);
80 static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
81 static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
82 		    slist_t *srclist, mrec_t *next);
83 static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
84 		    mcast_record_t rtype, slist_t *flist);
85 static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
86 static void	mcast_signal_restart_thread(ip_stack_t *ipst);
87 
/*
 * Timer length conversions.  Timer values are always stored, and handed
 * to the timer functions, in milliseconds; default values and values
 * taken from the wire may be expressed in other units.
 *
 * "dsec" is decisecond, i.e. one tenth of a second -- obscure, but far
 * easier to abbreviate.
 */
#define	DSEC_TO_MSEC(dsec)	(100 * (dsec))
#define	SEC_TO_MSEC(sec)	(1000 * (sec))
98 
/*
 * A running timer (scheduled through timeout) can be cancelled when
 * another timer with a shorter timeout value is scheduled before it has
 * expired.  When the shorter timer expires, the original timer is updated
 * to account for the time that elapsed while the shorter timer ran; that
 * update, however, ignores the time the original timer had already spent
 * pending before it was preempted, i.e. the interval between the time it
 * was scheduled and the time it was cancelled.  The result can be delayed
 * multicast membership reports.  To avoid the problem, timers are tracked
 * using wallclock (absolute) time rather than deltas (relative time).
 *
 * CURRENT_MSTIME yields the current lbolt value converted to
 * milliseconds; it is what keeps timer scheduling and firing, and hence
 * the membership reports, on time.  A timer does not fire at exactly the
 * time it was scheduled to fire; a difference of a few milliseconds has
 * been observed.  CURRENT_OFFSET is the offset used to absorb that
 * difference.
 */

#define	CURRENT_MSTIME	((uint_t)(TICK_TO_MSEC(ddi_get_lbolt())))
#define	CURRENT_OFFSET	(999)
120 
121 /*
122  * The first multicast join will trigger the igmp timers / mld timers
123  * The unit for next is milliseconds.
124  */
125 static void
126 igmp_start_timers(unsigned next, ip_stack_t *ipst)
127 {
128 	int	time_left;
129 	int	ret;
130 
131 	ASSERT(next != 0 && next != INFINITY);
132 
133 	mutex_enter(&ipst->ips_igmp_timer_lock);
134 
135 	if (ipst->ips_igmp_timer_setter_active) {
136 		/*
137 		 * Serialize timer setters, one at a time. If the
138 		 * timer is currently being set by someone,
139 		 * just record the next time when it has to be
140 		 * invoked and return. The current setter will
141 		 * take care.
142 		 */
143 		ipst->ips_igmp_time_to_next =
144 		    MIN(ipst->ips_igmp_time_to_next, next);
145 		mutex_exit(&ipst->ips_igmp_timer_lock);
146 		return;
147 	} else {
148 		ipst->ips_igmp_timer_setter_active = B_TRUE;
149 	}
150 	if (ipst->ips_igmp_timeout_id == 0) {
151 		/*
152 		 * The timer is inactive. We need to start a timer
153 		 */
154 		ipst->ips_igmp_time_to_next = next;
155 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
156 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
157 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
158 		ipst->ips_igmp_timer_setter_active = B_FALSE;
159 		mutex_exit(&ipst->ips_igmp_timer_lock);
160 		return;
161 	}
162 
163 	/*
164 	 * The timer was scheduled sometime back for firing in
165 	 * 'igmp_time_to_next' ms and is active. We need to
166 	 * reschedule the timeout if the new 'next' will happen
167 	 * earlier than the currently scheduled timeout
168 	 */
169 	time_left = ipst->ips_igmp_timer_scheduled_last +
170 	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
171 	if (time_left < MSEC_TO_TICK(next)) {
172 		ipst->ips_igmp_timer_setter_active = B_FALSE;
173 		mutex_exit(&ipst->ips_igmp_timer_lock);
174 		return;
175 	}
176 
177 	mutex_exit(&ipst->ips_igmp_timer_lock);
178 	ret = untimeout(ipst->ips_igmp_timeout_id);
179 	mutex_enter(&ipst->ips_igmp_timer_lock);
180 	/*
181 	 * The timeout was cancelled, or the timeout handler
182 	 * completed, while we were blocked in the untimeout.
183 	 * No other thread could have set the timer meanwhile
184 	 * since we serialized all the timer setters. Thus
185 	 * no timer is currently active nor executing nor will
186 	 * any timer fire in the future. We start the timer now
187 	 * if needed.
188 	 */
189 	if (ret == -1) {
190 		ASSERT(ipst->ips_igmp_timeout_id == 0);
191 	} else {
192 		ASSERT(ipst->ips_igmp_timeout_id != 0);
193 		ipst->ips_igmp_timeout_id = 0;
194 	}
195 	if (ipst->ips_igmp_time_to_next != 0) {
196 		ipst->ips_igmp_time_to_next =
197 		    MIN(ipst->ips_igmp_time_to_next, next);
198 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
199 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
200 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
201 	}
202 	ipst->ips_igmp_timer_setter_active = B_FALSE;
203 	mutex_exit(&ipst->ips_igmp_timer_lock);
204 }
205 
206 /*
207  * mld_start_timers:
208  * The unit for next is milliseconds.
209  */
210 static void
211 mld_start_timers(unsigned next, ip_stack_t *ipst)
212 {
213 	int	time_left;
214 	int	ret;
215 
216 	ASSERT(next != 0 && next != INFINITY);
217 
218 	mutex_enter(&ipst->ips_mld_timer_lock);
219 	if (ipst->ips_mld_timer_setter_active) {
220 		/*
221 		 * Serialize timer setters, one at a time. If the
222 		 * timer is currently being set by someone,
223 		 * just record the next time when it has to be
224 		 * invoked and return. The current setter will
225 		 * take care.
226 		 */
227 		ipst->ips_mld_time_to_next =
228 		    MIN(ipst->ips_mld_time_to_next, next);
229 		mutex_exit(&ipst->ips_mld_timer_lock);
230 		return;
231 	} else {
232 		ipst->ips_mld_timer_setter_active = B_TRUE;
233 	}
234 	if (ipst->ips_mld_timeout_id == 0) {
235 		/*
236 		 * The timer is inactive. We need to start a timer
237 		 */
238 		ipst->ips_mld_time_to_next = next;
239 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
240 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
241 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
242 		ipst->ips_mld_timer_setter_active = B_FALSE;
243 		mutex_exit(&ipst->ips_mld_timer_lock);
244 		return;
245 	}
246 
247 	/*
248 	 * The timer was scheduled sometime back for firing in
249 	 * 'igmp_time_to_next' ms and is active. We need to
250 	 * reschedule the timeout if the new 'next' will happen
251 	 * earlier than the currently scheduled timeout
252 	 */
253 	time_left = ipst->ips_mld_timer_scheduled_last +
254 	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
255 	if (time_left < MSEC_TO_TICK(next)) {
256 		ipst->ips_mld_timer_setter_active = B_FALSE;
257 		mutex_exit(&ipst->ips_mld_timer_lock);
258 		return;
259 	}
260 
261 	mutex_exit(&ipst->ips_mld_timer_lock);
262 	ret = untimeout(ipst->ips_mld_timeout_id);
263 	mutex_enter(&ipst->ips_mld_timer_lock);
264 	/*
265 	 * The timeout was cancelled, or the timeout handler
266 	 * completed, while we were blocked in the untimeout.
267 	 * No other thread could have set the timer meanwhile
268 	 * since we serialized all the timer setters. Thus
269 	 * no timer is currently active nor executing nor will
270 	 * any timer fire in the future. We start the timer now
271 	 * if needed.
272 	 */
273 	if (ret == -1) {
274 		ASSERT(ipst->ips_mld_timeout_id == 0);
275 	} else {
276 		ASSERT(ipst->ips_mld_timeout_id != 0);
277 		ipst->ips_mld_timeout_id = 0;
278 	}
279 	if (ipst->ips_mld_time_to_next != 0) {
280 		ipst->ips_mld_time_to_next =
281 		    MIN(ipst->ips_mld_time_to_next, next);
282 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
283 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
284 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
285 	}
286 	ipst->ips_mld_timer_setter_active = B_FALSE;
287 	mutex_exit(&ipst->ips_mld_timer_lock);
288 }
289 
290 /*
291  * igmp_input:
292  * Return NULL for a bad packet that is discarded here.
293  * Return mp if the message is OK and should be handed to "raw" receivers.
294  * Callers of igmp_input() may need to reinitialize variables that were copied
295  * from the mblk as this calls pullupmsg().
296  */
297 /* ARGSUSED */
298 mblk_t *
299 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
300 {
301 	igmpa_t 	*igmpa;
302 	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
303 	int		iphlen, igmplen, mblklen;
304 	ilm_t 		*ilm;
305 	uint32_t	src, dst;
306 	uint32_t 	group;
307 	uint_t		next;
308 	ipif_t 		*ipif;
309 	ip_stack_t	*ipst;
310 	ilm_walker_t	ilw;
311 
312 	ASSERT(ill != NULL);
313 	ASSERT(!ill->ill_isv6);
314 	ipst = ill->ill_ipst;
315 	++ipst->ips_igmpstat.igps_rcv_total;
316 
317 	mblklen = MBLKL(mp);
318 	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
319 		++ipst->ips_igmpstat.igps_rcv_tooshort;
320 		goto bad_pkt;
321 	}
322 	igmplen = ntohs(ipha->ipha_length) - iphlen;
323 	/*
324 	 * Since msg sizes are more variable with v3, just pullup the
325 	 * whole thing now.
326 	 */
327 	if (MBLKL(mp) < (igmplen + iphlen)) {
328 		mblk_t *mp1;
329 		if ((mp1 = msgpullup(mp, -1)) == NULL) {
330 			++ipst->ips_igmpstat.igps_rcv_tooshort;
331 			goto bad_pkt;
332 		}
333 		freemsg(mp);
334 		mp = mp1;
335 		ipha = (ipha_t *)(mp->b_rptr);
336 	}
337 
338 	/*
339 	 * Validate lengths
340 	 */
341 	if (igmplen < IGMP_MINLEN) {
342 		++ipst->ips_igmpstat.igps_rcv_tooshort;
343 		goto bad_pkt;
344 	}
345 	/*
346 	 * Validate checksum
347 	 */
348 	if (IP_CSUM(mp, iphlen, 0)) {
349 		++ipst->ips_igmpstat.igps_rcv_badsum;
350 		goto bad_pkt;
351 	}
352 
353 	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
354 	src = ipha->ipha_src;
355 	dst = ipha->ipha_dst;
356 	if (ip_debug > 1)
357 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
358 		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
359 		    (int)ntohl(src), (int)ntohl(dst),
360 		    ill->ill_name);
361 
362 	switch (igmpa->igmpa_type) {
363 	case IGMP_MEMBERSHIP_QUERY:
364 		/*
365 		 * packet length differentiates between v1/v2 and v3
366 		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
367 		 */
368 		if ((igmplen == IGMP_MINLEN) ||
369 		    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
370 			next = igmp_query_in(ipha, igmpa, ill);
371 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
372 			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
373 			    igmplen);
374 		} else {
375 			++ipst->ips_igmpstat.igps_rcv_tooshort;
376 			goto bad_pkt;
377 		}
378 		if (next == 0)
379 			goto bad_pkt;
380 
381 		if (next != INFINITY)
382 			igmp_start_timers(next, ipst);
383 
384 		break;
385 
386 	case IGMP_V1_MEMBERSHIP_REPORT:
387 	case IGMP_V2_MEMBERSHIP_REPORT:
388 		/*
389 		 * For fast leave to work, we have to know that we are the
390 		 * last person to send a report for this group. Reports
391 		 * generated by us are looped back since we could potentially
392 		 * be a multicast router, so discard reports sourced by me.
393 		 */
394 		mutex_enter(&ill->ill_lock);
395 		for (ipif = ill->ill_ipif; ipif != NULL;
396 		    ipif = ipif->ipif_next) {
397 			if (ipif->ipif_lcl_addr == src) {
398 				if (ip_debug > 1) {
399 					(void) mi_strlog(ill->ill_rq,
400 					    1,
401 					    SL_TRACE,
402 					    "igmp_input: we are only "
403 					    "member src 0x%x ipif_local 0x%x",
404 					    (int)ntohl(src),
405 					    (int)ntohl(ipif->ipif_lcl_addr));
406 				}
407 				mutex_exit(&ill->ill_lock);
408 				return (mp);
409 			}
410 		}
411 		mutex_exit(&ill->ill_lock);
412 
413 		++ipst->ips_igmpstat.igps_rcv_reports;
414 		group = igmpa->igmpa_group;
415 		if (!CLASSD(group)) {
416 			++ipst->ips_igmpstat.igps_rcv_badreports;
417 			goto bad_pkt;
418 		}
419 
420 		/*
421 		 * KLUDGE: if the IP source address of the report has an
422 		 * unspecified (i.e., zero) subnet number, as is allowed for
423 		 * a booting host, replace it with the correct subnet number
424 		 * so that a process-level multicast routing demon can
425 		 * determine which subnet it arrived from.  This is necessary
426 		 * to compensate for the lack of any way for a process to
427 		 * determine the arrival interface of an incoming packet.
428 		 *
429 		 * Requires that a copy of *this* message it passed up
430 		 * to the raw interface which is done by our caller.
431 		 */
432 		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
433 			/* Pick the first ipif on this ill */
434 			mutex_enter(&ill->ill_lock);
435 			src = ill->ill_ipif->ipif_subnet;
436 			mutex_exit(&ill->ill_lock);
437 			ip1dbg(("igmp_input: changed src to 0x%x\n",
438 			    (int)ntohl(src)));
439 			ipha->ipha_src = src;
440 		}
441 
442 		/*
443 		 * If our ill has ILMs that belong to the group being
444 		 * reported, and we are a 'Delaying Member' in the RFC
445 		 * terminology, stop our timer for that group and 'clear
446 		 * flag' i.e. mark as IGMP_OTHERMEMBER.
447 		 */
448 		ilm = ilm_walker_start(&ilw, ill);
449 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
450 			if (ilm->ilm_addr == group) {
451 				++ipst->ips_igmpstat.igps_rcv_ourreports;
452 				ilm->ilm_timer = INFINITY;
453 				ilm->ilm_state = IGMP_OTHERMEMBER;
454 			}
455 		}
456 		ilm_walker_finish(&ilw);
457 		break;
458 
459 	case IGMP_V3_MEMBERSHIP_REPORT:
460 		/*
461 		 * Currently nothing to do here; IGMP router is not
462 		 * implemented in ip, and v3 hosts don't pay attention
463 		 * to membership reports.
464 		 */
465 		break;
466 	}
467 	/*
468 	 * Pass all valid IGMP packets up to any process(es) listening
469 	 * on a raw IGMP socket. Do not free the packet.
470 	 */
471 	return (mp);
472 
473 bad_pkt:
474 	freemsg(mp);
475 	return (NULL);
476 }
477 
478 static uint_t
479 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
480 {
481 	ilm_t	*ilm;
482 	int	timer;
483 	uint_t	next, current;
484 	ip_stack_t	 *ipst;
485 	ilm_walker_t 	ilw;
486 
487 	ipst = ill->ill_ipst;
488 	++ipst->ips_igmpstat.igps_rcv_queries;
489 
490 	/*
491 	 * In the IGMPv2 specification, there are 3 states and a flag.
492 	 *
493 	 * In Non-Member state, we simply don't have a membership record.
494 	 * In Delaying Member state, our timer is running (ilm->ilm_timer
495 	 * < INFINITY).  In Idle Member state, our timer is not running
496 	 * (ilm->ilm_timer == INFINITY).
497 	 *
498 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
499 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
500 	 * if I sent the last report.
501 	 */
502 	if ((igmpa->igmpa_code == 0) ||
503 	    (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
504 		/*
505 		 * Query from an old router.
506 		 * Remember that the querier on this interface is old,
507 		 * and set the timer to the value in RFC 1112.
508 		 */
509 
510 
511 		mutex_enter(&ill->ill_lock);
512 		ill->ill_mcast_v1_time = 0;
513 		ill->ill_mcast_v1_tset = 1;
514 		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
515 			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
516 			    "to IGMP_V1_ROUTER\n", ill->ill_name));
517 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
518 			ill->ill_mcast_type = IGMP_V1_ROUTER;
519 		}
520 		mutex_exit(&ill->ill_lock);
521 
522 		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
523 
524 		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
525 		    igmpa->igmpa_group != 0) {
526 			++ipst->ips_igmpstat.igps_rcv_badqueries;
527 			return (0);
528 		}
529 
530 	} else {
531 		in_addr_t group;
532 
533 		/*
534 		 * Query from a new router
535 		 * Simply do a validity check
536 		 */
537 		group = igmpa->igmpa_group;
538 		if (group != 0 && (!CLASSD(group))) {
539 			++ipst->ips_igmpstat.igps_rcv_badqueries;
540 			return (0);
541 		}
542 
543 		/*
544 		 * Switch interface state to v2 on receipt of a v2 query
545 		 * ONLY IF current state is v3.  Let things be if current
546 		 * state if v1 but do reset the v2-querier-present timer.
547 		 */
548 		mutex_enter(&ill->ill_lock);
549 		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
550 			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
551 			    "to IGMP_V2_ROUTER", ill->ill_name));
552 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
553 			ill->ill_mcast_type = IGMP_V2_ROUTER;
554 		}
555 		ill->ill_mcast_v2_time = 0;
556 		ill->ill_mcast_v2_tset = 1;
557 		mutex_exit(&ill->ill_lock);
558 
559 		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
560 	}
561 
562 	if (ip_debug > 1) {
563 		mutex_enter(&ill->ill_lock);
564 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
565 		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
566 		    (int)ntohs(igmpa->igmpa_code),
567 		    (int)ntohs(igmpa->igmpa_type));
568 		mutex_exit(&ill->ill_lock);
569 	}
570 
571 	/*
572 	 * -Start the timers in all of our membership records
573 	 *  for the physical interface on which the query
574 	 *  arrived, excluding those that belong to the "all
575 	 *  hosts" group (224.0.0.1).
576 	 *
577 	 * -Restart any timer that is already running but has
578 	 *  a value longer than the requested timeout.
579 	 *
580 	 * -Use the value specified in the query message as
581 	 *  the maximum timeout.
582 	 */
583 	next = (unsigned)INFINITY;
584 
585 	ilm = ilm_walker_start(&ilw, ill);
586 	mutex_enter(&ill->ill_lock);
587 	current = CURRENT_MSTIME;
588 
589 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
590 		/*
591 		 * A multicast router joins INADDR_ANY address
592 		 * to enable promiscuous reception of all
593 		 * mcasts from the interface. This INADDR_ANY
594 		 * is stored in the ilm_v6addr as V6 unspec addr
595 		 */
596 		if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
597 			continue;
598 		if (ilm->ilm_addr == htonl(INADDR_ANY))
599 			continue;
600 		if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
601 		    (igmpa->igmpa_group == 0) ||
602 		    (igmpa->igmpa_group == ilm->ilm_addr)) {
603 			if (ilm->ilm_timer > timer) {
604 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
605 				if (ilm->ilm_timer < next)
606 					next = ilm->ilm_timer;
607 				ilm->ilm_timer += current;
608 			}
609 		}
610 	}
611 	mutex_exit(&ill->ill_lock);
612 	ilm_walker_finish(&ilw);
613 
614 	return (next);
615 }
616 
617 static uint_t
618 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
619 {
620 	uint_t		i, next, mrd, qqi, timer, delay, numsrc;
621 	uint_t		current;
622 	ilm_t		*ilm;
623 	ipaddr_t	*src_array;
624 	uint8_t		qrv;
625 	ip_stack_t	 *ipst;
626 	ilm_walker_t	ilw;
627 
628 	ipst = ill->ill_ipst;
629 	/* make sure numsrc matches packet size */
630 	numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
631 	if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
632 		++ipst->ips_igmpstat.igps_rcv_tooshort;
633 		return (0);
634 	}
635 	src_array = (ipaddr_t *)&igmp3qa[1];
636 
637 	++ipst->ips_igmpstat.igps_rcv_queries;
638 
639 	if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
640 		uint_t hdrval, mant, exp;
641 		hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
642 		mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
643 		exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
644 		mrd = (mant | 0x10) << (exp + 3);
645 	}
646 	if (mrd == 0)
647 		mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
648 	timer = DSEC_TO_MSEC(mrd);
649 	MCAST_RANDOM_DELAY(delay, timer);
650 	next = (unsigned)INFINITY;
651 	current = CURRENT_MSTIME;
652 
653 	if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
654 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
655 	else
656 		ill->ill_mcast_rv = qrv;
657 
658 	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
659 		uint_t hdrval, mant, exp;
660 		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
661 		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
662 		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
663 		qqi = (mant | 0x10) << (exp + 3);
664 	}
665 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
666 
667 	/*
668 	 * If we have a pending general query response that's scheduled
669 	 * sooner than the delay we calculated for this response, then
670 	 * no action is required (RFC3376 section 5.2 rule 1)
671 	 */
672 	mutex_enter(&ill->ill_lock);
673 	if (ill->ill_global_timer < (current + delay)) {
674 		mutex_exit(&ill->ill_lock);
675 		return (next);
676 	}
677 	mutex_exit(&ill->ill_lock);
678 
679 	/*
680 	 * Now take action depending upon query type:
681 	 * general, group specific, or group/source specific.
682 	 */
683 	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
684 		/*
685 		 * general query
686 		 * We know global timer is either not running or is
687 		 * greater than our calculated delay, so reset it to
688 		 * our delay (random value in range [0, response time]).
689 		 */
690 		mutex_enter(&ill->ill_lock);
691 		ill->ill_global_timer =  current + delay;
692 		mutex_exit(&ill->ill_lock);
693 		next = delay;
694 
695 	} else {
696 		/* group or group/source specific query */
697 		ilm = ilm_walker_start(&ilw, ill);
698 		mutex_enter(&ill->ill_lock);
699 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
700 			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
701 			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
702 			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
703 			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
704 				continue;
705 			/*
706 			 * If the query is group specific or we have a
707 			 * pending group specific query, the response is
708 			 * group specific (pending sources list should be
709 			 * empty).  Otherwise, need to update the pending
710 			 * sources list for the group and source specific
711 			 * response.
712 			 */
713 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
714 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
715 group_query:
716 				FREE_SLIST(ilm->ilm_pendsrcs);
717 				ilm->ilm_pendsrcs = NULL;
718 			} else {
719 				boolean_t overflow;
720 				slist_t *pktl;
721 				if (numsrc > MAX_FILTER_SIZE ||
722 				    (ilm->ilm_pendsrcs == NULL &&
723 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
724 					/*
725 					 * We've been sent more sources than
726 					 * we can deal with; or we can't deal
727 					 * with a source list at all.  Revert
728 					 * to a group specific query.
729 					 */
730 					goto group_query;
731 				}
732 				if ((pktl = l_alloc()) == NULL)
733 					goto group_query;
734 				pktl->sl_numsrc = numsrc;
735 				for (i = 0; i < numsrc; i++)
736 					IN6_IPADDR_TO_V4MAPPED(src_array[i],
737 					    &(pktl->sl_addr[i]));
738 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
739 				    &overflow);
740 				l_free(pktl);
741 				if (overflow)
742 					goto group_query;
743 			}
744 
745 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
746 			    INFINITY : (ilm->ilm_timer - current);
747 			/* choose soonest timer */
748 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
749 			if (ilm->ilm_timer < next)
750 				next = ilm->ilm_timer;
751 			ilm->ilm_timer += current;
752 		}
753 		mutex_exit(&ill->ill_lock);
754 		ilm_walker_finish(&ilw);
755 	}
756 
757 	return (next);
758 }
759 
760 void
761 igmp_joingroup(ilm_t *ilm)
762 {
763 	uint_t	timer;
764 	ill_t	*ill;
765 	ip_stack_t	*ipst = ilm->ilm_ipst;
766 
767 	ill = ilm->ilm_ipif->ipif_ill;
768 
769 	ASSERT(IAM_WRITER_ILL(ill));
770 	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);
771 
772 	mutex_enter(&ill->ill_lock);
773 	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
774 		ilm->ilm_rtx.rtx_timer = INFINITY;
775 		ilm->ilm_state = IGMP_OTHERMEMBER;
776 		mutex_exit(&ill->ill_lock);
777 	} else {
778 		ip1dbg(("Querier mode %d, sending report, group %x\n",
779 		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
780 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
781 			mutex_exit(&ill->ill_lock);
782 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
783 			mutex_enter(&ill->ill_lock);
784 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
785 			mutex_exit(&ill->ill_lock);
786 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
787 			mutex_enter(&ill->ill_lock);
788 		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
789 			mrec_t *rp;
790 			mcast_record_t rtype;
791 			/*
792 			 * The possible state changes we need to handle here:
793 			 *   Old State	New State	Report
794 			 *
795 			 *   INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
796 			 *   INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
797 			 *
798 			 * No need to send the BLOCK(0) report; ALLOW(X)
799 			 * is enough.
800 			 */
801 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
802 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
803 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
804 			    ilm->ilm_filter, NULL);
805 			mutex_exit(&ill->ill_lock);
806 			igmpv3_sendrpt(ilm->ilm_ipif, rp);
807 			mutex_enter(&ill->ill_lock);
808 			/*
809 			 * Set up retransmission state.  Timer is set below,
810 			 * for both v3 and older versions.
811 			 */
812 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
813 			    ilm->ilm_filter);
814 		}
815 
816 		/* Set the ilm timer value */
817 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
818 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
819 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
820 		timer = ilm->ilm_rtx.rtx_timer;
821 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
822 		ilm->ilm_state = IGMP_IREPORTEDLAST;
823 		mutex_exit(&ill->ill_lock);
824 
825 		/*
826 		 * We need to restart the IGMP timers, but we can't do it here
827 		 * since we're inside the IPSQ and thus igmp_start_timers() ->
828 		 * untimeout() (inside the IPSQ, waiting for a running timeout
829 		 * to finish) could deadlock with igmp_timeout_handler() ->
830 		 * ipsq_enter() (running the timeout, waiting to get inside
831 		 * the IPSQ).  We also can't just delay it until after we
832 		 * ipsq_exit() since we could be inside more than one IPSQ and
833 		 * thus still have the other IPSQs pinned after we exit -- and
834 		 * igmp_start_timers() may be trying to enter one of those.
835 		 * Instead, signal a dedicated thread that will do it for us.
836 		 */
837 		mutex_enter(&ipst->ips_igmp_timer_lock);
838 		ipst->ips_igmp_deferred_next = MIN(timer,
839 		    ipst->ips_igmp_deferred_next);
840 		mutex_exit(&ipst->ips_igmp_timer_lock);
841 		mcast_signal_restart_thread(ipst);
842 	}
843 
844 	if (ip_debug > 1) {
845 		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
846 		    "igmp_joingroup: multicast_type %d timer %d",
847 		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
848 		    (int)ntohl(timer));
849 	}
850 }
851 
852 void
853 mld_joingroup(ilm_t *ilm)
854 {
855 	uint_t	timer;
856 	ill_t	*ill;
857 	ip_stack_t	*ipst = ilm->ilm_ipst;
858 
859 	ill = ilm->ilm_ill;
860 
861 	ASSERT(IAM_WRITER_ILL(ill));
862 	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);
863 
864 	mutex_enter(&ill->ill_lock);
865 	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
866 		ilm->ilm_rtx.rtx_timer = INFINITY;
867 		ilm->ilm_state = IGMP_OTHERMEMBER;
868 		mutex_exit(&ill->ill_lock);
869 	} else {
870 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
871 			mutex_exit(&ill->ill_lock);
872 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
873 			mutex_enter(&ill->ill_lock);
874 		} else {
875 			mrec_t *rp;
876 			mcast_record_t rtype;
877 			/*
878 			 * The possible state changes we need to handle here:
879 			 *	Old State   New State	Report
880 			 *
881 			 *	INCLUDE(0)  INCLUDE(X)	ALLOW(X),BLOCK(0)
882 			 *	INCLUDE(0)  EXCLUDE(X)	TO_EX(X)
883 			 *
884 			 * No need to send the BLOCK(0) report; ALLOW(X)
885 			 * is enough
886 			 */
887 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
888 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
889 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
890 			    ilm->ilm_filter, NULL);
891 			mutex_exit(&ill->ill_lock);
892 			mldv2_sendrpt(ill, rp);
893 			mutex_enter(&ill->ill_lock);
894 			/*
895 			 * Set up retransmission state.  Timer is set below,
896 			 * for both v2 and v1.
897 			 */
898 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
899 			    ilm->ilm_filter);
900 		}
901 
902 		/* Set the ilm timer value */
903 		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
904 		    ilm->ilm_rtx.rtx_cnt > 0);
905 
906 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
907 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
908 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
909 		timer = ilm->ilm_rtx.rtx_timer;
910 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
911 		ilm->ilm_state = IGMP_IREPORTEDLAST;
912 		mutex_exit(&ill->ill_lock);
913 
914 		/*
915 		 * Signal another thread to restart the timers.  See the
916 		 * comment in igmp_joingroup() for details.
917 		 */
918 		mutex_enter(&ipst->ips_mld_timer_lock);
919 		ipst->ips_mld_deferred_next = MIN(timer,
920 		    ipst->ips_mld_deferred_next);
921 		mutex_exit(&ipst->ips_mld_timer_lock);
922 		mcast_signal_restart_thread(ipst);
923 	}
924 
925 	if (ip_debug > 1) {
926 		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
927 		    "mld_joingroup: multicast_type %d timer %d",
928 		    (ilm->ilm_ill->ill_mcast_type),
929 		    (int)ntohl(timer));
930 	}
931 }
932 
933 void
934 igmp_leavegroup(ilm_t *ilm)
935 {
936 	ill_t *ill = ilm->ilm_ipif->ipif_ill;
937 
938 	ASSERT(ilm->ilm_ill == NULL);
939 	ASSERT(!ill->ill_isv6);
940 
941 	mutex_enter(&ill->ill_lock);
942 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
943 	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
944 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
945 		mutex_exit(&ill->ill_lock);
946 		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
947 		    (htonl(INADDR_ALLRTRS_GROUP)));
948 		return;
949 	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
950 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
951 		mrec_t *rp;
952 		/*
953 		 * The possible state changes we need to handle here:
954 		 *	Old State	New State	Report
955 		 *
956 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
957 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
958 		 *
959 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
960 		 */
961 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
962 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
963 			    ilm->ilm_filter, NULL);
964 		} else {
965 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
966 			    NULL, NULL);
967 		}
968 		mutex_exit(&ill->ill_lock);
969 		igmpv3_sendrpt(ilm->ilm_ipif, rp);
970 		return;
971 	}
972 	mutex_exit(&ill->ill_lock);
973 }
974 
975 void
976 mld_leavegroup(ilm_t *ilm)
977 {
978 	ill_t *ill = ilm->ilm_ill;
979 
980 	ASSERT(ilm->ilm_ipif == NULL);
981 	ASSERT(ill->ill_isv6);
982 
983 	mutex_enter(&ill->ill_lock);
984 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
985 	    ill->ill_mcast_type == MLD_V1_ROUTER &&
986 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
987 		mutex_exit(&ill->ill_lock);
988 		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
989 		return;
990 	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
991 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
992 		mrec_t *rp;
993 		/*
994 		 * The possible state changes we need to handle here:
995 		 *	Old State	New State	Report
996 		 *
997 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
998 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
999 		 *
1000 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
1001 		 */
1002 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1003 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1004 			    ilm->ilm_filter, NULL);
1005 		} else {
1006 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
1007 			    NULL, NULL);
1008 		}
1009 		mutex_exit(&ill->ill_lock);
1010 		mldv2_sendrpt(ill, rp);
1011 		return;
1012 	}
1013 	mutex_exit(&ill->ill_lock);
1014 }
1015 
1016 void
1017 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1018 {
1019 	ill_t *ill;
1020 	mrec_t *rp;
1021 	ip_stack_t	*ipst = ilm->ilm_ipst;
1022 
1023 	ASSERT(ilm != NULL);
1024 
1025 	/* state change reports should only be sent if the router is v3 */
1026 	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
1027 		return;
1028 
1029 	if (ilm->ilm_ill == NULL) {
1030 		ASSERT(ilm->ilm_ipif != NULL);
1031 		ill = ilm->ilm_ipif->ipif_ill;
1032 	} else {
1033 		ill = ilm->ilm_ill;
1034 	}
1035 
1036 	mutex_enter(&ill->ill_lock);
1037 
1038 	/*
1039 	 * Compare existing(old) state with the new state and prepare
1040 	 * State Change Report, according to the rules in RFC 3376:
1041 	 *
1042 	 *	Old State	New State	State Change Report
1043 	 *
1044 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
1045 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
1046 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
1047 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
1048 	 */
1049 
1050 	if (ilm->ilm_fmode == fmode) {
1051 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
1052 		slist_t *allow, *block;
1053 		if (((a_minus_b = l_alloc()) == NULL) ||
1054 		    ((b_minus_a = l_alloc()) == NULL)) {
1055 			l_free(a_minus_b);
1056 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1057 				goto send_to_ex;
1058 			else
1059 				goto send_to_in;
1060 		}
1061 		l_difference(ilm->ilm_filter, flist, a_minus_b);
1062 		l_difference(flist, ilm->ilm_filter, b_minus_a);
1063 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1064 			allow = b_minus_a;
1065 			block = a_minus_b;
1066 		} else {
1067 			allow = a_minus_b;
1068 			block = b_minus_a;
1069 		}
1070 		rp = NULL;
1071 		if (!SLIST_IS_EMPTY(allow))
1072 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1073 			    allow, rp);
1074 		if (!SLIST_IS_EMPTY(block))
1075 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1076 			    block, rp);
1077 		l_free(a_minus_b);
1078 		l_free(b_minus_a);
1079 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1080 send_to_ex:
1081 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1082 		    NULL);
1083 	} else {
1084 send_to_in:
1085 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1086 		    NULL);
1087 	}
1088 
1089 	/*
1090 	 * Need to set up retransmission state; merge the new info with the
1091 	 * current state (which may be null).  If the timer is not currently
1092 	 * running, signal a thread to restart it -- see the comment in
1093 	 * igmp_joingroup() for details.
1094 	 */
1095 	rp = mcast_merge_rtx(ilm, rp, flist);
1096 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1097 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
1098 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1099 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
1100 		mutex_enter(&ipst->ips_igmp_timer_lock);
1101 		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
1102 		    ilm->ilm_rtx.rtx_timer);
1103 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1104 		mutex_exit(&ipst->ips_igmp_timer_lock);
1105 		mcast_signal_restart_thread(ipst);
1106 	}
1107 
1108 	mutex_exit(&ill->ill_lock);
1109 	igmpv3_sendrpt(ilm->ilm_ipif, rp);
1110 }
1111 
1112 void
1113 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1114 {
1115 	ill_t *ill;
1116 	mrec_t *rp = NULL;
1117 	ip_stack_t	*ipst = ilm->ilm_ipst;
1118 
1119 	ASSERT(ilm != NULL);
1120 
1121 	ill = ilm->ilm_ill;
1122 
1123 	/* only need to send if we have an mldv2-capable router */
1124 	mutex_enter(&ill->ill_lock);
1125 	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
1126 		mutex_exit(&ill->ill_lock);
1127 		return;
1128 	}
1129 
1130 	/*
1131 	 * Compare existing (old) state with the new state passed in
1132 	 * and send appropriate MLDv2 State Change Report.
1133 	 *
1134 	 *	Old State	New State	State Change Report
1135 	 *
1136 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
1137 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
1138 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
1139 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
1140 	 */
1141 	if (ilm->ilm_fmode == fmode) {
1142 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
1143 		slist_t *allow, *block;
1144 		if (((a_minus_b = l_alloc()) == NULL) ||
1145 		    ((b_minus_a = l_alloc()) == NULL)) {
1146 			l_free(a_minus_b);
1147 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1148 				goto send_to_ex;
1149 			else
1150 				goto send_to_in;
1151 		}
1152 		l_difference(ilm->ilm_filter, flist, a_minus_b);
1153 		l_difference(flist, ilm->ilm_filter, b_minus_a);
1154 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1155 			allow = b_minus_a;
1156 			block = a_minus_b;
1157 		} else {
1158 			allow = a_minus_b;
1159 			block = b_minus_a;
1160 		}
1161 		if (!SLIST_IS_EMPTY(allow))
1162 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1163 			    allow, rp);
1164 		if (!SLIST_IS_EMPTY(block))
1165 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1166 			    block, rp);
1167 		l_free(a_minus_b);
1168 		l_free(b_minus_a);
1169 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1170 send_to_ex:
1171 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1172 		    NULL);
1173 	} else {
1174 send_to_in:
1175 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1176 		    NULL);
1177 	}
1178 
1179 	/*
1180 	 * Need to set up retransmission state; merge the new info with the
1181 	 * current state (which may be null).  If the timer is not currently
1182 	 * running, signal a thread to restart it -- see the comment in
1183 	 * igmp_joingroup() for details.
1184 	 */
1185 	rp = mcast_merge_rtx(ilm, rp, flist);
1186 	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
1187 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1188 		ilm->ilm_rtx.rtx_cnt = ill->ill_mcast_rv;
1189 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1190 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
1191 		mutex_enter(&ipst->ips_mld_timer_lock);
1192 		ipst->ips_mld_deferred_next =
1193 		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
1194 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1195 		mutex_exit(&ipst->ips_mld_timer_lock);
1196 		mcast_signal_restart_thread(ipst);
1197 	}
1198 
1199 	mutex_exit(&ill->ill_lock);
1200 	mldv2_sendrpt(ill, rp);
1201 }
1202 
1203 uint_t
1204 igmp_timeout_handler_per_ill(ill_t *ill)
1205 {
1206 	uint_t	next = INFINITY, current;
1207 	ilm_t	*ilm;
1208 	ipif_t	*ipif;
1209 	mrec_t	*rp = NULL;
1210 	mrec_t	*rtxrp = NULL;
1211 	rtx_state_t *rtxp;
1212 	mcast_record_t	rtype;
1213 
1214 	ASSERT(IAM_WRITER_ILL(ill));
1215 
1216 	mutex_enter(&ill->ill_lock);
1217 
1218 	current = CURRENT_MSTIME;
1219 	/* First check the global timer on this interface */
1220 	if (ill->ill_global_timer == INFINITY)
1221 		goto per_ilm_timer;
1222 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
1223 		ill->ill_global_timer = INFINITY;
1224 		/*
1225 		 * Send report for each group on this interface.
1226 		 * Since we just set the global timer (received a v3 general
1227 		 * query), need to skip the all hosts addr (224.0.0.1), per
1228 		 * RFC 3376 section 5.
1229 		 */
1230 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1231 			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
1232 				continue;
1233 			ASSERT(ilm->ilm_ipif != NULL);
1234 			ilm->ilm_ipif->ipif_igmp_rpt =
1235 			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
1236 			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
1237 			/*
1238 			 * Since we're sending a report on this group, okay
1239 			 * to delete pending group-specific timers.  Note
1240 			 * that group-specific retransmit timers still need
1241 			 * to be checked in the per_ilm_timer for-loop.
1242 			 */
1243 			ilm->ilm_timer = INFINITY;
1244 			ilm->ilm_state = IGMP_IREPORTEDLAST;
1245 			FREE_SLIST(ilm->ilm_pendsrcs);
1246 			ilm->ilm_pendsrcs = NULL;
1247 		}
1248 		/*
1249 		 * We've built per-ipif mrec lists; walk the ill's ipif list
1250 		 * and send a report for each ipif that has an mrec list.
1251 		 */
1252 		for (ipif = ill->ill_ipif; ipif != NULL;
1253 		    ipif = ipif->ipif_next) {
1254 			if (ipif->ipif_igmp_rpt == NULL)
1255 				continue;
1256 			mutex_exit(&ill->ill_lock);
1257 			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
1258 			mutex_enter(&ill->ill_lock);
1259 			/* mrec list was freed by igmpv3_sendrpt() */
1260 			ipif->ipif_igmp_rpt = NULL;
1261 		}
1262 	} else {
1263 		if ((ill->ill_global_timer - current) < next)
1264 			next = ill->ill_global_timer - current;
1265 	}
1266 
1267 per_ilm_timer:
1268 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1269 		if (ilm->ilm_timer == INFINITY)
1270 			goto per_ilm_rtxtimer;
1271 
1272 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
1273 			if ((ilm->ilm_timer - current) < next)
1274 				next = ilm->ilm_timer - current;
1275 
1276 			if (ip_debug > 1) {
1277 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
1278 				    "igmp_timo_hlr 2: ilm_timr %d "
1279 				    "typ %d nxt %d",
1280 				    (int)ntohl(ilm->ilm_timer - current),
1281 				    (ill->ill_mcast_type), next);
1282 			}
1283 
1284 			goto per_ilm_rtxtimer;
1285 		}
1286 
1287 		/* the timer has expired, need to take action */
1288 		ilm->ilm_timer = INFINITY;
1289 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1290 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
1291 			mutex_exit(&ill->ill_lock);
1292 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
1293 			mutex_enter(&ill->ill_lock);
1294 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
1295 			mutex_exit(&ill->ill_lock);
1296 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
1297 			mutex_enter(&ill->ill_lock);
1298 		} else {
1299 			slist_t *rsp;
1300 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
1301 			    (rsp = l_alloc()) != NULL) {
1302 				/*
1303 				 * Contents of reply depend on pending
1304 				 * requested source list.
1305 				 */
1306 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1307 					l_intersection(ilm->ilm_filter,
1308 					    ilm->ilm_pendsrcs, rsp);
1309 				} else {
1310 					l_difference(ilm->ilm_pendsrcs,
1311 					    ilm->ilm_filter, rsp);
1312 				}
1313 				FREE_SLIST(ilm->ilm_pendsrcs);
1314 				ilm->ilm_pendsrcs = NULL;
1315 				if (!SLIST_IS_EMPTY(rsp))
1316 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
1317 					    &ilm->ilm_v6addr, rsp, rp);
1318 				FREE_SLIST(rsp);
1319 			} else {
1320 				/*
1321 				 * Either the pending request is just group-
1322 				 * specific, or we couldn't get the resources
1323 				 * (rsp) to build a source-specific reply.
1324 				 */
1325 				rp = mcast_bldmrec(ilm->ilm_fmode,
1326 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
1327 			}
1328 			mutex_exit(&ill->ill_lock);
1329 			igmpv3_sendrpt(ill->ill_ipif, rp);
1330 			mutex_enter(&ill->ill_lock);
1331 			rp = NULL;
1332 		}
1333 
1334 per_ilm_rtxtimer:
1335 		rtxp = &ilm->ilm_rtx;
1336 
1337 		if (rtxp->rtx_timer == INFINITY)
1338 			continue;
1339 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
1340 			if ((rtxp->rtx_timer - current) < next)
1341 				next = rtxp->rtx_timer - current;
1342 			continue;
1343 		}
1344 
1345 		rtxp->rtx_timer = INFINITY;
1346 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1347 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
1348 			mutex_exit(&ill->ill_lock);
1349 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
1350 			mutex_enter(&ill->ill_lock);
1351 			continue;
1352 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
1353 			mutex_exit(&ill->ill_lock);
1354 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
1355 			mutex_enter(&ill->ill_lock);
1356 			continue;
1357 		}
1358 
1359 		/*
1360 		 * The retransmit timer has popped, and our router is
1361 		 * IGMPv3.  We have to delve into the retransmit state
1362 		 * stored in the ilm.
1363 		 *
1364 		 * Decrement the retransmit count.  If the fmode rtx
1365 		 * count is active, decrement it, and send a filter
1366 		 * mode change report with the ilm's source list.
1367 		 * Otherwise, send a source list change report with
1368 		 * the current retransmit lists.
1369 		 */
1370 		ASSERT(rtxp->rtx_cnt > 0);
1371 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
1372 		rtxp->rtx_cnt--;
1373 		if (rtxp->rtx_fmode_cnt > 0) {
1374 			rtxp->rtx_fmode_cnt--;
1375 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
1376 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
1377 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
1378 			    ilm->ilm_filter, rtxrp);
1379 		} else {
1380 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
1381 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
1382 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
1383 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
1384 		}
1385 		if (rtxp->rtx_cnt > 0) {
1386 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
1387 			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
1388 			if (rtxp->rtx_timer < next)
1389 				next = rtxp->rtx_timer;
1390 			rtxp->rtx_timer += current;
1391 		} else {
1392 			ASSERT(rtxp->rtx_timer == INFINITY);
1393 			CLEAR_SLIST(rtxp->rtx_allow);
1394 			CLEAR_SLIST(rtxp->rtx_block);
1395 		}
1396 		mutex_exit(&ill->ill_lock);
1397 		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
1398 		mutex_enter(&ill->ill_lock);
1399 		rtxrp = NULL;
1400 	}
1401 
1402 	mutex_exit(&ill->ill_lock);
1403 
1404 	return (next);
1405 }
1406 
1407 /*
1408  * igmp_timeout_handler:
1409  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
1410  * Returns number of ticks to next event (or 0 if none).
1411  *
1412  * As part of multicast join and leave igmp we may need to send out an
1413  * igmp request. The igmp related state variables in the ilm are protected
1414  * by ill_lock. A single global igmp timer is used to track igmp timeouts.
1415  * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
1416  * starts the igmp timer if needed. It serializes multiple threads trying to
1417  * simultaneously start the timer using the igmp_timer_setter_active flag.
1418  *
1419  * igmp_input() receives igmp queries and responds to the queries
1420  * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
1421  * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
1422  * performs the action exclusively after entering each ill's ipsq as writer.
1423  * (The need to enter the IPSQ is largely historical but there are still some
1424  * fields like ilm_filter that rely on it.)
1425  *
1426  * The igmp_slowtimeo() function is called thru another timer.
1427  * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
1428  */
1429 void
1430 igmp_timeout_handler(void *arg)
1431 {
1432 	ill_t	*ill;
1433 	uint_t  global_next = INFINITY;
1434 	uint_t  next;
1435 	ill_walk_context_t ctx;
1436 	boolean_t success;
1437 	ip_stack_t *ipst = arg;
1438 
1439 	ASSERT(arg != NULL);
1440 	mutex_enter(&ipst->ips_igmp_timer_lock);
1441 	ASSERT(ipst->ips_igmp_timeout_id != 0);
1442 	ipst->ips_igmp_timer_scheduled_last = 0;
1443 	ipst->ips_igmp_time_to_next = 0;
1444 	mutex_exit(&ipst->ips_igmp_timer_lock);
1445 
1446 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1447 	ill = ILL_START_WALK_V4(&ctx, ipst);
1448 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1449 		ASSERT(!ill->ill_isv6);
1450 		/*
1451 		 * We may not be able to refhold the ill if the ill/ipif
1452 		 * is changing. But we need to make sure that the ill will
1453 		 * not vanish. So we just bump up the ill_waiter count.
1454 		 */
1455 		if (!ill_waiter_inc(ill))
1456 			continue;
1457 		rw_exit(&ipst->ips_ill_g_lock);
1458 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
1459 		if (success) {
1460 			next = igmp_timeout_handler_per_ill(ill);
1461 			if (next < global_next)
1462 				global_next = next;
1463 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
1464 		}
1465 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1466 		ill_waiter_dcr(ill);
1467 	}
1468 	rw_exit(&ipst->ips_ill_g_lock);
1469 
1470 	mutex_enter(&ipst->ips_igmp_timer_lock);
1471 	ASSERT(ipst->ips_igmp_timeout_id != 0);
1472 	ipst->ips_igmp_timeout_id = 0;
1473 	mutex_exit(&ipst->ips_igmp_timer_lock);
1474 
1475 	if (global_next != INFINITY)
1476 		igmp_start_timers(global_next, ipst);
1477 }
1478 
1479 /*
1480  * mld_timeout_handler:
1481  * Called when there are timeout events, every next (tick).
1482  * Returns number of ticks to next event (or 0 if none).
1483  */
1484 /* ARGSUSED */
1485 uint_t
1486 mld_timeout_handler_per_ill(ill_t *ill)
1487 {
1488 	ilm_t 	*ilm;
1489 	uint_t	next = INFINITY, current;
1490 	mrec_t	*rp, *rtxrp;
1491 	rtx_state_t *rtxp;
1492 	mcast_record_t	rtype;
1493 
1494 	ASSERT(IAM_WRITER_ILL(ill));
1495 
1496 	mutex_enter(&ill->ill_lock);
1497 
1498 	current = CURRENT_MSTIME;
1499 	/*
1500 	 * First check the global timer on this interface; the global timer
1501 	 * is not used for MLDv1, so if it's set we can assume we're v2.
1502 	 */
1503 	if (ill->ill_global_timer == INFINITY)
1504 		goto per_ilm_timer;
1505 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
1506 		ill->ill_global_timer = INFINITY;
1507 		/*
1508 		 * Send report for each group on this interface.
1509 		 * Since we just set the global timer (received a v2 general
1510 		 * query), need to skip the all hosts addr (ff02::1), per
1511 		 * RFC 3810 section 6.
1512 		 */
1513 		rp = NULL;
1514 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1515 			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
1516 			    &ipv6_all_hosts_mcast))
1517 				continue;
1518 			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
1519 			    ilm->ilm_filter, rp);
1520 			/*
1521 			 * Since we're sending a report on this group, okay
1522 			 * to delete pending group-specific timers.  Note
1523 			 * that group-specific retransmit timers still need
1524 			 * to be checked in the per_ilm_timer for-loop.
1525 			 */
1526 			ilm->ilm_timer = INFINITY;
1527 			ilm->ilm_state = IGMP_IREPORTEDLAST;
1528 			FREE_SLIST(ilm->ilm_pendsrcs);
1529 			ilm->ilm_pendsrcs = NULL;
1530 		}
1531 		mutex_exit(&ill->ill_lock);
1532 		mldv2_sendrpt(ill, rp);
1533 		mutex_enter(&ill->ill_lock);
1534 	} else {
1535 		if ((ill->ill_global_timer - current) < next)
1536 			next = ill->ill_global_timer - current;
1537 	}
1538 
1539 per_ilm_timer:
1540 	rp = rtxrp = NULL;
1541 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1542 		if (ilm->ilm_timer == INFINITY)
1543 			goto per_ilm_rtxtimer;
1544 
1545 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
1546 			if ((ilm->ilm_timer - current) < next)
1547 				next = ilm->ilm_timer - current;
1548 
1549 			if (ip_debug > 1) {
1550 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
1551 				    "igmp_timo_hlr 2: ilm_timr"
1552 				    " %d typ %d nxt %d",
1553 				    (int)ntohl(ilm->ilm_timer - current),
1554 				    (ill->ill_mcast_type), next);
1555 			}
1556 
1557 			goto per_ilm_rtxtimer;
1558 		}
1559 
1560 		/* the timer has expired, need to take action */
1561 		ilm->ilm_timer = INFINITY;
1562 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1563 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
1564 			mutex_exit(&ill->ill_lock);
1565 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
1566 			mutex_enter(&ill->ill_lock);
1567 		} else {
1568 			slist_t *rsp;
1569 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
1570 			    (rsp = l_alloc()) != NULL) {
1571 				/*
1572 				 * Contents of reply depend on pending
1573 				 * requested source list.
1574 				 */
1575 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1576 					l_intersection(ilm->ilm_filter,
1577 					    ilm->ilm_pendsrcs, rsp);
1578 				} else {
1579 					l_difference(ilm->ilm_pendsrcs,
1580 					    ilm->ilm_filter, rsp);
1581 				}
1582 				FREE_SLIST(ilm->ilm_pendsrcs);
1583 				ilm->ilm_pendsrcs = NULL;
1584 				if (!SLIST_IS_EMPTY(rsp))
1585 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
1586 					    &ilm->ilm_v6addr, rsp, rp);
1587 				FREE_SLIST(rsp);
1588 			} else {
1589 				rp = mcast_bldmrec(ilm->ilm_fmode,
1590 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
1591 			}
1592 		}
1593 
1594 per_ilm_rtxtimer:
1595 		rtxp = &ilm->ilm_rtx;
1596 
1597 		if (rtxp->rtx_timer == INFINITY)
1598 			continue;
1599 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
1600 			if ((rtxp->rtx_timer - current) < next)
1601 				next = rtxp->rtx_timer - current;
1602 			continue;
1603 		}
1604 
1605 		rtxp->rtx_timer = INFINITY;
1606 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1607 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
1608 			mutex_exit(&ill->ill_lock);
1609 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
1610 			mutex_enter(&ill->ill_lock);
1611 			continue;
1612 		}
1613 
1614 		/*
1615 		 * The retransmit timer has popped, and our router is
1616 		 * MLDv2.  We have to delve into the retransmit state
1617 		 * stored in the ilm.
1618 		 *
1619 		 * Decrement the retransmit count.  If the fmode rtx
1620 		 * count is active, decrement it, and send a filter
1621 		 * mode change report with the ilm's source list.
1622 		 * Otherwise, send a source list change report with
1623 		 * the current retransmit lists.
1624 		 */
1625 		ASSERT(rtxp->rtx_cnt > 0);
1626 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
1627 		rtxp->rtx_cnt--;
1628 		if (rtxp->rtx_fmode_cnt > 0) {
1629 			rtxp->rtx_fmode_cnt--;
1630 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
1631 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
1632 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
1633 			    ilm->ilm_filter, rtxrp);
1634 		} else {
1635 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
1636 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
1637 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
1638 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
1639 		}
1640 		if (rtxp->rtx_cnt > 0) {
1641 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
1642 			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
1643 			if (rtxp->rtx_timer < next)
1644 				next = rtxp->rtx_timer;
1645 			rtxp->rtx_timer += current;
1646 		} else {
1647 			ASSERT(rtxp->rtx_timer == INFINITY);
1648 			CLEAR_SLIST(rtxp->rtx_allow);
1649 			CLEAR_SLIST(rtxp->rtx_block);
1650 		}
1651 	}
1652 
1653 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
1654 		mutex_exit(&ill->ill_lock);
1655 		mldv2_sendrpt(ill, rp);
1656 		mldv2_sendrpt(ill, rtxrp);
1657 		return (next);
1658 	}
1659 
1660 	mutex_exit(&ill->ill_lock);
1661 
1662 	return (next);
1663 }
1664 
1665 /*
1666  * mld_timeout_handler:
1667  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
1668  * Returns number of ticks to next event (or 0 if none).
1669  * MT issues are same as igmp_timeout_handler
1670  */
1671 void
1672 mld_timeout_handler(void *arg)
1673 {
1674 	ill_t	*ill;
1675 	uint_t  global_next = INFINITY;
1676 	uint_t  next;
1677 	ill_walk_context_t ctx;
1678 	boolean_t success;
1679 	ip_stack_t *ipst = arg;
1680 
1681 	ASSERT(arg != NULL);
1682 	mutex_enter(&ipst->ips_mld_timer_lock);
1683 	ASSERT(ipst->ips_mld_timeout_id != 0);
1684 	ipst->ips_mld_timer_scheduled_last = 0;
1685 	ipst->ips_mld_time_to_next = 0;
1686 	mutex_exit(&ipst->ips_mld_timer_lock);
1687 
1688 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1689 	ill = ILL_START_WALK_V6(&ctx, ipst);
1690 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1691 		ASSERT(ill->ill_isv6);
1692 		/*
1693 		 * We may not be able to refhold the ill if the ill/ipif
1694 		 * is changing. But we need to make sure that the ill will
1695 		 * not vanish. So we just bump up the ill_waiter count.
1696 		 */
1697 		if (!ill_waiter_inc(ill))
1698 			continue;
1699 		rw_exit(&ipst->ips_ill_g_lock);
1700 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
1701 		if (success) {
1702 			next = mld_timeout_handler_per_ill(ill);
1703 			if (next < global_next)
1704 				global_next = next;
1705 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
1706 		}
1707 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1708 		ill_waiter_dcr(ill);
1709 	}
1710 	rw_exit(&ipst->ips_ill_g_lock);
1711 
1712 	mutex_enter(&ipst->ips_mld_timer_lock);
1713 	ASSERT(ipst->ips_mld_timeout_id != 0);
1714 	ipst->ips_mld_timeout_id = 0;
1715 	mutex_exit(&ipst->ips_mld_timer_lock);
1716 
1717 	if (global_next != INFINITY)
1718 		mld_start_timers(global_next, ipst);
1719 }
1720 
/*
 * Older Version Querier Present timeout for the given ill, expressed as
 * a count of slowtimo intervals.  It is computed from the ill's
 * ill_mcast_rv and ill_mcast_qi values plus MCAST_QUERY_RESP_INTERVAL,
 * scaled by 1000 and divided by MCAST_SLOWTIMO_INTERVAL.
 */
#define	OVQP(ill)							\
	((1000 * ((ill)->ill_mcast_rv * (ill)->ill_mcast_qi +		\
	MCAST_QUERY_RESP_INTERVAL)) / MCAST_SLOWTIMO_INTERVAL)
1728 
1729 /*
1730  * igmp_slowtimo:
1731  * - Resets to new router if we didnt we hear from the router
1732  *   in IGMP_AGE_THRESHOLD seconds.
1733  * - Resets slowtimeout.
1734  * Check for ips_igmp_max_version ensures that we don't revert to a higher
1735  * IGMP version than configured.
1736  */
1737 void
1738 igmp_slowtimo(void *arg)
1739 {
1740 	ill_t	*ill;
1741 	ill_if_t *ifp;
1742 	avl_tree_t *avl_tree;
1743 	ip_stack_t *ipst = (ip_stack_t *)arg;
1744 
1745 	ASSERT(arg != NULL);
1746 	/* Hold the ill_g_lock so that we can safely walk the ill list */
1747 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1748 
1749 	/*
1750 	 * The ill_if_t list is circular, hence the odd loop parameters.
1751 	 *
1752 	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
1753 	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
1754 	 * structure (allowing us to skip if none of the instances have timers
1755 	 * running).
1756 	 */
1757 	for (ifp = IP_V4_ILL_G_LIST(ipst);
1758 	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
1759 	    ifp = ifp->illif_next) {
1760 		/*
1761 		 * illif_mcast_v[12] are set using atomics. If an ill hears
1762 		 * a V1 or V2 query now and we miss seeing the count now,
1763 		 * we will see it the next time igmp_slowtimo is called.
1764 		 */
1765 		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
1766 			continue;
1767 
1768 		avl_tree = &ifp->illif_avl_by_ppa;
1769 		for (ill = avl_first(avl_tree); ill != NULL;
1770 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
1771 			mutex_enter(&ill->ill_lock);
1772 			if (ill->ill_mcast_v1_tset == 1)
1773 				ill->ill_mcast_v1_time++;
1774 			if (ill->ill_mcast_v2_tset == 1)
1775 				ill->ill_mcast_v2_time++;
1776 			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
1777 			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
1778 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
1779 				if ((ill->ill_mcast_v2_tset > 0) ||
1780 				    (ipst->ips_igmp_max_version ==
1781 				    IGMP_V2_ROUTER)) {
1782 					ip1dbg(("V1 query timer "
1783 					    "expired on %s; switching "
1784 					    "mode to IGMP_V2\n",
1785 					    ill->ill_name));
1786 					ill->ill_mcast_type =
1787 					    IGMP_V2_ROUTER;
1788 				} else {
1789 					ip1dbg(("V1 query timer "
1790 					    "expired on %s; switching "
1791 					    "mode to IGMP_V3\n",
1792 					    ill->ill_name));
1793 					ill->ill_mcast_type =
1794 					    IGMP_V3_ROUTER;
1795 				}
1796 				ill->ill_mcast_v1_time = 0;
1797 				ill->ill_mcast_v1_tset = 0;
1798 				atomic_add_16(&ifp->illif_mcast_v1, -1);
1799 			}
1800 			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
1801 			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
1802 			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
1803 				ip1dbg(("V2 query timer expired on "
1804 				    "%s; switching mode to IGMP_V3\n",
1805 				    ill->ill_name));
1806 				ill->ill_mcast_type = IGMP_V3_ROUTER;
1807 				ill->ill_mcast_v2_time = 0;
1808 				ill->ill_mcast_v2_tset = 0;
1809 				atomic_add_16(&ifp->illif_mcast_v2, -1);
1810 			}
1811 			mutex_exit(&ill->ill_lock);
1812 		}
1813 	}
1814 	rw_exit(&ipst->ips_ill_g_lock);
1815 	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
1816 	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
1817 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
1818 	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
1819 }
1820 
1821 /*
1822  * mld_slowtimo:
1823  * - Resets to newer version if we didn't hear from the older version router
1824  *   in MLD_AGE_THRESHOLD seconds.
1825  * - Restarts slowtimeout.
1826  * Check for ips_mld_max_version ensures that we don't revert to a higher
1827  * IGMP version than configured.
1828  */
1829 /* ARGSUSED */
1830 void
1831 mld_slowtimo(void *arg)
1832 {
1833 	ill_t *ill;
1834 	ill_if_t *ifp;
1835 	avl_tree_t *avl_tree;
1836 	ip_stack_t *ipst = (ip_stack_t *)arg;
1837 
1838 	ASSERT(arg != NULL);
1839 	/* See comments in igmp_slowtimo() above... */
1840 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1841 	for (ifp = IP_V6_ILL_G_LIST(ipst);
1842 	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
1843 	    ifp = ifp->illif_next) {
1844 		if (ifp->illif_mcast_v1 == 0)
1845 			continue;
1846 
1847 		avl_tree = &ifp->illif_avl_by_ppa;
1848 		for (ill = avl_first(avl_tree); ill != NULL;
1849 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
1850 			mutex_enter(&ill->ill_lock);
1851 			if (ill->ill_mcast_v1_tset == 1)
1852 				ill->ill_mcast_v1_time++;
1853 			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
1854 			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
1855 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
1856 				ip1dbg(("MLD query timer expired on"
1857 				    " %s; switching mode to MLD_V2\n",
1858 				    ill->ill_name));
1859 				ill->ill_mcast_type = MLD_V2_ROUTER;
1860 				ill->ill_mcast_v1_time = 0;
1861 				ill->ill_mcast_v1_tset = 0;
1862 				atomic_add_16(&ifp->illif_mcast_v1, -1);
1863 			}
1864 			mutex_exit(&ill->ill_lock);
1865 		}
1866 	}
1867 	rw_exit(&ipst->ips_ill_g_lock);
1868 	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
1869 	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
1870 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
1871 	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
1872 }
1873 
1874 /*
1875  * igmp_sendpkt:
1876  * This will send to ip_wput like icmp_inbound.
1877  * Note that the lower ill (on which the membership is kept) is used
1878  * as an upper ill to pass in the multicast parameters.
1879  */
1880 static void
1881 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
1882 {
1883 	mblk_t	*mp;
1884 	igmpa_t	*igmpa;
1885 	uint8_t *rtralert;
1886 	ipha_t	*ipha;
1887 	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
1888 	size_t	size  = hdrlen + sizeof (igmpa_t);
1889 	ipif_t 	*ipif = ilm->ilm_ipif;
1890 	ill_t 	*ill  = ipif->ipif_ill;
1891 	mblk_t	*first_mp;
1892 	ipsec_out_t *io;
1893 	zoneid_t zoneid;
1894 	ip_stack_t *ipst = ill->ill_ipst;
1895 
1896 	/*
1897 	 * We need to make sure this packet goes out on an ipif. If
1898 	 * there is some global policy match in ip_wput_ire, we need
1899 	 * to get to the right interface after IPSEC processing.
1900 	 * To make sure this multicast packet goes out on the right
1901 	 * interface, we attach an ipsec_out and initialize ill_index
1902 	 * like we did in ip_wput. To make sure that this packet does
1903 	 * not get forwarded on other interfaces or looped back, we
1904 	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
1905 	 * to B_FALSE.
1906 	 */
1907 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1908 	if (first_mp == NULL)
1909 		return;
1910 
1911 	first_mp->b_datap->db_type = M_CTL;
1912 	first_mp->b_wptr += sizeof (ipsec_info_t);
1913 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
1914 	/* ipsec_out_secure is B_FALSE now */
1915 	io = (ipsec_out_t *)first_mp->b_rptr;
1916 	io->ipsec_out_type = IPSEC_OUT;
1917 	io->ipsec_out_len = sizeof (ipsec_out_t);
1918 	io->ipsec_out_use_global_policy = B_TRUE;
1919 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
1920 	io->ipsec_out_multicast_loop = B_FALSE;
1921 	io->ipsec_out_dontroute = B_TRUE;
1922 	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
1923 		zoneid = GLOBAL_ZONEID;
1924 	io->ipsec_out_zoneid = zoneid;
1925 	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */
1926 
1927 	mp = allocb(size, BPRI_HI);
1928 	if (mp == NULL) {
1929 		freemsg(first_mp);
1930 		return;
1931 	}
1932 	mp->b_wptr = mp->b_rptr + size;
1933 	first_mp->b_cont = mp;
1934 
1935 	ipha = (ipha_t *)mp->b_rptr;
1936 	rtralert = (uint8_t *)&(ipha[1]);
1937 	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1938 	igmpa->igmpa_type   = type;
1939 	igmpa->igmpa_code   = 0;
1940 	igmpa->igmpa_group  = ilm->ilm_addr;
1941 	igmpa->igmpa_cksum  = 0;
1942 	igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);
1943 
1944 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1945 	rtralert[1] = RTRALERT_LEN;
1946 	rtralert[2] = 0;
1947 	rtralert[3] = 0;
1948 
1949 	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1950 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1951 	ipha->ipha_type_of_service 	= 0;
1952 	ipha->ipha_length = htons(size);
1953 	ipha->ipha_ident = 0;
1954 	ipha->ipha_fragment_offset_and_flags = 0;
1955 	ipha->ipha_ttl 		= IGMP_TTL;
1956 	ipha->ipha_protocol 	= IPPROTO_IGMP;
1957 	ipha->ipha_hdr_checksum 	= 0;
1958 	ipha->ipha_dst 		= addr ? addr : igmpa->igmpa_group;
1959 	ipha->ipha_src 		= ipif->ipif_src_addr;
1960 	/*
1961 	 * Request loopback of the report if we are acting as a multicast
1962 	 * router, so that the process-level routing demon can hear it.
1963 	 */
1964 	/*
1965 	 * This will run multiple times for the same group if there are members
1966 	 * on the same group for multiple ipif's on the same ill. The
1967 	 * igmp_input code will suppress this due to the loopback thus we
1968 	 * always loopback membership report.
1969 	 */
1970 	ASSERT(ill->ill_rq != NULL);
1971 	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);
1972 
1973 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
1974 
1975 	++ipst->ips_igmpstat.igps_snd_reports;
1976 }
1977 
1978 /*
1979  * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
1980  * with the passed-in ipif.  The report will contain one group record
1981  * for each element of reclist.  If this causes packet length to
1982  * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
1983  * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1984  * and those buffers are freed here.
1985  */
1986 static void
1987 igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
1988 {
1989 	ipsec_out_t *io;
1990 	igmp3ra_t *igmp3ra;
1991 	grphdra_t *grphdr;
1992 	mblk_t *first_mp, *mp;
1993 	ipha_t *ipha;
1994 	uint8_t *rtralert;
1995 	ipaddr_t *src_array;
1996 	int i, j, numrec, more_src_cnt;
1997 	size_t hdrsize, size, rsize;
1998 	ill_t *ill = ipif->ipif_ill;
1999 	mrec_t *rp, *cur_reclist;
2000 	mrec_t *next_reclist = reclist;
2001 	boolean_t morepkts;
2002 	zoneid_t zoneid;
2003 	ip_stack_t	 *ipst = ill->ill_ipst;
2004 
2005 	ASSERT(IAM_WRITER_IPIF(ipif));
2006 
2007 	/* if there aren't any records, there's nothing to send */
2008 	if (reclist == NULL)
2009 		return;
2010 
2011 	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
2012 nextpkt:
2013 	size = hdrsize + sizeof (igmp3ra_t);
2014 	morepkts = B_FALSE;
2015 	more_src_cnt = 0;
2016 	cur_reclist = next_reclist;
2017 	numrec = 0;
2018 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2019 		rsize = sizeof (grphdra_t) +
2020 		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
2021 		if (size + rsize > ill->ill_max_frag) {
2022 			if (rp == cur_reclist) {
2023 				/*
2024 				 * If the first mrec we looked at is too big
2025 				 * to fit in a single packet (i.e the source
2026 				 * list is too big), we must either truncate
2027 				 * the list (if TO_EX or IS_EX), or send
2028 				 * multiple reports for the same group (all
2029 				 * other types).
2030 				 */
2031 				int srcspace, srcsperpkt;
2032 				srcspace = ill->ill_max_frag - (size +
2033 				    sizeof (grphdra_t));
2034 
2035 				/*
2036 				 * Skip if there's not even enough room in
2037 				 * a single packet to send something useful.
2038 				 */
2039 				if (srcspace <= sizeof (ipaddr_t))
2040 					continue;
2041 
2042 				srcsperpkt = srcspace / sizeof (ipaddr_t);
2043 				/*
2044 				 * Increment size and numrec, because we will
2045 				 * be sending a record for the mrec we're
2046 				 * looking at now.
2047 				 */
2048 				size += sizeof (grphdra_t) +
2049 				    (srcsperpkt * sizeof (ipaddr_t));
2050 				numrec++;
2051 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
2052 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
2053 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2054 					if (rp->mrec_next == NULL) {
2055 						/* no more packets to send */
2056 						break;
2057 					} else {
2058 						/*
2059 						 * more packets, but we're
2060 						 * done with this mrec.
2061 						 */
2062 						next_reclist = rp->mrec_next;
2063 					}
2064 				} else {
2065 					more_src_cnt = rp->mrec_srcs.sl_numsrc
2066 					    - srcsperpkt;
2067 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2068 					/*
2069 					 * We'll fix up this mrec (remove the
2070 					 * srcs we've already sent) before
2071 					 * returning to nextpkt above.
2072 					 */
2073 					next_reclist = rp;
2074 				}
2075 			} else {
2076 				next_reclist = rp;
2077 			}
2078 			morepkts = B_TRUE;
2079 			break;
2080 		}
2081 		size += rsize;
2082 		numrec++;
2083 	}
2084 
2085 	/*
2086 	 * See comments in igmp_sendpkt() about initializing for ipsec and
2087 	 * load balancing requirements.
2088 	 */
2089 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
2090 	if (first_mp == NULL)
2091 		goto free_reclist;
2092 
2093 	first_mp->b_datap->db_type = M_CTL;
2094 	first_mp->b_wptr += sizeof (ipsec_info_t);
2095 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
2096 	/* ipsec_out_secure is B_FALSE now */
2097 	io = (ipsec_out_t *)first_mp->b_rptr;
2098 	io->ipsec_out_type = IPSEC_OUT;
2099 	io->ipsec_out_len = sizeof (ipsec_out_t);
2100 	io->ipsec_out_use_global_policy = B_TRUE;
2101 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
2102 	io->ipsec_out_multicast_loop = B_FALSE;
2103 	io->ipsec_out_dontroute = B_TRUE;
2104 	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
2105 		zoneid = GLOBAL_ZONEID;
2106 	io->ipsec_out_zoneid = zoneid;
2107 
2108 	mp = allocb(size, BPRI_HI);
2109 	if (mp == NULL) {
2110 		freemsg(first_mp);
2111 		goto free_reclist;
2112 	}
2113 	bzero((char *)mp->b_rptr, size);
2114 	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
2115 	first_mp->b_cont = mp;
2116 
2117 	ipha = (ipha_t *)mp->b_rptr;
2118 	rtralert = (uint8_t *)&(ipha[1]);
2119 	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
2120 	grphdr = (grphdra_t *)&(igmp3ra[1]);
2121 
2122 	rp = cur_reclist;
2123 	for (i = 0; i < numrec; i++) {
2124 		grphdr->grphdra_type = rp->mrec_type;
2125 		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2126 		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
2127 		src_array = (ipaddr_t *)&(grphdr[1]);
2128 
2129 		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
2130 			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
2131 
2132 		grphdr = (grphdra_t *)&(src_array[j]);
2133 		rp = rp->mrec_next;
2134 	}
2135 
2136 	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
2137 	igmp3ra->igmp3ra_numrec = htons(numrec);
2138 	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
2139 
2140 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
2141 	rtralert[1] = RTRALERT_LEN;
2142 	rtralert[2] = 0;
2143 	rtralert[3] = 0;
2144 
2145 	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
2146 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
2147 	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
2148 	ipha->ipha_length = htons(size);
2149 	ipha->ipha_ttl = IGMP_TTL;
2150 	ipha->ipha_protocol = IPPROTO_IGMP;
2151 	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
2152 	ipha->ipha_src = ipif->ipif_src_addr;
2153 
2154 	/*
2155 	 * Request loopback of the report if we are acting as a multicast
2156 	 * router, so that the process-level routing daemon can hear it.
2157 	 *
2158 	 * This will run multiple times for the same group if there are
2159 	 * members on the same group for multiple ipifs on the same ill.
2160 	 * The igmp_input code will suppress this due to the loopback;
2161 	 * thus we always loopback membership report.
2162 	 */
2163 	ASSERT(ill->ill_rq != NULL);
2164 	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);
2165 
2166 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
2167 
2168 	++ipst->ips_igmpstat.igps_snd_reports;
2169 
2170 	if (morepkts) {
2171 		if (more_src_cnt > 0) {
2172 			int index, mvsize;
2173 			slist_t *sl = &next_reclist->mrec_srcs;
2174 			index = sl->sl_numsrc;
2175 			mvsize = more_src_cnt * sizeof (in6_addr_t);
2176 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2177 			    mvsize);
2178 			sl->sl_numsrc = more_src_cnt;
2179 		}
2180 		goto nextpkt;
2181 	}
2182 
2183 free_reclist:
2184 	while (reclist != NULL) {
2185 		rp = reclist->mrec_next;
2186 		mi_free(reclist);
2187 		reclist = rp;
2188 	}
2189 }
2190 
2191 /*
2192  * mld_input:
2193  */
2194 /* ARGSUSED */
2195 void
2196 mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
2197 {
2198 	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
2199 	mld_hdr_t	*mldh;
2200 	ilm_t		*ilm;
2201 	ipif_t		*ipif;
2202 	uint16_t	hdr_length, exthdr_length;
2203 	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
2204 	uint_t		next;
2205 	int		mldlen;
2206 	ip_stack_t	*ipst = ill->ill_ipst;
2207 	ilm_walker_t	ilw;
2208 
2209 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
2210 
2211 	/* Make sure the src address of the packet is link-local */
2212 	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
2213 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2214 		freemsg(mp);
2215 		return;
2216 	}
2217 
2218 	if (ip6h->ip6_hlim != 1) {
2219 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
2220 		freemsg(mp);
2221 		return;
2222 	}
2223 
2224 	/* Get to the icmp header part */
2225 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
2226 		hdr_length = ip_hdr_length_v6(mp, ip6h);
2227 		exthdr_length = hdr_length - IPV6_HDR_LEN;
2228 	} else {
2229 		hdr_length = IPV6_HDR_LEN;
2230 		exthdr_length = 0;
2231 	}
2232 	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
2233 
2234 	/* An MLD packet must at least be 24 octets to be valid */
2235 	if (mldlen < MLD_MINLEN) {
2236 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2237 		freemsg(mp);
2238 		return;
2239 	}
2240 
2241 	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
2242 
2243 	switch (mldh->mld_type) {
2244 	case MLD_LISTENER_QUERY:
2245 		/*
2246 		 * packet length differentiates between v1 and v2.  v1
2247 		 * query should be exactly 24 octets long; v2 is >= 28.
2248 		 */
2249 		if ((mldlen == MLD_MINLEN) ||
2250 		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
2251 			next = mld_query_in(mldh, ill);
2252 		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
2253 			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
2254 		} else {
2255 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2256 			freemsg(mp);
2257 			return;
2258 		}
2259 		if (next == 0) {
2260 			freemsg(mp);
2261 			return;
2262 		}
2263 
2264 		if (next != INFINITY)
2265 			mld_start_timers(next, ipst);
2266 		break;
2267 
2268 	case MLD_LISTENER_REPORT: {
2269 
2270 		ASSERT(ill->ill_ipif != NULL);
2271 		/*
2272 		 * For fast leave to work, we have to know that we are the
2273 		 * last person to send a report for this group.  Reports
2274 		 * generated by us are looped back since we could potentially
2275 		 * be a multicast router, so discard reports sourced by me.
2276 		 */
2277 		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
2278 		mutex_enter(&ill->ill_lock);
2279 		for (ipif = ill->ill_ipif; ipif != NULL;
2280 		    ipif = ipif->ipif_next) {
2281 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
2282 			    lcladdr_ptr)) {
2283 				if (ip_debug > 1) {
2284 					char    buf1[INET6_ADDRSTRLEN];
2285 					char	buf2[INET6_ADDRSTRLEN];
2286 
2287 					(void) mi_strlog(ill->ill_rq,
2288 					    1,
2289 					    SL_TRACE,
2290 					    "mld_input: we are only "
2291 					    "member src %s ipif_local %s",
2292 					    inet_ntop(AF_INET6, lcladdr_ptr,
2293 					    buf1, sizeof (buf1)),
2294 					    inet_ntop(AF_INET6,
2295 					    &ipif->ipif_v6lcl_addr,
2296 					    buf2, sizeof (buf2)));
2297 				}
2298 				mutex_exit(&ill->ill_lock);
2299 				freemsg(mp);
2300 				return;
2301 			}
2302 		}
2303 		mutex_exit(&ill->ill_lock);
2304 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
2305 
2306 		v6group_ptr = &mldh->mld_addr;
2307 		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
2308 			BUMP_MIB(ill->ill_icmp6_mib,
2309 			    ipv6IfIcmpInGroupMembBadReports);
2310 			freemsg(mp);
2311 			return;
2312 		}
2313 
2314 		/*
2315 		 * If we belong to the group being reported, and we are a
2316 		 * 'Delaying member' per the RFC terminology, stop our timer
2317 		 * for that group and 'clear flag' i.e. mark ilm_state as
2318 		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
2319 		 * membership entries for the same group address (one per zone)
2320 		 * so we need to walk the ill_ilm list.
2321 		 */
2322 		ilm = ilm_walker_start(&ilw, ill);
2323 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2324 			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
2325 				continue;
2326 			BUMP_MIB(ill->ill_icmp6_mib,
2327 			    ipv6IfIcmpInGroupMembOurReports);
2328 
2329 			ilm->ilm_timer = INFINITY;
2330 			ilm->ilm_state = IGMP_OTHERMEMBER;
2331 		}
2332 		ilm_walker_finish(&ilw);
2333 		break;
2334 	}
2335 	case MLD_LISTENER_REDUCTION:
2336 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
2337 		break;
2338 	}
2339 	/*
2340 	 * All MLD packets have already been passed up to any
2341 	 * process(es) listening on a ICMP6 raw socket. This
2342 	 * has been accomplished in ip_deliver_local_v6 prior to
2343 	 * this function call. It is assumed that the multicast daemon
2344 	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the
2345 	 * ICMP6_FILTER socket option to only receive the MLD messages)
2346 	 * Thus we can free the MLD message block here
2347 	 */
2348 	freemsg(mp);
2349 }
2350 
2351 /*
2352  * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
2353  * (non-zero, unsigned) timer value to be set on success.
2354  */
2355 static uint_t
2356 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2357 {
2358 	ilm_t	*ilm;
2359 	int	timer;
2360 	uint_t	next, current;
2361 	in6_addr_t *v6group;
2362 	ilm_walker_t ilw;
2363 
2364 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2365 
2366 	/*
2367 	 * In the MLD specification, there are 3 states and a flag.
2368 	 *
2369 	 * In Non-Listener state, we simply don't have a membership record.
2370 	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2371 	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2372 	 * INFINITY)
2373 	 *
2374 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
2375 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
2376 	 * if I sent the last report.
2377 	 */
2378 	v6group = &mldh->mld_addr;
2379 	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
2380 	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
2381 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
2382 		return (0);
2383 	}
2384 
2385 	/* Need to do compatibility mode checking */
2386 	mutex_enter(&ill->ill_lock);
2387 	ill->ill_mcast_v1_time = 0;
2388 	ill->ill_mcast_v1_tset = 1;
2389 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2390 		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2391 		    "MLD_V1_ROUTER\n", ill->ill_name));
2392 		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
2393 		ill->ill_mcast_type = MLD_V1_ROUTER;
2394 	}
2395 	mutex_exit(&ill->ill_lock);
2396 
2397 	timer = (int)ntohs(mldh->mld_maxdelay);
2398 	if (ip_debug > 1) {
2399 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2400 		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2401 		    timer, (int)mldh->mld_type);
2402 	}
2403 
2404 	/*
2405 	 * -Start the timers in all of our membership records for
2406 	 * the physical interface on which the query arrived,
2407 	 * excl:
2408 	 *	1.  those that belong to the "all hosts" group,
2409 	 *	2.  those with 0 scope, or 1 node-local scope.
2410 	 *
2411 	 * -Restart any timer that is already running but has a value
2412 	 * longer that the requested timeout.
2413 	 * -Use the value specified in the query message as the
2414 	 * maximum timeout.
2415 	 */
2416 	next = INFINITY;
2417 
2418 	ilm = ilm_walker_start(&ilw, ill);
2419 	mutex_enter(&ill->ill_lock);
2420 	current = CURRENT_MSTIME;
2421 
2422 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2423 		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2424 
2425 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2426 		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2427 		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2428 			continue;
2429 		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2430 		    &ipv6_all_hosts_mcast)) &&
2431 		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
2432 		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2433 			if (timer == 0) {
2434 				/* Respond immediately */
2435 				ilm->ilm_timer = INFINITY;
2436 				ilm->ilm_state = IGMP_IREPORTEDLAST;
2437 				mutex_exit(&ill->ill_lock);
2438 				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2439 				mutex_enter(&ill->ill_lock);
2440 				break;
2441 			}
2442 			if (ilm->ilm_timer > timer) {
2443 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2444 				if (ilm->ilm_timer < next)
2445 					next = ilm->ilm_timer;
2446 				ilm->ilm_timer += current;
2447 			}
2448 			break;
2449 		}
2450 	}
2451 	mutex_exit(&ill->ill_lock);
2452 	ilm_walker_finish(&ilw);
2453 
2454 	return (next);
2455 }
2456 
2457 /*
2458  * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
2459  * returns the appropriate (non-zero, unsigned) timer value (which may
2460  * be INFINITY) to be set.
2461  */
2462 static uint_t
2463 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
2464 {
2465 	ilm_t	*ilm;
2466 	in6_addr_t *v6group, *src_array;
2467 	uint_t	next, numsrc, i, mrd, delay, qqi, current;
2468 	uint8_t	qrv;
2469 	ilm_walker_t ilw;
2470 
2471 	v6group = &mld2q->mld2q_addr;
2472 	numsrc = ntohs(mld2q->mld2q_numsrc);
2473 
2474 	/* make sure numsrc matches packet size */
2475 	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
2476 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2477 		return (0);
2478 	}
2479 	src_array = (in6_addr_t *)&mld2q[1];
2480 
2481 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2482 
2483 	/* extract Maximum Response Delay from code in header */
2484 	mrd = ntohs(mld2q->mld2q_mxrc);
2485 	if (mrd >= MLD_V2_MAXRT_FPMIN) {
2486 		uint_t hdrval, mant, exp;
2487 		hdrval = mrd;
2488 		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
2489 		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
2490 		mrd = (mant | 0x1000) << (exp + 3);
2491 	}
2492 	if (mrd == 0)
2493 		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
2494 
2495 	MCAST_RANDOM_DELAY(delay, mrd);
2496 	next = (unsigned)INFINITY;
2497 	current = CURRENT_MSTIME;
2498 
2499 	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
2500 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
2501 	else
2502 		ill->ill_mcast_rv = qrv;
2503 
2504 	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
2505 		uint_t mant, exp;
2506 		mant = qqi & MLD_V2_QQI_MANT_MASK;
2507 		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
2508 		qqi = (mant | 0x10) << (exp + 3);
2509 	}
2510 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
2511 
2512 	/*
2513 	 * If we have a pending general query response that's scheduled
2514 	 * sooner than the delay we calculated for this response, then
2515 	 * no action is required (MLDv2 draft section 6.2 rule 1)
2516 	 */
2517 	mutex_enter(&ill->ill_lock);
2518 	if (ill->ill_global_timer < (current + delay)) {
2519 		mutex_exit(&ill->ill_lock);
2520 		return (next);
2521 	}
2522 	mutex_exit(&ill->ill_lock);
2523 
2524 	/*
2525 	 * Now take action depending on query type: general,
2526 	 * group specific, or group/source specific.
2527 	 */
2528 	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
2529 		/*
2530 		 * general query
2531 		 * We know global timer is either not running or is
2532 		 * greater than our calculated delay, so reset it to
2533 		 * our delay (random value in range [0, response time])
2534 		 */
2535 		mutex_enter(&ill->ill_lock);
2536 		ill->ill_global_timer = current + delay;
2537 		mutex_exit(&ill->ill_lock);
2538 		next = delay;
2539 
2540 	} else {
2541 		/* group or group/source specific query */
2542 		ilm = ilm_walker_start(&ilw, ill);
2543 		mutex_enter(&ill->ill_lock);
2544 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2545 			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2546 			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2547 			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
2548 			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
2549 				continue;
2550 
2551 			/*
2552 			 * If the query is group specific or we have a
2553 			 * pending group specific query, the response is
2554 			 * group specific (pending sources list should be
2555 			 * empty).  Otherwise, need to update the pending
2556 			 * sources list for the group and source specific
2557 			 * response.
2558 			 */
2559 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
2560 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
2561 group_query:
2562 				FREE_SLIST(ilm->ilm_pendsrcs);
2563 				ilm->ilm_pendsrcs = NULL;
2564 			} else {
2565 				boolean_t overflow;
2566 				slist_t *pktl;
2567 				if (numsrc > MAX_FILTER_SIZE ||
2568 				    (ilm->ilm_pendsrcs == NULL &&
2569 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
2570 					/*
2571 					 * We've been sent more sources than
2572 					 * we can deal with; or we can't deal
2573 					 * with a source list at all. Revert
2574 					 * to a group specific query.
2575 					 */
2576 					goto group_query;
2577 				}
2578 				if ((pktl = l_alloc()) == NULL)
2579 					goto group_query;
2580 				pktl->sl_numsrc = numsrc;
2581 				for (i = 0; i < numsrc; i++)
2582 					pktl->sl_addr[i] = src_array[i];
2583 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
2584 				    &overflow);
2585 				l_free(pktl);
2586 				if (overflow)
2587 					goto group_query;
2588 			}
2589 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
2590 			    INFINITY : (ilm->ilm_timer - current);
2591 			/* set timer to soonest value */
2592 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
2593 			if (ilm->ilm_timer < next)
2594 				next = ilm->ilm_timer;
2595 			ilm->ilm_timer += current;
2596 			break;
2597 		}
2598 		mutex_exit(&ill->ill_lock);
2599 		ilm_walker_finish(&ilw);
2600 	}
2601 
2602 	return (next);
2603 }
2604 
2605 /*
2606  * Send MLDv1 response packet with hoplimit 1
2607  */
2608 static void
2609 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
2610 {
2611 	mblk_t		*mp;
2612 	mld_hdr_t	*mldh;
2613 	ip6_t 		*ip6h;
2614 	ip6_hbh_t	*ip6hbh;
2615 	struct ip6_opt_router	*ip6router;
2616 	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
2617 	ill_t		*ill = ilm->ilm_ill;
2618 	ipif_t		*ipif;
2619 
2620 	/*
2621 	 * We need to place a router alert option in this packet.  The length
2622 	 * of the options must be a multiple of 8.  The hbh option header is 2
2623 	 * bytes followed by the 4 byte router alert option.  That leaves
2624 	 * 2 bytes of pad for a total of 8 bytes.
2625 	 */
2626 	const int	router_alert_length = 8;
2627 
2628 	ASSERT(ill->ill_isv6);
2629 
2630 	size += router_alert_length;
2631 	mp = allocb(size, BPRI_HI);
2632 	if (mp == NULL)
2633 		return;
2634 	bzero(mp->b_rptr, size);
2635 	mp->b_wptr = mp->b_rptr + size;
2636 
2637 	ip6h = (ip6_t *)mp->b_rptr;
2638 	ip6hbh = (struct ip6_hbh *)&ip6h[1];
2639 	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
2640 	/*
2641 	 * A zero is a pad option of length 1.  The bzero of the whole packet
2642 	 * above will pad between ip6router and mld.
2643 	 */
2644 	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
2645 
2646 	mldh->mld_type = type;
2647 	mldh->mld_addr = ilm->ilm_v6addr;
2648 
2649 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2650 	ip6router->ip6or_len = 2;
2651 	ip6router->ip6or_value[0] = 0;
2652 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2653 
2654 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2655 	ip6hbh->ip6h_len = 0;
2656 
2657 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2658 	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
2659 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2660 	ip6h->ip6_hops = MLD_HOP_LIMIT;
2661 	if (v6addr == NULL)
2662 		ip6h->ip6_dst =  ilm->ilm_v6addr;
2663 	else
2664 		ip6h->ip6_dst = *v6addr;
2665 
2666 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
2667 	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
2668 		ip6h->ip6_src = ipif->ipif_v6src_addr;
2669 		ipif_refrele(ipif);
2670 	} else {
2671 		/* Otherwise, use IPv6 default address selection. */
2672 		ip6h->ip6_src = ipv6_all_zeros;
2673 	}
2674 
2675 	/*
2676 	 * Prepare for checksum by putting icmp length in the icmp
2677 	 * checksum field. The checksum is calculated in ip_wput_v6.
2678 	 */
2679 	mldh->mld_cksum = htons(sizeof (*mldh));
2680 
2681 	/*
2682 	 * ip_wput will automatically loopback the multicast packet to
2683 	 * the conn if multicast loopback is enabled.
2684 	 * The MIB stats corresponding to this outgoing MLD packet
2685 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
2686 	 * ->icmp_update_out_mib_v6 function call.
2687 	 */
2688 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
2689 }
2690 
2691 /*
2692  * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
2693  * report will contain one multicast address record for each element of
2694  * reclist.  If this causes packet length to exceed ill->ill_max_frag,
2695  * multiple reports are sent.  reclist is assumed to be made up of
2696  * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
2697  */
2698 static void
2699 mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
2700 {
2701 	mblk_t		*mp;
2702 	mld2r_t		*mld2r;
2703 	mld2mar_t	*mld2mar;
2704 	in6_addr_t	*srcarray;
2705 	ip6_t		*ip6h;
2706 	ip6_hbh_t	*ip6hbh;
2707 	struct ip6_opt_router	*ip6router;
2708 	size_t		size, optlen, padlen, icmpsize, rsize;
2709 	ipif_t		*ipif;
2710 	int		i, numrec, more_src_cnt;
2711 	mrec_t		*rp, *cur_reclist;
2712 	mrec_t		*next_reclist = reclist;
2713 	boolean_t	morepkts;
2714 
2715 	ASSERT(IAM_WRITER_ILL(ill));
2716 
2717 	/* If there aren't any records, there's nothing to send */
2718 	if (reclist == NULL)
2719 		return;
2720 
2721 	ASSERT(ill->ill_isv6);
2722 
2723 	/*
2724 	 * Total option length (optlen + padlen) must be a multiple of
2725 	 * 8 bytes.  We assume here that optlen <= 8, so the total option
2726 	 * length will be 8.  Assert this in case anything ever changes.
2727 	 */
2728 	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
2729 	ASSERT(optlen <= 8);
2730 	padlen = 8 - optlen;
2731 nextpkt:
2732 	icmpsize = sizeof (mld2r_t);
2733 	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
2734 	morepkts = B_FALSE;
2735 	more_src_cnt = 0;
2736 	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
2737 	    rp = rp->mrec_next, numrec++) {
2738 		rsize = sizeof (mld2mar_t) +
2739 		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
2740 		if (size + rsize > ill->ill_max_frag) {
2741 			if (rp == cur_reclist) {
2742 				/*
2743 				 * If the first mrec we looked at is too big
2744 				 * to fit in a single packet (i.e the source
2745 				 * list is too big), we must either truncate
2746 				 * the list (if TO_EX or IS_EX), or send
2747 				 * multiple reports for the same group (all
2748 				 * other types).
2749 				 */
2750 				int srcspace, srcsperpkt;
2751 				srcspace = ill->ill_max_frag -
2752 				    (size + sizeof (mld2mar_t));
2753 
2754 				/*
2755 				 * Skip if there's not even enough room in
2756 				 * a single packet to send something useful.
2757 				 */
2758 				if (srcspace <= sizeof (in6_addr_t))
2759 					continue;
2760 
2761 				srcsperpkt = srcspace / sizeof (in6_addr_t);
2762 				/*
2763 				 * Increment icmpsize and size, because we will
2764 				 * be sending a record for the mrec we're
2765 				 * looking at now.
2766 				 */
2767 				rsize = sizeof (mld2mar_t) +
2768 				    (srcsperpkt * sizeof (in6_addr_t));
2769 				icmpsize += rsize;
2770 				size += rsize;
2771 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
2772 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
2773 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2774 					if (rp->mrec_next == NULL) {
2775 						/* no more packets to send */
2776 						break;
2777 					} else {
2778 						/*
2779 						 * more packets, but we're
2780 						 * done with this mrec.
2781 						 */
2782 						next_reclist = rp->mrec_next;
2783 					}
2784 				} else {
2785 					more_src_cnt = rp->mrec_srcs.sl_numsrc
2786 					    - srcsperpkt;
2787 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2788 					/*
2789 					 * We'll fix up this mrec (remove the
2790 					 * srcs we've already sent) before
2791 					 * returning to nextpkt above.
2792 					 */
2793 					next_reclist = rp;
2794 				}
2795 			} else {
2796 				next_reclist = rp;
2797 			}
2798 			morepkts = B_TRUE;
2799 			break;
2800 		}
2801 		icmpsize += rsize;
2802 		size += rsize;
2803 	}
2804 
2805 	mp = allocb(size, BPRI_HI);
2806 	if (mp == NULL)
2807 		goto free_reclist;
2808 	bzero(mp->b_rptr, size);
2809 	mp->b_wptr = mp->b_rptr + size;
2810 
2811 	ip6h = (ip6_t *)mp->b_rptr;
2812 	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
2813 	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
2814 	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
2815 	mld2mar = (mld2mar_t *)&(mld2r[1]);
2816 
2817 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2818 	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
2819 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2820 	ip6h->ip6_hops = MLD_HOP_LIMIT;
2821 	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
2822 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
2823 	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
2824 		ip6h->ip6_src = ipif->ipif_v6src_addr;
2825 		ipif_refrele(ipif);
2826 	} else {
2827 		/* otherwise, use IPv6 default address selection. */
2828 		ip6h->ip6_src = ipv6_all_zeros;
2829 	}
2830 
2831 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2832 	/*
2833 	 * ip6h_len is the number of 8-byte words, not including the first
2834 	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
2835 	 */
2836 	ip6hbh->ip6h_len = 0;
2837 
2838 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2839 	ip6router->ip6or_len = 2;
2840 	ip6router->ip6or_value[0] = 0;
2841 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2842 
2843 	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
2844 	mld2r->mld2r_nummar = htons(numrec);
2845 	/*
2846 	 * Prepare for the checksum by putting icmp length in the icmp
2847 	 * checksum field. The checksum is calculated in ip_wput_v6.
2848 	 */
2849 	mld2r->mld2r_cksum = htons(icmpsize);
2850 
2851 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2852 		mld2mar->mld2mar_type = rp->mrec_type;
2853 		mld2mar->mld2mar_auxlen = 0;
2854 		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2855 		mld2mar->mld2mar_group = rp->mrec_group;
2856 		srcarray = (in6_addr_t *)&(mld2mar[1]);
2857 
2858 		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
2859 			srcarray[i] = rp->mrec_srcs.sl_addr[i];
2860 
2861 		mld2mar = (mld2mar_t *)&(srcarray[i]);
2862 	}
2863 
2864 	/*
2865 	 * ip_wput will automatically loopback the multicast packet to
2866 	 * the conn if multicast loopback is enabled.
2867 	 * The MIB stats corresponding to this outgoing MLD packet
2868 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
2869 	 * ->icmp_update_out_mib_v6 function call.
2870 	 */
2871 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
2872 
2873 	if (morepkts) {
2874 		if (more_src_cnt > 0) {
2875 			int index, mvsize;
2876 			slist_t *sl = &next_reclist->mrec_srcs;
2877 			index = sl->sl_numsrc;
2878 			mvsize = more_src_cnt * sizeof (in6_addr_t);
2879 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2880 			    mvsize);
2881 			sl->sl_numsrc = more_src_cnt;
2882 		}
2883 		goto nextpkt;
2884 	}
2885 
2886 free_reclist:
2887 	while (reclist != NULL) {
2888 		rp = reclist->mrec_next;
2889 		mi_free(reclist);
2890 		reclist = rp;
2891 	}
2892 }
2893 
2894 static mrec_t *
2895 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
2896     mrec_t *next)
2897 {
2898 	mrec_t *rp;
2899 	int i;
2900 
2901 	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
2902 	    SLIST_IS_EMPTY(srclist))
2903 		return (next);
2904 
2905 	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
2906 	if (rp == NULL)
2907 		return (next);
2908 
2909 	rp->mrec_next = next;
2910 	rp->mrec_type = type;
2911 	rp->mrec_auxlen = 0;
2912 	rp->mrec_group = *grp;
2913 	if (srclist == NULL) {
2914 		rp->mrec_srcs.sl_numsrc = 0;
2915 	} else {
2916 		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
2917 		for (i = 0; i < srclist->sl_numsrc; i++)
2918 			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
2919 	}
2920 
2921 	return (rp);
2922 }
2923 
2924 /*
2925  * Set up initial retransmit state.  If memory cannot be allocated for
2926  * the source lists, simply create as much state as is possible; memory
2927  * allocation failures are considered one type of transient error that
2928  * the retransmissions are designed to overcome (and if they aren't
2929  * transient, there are bigger problems than failing to notify the
2930  * router about multicast group membership state changes).
2931  */
2932 static void
2933 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
2934     slist_t *flist)
2935 {
2936 	/*
2937 	 * There are only three possibilities for rtype:
2938 	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
2939 	 *	  => rtype is ALLOW_NEW_SOURCES
2940 	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
2941 	 *	  => rtype is CHANGE_TO_EXCLUDE
2942 	 *	State change that involves a filter mode change
2943 	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
2944 	 */
2945 	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
2946 	    rtype == ALLOW_NEW_SOURCES);
2947 
2948 	rtxp->rtx_cnt = ill->ill_mcast_rv;
2949 
2950 	switch (rtype) {
2951 	case CHANGE_TO_EXCLUDE:
2952 		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
2953 		CLEAR_SLIST(rtxp->rtx_allow);
2954 		COPY_SLIST(flist, rtxp->rtx_block);
2955 		break;
2956 	case ALLOW_NEW_SOURCES:
2957 	case CHANGE_TO_INCLUDE:
2958 		rtxp->rtx_fmode_cnt =
2959 		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
2960 		CLEAR_SLIST(rtxp->rtx_block);
2961 		COPY_SLIST(flist, rtxp->rtx_allow);
2962 		break;
2963 	}
2964 }
2965 
2966 /*
2967  * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
2968  * RFC 3376 section 5.1, covers three cases:
2969  *	* The current state change is a filter mode change
2970  *		Set filter mode retransmit counter; set retransmit allow or
2971  *		block list to new source list as appropriate, and clear the
2972  *		retransmit list that was not set; send TO_IN or TO_EX with
2973  *		new source list.
2974  *	* The current state change is a source list change, but the filter
2975  *	  mode retransmit counter is > 0
2976  *		Decrement filter mode retransmit counter; set retransmit
2977  *		allow or block list to  new source list as appropriate,
2978  *		and clear the retransmit list that was not set; send TO_IN
2979  *		or TO_EX with new source list.
2980  *	* The current state change is a source list change, and the filter
2981  *	  mode retransmit counter is 0.
2982  *		Merge existing rtx allow and block lists with new state:
2983  *		  rtx_allow = (new allow + rtx_allow) - new block
2984  *		  rtx_block = (new block + rtx_block) - new allow
2985  *		Send ALLOW and BLOCK records for new retransmit lists;
2986  *		decrement retransmit counter.
2987  *
2988  * As is the case for mcast_init_rtx(), memory allocation failures are
2989  * acceptable; we just create as much state as we can.
2990  */
2991 static mrec_t *
2992 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
2993 {
2994 	ill_t *ill;
2995 	rtx_state_t *rtxp = &ilm->ilm_rtx;
2996 	mcast_record_t txtype;
2997 	mrec_t *rp, *rpnext, *rtnmrec;
2998 	boolean_t ovf;
2999 
3000 	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);
3001 
3002 	if (mreclist == NULL)
3003 		return (mreclist);
3004 
3005 	/*
3006 	 * A filter mode change is indicated by a single mrec, which is
3007 	 * either TO_IN or TO_EX.  In this case, we just need to set new
3008 	 * retransmit state as if this were an initial join.  There is
3009 	 * no change to the mrec list.
3010 	 */
3011 	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
3012 	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
3013 		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
3014 		    &mreclist->mrec_srcs);
3015 		return (mreclist);
3016 	}
3017 
3018 	/*
3019 	 * Only the source list has changed
3020 	 */
3021 	rtxp->rtx_cnt = ill->ill_mcast_rv;
3022 	if (rtxp->rtx_fmode_cnt > 0) {
3023 		/* but we're still sending filter mode change reports */
3024 		rtxp->rtx_fmode_cnt--;
3025 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
3026 			CLEAR_SLIST(rtxp->rtx_block);
3027 			COPY_SLIST(flist, rtxp->rtx_allow);
3028 			txtype = CHANGE_TO_INCLUDE;
3029 		} else {
3030 			CLEAR_SLIST(rtxp->rtx_allow);
3031 			COPY_SLIST(flist, rtxp->rtx_block);
3032 			txtype = CHANGE_TO_EXCLUDE;
3033 		}
3034 		/* overwrite first mrec with new info */
3035 		mreclist->mrec_type = txtype;
3036 		l_copy(flist, &mreclist->mrec_srcs);
3037 		/* then free any remaining mrecs */
3038 		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
3039 			rpnext = rp->mrec_next;
3040 			mi_free(rp);
3041 		}
3042 		mreclist->mrec_next = NULL;
3043 		rtnmrec = mreclist;
3044 	} else {
3045 		mrec_t *allow_mrec, *block_mrec;
3046 		/*
3047 		 * Just send the source change reports; but we need to
3048 		 * recalculate the ALLOW and BLOCK lists based on previous
3049 		 * state and new changes.
3050 		 */
3051 		rtnmrec = mreclist;
3052 		allow_mrec = block_mrec = NULL;
3053 		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
3054 			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
3055 			    rp->mrec_type == BLOCK_OLD_SOURCES);
3056 			if (rp->mrec_type == ALLOW_NEW_SOURCES)
3057 				allow_mrec = rp;
3058 			else
3059 				block_mrec = rp;
3060 		}
3061 		/*
3062 		 * Perform calculations:
3063 		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
3064 		 *   new_block = mrec_block + (rtx_block - mrec_allow)
3065 		 *
3066 		 * Each calc requires two steps, for example:
3067 		 *   rtx_allow = rtx_allow - mrec_block;
3068 		 *   new_allow = mrec_allow + rtx_allow;
3069 		 *
3070 		 * Store results in mrec lists, and then copy into rtx lists.
3071 		 * We do it in this order in case the rtx list hasn't been
3072 		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
3073 		 * Overflows are also okay.
3074 		 */
3075 		if (block_mrec != NULL) {
3076 			l_difference_in_a(rtxp->rtx_allow,
3077 			    &block_mrec->mrec_srcs);
3078 		}
3079 		if (allow_mrec != NULL) {
3080 			l_difference_in_a(rtxp->rtx_block,
3081 			    &allow_mrec->mrec_srcs);
3082 			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
3083 			    &ovf);
3084 		}
3085 		if (block_mrec != NULL) {
3086 			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
3087 			    &ovf);
3088 			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
3089 		} else {
3090 			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
3091 			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
3092 		}
3093 		if (allow_mrec != NULL) {
3094 			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
3095 		} else {
3096 			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
3097 			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
3098 		}
3099 	}
3100 
3101 	return (rtnmrec);
3102 }
3103 
3104 /*
3105  * Convenience routine to signal the restart-timer thread.
3106  */
3107 static void
3108 mcast_signal_restart_thread(ip_stack_t *ipst)
3109 {
3110 	mutex_enter(&ipst->ips_mrt_lock);
3111 	ipst->ips_mrt_flags |= IP_MRT_RUN;
3112 	cv_signal(&ipst->ips_mrt_cv);
3113 	mutex_exit(&ipst->ips_mrt_lock);
3114 }
3115 
3116 /*
3117  * Thread to restart IGMP/MLD timers.  See the comment in igmp_joingroup() for
3118  * the story behind this unfortunate thread.
3119  */
3120 void
3121 mcast_restart_timers_thread(ip_stack_t *ipst)
3122 {
3123 	int next;
3124 	char name[64];
3125 	callb_cpr_t cprinfo;
3126 
3127 	(void) snprintf(name, sizeof (name), "mcast_restart_timers_thread_%d",
3128 	    ipst->ips_netstack->netstack_stackid);
3129 	CALLB_CPR_INIT(&cprinfo, &ipst->ips_mrt_lock, callb_generic_cpr, name);
3130 
3131 	for (;;) {
3132 		mutex_enter(&ipst->ips_mrt_lock);
3133 		while (!(ipst->ips_mrt_flags & (IP_MRT_STOP|IP_MRT_RUN))) {
3134 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
3135 			cv_wait(&ipst->ips_mrt_cv, &ipst->ips_mrt_lock);
3136 			CALLB_CPR_SAFE_END(&cprinfo, &ipst->ips_mrt_lock);
3137 		}
3138 		if (ipst->ips_mrt_flags & IP_MRT_STOP)
3139 			break;
3140 		ipst->ips_mrt_flags &= ~IP_MRT_RUN;
3141 		mutex_exit(&ipst->ips_mrt_lock);
3142 
3143 		mutex_enter(&ipst->ips_igmp_timer_lock);
3144 		next = ipst->ips_igmp_deferred_next;
3145 		ipst->ips_igmp_deferred_next = INFINITY;
3146 		mutex_exit(&ipst->ips_igmp_timer_lock);
3147 
3148 		if (next != INFINITY)
3149 			igmp_start_timers(next, ipst);
3150 
3151 		mutex_enter(&ipst->ips_mld_timer_lock);
3152 		next = ipst->ips_mld_deferred_next;
3153 		ipst->ips_mld_deferred_next = INFINITY;
3154 		mutex_exit(&ipst->ips_mld_timer_lock);
3155 		if (next != INFINITY)
3156 			mld_start_timers(next, ipst);
3157 	}
3158 
3159 	ipst->ips_mrt_flags |= IP_MRT_DONE;
3160 	cv_signal(&ipst->ips_mrt_done_cv);
3161 	CALLB_CPR_EXIT(&cprinfo);	/* drops ips_mrt_lock */
3162 	thread_exit();
3163 }
3164