xref: /titanic_51/usr/src/uts/common/inet/ip/igmp.c (revision 16dd44c265271a75647fb0bb41109bb7c585a526)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 /*
28  * Internet Group Management Protocol (IGMP) routines.
29  * Multicast Listener Discovery Protocol (MLD) routines.
30  *
31  * Written by Steve Deering, Stanford, May 1988.
32  * Modified by Rosen Sharma, Stanford, Aug 1994.
33  * Modified by Bill Fenner, Xerox PARC, Feb. 1995.
34  *
35  * MULTICAST 3.5.1.1
36  */
37 
38 #include <sys/types.h>
39 #include <sys/stream.h>
40 #include <sys/stropts.h>
41 #include <sys/strlog.h>
42 #include <sys/strsun.h>
43 #include <sys/systm.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/cmn_err.h>
47 #include <sys/atomic.h>
48 #include <sys/zone.h>
49 #include <sys/callb.h>
50 #include <sys/param.h>
51 #include <sys/socket.h>
52 #include <inet/ipclassifier.h>
53 #include <net/if.h>
54 #include <net/route.h>
55 #include <netinet/in.h>
56 #include <netinet/igmp_var.h>
57 #include <netinet/ip6.h>
58 #include <netinet/icmp6.h>
59 
60 #include <inet/common.h>
61 #include <inet/mi.h>
62 #include <inet/nd.h>
63 #include <inet/ip.h>
64 #include <inet/ip6.h>
65 #include <inet/ip_multi.h>
66 #include <inet/ip_listutils.h>
67 
68 #include <netinet/igmp.h>
69 #include <inet/ip_if.h>
70 #include <net/pfkeyv2.h>
71 #include <inet/ipsec_info.h>
72 
73 static uint_t	igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill);
74 static uint_t	igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen);
75 static uint_t	mld_query_in(mld_hdr_t *mldh, ill_t *ill);
76 static uint_t	mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen);
77 static void	igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr);
78 static void	mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr);
79 static void	igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist);
80 static void	mldv2_sendrpt(ill_t *ill, mrec_t *reclist);
81 static mrec_t	*mcast_bldmrec(mcast_record_t type, in6_addr_t *grp,
82 		    slist_t *srclist, mrec_t *next);
83 static void	mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp,
84 		    mcast_record_t rtype, slist_t *flist);
85 static mrec_t	*mcast_merge_rtx(ilm_t *ilm, mrec_t *rp, slist_t *flist);
86 static void	mcast_signal_restart_thread(ip_stack_t *ipst);
87 
/*
 * Timer length conversion macros.  Timer values are always kept, and
 * handed to the timer functions, in milliseconds; default values and
 * values taken off the wire may use other units and are converted here.
 *
 * "dsec" is decisecond, i.e. a tenth of a second; it is an obscure word
 * but a convenient abbreviation.
 */
#define	DSEC_TO_MSEC(dsec)	(100 * (dsec))
#define	SEC_TO_MSEC(sec)	(1000 * (sec))
98 
/*
 * A running timer (scheduled through timeout) can be cancelled if another
 * timer with a shorter timeout value is scheduled before it has timed
 * out.  When the shorter timer expires, the original timer is updated
 * to account for the time elapsed while the shorter timer ran; but this
 * does not take into account the amount of time already spent in timeout
 * state before being preempted by the shorter timer, that is, the
 * interval between the time it was scheduled and the time it was
 * cancelled.  This can cause delays in sending out multicast membership
 * reports.  To resolve this problem, wallclock time (absolute time) is
 * used instead of deltas (relative time) to track timers.
 *
 * CURRENT_MSTIME yields the current lbolt value converted to
 * milliseconds; it is the "now" that absolute timer values are computed
 * from and compared against, so that multicast membership reports are
 * sent on time.
 *
 * A timer does not fire exactly at the time it was scheduled to fire; a
 * difference of a few milliseconds has been observed.  CURRENT_OFFSET
 * (in milliseconds) is the slack used to take care of that difference.
 */

#define	CURRENT_MSTIME	((uint_t)TICK_TO_MSEC(ddi_get_lbolt()))
#define	CURRENT_OFFSET	(999)
120 
121 /*
122  * The first multicast join will trigger the igmp timers / mld timers
123  * The unit for next is milliseconds.
124  */
125 static void
126 igmp_start_timers(unsigned next, ip_stack_t *ipst)
127 {
128 	int	time_left;
129 	int	ret;
130 
131 	ASSERT(next != 0 && next != INFINITY);
132 
133 	mutex_enter(&ipst->ips_igmp_timer_lock);
134 
135 	if (ipst->ips_igmp_timer_setter_active) {
136 		/*
137 		 * Serialize timer setters, one at a time. If the
138 		 * timer is currently being set by someone,
139 		 * just record the next time when it has to be
140 		 * invoked and return. The current setter will
141 		 * take care.
142 		 */
143 		ipst->ips_igmp_time_to_next =
144 		    MIN(ipst->ips_igmp_time_to_next, next);
145 		mutex_exit(&ipst->ips_igmp_timer_lock);
146 		return;
147 	} else {
148 		ipst->ips_igmp_timer_setter_active = B_TRUE;
149 	}
150 	if (ipst->ips_igmp_timeout_id == 0) {
151 		/*
152 		 * The timer is inactive. We need to start a timer
153 		 */
154 		ipst->ips_igmp_time_to_next = next;
155 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
156 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
157 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
158 		ipst->ips_igmp_timer_setter_active = B_FALSE;
159 		mutex_exit(&ipst->ips_igmp_timer_lock);
160 		return;
161 	}
162 
163 	/*
164 	 * The timer was scheduled sometime back for firing in
165 	 * 'igmp_time_to_next' ms and is active. We need to
166 	 * reschedule the timeout if the new 'next' will happen
167 	 * earlier than the currently scheduled timeout
168 	 */
169 	time_left = ipst->ips_igmp_timer_scheduled_last +
170 	    MSEC_TO_TICK(ipst->ips_igmp_time_to_next) - ddi_get_lbolt();
171 	if (time_left < MSEC_TO_TICK(next)) {
172 		ipst->ips_igmp_timer_setter_active = B_FALSE;
173 		mutex_exit(&ipst->ips_igmp_timer_lock);
174 		return;
175 	}
176 
177 	mutex_exit(&ipst->ips_igmp_timer_lock);
178 	ret = untimeout(ipst->ips_igmp_timeout_id);
179 	mutex_enter(&ipst->ips_igmp_timer_lock);
180 	/*
181 	 * The timeout was cancelled, or the timeout handler
182 	 * completed, while we were blocked in the untimeout.
183 	 * No other thread could have set the timer meanwhile
184 	 * since we serialized all the timer setters. Thus
185 	 * no timer is currently active nor executing nor will
186 	 * any timer fire in the future. We start the timer now
187 	 * if needed.
188 	 */
189 	if (ret == -1) {
190 		ASSERT(ipst->ips_igmp_timeout_id == 0);
191 	} else {
192 		ASSERT(ipst->ips_igmp_timeout_id != 0);
193 		ipst->ips_igmp_timeout_id = 0;
194 	}
195 	if (ipst->ips_igmp_time_to_next != 0) {
196 		ipst->ips_igmp_time_to_next =
197 		    MIN(ipst->ips_igmp_time_to_next, next);
198 		ipst->ips_igmp_timeout_id = timeout(igmp_timeout_handler,
199 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_igmp_time_to_next));
200 		ipst->ips_igmp_timer_scheduled_last = ddi_get_lbolt();
201 	}
202 	ipst->ips_igmp_timer_setter_active = B_FALSE;
203 	mutex_exit(&ipst->ips_igmp_timer_lock);
204 }
205 
206 /*
207  * mld_start_timers:
208  * The unit for next is milliseconds.
209  */
210 static void
211 mld_start_timers(unsigned next, ip_stack_t *ipst)
212 {
213 	int	time_left;
214 	int	ret;
215 
216 	ASSERT(next != 0 && next != INFINITY);
217 
218 	mutex_enter(&ipst->ips_mld_timer_lock);
219 	if (ipst->ips_mld_timer_setter_active) {
220 		/*
221 		 * Serialize timer setters, one at a time. If the
222 		 * timer is currently being set by someone,
223 		 * just record the next time when it has to be
224 		 * invoked and return. The current setter will
225 		 * take care.
226 		 */
227 		ipst->ips_mld_time_to_next =
228 		    MIN(ipst->ips_mld_time_to_next, next);
229 		mutex_exit(&ipst->ips_mld_timer_lock);
230 		return;
231 	} else {
232 		ipst->ips_mld_timer_setter_active = B_TRUE;
233 	}
234 	if (ipst->ips_mld_timeout_id == 0) {
235 		/*
236 		 * The timer is inactive. We need to start a timer
237 		 */
238 		ipst->ips_mld_time_to_next = next;
239 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
240 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
241 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
242 		ipst->ips_mld_timer_setter_active = B_FALSE;
243 		mutex_exit(&ipst->ips_mld_timer_lock);
244 		return;
245 	}
246 
247 	/*
248 	 * The timer was scheduled sometime back for firing in
249 	 * 'igmp_time_to_next' ms and is active. We need to
250 	 * reschedule the timeout if the new 'next' will happen
251 	 * earlier than the currently scheduled timeout
252 	 */
253 	time_left = ipst->ips_mld_timer_scheduled_last +
254 	    MSEC_TO_TICK(ipst->ips_mld_time_to_next) - ddi_get_lbolt();
255 	if (time_left < MSEC_TO_TICK(next)) {
256 		ipst->ips_mld_timer_setter_active = B_FALSE;
257 		mutex_exit(&ipst->ips_mld_timer_lock);
258 		return;
259 	}
260 
261 	mutex_exit(&ipst->ips_mld_timer_lock);
262 	ret = untimeout(ipst->ips_mld_timeout_id);
263 	mutex_enter(&ipst->ips_mld_timer_lock);
264 	/*
265 	 * The timeout was cancelled, or the timeout handler
266 	 * completed, while we were blocked in the untimeout.
267 	 * No other thread could have set the timer meanwhile
268 	 * since we serialized all the timer setters. Thus
269 	 * no timer is currently active nor executing nor will
270 	 * any timer fire in the future. We start the timer now
271 	 * if needed.
272 	 */
273 	if (ret == -1) {
274 		ASSERT(ipst->ips_mld_timeout_id == 0);
275 	} else {
276 		ASSERT(ipst->ips_mld_timeout_id != 0);
277 		ipst->ips_mld_timeout_id = 0;
278 	}
279 	if (ipst->ips_mld_time_to_next != 0) {
280 		ipst->ips_mld_time_to_next =
281 		    MIN(ipst->ips_mld_time_to_next, next);
282 		ipst->ips_mld_timeout_id = timeout(mld_timeout_handler,
283 		    (void *)ipst, MSEC_TO_TICK(ipst->ips_mld_time_to_next));
284 		ipst->ips_mld_timer_scheduled_last = ddi_get_lbolt();
285 	}
286 	ipst->ips_mld_timer_setter_active = B_FALSE;
287 	mutex_exit(&ipst->ips_mld_timer_lock);
288 }
289 
290 /*
291  * igmp_input:
292  * Return NULL for a bad packet that is discarded here.
293  * Return mp if the message is OK and should be handed to "raw" receivers.
294  * Callers of igmp_input() may need to reinitialize variables that were copied
295  * from the mblk as this calls pullupmsg().
296  */
297 /* ARGSUSED */
298 mblk_t *
299 igmp_input(queue_t *q, mblk_t *mp, ill_t *ill)
300 {
301 	igmpa_t 	*igmpa;
302 	ipha_t		*ipha = (ipha_t *)(mp->b_rptr);
303 	int		iphlen, igmplen, mblklen;
304 	ilm_t 		*ilm;
305 	uint32_t	src, dst;
306 	uint32_t 	group;
307 	uint_t		next;
308 	ipif_t 		*ipif;
309 	ip_stack_t	*ipst;
310 	ilm_walker_t	ilw;
311 
312 	ASSERT(ill != NULL);
313 	ASSERT(!ill->ill_isv6);
314 	ipst = ill->ill_ipst;
315 	++ipst->ips_igmpstat.igps_rcv_total;
316 
317 	mblklen = MBLKL(mp);
318 	if (mblklen < 1 || mblklen < (iphlen = IPH_HDR_LENGTH(ipha))) {
319 		++ipst->ips_igmpstat.igps_rcv_tooshort;
320 		goto bad_pkt;
321 	}
322 	igmplen = ntohs(ipha->ipha_length) - iphlen;
323 	/*
324 	 * Since msg sizes are more variable with v3, just pullup the
325 	 * whole thing now.
326 	 */
327 	if (MBLKL(mp) < (igmplen + iphlen)) {
328 		mblk_t *mp1;
329 		if ((mp1 = msgpullup(mp, -1)) == NULL) {
330 			++ipst->ips_igmpstat.igps_rcv_tooshort;
331 			goto bad_pkt;
332 		}
333 		freemsg(mp);
334 		mp = mp1;
335 		ipha = (ipha_t *)(mp->b_rptr);
336 	}
337 
338 	/*
339 	 * Validate lengths
340 	 */
341 	if (igmplen < IGMP_MINLEN) {
342 		++ipst->ips_igmpstat.igps_rcv_tooshort;
343 		goto bad_pkt;
344 	}
345 	/*
346 	 * Validate checksum
347 	 */
348 	if (IP_CSUM(mp, iphlen, 0)) {
349 		++ipst->ips_igmpstat.igps_rcv_badsum;
350 		goto bad_pkt;
351 	}
352 
353 	igmpa = (igmpa_t *)(&mp->b_rptr[iphlen]);
354 	src = ipha->ipha_src;
355 	dst = ipha->ipha_dst;
356 	if (ip_debug > 1)
357 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
358 		    "igmp_input: src 0x%x, dst 0x%x on %s\n",
359 		    (int)ntohl(src), (int)ntohl(dst),
360 		    ill->ill_name);
361 
362 	switch (igmpa->igmpa_type) {
363 	case IGMP_MEMBERSHIP_QUERY:
364 		/*
365 		 * packet length differentiates between v1/v2 and v3
366 		 * v1/v2 should be exactly 8 octets long; v3 is >= 12
367 		 */
368 		if ((igmplen == IGMP_MINLEN) ||
369 		    (ipst->ips_igmp_max_version <= IGMP_V2_ROUTER)) {
370 			next = igmp_query_in(ipha, igmpa, ill);
371 		} else if (igmplen >= IGMP_V3_QUERY_MINLEN) {
372 			next = igmpv3_query_in((igmp3qa_t *)igmpa, ill,
373 			    igmplen);
374 		} else {
375 			++ipst->ips_igmpstat.igps_rcv_tooshort;
376 			goto bad_pkt;
377 		}
378 		if (next == 0)
379 			goto bad_pkt;
380 
381 		if (next != INFINITY)
382 			igmp_start_timers(next, ipst);
383 
384 		break;
385 
386 	case IGMP_V1_MEMBERSHIP_REPORT:
387 	case IGMP_V2_MEMBERSHIP_REPORT:
388 		/*
389 		 * For fast leave to work, we have to know that we are the
390 		 * last person to send a report for this group. Reports
391 		 * generated by us are looped back since we could potentially
392 		 * be a multicast router, so discard reports sourced by me.
393 		 */
394 		mutex_enter(&ill->ill_lock);
395 		for (ipif = ill->ill_ipif; ipif != NULL;
396 		    ipif = ipif->ipif_next) {
397 			if (ipif->ipif_lcl_addr == src) {
398 				if (ip_debug > 1) {
399 					(void) mi_strlog(ill->ill_rq,
400 					    1,
401 					    SL_TRACE,
402 					    "igmp_input: we are only "
403 					    "member src 0x%x ipif_local 0x%x",
404 					    (int)ntohl(src),
405 					    (int)ntohl(ipif->ipif_lcl_addr));
406 				}
407 				mutex_exit(&ill->ill_lock);
408 				return (mp);
409 			}
410 		}
411 		mutex_exit(&ill->ill_lock);
412 
413 		++ipst->ips_igmpstat.igps_rcv_reports;
414 		group = igmpa->igmpa_group;
415 		if (!CLASSD(group)) {
416 			++ipst->ips_igmpstat.igps_rcv_badreports;
417 			goto bad_pkt;
418 		}
419 
420 		/*
421 		 * KLUDGE: if the IP source address of the report has an
422 		 * unspecified (i.e., zero) subnet number, as is allowed for
423 		 * a booting host, replace it with the correct subnet number
424 		 * so that a process-level multicast routing demon can
425 		 * determine which subnet it arrived from.  This is necessary
426 		 * to compensate for the lack of any way for a process to
427 		 * determine the arrival interface of an incoming packet.
428 		 *
429 		 * Requires that a copy of *this* message it passed up
430 		 * to the raw interface which is done by our caller.
431 		 */
432 		if ((src & htonl(0xFF000000U)) == 0) {	/* Minimum net mask */
433 			/* Pick the first ipif on this ill */
434 			mutex_enter(&ill->ill_lock);
435 			src = ill->ill_ipif->ipif_subnet;
436 			mutex_exit(&ill->ill_lock);
437 			ip1dbg(("igmp_input: changed src to 0x%x\n",
438 			    (int)ntohl(src)));
439 			ipha->ipha_src = src;
440 		}
441 
442 		/*
443 		 * If our ill has ILMs that belong to the group being
444 		 * reported, and we are a 'Delaying Member' in the RFC
445 		 * terminology, stop our timer for that group and 'clear
446 		 * flag' i.e. mark as IGMP_OTHERMEMBER.
447 		 */
448 		ilm = ilm_walker_start(&ilw, ill);
449 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
450 			if (ilm->ilm_addr == group) {
451 				++ipst->ips_igmpstat.igps_rcv_ourreports;
452 				ilm->ilm_timer = INFINITY;
453 				ilm->ilm_state = IGMP_OTHERMEMBER;
454 			}
455 		}
456 		ilm_walker_finish(&ilw);
457 		break;
458 
459 	case IGMP_V3_MEMBERSHIP_REPORT:
460 		/*
461 		 * Currently nothing to do here; IGMP router is not
462 		 * implemented in ip, and v3 hosts don't pay attention
463 		 * to membership reports.
464 		 */
465 		break;
466 	}
467 	/*
468 	 * Pass all valid IGMP packets up to any process(es) listening
469 	 * on a raw IGMP socket. Do not free the packet.
470 	 */
471 	return (mp);
472 
473 bad_pkt:
474 	freemsg(mp);
475 	return (NULL);
476 }
477 
478 static uint_t
479 igmp_query_in(ipha_t *ipha, igmpa_t *igmpa, ill_t *ill)
480 {
481 	ilm_t	*ilm;
482 	int	timer;
483 	uint_t	next, current;
484 	ip_stack_t	 *ipst;
485 	ilm_walker_t 	ilw;
486 
487 	ipst = ill->ill_ipst;
488 	++ipst->ips_igmpstat.igps_rcv_queries;
489 
490 	/*
491 	 * In the IGMPv2 specification, there are 3 states and a flag.
492 	 *
493 	 * In Non-Member state, we simply don't have a membership record.
494 	 * In Delaying Member state, our timer is running (ilm->ilm_timer
495 	 * < INFINITY).  In Idle Member state, our timer is not running
496 	 * (ilm->ilm_timer == INFINITY).
497 	 *
498 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
499 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
500 	 * if I sent the last report.
501 	 */
502 	if ((igmpa->igmpa_code == 0) ||
503 	    (ipst->ips_igmp_max_version == IGMP_V1_ROUTER)) {
504 		/*
505 		 * Query from an old router.
506 		 * Remember that the querier on this interface is old,
507 		 * and set the timer to the value in RFC 1112.
508 		 */
509 
510 
511 		mutex_enter(&ill->ill_lock);
512 		ill->ill_mcast_v1_time = 0;
513 		ill->ill_mcast_v1_tset = 1;
514 		if (ill->ill_mcast_type != IGMP_V1_ROUTER) {
515 			ip1dbg(("Received IGMPv1 Query on %s, switching mode "
516 			    "to IGMP_V1_ROUTER\n", ill->ill_name));
517 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
518 			ill->ill_mcast_type = IGMP_V1_ROUTER;
519 		}
520 		mutex_exit(&ill->ill_lock);
521 
522 		timer = SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY);
523 
524 		if (ipha->ipha_dst != htonl(INADDR_ALLHOSTS_GROUP) ||
525 		    igmpa->igmpa_group != 0) {
526 			++ipst->ips_igmpstat.igps_rcv_badqueries;
527 			return (0);
528 		}
529 
530 	} else {
531 		in_addr_t group;
532 
533 		/*
534 		 * Query from a new router
535 		 * Simply do a validity check
536 		 */
537 		group = igmpa->igmpa_group;
538 		if (group != 0 && (!CLASSD(group))) {
539 			++ipst->ips_igmpstat.igps_rcv_badqueries;
540 			return (0);
541 		}
542 
543 		/*
544 		 * Switch interface state to v2 on receipt of a v2 query
545 		 * ONLY IF current state is v3.  Let things be if current
546 		 * state if v1 but do reset the v2-querier-present timer.
547 		 */
548 		mutex_enter(&ill->ill_lock);
549 		if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
550 			ip1dbg(("Received IGMPv2 Query on %s, switching mode "
551 			    "to IGMP_V2_ROUTER", ill->ill_name));
552 			atomic_add_16(&ill->ill_ifptr->illif_mcast_v2, 1);
553 			ill->ill_mcast_type = IGMP_V2_ROUTER;
554 		}
555 		ill->ill_mcast_v2_time = 0;
556 		ill->ill_mcast_v2_tset = 1;
557 		mutex_exit(&ill->ill_lock);
558 
559 		timer = DSEC_TO_MSEC((int)igmpa->igmpa_code);
560 	}
561 
562 	if (ip_debug > 1) {
563 		mutex_enter(&ill->ill_lock);
564 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
565 		    "igmp_input: TIMER = igmp_code %d igmp_type 0x%x",
566 		    (int)ntohs(igmpa->igmpa_code),
567 		    (int)ntohs(igmpa->igmpa_type));
568 		mutex_exit(&ill->ill_lock);
569 	}
570 
571 	/*
572 	 * -Start the timers in all of our membership records
573 	 *  for the physical interface on which the query
574 	 *  arrived, excluding those that belong to the "all
575 	 *  hosts" group (224.0.0.1).
576 	 *
577 	 * -Restart any timer that is already running but has
578 	 *  a value longer than the requested timeout.
579 	 *
580 	 * -Use the value specified in the query message as
581 	 *  the maximum timeout.
582 	 */
583 	next = (unsigned)INFINITY;
584 
585 	ilm = ilm_walker_start(&ilw, ill);
586 	mutex_enter(&ill->ill_lock);
587 	current = CURRENT_MSTIME;
588 
589 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
590 		/*
591 		 * A multicast router joins INADDR_ANY address
592 		 * to enable promiscuous reception of all
593 		 * mcasts from the interface. This INADDR_ANY
594 		 * is stored in the ilm_v6addr as V6 unspec addr
595 		 */
596 		if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr))
597 			continue;
598 		if (ilm->ilm_addr == htonl(INADDR_ANY))
599 			continue;
600 		if (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP) &&
601 		    (igmpa->igmpa_group == 0) ||
602 		    (igmpa->igmpa_group == ilm->ilm_addr)) {
603 			if (ilm->ilm_timer > timer) {
604 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
605 				if (ilm->ilm_timer < next)
606 					next = ilm->ilm_timer;
607 				ilm->ilm_timer += current;
608 			}
609 		}
610 	}
611 	mutex_exit(&ill->ill_lock);
612 	ilm_walker_finish(&ilw);
613 
614 	return (next);
615 }
616 
617 static uint_t
618 igmpv3_query_in(igmp3qa_t *igmp3qa, ill_t *ill, int igmplen)
619 {
620 	uint_t		i, next, mrd, qqi, timer, delay, numsrc;
621 	uint_t		current;
622 	ilm_t		*ilm;
623 	ipaddr_t	*src_array;
624 	uint8_t		qrv;
625 	ip_stack_t	 *ipst;
626 	ilm_walker_t	ilw;
627 
628 	ipst = ill->ill_ipst;
629 	/* make sure numsrc matches packet size */
630 	numsrc = ntohs(igmp3qa->igmp3qa_numsrc);
631 	if (igmplen < IGMP_V3_QUERY_MINLEN + (numsrc * sizeof (ipaddr_t))) {
632 		++ipst->ips_igmpstat.igps_rcv_tooshort;
633 		return (0);
634 	}
635 	src_array = (ipaddr_t *)&igmp3qa[1];
636 
637 	++ipst->ips_igmpstat.igps_rcv_queries;
638 
639 	if ((mrd = (uint_t)igmp3qa->igmp3qa_mxrc) >= IGMP_V3_MAXRT_FPMIN) {
640 		uint_t hdrval, mant, exp;
641 		hdrval = (uint_t)igmp3qa->igmp3qa_mxrc;
642 		mant = hdrval & IGMP_V3_MAXRT_MANT_MASK;
643 		exp = (hdrval & IGMP_V3_MAXRT_EXP_MASK) >> 4;
644 		mrd = (mant | 0x10) << (exp + 3);
645 	}
646 	if (mrd == 0)
647 		mrd = MCAST_DEF_QUERY_RESP_INTERVAL;
648 	timer = DSEC_TO_MSEC(mrd);
649 	MCAST_RANDOM_DELAY(delay, timer);
650 	next = (unsigned)INFINITY;
651 	current = CURRENT_MSTIME;
652 
653 	if ((qrv = igmp3qa->igmp3qa_sqrv & IGMP_V3_RV_MASK) == 0)
654 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
655 	else
656 		ill->ill_mcast_rv = qrv;
657 
658 	if ((qqi = (uint_t)igmp3qa->igmp3qa_qqic) >= IGMP_V3_QQI_FPMIN) {
659 		uint_t hdrval, mant, exp;
660 		hdrval = (uint_t)igmp3qa->igmp3qa_qqic;
661 		mant = hdrval & IGMP_V3_QQI_MANT_MASK;
662 		exp = (hdrval & IGMP_V3_QQI_EXP_MASK) >> 4;
663 		qqi = (mant | 0x10) << (exp + 3);
664 	}
665 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
666 
667 	/*
668 	 * If we have a pending general query response that's scheduled
669 	 * sooner than the delay we calculated for this response, then
670 	 * no action is required (RFC3376 section 5.2 rule 1)
671 	 */
672 	mutex_enter(&ill->ill_lock);
673 	if (ill->ill_global_timer < (current + delay)) {
674 		mutex_exit(&ill->ill_lock);
675 		return (next);
676 	}
677 	mutex_exit(&ill->ill_lock);
678 
679 	/*
680 	 * Now take action depending upon query type:
681 	 * general, group specific, or group/source specific.
682 	 */
683 	if ((numsrc == 0) && (igmp3qa->igmp3qa_group == INADDR_ANY)) {
684 		/*
685 		 * general query
686 		 * We know global timer is either not running or is
687 		 * greater than our calculated delay, so reset it to
688 		 * our delay (random value in range [0, response time]).
689 		 */
690 		mutex_enter(&ill->ill_lock);
691 		ill->ill_global_timer =  current + delay;
692 		mutex_exit(&ill->ill_lock);
693 		next = delay;
694 
695 	} else {
696 		/* group or group/source specific query */
697 		ilm = ilm_walker_start(&ilw, ill);
698 		mutex_enter(&ill->ill_lock);
699 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
700 			if (!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr) ||
701 			    (ilm->ilm_addr == htonl(INADDR_ANY)) ||
702 			    (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) ||
703 			    (igmp3qa->igmp3qa_group != ilm->ilm_addr))
704 				continue;
705 			/*
706 			 * If the query is group specific or we have a
707 			 * pending group specific query, the response is
708 			 * group specific (pending sources list should be
709 			 * empty).  Otherwise, need to update the pending
710 			 * sources list for the group and source specific
711 			 * response.
712 			 */
713 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
714 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
715 group_query:
716 				FREE_SLIST(ilm->ilm_pendsrcs);
717 				ilm->ilm_pendsrcs = NULL;
718 			} else {
719 				boolean_t overflow;
720 				slist_t *pktl;
721 				if (numsrc > MAX_FILTER_SIZE ||
722 				    (ilm->ilm_pendsrcs == NULL &&
723 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
724 					/*
725 					 * We've been sent more sources than
726 					 * we can deal with; or we can't deal
727 					 * with a source list at all.  Revert
728 					 * to a group specific query.
729 					 */
730 					goto group_query;
731 				}
732 				if ((pktl = l_alloc()) == NULL)
733 					goto group_query;
734 				pktl->sl_numsrc = numsrc;
735 				for (i = 0; i < numsrc; i++)
736 					IN6_IPADDR_TO_V4MAPPED(src_array[i],
737 					    &(pktl->sl_addr[i]));
738 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
739 				    &overflow);
740 				l_free(pktl);
741 				if (overflow)
742 					goto group_query;
743 			}
744 
745 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
746 			    INFINITY : (ilm->ilm_timer - current);
747 			/* choose soonest timer */
748 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
749 			if (ilm->ilm_timer < next)
750 				next = ilm->ilm_timer;
751 			ilm->ilm_timer += current;
752 		}
753 		mutex_exit(&ill->ill_lock);
754 		ilm_walker_finish(&ilw);
755 	}
756 
757 	return (next);
758 }
759 
760 void
761 igmp_joingroup(ilm_t *ilm)
762 {
763 	uint_t	timer;
764 	ill_t	*ill;
765 	ip_stack_t	*ipst = ilm->ilm_ipst;
766 
767 	ill = ilm->ilm_ipif->ipif_ill;
768 
769 	ASSERT(IAM_WRITER_ILL(ill));
770 	ASSERT(ilm->ilm_ill == NULL && !ilm->ilm_ipif->ipif_isv6);
771 
772 	mutex_enter(&ill->ill_lock);
773 	if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP)) {
774 		ilm->ilm_rtx.rtx_timer = INFINITY;
775 		ilm->ilm_state = IGMP_OTHERMEMBER;
776 		mutex_exit(&ill->ill_lock);
777 	} else {
778 		ip1dbg(("Querier mode %d, sending report, group %x\n",
779 		    ill->ill_mcast_type, htonl(ilm->ilm_addr)));
780 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
781 			mutex_exit(&ill->ill_lock);
782 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
783 			mutex_enter(&ill->ill_lock);
784 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
785 			mutex_exit(&ill->ill_lock);
786 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
787 			mutex_enter(&ill->ill_lock);
788 		} else if (ill->ill_mcast_type == IGMP_V3_ROUTER) {
789 			mrec_t *rp;
790 			mcast_record_t rtype;
791 			/*
792 			 * The possible state changes we need to handle here:
793 			 *   Old State	New State	Report
794 			 *
795 			 *   INCLUDE(0)	INCLUDE(X)	ALLOW(X),BLOCK(0)
796 			 *   INCLUDE(0)	EXCLUDE(X)	TO_EX(X)
797 			 *
798 			 * No need to send the BLOCK(0) report; ALLOW(X)
799 			 * is enough.
800 			 */
801 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
802 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
803 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
804 			    ilm->ilm_filter, NULL);
805 			mutex_exit(&ill->ill_lock);
806 			igmpv3_sendrpt(ilm->ilm_ipif, rp);
807 			mutex_enter(&ill->ill_lock);
808 			/*
809 			 * Set up retransmission state.  Timer is set below,
810 			 * for both v3 and older versions.
811 			 */
812 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
813 			    ilm->ilm_filter);
814 		}
815 
816 		/* Set the ilm timer value */
817 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
818 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
819 		timer = ilm->ilm_rtx.rtx_timer;
820 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
821 		ilm->ilm_state = IGMP_IREPORTEDLAST;
822 		mutex_exit(&ill->ill_lock);
823 
824 		/*
825 		 * We need to restart the IGMP timers, but we can't do it here
826 		 * since we're inside the IPSQ and thus igmp_start_timers() ->
827 		 * untimeout() (inside the IPSQ, waiting for a running timeout
828 		 * to finish) could deadlock with igmp_timeout_handler() ->
829 		 * ipsq_enter() (running the timeout, waiting to get inside
830 		 * the IPSQ).  We also can't just delay it until after we
831 		 * ipsq_exit() since we could be inside more than one IPSQ and
832 		 * thus still have the other IPSQs pinned after we exit -- and
833 		 * igmp_start_timers() may be trying to enter one of those.
834 		 * Instead, signal a dedicated thread that will do it for us.
835 		 */
836 		mutex_enter(&ipst->ips_igmp_timer_lock);
837 		ipst->ips_igmp_deferred_next = MIN(timer,
838 		    ipst->ips_igmp_deferred_next);
839 		mutex_exit(&ipst->ips_igmp_timer_lock);
840 		mcast_signal_restart_thread(ipst);
841 	}
842 
843 	if (ip_debug > 1) {
844 		(void) mi_strlog(ilm->ilm_ipif->ipif_ill->ill_rq, 1, SL_TRACE,
845 		    "igmp_joingroup: multicast_type %d timer %d",
846 		    (ilm->ilm_ipif->ipif_ill->ill_mcast_type),
847 		    (int)ntohl(timer));
848 	}
849 }
850 
851 void
852 mld_joingroup(ilm_t *ilm)
853 {
854 	uint_t	timer;
855 	ill_t	*ill;
856 	ip_stack_t	*ipst = ilm->ilm_ipst;
857 
858 	ill = ilm->ilm_ill;
859 
860 	ASSERT(IAM_WRITER_ILL(ill));
861 	ASSERT(ilm->ilm_ipif == NULL && ill->ill_isv6);
862 
863 	mutex_enter(&ill->ill_lock);
864 	if (IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr)) {
865 		ilm->ilm_rtx.rtx_timer = INFINITY;
866 		ilm->ilm_state = IGMP_OTHERMEMBER;
867 		mutex_exit(&ill->ill_lock);
868 	} else {
869 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
870 			mutex_exit(&ill->ill_lock);
871 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
872 			mutex_enter(&ill->ill_lock);
873 		} else {
874 			mrec_t *rp;
875 			mcast_record_t rtype;
876 			/*
877 			 * The possible state changes we need to handle here:
878 			 *	Old State   New State	Report
879 			 *
880 			 *	INCLUDE(0)  INCLUDE(X)	ALLOW(X),BLOCK(0)
881 			 *	INCLUDE(0)  EXCLUDE(X)	TO_EX(X)
882 			 *
883 			 * No need to send the BLOCK(0) report; ALLOW(X)
884 			 * is enough
885 			 */
886 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
887 			    ALLOW_NEW_SOURCES : CHANGE_TO_EXCLUDE;
888 			rp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
889 			    ilm->ilm_filter, NULL);
890 			mutex_exit(&ill->ill_lock);
891 			mldv2_sendrpt(ill, rp);
892 			mutex_enter(&ill->ill_lock);
893 			/*
894 			 * Set up retransmission state.  Timer is set below,
895 			 * for both v2 and v1.
896 			 */
897 			mcast_init_rtx(ill, &ilm->ilm_rtx, rtype,
898 			    ilm->ilm_filter);
899 		}
900 
901 		/* Set the ilm timer value */
902 		ASSERT(ill->ill_mcast_type != MLD_V2_ROUTER ||
903 		    ilm->ilm_rtx.rtx_cnt > 0);
904 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
905 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
906 		timer = ilm->ilm_rtx.rtx_timer;
907 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
908 		ilm->ilm_state = IGMP_IREPORTEDLAST;
909 		mutex_exit(&ill->ill_lock);
910 
911 		/*
912 		 * Signal another thread to restart the timers.  See the
913 		 * comment in igmp_joingroup() for details.
914 		 */
915 		mutex_enter(&ipst->ips_mld_timer_lock);
916 		ipst->ips_mld_deferred_next = MIN(timer,
917 		    ipst->ips_mld_deferred_next);
918 		mutex_exit(&ipst->ips_mld_timer_lock);
919 		mcast_signal_restart_thread(ipst);
920 	}
921 
922 	if (ip_debug > 1) {
923 		(void) mi_strlog(ilm->ilm_ill->ill_rq, 1, SL_TRACE,
924 		    "mld_joingroup: multicast_type %d timer %d",
925 		    (ilm->ilm_ill->ill_mcast_type),
926 		    (int)ntohl(timer));
927 	}
928 }
929 
930 void
931 igmp_leavegroup(ilm_t *ilm)
932 {
933 	ill_t *ill = ilm->ilm_ipif->ipif_ill;
934 
935 	ASSERT(ilm->ilm_ill == NULL);
936 	ASSERT(!ill->ill_isv6);
937 
938 	mutex_enter(&ill->ill_lock);
939 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
940 	    ill->ill_mcast_type == IGMP_V2_ROUTER &&
941 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
942 		mutex_exit(&ill->ill_lock);
943 		igmp_sendpkt(ilm, IGMP_V2_LEAVE_GROUP,
944 		    (htonl(INADDR_ALLRTRS_GROUP)));
945 		return;
946 	} else if ((ill->ill_mcast_type == IGMP_V3_ROUTER) &&
947 	    (ilm->ilm_addr != htonl(INADDR_ALLHOSTS_GROUP))) {
948 		mrec_t *rp;
949 		/*
950 		 * The possible state changes we need to handle here:
951 		 *	Old State	New State	Report
952 		 *
953 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
954 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
955 		 *
956 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
957 		 */
958 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
959 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
960 			    ilm->ilm_filter, NULL);
961 		} else {
962 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
963 			    NULL, NULL);
964 		}
965 		mutex_exit(&ill->ill_lock);
966 		igmpv3_sendrpt(ilm->ilm_ipif, rp);
967 		return;
968 	}
969 	mutex_exit(&ill->ill_lock);
970 }
971 
972 void
973 mld_leavegroup(ilm_t *ilm)
974 {
975 	ill_t *ill = ilm->ilm_ill;
976 
977 	ASSERT(ilm->ilm_ipif == NULL);
978 	ASSERT(ill->ill_isv6);
979 
980 	mutex_enter(&ill->ill_lock);
981 	if (ilm->ilm_state == IGMP_IREPORTEDLAST &&
982 	    ill->ill_mcast_type == MLD_V1_ROUTER &&
983 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
984 		mutex_exit(&ill->ill_lock);
985 		mld_sendpkt(ilm, MLD_LISTENER_REDUCTION, &ipv6_all_rtrs_mcast);
986 		return;
987 	} else if ((ill->ill_mcast_type == MLD_V2_ROUTER) &&
988 	    (!IN6_ARE_ADDR_EQUAL(&ipv6_all_hosts_mcast, &ilm->ilm_v6addr))) {
989 		mrec_t *rp;
990 		/*
991 		 * The possible state changes we need to handle here:
992 		 *	Old State	New State	Report
993 		 *
994 		 *	INCLUDE(X)	INCLUDE(0)	ALLOW(0),BLOCK(X)
995 		 *	EXCLUDE(X)	INCLUDE(0)	TO_IN(0)
996 		 *
997 		 * No need to send the ALLOW(0) report; BLOCK(X) is enough
998 		 */
999 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1000 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1001 			    ilm->ilm_filter, NULL);
1002 		} else {
1003 			rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr,
1004 			    NULL, NULL);
1005 		}
1006 		mutex_exit(&ill->ill_lock);
1007 		mldv2_sendrpt(ill, rp);
1008 		return;
1009 	}
1010 	mutex_exit(&ill->ill_lock);
1011 }
1012 
1013 void
1014 igmp_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1015 {
1016 	ill_t *ill;
1017 	mrec_t *rp;
1018 	ip_stack_t	*ipst = ilm->ilm_ipst;
1019 
1020 	ASSERT(ilm != NULL);
1021 
1022 	/* state change reports should only be sent if the router is v3 */
1023 	if (ilm->ilm_ipif->ipif_ill->ill_mcast_type != IGMP_V3_ROUTER)
1024 		return;
1025 
1026 	if (ilm->ilm_ill == NULL) {
1027 		ASSERT(ilm->ilm_ipif != NULL);
1028 		ill = ilm->ilm_ipif->ipif_ill;
1029 	} else {
1030 		ill = ilm->ilm_ill;
1031 	}
1032 
1033 	mutex_enter(&ill->ill_lock);
1034 
1035 	/*
1036 	 * Compare existing(old) state with the new state and prepare
1037 	 * State Change Report, according to the rules in RFC 3376:
1038 	 *
1039 	 *	Old State	New State	State Change Report
1040 	 *
1041 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
1042 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
1043 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
1044 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
1045 	 */
1046 
1047 	if (ilm->ilm_fmode == fmode) {
1048 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
1049 		slist_t *allow, *block;
1050 		if (((a_minus_b = l_alloc()) == NULL) ||
1051 		    ((b_minus_a = l_alloc()) == NULL)) {
1052 			l_free(a_minus_b);
1053 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1054 				goto send_to_ex;
1055 			else
1056 				goto send_to_in;
1057 		}
1058 		l_difference(ilm->ilm_filter, flist, a_minus_b);
1059 		l_difference(flist, ilm->ilm_filter, b_minus_a);
1060 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1061 			allow = b_minus_a;
1062 			block = a_minus_b;
1063 		} else {
1064 			allow = a_minus_b;
1065 			block = b_minus_a;
1066 		}
1067 		rp = NULL;
1068 		if (!SLIST_IS_EMPTY(allow))
1069 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1070 			    allow, rp);
1071 		if (!SLIST_IS_EMPTY(block))
1072 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1073 			    block, rp);
1074 		l_free(a_minus_b);
1075 		l_free(b_minus_a);
1076 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1077 send_to_ex:
1078 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1079 		    NULL);
1080 	} else {
1081 send_to_in:
1082 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1083 		    NULL);
1084 	}
1085 
1086 	/*
1087 	 * Need to set up retransmission state; merge the new info with the
1088 	 * current state (which may be null).  If the timer is not currently
1089 	 * running, signal a thread to restart it -- see the comment in
1090 	 * igmp_joingroup() for details.
1091 	 */
1092 	rp = mcast_merge_rtx(ilm, rp, flist);
1093 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1094 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1095 		    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
1096 		mutex_enter(&ipst->ips_igmp_timer_lock);
1097 		ipst->ips_igmp_deferred_next = MIN(ipst->ips_igmp_deferred_next,
1098 		    ilm->ilm_rtx.rtx_timer);
1099 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1100 		mutex_exit(&ipst->ips_igmp_timer_lock);
1101 		mcast_signal_restart_thread(ipst);
1102 	}
1103 
1104 	mutex_exit(&ill->ill_lock);
1105 	igmpv3_sendrpt(ilm->ilm_ipif, rp);
1106 }
1107 
1108 void
1109 mld_statechange(ilm_t *ilm, mcast_record_t fmode, slist_t *flist)
1110 {
1111 	ill_t *ill;
1112 	mrec_t *rp = NULL;
1113 	ip_stack_t	*ipst = ilm->ilm_ipst;
1114 
1115 	ASSERT(ilm != NULL);
1116 
1117 	ill = ilm->ilm_ill;
1118 
1119 	/* only need to send if we have an mldv2-capable router */
1120 	mutex_enter(&ill->ill_lock);
1121 	if (ill->ill_mcast_type != MLD_V2_ROUTER) {
1122 		mutex_exit(&ill->ill_lock);
1123 		return;
1124 	}
1125 
1126 	/*
1127 	 * Compare existing (old) state with the new state passed in
1128 	 * and send appropriate MLDv2 State Change Report.
1129 	 *
1130 	 *	Old State	New State	State Change Report
1131 	 *
1132 	 *	INCLUDE(A)	INCLUDE(B)	ALLOW(B-A),BLOCK(A-B)
1133 	 *	EXCLUDE(A)	EXCLUDE(B)	ALLOW(A-B),BLOCK(B-A)
1134 	 *	INCLUDE(A)	EXCLUDE(B)	TO_EX(B)
1135 	 *	EXCLUDE(A)	INCLUDE(B)	TO_IN(B)
1136 	 */
1137 	if (ilm->ilm_fmode == fmode) {
1138 		slist_t	*a_minus_b = NULL, *b_minus_a = NULL;
1139 		slist_t *allow, *block;
1140 		if (((a_minus_b = l_alloc()) == NULL) ||
1141 		    ((b_minus_a = l_alloc()) == NULL)) {
1142 			l_free(a_minus_b);
1143 			if (ilm->ilm_fmode == MODE_IS_INCLUDE)
1144 				goto send_to_ex;
1145 			else
1146 				goto send_to_in;
1147 		}
1148 		l_difference(ilm->ilm_filter, flist, a_minus_b);
1149 		l_difference(flist, ilm->ilm_filter, b_minus_a);
1150 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1151 			allow = b_minus_a;
1152 			block = a_minus_b;
1153 		} else {
1154 			allow = a_minus_b;
1155 			block = b_minus_a;
1156 		}
1157 		if (!SLIST_IS_EMPTY(allow))
1158 			rp = mcast_bldmrec(ALLOW_NEW_SOURCES, &ilm->ilm_v6addr,
1159 			    allow, rp);
1160 		if (!SLIST_IS_EMPTY(block))
1161 			rp = mcast_bldmrec(BLOCK_OLD_SOURCES, &ilm->ilm_v6addr,
1162 			    block, rp);
1163 		l_free(a_minus_b);
1164 		l_free(b_minus_a);
1165 	} else if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1166 send_to_ex:
1167 		rp = mcast_bldmrec(CHANGE_TO_EXCLUDE, &ilm->ilm_v6addr, flist,
1168 		    NULL);
1169 	} else {
1170 send_to_in:
1171 		rp = mcast_bldmrec(CHANGE_TO_INCLUDE, &ilm->ilm_v6addr, flist,
1172 		    NULL);
1173 	}
1174 
1175 	/*
1176 	 * Need to set up retransmission state; merge the new info with the
1177 	 * current state (which may be null).  If the timer is not currently
1178 	 * running, signal a thread to restart it -- see the comment in
1179 	 * igmp_joingroup() for details.
1180 	 */
1181 	rp = mcast_merge_rtx(ilm, rp, flist);
1182 	ASSERT(ilm->ilm_rtx.rtx_cnt > 0);
1183 	if (ilm->ilm_rtx.rtx_timer == INFINITY) {
1184 		MCAST_RANDOM_DELAY(ilm->ilm_rtx.rtx_timer,
1185 		    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
1186 		mutex_enter(&ipst->ips_mld_timer_lock);
1187 		ipst->ips_mld_deferred_next =
1188 		    MIN(ipst->ips_mld_deferred_next, ilm->ilm_rtx.rtx_timer);
1189 		ilm->ilm_rtx.rtx_timer += CURRENT_MSTIME;
1190 		mutex_exit(&ipst->ips_mld_timer_lock);
1191 		mcast_signal_restart_thread(ipst);
1192 	}
1193 
1194 	mutex_exit(&ill->ill_lock);
1195 	mldv2_sendrpt(ill, rp);
1196 }
1197 
1198 uint_t
1199 igmp_timeout_handler_per_ill(ill_t *ill)
1200 {
1201 	uint_t	next = INFINITY, current;
1202 	ilm_t	*ilm;
1203 	ipif_t	*ipif;
1204 	mrec_t	*rp = NULL;
1205 	mrec_t	*rtxrp = NULL;
1206 	rtx_state_t *rtxp;
1207 	mcast_record_t	rtype;
1208 
1209 	ASSERT(IAM_WRITER_ILL(ill));
1210 
1211 	mutex_enter(&ill->ill_lock);
1212 
1213 	current = CURRENT_MSTIME;
1214 	/* First check the global timer on this interface */
1215 	if (ill->ill_global_timer == INFINITY)
1216 		goto per_ilm_timer;
1217 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
1218 		ill->ill_global_timer = INFINITY;
1219 		/*
1220 		 * Send report for each group on this interface.
1221 		 * Since we just set the global timer (received a v3 general
1222 		 * query), need to skip the all hosts addr (224.0.0.1), per
1223 		 * RFC 3376 section 5.
1224 		 */
1225 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1226 			if (ilm->ilm_addr == htonl(INADDR_ALLHOSTS_GROUP))
1227 				continue;
1228 			ASSERT(ilm->ilm_ipif != NULL);
1229 			ilm->ilm_ipif->ipif_igmp_rpt =
1230 			    mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
1231 			    ilm->ilm_filter, ilm->ilm_ipif->ipif_igmp_rpt);
1232 			/*
1233 			 * Since we're sending a report on this group, okay
1234 			 * to delete pending group-specific timers.  Note
1235 			 * that group-specific retransmit timers still need
1236 			 * to be checked in the per_ilm_timer for-loop.
1237 			 */
1238 			ilm->ilm_timer = INFINITY;
1239 			ilm->ilm_state = IGMP_IREPORTEDLAST;
1240 			FREE_SLIST(ilm->ilm_pendsrcs);
1241 			ilm->ilm_pendsrcs = NULL;
1242 		}
1243 		/*
1244 		 * We've built per-ipif mrec lists; walk the ill's ipif list
1245 		 * and send a report for each ipif that has an mrec list.
1246 		 */
1247 		for (ipif = ill->ill_ipif; ipif != NULL;
1248 		    ipif = ipif->ipif_next) {
1249 			if (ipif->ipif_igmp_rpt == NULL)
1250 				continue;
1251 			mutex_exit(&ill->ill_lock);
1252 			igmpv3_sendrpt(ipif, ipif->ipif_igmp_rpt);
1253 			mutex_enter(&ill->ill_lock);
1254 			/* mrec list was freed by igmpv3_sendrpt() */
1255 			ipif->ipif_igmp_rpt = NULL;
1256 		}
1257 	} else {
1258 		if ((ill->ill_global_timer - current) < next)
1259 			next = ill->ill_global_timer - current;
1260 	}
1261 
1262 per_ilm_timer:
1263 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1264 		if (ilm->ilm_timer == INFINITY)
1265 			goto per_ilm_rtxtimer;
1266 
1267 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
1268 			if ((ilm->ilm_timer - current) < next)
1269 				next = ilm->ilm_timer - current;
1270 
1271 			if (ip_debug > 1) {
1272 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
1273 				    "igmp_timo_hlr 2: ilm_timr %d "
1274 				    "typ %d nxt %d",
1275 				    (int)ntohl(ilm->ilm_timer - current),
1276 				    (ill->ill_mcast_type), next);
1277 			}
1278 
1279 			goto per_ilm_rtxtimer;
1280 		}
1281 
1282 		/* the timer has expired, need to take action */
1283 		ilm->ilm_timer = INFINITY;
1284 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1285 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
1286 			mutex_exit(&ill->ill_lock);
1287 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
1288 			mutex_enter(&ill->ill_lock);
1289 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
1290 			mutex_exit(&ill->ill_lock);
1291 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
1292 			mutex_enter(&ill->ill_lock);
1293 		} else {
1294 			slist_t *rsp;
1295 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
1296 			    (rsp = l_alloc()) != NULL) {
1297 				/*
1298 				 * Contents of reply depend on pending
1299 				 * requested source list.
1300 				 */
1301 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1302 					l_intersection(ilm->ilm_filter,
1303 					    ilm->ilm_pendsrcs, rsp);
1304 				} else {
1305 					l_difference(ilm->ilm_pendsrcs,
1306 					    ilm->ilm_filter, rsp);
1307 				}
1308 				FREE_SLIST(ilm->ilm_pendsrcs);
1309 				ilm->ilm_pendsrcs = NULL;
1310 				if (!SLIST_IS_EMPTY(rsp))
1311 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
1312 					    &ilm->ilm_v6addr, rsp, rp);
1313 				FREE_SLIST(rsp);
1314 			} else {
1315 				/*
1316 				 * Either the pending request is just group-
1317 				 * specific, or we couldn't get the resources
1318 				 * (rsp) to build a source-specific reply.
1319 				 */
1320 				rp = mcast_bldmrec(ilm->ilm_fmode,
1321 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
1322 			}
1323 			mutex_exit(&ill->ill_lock);
1324 			igmpv3_sendrpt(ill->ill_ipif, rp);
1325 			mutex_enter(&ill->ill_lock);
1326 			rp = NULL;
1327 		}
1328 
1329 per_ilm_rtxtimer:
1330 		rtxp = &ilm->ilm_rtx;
1331 
1332 		if (rtxp->rtx_timer == INFINITY)
1333 			continue;
1334 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
1335 			if ((rtxp->rtx_timer - current) < next)
1336 				next = rtxp->rtx_timer - current;
1337 			continue;
1338 		}
1339 
1340 		rtxp->rtx_timer = INFINITY;
1341 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1342 		if (ill->ill_mcast_type == IGMP_V1_ROUTER) {
1343 			mutex_exit(&ill->ill_lock);
1344 			igmp_sendpkt(ilm, IGMP_V1_MEMBERSHIP_REPORT, 0);
1345 			mutex_enter(&ill->ill_lock);
1346 			continue;
1347 		} else if (ill->ill_mcast_type == IGMP_V2_ROUTER) {
1348 			mutex_exit(&ill->ill_lock);
1349 			igmp_sendpkt(ilm, IGMP_V2_MEMBERSHIP_REPORT, 0);
1350 			mutex_enter(&ill->ill_lock);
1351 			continue;
1352 		}
1353 
1354 		/*
1355 		 * The retransmit timer has popped, and our router is
1356 		 * IGMPv3.  We have to delve into the retransmit state
1357 		 * stored in the ilm.
1358 		 *
1359 		 * Decrement the retransmit count.  If the fmode rtx
1360 		 * count is active, decrement it, and send a filter
1361 		 * mode change report with the ilm's source list.
1362 		 * Otherwise, send a source list change report with
1363 		 * the current retransmit lists.
1364 		 */
1365 		ASSERT(rtxp->rtx_cnt > 0);
1366 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
1367 		rtxp->rtx_cnt--;
1368 		if (rtxp->rtx_fmode_cnt > 0) {
1369 			rtxp->rtx_fmode_cnt--;
1370 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
1371 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
1372 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
1373 			    ilm->ilm_filter, rtxrp);
1374 		} else {
1375 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
1376 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
1377 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
1378 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
1379 		}
1380 		if (rtxp->rtx_cnt > 0) {
1381 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
1382 			    SEC_TO_MSEC(IGMP_MAX_HOST_REPORT_DELAY));
1383 			if (rtxp->rtx_timer < next)
1384 				next = rtxp->rtx_timer;
1385 			rtxp->rtx_timer += current;
1386 		} else {
1387 			CLEAR_SLIST(rtxp->rtx_allow);
1388 			CLEAR_SLIST(rtxp->rtx_block);
1389 		}
1390 		mutex_exit(&ill->ill_lock);
1391 		igmpv3_sendrpt(ilm->ilm_ipif, rtxrp);
1392 		mutex_enter(&ill->ill_lock);
1393 		rtxrp = NULL;
1394 	}
1395 
1396 	mutex_exit(&ill->ill_lock);
1397 
1398 	return (next);
1399 }
1400 
1401 /*
1402  * igmp_timeout_handler:
1403  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
1404  * Returns number of ticks to next event (or 0 if none).
1405  *
1406  * As part of multicast join and leave igmp we may need to send out an
1407  * igmp request. The igmp related state variables in the ilm are protected
1408  * by ill_lock. A single global igmp timer is used to track igmp timeouts.
1409  * igmp_timer_lock protects the global igmp_timeout_id. igmp_start_timers
1410  * starts the igmp timer if needed. It serializes multiple threads trying to
1411  * simultaneously start the timer using the igmp_timer_setter_active flag.
1412  *
1413  * igmp_input() receives igmp queries and responds to the queries
1414  * in a delayed fashion by posting a timer i.e. it calls igmp_start_timers().
1415  * Later the igmp_timer fires, the timeout handler igmp_timeout_handler()
1416  * performs the action exclusively after entering each ill's ipsq as writer.
1417  * (The need to enter the IPSQ is largely historical but there are still some
1418  * fields like ilm_filter that rely on it.)
1419  *
1420  * The igmp_slowtimeo() function is called thru another timer.
1421  * igmp_slowtimeout_lock protects the igmp_slowtimeout_id
1422  */
1423 void
1424 igmp_timeout_handler(void *arg)
1425 {
1426 	ill_t	*ill;
1427 	uint_t  global_next = INFINITY;
1428 	uint_t  next;
1429 	ill_walk_context_t ctx;
1430 	boolean_t success;
1431 	ip_stack_t *ipst = arg;
1432 
1433 	ASSERT(arg != NULL);
1434 	mutex_enter(&ipst->ips_igmp_timer_lock);
1435 	ASSERT(ipst->ips_igmp_timeout_id != 0);
1436 	ipst->ips_igmp_timer_scheduled_last = 0;
1437 	ipst->ips_igmp_time_to_next = 0;
1438 	mutex_exit(&ipst->ips_igmp_timer_lock);
1439 
1440 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1441 	ill = ILL_START_WALK_V4(&ctx, ipst);
1442 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1443 		ASSERT(!ill->ill_isv6);
1444 		/*
1445 		 * We may not be able to refhold the ill if the ill/ipif
1446 		 * is changing. But we need to make sure that the ill will
1447 		 * not vanish. So we just bump up the ill_waiter count.
1448 		 */
1449 		if (!ill_waiter_inc(ill))
1450 			continue;
1451 		rw_exit(&ipst->ips_ill_g_lock);
1452 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
1453 		if (success) {
1454 			next = igmp_timeout_handler_per_ill(ill);
1455 			if (next < global_next)
1456 				global_next = next;
1457 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
1458 		}
1459 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1460 		ill_waiter_dcr(ill);
1461 	}
1462 	rw_exit(&ipst->ips_ill_g_lock);
1463 
1464 	mutex_enter(&ipst->ips_igmp_timer_lock);
1465 	ASSERT(ipst->ips_igmp_timeout_id != 0);
1466 	ipst->ips_igmp_timeout_id = 0;
1467 	mutex_exit(&ipst->ips_igmp_timer_lock);
1468 
1469 	if (global_next != INFINITY)
1470 		igmp_start_timers(global_next, ipst);
1471 }
1472 
1473 /*
1474  * mld_timeout_handler:
1475  * Called when there are timeout events, every next (tick).
1476  * Returns number of ticks to next event (or 0 if none).
1477  */
1478 /* ARGSUSED */
1479 uint_t
1480 mld_timeout_handler_per_ill(ill_t *ill)
1481 {
1482 	ilm_t 	*ilm;
1483 	uint_t	next = INFINITY, current;
1484 	mrec_t	*rp, *rtxrp;
1485 	rtx_state_t *rtxp;
1486 	mcast_record_t	rtype;
1487 
1488 	ASSERT(IAM_WRITER_ILL(ill));
1489 
1490 	mutex_enter(&ill->ill_lock);
1491 
1492 	current = CURRENT_MSTIME;
1493 	/*
1494 	 * First check the global timer on this interface; the global timer
1495 	 * is not used for MLDv1, so if it's set we can assume we're v2.
1496 	 */
1497 	if (ill->ill_global_timer == INFINITY)
1498 		goto per_ilm_timer;
1499 	if (ill->ill_global_timer <= (current + CURRENT_OFFSET)) {
1500 		ill->ill_global_timer = INFINITY;
1501 		/*
1502 		 * Send report for each group on this interface.
1503 		 * Since we just set the global timer (received a v2 general
1504 		 * query), need to skip the all hosts addr (ff02::1), per
1505 		 * RFC 3810 section 6.
1506 		 */
1507 		rp = NULL;
1508 		for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1509 			if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
1510 			    &ipv6_all_hosts_mcast))
1511 				continue;
1512 			rp = mcast_bldmrec(ilm->ilm_fmode, &ilm->ilm_v6addr,
1513 			    ilm->ilm_filter, rp);
1514 			/*
1515 			 * Since we're sending a report on this group, okay
1516 			 * to delete pending group-specific timers.  Note
1517 			 * that group-specific retransmit timers still need
1518 			 * to be checked in the per_ilm_timer for-loop.
1519 			 */
1520 			ilm->ilm_timer = INFINITY;
1521 			ilm->ilm_state = IGMP_IREPORTEDLAST;
1522 			FREE_SLIST(ilm->ilm_pendsrcs);
1523 			ilm->ilm_pendsrcs = NULL;
1524 		}
1525 		mutex_exit(&ill->ill_lock);
1526 		mldv2_sendrpt(ill, rp);
1527 		mutex_enter(&ill->ill_lock);
1528 	} else {
1529 		if ((ill->ill_global_timer - current) < next)
1530 			next = ill->ill_global_timer - current;
1531 	}
1532 
1533 per_ilm_timer:
1534 	rp = rtxrp = NULL;
1535 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1536 		if (ilm->ilm_timer == INFINITY)
1537 			goto per_ilm_rtxtimer;
1538 
1539 		if (ilm->ilm_timer > (current + CURRENT_OFFSET)) {
1540 			if ((ilm->ilm_timer - current) < next)
1541 				next = ilm->ilm_timer - current;
1542 
1543 			if (ip_debug > 1) {
1544 				(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
1545 				    "igmp_timo_hlr 2: ilm_timr"
1546 				    " %d typ %d nxt %d",
1547 				    (int)ntohl(ilm->ilm_timer - current),
1548 				    (ill->ill_mcast_type), next);
1549 			}
1550 
1551 			goto per_ilm_rtxtimer;
1552 		}
1553 
1554 		/* the timer has expired, need to take action */
1555 		ilm->ilm_timer = INFINITY;
1556 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1557 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
1558 			mutex_exit(&ill->ill_lock);
1559 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
1560 			mutex_enter(&ill->ill_lock);
1561 		} else {
1562 			slist_t *rsp;
1563 			if (!SLIST_IS_EMPTY(ilm->ilm_pendsrcs) &&
1564 			    (rsp = l_alloc()) != NULL) {
1565 				/*
1566 				 * Contents of reply depend on pending
1567 				 * requested source list.
1568 				 */
1569 				if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
1570 					l_intersection(ilm->ilm_filter,
1571 					    ilm->ilm_pendsrcs, rsp);
1572 				} else {
1573 					l_difference(ilm->ilm_pendsrcs,
1574 					    ilm->ilm_filter, rsp);
1575 				}
1576 				FREE_SLIST(ilm->ilm_pendsrcs);
1577 				ilm->ilm_pendsrcs = NULL;
1578 				if (!SLIST_IS_EMPTY(rsp))
1579 					rp = mcast_bldmrec(MODE_IS_INCLUDE,
1580 					    &ilm->ilm_v6addr, rsp, rp);
1581 				FREE_SLIST(rsp);
1582 			} else {
1583 				rp = mcast_bldmrec(ilm->ilm_fmode,
1584 				    &ilm->ilm_v6addr, ilm->ilm_filter, rp);
1585 			}
1586 		}
1587 
1588 per_ilm_rtxtimer:
1589 		rtxp = &ilm->ilm_rtx;
1590 
1591 		if (rtxp->rtx_timer == INFINITY)
1592 			continue;
1593 		if (rtxp->rtx_timer > (current + CURRENT_OFFSET)) {
1594 			if ((rtxp->rtx_timer - current) < next)
1595 				next = rtxp->rtx_timer - current;
1596 			continue;
1597 		}
1598 
1599 		rtxp->rtx_timer = INFINITY;
1600 		ilm->ilm_state = IGMP_IREPORTEDLAST;
1601 		if (ill->ill_mcast_type == MLD_V1_ROUTER) {
1602 			mutex_exit(&ill->ill_lock);
1603 			mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
1604 			mutex_enter(&ill->ill_lock);
1605 			continue;
1606 		}
1607 
1608 		/*
1609 		 * The retransmit timer has popped, and our router is
1610 		 * MLDv2.  We have to delve into the retransmit state
1611 		 * stored in the ilm.
1612 		 *
1613 		 * Decrement the retransmit count.  If the fmode rtx
1614 		 * count is active, decrement it, and send a filter
1615 		 * mode change report with the ilm's source list.
1616 		 * Otherwise, send a source list change report with
1617 		 * the current retransmit lists.
1618 		 */
1619 		ASSERT(rtxp->rtx_cnt > 0);
1620 		ASSERT(rtxp->rtx_cnt >= rtxp->rtx_fmode_cnt);
1621 		rtxp->rtx_cnt--;
1622 		if (rtxp->rtx_fmode_cnt > 0) {
1623 			rtxp->rtx_fmode_cnt--;
1624 			rtype = (ilm->ilm_fmode == MODE_IS_INCLUDE) ?
1625 			    CHANGE_TO_INCLUDE : CHANGE_TO_EXCLUDE;
1626 			rtxrp = mcast_bldmrec(rtype, &ilm->ilm_v6addr,
1627 			    ilm->ilm_filter, rtxrp);
1628 		} else {
1629 			rtxrp = mcast_bldmrec(ALLOW_NEW_SOURCES,
1630 			    &ilm->ilm_v6addr, rtxp->rtx_allow, rtxrp);
1631 			rtxrp = mcast_bldmrec(BLOCK_OLD_SOURCES,
1632 			    &ilm->ilm_v6addr, rtxp->rtx_block, rtxrp);
1633 		}
1634 		if (rtxp->rtx_cnt > 0) {
1635 			MCAST_RANDOM_DELAY(rtxp->rtx_timer,
1636 			    SEC_TO_MSEC(ICMP6_MAX_HOST_REPORT_DELAY));
1637 			if (rtxp->rtx_timer < next)
1638 				next = rtxp->rtx_timer;
1639 			rtxp->rtx_timer += current;
1640 		} else {
1641 			CLEAR_SLIST(rtxp->rtx_allow);
1642 			CLEAR_SLIST(rtxp->rtx_block);
1643 		}
1644 	}
1645 
1646 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
1647 		mutex_exit(&ill->ill_lock);
1648 		mldv2_sendrpt(ill, rp);
1649 		mldv2_sendrpt(ill, rtxrp);
1650 		return (next);
1651 	}
1652 
1653 	mutex_exit(&ill->ill_lock);
1654 
1655 	return (next);
1656 }
1657 
1658 /*
1659  * mld_timeout_handler:
1660  * Called when there are timeout events, every next * TMEOUT_INTERVAL (tick).
1661  * Returns number of ticks to next event (or 0 if none).
1662  * MT issues are same as igmp_timeout_handler
1663  */
1664 void
1665 mld_timeout_handler(void *arg)
1666 {
1667 	ill_t	*ill;
1668 	uint_t  global_next = INFINITY;
1669 	uint_t  next;
1670 	ill_walk_context_t ctx;
1671 	boolean_t success;
1672 	ip_stack_t *ipst = arg;
1673 
1674 	ASSERT(arg != NULL);
1675 	mutex_enter(&ipst->ips_mld_timer_lock);
1676 	ASSERT(ipst->ips_mld_timeout_id != 0);
1677 	ipst->ips_mld_timer_scheduled_last = 0;
1678 	ipst->ips_mld_time_to_next = 0;
1679 	mutex_exit(&ipst->ips_mld_timer_lock);
1680 
1681 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1682 	ill = ILL_START_WALK_V6(&ctx, ipst);
1683 	for (; ill != NULL; ill = ill_next(&ctx, ill)) {
1684 		ASSERT(ill->ill_isv6);
1685 		/*
1686 		 * We may not be able to refhold the ill if the ill/ipif
1687 		 * is changing. But we need to make sure that the ill will
1688 		 * not vanish. So we just bump up the ill_waiter count.
1689 		 */
1690 		if (!ill_waiter_inc(ill))
1691 			continue;
1692 		rw_exit(&ipst->ips_ill_g_lock);
1693 		success = ipsq_enter(ill, B_TRUE, NEW_OP);
1694 		if (success) {
1695 			next = mld_timeout_handler_per_ill(ill);
1696 			if (next < global_next)
1697 				global_next = next;
1698 			ipsq_exit(ill->ill_phyint->phyint_ipsq);
1699 		}
1700 		rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1701 		ill_waiter_dcr(ill);
1702 	}
1703 	rw_exit(&ipst->ips_ill_g_lock);
1704 
1705 	mutex_enter(&ipst->ips_mld_timer_lock);
1706 	ASSERT(ipst->ips_mld_timeout_id != 0);
1707 	ipst->ips_mld_timeout_id = 0;
1708 	mutex_exit(&ipst->ips_mld_timer_lock);
1709 
1710 	if (global_next != INFINITY)
1711 		mld_start_timers(global_next, ipst);
1712 }
1713 
/*
 * OVQP(ill): the Older Version Querier Present timeout for the given ill,
 * expressed as a count of slowtimo intervals.  It is computed from the
 * ill's ill_mcast_rv and ill_mcast_qi values plus the query response
 * interval.
 */
#define	OVQP(ill) \
	((1000 * (MCAST_QUERY_RESP_INTERVAL + \
	((ill)->ill_mcast_qi * (ill)->ill_mcast_rv))) / \
	MCAST_SLOWTIMO_INTERVAL)
1721 
1722 /*
1723  * igmp_slowtimo:
1724  * - Resets to new router if we didnt we hear from the router
1725  *   in IGMP_AGE_THRESHOLD seconds.
1726  * - Resets slowtimeout.
1727  * Check for ips_igmp_max_version ensures that we don't revert to a higher
1728  * IGMP version than configured.
1729  */
1730 void
1731 igmp_slowtimo(void *arg)
1732 {
1733 	ill_t	*ill;
1734 	ill_if_t *ifp;
1735 	avl_tree_t *avl_tree;
1736 	ip_stack_t *ipst = (ip_stack_t *)arg;
1737 
1738 	ASSERT(arg != NULL);
1739 	/* Hold the ill_g_lock so that we can safely walk the ill list */
1740 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1741 
1742 	/*
1743 	 * The ill_if_t list is circular, hence the odd loop parameters.
1744 	 *
1745 	 * We can't use the ILL_START_WALK and ill_next() wrappers for this
1746 	 * walk, as we need to check the illif_mcast_* fields in the ill_if_t
1747 	 * structure (allowing us to skip if none of the instances have timers
1748 	 * running).
1749 	 */
1750 	for (ifp = IP_V4_ILL_G_LIST(ipst);
1751 	    ifp != (ill_if_t *)&IP_V4_ILL_G_LIST(ipst);
1752 	    ifp = ifp->illif_next) {
1753 		/*
1754 		 * illif_mcast_v[12] are set using atomics. If an ill hears
1755 		 * a V1 or V2 query now and we miss seeing the count now,
1756 		 * we will see it the next time igmp_slowtimo is called.
1757 		 */
1758 		if (ifp->illif_mcast_v1 == 0 && ifp->illif_mcast_v2 == 0)
1759 			continue;
1760 
1761 		avl_tree = &ifp->illif_avl_by_ppa;
1762 		for (ill = avl_first(avl_tree); ill != NULL;
1763 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
1764 			mutex_enter(&ill->ill_lock);
1765 			if (ill->ill_mcast_v1_tset == 1)
1766 				ill->ill_mcast_v1_time++;
1767 			if (ill->ill_mcast_v2_tset == 1)
1768 				ill->ill_mcast_v2_time++;
1769 			if ((ill->ill_mcast_type == IGMP_V1_ROUTER) &&
1770 			    (ipst->ips_igmp_max_version >= IGMP_V2_ROUTER) &&
1771 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
1772 				if ((ill->ill_mcast_v2_tset > 0) ||
1773 				    (ipst->ips_igmp_max_version ==
1774 				    IGMP_V2_ROUTER)) {
1775 					ip1dbg(("V1 query timer "
1776 					    "expired on %s; switching "
1777 					    "mode to IGMP_V2\n",
1778 					    ill->ill_name));
1779 					ill->ill_mcast_type =
1780 					    IGMP_V2_ROUTER;
1781 				} else {
1782 					ip1dbg(("V1 query timer "
1783 					    "expired on %s; switching "
1784 					    "mode to IGMP_V3\n",
1785 					    ill->ill_name));
1786 					ill->ill_mcast_type =
1787 					    IGMP_V3_ROUTER;
1788 				}
1789 				ill->ill_mcast_v1_time = 0;
1790 				ill->ill_mcast_v1_tset = 0;
1791 				atomic_add_16(&ifp->illif_mcast_v1, -1);
1792 			}
1793 			if ((ill->ill_mcast_type == IGMP_V2_ROUTER) &&
1794 			    (ipst->ips_igmp_max_version >= IGMP_V3_ROUTER) &&
1795 			    (ill->ill_mcast_v2_time >= OVQP(ill))) {
1796 				ip1dbg(("V2 query timer expired on "
1797 				    "%s; switching mode to IGMP_V3\n",
1798 				    ill->ill_name));
1799 				ill->ill_mcast_type = IGMP_V3_ROUTER;
1800 				ill->ill_mcast_v2_time = 0;
1801 				ill->ill_mcast_v2_tset = 0;
1802 				atomic_add_16(&ifp->illif_mcast_v2, -1);
1803 			}
1804 			mutex_exit(&ill->ill_lock);
1805 		}
1806 	}
1807 	rw_exit(&ipst->ips_ill_g_lock);
1808 	mutex_enter(&ipst->ips_igmp_slowtimeout_lock);
1809 	ipst->ips_igmp_slowtimeout_id = timeout(igmp_slowtimo, (void *)ipst,
1810 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
1811 	mutex_exit(&ipst->ips_igmp_slowtimeout_lock);
1812 }
1813 
1814 /*
1815  * mld_slowtimo:
1816  * - Resets to newer version if we didn't hear from the older version router
1817  *   in MLD_AGE_THRESHOLD seconds.
1818  * - Restarts slowtimeout.
1819  * Check for ips_mld_max_version ensures that we don't revert to a higher
1820  * IGMP version than configured.
1821  */
1822 /* ARGSUSED */
1823 void
1824 mld_slowtimo(void *arg)
1825 {
1826 	ill_t *ill;
1827 	ill_if_t *ifp;
1828 	avl_tree_t *avl_tree;
1829 	ip_stack_t *ipst = (ip_stack_t *)arg;
1830 
1831 	ASSERT(arg != NULL);
1832 	/* See comments in igmp_slowtimo() above... */
1833 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
1834 	for (ifp = IP_V6_ILL_G_LIST(ipst);
1835 	    ifp != (ill_if_t *)&IP_V6_ILL_G_LIST(ipst);
1836 	    ifp = ifp->illif_next) {
1837 		if (ifp->illif_mcast_v1 == 0)
1838 			continue;
1839 
1840 		avl_tree = &ifp->illif_avl_by_ppa;
1841 		for (ill = avl_first(avl_tree); ill != NULL;
1842 		    ill = avl_walk(avl_tree, ill, AVL_AFTER)) {
1843 			mutex_enter(&ill->ill_lock);
1844 			if (ill->ill_mcast_v1_tset == 1)
1845 				ill->ill_mcast_v1_time++;
1846 			if ((ill->ill_mcast_type == MLD_V1_ROUTER) &&
1847 			    (ipst->ips_mld_max_version >= MLD_V2_ROUTER) &&
1848 			    (ill->ill_mcast_v1_time >= OVQP(ill))) {
1849 				ip1dbg(("MLD query timer expired on"
1850 				    " %s; switching mode to MLD_V2\n",
1851 				    ill->ill_name));
1852 				ill->ill_mcast_type = MLD_V2_ROUTER;
1853 				ill->ill_mcast_v1_time = 0;
1854 				ill->ill_mcast_v1_tset = 0;
1855 				atomic_add_16(&ifp->illif_mcast_v1, -1);
1856 			}
1857 			mutex_exit(&ill->ill_lock);
1858 		}
1859 	}
1860 	rw_exit(&ipst->ips_ill_g_lock);
1861 	mutex_enter(&ipst->ips_mld_slowtimeout_lock);
1862 	ipst->ips_mld_slowtimeout_id = timeout(mld_slowtimo, (void *)ipst,
1863 	    MSEC_TO_TICK(MCAST_SLOWTIMO_INTERVAL));
1864 	mutex_exit(&ipst->ips_mld_slowtimeout_lock);
1865 }
1866 
1867 /*
1868  * igmp_sendpkt:
1869  * This will send to ip_wput like icmp_inbound.
1870  * Note that the lower ill (on which the membership is kept) is used
1871  * as an upper ill to pass in the multicast parameters.
1872  */
1873 static void
1874 igmp_sendpkt(ilm_t *ilm, uchar_t type, ipaddr_t addr)
1875 {
1876 	mblk_t	*mp;
1877 	igmpa_t	*igmpa;
1878 	uint8_t *rtralert;
1879 	ipha_t	*ipha;
1880 	int	hdrlen = sizeof (ipha_t) + RTRALERT_LEN;
1881 	size_t	size  = hdrlen + sizeof (igmpa_t);
1882 	ipif_t 	*ipif = ilm->ilm_ipif;
1883 	ill_t 	*ill  = ipif->ipif_ill;
1884 	mblk_t	*first_mp;
1885 	ipsec_out_t *io;
1886 	zoneid_t zoneid;
1887 	ip_stack_t *ipst = ill->ill_ipst;
1888 
1889 	/*
1890 	 * We need to make sure this packet goes out on an ipif. If
1891 	 * there is some global policy match in ip_wput_ire, we need
1892 	 * to get to the right interface after IPSEC processing.
1893 	 * To make sure this multicast packet goes out on the right
1894 	 * interface, we attach an ipsec_out and initialize ill_index
1895 	 * like we did in ip_wput. To make sure that this packet does
1896 	 * not get forwarded on other interfaces or looped back, we
1897 	 * set ipsec_out_dontroute to B_TRUE and ipsec_out_multicast_loop
1898 	 * to B_FALSE.
1899 	 */
1900 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
1901 	if (first_mp == NULL)
1902 		return;
1903 
1904 	first_mp->b_datap->db_type = M_CTL;
1905 	first_mp->b_wptr += sizeof (ipsec_info_t);
1906 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
1907 	/* ipsec_out_secure is B_FALSE now */
1908 	io = (ipsec_out_t *)first_mp->b_rptr;
1909 	io->ipsec_out_type = IPSEC_OUT;
1910 	io->ipsec_out_len = sizeof (ipsec_out_t);
1911 	io->ipsec_out_use_global_policy = B_TRUE;
1912 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
1913 	io->ipsec_out_multicast_loop = B_FALSE;
1914 	io->ipsec_out_dontroute = B_TRUE;
1915 	if ((zoneid = ilm->ilm_zoneid) == ALL_ZONES)
1916 		zoneid = GLOBAL_ZONEID;
1917 	io->ipsec_out_zoneid = zoneid;
1918 	io->ipsec_out_ns = ipst->ips_netstack;	/* No netstack_hold */
1919 
1920 	mp = allocb(size, BPRI_HI);
1921 	if (mp == NULL) {
1922 		freemsg(first_mp);
1923 		return;
1924 	}
1925 	mp->b_wptr = mp->b_rptr + size;
1926 	first_mp->b_cont = mp;
1927 
1928 	ipha = (ipha_t *)mp->b_rptr;
1929 	rtralert = (uint8_t *)&(ipha[1]);
1930 	igmpa = (igmpa_t *)&(rtralert[RTRALERT_LEN]);
1931 	igmpa->igmpa_type   = type;
1932 	igmpa->igmpa_code   = 0;
1933 	igmpa->igmpa_group  = ilm->ilm_addr;
1934 	igmpa->igmpa_cksum  = 0;
1935 	igmpa->igmpa_cksum  = IP_CSUM(mp, hdrlen, 0);
1936 
1937 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
1938 	rtralert[1] = RTRALERT_LEN;
1939 	rtralert[2] = 0;
1940 	rtralert[3] = 0;
1941 
1942 	ipha->ipha_version_and_hdr_length = (IP_VERSION << 4)
1943 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
1944 	ipha->ipha_type_of_service 	= 0;
1945 	ipha->ipha_length = htons(size);
1946 	ipha->ipha_ident = 0;
1947 	ipha->ipha_fragment_offset_and_flags = 0;
1948 	ipha->ipha_ttl 		= IGMP_TTL;
1949 	ipha->ipha_protocol 	= IPPROTO_IGMP;
1950 	ipha->ipha_hdr_checksum 	= 0;
1951 	ipha->ipha_dst 		= addr ? addr : igmpa->igmpa_group;
1952 	ipha->ipha_src 		= ipif->ipif_src_addr;
1953 	/*
1954 	 * Request loopback of the report if we are acting as a multicast
1955 	 * router, so that the process-level routing demon can hear it.
1956 	 */
1957 	/*
1958 	 * This will run multiple times for the same group if there are members
1959 	 * on the same group for multiple ipif's on the same ill. The
1960 	 * igmp_input code will suppress this due to the loopback thus we
1961 	 * always loopback membership report.
1962 	 */
1963 	ASSERT(ill->ill_rq != NULL);
1964 	ip_multicast_loopback(ill->ill_rq, ill, first_mp, 0, ilm->ilm_zoneid);
1965 
1966 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
1967 
1968 	++ipst->ips_igmpstat.igps_snd_reports;
1969 }
1970 
1971 /*
1972  * Sends an IGMP_V3_MEMBERSHIP_REPORT message out the ill associated
1973  * with the passed-in ipif.  The report will contain one group record
1974  * for each element of reclist.  If this causes packet length to
1975  * exceed ipif->ipif_ill->ill_max_frag, multiple reports are sent.
1976  * reclist is assumed to be made up of buffers allocated by mcast_bldmrec(),
1977  * and those buffers are freed here.
1978  */
1979 static void
1980 igmpv3_sendrpt(ipif_t *ipif, mrec_t *reclist)
1981 {
1982 	ipsec_out_t *io;
1983 	igmp3ra_t *igmp3ra;
1984 	grphdra_t *grphdr;
1985 	mblk_t *first_mp, *mp;
1986 	ipha_t *ipha;
1987 	uint8_t *rtralert;
1988 	ipaddr_t *src_array;
1989 	int i, j, numrec, more_src_cnt;
1990 	size_t hdrsize, size, rsize;
1991 	ill_t *ill = ipif->ipif_ill;
1992 	mrec_t *rp, *cur_reclist;
1993 	mrec_t *next_reclist = reclist;
1994 	boolean_t morepkts;
1995 	zoneid_t zoneid;
1996 	ip_stack_t	 *ipst = ill->ill_ipst;
1997 
1998 	ASSERT(IAM_WRITER_IPIF(ipif));
1999 
2000 	/* if there aren't any records, there's nothing to send */
2001 	if (reclist == NULL)
2002 		return;
2003 
2004 	hdrsize = sizeof (ipha_t) + RTRALERT_LEN;
2005 nextpkt:
2006 	size = hdrsize + sizeof (igmp3ra_t);
2007 	morepkts = B_FALSE;
2008 	more_src_cnt = 0;
2009 	cur_reclist = next_reclist;
2010 	numrec = 0;
2011 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2012 		rsize = sizeof (grphdra_t) +
2013 		    (rp->mrec_srcs.sl_numsrc * sizeof (ipaddr_t));
2014 		if (size + rsize > ill->ill_max_frag) {
2015 			if (rp == cur_reclist) {
2016 				/*
2017 				 * If the first mrec we looked at is too big
2018 				 * to fit in a single packet (i.e the source
2019 				 * list is too big), we must either truncate
2020 				 * the list (if TO_EX or IS_EX), or send
2021 				 * multiple reports for the same group (all
2022 				 * other types).
2023 				 */
2024 				int srcspace, srcsperpkt;
2025 				srcspace = ill->ill_max_frag - (size +
2026 				    sizeof (grphdra_t));
2027 
2028 				/*
2029 				 * Skip if there's not even enough room in
2030 				 * a single packet to send something useful.
2031 				 */
2032 				if (srcspace <= sizeof (ipaddr_t))
2033 					continue;
2034 
2035 				srcsperpkt = srcspace / sizeof (ipaddr_t);
2036 				/*
2037 				 * Increment size and numrec, because we will
2038 				 * be sending a record for the mrec we're
2039 				 * looking at now.
2040 				 */
2041 				size += sizeof (grphdra_t) +
2042 				    (srcsperpkt * sizeof (ipaddr_t));
2043 				numrec++;
2044 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
2045 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
2046 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2047 					if (rp->mrec_next == NULL) {
2048 						/* no more packets to send */
2049 						break;
2050 					} else {
2051 						/*
2052 						 * more packets, but we're
2053 						 * done with this mrec.
2054 						 */
2055 						next_reclist = rp->mrec_next;
2056 					}
2057 				} else {
2058 					more_src_cnt = rp->mrec_srcs.sl_numsrc
2059 					    - srcsperpkt;
2060 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2061 					/*
2062 					 * We'll fix up this mrec (remove the
2063 					 * srcs we've already sent) before
2064 					 * returning to nextpkt above.
2065 					 */
2066 					next_reclist = rp;
2067 				}
2068 			} else {
2069 				next_reclist = rp;
2070 			}
2071 			morepkts = B_TRUE;
2072 			break;
2073 		}
2074 		size += rsize;
2075 		numrec++;
2076 	}
2077 
2078 	/*
2079 	 * See comments in igmp_sendpkt() about initializing for ipsec and
2080 	 * load balancing requirements.
2081 	 */
2082 	first_mp = allocb(sizeof (ipsec_info_t), BPRI_HI);
2083 	if (first_mp == NULL)
2084 		goto free_reclist;
2085 
2086 	first_mp->b_datap->db_type = M_CTL;
2087 	first_mp->b_wptr += sizeof (ipsec_info_t);
2088 	bzero(first_mp->b_rptr, sizeof (ipsec_info_t));
2089 	/* ipsec_out_secure is B_FALSE now */
2090 	io = (ipsec_out_t *)first_mp->b_rptr;
2091 	io->ipsec_out_type = IPSEC_OUT;
2092 	io->ipsec_out_len = sizeof (ipsec_out_t);
2093 	io->ipsec_out_use_global_policy = B_TRUE;
2094 	io->ipsec_out_ill_index = ill->ill_phyint->phyint_ifindex;
2095 	io->ipsec_out_multicast_loop = B_FALSE;
2096 	io->ipsec_out_dontroute = B_TRUE;
2097 	if ((zoneid = ipif->ipif_zoneid) == ALL_ZONES)
2098 		zoneid = GLOBAL_ZONEID;
2099 	io->ipsec_out_zoneid = zoneid;
2100 
2101 	mp = allocb(size, BPRI_HI);
2102 	if (mp == NULL) {
2103 		freemsg(first_mp);
2104 		goto free_reclist;
2105 	}
2106 	bzero((char *)mp->b_rptr, size);
2107 	mp->b_wptr = (uchar_t *)(mp->b_rptr + size);
2108 	first_mp->b_cont = mp;
2109 
2110 	ipha = (ipha_t *)mp->b_rptr;
2111 	rtralert = (uint8_t *)&(ipha[1]);
2112 	igmp3ra = (igmp3ra_t *)&(rtralert[RTRALERT_LEN]);
2113 	grphdr = (grphdra_t *)&(igmp3ra[1]);
2114 
2115 	rp = cur_reclist;
2116 	for (i = 0; i < numrec; i++) {
2117 		grphdr->grphdra_type = rp->mrec_type;
2118 		grphdr->grphdra_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2119 		grphdr->grphdra_group = V4_PART_OF_V6(rp->mrec_group);
2120 		src_array = (ipaddr_t *)&(grphdr[1]);
2121 
2122 		for (j = 0; j < rp->mrec_srcs.sl_numsrc; j++)
2123 			src_array[j] = V4_PART_OF_V6(rp->mrec_srcs.sl_addr[j]);
2124 
2125 		grphdr = (grphdra_t *)&(src_array[j]);
2126 		rp = rp->mrec_next;
2127 	}
2128 
2129 	igmp3ra->igmp3ra_type = IGMP_V3_MEMBERSHIP_REPORT;
2130 	igmp3ra->igmp3ra_numrec = htons(numrec);
2131 	igmp3ra->igmp3ra_cksum = IP_CSUM(mp, hdrsize, 0);
2132 
2133 	rtralert[0] = IPOPT_COPY | IPOPT_RTRALERT;
2134 	rtralert[1] = RTRALERT_LEN;
2135 	rtralert[2] = 0;
2136 	rtralert[3] = 0;
2137 
2138 	ipha->ipha_version_and_hdr_length = IP_VERSION << 4
2139 	    | (IP_SIMPLE_HDR_LENGTH_IN_WORDS + RTRALERT_LEN_IN_WORDS);
2140 	ipha->ipha_type_of_service = IPTOS_PREC_INTERNETCONTROL;
2141 	ipha->ipha_length = htons(size);
2142 	ipha->ipha_ttl = IGMP_TTL;
2143 	ipha->ipha_protocol = IPPROTO_IGMP;
2144 	ipha->ipha_dst = htonl(INADDR_ALLRPTS_GROUP);
2145 	ipha->ipha_src = ipif->ipif_src_addr;
2146 
2147 	/*
2148 	 * Request loopback of the report if we are acting as a multicast
2149 	 * router, so that the process-level routing daemon can hear it.
2150 	 *
2151 	 * This will run multiple times for the same group if there are
2152 	 * members on the same group for multiple ipifs on the same ill.
2153 	 * The igmp_input code will suppress this due to the loopback;
2154 	 * thus we always loopback membership report.
2155 	 */
2156 	ASSERT(ill->ill_rq != NULL);
2157 	ip_multicast_loopback(ill->ill_rq, ill, mp, 0, ipif->ipif_zoneid);
2158 
2159 	ip_wput_multicast(ill->ill_wq, first_mp, ipif, zoneid);
2160 
2161 	++ipst->ips_igmpstat.igps_snd_reports;
2162 
2163 	if (morepkts) {
2164 		if (more_src_cnt > 0) {
2165 			int index, mvsize;
2166 			slist_t *sl = &next_reclist->mrec_srcs;
2167 			index = sl->sl_numsrc;
2168 			mvsize = more_src_cnt * sizeof (in6_addr_t);
2169 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2170 			    mvsize);
2171 			sl->sl_numsrc = more_src_cnt;
2172 		}
2173 		goto nextpkt;
2174 	}
2175 
2176 free_reclist:
2177 	while (reclist != NULL) {
2178 		rp = reclist->mrec_next;
2179 		mi_free(reclist);
2180 		reclist = rp;
2181 	}
2182 }
2183 
2184 /*
2185  * mld_input:
2186  */
2187 /* ARGSUSED */
2188 void
2189 mld_input(queue_t *q, mblk_t *mp, ill_t *ill)
2190 {
2191 	ip6_t		*ip6h = (ip6_t *)(mp->b_rptr);
2192 	mld_hdr_t	*mldh;
2193 	ilm_t		*ilm;
2194 	ipif_t		*ipif;
2195 	uint16_t	hdr_length, exthdr_length;
2196 	in6_addr_t	*v6group_ptr, *lcladdr_ptr;
2197 	uint_t		next;
2198 	int		mldlen;
2199 	ip_stack_t	*ipst = ill->ill_ipst;
2200 	ilm_walker_t	ilw;
2201 
2202 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembTotal);
2203 
2204 	/* Make sure the src address of the packet is link-local */
2205 	if (!(IN6_IS_ADDR_LINKLOCAL(&ip6h->ip6_src))) {
2206 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2207 		freemsg(mp);
2208 		return;
2209 	}
2210 
2211 	if (ip6h->ip6_hlim != 1) {
2212 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpBadHoplimit);
2213 		freemsg(mp);
2214 		return;
2215 	}
2216 
2217 	/* Get to the icmp header part */
2218 	if (ip6h->ip6_nxt != IPPROTO_ICMPV6) {
2219 		hdr_length = ip_hdr_length_v6(mp, ip6h);
2220 		exthdr_length = hdr_length - IPV6_HDR_LEN;
2221 	} else {
2222 		hdr_length = IPV6_HDR_LEN;
2223 		exthdr_length = 0;
2224 	}
2225 	mldlen = ntohs(ip6h->ip6_plen) - exthdr_length;
2226 
2227 	/* An MLD packet must at least be 24 octets to be valid */
2228 	if (mldlen < MLD_MINLEN) {
2229 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2230 		freemsg(mp);
2231 		return;
2232 	}
2233 
2234 	mldh = (mld_hdr_t *)(&mp->b_rptr[hdr_length]);
2235 
2236 	switch (mldh->mld_type) {
2237 	case MLD_LISTENER_QUERY:
2238 		/*
2239 		 * packet length differentiates between v1 and v2.  v1
2240 		 * query should be exactly 24 octets long; v2 is >= 28.
2241 		 */
2242 		if ((mldlen == MLD_MINLEN) ||
2243 		    (ipst->ips_mld_max_version < MLD_V2_ROUTER)) {
2244 			next = mld_query_in(mldh, ill);
2245 		} else if (mldlen >= MLD_V2_QUERY_MINLEN) {
2246 			next = mldv2_query_in((mld2q_t *)mldh, ill, mldlen);
2247 		} else {
2248 			BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2249 			freemsg(mp);
2250 			return;
2251 		}
2252 		if (next == 0) {
2253 			freemsg(mp);
2254 			return;
2255 		}
2256 
2257 		if (next != INFINITY)
2258 			mld_start_timers(next, ipst);
2259 		break;
2260 
2261 	case MLD_LISTENER_REPORT: {
2262 
2263 		ASSERT(ill->ill_ipif != NULL);
2264 		/*
2265 		 * For fast leave to work, we have to know that we are the
2266 		 * last person to send a report for this group.  Reports
2267 		 * generated by us are looped back since we could potentially
2268 		 * be a multicast router, so discard reports sourced by me.
2269 		 */
2270 		lcladdr_ptr = &(ill->ill_ipif->ipif_v6subnet);
2271 		mutex_enter(&ill->ill_lock);
2272 		for (ipif = ill->ill_ipif; ipif != NULL;
2273 		    ipif = ipif->ipif_next) {
2274 			if (IN6_ARE_ADDR_EQUAL(&ipif->ipif_v6lcl_addr,
2275 			    lcladdr_ptr)) {
2276 				if (ip_debug > 1) {
2277 					char    buf1[INET6_ADDRSTRLEN];
2278 					char	buf2[INET6_ADDRSTRLEN];
2279 
2280 					(void) mi_strlog(ill->ill_rq,
2281 					    1,
2282 					    SL_TRACE,
2283 					    "mld_input: we are only "
2284 					    "member src %s ipif_local %s",
2285 					    inet_ntop(AF_INET6, lcladdr_ptr,
2286 					    buf1, sizeof (buf1)),
2287 					    inet_ntop(AF_INET6,
2288 					    &ipif->ipif_v6lcl_addr,
2289 					    buf2, sizeof (buf2)));
2290 				}
2291 				mutex_exit(&ill->ill_lock);
2292 				freemsg(mp);
2293 				return;
2294 			}
2295 		}
2296 		mutex_exit(&ill->ill_lock);
2297 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembResponses);
2298 
2299 		v6group_ptr = &mldh->mld_addr;
2300 		if (!IN6_IS_ADDR_MULTICAST(v6group_ptr)) {
2301 			BUMP_MIB(ill->ill_icmp6_mib,
2302 			    ipv6IfIcmpInGroupMembBadReports);
2303 			freemsg(mp);
2304 			return;
2305 		}
2306 
2307 		/*
2308 		 * If we belong to the group being reported, and we are a
2309 		 * 'Delaying member' per the RFC terminology, stop our timer
2310 		 * for that group and 'clear flag' i.e. mark ilm_state as
2311 		 * IGMP_OTHERMEMBER. With zones, there can be multiple group
2312 		 * membership entries for the same group address (one per zone)
2313 		 * so we need to walk the ill_ilm list.
2314 		 */
2315 		ilm = ilm_walker_start(&ilw, ill);
2316 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2317 			if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group_ptr))
2318 				continue;
2319 			BUMP_MIB(ill->ill_icmp6_mib,
2320 			    ipv6IfIcmpInGroupMembOurReports);
2321 
2322 			ilm->ilm_timer = INFINITY;
2323 			ilm->ilm_state = IGMP_OTHERMEMBER;
2324 		}
2325 		ilm_walker_finish(&ilw);
2326 		break;
2327 	}
2328 	case MLD_LISTENER_REDUCTION:
2329 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembReductions);
2330 		break;
2331 	}
2332 	/*
2333 	 * All MLD packets have already been passed up to any
2334 	 * process(es) listening on a ICMP6 raw socket. This
2335 	 * has been accomplished in ip_deliver_local_v6 prior to
2336 	 * this function call. It is assumed that the multicast daemon
2337 	 * will have a SOCK_RAW IPPROTO_ICMPV6 (and presumbly use the
2338 	 * ICMP6_FILTER socket option to only receive the MLD messages)
2339 	 * Thus we can free the MLD message block here
2340 	 */
2341 	freemsg(mp);
2342 }
2343 
2344 /*
2345  * Handles an MLDv1 Listener Query.  Returns 0 on error, or the appropriate
2346  * (non-zero, unsigned) timer value to be set on success.
2347  */
2348 static uint_t
2349 mld_query_in(mld_hdr_t *mldh, ill_t *ill)
2350 {
2351 	ilm_t	*ilm;
2352 	int	timer;
2353 	uint_t	next, current;
2354 	in6_addr_t *v6group;
2355 	ilm_walker_t ilw;
2356 
2357 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2358 
2359 	/*
2360 	 * In the MLD specification, there are 3 states and a flag.
2361 	 *
2362 	 * In Non-Listener state, we simply don't have a membership record.
2363 	 * In Delaying state, our timer is running (ilm->ilm_timer < INFINITY)
2364 	 * In Idle Member state, our timer is not running (ilm->ilm_timer ==
2365 	 * INFINITY)
2366 	 *
2367 	 * The flag is ilm->ilm_state, it is set to IGMP_OTHERMEMBER if
2368 	 * we have heard a report from another member, or IGMP_IREPORTEDLAST
2369 	 * if I sent the last report.
2370 	 */
2371 	v6group = &mldh->mld_addr;
2372 	if (!(IN6_IS_ADDR_UNSPECIFIED(v6group)) &&
2373 	    ((!IN6_IS_ADDR_MULTICAST(v6group)))) {
2374 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembBadQueries);
2375 		return (0);
2376 	}
2377 
2378 	/* Need to do compatibility mode checking */
2379 	mutex_enter(&ill->ill_lock);
2380 	ill->ill_mcast_v1_time = 0;
2381 	ill->ill_mcast_v1_tset = 1;
2382 	if (ill->ill_mcast_type == MLD_V2_ROUTER) {
2383 		ip1dbg(("Received MLDv1 Query on %s, switching mode to "
2384 		    "MLD_V1_ROUTER\n", ill->ill_name));
2385 		atomic_add_16(&ill->ill_ifptr->illif_mcast_v1, 1);
2386 		ill->ill_mcast_type = MLD_V1_ROUTER;
2387 	}
2388 	mutex_exit(&ill->ill_lock);
2389 
2390 	timer = (int)ntohs(mldh->mld_maxdelay);
2391 	if (ip_debug > 1) {
2392 		(void) mi_strlog(ill->ill_rq, 1, SL_TRACE,
2393 		    "mld_input: TIMER = mld_maxdelay %d mld_type 0x%x",
2394 		    timer, (int)mldh->mld_type);
2395 	}
2396 
2397 	/*
2398 	 * -Start the timers in all of our membership records for
2399 	 * the physical interface on which the query arrived,
2400 	 * excl:
2401 	 *	1.  those that belong to the "all hosts" group,
2402 	 *	2.  those with 0 scope, or 1 node-local scope.
2403 	 *
2404 	 * -Restart any timer that is already running but has a value
2405 	 * longer that the requested timeout.
2406 	 * -Use the value specified in the query message as the
2407 	 * maximum timeout.
2408 	 */
2409 	next = INFINITY;
2410 
2411 	ilm = ilm_walker_start(&ilw, ill);
2412 	mutex_enter(&ill->ill_lock);
2413 	current = CURRENT_MSTIME;
2414 
2415 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2416 		ASSERT(!IN6_IS_ADDR_V4MAPPED(&ilm->ilm_v6addr));
2417 
2418 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2419 		    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2420 		    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr))
2421 			continue;
2422 		if ((!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr,
2423 		    &ipv6_all_hosts_mcast)) &&
2424 		    (IN6_IS_ADDR_UNSPECIFIED(v6group)) ||
2425 		    (IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))) {
2426 			if (timer == 0) {
2427 				/* Respond immediately */
2428 				ilm->ilm_timer = INFINITY;
2429 				ilm->ilm_state = IGMP_IREPORTEDLAST;
2430 				mutex_exit(&ill->ill_lock);
2431 				mld_sendpkt(ilm, MLD_LISTENER_REPORT, NULL);
2432 				mutex_enter(&ill->ill_lock);
2433 				break;
2434 			}
2435 			if (ilm->ilm_timer > timer) {
2436 				MCAST_RANDOM_DELAY(ilm->ilm_timer, timer);
2437 				if (ilm->ilm_timer < next)
2438 					next = ilm->ilm_timer;
2439 				ilm->ilm_timer += current;
2440 			}
2441 			break;
2442 		}
2443 	}
2444 	mutex_exit(&ill->ill_lock);
2445 	ilm_walker_finish(&ilw);
2446 
2447 	return (next);
2448 }
2449 
2450 /*
2451  * Handles an MLDv2 Listener Query.  On error, returns 0; on success,
2452  * returns the appropriate (non-zero, unsigned) timer value (which may
2453  * be INFINITY) to be set.
2454  */
2455 static uint_t
2456 mldv2_query_in(mld2q_t *mld2q, ill_t *ill, int mldlen)
2457 {
2458 	ilm_t	*ilm;
2459 	in6_addr_t *v6group, *src_array;
2460 	uint_t	next, numsrc, i, mrd, delay, qqi, current;
2461 	uint8_t	qrv;
2462 	ilm_walker_t ilw;
2463 
2464 	v6group = &mld2q->mld2q_addr;
2465 	numsrc = ntohs(mld2q->mld2q_numsrc);
2466 
2467 	/* make sure numsrc matches packet size */
2468 	if (mldlen < MLD_V2_QUERY_MINLEN + (numsrc * sizeof (in6_addr_t))) {
2469 		BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInErrors);
2470 		return (0);
2471 	}
2472 	src_array = (in6_addr_t *)&mld2q[1];
2473 
2474 	BUMP_MIB(ill->ill_icmp6_mib, ipv6IfIcmpInGroupMembQueries);
2475 
2476 	/* extract Maximum Response Delay from code in header */
2477 	mrd = ntohs(mld2q->mld2q_mxrc);
2478 	if (mrd >= MLD_V2_MAXRT_FPMIN) {
2479 		uint_t hdrval, mant, exp;
2480 		hdrval = mrd;
2481 		mant = hdrval & MLD_V2_MAXRT_MANT_MASK;
2482 		exp = (hdrval & MLD_V2_MAXRT_EXP_MASK) >> 12;
2483 		mrd = (mant | 0x1000) << (exp + 3);
2484 	}
2485 	if (mrd == 0)
2486 		mrd = DSEC_TO_MSEC(MCAST_DEF_QUERY_RESP_INTERVAL);
2487 
2488 	MCAST_RANDOM_DELAY(delay, mrd);
2489 	next = (unsigned)INFINITY;
2490 	current = CURRENT_MSTIME;
2491 
2492 	if ((qrv = mld2q->mld2q_sqrv & MLD_V2_RV_MASK) == 0)
2493 		ill->ill_mcast_rv = MCAST_DEF_ROBUSTNESS;
2494 	else
2495 		ill->ill_mcast_rv = qrv;
2496 
2497 	if ((qqi = (uint_t)mld2q->mld2q_qqic) >= MLD_V2_QQI_FPMIN) {
2498 		uint_t mant, exp;
2499 		mant = qqi & MLD_V2_QQI_MANT_MASK;
2500 		exp = (qqi & MLD_V2_QQI_EXP_MASK) >> 12;
2501 		qqi = (mant | 0x10) << (exp + 3);
2502 	}
2503 	ill->ill_mcast_qi = (qqi == 0) ? MCAST_DEF_QUERY_INTERVAL : qqi;
2504 
2505 	/*
2506 	 * If we have a pending general query response that's scheduled
2507 	 * sooner than the delay we calculated for this response, then
2508 	 * no action is required (MLDv2 draft section 6.2 rule 1)
2509 	 */
2510 	mutex_enter(&ill->ill_lock);
2511 	if (ill->ill_global_timer < (current + delay)) {
2512 		mutex_exit(&ill->ill_lock);
2513 		return (next);
2514 	}
2515 	mutex_exit(&ill->ill_lock);
2516 
2517 	/*
2518 	 * Now take action depending on query type: general,
2519 	 * group specific, or group/source specific.
2520 	 */
2521 	if ((numsrc == 0) && IN6_IS_ADDR_UNSPECIFIED(v6group)) {
2522 		/*
2523 		 * general query
2524 		 * We know global timer is either not running or is
2525 		 * greater than our calculated delay, so reset it to
2526 		 * our delay (random value in range [0, response time])
2527 		 */
2528 		mutex_enter(&ill->ill_lock);
2529 		ill->ill_global_timer = current + delay;
2530 		mutex_exit(&ill->ill_lock);
2531 		next = delay;
2532 
2533 	} else {
2534 		/* group or group/source specific query */
2535 		ilm = ilm_walker_start(&ilw, ill);
2536 		mutex_enter(&ill->ill_lock);
2537 		for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
2538 			if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr) ||
2539 			    IN6_IS_ADDR_MC_NODELOCAL(&ilm->ilm_v6addr) ||
2540 			    IN6_IS_ADDR_MC_RESERVED(&ilm->ilm_v6addr) ||
2541 			    !IN6_ARE_ADDR_EQUAL(v6group, &ilm->ilm_v6addr))
2542 				continue;
2543 
2544 			/*
2545 			 * If the query is group specific or we have a
2546 			 * pending group specific query, the response is
2547 			 * group specific (pending sources list should be
2548 			 * empty).  Otherwise, need to update the pending
2549 			 * sources list for the group and source specific
2550 			 * response.
2551 			 */
2552 			if (numsrc == 0 || (ilm->ilm_timer < INFINITY &&
2553 			    SLIST_IS_EMPTY(ilm->ilm_pendsrcs))) {
2554 group_query:
2555 				FREE_SLIST(ilm->ilm_pendsrcs);
2556 				ilm->ilm_pendsrcs = NULL;
2557 			} else {
2558 				boolean_t overflow;
2559 				slist_t *pktl;
2560 				if (numsrc > MAX_FILTER_SIZE ||
2561 				    (ilm->ilm_pendsrcs == NULL &&
2562 				    (ilm->ilm_pendsrcs = l_alloc()) == NULL)) {
2563 					/*
2564 					 * We've been sent more sources than
2565 					 * we can deal with; or we can't deal
2566 					 * with a source list at all. Revert
2567 					 * to a group specific query.
2568 					 */
2569 					goto group_query;
2570 				}
2571 				if ((pktl = l_alloc()) == NULL)
2572 					goto group_query;
2573 				pktl->sl_numsrc = numsrc;
2574 				for (i = 0; i < numsrc; i++)
2575 					pktl->sl_addr[i] = src_array[i];
2576 				l_union_in_a(ilm->ilm_pendsrcs, pktl,
2577 				    &overflow);
2578 				l_free(pktl);
2579 				if (overflow)
2580 					goto group_query;
2581 			}
2582 			ilm->ilm_timer = (ilm->ilm_timer == INFINITY) ?
2583 			    INFINITY : (ilm->ilm_timer - current);
2584 			/* set timer to soonest value */
2585 			ilm->ilm_timer = MIN(ilm->ilm_timer, delay);
2586 			if (ilm->ilm_timer < next)
2587 				next = ilm->ilm_timer;
2588 			ilm->ilm_timer += current;
2589 			break;
2590 		}
2591 		mutex_exit(&ill->ill_lock);
2592 		ilm_walker_finish(&ilw);
2593 	}
2594 
2595 	return (next);
2596 }
2597 
2598 /*
2599  * Send MLDv1 response packet with hoplimit 1
2600  */
2601 static void
2602 mld_sendpkt(ilm_t *ilm, uchar_t type, const in6_addr_t *v6addr)
2603 {
2604 	mblk_t		*mp;
2605 	mld_hdr_t	*mldh;
2606 	ip6_t 		*ip6h;
2607 	ip6_hbh_t	*ip6hbh;
2608 	struct ip6_opt_router	*ip6router;
2609 	size_t		size = IPV6_HDR_LEN + sizeof (mld_hdr_t);
2610 	ill_t		*ill = ilm->ilm_ill;
2611 	ipif_t		*ipif;
2612 
2613 	/*
2614 	 * We need to place a router alert option in this packet.  The length
2615 	 * of the options must be a multiple of 8.  The hbh option header is 2
2616 	 * bytes followed by the 4 byte router alert option.  That leaves
2617 	 * 2 bytes of pad for a total of 8 bytes.
2618 	 */
2619 	const int	router_alert_length = 8;
2620 
2621 	ASSERT(ill->ill_isv6);
2622 
2623 	size += router_alert_length;
2624 	mp = allocb(size, BPRI_HI);
2625 	if (mp == NULL)
2626 		return;
2627 	bzero(mp->b_rptr, size);
2628 	mp->b_wptr = mp->b_rptr + size;
2629 
2630 	ip6h = (ip6_t *)mp->b_rptr;
2631 	ip6hbh = (struct ip6_hbh *)&ip6h[1];
2632 	ip6router = (struct ip6_opt_router *)&ip6hbh[1];
2633 	/*
2634 	 * A zero is a pad option of length 1.  The bzero of the whole packet
2635 	 * above will pad between ip6router and mld.
2636 	 */
2637 	mldh = (mld_hdr_t *)((uint8_t *)ip6hbh + router_alert_length);
2638 
2639 	mldh->mld_type = type;
2640 	mldh->mld_addr = ilm->ilm_v6addr;
2641 
2642 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2643 	ip6router->ip6or_len = 2;
2644 	ip6router->ip6or_value[0] = 0;
2645 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2646 
2647 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2648 	ip6hbh->ip6h_len = 0;
2649 
2650 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2651 	ip6h->ip6_plen = htons(sizeof (*mldh) + router_alert_length);
2652 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2653 	ip6h->ip6_hops = MLD_HOP_LIMIT;
2654 	if (v6addr == NULL)
2655 		ip6h->ip6_dst =  ilm->ilm_v6addr;
2656 	else
2657 		ip6h->ip6_dst = *v6addr;
2658 
2659 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
2660 	if (ipif_lookup_zoneid(ill, ilm->ilm_zoneid, IPIF_UP, &ipif)) {
2661 		ip6h->ip6_src = ipif->ipif_v6src_addr;
2662 		ipif_refrele(ipif);
2663 	} else {
2664 		/* Otherwise, use IPv6 default address selection. */
2665 		ip6h->ip6_src = ipv6_all_zeros;
2666 	}
2667 
2668 	/*
2669 	 * Prepare for checksum by putting icmp length in the icmp
2670 	 * checksum field. The checksum is calculated in ip_wput_v6.
2671 	 */
2672 	mldh->mld_cksum = htons(sizeof (*mldh));
2673 
2674 	/*
2675 	 * ip_wput will automatically loopback the multicast packet to
2676 	 * the conn if multicast loopback is enabled.
2677 	 * The MIB stats corresponding to this outgoing MLD packet
2678 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
2679 	 * ->icmp_update_out_mib_v6 function call.
2680 	 */
2681 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
2682 }
2683 
2684 /*
2685  * Sends an MLD_V2_LISTENER_REPORT message out the passed-in ill.  The
2686  * report will contain one multicast address record for each element of
2687  * reclist.  If this causes packet length to exceed ill->ill_max_frag,
2688  * multiple reports are sent.  reclist is assumed to be made up of
2689  * buffers allocated by mcast_bldmrec(), and those buffers are freed here.
2690  */
2691 static void
2692 mldv2_sendrpt(ill_t *ill, mrec_t *reclist)
2693 {
2694 	mblk_t		*mp;
2695 	mld2r_t		*mld2r;
2696 	mld2mar_t	*mld2mar;
2697 	in6_addr_t	*srcarray;
2698 	ip6_t		*ip6h;
2699 	ip6_hbh_t	*ip6hbh;
2700 	struct ip6_opt_router	*ip6router;
2701 	size_t		size, optlen, padlen, icmpsize, rsize;
2702 	ipif_t		*ipif;
2703 	int		i, numrec, more_src_cnt;
2704 	mrec_t		*rp, *cur_reclist;
2705 	mrec_t		*next_reclist = reclist;
2706 	boolean_t	morepkts;
2707 
2708 	ASSERT(IAM_WRITER_ILL(ill));
2709 
2710 	/* If there aren't any records, there's nothing to send */
2711 	if (reclist == NULL)
2712 		return;
2713 
2714 	ASSERT(ill->ill_isv6);
2715 
2716 	/*
2717 	 * Total option length (optlen + padlen) must be a multiple of
2718 	 * 8 bytes.  We assume here that optlen <= 8, so the total option
2719 	 * length will be 8.  Assert this in case anything ever changes.
2720 	 */
2721 	optlen = sizeof (ip6_hbh_t) + sizeof (struct ip6_opt_router);
2722 	ASSERT(optlen <= 8);
2723 	padlen = 8 - optlen;
2724 nextpkt:
2725 	icmpsize = sizeof (mld2r_t);
2726 	size = IPV6_HDR_LEN + optlen + padlen + icmpsize;
2727 	morepkts = B_FALSE;
2728 	more_src_cnt = 0;
2729 	for (rp = cur_reclist = next_reclist, numrec = 0; rp != NULL;
2730 	    rp = rp->mrec_next, numrec++) {
2731 		rsize = sizeof (mld2mar_t) +
2732 		    (rp->mrec_srcs.sl_numsrc * sizeof (in6_addr_t));
2733 		if (size + rsize > ill->ill_max_frag) {
2734 			if (rp == cur_reclist) {
2735 				/*
2736 				 * If the first mrec we looked at is too big
2737 				 * to fit in a single packet (i.e the source
2738 				 * list is too big), we must either truncate
2739 				 * the list (if TO_EX or IS_EX), or send
2740 				 * multiple reports for the same group (all
2741 				 * other types).
2742 				 */
2743 				int srcspace, srcsperpkt;
2744 				srcspace = ill->ill_max_frag -
2745 				    (size + sizeof (mld2mar_t));
2746 
2747 				/*
2748 				 * Skip if there's not even enough room in
2749 				 * a single packet to send something useful.
2750 				 */
2751 				if (srcspace <= sizeof (in6_addr_t))
2752 					continue;
2753 
2754 				srcsperpkt = srcspace / sizeof (in6_addr_t);
2755 				/*
2756 				 * Increment icmpsize and size, because we will
2757 				 * be sending a record for the mrec we're
2758 				 * looking at now.
2759 				 */
2760 				rsize = sizeof (mld2mar_t) +
2761 				    (srcsperpkt * sizeof (in6_addr_t));
2762 				icmpsize += rsize;
2763 				size += rsize;
2764 				if (rp->mrec_type == MODE_IS_EXCLUDE ||
2765 				    rp->mrec_type == CHANGE_TO_EXCLUDE) {
2766 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2767 					if (rp->mrec_next == NULL) {
2768 						/* no more packets to send */
2769 						break;
2770 					} else {
2771 						/*
2772 						 * more packets, but we're
2773 						 * done with this mrec.
2774 						 */
2775 						next_reclist = rp->mrec_next;
2776 					}
2777 				} else {
2778 					more_src_cnt = rp->mrec_srcs.sl_numsrc
2779 					    - srcsperpkt;
2780 					rp->mrec_srcs.sl_numsrc = srcsperpkt;
2781 					/*
2782 					 * We'll fix up this mrec (remove the
2783 					 * srcs we've already sent) before
2784 					 * returning to nextpkt above.
2785 					 */
2786 					next_reclist = rp;
2787 				}
2788 			} else {
2789 				next_reclist = rp;
2790 			}
2791 			morepkts = B_TRUE;
2792 			break;
2793 		}
2794 		icmpsize += rsize;
2795 		size += rsize;
2796 	}
2797 
2798 	mp = allocb(size, BPRI_HI);
2799 	if (mp == NULL)
2800 		goto free_reclist;
2801 	bzero(mp->b_rptr, size);
2802 	mp->b_wptr = mp->b_rptr + size;
2803 
2804 	ip6h = (ip6_t *)mp->b_rptr;
2805 	ip6hbh = (ip6_hbh_t *)&(ip6h[1]);
2806 	ip6router = (struct ip6_opt_router *)&(ip6hbh[1]);
2807 	mld2r = (mld2r_t *)((uint8_t *)ip6hbh + optlen + padlen);
2808 	mld2mar = (mld2mar_t *)&(mld2r[1]);
2809 
2810 	ip6h->ip6_vcf = IPV6_DEFAULT_VERS_AND_FLOW;
2811 	ip6h->ip6_plen = htons(optlen + padlen + icmpsize);
2812 	ip6h->ip6_nxt = IPPROTO_HOPOPTS;
2813 	ip6h->ip6_hops = MLD_HOP_LIMIT;
2814 	ip6h->ip6_dst = ipv6_all_v2rtrs_mcast;
2815 	/* ipif returned by ipif_lookup_zoneid is link-local (if present) */
2816 	if (ipif_lookup_zoneid(ill, ALL_ZONES, IPIF_UP, &ipif)) {
2817 		ip6h->ip6_src = ipif->ipif_v6src_addr;
2818 		ipif_refrele(ipif);
2819 	} else {
2820 		/* otherwise, use IPv6 default address selection. */
2821 		ip6h->ip6_src = ipv6_all_zeros;
2822 	}
2823 
2824 	ip6hbh->ip6h_nxt = IPPROTO_ICMPV6;
2825 	/*
2826 	 * ip6h_len is the number of 8-byte words, not including the first
2827 	 * 8 bytes; we've assumed optlen + padlen == 8 bytes; hence len = 0.
2828 	 */
2829 	ip6hbh->ip6h_len = 0;
2830 
2831 	ip6router->ip6or_type = IP6OPT_ROUTER_ALERT;
2832 	ip6router->ip6or_len = 2;
2833 	ip6router->ip6or_value[0] = 0;
2834 	ip6router->ip6or_value[1] = IP6_ALERT_MLD;
2835 
2836 	mld2r->mld2r_type = MLD_V2_LISTENER_REPORT;
2837 	mld2r->mld2r_nummar = htons(numrec);
2838 	/*
2839 	 * Prepare for the checksum by putting icmp length in the icmp
2840 	 * checksum field. The checksum is calculated in ip_wput_v6.
2841 	 */
2842 	mld2r->mld2r_cksum = htons(icmpsize);
2843 
2844 	for (rp = cur_reclist; rp != NULL; rp = rp->mrec_next) {
2845 		mld2mar->mld2mar_type = rp->mrec_type;
2846 		mld2mar->mld2mar_auxlen = 0;
2847 		mld2mar->mld2mar_numsrc = htons(rp->mrec_srcs.sl_numsrc);
2848 		mld2mar->mld2mar_group = rp->mrec_group;
2849 		srcarray = (in6_addr_t *)&(mld2mar[1]);
2850 
2851 		for (i = 0; i < rp->mrec_srcs.sl_numsrc; i++)
2852 			srcarray[i] = rp->mrec_srcs.sl_addr[i];
2853 
2854 		mld2mar = (mld2mar_t *)&(srcarray[i]);
2855 	}
2856 
2857 	/*
2858 	 * ip_wput will automatically loopback the multicast packet to
2859 	 * the conn if multicast loopback is enabled.
2860 	 * The MIB stats corresponding to this outgoing MLD packet
2861 	 * will be accounted for in ip_wput->ip_wput_v6->ip_wput_ire_v6
2862 	 * ->icmp_update_out_mib_v6 function call.
2863 	 */
2864 	(void) ip_output_v6(NULL, mp, ill->ill_wq, IP_WPUT);
2865 
2866 	if (morepkts) {
2867 		if (more_src_cnt > 0) {
2868 			int index, mvsize;
2869 			slist_t *sl = &next_reclist->mrec_srcs;
2870 			index = sl->sl_numsrc;
2871 			mvsize = more_src_cnt * sizeof (in6_addr_t);
2872 			(void) memmove(&sl->sl_addr[0], &sl->sl_addr[index],
2873 			    mvsize);
2874 			sl->sl_numsrc = more_src_cnt;
2875 		}
2876 		goto nextpkt;
2877 	}
2878 
2879 free_reclist:
2880 	while (reclist != NULL) {
2881 		rp = reclist->mrec_next;
2882 		mi_free(reclist);
2883 		reclist = rp;
2884 	}
2885 }
2886 
2887 static mrec_t *
2888 mcast_bldmrec(mcast_record_t type, in6_addr_t *grp, slist_t *srclist,
2889     mrec_t *next)
2890 {
2891 	mrec_t *rp;
2892 	int i;
2893 
2894 	if ((type == ALLOW_NEW_SOURCES || type == BLOCK_OLD_SOURCES) &&
2895 	    SLIST_IS_EMPTY(srclist))
2896 		return (next);
2897 
2898 	rp = (mrec_t *)mi_alloc(sizeof (mrec_t), BPRI_HI);
2899 	if (rp == NULL)
2900 		return (next);
2901 
2902 	rp->mrec_next = next;
2903 	rp->mrec_type = type;
2904 	rp->mrec_auxlen = 0;
2905 	rp->mrec_group = *grp;
2906 	if (srclist == NULL) {
2907 		rp->mrec_srcs.sl_numsrc = 0;
2908 	} else {
2909 		rp->mrec_srcs.sl_numsrc = srclist->sl_numsrc;
2910 		for (i = 0; i < srclist->sl_numsrc; i++)
2911 			rp->mrec_srcs.sl_addr[i] = srclist->sl_addr[i];
2912 	}
2913 
2914 	return (rp);
2915 }
2916 
2917 /*
2918  * Set up initial retransmit state.  If memory cannot be allocated for
2919  * the source lists, simply create as much state as is possible; memory
2920  * allocation failures are considered one type of transient error that
2921  * the retransmissions are designed to overcome (and if they aren't
2922  * transient, there are bigger problems than failing to notify the
2923  * router about multicast group membership state changes).
2924  */
2925 static void
2926 mcast_init_rtx(ill_t *ill, rtx_state_t *rtxp, mcast_record_t rtype,
2927     slist_t *flist)
2928 {
2929 	/*
2930 	 * There are only three possibilities for rtype:
2931 	 *	New join, transition from INCLUDE {} to INCLUDE {flist}
2932 	 *	  => rtype is ALLOW_NEW_SOURCES
2933 	 *	New join, transition from INCLUDE {} to EXCLUDE {flist}
2934 	 *	  => rtype is CHANGE_TO_EXCLUDE
2935 	 *	State change that involves a filter mode change
2936 	 *	  => rtype is either CHANGE_TO_INCLUDE or CHANGE_TO_EXCLUDE
2937 	 */
2938 	ASSERT(rtype == CHANGE_TO_EXCLUDE || rtype == CHANGE_TO_INCLUDE ||
2939 	    rtype == ALLOW_NEW_SOURCES);
2940 
2941 	rtxp->rtx_cnt = ill->ill_mcast_rv;
2942 
2943 	switch (rtype) {
2944 	case CHANGE_TO_EXCLUDE:
2945 		rtxp->rtx_fmode_cnt = ill->ill_mcast_rv;
2946 		CLEAR_SLIST(rtxp->rtx_allow);
2947 		COPY_SLIST(flist, rtxp->rtx_block);
2948 		break;
2949 	case ALLOW_NEW_SOURCES:
2950 	case CHANGE_TO_INCLUDE:
2951 		rtxp->rtx_fmode_cnt =
2952 		    rtype == ALLOW_NEW_SOURCES ? 0 : ill->ill_mcast_rv;
2953 		CLEAR_SLIST(rtxp->rtx_block);
2954 		COPY_SLIST(flist, rtxp->rtx_allow);
2955 		break;
2956 	}
2957 }
2958 
2959 /*
2960  * The basic strategy here, as extrapolated from RFC 3810 section 6.1 and
2961  * RFC 3376 section 5.1, covers three cases:
2962  *	* The current state change is a filter mode change
2963  *		Set filter mode retransmit counter; set retransmit allow or
2964  *		block list to new source list as appropriate, and clear the
2965  *		retransmit list that was not set; send TO_IN or TO_EX with
2966  *		new source list.
2967  *	* The current state change is a source list change, but the filter
2968  *	  mode retransmit counter is > 0
2969  *		Decrement filter mode retransmit counter; set retransmit
2970  *		allow or block list to  new source list as appropriate,
2971  *		and clear the retransmit list that was not set; send TO_IN
2972  *		or TO_EX with new source list.
2973  *	* The current state change is a source list change, and the filter
2974  *	  mode retransmit counter is 0.
2975  *		Merge existing rtx allow and block lists with new state:
2976  *		  rtx_allow = (new allow + rtx_allow) - new block
2977  *		  rtx_block = (new block + rtx_block) - new allow
2978  *		Send ALLOW and BLOCK records for new retransmit lists;
2979  *		decrement retransmit counter.
2980  *
2981  * As is the case for mcast_init_rtx(), memory allocation failures are
2982  * acceptable; we just create as much state as we can.
2983  */
2984 static mrec_t *
2985 mcast_merge_rtx(ilm_t *ilm, mrec_t *mreclist, slist_t *flist)
2986 {
2987 	ill_t *ill;
2988 	rtx_state_t *rtxp = &ilm->ilm_rtx;
2989 	mcast_record_t txtype;
2990 	mrec_t *rp, *rpnext, *rtnmrec;
2991 	boolean_t ovf;
2992 
2993 	ill = (ilm->ilm_ill == NULL ? ilm->ilm_ipif->ipif_ill : ilm->ilm_ill);
2994 
2995 	if (mreclist == NULL)
2996 		return (mreclist);
2997 
2998 	/*
2999 	 * A filter mode change is indicated by a single mrec, which is
3000 	 * either TO_IN or TO_EX.  In this case, we just need to set new
3001 	 * retransmit state as if this were an initial join.  There is
3002 	 * no change to the mrec list.
3003 	 */
3004 	if (mreclist->mrec_type == CHANGE_TO_INCLUDE ||
3005 	    mreclist->mrec_type == CHANGE_TO_EXCLUDE) {
3006 		mcast_init_rtx(ill, rtxp, mreclist->mrec_type,
3007 		    &mreclist->mrec_srcs);
3008 		return (mreclist);
3009 	}
3010 
3011 	/*
3012 	 * Only the source list has changed
3013 	 */
3014 	rtxp->rtx_cnt = ill->ill_mcast_rv;
3015 	if (rtxp->rtx_fmode_cnt > 0) {
3016 		/* but we're still sending filter mode change reports */
3017 		rtxp->rtx_fmode_cnt--;
3018 		if (ilm->ilm_fmode == MODE_IS_INCLUDE) {
3019 			CLEAR_SLIST(rtxp->rtx_block);
3020 			COPY_SLIST(flist, rtxp->rtx_allow);
3021 			txtype = CHANGE_TO_INCLUDE;
3022 		} else {
3023 			CLEAR_SLIST(rtxp->rtx_allow);
3024 			COPY_SLIST(flist, rtxp->rtx_block);
3025 			txtype = CHANGE_TO_EXCLUDE;
3026 		}
3027 		/* overwrite first mrec with new info */
3028 		mreclist->mrec_type = txtype;
3029 		l_copy(flist, &mreclist->mrec_srcs);
3030 		/* then free any remaining mrecs */
3031 		for (rp = mreclist->mrec_next; rp != NULL; rp = rpnext) {
3032 			rpnext = rp->mrec_next;
3033 			mi_free(rp);
3034 		}
3035 		mreclist->mrec_next = NULL;
3036 		rtnmrec = mreclist;
3037 	} else {
3038 		mrec_t *allow_mrec, *block_mrec;
3039 		/*
3040 		 * Just send the source change reports; but we need to
3041 		 * recalculate the ALLOW and BLOCK lists based on previous
3042 		 * state and new changes.
3043 		 */
3044 		rtnmrec = mreclist;
3045 		allow_mrec = block_mrec = NULL;
3046 		for (rp = mreclist; rp != NULL; rp = rp->mrec_next) {
3047 			ASSERT(rp->mrec_type == ALLOW_NEW_SOURCES ||
3048 			    rp->mrec_type == BLOCK_OLD_SOURCES);
3049 			if (rp->mrec_type == ALLOW_NEW_SOURCES)
3050 				allow_mrec = rp;
3051 			else
3052 				block_mrec = rp;
3053 		}
3054 		/*
3055 		 * Perform calculations:
3056 		 *   new_allow = mrec_allow + (rtx_allow - mrec_block)
3057 		 *   new_block = mrec_block + (rtx_block - mrec_allow)
3058 		 *
3059 		 * Each calc requires two steps, for example:
3060 		 *   rtx_allow = rtx_allow - mrec_block;
3061 		 *   new_allow = mrec_allow + rtx_allow;
3062 		 *
3063 		 * Store results in mrec lists, and then copy into rtx lists.
3064 		 * We do it in this order in case the rtx list hasn't been
3065 		 * alloc'd yet; if it hasn't and our alloc fails, that's okay,
3066 		 * Overflows are also okay.
3067 		 */
3068 		if (block_mrec != NULL) {
3069 			l_difference_in_a(rtxp->rtx_allow,
3070 			    &block_mrec->mrec_srcs);
3071 		}
3072 		if (allow_mrec != NULL) {
3073 			l_difference_in_a(rtxp->rtx_block,
3074 			    &allow_mrec->mrec_srcs);
3075 			l_union_in_a(&allow_mrec->mrec_srcs, rtxp->rtx_allow,
3076 			    &ovf);
3077 		}
3078 		if (block_mrec != NULL) {
3079 			l_union_in_a(&block_mrec->mrec_srcs, rtxp->rtx_block,
3080 			    &ovf);
3081 			COPY_SLIST(&block_mrec->mrec_srcs, rtxp->rtx_block);
3082 		} else {
3083 			rtnmrec = mcast_bldmrec(BLOCK_OLD_SOURCES,
3084 			    &ilm->ilm_v6addr, rtxp->rtx_block, allow_mrec);
3085 		}
3086 		if (allow_mrec != NULL) {
3087 			COPY_SLIST(&allow_mrec->mrec_srcs, rtxp->rtx_allow);
3088 		} else {
3089 			rtnmrec = mcast_bldmrec(ALLOW_NEW_SOURCES,
3090 			    &ilm->ilm_v6addr, rtxp->rtx_allow, block_mrec);
3091 		}
3092 	}
3093 
3094 	return (rtnmrec);
3095 }
3096 
3097 /*
3098  * Convenience routine to signal the restart-timer thread.
3099  */
3100 static void
3101 mcast_signal_restart_thread(ip_stack_t *ipst)
3102 {
3103 	mutex_enter(&ipst->ips_mrt_lock);
3104 	ipst->ips_mrt_flags |= IP_MRT_RUN;
3105 	cv_signal(&ipst->ips_mrt_cv);
3106 	mutex_exit(&ipst->ips_mrt_lock);
3107 }
3108 
3109 /*
3110  * Thread to restart IGMP/MLD timers.  See the comment in igmp_joingroup() for
3111  * the story behind this unfortunate thread.
3112  */
3113 void
3114 mcast_restart_timers_thread(ip_stack_t *ipst)
3115 {
3116 	int next;
3117 	char name[64];
3118 	callb_cpr_t cprinfo;
3119 
3120 	(void) snprintf(name, sizeof (name), "mcast_restart_timers_thread_%d",
3121 	    ipst->ips_netstack->netstack_stackid);
3122 	CALLB_CPR_INIT(&cprinfo, &ipst->ips_mrt_lock, callb_generic_cpr, name);
3123 
3124 	for (;;) {
3125 		mutex_enter(&ipst->ips_mrt_lock);
3126 		while (!(ipst->ips_mrt_flags & (IP_MRT_STOP|IP_MRT_RUN))) {
3127 			CALLB_CPR_SAFE_BEGIN(&cprinfo);
3128 			cv_wait(&ipst->ips_mrt_cv, &ipst->ips_mrt_lock);
3129 			CALLB_CPR_SAFE_END(&cprinfo, &ipst->ips_mrt_lock);
3130 		}
3131 		if (ipst->ips_mrt_flags & IP_MRT_STOP)
3132 			break;
3133 		ipst->ips_mrt_flags &= ~IP_MRT_RUN;
3134 		mutex_exit(&ipst->ips_mrt_lock);
3135 
3136 		mutex_enter(&ipst->ips_igmp_timer_lock);
3137 		next = ipst->ips_igmp_deferred_next;
3138 		ipst->ips_igmp_deferred_next = INFINITY;
3139 		mutex_exit(&ipst->ips_igmp_timer_lock);
3140 
3141 		if (next != INFINITY)
3142 			igmp_start_timers(next, ipst);
3143 
3144 		mutex_enter(&ipst->ips_mld_timer_lock);
3145 		next = ipst->ips_mld_deferred_next;
3146 		ipst->ips_mld_deferred_next = INFINITY;
3147 		mutex_exit(&ipst->ips_mld_timer_lock);
3148 		if (next != INFINITY)
3149 			mld_start_timers(next, ipst);
3150 	}
3151 
3152 	ipst->ips_mrt_flags |= IP_MRT_DONE;
3153 	cv_signal(&ipst->ips_mrt_done_cv);
3154 	CALLB_CPR_EXIT(&cprinfo);	/* drops ips_mrt_lock */
3155 	thread_exit();
3156 }
3157