xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_multi.c (revision 968d2fd13688fcd3afa3a4a2106ab4a9a49821f1)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/systm.h>
44 #include <sys/strsubr.h>
45 #include <net/route.h>
46 #include <netinet/in.h>
47 #include <net/if_dl.h>
48 #include <netinet/ip6.h>
49 #include <netinet/icmp6.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 #include <inet/nd.h>
54 #include <inet/arp.h>
55 #include <inet/ip.h>
56 #include <inet/ip6.h>
57 #include <inet/ip_if.h>
58 #include <inet/ip_ndp.h>
59 #include <inet/ip_multi.h>
60 #include <inet/ipclassifier.h>
61 #include <inet/ipsec_impl.h>
62 #include <inet/sctp_ip.h>
63 #include <inet/ip_listutils.h>
64 #include <inet/udp_impl.h>
65 
/*
 * Forward declarations for file-local (static) helpers.
 */

/* igmpv3/mldv2 source filter manipulation */
static void	ilm_bld_flists(conn_t *conn, void *arg);
static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
    slist_t *flist);

/* ilm (interface membership) handling and link-layer multicast requests */
static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int orig_ifindex, zoneid_t zoneid);
static void	ilm_delete(ilm_t *ilm);
static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
/* ilg (per-conn membership) lookup, creation and removal */
static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
    const in6_addr_t *v6group, int index);
static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
    ipif_t *ipif);
static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src);
static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src);
static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
/* construction of the DLPI request / AR_ENTRY_SQUERY message chain */
static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
/* compaction of a conn's ilg array once no walker is active */
static void	conn_ilg_reap(conn_t *connp);
static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);
96 
97 /*
98  * MT notes:
99  *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
111  *
112  * Multicast joins and leaves are single-threaded per phyint/IPMP group
113  * using the ipsq serialization mechanism.
114  *
115  * An ilm is an IP data structure used to track multicast join/leave.
116  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
117  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
118  * referencing the ilm. ilms are created / destroyed only as writer. ilms
119  * are not passed around, instead they are looked up and used under the
120  * ill_lock or as writer. So we don't need a dynamic refcount of the number
121  * of threads holding reference to an ilm.
122  *
123  * Multicast Join operation:
124  *
125  * The first step is to determine the ipif (v4) or ill (v6) on which
126  * the join operation is to be done. The join is done after becoming
127  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
128  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
129  * Multiple threads can attempt to join simultaneously on different ipif/ill
130  * on the same conn. In this case the ipsq serialization does not help in
131  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
132  * The conn_lock also protects all the ilg_t members.
133  *
134  * Leave operation.
135  *
 * Similar to the join operation, the first step is to determine the ipif
 * (v4) or ill (v6) on which the leave operation is to be done. The leave
 * operation is done after becoming exclusive on the ipsq associated with
 * the ipif or ill. As with join, ilg modification is done under the
 * protection of the conn lock.
140  */
141 
/*
 * IPSQ_ENTER_IPIF / IPSQ_ENTER_ILL
 *
 * Try to become exclusive on the ipsq associated with `ipif' (resp. `ill')
 * by calling ipsq_try_enter(); the result is stored in `ipsq'.  If
 * ipsq_try_enter() returns NULL, the macro drops one reference on the
 * ipif/ill (ipif_refrele()/ill_refrele()) and *returns EINPROGRESS from the
 * enclosing function*.
 *
 * Usage notes:
 *	- These expand to several statements and contain a `return', so they
 *	  may only be used as a stand-alone statement inside a function
 *	  returning int; never as the unbraced body of an if/else/loop.
 *	- Every macro argument is parenthesized in the expansion so that
 *	  arbitrary expressions can be passed safely.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}

#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}

/*
 * Leave the ipsq entered above, if one was actually entered (non-NULL).
 *
 * The expansion is a bare `if' that already ends in a semicolon (kept for
 * compatibility with existing callers), so do not follow an invocation with
 * an `else' and do not use it as the unbraced body of another `if'.
 */
#define	IPSQ_EXIT(ipsq)	\
	if ((ipsq) != NULL)	\
		ipsq_exit((ipsq), B_TRUE, B_TRUE);
163 
/*
 * ILG_WALKER_HOLD / ILG_WALKER_RELE bracket a walk of connp->conn_ilg[].
 *
 * HOLD bumps conn_ilg_walker_cnt.  RELE drops it and, when the count
 * reaches zero, calls conn_ilg_reap() to compact the array.
 * conn_ilg_reap() asserts that conn_lock is held, so RELE must be invoked
 * with connp->conn_lock held.
 *
 * `connp' is evaluated more than once by RELE; pass a side-effect-free
 * expression.  The HOLD expansion is fully parenthesized so that it behaves
 * as a single expression wherever it is used.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
172 
173 static void
174 conn_ilg_reap(conn_t *connp)
175 {
176 	int	to;
177 	int	from;
178 
179 	ASSERT(MUTEX_HELD(&connp->conn_lock));
180 
181 	to = 0;
182 	from = 0;
183 	while (from < connp->conn_ilg_inuse) {
184 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
185 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
186 			from++;
187 			continue;
188 		}
189 		if (to != from)
190 			connp->conn_ilg[to] = connp->conn_ilg[from];
191 		to++;
192 		from++;
193 	}
194 
195 	connp->conn_ilg_inuse = to;
196 
197 	if (connp->conn_ilg_inuse == 0) {
198 		mi_free((char *)connp->conn_ilg);
199 		connp->conn_ilg = NULL;
200 		cv_broadcast(&connp->conn_refcv);
201 	}
202 }
203 
204 #define	GETSTRUCT(structure, number)	\
205 	((structure *)mi_zalloc(sizeof (structure) * (number)))
206 
207 #define	ILG_ALLOC_CHUNK	16
208 
209 /*
210  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
211  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
212  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
213  * returned ilg).  Returns NULL on failure (ENOMEM).
214  *
215  * Assumes connp->conn_lock is held.
216  */
217 static ilg_t *
218 conn_ilg_alloc(conn_t *connp)
219 {
220 	ilg_t *new;
221 	int curcnt;
222 
223 	ASSERT(MUTEX_HELD(&connp->conn_lock));
224 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
225 
226 	if (connp->conn_ilg == NULL) {
227 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
228 		if (connp->conn_ilg == NULL)
229 			return (NULL);
230 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
231 		connp->conn_ilg_inuse = 0;
232 	}
233 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
234 		curcnt = connp->conn_ilg_allocated;
235 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
236 		if (new == NULL)
237 			return (NULL);
238 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
239 		mi_free((char *)connp->conn_ilg);
240 		connp->conn_ilg = new;
241 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
242 	}
243 
244 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
245 }
246 
/*
 * Accumulator passed (as the opaque walker argument) to ilm_bld_flists()
 * while ilm_gen_filter() walks all conns to rebuild one ilm's filter.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being rebuilt */
	int		fbld_in_cnt;	/* # matching ilgs in INCLUDE mode */
	int		fbld_ex_cnt;	/* # matching ilgs not in INCLUDE mode */
	slist_t		fbld_in;	/* union of the INCLUDE source lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* filled in by l_union_in_a() */
} ilm_fbld_t;
255 
256 static void
257 ilm_bld_flists(conn_t *conn, void *arg)
258 {
259 	int i;
260 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
261 	ilm_t *ilm = fbld->fbld_ilm;
262 	in6_addr_t *v6group = &ilm->ilm_v6addr;
263 
264 	if (conn->conn_ilg_inuse == 0)
265 		return;
266 
267 	/*
268 	 * Since we can't break out of the ipcl_walk once started, we still
269 	 * have to look at every conn.  But if we've already found one
270 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
271 	 * ilgs--that will be our state.
272 	 */
273 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
274 		return;
275 
276 	/*
277 	 * Check this conn's ilgs to see if any are interested in our
278 	 * ilm (group, interface match).  If so, update the master
279 	 * include and exclude lists we're building in the fbld struct
280 	 * with this ilg's filter info.
281 	 */
282 	mutex_enter(&conn->conn_lock);
283 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
284 		ilg_t *ilg = &conn->conn_ilg[i];
285 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
286 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
287 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
288 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
289 				fbld->fbld_in_cnt++;
290 				if (!fbld->fbld_in_overflow)
291 					l_union_in_a(&fbld->fbld_in,
292 					    ilg->ilg_filter,
293 					    &fbld->fbld_in_overflow);
294 			} else {
295 				fbld->fbld_ex_cnt++;
296 				/*
297 				 * On the first exclude list, don't try to do
298 				 * an intersection, as the master exclude list
299 				 * is intentionally empty.  If the master list
300 				 * is still empty on later iterations, that
301 				 * means we have at least one ilg with an empty
302 				 * exclude list, so that should be reflected
303 				 * when we take the intersection.
304 				 */
305 				if (fbld->fbld_ex_cnt == 1) {
306 					if (ilg->ilg_filter != NULL)
307 						l_copy(ilg->ilg_filter,
308 						    &fbld->fbld_ex);
309 				} else {
310 					l_intersection_in_a(&fbld->fbld_ex,
311 					    ilg->ilg_filter);
312 				}
313 			}
314 			/* there will only be one match, so break now. */
315 			break;
316 		}
317 	}
318 	mutex_exit(&conn->conn_lock);
319 }
320 
321 static void
322 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
323 {
324 	ilm_fbld_t fbld;
325 	ip_stack_t *ipst = ilm->ilm_ipst;
326 
327 	fbld.fbld_ilm = ilm;
328 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
329 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
330 	fbld.fbld_in_overflow = B_FALSE;
331 
332 	/* first, construct our master include and exclude lists */
333 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
334 
335 	/* now use those master lists to generate the interface filter */
336 
337 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
338 	if (fbld.fbld_in_overflow) {
339 		*fmode = MODE_IS_EXCLUDE;
340 		flist->sl_numsrc = 0;
341 		return;
342 	}
343 
344 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
345 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
346 		*fmode = MODE_IS_INCLUDE;
347 		flist->sl_numsrc = 0;
348 		return;
349 	}
350 
351 	/*
352 	 * If there are no exclude lists, then the interface filter
353 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
354 	 * exclude list makes the interface filter EXCLUDE, with its
355 	 * filter list equal to (fbld_ex - fbld_in).
356 	 */
357 	if (fbld.fbld_ex_cnt == 0) {
358 		*fmode = MODE_IS_INCLUDE;
359 		l_copy(&fbld.fbld_in, flist);
360 	} else {
361 		*fmode = MODE_IS_EXCLUDE;
362 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
363 	}
364 }
365 
366 /*
367  * If the given interface has failed, choose a new one to join on so
368  * that we continue to receive packets.  ilg_orig_ifindex remembers
369  * what the application used to join on so that we know the ilg to
370  * delete even though we change the ill here.  Callers will store the
371  * ilg returned from this function in ilg_ill.  Thus when we receive
372  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
373  *
374  * This function must be called as writer so we can walk the group
375  * list and examine flags without holding a lock.
376  */
377 ill_t *
378 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
379 {
380 	ill_t	*till;
381 	ill_group_t *illgrp = ill->ill_group;
382 
383 	ASSERT(IAM_WRITER_ILL(ill));
384 
385 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
386 		return (ill);
387 
388 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
389 		return (ill);
390 
391 	till = illgrp->illgrp_ill;
392 	while (till != NULL &&
393 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
394 		till = till->ill_group_next;
395 	}
396 	if (till != NULL)
397 		return (till);
398 
399 	return (ill);
400 }
401 
402 static int
403 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
404     boolean_t isv6)
405 {
406 	mcast_record_t fmode;
407 	slist_t *flist;
408 	boolean_t fdefault;
409 	char buf[INET6_ADDRSTRLEN];
410 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
411 
412 	/*
413 	 * There are several cases where the ilm's filter state
414 	 * defaults to (EXCLUDE, NULL):
415 	 *	- we've had previous joins without associated ilgs
416 	 *	- this join has no associated ilg
417 	 *	- the ilg's filter state is (EXCLUDE, NULL)
418 	 */
419 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
420 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
421 
422 	/* attempt mallocs (if needed) before doing anything else */
423 	if ((flist = l_alloc()) == NULL)
424 		return (ENOMEM);
425 	if (!fdefault && ilm->ilm_filter == NULL) {
426 		ilm->ilm_filter = l_alloc();
427 		if (ilm->ilm_filter == NULL) {
428 			l_free(flist);
429 			return (ENOMEM);
430 		}
431 	}
432 
433 	if (ilgstat != ILGSTAT_CHANGE)
434 		ilm->ilm_refcnt++;
435 
436 	if (ilgstat == ILGSTAT_NONE)
437 		ilm->ilm_no_ilg_cnt++;
438 
439 	/*
440 	 * Determine new filter state.  If it's not the default
441 	 * (EXCLUDE, NULL), we must walk the conn list to find
442 	 * any ilgs interested in this group, and re-build the
443 	 * ilm filter.
444 	 */
445 	if (fdefault) {
446 		fmode = MODE_IS_EXCLUDE;
447 		flist->sl_numsrc = 0;
448 	} else {
449 		ilm_gen_filter(ilm, &fmode, flist);
450 	}
451 
452 	/* make sure state actually changed; nothing to do if not. */
453 	if ((ilm->ilm_fmode == fmode) &&
454 	    !lists_are_different(ilm->ilm_filter, flist)) {
455 		l_free(flist);
456 		return (0);
457 	}
458 
459 	/* send the state change report */
460 	if (!IS_LOOPBACK(ill)) {
461 		if (isv6)
462 			mld_statechange(ilm, fmode, flist);
463 		else
464 			igmp_statechange(ilm, fmode, flist);
465 	}
466 
467 	/* update the ilm state */
468 	ilm->ilm_fmode = fmode;
469 	if (flist->sl_numsrc > 0)
470 		l_copy(flist, ilm->ilm_filter);
471 	else
472 		CLEAR_SLIST(ilm->ilm_filter);
473 
474 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
475 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
476 
477 	l_free(flist);
478 	return (0);
479 }
480 
481 static int
482 ilm_update_del(ilm_t *ilm, boolean_t isv6)
483 {
484 	mcast_record_t fmode;
485 	slist_t *flist;
486 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
487 
488 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
489 	    ilm->ilm_refcnt));
490 
491 	if ((flist = l_alloc()) == NULL)
492 		return (ENOMEM);
493 
494 	/*
495 	 * If present, the ilg in question has already either been
496 	 * updated or removed from our list; so all we need to do
497 	 * now is walk the list to update the ilm filter state.
498 	 *
499 	 * Skip the list walk if we have any no-ilg joins, which
500 	 * cause the filter state to revert to (EXCLUDE, NULL).
501 	 */
502 	if (ilm->ilm_no_ilg_cnt != 0) {
503 		fmode = MODE_IS_EXCLUDE;
504 		flist->sl_numsrc = 0;
505 	} else {
506 		ilm_gen_filter(ilm, &fmode, flist);
507 	}
508 
509 	/* check to see if state needs to be updated */
510 	if ((ilm->ilm_fmode == fmode) &&
511 	    (!lists_are_different(ilm->ilm_filter, flist))) {
512 		l_free(flist);
513 		return (0);
514 	}
515 
516 	if (!IS_LOOPBACK(ill)) {
517 		if (isv6)
518 			mld_statechange(ilm, fmode, flist);
519 		else
520 			igmp_statechange(ilm, fmode, flist);
521 	}
522 
523 	ilm->ilm_fmode = fmode;
524 	if (flist->sl_numsrc > 0) {
525 		if (ilm->ilm_filter == NULL) {
526 			ilm->ilm_filter = l_alloc();
527 			if (ilm->ilm_filter == NULL) {
528 				char buf[INET6_ADDRSTRLEN];
529 				ip1dbg(("ilm_update_del: failed to alloc ilm "
530 				    "filter; no source filtering for %s on %s",
531 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
532 				    buf, sizeof (buf)), ill->ill_name));
533 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
534 				l_free(flist);
535 				return (0);
536 			}
537 		}
538 		l_copy(flist, ilm->ilm_filter);
539 	} else {
540 		CLEAR_SLIST(ilm->ilm_filter);
541 	}
542 
543 	l_free(flist);
544 	return (0);
545 }
546 
547 /*
548  * INADDR_ANY means all multicast addresses. This is only used
549  * by the multicast router.
550  * INADDR_ANY is stored as IPv6 unspecified addr.
551  */
552 int
553 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
554     mcast_record_t ilg_fmode, slist_t *ilg_flist)
555 {
556 	ill_t	*ill = ipif->ipif_ill;
557 	ilm_t 	*ilm;
558 	in6_addr_t v6group;
559 	int	ret;
560 
561 	ASSERT(IAM_WRITER_IPIF(ipif));
562 
563 	if (!CLASSD(group) && group != INADDR_ANY)
564 		return (EINVAL);
565 
566 	/*
567 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
568 	 */
569 	if (group == INADDR_ANY)
570 		v6group = ipv6_all_zeros;
571 	else
572 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
573 
574 	mutex_enter(&ill->ill_lock);
575 	ilm = ilm_lookup_ipif(ipif, group);
576 	mutex_exit(&ill->ill_lock);
577 	/*
578 	 * Since we are writer, we know the ilm_flags itself cannot
579 	 * change at this point, and ilm_lookup_ipif would not have
580 	 * returned a DELETED ilm. However, the data path can free
581 	 * ilm->next via ilm_walker_cleanup() so we can safely
582 	 * access anything in ilm except ilm_next (for safe access to
583 	 * ilm_next we'd have  to take the ill_lock).
584 	 */
585 	if (ilm != NULL)
586 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
587 
588 	/*
589 	 * ilms are associated with ipifs in IPv4. It moves with the
590 	 * ipif if the ipif moves to a new ill when the interface
591 	 * fails. Thus we really don't check whether the ipif_ill
592 	 * has failed like in IPv6. If it has FAILED the ipif
593 	 * will move (daemon will move it) and hence the ilm, if the
594 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
595 	 * we continue to receive in the same place even if the
596 	 * interface fails.
597 	 */
598 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
599 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
600 	if (ilm == NULL)
601 		return (ENOMEM);
602 
603 	if (group == INADDR_ANY) {
604 		/*
605 		 * Check how many ipif's have members in this group -
606 		 * if more then one we should not tell the driver to join
607 		 * this time
608 		 */
609 		if (ilm_numentries_v6(ill, &v6group) > 1)
610 			return (0);
611 		if (ill->ill_group == NULL)
612 			ret = ip_join_allmulti(ipif);
613 		else
614 			ret = ill_nominate_mcast_rcv(ill->ill_group);
615 		if (ret != 0)
616 			ilm_delete(ilm);
617 		return (ret);
618 	}
619 
620 	if (!IS_LOOPBACK(ill))
621 		igmp_joingroup(ilm);
622 
623 	if (ilm_numentries_v6(ill, &v6group) > 1)
624 		return (0);
625 
626 	ret = ip_ll_addmulti_v6(ipif, &v6group);
627 	if (ret != 0)
628 		ilm_delete(ilm);
629 	return (ret);
630 }
631 
632 /*
633  * The unspecified address means all multicast addresses.
634  * This is only used by the multicast router.
635  *
636  * ill identifies the interface to join on; it may not match the
637  * interface requested by the application of a failover has taken
638  * place.  orig_ifindex always identifies the interface requested
639  * by the app.
640  *
641  * ilgstat tells us if there's an ilg associated with this join,
642  * and if so, if it's a new ilg or a change to an existing one.
643  * ilg_fmode and ilg_flist give us the current filter state of
644  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
645  */
646 int
647 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
648     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
649     slist_t *ilg_flist)
650 {
651 	ilm_t	*ilm;
652 	int	ret;
653 
654 	ASSERT(IAM_WRITER_ILL(ill));
655 
656 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
657 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
658 		return (EINVAL);
659 	}
660 
661 	/*
662 	 * An ilm is uniquely identified by the tuple of (group, ill,
663 	 * orig_ill).  group is the multicast group address, ill is
664 	 * the interface on which it is currently joined, and orig_ill
665 	 * is the interface on which the application requested the
666 	 * join.  orig_ill and ill are the same unless orig_ill has
667 	 * failed over.
668 	 *
669 	 * Both orig_ill and ill are required, which means we may have
670 	 * 2 ilms on an ill for the same group, but with different
671 	 * orig_ills.  These must be kept separate, so that when failback
672 	 * occurs, the appropriate ilms are moved back to their orig_ill
673 	 * without disrupting memberships on the ill to which they had
674 	 * been moved.
675 	 *
676 	 * In order to track orig_ill, we store orig_ifindex in the
677 	 * ilm and ilg.
678 	 */
679 	mutex_enter(&ill->ill_lock);
680 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
681 	mutex_exit(&ill->ill_lock);
682 	if (ilm != NULL)
683 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
684 
685 	/*
686 	 * We need to remember where the application really wanted
687 	 * to join. This will be used later if we want to failback
688 	 * to the original interface.
689 	 */
690 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
691 	    ilg_flist, orig_ifindex, zoneid);
692 	if (ilm == NULL)
693 		return (ENOMEM);
694 
695 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
696 		/*
697 		 * Check how many ipif's that have members in this group -
698 		 * if more then one we should not tell the driver to join
699 		 * this time
700 		 */
701 		if (ilm_numentries_v6(ill, v6group) > 1)
702 			return (0);
703 		if (ill->ill_group == NULL)
704 			ret = ip_join_allmulti(ill->ill_ipif);
705 		else
706 			ret = ill_nominate_mcast_rcv(ill->ill_group);
707 
708 		if (ret != 0)
709 			ilm_delete(ilm);
710 		return (ret);
711 	}
712 
713 	if (!IS_LOOPBACK(ill))
714 		mld_joingroup(ilm);
715 
716 	/*
717 	 * If we have more then one we should not tell the driver
718 	 * to join this time.
719 	 */
720 	if (ilm_numentries_v6(ill, v6group) > 1)
721 		return (0);
722 
723 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
724 	if (ret != 0)
725 		ilm_delete(ilm);
726 	return (ret);
727 }
728 
729 /*
730  * Send a multicast request to the driver for enabling multicast reception
731  * for v6groupp address. The caller has already checked whether it is
732  * appropriate to send one or not.
733  */
734 int
735 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
736 {
737 	mblk_t	*mp;
738 	uint32_t addrlen, addroff;
739 	char	group_buf[INET6_ADDRSTRLEN];
740 
741 	ASSERT(IAM_WRITER_ILL(ill));
742 
743 	/*
744 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
745 	 * on.
746 	 */
747 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
748 	    &addrlen, &addroff);
749 	if (!mp)
750 		return (ENOMEM);
751 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
752 		ipaddr_t v4group;
753 
754 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
755 		/*
756 		 * NOTE!!!
757 		 * The "addroff" passed in here was calculated by
758 		 * ill_create_dl(), and will be used by ill_create_squery()
759 		 * to perform some twisted coding magic. It is the offset
760 		 * into the dl_xxx_req of the hw addr. Here, it will be
761 		 * added to b_wptr - b_rptr to create a magic number that
762 		 * is not an offset into this squery mblk.
763 		 * The actual hardware address will be accessed only in the
764 		 * dl_xxx_req, not in the squery. More importantly,
765 		 * that hardware address can *only* be accessed in this
766 		 * mblk chain by calling mi_offset_param_c(), which uses
767 		 * the magic number in the squery hw offset field to go
768 		 * to the *next* mblk (the dl_xxx_req), subtract the
769 		 * (b_wptr - b_rptr), and find the actual offset into
770 		 * the dl_xxx_req.
771 		 * Any method that depends on using the
772 		 * offset field in the dl_disabmulti_req or squery
773 		 * to find either hardware address will similarly fail.
774 		 *
775 		 * Look in ar_entry_squery() in arp.c to see how this offset
776 		 * is used.
777 		 */
778 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
779 		if (!mp)
780 			return (ENOMEM);
781 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
782 		    inet_ntop(AF_INET6, v6groupp, group_buf,
783 		    sizeof (group_buf)),
784 		    ill->ill_name));
785 		putnext(ill->ill_rq, mp);
786 	} else {
787 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
788 		    " %s\n",
789 		    inet_ntop(AF_INET6, v6groupp, group_buf,
790 		    sizeof (group_buf)),
791 		    ill->ill_name));
792 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
793 	}
794 	return (0);
795 }
796 
797 /*
798  * Send a multicast request to the driver for enabling multicast
799  * membership for v6group if appropriate.
800  */
801 static int
802 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
803 {
804 	ill_t	*ill = ipif->ipif_ill;
805 
806 	ASSERT(IAM_WRITER_IPIF(ipif));
807 
808 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
809 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
810 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
811 		return (0);	/* Must be IRE_IF_NORESOLVER */
812 	}
813 
814 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
815 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
816 		return (0);
817 	}
818 	if (!ill->ill_dl_up) {
819 		/*
820 		 * Nobody there. All multicast addresses will be re-joined
821 		 * when we get the DL_BIND_ACK bringing the interface up.
822 		 */
823 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
824 		return (0);
825 	}
826 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
827 }
828 
829 /*
830  * INADDR_ANY means all multicast addresses. This is only used
831  * by the multicast router.
832  * INADDR_ANY is stored as the IPv6 unspecifed addr.
833  */
834 int
835 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
836 {
837 	ill_t	*ill = ipif->ipif_ill;
838 	ilm_t *ilm;
839 	in6_addr_t v6group;
840 	int	ret;
841 
842 	ASSERT(IAM_WRITER_IPIF(ipif));
843 
844 	if (!CLASSD(group) && group != INADDR_ANY)
845 		return (EINVAL);
846 
847 	/*
848 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
849 	 */
850 	if (group == INADDR_ANY)
851 		v6group = ipv6_all_zeros;
852 	else
853 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
854 
855 	/*
856 	 * Look for a match on the ipif.
857 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
858 	 */
859 	mutex_enter(&ill->ill_lock);
860 	ilm = ilm_lookup_ipif(ipif, group);
861 	mutex_exit(&ill->ill_lock);
862 	if (ilm == NULL)
863 		return (ENOENT);
864 
865 	/* Update counters */
866 	if (no_ilg)
867 		ilm->ilm_no_ilg_cnt--;
868 
869 	if (leaving)
870 		ilm->ilm_refcnt--;
871 
872 	if (ilm->ilm_refcnt > 0)
873 		return (ilm_update_del(ilm, B_FALSE));
874 
875 	if (group == INADDR_ANY) {
876 		ilm_delete(ilm);
877 		/*
878 		 * Check how many ipif's that have members in this group -
879 		 * if there are still some left then don't tell the driver
880 		 * to drop it.
881 		 */
882 		if (ilm_numentries_v6(ill, &v6group) != 0)
883 			return (0);
884 
885 		/*
886 		 * If we never joined, then don't leave.  This can happen
887 		 * if we're in an IPMP group, since only one ill per IPMP
888 		 * group receives all multicast packets.
889 		 */
890 		if (!ill->ill_join_allmulti) {
891 			ASSERT(ill->ill_group != NULL);
892 			return (0);
893 		}
894 
895 		ret = ip_leave_allmulti(ipif);
896 		if (ill->ill_group != NULL)
897 			(void) ill_nominate_mcast_rcv(ill->ill_group);
898 		return (ret);
899 	}
900 
901 	if (!IS_LOOPBACK(ill))
902 		igmp_leavegroup(ilm);
903 
904 	ilm_delete(ilm);
905 	/*
906 	 * Check how many ipif's that have members in this group -
907 	 * if there are still some left then don't tell the driver
908 	 * to drop it.
909 	 */
910 	if (ilm_numentries_v6(ill, &v6group) != 0)
911 		return (0);
912 	return (ip_ll_delmulti_v6(ipif, &v6group));
913 }
914 
915 /*
916  * The unspecified address means all multicast addresses.
917  * This is only used by the multicast router.
918  */
919 int
920 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
921     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
922 {
923 	ipif_t	*ipif;
924 	ilm_t *ilm;
925 	int	ret;
926 
927 	ASSERT(IAM_WRITER_ILL(ill));
928 
929 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
930 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
931 		return (EINVAL);
932 
933 	/*
934 	 * Look for a match on the ill.
935 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
936 	 *
937 	 * Similar to ip_addmulti_v6, we should always look using
938 	 * the orig_ifindex.
939 	 *
940 	 * 1) If orig_ifindex is different from ill's ifindex
941 	 *    we should have an ilm with orig_ifindex created in
942 	 *    ip_addmulti_v6. We should delete that here.
943 	 *
944 	 * 2) If orig_ifindex is same as ill's ifindex, we should
945 	 *    not delete the ilm that is temporarily here because of
946 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
947 	 *    different from ill's ifindex.
948 	 *
949 	 * Thus, always lookup using orig_ifindex.
950 	 */
951 	mutex_enter(&ill->ill_lock);
952 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
953 	mutex_exit(&ill->ill_lock);
954 	if (ilm == NULL)
955 		return (ENOENT);
956 
957 	ASSERT(ilm->ilm_ill == ill);
958 
959 	ipif = ill->ill_ipif;
960 
961 	/* Update counters */
962 	if (no_ilg)
963 		ilm->ilm_no_ilg_cnt--;
964 
965 	if (leaving)
966 		ilm->ilm_refcnt--;
967 
968 	if (ilm->ilm_refcnt > 0)
969 		return (ilm_update_del(ilm, B_TRUE));
970 
971 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
972 		ilm_delete(ilm);
973 		/*
974 		 * Check how many ipif's that have members in this group -
975 		 * if there are still some left then don't tell the driver
976 		 * to drop it.
977 		 */
978 		if (ilm_numentries_v6(ill, v6group) != 0)
979 			return (0);
980 
981 		/*
982 		 * If we never joined, then don't leave.  This can happen
983 		 * if we're in an IPMP group, since only one ill per IPMP
984 		 * group receives all multicast packets.
985 		 */
986 		if (!ill->ill_join_allmulti) {
987 			ASSERT(ill->ill_group != NULL);
988 			return (0);
989 		}
990 
991 		ret = ip_leave_allmulti(ipif);
992 		if (ill->ill_group != NULL)
993 			(void) ill_nominate_mcast_rcv(ill->ill_group);
994 		return (ret);
995 	}
996 
997 	if (!IS_LOOPBACK(ill))
998 		mld_leavegroup(ilm);
999 
1000 	ilm_delete(ilm);
1001 	/*
1002 	 * Check how many ipif's that have members in this group -
1003 	 * if there are still some left then don't tell the driver
1004 	 * to drop it.
1005 	 */
1006 	if (ilm_numentries_v6(ill, v6group) != 0)
1007 		return (0);
1008 	return (ip_ll_delmulti_v6(ipif, v6group));
1009 }
1010 
1011 /*
1012  * Send a multicast request to the driver for disabling multicast reception
1013  * for v6groupp address. The caller has already checked whether it is
1014  * appropriate to send one or not.
1015  */
1016 int
1017 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1018 {
1019 	mblk_t	*mp;
1020 	char	group_buf[INET6_ADDRSTRLEN];
1021 	uint32_t	addrlen, addroff;
1022 
1023 	ASSERT(IAM_WRITER_ILL(ill));
1024 	/*
1025 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1026 	 * on.
1027 	 */
1028 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1029 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1030 
1031 	if (!mp)
1032 		return (ENOMEM);
1033 
1034 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1035 		ipaddr_t v4group;
1036 
1037 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1038 		/*
1039 		 * NOTE!!!
1040 		 * The "addroff" passed in here was calculated by
1041 		 * ill_create_dl(), and will be used by ill_create_squery()
1042 		 * to perform some twisted coding magic. It is the offset
1043 		 * into the dl_xxx_req of the hw addr. Here, it will be
1044 		 * added to b_wptr - b_rptr to create a magic number that
1045 		 * is not an offset into this mblk.
1046 		 *
1047 		 * Please see the comment in ip_ll_send)enabmulti_req()
1048 		 * for a complete explanation.
1049 		 *
1050 		 * Look in ar_entry_squery() in arp.c to see how this offset
1051 		 * is used.
1052 		 */
1053 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1054 		if (!mp)
1055 			return (ENOMEM);
1056 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1057 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1058 		    sizeof (group_buf)),
1059 		    ill->ill_name));
1060 		putnext(ill->ill_rq, mp);
1061 	} else {
1062 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
1063 		    " %s\n",
1064 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1065 		    sizeof (group_buf)),
1066 		    ill->ill_name));
1067 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1068 	}
1069 	return (0);
1070 }
1071 
1072 /*
1073  * Send a multicast request to the driver for disabling multicast
1074  * membership for v6group if appropriate.
1075  */
1076 static int
1077 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1078 {
1079 	ill_t	*ill = ipif->ipif_ill;
1080 
1081 	ASSERT(IAM_WRITER_IPIF(ipif));
1082 
1083 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1084 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1085 		return (0);	/* Must be IRE_IF_NORESOLVER */
1086 	}
1087 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1088 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1089 		return (0);
1090 	}
1091 	if (!ill->ill_dl_up) {
1092 		/*
1093 		 * Nobody there. All multicast addresses will be re-joined
1094 		 * when we get the DL_BIND_ACK bringing the interface up.
1095 		 */
1096 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1097 		return (0);
1098 	}
1099 	return (ip_ll_send_disabmulti_req(ill, v6group));
1100 }
1101 
1102 /*
1103  * Make the driver pass up all multicast packets
1104  *
1105  * With ill groups, the caller makes sure that there is only
1106  * one ill joining the allmulti group.
1107  */
1108 int
1109 ip_join_allmulti(ipif_t *ipif)
1110 {
1111 	ill_t	*ill = ipif->ipif_ill;
1112 	mblk_t	*mp;
1113 	uint32_t	addrlen, addroff;
1114 
1115 	ASSERT(IAM_WRITER_IPIF(ipif));
1116 
1117 	if (!ill->ill_dl_up) {
1118 		/*
1119 		 * Nobody there. All multicast addresses will be re-joined
1120 		 * when we get the DL_BIND_ACK bringing the interface up.
1121 		 */
1122 		return (0);
1123 	}
1124 
1125 	ASSERT(!ill->ill_join_allmulti);
1126 
1127 	/*
1128 	 * Create a DL_PROMISCON_REQ message and send it directly to
1129 	 * the DLPI provider.  We don't need to do this for certain
1130 	 * media types for which we never need to turn promiscuous
1131 	 * mode on.
1132 	 */
1133 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1134 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1135 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1136 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1137 		if (mp == NULL)
1138 			return (ENOMEM);
1139 		ill_dlpi_send(ill, mp);
1140 	}
1141 
1142 	ill->ill_join_allmulti = B_TRUE;
1143 	return (0);
1144 }
1145 
1146 /*
1147  * Make the driver stop passing up all multicast packets
1148  *
1149  * With ill groups, we need to nominate some other ill as
1150  * this ipif->ipif_ill is leaving the group.
1151  */
1152 int
1153 ip_leave_allmulti(ipif_t *ipif)
1154 {
1155 	ill_t	*ill = ipif->ipif_ill;
1156 	mblk_t	*mp;
1157 	uint32_t	addrlen, addroff;
1158 
1159 	ASSERT(IAM_WRITER_IPIF(ipif));
1160 
1161 	if (!ill->ill_dl_up) {
1162 		/*
1163 		 * Nobody there. All multicast addresses will be re-joined
1164 		 * when we get the DL_BIND_ACK bringing the interface up.
1165 		 */
1166 		return (0);
1167 	}
1168 
1169 	ASSERT(ill->ill_join_allmulti);
1170 
1171 	/*
1172 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1173 	 * the DLPI provider.  We don't need to do this for certain
1174 	 * media types for which we never need to turn promiscuous
1175 	 * mode on.
1176 	 */
1177 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1178 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1179 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1180 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1181 		if (mp == NULL)
1182 			return (ENOMEM);
1183 		ill_dlpi_send(ill, mp);
1184 	}
1185 
1186 	ill->ill_join_allmulti = B_FALSE;
1187 	return (0);
1188 }
1189 
1190 /*
1191  * Copy mp_orig and pass it in as a local message.
1192  */
1193 void
1194 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1195     zoneid_t zoneid)
1196 {
1197 	mblk_t	*mp;
1198 	mblk_t	*ipsec_mp;
1199 	ipha_t	*iph;
1200 	ip_stack_t *ipst = ill->ill_ipst;
1201 
1202 	if (DB_TYPE(mp_orig) == M_DATA &&
1203 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1204 		uint_t hdrsz;
1205 
1206 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1207 		    sizeof (udpha_t);
1208 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1209 
1210 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1211 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1212 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1213 			mp->b_wptr += hdrsz;
1214 			mp->b_cont = mp_orig;
1215 			mp_orig->b_rptr += hdrsz;
1216 			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
1217 				mblk_setcred(mp, DB_CRED(mp_orig));
1218 			if (MBLKL(mp_orig) == 0) {
1219 				mp->b_cont = mp_orig->b_cont;
1220 				mp_orig->b_cont = NULL;
1221 				freeb(mp_orig);
1222 			}
1223 		} else if (mp != NULL) {
1224 			freeb(mp);
1225 			mp = NULL;
1226 		}
1227 	} else {
1228 		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1229 	}
1230 
1231 	if (mp == NULL)
1232 		return;
1233 	if (DB_TYPE(mp) == M_CTL) {
1234 		ipsec_mp = mp;
1235 		mp = mp->b_cont;
1236 	} else {
1237 		ipsec_mp = mp;
1238 	}
1239 
1240 	iph = (ipha_t *)mp->b_rptr;
1241 
1242 	DTRACE_PROBE4(ip4__loopback__out__start,
1243 	    ill_t *, NULL, ill_t *, ill,
1244 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1245 
1246 	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1247 	    ipst->ips_ipv4firewall_loopback_out,
1248 	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);
1249 
1250 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1251 
1252 	if (ipsec_mp != NULL)
1253 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1254 		    fanout_flags, zoneid);
1255 }
1256 
1257 static area_t	ip_aresq_template = {
1258 	AR_ENTRY_SQUERY,		/* cmd */
1259 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1260 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1261 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1262 	sizeof (area_t),			/* proto addr offset */
1263 	IP_ADDR_LEN,			/* proto addr_length */
1264 	0,				/* proto mask offset */
1265 	/* Rest is initialized when used */
1266 	0,				/* flags */
1267 	0,				/* hw addr offset */
1268 	0,				/* hw addr length */
1269 };
1270 
1271 static mblk_t *
1272 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1273     uint32_t addroff, mblk_t *mp_tail)
1274 {
1275 	mblk_t	*mp;
1276 	area_t	*area;
1277 
1278 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1279 	    (caddr_t)&ipaddr);
1280 	if (!mp) {
1281 		freemsg(mp_tail);
1282 		return (NULL);
1283 	}
1284 	area = (area_t *)mp->b_rptr;
1285 	area->area_hw_addr_length = addrlen;
1286 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1287 	/*
1288 	 * NOTE!
1289 	 *
1290 	 * The area_hw_addr_offset, as can be seen, does not hold the
1291 	 * actual hardware address offset. Rather, it holds the offset
1292 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1293 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1294 	 * mi_offset_paramc() to find the hardware address in the
1295 	 * *second* mblk (dl_xxx_req), not this mblk.
1296 	 *
1297 	 * Using mi_offset_paramc() is thus the *only* way to access
1298 	 * the dl_xxx_hw address.
1299 	 *
1300 	 * The squery hw address should *not* be accessed.
1301 	 *
1302 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1303 	 */
1304 
1305 	mp->b_cont = mp_tail;
1306 	return (mp);
1307 }
1308 
1309 /*
1310  * Create a dlpi message with room for phys+sap. When we come back in
1311  * ip_wput_ctl() we will strip the sap for those primitives which
1312  * only need a physical address.
1313  */
1314 static mblk_t *
1315 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1316     uint32_t *addr_lenp, uint32_t *addr_offp)
1317 {
1318 	mblk_t	*mp;
1319 	uint32_t	hw_addr_length;
1320 	char		*cp;
1321 	uint32_t	offset;
1322 	uint32_t 	size;
1323 
1324 	*addr_lenp = *addr_offp = 0;
1325 
1326 	hw_addr_length = ill->ill_phys_addr_length;
1327 	if (!hw_addr_length) {
1328 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1329 		return (NULL);
1330 	}
1331 
1332 	size = length;
1333 	switch (dl_primitive) {
1334 	case DL_ENABMULTI_REQ:
1335 	case DL_DISABMULTI_REQ:
1336 		size += hw_addr_length;
1337 		break;
1338 	case DL_PROMISCON_REQ:
1339 	case DL_PROMISCOFF_REQ:
1340 		break;
1341 	default:
1342 		return (NULL);
1343 	}
1344 	mp = allocb(size, BPRI_HI);
1345 	if (!mp)
1346 		return (NULL);
1347 	mp->b_wptr += size;
1348 	mp->b_datap->db_type = M_PROTO;
1349 
1350 	cp = (char *)mp->b_rptr;
1351 	offset = length;
1352 
1353 	switch (dl_primitive) {
1354 	case DL_ENABMULTI_REQ: {
1355 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1356 
1357 		dl->dl_primitive = dl_primitive;
1358 		dl->dl_addr_offset = offset;
1359 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1360 		*addr_offp = offset;
1361 		break;
1362 	}
1363 	case DL_DISABMULTI_REQ: {
1364 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1365 
1366 		dl->dl_primitive = dl_primitive;
1367 		dl->dl_addr_offset = offset;
1368 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1369 		*addr_offp = offset;
1370 		break;
1371 	}
1372 	case DL_PROMISCON_REQ:
1373 	case DL_PROMISCOFF_REQ: {
1374 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1375 
1376 		dl->dl_primitive = dl_primitive;
1377 		dl->dl_level = DL_PROMISC_MULTI;
1378 		break;
1379 	}
1380 	}
1381 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1382 	    *addr_lenp, *addr_offp));
1383 	return (mp);
1384 }
1385 
1386 void
1387 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1388 {
1389 	ill_t	*ill = (ill_t *)q->q_ptr;
1390 	mblk_t	*mp = mp_orig;
1391 	area_t	*area = (area_t *)mp->b_rptr;
1392 
1393 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1394 	if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL ||
1395 	    area->area_cmd != AR_ENTRY_SQUERY) {
1396 		putnext(q, mp);
1397 		return;
1398 	}
1399 	mp = mp->b_cont;
1400 
1401 	/*
1402 	 * Update dl_addr_length and dl_addr_offset for primitives that
1403 	 * have physical addresses as opposed to full saps
1404 	 */
1405 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1406 	case DL_ENABMULTI_REQ:
1407 		/* Track the state if this is the first enabmulti */
1408 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1409 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1410 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1411 		break;
1412 	case DL_DISABMULTI_REQ:
1413 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1414 		break;
1415 	default:
1416 		ip1dbg(("ip_wput_ctl: default\n"));
1417 		break;
1418 	}
1419 	freeb(mp_orig);
1420 	ill_dlpi_send(ill, mp);
1421 }
1422 
1423 /*
1424  * Rejoin any groups which have been explicitly joined by the application (we
1425  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1426  * bringing the interface down).  Note that because groups can be joined and
1427  * left while an interface is down, this may not be the same set of groups
1428  * that we left in ill_leave_multicast().
1429  */
1430 void
1431 ill_recover_multicast(ill_t *ill)
1432 {
1433 	ilm_t	*ilm;
1434 	char    addrbuf[INET6_ADDRSTRLEN];
1435 
1436 	ASSERT(IAM_WRITER_ILL(ill));
1437 	ILM_WALKER_HOLD(ill);
1438 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1439 		/*
1440 		 * Check how many ipif's that have members in this group -
1441 		 * if more then one we make sure that this entry is first
1442 		 * in the list.
1443 		 */
1444 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1445 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1446 			continue;
1447 		ip1dbg(("ill_recover_multicast: %s\n",
1448 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1449 		    sizeof (addrbuf))));
1450 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1451 			if (ill->ill_group == NULL) {
1452 				(void) ip_join_allmulti(ill->ill_ipif);
1453 			} else {
1454 				/*
1455 				 * We don't want to join on this ill,
1456 				 * if somebody else in the group has
1457 				 * already been nominated.
1458 				 */
1459 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1460 			}
1461 		} else {
1462 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1463 			    &ilm->ilm_v6addr);
1464 		}
1465 	}
1466 	ILM_WALKER_RELE(ill);
1467 }
1468 
1469 /*
1470  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1471  * that were explicitly joined.  Note that both these functions could be
1472  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1473  * and DL_ENABMULTI_REQ messages when an interface is down.
1474  */
1475 void
1476 ill_leave_multicast(ill_t *ill)
1477 {
1478 	ilm_t	*ilm;
1479 	char    addrbuf[INET6_ADDRSTRLEN];
1480 
1481 	ASSERT(IAM_WRITER_ILL(ill));
1482 	ILM_WALKER_HOLD(ill);
1483 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1484 		/*
1485 		 * Check how many ipif's that have members in this group -
1486 		 * if more then one we make sure that this entry is first
1487 		 * in the list.
1488 		 */
1489 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1490 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1491 			continue;
1492 		ip1dbg(("ill_leave_multicast: %s\n",
1493 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1494 		    sizeof (addrbuf))));
1495 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1496 			(void) ip_leave_allmulti(ill->ill_ipif);
1497 			/*
1498 			 * If we were part of an IPMP group, then
1499 			 * ill_handoff_responsibility() has already
1500 			 * nominated a new member (so we don't).
1501 			 */
1502 			ASSERT(ill->ill_group == NULL);
1503 		} else {
1504 			(void) ip_ll_delmulti_v6(ill->ill_ipif,
1505 			    &ilm->ilm_v6addr);
1506 		}
1507 	}
1508 	ILM_WALKER_RELE(ill);
1509 }
1510 
1511 /* Find an ilm for matching the ill */
1512 ilm_t *
1513 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1514 {
1515 	in6_addr_t	v6group;
1516 
1517 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1518 	/*
1519 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1520 	 */
1521 	if (group == INADDR_ANY)
1522 		v6group = ipv6_all_zeros;
1523 	else
1524 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1525 
1526 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1527 }
1528 
1529 /*
1530  * Find an ilm for matching the ill. All the ilm lookup functions
1531  * ignore ILM_DELETED ilms. These have been logically deleted, and
1532  * igmp and linklayer disable multicast have been done. Only mi_free
1533  * yet to be done. Still there in the list due to ilm_walkers. The
1534  * last walker will release it.
1535  */
1536 ilm_t *
1537 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1538 {
1539 	ilm_t	*ilm;
1540 
1541 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1542 
1543 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1544 		if (ilm->ilm_flags & ILM_DELETED)
1545 			continue;
1546 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1547 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1548 			return (ilm);
1549 	}
1550 	return (NULL);
1551 }
1552 
1553 ilm_t *
1554 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1555     zoneid_t zoneid)
1556 {
1557 	ilm_t *ilm;
1558 
1559 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1560 
1561 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1562 		if (ilm->ilm_flags & ILM_DELETED)
1563 			continue;
1564 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1565 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1566 		    ilm->ilm_orig_ifindex == index) {
1567 			return (ilm);
1568 		}
1569 	}
1570 	return (NULL);
1571 }
1572 
1573 
1574 /*
1575  * Found an ilm for the ipif. Only needed for IPv4 which does
1576  * ipif specific socket options.
1577  */
1578 ilm_t *
1579 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1580 {
1581 	ill_t	*ill = ipif->ipif_ill;
1582 	ilm_t	*ilm;
1583 	in6_addr_t	v6group;
1584 
1585 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1586 	/*
1587 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1588 	 */
1589 	if (group == INADDR_ANY)
1590 		v6group = ipv6_all_zeros;
1591 	else
1592 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1593 
1594 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1595 		if (ilm->ilm_flags & ILM_DELETED)
1596 			continue;
1597 		if (ilm->ilm_ipif == ipif &&
1598 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1599 			return (ilm);
1600 	}
1601 	return (NULL);
1602 }
1603 
1604 /*
1605  * How many members on this ill?
1606  */
1607 int
1608 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1609 {
1610 	ilm_t	*ilm;
1611 	int i = 0;
1612 
1613 	mutex_enter(&ill->ill_lock);
1614 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1615 		if (ilm->ilm_flags & ILM_DELETED)
1616 			continue;
1617 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1618 			i++;
1619 		}
1620 	}
1621 	mutex_exit(&ill->ill_lock);
1622 	return (i);
1623 }
1624 
1625 /* Caller guarantees that the group is not already on the list */
1626 static ilm_t *
1627 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1628     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1629     zoneid_t zoneid)
1630 {
1631 	ill_t	*ill = ipif->ipif_ill;
1632 	ilm_t	*ilm;
1633 	ilm_t	*ilm_cur;
1634 	ilm_t	**ilm_ptpn;
1635 
1636 	ASSERT(IAM_WRITER_IPIF(ipif));
1637 
1638 	ilm = GETSTRUCT(ilm_t, 1);
1639 	if (ilm == NULL)
1640 		return (NULL);
1641 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1642 		ilm->ilm_filter = l_alloc();
1643 		if (ilm->ilm_filter == NULL) {
1644 			mi_free(ilm);
1645 			return (NULL);
1646 		}
1647 	}
1648 	ilm->ilm_v6addr = *v6group;
1649 	ilm->ilm_refcnt = 1;
1650 	ilm->ilm_zoneid = zoneid;
1651 	ilm->ilm_timer = INFINITY;
1652 	ilm->ilm_rtx.rtx_timer = INFINITY;
1653 
1654 	/*
1655 	 * IPv4 Multicast groups are joined using ipif.
1656 	 * IPv6 Multicast groups are joined using ill.
1657 	 */
1658 	if (ill->ill_isv6) {
1659 		ilm->ilm_ill = ill;
1660 		ilm->ilm_ipif = NULL;
1661 		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1662 		    (char *), "ilm", (void *), ilm);
1663 		ill->ill_cnt_ilm++;
1664 	} else {
1665 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1666 		ilm->ilm_ipif = ipif;
1667 		ilm->ilm_ill = NULL;
1668 		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
1669 		    (char *), "ilm", (void *), ilm);
1670 		ipif->ipif_cnt_ilm++;
1671 	}
1672 	ASSERT(ill->ill_ipst);
1673 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1674 
1675 	/*
1676 	 * After this if ilm moves to a new ill, we don't change
1677 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1678 	 * it has been moved. Indexes don't match even when the application
1679 	 * wants to join on a FAILED/INACTIVE interface because we choose
1680 	 * a new interface to join in. This is considered as an implicit
1681 	 * move.
1682 	 */
1683 	ilm->ilm_orig_ifindex = orig_ifindex;
1684 
1685 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1686 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1687 
1688 	/*
1689 	 * Grab lock to give consistent view to readers
1690 	 */
1691 	mutex_enter(&ill->ill_lock);
1692 	/*
1693 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1694 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1695 	 * sending duplicates up when two applications in the same zone join the
1696 	 * same group on different logical interfaces.
1697 	 */
1698 	ilm_cur = ill->ill_ilm;
1699 	ilm_ptpn = &ill->ill_ilm;
1700 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1701 		ilm_ptpn = &ilm_cur->ilm_next;
1702 		ilm_cur = ilm_cur->ilm_next;
1703 	}
1704 	ilm->ilm_next = ilm_cur;
1705 	*ilm_ptpn = ilm;
1706 
1707 	/*
1708 	 * If we have an associated ilg, use its filter state; if not,
1709 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1710 	 */
1711 	if (ilgstat != ILGSTAT_NONE) {
1712 		if (!SLIST_IS_EMPTY(ilg_flist))
1713 			l_copy(ilg_flist, ilm->ilm_filter);
1714 		ilm->ilm_fmode = ilg_fmode;
1715 	} else {
1716 		ilm->ilm_no_ilg_cnt = 1;
1717 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1718 	}
1719 
1720 	mutex_exit(&ill->ill_lock);
1721 	return (ilm);
1722 }
1723 
1724 static void
1725 ilm_inactive(ilm_t *ilm)
1726 {
1727 	FREE_SLIST(ilm->ilm_filter);
1728 	FREE_SLIST(ilm->ilm_pendsrcs);
1729 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1730 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1731 	ilm->ilm_ipst = NULL;
1732 	mi_free((char *)ilm);
1733 }
1734 
1735 void
1736 ilm_walker_cleanup(ill_t *ill)
1737 {
1738 	ilm_t	**ilmp;
1739 	ilm_t	*ilm;
1740 	boolean_t need_wakeup = B_FALSE;
1741 
1742 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1743 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1744 
1745 	ilmp = &ill->ill_ilm;
1746 	while (*ilmp != NULL) {
1747 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1748 			ilm = *ilmp;
1749 			*ilmp = ilm->ilm_next;
1750 			/*
1751 			 * check if there are any pending FREE or unplumb
1752 			 * operations that need to be restarted.
1753 			 */
1754 			if (ilm->ilm_ipif != NULL) {
1755 				/*
1756 				 * IPv4 ilms hold a ref on the ipif.
1757 				 */
1758 				DTRACE_PROBE3(ipif__decr__cnt,
1759 				    (ipif_t *), ilm->ilm_ipif,
1760 				    (char *), "ilm", (void *), ilm);
1761 				ilm->ilm_ipif->ipif_cnt_ilm--;
1762 				if (IPIF_FREE_OK(ilm->ilm_ipif))
1763 					need_wakeup = B_TRUE;
1764 			} else {
1765 				/*
1766 				 * IPv6 ilms hold a ref on the ill.
1767 				 */
1768 				ASSERT(ilm->ilm_ill == ill);
1769 				DTRACE_PROBE3(ill__decr__cnt,
1770 				    (ill_t *), ill,
1771 				    (char *), "ilm", (void *), ilm);
1772 				ill->ill_cnt_ilm--;
1773 				if (ILL_FREE_OK(ill))
1774 					need_wakeup = B_TRUE;
1775 			}
1776 			ilm_inactive(ilm); /* frees ilm */
1777 		} else {
1778 			ilmp = &(*ilmp)->ilm_next;
1779 		}
1780 	}
1781 	ill->ill_ilm_cleanup_reqd = 0;
1782 	if (need_wakeup)
1783 		ipif_ill_refrele_tail(ill);
1784 	else
1785 		mutex_exit(&ill->ill_lock);
1786 }
1787 
1788 /*
1789  * Unlink ilm and free it.
1790  */
1791 static void
1792 ilm_delete(ilm_t *ilm)
1793 {
1794 	ill_t		*ill;
1795 	ilm_t		**ilmp;
1796 	boolean_t	need_wakeup;
1797 
1798 
1799 	if (ilm->ilm_ipif != NULL) {
1800 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1801 		ASSERT(ilm->ilm_ill == NULL);
1802 		ill = ilm->ilm_ipif->ipif_ill;
1803 		ASSERT(!ill->ill_isv6);
1804 	} else {
1805 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1806 		ASSERT(ilm->ilm_ipif == NULL);
1807 		ill = ilm->ilm_ill;
1808 		ASSERT(ill->ill_isv6);
1809 	}
1810 	/*
1811 	 * Delete under lock protection so that readers don't stumble
1812 	 * on bad ilm_next
1813 	 */
1814 	mutex_enter(&ill->ill_lock);
1815 	if (ill->ill_ilm_walker_cnt != 0) {
1816 		ilm->ilm_flags |= ILM_DELETED;
1817 		ill->ill_ilm_cleanup_reqd = 1;
1818 		mutex_exit(&ill->ill_lock);
1819 		return;
1820 	}
1821 
1822 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1823 				;
1824 	*ilmp = ilm->ilm_next;
1825 
1826 	/*
1827 	 * if we are the last reference to the ipif (for IPv4 ilms)
1828 	 * or the ill (for IPv6 ilms), we may need to wakeup any
1829 	 * pending FREE or unplumb operations.
1830 	 */
1831 	need_wakeup = B_FALSE;
1832 	if (ilm->ilm_ipif != NULL) {
1833 		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
1834 		    (char *), "ilm", (void *), ilm);
1835 		ilm->ilm_ipif->ipif_cnt_ilm--;
1836 		if (IPIF_FREE_OK(ilm->ilm_ipif))
1837 			need_wakeup = B_TRUE;
1838 	} else {
1839 		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1840 		    (char *), "ilm", (void *), ilm);
1841 		ill->ill_cnt_ilm--;
1842 		if (ILL_FREE_OK(ill))
1843 			need_wakeup = B_TRUE;
1844 	}
1845 
1846 	ilm_inactive(ilm); /* frees this ilm */
1847 
1848 	if (need_wakeup) {
1849 		/* drops ill lock */
1850 		ipif_ill_refrele_tail(ill);
1851 	} else {
1852 		mutex_exit(&ill->ill_lock);
1853 	}
1854 }
1855 
1856 
1857 /*
1858  * Looks up the appropriate ipif given a v4 multicast group and interface
1859  * address.  On success, returns 0, with *ipifpp pointing to the found
1860  * struct.  On failure, returns an errno and *ipifpp is NULL.
1861  */
1862 int
1863 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1864     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1865 {
1866 	ipif_t *ipif;
1867 	int err = 0;
1868 	zoneid_t zoneid;
1869 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1870 
1871 	if (!CLASSD(group) || CLASSD(src)) {
1872 		return (EINVAL);
1873 	}
1874 	*ipifpp = NULL;
1875 
1876 	zoneid = IPCL_ZONEID(connp);
1877 
1878 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1879 	if (ifaddr != INADDR_ANY) {
1880 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1881 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1882 		if (err != 0 && err != EINPROGRESS)
1883 			err = EADDRNOTAVAIL;
1884 	} else if (ifindexp != NULL && *ifindexp != 0) {
1885 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1886 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1887 	} else {
1888 		ipif = ipif_lookup_group(group, zoneid, ipst);
1889 		if (ipif == NULL)
1890 			return (EADDRNOTAVAIL);
1891 	}
1892 	if (ipif == NULL)
1893 		return (err);
1894 
1895 	*ipifpp = ipif;
1896 	return (0);
1897 }
1898 
1899 /*
1900  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1901  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1902  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1903  * an errno and *illpp and *ipifpp are undefined.
1904  */
1905 int
1906 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1907     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1908     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1909 {
1910 	boolean_t src_unspec;
1911 	ill_t *ill = NULL;
1912 	ipif_t *ipif = NULL;
1913 	int err;
1914 	zoneid_t zoneid = connp->conn_zoneid;
1915 	queue_t *wq = CONNP_TO_WQ(connp);
1916 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1917 
1918 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1919 
1920 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1921 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1922 			return (EINVAL);
1923 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1924 		if (src_unspec) {
1925 			*v4src = INADDR_ANY;
1926 		} else {
1927 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1928 		}
1929 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1930 			return (EINVAL);
1931 		*ipifpp = NULL;
1932 		*isv6 = B_FALSE;
1933 	} else {
1934 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1935 			return (EINVAL);
1936 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1937 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1938 			return (EINVAL);
1939 		}
1940 		*illpp = NULL;
1941 		*isv6 = B_TRUE;
1942 	}
1943 
1944 	if (ifindex == 0) {
1945 		if (*isv6)
1946 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1947 		else
1948 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1949 		if (ill == NULL && ipif == NULL)
1950 			return (EADDRNOTAVAIL);
1951 	} else {
1952 		if (*isv6) {
1953 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1954 			    wq, first_mp, func, &err, ipst);
1955 			if (ill != NULL &&
1956 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1957 				ill_refrele(ill);
1958 				ill = NULL;
1959 				err = EADDRNOTAVAIL;
1960 			}
1961 		} else {
1962 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1963 			    zoneid, wq, first_mp, func, &err, ipst);
1964 		}
1965 		if (ill == NULL && ipif == NULL)
1966 			return (err);
1967 	}
1968 
1969 	*ipifpp = ipif;
1970 	*illpp = ill;
1971 	return (0);
1972 }
1973 
1974 static int
1975 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1976     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1977 {
1978 	ilg_t *ilg;
1979 	int i, numsrc, fmode, outsrcs;
1980 	struct sockaddr_in *sin;
1981 	struct sockaddr_in6 *sin6;
1982 	struct in_addr *addrp;
1983 	slist_t *fp;
1984 	boolean_t is_v4only_api;
1985 
1986 	mutex_enter(&connp->conn_lock);
1987 
1988 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1989 	if (ilg == NULL) {
1990 		mutex_exit(&connp->conn_lock);
1991 		return (EADDRNOTAVAIL);
1992 	}
1993 
1994 	if (gf == NULL) {
1995 		ASSERT(imsf != NULL);
1996 		ASSERT(!isv4mapped);
1997 		is_v4only_api = B_TRUE;
1998 		outsrcs = imsf->imsf_numsrc;
1999 	} else {
2000 		ASSERT(imsf == NULL);
2001 		is_v4only_api = B_FALSE;
2002 		outsrcs = gf->gf_numsrc;
2003 	}
2004 
2005 	/*
2006 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2007 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2008 	 * So we need to translate here.
2009 	 */
2010 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2011 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2012 	if ((fp = ilg->ilg_filter) == NULL) {
2013 		numsrc = 0;
2014 	} else {
2015 		for (i = 0; i < outsrcs; i++) {
2016 			if (i == fp->sl_numsrc)
2017 				break;
2018 			if (isv4mapped) {
2019 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2020 				sin6->sin6_family = AF_INET6;
2021 				sin6->sin6_addr = fp->sl_addr[i];
2022 			} else {
2023 				if (is_v4only_api) {
2024 					addrp = &imsf->imsf_slist[i];
2025 				} else {
2026 					sin = (struct sockaddr_in *)
2027 					    &gf->gf_slist[i];
2028 					sin->sin_family = AF_INET;
2029 					addrp = &sin->sin_addr;
2030 				}
2031 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2032 			}
2033 		}
2034 		numsrc = fp->sl_numsrc;
2035 	}
2036 
2037 	if (is_v4only_api) {
2038 		imsf->imsf_numsrc = numsrc;
2039 		imsf->imsf_fmode = fmode;
2040 	} else {
2041 		gf->gf_numsrc = numsrc;
2042 		gf->gf_fmode = fmode;
2043 	}
2044 
2045 	mutex_exit(&connp->conn_lock);
2046 
2047 	return (0);
2048 }
2049 
2050 static int
2051 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2052     const struct in6_addr *grp, ill_t *ill)
2053 {
2054 	ilg_t *ilg;
2055 	int i;
2056 	struct sockaddr_storage *sl;
2057 	struct sockaddr_in6 *sin6;
2058 	slist_t *fp;
2059 
2060 	mutex_enter(&connp->conn_lock);
2061 
2062 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2063 	if (ilg == NULL) {
2064 		mutex_exit(&connp->conn_lock);
2065 		return (EADDRNOTAVAIL);
2066 	}
2067 
2068 	/*
2069 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2070 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2071 	 * So we need to translate here.
2072 	 */
2073 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2074 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2075 	if ((fp = ilg->ilg_filter) == NULL) {
2076 		gf->gf_numsrc = 0;
2077 	} else {
2078 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2079 			if (i == fp->sl_numsrc)
2080 				break;
2081 			sin6 = (struct sockaddr_in6 *)sl;
2082 			sin6->sin6_family = AF_INET6;
2083 			sin6->sin6_addr = fp->sl_addr[i];
2084 		}
2085 		gf->gf_numsrc = fp->sl_numsrc;
2086 	}
2087 
2088 	mutex_exit(&connp->conn_lock);
2089 
2090 	return (0);
2091 }
2092 
2093 static int
2094 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2095     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2096 {
2097 	ilg_t *ilg;
2098 	int i, err, insrcs, infmode, new_fmode;
2099 	struct sockaddr_in *sin;
2100 	struct sockaddr_in6 *sin6;
2101 	struct in_addr *addrp;
2102 	slist_t *orig_filter = NULL;
2103 	slist_t *new_filter = NULL;
2104 	mcast_record_t orig_fmode;
2105 	boolean_t leave_grp, is_v4only_api;
2106 	ilg_stat_t ilgstat;
2107 
2108 	if (gf == NULL) {
2109 		ASSERT(imsf != NULL);
2110 		ASSERT(!isv4mapped);
2111 		is_v4only_api = B_TRUE;
2112 		insrcs = imsf->imsf_numsrc;
2113 		infmode = imsf->imsf_fmode;
2114 	} else {
2115 		ASSERT(imsf == NULL);
2116 		is_v4only_api = B_FALSE;
2117 		insrcs = gf->gf_numsrc;
2118 		infmode = gf->gf_fmode;
2119 	}
2120 
2121 	/* Make sure we can handle the source list */
2122 	if (insrcs > MAX_FILTER_SIZE)
2123 		return (ENOBUFS);
2124 
2125 	/*
2126 	 * setting the filter to (INCLUDE, NULL) is treated
2127 	 * as a request to leave the group.
2128 	 */
2129 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2130 
2131 	ASSERT(IAM_WRITER_IPIF(ipif));
2132 
2133 	mutex_enter(&connp->conn_lock);
2134 
2135 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2136 	if (ilg == NULL) {
2137 		/*
2138 		 * if the request was actually to leave, and we
2139 		 * didn't find an ilg, there's nothing to do.
2140 		 */
2141 		if (!leave_grp)
2142 			ilg = conn_ilg_alloc(connp);
2143 		if (leave_grp || ilg == NULL) {
2144 			mutex_exit(&connp->conn_lock);
2145 			return (leave_grp ? 0 : ENOMEM);
2146 		}
2147 		ilgstat = ILGSTAT_NEW;
2148 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2149 		ilg->ilg_ipif = ipif;
2150 		ilg->ilg_ill = NULL;
2151 		ilg->ilg_orig_ifindex = 0;
2152 	} else if (leave_grp) {
2153 		ilg_delete(connp, ilg, NULL);
2154 		mutex_exit(&connp->conn_lock);
2155 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2156 		return (0);
2157 	} else {
2158 		ilgstat = ILGSTAT_CHANGE;
2159 		/* Preserve existing state in case ip_addmulti() fails */
2160 		orig_fmode = ilg->ilg_fmode;
2161 		if (ilg->ilg_filter == NULL) {
2162 			orig_filter = NULL;
2163 		} else {
2164 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2165 			if (orig_filter == NULL) {
2166 				mutex_exit(&connp->conn_lock);
2167 				return (ENOMEM);
2168 			}
2169 		}
2170 	}
2171 
2172 	/*
2173 	 * Alloc buffer to copy new state into (see below) before
2174 	 * we make any changes, so we can bail if it fails.
2175 	 */
2176 	if ((new_filter = l_alloc()) == NULL) {
2177 		mutex_exit(&connp->conn_lock);
2178 		err = ENOMEM;
2179 		goto free_and_exit;
2180 	}
2181 
2182 	if (insrcs == 0) {
2183 		CLEAR_SLIST(ilg->ilg_filter);
2184 	} else {
2185 		slist_t *fp;
2186 		if (ilg->ilg_filter == NULL) {
2187 			fp = l_alloc();
2188 			if (fp == NULL) {
2189 				if (ilgstat == ILGSTAT_NEW)
2190 					ilg_delete(connp, ilg, NULL);
2191 				mutex_exit(&connp->conn_lock);
2192 				err = ENOMEM;
2193 				goto free_and_exit;
2194 			}
2195 		} else {
2196 			fp = ilg->ilg_filter;
2197 		}
2198 		for (i = 0; i < insrcs; i++) {
2199 			if (isv4mapped) {
2200 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2201 				fp->sl_addr[i] = sin6->sin6_addr;
2202 			} else {
2203 				if (is_v4only_api) {
2204 					addrp = &imsf->imsf_slist[i];
2205 				} else {
2206 					sin = (struct sockaddr_in *)
2207 					    &gf->gf_slist[i];
2208 					addrp = &sin->sin_addr;
2209 				}
2210 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2211 			}
2212 		}
2213 		fp->sl_numsrc = insrcs;
2214 		ilg->ilg_filter = fp;
2215 	}
2216 	/*
2217 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2218 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2219 	 * So we need to translate here.
2220 	 */
2221 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2222 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2223 
2224 	/*
2225 	 * Save copy of ilg's filter state to pass to other functions,
2226 	 * so we can release conn_lock now.
2227 	 */
2228 	new_fmode = ilg->ilg_fmode;
2229 	l_copy(ilg->ilg_filter, new_filter);
2230 
2231 	mutex_exit(&connp->conn_lock);
2232 
2233 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2234 	if (err != 0) {
2235 		/*
2236 		 * Restore the original filter state, or delete the
2237 		 * newly-created ilg.  We need to look up the ilg
2238 		 * again, though, since we've not been holding the
2239 		 * conn_lock.
2240 		 */
2241 		mutex_enter(&connp->conn_lock);
2242 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2243 		ASSERT(ilg != NULL);
2244 		if (ilgstat == ILGSTAT_NEW) {
2245 			ilg_delete(connp, ilg, NULL);
2246 		} else {
2247 			ilg->ilg_fmode = orig_fmode;
2248 			if (SLIST_IS_EMPTY(orig_filter)) {
2249 				CLEAR_SLIST(ilg->ilg_filter);
2250 			} else {
2251 				/*
2252 				 * We didn't free the filter, even if we
2253 				 * were trying to make the source list empty;
2254 				 * so if orig_filter isn't empty, the ilg
2255 				 * must still have a filter alloc'd.
2256 				 */
2257 				l_copy(orig_filter, ilg->ilg_filter);
2258 			}
2259 		}
2260 		mutex_exit(&connp->conn_lock);
2261 	}
2262 
2263 free_and_exit:
2264 	l_free(orig_filter);
2265 	l_free(new_filter);
2266 
2267 	return (err);
2268 }
2269 
2270 static int
2271 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2272     const struct in6_addr *grp, ill_t *ill)
2273 {
2274 	ilg_t *ilg;
2275 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2276 	slist_t *orig_filter = NULL;
2277 	slist_t *new_filter = NULL;
2278 	struct sockaddr_storage *sl;
2279 	struct sockaddr_in6 *sin6;
2280 	boolean_t leave_grp;
2281 	ilg_stat_t ilgstat;
2282 
2283 	/* Make sure we can handle the source list */
2284 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2285 		return (ENOBUFS);
2286 
2287 	/*
2288 	 * setting the filter to (INCLUDE, NULL) is treated
2289 	 * as a request to leave the group.
2290 	 */
2291 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2292 
2293 	ASSERT(IAM_WRITER_ILL(ill));
2294 
2295 	/*
2296 	 * Use the ifindex to do the lookup.  We can't use the ill
2297 	 * directly because ilg_ill could point to a different ill
2298 	 * if things have moved.
2299 	 */
2300 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2301 
2302 	mutex_enter(&connp->conn_lock);
2303 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2304 	if (ilg == NULL) {
2305 		/*
2306 		 * if the request was actually to leave, and we
2307 		 * didn't find an ilg, there's nothing to do.
2308 		 */
2309 		if (!leave_grp)
2310 			ilg = conn_ilg_alloc(connp);
2311 		if (leave_grp || ilg == NULL) {
2312 			mutex_exit(&connp->conn_lock);
2313 			return (leave_grp ? 0 : ENOMEM);
2314 		}
2315 		ilgstat = ILGSTAT_NEW;
2316 		ilg->ilg_v6group = *grp;
2317 		ilg->ilg_ipif = NULL;
2318 		/*
2319 		 * Choose our target ill to join on. This might be
2320 		 * different from the ill we've been given if it's
2321 		 * currently down and part of a group.
2322 		 *
2323 		 * new ill is not refheld; we are writer.
2324 		 */
2325 		ill = ip_choose_multi_ill(ill, grp);
2326 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2327 		ilg->ilg_ill = ill;
2328 		/*
2329 		 * Remember the index that we joined on, so that we can
2330 		 * successfully delete them later on and also search for
2331 		 * duplicates if the application wants to join again.
2332 		 */
2333 		ilg->ilg_orig_ifindex = orig_ifindex;
2334 	} else if (leave_grp) {
2335 		/*
2336 		 * Use the ilg's current ill for the deletion,
2337 		 * we might have failed over.
2338 		 */
2339 		ill = ilg->ilg_ill;
2340 		ilg_delete(connp, ilg, NULL);
2341 		mutex_exit(&connp->conn_lock);
2342 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2343 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2344 		return (0);
2345 	} else {
2346 		ilgstat = ILGSTAT_CHANGE;
2347 		/*
2348 		 * The current ill might be different from the one we were
2349 		 * asked to join on (if failover has occurred); we should
2350 		 * join on the ill stored in the ilg.  The original ill
2351 		 * is noted in ilg_orig_ifindex, which matched our request.
2352 		 */
2353 		ill = ilg->ilg_ill;
2354 		/* preserve existing state in case ip_addmulti() fails */
2355 		orig_fmode = ilg->ilg_fmode;
2356 		if (ilg->ilg_filter == NULL) {
2357 			orig_filter = NULL;
2358 		} else {
2359 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2360 			if (orig_filter == NULL) {
2361 				mutex_exit(&connp->conn_lock);
2362 				return (ENOMEM);
2363 			}
2364 		}
2365 	}
2366 
2367 	/*
2368 	 * Alloc buffer to copy new state into (see below) before
2369 	 * we make any changes, so we can bail if it fails.
2370 	 */
2371 	if ((new_filter = l_alloc()) == NULL) {
2372 		mutex_exit(&connp->conn_lock);
2373 		err = ENOMEM;
2374 		goto free_and_exit;
2375 	}
2376 
2377 	if (gf->gf_numsrc == 0) {
2378 		CLEAR_SLIST(ilg->ilg_filter);
2379 	} else {
2380 		slist_t *fp;
2381 		if (ilg->ilg_filter == NULL) {
2382 			fp = l_alloc();
2383 			if (fp == NULL) {
2384 				if (ilgstat == ILGSTAT_NEW)
2385 					ilg_delete(connp, ilg, NULL);
2386 				mutex_exit(&connp->conn_lock);
2387 				err = ENOMEM;
2388 				goto free_and_exit;
2389 			}
2390 		} else {
2391 			fp = ilg->ilg_filter;
2392 		}
2393 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2394 			sin6 = (struct sockaddr_in6 *)sl;
2395 			fp->sl_addr[i] = sin6->sin6_addr;
2396 		}
2397 		fp->sl_numsrc = gf->gf_numsrc;
2398 		ilg->ilg_filter = fp;
2399 	}
2400 	/*
2401 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2402 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2403 	 * So we need to translate here.
2404 	 */
2405 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2406 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2407 
2408 	/*
2409 	 * Save copy of ilg's filter state to pass to other functions,
2410 	 * so we can release conn_lock now.
2411 	 */
2412 	new_fmode = ilg->ilg_fmode;
2413 	l_copy(ilg->ilg_filter, new_filter);
2414 
2415 	mutex_exit(&connp->conn_lock);
2416 
2417 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2418 	    ilgstat, new_fmode, new_filter);
2419 	if (err != 0) {
2420 		/*
2421 		 * Restore the original filter state, or delete the
2422 		 * newly-created ilg.  We need to look up the ilg
2423 		 * again, though, since we've not been holding the
2424 		 * conn_lock.
2425 		 */
2426 		mutex_enter(&connp->conn_lock);
2427 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2428 		ASSERT(ilg != NULL);
2429 		if (ilgstat == ILGSTAT_NEW) {
2430 			ilg_delete(connp, ilg, NULL);
2431 		} else {
2432 			ilg->ilg_fmode = orig_fmode;
2433 			if (SLIST_IS_EMPTY(orig_filter)) {
2434 				CLEAR_SLIST(ilg->ilg_filter);
2435 			} else {
2436 				/*
2437 				 * We didn't free the filter, even if we
2438 				 * were trying to make the source list empty;
2439 				 * so if orig_filter isn't empty, the ilg
2440 				 * must still have a filter alloc'd.
2441 				 */
2442 				l_copy(orig_filter, ilg->ilg_filter);
2443 			}
2444 		}
2445 		mutex_exit(&connp->conn_lock);
2446 	}
2447 
2448 free_and_exit:
2449 	l_free(orig_filter);
2450 	l_free(new_filter);
2451 
2452 	return (err);
2453 }
2454 
2455 /*
2456  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2457  */
2458 /* ARGSUSED */
2459 int
2460 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2461     ip_ioctl_cmd_t *ipip, void *ifreq)
2462 {
2463 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2464 	/* existence verified in ip_wput_nondata() */
2465 	mblk_t *data_mp = mp->b_cont->b_cont;
2466 	int datalen, err, cmd, minsize;
2467 	int expsize = 0;
2468 	conn_t *connp;
2469 	boolean_t isv6, is_v4only_api, getcmd;
2470 	struct sockaddr_in *gsin;
2471 	struct sockaddr_in6 *gsin6;
2472 	ipaddr_t v4grp;
2473 	in6_addr_t v6grp;
2474 	struct group_filter *gf = NULL;
2475 	struct ip_msfilter *imsf = NULL;
2476 	mblk_t *ndp;
2477 
2478 	if (data_mp->b_cont != NULL) {
2479 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2480 			return (ENOMEM);
2481 		freemsg(data_mp);
2482 		data_mp = ndp;
2483 		mp->b_cont->b_cont = data_mp;
2484 	}
2485 
2486 	cmd = iocp->ioc_cmd;
2487 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2488 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2489 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2490 	datalen = MBLKL(data_mp);
2491 
2492 	if (datalen < minsize)
2493 		return (EINVAL);
2494 
2495 	/*
2496 	 * now we know we have at least have the initial structure,
2497 	 * but need to check for the source list array.
2498 	 */
2499 	if (is_v4only_api) {
2500 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2501 		isv6 = B_FALSE;
2502 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2503 	} else {
2504 		gf = (struct group_filter *)data_mp->b_rptr;
2505 		if (gf->gf_group.ss_family == AF_INET6) {
2506 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2507 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2508 		} else {
2509 			isv6 = B_FALSE;
2510 		}
2511 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2512 	}
2513 	if (datalen < expsize)
2514 		return (EINVAL);
2515 
2516 	connp = Q_TO_CONN(q);
2517 
2518 	/* operation not supported on the virtual network interface */
2519 	if (IS_VNI(ipif->ipif_ill))
2520 		return (EINVAL);
2521 
2522 	if (isv6) {
2523 		ill_t *ill = ipif->ipif_ill;
2524 		ill_refhold(ill);
2525 
2526 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2527 		v6grp = gsin6->sin6_addr;
2528 		if (getcmd)
2529 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2530 		else
2531 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2532 
2533 		ill_refrele(ill);
2534 	} else {
2535 		boolean_t isv4mapped = B_FALSE;
2536 		if (is_v4only_api) {
2537 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2538 		} else {
2539 			if (gf->gf_group.ss_family == AF_INET) {
2540 				gsin = (struct sockaddr_in *)&gf->gf_group;
2541 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2542 			} else {
2543 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2544 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2545 				    v4grp);
2546 				isv4mapped = B_TRUE;
2547 			}
2548 		}
2549 		if (getcmd)
2550 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2551 			    isv4mapped);
2552 		else
2553 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2554 			    isv4mapped);
2555 	}
2556 
2557 	return (err);
2558 }
2559 
2560 /*
2561  * Finds the ipif based on information in the ioctl headers.  Needed to make
2562  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2563  * ioctls prior to calling the ioctl's handler function).
2564  */
2565 int
2566 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2567     cmd_info_t *ci, ipsq_func_t func)
2568 {
2569 	int cmd = ipip->ipi_cmd;
2570 	int err = 0;
2571 	conn_t *connp;
2572 	ipif_t *ipif;
2573 	/* caller has verified this mblk exists */
2574 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2575 	struct ip_msfilter *imsf;
2576 	struct group_filter *gf;
2577 	ipaddr_t v4addr, v4grp;
2578 	in6_addr_t v6grp;
2579 	uint32_t index;
2580 	zoneid_t zoneid;
2581 	ip_stack_t *ipst;
2582 
2583 	connp = Q_TO_CONN(q);
2584 	zoneid = connp->conn_zoneid;
2585 	ipst = connp->conn_netstack->netstack_ip;
2586 
2587 	/* don't allow multicast operations on a tcp conn */
2588 	if (IPCL_IS_TCP(connp))
2589 		return (ENOPROTOOPT);
2590 
2591 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2592 		/* don't allow v4-specific ioctls on v6 socket */
2593 		if (connp->conn_af_isv6)
2594 			return (EAFNOSUPPORT);
2595 
2596 		imsf = (struct ip_msfilter *)dbuf;
2597 		v4addr = imsf->imsf_interface.s_addr;
2598 		v4grp = imsf->imsf_multiaddr.s_addr;
2599 		if (v4addr == INADDR_ANY) {
2600 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2601 			if (ipif == NULL)
2602 				err = EADDRNOTAVAIL;
2603 		} else {
2604 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2605 			    func, &err, ipst);
2606 		}
2607 	} else {
2608 		boolean_t isv6 = B_FALSE;
2609 		gf = (struct group_filter *)dbuf;
2610 		index = gf->gf_interface;
2611 		if (gf->gf_group.ss_family == AF_INET6) {
2612 			struct sockaddr_in6 *sin6;
2613 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2614 			v6grp = sin6->sin6_addr;
2615 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2616 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2617 			else
2618 				isv6 = B_TRUE;
2619 		} else if (gf->gf_group.ss_family == AF_INET) {
2620 			struct sockaddr_in *sin;
2621 			sin = (struct sockaddr_in *)&gf->gf_group;
2622 			v4grp = sin->sin_addr.s_addr;
2623 		} else {
2624 			return (EAFNOSUPPORT);
2625 		}
2626 		if (index == 0) {
2627 			if (isv6) {
2628 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2629 				    ipst);
2630 			} else {
2631 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2632 			}
2633 			if (ipif == NULL)
2634 				err = EADDRNOTAVAIL;
2635 		} else {
2636 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2637 			    q, mp, func, &err, ipst);
2638 		}
2639 	}
2640 
2641 	ci->ci_ipif = ipif;
2642 	return (err);
2643 }
2644 
2645 /*
2646  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2647  * in in two stages, as the first copyin tells us the size of the attached
2648  * source buffer.  This function is called by ip_wput_nondata() after the
2649  * first copyin has completed; it figures out how big the second stage
2650  * needs to be, and kicks it off.
2651  *
2652  * In some cases (numsrc < 2), the second copyin is not needed as the
2653  * first one gets a complete structure containing 1 source addr.
2654  *
2655  * The function returns 0 if a second copyin has been started (i.e. there's
2656  * no more work to be done right now), or 1 if the second copyin is not
2657  * needed and ip_wput_nondata() can continue its processing.
2658  */
2659 int
2660 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2661 {
2662 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2663 	int cmd = iocp->ioc_cmd;
2664 	/* validity of this checked in ip_wput_nondata() */
2665 	mblk_t *mp1 = mp->b_cont->b_cont;
2666 	int copysize = 0;
2667 	int offset;
2668 
2669 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2670 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2671 		if (gf->gf_numsrc >= 2) {
2672 			offset = sizeof (struct group_filter);
2673 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2674 		}
2675 	} else {
2676 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2677 		if (imsf->imsf_numsrc >= 2) {
2678 			offset = sizeof (struct ip_msfilter);
2679 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2680 		}
2681 	}
2682 	if (copysize > 0) {
2683 		mi_copyin_n(q, mp, offset, copysize);
2684 		return (0);
2685 	}
2686 	return (1);
2687 }
2688 
2689 /*
2690  * Handle the following optmgmt:
2691  *	IP_ADD_MEMBERSHIP		must not have joined already
2692  *	MCAST_JOIN_GROUP		must not have joined already
2693  *	IP_BLOCK_SOURCE			must have joined already
2694  *	MCAST_BLOCK_SOURCE		must have joined already
2695  *	IP_JOIN_SOURCE_GROUP		may have joined already
2696  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2697  *
2698  * fmode and src parameters may be used to determine which option is
2699  * being set, as follows (the IP_* and MCAST_* versions of each option
2700  * are functionally equivalent):
2701  *	opt			fmode			src
2702  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2703  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2704  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2705  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2706  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2707  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2708  *
2709  * Changing the filter mode is not allowed; if a matching ilg already
2710  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2711  *
2712  * Verifies that there is a source address of appropriate scope for
2713  * the group; if not, EADDRNOTAVAIL is returned.
2714  *
2715  * The interface to be used may be identified by an address or by an
2716  * index.  A pointer to the index is passed; if it is NULL, use the
2717  * address, otherwise, use the index.
2718  */
2719 int
2720 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2721     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2722     mblk_t *first_mp)
2723 {
2724 	ipif_t	*ipif;
2725 	ipsq_t	*ipsq;
2726 	int err = 0;
2727 	ill_t	*ill;
2728 
2729 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2730 	    ip_restart_optmgmt, &ipif);
2731 	if (err != 0) {
2732 		if (err != EINPROGRESS) {
2733 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2734 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2735 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2736 		}
2737 		return (err);
2738 	}
2739 	ASSERT(ipif != NULL);
2740 
2741 	ill = ipif->ipif_ill;
2742 	/* Operation not supported on a virtual network interface */
2743 	if (IS_VNI(ill)) {
2744 		ipif_refrele(ipif);
2745 		return (EINVAL);
2746 	}
2747 
2748 	if (checkonly) {
2749 		/*
2750 		 * do not do operation, just pretend to - new T_CHECK
2751 		 * semantics. The error return case above if encountered
2752 		 * considered a good enough "check" here.
2753 		 */
2754 		ipif_refrele(ipif);
2755 		return (0);
2756 	}
2757 
2758 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2759 	    NEW_OP);
2760 
2761 	/* unspecified source addr => no source filtering */
2762 	err = ilg_add(connp, group, ipif, fmode, src);
2763 
2764 	IPSQ_EXIT(ipsq);
2765 
2766 	ipif_refrele(ipif);
2767 	return (err);
2768 }
2769 
2770 /*
2771  * Handle the following optmgmt:
2772  *	IPV6_JOIN_GROUP			must not have joined already
2773  *	MCAST_JOIN_GROUP		must not have joined already
2774  *	MCAST_BLOCK_SOURCE		must have joined already
2775  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2776  *
2777  * fmode and src parameters may be used to determine which option is
2778  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2779  * are functionally equivalent):
2780  *	opt			fmode			v6src
2781  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2782  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2783  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2784  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2785  *
2786  * Changing the filter mode is not allowed; if a matching ilg already
2787  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2788  *
2789  * Verifies that there is a source address of appropriate scope for
2790  * the group; if not, EADDRNOTAVAIL is returned.
2791  *
2792  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2793  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2794  * v6src is also v4-mapped.
2795  */
2796 int
2797 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2798     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2799     const in6_addr_t *v6src, mblk_t *first_mp)
2800 {
2801 	ill_t *ill;
2802 	ipif_t	*ipif;
2803 	char buf[INET6_ADDRSTRLEN];
2804 	ipaddr_t v4group, v4src;
2805 	boolean_t isv6;
2806 	ipsq_t	*ipsq;
2807 	int	err;
2808 
2809 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2810 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2811 	if (err != 0) {
2812 		if (err != EINPROGRESS) {
2813 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2814 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2815 			    sizeof (buf)), ifindex));
2816 		}
2817 		return (err);
2818 	}
2819 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2820 
2821 	/* operation is not supported on the virtual network interface */
2822 	if (isv6) {
2823 		if (IS_VNI(ill)) {
2824 			ill_refrele(ill);
2825 			return (EINVAL);
2826 		}
2827 	} else {
2828 		if (IS_VNI(ipif->ipif_ill)) {
2829 			ipif_refrele(ipif);
2830 			return (EINVAL);
2831 		}
2832 	}
2833 
2834 	if (checkonly) {
2835 		/*
2836 		 * do not do operation, just pretend to - new T_CHECK
2837 		 * semantics. The error return case above if encountered
2838 		 * considered a good enough "check" here.
2839 		 */
2840 		if (isv6)
2841 			ill_refrele(ill);
2842 		else
2843 			ipif_refrele(ipif);
2844 		return (0);
2845 	}
2846 
2847 	if (!isv6) {
2848 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2849 		    ipsq, NEW_OP);
2850 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2851 		IPSQ_EXIT(ipsq);
2852 		ipif_refrele(ipif);
2853 	} else {
2854 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2855 		    ipsq, NEW_OP);
2856 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2857 		IPSQ_EXIT(ipsq);
2858 		ill_refrele(ill);
2859 	}
2860 
2861 	return (err);
2862 }
2863 
2864 static int
2865 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2866     mcast_record_t fmode, ipaddr_t src)
2867 {
2868 	ilg_t	*ilg;
2869 	in6_addr_t v6src;
2870 	boolean_t leaving = B_FALSE;
2871 
2872 	ASSERT(IAM_WRITER_IPIF(ipif));
2873 
2874 	/*
2875 	 * The ilg is valid only while we hold the conn lock. Once we drop
2876 	 * the lock, another thread can locate another ilg on this connp,
2877 	 * but on a different ipif, and delete it, and cause the ilg array
2878 	 * to be reallocated and copied. Hence do the ilg_delete before
2879 	 * dropping the lock.
2880 	 */
2881 	mutex_enter(&connp->conn_lock);
2882 	ilg = ilg_lookup_ipif(connp, group, ipif);
2883 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2884 		mutex_exit(&connp->conn_lock);
2885 		return (EADDRNOTAVAIL);
2886 	}
2887 
2888 	/*
2889 	 * Decide if we're actually deleting the ilg or just removing a
2890 	 * source filter address; if just removing an addr, make sure we
2891 	 * aren't trying to change the filter mode, and that the addr is
2892 	 * actually in our filter list already.  If we're removing the
2893 	 * last src in an include list, just delete the ilg.
2894 	 */
2895 	if (src == INADDR_ANY) {
2896 		v6src = ipv6_all_zeros;
2897 		leaving = B_TRUE;
2898 	} else {
2899 		int err = 0;
2900 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2901 		if (fmode != ilg->ilg_fmode)
2902 			err = EINVAL;
2903 		else if (ilg->ilg_filter == NULL ||
2904 		    !list_has_addr(ilg->ilg_filter, &v6src))
2905 			err = EADDRNOTAVAIL;
2906 		if (err != 0) {
2907 			mutex_exit(&connp->conn_lock);
2908 			return (err);
2909 		}
2910 		if (fmode == MODE_IS_INCLUDE &&
2911 		    ilg->ilg_filter->sl_numsrc == 1) {
2912 			v6src = ipv6_all_zeros;
2913 			leaving = B_TRUE;
2914 		}
2915 	}
2916 
2917 	ilg_delete(connp, ilg, &v6src);
2918 	mutex_exit(&connp->conn_lock);
2919 
2920 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2921 	return (0);
2922 }
2923 
2924 static int
2925 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2926     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2927 {
2928 	ilg_t	*ilg;
2929 	ill_t	*ilg_ill;
2930 	uint_t	ilg_orig_ifindex;
2931 	boolean_t leaving = B_TRUE;
2932 
2933 	ASSERT(IAM_WRITER_ILL(ill));
2934 
2935 	/*
2936 	 * Use the index that we originally used to join. We can't
2937 	 * use the ill directly because ilg_ill could point to
2938 	 * a new ill if things have moved.
2939 	 */
2940 	mutex_enter(&connp->conn_lock);
2941 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2942 	    ill->ill_phyint->phyint_ifindex);
2943 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2944 		mutex_exit(&connp->conn_lock);
2945 		return (EADDRNOTAVAIL);
2946 	}
2947 
2948 	/*
2949 	 * Decide if we're actually deleting the ilg or just removing a
2950 	 * source filter address; if just removing an addr, make sure we
2951 	 * aren't trying to change the filter mode, and that the addr is
2952 	 * actually in our filter list already.  If we're removing the
2953 	 * last src in an include list, just delete the ilg.
2954 	 */
2955 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2956 		int err = 0;
2957 		if (fmode != ilg->ilg_fmode)
2958 			err = EINVAL;
2959 		else if (ilg->ilg_filter == NULL ||
2960 		    !list_has_addr(ilg->ilg_filter, v6src))
2961 			err = EADDRNOTAVAIL;
2962 		if (err != 0) {
2963 			mutex_exit(&connp->conn_lock);
2964 			return (err);
2965 		}
2966 		if (fmode == MODE_IS_INCLUDE &&
2967 		    ilg->ilg_filter->sl_numsrc == 1)
2968 			v6src = NULL;
2969 		else
2970 			leaving = B_FALSE;
2971 	}
2972 
2973 	ilg_ill = ilg->ilg_ill;
2974 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2975 	ilg_delete(connp, ilg, v6src);
2976 	mutex_exit(&connp->conn_lock);
2977 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2978 	    connp->conn_zoneid, B_FALSE, leaving);
2979 
2980 	return (0);
2981 }
2982 
2983 /*
2984  * Handle the following optmgmt:
2985  *	IP_DROP_MEMBERSHIP		will leave
2986  *	MCAST_LEAVE_GROUP		will leave
2987  *	IP_UNBLOCK_SOURCE		will not leave
2988  *	MCAST_UNBLOCK_SOURCE		will not leave
2989  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2990  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2991  *
2992  * fmode and src parameters may be used to determine which option is
2993  * being set, as follows (the IP_* and MCAST_* versions of each option
2994  * are functionally equivalent):
2995  *	opt			 fmode			src
2996  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2997  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2998  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2999  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3000  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3001  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3002  *
3003  * Changing the filter mode is not allowed; if a matching ilg already
3004  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3005  *
3006  * The interface to be used may be identified by an address or by an
3007  * index.  A pointer to the index is passed; if it is NULL, use the
3008  * address, otherwise, use the index.
3009  */
3010 int
3011 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3012     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3013     mblk_t *first_mp)
3014 {
3015 	ipif_t	*ipif;
3016 	ipsq_t	*ipsq;
3017 	int	err;
3018 	ill_t	*ill;
3019 
3020 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3021 	    ip_restart_optmgmt, &ipif);
3022 	if (err != 0) {
3023 		if (err != EINPROGRESS) {
3024 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3025 			    "0x%x, ifaddr 0x%x\n",
3026 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3027 		}
3028 		return (err);
3029 	}
3030 	ASSERT(ipif != NULL);
3031 
3032 	ill = ipif->ipif_ill;
3033 	/* Operation not supported on a virtual network interface */
3034 	if (IS_VNI(ill)) {
3035 		ipif_refrele(ipif);
3036 		return (EINVAL);
3037 	}
3038 
3039 	if (checkonly) {
3040 		/*
3041 		 * do not do operation, just pretend to - new T_CHECK
3042 		 * semantics. The error return case above if encountered
3043 		 * considered a good enough "check" here.
3044 		 */
3045 		ipif_refrele(ipif);
3046 		return (0);
3047 	}
3048 
3049 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3050 	    NEW_OP);
3051 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3052 	IPSQ_EXIT(ipsq);
3053 
3054 	ipif_refrele(ipif);
3055 	return (err);
3056 }
3057 
3058 /*
3059  * Handle the following optmgmt:
3060  *	IPV6_LEAVE_GROUP		will leave
3061  *	MCAST_LEAVE_GROUP		will leave
3062  *	MCAST_UNBLOCK_SOURCE		will not leave
3063  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3064  *
3065  * fmode and src parameters may be used to determine which option is
3066  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3067  * are functionally equivalent):
3068  *	opt			 fmode			v6src
3069  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3070  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3071  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3072  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3073  *
3074  * Changing the filter mode is not allowed; if a matching ilg already
3075  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3076  *
3077  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3078  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3079  * v6src is also v4-mapped.
3080  */
3081 int
3082 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3083     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3084     const in6_addr_t *v6src, mblk_t *first_mp)
3085 {
3086 	ill_t *ill;
3087 	ipif_t	*ipif;
3088 	char	buf[INET6_ADDRSTRLEN];
3089 	ipaddr_t v4group, v4src;
3090 	boolean_t isv6;
3091 	ipsq_t	*ipsq;
3092 	int	err;
3093 
3094 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3095 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3096 	if (err != 0) {
3097 		if (err != EINPROGRESS) {
3098 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3099 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3100 			    sizeof (buf)), ifindex));
3101 		}
3102 		return (err);
3103 	}
3104 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3105 
3106 	/* operation is not supported on the virtual network interface */
3107 	if (isv6) {
3108 		if (IS_VNI(ill)) {
3109 			ill_refrele(ill);
3110 			return (EINVAL);
3111 		}
3112 	} else {
3113 		if (IS_VNI(ipif->ipif_ill)) {
3114 			ipif_refrele(ipif);
3115 			return (EINVAL);
3116 		}
3117 	}
3118 
3119 	if (checkonly) {
3120 		/*
3121 		 * do not do operation, just pretend to - new T_CHECK
3122 		 * semantics. The error return case above if encountered
3123 		 * considered a good enough "check" here.
3124 		 */
3125 		if (isv6)
3126 			ill_refrele(ill);
3127 		else
3128 			ipif_refrele(ipif);
3129 		return (0);
3130 	}
3131 
3132 	if (!isv6) {
3133 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3134 		    ipsq, NEW_OP);
3135 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3136 		    v4src);
3137 		IPSQ_EXIT(ipsq);
3138 		ipif_refrele(ipif);
3139 	} else {
3140 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3141 		    ipsq, NEW_OP);
3142 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3143 		    v6src);
3144 		IPSQ_EXIT(ipsq);
3145 		ill_refrele(ill);
3146 	}
3147 
3148 	return (err);
3149 }
3150 
3151 /*
3152  * Group mgmt for upper conn that passes things down
3153  * to the interface multicast list (and DLPI)
3154  * These routines can handle new style options that specify an interface name
3155  * as opposed to an interface address (needed for general handling of
3156  * unnumbered interfaces.)
3157  */
3158 
3159 /*
3160  * Add a group to an upper conn group data structure and pass things down
3161  * to the interface multicast list (and DLPI)
3162  */
3163 static int
3164 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3165     ipaddr_t src)
3166 {
3167 	int	error = 0;
3168 	ill_t	*ill;
3169 	ilg_t	*ilg;
3170 	ilg_stat_t ilgstat;
3171 	slist_t	*new_filter = NULL;
3172 	int	new_fmode;
3173 
3174 	ASSERT(IAM_WRITER_IPIF(ipif));
3175 
3176 	ill = ipif->ipif_ill;
3177 
3178 	if (!(ill->ill_flags & ILLF_MULTICAST))
3179 		return (EADDRNOTAVAIL);
3180 
3181 	/*
3182 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3183 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3184 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3185 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3186 	 * but both operations happen on the same conn.
3187 	 */
3188 	mutex_enter(&connp->conn_lock);
3189 	ilg = ilg_lookup_ipif(connp, group, ipif);
3190 
3191 	/*
3192 	 * Depending on the option we're handling, may or may not be okay
3193 	 * if group has already been added.  Figure out our rules based
3194 	 * on fmode and src params.  Also make sure there's enough room
3195 	 * in the filter if we're adding a source to an existing filter.
3196 	 */
3197 	if (src == INADDR_ANY) {
3198 		/* we're joining for all sources, must not have joined */
3199 		if (ilg != NULL)
3200 			error = EADDRINUSE;
3201 	} else {
3202 		if (fmode == MODE_IS_EXCLUDE) {
3203 			/* (excl {addr}) => block source, must have joined */
3204 			if (ilg == NULL)
3205 				error = EADDRNOTAVAIL;
3206 		}
3207 		/* (incl {addr}) => join source, may have joined */
3208 
3209 		if (ilg != NULL &&
3210 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3211 			error = ENOBUFS;
3212 	}
3213 	if (error != 0) {
3214 		mutex_exit(&connp->conn_lock);
3215 		return (error);
3216 	}
3217 
3218 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3219 
3220 	/*
3221 	 * Alloc buffer to copy new state into (see below) before
3222 	 * we make any changes, so we can bail if it fails.
3223 	 */
3224 	if ((new_filter = l_alloc()) == NULL) {
3225 		mutex_exit(&connp->conn_lock);
3226 		return (ENOMEM);
3227 	}
3228 
3229 	if (ilg == NULL) {
3230 		ilgstat = ILGSTAT_NEW;
3231 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3232 			mutex_exit(&connp->conn_lock);
3233 			l_free(new_filter);
3234 			return (ENOMEM);
3235 		}
3236 		if (src != INADDR_ANY) {
3237 			ilg->ilg_filter = l_alloc();
3238 			if (ilg->ilg_filter == NULL) {
3239 				ilg_delete(connp, ilg, NULL);
3240 				mutex_exit(&connp->conn_lock);
3241 				l_free(new_filter);
3242 				return (ENOMEM);
3243 			}
3244 			ilg->ilg_filter->sl_numsrc = 1;
3245 			IN6_IPADDR_TO_V4MAPPED(src,
3246 			    &ilg->ilg_filter->sl_addr[0]);
3247 		}
3248 		if (group == INADDR_ANY) {
3249 			ilg->ilg_v6group = ipv6_all_zeros;
3250 		} else {
3251 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3252 		}
3253 		ilg->ilg_ipif = ipif;
3254 		ilg->ilg_ill = NULL;
3255 		ilg->ilg_orig_ifindex = 0;
3256 		ilg->ilg_fmode = fmode;
3257 	} else {
3258 		int index;
3259 		in6_addr_t v6src;
3260 		ilgstat = ILGSTAT_CHANGE;
3261 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3262 			mutex_exit(&connp->conn_lock);
3263 			l_free(new_filter);
3264 			return (EINVAL);
3265 		}
3266 		if (ilg->ilg_filter == NULL) {
3267 			ilg->ilg_filter = l_alloc();
3268 			if (ilg->ilg_filter == NULL) {
3269 				mutex_exit(&connp->conn_lock);
3270 				l_free(new_filter);
3271 				return (ENOMEM);
3272 			}
3273 		}
3274 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3275 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3276 			mutex_exit(&connp->conn_lock);
3277 			l_free(new_filter);
3278 			return (EADDRNOTAVAIL);
3279 		}
3280 		index = ilg->ilg_filter->sl_numsrc++;
3281 		ilg->ilg_filter->sl_addr[index] = v6src;
3282 	}
3283 
3284 	/*
3285 	 * Save copy of ilg's filter state to pass to other functions,
3286 	 * so we can release conn_lock now.
3287 	 */
3288 	new_fmode = ilg->ilg_fmode;
3289 	l_copy(ilg->ilg_filter, new_filter);
3290 
3291 	mutex_exit(&connp->conn_lock);
3292 
3293 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3294 	if (error != 0) {
3295 		/*
3296 		 * Need to undo what we did before calling ip_addmulti()!
3297 		 * Must look up the ilg again since we've not been holding
3298 		 * conn_lock.
3299 		 */
3300 		in6_addr_t v6src;
3301 		if (ilgstat == ILGSTAT_NEW)
3302 			v6src = ipv6_all_zeros;
3303 		else
3304 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3305 		mutex_enter(&connp->conn_lock);
3306 		ilg = ilg_lookup_ipif(connp, group, ipif);
3307 		ASSERT(ilg != NULL);
3308 		ilg_delete(connp, ilg, &v6src);
3309 		mutex_exit(&connp->conn_lock);
3310 		l_free(new_filter);
3311 		return (error);
3312 	}
3313 
3314 	l_free(new_filter);
3315 	return (0);
3316 }
3317 
3318 static int
3319 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3320     mcast_record_t fmode, const in6_addr_t *v6src)
3321 {
3322 	int	error = 0;
3323 	int	orig_ifindex;
3324 	ilg_t	*ilg;
3325 	ilg_stat_t ilgstat;
3326 	slist_t	*new_filter = NULL;
3327 	int	new_fmode;
3328 
3329 	ASSERT(IAM_WRITER_ILL(ill));
3330 
3331 	if (!(ill->ill_flags & ILLF_MULTICAST))
3332 		return (EADDRNOTAVAIL);
3333 
3334 	/*
3335 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3336 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3337 	 * and hme1 map to different ipsq's, but both operations happen
3338 	 * on the same conn.
3339 	 */
3340 	mutex_enter(&connp->conn_lock);
3341 
3342 	/*
3343 	 * Use the ifindex to do the lookup. We can't use the ill
3344 	 * directly because ilg_ill could point to a different ill if
3345 	 * things have moved.
3346 	 */
3347 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3348 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3349 
3350 	/*
3351 	 * Depending on the option we're handling, may or may not be okay
3352 	 * if group has already been added.  Figure out our rules based
3353 	 * on fmode and src params.  Also make sure there's enough room
3354 	 * in the filter if we're adding a source to an existing filter.
3355 	 */
3356 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3357 		/* we're joining for all sources, must not have joined */
3358 		if (ilg != NULL)
3359 			error = EADDRINUSE;
3360 	} else {
3361 		if (fmode == MODE_IS_EXCLUDE) {
3362 			/* (excl {addr}) => block source, must have joined */
3363 			if (ilg == NULL)
3364 				error = EADDRNOTAVAIL;
3365 		}
3366 		/* (incl {addr}) => join source, may have joined */
3367 
3368 		if (ilg != NULL &&
3369 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3370 			error = ENOBUFS;
3371 	}
3372 	if (error != 0) {
3373 		mutex_exit(&connp->conn_lock);
3374 		return (error);
3375 	}
3376 
3377 	/*
3378 	 * Alloc buffer to copy new state into (see below) before
3379 	 * we make any changes, so we can bail if it fails.
3380 	 */
3381 	if ((new_filter = l_alloc()) == NULL) {
3382 		mutex_exit(&connp->conn_lock);
3383 		return (ENOMEM);
3384 	}
3385 
3386 	if (ilg == NULL) {
3387 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3388 			mutex_exit(&connp->conn_lock);
3389 			l_free(new_filter);
3390 			return (ENOMEM);
3391 		}
3392 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3393 			ilg->ilg_filter = l_alloc();
3394 			if (ilg->ilg_filter == NULL) {
3395 				ilg_delete(connp, ilg, NULL);
3396 				mutex_exit(&connp->conn_lock);
3397 				l_free(new_filter);
3398 				return (ENOMEM);
3399 			}
3400 			ilg->ilg_filter->sl_numsrc = 1;
3401 			ilg->ilg_filter->sl_addr[0] = *v6src;
3402 		}
3403 		ilgstat = ILGSTAT_NEW;
3404 		ilg->ilg_v6group = *v6group;
3405 		ilg->ilg_fmode = fmode;
3406 		ilg->ilg_ipif = NULL;
3407 		/*
3408 		 * Choose our target ill to join on. This might be different
3409 		 * from the ill we've been given if it's currently down and
3410 		 * part of a group.
3411 		 *
3412 		 * new ill is not refheld; we are writer.
3413 		 */
3414 		ill = ip_choose_multi_ill(ill, v6group);
3415 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3416 		ilg->ilg_ill = ill;
3417 		/*
3418 		 * Remember the orig_ifindex that we joined on, so that we
3419 		 * can successfully delete them later on and also search
3420 		 * for duplicates if the application wants to join again.
3421 		 */
3422 		ilg->ilg_orig_ifindex = orig_ifindex;
3423 	} else {
3424 		int index;
3425 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3426 			mutex_exit(&connp->conn_lock);
3427 			l_free(new_filter);
3428 			return (EINVAL);
3429 		}
3430 		if (ilg->ilg_filter == NULL) {
3431 			ilg->ilg_filter = l_alloc();
3432 			if (ilg->ilg_filter == NULL) {
3433 				mutex_exit(&connp->conn_lock);
3434 				l_free(new_filter);
3435 				return (ENOMEM);
3436 			}
3437 		}
3438 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3439 			mutex_exit(&connp->conn_lock);
3440 			l_free(new_filter);
3441 			return (EADDRNOTAVAIL);
3442 		}
3443 		ilgstat = ILGSTAT_CHANGE;
3444 		index = ilg->ilg_filter->sl_numsrc++;
3445 		ilg->ilg_filter->sl_addr[index] = *v6src;
3446 		/*
3447 		 * The current ill might be different from the one we were
3448 		 * asked to join on (if failover has occurred); we should
3449 		 * join on the ill stored in the ilg.  The original ill
3450 		 * is noted in ilg_orig_ifindex, which matched our request.
3451 		 */
3452 		ill = ilg->ilg_ill;
3453 	}
3454 
3455 	/*
3456 	 * Save copy of ilg's filter state to pass to other functions,
3457 	 * so we can release conn_lock now.
3458 	 */
3459 	new_fmode = ilg->ilg_fmode;
3460 	l_copy(ilg->ilg_filter, new_filter);
3461 
3462 	mutex_exit(&connp->conn_lock);
3463 
3464 	/*
3465 	 * Now update the ill. We wait to do this until after the ilg
3466 	 * has been updated because we need to update the src filter
3467 	 * info for the ill, which involves looking at the status of
3468 	 * all the ilgs associated with this group/interface pair.
3469 	 */
3470 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3471 	    ilgstat, new_fmode, new_filter);
3472 	if (error != 0) {
3473 		/*
3474 		 * But because we waited, we have to undo the ilg update
3475 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3476 		 * again, since we've not been holding conn_lock.
3477 		 */
3478 		in6_addr_t delsrc =
3479 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3480 		mutex_enter(&connp->conn_lock);
3481 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3482 		ASSERT(ilg != NULL);
3483 		ilg_delete(connp, ilg, &delsrc);
3484 		mutex_exit(&connp->conn_lock);
3485 		l_free(new_filter);
3486 		return (error);
3487 	}
3488 
3489 	l_free(new_filter);
3490 
3491 	return (0);
3492 }
3493 
3494 /*
3495  * Find an IPv4 ilg matching group, ill and source
3496  */
3497 ilg_t *
3498 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3499 {
3500 	in6_addr_t v6group, v6src;
3501 	int i;
3502 	boolean_t isinlist;
3503 	ilg_t *ilg;
3504 	ipif_t *ipif;
3505 	ill_t *ilg_ill;
3506 
3507 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3508 
3509 	/*
3510 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3511 	 */
3512 	if (group == INADDR_ANY)
3513 		v6group = ipv6_all_zeros;
3514 	else
3515 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3516 
3517 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3518 		/* ilg_ipif is NULL for v6; skip them */
3519 		ilg = &connp->conn_ilg[i];
3520 		if ((ipif = ilg->ilg_ipif) == NULL)
3521 			continue;
3522 		ASSERT(ilg->ilg_ill == NULL);
3523 		ilg_ill = ipif->ipif_ill;
3524 		ASSERT(!ilg_ill->ill_isv6);
3525 		if (ilg_ill == ill &&
3526 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3527 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3528 				/* no source filter, so this is a match */
3529 				return (ilg);
3530 			}
3531 			break;
3532 		}
3533 	}
3534 	if (i == connp->conn_ilg_inuse)
3535 		return (NULL);
3536 
3537 	/*
3538 	 * we have an ilg with matching ill and group; but
3539 	 * the ilg has a source list that we must check.
3540 	 */
3541 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3542 	isinlist = B_FALSE;
3543 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3544 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3545 			isinlist = B_TRUE;
3546 			break;
3547 		}
3548 	}
3549 
3550 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3551 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3552 		return (ilg);
3553 
3554 	return (NULL);
3555 }
3556 
3557 /*
3558  * Find an IPv6 ilg matching group, ill, and source
3559  */
3560 ilg_t *
3561 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3562     const in6_addr_t *v6src, ill_t *ill)
3563 {
3564 	int i;
3565 	boolean_t isinlist;
3566 	ilg_t *ilg;
3567 	ill_t *ilg_ill;
3568 
3569 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3570 
3571 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3572 		ilg = &connp->conn_ilg[i];
3573 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3574 			continue;
3575 		ASSERT(ilg->ilg_ipif == NULL);
3576 		ASSERT(ilg_ill->ill_isv6);
3577 		if (ilg_ill == ill &&
3578 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3579 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3580 				/* no source filter, so this is a match */
3581 				return (ilg);
3582 			}
3583 			break;
3584 		}
3585 	}
3586 	if (i == connp->conn_ilg_inuse)
3587 		return (NULL);
3588 
3589 	/*
3590 	 * we have an ilg with matching ill and group; but
3591 	 * the ilg has a source list that we must check.
3592 	 */
3593 	isinlist = B_FALSE;
3594 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3595 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3596 			isinlist = B_TRUE;
3597 			break;
3598 		}
3599 	}
3600 
3601 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3602 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3603 		return (ilg);
3604 
3605 	return (NULL);
3606 }
3607 
3608 /*
3609  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3610  * This is useful when the interface fails and we have moved
3611  * to a new ill, but still would like to locate using the index
3612  * that we originally used to join. Used only for IPv6 currently.
3613  */
3614 static ilg_t *
3615 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3616 {
3617 	ilg_t	*ilg;
3618 	int	i;
3619 
3620 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3621 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3622 		ilg = &connp->conn_ilg[i];
3623 		/* ilg_ill is NULL for V4. Skip them */
3624 		if (ilg->ilg_ill == NULL)
3625 			continue;
3626 		/* ilg_ipif is NULL for V6 */
3627 		ASSERT(ilg->ilg_ipif == NULL);
3628 		ASSERT(ilg->ilg_orig_ifindex != 0);
3629 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3630 		    ilg->ilg_orig_ifindex == ifindex) {
3631 			return (ilg);
3632 		}
3633 	}
3634 	return (NULL);
3635 }
3636 
3637 /*
3638  * Find an IPv6 ilg matching group and ill
3639  */
3640 ilg_t *
3641 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3642 {
3643 	ilg_t	*ilg;
3644 	int	i;
3645 	ill_t 	*mem_ill;
3646 
3647 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3648 
3649 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3650 		ilg = &connp->conn_ilg[i];
3651 		if ((mem_ill = ilg->ilg_ill) == NULL)
3652 			continue;
3653 		ASSERT(ilg->ilg_ipif == NULL);
3654 		ASSERT(mem_ill->ill_isv6);
3655 		if (mem_ill == ill &&
3656 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3657 			return (ilg);
3658 	}
3659 	return (NULL);
3660 }
3661 
3662 /*
3663  * Find an IPv4 ilg matching group and ipif
3664  */
3665 static ilg_t *
3666 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3667 {
3668 	in6_addr_t v6group;
3669 	int	i;
3670 
3671 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3672 	ASSERT(!ipif->ipif_ill->ill_isv6);
3673 
3674 	if (group == INADDR_ANY)
3675 		v6group = ipv6_all_zeros;
3676 	else
3677 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3678 
3679 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3680 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3681 		    &v6group) &&
3682 		    connp->conn_ilg[i].ilg_ipif == ipif)
3683 			return (&connp->conn_ilg[i]);
3684 	}
3685 	return (NULL);
3686 }
3687 
3688 /*
3689  * If a source address is passed in (src != NULL and src is not
3690  * unspecified), remove the specified src addr from the given ilg's
3691  * filter list, else delete the ilg.
3692  */
3693 static void
3694 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3695 {
3696 	int	i;
3697 
3698 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3699 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3700 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3701 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3702 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3703 
3704 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3705 		if (connp->conn_ilg_walker_cnt != 0) {
3706 			ilg->ilg_flags |= ILG_DELETED;
3707 			return;
3708 		}
3709 
3710 		FREE_SLIST(ilg->ilg_filter);
3711 
3712 		i = ilg - &connp->conn_ilg[0];
3713 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3714 
3715 		/* Move other entries up one step */
3716 		connp->conn_ilg_inuse--;
3717 		for (; i < connp->conn_ilg_inuse; i++)
3718 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3719 
3720 		if (connp->conn_ilg_inuse == 0) {
3721 			mi_free((char *)connp->conn_ilg);
3722 			connp->conn_ilg = NULL;
3723 			cv_broadcast(&connp->conn_refcv);
3724 		}
3725 	} else {
3726 		l_remove(ilg->ilg_filter, src);
3727 	}
3728 }
3729 
3730 /*
3731  * Called from conn close. No new ilg can be added or removed.
3732  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3733  * will return error if conn has started closing.
3734  */
3735 void
3736 ilg_delete_all(conn_t *connp)
3737 {
3738 	int	i;
3739 	ipif_t	*ipif = NULL;
3740 	ill_t	*ill = NULL;
3741 	ilg_t	*ilg;
3742 	in6_addr_t v6group;
3743 	boolean_t success;
3744 	ipsq_t	*ipsq;
3745 	int	orig_ifindex;
3746 
3747 	mutex_enter(&connp->conn_lock);
3748 retry:
3749 	ILG_WALKER_HOLD(connp);
3750 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3751 		ilg = &connp->conn_ilg[i];
3752 		/*
3753 		 * Since this walk is not atomic (we drop the
3754 		 * conn_lock and wait in ipsq_enter) we need
3755 		 * to check for the ILG_DELETED flag.
3756 		 */
3757 		if (ilg->ilg_flags & ILG_DELETED) {
3758 			/* Go to the next ilg */
3759 			i--;
3760 			continue;
3761 		}
3762 		v6group = ilg->ilg_v6group;
3763 
3764 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3765 			ipif = ilg->ilg_ipif;
3766 			ill = ipif->ipif_ill;
3767 		} else {
3768 			ipif = NULL;
3769 			ill = ilg->ilg_ill;
3770 		}
3771 		/*
3772 		 * We may not be able to refhold the ill if the ill/ipif
3773 		 * is changing. But we need to make sure that the ill will
3774 		 * not vanish. So we just bump up the ill_waiter count.
3775 		 * If we are unable to do even that, then the ill is closing,
3776 		 * in which case the unplumb thread will handle the cleanup,
3777 		 * and we move on to the next ilg.
3778 		 */
3779 		if (!ill_waiter_inc(ill)) {
3780 			/* Go to the next ilg */
3781 			i--;
3782 			continue;
3783 		}
3784 		mutex_exit(&connp->conn_lock);
3785 		/*
3786 		 * To prevent deadlock between ill close which waits inside
3787 		 * the perimeter, and conn close, ipsq_enter returns error,
3788 		 * the moment ILL_CONDEMNED is set, in which case ill close
3789 		 * takes responsibility to cleanup the ilgs. Note that we
3790 		 * have not yet set condemned flag, otherwise the conn can't
3791 		 * be refheld for cleanup by those routines and it would be
3792 		 * a mutual deadlock.
3793 		 */
3794 		success = ipsq_enter(ill, B_FALSE);
3795 		ipsq = ill->ill_phyint->phyint_ipsq;
3796 		ill_waiter_dcr(ill);
3797 		mutex_enter(&connp->conn_lock);
3798 		if (!success) {
3799 			/* Go to the next ilg */
3800 			i--;
3801 			continue;
3802 		}
3803 
3804 		/*
3805 		 * Make sure that nothing has changed under. For eg.
3806 		 * a failover/failback can change ilg_ill while we were
3807 		 * waiting to become exclusive above
3808 		 */
3809 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3810 			ipif = ilg->ilg_ipif;
3811 			ill = ipif->ipif_ill;
3812 		} else {
3813 			ipif = NULL;
3814 			ill = ilg->ilg_ill;
3815 		}
3816 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3817 			/*
3818 			 * The ilg has changed under us probably due
3819 			 * to a failover or unplumb. Retry on the same ilg.
3820 			 */
3821 			mutex_exit(&connp->conn_lock);
3822 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3823 			mutex_enter(&connp->conn_lock);
3824 			continue;
3825 		}
3826 		v6group = ilg->ilg_v6group;
3827 		orig_ifindex = ilg->ilg_orig_ifindex;
3828 		ilg_delete(connp, ilg, NULL);
3829 		mutex_exit(&connp->conn_lock);
3830 
3831 		if (ipif != NULL)
3832 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3833 			    B_FALSE, B_TRUE);
3834 
3835 		else
3836 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3837 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3838 
3839 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3840 		mutex_enter(&connp->conn_lock);
3841 		/* Go to the next ilg */
3842 		i--;
3843 	}
3844 	ILG_WALKER_RELE(connp);
3845 
3846 	/* If any ill was skipped above wait and retry */
3847 	if (connp->conn_ilg_inuse != 0) {
3848 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3849 		goto retry;
3850 	}
3851 	mutex_exit(&connp->conn_lock);
3852 }
3853 
3854 /*
3855  * Called from ill close by ipcl_walk for clearing conn_ilg and
3856  * conn_multicast_ipif for a given ipif. conn is held by caller.
3857  * Note that ipcl_walk only walks conns that are not yet condemned.
3858  * condemned conns can't be refheld. For this reason, conn must become clean
3859  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3860  * condemned flag.
3861  */
3862 static void
3863 conn_delete_ipif(conn_t *connp, caddr_t arg)
3864 {
3865 	ipif_t	*ipif = (ipif_t *)arg;
3866 	int	i;
3867 	char	group_buf1[INET6_ADDRSTRLEN];
3868 	char	group_buf2[INET6_ADDRSTRLEN];
3869 	ipaddr_t group;
3870 	ilg_t	*ilg;
3871 
3872 	/*
3873 	 * Even though conn_ilg_inuse can change while we are in this loop,
3874 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3875 	 * be created or deleted for this connp, on this ill, since this ill
3876 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3877 	 */
3878 	mutex_enter(&connp->conn_lock);
3879 
3880 	/*
3881 	 * Increment the walker count, so that ilg repacking does not
3882 	 * occur while we are in the loop.
3883 	 */
3884 	ILG_WALKER_HOLD(connp);
3885 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3886 		ilg = &connp->conn_ilg[i];
3887 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3888 			continue;
3889 		/*
3890 		 * ip_close cannot be cleaning this ilg at the same time.
3891 		 * since it also has to execute in this ill's perimeter which
3892 		 * we are now holding. Only a clean conn can be condemned.
3893 		 */
3894 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3895 
3896 		/* Blow away the membership */
3897 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3898 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3899 		    group_buf1, sizeof (group_buf1)),
3900 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3901 		    group_buf2, sizeof (group_buf2)),
3902 		    ipif->ipif_ill->ill_name));
3903 
3904 		/* ilg_ipif is NULL for V6, so we won't be here */
3905 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3906 
3907 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3908 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3909 		mutex_exit(&connp->conn_lock);
3910 
3911 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3912 		mutex_enter(&connp->conn_lock);
3913 	}
3914 
3915 	/*
3916 	 * If we are the last walker, need to physically delete the
3917 	 * ilgs and repack.
3918 	 */
3919 	ILG_WALKER_RELE(connp);
3920 
3921 	if (connp->conn_multicast_ipif == ipif) {
3922 		/* Revert to late binding */
3923 		connp->conn_multicast_ipif = NULL;
3924 	}
3925 	mutex_exit(&connp->conn_lock);
3926 
3927 	conn_delete_ire(connp, (caddr_t)ipif);
3928 }
3929 
3930 /*
3931  * Called from ill close by ipcl_walk for clearing conn_ilg and
3932  * conn_multicast_ill for a given ill. conn is held by caller.
3933  * Note that ipcl_walk only walks conns that are not yet condemned.
3934  * condemned conns can't be refheld. For this reason, conn must become clean
3935  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3936  * condemned flag.
3937  */
3938 static void
3939 conn_delete_ill(conn_t *connp, caddr_t arg)
3940 {
3941 	ill_t	*ill = (ill_t *)arg;
3942 	int	i;
3943 	char	group_buf[INET6_ADDRSTRLEN];
3944 	in6_addr_t v6group;
3945 	int	orig_ifindex;
3946 	ilg_t	*ilg;
3947 
3948 	/*
3949 	 * Even though conn_ilg_inuse can change while we are in this loop,
3950 	 * no new ilgs can be created/deleted for this connp, on this
3951 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3952 	 * in this cleanup.
3953 	 */
3954 	mutex_enter(&connp->conn_lock);
3955 
3956 	/*
3957 	 * Increment the walker count, so that ilg repacking does not
3958 	 * occur while we are in the loop.
3959 	 */
3960 	ILG_WALKER_HOLD(connp);
3961 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3962 		ilg = &connp->conn_ilg[i];
3963 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3964 			/*
3965 			 * ip_close cannot be cleaning this ilg at the same
3966 			 * time, since it also has to execute in this ill's
3967 			 * perimeter which we are now holding. Only a clean
3968 			 * conn can be condemned.
3969 			 */
3970 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3971 
3972 			/* Blow away the membership */
3973 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3974 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3975 			    group_buf, sizeof (group_buf)),
3976 			    ill->ill_name));
3977 
3978 			v6group = ilg->ilg_v6group;
3979 			orig_ifindex = ilg->ilg_orig_ifindex;
3980 			ilg_delete(connp, ilg, NULL);
3981 			mutex_exit(&connp->conn_lock);
3982 
3983 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3984 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3985 			mutex_enter(&connp->conn_lock);
3986 		}
3987 	}
3988 	/*
3989 	 * If we are the last walker, need to physically delete the
3990 	 * ilgs and repack.
3991 	 */
3992 	ILG_WALKER_RELE(connp);
3993 
3994 	if (connp->conn_multicast_ill == ill) {
3995 		/* Revert to late binding */
3996 		connp->conn_multicast_ill = NULL;
3997 		connp->conn_orig_multicast_ifindex = 0;
3998 	}
3999 	mutex_exit(&connp->conn_lock);
4000 }
4001 
4002 /*
4003  * Called when an ipif is unplumbed to make sure that there are no
4004  * dangling conn references to that ipif.
4005  * Handles ilg_ipif and conn_multicast_ipif
4006  */
4007 void
4008 reset_conn_ipif(ipif)
4009 	ipif_t	*ipif;
4010 {
4011 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4012 
4013 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4014 }
4015 
4016 /*
4017  * Called when an ill is unplumbed to make sure that there are no
4018  * dangling conn references to that ill.
4019  * Handles ilg_ill, conn_multicast_ill.
4020  */
4021 void
4022 reset_conn_ill(ill_t *ill)
4023 {
4024 	ip_stack_t	*ipst = ill->ill_ipst;
4025 
4026 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4027 }
4028 
4029 #ifdef DEBUG
4030 /*
4031  * Walk functions walk all the interfaces in the system to make
4032  * sure that there is no refernece to the ipif or ill that is
4033  * going away.
4034  */
4035 int
4036 ilm_walk_ill(ill_t *ill)
4037 {
4038 	int cnt = 0;
4039 	ill_t *till;
4040 	ilm_t *ilm;
4041 	ill_walk_context_t ctx;
4042 	ip_stack_t	*ipst = ill->ill_ipst;
4043 
4044 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4045 	till = ILL_START_WALK_ALL(&ctx, ipst);
4046 	for (; till != NULL; till = ill_next(&ctx, till)) {
4047 		mutex_enter(&till->ill_lock);
4048 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4049 			if (ilm->ilm_ill == ill) {
4050 				cnt++;
4051 			}
4052 		}
4053 		mutex_exit(&till->ill_lock);
4054 	}
4055 	rw_exit(&ipst->ips_ill_g_lock);
4056 
4057 	return (cnt);
4058 }
4059 
4060 /*
4061  * This function is called before the ipif is freed.
4062  */
4063 int
4064 ilm_walk_ipif(ipif_t *ipif)
4065 {
4066 	int cnt = 0;
4067 	ill_t *till;
4068 	ilm_t *ilm;
4069 	ill_walk_context_t ctx;
4070 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4071 
4072 	till = ILL_START_WALK_ALL(&ctx, ipst);
4073 	for (; till != NULL; till = ill_next(&ctx, till)) {
4074 		mutex_enter(&till->ill_lock);
4075 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4076 			if (ilm->ilm_ipif == ipif) {
4077 					cnt++;
4078 			}
4079 		}
4080 		mutex_exit(&till->ill_lock);
4081 	}
4082 	return (cnt);
4083 }
4084 #endif
4085