xref: /titanic_52/usr/src/uts/common/inet/ip/ip_multi.c (revision d6c23f6fbecbcca8ddd2b74c6e10f37095f9fd46)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/systm.h>
44 #include <sys/strsubr.h>
45 #include <net/route.h>
46 #include <netinet/in.h>
47 #include <net/if_dl.h>
48 #include <netinet/ip6.h>
49 #include <netinet/icmp6.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 #include <inet/nd.h>
54 #include <inet/arp.h>
55 #include <inet/ip.h>
56 #include <inet/ip6.h>
57 #include <inet/ip_if.h>
58 #include <inet/ip_ndp.h>
59 #include <inet/ip_multi.h>
60 #include <inet/ipclassifier.h>
61 #include <inet/ipsec_impl.h>
62 #include <inet/sctp_ip.h>
63 #include <inet/ip_listutils.h>
64 #include <inet/udp_impl.h>
65 
66 /* igmpv3/mldv2 source filter manipulation */
67 static void	ilm_bld_flists(conn_t *conn, void *arg);
68 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
69     slist_t *flist);
70 
71 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
72     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
73     int orig_ifindex, zoneid_t zoneid);
74 static void	ilm_delete(ilm_t *ilm);
75 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
76 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
77 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
78     const in6_addr_t *v6group, int index);
79 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
80     ipif_t *ipif);
81 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
82     mcast_record_t fmode, ipaddr_t src);
83 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
84     mcast_record_t fmode, const in6_addr_t *v6src);
85 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
86 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
87     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
88 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
89     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
90 static void	conn_ilg_reap(conn_t *connp);
91 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
92     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
93 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
94     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
95     const in6_addr_t *v6src);
96 
97 /*
98  * MT notes:
99  *
100  * Multicast joins operate on both the ilg and ilm structures. Multiple
101  * threads operating on a conn (socket) trying to do multicast joins
102  * need to synchronize when operating on the ilg. Multiple threads
103  * potentially operating on different conns (socket endpoints) trying to
104  * do multicast joins could eventually end up trying to manipulate the
105  * ilm simultaneously and need to synchronize on the access to the ilm.
106  * Both are amenable to standard Solaris MT techniques, but it would be
107  * complex to handle a failover or failback which needs to manipulate
108  * ilg/ilms if an application can also simultaneously join/leave
109  * multicast groups. Hence multicast join/leave also go through the ipsq_t
110  * serialization.
111  *
112  * Multicast joins and leaves are single-threaded per phyint/IPMP group
113  * using the ipsq serialization mechanism.
114  *
115  * An ilm is an IP data structure used to track multicast join/leave.
116  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
117  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
118  * referencing the ilm. ilms are created / destroyed only as writer. ilms
119  * are not passed around, instead they are looked up and used under the
120  * ill_lock or as writer. So we don't need a dynamic refcount of the number
121  * of threads holding reference to an ilm.
122  *
123  * Multicast Join operation:
124  *
125  * The first step is to determine the ipif (v4) or ill (v6) on which
126  * the join operation is to be done. The join is done after becoming
127  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
128  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
129  * Multiple threads can attempt to join simultaneously on different ipif/ill
130  * on the same conn. In this case the ipsq serialization does not help in
131  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
132  * The conn_lock also protects all the ilg_t members.
133  *
134  * Leave operation.
135  *
136  * Similar to the join operation, the first step is to determine the ipif
137  * or ill (v6) on which the leave operation is to be done. The leave operation
138  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
139  * As with join ilg modification is done under the protection of the conn lock.
140  */
141 
/*
 * Attempt to enter the ipsq for `ipif' via ipsq_try_enter(), storing the
 * result in `ipsq'.  If ipsq_try_enter() returns NULL, the reference held
 * on `ipif' is released and the *calling function* returns EINPROGRESS;
 * the macro may therefore only be used in functions that return int.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and is safe as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter((ipif), NULL,			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ipif_refrele(ipif);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
150 
/*
 * Attempt to enter the ipsq for `ill' via ipsq_try_enter(), storing the
 * result in `ipsq'.  If ipsq_try_enter() returns NULL, the reference held
 * on `ill' is released and the *calling function* returns EINPROGRESS;
 * the macro may therefore only be used in functions that return int.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and is safe as the body of an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter(NULL, (ill),			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ill_refrele(ill);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
159 
/*
 * Call ipsq_exit() on `ipsq' unless it is NULL.
 *
 * Wrapped in do { } while (0) so that a following `else' cannot bind to
 * the macro's internal `if'.  Invoke it with a trailing semicolon.
 */
#define	IPSQ_EXIT(ipsq)				\
	do {					\
		if ((ipsq) != NULL)		\
			ipsq_exit(ipsq);	\
	} while (0)
163 
/*
 * Note that a walker of connp's ilg array is in progress by bumping
 * conn_ilg_walker_cnt.  Paired with ILG_WALKER_RELE().
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)
165 
/*
 * Drop one walker hold on connp's ilg array; when the count reaches zero,
 * call conn_ilg_reap() to clean up entries marked ILG_DELETED.
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and
 * can be followed by `else'.  Invoke it with a trailing semicolon.
 */
#define	ILG_WALKER_RELE(connp)					\
	do {							\
		(connp)->conn_ilg_walker_cnt--;			\
		if ((connp)->conn_ilg_walker_cnt == 0)		\
			conn_ilg_reap(connp);			\
	} while (0)
172 
173 static void
174 conn_ilg_reap(conn_t *connp)
175 {
176 	int	to;
177 	int	from;
178 	ilg_t	*ilg;
179 
180 	ASSERT(MUTEX_HELD(&connp->conn_lock));
181 
182 	to = 0;
183 	from = 0;
184 	while (from < connp->conn_ilg_inuse) {
185 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
186 			ilg = &connp->conn_ilg[from];
187 			FREE_SLIST(ilg->ilg_filter);
188 			ilg->ilg_flags &= ~ILG_DELETED;
189 			from++;
190 			continue;
191 		}
192 		if (to != from)
193 			connp->conn_ilg[to] = connp->conn_ilg[from];
194 		to++;
195 		from++;
196 	}
197 
198 	connp->conn_ilg_inuse = to;
199 
200 	if (connp->conn_ilg_inuse == 0) {
201 		mi_free((char *)connp->conn_ilg);
202 		connp->conn_ilg = NULL;
203 		cv_broadcast(&connp->conn_refcv);
204 	}
205 }
206 
/*
 * Allocate a zero-filled array of `count' objects of type `type' with
 * mi_zalloc(); evaluates to a `type *' (whatever mi_zalloc() returned).
 */
#define	GETSTRUCT(type, count)	\
	((type *)mi_zalloc(sizeof (type) * (count)))

/* Number of ilg_t slots by which a conn's ilg array is grown at a time. */
#define	ILG_ALLOC_CHUNK	16
211 
212 /*
213  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
214  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
215  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
216  * returned ilg).  Returns NULL on failure (ENOMEM).
217  *
218  * Assumes connp->conn_lock is held.
219  */
220 static ilg_t *
221 conn_ilg_alloc(conn_t *connp)
222 {
223 	ilg_t *new, *ret;
224 	int curcnt;
225 
226 	ASSERT(MUTEX_HELD(&connp->conn_lock));
227 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
228 
229 	if (connp->conn_ilg == NULL) {
230 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
231 		if (connp->conn_ilg == NULL)
232 			return (NULL);
233 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
234 		connp->conn_ilg_inuse = 0;
235 	}
236 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
237 		if (connp->conn_ilg_walker_cnt != 0) {
238 			/*
239 			 * XXX We cannot grow the array at this point
240 			 * because a list walker could be in progress, and
241 			 * we cannot wipe out the existing array until the
242 			 * walker is done. Just return NULL for now.
243 			 * ilg_delete_all() will have to be changed when
244 			 * this logic is changed.
245 			 */
246 			return (NULL);
247 		}
248 		curcnt = connp->conn_ilg_allocated;
249 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
250 		if (new == NULL)
251 			return (NULL);
252 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
253 		mi_free((char *)connp->conn_ilg);
254 		connp->conn_ilg = new;
255 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
256 	}
257 
258 	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
259 	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
260 	bzero(ret, sizeof (*ret));
261 	return (ret);
262 }
263 
264 typedef struct ilm_fbld_s {
265 	ilm_t		*fbld_ilm;
266 	int		fbld_in_cnt;
267 	int		fbld_ex_cnt;
268 	slist_t		fbld_in;
269 	slist_t		fbld_ex;
270 	boolean_t	fbld_in_overflow;
271 } ilm_fbld_t;
272 
273 static void
274 ilm_bld_flists(conn_t *conn, void *arg)
275 {
276 	int i;
277 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
278 	ilm_t *ilm = fbld->fbld_ilm;
279 	in6_addr_t *v6group = &ilm->ilm_v6addr;
280 
281 	if (conn->conn_ilg_inuse == 0)
282 		return;
283 
284 	/*
285 	 * Since we can't break out of the ipcl_walk once started, we still
286 	 * have to look at every conn.  But if we've already found one
287 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
288 	 * ilgs--that will be our state.
289 	 */
290 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
291 		return;
292 
293 	/*
294 	 * Check this conn's ilgs to see if any are interested in our
295 	 * ilm (group, interface match).  If so, update the master
296 	 * include and exclude lists we're building in the fbld struct
297 	 * with this ilg's filter info.
298 	 */
299 	mutex_enter(&conn->conn_lock);
300 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
301 		ilg_t *ilg = &conn->conn_ilg[i];
302 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
303 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
304 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
305 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
306 				fbld->fbld_in_cnt++;
307 				if (!fbld->fbld_in_overflow)
308 					l_union_in_a(&fbld->fbld_in,
309 					    ilg->ilg_filter,
310 					    &fbld->fbld_in_overflow);
311 			} else {
312 				fbld->fbld_ex_cnt++;
313 				/*
314 				 * On the first exclude list, don't try to do
315 				 * an intersection, as the master exclude list
316 				 * is intentionally empty.  If the master list
317 				 * is still empty on later iterations, that
318 				 * means we have at least one ilg with an empty
319 				 * exclude list, so that should be reflected
320 				 * when we take the intersection.
321 				 */
322 				if (fbld->fbld_ex_cnt == 1) {
323 					if (ilg->ilg_filter != NULL)
324 						l_copy(ilg->ilg_filter,
325 						    &fbld->fbld_ex);
326 				} else {
327 					l_intersection_in_a(&fbld->fbld_ex,
328 					    ilg->ilg_filter);
329 				}
330 			}
331 			/* there will only be one match, so break now. */
332 			break;
333 		}
334 	}
335 	mutex_exit(&conn->conn_lock);
336 }
337 
338 static void
339 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
340 {
341 	ilm_fbld_t fbld;
342 	ip_stack_t *ipst = ilm->ilm_ipst;
343 
344 	fbld.fbld_ilm = ilm;
345 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
346 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
347 	fbld.fbld_in_overflow = B_FALSE;
348 
349 	/* first, construct our master include and exclude lists */
350 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
351 
352 	/* now use those master lists to generate the interface filter */
353 
354 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
355 	if (fbld.fbld_in_overflow) {
356 		*fmode = MODE_IS_EXCLUDE;
357 		flist->sl_numsrc = 0;
358 		return;
359 	}
360 
361 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
362 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
363 		*fmode = MODE_IS_INCLUDE;
364 		flist->sl_numsrc = 0;
365 		return;
366 	}
367 
368 	/*
369 	 * If there are no exclude lists, then the interface filter
370 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
371 	 * exclude list makes the interface filter EXCLUDE, with its
372 	 * filter list equal to (fbld_ex - fbld_in).
373 	 */
374 	if (fbld.fbld_ex_cnt == 0) {
375 		*fmode = MODE_IS_INCLUDE;
376 		l_copy(&fbld.fbld_in, flist);
377 	} else {
378 		*fmode = MODE_IS_EXCLUDE;
379 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
380 	}
381 }
382 
383 /*
384  * If the given interface has failed, choose a new one to join on so
385  * that we continue to receive packets.  ilg_orig_ifindex remembers
386  * what the application used to join on so that we know the ilg to
387  * delete even though we change the ill here.  Callers will store the
388  * ilg returned from this function in ilg_ill.  Thus when we receive
389  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
390  *
391  * This function must be called as writer so we can walk the group
392  * list and examine flags without holding a lock.
393  */
394 ill_t *
395 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
396 {
397 	ill_t	*till;
398 	ill_group_t *illgrp = ill->ill_group;
399 
400 	ASSERT(IAM_WRITER_ILL(ill));
401 
402 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
403 		return (ill);
404 
405 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
406 		return (ill);
407 
408 	till = illgrp->illgrp_ill;
409 	while (till != NULL &&
410 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
411 		till = till->ill_group_next;
412 	}
413 	if (till != NULL)
414 		return (till);
415 
416 	return (ill);
417 }
418 
419 static int
420 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
421     boolean_t isv6)
422 {
423 	mcast_record_t fmode;
424 	slist_t *flist;
425 	boolean_t fdefault;
426 	char buf[INET6_ADDRSTRLEN];
427 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
428 
429 	/*
430 	 * There are several cases where the ilm's filter state
431 	 * defaults to (EXCLUDE, NULL):
432 	 *	- we've had previous joins without associated ilgs
433 	 *	- this join has no associated ilg
434 	 *	- the ilg's filter state is (EXCLUDE, NULL)
435 	 */
436 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
437 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
438 
439 	/* attempt mallocs (if needed) before doing anything else */
440 	if ((flist = l_alloc()) == NULL)
441 		return (ENOMEM);
442 	if (!fdefault && ilm->ilm_filter == NULL) {
443 		ilm->ilm_filter = l_alloc();
444 		if (ilm->ilm_filter == NULL) {
445 			l_free(flist);
446 			return (ENOMEM);
447 		}
448 	}
449 
450 	if (ilgstat != ILGSTAT_CHANGE)
451 		ilm->ilm_refcnt++;
452 
453 	if (ilgstat == ILGSTAT_NONE)
454 		ilm->ilm_no_ilg_cnt++;
455 
456 	/*
457 	 * Determine new filter state.  If it's not the default
458 	 * (EXCLUDE, NULL), we must walk the conn list to find
459 	 * any ilgs interested in this group, and re-build the
460 	 * ilm filter.
461 	 */
462 	if (fdefault) {
463 		fmode = MODE_IS_EXCLUDE;
464 		flist->sl_numsrc = 0;
465 	} else {
466 		ilm_gen_filter(ilm, &fmode, flist);
467 	}
468 
469 	/* make sure state actually changed; nothing to do if not. */
470 	if ((ilm->ilm_fmode == fmode) &&
471 	    !lists_are_different(ilm->ilm_filter, flist)) {
472 		l_free(flist);
473 		return (0);
474 	}
475 
476 	/* send the state change report */
477 	if (!IS_LOOPBACK(ill)) {
478 		if (isv6)
479 			mld_statechange(ilm, fmode, flist);
480 		else
481 			igmp_statechange(ilm, fmode, flist);
482 	}
483 
484 	/* update the ilm state */
485 	ilm->ilm_fmode = fmode;
486 	if (flist->sl_numsrc > 0)
487 		l_copy(flist, ilm->ilm_filter);
488 	else
489 		CLEAR_SLIST(ilm->ilm_filter);
490 
491 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
492 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
493 
494 	l_free(flist);
495 	return (0);
496 }
497 
498 static int
499 ilm_update_del(ilm_t *ilm, boolean_t isv6)
500 {
501 	mcast_record_t fmode;
502 	slist_t *flist;
503 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
504 
505 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
506 	    ilm->ilm_refcnt));
507 
508 	if ((flist = l_alloc()) == NULL)
509 		return (ENOMEM);
510 
511 	/*
512 	 * If present, the ilg in question has already either been
513 	 * updated or removed from our list; so all we need to do
514 	 * now is walk the list to update the ilm filter state.
515 	 *
516 	 * Skip the list walk if we have any no-ilg joins, which
517 	 * cause the filter state to revert to (EXCLUDE, NULL).
518 	 */
519 	if (ilm->ilm_no_ilg_cnt != 0) {
520 		fmode = MODE_IS_EXCLUDE;
521 		flist->sl_numsrc = 0;
522 	} else {
523 		ilm_gen_filter(ilm, &fmode, flist);
524 	}
525 
526 	/* check to see if state needs to be updated */
527 	if ((ilm->ilm_fmode == fmode) &&
528 	    (!lists_are_different(ilm->ilm_filter, flist))) {
529 		l_free(flist);
530 		return (0);
531 	}
532 
533 	if (!IS_LOOPBACK(ill)) {
534 		if (isv6)
535 			mld_statechange(ilm, fmode, flist);
536 		else
537 			igmp_statechange(ilm, fmode, flist);
538 	}
539 
540 	ilm->ilm_fmode = fmode;
541 	if (flist->sl_numsrc > 0) {
542 		if (ilm->ilm_filter == NULL) {
543 			ilm->ilm_filter = l_alloc();
544 			if (ilm->ilm_filter == NULL) {
545 				char buf[INET6_ADDRSTRLEN];
546 				ip1dbg(("ilm_update_del: failed to alloc ilm "
547 				    "filter; no source filtering for %s on %s",
548 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
549 				    buf, sizeof (buf)), ill->ill_name));
550 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
551 				l_free(flist);
552 				return (0);
553 			}
554 		}
555 		l_copy(flist, ilm->ilm_filter);
556 	} else {
557 		CLEAR_SLIST(ilm->ilm_filter);
558 	}
559 
560 	l_free(flist);
561 	return (0);
562 }
563 
564 /*
565  * INADDR_ANY means all multicast addresses. This is only used
566  * by the multicast router.
567  * INADDR_ANY is stored as IPv6 unspecified addr.
568  */
569 int
570 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
571     mcast_record_t ilg_fmode, slist_t *ilg_flist)
572 {
573 	ill_t	*ill = ipif->ipif_ill;
574 	ilm_t 	*ilm;
575 	in6_addr_t v6group;
576 	int	ret;
577 
578 	ASSERT(IAM_WRITER_IPIF(ipif));
579 
580 	if (!CLASSD(group) && group != INADDR_ANY)
581 		return (EINVAL);
582 
583 	/*
584 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
585 	 */
586 	if (group == INADDR_ANY)
587 		v6group = ipv6_all_zeros;
588 	else
589 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
590 
591 	mutex_enter(&ill->ill_lock);
592 	ilm = ilm_lookup_ipif(ipif, group);
593 	mutex_exit(&ill->ill_lock);
594 	/*
595 	 * Since we are writer, we know the ilm_flags itself cannot
596 	 * change at this point, and ilm_lookup_ipif would not have
597 	 * returned a DELETED ilm. However, the data path can free
598 	 * ilm->next via ilm_walker_cleanup() so we can safely
599 	 * access anything in ilm except ilm_next (for safe access to
600 	 * ilm_next we'd have  to take the ill_lock).
601 	 */
602 	if (ilm != NULL)
603 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
604 
605 	/*
606 	 * ilms are associated with ipifs in IPv4. It moves with the
607 	 * ipif if the ipif moves to a new ill when the interface
608 	 * fails. Thus we really don't check whether the ipif_ill
609 	 * has failed like in IPv6. If it has FAILED the ipif
610 	 * will move (daemon will move it) and hence the ilm, if the
611 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
612 	 * we continue to receive in the same place even if the
613 	 * interface fails.
614 	 */
615 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
616 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
617 	if (ilm == NULL)
618 		return (ENOMEM);
619 
620 	if (group == INADDR_ANY) {
621 		/*
622 		 * Check how many ipif's have members in this group -
623 		 * if more then one we should not tell the driver to join
624 		 * this time
625 		 */
626 		if (ilm_numentries_v6(ill, &v6group) > 1)
627 			return (0);
628 		if (ill->ill_group == NULL)
629 			ret = ip_join_allmulti(ipif);
630 		else
631 			ret = ill_nominate_mcast_rcv(ill->ill_group);
632 		if (ret != 0)
633 			ilm_delete(ilm);
634 		return (ret);
635 	}
636 
637 	if (!IS_LOOPBACK(ill))
638 		igmp_joingroup(ilm);
639 
640 	if (ilm_numentries_v6(ill, &v6group) > 1)
641 		return (0);
642 
643 	ret = ip_ll_addmulti_v6(ipif, &v6group);
644 	if (ret != 0)
645 		ilm_delete(ilm);
646 	return (ret);
647 }
648 
649 /*
650  * The unspecified address means all multicast addresses.
651  * This is only used by the multicast router.
652  *
653  * ill identifies the interface to join on; it may not match the
654  * interface requested by the application of a failover has taken
655  * place.  orig_ifindex always identifies the interface requested
656  * by the app.
657  *
658  * ilgstat tells us if there's an ilg associated with this join,
659  * and if so, if it's a new ilg or a change to an existing one.
660  * ilg_fmode and ilg_flist give us the current filter state of
661  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
662  */
663 int
664 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
665     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
666     slist_t *ilg_flist)
667 {
668 	ilm_t	*ilm;
669 	int	ret;
670 
671 	ASSERT(IAM_WRITER_ILL(ill));
672 
673 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
674 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
675 		return (EINVAL);
676 	}
677 
678 	/*
679 	 * An ilm is uniquely identified by the tuple of (group, ill,
680 	 * orig_ill).  group is the multicast group address, ill is
681 	 * the interface on which it is currently joined, and orig_ill
682 	 * is the interface on which the application requested the
683 	 * join.  orig_ill and ill are the same unless orig_ill has
684 	 * failed over.
685 	 *
686 	 * Both orig_ill and ill are required, which means we may have
687 	 * 2 ilms on an ill for the same group, but with different
688 	 * orig_ills.  These must be kept separate, so that when failback
689 	 * occurs, the appropriate ilms are moved back to their orig_ill
690 	 * without disrupting memberships on the ill to which they had
691 	 * been moved.
692 	 *
693 	 * In order to track orig_ill, we store orig_ifindex in the
694 	 * ilm and ilg.
695 	 */
696 	mutex_enter(&ill->ill_lock);
697 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
698 	mutex_exit(&ill->ill_lock);
699 	if (ilm != NULL)
700 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
701 
702 	/*
703 	 * We need to remember where the application really wanted
704 	 * to join. This will be used later if we want to failback
705 	 * to the original interface.
706 	 */
707 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
708 	    ilg_flist, orig_ifindex, zoneid);
709 	if (ilm == NULL)
710 		return (ENOMEM);
711 
712 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
713 		/*
714 		 * Check how many ipif's that have members in this group -
715 		 * if more then one we should not tell the driver to join
716 		 * this time
717 		 */
718 		if (ilm_numentries_v6(ill, v6group) > 1)
719 			return (0);
720 		if (ill->ill_group == NULL)
721 			ret = ip_join_allmulti(ill->ill_ipif);
722 		else
723 			ret = ill_nominate_mcast_rcv(ill->ill_group);
724 
725 		if (ret != 0)
726 			ilm_delete(ilm);
727 		return (ret);
728 	}
729 
730 	if (!IS_LOOPBACK(ill))
731 		mld_joingroup(ilm);
732 
733 	/*
734 	 * If we have more then one we should not tell the driver
735 	 * to join this time.
736 	 */
737 	if (ilm_numentries_v6(ill, v6group) > 1)
738 		return (0);
739 
740 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
741 	if (ret != 0)
742 		ilm_delete(ilm);
743 	return (ret);
744 }
745 
746 /*
747  * Send a multicast request to the driver for enabling multicast reception
748  * for v6groupp address. The caller has already checked whether it is
749  * appropriate to send one or not.
750  */
751 int
752 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
753 {
754 	mblk_t	*mp;
755 	uint32_t addrlen, addroff;
756 	char	group_buf[INET6_ADDRSTRLEN];
757 
758 	ASSERT(IAM_WRITER_ILL(ill));
759 
760 	/*
761 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
762 	 * on.
763 	 */
764 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
765 	    &addrlen, &addroff);
766 	if (!mp)
767 		return (ENOMEM);
768 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
769 		ipaddr_t v4group;
770 
771 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
772 		/*
773 		 * NOTE!!!
774 		 * The "addroff" passed in here was calculated by
775 		 * ill_create_dl(), and will be used by ill_create_squery()
776 		 * to perform some twisted coding magic. It is the offset
777 		 * into the dl_xxx_req of the hw addr. Here, it will be
778 		 * added to b_wptr - b_rptr to create a magic number that
779 		 * is not an offset into this squery mblk.
780 		 * The actual hardware address will be accessed only in the
781 		 * dl_xxx_req, not in the squery. More importantly,
782 		 * that hardware address can *only* be accessed in this
783 		 * mblk chain by calling mi_offset_param_c(), which uses
784 		 * the magic number in the squery hw offset field to go
785 		 * to the *next* mblk (the dl_xxx_req), subtract the
786 		 * (b_wptr - b_rptr), and find the actual offset into
787 		 * the dl_xxx_req.
788 		 * Any method that depends on using the
789 		 * offset field in the dl_disabmulti_req or squery
790 		 * to find either hardware address will similarly fail.
791 		 *
792 		 * Look in ar_entry_squery() in arp.c to see how this offset
793 		 * is used.
794 		 */
795 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
796 		if (!mp)
797 			return (ENOMEM);
798 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
799 		    inet_ntop(AF_INET6, v6groupp, group_buf,
800 		    sizeof (group_buf)),
801 		    ill->ill_name));
802 		putnext(ill->ill_rq, mp);
803 	} else {
804 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
805 		    " %s\n",
806 		    inet_ntop(AF_INET6, v6groupp, group_buf,
807 		    sizeof (group_buf)),
808 		    ill->ill_name));
809 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
810 	}
811 	return (0);
812 }
813 
814 /*
815  * Send a multicast request to the driver for enabling multicast
816  * membership for v6group if appropriate.
817  */
818 static int
819 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
820 {
821 	ill_t	*ill = ipif->ipif_ill;
822 
823 	ASSERT(IAM_WRITER_IPIF(ipif));
824 
825 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
826 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
827 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
828 		return (0);	/* Must be IRE_IF_NORESOLVER */
829 	}
830 
831 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
832 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
833 		return (0);
834 	}
835 	if (!ill->ill_dl_up) {
836 		/*
837 		 * Nobody there. All multicast addresses will be re-joined
838 		 * when we get the DL_BIND_ACK bringing the interface up.
839 		 */
840 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
841 		return (0);
842 	}
843 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
844 }
845 
846 /*
847  * INADDR_ANY means all multicast addresses. This is only used
848  * by the multicast router.
849  * INADDR_ANY is stored as the IPv6 unspecifed addr.
850  */
851 int
852 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
853 {
854 	ill_t	*ill = ipif->ipif_ill;
855 	ilm_t *ilm;
856 	in6_addr_t v6group;
857 	int	ret;
858 
859 	ASSERT(IAM_WRITER_IPIF(ipif));
860 
861 	if (!CLASSD(group) && group != INADDR_ANY)
862 		return (EINVAL);
863 
864 	/*
865 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
866 	 */
867 	if (group == INADDR_ANY)
868 		v6group = ipv6_all_zeros;
869 	else
870 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
871 
872 	/*
873 	 * Look for a match on the ipif.
874 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
875 	 */
876 	mutex_enter(&ill->ill_lock);
877 	ilm = ilm_lookup_ipif(ipif, group);
878 	mutex_exit(&ill->ill_lock);
879 	if (ilm == NULL)
880 		return (ENOENT);
881 
882 	/* Update counters */
883 	if (no_ilg)
884 		ilm->ilm_no_ilg_cnt--;
885 
886 	if (leaving)
887 		ilm->ilm_refcnt--;
888 
889 	if (ilm->ilm_refcnt > 0)
890 		return (ilm_update_del(ilm, B_FALSE));
891 
892 	if (group == INADDR_ANY) {
893 		ilm_delete(ilm);
894 		/*
895 		 * Check how many ipif's that have members in this group -
896 		 * if there are still some left then don't tell the driver
897 		 * to drop it.
898 		 */
899 		if (ilm_numentries_v6(ill, &v6group) != 0)
900 			return (0);
901 
902 		/*
903 		 * If we never joined, then don't leave.  This can happen
904 		 * if we're in an IPMP group, since only one ill per IPMP
905 		 * group receives all multicast packets.
906 		 */
907 		if (!ill->ill_join_allmulti) {
908 			ASSERT(ill->ill_group != NULL);
909 			return (0);
910 		}
911 
912 		ret = ip_leave_allmulti(ipif);
913 		if (ill->ill_group != NULL)
914 			(void) ill_nominate_mcast_rcv(ill->ill_group);
915 		return (ret);
916 	}
917 
918 	if (!IS_LOOPBACK(ill))
919 		igmp_leavegroup(ilm);
920 
921 	ilm_delete(ilm);
922 	/*
923 	 * Check how many ipif's that have members in this group -
924 	 * if there are still some left then don't tell the driver
925 	 * to drop it.
926 	 */
927 	if (ilm_numentries_v6(ill, &v6group) != 0)
928 		return (0);
929 	return (ip_ll_delmulti_v6(ipif, &v6group));
930 }
931 
932 /*
933  * The unspecified address means all multicast addresses.
934  * This is only used by the multicast router.
935  */
936 int
937 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
938     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
939 {
940 	ipif_t	*ipif;
941 	ilm_t *ilm;
942 	int	ret;
943 
944 	ASSERT(IAM_WRITER_ILL(ill));
945 
946 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
947 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
948 		return (EINVAL);
949 
950 	/*
951 	 * Look for a match on the ill.
952 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
953 	 *
954 	 * Similar to ip_addmulti_v6, we should always look using
955 	 * the orig_ifindex.
956 	 *
957 	 * 1) If orig_ifindex is different from ill's ifindex
958 	 *    we should have an ilm with orig_ifindex created in
959 	 *    ip_addmulti_v6. We should delete that here.
960 	 *
961 	 * 2) If orig_ifindex is same as ill's ifindex, we should
962 	 *    not delete the ilm that is temporarily here because of
963 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
964 	 *    different from ill's ifindex.
965 	 *
966 	 * Thus, always lookup using orig_ifindex.
967 	 */
968 	mutex_enter(&ill->ill_lock);
969 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
970 	mutex_exit(&ill->ill_lock);
971 	if (ilm == NULL)
972 		return (ENOENT);
973 
974 	ASSERT(ilm->ilm_ill == ill);
975 
976 	ipif = ill->ill_ipif;
977 
978 	/* Update counters */
979 	if (no_ilg)
980 		ilm->ilm_no_ilg_cnt--;
981 
982 	if (leaving)
983 		ilm->ilm_refcnt--;
984 
985 	if (ilm->ilm_refcnt > 0)
986 		return (ilm_update_del(ilm, B_TRUE));
987 
988 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
989 		ilm_delete(ilm);
990 		/*
991 		 * Check how many ipif's that have members in this group -
992 		 * if there are still some left then don't tell the driver
993 		 * to drop it.
994 		 */
995 		if (ilm_numentries_v6(ill, v6group) != 0)
996 			return (0);
997 
998 		/*
999 		 * If we never joined, then don't leave.  This can happen
1000 		 * if we're in an IPMP group, since only one ill per IPMP
1001 		 * group receives all multicast packets.
1002 		 */
1003 		if (!ill->ill_join_allmulti) {
1004 			ASSERT(ill->ill_group != NULL);
1005 			return (0);
1006 		}
1007 
1008 		ret = ip_leave_allmulti(ipif);
1009 		if (ill->ill_group != NULL)
1010 			(void) ill_nominate_mcast_rcv(ill->ill_group);
1011 		return (ret);
1012 	}
1013 
1014 	if (!IS_LOOPBACK(ill))
1015 		mld_leavegroup(ilm);
1016 
1017 	ilm_delete(ilm);
1018 	/*
1019 	 * Check how many ipif's that have members in this group -
1020 	 * if there are still some left then don't tell the driver
1021 	 * to drop it.
1022 	 */
1023 	if (ilm_numentries_v6(ill, v6group) != 0)
1024 		return (0);
1025 	return (ip_ll_delmulti_v6(ipif, v6group));
1026 }
1027 
1028 /*
1029  * Send a multicast request to the driver for disabling multicast reception
1030  * for v6groupp address. The caller has already checked whether it is
1031  * appropriate to send one or not.
1032  */
1033 int
1034 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1035 {
1036 	mblk_t	*mp;
1037 	char	group_buf[INET6_ADDRSTRLEN];
1038 	uint32_t	addrlen, addroff;
1039 
1040 	ASSERT(IAM_WRITER_ILL(ill));
1041 	/*
1042 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1043 	 * on.
1044 	 */
1045 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1046 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1047 
1048 	if (!mp)
1049 		return (ENOMEM);
1050 
1051 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1052 		ipaddr_t v4group;
1053 
1054 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1055 		/*
1056 		 * NOTE!!!
1057 		 * The "addroff" passed in here was calculated by
1058 		 * ill_create_dl(), and will be used by ill_create_squery()
1059 		 * to perform some twisted coding magic. It is the offset
1060 		 * into the dl_xxx_req of the hw addr. Here, it will be
1061 		 * added to b_wptr - b_rptr to create a magic number that
1062 		 * is not an offset into this mblk.
1063 		 *
1064 		 * Please see the comment in ip_ll_send)enabmulti_req()
1065 		 * for a complete explanation.
1066 		 *
1067 		 * Look in ar_entry_squery() in arp.c to see how this offset
1068 		 * is used.
1069 		 */
1070 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1071 		if (!mp)
1072 			return (ENOMEM);
1073 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1074 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1075 		    sizeof (group_buf)),
1076 		    ill->ill_name));
1077 		putnext(ill->ill_rq, mp);
1078 	} else {
1079 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
1080 		    " %s\n",
1081 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1082 		    sizeof (group_buf)),
1083 		    ill->ill_name));
1084 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1085 	}
1086 	return (0);
1087 }
1088 
1089 /*
1090  * Send a multicast request to the driver for disabling multicast
1091  * membership for v6group if appropriate.
1092  */
1093 static int
1094 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1095 {
1096 	ill_t	*ill = ipif->ipif_ill;
1097 
1098 	ASSERT(IAM_WRITER_IPIF(ipif));
1099 
1100 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1101 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1102 		return (0);	/* Must be IRE_IF_NORESOLVER */
1103 	}
1104 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1105 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1106 		return (0);
1107 	}
1108 	if (!ill->ill_dl_up) {
1109 		/*
1110 		 * Nobody there. All multicast addresses will be re-joined
1111 		 * when we get the DL_BIND_ACK bringing the interface up.
1112 		 */
1113 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1114 		return (0);
1115 	}
1116 	return (ip_ll_send_disabmulti_req(ill, v6group));
1117 }
1118 
1119 /*
1120  * Make the driver pass up all multicast packets
1121  *
1122  * With ill groups, the caller makes sure that there is only
1123  * one ill joining the allmulti group.
1124  */
1125 int
1126 ip_join_allmulti(ipif_t *ipif)
1127 {
1128 	ill_t	*ill = ipif->ipif_ill;
1129 	mblk_t	*mp;
1130 	uint32_t	addrlen, addroff;
1131 
1132 	ASSERT(IAM_WRITER_IPIF(ipif));
1133 
1134 	if (!ill->ill_dl_up) {
1135 		/*
1136 		 * Nobody there. All multicast addresses will be re-joined
1137 		 * when we get the DL_BIND_ACK bringing the interface up.
1138 		 */
1139 		return (0);
1140 	}
1141 
1142 	ASSERT(!ill->ill_join_allmulti);
1143 
1144 	/*
1145 	 * Create a DL_PROMISCON_REQ message and send it directly to
1146 	 * the DLPI provider.  We don't need to do this for certain
1147 	 * media types for which we never need to turn promiscuous
1148 	 * mode on.
1149 	 */
1150 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1151 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1152 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1153 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1154 		if (mp == NULL)
1155 			return (ENOMEM);
1156 		ill_dlpi_send(ill, mp);
1157 	}
1158 
1159 	ill->ill_join_allmulti = B_TRUE;
1160 	return (0);
1161 }
1162 
1163 /*
1164  * Make the driver stop passing up all multicast packets
1165  *
1166  * With ill groups, we need to nominate some other ill as
1167  * this ipif->ipif_ill is leaving the group.
1168  */
1169 int
1170 ip_leave_allmulti(ipif_t *ipif)
1171 {
1172 	ill_t	*ill = ipif->ipif_ill;
1173 	mblk_t	*mp;
1174 	uint32_t	addrlen, addroff;
1175 
1176 	ASSERT(IAM_WRITER_IPIF(ipif));
1177 
1178 	if (!ill->ill_dl_up) {
1179 		/*
1180 		 * Nobody there. All multicast addresses will be re-joined
1181 		 * when we get the DL_BIND_ACK bringing the interface up.
1182 		 */
1183 		return (0);
1184 	}
1185 
1186 	ASSERT(ill->ill_join_allmulti);
1187 
1188 	/*
1189 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1190 	 * the DLPI provider.  We don't need to do this for certain
1191 	 * media types for which we never need to turn promiscuous
1192 	 * mode on.
1193 	 */
1194 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1195 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1196 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1197 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1198 		if (mp == NULL)
1199 			return (ENOMEM);
1200 		ill_dlpi_send(ill, mp);
1201 	}
1202 
1203 	ill->ill_join_allmulti = B_FALSE;
1204 	return (0);
1205 }
1206 
/*
 * Copy mp_orig and pass it in as a local message.
 *
 * q, fanout_flags and zoneid are handed through unchanged to
 * ip_wput_local(); ill supplies the IP stack instance and is reported to
 * the DTrace probes and the loopback-out firewall hooks.  Only a copy of
 * the message (or a dupmsg() of it behind a freshly copied header) is
 * delivered.  If that copy cannot be allocated, the function returns
 * without delivering anything.
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * For a plain (M_DATA) UDP packet, copy only the IP and UDP headers
	 * into a new mblk and chain a dupmsg() of the original behind it,
	 * instead of copying the whole message.
	 */
	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		/* Both headers are expected to sit in the first mblk. */
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		/* From here on, mp_orig refers to the duplicate. */
		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			/* Skip the headers in the duplicate; mp holds them. */
			mp_orig->b_rptr += hdrsz;
			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
				mblk_setcred(mp, DB_CRED(mp_orig));
			/*
			 * The first duplicated mblk held nothing but the
			 * headers: unlink it from the chain and free it.
			 */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			/*
			 * allocb() worked but dupmsg() did not: release the
			 * header mblk; mp == NULL causes the return below.
			 */
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	/*
	 * ipsec_mp always names the head of the message.  When the head is
	 * an M_CTL (presumably an IPsec control block -- confirm), the IP
	 * packet itself is in its b_cont.
	 */
	if (DB_TYPE(mp) == M_CTL) {
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	/*
	 * DTrace this as ip:::send.  A blocked packet will fire the send
	 * probe, but not the receive probe.
	 */
	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	/*
	 * NOTE(review): ipsec_mp is re-tested for NULL here, which suggests
	 * FW_HOOKS may consume the message and clear it -- confirm against
	 * the macro definition.
	 */
	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}
1280 
/*
 * Template for the AR_ENTRY_SQUERY message built by ill_create_squery(),
 * which hands it to ill_arp_alloc() and then fills in the hw addr offset
 * and length fields on the resulting message.
 */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
	sizeof (area_t),			/* proto addr offset */
	IP_ADDR_LEN,			/* proto addr_length */
	0,				/* proto mask offset */
	/* Rest is initialized when used */
	0,				/* flags */
	0,				/* hw addr offset */
	0,				/* hw addr length */
};
1294 
1295 static mblk_t *
1296 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1297     uint32_t addroff, mblk_t *mp_tail)
1298 {
1299 	mblk_t	*mp;
1300 	area_t	*area;
1301 
1302 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1303 	    (caddr_t)&ipaddr);
1304 	if (!mp) {
1305 		freemsg(mp_tail);
1306 		return (NULL);
1307 	}
1308 	area = (area_t *)mp->b_rptr;
1309 	area->area_hw_addr_length = addrlen;
1310 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1311 	/*
1312 	 * NOTE!
1313 	 *
1314 	 * The area_hw_addr_offset, as can be seen, does not hold the
1315 	 * actual hardware address offset. Rather, it holds the offset
1316 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1317 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1318 	 * mi_offset_paramc() to find the hardware address in the
1319 	 * *second* mblk (dl_xxx_req), not this mblk.
1320 	 *
1321 	 * Using mi_offset_paramc() is thus the *only* way to access
1322 	 * the dl_xxx_hw address.
1323 	 *
1324 	 * The squery hw address should *not* be accessed.
1325 	 *
1326 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1327 	 */
1328 
1329 	mp->b_cont = mp_tail;
1330 	return (mp);
1331 }
1332 
1333 /*
1334  * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
1335  * the hardware address.
1336  */
1337 static mblk_t *
1338 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1339     uint32_t *addr_lenp, uint32_t *addr_offp)
1340 {
1341 	mblk_t	*mp;
1342 	uint32_t	hw_addr_length;
1343 	char		*cp;
1344 	uint32_t	offset;
1345 	uint32_t 	size;
1346 
1347 	*addr_lenp = *addr_offp = 0;
1348 
1349 	hw_addr_length = ill->ill_phys_addr_length;
1350 	if (!hw_addr_length) {
1351 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1352 		return (NULL);
1353 	}
1354 
1355 	size = length;
1356 	switch (dl_primitive) {
1357 	case DL_ENABMULTI_REQ:
1358 	case DL_DISABMULTI_REQ:
1359 		size += hw_addr_length;
1360 		break;
1361 	case DL_PROMISCON_REQ:
1362 	case DL_PROMISCOFF_REQ:
1363 		break;
1364 	default:
1365 		return (NULL);
1366 	}
1367 	mp = allocb(size, BPRI_HI);
1368 	if (!mp)
1369 		return (NULL);
1370 	mp->b_wptr += size;
1371 	mp->b_datap->db_type = M_PROTO;
1372 
1373 	cp = (char *)mp->b_rptr;
1374 	offset = length;
1375 
1376 	switch (dl_primitive) {
1377 	case DL_ENABMULTI_REQ: {
1378 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1379 
1380 		dl->dl_primitive = dl_primitive;
1381 		dl->dl_addr_offset = offset;
1382 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1383 		*addr_offp = offset;
1384 		break;
1385 	}
1386 	case DL_DISABMULTI_REQ: {
1387 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1388 
1389 		dl->dl_primitive = dl_primitive;
1390 		dl->dl_addr_offset = offset;
1391 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1392 		*addr_offp = offset;
1393 		break;
1394 	}
1395 	case DL_PROMISCON_REQ:
1396 	case DL_PROMISCOFF_REQ: {
1397 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1398 
1399 		dl->dl_primitive = dl_primitive;
1400 		dl->dl_level = DL_PROMISC_MULTI;
1401 		break;
1402 	}
1403 	}
1404 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1405 	    *addr_lenp, *addr_offp));
1406 	return (mp);
1407 }
1408 
1409 /*
1410  * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
1411  * messages that had been delayed until we'd heard back from ARP.  One catch:
1412  * we need to ensure that no one else becomes writer on the IPSQ before we've
1413  * received the replies, or they'll incorrectly process our replies as part of
1414  * their unrelated IPSQ operation.  To do this, we start a new IPSQ operation,
1415  * which will complete when we process the reply in ip_rput_dlpi_writer().
1416  */
1417 /* ARGSUSED */
1418 static void
1419 ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
1420 {
1421 	ill_t *ill = q->q_ptr;
1422 	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
1423 
1424 	ASSERT(IAM_WRITER_ILL(ill));
1425 	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
1426 	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));
1427 
1428 	if (prim == DL_ENABMULTI_REQ) {
1429 		/* Track the state if this is the first enabmulti */
1430 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1431 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1432 	}
1433 
1434 	ipsq_current_start(ipsq, ill->ill_ipif, 0);
1435 	ill_dlpi_send(ill, mp);
1436 }
1437 
1438 void
1439 ip_wput_ctl(queue_t *q, mblk_t *mp)
1440 {
1441 	ill_t	*ill = q->q_ptr;
1442 	mblk_t	*dlmp = mp->b_cont;
1443 	area_t	*area = (area_t *)mp->b_rptr;
1444 	t_uscalar_t prim;
1445 
1446 	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
1447 	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
1448 	    dlmp == NULL) {
1449 		putnext(q, mp);
1450 		return;
1451 	}
1452 
1453 	/* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
1454 	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
1455 	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
1456 		putnext(q, mp);
1457 		return;
1458 	}
1459 	freeb(mp);
1460 
1461 	/* See comments above ip_wput_ctl_writer() for details */
1462 	ill_refhold(ill);
1463 	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
1464 }
1465 
1466 /*
1467  * Rejoin any groups which have been explicitly joined by the application (we
1468  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1469  * bringing the interface down).  Note that because groups can be joined and
1470  * left while an interface is down, this may not be the same set of groups
1471  * that we left in ill_leave_multicast().
1472  */
1473 void
1474 ill_recover_multicast(ill_t *ill)
1475 {
1476 	ilm_t	*ilm;
1477 	char    addrbuf[INET6_ADDRSTRLEN];
1478 
1479 	ASSERT(IAM_WRITER_ILL(ill));
1480 	ILM_WALKER_HOLD(ill);
1481 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1482 		/*
1483 		 * Check how many ipif's that have members in this group -
1484 		 * if more then one we make sure that this entry is first
1485 		 * in the list.
1486 		 */
1487 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1488 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1489 			continue;
1490 		ip1dbg(("ill_recover_multicast: %s\n",
1491 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1492 		    sizeof (addrbuf))));
1493 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1494 			if (ill->ill_group == NULL) {
1495 				(void) ip_join_allmulti(ill->ill_ipif);
1496 			} else {
1497 				/*
1498 				 * We don't want to join on this ill,
1499 				 * if somebody else in the group has
1500 				 * already been nominated.
1501 				 */
1502 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1503 			}
1504 		} else {
1505 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1506 			    &ilm->ilm_v6addr);
1507 		}
1508 	}
1509 	ILM_WALKER_RELE(ill);
1510 }
1511 
1512 /*
1513  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1514  * that were explicitly joined.  Note that both these functions could be
1515  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1516  * and DL_ENABMULTI_REQ messages when an interface is down.
1517  */
1518 void
1519 ill_leave_multicast(ill_t *ill)
1520 {
1521 	ilm_t	*ilm;
1522 	char    addrbuf[INET6_ADDRSTRLEN];
1523 
1524 	ASSERT(IAM_WRITER_ILL(ill));
1525 	ILM_WALKER_HOLD(ill);
1526 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1527 		/*
1528 		 * Check how many ipif's that have members in this group -
1529 		 * if more then one we make sure that this entry is first
1530 		 * in the list.
1531 		 */
1532 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1533 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1534 			continue;
1535 		ip1dbg(("ill_leave_multicast: %s\n",
1536 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1537 		    sizeof (addrbuf))));
1538 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1539 			(void) ip_leave_allmulti(ill->ill_ipif);
1540 			/*
1541 			 * If we were part of an IPMP group, then
1542 			 * ill_handoff_responsibility() has already
1543 			 * nominated a new member (so we don't).
1544 			 */
1545 			ASSERT(ill->ill_group == NULL);
1546 		} else {
1547 			(void) ip_ll_delmulti_v6(ill->ill_ipif,
1548 			    &ilm->ilm_v6addr);
1549 		}
1550 	}
1551 	ILM_WALKER_RELE(ill);
1552 }
1553 
1554 /* Find an ilm for matching the ill */
1555 ilm_t *
1556 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1557 {
1558 	in6_addr_t	v6group;
1559 
1560 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1561 	/*
1562 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1563 	 */
1564 	if (group == INADDR_ANY)
1565 		v6group = ipv6_all_zeros;
1566 	else
1567 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1568 
1569 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1570 }
1571 
1572 /*
1573  * Find an ilm for matching the ill. All the ilm lookup functions
1574  * ignore ILM_DELETED ilms. These have been logically deleted, and
1575  * igmp and linklayer disable multicast have been done. Only mi_free
1576  * yet to be done. Still there in the list due to ilm_walkers. The
1577  * last walker will release it.
1578  */
1579 ilm_t *
1580 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1581 {
1582 	ilm_t	*ilm;
1583 
1584 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1585 
1586 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1587 		if (ilm->ilm_flags & ILM_DELETED)
1588 			continue;
1589 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1590 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1591 			return (ilm);
1592 	}
1593 	return (NULL);
1594 }
1595 
1596 ilm_t *
1597 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1598     zoneid_t zoneid)
1599 {
1600 	ilm_t *ilm;
1601 
1602 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1603 
1604 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1605 		if (ilm->ilm_flags & ILM_DELETED)
1606 			continue;
1607 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1608 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1609 		    ilm->ilm_orig_ifindex == index) {
1610 			return (ilm);
1611 		}
1612 	}
1613 	return (NULL);
1614 }
1615 
1616 
1617 /*
1618  * Found an ilm for the ipif. Only needed for IPv4 which does
1619  * ipif specific socket options.
1620  */
1621 ilm_t *
1622 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1623 {
1624 	ill_t	*ill = ipif->ipif_ill;
1625 	ilm_t	*ilm;
1626 	in6_addr_t	v6group;
1627 
1628 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1629 	/*
1630 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1631 	 */
1632 	if (group == INADDR_ANY)
1633 		v6group = ipv6_all_zeros;
1634 	else
1635 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1636 
1637 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1638 		if (ilm->ilm_flags & ILM_DELETED)
1639 			continue;
1640 		if (ilm->ilm_ipif == ipif &&
1641 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1642 			return (ilm);
1643 	}
1644 	return (NULL);
1645 }
1646 
1647 /*
1648  * How many members on this ill?
1649  */
1650 int
1651 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1652 {
1653 	ilm_t	*ilm;
1654 	int i = 0;
1655 
1656 	mutex_enter(&ill->ill_lock);
1657 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1658 		if (ilm->ilm_flags & ILM_DELETED)
1659 			continue;
1660 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1661 			i++;
1662 		}
1663 	}
1664 	mutex_exit(&ill->ill_lock);
1665 	return (i);
1666 }
1667 
/*
 * Allocate a new ilm for v6group and link it into the ilm list of
 * ipif's ill.  Caller guarantees that the group is not already on the
 * list.
 *
 * ilgstat, ilg_fmode and ilg_flist describe the filter state of an
 * associated ilg, if any; with ILGSTAT_NONE the ilm defaults to
 * (EXCLUDE, NULL).  orig_ifindex is recorded in ilm_orig_ifindex and
 * zoneid in ilm_zoneid.
 *
 * Returns the new ilm, or NULL if the ilm or its filter list could not
 * be allocated.
 */
static ilm_t *
ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
    zoneid_t zoneid)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/*
	 * NOTE(review): fields not assigned below (e.g. ilm_flags) are
	 * presumably zero-filled by GETSTRUCT -- confirm.
	 */
	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* A filter list is only needed when there are sources to copy. */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	/*
	 * IPv4 Multicast groups are joined using ipif.
	 * IPv6 Multicast groups are joined using ill.
	 */
	if (ill->ill_isv6) {
		ilm->ilm_ill = ill;
		ilm->ilm_ipif = NULL;
		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_ilm_cnt++;
	} else {
		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
		ilm->ilm_ipif = ipif;
		ilm->ilm_ill = NULL;
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
		    (char *), "ilm", (void *), ilm);
		ipif->ipif_ilm_cnt++;
	}
	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/*
	 * After this if ilm moves to a new ill, we don't change
	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
	 * it has been moved. Indexes don't match even when the application
	 * wants to join on a FAILED/INACTIVE interface because we choose
	 * a new interface to join in. This is considered as an implicit
	 * move.
	 */
	ilm->ilm_orig_ifindex = orig_ifindex;

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));

	/*
	 * Grab lock to give consistent view to readers
	 */
	mutex_enter(&ill->ill_lock);
	/*
	 * All ilms in the same zone are contiguous in the ill_ilm list.
	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
	 * sending duplicates up when two applications in the same zone join the
	 * same group on different logical interfaces.
	 *
	 * The walk stops at the first ilm of the same zone and the new ilm
	 * is linked in directly before it; if the zone has no ilm yet, the
	 * new one ends up at the tail of the list.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	mutex_exit(&ill->ill_lock);
	return (ilm);
}
1766 
/*
 * Final teardown of an ilm: release its source lists, clear its IP stack
 * pointer and free the structure.  The ilm must not be used afterwards.
 * Unlinking from the ill's list is done by the callers, not here.
 */
void
ilm_inactive(ilm_t *ilm)
{
	/* NOTE(review): FREE_SLIST presumably tolerates NULL lists -- confirm */
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}
1777 
/*
 * Unlink and free every ilm on ill that has been marked ILM_DELETED.
 *
 * Must be called with ill_lock held and with no ilm walkers active (both
 * are asserted).  ill_lock is released before returning, either directly
 * or through ipif_ill_refrele_tail().
 */
void
ilm_walker_cleanup(ill_t *ill)
{
	ilm_t	**ilmp;
	ilm_t	*ilm;
	boolean_t need_wakeup = B_FALSE;

	ASSERT(MUTEX_HELD(&ill->ill_lock));
	ASSERT(ill->ill_ilm_walker_cnt == 0);

	/*
	 * ilmp always points at the link that references the current ilm,
	 * so an entry can be unlinked without tracking its predecessor.
	 */
	ilmp = &ill->ill_ilm;
	while (*ilmp != NULL) {
		if ((*ilmp)->ilm_flags & ILM_DELETED) {
			ilm = *ilmp;
			*ilmp = ilm->ilm_next;
			/*
			 * check if there are any pending FREE or unplumb
			 * operations that need to be restarted.
			 */
			if (ilm->ilm_ipif != NULL) {
				/*
				 * IPv4 ilms hold a ref on the ipif.
				 */
				DTRACE_PROBE3(ipif__decr__cnt,
				    (ipif_t *), ilm->ilm_ipif,
				    (char *), "ilm", (void *), ilm);
				ilm->ilm_ipif->ipif_ilm_cnt--;
				if (IPIF_FREE_OK(ilm->ilm_ipif))
					need_wakeup = B_TRUE;
			} else {
				/*
				 * IPv6 ilms hold a ref on the ill.
				 */
				ASSERT(ilm->ilm_ill == ill);
				DTRACE_PROBE3(ill__decr__cnt,
				    (ill_t *), ill,
				    (char *), "ilm", (void *), ilm);
				ASSERT(ill->ill_ilm_cnt > 0);
				ill->ill_ilm_cnt--;
				if (ILL_FREE_OK(ill))
					need_wakeup = B_TRUE;
			}
			ilm_inactive(ilm); /* frees ilm */
		} else {
			/* Entry stays: step to the next link. */
			ilmp = &(*ilmp)->ilm_next;
		}
	}
	ill->ill_ilm_cleanup_reqd = 0;
	/* Either path below releases ill_lock. */
	if (need_wakeup)
		ipif_ill_refrele_tail(ill);
	else
		mutex_exit(&ill->ill_lock);
}
1831 
/*
 * Unlink ilm and free it.
 *
 * If ilm walkers are active on the ill, the ilm is only marked
 * ILM_DELETED and ill_ilm_cleanup_reqd is set; it stays on the list and
 * is freed later (see ilm_walker_cleanup()).  Otherwise it is unlinked
 * and freed right away, so the caller must not touch ilm after this
 * call in either case.
 *
 * Takes and releases ill_lock; the caller must not hold it.
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;


	/* IPv4 ilms hang off an ipif, IPv6 ilms off the ill itself. */
	if (ilm->ilm_ipif != NULL) {
		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
		ASSERT(ilm->ilm_ill == NULL);
		ill = ilm->ilm_ipif->ipif_ill;
		ASSERT(!ill->ill_isv6);
	} else {
		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
		ASSERT(ilm->ilm_ipif == NULL);
		ill = ilm->ilm_ill;
		ASSERT(ill->ill_isv6);
	}
	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ilm_walker_cnt != 0) {
		/* Walkers are active: defer the unlink and free. */
		ilm->ilm_flags |= ILM_DELETED;
		ill->ill_ilm_cleanup_reqd = 1;
		mutex_exit(&ill->ill_lock);
		return;
	}

	/*
	 * Find the link that points at ilm.  The loop has no NULL check,
	 * so it relies on ilm actually being on this ill's list.
	 */
	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
				;
	*ilmp = ilm->ilm_next;

	/*
	 * if we are the last reference to the ipif (for IPv4 ilms)
	 * or the ill (for IPv6 ilms), we may need to wakeup any
	 * pending FREE or unplumb operations.
	 */
	need_wakeup = B_FALSE;
	if (ilm->ilm_ipif != NULL) {
		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
		    (char *), "ilm", (void *), ilm);
		ilm->ilm_ipif->ipif_ilm_cnt--;
		if (IPIF_FREE_OK(ilm->ilm_ipif))
			need_wakeup = B_TRUE;
	} else {
		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ASSERT(ill->ill_ilm_cnt > 0);
		ill->ill_ilm_cnt--;
		if (ILL_FREE_OK(ill))
			need_wakeup = B_TRUE;
	}

	ilm_inactive(ilm); /* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}
1900 
1901 
1902 /*
1903  * Looks up the appropriate ipif given a v4 multicast group and interface
1904  * address.  On success, returns 0, with *ipifpp pointing to the found
1905  * struct.  On failure, returns an errno and *ipifpp is NULL.
1906  */
1907 int
1908 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1909     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1910 {
1911 	ipif_t *ipif;
1912 	int err = 0;
1913 	zoneid_t zoneid;
1914 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1915 
1916 	if (!CLASSD(group) || CLASSD(src)) {
1917 		return (EINVAL);
1918 	}
1919 	*ipifpp = NULL;
1920 
1921 	zoneid = IPCL_ZONEID(connp);
1922 
1923 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1924 	if (ifaddr != INADDR_ANY) {
1925 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1926 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1927 		if (err != 0 && err != EINPROGRESS)
1928 			err = EADDRNOTAVAIL;
1929 	} else if (ifindexp != NULL && *ifindexp != 0) {
1930 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1931 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1932 	} else {
1933 		ipif = ipif_lookup_group(group, zoneid, ipst);
1934 		if (ipif == NULL)
1935 			return (EADDRNOTAVAIL);
1936 	}
1937 	if (ipif == NULL)
1938 		return (err);
1939 
1940 	*ipifpp = ipif;
1941 	return (0);
1942 }
1943 
1944 /*
1945  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1946  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1947  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1948  * an errno and *illpp and *ipifpp are undefined.
1949  */
1950 int
1951 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1952     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1953     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1954 {
1955 	boolean_t src_unspec;
1956 	ill_t *ill = NULL;
1957 	ipif_t *ipif = NULL;
1958 	int err;
1959 	zoneid_t zoneid = connp->conn_zoneid;
1960 	queue_t *wq = CONNP_TO_WQ(connp);
1961 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1962 
1963 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1964 
1965 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1966 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1967 			return (EINVAL);
1968 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1969 		if (src_unspec) {
1970 			*v4src = INADDR_ANY;
1971 		} else {
1972 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1973 		}
1974 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1975 			return (EINVAL);
1976 		*ipifpp = NULL;
1977 		*isv6 = B_FALSE;
1978 	} else {
1979 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1980 			return (EINVAL);
1981 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1982 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1983 			return (EINVAL);
1984 		}
1985 		*illpp = NULL;
1986 		*isv6 = B_TRUE;
1987 	}
1988 
1989 	if (ifindex == 0) {
1990 		if (*isv6)
1991 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1992 		else
1993 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1994 		if (ill == NULL && ipif == NULL)
1995 			return (EADDRNOTAVAIL);
1996 	} else {
1997 		if (*isv6) {
1998 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1999 			    wq, first_mp, func, &err, ipst);
2000 			if (ill != NULL &&
2001 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
2002 				ill_refrele(ill);
2003 				ill = NULL;
2004 				err = EADDRNOTAVAIL;
2005 			}
2006 		} else {
2007 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
2008 			    zoneid, wq, first_mp, func, &err, ipst);
2009 		}
2010 		if (ill == NULL && ipif == NULL)
2011 			return (err);
2012 	}
2013 
2014 	*ipifpp = ipif;
2015 	*illpp = ill;
2016 	return (0);
2017 }
2018 
2019 static int
2020 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
2021     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2022 {
2023 	ilg_t *ilg;
2024 	int i, numsrc, fmode, outsrcs;
2025 	struct sockaddr_in *sin;
2026 	struct sockaddr_in6 *sin6;
2027 	struct in_addr *addrp;
2028 	slist_t *fp;
2029 	boolean_t is_v4only_api;
2030 
2031 	mutex_enter(&connp->conn_lock);
2032 
2033 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2034 	if (ilg == NULL) {
2035 		mutex_exit(&connp->conn_lock);
2036 		return (EADDRNOTAVAIL);
2037 	}
2038 
2039 	if (gf == NULL) {
2040 		ASSERT(imsf != NULL);
2041 		ASSERT(!isv4mapped);
2042 		is_v4only_api = B_TRUE;
2043 		outsrcs = imsf->imsf_numsrc;
2044 	} else {
2045 		ASSERT(imsf == NULL);
2046 		is_v4only_api = B_FALSE;
2047 		outsrcs = gf->gf_numsrc;
2048 	}
2049 
2050 	/*
2051 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2052 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2053 	 * So we need to translate here.
2054 	 */
2055 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2056 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2057 	if ((fp = ilg->ilg_filter) == NULL) {
2058 		numsrc = 0;
2059 	} else {
2060 		for (i = 0; i < outsrcs; i++) {
2061 			if (i == fp->sl_numsrc)
2062 				break;
2063 			if (isv4mapped) {
2064 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2065 				sin6->sin6_family = AF_INET6;
2066 				sin6->sin6_addr = fp->sl_addr[i];
2067 			} else {
2068 				if (is_v4only_api) {
2069 					addrp = &imsf->imsf_slist[i];
2070 				} else {
2071 					sin = (struct sockaddr_in *)
2072 					    &gf->gf_slist[i];
2073 					sin->sin_family = AF_INET;
2074 					addrp = &sin->sin_addr;
2075 				}
2076 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2077 			}
2078 		}
2079 		numsrc = fp->sl_numsrc;
2080 	}
2081 
2082 	if (is_v4only_api) {
2083 		imsf->imsf_numsrc = numsrc;
2084 		imsf->imsf_fmode = fmode;
2085 	} else {
2086 		gf->gf_numsrc = numsrc;
2087 		gf->gf_fmode = fmode;
2088 	}
2089 
2090 	mutex_exit(&connp->conn_lock);
2091 
2092 	return (0);
2093 }
2094 
2095 static int
2096 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2097     const struct in6_addr *grp, ill_t *ill)
2098 {
2099 	ilg_t *ilg;
2100 	int i;
2101 	struct sockaddr_storage *sl;
2102 	struct sockaddr_in6 *sin6;
2103 	slist_t *fp;
2104 
2105 	mutex_enter(&connp->conn_lock);
2106 
2107 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2108 	if (ilg == NULL) {
2109 		mutex_exit(&connp->conn_lock);
2110 		return (EADDRNOTAVAIL);
2111 	}
2112 
2113 	/*
2114 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2115 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2116 	 * So we need to translate here.
2117 	 */
2118 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2119 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2120 	if ((fp = ilg->ilg_filter) == NULL) {
2121 		gf->gf_numsrc = 0;
2122 	} else {
2123 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2124 			if (i == fp->sl_numsrc)
2125 				break;
2126 			sin6 = (struct sockaddr_in6 *)sl;
2127 			sin6->sin6_family = AF_INET6;
2128 			sin6->sin6_addr = fp->sl_addr[i];
2129 		}
2130 		gf->gf_numsrc = fp->sl_numsrc;
2131 	}
2132 
2133 	mutex_exit(&connp->conn_lock);
2134 
2135 	return (0);
2136 }
2137 
2138 static int
2139 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2140     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2141 {
2142 	ilg_t *ilg;
2143 	int i, err, infmode, new_fmode;
2144 	uint_t insrcs;
2145 	struct sockaddr_in *sin;
2146 	struct sockaddr_in6 *sin6;
2147 	struct in_addr *addrp;
2148 	slist_t *orig_filter = NULL;
2149 	slist_t *new_filter = NULL;
2150 	mcast_record_t orig_fmode;
2151 	boolean_t leave_grp, is_v4only_api;
2152 	ilg_stat_t ilgstat;
2153 
2154 	if (gf == NULL) {
2155 		ASSERT(imsf != NULL);
2156 		ASSERT(!isv4mapped);
2157 		is_v4only_api = B_TRUE;
2158 		insrcs = imsf->imsf_numsrc;
2159 		infmode = imsf->imsf_fmode;
2160 	} else {
2161 		ASSERT(imsf == NULL);
2162 		is_v4only_api = B_FALSE;
2163 		insrcs = gf->gf_numsrc;
2164 		infmode = gf->gf_fmode;
2165 	}
2166 
2167 	/* Make sure we can handle the source list */
2168 	if (insrcs > MAX_FILTER_SIZE)
2169 		return (ENOBUFS);
2170 
2171 	/*
2172 	 * setting the filter to (INCLUDE, NULL) is treated
2173 	 * as a request to leave the group.
2174 	 */
2175 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2176 
2177 	ASSERT(IAM_WRITER_IPIF(ipif));
2178 
2179 	mutex_enter(&connp->conn_lock);
2180 
2181 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2182 	if (ilg == NULL) {
2183 		/*
2184 		 * if the request was actually to leave, and we
2185 		 * didn't find an ilg, there's nothing to do.
2186 		 */
2187 		if (!leave_grp)
2188 			ilg = conn_ilg_alloc(connp);
2189 		if (leave_grp || ilg == NULL) {
2190 			mutex_exit(&connp->conn_lock);
2191 			return (leave_grp ? 0 : ENOMEM);
2192 		}
2193 		ilgstat = ILGSTAT_NEW;
2194 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2195 		ilg->ilg_ipif = ipif;
2196 		ilg->ilg_ill = NULL;
2197 		ilg->ilg_orig_ifindex = 0;
2198 	} else if (leave_grp) {
2199 		ilg_delete(connp, ilg, NULL);
2200 		mutex_exit(&connp->conn_lock);
2201 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2202 		return (0);
2203 	} else {
2204 		ilgstat = ILGSTAT_CHANGE;
2205 		/* Preserve existing state in case ip_addmulti() fails */
2206 		orig_fmode = ilg->ilg_fmode;
2207 		if (ilg->ilg_filter == NULL) {
2208 			orig_filter = NULL;
2209 		} else {
2210 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2211 			if (orig_filter == NULL) {
2212 				mutex_exit(&connp->conn_lock);
2213 				return (ENOMEM);
2214 			}
2215 		}
2216 	}
2217 
2218 	/*
2219 	 * Alloc buffer to copy new state into (see below) before
2220 	 * we make any changes, so we can bail if it fails.
2221 	 */
2222 	if ((new_filter = l_alloc()) == NULL) {
2223 		mutex_exit(&connp->conn_lock);
2224 		err = ENOMEM;
2225 		goto free_and_exit;
2226 	}
2227 
2228 	if (insrcs == 0) {
2229 		CLEAR_SLIST(ilg->ilg_filter);
2230 	} else {
2231 		slist_t *fp;
2232 		if (ilg->ilg_filter == NULL) {
2233 			fp = l_alloc();
2234 			if (fp == NULL) {
2235 				if (ilgstat == ILGSTAT_NEW)
2236 					ilg_delete(connp, ilg, NULL);
2237 				mutex_exit(&connp->conn_lock);
2238 				err = ENOMEM;
2239 				goto free_and_exit;
2240 			}
2241 		} else {
2242 			fp = ilg->ilg_filter;
2243 		}
2244 		for (i = 0; i < insrcs; i++) {
2245 			if (isv4mapped) {
2246 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2247 				fp->sl_addr[i] = sin6->sin6_addr;
2248 			} else {
2249 				if (is_v4only_api) {
2250 					addrp = &imsf->imsf_slist[i];
2251 				} else {
2252 					sin = (struct sockaddr_in *)
2253 					    &gf->gf_slist[i];
2254 					addrp = &sin->sin_addr;
2255 				}
2256 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2257 			}
2258 		}
2259 		fp->sl_numsrc = insrcs;
2260 		ilg->ilg_filter = fp;
2261 	}
2262 	/*
2263 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2264 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2265 	 * So we need to translate here.
2266 	 */
2267 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2268 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2269 
2270 	/*
2271 	 * Save copy of ilg's filter state to pass to other functions,
2272 	 * so we can release conn_lock now.
2273 	 */
2274 	new_fmode = ilg->ilg_fmode;
2275 	l_copy(ilg->ilg_filter, new_filter);
2276 
2277 	mutex_exit(&connp->conn_lock);
2278 
2279 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2280 	if (err != 0) {
2281 		/*
2282 		 * Restore the original filter state, or delete the
2283 		 * newly-created ilg.  We need to look up the ilg
2284 		 * again, though, since we've not been holding the
2285 		 * conn_lock.
2286 		 */
2287 		mutex_enter(&connp->conn_lock);
2288 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2289 		ASSERT(ilg != NULL);
2290 		if (ilgstat == ILGSTAT_NEW) {
2291 			ilg_delete(connp, ilg, NULL);
2292 		} else {
2293 			ilg->ilg_fmode = orig_fmode;
2294 			if (SLIST_IS_EMPTY(orig_filter)) {
2295 				CLEAR_SLIST(ilg->ilg_filter);
2296 			} else {
2297 				/*
2298 				 * We didn't free the filter, even if we
2299 				 * were trying to make the source list empty;
2300 				 * so if orig_filter isn't empty, the ilg
2301 				 * must still have a filter alloc'd.
2302 				 */
2303 				l_copy(orig_filter, ilg->ilg_filter);
2304 			}
2305 		}
2306 		mutex_exit(&connp->conn_lock);
2307 	}
2308 
2309 free_and_exit:
2310 	l_free(orig_filter);
2311 	l_free(new_filter);
2312 
2313 	return (err);
2314 }
2315 
2316 static int
2317 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2318     const struct in6_addr *grp, ill_t *ill)
2319 {
2320 	ilg_t *ilg;
2321 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2322 	slist_t *orig_filter = NULL;
2323 	slist_t *new_filter = NULL;
2324 	struct sockaddr_storage *sl;
2325 	struct sockaddr_in6 *sin6;
2326 	boolean_t leave_grp;
2327 	ilg_stat_t ilgstat;
2328 
2329 	/* Make sure we can handle the source list */
2330 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2331 		return (ENOBUFS);
2332 
2333 	/*
2334 	 * setting the filter to (INCLUDE, NULL) is treated
2335 	 * as a request to leave the group.
2336 	 */
2337 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2338 
2339 	ASSERT(IAM_WRITER_ILL(ill));
2340 
2341 	/*
2342 	 * Use the ifindex to do the lookup.  We can't use the ill
2343 	 * directly because ilg_ill could point to a different ill
2344 	 * if things have moved.
2345 	 */
2346 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2347 
2348 	mutex_enter(&connp->conn_lock);
2349 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2350 	if (ilg == NULL) {
2351 		/*
2352 		 * if the request was actually to leave, and we
2353 		 * didn't find an ilg, there's nothing to do.
2354 		 */
2355 		if (!leave_grp)
2356 			ilg = conn_ilg_alloc(connp);
2357 		if (leave_grp || ilg == NULL) {
2358 			mutex_exit(&connp->conn_lock);
2359 			return (leave_grp ? 0 : ENOMEM);
2360 		}
2361 		ilgstat = ILGSTAT_NEW;
2362 		ilg->ilg_v6group = *grp;
2363 		ilg->ilg_ipif = NULL;
2364 		/*
2365 		 * Choose our target ill to join on. This might be
2366 		 * different from the ill we've been given if it's
2367 		 * currently down and part of a group.
2368 		 *
2369 		 * new ill is not refheld; we are writer.
2370 		 */
2371 		ill = ip_choose_multi_ill(ill, grp);
2372 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2373 		ilg->ilg_ill = ill;
2374 		/*
2375 		 * Remember the index that we joined on, so that we can
2376 		 * successfully delete them later on and also search for
2377 		 * duplicates if the application wants to join again.
2378 		 */
2379 		ilg->ilg_orig_ifindex = orig_ifindex;
2380 	} else if (leave_grp) {
2381 		/*
2382 		 * Use the ilg's current ill for the deletion,
2383 		 * we might have failed over.
2384 		 */
2385 		ill = ilg->ilg_ill;
2386 		ilg_delete(connp, ilg, NULL);
2387 		mutex_exit(&connp->conn_lock);
2388 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2389 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2390 		return (0);
2391 	} else {
2392 		ilgstat = ILGSTAT_CHANGE;
2393 		/*
2394 		 * The current ill might be different from the one we were
2395 		 * asked to join on (if failover has occurred); we should
2396 		 * join on the ill stored in the ilg.  The original ill
2397 		 * is noted in ilg_orig_ifindex, which matched our request.
2398 		 */
2399 		ill = ilg->ilg_ill;
2400 		/* preserve existing state in case ip_addmulti() fails */
2401 		orig_fmode = ilg->ilg_fmode;
2402 		if (ilg->ilg_filter == NULL) {
2403 			orig_filter = NULL;
2404 		} else {
2405 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2406 			if (orig_filter == NULL) {
2407 				mutex_exit(&connp->conn_lock);
2408 				return (ENOMEM);
2409 			}
2410 		}
2411 	}
2412 
2413 	/*
2414 	 * Alloc buffer to copy new state into (see below) before
2415 	 * we make any changes, so we can bail if it fails.
2416 	 */
2417 	if ((new_filter = l_alloc()) == NULL) {
2418 		mutex_exit(&connp->conn_lock);
2419 		err = ENOMEM;
2420 		goto free_and_exit;
2421 	}
2422 
2423 	if (gf->gf_numsrc == 0) {
2424 		CLEAR_SLIST(ilg->ilg_filter);
2425 	} else {
2426 		slist_t *fp;
2427 		if (ilg->ilg_filter == NULL) {
2428 			fp = l_alloc();
2429 			if (fp == NULL) {
2430 				if (ilgstat == ILGSTAT_NEW)
2431 					ilg_delete(connp, ilg, NULL);
2432 				mutex_exit(&connp->conn_lock);
2433 				err = ENOMEM;
2434 				goto free_and_exit;
2435 			}
2436 		} else {
2437 			fp = ilg->ilg_filter;
2438 		}
2439 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2440 			sin6 = (struct sockaddr_in6 *)sl;
2441 			fp->sl_addr[i] = sin6->sin6_addr;
2442 		}
2443 		fp->sl_numsrc = gf->gf_numsrc;
2444 		ilg->ilg_filter = fp;
2445 	}
2446 	/*
2447 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2448 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2449 	 * So we need to translate here.
2450 	 */
2451 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2452 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2453 
2454 	/*
2455 	 * Save copy of ilg's filter state to pass to other functions,
2456 	 * so we can release conn_lock now.
2457 	 */
2458 	new_fmode = ilg->ilg_fmode;
2459 	l_copy(ilg->ilg_filter, new_filter);
2460 
2461 	mutex_exit(&connp->conn_lock);
2462 
2463 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2464 	    ilgstat, new_fmode, new_filter);
2465 	if (err != 0) {
2466 		/*
2467 		 * Restore the original filter state, or delete the
2468 		 * newly-created ilg.  We need to look up the ilg
2469 		 * again, though, since we've not been holding the
2470 		 * conn_lock.
2471 		 */
2472 		mutex_enter(&connp->conn_lock);
2473 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2474 		ASSERT(ilg != NULL);
2475 		if (ilgstat == ILGSTAT_NEW) {
2476 			ilg_delete(connp, ilg, NULL);
2477 		} else {
2478 			ilg->ilg_fmode = orig_fmode;
2479 			if (SLIST_IS_EMPTY(orig_filter)) {
2480 				CLEAR_SLIST(ilg->ilg_filter);
2481 			} else {
2482 				/*
2483 				 * We didn't free the filter, even if we
2484 				 * were trying to make the source list empty;
2485 				 * so if orig_filter isn't empty, the ilg
2486 				 * must still have a filter alloc'd.
2487 				 */
2488 				l_copy(orig_filter, ilg->ilg_filter);
2489 			}
2490 		}
2491 		mutex_exit(&connp->conn_lock);
2492 	}
2493 
2494 free_and_exit:
2495 	l_free(orig_filter);
2496 	l_free(new_filter);
2497 
2498 	return (err);
2499 }
2500 
2501 /*
2502  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2503  */
2504 /* ARGSUSED */
2505 int
2506 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2507     ip_ioctl_cmd_t *ipip, void *ifreq)
2508 {
2509 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2510 	/* existence verified in ip_wput_nondata() */
2511 	mblk_t *data_mp = mp->b_cont->b_cont;
2512 	int datalen, err, cmd, minsize;
2513 	uint_t expsize = 0;
2514 	conn_t *connp;
2515 	boolean_t isv6, is_v4only_api, getcmd;
2516 	struct sockaddr_in *gsin;
2517 	struct sockaddr_in6 *gsin6;
2518 	ipaddr_t v4grp;
2519 	in6_addr_t v6grp;
2520 	struct group_filter *gf = NULL;
2521 	struct ip_msfilter *imsf = NULL;
2522 	mblk_t *ndp;
2523 
2524 	if (data_mp->b_cont != NULL) {
2525 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2526 			return (ENOMEM);
2527 		freemsg(data_mp);
2528 		data_mp = ndp;
2529 		mp->b_cont->b_cont = data_mp;
2530 	}
2531 
2532 	cmd = iocp->ioc_cmd;
2533 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2534 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2535 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2536 	datalen = MBLKL(data_mp);
2537 
2538 	if (datalen < minsize)
2539 		return (EINVAL);
2540 
2541 	/*
2542 	 * now we know we have at least have the initial structure,
2543 	 * but need to check for the source list array.
2544 	 */
2545 	if (is_v4only_api) {
2546 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2547 		isv6 = B_FALSE;
2548 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2549 	} else {
2550 		gf = (struct group_filter *)data_mp->b_rptr;
2551 		if (gf->gf_group.ss_family == AF_INET6) {
2552 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2553 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2554 		} else {
2555 			isv6 = B_FALSE;
2556 		}
2557 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2558 	}
2559 	if (datalen < expsize)
2560 		return (EINVAL);
2561 
2562 	connp = Q_TO_CONN(q);
2563 
2564 	/* operation not supported on the virtual network interface */
2565 	if (IS_VNI(ipif->ipif_ill))
2566 		return (EINVAL);
2567 
2568 	if (isv6) {
2569 		ill_t *ill = ipif->ipif_ill;
2570 		ill_refhold(ill);
2571 
2572 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2573 		v6grp = gsin6->sin6_addr;
2574 		if (getcmd)
2575 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2576 		else
2577 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2578 
2579 		ill_refrele(ill);
2580 	} else {
2581 		boolean_t isv4mapped = B_FALSE;
2582 		if (is_v4only_api) {
2583 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2584 		} else {
2585 			if (gf->gf_group.ss_family == AF_INET) {
2586 				gsin = (struct sockaddr_in *)&gf->gf_group;
2587 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2588 			} else {
2589 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2590 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2591 				    v4grp);
2592 				isv4mapped = B_TRUE;
2593 			}
2594 		}
2595 		if (getcmd)
2596 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2597 			    isv4mapped);
2598 		else
2599 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2600 			    isv4mapped);
2601 	}
2602 
2603 	return (err);
2604 }
2605 
2606 /*
2607  * Finds the ipif based on information in the ioctl headers.  Needed to make
2608  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2609  * ioctls prior to calling the ioctl's handler function).
2610  */
2611 int
2612 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2613     cmd_info_t *ci, ipsq_func_t func)
2614 {
2615 	int cmd = ipip->ipi_cmd;
2616 	int err = 0;
2617 	conn_t *connp;
2618 	ipif_t *ipif;
2619 	/* caller has verified this mblk exists */
2620 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2621 	struct ip_msfilter *imsf;
2622 	struct group_filter *gf;
2623 	ipaddr_t v4addr, v4grp;
2624 	in6_addr_t v6grp;
2625 	uint32_t index;
2626 	zoneid_t zoneid;
2627 	ip_stack_t *ipst;
2628 
2629 	connp = Q_TO_CONN(q);
2630 	zoneid = connp->conn_zoneid;
2631 	ipst = connp->conn_netstack->netstack_ip;
2632 
2633 	/* don't allow multicast operations on a tcp conn */
2634 	if (IPCL_IS_TCP(connp))
2635 		return (ENOPROTOOPT);
2636 
2637 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2638 		/* don't allow v4-specific ioctls on v6 socket */
2639 		if (connp->conn_af_isv6)
2640 			return (EAFNOSUPPORT);
2641 
2642 		imsf = (struct ip_msfilter *)dbuf;
2643 		v4addr = imsf->imsf_interface.s_addr;
2644 		v4grp = imsf->imsf_multiaddr.s_addr;
2645 		if (v4addr == INADDR_ANY) {
2646 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2647 			if (ipif == NULL)
2648 				err = EADDRNOTAVAIL;
2649 		} else {
2650 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2651 			    func, &err, ipst);
2652 		}
2653 	} else {
2654 		boolean_t isv6 = B_FALSE;
2655 		gf = (struct group_filter *)dbuf;
2656 		index = gf->gf_interface;
2657 		if (gf->gf_group.ss_family == AF_INET6) {
2658 			struct sockaddr_in6 *sin6;
2659 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2660 			v6grp = sin6->sin6_addr;
2661 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2662 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2663 			else
2664 				isv6 = B_TRUE;
2665 		} else if (gf->gf_group.ss_family == AF_INET) {
2666 			struct sockaddr_in *sin;
2667 			sin = (struct sockaddr_in *)&gf->gf_group;
2668 			v4grp = sin->sin_addr.s_addr;
2669 		} else {
2670 			return (EAFNOSUPPORT);
2671 		}
2672 		if (index == 0) {
2673 			if (isv6) {
2674 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2675 				    ipst);
2676 			} else {
2677 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2678 			}
2679 			if (ipif == NULL)
2680 				err = EADDRNOTAVAIL;
2681 		} else {
2682 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2683 			    q, mp, func, &err, ipst);
2684 		}
2685 	}
2686 
2687 	ci->ci_ipif = ipif;
2688 	return (err);
2689 }
2690 
2691 /*
2692  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2693  * in in two stages, as the first copyin tells us the size of the attached
2694  * source buffer.  This function is called by ip_wput_nondata() after the
2695  * first copyin has completed; it figures out how big the second stage
2696  * needs to be, and kicks it off.
2697  *
2698  * In some cases (numsrc < 2), the second copyin is not needed as the
2699  * first one gets a complete structure containing 1 source addr.
2700  *
2701  * The function returns 0 if a second copyin has been started (i.e. there's
2702  * no more work to be done right now), or 1 if the second copyin is not
2703  * needed and ip_wput_nondata() can continue its processing.
2704  */
2705 int
2706 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2707 {
2708 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2709 	int cmd = iocp->ioc_cmd;
2710 	/* validity of this checked in ip_wput_nondata() */
2711 	mblk_t *mp1 = mp->b_cont->b_cont;
2712 	int copysize = 0;
2713 	int offset;
2714 
2715 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2716 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2717 		if (gf->gf_numsrc >= 2) {
2718 			offset = sizeof (struct group_filter);
2719 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2720 		}
2721 	} else {
2722 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2723 		if (imsf->imsf_numsrc >= 2) {
2724 			offset = sizeof (struct ip_msfilter);
2725 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2726 		}
2727 	}
2728 	if (copysize > 0) {
2729 		mi_copyin_n(q, mp, offset, copysize);
2730 		return (0);
2731 	}
2732 	return (1);
2733 }
2734 
2735 /*
2736  * Handle the following optmgmt:
2737  *	IP_ADD_MEMBERSHIP		must not have joined already
2738  *	MCAST_JOIN_GROUP		must not have joined already
2739  *	IP_BLOCK_SOURCE			must have joined already
2740  *	MCAST_BLOCK_SOURCE		must have joined already
2741  *	IP_JOIN_SOURCE_GROUP		may have joined already
2742  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2743  *
2744  * fmode and src parameters may be used to determine which option is
2745  * being set, as follows (the IP_* and MCAST_* versions of each option
2746  * are functionally equivalent):
2747  *	opt			fmode			src
2748  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2749  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2750  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2751  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2752  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2753  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2754  *
2755  * Changing the filter mode is not allowed; if a matching ilg already
2756  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2757  *
2758  * Verifies that there is a source address of appropriate scope for
2759  * the group; if not, EADDRNOTAVAIL is returned.
2760  *
2761  * The interface to be used may be identified by an address or by an
2762  * index.  A pointer to the index is passed; if it is NULL, use the
2763  * address, otherwise, use the index.
2764  */
2765 int
2766 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2767     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2768     mblk_t *first_mp)
2769 {
2770 	ipif_t	*ipif;
2771 	ipsq_t	*ipsq;
2772 	int err = 0;
2773 	ill_t	*ill;
2774 
2775 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2776 	    ip_restart_optmgmt, &ipif);
2777 	if (err != 0) {
2778 		if (err != EINPROGRESS) {
2779 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2780 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2781 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2782 		}
2783 		return (err);
2784 	}
2785 	ASSERT(ipif != NULL);
2786 
2787 	ill = ipif->ipif_ill;
2788 	/* Operation not supported on a virtual network interface */
2789 	if (IS_VNI(ill)) {
2790 		ipif_refrele(ipif);
2791 		return (EINVAL);
2792 	}
2793 
2794 	if (checkonly) {
2795 		/*
2796 		 * do not do operation, just pretend to - new T_CHECK
2797 		 * semantics. The error return case above if encountered
2798 		 * considered a good enough "check" here.
2799 		 */
2800 		ipif_refrele(ipif);
2801 		return (0);
2802 	}
2803 
2804 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2805 	    NEW_OP);
2806 
2807 	/* unspecified source addr => no source filtering */
2808 	err = ilg_add(connp, group, ipif, fmode, src);
2809 
2810 	IPSQ_EXIT(ipsq);
2811 
2812 	ipif_refrele(ipif);
2813 	return (err);
2814 }
2815 
2816 /*
2817  * Handle the following optmgmt:
2818  *	IPV6_JOIN_GROUP			must not have joined already
2819  *	MCAST_JOIN_GROUP		must not have joined already
2820  *	MCAST_BLOCK_SOURCE		must have joined already
2821  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2822  *
2823  * fmode and src parameters may be used to determine which option is
2824  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2825  * are functionally equivalent):
2826  *	opt			fmode			v6src
2827  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2828  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2829  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2830  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2831  *
2832  * Changing the filter mode is not allowed; if a matching ilg already
2833  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2834  *
2835  * Verifies that there is a source address of appropriate scope for
2836  * the group; if not, EADDRNOTAVAIL is returned.
2837  *
2838  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2839  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2840  * v6src is also v4-mapped.
2841  */
2842 int
2843 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2844     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2845     const in6_addr_t *v6src, mblk_t *first_mp)
2846 {
2847 	ill_t *ill;
2848 	ipif_t	*ipif;
2849 	char buf[INET6_ADDRSTRLEN];
2850 	ipaddr_t v4group, v4src;
2851 	boolean_t isv6;
2852 	ipsq_t	*ipsq;
2853 	int	err;
2854 
2855 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2856 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2857 	if (err != 0) {
2858 		if (err != EINPROGRESS) {
2859 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2860 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2861 			    sizeof (buf)), ifindex));
2862 		}
2863 		return (err);
2864 	}
2865 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2866 
2867 	/* operation is not supported on the virtual network interface */
2868 	if (isv6) {
2869 		if (IS_VNI(ill)) {
2870 			ill_refrele(ill);
2871 			return (EINVAL);
2872 		}
2873 	} else {
2874 		if (IS_VNI(ipif->ipif_ill)) {
2875 			ipif_refrele(ipif);
2876 			return (EINVAL);
2877 		}
2878 	}
2879 
2880 	if (checkonly) {
2881 		/*
2882 		 * do not do operation, just pretend to - new T_CHECK
2883 		 * semantics. The error return case above if encountered
2884 		 * considered a good enough "check" here.
2885 		 */
2886 		if (isv6)
2887 			ill_refrele(ill);
2888 		else
2889 			ipif_refrele(ipif);
2890 		return (0);
2891 	}
2892 
2893 	if (!isv6) {
2894 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2895 		    ipsq, NEW_OP);
2896 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2897 		IPSQ_EXIT(ipsq);
2898 		ipif_refrele(ipif);
2899 	} else {
2900 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2901 		    ipsq, NEW_OP);
2902 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2903 		IPSQ_EXIT(ipsq);
2904 		ill_refrele(ill);
2905 	}
2906 
2907 	return (err);
2908 }
2909 
2910 static int
2911 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2912     mcast_record_t fmode, ipaddr_t src)
2913 {
2914 	ilg_t	*ilg;
2915 	in6_addr_t v6src;
2916 	boolean_t leaving = B_FALSE;
2917 
2918 	ASSERT(IAM_WRITER_IPIF(ipif));
2919 
2920 	/*
2921 	 * The ilg is valid only while we hold the conn lock. Once we drop
2922 	 * the lock, another thread can locate another ilg on this connp,
2923 	 * but on a different ipif, and delete it, and cause the ilg array
2924 	 * to be reallocated and copied. Hence do the ilg_delete before
2925 	 * dropping the lock.
2926 	 */
2927 	mutex_enter(&connp->conn_lock);
2928 	ilg = ilg_lookup_ipif(connp, group, ipif);
2929 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2930 		mutex_exit(&connp->conn_lock);
2931 		return (EADDRNOTAVAIL);
2932 	}
2933 
2934 	/*
2935 	 * Decide if we're actually deleting the ilg or just removing a
2936 	 * source filter address; if just removing an addr, make sure we
2937 	 * aren't trying to change the filter mode, and that the addr is
2938 	 * actually in our filter list already.  If we're removing the
2939 	 * last src in an include list, just delete the ilg.
2940 	 */
2941 	if (src == INADDR_ANY) {
2942 		v6src = ipv6_all_zeros;
2943 		leaving = B_TRUE;
2944 	} else {
2945 		int err = 0;
2946 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2947 		if (fmode != ilg->ilg_fmode)
2948 			err = EINVAL;
2949 		else if (ilg->ilg_filter == NULL ||
2950 		    !list_has_addr(ilg->ilg_filter, &v6src))
2951 			err = EADDRNOTAVAIL;
2952 		if (err != 0) {
2953 			mutex_exit(&connp->conn_lock);
2954 			return (err);
2955 		}
2956 		if (fmode == MODE_IS_INCLUDE &&
2957 		    ilg->ilg_filter->sl_numsrc == 1) {
2958 			v6src = ipv6_all_zeros;
2959 			leaving = B_TRUE;
2960 		}
2961 	}
2962 
2963 	ilg_delete(connp, ilg, &v6src);
2964 	mutex_exit(&connp->conn_lock);
2965 
2966 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2967 	return (0);
2968 }
2969 
2970 static int
2971 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2972     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2973 {
2974 	ilg_t	*ilg;
2975 	ill_t	*ilg_ill;
2976 	uint_t	ilg_orig_ifindex;
2977 	boolean_t leaving = B_TRUE;
2978 
2979 	ASSERT(IAM_WRITER_ILL(ill));
2980 
2981 	/*
2982 	 * Use the index that we originally used to join. We can't
2983 	 * use the ill directly because ilg_ill could point to
2984 	 * a new ill if things have moved.
2985 	 */
2986 	mutex_enter(&connp->conn_lock);
2987 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2988 	    ill->ill_phyint->phyint_ifindex);
2989 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2990 		mutex_exit(&connp->conn_lock);
2991 		return (EADDRNOTAVAIL);
2992 	}
2993 
2994 	/*
2995 	 * Decide if we're actually deleting the ilg or just removing a
2996 	 * source filter address; if just removing an addr, make sure we
2997 	 * aren't trying to change the filter mode, and that the addr is
2998 	 * actually in our filter list already.  If we're removing the
2999 	 * last src in an include list, just delete the ilg.
3000 	 */
3001 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3002 		int err = 0;
3003 		if (fmode != ilg->ilg_fmode)
3004 			err = EINVAL;
3005 		else if (ilg->ilg_filter == NULL ||
3006 		    !list_has_addr(ilg->ilg_filter, v6src))
3007 			err = EADDRNOTAVAIL;
3008 		if (err != 0) {
3009 			mutex_exit(&connp->conn_lock);
3010 			return (err);
3011 		}
3012 		if (fmode == MODE_IS_INCLUDE &&
3013 		    ilg->ilg_filter->sl_numsrc == 1)
3014 			v6src = NULL;
3015 		else
3016 			leaving = B_FALSE;
3017 	}
3018 
3019 	ilg_ill = ilg->ilg_ill;
3020 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
3021 	ilg_delete(connp, ilg, v6src);
3022 	mutex_exit(&connp->conn_lock);
3023 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
3024 	    connp->conn_zoneid, B_FALSE, leaving);
3025 
3026 	return (0);
3027 }
3028 
3029 /*
3030  * Handle the following optmgmt:
3031  *	IP_DROP_MEMBERSHIP		will leave
3032  *	MCAST_LEAVE_GROUP		will leave
3033  *	IP_UNBLOCK_SOURCE		will not leave
3034  *	MCAST_UNBLOCK_SOURCE		will not leave
3035  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
3036  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3037  *
3038  * fmode and src parameters may be used to determine which option is
3039  * being set, as follows (the IP_* and MCAST_* versions of each option
3040  * are functionally equivalent):
3041  *	opt			 fmode			src
3042  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
3043  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3044  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3045  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3046  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3047  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3048  *
3049  * Changing the filter mode is not allowed; if a matching ilg already
3050  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3051  *
3052  * The interface to be used may be identified by an address or by an
3053  * index.  A pointer to the index is passed; if it is NULL, use the
3054  * address, otherwise, use the index.
3055  */
3056 int
3057 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3058     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3059     mblk_t *first_mp)
3060 {
3061 	ipif_t	*ipif;
3062 	ipsq_t	*ipsq;
3063 	int	err;
3064 	ill_t	*ill;
3065 
3066 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3067 	    ip_restart_optmgmt, &ipif);
3068 	if (err != 0) {
3069 		if (err != EINPROGRESS) {
3070 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3071 			    "0x%x, ifaddr 0x%x\n",
3072 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3073 		}
3074 		return (err);
3075 	}
3076 	ASSERT(ipif != NULL);
3077 
3078 	ill = ipif->ipif_ill;
3079 	/* Operation not supported on a virtual network interface */
3080 	if (IS_VNI(ill)) {
3081 		ipif_refrele(ipif);
3082 		return (EINVAL);
3083 	}
3084 
3085 	if (checkonly) {
3086 		/*
3087 		 * do not do operation, just pretend to - new T_CHECK
3088 		 * semantics. The error return case above if encountered
3089 		 * considered a good enough "check" here.
3090 		 */
3091 		ipif_refrele(ipif);
3092 		return (0);
3093 	}
3094 
3095 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3096 	    NEW_OP);
3097 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3098 	IPSQ_EXIT(ipsq);
3099 
3100 	ipif_refrele(ipif);
3101 	return (err);
3102 }
3103 
3104 /*
3105  * Handle the following optmgmt:
3106  *	IPV6_LEAVE_GROUP		will leave
3107  *	MCAST_LEAVE_GROUP		will leave
3108  *	MCAST_UNBLOCK_SOURCE		will not leave
3109  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3110  *
3111  * fmode and src parameters may be used to determine which option is
3112  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3113  * are functionally equivalent):
3114  *	opt			 fmode			v6src
3115  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3116  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3117  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3118  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3119  *
3120  * Changing the filter mode is not allowed; if a matching ilg already
3121  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3122  *
3123  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3124  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3125  * v6src is also v4-mapped.
3126  */
3127 int
3128 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3129     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3130     const in6_addr_t *v6src, mblk_t *first_mp)
3131 {
3132 	ill_t *ill;
3133 	ipif_t	*ipif;
3134 	char	buf[INET6_ADDRSTRLEN];
3135 	ipaddr_t v4group, v4src;
3136 	boolean_t isv6;
3137 	ipsq_t	*ipsq;
3138 	int	err;
3139 
3140 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3141 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3142 	if (err != 0) {
3143 		if (err != EINPROGRESS) {
3144 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3145 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3146 			    sizeof (buf)), ifindex));
3147 		}
3148 		return (err);
3149 	}
3150 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3151 
3152 	/* operation is not supported on the virtual network interface */
3153 	if (isv6) {
3154 		if (IS_VNI(ill)) {
3155 			ill_refrele(ill);
3156 			return (EINVAL);
3157 		}
3158 	} else {
3159 		if (IS_VNI(ipif->ipif_ill)) {
3160 			ipif_refrele(ipif);
3161 			return (EINVAL);
3162 		}
3163 	}
3164 
3165 	if (checkonly) {
3166 		/*
3167 		 * do not do operation, just pretend to - new T_CHECK
3168 		 * semantics. The error return case above if encountered
3169 		 * considered a good enough "check" here.
3170 		 */
3171 		if (isv6)
3172 			ill_refrele(ill);
3173 		else
3174 			ipif_refrele(ipif);
3175 		return (0);
3176 	}
3177 
3178 	if (!isv6) {
3179 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3180 		    ipsq, NEW_OP);
3181 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3182 		    v4src);
3183 		IPSQ_EXIT(ipsq);
3184 		ipif_refrele(ipif);
3185 	} else {
3186 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3187 		    ipsq, NEW_OP);
3188 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3189 		    v6src);
3190 		IPSQ_EXIT(ipsq);
3191 		ill_refrele(ill);
3192 	}
3193 
3194 	return (err);
3195 }
3196 
3197 /*
3198  * Group mgmt for upper conn that passes things down
3199  * to the interface multicast list (and DLPI)
3200  * These routines can handle new style options that specify an interface name
3201  * as opposed to an interface address (needed for general handling of
3202  * unnumbered interfaces.)
3203  */
3204 
3205 /*
3206  * Add a group to an upper conn group data structure and pass things down
3207  * to the interface multicast list (and DLPI)
3208  */
3209 static int
3210 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3211     ipaddr_t src)
3212 {
3213 	int	error = 0;
3214 	ill_t	*ill;
3215 	ilg_t	*ilg;
3216 	ilg_stat_t ilgstat;
3217 	slist_t	*new_filter = NULL;
3218 	int	new_fmode;
3219 
3220 	ASSERT(IAM_WRITER_IPIF(ipif));
3221 
3222 	ill = ipif->ipif_ill;
3223 
3224 	if (!(ill->ill_flags & ILLF_MULTICAST))
3225 		return (EADDRNOTAVAIL);
3226 
3227 	/*
3228 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3229 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3230 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3231 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3232 	 * but both operations happen on the same conn.
3233 	 */
3234 	mutex_enter(&connp->conn_lock);
3235 	ilg = ilg_lookup_ipif(connp, group, ipif);
3236 
3237 	/*
3238 	 * Depending on the option we're handling, may or may not be okay
3239 	 * if group has already been added.  Figure out our rules based
3240 	 * on fmode and src params.  Also make sure there's enough room
3241 	 * in the filter if we're adding a source to an existing filter.
3242 	 */
3243 	if (src == INADDR_ANY) {
3244 		/* we're joining for all sources, must not have joined */
3245 		if (ilg != NULL)
3246 			error = EADDRINUSE;
3247 	} else {
3248 		if (fmode == MODE_IS_EXCLUDE) {
3249 			/* (excl {addr}) => block source, must have joined */
3250 			if (ilg == NULL)
3251 				error = EADDRNOTAVAIL;
3252 		}
3253 		/* (incl {addr}) => join source, may have joined */
3254 
3255 		if (ilg != NULL &&
3256 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3257 			error = ENOBUFS;
3258 	}
3259 	if (error != 0) {
3260 		mutex_exit(&connp->conn_lock);
3261 		return (error);
3262 	}
3263 
3264 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3265 
3266 	/*
3267 	 * Alloc buffer to copy new state into (see below) before
3268 	 * we make any changes, so we can bail if it fails.
3269 	 */
3270 	if ((new_filter = l_alloc()) == NULL) {
3271 		mutex_exit(&connp->conn_lock);
3272 		return (ENOMEM);
3273 	}
3274 
3275 	if (ilg == NULL) {
3276 		ilgstat = ILGSTAT_NEW;
3277 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3278 			mutex_exit(&connp->conn_lock);
3279 			l_free(new_filter);
3280 			return (ENOMEM);
3281 		}
3282 		if (src != INADDR_ANY) {
3283 			ilg->ilg_filter = l_alloc();
3284 			if (ilg->ilg_filter == NULL) {
3285 				ilg_delete(connp, ilg, NULL);
3286 				mutex_exit(&connp->conn_lock);
3287 				l_free(new_filter);
3288 				return (ENOMEM);
3289 			}
3290 			ilg->ilg_filter->sl_numsrc = 1;
3291 			IN6_IPADDR_TO_V4MAPPED(src,
3292 			    &ilg->ilg_filter->sl_addr[0]);
3293 		}
3294 		if (group == INADDR_ANY) {
3295 			ilg->ilg_v6group = ipv6_all_zeros;
3296 		} else {
3297 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3298 		}
3299 		ilg->ilg_ipif = ipif;
3300 		ilg->ilg_ill = NULL;
3301 		ilg->ilg_orig_ifindex = 0;
3302 		ilg->ilg_fmode = fmode;
3303 	} else {
3304 		int index;
3305 		in6_addr_t v6src;
3306 		ilgstat = ILGSTAT_CHANGE;
3307 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3308 			mutex_exit(&connp->conn_lock);
3309 			l_free(new_filter);
3310 			return (EINVAL);
3311 		}
3312 		if (ilg->ilg_filter == NULL) {
3313 			ilg->ilg_filter = l_alloc();
3314 			if (ilg->ilg_filter == NULL) {
3315 				mutex_exit(&connp->conn_lock);
3316 				l_free(new_filter);
3317 				return (ENOMEM);
3318 			}
3319 		}
3320 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3321 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3322 			mutex_exit(&connp->conn_lock);
3323 			l_free(new_filter);
3324 			return (EADDRNOTAVAIL);
3325 		}
3326 		index = ilg->ilg_filter->sl_numsrc++;
3327 		ilg->ilg_filter->sl_addr[index] = v6src;
3328 	}
3329 
3330 	/*
3331 	 * Save copy of ilg's filter state to pass to other functions,
3332 	 * so we can release conn_lock now.
3333 	 */
3334 	new_fmode = ilg->ilg_fmode;
3335 	l_copy(ilg->ilg_filter, new_filter);
3336 
3337 	mutex_exit(&connp->conn_lock);
3338 
3339 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3340 	if (error != 0) {
3341 		/*
3342 		 * Need to undo what we did before calling ip_addmulti()!
3343 		 * Must look up the ilg again since we've not been holding
3344 		 * conn_lock.
3345 		 */
3346 		in6_addr_t v6src;
3347 		if (ilgstat == ILGSTAT_NEW)
3348 			v6src = ipv6_all_zeros;
3349 		else
3350 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3351 		mutex_enter(&connp->conn_lock);
3352 		ilg = ilg_lookup_ipif(connp, group, ipif);
3353 		ASSERT(ilg != NULL);
3354 		ilg_delete(connp, ilg, &v6src);
3355 		mutex_exit(&connp->conn_lock);
3356 		l_free(new_filter);
3357 		return (error);
3358 	}
3359 
3360 	l_free(new_filter);
3361 	return (0);
3362 }
3363 
3364 static int
3365 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3366     mcast_record_t fmode, const in6_addr_t *v6src)
3367 {
3368 	int	error = 0;
3369 	int	orig_ifindex;
3370 	ilg_t	*ilg;
3371 	ilg_stat_t ilgstat;
3372 	slist_t	*new_filter = NULL;
3373 	int	new_fmode;
3374 
3375 	ASSERT(IAM_WRITER_ILL(ill));
3376 
3377 	if (!(ill->ill_flags & ILLF_MULTICAST))
3378 		return (EADDRNOTAVAIL);
3379 
3380 	/*
3381 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3382 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3383 	 * and hme1 map to different ipsq's, but both operations happen
3384 	 * on the same conn.
3385 	 */
3386 	mutex_enter(&connp->conn_lock);
3387 
3388 	/*
3389 	 * Use the ifindex to do the lookup. We can't use the ill
3390 	 * directly because ilg_ill could point to a different ill if
3391 	 * things have moved.
3392 	 */
3393 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3394 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3395 
3396 	/*
3397 	 * Depending on the option we're handling, may or may not be okay
3398 	 * if group has already been added.  Figure out our rules based
3399 	 * on fmode and src params.  Also make sure there's enough room
3400 	 * in the filter if we're adding a source to an existing filter.
3401 	 */
3402 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3403 		/* we're joining for all sources, must not have joined */
3404 		if (ilg != NULL)
3405 			error = EADDRINUSE;
3406 	} else {
3407 		if (fmode == MODE_IS_EXCLUDE) {
3408 			/* (excl {addr}) => block source, must have joined */
3409 			if (ilg == NULL)
3410 				error = EADDRNOTAVAIL;
3411 		}
3412 		/* (incl {addr}) => join source, may have joined */
3413 
3414 		if (ilg != NULL &&
3415 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3416 			error = ENOBUFS;
3417 	}
3418 	if (error != 0) {
3419 		mutex_exit(&connp->conn_lock);
3420 		return (error);
3421 	}
3422 
3423 	/*
3424 	 * Alloc buffer to copy new state into (see below) before
3425 	 * we make any changes, so we can bail if it fails.
3426 	 */
3427 	if ((new_filter = l_alloc()) == NULL) {
3428 		mutex_exit(&connp->conn_lock);
3429 		return (ENOMEM);
3430 	}
3431 
3432 	if (ilg == NULL) {
3433 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3434 			mutex_exit(&connp->conn_lock);
3435 			l_free(new_filter);
3436 			return (ENOMEM);
3437 		}
3438 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3439 			ilg->ilg_filter = l_alloc();
3440 			if (ilg->ilg_filter == NULL) {
3441 				ilg_delete(connp, ilg, NULL);
3442 				mutex_exit(&connp->conn_lock);
3443 				l_free(new_filter);
3444 				return (ENOMEM);
3445 			}
3446 			ilg->ilg_filter->sl_numsrc = 1;
3447 			ilg->ilg_filter->sl_addr[0] = *v6src;
3448 		}
3449 		ilgstat = ILGSTAT_NEW;
3450 		ilg->ilg_v6group = *v6group;
3451 		ilg->ilg_fmode = fmode;
3452 		ilg->ilg_ipif = NULL;
3453 		/*
3454 		 * Choose our target ill to join on. This might be different
3455 		 * from the ill we've been given if it's currently down and
3456 		 * part of a group.
3457 		 *
3458 		 * new ill is not refheld; we are writer.
3459 		 */
3460 		ill = ip_choose_multi_ill(ill, v6group);
3461 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3462 		ilg->ilg_ill = ill;
3463 		/*
3464 		 * Remember the orig_ifindex that we joined on, so that we
3465 		 * can successfully delete them later on and also search
3466 		 * for duplicates if the application wants to join again.
3467 		 */
3468 		ilg->ilg_orig_ifindex = orig_ifindex;
3469 	} else {
3470 		int index;
3471 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3472 			mutex_exit(&connp->conn_lock);
3473 			l_free(new_filter);
3474 			return (EINVAL);
3475 		}
3476 		if (ilg->ilg_filter == NULL) {
3477 			ilg->ilg_filter = l_alloc();
3478 			if (ilg->ilg_filter == NULL) {
3479 				mutex_exit(&connp->conn_lock);
3480 				l_free(new_filter);
3481 				return (ENOMEM);
3482 			}
3483 		}
3484 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3485 			mutex_exit(&connp->conn_lock);
3486 			l_free(new_filter);
3487 			return (EADDRNOTAVAIL);
3488 		}
3489 		ilgstat = ILGSTAT_CHANGE;
3490 		index = ilg->ilg_filter->sl_numsrc++;
3491 		ilg->ilg_filter->sl_addr[index] = *v6src;
3492 		/*
3493 		 * The current ill might be different from the one we were
3494 		 * asked to join on (if failover has occurred); we should
3495 		 * join on the ill stored in the ilg.  The original ill
3496 		 * is noted in ilg_orig_ifindex, which matched our request.
3497 		 */
3498 		ill = ilg->ilg_ill;
3499 	}
3500 
3501 	/*
3502 	 * Save copy of ilg's filter state to pass to other functions,
3503 	 * so we can release conn_lock now.
3504 	 */
3505 	new_fmode = ilg->ilg_fmode;
3506 	l_copy(ilg->ilg_filter, new_filter);
3507 
3508 	mutex_exit(&connp->conn_lock);
3509 
3510 	/*
3511 	 * Now update the ill. We wait to do this until after the ilg
3512 	 * has been updated because we need to update the src filter
3513 	 * info for the ill, which involves looking at the status of
3514 	 * all the ilgs associated with this group/interface pair.
3515 	 */
3516 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3517 	    ilgstat, new_fmode, new_filter);
3518 	if (error != 0) {
3519 		/*
3520 		 * But because we waited, we have to undo the ilg update
3521 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3522 		 * again, since we've not been holding conn_lock.
3523 		 */
3524 		in6_addr_t delsrc =
3525 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3526 		mutex_enter(&connp->conn_lock);
3527 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3528 		ASSERT(ilg != NULL);
3529 		ilg_delete(connp, ilg, &delsrc);
3530 		mutex_exit(&connp->conn_lock);
3531 		l_free(new_filter);
3532 		return (error);
3533 	}
3534 
3535 	l_free(new_filter);
3536 
3537 	return (0);
3538 }
3539 
3540 /*
3541  * Find an IPv4 ilg matching group, ill and source
3542  */
3543 ilg_t *
3544 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3545 {
3546 	in6_addr_t v6group, v6src;
3547 	int i;
3548 	boolean_t isinlist;
3549 	ilg_t *ilg;
3550 	ipif_t *ipif;
3551 	ill_t *ilg_ill;
3552 
3553 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3554 
3555 	/*
3556 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3557 	 */
3558 	if (group == INADDR_ANY)
3559 		v6group = ipv6_all_zeros;
3560 	else
3561 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3562 
3563 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3564 		ilg = &connp->conn_ilg[i];
3565 		if ((ipif = ilg->ilg_ipif) == NULL ||
3566 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3567 			continue;
3568 		ASSERT(ilg->ilg_ill == NULL);
3569 		ilg_ill = ipif->ipif_ill;
3570 		ASSERT(!ilg_ill->ill_isv6);
3571 		if (ilg_ill == ill &&
3572 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3573 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3574 				/* no source filter, so this is a match */
3575 				return (ilg);
3576 			}
3577 			break;
3578 		}
3579 	}
3580 	if (i == connp->conn_ilg_inuse)
3581 		return (NULL);
3582 
3583 	/*
3584 	 * we have an ilg with matching ill and group; but
3585 	 * the ilg has a source list that we must check.
3586 	 */
3587 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3588 	isinlist = B_FALSE;
3589 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3590 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3591 			isinlist = B_TRUE;
3592 			break;
3593 		}
3594 	}
3595 
3596 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3597 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3598 		return (ilg);
3599 
3600 	return (NULL);
3601 }
3602 
3603 /*
3604  * Find an IPv6 ilg matching group, ill, and source
3605  */
3606 ilg_t *
3607 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3608     const in6_addr_t *v6src, ill_t *ill)
3609 {
3610 	int i;
3611 	boolean_t isinlist;
3612 	ilg_t *ilg;
3613 	ill_t *ilg_ill;
3614 
3615 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3616 
3617 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3618 		ilg = &connp->conn_ilg[i];
3619 		if ((ilg_ill = ilg->ilg_ill) == NULL ||
3620 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3621 			continue;
3622 		ASSERT(ilg->ilg_ipif == NULL);
3623 		ASSERT(ilg_ill->ill_isv6);
3624 		if (ilg_ill == ill &&
3625 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3626 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3627 				/* no source filter, so this is a match */
3628 				return (ilg);
3629 			}
3630 			break;
3631 		}
3632 	}
3633 	if (i == connp->conn_ilg_inuse)
3634 		return (NULL);
3635 
3636 	/*
3637 	 * we have an ilg with matching ill and group; but
3638 	 * the ilg has a source list that we must check.
3639 	 */
3640 	isinlist = B_FALSE;
3641 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3642 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3643 			isinlist = B_TRUE;
3644 			break;
3645 		}
3646 	}
3647 
3648 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3649 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3650 		return (ilg);
3651 
3652 	return (NULL);
3653 }
3654 
3655 /*
3656  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3657  * This is useful when the interface fails and we have moved
3658  * to a new ill, but still would like to locate using the index
3659  * that we originally used to join. Used only for IPv6 currently.
3660  */
3661 static ilg_t *
3662 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3663 {
3664 	ilg_t	*ilg;
3665 	int	i;
3666 
3667 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3668 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3669 		ilg = &connp->conn_ilg[i];
3670 		if (ilg->ilg_ill == NULL ||
3671 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3672 			continue;
3673 		/* ilg_ipif is NULL for V6 */
3674 		ASSERT(ilg->ilg_ipif == NULL);
3675 		ASSERT(ilg->ilg_orig_ifindex != 0);
3676 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3677 		    ilg->ilg_orig_ifindex == ifindex) {
3678 			return (ilg);
3679 		}
3680 	}
3681 	return (NULL);
3682 }
3683 
3684 /*
3685  * Find an IPv6 ilg matching group and ill
3686  */
3687 ilg_t *
3688 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3689 {
3690 	ilg_t	*ilg;
3691 	int	i;
3692 	ill_t 	*mem_ill;
3693 
3694 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3695 
3696 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3697 		ilg = &connp->conn_ilg[i];
3698 		if ((mem_ill = ilg->ilg_ill) == NULL ||
3699 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3700 			continue;
3701 		ASSERT(ilg->ilg_ipif == NULL);
3702 		ASSERT(mem_ill->ill_isv6);
3703 		if (mem_ill == ill &&
3704 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3705 			return (ilg);
3706 	}
3707 	return (NULL);
3708 }
3709 
3710 /*
3711  * Find an IPv4 ilg matching group and ipif
3712  */
3713 static ilg_t *
3714 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3715 {
3716 	in6_addr_t v6group;
3717 	int	i;
3718 	ilg_t	*ilg;
3719 
3720 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3721 	ASSERT(!ipif->ipif_ill->ill_isv6);
3722 
3723 	if (group == INADDR_ANY)
3724 		v6group = ipv6_all_zeros;
3725 	else
3726 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3727 
3728 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3729 		ilg = &connp->conn_ilg[i];
3730 		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
3731 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
3732 		    ilg->ilg_ipif == ipif)
3733 			return (ilg);
3734 	}
3735 	return (NULL);
3736 }
3737 
3738 /*
3739  * If a source address is passed in (src != NULL and src is not
3740  * unspecified), remove the specified src addr from the given ilg's
3741  * filter list, else delete the ilg.
3742  */
3743 static void
3744 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3745 {
3746 	int	i;
3747 
3748 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3749 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3750 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3751 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3752 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3753 
3754 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3755 		if (connp->conn_ilg_walker_cnt != 0) {
3756 			ilg->ilg_flags |= ILG_DELETED;
3757 			return;
3758 		}
3759 
3760 		FREE_SLIST(ilg->ilg_filter);
3761 
3762 		i = ilg - &connp->conn_ilg[0];
3763 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3764 
3765 		/* Move other entries up one step */
3766 		connp->conn_ilg_inuse--;
3767 		for (; i < connp->conn_ilg_inuse; i++)
3768 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3769 
3770 		if (connp->conn_ilg_inuse == 0) {
3771 			mi_free((char *)connp->conn_ilg);
3772 			connp->conn_ilg = NULL;
3773 			cv_broadcast(&connp->conn_refcv);
3774 		}
3775 	} else {
3776 		l_remove(ilg->ilg_filter, src);
3777 	}
3778 }
3779 
3780 /*
3781  * Called from conn close. No new ilg can be added or removed.
3782  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3783  * will return error if conn has started closing.
3784  */
3785 void
3786 ilg_delete_all(conn_t *connp)
3787 {
3788 	int	i;
3789 	ipif_t	*ipif = NULL;
3790 	ill_t	*ill = NULL;
3791 	ilg_t	*ilg;
3792 	in6_addr_t v6group;
3793 	boolean_t success;
3794 	ipsq_t	*ipsq;
3795 	int	orig_ifindex;
3796 
3797 	mutex_enter(&connp->conn_lock);
3798 retry:
3799 	ILG_WALKER_HOLD(connp);
3800 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3801 		ilg = &connp->conn_ilg[i];
3802 		/*
3803 		 * Since this walk is not atomic (we drop the
3804 		 * conn_lock and wait in ipsq_enter) we need
3805 		 * to check for the ILG_DELETED flag.
3806 		 */
3807 		if (ilg->ilg_flags & ILG_DELETED) {
3808 			/* Go to the next ilg */
3809 			i--;
3810 			continue;
3811 		}
3812 		v6group = ilg->ilg_v6group;
3813 
3814 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3815 			ipif = ilg->ilg_ipif;
3816 			ill = ipif->ipif_ill;
3817 		} else {
3818 			ipif = NULL;
3819 			ill = ilg->ilg_ill;
3820 		}
3821 		/*
3822 		 * We may not be able to refhold the ill if the ill/ipif
3823 		 * is changing. But we need to make sure that the ill will
3824 		 * not vanish. So we just bump up the ill_waiter count.
3825 		 * If we are unable to do even that, then the ill is closing,
3826 		 * in which case the unplumb thread will handle the cleanup,
3827 		 * and we move on to the next ilg.
3828 		 */
3829 		if (!ill_waiter_inc(ill)) {
3830 			/* Go to the next ilg */
3831 			i--;
3832 			continue;
3833 		}
3834 		mutex_exit(&connp->conn_lock);
3835 		/*
3836 		 * To prevent deadlock between ill close which waits inside
3837 		 * the perimeter, and conn close, ipsq_enter returns error,
3838 		 * the moment ILL_CONDEMNED is set, in which case ill close
3839 		 * takes responsibility to cleanup the ilgs. Note that we
3840 		 * have not yet set condemned flag, otherwise the conn can't
3841 		 * be refheld for cleanup by those routines and it would be
3842 		 * a mutual deadlock.
3843 		 */
3844 		success = ipsq_enter(ill, B_FALSE);
3845 		ipsq = ill->ill_phyint->phyint_ipsq;
3846 		ill_waiter_dcr(ill);
3847 		mutex_enter(&connp->conn_lock);
3848 		if (!success) {
3849 			/* Go to the next ilg */
3850 			i--;
3851 			continue;
3852 		}
3853 
3854 		/*
3855 		 * Make sure that nothing has changed under. For eg.
3856 		 * a failover/failback can change ilg_ill while we were
3857 		 * waiting to become exclusive above
3858 		 */
3859 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3860 			ipif = ilg->ilg_ipif;
3861 			ill = ipif->ipif_ill;
3862 		} else {
3863 			ipif = NULL;
3864 			ill = ilg->ilg_ill;
3865 		}
3866 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3867 			/*
3868 			 * The ilg has changed under us probably due
3869 			 * to a failover or unplumb. Retry on the same ilg.
3870 			 */
3871 			mutex_exit(&connp->conn_lock);
3872 			ipsq_exit(ipsq);
3873 			mutex_enter(&connp->conn_lock);
3874 			continue;
3875 		}
3876 		v6group = ilg->ilg_v6group;
3877 		orig_ifindex = ilg->ilg_orig_ifindex;
3878 		ilg_delete(connp, ilg, NULL);
3879 		mutex_exit(&connp->conn_lock);
3880 
3881 		if (ipif != NULL)
3882 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3883 			    B_FALSE, B_TRUE);
3884 
3885 		else
3886 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3887 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3888 
3889 		ipsq_exit(ipsq);
3890 		mutex_enter(&connp->conn_lock);
3891 		/* Go to the next ilg */
3892 		i--;
3893 	}
3894 	ILG_WALKER_RELE(connp);
3895 
3896 	/* If any ill was skipped above wait and retry */
3897 	if (connp->conn_ilg_inuse != 0) {
3898 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3899 		goto retry;
3900 	}
3901 	mutex_exit(&connp->conn_lock);
3902 }
3903 
3904 /*
3905  * Called from ill close by ipcl_walk for clearing conn_ilg and
3906  * conn_multicast_ipif for a given ipif. conn is held by caller.
3907  * Note that ipcl_walk only walks conns that are not yet condemned.
3908  * condemned conns can't be refheld. For this reason, conn must become clean
3909  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3910  * condemned flag.
3911  */
3912 static void
3913 conn_delete_ipif(conn_t *connp, caddr_t arg)
3914 {
3915 	ipif_t	*ipif = (ipif_t *)arg;
3916 	int	i;
3917 	char	group_buf1[INET6_ADDRSTRLEN];
3918 	char	group_buf2[INET6_ADDRSTRLEN];
3919 	ipaddr_t group;
3920 	ilg_t	*ilg;
3921 
3922 	/*
3923 	 * Even though conn_ilg_inuse can change while we are in this loop,
3924 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3925 	 * be created or deleted for this connp, on this ill, since this ill
3926 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3927 	 */
3928 	mutex_enter(&connp->conn_lock);
3929 
3930 	/*
3931 	 * Increment the walker count, so that ilg repacking does not
3932 	 * occur while we are in the loop.
3933 	 */
3934 	ILG_WALKER_HOLD(connp);
3935 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3936 		ilg = &connp->conn_ilg[i];
3937 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3938 			continue;
3939 		/*
3940 		 * ip_close cannot be cleaning this ilg at the same time.
3941 		 * since it also has to execute in this ill's perimeter which
3942 		 * we are now holding. Only a clean conn can be condemned.
3943 		 */
3944 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3945 
3946 		/* Blow away the membership */
3947 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3948 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3949 		    group_buf1, sizeof (group_buf1)),
3950 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3951 		    group_buf2, sizeof (group_buf2)),
3952 		    ipif->ipif_ill->ill_name));
3953 
3954 		/* ilg_ipif is NULL for V6, so we won't be here */
3955 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3956 
3957 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3958 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3959 		mutex_exit(&connp->conn_lock);
3960 
3961 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3962 		mutex_enter(&connp->conn_lock);
3963 	}
3964 
3965 	/*
3966 	 * If we are the last walker, need to physically delete the
3967 	 * ilgs and repack.
3968 	 */
3969 	ILG_WALKER_RELE(connp);
3970 
3971 	if (connp->conn_multicast_ipif == ipif) {
3972 		/* Revert to late binding */
3973 		connp->conn_multicast_ipif = NULL;
3974 	}
3975 	mutex_exit(&connp->conn_lock);
3976 
3977 	conn_delete_ire(connp, (caddr_t)ipif);
3978 }
3979 
3980 /*
3981  * Called from ill close by ipcl_walk for clearing conn_ilg and
3982  * conn_multicast_ill for a given ill. conn is held by caller.
3983  * Note that ipcl_walk only walks conns that are not yet condemned.
3984  * condemned conns can't be refheld. For this reason, conn must become clean
3985  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3986  * condemned flag.
3987  */
3988 static void
3989 conn_delete_ill(conn_t *connp, caddr_t arg)
3990 {
3991 	ill_t	*ill = (ill_t *)arg;
3992 	int	i;
3993 	char	group_buf[INET6_ADDRSTRLEN];
3994 	in6_addr_t v6group;
3995 	int	orig_ifindex;
3996 	ilg_t	*ilg;
3997 
3998 	/*
3999 	 * Even though conn_ilg_inuse can change while we are in this loop,
4000 	 * no new ilgs can be created/deleted for this connp, on this
4001 	 * ill, since this ill is the perimeter. So we won't miss any ilg
4002 	 * in this cleanup.
4003 	 */
4004 	mutex_enter(&connp->conn_lock);
4005 
4006 	/*
4007 	 * Increment the walker count, so that ilg repacking does not
4008 	 * occur while we are in the loop.
4009 	 */
4010 	ILG_WALKER_HOLD(connp);
4011 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
4012 		ilg = &connp->conn_ilg[i];
4013 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
4014 			/*
4015 			 * ip_close cannot be cleaning this ilg at the same
4016 			 * time, since it also has to execute in this ill's
4017 			 * perimeter which we are now holding. Only a clean
4018 			 * conn can be condemned.
4019 			 */
4020 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
4021 
4022 			/* Blow away the membership */
4023 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
4024 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
4025 			    group_buf, sizeof (group_buf)),
4026 			    ill->ill_name));
4027 
4028 			v6group = ilg->ilg_v6group;
4029 			orig_ifindex = ilg->ilg_orig_ifindex;
4030 			ilg_delete(connp, ilg, NULL);
4031 			mutex_exit(&connp->conn_lock);
4032 
4033 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
4034 			    connp->conn_zoneid, B_FALSE, B_TRUE);
4035 			mutex_enter(&connp->conn_lock);
4036 		}
4037 	}
4038 	/*
4039 	 * If we are the last walker, need to physically delete the
4040 	 * ilgs and repack.
4041 	 */
4042 	ILG_WALKER_RELE(connp);
4043 
4044 	if (connp->conn_multicast_ill == ill) {
4045 		/* Revert to late binding */
4046 		connp->conn_multicast_ill = NULL;
4047 		connp->conn_orig_multicast_ifindex = 0;
4048 	}
4049 	mutex_exit(&connp->conn_lock);
4050 }
4051 
4052 /*
4053  * Called when an ipif is unplumbed to make sure that there are no
4054  * dangling conn references to that ipif.
4055  * Handles ilg_ipif and conn_multicast_ipif
4056  */
4057 void
4058 reset_conn_ipif(ipif)
4059 	ipif_t	*ipif;
4060 {
4061 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4062 
4063 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4064 }
4065 
4066 /*
4067  * Called when an ill is unplumbed to make sure that there are no
4068  * dangling conn references to that ill.
4069  * Handles ilg_ill, conn_multicast_ill.
4070  */
4071 void
4072 reset_conn_ill(ill_t *ill)
4073 {
4074 	ip_stack_t	*ipst = ill->ill_ipst;
4075 
4076 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4077 }
4078 
4079 #ifdef DEBUG
4080 /*
4081  * Walk functions walk all the interfaces in the system to make
4082  * sure that there is no refernece to the ipif or ill that is
4083  * going away.
4084  */
4085 int
4086 ilm_walk_ill(ill_t *ill)
4087 {
4088 	int cnt = 0;
4089 	ill_t *till;
4090 	ilm_t *ilm;
4091 	ill_walk_context_t ctx;
4092 	ip_stack_t	*ipst = ill->ill_ipst;
4093 
4094 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4095 	till = ILL_START_WALK_ALL(&ctx, ipst);
4096 	for (; till != NULL; till = ill_next(&ctx, till)) {
4097 		mutex_enter(&till->ill_lock);
4098 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4099 			if (ilm->ilm_ill == ill) {
4100 				cnt++;
4101 			}
4102 		}
4103 		mutex_exit(&till->ill_lock);
4104 	}
4105 	rw_exit(&ipst->ips_ill_g_lock);
4106 
4107 	return (cnt);
4108 }
4109 
4110 /*
4111  * This function is called before the ipif is freed.
4112  */
4113 int
4114 ilm_walk_ipif(ipif_t *ipif)
4115 {
4116 	int cnt = 0;
4117 	ill_t *till;
4118 	ilm_t *ilm;
4119 	ill_walk_context_t ctx;
4120 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4121 
4122 	till = ILL_START_WALK_ALL(&ctx, ipst);
4123 	for (; till != NULL; till = ill_next(&ctx, till)) {
4124 		mutex_enter(&till->ill_lock);
4125 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4126 			if (ilm->ilm_ipif == ipif) {
4127 					cnt++;
4128 			}
4129 		}
4130 		mutex_exit(&till->ill_lock);
4131 	}
4132 	return (cnt);
4133 }
4134 #endif
4135