xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_multi.c (revision b1593d50e783f7d66722dde093752b74ffa95176)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
74 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
75 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
76     ipif_t *ipif);
77 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
78     mcast_record_t fmode, ipaddr_t src);
79 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
80     mcast_record_t fmode, const in6_addr_t *v6src);
81 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
82 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
83     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
84 static void	conn_ilg_reap(conn_t *connp);
85 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
86     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
87 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
88     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
89     const in6_addr_t *v6src);
90 static void	ill_ilm_walker_hold(ill_t *ill);
91 static void	ill_ilm_walker_rele(ill_t *ill);
92 
93 /*
94  * MT notes:
95  *
96  * Multicast joins operate on both the ilg and ilm structures. Multiple
97  * threads operating on a conn (socket) trying to do multicast joins
98  * need to synchronize when operating on the ilg. Multiple threads
99  * potentially operating on different conn (socket endpoints) trying to
100  * do multicast joins could eventually end up trying to manipulate the
101  * ilm simultaneously and need to synchronize access to the ilm.  Currently,
102  * this is done by synchronizing join/leave via per-phyint ipsq_t
103  * serialization.
104  *
105  * An ilm is an IP data structure used to track multicast join/leave.
106  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
107  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
108  * referencing the ilm. ilms are created / destroyed only as writer. ilms
109  * are not passed around, instead they are looked up and used under the
110  * ill_lock or as writer. So we don't need a dynamic refcount of the number
111  * of threads holding reference to an ilm.
112  *
113  * Multicast Join operation:
114  *
115  * The first step is to determine the ipif (v4) or ill (v6) on which
116  * the join operation is to be done. The join is done after becoming
117  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
118  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
119  * Multiple threads can attempt to join simultaneously on different ipif/ill
120  * on the same conn. In this case the ipsq serialization does not help in
121  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
122  * The conn_lock also protects all the ilg_t members.
123  *
124  * Leave operation.
125  *
126  * Similar to the join operation, the first step is to determine the ipif
127  * or ill (v6) on which the leave operation is to be done. The leave operation
128  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
129  * As with join, ilg modification is done under the protection of the conn lock.
130  */
131 
/*
 * Try to enter the ipsq for `ipif' through ipsq_try_enter(), leaving the
 * result in `ipsq'.  When ipsq_try_enter() returns NULL the reference on
 * `ipif' is dropped with ipif_refrele() and the ENCLOSING FUNCTION returns
 * EINPROGRESS.  The expansion is several statements and contains a `return',
 * so use it only as a full statement inside a function that returns int.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);						\
	if (((ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE)) == NULL) {		\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}
140 
/*
 * ill flavour of IPSQ_ENTER_IPIF: try to enter the ipsq for `ill' through
 * ipsq_try_enter(), leaving the result in `ipsq'.  When ipsq_try_enter()
 * returns NULL the reference on `ill' is dropped with ill_refrele() and the
 * ENCLOSING FUNCTION returns EINPROGRESS.  The expansion is several
 * statements and contains a `return'.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT(connp != NULL);						\
	if (((ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE)) == NULL) {		\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}
149 
/*
 * Leave the ipsq via ipsq_exit(), but only if `ipsq' is non-NULL.  Note that
 * this expands to a bare `if' statement (with its own trailing semicolon).
 */
#define	IPSQ_EXIT(ipsq)			\
	if ((ipsq) != NULL)		\
		ipsq_exit(ipsq);
153 
/* Note one more walker of connp's conn_ilg array; undo with ILG_WALKER_RELE */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)
155 
/*
 * Drop one walker of connp's conn_ilg array.  When the walker count reaches
 * zero, conn_ilg_reap() is called to clean up entries marked ILG_DELETED.
 */
#define	ILG_WALKER_RELE(connp)					\
	{							\
		if (--(connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);			\
	}
162 
163 static void
164 conn_ilg_reap(conn_t *connp)
165 {
166 	int	to;
167 	int	from;
168 	ilg_t	*ilg;
169 
170 	ASSERT(MUTEX_HELD(&connp->conn_lock));
171 
172 	to = 0;
173 	from = 0;
174 	while (from < connp->conn_ilg_inuse) {
175 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
176 			ilg = &connp->conn_ilg[from];
177 			FREE_SLIST(ilg->ilg_filter);
178 			ilg->ilg_flags &= ~ILG_DELETED;
179 			from++;
180 			continue;
181 		}
182 		if (to != from)
183 			connp->conn_ilg[to] = connp->conn_ilg[from];
184 		to++;
185 		from++;
186 	}
187 
188 	connp->conn_ilg_inuse = to;
189 
190 	if (connp->conn_ilg_inuse == 0) {
191 		mi_free((char *)connp->conn_ilg);
192 		connp->conn_ilg = NULL;
193 		cv_broadcast(&connp->conn_refcv);
194 	}
195 }
196 
/* Allocate a zeroed array of `count' objects of type `type' via mi_zalloc() */
#define	GETSTRUCT(type, count)	\
	((type *)mi_zalloc((count) * sizeof (type)))

/* Number of ilg_t slots by which conn_ilg is sized and grown */
#define	ILG_ALLOC_CHUNK	16
201 
202 /*
203  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
204  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
205  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
206  * returned ilg).  Returns NULL on failure, in which case `*errp' will be
207  * filled in with the reason.
208  *
209  * Assumes connp->conn_lock is held.
210  */
211 static ilg_t *
212 conn_ilg_alloc(conn_t *connp, int *errp)
213 {
214 	ilg_t *new, *ret;
215 	int curcnt;
216 
217 	ASSERT(MUTEX_HELD(&connp->conn_lock));
218 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
219 
220 	/*
221 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
222 	 * create any ilgs.
223 	 */
224 	if (connp->conn_state_flags & CONN_CLOSING) {
225 		*errp = EINVAL;
226 		return (NULL);
227 	}
228 
229 	if (connp->conn_ilg == NULL) {
230 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
231 		if (connp->conn_ilg == NULL) {
232 			*errp = ENOMEM;
233 			return (NULL);
234 		}
235 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
236 		connp->conn_ilg_inuse = 0;
237 	}
238 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
239 		if (connp->conn_ilg_walker_cnt != 0) {
240 			/*
241 			 * XXX We cannot grow the array at this point
242 			 * because a list walker could be in progress, and
243 			 * we cannot wipe out the existing array until the
244 			 * walker is done. Just return NULL for now.
245 			 * ilg_delete_all() will have to be changed when
246 			 * this logic is changed.
247 			 */
248 			*errp = EBUSY;
249 			return (NULL);
250 		}
251 		curcnt = connp->conn_ilg_allocated;
252 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
253 		if (new == NULL) {
254 			*errp = ENOMEM;
255 			return (NULL);
256 		}
257 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
258 		mi_free((char *)connp->conn_ilg);
259 		connp->conn_ilg = new;
260 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
261 	}
262 
263 	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
264 	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
265 	bzero(ret, sizeof (*ret));
266 	return (ret);
267 }
268 
269 typedef struct ilm_fbld_s {
270 	ilm_t		*fbld_ilm;
271 	int		fbld_in_cnt;
272 	int		fbld_ex_cnt;
273 	slist_t		fbld_in;
274 	slist_t		fbld_ex;
275 	boolean_t	fbld_in_overflow;
276 } ilm_fbld_t;
277 
278 static void
279 ilm_bld_flists(conn_t *conn, void *arg)
280 {
281 	int i;
282 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
283 	ilm_t *ilm = fbld->fbld_ilm;
284 	in6_addr_t *v6group = &ilm->ilm_v6addr;
285 
286 	if (conn->conn_ilg_inuse == 0)
287 		return;
288 
289 	/*
290 	 * Since we can't break out of the ipcl_walk once started, we still
291 	 * have to look at every conn.  But if we've already found one
292 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
293 	 * ilgs--that will be our state.
294 	 */
295 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
296 		return;
297 
298 	/*
299 	 * Check this conn's ilgs to see if any are interested in our
300 	 * ilm (group, interface match).  If so, update the master
301 	 * include and exclude lists we're building in the fbld struct
302 	 * with this ilg's filter info.
303 	 */
304 	mutex_enter(&conn->conn_lock);
305 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
306 		ilg_t *ilg = &conn->conn_ilg[i];
307 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
308 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
309 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
310 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
311 				fbld->fbld_in_cnt++;
312 				if (!fbld->fbld_in_overflow)
313 					l_union_in_a(&fbld->fbld_in,
314 					    ilg->ilg_filter,
315 					    &fbld->fbld_in_overflow);
316 			} else {
317 				fbld->fbld_ex_cnt++;
318 				/*
319 				 * On the first exclude list, don't try to do
320 				 * an intersection, as the master exclude list
321 				 * is intentionally empty.  If the master list
322 				 * is still empty on later iterations, that
323 				 * means we have at least one ilg with an empty
324 				 * exclude list, so that should be reflected
325 				 * when we take the intersection.
326 				 */
327 				if (fbld->fbld_ex_cnt == 1) {
328 					if (ilg->ilg_filter != NULL)
329 						l_copy(ilg->ilg_filter,
330 						    &fbld->fbld_ex);
331 				} else {
332 					l_intersection_in_a(&fbld->fbld_ex,
333 					    ilg->ilg_filter);
334 				}
335 			}
336 			/* there will only be one match, so break now. */
337 			break;
338 		}
339 	}
340 	mutex_exit(&conn->conn_lock);
341 }
342 
343 static void
344 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
345 {
346 	ilm_fbld_t fbld;
347 	ip_stack_t *ipst = ilm->ilm_ipst;
348 
349 	fbld.fbld_ilm = ilm;
350 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
351 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
352 	fbld.fbld_in_overflow = B_FALSE;
353 
354 	/* first, construct our master include and exclude lists */
355 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
356 
357 	/* now use those master lists to generate the interface filter */
358 
359 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
360 	if (fbld.fbld_in_overflow) {
361 		*fmode = MODE_IS_EXCLUDE;
362 		flist->sl_numsrc = 0;
363 		return;
364 	}
365 
366 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
367 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
368 		*fmode = MODE_IS_INCLUDE;
369 		flist->sl_numsrc = 0;
370 		return;
371 	}
372 
373 	/*
374 	 * If there are no exclude lists, then the interface filter
375 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
376 	 * exclude list makes the interface filter EXCLUDE, with its
377 	 * filter list equal to (fbld_ex - fbld_in).
378 	 */
379 	if (fbld.fbld_ex_cnt == 0) {
380 		*fmode = MODE_IS_INCLUDE;
381 		l_copy(&fbld.fbld_in, flist);
382 	} else {
383 		*fmode = MODE_IS_EXCLUDE;
384 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
385 	}
386 }
387 
388 static int
389 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
390     boolean_t isv6)
391 {
392 	mcast_record_t fmode;
393 	slist_t *flist;
394 	boolean_t fdefault;
395 	char buf[INET6_ADDRSTRLEN];
396 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
397 
398 	/*
399 	 * There are several cases where the ilm's filter state
400 	 * defaults to (EXCLUDE, NULL):
401 	 *	- we've had previous joins without associated ilgs
402 	 *	- this join has no associated ilg
403 	 *	- the ilg's filter state is (EXCLUDE, NULL)
404 	 */
405 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
406 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
407 
408 	/* attempt mallocs (if needed) before doing anything else */
409 	if ((flist = l_alloc()) == NULL)
410 		return (ENOMEM);
411 	if (!fdefault && ilm->ilm_filter == NULL) {
412 		ilm->ilm_filter = l_alloc();
413 		if (ilm->ilm_filter == NULL) {
414 			l_free(flist);
415 			return (ENOMEM);
416 		}
417 	}
418 
419 	if (ilgstat != ILGSTAT_CHANGE)
420 		ilm->ilm_refcnt++;
421 
422 	if (ilgstat == ILGSTAT_NONE)
423 		ilm->ilm_no_ilg_cnt++;
424 
425 	/*
426 	 * Determine new filter state.  If it's not the default
427 	 * (EXCLUDE, NULL), we must walk the conn list to find
428 	 * any ilgs interested in this group, and re-build the
429 	 * ilm filter.
430 	 */
431 	if (fdefault) {
432 		fmode = MODE_IS_EXCLUDE;
433 		flist->sl_numsrc = 0;
434 	} else {
435 		ilm_gen_filter(ilm, &fmode, flist);
436 	}
437 
438 	/* make sure state actually changed; nothing to do if not. */
439 	if ((ilm->ilm_fmode == fmode) &&
440 	    !lists_are_different(ilm->ilm_filter, flist)) {
441 		l_free(flist);
442 		return (0);
443 	}
444 
445 	/* send the state change report */
446 	if (!IS_LOOPBACK(ill)) {
447 		if (isv6)
448 			mld_statechange(ilm, fmode, flist);
449 		else
450 			igmp_statechange(ilm, fmode, flist);
451 	}
452 
453 	/* update the ilm state */
454 	ilm->ilm_fmode = fmode;
455 	if (flist->sl_numsrc > 0)
456 		l_copy(flist, ilm->ilm_filter);
457 	else
458 		CLEAR_SLIST(ilm->ilm_filter);
459 
460 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
461 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
462 
463 	l_free(flist);
464 	return (0);
465 }
466 
467 static int
468 ilm_update_del(ilm_t *ilm, boolean_t isv6)
469 {
470 	mcast_record_t fmode;
471 	slist_t *flist;
472 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
473 
474 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
475 	    ilm->ilm_refcnt));
476 
477 	if ((flist = l_alloc()) == NULL)
478 		return (ENOMEM);
479 
480 	/*
481 	 * If present, the ilg in question has already either been
482 	 * updated or removed from our list; so all we need to do
483 	 * now is walk the list to update the ilm filter state.
484 	 *
485 	 * Skip the list walk if we have any no-ilg joins, which
486 	 * cause the filter state to revert to (EXCLUDE, NULL).
487 	 */
488 	if (ilm->ilm_no_ilg_cnt != 0) {
489 		fmode = MODE_IS_EXCLUDE;
490 		flist->sl_numsrc = 0;
491 	} else {
492 		ilm_gen_filter(ilm, &fmode, flist);
493 	}
494 
495 	/* check to see if state needs to be updated */
496 	if ((ilm->ilm_fmode == fmode) &&
497 	    (!lists_are_different(ilm->ilm_filter, flist))) {
498 		l_free(flist);
499 		return (0);
500 	}
501 
502 	if (!IS_LOOPBACK(ill)) {
503 		if (isv6)
504 			mld_statechange(ilm, fmode, flist);
505 		else
506 			igmp_statechange(ilm, fmode, flist);
507 	}
508 
509 	ilm->ilm_fmode = fmode;
510 	if (flist->sl_numsrc > 0) {
511 		if (ilm->ilm_filter == NULL) {
512 			ilm->ilm_filter = l_alloc();
513 			if (ilm->ilm_filter == NULL) {
514 				char buf[INET6_ADDRSTRLEN];
515 				ip1dbg(("ilm_update_del: failed to alloc ilm "
516 				    "filter; no source filtering for %s on %s",
517 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
518 				    buf, sizeof (buf)), ill->ill_name));
519 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
520 				l_free(flist);
521 				return (0);
522 			}
523 		}
524 		l_copy(flist, ilm->ilm_filter);
525 	} else {
526 		CLEAR_SLIST(ilm->ilm_filter);
527 	}
528 
529 	l_free(flist);
530 	return (0);
531 }
532 
533 /*
534  * INADDR_ANY means all multicast addresses.
535  * INADDR_ANY is stored as IPv6 unspecified addr.
536  */
537 int
538 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
539     mcast_record_t ilg_fmode, slist_t *ilg_flist)
540 {
541 	ill_t	*ill = ipif->ipif_ill;
542 	ilm_t 	*ilm;
543 	in6_addr_t v6group;
544 	int	ret;
545 
546 	ASSERT(IAM_WRITER_IPIF(ipif));
547 
548 	if (!CLASSD(group) && group != INADDR_ANY)
549 		return (EINVAL);
550 
551 	if (IS_UNDER_IPMP(ill))
552 		return (EINVAL);
553 
554 	/*
555 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
556 	 */
557 	if (group == INADDR_ANY)
558 		v6group = ipv6_all_zeros;
559 	else
560 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
561 
562 	ilm = ilm_lookup_ipif(ipif, group);
563 	/*
564 	 * Since we are writer, we know the ilm_flags itself cannot
565 	 * change at this point, and ilm_lookup_ipif would not have
566 	 * returned a DELETED ilm. However, the data path can free
567 	 * ilm->ilm_next via ilm_walker_cleanup() so we can safely
568 	 * access anything in ilm except ilm_next (for safe access to
569 	 * ilm_next we'd have to take the ill_lock).
570 	 */
571 	if (ilm != NULL)
572 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
573 
574 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
575 	    ipif->ipif_zoneid);
576 	if (ilm == NULL)
577 		return (ENOMEM);
578 
579 	if (group == INADDR_ANY) {
580 		/*
581 		 * Check how many ipif's have members in this group -
582 		 * if more then one we should not tell the driver to join
583 		 * this time
584 		 */
585 		if (ilm_numentries_v6(ill, &v6group) > 1)
586 			return (0);
587 		ret = ill_join_allmulti(ill);
588 		if (ret != 0)
589 			ilm_delete(ilm);
590 		return (ret);
591 	}
592 
593 	if (!IS_LOOPBACK(ill))
594 		igmp_joingroup(ilm);
595 
596 	if (ilm_numentries_v6(ill, &v6group) > 1)
597 		return (0);
598 
599 	ret = ip_ll_addmulti_v6(ipif, &v6group);
600 	if (ret != 0)
601 		ilm_delete(ilm);
602 	return (ret);
603 }
604 
605 /*
606  * The unspecified address means all multicast addresses.
607  *
608  * ill identifies the interface to join on.
609  *
610  * ilgstat tells us if there's an ilg associated with this join,
611  * and if so, if it's a new ilg or a change to an existing one.
612  * ilg_fmode and ilg_flist give us the current filter state of
613  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
614  */
615 int
616 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
617     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist)
618 {
619 	ilm_t	*ilm;
620 	int	ret;
621 
622 	ASSERT(IAM_WRITER_ILL(ill));
623 
624 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
625 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
626 		return (EINVAL);
627 	}
628 
629 	if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_MC_SOLICITEDNODE(v6group))
630 		return (EINVAL);
631 
632 	/*
633 	 * An ilm is uniquely identified by the tuple of (group, ill) where
634 	 * `group' is the multicast group address, and `ill' is the interface
635 	 * on which it is currently joined.
636 	 */
637 	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
638 	if (ilm != NULL)
639 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
640 
641 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
642 	    ilg_flist, zoneid);
643 	if (ilm == NULL)
644 		return (ENOMEM);
645 
646 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
647 		/*
648 		 * Check how many ipif's that have members in this group -
649 		 * if more then one we should not tell the driver to join
650 		 * this time
651 		 */
652 		if (ilm_numentries_v6(ill, v6group) > 1)
653 			return (0);
654 		ret = ill_join_allmulti(ill);
655 		if (ret != 0)
656 			ilm_delete(ilm);
657 		return (ret);
658 	}
659 
660 	if (!IS_LOOPBACK(ill))
661 		mld_joingroup(ilm);
662 
663 	/*
664 	 * If we have more then one we should not tell the driver
665 	 * to join this time.
666 	 */
667 	if (ilm_numentries_v6(ill, v6group) > 1)
668 		return (0);
669 
670 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
671 	if (ret != 0)
672 		ilm_delete(ilm);
673 	return (ret);
674 }
675 
676 /*
677  * Mapping the given IP multicast address to the L2 multicast mac address.
678  */
679 static void
680 ill_multicast_mapping(ill_t *ill, ipaddr_t ip_addr, uint8_t *hw_addr,
681     uint32_t hw_addrlen)
682 {
683 	dl_unitdata_req_t *dlur;
684 	ipaddr_t proto_extract_mask;
685 	uint8_t *from, *bcast_addr;
686 	uint32_t hw_extract_start;
687 	int len;
688 
689 	ASSERT(IN_CLASSD(ntohl(ip_addr)));
690 	ASSERT(hw_addrlen == ill->ill_phys_addr_length);
691 	ASSERT((ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) == 0);
692 	ASSERT((ill->ill_flags & ILLF_MULTICAST) != 0);
693 
694 	/*
695 	 * Find the physical broadcast address.
696 	 */
697 	dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr;
698 	bcast_addr = (uint8_t *)dlur + dlur->dl_dest_addr_offset;
699 	if (ill->ill_sap_length > 0)
700 		bcast_addr += ill->ill_sap_length;
701 
702 	VERIFY(MEDIA_V4MINFO(ill->ill_media, hw_addrlen, bcast_addr,
703 	    hw_addr, &hw_extract_start, &proto_extract_mask));
704 
705 	len = MIN((int)hw_addrlen - hw_extract_start, IP_ADDR_LEN);
706 	ip_addr &= proto_extract_mask;
707 	from = (uint8_t *)&ip_addr;
708 	while (len-- > 0)
709 		hw_addr[hw_extract_start + len] |= from[len];
710 }
711 
712 /*
713  * Send a multicast request to the driver for enabling multicast reception
714  * for v6groupp address. The caller has already checked whether it is
715  * appropriate to send one or not.
716  */
717 int
718 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
719 {
720 	mblk_t	*mp;
721 	uint32_t addrlen, addroff;
722 	char	group_buf[INET6_ADDRSTRLEN];
723 
724 	ASSERT(IAM_WRITER_ILL(ill));
725 
726 	/*
727 	 * If we're on the IPMP ill, use the nominated multicast interface to
728 	 * send and receive DLPI messages, if one exists.  (If none exists,
729 	 * there are no usable interfaces and thus nothing to do.)
730 	 */
731 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
732 		return (0);
733 
734 	/*
735 	 * Create a DL_ENABMULTI_REQ.
736 	 */
737 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
738 	    &addrlen, &addroff);
739 	if (!mp)
740 		return (ENOMEM);
741 
742 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
743 		ipaddr_t v4group;
744 
745 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
746 
747 		ill_multicast_mapping(ill, v4group,
748 		    mp->b_rptr + addroff, addrlen);
749 
750 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 %s on %s\n",
751 		    inet_ntop(AF_INET6, v6groupp, group_buf,
752 		    sizeof (group_buf)),
753 		    ill->ill_name));
754 
755 		/* Track the state if this is the first enabmulti */
756 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
757 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
758 		ill_dlpi_send(ill, mp);
759 	} else {
760 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
761 		    " %s\n",
762 		    inet_ntop(AF_INET6, v6groupp, group_buf,
763 		    sizeof (group_buf)),
764 		    ill->ill_name));
765 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
766 	}
767 	return (0);
768 }
769 
770 /*
771  * Send a multicast request to the driver for enabling multicast
772  * membership for v6group if appropriate.
773  */
774 static int
775 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
776 {
777 	ill_t	*ill = ipif->ipif_ill;
778 
779 	ASSERT(IAM_WRITER_IPIF(ipif));
780 
781 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
782 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
783 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
784 		return (0);	/* Must be IRE_IF_NORESOLVER */
785 	}
786 
787 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
788 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
789 		return (0);
790 	}
791 	if (!ill->ill_dl_up) {
792 		/*
793 		 * Nobody there. All multicast addresses will be re-joined
794 		 * when we get the DL_BIND_ACK bringing the interface up.
795 		 */
796 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
797 		return (0);
798 	}
799 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
800 }
801 
802 /*
803  * INADDR_ANY means all multicast addresses.
804  * INADDR_ANY is stored as the IPv6 unspecified addr.
805  */
806 int
807 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
808 {
809 	ill_t	*ill = ipif->ipif_ill;
810 	ilm_t *ilm;
811 	in6_addr_t v6group;
812 
813 	ASSERT(IAM_WRITER_IPIF(ipif));
814 
815 	if (!CLASSD(group) && group != INADDR_ANY)
816 		return (EINVAL);
817 
818 	/*
819 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
820 	 */
821 	if (group == INADDR_ANY)
822 		v6group = ipv6_all_zeros;
823 	else
824 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
825 
826 	/*
827 	 * Look for a match on the ipif.
828 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
829 	 */
830 	ilm = ilm_lookup_ipif(ipif, group);
831 	if (ilm == NULL)
832 		return (ENOENT);
833 
834 	/* Update counters */
835 	if (no_ilg)
836 		ilm->ilm_no_ilg_cnt--;
837 
838 	if (leaving)
839 		ilm->ilm_refcnt--;
840 
841 	if (ilm->ilm_refcnt > 0)
842 		return (ilm_update_del(ilm, B_FALSE));
843 
844 	if (group == INADDR_ANY) {
845 		ilm_delete(ilm);
846 		/*
847 		 * Check how many ipif's that have members in this group -
848 		 * if there are still some left then don't tell the driver
849 		 * to drop it.
850 		 */
851 		if (ilm_numentries_v6(ill, &v6group) != 0)
852 			return (0);
853 
854 		/* If we never joined, then don't leave. */
855 		if (ill->ill_join_allmulti)
856 			ill_leave_allmulti(ill);
857 
858 		return (0);
859 	}
860 
861 	if (!IS_LOOPBACK(ill))
862 		igmp_leavegroup(ilm);
863 
864 	ilm_delete(ilm);
865 	/*
866 	 * Check how many ipif's that have members in this group -
867 	 * if there are still some left then don't tell the driver
868 	 * to drop it.
869 	 */
870 	if (ilm_numentries_v6(ill, &v6group) != 0)
871 		return (0);
872 	return (ip_ll_delmulti_v6(ipif, &v6group));
873 }
874 
875 /*
876  * The unspecified address means all multicast addresses.
877  */
878 int
879 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
880     boolean_t no_ilg, boolean_t leaving)
881 {
882 	ipif_t	*ipif;
883 	ilm_t *ilm;
884 
885 	ASSERT(IAM_WRITER_ILL(ill));
886 
887 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
888 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
889 		return (EINVAL);
890 
891 	/*
892 	 * Look for a match on the ill.
893 	 */
894 	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
895 	if (ilm == NULL)
896 		return (ENOENT);
897 
898 	ASSERT(ilm->ilm_ill == ill);
899 
900 	ipif = ill->ill_ipif;
901 
902 	/* Update counters */
903 	if (no_ilg)
904 		ilm->ilm_no_ilg_cnt--;
905 
906 	if (leaving)
907 		ilm->ilm_refcnt--;
908 
909 	if (ilm->ilm_refcnt > 0)
910 		return (ilm_update_del(ilm, B_TRUE));
911 
912 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
913 		ilm_delete(ilm);
914 		/*
915 		 * Check how many ipif's that have members in this group -
916 		 * if there are still some left then don't tell the driver
917 		 * to drop it.
918 		 */
919 		if (ilm_numentries_v6(ill, v6group) != 0)
920 			return (0);
921 
922 		/* If we never joined, then don't leave. */
923 		if (ill->ill_join_allmulti)
924 			ill_leave_allmulti(ill);
925 
926 		return (0);
927 	}
928 
929 	if (!IS_LOOPBACK(ill))
930 		mld_leavegroup(ilm);
931 
932 	ilm_delete(ilm);
933 	/*
934 	 * Check how many ipif's that have members in this group -
935 	 * if there are still some left then don't tell the driver
936 	 * to drop it.
937 	 */
938 	if (ilm_numentries_v6(ill, v6group) != 0)
939 		return (0);
940 	return (ip_ll_delmulti_v6(ipif, v6group));
941 }
942 
943 /*
944  * Send a multicast request to the driver for disabling multicast reception
945  * for v6groupp address. The caller has already checked whether it is
946  * appropriate to send one or not.
947  */
948 int
949 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
950 {
951 	mblk_t	*mp;
952 	char	group_buf[INET6_ADDRSTRLEN];
953 	uint32_t addrlen, addroff;
954 
955 	ASSERT(IAM_WRITER_ILL(ill));
956 
957 	/*
958 	 * See comment in ip_ll_send_enabmulti_req().
959 	 */
960 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
961 		return (0);
962 
963 	/*
964 	 * Create a DL_DISABMULTI_REQ.
965 	 */
966 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
967 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
968 	if (!mp)
969 		return (ENOMEM);
970 
971 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
972 		ipaddr_t v4group;
973 
974 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
975 
976 		ill_multicast_mapping(ill, v4group,
977 		    mp->b_rptr + addroff, addrlen);
978 
979 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 %s on %s\n",
980 		    inet_ntop(AF_INET6, v6groupp, group_buf,
981 		    sizeof (group_buf)),
982 		    ill->ill_name));
983 		ill_dlpi_send(ill, mp);
984 	} else {
985 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
986 		    " %s\n",
987 		    inet_ntop(AF_INET6, v6groupp, group_buf,
988 		    sizeof (group_buf)),
989 		    ill->ill_name));
990 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
991 	}
992 	return (0);
993 }
994 
995 /*
996  * Send a multicast request to the driver for disabling multicast
997  * membership for v6group if appropriate.
998  */
999 static int
1000 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1001 {
1002 	ill_t	*ill = ipif->ipif_ill;
1003 
1004 	ASSERT(IAM_WRITER_IPIF(ipif));
1005 
1006 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1007 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1008 		return (0);	/* Must be IRE_IF_NORESOLVER */
1009 	}
1010 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1011 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1012 		return (0);
1013 	}
1014 	if (!ill->ill_dl_up) {
1015 		/*
1016 		 * Nobody there. All multicast addresses will be re-joined
1017 		 * when we get the DL_BIND_ACK bringing the interface up.
1018 		 */
1019 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1020 		return (0);
1021 	}
1022 	return (ip_ll_send_disabmulti_req(ill, v6group));
1023 }
1024 
1025 /*
1026  * Make the driver pass up all multicast packets.  NOTE: to keep callers
1027  * IPMP-unaware, if an IPMP ill is passed in, the ill_join_allmulti flag is
1028  * set on it (rather than the cast ill).
1029  */
1030 int
1031 ill_join_allmulti(ill_t *ill)
1032 {
1033 	mblk_t		*promiscon_mp, *promiscoff_mp;
1034 	uint32_t	addrlen, addroff;
1035 	ill_t		*join_ill = ill;
1036 
1037 	ASSERT(IAM_WRITER_ILL(ill));
1038 
1039 	if (!ill->ill_dl_up) {
1040 		/*
1041 		 * Nobody there. All multicast addresses will be re-joined
1042 		 * when we get the DL_BIND_ACK bringing the interface up.
1043 		 */
1044 		return (0);
1045 	}
1046 
1047 	/*
1048 	 * See comment in ip_ll_send_enabmulti_req().
1049 	 */
1050 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
1051 		return (0);
1052 
1053 	ASSERT(!join_ill->ill_join_allmulti);
1054 
1055 	/*
1056 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
1057 	 * provider.  We don't need to do this for certain media types for
1058 	 * which we never need to turn promiscuous mode on.  While we're here,
1059 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
1060 	 * ill_leave_allmulti() will not fail due to low memory conditions.
1061 	 */
1062 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1063 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1064 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1065 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1066 		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1067 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1068 		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
1069 			freemsg(promiscon_mp);
1070 			freemsg(promiscoff_mp);
1071 			return (ENOMEM);
1072 		}
1073 		ill->ill_promiscoff_mp = promiscoff_mp;
1074 		ill_dlpi_send(ill, promiscon_mp);
1075 	}
1076 
1077 	join_ill->ill_join_allmulti = B_TRUE;
1078 	return (0);
1079 }
1080 
1081 /*
1082  * Make the driver stop passing up all multicast packets
1083  */
1084 void
1085 ill_leave_allmulti(ill_t *ill)
1086 {
1087 	mblk_t	*promiscoff_mp;
1088 	ill_t	*leave_ill = ill;
1089 
1090 	ASSERT(IAM_WRITER_ILL(ill));
1091 
1092 	if (!ill->ill_dl_up) {
1093 		/*
1094 		 * Nobody there. All multicast addresses will be re-joined
1095 		 * when we get the DL_BIND_ACK bringing the interface up.
1096 		 */
1097 		return;
1098 	}
1099 
1100 	/*
1101 	 * See comment in ip_ll_send_enabmulti_req().
1102 	 */
1103 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
1104 		return;
1105 
1106 	ASSERT(leave_ill->ill_join_allmulti);
1107 
1108 	/*
1109 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1110 	 * the DLPI provider.  We don't need to do this for certain
1111 	 * media types for which we never need to turn promiscuous
1112 	 * mode on.
1113 	 */
1114 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1115 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1116 		promiscoff_mp = ill->ill_promiscoff_mp;
1117 		ASSERT(promiscoff_mp != NULL);
1118 		ill->ill_promiscoff_mp = NULL;
1119 		ill_dlpi_send(ill, promiscoff_mp);
1120 	}
1121 
1122 	leave_ill->ill_join_allmulti = B_FALSE;
1123 }
1124 
1125 static ill_t *
1126 ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1127 {
1128 	ill_t		*ill;
1129 	boolean_t	in_ipsq;
1130 
1131 	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
1132 	    ipst);
1133 	if (ill != NULL) {
1134 		if (!ill_waiter_inc(ill)) {
1135 			ill_refrele(ill);
1136 			return (NULL);
1137 		}
1138 		ill_refrele(ill);
1139 		in_ipsq = ipsq_enter(ill, B_FALSE, NEW_OP);
1140 		ill_waiter_dcr(ill);
1141 		if (!in_ipsq)
1142 			ill = NULL;
1143 	}
1144 	return (ill);
1145 }
1146 
1147 int
1148 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1149 {
1150 	ill_t		*ill;
1151 	int		ret = 0;
1152 
1153 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1154 		return (ENODEV);
1155 
1156 	/*
1157 	 * The ip_addmulti*() functions won't allow IPMP underlying interfaces
1158 	 * to join allmulti since only the nominated underlying interface in
1159 	 * the group should receive multicast.  We silently succeed to avoid
1160 	 * having to teach IPobs (currently the only caller of this routine)
1161 	 * to ignore failures in this case.
1162 	 */
1163 	if (IS_UNDER_IPMP(ill))
1164 		goto out;
1165 
1166 	if (isv6) {
1167 		ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ill->ill_zoneid,
1168 		    ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL);
1169 	} else {
1170 		ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE,
1171 		    MODE_IS_EXCLUDE, NULL);
1172 	}
1173 	ill->ill_ipallmulti_cnt++;
1174 out:
1175 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1176 	return (ret);
1177 }
1178 
1179 
1180 int
1181 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1182 {
1183 	ill_t		*ill;
1184 
1185 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1186 		return (ENODEV);
1187 
1188 	if (ill->ill_ipallmulti_cnt > 0) {
1189 		if (isv6) {
1190 			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
1191 			    ill->ill_zoneid, B_TRUE, B_TRUE);
1192 		} else {
1193 			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
1194 			    B_TRUE);
1195 		}
1196 		ill->ill_ipallmulti_cnt--;
1197 	}
1198 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1199 	return (0);
1200 }
1201 
1202 /*
1203  * Delete the allmulti memberships that were added as part of
1204  * ip_join_allmulti().
1205  */
1206 void
1207 ip_purge_allmulti(ill_t *ill)
1208 {
1209 	ASSERT(IAM_WRITER_ILL(ill));
1210 
1211 	for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) {
1212 		if (ill->ill_isv6) {
1213 			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
1214 			    ill->ill_zoneid, B_TRUE, B_TRUE);
1215 		} else {
1216 			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
1217 			    B_TRUE);
1218 		}
1219 	}
1220 }
1221 
1222 /*
1223  * Copy mp_orig and pass it in as a local message.
1224  */
1225 void
1226 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1227     zoneid_t zoneid)
1228 {
1229 	mblk_t	*mp;
1230 	mblk_t	*ipsec_mp;
1231 	ipha_t	*iph;
1232 	ip_stack_t *ipst = ill->ill_ipst;
1233 
1234 	if (DB_TYPE(mp_orig) == M_DATA &&
1235 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1236 		uint_t hdrsz;
1237 
1238 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1239 		    sizeof (udpha_t);
1240 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1241 
1242 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1243 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1244 			cred_t *cr;
1245 
1246 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1247 			mp->b_wptr += hdrsz;
1248 			mp->b_cont = mp_orig;
1249 			mp_orig->b_rptr += hdrsz;
1250 			if (is_system_labeled() &&
1251 			    (cr = msg_getcred(mp_orig, NULL)) != NULL)
1252 				mblk_setcred(mp, cr, NOPID);
1253 			if (MBLKL(mp_orig) == 0) {
1254 				mp->b_cont = mp_orig->b_cont;
1255 				mp_orig->b_cont = NULL;
1256 				freeb(mp_orig);
1257 			}
1258 		} else if (mp != NULL) {
1259 			freeb(mp);
1260 			mp = NULL;
1261 		}
1262 	} else {
1263 		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1264 	}
1265 
1266 	if (mp == NULL)
1267 		return;
1268 	if (DB_TYPE(mp) == M_CTL) {
1269 		ipsec_mp = mp;
1270 		mp = mp->b_cont;
1271 	} else {
1272 		ipsec_mp = mp;
1273 	}
1274 
1275 	iph = (ipha_t *)mp->b_rptr;
1276 
1277 	/*
1278 	 * DTrace this as ip:::send.  A blocked packet will fire the send
1279 	 * probe, but not the receive probe.
1280 	 */
1281 	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
1282 	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);
1283 
1284 	DTRACE_PROBE4(ip4__loopback__out__start,
1285 	    ill_t *, NULL, ill_t *, ill,
1286 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1287 
1288 	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1289 	    ipst->ips_ipv4firewall_loopback_out,
1290 	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);
1291 
1292 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1293 
1294 	if (ipsec_mp != NULL)
1295 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1296 		    fanout_flags, zoneid);
1297 }
1298 
1299 /*
1300  * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
1301  * the hardware address.
1302  */
1303 static mblk_t *
1304 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1305     uint32_t *addr_lenp, uint32_t *addr_offp)
1306 {
1307 	mblk_t	*mp;
1308 	uint32_t	hw_addr_length;
1309 	char		*cp;
1310 	uint32_t	offset;
1311 	uint32_t 	size;
1312 
1313 	*addr_lenp = *addr_offp = 0;
1314 
1315 	hw_addr_length = ill->ill_phys_addr_length;
1316 	if (!hw_addr_length) {
1317 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1318 		return (NULL);
1319 	}
1320 
1321 	size = length;
1322 	switch (dl_primitive) {
1323 	case DL_ENABMULTI_REQ:
1324 	case DL_DISABMULTI_REQ:
1325 		size += hw_addr_length;
1326 		break;
1327 	case DL_PROMISCON_REQ:
1328 	case DL_PROMISCOFF_REQ:
1329 		break;
1330 	default:
1331 		return (NULL);
1332 	}
1333 	mp = allocb(size, BPRI_HI);
1334 	if (!mp)
1335 		return (NULL);
1336 	mp->b_wptr += size;
1337 	mp->b_datap->db_type = M_PROTO;
1338 
1339 	cp = (char *)mp->b_rptr;
1340 	offset = length;
1341 
1342 	switch (dl_primitive) {
1343 	case DL_ENABMULTI_REQ: {
1344 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1345 
1346 		dl->dl_primitive = dl_primitive;
1347 		dl->dl_addr_offset = offset;
1348 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1349 		*addr_offp = offset;
1350 		break;
1351 	}
1352 	case DL_DISABMULTI_REQ: {
1353 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1354 
1355 		dl->dl_primitive = dl_primitive;
1356 		dl->dl_addr_offset = offset;
1357 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1358 		*addr_offp = offset;
1359 		break;
1360 	}
1361 	case DL_PROMISCON_REQ:
1362 	case DL_PROMISCOFF_REQ: {
1363 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1364 
1365 		dl->dl_primitive = dl_primitive;
1366 		dl->dl_level = DL_PROMISC_MULTI;
1367 		break;
1368 	}
1369 	}
1370 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1371 	    *addr_lenp, *addr_offp));
1372 	return (mp);
1373 }
1374 
1375 /*
1376  * Rejoin any groups which have been explicitly joined by the application (we
1377  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1378  * bringing the interface down).  Note that because groups can be joined and
1379  * left while an interface is down, this may not be the same set of groups
1380  * that we left in ill_leave_multicast().
1381  */
1382 void
1383 ill_recover_multicast(ill_t *ill)
1384 {
1385 	ilm_t	*ilm;
1386 	ipif_t	*ipif = ill->ill_ipif;
1387 	char    addrbuf[INET6_ADDRSTRLEN];
1388 
1389 	ASSERT(IAM_WRITER_ILL(ill));
1390 
1391 	ill->ill_need_recover_multicast = 0;
1392 
1393 	ill_ilm_walker_hold(ill);
1394 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1395 		/*
1396 		 * Check how many ipif's that have members in this group -
1397 		 * if more then one we make sure that this entry is first
1398 		 * in the list.
1399 		 */
1400 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1401 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
1402 		    ALL_ZONES) != ilm) {
1403 			continue;
1404 		}
1405 
1406 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1407 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1408 
1409 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1410 			(void) ill_join_allmulti(ill);
1411 		} else {
1412 			if (ill->ill_isv6)
1413 				mld_joingroup(ilm);
1414 			else
1415 				igmp_joingroup(ilm);
1416 
1417 			(void) ip_ll_addmulti_v6(ipif, &ilm->ilm_v6addr);
1418 		}
1419 	}
1420 	ill_ilm_walker_rele(ill);
1421 
1422 }
1423 
1424 /*
1425  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1426  * that were explicitly joined.
1427  */
1428 void
1429 ill_leave_multicast(ill_t *ill)
1430 {
1431 	ilm_t	*ilm;
1432 	ipif_t	*ipif = ill->ill_ipif;
1433 	char    addrbuf[INET6_ADDRSTRLEN];
1434 
1435 	ASSERT(IAM_WRITER_ILL(ill));
1436 
1437 	ill->ill_need_recover_multicast = 1;
1438 
1439 	ill_ilm_walker_hold(ill);
1440 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1441 		/*
1442 		 * Check how many ipif's that have members in this group -
1443 		 * if more then one we make sure that this entry is first
1444 		 * in the list.
1445 		 */
1446 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1447 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
1448 		    ALL_ZONES) != ilm) {
1449 			continue;
1450 		}
1451 
1452 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1453 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1454 
1455 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1456 			ill_leave_allmulti(ill);
1457 		} else {
1458 			if (ill->ill_isv6)
1459 				mld_leavegroup(ilm);
1460 			else
1461 				igmp_leavegroup(ilm);
1462 
1463 			(void) ip_ll_delmulti_v6(ipif, &ilm->ilm_v6addr);
1464 		}
1465 	}
1466 	ill_ilm_walker_rele(ill);
1467 }
1468 
1469 /* Find an ilm for matching the ill */
1470 ilm_t *
1471 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1472 {
1473 	in6_addr_t	v6group;
1474 
1475 	/*
1476 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1477 	 */
1478 	if (group == INADDR_ANY)
1479 		v6group = ipv6_all_zeros;
1480 	else
1481 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1482 
1483 	return (ilm_lookup_ill_v6(ill, &v6group, B_TRUE, zoneid));
1484 }
1485 
1486 /*
1487  * Find an ilm for address `v6group' on `ill' and zone `zoneid' (which may be
1488  * ALL_ZONES).  In general, if `ill' is in an IPMP group, we will match
1489  * against any ill in the group.  However, if `restrict_solicited' is set,
1490  * then specifically for IPv6 solicited-node multicast, the match will be
1491  * restricted to the specified `ill'.
1492  */
1493 ilm_t *
1494 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group,
1495     boolean_t restrict_solicited, zoneid_t zoneid)
1496 {
1497 	ilm_t	*ilm;
1498 	ilm_walker_t ilw;
1499 	boolean_t restrict_ill = B_FALSE;
1500 
1501 	/*
1502 	 * In general, underlying interfaces cannot have multicast memberships
1503 	 * and thus lookups always match across the illgrp.  However, we must
1504 	 * allow IPv6 solicited-node multicast memberships on underlying
1505 	 * interfaces, and thus an IPMP meta-interface and one of its
1506 	 * underlying ills may have the same solicited-node multicast address.
1507 	 * In that case, we need to restrict the lookup to the requested ill.
1508 	 * However, we may receive packets on an underlying interface that
1509 	 * are for the corresponding IPMP interface's solicited-node multicast
1510 	 * address, and thus in that case we need to match across the group --
1511 	 * hence the unfortunate `restrict_solicited' argument.
1512 	 */
1513 	if (IN6_IS_ADDR_MC_SOLICITEDNODE(v6group) && restrict_solicited)
1514 		restrict_ill = (IS_IPMP(ill) || IS_UNDER_IPMP(ill));
1515 
1516 	ilm = ilm_walker_start(&ilw, ill);
1517 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
1518 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1519 			continue;
1520 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1521 			continue;
1522 		if (!restrict_ill || ill == (ill->ill_isv6 ?
1523 		    ilm->ilm_ill : ilm->ilm_ipif->ipif_ill)) {
1524 			break;
1525 		}
1526 	}
1527 	ilm_walker_finish(&ilw);
1528 	return (ilm);
1529 }
1530 
1531 /*
1532  * Find an ilm for the ipif. Only needed for IPv4 which does
1533  * ipif specific socket options.
1534  */
1535 ilm_t *
1536 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1537 {
1538 	ilm_t *ilm;
1539 	ilm_walker_t ilw;
1540 
1541 	ilm = ilm_walker_start(&ilw, ipif->ipif_ill);
1542 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
1543 		if (ilm->ilm_ipif == ipif && ilm->ilm_addr == group)
1544 			break;
1545 	}
1546 	ilm_walker_finish(&ilw);
1547 	return (ilm);
1548 }
1549 
1550 /*
1551  * How many members on this ill?
1552  */
1553 int
1554 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1555 {
1556 	ilm_t	*ilm;
1557 	int i = 0;
1558 
1559 	mutex_enter(&ill->ill_lock);
1560 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1561 		if (ilm->ilm_flags & ILM_DELETED)
1562 			continue;
1563 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1564 			i++;
1565 		}
1566 	}
1567 	mutex_exit(&ill->ill_lock);
1568 	return (i);
1569 }
1570 
1571 /* Caller guarantees that the group is not already on the list */
1572 static ilm_t *
1573 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1574     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1575 {
1576 	ill_t	*ill = ipif->ipif_ill;
1577 	ilm_t	*ilm;
1578 	ilm_t	*ilm_cur;
1579 	ilm_t	**ilm_ptpn;
1580 
1581 	ASSERT(IAM_WRITER_IPIF(ipif));
1582 
1583 	ilm = GETSTRUCT(ilm_t, 1);
1584 	if (ilm == NULL)
1585 		return (NULL);
1586 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1587 		ilm->ilm_filter = l_alloc();
1588 		if (ilm->ilm_filter == NULL) {
1589 			mi_free(ilm);
1590 			return (NULL);
1591 		}
1592 	}
1593 	ilm->ilm_v6addr = *v6group;
1594 	ilm->ilm_refcnt = 1;
1595 	ilm->ilm_zoneid = zoneid;
1596 	ilm->ilm_timer = INFINITY;
1597 	ilm->ilm_rtx.rtx_timer = INFINITY;
1598 
1599 	/*
1600 	 * IPv4 Multicast groups are joined using ipif.
1601 	 * IPv6 Multicast groups are joined using ill.
1602 	 */
1603 	if (ill->ill_isv6) {
1604 		ilm->ilm_ill = ill;
1605 		ilm->ilm_ipif = NULL;
1606 		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1607 		    (char *), "ilm", (void *), ilm);
1608 		ill->ill_ilm_cnt++;
1609 	} else {
1610 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1611 		ilm->ilm_ipif = ipif;
1612 		ilm->ilm_ill = NULL;
1613 		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
1614 		    (char *), "ilm", (void *), ilm);
1615 		ipif->ipif_ilm_cnt++;
1616 	}
1617 
1618 	ASSERT(ill->ill_ipst);
1619 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1620 
1621 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1622 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1623 
1624 	/*
1625 	 * Grab lock to give consistent view to readers
1626 	 */
1627 	mutex_enter(&ill->ill_lock);
1628 	/*
1629 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1630 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1631 	 * sending duplicates up when two applications in the same zone join the
1632 	 * same group on different logical interfaces.
1633 	 */
1634 	ilm_cur = ill->ill_ilm;
1635 	ilm_ptpn = &ill->ill_ilm;
1636 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1637 		ilm_ptpn = &ilm_cur->ilm_next;
1638 		ilm_cur = ilm_cur->ilm_next;
1639 	}
1640 	ilm->ilm_next = ilm_cur;
1641 	*ilm_ptpn = ilm;
1642 
1643 	/*
1644 	 * If we have an associated ilg, use its filter state; if not,
1645 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1646 	 */
1647 	if (ilgstat != ILGSTAT_NONE) {
1648 		if (!SLIST_IS_EMPTY(ilg_flist))
1649 			l_copy(ilg_flist, ilm->ilm_filter);
1650 		ilm->ilm_fmode = ilg_fmode;
1651 	} else {
1652 		ilm->ilm_no_ilg_cnt = 1;
1653 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1654 	}
1655 
1656 	mutex_exit(&ill->ill_lock);
1657 	return (ilm);
1658 }
1659 
1660 void
1661 ilm_inactive(ilm_t *ilm)
1662 {
1663 	FREE_SLIST(ilm->ilm_filter);
1664 	FREE_SLIST(ilm->ilm_pendsrcs);
1665 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1666 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1667 	ilm->ilm_ipst = NULL;
1668 	mi_free((char *)ilm);
1669 }
1670 
1671 void
1672 ilm_walker_cleanup(ill_t *ill)
1673 {
1674 	ilm_t	**ilmp;
1675 	ilm_t	*ilm;
1676 	boolean_t need_wakeup = B_FALSE;
1677 
1678 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1679 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1680 
1681 	ilmp = &ill->ill_ilm;
1682 	while (*ilmp != NULL) {
1683 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1684 			ilm = *ilmp;
1685 			*ilmp = ilm->ilm_next;
1686 			/*
1687 			 * check if there are any pending FREE or unplumb
1688 			 * operations that need to be restarted.
1689 			 */
1690 			if (ilm->ilm_ipif != NULL) {
1691 				/*
1692 				 * IPv4 ilms hold a ref on the ipif.
1693 				 */
1694 				DTRACE_PROBE3(ipif__decr__cnt,
1695 				    (ipif_t *), ilm->ilm_ipif,
1696 				    (char *), "ilm", (void *), ilm);
1697 				ilm->ilm_ipif->ipif_ilm_cnt--;
1698 				if (IPIF_FREE_OK(ilm->ilm_ipif))
1699 					need_wakeup = B_TRUE;
1700 			} else {
1701 				/*
1702 				 * IPv6 ilms hold a ref on the ill.
1703 				 */
1704 				ASSERT(ilm->ilm_ill == ill);
1705 				DTRACE_PROBE3(ill__decr__cnt,
1706 				    (ill_t *), ill,
1707 				    (char *), "ilm", (void *), ilm);
1708 				ASSERT(ill->ill_ilm_cnt > 0);
1709 				ill->ill_ilm_cnt--;
1710 				if (ILL_FREE_OK(ill))
1711 					need_wakeup = B_TRUE;
1712 			}
1713 			ilm_inactive(ilm); /* frees ilm */
1714 		} else {
1715 			ilmp = &(*ilmp)->ilm_next;
1716 		}
1717 	}
1718 	ill->ill_ilm_cleanup_reqd = 0;
1719 	if (need_wakeup)
1720 		ipif_ill_refrele_tail(ill);
1721 	else
1722 		mutex_exit(&ill->ill_lock);
1723 }
1724 
1725 /*
1726  * Unlink ilm and free it.
1727  */
1728 static void
1729 ilm_delete(ilm_t *ilm)
1730 {
1731 	ill_t		*ill;
1732 	ilm_t		**ilmp;
1733 	boolean_t	need_wakeup;
1734 
1735 
1736 	if (ilm->ilm_ipif != NULL) {
1737 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1738 		ASSERT(ilm->ilm_ill == NULL);
1739 		ill = ilm->ilm_ipif->ipif_ill;
1740 		ASSERT(!ill->ill_isv6);
1741 	} else {
1742 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1743 		ASSERT(ilm->ilm_ipif == NULL);
1744 		ill = ilm->ilm_ill;
1745 		ASSERT(ill->ill_isv6);
1746 	}
1747 	/*
1748 	 * Delete under lock protection so that readers don't stumble
1749 	 * on bad ilm_next
1750 	 */
1751 	mutex_enter(&ill->ill_lock);
1752 	if (ill->ill_ilm_walker_cnt != 0) {
1753 		ilm->ilm_flags |= ILM_DELETED;
1754 		ill->ill_ilm_cleanup_reqd = 1;
1755 		mutex_exit(&ill->ill_lock);
1756 		return;
1757 	}
1758 
1759 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1760 				;
1761 	*ilmp = ilm->ilm_next;
1762 
1763 	/*
1764 	 * if we are the last reference to the ipif (for IPv4 ilms)
1765 	 * or the ill (for IPv6 ilms), we may need to wakeup any
1766 	 * pending FREE or unplumb operations.
1767 	 */
1768 	need_wakeup = B_FALSE;
1769 	if (ilm->ilm_ipif != NULL) {
1770 		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
1771 		    (char *), "ilm", (void *), ilm);
1772 		ilm->ilm_ipif->ipif_ilm_cnt--;
1773 		if (IPIF_FREE_OK(ilm->ilm_ipif))
1774 			need_wakeup = B_TRUE;
1775 	} else {
1776 		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1777 		    (char *), "ilm", (void *), ilm);
1778 		ASSERT(ill->ill_ilm_cnt > 0);
1779 		ill->ill_ilm_cnt--;
1780 		if (ILL_FREE_OK(ill))
1781 			need_wakeup = B_TRUE;
1782 	}
1783 
1784 	ilm_inactive(ilm); /* frees this ilm */
1785 
1786 	if (need_wakeup) {
1787 		/* drops ill lock */
1788 		ipif_ill_refrele_tail(ill);
1789 	} else {
1790 		mutex_exit(&ill->ill_lock);
1791 	}
1792 }
1793 
1794 /* Increment the ILM walker count for `ill' */
1795 static void
1796 ill_ilm_walker_hold(ill_t *ill)
1797 {
1798 	mutex_enter(&ill->ill_lock);
1799 	ill->ill_ilm_walker_cnt++;
1800 	mutex_exit(&ill->ill_lock);
1801 }
1802 
1803 /* Decrement the ILM walker count for `ill' */
1804 static void
1805 ill_ilm_walker_rele(ill_t *ill)
1806 {
1807 	mutex_enter(&ill->ill_lock);
1808 	ill->ill_ilm_walker_cnt--;
1809 	if (ill->ill_ilm_walker_cnt == 0 && ill->ill_ilm_cleanup_reqd)
1810 		ilm_walker_cleanup(ill);	/* drops ill_lock */
1811 	else
1812 		mutex_exit(&ill->ill_lock);
1813 }
1814 
1815 /*
1816  * Start walking the ILMs associated with `ill'; the first ILM in the walk
1817  * (if any) is returned.  State associated with the walk is stored in `ilw'.
1818  * Note that walks associated with interfaces under IPMP also walk the ILMs
1819  * on the associated IPMP interface; this is handled transparently to callers
1820  * via ilm_walker_step().  (Usually with IPMP all ILMs will be on the IPMP
1821  * interface; the only exception is to support IPv6 test addresses, which
1822  * require ILMs for their associated solicited-node multicast addresses.)
1823  */
1824 ilm_t *
1825 ilm_walker_start(ilm_walker_t *ilw, ill_t *ill)
1826 {
1827 	ilw->ilw_ill = ill;
1828 	if (IS_UNDER_IPMP(ill))
1829 		ilw->ilw_ipmp_ill = ipmp_ill_hold_ipmp_ill(ill);
1830 	else
1831 		ilw->ilw_ipmp_ill = NULL;
1832 
1833 	ill_ilm_walker_hold(ill);
1834 	if (ilw->ilw_ipmp_ill != NULL)
1835 		ill_ilm_walker_hold(ilw->ilw_ipmp_ill);
1836 
1837 	if (ilw->ilw_ipmp_ill != NULL && ilw->ilw_ipmp_ill->ill_ilm != NULL)
1838 		ilw->ilw_walk_ill = ilw->ilw_ipmp_ill;
1839 	else
1840 		ilw->ilw_walk_ill = ilw->ilw_ill;
1841 
1842 	return (ilm_walker_step(ilw, NULL));
1843 }
1844 
1845 /*
1846  * Helper function for ilm_walker_step() that returns the next ILM
1847  * associated with `ilw', regardless of whether it's deleted.
1848  */
1849 static ilm_t *
1850 ilm_walker_step_all(ilm_walker_t *ilw, ilm_t *ilm)
1851 {
1852 	if (ilm == NULL)
1853 		return (ilw->ilw_walk_ill->ill_ilm);
1854 
1855 	if (ilm->ilm_next != NULL)
1856 		return (ilm->ilm_next);
1857 
1858 	if (ilw->ilw_ipmp_ill != NULL && IS_IPMP(ilw->ilw_walk_ill)) {
1859 		ilw->ilw_walk_ill = ilw->ilw_ill;
1860 		/*
1861 		 * It's possible that ilw_ill left the group during our walk,
1862 		 * so we can't ASSERT() that it's under IPMP.  Callers that
1863 		 * care will be writer on the IPSQ anyway.
1864 		 */
1865 		return (ilw->ilw_walk_ill->ill_ilm);
1866 	}
1867 	return (NULL);
1868 }
1869 
1870 /*
1871  * Step to the next ILM associated with `ilw'.
1872  */
1873 ilm_t *
1874 ilm_walker_step(ilm_walker_t *ilw, ilm_t *ilm)
1875 {
1876 	while ((ilm = ilm_walker_step_all(ilw, ilm)) != NULL) {
1877 		if (!(ilm->ilm_flags & ILM_DELETED))
1878 			break;
1879 	}
1880 	return (ilm);
1881 }
1882 
1883 /*
1884  * Finish the ILM walk associated with `ilw'.
1885  */
1886 void
1887 ilm_walker_finish(ilm_walker_t *ilw)
1888 {
1889 	ill_ilm_walker_rele(ilw->ilw_ill);
1890 	if (ilw->ilw_ipmp_ill != NULL) {
1891 		ill_ilm_walker_rele(ilw->ilw_ipmp_ill);
1892 		ill_refrele(ilw->ilw_ipmp_ill);
1893 	}
1894 	bzero(&ilw, sizeof (ilw));
1895 }
1896 
1897 /*
1898  * Looks up the appropriate ipif given a v4 multicast group and interface
1899  * address.  On success, returns 0, with *ipifpp pointing to the found
1900  * struct.  On failure, returns an errno and *ipifpp is NULL.
1901  */
1902 int
1903 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1904     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1905 {
1906 	ipif_t *ipif;
1907 	int err = 0;
1908 	zoneid_t zoneid;
1909 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1910 
1911 	if (!CLASSD(group) || CLASSD(src)) {
1912 		return (EINVAL);
1913 	}
1914 	*ipifpp = NULL;
1915 
1916 	zoneid = IPCL_ZONEID(connp);
1917 
1918 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1919 	if (ifaddr != INADDR_ANY) {
1920 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1921 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1922 		if (err != 0 && err != EINPROGRESS)
1923 			err = EADDRNOTAVAIL;
1924 	} else if (ifindexp != NULL && *ifindexp != 0) {
1925 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1926 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1927 	} else {
1928 		ipif = ipif_lookup_group(group, zoneid, ipst);
1929 		if (ipif == NULL)
1930 			return (EADDRNOTAVAIL);
1931 	}
1932 	if (ipif == NULL)
1933 		return (err);
1934 
1935 	*ipifpp = ipif;
1936 	return (0);
1937 }
1938 
1939 /*
1940  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1941  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1942  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1943  * an errno and *illpp and *ipifpp are undefined.
1944  */
1945 int
1946 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1947     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1948     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1949 {
1950 	boolean_t src_unspec;
1951 	ill_t *ill = NULL;
1952 	ipif_t *ipif = NULL;
1953 	int err;
1954 	zoneid_t zoneid = connp->conn_zoneid;
1955 	queue_t *wq = CONNP_TO_WQ(connp);
1956 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1957 
1958 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1959 
1960 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1961 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1962 			return (EINVAL);
1963 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1964 		if (src_unspec) {
1965 			*v4src = INADDR_ANY;
1966 		} else {
1967 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1968 		}
1969 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1970 			return (EINVAL);
1971 		*ipifpp = NULL;
1972 		*isv6 = B_FALSE;
1973 	} else {
1974 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1975 			return (EINVAL);
1976 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1977 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1978 			return (EINVAL);
1979 		}
1980 		*illpp = NULL;
1981 		*isv6 = B_TRUE;
1982 	}
1983 
1984 	if (ifindex == 0) {
1985 		if (*isv6)
1986 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1987 		else
1988 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1989 		if (ill == NULL && ipif == NULL)
1990 			return (EADDRNOTAVAIL);
1991 	} else {
1992 		if (*isv6) {
1993 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1994 			    wq, first_mp, func, &err, ipst);
1995 			if (ill != NULL &&
1996 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1997 				ill_refrele(ill);
1998 				ill = NULL;
1999 				err = EADDRNOTAVAIL;
2000 			}
2001 		} else {
2002 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
2003 			    zoneid, wq, first_mp, func, &err, ipst);
2004 		}
2005 		if (ill == NULL && ipif == NULL)
2006 			return (err);
2007 	}
2008 
2009 	*ipifpp = ipif;
2010 	*illpp = ill;
2011 	return (0);
2012 }
2013 
2014 static int
2015 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
2016     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2017 {
2018 	ilg_t *ilg;
2019 	int i, numsrc, fmode, outsrcs;
2020 	struct sockaddr_in *sin;
2021 	struct sockaddr_in6 *sin6;
2022 	struct in_addr *addrp;
2023 	slist_t *fp;
2024 	boolean_t is_v4only_api;
2025 
2026 	mutex_enter(&connp->conn_lock);
2027 
2028 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2029 	if (ilg == NULL) {
2030 		mutex_exit(&connp->conn_lock);
2031 		return (EADDRNOTAVAIL);
2032 	}
2033 
2034 	if (gf == NULL) {
2035 		ASSERT(imsf != NULL);
2036 		ASSERT(!isv4mapped);
2037 		is_v4only_api = B_TRUE;
2038 		outsrcs = imsf->imsf_numsrc;
2039 	} else {
2040 		ASSERT(imsf == NULL);
2041 		is_v4only_api = B_FALSE;
2042 		outsrcs = gf->gf_numsrc;
2043 	}
2044 
2045 	/*
2046 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2047 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2048 	 * So we need to translate here.
2049 	 */
2050 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2051 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2052 	if ((fp = ilg->ilg_filter) == NULL) {
2053 		numsrc = 0;
2054 	} else {
2055 		for (i = 0; i < outsrcs; i++) {
2056 			if (i == fp->sl_numsrc)
2057 				break;
2058 			if (isv4mapped) {
2059 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2060 				sin6->sin6_family = AF_INET6;
2061 				sin6->sin6_addr = fp->sl_addr[i];
2062 			} else {
2063 				if (is_v4only_api) {
2064 					addrp = &imsf->imsf_slist[i];
2065 				} else {
2066 					sin = (struct sockaddr_in *)
2067 					    &gf->gf_slist[i];
2068 					sin->sin_family = AF_INET;
2069 					addrp = &sin->sin_addr;
2070 				}
2071 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2072 			}
2073 		}
2074 		numsrc = fp->sl_numsrc;
2075 	}
2076 
2077 	if (is_v4only_api) {
2078 		imsf->imsf_numsrc = numsrc;
2079 		imsf->imsf_fmode = fmode;
2080 	} else {
2081 		gf->gf_numsrc = numsrc;
2082 		gf->gf_fmode = fmode;
2083 	}
2084 
2085 	mutex_exit(&connp->conn_lock);
2086 
2087 	return (0);
2088 }
2089 
2090 static int
2091 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2092     const struct in6_addr *grp, ill_t *ill)
2093 {
2094 	ilg_t *ilg;
2095 	int i;
2096 	struct sockaddr_storage *sl;
2097 	struct sockaddr_in6 *sin6;
2098 	slist_t *fp;
2099 
2100 	mutex_enter(&connp->conn_lock);
2101 
2102 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2103 	if (ilg == NULL) {
2104 		mutex_exit(&connp->conn_lock);
2105 		return (EADDRNOTAVAIL);
2106 	}
2107 
2108 	/*
2109 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2110 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2111 	 * So we need to translate here.
2112 	 */
2113 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2114 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2115 	if ((fp = ilg->ilg_filter) == NULL) {
2116 		gf->gf_numsrc = 0;
2117 	} else {
2118 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2119 			if (i == fp->sl_numsrc)
2120 				break;
2121 			sin6 = (struct sockaddr_in6 *)sl;
2122 			sin6->sin6_family = AF_INET6;
2123 			sin6->sin6_addr = fp->sl_addr[i];
2124 		}
2125 		gf->gf_numsrc = fp->sl_numsrc;
2126 	}
2127 
2128 	mutex_exit(&connp->conn_lock);
2129 
2130 	return (0);
2131 }
2132 
2133 static int
2134 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2135     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2136 {
2137 	ilg_t *ilg;
2138 	int i, err, infmode, new_fmode;
2139 	uint_t insrcs;
2140 	struct sockaddr_in *sin;
2141 	struct sockaddr_in6 *sin6;
2142 	struct in_addr *addrp;
2143 	slist_t *orig_filter = NULL;
2144 	slist_t *new_filter = NULL;
2145 	mcast_record_t orig_fmode;
2146 	boolean_t leave_grp, is_v4only_api;
2147 	ilg_stat_t ilgstat;
2148 
2149 	if (gf == NULL) {
2150 		ASSERT(imsf != NULL);
2151 		ASSERT(!isv4mapped);
2152 		is_v4only_api = B_TRUE;
2153 		insrcs = imsf->imsf_numsrc;
2154 		infmode = imsf->imsf_fmode;
2155 	} else {
2156 		ASSERT(imsf == NULL);
2157 		is_v4only_api = B_FALSE;
2158 		insrcs = gf->gf_numsrc;
2159 		infmode = gf->gf_fmode;
2160 	}
2161 
2162 	/* Make sure we can handle the source list */
2163 	if (insrcs > MAX_FILTER_SIZE)
2164 		return (ENOBUFS);
2165 
2166 	/*
2167 	 * setting the filter to (INCLUDE, NULL) is treated
2168 	 * as a request to leave the group.
2169 	 */
2170 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2171 
2172 	ASSERT(IAM_WRITER_IPIF(ipif));
2173 
2174 	mutex_enter(&connp->conn_lock);
2175 
2176 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2177 	if (ilg == NULL) {
2178 		/*
2179 		 * if the request was actually to leave, and we
2180 		 * didn't find an ilg, there's nothing to do.
2181 		 */
2182 		if (!leave_grp)
2183 			ilg = conn_ilg_alloc(connp, &err);
2184 		if (leave_grp || ilg == NULL) {
2185 			mutex_exit(&connp->conn_lock);
2186 			return (leave_grp ? 0 : err);
2187 		}
2188 		ilgstat = ILGSTAT_NEW;
2189 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2190 		ilg->ilg_ipif = ipif;
2191 		ilg->ilg_ill = NULL;
2192 	} else if (leave_grp) {
2193 		ilg_delete(connp, ilg, NULL);
2194 		mutex_exit(&connp->conn_lock);
2195 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2196 		return (0);
2197 	} else {
2198 		ilgstat = ILGSTAT_CHANGE;
2199 		/* Preserve existing state in case ip_addmulti() fails */
2200 		orig_fmode = ilg->ilg_fmode;
2201 		if (ilg->ilg_filter == NULL) {
2202 			orig_filter = NULL;
2203 		} else {
2204 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2205 			if (orig_filter == NULL) {
2206 				mutex_exit(&connp->conn_lock);
2207 				return (ENOMEM);
2208 			}
2209 		}
2210 	}
2211 
2212 	/*
2213 	 * Alloc buffer to copy new state into (see below) before
2214 	 * we make any changes, so we can bail if it fails.
2215 	 */
2216 	if ((new_filter = l_alloc()) == NULL) {
2217 		mutex_exit(&connp->conn_lock);
2218 		err = ENOMEM;
2219 		goto free_and_exit;
2220 	}
2221 
2222 	if (insrcs == 0) {
2223 		CLEAR_SLIST(ilg->ilg_filter);
2224 	} else {
2225 		slist_t *fp;
2226 		if (ilg->ilg_filter == NULL) {
2227 			fp = l_alloc();
2228 			if (fp == NULL) {
2229 				if (ilgstat == ILGSTAT_NEW)
2230 					ilg_delete(connp, ilg, NULL);
2231 				mutex_exit(&connp->conn_lock);
2232 				err = ENOMEM;
2233 				goto free_and_exit;
2234 			}
2235 		} else {
2236 			fp = ilg->ilg_filter;
2237 		}
2238 		for (i = 0; i < insrcs; i++) {
2239 			if (isv4mapped) {
2240 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2241 				fp->sl_addr[i] = sin6->sin6_addr;
2242 			} else {
2243 				if (is_v4only_api) {
2244 					addrp = &imsf->imsf_slist[i];
2245 				} else {
2246 					sin = (struct sockaddr_in *)
2247 					    &gf->gf_slist[i];
2248 					addrp = &sin->sin_addr;
2249 				}
2250 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2251 			}
2252 		}
2253 		fp->sl_numsrc = insrcs;
2254 		ilg->ilg_filter = fp;
2255 	}
2256 	/*
2257 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2258 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2259 	 * So we need to translate here.
2260 	 */
2261 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2262 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2263 
2264 	/*
2265 	 * Save copy of ilg's filter state to pass to other functions,
2266 	 * so we can release conn_lock now.
2267 	 */
2268 	new_fmode = ilg->ilg_fmode;
2269 	l_copy(ilg->ilg_filter, new_filter);
2270 
2271 	mutex_exit(&connp->conn_lock);
2272 
2273 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2274 	if (err != 0) {
2275 		/*
2276 		 * Restore the original filter state, or delete the
2277 		 * newly-created ilg.  We need to look up the ilg
2278 		 * again, though, since we've not been holding the
2279 		 * conn_lock.
2280 		 */
2281 		mutex_enter(&connp->conn_lock);
2282 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2283 		ASSERT(ilg != NULL);
2284 		if (ilgstat == ILGSTAT_NEW) {
2285 			ilg_delete(connp, ilg, NULL);
2286 		} else {
2287 			ilg->ilg_fmode = orig_fmode;
2288 			if (SLIST_IS_EMPTY(orig_filter)) {
2289 				CLEAR_SLIST(ilg->ilg_filter);
2290 			} else {
2291 				/*
2292 				 * We didn't free the filter, even if we
2293 				 * were trying to make the source list empty;
2294 				 * so if orig_filter isn't empty, the ilg
2295 				 * must still have a filter alloc'd.
2296 				 */
2297 				l_copy(orig_filter, ilg->ilg_filter);
2298 			}
2299 		}
2300 		mutex_exit(&connp->conn_lock);
2301 	}
2302 
2303 free_and_exit:
2304 	l_free(orig_filter);
2305 	l_free(new_filter);
2306 
2307 	return (err);
2308 }
2309 
2310 static int
2311 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2312     const struct in6_addr *grp, ill_t *ill)
2313 {
2314 	ilg_t *ilg;
2315 	int i, orig_fmode, new_fmode, err;
2316 	slist_t *orig_filter = NULL;
2317 	slist_t *new_filter = NULL;
2318 	struct sockaddr_storage *sl;
2319 	struct sockaddr_in6 *sin6;
2320 	boolean_t leave_grp;
2321 	ilg_stat_t ilgstat;
2322 
2323 	/* Make sure we can handle the source list */
2324 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2325 		return (ENOBUFS);
2326 
2327 	/*
2328 	 * setting the filter to (INCLUDE, NULL) is treated
2329 	 * as a request to leave the group.
2330 	 */
2331 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2332 
2333 	ASSERT(IAM_WRITER_ILL(ill));
2334 
2335 	mutex_enter(&connp->conn_lock);
2336 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2337 	if (ilg == NULL) {
2338 		/*
2339 		 * if the request was actually to leave, and we
2340 		 * didn't find an ilg, there's nothing to do.
2341 		 */
2342 		if (!leave_grp)
2343 			ilg = conn_ilg_alloc(connp, &err);
2344 		if (leave_grp || ilg == NULL) {
2345 			mutex_exit(&connp->conn_lock);
2346 			return (leave_grp ? 0 : err);
2347 		}
2348 		ilgstat = ILGSTAT_NEW;
2349 		ilg->ilg_v6group = *grp;
2350 		ilg->ilg_ipif = NULL;
2351 		ilg->ilg_ill = ill;
2352 	} else if (leave_grp) {
2353 		ilg_delete(connp, ilg, NULL);
2354 		mutex_exit(&connp->conn_lock);
2355 		(void) ip_delmulti_v6(grp, ill, connp->conn_zoneid, B_FALSE,
2356 		    B_TRUE);
2357 		return (0);
2358 	} else {
2359 		ilgstat = ILGSTAT_CHANGE;
2360 		/* preserve existing state in case ip_addmulti() fails */
2361 		orig_fmode = ilg->ilg_fmode;
2362 		if (ilg->ilg_filter == NULL) {
2363 			orig_filter = NULL;
2364 		} else {
2365 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2366 			if (orig_filter == NULL) {
2367 				mutex_exit(&connp->conn_lock);
2368 				return (ENOMEM);
2369 			}
2370 		}
2371 	}
2372 
2373 	/*
2374 	 * Alloc buffer to copy new state into (see below) before
2375 	 * we make any changes, so we can bail if it fails.
2376 	 */
2377 	if ((new_filter = l_alloc()) == NULL) {
2378 		mutex_exit(&connp->conn_lock);
2379 		err = ENOMEM;
2380 		goto free_and_exit;
2381 	}
2382 
2383 	if (gf->gf_numsrc == 0) {
2384 		CLEAR_SLIST(ilg->ilg_filter);
2385 	} else {
2386 		slist_t *fp;
2387 		if (ilg->ilg_filter == NULL) {
2388 			fp = l_alloc();
2389 			if (fp == NULL) {
2390 				if (ilgstat == ILGSTAT_NEW)
2391 					ilg_delete(connp, ilg, NULL);
2392 				mutex_exit(&connp->conn_lock);
2393 				err = ENOMEM;
2394 				goto free_and_exit;
2395 			}
2396 		} else {
2397 			fp = ilg->ilg_filter;
2398 		}
2399 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2400 			sin6 = (struct sockaddr_in6 *)sl;
2401 			fp->sl_addr[i] = sin6->sin6_addr;
2402 		}
2403 		fp->sl_numsrc = gf->gf_numsrc;
2404 		ilg->ilg_filter = fp;
2405 	}
2406 	/*
2407 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2408 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2409 	 * So we need to translate here.
2410 	 */
2411 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2412 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2413 
2414 	/*
2415 	 * Save copy of ilg's filter state to pass to other functions,
2416 	 * so we can release conn_lock now.
2417 	 */
2418 	new_fmode = ilg->ilg_fmode;
2419 	l_copy(ilg->ilg_filter, new_filter);
2420 
2421 	mutex_exit(&connp->conn_lock);
2422 
2423 	err = ip_addmulti_v6(grp, ill, connp->conn_zoneid, ilgstat, new_fmode,
2424 	    new_filter);
2425 	if (err != 0) {
2426 		/*
2427 		 * Restore the original filter state, or delete the
2428 		 * newly-created ilg.  We need to look up the ilg
2429 		 * again, though, since we've not been holding the
2430 		 * conn_lock.
2431 		 */
2432 		mutex_enter(&connp->conn_lock);
2433 		ilg = ilg_lookup_ill_v6(connp, grp, ill);
2434 		ASSERT(ilg != NULL);
2435 		if (ilgstat == ILGSTAT_NEW) {
2436 			ilg_delete(connp, ilg, NULL);
2437 		} else {
2438 			ilg->ilg_fmode = orig_fmode;
2439 			if (SLIST_IS_EMPTY(orig_filter)) {
2440 				CLEAR_SLIST(ilg->ilg_filter);
2441 			} else {
2442 				/*
2443 				 * We didn't free the filter, even if we
2444 				 * were trying to make the source list empty;
2445 				 * so if orig_filter isn't empty, the ilg
2446 				 * must still have a filter alloc'd.
2447 				 */
2448 				l_copy(orig_filter, ilg->ilg_filter);
2449 			}
2450 		}
2451 		mutex_exit(&connp->conn_lock);
2452 	}
2453 
2454 free_and_exit:
2455 	l_free(orig_filter);
2456 	l_free(new_filter);
2457 
2458 	return (err);
2459 }
2460 
2461 /*
2462  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2463  */
2464 /* ARGSUSED */
2465 int
2466 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2467     ip_ioctl_cmd_t *ipip, void *ifreq)
2468 {
2469 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2470 	/* existence verified in ip_wput_nondata() */
2471 	mblk_t *data_mp = mp->b_cont->b_cont;
2472 	int datalen, err, cmd, minsize;
2473 	uint_t expsize = 0;
2474 	conn_t *connp;
2475 	boolean_t isv6, is_v4only_api, getcmd;
2476 	struct sockaddr_in *gsin;
2477 	struct sockaddr_in6 *gsin6;
2478 	ipaddr_t v4grp;
2479 	in6_addr_t v6grp;
2480 	struct group_filter *gf = NULL;
2481 	struct ip_msfilter *imsf = NULL;
2482 	mblk_t *ndp;
2483 
2484 	if (data_mp->b_cont != NULL) {
2485 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2486 			return (ENOMEM);
2487 		freemsg(data_mp);
2488 		data_mp = ndp;
2489 		mp->b_cont->b_cont = data_mp;
2490 	}
2491 
2492 	cmd = iocp->ioc_cmd;
2493 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2494 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2495 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2496 	datalen = MBLKL(data_mp);
2497 
2498 	if (datalen < minsize)
2499 		return (EINVAL);
2500 
2501 	/*
2502 	 * now we know we have at least have the initial structure,
2503 	 * but need to check for the source list array.
2504 	 */
2505 	if (is_v4only_api) {
2506 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2507 		isv6 = B_FALSE;
2508 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2509 	} else {
2510 		gf = (struct group_filter *)data_mp->b_rptr;
2511 		if (gf->gf_group.ss_family == AF_INET6) {
2512 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2513 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2514 		} else {
2515 			isv6 = B_FALSE;
2516 		}
2517 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2518 	}
2519 	if (datalen < expsize)
2520 		return (EINVAL);
2521 
2522 	connp = Q_TO_CONN(q);
2523 
2524 	/* operation not supported on the virtual network interface */
2525 	if (IS_VNI(ipif->ipif_ill))
2526 		return (EINVAL);
2527 
2528 	if (isv6) {
2529 		ill_t *ill = ipif->ipif_ill;
2530 		ill_refhold(ill);
2531 
2532 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2533 		v6grp = gsin6->sin6_addr;
2534 		if (getcmd)
2535 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2536 		else
2537 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2538 
2539 		ill_refrele(ill);
2540 	} else {
2541 		boolean_t isv4mapped = B_FALSE;
2542 		if (is_v4only_api) {
2543 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2544 		} else {
2545 			if (gf->gf_group.ss_family == AF_INET) {
2546 				gsin = (struct sockaddr_in *)&gf->gf_group;
2547 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2548 			} else {
2549 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2550 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2551 				    v4grp);
2552 				isv4mapped = B_TRUE;
2553 			}
2554 		}
2555 		if (getcmd)
2556 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2557 			    isv4mapped);
2558 		else
2559 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2560 			    isv4mapped);
2561 	}
2562 
2563 	return (err);
2564 }
2565 
2566 /*
2567  * Finds the ipif based on information in the ioctl headers.  Needed to make
2568  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2569  * ioctls prior to calling the ioctl's handler function).
2570  */
2571 int
2572 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2573     cmd_info_t *ci, ipsq_func_t func)
2574 {
2575 	int cmd = ipip->ipi_cmd;
2576 	int err = 0;
2577 	conn_t *connp;
2578 	ipif_t *ipif;
2579 	/* caller has verified this mblk exists */
2580 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2581 	struct ip_msfilter *imsf;
2582 	struct group_filter *gf;
2583 	ipaddr_t v4addr, v4grp;
2584 	in6_addr_t v6grp;
2585 	uint32_t index;
2586 	zoneid_t zoneid;
2587 	ip_stack_t *ipst;
2588 
2589 	connp = Q_TO_CONN(q);
2590 	zoneid = connp->conn_zoneid;
2591 	ipst = connp->conn_netstack->netstack_ip;
2592 
2593 	/* don't allow multicast operations on a tcp conn */
2594 	if (IPCL_IS_TCP(connp))
2595 		return (ENOPROTOOPT);
2596 
2597 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2598 		/* don't allow v4-specific ioctls on v6 socket */
2599 		if (connp->conn_af_isv6)
2600 			return (EAFNOSUPPORT);
2601 
2602 		imsf = (struct ip_msfilter *)dbuf;
2603 		v4addr = imsf->imsf_interface.s_addr;
2604 		v4grp = imsf->imsf_multiaddr.s_addr;
2605 		if (v4addr == INADDR_ANY) {
2606 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2607 			if (ipif == NULL)
2608 				err = EADDRNOTAVAIL;
2609 		} else {
2610 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2611 			    func, &err, ipst);
2612 		}
2613 	} else {
2614 		boolean_t isv6 = B_FALSE;
2615 		gf = (struct group_filter *)dbuf;
2616 		index = gf->gf_interface;
2617 		if (gf->gf_group.ss_family == AF_INET6) {
2618 			struct sockaddr_in6 *sin6;
2619 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2620 			v6grp = sin6->sin6_addr;
2621 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2622 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2623 			else
2624 				isv6 = B_TRUE;
2625 		} else if (gf->gf_group.ss_family == AF_INET) {
2626 			struct sockaddr_in *sin;
2627 			sin = (struct sockaddr_in *)&gf->gf_group;
2628 			v4grp = sin->sin_addr.s_addr;
2629 		} else {
2630 			return (EAFNOSUPPORT);
2631 		}
2632 		if (index == 0) {
2633 			if (isv6) {
2634 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2635 				    ipst);
2636 			} else {
2637 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2638 			}
2639 			if (ipif == NULL)
2640 				err = EADDRNOTAVAIL;
2641 		} else {
2642 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2643 			    q, mp, func, &err, ipst);
2644 		}
2645 	}
2646 
2647 	ci->ci_ipif = ipif;
2648 	return (err);
2649 }
2650 
2651 /*
2652  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2653  * in in two stages, as the first copyin tells us the size of the attached
2654  * source buffer.  This function is called by ip_wput_nondata() after the
2655  * first copyin has completed; it figures out how big the second stage
2656  * needs to be, and kicks it off.
2657  *
2658  * In some cases (numsrc < 2), the second copyin is not needed as the
2659  * first one gets a complete structure containing 1 source addr.
2660  *
2661  * The function returns 0 if a second copyin has been started (i.e. there's
2662  * no more work to be done right now), or 1 if the second copyin is not
2663  * needed and ip_wput_nondata() can continue its processing.
2664  */
2665 int
2666 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2667 {
2668 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2669 	int cmd = iocp->ioc_cmd;
2670 	/* validity of this checked in ip_wput_nondata() */
2671 	mblk_t *mp1 = mp->b_cont->b_cont;
2672 	int copysize = 0;
2673 	int offset;
2674 
2675 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2676 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2677 		if (gf->gf_numsrc >= 2) {
2678 			offset = sizeof (struct group_filter);
2679 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2680 		}
2681 	} else {
2682 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2683 		if (imsf->imsf_numsrc >= 2) {
2684 			offset = sizeof (struct ip_msfilter);
2685 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2686 		}
2687 	}
2688 	if (copysize > 0) {
2689 		mi_copyin_n(q, mp, offset, copysize);
2690 		return (0);
2691 	}
2692 	return (1);
2693 }
2694 
2695 /*
2696  * Handle the following optmgmt:
2697  *	IP_ADD_MEMBERSHIP		must not have joined already
2698  *	MCAST_JOIN_GROUP		must not have joined already
2699  *	IP_BLOCK_SOURCE			must have joined already
2700  *	MCAST_BLOCK_SOURCE		must have joined already
2701  *	IP_JOIN_SOURCE_GROUP		may have joined already
2702  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2703  *
2704  * fmode and src parameters may be used to determine which option is
2705  * being set, as follows (the IP_* and MCAST_* versions of each option
2706  * are functionally equivalent):
2707  *	opt			fmode			src
2708  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2709  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2710  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2711  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2712  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2713  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2714  *
2715  * Changing the filter mode is not allowed; if a matching ilg already
2716  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2717  *
2718  * Verifies that there is a source address of appropriate scope for
2719  * the group; if not, EADDRNOTAVAIL is returned.
2720  *
2721  * The interface to be used may be identified by an address or by an
2722  * index.  A pointer to the index is passed; if it is NULL, use the
2723  * address, otherwise, use the index.
2724  */
2725 int
2726 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2727     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2728     mblk_t *first_mp)
2729 {
2730 	ipif_t	*ipif;
2731 	ipsq_t	*ipsq;
2732 	int err = 0;
2733 	ill_t	*ill;
2734 
2735 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2736 	    ip_restart_optmgmt, &ipif);
2737 	if (err != 0) {
2738 		if (err != EINPROGRESS) {
2739 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2740 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2741 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2742 		}
2743 		return (err);
2744 	}
2745 	ASSERT(ipif != NULL);
2746 
2747 	ill = ipif->ipif_ill;
2748 	/* Operation not supported on a virtual network interface */
2749 	if (IS_VNI(ill)) {
2750 		ipif_refrele(ipif);
2751 		return (EINVAL);
2752 	}
2753 
2754 	if (checkonly) {
2755 		/*
2756 		 * do not do operation, just pretend to - new T_CHECK
2757 		 * semantics. The error return case above if encountered
2758 		 * considered a good enough "check" here.
2759 		 */
2760 		ipif_refrele(ipif);
2761 		return (0);
2762 	}
2763 
2764 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2765 	    NEW_OP);
2766 
2767 	/* unspecified source addr => no source filtering */
2768 	err = ilg_add(connp, group, ipif, fmode, src);
2769 
2770 	IPSQ_EXIT(ipsq);
2771 
2772 	ipif_refrele(ipif);
2773 	return (err);
2774 }
2775 
2776 /*
2777  * Handle the following optmgmt:
2778  *	IPV6_JOIN_GROUP			must not have joined already
2779  *	MCAST_JOIN_GROUP		must not have joined already
2780  *	MCAST_BLOCK_SOURCE		must have joined already
2781  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2782  *
2783  * fmode and src parameters may be used to determine which option is
2784  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2785  * are functionally equivalent):
2786  *	opt			fmode			v6src
2787  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2788  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2789  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2790  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2791  *
2792  * Changing the filter mode is not allowed; if a matching ilg already
2793  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2794  *
2795  * Verifies that there is a source address of appropriate scope for
2796  * the group; if not, EADDRNOTAVAIL is returned.
2797  *
2798  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2799  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2800  * v6src is also v4-mapped.
2801  */
2802 int
2803 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2804     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2805     const in6_addr_t *v6src, mblk_t *first_mp)
2806 {
2807 	ill_t *ill;
2808 	ipif_t	*ipif;
2809 	char buf[INET6_ADDRSTRLEN];
2810 	ipaddr_t v4group, v4src;
2811 	boolean_t isv6;
2812 	ipsq_t	*ipsq;
2813 	int	err;
2814 
2815 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2816 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2817 	if (err != 0) {
2818 		if (err != EINPROGRESS) {
2819 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2820 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2821 			    sizeof (buf)), ifindex));
2822 		}
2823 		return (err);
2824 	}
2825 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2826 
2827 	/* operation is not supported on the virtual network interface */
2828 	if (isv6) {
2829 		if (IS_VNI(ill)) {
2830 			ill_refrele(ill);
2831 			return (EINVAL);
2832 		}
2833 	} else {
2834 		if (IS_VNI(ipif->ipif_ill)) {
2835 			ipif_refrele(ipif);
2836 			return (EINVAL);
2837 		}
2838 	}
2839 
2840 	if (checkonly) {
2841 		/*
2842 		 * do not do operation, just pretend to - new T_CHECK
2843 		 * semantics. The error return case above if encountered
2844 		 * considered a good enough "check" here.
2845 		 */
2846 		if (isv6)
2847 			ill_refrele(ill);
2848 		else
2849 			ipif_refrele(ipif);
2850 		return (0);
2851 	}
2852 
2853 	if (!isv6) {
2854 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2855 		    ipsq, NEW_OP);
2856 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2857 		IPSQ_EXIT(ipsq);
2858 		ipif_refrele(ipif);
2859 	} else {
2860 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2861 		    ipsq, NEW_OP);
2862 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2863 		IPSQ_EXIT(ipsq);
2864 		ill_refrele(ill);
2865 	}
2866 
2867 	return (err);
2868 }
2869 
2870 static int
2871 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2872     mcast_record_t fmode, ipaddr_t src)
2873 {
2874 	ilg_t	*ilg;
2875 	in6_addr_t v6src;
2876 	boolean_t leaving = B_FALSE;
2877 
2878 	ASSERT(IAM_WRITER_IPIF(ipif));
2879 
2880 	/*
2881 	 * The ilg is valid only while we hold the conn lock. Once we drop
2882 	 * the lock, another thread can locate another ilg on this connp,
2883 	 * but on a different ipif, and delete it, and cause the ilg array
2884 	 * to be reallocated and copied. Hence do the ilg_delete before
2885 	 * dropping the lock.
2886 	 */
2887 	mutex_enter(&connp->conn_lock);
2888 	ilg = ilg_lookup_ipif(connp, group, ipif);
2889 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2890 		mutex_exit(&connp->conn_lock);
2891 		return (EADDRNOTAVAIL);
2892 	}
2893 
2894 	/*
2895 	 * Decide if we're actually deleting the ilg or just removing a
2896 	 * source filter address; if just removing an addr, make sure we
2897 	 * aren't trying to change the filter mode, and that the addr is
2898 	 * actually in our filter list already.  If we're removing the
2899 	 * last src in an include list, just delete the ilg.
2900 	 */
2901 	if (src == INADDR_ANY) {
2902 		v6src = ipv6_all_zeros;
2903 		leaving = B_TRUE;
2904 	} else {
2905 		int err = 0;
2906 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2907 		if (fmode != ilg->ilg_fmode)
2908 			err = EINVAL;
2909 		else if (ilg->ilg_filter == NULL ||
2910 		    !list_has_addr(ilg->ilg_filter, &v6src))
2911 			err = EADDRNOTAVAIL;
2912 		if (err != 0) {
2913 			mutex_exit(&connp->conn_lock);
2914 			return (err);
2915 		}
2916 		if (fmode == MODE_IS_INCLUDE &&
2917 		    ilg->ilg_filter->sl_numsrc == 1) {
2918 			v6src = ipv6_all_zeros;
2919 			leaving = B_TRUE;
2920 		}
2921 	}
2922 
2923 	ilg_delete(connp, ilg, &v6src);
2924 	mutex_exit(&connp->conn_lock);
2925 
2926 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2927 	return (0);
2928 }
2929 
2930 static int
2931 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2932     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2933 {
2934 	ilg_t	*ilg;
2935 	boolean_t leaving = B_TRUE;
2936 
2937 	ASSERT(IAM_WRITER_ILL(ill));
2938 
2939 	mutex_enter(&connp->conn_lock);
2940 	ilg = ilg_lookup_ill_v6(connp, v6group, ill);
2941 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2942 		mutex_exit(&connp->conn_lock);
2943 		return (EADDRNOTAVAIL);
2944 	}
2945 
2946 	/*
2947 	 * Decide if we're actually deleting the ilg or just removing a
2948 	 * source filter address; if just removing an addr, make sure we
2949 	 * aren't trying to change the filter mode, and that the addr is
2950 	 * actually in our filter list already.  If we're removing the
2951 	 * last src in an include list, just delete the ilg.
2952 	 */
2953 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2954 		int err = 0;
2955 		if (fmode != ilg->ilg_fmode)
2956 			err = EINVAL;
2957 		else if (ilg->ilg_filter == NULL ||
2958 		    !list_has_addr(ilg->ilg_filter, v6src))
2959 			err = EADDRNOTAVAIL;
2960 		if (err != 0) {
2961 			mutex_exit(&connp->conn_lock);
2962 			return (err);
2963 		}
2964 		if (fmode == MODE_IS_INCLUDE &&
2965 		    ilg->ilg_filter->sl_numsrc == 1)
2966 			v6src = NULL;
2967 		else
2968 			leaving = B_FALSE;
2969 	}
2970 
2971 	ilg_delete(connp, ilg, v6src);
2972 	mutex_exit(&connp->conn_lock);
2973 	(void) ip_delmulti_v6(v6group, ill, connp->conn_zoneid, B_FALSE,
2974 	    leaving);
2975 
2976 	return (0);
2977 }
2978 
2979 /*
2980  * Handle the following optmgmt:
2981  *	IP_DROP_MEMBERSHIP		will leave
2982  *	MCAST_LEAVE_GROUP		will leave
2983  *	IP_UNBLOCK_SOURCE		will not leave
2984  *	MCAST_UNBLOCK_SOURCE		will not leave
2985  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2986  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2987  *
2988  * fmode and src parameters may be used to determine which option is
2989  * being set, as follows (the IP_* and MCAST_* versions of each option
2990  * are functionally equivalent):
2991  *	opt			 fmode			src
2992  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2993  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2994  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2995  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2996  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
2997  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
2998  *
2999  * Changing the filter mode is not allowed; if a matching ilg already
3000  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3001  *
3002  * The interface to be used may be identified by an address or by an
3003  * index.  A pointer to the index is passed; if it is NULL, use the
3004  * address, otherwise, use the index.
3005  */
3006 int
3007 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3008     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3009     mblk_t *first_mp)
3010 {
3011 	ipif_t	*ipif;
3012 	ipsq_t	*ipsq;
3013 	int	err;
3014 	ill_t	*ill;
3015 
3016 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3017 	    ip_restart_optmgmt, &ipif);
3018 	if (err != 0) {
3019 		if (err != EINPROGRESS) {
3020 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3021 			    "0x%x, ifaddr 0x%x\n",
3022 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3023 		}
3024 		return (err);
3025 	}
3026 	ASSERT(ipif != NULL);
3027 
3028 	ill = ipif->ipif_ill;
3029 	/* Operation not supported on a virtual network interface */
3030 	if (IS_VNI(ill)) {
3031 		ipif_refrele(ipif);
3032 		return (EINVAL);
3033 	}
3034 
3035 	if (checkonly) {
3036 		/*
3037 		 * do not do operation, just pretend to - new T_CHECK
3038 		 * semantics. The error return case above if encountered
3039 		 * considered a good enough "check" here.
3040 		 */
3041 		ipif_refrele(ipif);
3042 		return (0);
3043 	}
3044 
3045 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3046 	    NEW_OP);
3047 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3048 	IPSQ_EXIT(ipsq);
3049 
3050 	ipif_refrele(ipif);
3051 	return (err);
3052 }
3053 
3054 /*
3055  * Handle the following optmgmt:
3056  *	IPV6_LEAVE_GROUP		will leave
3057  *	MCAST_LEAVE_GROUP		will leave
3058  *	MCAST_UNBLOCK_SOURCE		will not leave
3059  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3060  *
3061  * fmode and src parameters may be used to determine which option is
3062  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3063  * are functionally equivalent):
3064  *	opt			 fmode			v6src
3065  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3066  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3067  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3068  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3069  *
3070  * Changing the filter mode is not allowed; if a matching ilg already
3071  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3072  *
3073  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3074  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3075  * v6src is also v4-mapped.
3076  */
3077 int
3078 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3079     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3080     const in6_addr_t *v6src, mblk_t *first_mp)
3081 {
3082 	ill_t *ill;
3083 	ipif_t	*ipif;
3084 	char	buf[INET6_ADDRSTRLEN];
3085 	ipaddr_t v4group, v4src;
3086 	boolean_t isv6;
3087 	ipsq_t	*ipsq;
3088 	int	err;
3089 
3090 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3091 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3092 	if (err != 0) {
3093 		if (err != EINPROGRESS) {
3094 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3095 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3096 			    sizeof (buf)), ifindex));
3097 		}
3098 		return (err);
3099 	}
3100 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3101 
3102 	/* operation is not supported on the virtual network interface */
3103 	if (isv6) {
3104 		if (IS_VNI(ill)) {
3105 			ill_refrele(ill);
3106 			return (EINVAL);
3107 		}
3108 	} else {
3109 		if (IS_VNI(ipif->ipif_ill)) {
3110 			ipif_refrele(ipif);
3111 			return (EINVAL);
3112 		}
3113 	}
3114 
3115 	if (checkonly) {
3116 		/*
3117 		 * do not do operation, just pretend to - new T_CHECK
3118 		 * semantics. The error return case above if encountered
3119 		 * considered a good enough "check" here.
3120 		 */
3121 		if (isv6)
3122 			ill_refrele(ill);
3123 		else
3124 			ipif_refrele(ipif);
3125 		return (0);
3126 	}
3127 
3128 	if (!isv6) {
3129 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3130 		    ipsq, NEW_OP);
3131 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3132 		    v4src);
3133 		IPSQ_EXIT(ipsq);
3134 		ipif_refrele(ipif);
3135 	} else {
3136 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3137 		    ipsq, NEW_OP);
3138 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3139 		    v6src);
3140 		IPSQ_EXIT(ipsq);
3141 		ill_refrele(ill);
3142 	}
3143 
3144 	return (err);
3145 }
3146 
/*
 * Group management for an upper conn: the routines below maintain the
 * conn's group list and pass changes down to the interface multicast
 * list (and DLPI).  They can handle new-style options that specify an
 * interface name as opposed to an interface address (needed for general
 * handling of unnumbered interfaces).
 */
3154 
3155 /*
3156  * Add a group to an upper conn group data structure and pass things down
3157  * to the interface multicast list (and DLPI)
3158  */
3159 static int
3160 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3161     ipaddr_t src)
3162 {
3163 	int	error = 0;
3164 	ill_t	*ill;
3165 	ilg_t	*ilg;
3166 	ilg_stat_t ilgstat;
3167 	slist_t	*new_filter = NULL;
3168 	int	new_fmode;
3169 
3170 	ASSERT(IAM_WRITER_IPIF(ipif));
3171 
3172 	ill = ipif->ipif_ill;
3173 
3174 	if (!(ill->ill_flags & ILLF_MULTICAST))
3175 		return (EADDRNOTAVAIL);
3176 
3177 	/*
3178 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3179 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3180 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3181 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3182 	 * but both operations happen on the same conn.
3183 	 */
3184 	mutex_enter(&connp->conn_lock);
3185 	ilg = ilg_lookup_ipif(connp, group, ipif);
3186 
3187 	/*
3188 	 * Depending on the option we're handling, may or may not be okay
3189 	 * if group has already been added.  Figure out our rules based
3190 	 * on fmode and src params.  Also make sure there's enough room
3191 	 * in the filter if we're adding a source to an existing filter.
3192 	 */
3193 	if (src == INADDR_ANY) {
3194 		/* we're joining for all sources, must not have joined */
3195 		if (ilg != NULL)
3196 			error = EADDRINUSE;
3197 	} else {
3198 		if (fmode == MODE_IS_EXCLUDE) {
3199 			/* (excl {addr}) => block source, must have joined */
3200 			if (ilg == NULL)
3201 				error = EADDRNOTAVAIL;
3202 		}
3203 		/* (incl {addr}) => join source, may have joined */
3204 
3205 		if (ilg != NULL &&
3206 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3207 			error = ENOBUFS;
3208 	}
3209 	if (error != 0) {
3210 		mutex_exit(&connp->conn_lock);
3211 		return (error);
3212 	}
3213 
3214 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3215 
3216 	/*
3217 	 * Alloc buffer to copy new state into (see below) before
3218 	 * we make any changes, so we can bail if it fails.
3219 	 */
3220 	if ((new_filter = l_alloc()) == NULL) {
3221 		mutex_exit(&connp->conn_lock);
3222 		return (ENOMEM);
3223 	}
3224 
3225 	if (ilg == NULL) {
3226 		ilgstat = ILGSTAT_NEW;
3227 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
3228 			mutex_exit(&connp->conn_lock);
3229 			l_free(new_filter);
3230 			return (error);
3231 		}
3232 		if (src != INADDR_ANY) {
3233 			ilg->ilg_filter = l_alloc();
3234 			if (ilg->ilg_filter == NULL) {
3235 				ilg_delete(connp, ilg, NULL);
3236 				mutex_exit(&connp->conn_lock);
3237 				l_free(new_filter);
3238 				return (ENOMEM);
3239 			}
3240 			ilg->ilg_filter->sl_numsrc = 1;
3241 			IN6_IPADDR_TO_V4MAPPED(src,
3242 			    &ilg->ilg_filter->sl_addr[0]);
3243 		}
3244 		if (group == INADDR_ANY) {
3245 			ilg->ilg_v6group = ipv6_all_zeros;
3246 		} else {
3247 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3248 		}
3249 		ilg->ilg_ipif = ipif;
3250 		ilg->ilg_ill = NULL;
3251 		ilg->ilg_fmode = fmode;
3252 	} else {
3253 		int index;
3254 		in6_addr_t v6src;
3255 		ilgstat = ILGSTAT_CHANGE;
3256 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3257 			mutex_exit(&connp->conn_lock);
3258 			l_free(new_filter);
3259 			return (EINVAL);
3260 		}
3261 		if (ilg->ilg_filter == NULL) {
3262 			ilg->ilg_filter = l_alloc();
3263 			if (ilg->ilg_filter == NULL) {
3264 				mutex_exit(&connp->conn_lock);
3265 				l_free(new_filter);
3266 				return (ENOMEM);
3267 			}
3268 		}
3269 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3270 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3271 			mutex_exit(&connp->conn_lock);
3272 			l_free(new_filter);
3273 			return (EADDRNOTAVAIL);
3274 		}
3275 		index = ilg->ilg_filter->sl_numsrc++;
3276 		ilg->ilg_filter->sl_addr[index] = v6src;
3277 	}
3278 
3279 	/*
3280 	 * Save copy of ilg's filter state to pass to other functions,
3281 	 * so we can release conn_lock now.
3282 	 */
3283 	new_fmode = ilg->ilg_fmode;
3284 	l_copy(ilg->ilg_filter, new_filter);
3285 
3286 	mutex_exit(&connp->conn_lock);
3287 
3288 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3289 	if (error != 0) {
3290 		/*
3291 		 * Need to undo what we did before calling ip_addmulti()!
3292 		 * Must look up the ilg again since we've not been holding
3293 		 * conn_lock.
3294 		 */
3295 		in6_addr_t v6src;
3296 		if (ilgstat == ILGSTAT_NEW)
3297 			v6src = ipv6_all_zeros;
3298 		else
3299 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3300 		mutex_enter(&connp->conn_lock);
3301 		ilg = ilg_lookup_ipif(connp, group, ipif);
3302 		ASSERT(ilg != NULL);
3303 		ilg_delete(connp, ilg, &v6src);
3304 		mutex_exit(&connp->conn_lock);
3305 		l_free(new_filter);
3306 		return (error);
3307 	}
3308 
3309 	l_free(new_filter);
3310 	return (0);
3311 }
3312 
3313 static int
3314 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3315     mcast_record_t fmode, const in6_addr_t *v6src)
3316 {
3317 	int	error = 0;
3318 	ilg_t	*ilg;
3319 	ilg_stat_t ilgstat;
3320 	slist_t	*new_filter = NULL;
3321 	int	new_fmode;
3322 
3323 	ASSERT(IAM_WRITER_ILL(ill));
3324 
3325 	if (!(ill->ill_flags & ILLF_MULTICAST))
3326 		return (EADDRNOTAVAIL);
3327 
3328 	/*
3329 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3330 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3331 	 * and hme1 map to different ipsq's, but both operations happen
3332 	 * on the same conn.
3333 	 */
3334 	mutex_enter(&connp->conn_lock);
3335 
3336 	ilg = ilg_lookup_ill_v6(connp, v6group, ill);
3337 
3338 	/*
3339 	 * Depending on the option we're handling, may or may not be okay
3340 	 * if group has already been added.  Figure out our rules based
3341 	 * on fmode and src params.  Also make sure there's enough room
3342 	 * in the filter if we're adding a source to an existing filter.
3343 	 */
3344 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3345 		/* we're joining for all sources, must not have joined */
3346 		if (ilg != NULL)
3347 			error = EADDRINUSE;
3348 	} else {
3349 		if (fmode == MODE_IS_EXCLUDE) {
3350 			/* (excl {addr}) => block source, must have joined */
3351 			if (ilg == NULL)
3352 				error = EADDRNOTAVAIL;
3353 		}
3354 		/* (incl {addr}) => join source, may have joined */
3355 
3356 		if (ilg != NULL &&
3357 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3358 			error = ENOBUFS;
3359 	}
3360 	if (error != 0) {
3361 		mutex_exit(&connp->conn_lock);
3362 		return (error);
3363 	}
3364 
3365 	/*
3366 	 * Alloc buffer to copy new state into (see below) before
3367 	 * we make any changes, so we can bail if it fails.
3368 	 */
3369 	if ((new_filter = l_alloc()) == NULL) {
3370 		mutex_exit(&connp->conn_lock);
3371 		return (ENOMEM);
3372 	}
3373 
3374 	if (ilg == NULL) {
3375 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
3376 			mutex_exit(&connp->conn_lock);
3377 			l_free(new_filter);
3378 			return (error);
3379 		}
3380 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3381 			ilg->ilg_filter = l_alloc();
3382 			if (ilg->ilg_filter == NULL) {
3383 				ilg_delete(connp, ilg, NULL);
3384 				mutex_exit(&connp->conn_lock);
3385 				l_free(new_filter);
3386 				return (ENOMEM);
3387 			}
3388 			ilg->ilg_filter->sl_numsrc = 1;
3389 			ilg->ilg_filter->sl_addr[0] = *v6src;
3390 		}
3391 		ilgstat = ILGSTAT_NEW;
3392 		ilg->ilg_v6group = *v6group;
3393 		ilg->ilg_fmode = fmode;
3394 		ilg->ilg_ipif = NULL;
3395 		ilg->ilg_ill = ill;
3396 	} else {
3397 		int index;
3398 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3399 			mutex_exit(&connp->conn_lock);
3400 			l_free(new_filter);
3401 			return (EINVAL);
3402 		}
3403 		if (ilg->ilg_filter == NULL) {
3404 			ilg->ilg_filter = l_alloc();
3405 			if (ilg->ilg_filter == NULL) {
3406 				mutex_exit(&connp->conn_lock);
3407 				l_free(new_filter);
3408 				return (ENOMEM);
3409 			}
3410 		}
3411 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3412 			mutex_exit(&connp->conn_lock);
3413 			l_free(new_filter);
3414 			return (EADDRNOTAVAIL);
3415 		}
3416 		ilgstat = ILGSTAT_CHANGE;
3417 		index = ilg->ilg_filter->sl_numsrc++;
3418 		ilg->ilg_filter->sl_addr[index] = *v6src;
3419 	}
3420 
3421 	/*
3422 	 * Save copy of ilg's filter state to pass to other functions,
3423 	 * so we can release conn_lock now.
3424 	 */
3425 	new_fmode = ilg->ilg_fmode;
3426 	l_copy(ilg->ilg_filter, new_filter);
3427 
3428 	mutex_exit(&connp->conn_lock);
3429 
3430 	/*
3431 	 * Now update the ill. We wait to do this until after the ilg
3432 	 * has been updated because we need to update the src filter
3433 	 * info for the ill, which involves looking at the status of
3434 	 * all the ilgs associated with this group/interface pair.
3435 	 */
3436 	error = ip_addmulti_v6(v6group, ill, connp->conn_zoneid, ilgstat,
3437 	    new_fmode, new_filter);
3438 	if (error != 0) {
3439 		/*
3440 		 * But because we waited, we have to undo the ilg update
3441 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3442 		 * again, since we've not been holding conn_lock.
3443 		 */
3444 		in6_addr_t delsrc =
3445 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3446 		mutex_enter(&connp->conn_lock);
3447 		ilg = ilg_lookup_ill_v6(connp, v6group, ill);
3448 		ASSERT(ilg != NULL);
3449 		ilg_delete(connp, ilg, &delsrc);
3450 		mutex_exit(&connp->conn_lock);
3451 		l_free(new_filter);
3452 		return (error);
3453 	}
3454 
3455 	l_free(new_filter);
3456 
3457 	return (0);
3458 }
3459 
3460 /*
3461  * Find an IPv4 ilg matching group, ill and source
3462  */
3463 ilg_t *
3464 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3465 {
3466 	in6_addr_t v6group, v6src;
3467 	int i;
3468 	boolean_t isinlist;
3469 	ilg_t *ilg;
3470 	ipif_t *ipif;
3471 	ill_t *ilg_ill;
3472 
3473 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3474 
3475 	/*
3476 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3477 	 */
3478 	if (group == INADDR_ANY)
3479 		v6group = ipv6_all_zeros;
3480 	else
3481 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3482 
3483 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3484 		ilg = &connp->conn_ilg[i];
3485 		if ((ipif = ilg->ilg_ipif) == NULL ||
3486 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3487 			continue;
3488 		ASSERT(ilg->ilg_ill == NULL);
3489 		ilg_ill = ipif->ipif_ill;
3490 		ASSERT(!ilg_ill->ill_isv6);
3491 		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
3492 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3493 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3494 				/* no source filter, so this is a match */
3495 				return (ilg);
3496 			}
3497 			break;
3498 		}
3499 	}
3500 	if (i == connp->conn_ilg_inuse)
3501 		return (NULL);
3502 
3503 	/*
3504 	 * we have an ilg with matching ill and group; but
3505 	 * the ilg has a source list that we must check.
3506 	 */
3507 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3508 	isinlist = B_FALSE;
3509 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3510 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3511 			isinlist = B_TRUE;
3512 			break;
3513 		}
3514 	}
3515 
3516 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3517 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3518 		return (ilg);
3519 
3520 	return (NULL);
3521 }
3522 
3523 /*
3524  * Find an IPv6 ilg matching group, ill, and source
3525  */
3526 ilg_t *
3527 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3528     const in6_addr_t *v6src, ill_t *ill)
3529 {
3530 	int i;
3531 	boolean_t isinlist;
3532 	ilg_t *ilg;
3533 	ill_t *ilg_ill;
3534 
3535 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3536 
3537 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3538 		ilg = &connp->conn_ilg[i];
3539 		if ((ilg_ill = ilg->ilg_ill) == NULL ||
3540 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3541 			continue;
3542 		ASSERT(ilg->ilg_ipif == NULL);
3543 		ASSERT(ilg_ill->ill_isv6);
3544 		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
3545 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3546 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3547 				/* no source filter, so this is a match */
3548 				return (ilg);
3549 			}
3550 			break;
3551 		}
3552 	}
3553 	if (i == connp->conn_ilg_inuse)
3554 		return (NULL);
3555 
3556 	/*
3557 	 * we have an ilg with matching ill and group; but
3558 	 * the ilg has a source list that we must check.
3559 	 */
3560 	isinlist = B_FALSE;
3561 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3562 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3563 			isinlist = B_TRUE;
3564 			break;
3565 		}
3566 	}
3567 
3568 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3569 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3570 		return (ilg);
3571 
3572 	return (NULL);
3573 }
3574 
3575 /*
3576  * Find an IPv6 ilg matching group and ill
3577  */
3578 ilg_t *
3579 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3580 {
3581 	ilg_t	*ilg;
3582 	int	i;
3583 	ill_t 	*mem_ill;
3584 
3585 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3586 
3587 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3588 		ilg = &connp->conn_ilg[i];
3589 		if ((mem_ill = ilg->ilg_ill) == NULL ||
3590 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3591 			continue;
3592 		ASSERT(ilg->ilg_ipif == NULL);
3593 		ASSERT(mem_ill->ill_isv6);
3594 		if (mem_ill == ill &&
3595 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3596 			return (ilg);
3597 	}
3598 	return (NULL);
3599 }
3600 
3601 /*
3602  * Find an IPv4 ilg matching group and ipif
3603  */
3604 static ilg_t *
3605 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3606 {
3607 	in6_addr_t v6group;
3608 	int	i;
3609 	ilg_t	*ilg;
3610 
3611 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3612 	ASSERT(!ipif->ipif_ill->ill_isv6);
3613 
3614 	if (group == INADDR_ANY)
3615 		v6group = ipv6_all_zeros;
3616 	else
3617 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3618 
3619 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3620 		ilg = &connp->conn_ilg[i];
3621 		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
3622 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
3623 		    ilg->ilg_ipif == ipif)
3624 			return (ilg);
3625 	}
3626 	return (NULL);
3627 }
3628 
3629 /*
3630  * If a source address is passed in (src != NULL and src is not
3631  * unspecified), remove the specified src addr from the given ilg's
3632  * filter list, else delete the ilg.
3633  */
3634 static void
3635 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3636 {
3637 	int	i;
3638 
3639 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3640 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3641 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3642 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3643 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3644 
3645 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3646 		if (connp->conn_ilg_walker_cnt != 0) {
3647 			ilg->ilg_flags |= ILG_DELETED;
3648 			return;
3649 		}
3650 
3651 		FREE_SLIST(ilg->ilg_filter);
3652 
3653 		i = ilg - &connp->conn_ilg[0];
3654 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3655 
3656 		/* Move other entries up one step */
3657 		connp->conn_ilg_inuse--;
3658 		for (; i < connp->conn_ilg_inuse; i++)
3659 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3660 
3661 		if (connp->conn_ilg_inuse == 0) {
3662 			mi_free((char *)connp->conn_ilg);
3663 			connp->conn_ilg = NULL;
3664 			cv_broadcast(&connp->conn_refcv);
3665 		}
3666 	} else {
3667 		l_remove(ilg->ilg_filter, src);
3668 	}
3669 }
3670 
3671 /*
3672  * Called from conn close. No new ilg can be added or removed.
3673  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3674  * will return error if conn has started closing.
3675  */
3676 void
3677 ilg_delete_all(conn_t *connp)
3678 {
3679 	int	i;
3680 	ipif_t	*ipif = NULL;
3681 	ill_t	*ill = NULL;
3682 	ilg_t	*ilg;
3683 	in6_addr_t v6group;
3684 	boolean_t success;
3685 	ipsq_t	*ipsq;
3686 
3687 	mutex_enter(&connp->conn_lock);
3688 retry:
3689 	ILG_WALKER_HOLD(connp);
3690 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3691 		ilg = &connp->conn_ilg[i];
3692 		/*
3693 		 * Since this walk is not atomic (we drop the
3694 		 * conn_lock and wait in ipsq_enter) we need
3695 		 * to check for the ILG_DELETED flag.
3696 		 */
3697 		if (ilg->ilg_flags & ILG_DELETED)
3698 			continue;
3699 
3700 		if (IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)) {
3701 			ipif = ilg->ilg_ipif;
3702 			ill = ipif->ipif_ill;
3703 		} else {
3704 			ipif = NULL;
3705 			ill = ilg->ilg_ill;
3706 		}
3707 
3708 		/*
3709 		 * We may not be able to refhold the ill if the ill/ipif
3710 		 * is changing. But we need to make sure that the ill will
3711 		 * not vanish. So we just bump up the ill_waiter count.
3712 		 * If we are unable to do even that, then the ill is closing,
3713 		 * in which case the unplumb thread will handle the cleanup,
3714 		 * and we move on to the next ilg.
3715 		 */
3716 		if (!ill_waiter_inc(ill))
3717 			continue;
3718 
3719 		mutex_exit(&connp->conn_lock);
3720 		/*
3721 		 * To prevent deadlock between ill close which waits inside
3722 		 * the perimeter, and conn close, ipsq_enter returns error,
3723 		 * the moment ILL_CONDEMNED is set, in which case ill close
3724 		 * takes responsibility to cleanup the ilgs. Note that we
3725 		 * have not yet set condemned flag, otherwise the conn can't
3726 		 * be refheld for cleanup by those routines and it would be
3727 		 * a mutual deadlock.
3728 		 */
3729 		success = ipsq_enter(ill, B_FALSE, NEW_OP);
3730 		ipsq = ill->ill_phyint->phyint_ipsq;
3731 		ill_waiter_dcr(ill);
3732 		mutex_enter(&connp->conn_lock);
3733 		if (!success)
3734 			continue;
3735 
3736 		/*
3737 		 * Move on if the ilg was deleted while conn_lock was dropped.
3738 		 */
3739 		if (ilg->ilg_flags & ILG_DELETED) {
3740 			mutex_exit(&connp->conn_lock);
3741 			ipsq_exit(ipsq);
3742 			mutex_enter(&connp->conn_lock);
3743 			continue;
3744 		}
3745 		v6group = ilg->ilg_v6group;
3746 		ilg_delete(connp, ilg, NULL);
3747 		mutex_exit(&connp->conn_lock);
3748 
3749 		if (ipif != NULL) {
3750 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3751 			    B_FALSE, B_TRUE);
3752 		} else {
3753 			(void) ip_delmulti_v6(&v6group, ill,
3754 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3755 		}
3756 		ipsq_exit(ipsq);
3757 		mutex_enter(&connp->conn_lock);
3758 	}
3759 	ILG_WALKER_RELE(connp);
3760 
3761 	/* If any ill was skipped above wait and retry */
3762 	if (connp->conn_ilg_inuse != 0) {
3763 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3764 		goto retry;
3765 	}
3766 	mutex_exit(&connp->conn_lock);
3767 }
3768 
3769 /*
3770  * Called from ill close by ipcl_walk for clearing conn_ilg and
3771  * conn_multicast_ipif for a given ipif. conn is held by caller.
3772  * Note that ipcl_walk only walks conns that are not yet condemned.
3773  * condemned conns can't be refheld. For this reason, conn must become clean
3774  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3775  * condemned flag.
3776  */
3777 static void
3778 conn_delete_ipif(conn_t *connp, caddr_t arg)
3779 {
3780 	ipif_t	*ipif = (ipif_t *)arg;
3781 	int	i;
3782 	char	group_buf1[INET6_ADDRSTRLEN];
3783 	char	group_buf2[INET6_ADDRSTRLEN];
3784 	ipaddr_t group;
3785 	ilg_t	*ilg;
3786 
3787 	/*
3788 	 * Even though conn_ilg_inuse can change while we are in this loop,
3789 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3790 	 * be created or deleted for this connp, on this ill, since this ill
3791 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3792 	 */
3793 	mutex_enter(&connp->conn_lock);
3794 
3795 	/*
3796 	 * Increment the walker count, so that ilg repacking does not
3797 	 * occur while we are in the loop.
3798 	 */
3799 	ILG_WALKER_HOLD(connp);
3800 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3801 		ilg = &connp->conn_ilg[i];
3802 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3803 			continue;
3804 		/*
3805 		 * ip_close cannot be cleaning this ilg at the same time.
3806 		 * since it also has to execute in this ill's perimeter which
3807 		 * we are now holding. Only a clean conn can be condemned.
3808 		 */
3809 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3810 
3811 		/* Blow away the membership */
3812 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3813 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3814 		    group_buf1, sizeof (group_buf1)),
3815 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3816 		    group_buf2, sizeof (group_buf2)),
3817 		    ipif->ipif_ill->ill_name));
3818 
3819 		/* ilg_ipif is NULL for V6, so we won't be here */
3820 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3821 
3822 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3823 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3824 		mutex_exit(&connp->conn_lock);
3825 
3826 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3827 		mutex_enter(&connp->conn_lock);
3828 	}
3829 
3830 	/*
3831 	 * If we are the last walker, need to physically delete the
3832 	 * ilgs and repack.
3833 	 */
3834 	ILG_WALKER_RELE(connp);
3835 
3836 	if (connp->conn_multicast_ipif == ipif) {
3837 		/* Revert to late binding */
3838 		connp->conn_multicast_ipif = NULL;
3839 	}
3840 	mutex_exit(&connp->conn_lock);
3841 
3842 	conn_delete_ire(connp, (caddr_t)ipif);
3843 }
3844 
3845 /*
3846  * Called from ill close by ipcl_walk for clearing conn_ilg and
3847  * conn_multicast_ill for a given ill. conn is held by caller.
3848  * Note that ipcl_walk only walks conns that are not yet condemned.
3849  * condemned conns can't be refheld. For this reason, conn must become clean
3850  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3851  * condemned flag.
3852  */
3853 static void
3854 conn_delete_ill(conn_t *connp, caddr_t arg)
3855 {
3856 	ill_t	*ill = (ill_t *)arg;
3857 	int	i;
3858 	char	group_buf[INET6_ADDRSTRLEN];
3859 	in6_addr_t v6group;
3860 	ilg_t	*ilg;
3861 
3862 	/*
3863 	 * Even though conn_ilg_inuse can change while we are in this loop,
3864 	 * no new ilgs can be created/deleted for this connp, on this
3865 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3866 	 * in this cleanup.
3867 	 */
3868 	mutex_enter(&connp->conn_lock);
3869 
3870 	/*
3871 	 * Increment the walker count, so that ilg repacking does not
3872 	 * occur while we are in the loop.
3873 	 */
3874 	ILG_WALKER_HOLD(connp);
3875 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3876 		ilg = &connp->conn_ilg[i];
3877 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3878 			/*
3879 			 * ip_close cannot be cleaning this ilg at the same
3880 			 * time, since it also has to execute in this ill's
3881 			 * perimeter which we are now holding. Only a clean
3882 			 * conn can be condemned.
3883 			 */
3884 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3885 
3886 			/* Blow away the membership */
3887 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3888 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3889 			    group_buf, sizeof (group_buf)),
3890 			    ill->ill_name));
3891 
3892 			v6group = ilg->ilg_v6group;
3893 			ilg_delete(connp, ilg, NULL);
3894 			mutex_exit(&connp->conn_lock);
3895 
3896 			(void) ip_delmulti_v6(&v6group, ill,
3897 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3898 			mutex_enter(&connp->conn_lock);
3899 		}
3900 	}
3901 	/*
3902 	 * If we are the last walker, need to physically delete the
3903 	 * ilgs and repack.
3904 	 */
3905 	ILG_WALKER_RELE(connp);
3906 
3907 	if (connp->conn_multicast_ill == ill) {
3908 		/* Revert to late binding */
3909 		connp->conn_multicast_ill = NULL;
3910 	}
3911 	mutex_exit(&connp->conn_lock);
3912 }
3913 
3914 /*
3915  * Called when an ipif is unplumbed to make sure that there are no
3916  * dangling conn references to that ipif.
3917  * Handles ilg_ipif and conn_multicast_ipif
3918  */
3919 void
3920 reset_conn_ipif(ipif)
3921 	ipif_t	*ipif;
3922 {
3923 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
3924 
3925 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
3926 }
3927 
3928 /*
3929  * Called when an ill is unplumbed to make sure that there are no
3930  * dangling conn references to that ill.
3931  * Handles ilg_ill, conn_multicast_ill.
3932  */
3933 void
3934 reset_conn_ill(ill_t *ill)
3935 {
3936 	ip_stack_t	*ipst = ill->ill_ipst;
3937 
3938 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
3939 }
3940 
3941 #ifdef DEBUG
3942 /*
3943  * Walk functions walk all the interfaces in the system to make
3944  * sure that there is no refernece to the ipif or ill that is
3945  * going away.
3946  */
3947 int
3948 ilm_walk_ill(ill_t *ill)
3949 {
3950 	int cnt = 0;
3951 	ill_t *till;
3952 	ilm_t *ilm;
3953 	ill_walk_context_t ctx;
3954 	ip_stack_t	*ipst = ill->ill_ipst;
3955 
3956 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
3957 	till = ILL_START_WALK_ALL(&ctx, ipst);
3958 	for (; till != NULL; till = ill_next(&ctx, till)) {
3959 		mutex_enter(&till->ill_lock);
3960 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
3961 			if (ilm->ilm_ill == ill) {
3962 				cnt++;
3963 			}
3964 		}
3965 		mutex_exit(&till->ill_lock);
3966 	}
3967 	rw_exit(&ipst->ips_ill_g_lock);
3968 
3969 	return (cnt);
3970 }
3971 
3972 /*
3973  * This function is called before the ipif is freed.
3974  */
3975 int
3976 ilm_walk_ipif(ipif_t *ipif)
3977 {
3978 	int cnt = 0;
3979 	ill_t *till;
3980 	ilm_t *ilm;
3981 	ill_walk_context_t ctx;
3982 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
3983 
3984 	till = ILL_START_WALK_ALL(&ctx, ipst);
3985 	for (; till != NULL; till = ill_next(&ctx, till)) {
3986 		mutex_enter(&till->ill_lock);
3987 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
3988 			if (ilm->ilm_ipif == ipif) {
3989 					cnt++;
3990 			}
3991 		}
3992 		mutex_exit(&till->ill_lock);
3993 	}
3994 	return (cnt);
3995 }
3996 #endif
3997