xref: /illumos-gate/usr/src/uts/common/inet/ip/ip_multi.c (revision 743a77ed89085d3c232c4a2f65ab4e19576839e2)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
74 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
75 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
76     ipif_t *ipif);
77 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
78     mcast_record_t fmode, ipaddr_t src);
79 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
80     mcast_record_t fmode, const in6_addr_t *v6src);
81 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
82 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
83     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
84 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
85     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
86 static void	conn_ilg_reap(conn_t *connp);
87 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
88     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
89 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
90     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
91     const in6_addr_t *v6src);
92 static void	ill_ilm_walker_hold(ill_t *ill);
93 static void	ill_ilm_walker_rele(ill_t *ill);
94 
95 /*
96  * MT notes:
97  *
98  * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
100  * need to synchronize when operating on the ilg. Multiple threads
101  * potentially operating on different conn (socket endpoints) trying to
102  * do multicast joins could eventually end up trying to manipulate the
103  * ilm simultaneously and need to synchronize access to the ilm.  Currently,
104  * this is done by synchronizing join/leave via per-phyint ipsq_t
105  * serialization.
106  *
107  * An ilm is an IP data structure used to track multicast join/leave.
108  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
109  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
110  * referencing the ilm. ilms are created / destroyed only as writer. ilms
111  * are not passed around, instead they are looked up and used under the
112  * ill_lock or as writer. So we don't need a dynamic refcount of the number
113  * of threads holding reference to an ilm.
114  *
115  * Multicast Join operation:
116  *
117  * The first step is to determine the ipif (v4) or ill (v6) on which
118  * the join operation is to be done. The join is done after becoming
119  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
120  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
121  * Multiple threads can attempt to join simultaneously on different ipif/ill
122  * on the same conn. In this case the ipsq serialization does not help in
123  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
124  * The conn_lock also protects all the ilg_t members.
125  *
126  * Leave operation.
127  *
128  * Similar to the join operation, the first step is to determine the ipif
129  * or ill (v6) on which the leave operation is to be done. The leave operation
130  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
131  * As with join ilg modification is done under the protection of the conn lock.
132  */
133 
/*
 * Attempt to enter the ipsq for `ipif' on behalf of `connp' by calling
 * ipsq_try_enter() and storing its result in `ipsq'.  If that result is
 * NULL, the reference on `ipif' is dropped with ipif_refrele() and the
 * *calling function* returns EINPROGRESS, so this macro may only be used
 * inside functions returning int.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe under an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter((ipif), NULL,			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ipif_refrele(ipif);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
142 
/*
 * Attempt to enter the ipsq for `ill' on behalf of `connp' by calling
 * ipsq_try_enter() and storing its result in `ipsq'.  If that result is
 * NULL, the reference on `ill' is dropped with ill_refrele() and the
 * *calling function* returns EINPROGRESS, so this macro may only be used
 * inside functions returning int.
 *
 * The body is wrapped in do { } while (0) so that the macro expands to a
 * single statement and is safe under an unbraced if/else; invoke it with a
 * trailing semicolon.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter(NULL, (ill),			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ill_refrele(ill);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
151 
/*
 * Leave `ipsq' via ipsq_exit() if it is non-NULL; a NULL `ipsq' is a no-op.
 * Wrapped in do { } while (0) so the macro is a single statement that can
 * be followed by `else' without a dangling-else surprise; invoke it with a
 * trailing semicolon.
 */
#define	IPSQ_EXIT(ipsq)				\
	do {					\
		if ((ipsq) != NULL)		\
			ipsq_exit(ipsq);	\
	} while (0)
155 
/*
 * Walker accounting for a conn's ilg array.
 *
 * ILG_WALKER_HOLD bumps conn_ilg_walker_cnt.  ILG_WALKER_RELE drops it and,
 * when the count reaches zero, calls conn_ilg_reap() to dispose of entries
 * that were flagged ILG_DELETED.  conn_ilg_reap() asserts that conn_lock is
 * held, so ILG_WALKER_RELE must be invoked with that lock held (presumably
 * ILG_WALKER_HOLD as well -- confirm against callers).
 *
 * ILG_WALKER_RELE is wrapped in do { } while (0) so that it behaves as a
 * single statement under an unbraced if/else; invoke it with a trailing
 * semicolon.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)					\
	do {							\
		(connp)->conn_ilg_walker_cnt--;			\
		if ((connp)->conn_ilg_walker_cnt == 0)		\
			conn_ilg_reap(connp);			\
	} while (0)
164 
165 static void
166 conn_ilg_reap(conn_t *connp)
167 {
168 	int	to;
169 	int	from;
170 	ilg_t	*ilg;
171 
172 	ASSERT(MUTEX_HELD(&connp->conn_lock));
173 
174 	to = 0;
175 	from = 0;
176 	while (from < connp->conn_ilg_inuse) {
177 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
178 			ilg = &connp->conn_ilg[from];
179 			FREE_SLIST(ilg->ilg_filter);
180 			ilg->ilg_flags &= ~ILG_DELETED;
181 			from++;
182 			continue;
183 		}
184 		if (to != from)
185 			connp->conn_ilg[to] = connp->conn_ilg[from];
186 		to++;
187 		from++;
188 	}
189 
190 	connp->conn_ilg_inuse = to;
191 
192 	if (connp->conn_ilg_inuse == 0) {
193 		mi_free((char *)connp->conn_ilg);
194 		connp->conn_ilg = NULL;
195 		cv_broadcast(&connp->conn_refcv);
196 	}
197 }
198 
199 #define	GETSTRUCT(structure, number)	\
200 	((structure *)mi_zalloc(sizeof (structure) * (number)))
201 
202 #define	ILG_ALLOC_CHUNK	16
203 
204 /*
205  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
206  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
207  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
208  * returned ilg).  Returns NULL on failure, in which case `*errp' will be
209  * filled in with the reason.
210  *
211  * Assumes connp->conn_lock is held.
212  */
213 static ilg_t *
214 conn_ilg_alloc(conn_t *connp, int *errp)
215 {
216 	ilg_t *new, *ret;
217 	int curcnt;
218 
219 	ASSERT(MUTEX_HELD(&connp->conn_lock));
220 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
221 
222 	/*
223 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
224 	 * create any ilgs.
225 	 */
226 	if (connp->conn_state_flags & CONN_CLOSING) {
227 		*errp = EINVAL;
228 		return (NULL);
229 	}
230 
231 	if (connp->conn_ilg == NULL) {
232 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
233 		if (connp->conn_ilg == NULL) {
234 			*errp = ENOMEM;
235 			return (NULL);
236 		}
237 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
238 		connp->conn_ilg_inuse = 0;
239 	}
240 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
241 		if (connp->conn_ilg_walker_cnt != 0) {
242 			/*
243 			 * XXX We cannot grow the array at this point
244 			 * because a list walker could be in progress, and
245 			 * we cannot wipe out the existing array until the
246 			 * walker is done. Just return NULL for now.
247 			 * ilg_delete_all() will have to be changed when
248 			 * this logic is changed.
249 			 */
250 			*errp = EBUSY;
251 			return (NULL);
252 		}
253 		curcnt = connp->conn_ilg_allocated;
254 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
255 		if (new == NULL) {
256 			*errp = ENOMEM;
257 			return (NULL);
258 		}
259 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
260 		mi_free((char *)connp->conn_ilg);
261 		connp->conn_ilg = new;
262 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
263 	}
264 
265 	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
266 	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
267 	bzero(ret, sizeof (*ret));
268 	return (ret);
269 }
270 
/*
 * Accumulator handed to ilm_bld_flists() through ipcl_walk() while
 * ilm_gen_filter() rebuilds the filter state of one ilm.
 */
typedef struct ilm_fbld_s {
	/* the ilm whose interface filter is being rebuilt */
	ilm_t		*fbld_ilm;
	/* number of matching ilgs found in MODE_IS_INCLUDE */
	int		fbld_in_cnt;
	/* number of matching ilgs found in any other (exclude) mode */
	int		fbld_ex_cnt;
	/* master include list: union of the include-mode ilg filters */
	slist_t		fbld_in;
	/* master exclude list: intersection of the exclude-mode ilg filters */
	slist_t		fbld_ex;
	/* set by l_union_in_a() when fbld_in could not hold the union */
	boolean_t	fbld_in_overflow;
} ilm_fbld_t;
279 
280 static void
281 ilm_bld_flists(conn_t *conn, void *arg)
282 {
283 	int i;
284 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
285 	ilm_t *ilm = fbld->fbld_ilm;
286 	in6_addr_t *v6group = &ilm->ilm_v6addr;
287 
288 	if (conn->conn_ilg_inuse == 0)
289 		return;
290 
291 	/*
292 	 * Since we can't break out of the ipcl_walk once started, we still
293 	 * have to look at every conn.  But if we've already found one
294 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
295 	 * ilgs--that will be our state.
296 	 */
297 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
298 		return;
299 
300 	/*
301 	 * Check this conn's ilgs to see if any are interested in our
302 	 * ilm (group, interface match).  If so, update the master
303 	 * include and exclude lists we're building in the fbld struct
304 	 * with this ilg's filter info.
305 	 */
306 	mutex_enter(&conn->conn_lock);
307 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
308 		ilg_t *ilg = &conn->conn_ilg[i];
309 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
310 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
311 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
312 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
313 				fbld->fbld_in_cnt++;
314 				if (!fbld->fbld_in_overflow)
315 					l_union_in_a(&fbld->fbld_in,
316 					    ilg->ilg_filter,
317 					    &fbld->fbld_in_overflow);
318 			} else {
319 				fbld->fbld_ex_cnt++;
320 				/*
321 				 * On the first exclude list, don't try to do
322 				 * an intersection, as the master exclude list
323 				 * is intentionally empty.  If the master list
324 				 * is still empty on later iterations, that
325 				 * means we have at least one ilg with an empty
326 				 * exclude list, so that should be reflected
327 				 * when we take the intersection.
328 				 */
329 				if (fbld->fbld_ex_cnt == 1) {
330 					if (ilg->ilg_filter != NULL)
331 						l_copy(ilg->ilg_filter,
332 						    &fbld->fbld_ex);
333 				} else {
334 					l_intersection_in_a(&fbld->fbld_ex,
335 					    ilg->ilg_filter);
336 				}
337 			}
338 			/* there will only be one match, so break now. */
339 			break;
340 		}
341 	}
342 	mutex_exit(&conn->conn_lock);
343 }
344 
345 static void
346 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
347 {
348 	ilm_fbld_t fbld;
349 	ip_stack_t *ipst = ilm->ilm_ipst;
350 
351 	fbld.fbld_ilm = ilm;
352 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
353 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
354 	fbld.fbld_in_overflow = B_FALSE;
355 
356 	/* first, construct our master include and exclude lists */
357 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
358 
359 	/* now use those master lists to generate the interface filter */
360 
361 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
362 	if (fbld.fbld_in_overflow) {
363 		*fmode = MODE_IS_EXCLUDE;
364 		flist->sl_numsrc = 0;
365 		return;
366 	}
367 
368 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
369 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
370 		*fmode = MODE_IS_INCLUDE;
371 		flist->sl_numsrc = 0;
372 		return;
373 	}
374 
375 	/*
376 	 * If there are no exclude lists, then the interface filter
377 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
378 	 * exclude list makes the interface filter EXCLUDE, with its
379 	 * filter list equal to (fbld_ex - fbld_in).
380 	 */
381 	if (fbld.fbld_ex_cnt == 0) {
382 		*fmode = MODE_IS_INCLUDE;
383 		l_copy(&fbld.fbld_in, flist);
384 	} else {
385 		*fmode = MODE_IS_EXCLUDE;
386 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
387 	}
388 }
389 
390 static int
391 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
392     boolean_t isv6)
393 {
394 	mcast_record_t fmode;
395 	slist_t *flist;
396 	boolean_t fdefault;
397 	char buf[INET6_ADDRSTRLEN];
398 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
399 
400 	/*
401 	 * There are several cases where the ilm's filter state
402 	 * defaults to (EXCLUDE, NULL):
403 	 *	- we've had previous joins without associated ilgs
404 	 *	- this join has no associated ilg
405 	 *	- the ilg's filter state is (EXCLUDE, NULL)
406 	 */
407 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
408 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
409 
410 	/* attempt mallocs (if needed) before doing anything else */
411 	if ((flist = l_alloc()) == NULL)
412 		return (ENOMEM);
413 	if (!fdefault && ilm->ilm_filter == NULL) {
414 		ilm->ilm_filter = l_alloc();
415 		if (ilm->ilm_filter == NULL) {
416 			l_free(flist);
417 			return (ENOMEM);
418 		}
419 	}
420 
421 	if (ilgstat != ILGSTAT_CHANGE)
422 		ilm->ilm_refcnt++;
423 
424 	if (ilgstat == ILGSTAT_NONE)
425 		ilm->ilm_no_ilg_cnt++;
426 
427 	/*
428 	 * Determine new filter state.  If it's not the default
429 	 * (EXCLUDE, NULL), we must walk the conn list to find
430 	 * any ilgs interested in this group, and re-build the
431 	 * ilm filter.
432 	 */
433 	if (fdefault) {
434 		fmode = MODE_IS_EXCLUDE;
435 		flist->sl_numsrc = 0;
436 	} else {
437 		ilm_gen_filter(ilm, &fmode, flist);
438 	}
439 
440 	/* make sure state actually changed; nothing to do if not. */
441 	if ((ilm->ilm_fmode == fmode) &&
442 	    !lists_are_different(ilm->ilm_filter, flist)) {
443 		l_free(flist);
444 		return (0);
445 	}
446 
447 	/* send the state change report */
448 	if (!IS_LOOPBACK(ill)) {
449 		if (isv6)
450 			mld_statechange(ilm, fmode, flist);
451 		else
452 			igmp_statechange(ilm, fmode, flist);
453 	}
454 
455 	/* update the ilm state */
456 	ilm->ilm_fmode = fmode;
457 	if (flist->sl_numsrc > 0)
458 		l_copy(flist, ilm->ilm_filter);
459 	else
460 		CLEAR_SLIST(ilm->ilm_filter);
461 
462 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
463 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
464 
465 	l_free(flist);
466 	return (0);
467 }
468 
469 static int
470 ilm_update_del(ilm_t *ilm, boolean_t isv6)
471 {
472 	mcast_record_t fmode;
473 	slist_t *flist;
474 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
475 
476 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
477 	    ilm->ilm_refcnt));
478 
479 	if ((flist = l_alloc()) == NULL)
480 		return (ENOMEM);
481 
482 	/*
483 	 * If present, the ilg in question has already either been
484 	 * updated or removed from our list; so all we need to do
485 	 * now is walk the list to update the ilm filter state.
486 	 *
487 	 * Skip the list walk if we have any no-ilg joins, which
488 	 * cause the filter state to revert to (EXCLUDE, NULL).
489 	 */
490 	if (ilm->ilm_no_ilg_cnt != 0) {
491 		fmode = MODE_IS_EXCLUDE;
492 		flist->sl_numsrc = 0;
493 	} else {
494 		ilm_gen_filter(ilm, &fmode, flist);
495 	}
496 
497 	/* check to see if state needs to be updated */
498 	if ((ilm->ilm_fmode == fmode) &&
499 	    (!lists_are_different(ilm->ilm_filter, flist))) {
500 		l_free(flist);
501 		return (0);
502 	}
503 
504 	if (!IS_LOOPBACK(ill)) {
505 		if (isv6)
506 			mld_statechange(ilm, fmode, flist);
507 		else
508 			igmp_statechange(ilm, fmode, flist);
509 	}
510 
511 	ilm->ilm_fmode = fmode;
512 	if (flist->sl_numsrc > 0) {
513 		if (ilm->ilm_filter == NULL) {
514 			ilm->ilm_filter = l_alloc();
515 			if (ilm->ilm_filter == NULL) {
516 				char buf[INET6_ADDRSTRLEN];
517 				ip1dbg(("ilm_update_del: failed to alloc ilm "
518 				    "filter; no source filtering for %s on %s",
519 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
520 				    buf, sizeof (buf)), ill->ill_name));
521 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
522 				l_free(flist);
523 				return (0);
524 			}
525 		}
526 		l_copy(flist, ilm->ilm_filter);
527 	} else {
528 		CLEAR_SLIST(ilm->ilm_filter);
529 	}
530 
531 	l_free(flist);
532 	return (0);
533 }
534 
535 /*
536  * INADDR_ANY means all multicast addresses.
537  * INADDR_ANY is stored as IPv6 unspecified addr.
538  */
539 int
540 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
541     mcast_record_t ilg_fmode, slist_t *ilg_flist)
542 {
543 	ill_t	*ill = ipif->ipif_ill;
544 	ilm_t 	*ilm;
545 	in6_addr_t v6group;
546 	int	ret;
547 
548 	ASSERT(IAM_WRITER_IPIF(ipif));
549 
550 	if (!CLASSD(group) && group != INADDR_ANY)
551 		return (EINVAL);
552 
553 	if (IS_UNDER_IPMP(ill))
554 		return (EINVAL);
555 
556 	/*
557 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
558 	 */
559 	if (group == INADDR_ANY)
560 		v6group = ipv6_all_zeros;
561 	else
562 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
563 
564 	ilm = ilm_lookup_ipif(ipif, group);
565 	/*
566 	 * Since we are writer, we know the ilm_flags itself cannot
567 	 * change at this point, and ilm_lookup_ipif would not have
568 	 * returned a DELETED ilm. However, the data path can free
569 	 * ilm->ilm_next via ilm_walker_cleanup() so we can safely
570 	 * access anything in ilm except ilm_next (for safe access to
571 	 * ilm_next we'd have to take the ill_lock).
572 	 */
573 	if (ilm != NULL)
574 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
575 
576 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
577 	    ipif->ipif_zoneid);
578 	if (ilm == NULL)
579 		return (ENOMEM);
580 
581 	if (group == INADDR_ANY) {
582 		/*
583 		 * Check how many ipif's have members in this group -
584 		 * if more then one we should not tell the driver to join
585 		 * this time
586 		 */
587 		if (ilm_numentries_v6(ill, &v6group) > 1)
588 			return (0);
589 		ret = ill_join_allmulti(ill);
590 		if (ret != 0)
591 			ilm_delete(ilm);
592 		return (ret);
593 	}
594 
595 	if (!IS_LOOPBACK(ill))
596 		igmp_joingroup(ilm);
597 
598 	if (ilm_numentries_v6(ill, &v6group) > 1)
599 		return (0);
600 
601 	ret = ip_ll_addmulti_v6(ipif, &v6group);
602 	if (ret != 0)
603 		ilm_delete(ilm);
604 	return (ret);
605 }
606 
607 /*
608  * The unspecified address means all multicast addresses.
609  *
610  * ill identifies the interface to join on.
611  *
612  * ilgstat tells us if there's an ilg associated with this join,
613  * and if so, if it's a new ilg or a change to an existing one.
614  * ilg_fmode and ilg_flist give us the current filter state of
615  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
616  */
617 int
618 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
619     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist)
620 {
621 	ilm_t	*ilm;
622 	int	ret;
623 
624 	ASSERT(IAM_WRITER_ILL(ill));
625 
626 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
627 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
628 		return (EINVAL);
629 	}
630 
631 	if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_MC_SOLICITEDNODE(v6group))
632 		return (EINVAL);
633 
634 	/*
635 	 * An ilm is uniquely identified by the tuple of (group, ill) where
636 	 * `group' is the multicast group address, and `ill' is the interface
637 	 * on which it is currently joined.
638 	 */
639 	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
640 	if (ilm != NULL)
641 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
642 
643 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
644 	    ilg_flist, zoneid);
645 	if (ilm == NULL)
646 		return (ENOMEM);
647 
648 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
649 		/*
650 		 * Check how many ipif's that have members in this group -
651 		 * if more then one we should not tell the driver to join
652 		 * this time
653 		 */
654 		if (ilm_numentries_v6(ill, v6group) > 1)
655 			return (0);
656 		ret = ill_join_allmulti(ill);
657 		if (ret != 0)
658 			ilm_delete(ilm);
659 		return (ret);
660 	}
661 
662 	if (!IS_LOOPBACK(ill))
663 		mld_joingroup(ilm);
664 
665 	/*
666 	 * If we have more then one we should not tell the driver
667 	 * to join this time.
668 	 */
669 	if (ilm_numentries_v6(ill, v6group) > 1)
670 		return (0);
671 
672 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
673 	if (ret != 0)
674 		ilm_delete(ilm);
675 	return (ret);
676 }
677 
678 /*
679  * Send a multicast request to the driver for enabling multicast reception
680  * for v6groupp address. The caller has already checked whether it is
681  * appropriate to send one or not.
682  */
683 int
684 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
685 {
686 	mblk_t	*mp;
687 	uint32_t addrlen, addroff;
688 	char	group_buf[INET6_ADDRSTRLEN];
689 
690 	ASSERT(IAM_WRITER_ILL(ill));
691 
692 	/*
693 	 * If we're on the IPMP ill, use the nominated multicast interface to
694 	 * send and receive DLPI messages, if one exists.  (If none exists,
695 	 * there are no usable interfaces and thus nothing to do.)
696 	 */
697 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
698 		return (0);
699 
700 	/*
701 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
702 	 * on.
703 	 */
704 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
705 	    &addrlen, &addroff);
706 	if (!mp)
707 		return (ENOMEM);
708 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
709 		ipaddr_t v4group;
710 
711 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
712 		/*
713 		 * NOTE!!!
714 		 * The "addroff" passed in here was calculated by
715 		 * ill_create_dl(), and will be used by ill_create_squery()
716 		 * to perform some twisted coding magic. It is the offset
717 		 * into the dl_xxx_req of the hw addr. Here, it will be
718 		 * added to b_wptr - b_rptr to create a magic number that
719 		 * is not an offset into this squery mblk.
720 		 * The actual hardware address will be accessed only in the
721 		 * dl_xxx_req, not in the squery. More importantly,
722 		 * that hardware address can *only* be accessed in this
723 		 * mblk chain by calling mi_offset_param_c(), which uses
724 		 * the magic number in the squery hw offset field to go
725 		 * to the *next* mblk (the dl_xxx_req), subtract the
726 		 * (b_wptr - b_rptr), and find the actual offset into
727 		 * the dl_xxx_req.
728 		 * Any method that depends on using the
729 		 * offset field in the dl_disabmulti_req or squery
730 		 * to find either hardware address will similarly fail.
731 		 *
732 		 * Look in ar_entry_squery() in arp.c to see how this offset
733 		 * is used.
734 		 */
735 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
736 		if (!mp)
737 			return (ENOMEM);
738 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
739 		    inet_ntop(AF_INET6, v6groupp, group_buf,
740 		    sizeof (group_buf)),
741 		    ill->ill_name));
742 		putnext(ill->ill_rq, mp);
743 	} else {
744 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
745 		    " %s\n",
746 		    inet_ntop(AF_INET6, v6groupp, group_buf,
747 		    sizeof (group_buf)),
748 		    ill->ill_name));
749 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
750 	}
751 	return (0);
752 }
753 
754 /*
755  * Send a multicast request to the driver for enabling multicast
756  * membership for v6group if appropriate.
757  */
758 static int
759 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
760 {
761 	ill_t	*ill = ipif->ipif_ill;
762 
763 	ASSERT(IAM_WRITER_IPIF(ipif));
764 
765 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
766 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
767 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
768 		return (0);	/* Must be IRE_IF_NORESOLVER */
769 	}
770 
771 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
772 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
773 		return (0);
774 	}
775 	if (!ill->ill_dl_up) {
776 		/*
777 		 * Nobody there. All multicast addresses will be re-joined
778 		 * when we get the DL_BIND_ACK bringing the interface up.
779 		 */
780 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
781 		return (0);
782 	}
783 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
784 }
785 
786 /*
787  * INADDR_ANY means all multicast addresses.
788  * INADDR_ANY is stored as the IPv6 unspecified addr.
789  */
790 int
791 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
792 {
793 	ill_t	*ill = ipif->ipif_ill;
794 	ilm_t *ilm;
795 	in6_addr_t v6group;
796 
797 	ASSERT(IAM_WRITER_IPIF(ipif));
798 
799 	if (!CLASSD(group) && group != INADDR_ANY)
800 		return (EINVAL);
801 
802 	/*
803 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
804 	 */
805 	if (group == INADDR_ANY)
806 		v6group = ipv6_all_zeros;
807 	else
808 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
809 
810 	/*
811 	 * Look for a match on the ipif.
812 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
813 	 */
814 	ilm = ilm_lookup_ipif(ipif, group);
815 	if (ilm == NULL)
816 		return (ENOENT);
817 
818 	/* Update counters */
819 	if (no_ilg)
820 		ilm->ilm_no_ilg_cnt--;
821 
822 	if (leaving)
823 		ilm->ilm_refcnt--;
824 
825 	if (ilm->ilm_refcnt > 0)
826 		return (ilm_update_del(ilm, B_FALSE));
827 
828 	if (group == INADDR_ANY) {
829 		ilm_delete(ilm);
830 		/*
831 		 * Check how many ipif's that have members in this group -
832 		 * if there are still some left then don't tell the driver
833 		 * to drop it.
834 		 */
835 		if (ilm_numentries_v6(ill, &v6group) != 0)
836 			return (0);
837 
838 		/* If we never joined, then don't leave. */
839 		if (ill->ill_join_allmulti)
840 			ill_leave_allmulti(ill);
841 
842 		return (0);
843 	}
844 
845 	if (!IS_LOOPBACK(ill))
846 		igmp_leavegroup(ilm);
847 
848 	ilm_delete(ilm);
849 	/*
850 	 * Check how many ipif's that have members in this group -
851 	 * if there are still some left then don't tell the driver
852 	 * to drop it.
853 	 */
854 	if (ilm_numentries_v6(ill, &v6group) != 0)
855 		return (0);
856 	return (ip_ll_delmulti_v6(ipif, &v6group));
857 }
858 
859 /*
860  * The unspecified address means all multicast addresses.
861  */
862 int
863 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
864     boolean_t no_ilg, boolean_t leaving)
865 {
866 	ipif_t	*ipif;
867 	ilm_t *ilm;
868 
869 	ASSERT(IAM_WRITER_ILL(ill));
870 
871 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
872 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
873 		return (EINVAL);
874 
875 	/*
876 	 * Look for a match on the ill.
877 	 */
878 	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
879 	if (ilm == NULL)
880 		return (ENOENT);
881 
882 	ASSERT(ilm->ilm_ill == ill);
883 
884 	ipif = ill->ill_ipif;
885 
886 	/* Update counters */
887 	if (no_ilg)
888 		ilm->ilm_no_ilg_cnt--;
889 
890 	if (leaving)
891 		ilm->ilm_refcnt--;
892 
893 	if (ilm->ilm_refcnt > 0)
894 		return (ilm_update_del(ilm, B_TRUE));
895 
896 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
897 		ilm_delete(ilm);
898 		/*
899 		 * Check how many ipif's that have members in this group -
900 		 * if there are still some left then don't tell the driver
901 		 * to drop it.
902 		 */
903 		if (ilm_numentries_v6(ill, v6group) != 0)
904 			return (0);
905 
906 		/* If we never joined, then don't leave. */
907 		if (ill->ill_join_allmulti)
908 			ill_leave_allmulti(ill);
909 
910 		return (0);
911 	}
912 
913 	if (!IS_LOOPBACK(ill))
914 		mld_leavegroup(ilm);
915 
916 	ilm_delete(ilm);
917 	/*
918 	 * Check how many ipif's that have members in this group -
919 	 * if there are still some left then don't tell the driver
920 	 * to drop it.
921 	 */
922 	if (ilm_numentries_v6(ill, v6group) != 0)
923 		return (0);
924 	return (ip_ll_delmulti_v6(ipif, v6group));
925 }
926 
927 /*
928  * Send a multicast request to the driver for disabling multicast reception
929  * for v6groupp address. The caller has already checked whether it is
930  * appropriate to send one or not.
931  */
932 int
933 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
934 {
935 	mblk_t	*mp;
936 	char	group_buf[INET6_ADDRSTRLEN];
937 	uint32_t	addrlen, addroff;
938 
939 	ASSERT(IAM_WRITER_ILL(ill));
940 
941 	/*
942 	 * See comment in ip_ll_send_enabmulti_req().
943 	 */
944 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
945 		return (0);
946 
947 	/*
948 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
949 	 * on.
950 	 */
951 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
952 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
953 
954 	if (!mp)
955 		return (ENOMEM);
956 
957 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
958 		ipaddr_t v4group;
959 
960 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
961 		/*
962 		 * NOTE!!!
963 		 * The "addroff" passed in here was calculated by
964 		 * ill_create_dl(), and will be used by ill_create_squery()
965 		 * to perform some twisted coding magic. It is the offset
966 		 * into the dl_xxx_req of the hw addr. Here, it will be
967 		 * added to b_wptr - b_rptr to create a magic number that
968 		 * is not an offset into this mblk.
969 		 *
970 		 * Please see the comment in ip_ll_send)enabmulti_req()
971 		 * for a complete explanation.
972 		 *
973 		 * Look in ar_entry_squery() in arp.c to see how this offset
974 		 * is used.
975 		 */
976 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
977 		if (!mp)
978 			return (ENOMEM);
979 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
980 		    inet_ntop(AF_INET6, v6groupp, group_buf,
981 		    sizeof (group_buf)),
982 		    ill->ill_name));
983 		putnext(ill->ill_rq, mp);
984 	} else {
985 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
986 		    " %s\n",
987 		    inet_ntop(AF_INET6, v6groupp, group_buf,
988 		    sizeof (group_buf)),
989 		    ill->ill_name));
990 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
991 	}
992 	return (0);
993 }
994 
995 /*
996  * Send a multicast request to the driver for disabling multicast
997  * membership for v6group if appropriate.
998  */
999 static int
1000 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1001 {
1002 	ill_t	*ill = ipif->ipif_ill;
1003 
1004 	ASSERT(IAM_WRITER_IPIF(ipif));
1005 
1006 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1007 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1008 		return (0);	/* Must be IRE_IF_NORESOLVER */
1009 	}
1010 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1011 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1012 		return (0);
1013 	}
1014 	if (!ill->ill_dl_up) {
1015 		/*
1016 		 * Nobody there. All multicast addresses will be re-joined
1017 		 * when we get the DL_BIND_ACK bringing the interface up.
1018 		 */
1019 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1020 		return (0);
1021 	}
1022 	return (ip_ll_send_disabmulti_req(ill, v6group));
1023 }
1024 
1025 /*
1026  * Make the driver pass up all multicast packets.  NOTE: to keep callers
1027  * IPMP-unaware, if an IPMP ill is passed in, the ill_join_allmulti flag is
1028  * set on it (rather than the cast ill).
1029  */
1030 int
1031 ill_join_allmulti(ill_t *ill)
1032 {
1033 	mblk_t		*promiscon_mp, *promiscoff_mp;
1034 	uint32_t	addrlen, addroff;
1035 	ill_t		*join_ill = ill;
1036 
1037 	ASSERT(IAM_WRITER_ILL(ill));
1038 
1039 	if (!ill->ill_dl_up) {
1040 		/*
1041 		 * Nobody there. All multicast addresses will be re-joined
1042 		 * when we get the DL_BIND_ACK bringing the interface up.
1043 		 */
1044 		return (0);
1045 	}
1046 
1047 	/*
1048 	 * See comment in ip_ll_send_enabmulti_req().
1049 	 */
1050 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
1051 		return (0);
1052 
1053 	ASSERT(!join_ill->ill_join_allmulti);
1054 
1055 	/*
1056 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
1057 	 * provider.  We don't need to do this for certain media types for
1058 	 * which we never need to turn promiscuous mode on.  While we're here,
1059 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
1060 	 * ill_leave_allmulti() will not fail due to low memory conditions.
1061 	 */
1062 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1063 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1064 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1065 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1066 		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1067 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1068 		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
1069 			freemsg(promiscon_mp);
1070 			freemsg(promiscoff_mp);
1071 			return (ENOMEM);
1072 		}
1073 		ill->ill_promiscoff_mp = promiscoff_mp;
1074 		ill_dlpi_send(ill, promiscon_mp);
1075 	}
1076 
1077 	join_ill->ill_join_allmulti = B_TRUE;
1078 	return (0);
1079 }
1080 
1081 /*
1082  * Make the driver stop passing up all multicast packets
1083  */
1084 void
1085 ill_leave_allmulti(ill_t *ill)
1086 {
1087 	mblk_t	*promiscoff_mp;
1088 	ill_t	*leave_ill = ill;
1089 
1090 	ASSERT(IAM_WRITER_ILL(ill));
1091 
1092 	if (!ill->ill_dl_up) {
1093 		/*
1094 		 * Nobody there. All multicast addresses will be re-joined
1095 		 * when we get the DL_BIND_ACK bringing the interface up.
1096 		 */
1097 		return;
1098 	}
1099 
1100 	/*
1101 	 * See comment in ip_ll_send_enabmulti_req().
1102 	 */
1103 	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
1104 		return;
1105 
1106 	ASSERT(leave_ill->ill_join_allmulti);
1107 
1108 	/*
1109 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1110 	 * the DLPI provider.  We don't need to do this for certain
1111 	 * media types for which we never need to turn promiscuous
1112 	 * mode on.
1113 	 */
1114 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1115 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1116 		promiscoff_mp = ill->ill_promiscoff_mp;
1117 		ASSERT(promiscoff_mp != NULL);
1118 		ill->ill_promiscoff_mp = NULL;
1119 		ill_dlpi_send(ill, promiscoff_mp);
1120 	}
1121 
1122 	leave_ill->ill_join_allmulti = B_FALSE;
1123 }
1124 
1125 static ill_t *
1126 ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1127 {
1128 	ill_t		*ill;
1129 	boolean_t	in_ipsq;
1130 
1131 	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
1132 	    ipst);
1133 	if (ill != NULL) {
1134 		if (!ill_waiter_inc(ill)) {
1135 			ill_refrele(ill);
1136 			return (NULL);
1137 		}
1138 		ill_refrele(ill);
1139 		in_ipsq = ipsq_enter(ill, B_FALSE, NEW_OP);
1140 		ill_waiter_dcr(ill);
1141 		if (!in_ipsq)
1142 			ill = NULL;
1143 	}
1144 	return (ill);
1145 }
1146 
1147 int
1148 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1149 {
1150 	ill_t		*ill;
1151 	int		ret = 0;
1152 
1153 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1154 		return (ENODEV);
1155 
1156 	/*
1157 	 * The ip_addmulti*() functions won't allow IPMP underlying interfaces
1158 	 * to join allmulti since only the nominated underlying interface in
1159 	 * the group should receive multicast.  We silently succeed to avoid
1160 	 * having to teach IPobs (currently the only caller of this routine)
1161 	 * to ignore failures in this case.
1162 	 */
1163 	if (IS_UNDER_IPMP(ill))
1164 		goto out;
1165 
1166 	if (isv6) {
1167 		ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ill->ill_zoneid,
1168 		    ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL);
1169 	} else {
1170 		ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE,
1171 		    MODE_IS_EXCLUDE, NULL);
1172 	}
1173 	ill->ill_ipallmulti_cnt++;
1174 out:
1175 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1176 	return (ret);
1177 }
1178 
1179 
1180 int
1181 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1182 {
1183 	ill_t		*ill;
1184 
1185 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1186 		return (ENODEV);
1187 
1188 	if (ill->ill_ipallmulti_cnt > 0) {
1189 		if (isv6) {
1190 			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
1191 			    ill->ill_zoneid, B_TRUE, B_TRUE);
1192 		} else {
1193 			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
1194 			    B_TRUE);
1195 		}
1196 		ill->ill_ipallmulti_cnt--;
1197 	}
1198 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1199 	return (0);
1200 }
1201 
1202 /*
1203  * Delete the allmulti memberships that were added as part of
1204  * ip_join_allmulti().
1205  */
1206 void
1207 ip_purge_allmulti(ill_t *ill)
1208 {
1209 	ASSERT(IAM_WRITER_ILL(ill));
1210 
1211 	for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) {
1212 		if (ill->ill_isv6) {
1213 			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
1214 			    ill->ill_zoneid, B_TRUE, B_TRUE);
1215 		} else {
1216 			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
1217 			    B_TRUE);
1218 		}
1219 	}
1220 }
1221 
/*
 * Copy mp_orig and pass it in as a local message.
 *
 * The copy is handed to ip_wput_local() together with `q', `ill',
 * `fanout_flags' and `zoneid'; mp_orig itself is left to the caller.  If the
 * copy cannot be allocated the function silently returns.
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * For plain (M_DATA) UDP packets avoid a full copy: give the IP and
	 * UDP headers a private mblk and share the rest with the original
	 * via dupmsg().
	 */
	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		/* Both headers are expected to sit in the first mblk. */
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		/*
		 * Note that from here on the local `mp_orig' refers to the
		 * dupmsg() result (or NULL if dupmsg() failed), not to the
		 * caller's message.
		 */
		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			cred_t *cr;

			/* Private header copy, chained in front of the dup. */
			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			/* Skip the headers in the dup'ed mblk. */
			mp_orig->b_rptr += hdrsz;
			/* On labeled systems carry the cred over to the copy. */
			if (is_system_labeled() &&
			    (cr = msg_getcred(mp_orig, NULL)) != NULL)
				mblk_setcred(mp, cr, NOPID);
			/* Drop the dup'ed first mblk if only headers were in it. */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			/* allocb() worked but dupmsg() did not; give up. */
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	/*
	 * ipsec_mp always names the head of the copied message; when that
	 * head is an M_CTL, mp is advanced to the mblk that follows it.
	 */
	if (DB_TYPE(mp) == M_CTL) {
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	/*
	 * DTrace this as ip:::send.  A blocked packet will fire the send
	 * probe, but not the receive probe.
	 */
	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	/*
	 * NOTE(review): FW_HOOKS is a macro defined elsewhere; the NULL test
	 * on ipsec_mp below suggests it may consume the message and clear
	 * ipsec_mp -- confirm against its definition.
	 */
	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}
1298 
/*
 * Template for the AR_ENTRY_SQUERY message that ill_create_squery() builds
 * by passing this structure to ill_arp_alloc().  The hardware address offset
 * and length are left zero here and are filled in by ill_create_squery().
 */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
	sizeof (area_t),			/* proto addr offset */
	IP_ADDR_LEN,			/* proto addr_length */
	0,				/* proto mask offset */
	/* Rest is initialized when used */
	0,				/* flags */
	0,				/* hw addr offset */
	0,				/* hw addr length */
};
1312 
1313 static mblk_t *
1314 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1315     uint32_t addroff, mblk_t *mp_tail)
1316 {
1317 	mblk_t	*mp;
1318 	area_t	*area;
1319 
1320 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1321 	    (caddr_t)&ipaddr);
1322 	if (!mp) {
1323 		freemsg(mp_tail);
1324 		return (NULL);
1325 	}
1326 	area = (area_t *)mp->b_rptr;
1327 	area->area_hw_addr_length = addrlen;
1328 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1329 	/*
1330 	 * NOTE!
1331 	 *
1332 	 * The area_hw_addr_offset, as can be seen, does not hold the
1333 	 * actual hardware address offset. Rather, it holds the offset
1334 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1335 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1336 	 * mi_offset_paramc() to find the hardware address in the
1337 	 * *second* mblk (dl_xxx_req), not this mblk.
1338 	 *
1339 	 * Using mi_offset_paramc() is thus the *only* way to access
1340 	 * the dl_xxx_hw address.
1341 	 *
1342 	 * The squery hw address should *not* be accessed.
1343 	 *
1344 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1345 	 */
1346 
1347 	mp->b_cont = mp_tail;
1348 	return (mp);
1349 }
1350 
1351 /*
1352  * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
1353  * the hardware address.
1354  */
1355 static mblk_t *
1356 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1357     uint32_t *addr_lenp, uint32_t *addr_offp)
1358 {
1359 	mblk_t	*mp;
1360 	uint32_t	hw_addr_length;
1361 	char		*cp;
1362 	uint32_t	offset;
1363 	uint32_t 	size;
1364 
1365 	*addr_lenp = *addr_offp = 0;
1366 
1367 	hw_addr_length = ill->ill_phys_addr_length;
1368 	if (!hw_addr_length) {
1369 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1370 		return (NULL);
1371 	}
1372 
1373 	size = length;
1374 	switch (dl_primitive) {
1375 	case DL_ENABMULTI_REQ:
1376 	case DL_DISABMULTI_REQ:
1377 		size += hw_addr_length;
1378 		break;
1379 	case DL_PROMISCON_REQ:
1380 	case DL_PROMISCOFF_REQ:
1381 		break;
1382 	default:
1383 		return (NULL);
1384 	}
1385 	mp = allocb(size, BPRI_HI);
1386 	if (!mp)
1387 		return (NULL);
1388 	mp->b_wptr += size;
1389 	mp->b_datap->db_type = M_PROTO;
1390 
1391 	cp = (char *)mp->b_rptr;
1392 	offset = length;
1393 
1394 	switch (dl_primitive) {
1395 	case DL_ENABMULTI_REQ: {
1396 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1397 
1398 		dl->dl_primitive = dl_primitive;
1399 		dl->dl_addr_offset = offset;
1400 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1401 		*addr_offp = offset;
1402 		break;
1403 	}
1404 	case DL_DISABMULTI_REQ: {
1405 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1406 
1407 		dl->dl_primitive = dl_primitive;
1408 		dl->dl_addr_offset = offset;
1409 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1410 		*addr_offp = offset;
1411 		break;
1412 	}
1413 	case DL_PROMISCON_REQ:
1414 	case DL_PROMISCOFF_REQ: {
1415 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1416 
1417 		dl->dl_primitive = dl_primitive;
1418 		dl->dl_level = DL_PROMISC_MULTI;
1419 		break;
1420 	}
1421 	}
1422 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1423 	    *addr_lenp, *addr_offp));
1424 	return (mp);
1425 }
1426 
1427 /*
1428  * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
1429  * messages that had been delayed until we'd heard back from ARP.  One catch:
1430  * we need to ensure that no one else becomes writer on the IPSQ before we've
1431  * received the replies, or they'll incorrectly process our replies as part of
1432  * their unrelated IPSQ operation.  To do this, we start a new IPSQ operation,
1433  * which will complete when we process the reply in ip_rput_dlpi_writer().
1434  */
1435 /* ARGSUSED */
1436 static void
1437 ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
1438 {
1439 	ill_t *ill = q->q_ptr;
1440 	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
1441 
1442 	ASSERT(IAM_WRITER_ILL(ill));
1443 	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
1444 	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));
1445 
1446 	if (prim == DL_ENABMULTI_REQ) {
1447 		/* Track the state if this is the first enabmulti */
1448 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1449 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1450 	}
1451 
1452 	ipsq_current_start(ipsq, ill->ill_ipif, 0);
1453 	ill_dlpi_send(ill, mp);
1454 }
1455 
1456 void
1457 ip_wput_ctl(queue_t *q, mblk_t *mp)
1458 {
1459 	ill_t	*ill = q->q_ptr;
1460 	mblk_t	*dlmp = mp->b_cont;
1461 	area_t	*area = (area_t *)mp->b_rptr;
1462 	t_uscalar_t prim;
1463 
1464 	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
1465 	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
1466 	    dlmp == NULL) {
1467 		putnext(q, mp);
1468 		return;
1469 	}
1470 
1471 	/* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
1472 	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
1473 	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
1474 		putnext(q, mp);
1475 		return;
1476 	}
1477 	freeb(mp);
1478 
1479 	/* See comments above ip_wput_ctl_writer() for details */
1480 	ill_refhold(ill);
1481 	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
1482 }
1483 
1484 /*
1485  * Rejoin any groups which have been explicitly joined by the application (we
1486  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1487  * bringing the interface down).  Note that because groups can be joined and
1488  * left while an interface is down, this may not be the same set of groups
1489  * that we left in ill_leave_multicast().
1490  */
1491 void
1492 ill_recover_multicast(ill_t *ill)
1493 {
1494 	ilm_t	*ilm;
1495 	ipif_t	*ipif = ill->ill_ipif;
1496 	char    addrbuf[INET6_ADDRSTRLEN];
1497 
1498 	ASSERT(IAM_WRITER_ILL(ill));
1499 
1500 	ill->ill_need_recover_multicast = 0;
1501 
1502 	ill_ilm_walker_hold(ill);
1503 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1504 		/*
1505 		 * Check how many ipif's that have members in this group -
1506 		 * if more then one we make sure that this entry is first
1507 		 * in the list.
1508 		 */
1509 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1510 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
1511 		    ALL_ZONES) != ilm) {
1512 			continue;
1513 		}
1514 
1515 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1516 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1517 
1518 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1519 			(void) ill_join_allmulti(ill);
1520 		} else {
1521 			if (ill->ill_isv6)
1522 				mld_joingroup(ilm);
1523 			else
1524 				igmp_joingroup(ilm);
1525 
1526 			(void) ip_ll_addmulti_v6(ipif, &ilm->ilm_v6addr);
1527 		}
1528 	}
1529 	ill_ilm_walker_rele(ill);
1530 
1531 }
1532 
1533 /*
1534  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1535  * that were explicitly joined.
1536  */
1537 void
1538 ill_leave_multicast(ill_t *ill)
1539 {
1540 	ilm_t	*ilm;
1541 	ipif_t	*ipif = ill->ill_ipif;
1542 	char    addrbuf[INET6_ADDRSTRLEN];
1543 
1544 	ASSERT(IAM_WRITER_ILL(ill));
1545 
1546 	ill->ill_need_recover_multicast = 1;
1547 
1548 	ill_ilm_walker_hold(ill);
1549 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1550 		/*
1551 		 * Check how many ipif's that have members in this group -
1552 		 * if more then one we make sure that this entry is first
1553 		 * in the list.
1554 		 */
1555 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1556 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
1557 		    ALL_ZONES) != ilm) {
1558 			continue;
1559 		}
1560 
1561 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1562 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1563 
1564 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1565 			ill_leave_allmulti(ill);
1566 		} else {
1567 			if (ill->ill_isv6)
1568 				mld_leavegroup(ilm);
1569 			else
1570 				igmp_leavegroup(ilm);
1571 
1572 			(void) ip_ll_delmulti_v6(ipif, &ilm->ilm_v6addr);
1573 		}
1574 	}
1575 	ill_ilm_walker_rele(ill);
1576 }
1577 
1578 /* Find an ilm for matching the ill */
1579 ilm_t *
1580 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1581 {
1582 	in6_addr_t	v6group;
1583 
1584 	/*
1585 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1586 	 */
1587 	if (group == INADDR_ANY)
1588 		v6group = ipv6_all_zeros;
1589 	else
1590 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1591 
1592 	return (ilm_lookup_ill_v6(ill, &v6group, B_TRUE, zoneid));
1593 }
1594 
1595 /*
1596  * Find an ilm for address `v6group' on `ill' and zone `zoneid' (which may be
1597  * ALL_ZONES).  In general, if `ill' is in an IPMP group, we will match
1598  * against any ill in the group.  However, if `restrict_solicited' is set,
1599  * then specifically for IPv6 solicited-node multicast, the match will be
1600  * restricted to the specified `ill'.
1601  */
1602 ilm_t *
1603 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group,
1604     boolean_t restrict_solicited, zoneid_t zoneid)
1605 {
1606 	ilm_t	*ilm;
1607 	ilm_walker_t ilw;
1608 	boolean_t restrict_ill = B_FALSE;
1609 
1610 	/*
1611 	 * In general, underlying interfaces cannot have multicast memberships
1612 	 * and thus lookups always match across the illgrp.  However, we must
1613 	 * allow IPv6 solicited-node multicast memberships on underlying
1614 	 * interfaces, and thus an IPMP meta-interface and one of its
1615 	 * underlying ills may have the same solicited-node multicast address.
1616 	 * In that case, we need to restrict the lookup to the requested ill.
1617 	 * However, we may receive packets on an underlying interface that
1618 	 * are for the corresponding IPMP interface's solicited-node multicast
1619 	 * address, and thus in that case we need to match across the group --
1620 	 * hence the unfortunate `restrict_solicited' argument.
1621 	 */
1622 	if (IN6_IS_ADDR_MC_SOLICITEDNODE(v6group) && restrict_solicited)
1623 		restrict_ill = (IS_IPMP(ill) || IS_UNDER_IPMP(ill));
1624 
1625 	ilm = ilm_walker_start(&ilw, ill);
1626 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
1627 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1628 			continue;
1629 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1630 			continue;
1631 		if (!restrict_ill || ill == (ill->ill_isv6 ?
1632 		    ilm->ilm_ill : ilm->ilm_ipif->ipif_ill)) {
1633 			break;
1634 		}
1635 	}
1636 	ilm_walker_finish(&ilw);
1637 	return (ilm);
1638 }
1639 
1640 /*
1641  * Find an ilm for the ipif. Only needed for IPv4 which does
1642  * ipif specific socket options.
1643  */
1644 ilm_t *
1645 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1646 {
1647 	ilm_t *ilm;
1648 	ilm_walker_t ilw;
1649 
1650 	ilm = ilm_walker_start(&ilw, ipif->ipif_ill);
1651 	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
1652 		if (ilm->ilm_ipif == ipif && ilm->ilm_addr == group)
1653 			break;
1654 	}
1655 	ilm_walker_finish(&ilw);
1656 	return (ilm);
1657 }
1658 
1659 /*
1660  * How many members on this ill?
1661  */
1662 int
1663 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1664 {
1665 	ilm_t	*ilm;
1666 	int i = 0;
1667 
1668 	mutex_enter(&ill->ill_lock);
1669 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1670 		if (ilm->ilm_flags & ILM_DELETED)
1671 			continue;
1672 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1673 			i++;
1674 		}
1675 	}
1676 	mutex_exit(&ill->ill_lock);
1677 	return (i);
1678 }
1679 
1680 /* Caller guarantees that the group is not already on the list */
1681 static ilm_t *
1682 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1683     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1684 {
1685 	ill_t	*ill = ipif->ipif_ill;
1686 	ilm_t	*ilm;
1687 	ilm_t	*ilm_cur;
1688 	ilm_t	**ilm_ptpn;
1689 
1690 	ASSERT(IAM_WRITER_IPIF(ipif));
1691 
1692 	ilm = GETSTRUCT(ilm_t, 1);
1693 	if (ilm == NULL)
1694 		return (NULL);
1695 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1696 		ilm->ilm_filter = l_alloc();
1697 		if (ilm->ilm_filter == NULL) {
1698 			mi_free(ilm);
1699 			return (NULL);
1700 		}
1701 	}
1702 	ilm->ilm_v6addr = *v6group;
1703 	ilm->ilm_refcnt = 1;
1704 	ilm->ilm_zoneid = zoneid;
1705 	ilm->ilm_timer = INFINITY;
1706 	ilm->ilm_rtx.rtx_timer = INFINITY;
1707 
1708 	/*
1709 	 * IPv4 Multicast groups are joined using ipif.
1710 	 * IPv6 Multicast groups are joined using ill.
1711 	 */
1712 	if (ill->ill_isv6) {
1713 		ilm->ilm_ill = ill;
1714 		ilm->ilm_ipif = NULL;
1715 		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1716 		    (char *), "ilm", (void *), ilm);
1717 		ill->ill_ilm_cnt++;
1718 	} else {
1719 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1720 		ilm->ilm_ipif = ipif;
1721 		ilm->ilm_ill = NULL;
1722 		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
1723 		    (char *), "ilm", (void *), ilm);
1724 		ipif->ipif_ilm_cnt++;
1725 	}
1726 
1727 	ASSERT(ill->ill_ipst);
1728 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1729 
1730 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1731 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1732 
1733 	/*
1734 	 * Grab lock to give consistent view to readers
1735 	 */
1736 	mutex_enter(&ill->ill_lock);
1737 	/*
1738 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1739 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1740 	 * sending duplicates up when two applications in the same zone join the
1741 	 * same group on different logical interfaces.
1742 	 */
1743 	ilm_cur = ill->ill_ilm;
1744 	ilm_ptpn = &ill->ill_ilm;
1745 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1746 		ilm_ptpn = &ilm_cur->ilm_next;
1747 		ilm_cur = ilm_cur->ilm_next;
1748 	}
1749 	ilm->ilm_next = ilm_cur;
1750 	*ilm_ptpn = ilm;
1751 
1752 	/*
1753 	 * If we have an associated ilg, use its filter state; if not,
1754 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1755 	 */
1756 	if (ilgstat != ILGSTAT_NONE) {
1757 		if (!SLIST_IS_EMPTY(ilg_flist))
1758 			l_copy(ilg_flist, ilm->ilm_filter);
1759 		ilm->ilm_fmode = ilg_fmode;
1760 	} else {
1761 		ilm->ilm_no_ilg_cnt = 1;
1762 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1763 	}
1764 
1765 	mutex_exit(&ill->ill_lock);
1766 	return (ilm);
1767 }
1768 
1769 void
1770 ilm_inactive(ilm_t *ilm)
1771 {
1772 	FREE_SLIST(ilm->ilm_filter);
1773 	FREE_SLIST(ilm->ilm_pendsrcs);
1774 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1775 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1776 	ilm->ilm_ipst = NULL;
1777 	mi_free((char *)ilm);
1778 }
1779 
1780 void
1781 ilm_walker_cleanup(ill_t *ill)
1782 {
1783 	ilm_t	**ilmp;
1784 	ilm_t	*ilm;
1785 	boolean_t need_wakeup = B_FALSE;
1786 
1787 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1788 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1789 
1790 	ilmp = &ill->ill_ilm;
1791 	while (*ilmp != NULL) {
1792 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1793 			ilm = *ilmp;
1794 			*ilmp = ilm->ilm_next;
1795 			/*
1796 			 * check if there are any pending FREE or unplumb
1797 			 * operations that need to be restarted.
1798 			 */
1799 			if (ilm->ilm_ipif != NULL) {
1800 				/*
1801 				 * IPv4 ilms hold a ref on the ipif.
1802 				 */
1803 				DTRACE_PROBE3(ipif__decr__cnt,
1804 				    (ipif_t *), ilm->ilm_ipif,
1805 				    (char *), "ilm", (void *), ilm);
1806 				ilm->ilm_ipif->ipif_ilm_cnt--;
1807 				if (IPIF_FREE_OK(ilm->ilm_ipif))
1808 					need_wakeup = B_TRUE;
1809 			} else {
1810 				/*
1811 				 * IPv6 ilms hold a ref on the ill.
1812 				 */
1813 				ASSERT(ilm->ilm_ill == ill);
1814 				DTRACE_PROBE3(ill__decr__cnt,
1815 				    (ill_t *), ill,
1816 				    (char *), "ilm", (void *), ilm);
1817 				ASSERT(ill->ill_ilm_cnt > 0);
1818 				ill->ill_ilm_cnt--;
1819 				if (ILL_FREE_OK(ill))
1820 					need_wakeup = B_TRUE;
1821 			}
1822 			ilm_inactive(ilm); /* frees ilm */
1823 		} else {
1824 			ilmp = &(*ilmp)->ilm_next;
1825 		}
1826 	}
1827 	ill->ill_ilm_cleanup_reqd = 0;
1828 	if (need_wakeup)
1829 		ipif_ill_refrele_tail(ill);
1830 	else
1831 		mutex_exit(&ill->ill_lock);
1832 }
1833 
1834 /*
1835  * Unlink ilm and free it.
1836  */
1837 static void
1838 ilm_delete(ilm_t *ilm)
1839 {
1840 	ill_t		*ill;
1841 	ilm_t		**ilmp;
1842 	boolean_t	need_wakeup;
1843 
1844 
1845 	if (ilm->ilm_ipif != NULL) {
1846 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1847 		ASSERT(ilm->ilm_ill == NULL);
1848 		ill = ilm->ilm_ipif->ipif_ill;
1849 		ASSERT(!ill->ill_isv6);
1850 	} else {
1851 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1852 		ASSERT(ilm->ilm_ipif == NULL);
1853 		ill = ilm->ilm_ill;
1854 		ASSERT(ill->ill_isv6);
1855 	}
1856 	/*
1857 	 * Delete under lock protection so that readers don't stumble
1858 	 * on bad ilm_next
1859 	 */
1860 	mutex_enter(&ill->ill_lock);
1861 	if (ill->ill_ilm_walker_cnt != 0) {
1862 		ilm->ilm_flags |= ILM_DELETED;
1863 		ill->ill_ilm_cleanup_reqd = 1;
1864 		mutex_exit(&ill->ill_lock);
1865 		return;
1866 	}
1867 
1868 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1869 				;
1870 	*ilmp = ilm->ilm_next;
1871 
1872 	/*
1873 	 * if we are the last reference to the ipif (for IPv4 ilms)
1874 	 * or the ill (for IPv6 ilms), we may need to wakeup any
1875 	 * pending FREE or unplumb operations.
1876 	 */
1877 	need_wakeup = B_FALSE;
1878 	if (ilm->ilm_ipif != NULL) {
1879 		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
1880 		    (char *), "ilm", (void *), ilm);
1881 		ilm->ilm_ipif->ipif_ilm_cnt--;
1882 		if (IPIF_FREE_OK(ilm->ilm_ipif))
1883 			need_wakeup = B_TRUE;
1884 	} else {
1885 		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1886 		    (char *), "ilm", (void *), ilm);
1887 		ASSERT(ill->ill_ilm_cnt > 0);
1888 		ill->ill_ilm_cnt--;
1889 		if (ILL_FREE_OK(ill))
1890 			need_wakeup = B_TRUE;
1891 	}
1892 
1893 	ilm_inactive(ilm); /* frees this ilm */
1894 
1895 	if (need_wakeup) {
1896 		/* drops ill lock */
1897 		ipif_ill_refrele_tail(ill);
1898 	} else {
1899 		mutex_exit(&ill->ill_lock);
1900 	}
1901 }
1902 
1903 /* Increment the ILM walker count for `ill' */
1904 static void
1905 ill_ilm_walker_hold(ill_t *ill)
1906 {
1907 	mutex_enter(&ill->ill_lock);
1908 	ill->ill_ilm_walker_cnt++;
1909 	mutex_exit(&ill->ill_lock);
1910 }
1911 
1912 /* Decrement the ILM walker count for `ill' */
1913 static void
1914 ill_ilm_walker_rele(ill_t *ill)
1915 {
1916 	mutex_enter(&ill->ill_lock);
1917 	ill->ill_ilm_walker_cnt--;
1918 	if (ill->ill_ilm_walker_cnt == 0 && ill->ill_ilm_cleanup_reqd)
1919 		ilm_walker_cleanup(ill);	/* drops ill_lock */
1920 	else
1921 		mutex_exit(&ill->ill_lock);
1922 }
1923 
1924 /*
1925  * Start walking the ILMs associated with `ill'; the first ILM in the walk
1926  * (if any) is returned.  State associated with the walk is stored in `ilw'.
1927  * Note that walks associated with interfaces under IPMP also walk the ILMs
1928  * on the associated IPMP interface; this is handled transparently to callers
1929  * via ilm_walker_step().  (Usually with IPMP all ILMs will be on the IPMP
1930  * interface; the only exception is to support IPv6 test addresses, which
1931  * require ILMs for their associated solicited-node multicast addresses.)
1932  */
1933 ilm_t *
1934 ilm_walker_start(ilm_walker_t *ilw, ill_t *ill)
1935 {
1936 	ilw->ilw_ill = ill;
1937 	if (IS_UNDER_IPMP(ill))
1938 		ilw->ilw_ipmp_ill = ipmp_ill_hold_ipmp_ill(ill);
1939 	else
1940 		ilw->ilw_ipmp_ill = NULL;
1941 
1942 	ill_ilm_walker_hold(ill);
1943 	if (ilw->ilw_ipmp_ill != NULL)
1944 		ill_ilm_walker_hold(ilw->ilw_ipmp_ill);
1945 
1946 	if (ilw->ilw_ipmp_ill != NULL && ilw->ilw_ipmp_ill->ill_ilm != NULL)
1947 		ilw->ilw_walk_ill = ilw->ilw_ipmp_ill;
1948 	else
1949 		ilw->ilw_walk_ill = ilw->ilw_ill;
1950 
1951 	return (ilm_walker_step(ilw, NULL));
1952 }
1953 
1954 /*
1955  * Helper function for ilm_walker_step() that returns the next ILM
1956  * associated with `ilw', regardless of whether it's deleted.
1957  */
1958 static ilm_t *
1959 ilm_walker_step_all(ilm_walker_t *ilw, ilm_t *ilm)
1960 {
1961 	if (ilm == NULL)
1962 		return (ilw->ilw_walk_ill->ill_ilm);
1963 
1964 	if (ilm->ilm_next != NULL)
1965 		return (ilm->ilm_next);
1966 
1967 	if (ilw->ilw_ipmp_ill != NULL && IS_IPMP(ilw->ilw_walk_ill)) {
1968 		ilw->ilw_walk_ill = ilw->ilw_ill;
1969 		/*
1970 		 * It's possible that ilw_ill left the group during our walk,
1971 		 * so we can't ASSERT() that it's under IPMP.  Callers that
1972 		 * care will be writer on the IPSQ anyway.
1973 		 */
1974 		return (ilw->ilw_walk_ill->ill_ilm);
1975 	}
1976 	return (NULL);
1977 }
1978 
1979 /*
1980  * Step to the next ILM associated with `ilw'.
1981  */
1982 ilm_t *
1983 ilm_walker_step(ilm_walker_t *ilw, ilm_t *ilm)
1984 {
1985 	while ((ilm = ilm_walker_step_all(ilw, ilm)) != NULL) {
1986 		if (!(ilm->ilm_flags & ILM_DELETED))
1987 			break;
1988 	}
1989 	return (ilm);
1990 }
1991 
1992 /*
1993  * Finish the ILM walk associated with `ilw'.
1994  */
1995 void
1996 ilm_walker_finish(ilm_walker_t *ilw)
1997 {
1998 	ill_ilm_walker_rele(ilw->ilw_ill);
1999 	if (ilw->ilw_ipmp_ill != NULL) {
2000 		ill_ilm_walker_rele(ilw->ilw_ipmp_ill);
2001 		ill_refrele(ilw->ilw_ipmp_ill);
2002 	}
2003 	bzero(&ilw, sizeof (ilw));
2004 }
2005 
2006 /*
2007  * Looks up the appropriate ipif given a v4 multicast group and interface
2008  * address.  On success, returns 0, with *ipifpp pointing to the found
2009  * struct.  On failure, returns an errno and *ipifpp is NULL.
2010  */
2011 int
2012 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
2013     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
2014 {
2015 	ipif_t *ipif;
2016 	int err = 0;
2017 	zoneid_t zoneid;
2018 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
2019 
2020 	if (!CLASSD(group) || CLASSD(src)) {
2021 		return (EINVAL);
2022 	}
2023 	*ipifpp = NULL;
2024 
2025 	zoneid = IPCL_ZONEID(connp);
2026 
2027 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
2028 	if (ifaddr != INADDR_ANY) {
2029 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
2030 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
2031 		if (err != 0 && err != EINPROGRESS)
2032 			err = EADDRNOTAVAIL;
2033 	} else if (ifindexp != NULL && *ifindexp != 0) {
2034 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
2035 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
2036 	} else {
2037 		ipif = ipif_lookup_group(group, zoneid, ipst);
2038 		if (ipif == NULL)
2039 			return (EADDRNOTAVAIL);
2040 	}
2041 	if (ipif == NULL)
2042 		return (err);
2043 
2044 	*ipifpp = ipif;
2045 	return (0);
2046 }
2047 
2048 /*
2049  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
2050  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
2051  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
2052  * an errno and *illpp and *ipifpp are undefined.
2053  */
2054 int
2055 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
2056     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
2057     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
2058 {
2059 	boolean_t src_unspec;
2060 	ill_t *ill = NULL;
2061 	ipif_t *ipif = NULL;
2062 	int err;
2063 	zoneid_t zoneid = connp->conn_zoneid;
2064 	queue_t *wq = CONNP_TO_WQ(connp);
2065 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
2066 
2067 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
2068 
2069 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
2070 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
2071 			return (EINVAL);
2072 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
2073 		if (src_unspec) {
2074 			*v4src = INADDR_ANY;
2075 		} else {
2076 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
2077 		}
2078 		if (!CLASSD(*v4group) || CLASSD(*v4src))
2079 			return (EINVAL);
2080 		*ipifpp = NULL;
2081 		*isv6 = B_FALSE;
2082 	} else {
2083 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
2084 			return (EINVAL);
2085 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
2086 		    IN6_IS_ADDR_MULTICAST(v6src)) {
2087 			return (EINVAL);
2088 		}
2089 		*illpp = NULL;
2090 		*isv6 = B_TRUE;
2091 	}
2092 
2093 	if (ifindex == 0) {
2094 		if (*isv6)
2095 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
2096 		else
2097 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
2098 		if (ill == NULL && ipif == NULL)
2099 			return (EADDRNOTAVAIL);
2100 	} else {
2101 		if (*isv6) {
2102 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
2103 			    wq, first_mp, func, &err, ipst);
2104 			if (ill != NULL &&
2105 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
2106 				ill_refrele(ill);
2107 				ill = NULL;
2108 				err = EADDRNOTAVAIL;
2109 			}
2110 		} else {
2111 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
2112 			    zoneid, wq, first_mp, func, &err, ipst);
2113 		}
2114 		if (ill == NULL && ipif == NULL)
2115 			return (err);
2116 	}
2117 
2118 	*ipifpp = ipif;
2119 	*illpp = ill;
2120 	return (0);
2121 }
2122 
2123 static int
2124 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
2125     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2126 {
2127 	ilg_t *ilg;
2128 	int i, numsrc, fmode, outsrcs;
2129 	struct sockaddr_in *sin;
2130 	struct sockaddr_in6 *sin6;
2131 	struct in_addr *addrp;
2132 	slist_t *fp;
2133 	boolean_t is_v4only_api;
2134 
2135 	mutex_enter(&connp->conn_lock);
2136 
2137 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2138 	if (ilg == NULL) {
2139 		mutex_exit(&connp->conn_lock);
2140 		return (EADDRNOTAVAIL);
2141 	}
2142 
2143 	if (gf == NULL) {
2144 		ASSERT(imsf != NULL);
2145 		ASSERT(!isv4mapped);
2146 		is_v4only_api = B_TRUE;
2147 		outsrcs = imsf->imsf_numsrc;
2148 	} else {
2149 		ASSERT(imsf == NULL);
2150 		is_v4only_api = B_FALSE;
2151 		outsrcs = gf->gf_numsrc;
2152 	}
2153 
2154 	/*
2155 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2156 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2157 	 * So we need to translate here.
2158 	 */
2159 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2160 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2161 	if ((fp = ilg->ilg_filter) == NULL) {
2162 		numsrc = 0;
2163 	} else {
2164 		for (i = 0; i < outsrcs; i++) {
2165 			if (i == fp->sl_numsrc)
2166 				break;
2167 			if (isv4mapped) {
2168 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2169 				sin6->sin6_family = AF_INET6;
2170 				sin6->sin6_addr = fp->sl_addr[i];
2171 			} else {
2172 				if (is_v4only_api) {
2173 					addrp = &imsf->imsf_slist[i];
2174 				} else {
2175 					sin = (struct sockaddr_in *)
2176 					    &gf->gf_slist[i];
2177 					sin->sin_family = AF_INET;
2178 					addrp = &sin->sin_addr;
2179 				}
2180 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2181 			}
2182 		}
2183 		numsrc = fp->sl_numsrc;
2184 	}
2185 
2186 	if (is_v4only_api) {
2187 		imsf->imsf_numsrc = numsrc;
2188 		imsf->imsf_fmode = fmode;
2189 	} else {
2190 		gf->gf_numsrc = numsrc;
2191 		gf->gf_fmode = fmode;
2192 	}
2193 
2194 	mutex_exit(&connp->conn_lock);
2195 
2196 	return (0);
2197 }
2198 
2199 static int
2200 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2201     const struct in6_addr *grp, ill_t *ill)
2202 {
2203 	ilg_t *ilg;
2204 	int i;
2205 	struct sockaddr_storage *sl;
2206 	struct sockaddr_in6 *sin6;
2207 	slist_t *fp;
2208 
2209 	mutex_enter(&connp->conn_lock);
2210 
2211 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2212 	if (ilg == NULL) {
2213 		mutex_exit(&connp->conn_lock);
2214 		return (EADDRNOTAVAIL);
2215 	}
2216 
2217 	/*
2218 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2219 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2220 	 * So we need to translate here.
2221 	 */
2222 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2223 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2224 	if ((fp = ilg->ilg_filter) == NULL) {
2225 		gf->gf_numsrc = 0;
2226 	} else {
2227 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2228 			if (i == fp->sl_numsrc)
2229 				break;
2230 			sin6 = (struct sockaddr_in6 *)sl;
2231 			sin6->sin6_family = AF_INET6;
2232 			sin6->sin6_addr = fp->sl_addr[i];
2233 		}
2234 		gf->gf_numsrc = fp->sl_numsrc;
2235 	}
2236 
2237 	mutex_exit(&connp->conn_lock);
2238 
2239 	return (0);
2240 }
2241 
2242 static int
2243 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2244     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2245 {
2246 	ilg_t *ilg;
2247 	int i, err, infmode, new_fmode;
2248 	uint_t insrcs;
2249 	struct sockaddr_in *sin;
2250 	struct sockaddr_in6 *sin6;
2251 	struct in_addr *addrp;
2252 	slist_t *orig_filter = NULL;
2253 	slist_t *new_filter = NULL;
2254 	mcast_record_t orig_fmode;
2255 	boolean_t leave_grp, is_v4only_api;
2256 	ilg_stat_t ilgstat;
2257 
2258 	if (gf == NULL) {
2259 		ASSERT(imsf != NULL);
2260 		ASSERT(!isv4mapped);
2261 		is_v4only_api = B_TRUE;
2262 		insrcs = imsf->imsf_numsrc;
2263 		infmode = imsf->imsf_fmode;
2264 	} else {
2265 		ASSERT(imsf == NULL);
2266 		is_v4only_api = B_FALSE;
2267 		insrcs = gf->gf_numsrc;
2268 		infmode = gf->gf_fmode;
2269 	}
2270 
2271 	/* Make sure we can handle the source list */
2272 	if (insrcs > MAX_FILTER_SIZE)
2273 		return (ENOBUFS);
2274 
2275 	/*
2276 	 * setting the filter to (INCLUDE, NULL) is treated
2277 	 * as a request to leave the group.
2278 	 */
2279 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2280 
2281 	ASSERT(IAM_WRITER_IPIF(ipif));
2282 
2283 	mutex_enter(&connp->conn_lock);
2284 
2285 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2286 	if (ilg == NULL) {
2287 		/*
2288 		 * if the request was actually to leave, and we
2289 		 * didn't find an ilg, there's nothing to do.
2290 		 */
2291 		if (!leave_grp)
2292 			ilg = conn_ilg_alloc(connp, &err);
2293 		if (leave_grp || ilg == NULL) {
2294 			mutex_exit(&connp->conn_lock);
2295 			return (leave_grp ? 0 : err);
2296 		}
2297 		ilgstat = ILGSTAT_NEW;
2298 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2299 		ilg->ilg_ipif = ipif;
2300 		ilg->ilg_ill = NULL;
2301 	} else if (leave_grp) {
2302 		ilg_delete(connp, ilg, NULL);
2303 		mutex_exit(&connp->conn_lock);
2304 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2305 		return (0);
2306 	} else {
2307 		ilgstat = ILGSTAT_CHANGE;
2308 		/* Preserve existing state in case ip_addmulti() fails */
2309 		orig_fmode = ilg->ilg_fmode;
2310 		if (ilg->ilg_filter == NULL) {
2311 			orig_filter = NULL;
2312 		} else {
2313 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2314 			if (orig_filter == NULL) {
2315 				mutex_exit(&connp->conn_lock);
2316 				return (ENOMEM);
2317 			}
2318 		}
2319 	}
2320 
2321 	/*
2322 	 * Alloc buffer to copy new state into (see below) before
2323 	 * we make any changes, so we can bail if it fails.
2324 	 */
2325 	if ((new_filter = l_alloc()) == NULL) {
2326 		mutex_exit(&connp->conn_lock);
2327 		err = ENOMEM;
2328 		goto free_and_exit;
2329 	}
2330 
2331 	if (insrcs == 0) {
2332 		CLEAR_SLIST(ilg->ilg_filter);
2333 	} else {
2334 		slist_t *fp;
2335 		if (ilg->ilg_filter == NULL) {
2336 			fp = l_alloc();
2337 			if (fp == NULL) {
2338 				if (ilgstat == ILGSTAT_NEW)
2339 					ilg_delete(connp, ilg, NULL);
2340 				mutex_exit(&connp->conn_lock);
2341 				err = ENOMEM;
2342 				goto free_and_exit;
2343 			}
2344 		} else {
2345 			fp = ilg->ilg_filter;
2346 		}
2347 		for (i = 0; i < insrcs; i++) {
2348 			if (isv4mapped) {
2349 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2350 				fp->sl_addr[i] = sin6->sin6_addr;
2351 			} else {
2352 				if (is_v4only_api) {
2353 					addrp = &imsf->imsf_slist[i];
2354 				} else {
2355 					sin = (struct sockaddr_in *)
2356 					    &gf->gf_slist[i];
2357 					addrp = &sin->sin_addr;
2358 				}
2359 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2360 			}
2361 		}
2362 		fp->sl_numsrc = insrcs;
2363 		ilg->ilg_filter = fp;
2364 	}
2365 	/*
2366 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2367 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2368 	 * So we need to translate here.
2369 	 */
2370 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2371 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2372 
2373 	/*
2374 	 * Save copy of ilg's filter state to pass to other functions,
2375 	 * so we can release conn_lock now.
2376 	 */
2377 	new_fmode = ilg->ilg_fmode;
2378 	l_copy(ilg->ilg_filter, new_filter);
2379 
2380 	mutex_exit(&connp->conn_lock);
2381 
2382 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2383 	if (err != 0) {
2384 		/*
2385 		 * Restore the original filter state, or delete the
2386 		 * newly-created ilg.  We need to look up the ilg
2387 		 * again, though, since we've not been holding the
2388 		 * conn_lock.
2389 		 */
2390 		mutex_enter(&connp->conn_lock);
2391 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2392 		ASSERT(ilg != NULL);
2393 		if (ilgstat == ILGSTAT_NEW) {
2394 			ilg_delete(connp, ilg, NULL);
2395 		} else {
2396 			ilg->ilg_fmode = orig_fmode;
2397 			if (SLIST_IS_EMPTY(orig_filter)) {
2398 				CLEAR_SLIST(ilg->ilg_filter);
2399 			} else {
2400 				/*
2401 				 * We didn't free the filter, even if we
2402 				 * were trying to make the source list empty;
2403 				 * so if orig_filter isn't empty, the ilg
2404 				 * must still have a filter alloc'd.
2405 				 */
2406 				l_copy(orig_filter, ilg->ilg_filter);
2407 			}
2408 		}
2409 		mutex_exit(&connp->conn_lock);
2410 	}
2411 
2412 free_and_exit:
2413 	l_free(orig_filter);
2414 	l_free(new_filter);
2415 
2416 	return (err);
2417 }
2418 
2419 static int
2420 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2421     const struct in6_addr *grp, ill_t *ill)
2422 {
2423 	ilg_t *ilg;
2424 	int i, orig_fmode, new_fmode, err;
2425 	slist_t *orig_filter = NULL;
2426 	slist_t *new_filter = NULL;
2427 	struct sockaddr_storage *sl;
2428 	struct sockaddr_in6 *sin6;
2429 	boolean_t leave_grp;
2430 	ilg_stat_t ilgstat;
2431 
2432 	/* Make sure we can handle the source list */
2433 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2434 		return (ENOBUFS);
2435 
2436 	/*
2437 	 * setting the filter to (INCLUDE, NULL) is treated
2438 	 * as a request to leave the group.
2439 	 */
2440 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2441 
2442 	ASSERT(IAM_WRITER_ILL(ill));
2443 
2444 	mutex_enter(&connp->conn_lock);
2445 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2446 	if (ilg == NULL) {
2447 		/*
2448 		 * if the request was actually to leave, and we
2449 		 * didn't find an ilg, there's nothing to do.
2450 		 */
2451 		if (!leave_grp)
2452 			ilg = conn_ilg_alloc(connp, &err);
2453 		if (leave_grp || ilg == NULL) {
2454 			mutex_exit(&connp->conn_lock);
2455 			return (leave_grp ? 0 : err);
2456 		}
2457 		ilgstat = ILGSTAT_NEW;
2458 		ilg->ilg_v6group = *grp;
2459 		ilg->ilg_ipif = NULL;
2460 		ilg->ilg_ill = ill;
2461 	} else if (leave_grp) {
2462 		ilg_delete(connp, ilg, NULL);
2463 		mutex_exit(&connp->conn_lock);
2464 		(void) ip_delmulti_v6(grp, ill, connp->conn_zoneid, B_FALSE,
2465 		    B_TRUE);
2466 		return (0);
2467 	} else {
2468 		ilgstat = ILGSTAT_CHANGE;
2469 		/* preserve existing state in case ip_addmulti() fails */
2470 		orig_fmode = ilg->ilg_fmode;
2471 		if (ilg->ilg_filter == NULL) {
2472 			orig_filter = NULL;
2473 		} else {
2474 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2475 			if (orig_filter == NULL) {
2476 				mutex_exit(&connp->conn_lock);
2477 				return (ENOMEM);
2478 			}
2479 		}
2480 	}
2481 
2482 	/*
2483 	 * Alloc buffer to copy new state into (see below) before
2484 	 * we make any changes, so we can bail if it fails.
2485 	 */
2486 	if ((new_filter = l_alloc()) == NULL) {
2487 		mutex_exit(&connp->conn_lock);
2488 		err = ENOMEM;
2489 		goto free_and_exit;
2490 	}
2491 
2492 	if (gf->gf_numsrc == 0) {
2493 		CLEAR_SLIST(ilg->ilg_filter);
2494 	} else {
2495 		slist_t *fp;
2496 		if (ilg->ilg_filter == NULL) {
2497 			fp = l_alloc();
2498 			if (fp == NULL) {
2499 				if (ilgstat == ILGSTAT_NEW)
2500 					ilg_delete(connp, ilg, NULL);
2501 				mutex_exit(&connp->conn_lock);
2502 				err = ENOMEM;
2503 				goto free_and_exit;
2504 			}
2505 		} else {
2506 			fp = ilg->ilg_filter;
2507 		}
2508 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2509 			sin6 = (struct sockaddr_in6 *)sl;
2510 			fp->sl_addr[i] = sin6->sin6_addr;
2511 		}
2512 		fp->sl_numsrc = gf->gf_numsrc;
2513 		ilg->ilg_filter = fp;
2514 	}
2515 	/*
2516 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2517 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2518 	 * So we need to translate here.
2519 	 */
2520 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2521 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2522 
2523 	/*
2524 	 * Save copy of ilg's filter state to pass to other functions,
2525 	 * so we can release conn_lock now.
2526 	 */
2527 	new_fmode = ilg->ilg_fmode;
2528 	l_copy(ilg->ilg_filter, new_filter);
2529 
2530 	mutex_exit(&connp->conn_lock);
2531 
2532 	err = ip_addmulti_v6(grp, ill, connp->conn_zoneid, ilgstat, new_fmode,
2533 	    new_filter);
2534 	if (err != 0) {
2535 		/*
2536 		 * Restore the original filter state, or delete the
2537 		 * newly-created ilg.  We need to look up the ilg
2538 		 * again, though, since we've not been holding the
2539 		 * conn_lock.
2540 		 */
2541 		mutex_enter(&connp->conn_lock);
2542 		ilg = ilg_lookup_ill_v6(connp, grp, ill);
2543 		ASSERT(ilg != NULL);
2544 		if (ilgstat == ILGSTAT_NEW) {
2545 			ilg_delete(connp, ilg, NULL);
2546 		} else {
2547 			ilg->ilg_fmode = orig_fmode;
2548 			if (SLIST_IS_EMPTY(orig_filter)) {
2549 				CLEAR_SLIST(ilg->ilg_filter);
2550 			} else {
2551 				/*
2552 				 * We didn't free the filter, even if we
2553 				 * were trying to make the source list empty;
2554 				 * so if orig_filter isn't empty, the ilg
2555 				 * must still have a filter alloc'd.
2556 				 */
2557 				l_copy(orig_filter, ilg->ilg_filter);
2558 			}
2559 		}
2560 		mutex_exit(&connp->conn_lock);
2561 	}
2562 
2563 free_and_exit:
2564 	l_free(orig_filter);
2565 	l_free(new_filter);
2566 
2567 	return (err);
2568 }
2569 
2570 /*
2571  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2572  */
2573 /* ARGSUSED */
2574 int
2575 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2576     ip_ioctl_cmd_t *ipip, void *ifreq)
2577 {
2578 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2579 	/* existence verified in ip_wput_nondata() */
2580 	mblk_t *data_mp = mp->b_cont->b_cont;
2581 	int datalen, err, cmd, minsize;
2582 	uint_t expsize = 0;
2583 	conn_t *connp;
2584 	boolean_t isv6, is_v4only_api, getcmd;
2585 	struct sockaddr_in *gsin;
2586 	struct sockaddr_in6 *gsin6;
2587 	ipaddr_t v4grp;
2588 	in6_addr_t v6grp;
2589 	struct group_filter *gf = NULL;
2590 	struct ip_msfilter *imsf = NULL;
2591 	mblk_t *ndp;
2592 
2593 	if (data_mp->b_cont != NULL) {
2594 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2595 			return (ENOMEM);
2596 		freemsg(data_mp);
2597 		data_mp = ndp;
2598 		mp->b_cont->b_cont = data_mp;
2599 	}
2600 
2601 	cmd = iocp->ioc_cmd;
2602 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2603 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2604 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2605 	datalen = MBLKL(data_mp);
2606 
2607 	if (datalen < minsize)
2608 		return (EINVAL);
2609 
2610 	/*
2611 	 * now we know we have at least have the initial structure,
2612 	 * but need to check for the source list array.
2613 	 */
2614 	if (is_v4only_api) {
2615 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2616 		isv6 = B_FALSE;
2617 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2618 	} else {
2619 		gf = (struct group_filter *)data_mp->b_rptr;
2620 		if (gf->gf_group.ss_family == AF_INET6) {
2621 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2622 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2623 		} else {
2624 			isv6 = B_FALSE;
2625 		}
2626 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2627 	}
2628 	if (datalen < expsize)
2629 		return (EINVAL);
2630 
2631 	connp = Q_TO_CONN(q);
2632 
2633 	/* operation not supported on the virtual network interface */
2634 	if (IS_VNI(ipif->ipif_ill))
2635 		return (EINVAL);
2636 
2637 	if (isv6) {
2638 		ill_t *ill = ipif->ipif_ill;
2639 		ill_refhold(ill);
2640 
2641 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2642 		v6grp = gsin6->sin6_addr;
2643 		if (getcmd)
2644 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2645 		else
2646 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2647 
2648 		ill_refrele(ill);
2649 	} else {
2650 		boolean_t isv4mapped = B_FALSE;
2651 		if (is_v4only_api) {
2652 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2653 		} else {
2654 			if (gf->gf_group.ss_family == AF_INET) {
2655 				gsin = (struct sockaddr_in *)&gf->gf_group;
2656 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2657 			} else {
2658 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2659 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2660 				    v4grp);
2661 				isv4mapped = B_TRUE;
2662 			}
2663 		}
2664 		if (getcmd)
2665 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2666 			    isv4mapped);
2667 		else
2668 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2669 			    isv4mapped);
2670 	}
2671 
2672 	return (err);
2673 }
2674 
2675 /*
2676  * Finds the ipif based on information in the ioctl headers.  Needed to make
2677  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2678  * ioctls prior to calling the ioctl's handler function).
2679  */
2680 int
2681 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2682     cmd_info_t *ci, ipsq_func_t func)
2683 {
2684 	int cmd = ipip->ipi_cmd;
2685 	int err = 0;
2686 	conn_t *connp;
2687 	ipif_t *ipif;
2688 	/* caller has verified this mblk exists */
2689 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2690 	struct ip_msfilter *imsf;
2691 	struct group_filter *gf;
2692 	ipaddr_t v4addr, v4grp;
2693 	in6_addr_t v6grp;
2694 	uint32_t index;
2695 	zoneid_t zoneid;
2696 	ip_stack_t *ipst;
2697 
2698 	connp = Q_TO_CONN(q);
2699 	zoneid = connp->conn_zoneid;
2700 	ipst = connp->conn_netstack->netstack_ip;
2701 
2702 	/* don't allow multicast operations on a tcp conn */
2703 	if (IPCL_IS_TCP(connp))
2704 		return (ENOPROTOOPT);
2705 
2706 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2707 		/* don't allow v4-specific ioctls on v6 socket */
2708 		if (connp->conn_af_isv6)
2709 			return (EAFNOSUPPORT);
2710 
2711 		imsf = (struct ip_msfilter *)dbuf;
2712 		v4addr = imsf->imsf_interface.s_addr;
2713 		v4grp = imsf->imsf_multiaddr.s_addr;
2714 		if (v4addr == INADDR_ANY) {
2715 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2716 			if (ipif == NULL)
2717 				err = EADDRNOTAVAIL;
2718 		} else {
2719 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2720 			    func, &err, ipst);
2721 		}
2722 	} else {
2723 		boolean_t isv6 = B_FALSE;
2724 		gf = (struct group_filter *)dbuf;
2725 		index = gf->gf_interface;
2726 		if (gf->gf_group.ss_family == AF_INET6) {
2727 			struct sockaddr_in6 *sin6;
2728 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2729 			v6grp = sin6->sin6_addr;
2730 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2731 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2732 			else
2733 				isv6 = B_TRUE;
2734 		} else if (gf->gf_group.ss_family == AF_INET) {
2735 			struct sockaddr_in *sin;
2736 			sin = (struct sockaddr_in *)&gf->gf_group;
2737 			v4grp = sin->sin_addr.s_addr;
2738 		} else {
2739 			return (EAFNOSUPPORT);
2740 		}
2741 		if (index == 0) {
2742 			if (isv6) {
2743 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2744 				    ipst);
2745 			} else {
2746 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2747 			}
2748 			if (ipif == NULL)
2749 				err = EADDRNOTAVAIL;
2750 		} else {
2751 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2752 			    q, mp, func, &err, ipst);
2753 		}
2754 	}
2755 
2756 	ci->ci_ipif = ipif;
2757 	return (err);
2758 }
2759 
2760 /*
2761  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2762  * in in two stages, as the first copyin tells us the size of the attached
2763  * source buffer.  This function is called by ip_wput_nondata() after the
2764  * first copyin has completed; it figures out how big the second stage
2765  * needs to be, and kicks it off.
2766  *
2767  * In some cases (numsrc < 2), the second copyin is not needed as the
2768  * first one gets a complete structure containing 1 source addr.
2769  *
2770  * The function returns 0 if a second copyin has been started (i.e. there's
2771  * no more work to be done right now), or 1 if the second copyin is not
2772  * needed and ip_wput_nondata() can continue its processing.
2773  */
2774 int
2775 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2776 {
2777 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2778 	int cmd = iocp->ioc_cmd;
2779 	/* validity of this checked in ip_wput_nondata() */
2780 	mblk_t *mp1 = mp->b_cont->b_cont;
2781 	int copysize = 0;
2782 	int offset;
2783 
2784 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2785 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2786 		if (gf->gf_numsrc >= 2) {
2787 			offset = sizeof (struct group_filter);
2788 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2789 		}
2790 	} else {
2791 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2792 		if (imsf->imsf_numsrc >= 2) {
2793 			offset = sizeof (struct ip_msfilter);
2794 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2795 		}
2796 	}
2797 	if (copysize > 0) {
2798 		mi_copyin_n(q, mp, offset, copysize);
2799 		return (0);
2800 	}
2801 	return (1);
2802 }
2803 
2804 /*
2805  * Handle the following optmgmt:
2806  *	IP_ADD_MEMBERSHIP		must not have joined already
2807  *	MCAST_JOIN_GROUP		must not have joined already
2808  *	IP_BLOCK_SOURCE			must have joined already
2809  *	MCAST_BLOCK_SOURCE		must have joined already
2810  *	IP_JOIN_SOURCE_GROUP		may have joined already
2811  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2812  *
2813  * fmode and src parameters may be used to determine which option is
2814  * being set, as follows (the IP_* and MCAST_* versions of each option
2815  * are functionally equivalent):
2816  *	opt			fmode			src
2817  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2818  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2819  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2820  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2821  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2822  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2823  *
2824  * Changing the filter mode is not allowed; if a matching ilg already
2825  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2826  *
2827  * Verifies that there is a source address of appropriate scope for
2828  * the group; if not, EADDRNOTAVAIL is returned.
2829  *
2830  * The interface to be used may be identified by an address or by an
2831  * index.  A pointer to the index is passed; if it is NULL, use the
2832  * address, otherwise, use the index.
2833  */
2834 int
2835 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2836     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2837     mblk_t *first_mp)
2838 {
2839 	ipif_t	*ipif;
2840 	ipsq_t	*ipsq;
2841 	int err = 0;
2842 	ill_t	*ill;
2843 
2844 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2845 	    ip_restart_optmgmt, &ipif);
2846 	if (err != 0) {
2847 		if (err != EINPROGRESS) {
2848 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2849 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2850 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2851 		}
2852 		return (err);
2853 	}
2854 	ASSERT(ipif != NULL);
2855 
2856 	ill = ipif->ipif_ill;
2857 	/* Operation not supported on a virtual network interface */
2858 	if (IS_VNI(ill)) {
2859 		ipif_refrele(ipif);
2860 		return (EINVAL);
2861 	}
2862 
2863 	if (checkonly) {
2864 		/*
2865 		 * do not do operation, just pretend to - new T_CHECK
2866 		 * semantics. The error return case above if encountered
2867 		 * considered a good enough "check" here.
2868 		 */
2869 		ipif_refrele(ipif);
2870 		return (0);
2871 	}
2872 
2873 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2874 	    NEW_OP);
2875 
2876 	/* unspecified source addr => no source filtering */
2877 	err = ilg_add(connp, group, ipif, fmode, src);
2878 
2879 	IPSQ_EXIT(ipsq);
2880 
2881 	ipif_refrele(ipif);
2882 	return (err);
2883 }
2884 
2885 /*
2886  * Handle the following optmgmt:
2887  *	IPV6_JOIN_GROUP			must not have joined already
2888  *	MCAST_JOIN_GROUP		must not have joined already
2889  *	MCAST_BLOCK_SOURCE		must have joined already
2890  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2891  *
2892  * fmode and src parameters may be used to determine which option is
2893  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2894  * are functionally equivalent):
2895  *	opt			fmode			v6src
2896  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2897  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2898  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2899  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2900  *
2901  * Changing the filter mode is not allowed; if a matching ilg already
2902  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2903  *
2904  * Verifies that there is a source address of appropriate scope for
2905  * the group; if not, EADDRNOTAVAIL is returned.
2906  *
2907  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2908  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2909  * v6src is also v4-mapped.
2910  */
2911 int
2912 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2913     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2914     const in6_addr_t *v6src, mblk_t *first_mp)
2915 {
2916 	ill_t *ill;
2917 	ipif_t	*ipif;
2918 	char buf[INET6_ADDRSTRLEN];
2919 	ipaddr_t v4group, v4src;
2920 	boolean_t isv6;
2921 	ipsq_t	*ipsq;
2922 	int	err;
2923 
2924 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2925 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2926 	if (err != 0) {
2927 		if (err != EINPROGRESS) {
2928 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2929 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2930 			    sizeof (buf)), ifindex));
2931 		}
2932 		return (err);
2933 	}
2934 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2935 
2936 	/* operation is not supported on the virtual network interface */
2937 	if (isv6) {
2938 		if (IS_VNI(ill)) {
2939 			ill_refrele(ill);
2940 			return (EINVAL);
2941 		}
2942 	} else {
2943 		if (IS_VNI(ipif->ipif_ill)) {
2944 			ipif_refrele(ipif);
2945 			return (EINVAL);
2946 		}
2947 	}
2948 
2949 	if (checkonly) {
2950 		/*
2951 		 * do not do operation, just pretend to - new T_CHECK
2952 		 * semantics. The error return case above if encountered
2953 		 * considered a good enough "check" here.
2954 		 */
2955 		if (isv6)
2956 			ill_refrele(ill);
2957 		else
2958 			ipif_refrele(ipif);
2959 		return (0);
2960 	}
2961 
2962 	if (!isv6) {
2963 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2964 		    ipsq, NEW_OP);
2965 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2966 		IPSQ_EXIT(ipsq);
2967 		ipif_refrele(ipif);
2968 	} else {
2969 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2970 		    ipsq, NEW_OP);
2971 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2972 		IPSQ_EXIT(ipsq);
2973 		ill_refrele(ill);
2974 	}
2975 
2976 	return (err);
2977 }
2978 
2979 static int
2980 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2981     mcast_record_t fmode, ipaddr_t src)
2982 {
2983 	ilg_t	*ilg;
2984 	in6_addr_t v6src;
2985 	boolean_t leaving = B_FALSE;
2986 
2987 	ASSERT(IAM_WRITER_IPIF(ipif));
2988 
2989 	/*
2990 	 * The ilg is valid only while we hold the conn lock. Once we drop
2991 	 * the lock, another thread can locate another ilg on this connp,
2992 	 * but on a different ipif, and delete it, and cause the ilg array
2993 	 * to be reallocated and copied. Hence do the ilg_delete before
2994 	 * dropping the lock.
2995 	 */
2996 	mutex_enter(&connp->conn_lock);
2997 	ilg = ilg_lookup_ipif(connp, group, ipif);
2998 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2999 		mutex_exit(&connp->conn_lock);
3000 		return (EADDRNOTAVAIL);
3001 	}
3002 
3003 	/*
3004 	 * Decide if we're actually deleting the ilg or just removing a
3005 	 * source filter address; if just removing an addr, make sure we
3006 	 * aren't trying to change the filter mode, and that the addr is
3007 	 * actually in our filter list already.  If we're removing the
3008 	 * last src in an include list, just delete the ilg.
3009 	 */
3010 	if (src == INADDR_ANY) {
3011 		v6src = ipv6_all_zeros;
3012 		leaving = B_TRUE;
3013 	} else {
3014 		int err = 0;
3015 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3016 		if (fmode != ilg->ilg_fmode)
3017 			err = EINVAL;
3018 		else if (ilg->ilg_filter == NULL ||
3019 		    !list_has_addr(ilg->ilg_filter, &v6src))
3020 			err = EADDRNOTAVAIL;
3021 		if (err != 0) {
3022 			mutex_exit(&connp->conn_lock);
3023 			return (err);
3024 		}
3025 		if (fmode == MODE_IS_INCLUDE &&
3026 		    ilg->ilg_filter->sl_numsrc == 1) {
3027 			v6src = ipv6_all_zeros;
3028 			leaving = B_TRUE;
3029 		}
3030 	}
3031 
3032 	ilg_delete(connp, ilg, &v6src);
3033 	mutex_exit(&connp->conn_lock);
3034 
3035 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
3036 	return (0);
3037 }
3038 
3039 static int
3040 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
3041     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
3042 {
3043 	ilg_t	*ilg;
3044 	boolean_t leaving = B_TRUE;
3045 
3046 	ASSERT(IAM_WRITER_ILL(ill));
3047 
3048 	mutex_enter(&connp->conn_lock);
3049 	ilg = ilg_lookup_ill_v6(connp, v6group, ill);
3050 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
3051 		mutex_exit(&connp->conn_lock);
3052 		return (EADDRNOTAVAIL);
3053 	}
3054 
3055 	/*
3056 	 * Decide if we're actually deleting the ilg or just removing a
3057 	 * source filter address; if just removing an addr, make sure we
3058 	 * aren't trying to change the filter mode, and that the addr is
3059 	 * actually in our filter list already.  If we're removing the
3060 	 * last src in an include list, just delete the ilg.
3061 	 */
3062 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3063 		int err = 0;
3064 		if (fmode != ilg->ilg_fmode)
3065 			err = EINVAL;
3066 		else if (ilg->ilg_filter == NULL ||
3067 		    !list_has_addr(ilg->ilg_filter, v6src))
3068 			err = EADDRNOTAVAIL;
3069 		if (err != 0) {
3070 			mutex_exit(&connp->conn_lock);
3071 			return (err);
3072 		}
3073 		if (fmode == MODE_IS_INCLUDE &&
3074 		    ilg->ilg_filter->sl_numsrc == 1)
3075 			v6src = NULL;
3076 		else
3077 			leaving = B_FALSE;
3078 	}
3079 
3080 	ilg_delete(connp, ilg, v6src);
3081 	mutex_exit(&connp->conn_lock);
3082 	(void) ip_delmulti_v6(v6group, ill, connp->conn_zoneid, B_FALSE,
3083 	    leaving);
3084 
3085 	return (0);
3086 }
3087 
3088 /*
3089  * Handle the following optmgmt:
3090  *	IP_DROP_MEMBERSHIP		will leave
3091  *	MCAST_LEAVE_GROUP		will leave
3092  *	IP_UNBLOCK_SOURCE		will not leave
3093  *	MCAST_UNBLOCK_SOURCE		will not leave
3094  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
3095  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3096  *
3097  * fmode and src parameters may be used to determine which option is
3098  * being set, as follows (the IP_* and MCAST_* versions of each option
3099  * are functionally equivalent):
3100  *	opt			 fmode			src
3101  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
3102  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3103  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3104  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3105  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3106  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3107  *
3108  * Changing the filter mode is not allowed; if a matching ilg already
3109  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3110  *
3111  * The interface to be used may be identified by an address or by an
3112  * index.  A pointer to the index is passed; if it is NULL, use the
3113  * address, otherwise, use the index.
3114  */
3115 int
3116 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3117     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3118     mblk_t *first_mp)
3119 {
3120 	ipif_t	*ipif;
3121 	ipsq_t	*ipsq;
3122 	int	err;
3123 	ill_t	*ill;
3124 
3125 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3126 	    ip_restart_optmgmt, &ipif);
3127 	if (err != 0) {
3128 		if (err != EINPROGRESS) {
3129 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3130 			    "0x%x, ifaddr 0x%x\n",
3131 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3132 		}
3133 		return (err);
3134 	}
3135 	ASSERT(ipif != NULL);
3136 
3137 	ill = ipif->ipif_ill;
3138 	/* Operation not supported on a virtual network interface */
3139 	if (IS_VNI(ill)) {
3140 		ipif_refrele(ipif);
3141 		return (EINVAL);
3142 	}
3143 
3144 	if (checkonly) {
3145 		/*
3146 		 * do not do operation, just pretend to - new T_CHECK
3147 		 * semantics. The error return case above if encountered
3148 		 * considered a good enough "check" here.
3149 		 */
3150 		ipif_refrele(ipif);
3151 		return (0);
3152 	}
3153 
3154 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3155 	    NEW_OP);
3156 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3157 	IPSQ_EXIT(ipsq);
3158 
3159 	ipif_refrele(ipif);
3160 	return (err);
3161 }
3162 
3163 /*
3164  * Handle the following optmgmt:
3165  *	IPV6_LEAVE_GROUP		will leave
3166  *	MCAST_LEAVE_GROUP		will leave
3167  *	MCAST_UNBLOCK_SOURCE		will not leave
3168  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3169  *
3170  * fmode and src parameters may be used to determine which option is
3171  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3172  * are functionally equivalent):
3173  *	opt			 fmode			v6src
3174  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3175  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3176  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3177  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3178  *
3179  * Changing the filter mode is not allowed; if a matching ilg already
3180  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3181  *
3182  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3183  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3184  * v6src is also v4-mapped.
3185  */
3186 int
3187 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3188     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3189     const in6_addr_t *v6src, mblk_t *first_mp)
3190 {
3191 	ill_t *ill;
3192 	ipif_t	*ipif;
3193 	char	buf[INET6_ADDRSTRLEN];
3194 	ipaddr_t v4group, v4src;
3195 	boolean_t isv6;
3196 	ipsq_t	*ipsq;
3197 	int	err;
3198 
3199 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3200 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3201 	if (err != 0) {
3202 		if (err != EINPROGRESS) {
3203 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3204 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3205 			    sizeof (buf)), ifindex));
3206 		}
3207 		return (err);
3208 	}
3209 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3210 
3211 	/* operation is not supported on the virtual network interface */
3212 	if (isv6) {
3213 		if (IS_VNI(ill)) {
3214 			ill_refrele(ill);
3215 			return (EINVAL);
3216 		}
3217 	} else {
3218 		if (IS_VNI(ipif->ipif_ill)) {
3219 			ipif_refrele(ipif);
3220 			return (EINVAL);
3221 		}
3222 	}
3223 
3224 	if (checkonly) {
3225 		/*
3226 		 * do not do operation, just pretend to - new T_CHECK
3227 		 * semantics. The error return case above if encountered
3228 		 * considered a good enough "check" here.
3229 		 */
3230 		if (isv6)
3231 			ill_refrele(ill);
3232 		else
3233 			ipif_refrele(ipif);
3234 		return (0);
3235 	}
3236 
3237 	if (!isv6) {
3238 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3239 		    ipsq, NEW_OP);
3240 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3241 		    v4src);
3242 		IPSQ_EXIT(ipsq);
3243 		ipif_refrele(ipif);
3244 	} else {
3245 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3246 		    ipsq, NEW_OP);
3247 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3248 		    v6src);
3249 		IPSQ_EXIT(ipsq);
3250 		ill_refrele(ill);
3251 	}
3252 
3253 	return (err);
3254 }
3255 
3256 /*
3257  * Group mgmt for upper conn that passes things down
3258  * to the interface multicast list (and DLPI)
3259  * These routines can handle new style options that specify an interface name
3260  * as opposed to an interface address (needed for general handling of
3261  * unnumbered interfaces.)
3262  */
3263 
3264 /*
3265  * Add a group to an upper conn group data structure and pass things down
3266  * to the interface multicast list (and DLPI)
3267  */
3268 static int
3269 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3270     ipaddr_t src)
3271 {
3272 	int	error = 0;
3273 	ill_t	*ill;
3274 	ilg_t	*ilg;
3275 	ilg_stat_t ilgstat;
3276 	slist_t	*new_filter = NULL;
3277 	int	new_fmode;
3278 
3279 	ASSERT(IAM_WRITER_IPIF(ipif));
3280 
3281 	ill = ipif->ipif_ill;
3282 
3283 	if (!(ill->ill_flags & ILLF_MULTICAST))
3284 		return (EADDRNOTAVAIL);
3285 
3286 	/*
3287 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3288 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3289 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3290 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3291 	 * but both operations happen on the same conn.
3292 	 */
3293 	mutex_enter(&connp->conn_lock);
3294 	ilg = ilg_lookup_ipif(connp, group, ipif);
3295 
3296 	/*
3297 	 * Depending on the option we're handling, may or may not be okay
3298 	 * if group has already been added.  Figure out our rules based
3299 	 * on fmode and src params.  Also make sure there's enough room
3300 	 * in the filter if we're adding a source to an existing filter.
3301 	 */
3302 	if (src == INADDR_ANY) {
3303 		/* we're joining for all sources, must not have joined */
3304 		if (ilg != NULL)
3305 			error = EADDRINUSE;
3306 	} else {
3307 		if (fmode == MODE_IS_EXCLUDE) {
3308 			/* (excl {addr}) => block source, must have joined */
3309 			if (ilg == NULL)
3310 				error = EADDRNOTAVAIL;
3311 		}
3312 		/* (incl {addr}) => join source, may have joined */
3313 
3314 		if (ilg != NULL &&
3315 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3316 			error = ENOBUFS;
3317 	}
3318 	if (error != 0) {
3319 		mutex_exit(&connp->conn_lock);
3320 		return (error);
3321 	}
3322 
3323 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3324 
3325 	/*
3326 	 * Alloc buffer to copy new state into (see below) before
3327 	 * we make any changes, so we can bail if it fails.
3328 	 */
3329 	if ((new_filter = l_alloc()) == NULL) {
3330 		mutex_exit(&connp->conn_lock);
3331 		return (ENOMEM);
3332 	}
3333 
3334 	if (ilg == NULL) {
3335 		ilgstat = ILGSTAT_NEW;
3336 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
3337 			mutex_exit(&connp->conn_lock);
3338 			l_free(new_filter);
3339 			return (error);
3340 		}
3341 		if (src != INADDR_ANY) {
3342 			ilg->ilg_filter = l_alloc();
3343 			if (ilg->ilg_filter == NULL) {
3344 				ilg_delete(connp, ilg, NULL);
3345 				mutex_exit(&connp->conn_lock);
3346 				l_free(new_filter);
3347 				return (ENOMEM);
3348 			}
3349 			ilg->ilg_filter->sl_numsrc = 1;
3350 			IN6_IPADDR_TO_V4MAPPED(src,
3351 			    &ilg->ilg_filter->sl_addr[0]);
3352 		}
3353 		if (group == INADDR_ANY) {
3354 			ilg->ilg_v6group = ipv6_all_zeros;
3355 		} else {
3356 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3357 		}
3358 		ilg->ilg_ipif = ipif;
3359 		ilg->ilg_ill = NULL;
3360 		ilg->ilg_fmode = fmode;
3361 	} else {
3362 		int index;
3363 		in6_addr_t v6src;
3364 		ilgstat = ILGSTAT_CHANGE;
3365 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3366 			mutex_exit(&connp->conn_lock);
3367 			l_free(new_filter);
3368 			return (EINVAL);
3369 		}
3370 		if (ilg->ilg_filter == NULL) {
3371 			ilg->ilg_filter = l_alloc();
3372 			if (ilg->ilg_filter == NULL) {
3373 				mutex_exit(&connp->conn_lock);
3374 				l_free(new_filter);
3375 				return (ENOMEM);
3376 			}
3377 		}
3378 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3379 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3380 			mutex_exit(&connp->conn_lock);
3381 			l_free(new_filter);
3382 			return (EADDRNOTAVAIL);
3383 		}
3384 		index = ilg->ilg_filter->sl_numsrc++;
3385 		ilg->ilg_filter->sl_addr[index] = v6src;
3386 	}
3387 
3388 	/*
3389 	 * Save copy of ilg's filter state to pass to other functions,
3390 	 * so we can release conn_lock now.
3391 	 */
3392 	new_fmode = ilg->ilg_fmode;
3393 	l_copy(ilg->ilg_filter, new_filter);
3394 
3395 	mutex_exit(&connp->conn_lock);
3396 
3397 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3398 	if (error != 0) {
3399 		/*
3400 		 * Need to undo what we did before calling ip_addmulti()!
3401 		 * Must look up the ilg again since we've not been holding
3402 		 * conn_lock.
3403 		 */
3404 		in6_addr_t v6src;
3405 		if (ilgstat == ILGSTAT_NEW)
3406 			v6src = ipv6_all_zeros;
3407 		else
3408 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3409 		mutex_enter(&connp->conn_lock);
3410 		ilg = ilg_lookup_ipif(connp, group, ipif);
3411 		ASSERT(ilg != NULL);
3412 		ilg_delete(connp, ilg, &v6src);
3413 		mutex_exit(&connp->conn_lock);
3414 		l_free(new_filter);
3415 		return (error);
3416 	}
3417 
3418 	l_free(new_filter);
3419 	return (0);
3420 }
3421 
3422 static int
3423 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3424     mcast_record_t fmode, const in6_addr_t *v6src)
3425 {
3426 	int	error = 0;
3427 	ilg_t	*ilg;
3428 	ilg_stat_t ilgstat;
3429 	slist_t	*new_filter = NULL;
3430 	int	new_fmode;
3431 
3432 	ASSERT(IAM_WRITER_ILL(ill));
3433 
3434 	if (!(ill->ill_flags & ILLF_MULTICAST))
3435 		return (EADDRNOTAVAIL);
3436 
3437 	/*
3438 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3439 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3440 	 * and hme1 map to different ipsq's, but both operations happen
3441 	 * on the same conn.
3442 	 */
3443 	mutex_enter(&connp->conn_lock);
3444 
3445 	ilg = ilg_lookup_ill_v6(connp, v6group, ill);
3446 
3447 	/*
3448 	 * Depending on the option we're handling, may or may not be okay
3449 	 * if group has already been added.  Figure out our rules based
3450 	 * on fmode and src params.  Also make sure there's enough room
3451 	 * in the filter if we're adding a source to an existing filter.
3452 	 */
3453 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3454 		/* we're joining for all sources, must not have joined */
3455 		if (ilg != NULL)
3456 			error = EADDRINUSE;
3457 	} else {
3458 		if (fmode == MODE_IS_EXCLUDE) {
3459 			/* (excl {addr}) => block source, must have joined */
3460 			if (ilg == NULL)
3461 				error = EADDRNOTAVAIL;
3462 		}
3463 		/* (incl {addr}) => join source, may have joined */
3464 
3465 		if (ilg != NULL &&
3466 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3467 			error = ENOBUFS;
3468 	}
3469 	if (error != 0) {
3470 		mutex_exit(&connp->conn_lock);
3471 		return (error);
3472 	}
3473 
3474 	/*
3475 	 * Alloc buffer to copy new state into (see below) before
3476 	 * we make any changes, so we can bail if it fails.
3477 	 */
3478 	if ((new_filter = l_alloc()) == NULL) {
3479 		mutex_exit(&connp->conn_lock);
3480 		return (ENOMEM);
3481 	}
3482 
3483 	if (ilg == NULL) {
3484 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
3485 			mutex_exit(&connp->conn_lock);
3486 			l_free(new_filter);
3487 			return (error);
3488 		}
3489 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3490 			ilg->ilg_filter = l_alloc();
3491 			if (ilg->ilg_filter == NULL) {
3492 				ilg_delete(connp, ilg, NULL);
3493 				mutex_exit(&connp->conn_lock);
3494 				l_free(new_filter);
3495 				return (ENOMEM);
3496 			}
3497 			ilg->ilg_filter->sl_numsrc = 1;
3498 			ilg->ilg_filter->sl_addr[0] = *v6src;
3499 		}
3500 		ilgstat = ILGSTAT_NEW;
3501 		ilg->ilg_v6group = *v6group;
3502 		ilg->ilg_fmode = fmode;
3503 		ilg->ilg_ipif = NULL;
3504 		ilg->ilg_ill = ill;
3505 	} else {
3506 		int index;
3507 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3508 			mutex_exit(&connp->conn_lock);
3509 			l_free(new_filter);
3510 			return (EINVAL);
3511 		}
3512 		if (ilg->ilg_filter == NULL) {
3513 			ilg->ilg_filter = l_alloc();
3514 			if (ilg->ilg_filter == NULL) {
3515 				mutex_exit(&connp->conn_lock);
3516 				l_free(new_filter);
3517 				return (ENOMEM);
3518 			}
3519 		}
3520 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3521 			mutex_exit(&connp->conn_lock);
3522 			l_free(new_filter);
3523 			return (EADDRNOTAVAIL);
3524 		}
3525 		ilgstat = ILGSTAT_CHANGE;
3526 		index = ilg->ilg_filter->sl_numsrc++;
3527 		ilg->ilg_filter->sl_addr[index] = *v6src;
3528 	}
3529 
3530 	/*
3531 	 * Save copy of ilg's filter state to pass to other functions,
3532 	 * so we can release conn_lock now.
3533 	 */
3534 	new_fmode = ilg->ilg_fmode;
3535 	l_copy(ilg->ilg_filter, new_filter);
3536 
3537 	mutex_exit(&connp->conn_lock);
3538 
3539 	/*
3540 	 * Now update the ill. We wait to do this until after the ilg
3541 	 * has been updated because we need to update the src filter
3542 	 * info for the ill, which involves looking at the status of
3543 	 * all the ilgs associated with this group/interface pair.
3544 	 */
3545 	error = ip_addmulti_v6(v6group, ill, connp->conn_zoneid, ilgstat,
3546 	    new_fmode, new_filter);
3547 	if (error != 0) {
3548 		/*
3549 		 * But because we waited, we have to undo the ilg update
3550 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3551 		 * again, since we've not been holding conn_lock.
3552 		 */
3553 		in6_addr_t delsrc =
3554 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3555 		mutex_enter(&connp->conn_lock);
3556 		ilg = ilg_lookup_ill_v6(connp, v6group, ill);
3557 		ASSERT(ilg != NULL);
3558 		ilg_delete(connp, ilg, &delsrc);
3559 		mutex_exit(&connp->conn_lock);
3560 		l_free(new_filter);
3561 		return (error);
3562 	}
3563 
3564 	l_free(new_filter);
3565 
3566 	return (0);
3567 }
3568 
3569 /*
3570  * Find an IPv4 ilg matching group, ill and source
3571  */
3572 ilg_t *
3573 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3574 {
3575 	in6_addr_t v6group, v6src;
3576 	int i;
3577 	boolean_t isinlist;
3578 	ilg_t *ilg;
3579 	ipif_t *ipif;
3580 	ill_t *ilg_ill;
3581 
3582 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3583 
3584 	/*
3585 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3586 	 */
3587 	if (group == INADDR_ANY)
3588 		v6group = ipv6_all_zeros;
3589 	else
3590 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3591 
3592 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3593 		ilg = &connp->conn_ilg[i];
3594 		if ((ipif = ilg->ilg_ipif) == NULL ||
3595 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3596 			continue;
3597 		ASSERT(ilg->ilg_ill == NULL);
3598 		ilg_ill = ipif->ipif_ill;
3599 		ASSERT(!ilg_ill->ill_isv6);
3600 		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
3601 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3602 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3603 				/* no source filter, so this is a match */
3604 				return (ilg);
3605 			}
3606 			break;
3607 		}
3608 	}
3609 	if (i == connp->conn_ilg_inuse)
3610 		return (NULL);
3611 
3612 	/*
3613 	 * we have an ilg with matching ill and group; but
3614 	 * the ilg has a source list that we must check.
3615 	 */
3616 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3617 	isinlist = B_FALSE;
3618 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3619 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3620 			isinlist = B_TRUE;
3621 			break;
3622 		}
3623 	}
3624 
3625 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3626 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3627 		return (ilg);
3628 
3629 	return (NULL);
3630 }
3631 
3632 /*
3633  * Find an IPv6 ilg matching group, ill, and source
3634  */
3635 ilg_t *
3636 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3637     const in6_addr_t *v6src, ill_t *ill)
3638 {
3639 	int i;
3640 	boolean_t isinlist;
3641 	ilg_t *ilg;
3642 	ill_t *ilg_ill;
3643 
3644 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3645 
3646 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3647 		ilg = &connp->conn_ilg[i];
3648 		if ((ilg_ill = ilg->ilg_ill) == NULL ||
3649 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3650 			continue;
3651 		ASSERT(ilg->ilg_ipif == NULL);
3652 		ASSERT(ilg_ill->ill_isv6);
3653 		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
3654 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3655 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3656 				/* no source filter, so this is a match */
3657 				return (ilg);
3658 			}
3659 			break;
3660 		}
3661 	}
3662 	if (i == connp->conn_ilg_inuse)
3663 		return (NULL);
3664 
3665 	/*
3666 	 * we have an ilg with matching ill and group; but
3667 	 * the ilg has a source list that we must check.
3668 	 */
3669 	isinlist = B_FALSE;
3670 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3671 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3672 			isinlist = B_TRUE;
3673 			break;
3674 		}
3675 	}
3676 
3677 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3678 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3679 		return (ilg);
3680 
3681 	return (NULL);
3682 }
3683 
3684 /*
3685  * Find an IPv6 ilg matching group and ill
3686  */
3687 ilg_t *
3688 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3689 {
3690 	ilg_t	*ilg;
3691 	int	i;
3692 	ill_t 	*mem_ill;
3693 
3694 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3695 
3696 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3697 		ilg = &connp->conn_ilg[i];
3698 		if ((mem_ill = ilg->ilg_ill) == NULL ||
3699 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3700 			continue;
3701 		ASSERT(ilg->ilg_ipif == NULL);
3702 		ASSERT(mem_ill->ill_isv6);
3703 		if (mem_ill == ill &&
3704 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3705 			return (ilg);
3706 	}
3707 	return (NULL);
3708 }
3709 
3710 /*
3711  * Find an IPv4 ilg matching group and ipif
3712  */
3713 static ilg_t *
3714 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3715 {
3716 	in6_addr_t v6group;
3717 	int	i;
3718 	ilg_t	*ilg;
3719 
3720 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3721 	ASSERT(!ipif->ipif_ill->ill_isv6);
3722 
3723 	if (group == INADDR_ANY)
3724 		v6group = ipv6_all_zeros;
3725 	else
3726 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3727 
3728 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3729 		ilg = &connp->conn_ilg[i];
3730 		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
3731 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
3732 		    ilg->ilg_ipif == ipif)
3733 			return (ilg);
3734 	}
3735 	return (NULL);
3736 }
3737 
3738 /*
3739  * If a source address is passed in (src != NULL and src is not
3740  * unspecified), remove the specified src addr from the given ilg's
3741  * filter list, else delete the ilg.
3742  */
3743 static void
3744 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3745 {
3746 	int	i;
3747 
3748 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3749 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3750 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3751 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3752 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3753 
3754 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3755 		if (connp->conn_ilg_walker_cnt != 0) {
3756 			ilg->ilg_flags |= ILG_DELETED;
3757 			return;
3758 		}
3759 
3760 		FREE_SLIST(ilg->ilg_filter);
3761 
3762 		i = ilg - &connp->conn_ilg[0];
3763 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3764 
3765 		/* Move other entries up one step */
3766 		connp->conn_ilg_inuse--;
3767 		for (; i < connp->conn_ilg_inuse; i++)
3768 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3769 
3770 		if (connp->conn_ilg_inuse == 0) {
3771 			mi_free((char *)connp->conn_ilg);
3772 			connp->conn_ilg = NULL;
3773 			cv_broadcast(&connp->conn_refcv);
3774 		}
3775 	} else {
3776 		l_remove(ilg->ilg_filter, src);
3777 	}
3778 }
3779 
3780 /*
3781  * Called from conn close. No new ilg can be added or removed.
3782  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3783  * will return error if conn has started closing.
3784  */
3785 void
3786 ilg_delete_all(conn_t *connp)
3787 {
3788 	int	i;
3789 	ipif_t	*ipif = NULL;
3790 	ill_t	*ill = NULL;
3791 	ilg_t	*ilg;
3792 	in6_addr_t v6group;
3793 	boolean_t success;
3794 	ipsq_t	*ipsq;
3795 
3796 	mutex_enter(&connp->conn_lock);
3797 retry:
3798 	ILG_WALKER_HOLD(connp);
3799 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3800 		ilg = &connp->conn_ilg[i];
3801 		/*
3802 		 * Since this walk is not atomic (we drop the
3803 		 * conn_lock and wait in ipsq_enter) we need
3804 		 * to check for the ILG_DELETED flag.
3805 		 */
3806 		if (ilg->ilg_flags & ILG_DELETED)
3807 			continue;
3808 
3809 		if (IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)) {
3810 			ipif = ilg->ilg_ipif;
3811 			ill = ipif->ipif_ill;
3812 		} else {
3813 			ipif = NULL;
3814 			ill = ilg->ilg_ill;
3815 		}
3816 
3817 		/*
3818 		 * We may not be able to refhold the ill if the ill/ipif
3819 		 * is changing. But we need to make sure that the ill will
3820 		 * not vanish. So we just bump up the ill_waiter count.
3821 		 * If we are unable to do even that, then the ill is closing,
3822 		 * in which case the unplumb thread will handle the cleanup,
3823 		 * and we move on to the next ilg.
3824 		 */
3825 		if (!ill_waiter_inc(ill))
3826 			continue;
3827 
3828 		mutex_exit(&connp->conn_lock);
3829 		/*
3830 		 * To prevent deadlock between ill close which waits inside
3831 		 * the perimeter, and conn close, ipsq_enter returns error,
3832 		 * the moment ILL_CONDEMNED is set, in which case ill close
3833 		 * takes responsibility to cleanup the ilgs. Note that we
3834 		 * have not yet set condemned flag, otherwise the conn can't
3835 		 * be refheld for cleanup by those routines and it would be
3836 		 * a mutual deadlock.
3837 		 */
3838 		success = ipsq_enter(ill, B_FALSE, NEW_OP);
3839 		ipsq = ill->ill_phyint->phyint_ipsq;
3840 		ill_waiter_dcr(ill);
3841 		mutex_enter(&connp->conn_lock);
3842 		if (!success)
3843 			continue;
3844 
3845 		/*
3846 		 * Move on if the ilg was deleted while conn_lock was dropped.
3847 		 */
3848 		if (ilg->ilg_flags & ILG_DELETED) {
3849 			mutex_exit(&connp->conn_lock);
3850 			ipsq_exit(ipsq);
3851 			mutex_enter(&connp->conn_lock);
3852 			continue;
3853 		}
3854 		v6group = ilg->ilg_v6group;
3855 		ilg_delete(connp, ilg, NULL);
3856 		mutex_exit(&connp->conn_lock);
3857 
3858 		if (ipif != NULL) {
3859 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3860 			    B_FALSE, B_TRUE);
3861 		} else {
3862 			(void) ip_delmulti_v6(&v6group, ill,
3863 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3864 		}
3865 		ipsq_exit(ipsq);
3866 		mutex_enter(&connp->conn_lock);
3867 	}
3868 	ILG_WALKER_RELE(connp);
3869 
3870 	/* If any ill was skipped above wait and retry */
3871 	if (connp->conn_ilg_inuse != 0) {
3872 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3873 		goto retry;
3874 	}
3875 	mutex_exit(&connp->conn_lock);
3876 }
3877 
3878 /*
3879  * Called from ill close by ipcl_walk for clearing conn_ilg and
3880  * conn_multicast_ipif for a given ipif. conn is held by caller.
3881  * Note that ipcl_walk only walks conns that are not yet condemned.
3882  * condemned conns can't be refheld. For this reason, conn must become clean
3883  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3884  * condemned flag.
3885  */
3886 static void
3887 conn_delete_ipif(conn_t *connp, caddr_t arg)
3888 {
3889 	ipif_t	*ipif = (ipif_t *)arg;
3890 	int	i;
3891 	char	group_buf1[INET6_ADDRSTRLEN];
3892 	char	group_buf2[INET6_ADDRSTRLEN];
3893 	ipaddr_t group;
3894 	ilg_t	*ilg;
3895 
3896 	/*
3897 	 * Even though conn_ilg_inuse can change while we are in this loop,
3898 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3899 	 * be created or deleted for this connp, on this ill, since this ill
3900 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3901 	 */
3902 	mutex_enter(&connp->conn_lock);
3903 
3904 	/*
3905 	 * Increment the walker count, so that ilg repacking does not
3906 	 * occur while we are in the loop.
3907 	 */
3908 	ILG_WALKER_HOLD(connp);
3909 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3910 		ilg = &connp->conn_ilg[i];
3911 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3912 			continue;
3913 		/*
3914 		 * ip_close cannot be cleaning this ilg at the same time.
3915 		 * since it also has to execute in this ill's perimeter which
3916 		 * we are now holding. Only a clean conn can be condemned.
3917 		 */
3918 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3919 
3920 		/* Blow away the membership */
3921 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3922 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3923 		    group_buf1, sizeof (group_buf1)),
3924 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3925 		    group_buf2, sizeof (group_buf2)),
3926 		    ipif->ipif_ill->ill_name));
3927 
3928 		/* ilg_ipif is NULL for V6, so we won't be here */
3929 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3930 
3931 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3932 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3933 		mutex_exit(&connp->conn_lock);
3934 
3935 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3936 		mutex_enter(&connp->conn_lock);
3937 	}
3938 
3939 	/*
3940 	 * If we are the last walker, need to physically delete the
3941 	 * ilgs and repack.
3942 	 */
3943 	ILG_WALKER_RELE(connp);
3944 
3945 	if (connp->conn_multicast_ipif == ipif) {
3946 		/* Revert to late binding */
3947 		connp->conn_multicast_ipif = NULL;
3948 	}
3949 	mutex_exit(&connp->conn_lock);
3950 
3951 	conn_delete_ire(connp, (caddr_t)ipif);
3952 }
3953 
3954 /*
3955  * Called from ill close by ipcl_walk for clearing conn_ilg and
3956  * conn_multicast_ill for a given ill. conn is held by caller.
3957  * Note that ipcl_walk only walks conns that are not yet condemned.
3958  * condemned conns can't be refheld. For this reason, conn must become clean
3959  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3960  * condemned flag.
3961  */
3962 static void
3963 conn_delete_ill(conn_t *connp, caddr_t arg)
3964 {
3965 	ill_t	*ill = (ill_t *)arg;
3966 	int	i;
3967 	char	group_buf[INET6_ADDRSTRLEN];
3968 	in6_addr_t v6group;
3969 	ilg_t	*ilg;
3970 
3971 	/*
3972 	 * Even though conn_ilg_inuse can change while we are in this loop,
3973 	 * no new ilgs can be created/deleted for this connp, on this
3974 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3975 	 * in this cleanup.
3976 	 */
3977 	mutex_enter(&connp->conn_lock);
3978 
3979 	/*
3980 	 * Increment the walker count, so that ilg repacking does not
3981 	 * occur while we are in the loop.
3982 	 */
3983 	ILG_WALKER_HOLD(connp);
3984 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3985 		ilg = &connp->conn_ilg[i];
3986 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3987 			/*
3988 			 * ip_close cannot be cleaning this ilg at the same
3989 			 * time, since it also has to execute in this ill's
3990 			 * perimeter which we are now holding. Only a clean
3991 			 * conn can be condemned.
3992 			 */
3993 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3994 
3995 			/* Blow away the membership */
3996 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3997 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3998 			    group_buf, sizeof (group_buf)),
3999 			    ill->ill_name));
4000 
4001 			v6group = ilg->ilg_v6group;
4002 			ilg_delete(connp, ilg, NULL);
4003 			mutex_exit(&connp->conn_lock);
4004 
4005 			(void) ip_delmulti_v6(&v6group, ill,
4006 			    connp->conn_zoneid, B_FALSE, B_TRUE);
4007 			mutex_enter(&connp->conn_lock);
4008 		}
4009 	}
4010 	/*
4011 	 * If we are the last walker, need to physically delete the
4012 	 * ilgs and repack.
4013 	 */
4014 	ILG_WALKER_RELE(connp);
4015 
4016 	if (connp->conn_multicast_ill == ill) {
4017 		/* Revert to late binding */
4018 		connp->conn_multicast_ill = NULL;
4019 	}
4020 	mutex_exit(&connp->conn_lock);
4021 }
4022 
4023 /*
4024  * Called when an ipif is unplumbed to make sure that there are no
4025  * dangling conn references to that ipif.
4026  * Handles ilg_ipif and conn_multicast_ipif
4027  */
4028 void
4029 reset_conn_ipif(ipif)
4030 	ipif_t	*ipif;
4031 {
4032 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4033 
4034 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4035 }
4036 
4037 /*
4038  * Called when an ill is unplumbed to make sure that there are no
4039  * dangling conn references to that ill.
4040  * Handles ilg_ill, conn_multicast_ill.
4041  */
4042 void
4043 reset_conn_ill(ill_t *ill)
4044 {
4045 	ip_stack_t	*ipst = ill->ill_ipst;
4046 
4047 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4048 }
4049 
4050 #ifdef DEBUG
4051 /*
4052  * Walk functions walk all the interfaces in the system to make
4053  * sure that there is no refernece to the ipif or ill that is
4054  * going away.
4055  */
4056 int
4057 ilm_walk_ill(ill_t *ill)
4058 {
4059 	int cnt = 0;
4060 	ill_t *till;
4061 	ilm_t *ilm;
4062 	ill_walk_context_t ctx;
4063 	ip_stack_t	*ipst = ill->ill_ipst;
4064 
4065 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4066 	till = ILL_START_WALK_ALL(&ctx, ipst);
4067 	for (; till != NULL; till = ill_next(&ctx, till)) {
4068 		mutex_enter(&till->ill_lock);
4069 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4070 			if (ilm->ilm_ill == ill) {
4071 				cnt++;
4072 			}
4073 		}
4074 		mutex_exit(&till->ill_lock);
4075 	}
4076 	rw_exit(&ipst->ips_ill_g_lock);
4077 
4078 	return (cnt);
4079 }
4080 
4081 /*
4082  * This function is called before the ipif is freed.
4083  */
4084 int
4085 ilm_walk_ipif(ipif_t *ipif)
4086 {
4087 	int cnt = 0;
4088 	ill_t *till;
4089 	ilm_t *ilm;
4090 	ill_walk_context_t ctx;
4091 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4092 
4093 	till = ILL_START_WALK_ALL(&ctx, ipst);
4094 	for (; till != NULL; till = ill_next(&ctx, till)) {
4095 		mutex_enter(&till->ill_lock);
4096 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4097 			if (ilm->ilm_ipif == ipif) {
4098 					cnt++;
4099 			}
4100 		}
4101 		mutex_exit(&till->ill_lock);
4102 	}
4103 	return (cnt);
4104 }
4105 #endif
4106