xref: /titanic_51/usr/src/uts/common/inet/ip/ip_multi.c (revision 261a51afbf7133d9f7c89f1388050677f56b7d1a)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/systm.h>
44 #include <net/route.h>
45 #include <netinet/in.h>
46 #include <net/if_dl.h>
47 #include <netinet/ip6.h>
48 #include <netinet/icmp6.h>
49 
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/nd.h>
53 #include <inet/arp.h>
54 #include <inet/ip.h>
55 #include <inet/ip6.h>
56 #include <inet/ip_if.h>
57 #include <inet/ip_ndp.h>
58 #include <inet/ip_multi.h>
59 #include <inet/ipclassifier.h>
60 #include <inet/ipsec_impl.h>
61 #include <inet/sctp_ip.h>
62 #include <inet/ip_listutils.h>
63 #include <inet/udp_impl.h>
64 
65 /* igmpv3/mldv2 source filter manipulation */
66 static void	ilm_bld_flists(conn_t *conn, void *arg);
67 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
68     slist_t *flist);
69 
70 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
71     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
72     int orig_ifindex, zoneid_t zoneid);
73 static void	ilm_delete(ilm_t *ilm);
74 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
75 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
76 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
77     const in6_addr_t *v6group, int index);
78 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
79     ipif_t *ipif);
80 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
81     mcast_record_t fmode, ipaddr_t src);
82 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
83     mcast_record_t fmode, const in6_addr_t *v6src);
84 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
85 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
86     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
87 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
88     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
89 static void	conn_ilg_reap(conn_t *connp);
90 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
91     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
92 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
93     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
94     const in6_addr_t *v6src);
95 
96 /*
97  * MT notes:
98  *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
110  *
111  * Multicast joins and leaves are single-threaded per phyint/IPMP group
112  * using the ipsq serialization mechanism.
113  *
114  * An ilm is an IP data structure used to track multicast join/leave.
115  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
116  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
117  * referencing the ilm. ilms are created / destroyed only as writer. ilms
118  * are not passed around, instead they are looked up and used under the
119  * ill_lock or as writer. So we don't need a dynamic refcount of the number
120  * of threads holding reference to an ilm.
121  *
122  * Multicast Join operation:
123  *
124  * The first step is to determine the ipif (v4) or ill (v6) on which
125  * the join operation is to be done. The join is done after becoming
126  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
127  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
128  * Multiple threads can attempt to join simultaneously on different ipif/ill
129  * on the same conn. In this case the ipsq serialization does not help in
130  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
131  * The conn_lock also protects all the ilg_t members.
132  *
133  * Leave operation.
134  *
135  * Similar to the join operation, the first step is to determine the ipif
136  * or ill (v6) on which the leave operation is to be done. The leave operation
137  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
138  * As with join ilg modification is done under the protection of the conn lock.
139  */
140 
/*
 * Try to enter the ipsq associated with `ipif' via ipsq_try_enter() and
 * store the result in `ipsq'.  If ipsq_try_enter() returns NULL, the
 * reference on `ipif' is dropped with ipif_refrele() and the *enclosing
 * function* returns EINPROGRESS.
 *
 * NOTE: this expands to several statements and contains a hidden return,
 * so it must not be used as the unbraced body of an if/else or loop.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);		\
	if ((ipsq) == NULL) {					\
		ipif_refrele(ipif);				\
		return (EINPROGRESS);				\
	}

/*
 * Same as IPSQ_ENTER_IPIF, but keyed by `ill': on failure the reference on
 * `ill' is dropped with ill_refrele() and the enclosing function returns
 * EINPROGRESS.  The same multi-statement / hidden-return caveats apply.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),	\
	    (first_mp),	(func), (type), B_TRUE);		\
	if ((ipsq) == NULL) {					\
		ill_refrele(ill);				\
		return (EINPROGRESS);				\
	}

/*
 * Leave the ipsq if one was entered (NULL is tolerated).
 *
 * NOTE: expands to a bare `if' that already ends in a semicolon, so using
 * it directly in front of an `else' will not parse as intended.
 */
#define	IPSQ_EXIT(ipsq)	\
	if (ipsq != NULL)	\
		ipsq_exit(ipsq, B_TRUE, B_TRUE);
162 
/* Note that a walker is traversing connp->conn_ilg. */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

/*
 * Drop a walker hold.  When the walker count reaches zero, conn_ilg_reap()
 * is called to clean up entries flagged ILG_DELETED.  conn_ilg_reap()
 * asserts that conn_lock is held, so this must be used with conn_lock held.
 */
#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
171 
172 static void
173 conn_ilg_reap(conn_t *connp)
174 {
175 	int	to;
176 	int	from;
177 
178 	ASSERT(MUTEX_HELD(&connp->conn_lock));
179 
180 	to = 0;
181 	from = 0;
182 	while (from < connp->conn_ilg_inuse) {
183 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
184 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
185 			from++;
186 			continue;
187 		}
188 		if (to != from)
189 			connp->conn_ilg[to] = connp->conn_ilg[from];
190 		to++;
191 		from++;
192 	}
193 
194 	connp->conn_ilg_inuse = to;
195 
196 	if (connp->conn_ilg_inuse == 0) {
197 		mi_free((char *)connp->conn_ilg);
198 		connp->conn_ilg = NULL;
199 		cv_broadcast(&connp->conn_refcv);
200 	}
201 }
202 
/*
 * Allocate a zero-filled array of `number' objects of type `structure'
 * using mi_zalloc().  The element count is multiplied by the element size
 * without any overflow check.
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

/* Number of ilg_t slots by which conn_ilg is allocated and grown. */
#define	ILG_ALLOC_CHUNK	16
207 
208 /*
209  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
210  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
211  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
212  * returned ilg).  Returns NULL on failure (ENOMEM).
213  *
214  * Assumes connp->conn_lock is held.
215  */
216 static ilg_t *
217 conn_ilg_alloc(conn_t *connp)
218 {
219 	ilg_t *new;
220 	int curcnt;
221 
222 	ASSERT(MUTEX_HELD(&connp->conn_lock));
223 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
224 
225 	if (connp->conn_ilg == NULL) {
226 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
227 		if (connp->conn_ilg == NULL)
228 			return (NULL);
229 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
230 		connp->conn_ilg_inuse = 0;
231 	}
232 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
233 		curcnt = connp->conn_ilg_allocated;
234 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
235 		if (new == NULL)
236 			return (NULL);
237 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
238 		mi_free((char *)connp->conn_ilg);
239 		connp->conn_ilg = new;
240 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
241 	}
242 
243 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
244 }
245 
/*
 * Scratch state shared between ilm_gen_filter() and its per-conn walker
 * callback ilm_bld_flists() while the interface filter for one ilm is
 * being rebuilt.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being built */
	int		fbld_in_cnt;	/* # of matching INCLUDE-mode ilgs */
	int		fbld_ex_cnt;	/* # of matching non-INCLUDE ilgs */
	slist_t		fbld_in;	/* union of INCLUDE source lists */
	slist_t		fbld_ex;	/* intersection of EXCLUDE lists */
	boolean_t	fbld_in_overflow; /* passed to l_union_in_a(); */
					/* once set, unions are skipped */
} ilm_fbld_t;
254 
255 static void
256 ilm_bld_flists(conn_t *conn, void *arg)
257 {
258 	int i;
259 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
260 	ilm_t *ilm = fbld->fbld_ilm;
261 	in6_addr_t *v6group = &ilm->ilm_v6addr;
262 
263 	if (conn->conn_ilg_inuse == 0)
264 		return;
265 
266 	/*
267 	 * Since we can't break out of the ipcl_walk once started, we still
268 	 * have to look at every conn.  But if we've already found one
269 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
270 	 * ilgs--that will be our state.
271 	 */
272 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
273 		return;
274 
275 	/*
276 	 * Check this conn's ilgs to see if any are interested in our
277 	 * ilm (group, interface match).  If so, update the master
278 	 * include and exclude lists we're building in the fbld struct
279 	 * with this ilg's filter info.
280 	 */
281 	mutex_enter(&conn->conn_lock);
282 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
283 		ilg_t *ilg = &conn->conn_ilg[i];
284 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
285 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
286 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
287 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
288 				fbld->fbld_in_cnt++;
289 				if (!fbld->fbld_in_overflow)
290 					l_union_in_a(&fbld->fbld_in,
291 					    ilg->ilg_filter,
292 					    &fbld->fbld_in_overflow);
293 			} else {
294 				fbld->fbld_ex_cnt++;
295 				/*
296 				 * On the first exclude list, don't try to do
297 				 * an intersection, as the master exclude list
298 				 * is intentionally empty.  If the master list
299 				 * is still empty on later iterations, that
300 				 * means we have at least one ilg with an empty
301 				 * exclude list, so that should be reflected
302 				 * when we take the intersection.
303 				 */
304 				if (fbld->fbld_ex_cnt == 1) {
305 					if (ilg->ilg_filter != NULL)
306 						l_copy(ilg->ilg_filter,
307 						    &fbld->fbld_ex);
308 				} else {
309 					l_intersection_in_a(&fbld->fbld_ex,
310 					    ilg->ilg_filter);
311 				}
312 			}
313 			/* there will only be one match, so break now. */
314 			break;
315 		}
316 	}
317 	mutex_exit(&conn->conn_lock);
318 }
319 
320 static void
321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
322 {
323 	ilm_fbld_t fbld;
324 	ip_stack_t *ipst = ilm->ilm_ipst;
325 
326 	fbld.fbld_ilm = ilm;
327 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
328 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
329 	fbld.fbld_in_overflow = B_FALSE;
330 
331 	/* first, construct our master include and exclude lists */
332 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
333 
334 	/* now use those master lists to generate the interface filter */
335 
336 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
337 	if (fbld.fbld_in_overflow) {
338 		*fmode = MODE_IS_EXCLUDE;
339 		flist->sl_numsrc = 0;
340 		return;
341 	}
342 
343 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
344 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
345 		*fmode = MODE_IS_INCLUDE;
346 		flist->sl_numsrc = 0;
347 		return;
348 	}
349 
350 	/*
351 	 * If there are no exclude lists, then the interface filter
352 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
353 	 * exclude list makes the interface filter EXCLUDE, with its
354 	 * filter list equal to (fbld_ex - fbld_in).
355 	 */
356 	if (fbld.fbld_ex_cnt == 0) {
357 		*fmode = MODE_IS_INCLUDE;
358 		l_copy(&fbld.fbld_in, flist);
359 	} else {
360 		*fmode = MODE_IS_EXCLUDE;
361 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
362 	}
363 }
364 
365 /*
366  * If the given interface has failed, choose a new one to join on so
367  * that we continue to receive packets.  ilg_orig_ifindex remembers
368  * what the application used to join on so that we know the ilg to
369  * delete even though we change the ill here.  Callers will store the
370  * ilg returned from this function in ilg_ill.  Thus when we receive
371  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
372  *
373  * This function must be called as writer so we can walk the group
374  * list and examine flags without holding a lock.
375  */
376 ill_t *
377 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
378 {
379 	ill_t	*till;
380 	ill_group_t *illgrp = ill->ill_group;
381 
382 	ASSERT(IAM_WRITER_ILL(ill));
383 
384 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
385 		return (ill);
386 
387 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
388 		return (ill);
389 
390 	till = illgrp->illgrp_ill;
391 	while (till != NULL &&
392 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
393 		till = till->ill_group_next;
394 	}
395 	if (till != NULL)
396 		return (till);
397 
398 	return (ill);
399 }
400 
401 static int
402 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
403     boolean_t isv6)
404 {
405 	mcast_record_t fmode;
406 	slist_t *flist;
407 	boolean_t fdefault;
408 	char buf[INET6_ADDRSTRLEN];
409 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
410 
411 	/*
412 	 * There are several cases where the ilm's filter state
413 	 * defaults to (EXCLUDE, NULL):
414 	 *	- we've had previous joins without associated ilgs
415 	 *	- this join has no associated ilg
416 	 *	- the ilg's filter state is (EXCLUDE, NULL)
417 	 */
418 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
419 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
420 
421 	/* attempt mallocs (if needed) before doing anything else */
422 	if ((flist = l_alloc()) == NULL)
423 		return (ENOMEM);
424 	if (!fdefault && ilm->ilm_filter == NULL) {
425 		ilm->ilm_filter = l_alloc();
426 		if (ilm->ilm_filter == NULL) {
427 			l_free(flist);
428 			return (ENOMEM);
429 		}
430 	}
431 
432 	if (ilgstat != ILGSTAT_CHANGE)
433 		ilm->ilm_refcnt++;
434 
435 	if (ilgstat == ILGSTAT_NONE)
436 		ilm->ilm_no_ilg_cnt++;
437 
438 	/*
439 	 * Determine new filter state.  If it's not the default
440 	 * (EXCLUDE, NULL), we must walk the conn list to find
441 	 * any ilgs interested in this group, and re-build the
442 	 * ilm filter.
443 	 */
444 	if (fdefault) {
445 		fmode = MODE_IS_EXCLUDE;
446 		flist->sl_numsrc = 0;
447 	} else {
448 		ilm_gen_filter(ilm, &fmode, flist);
449 	}
450 
451 	/* make sure state actually changed; nothing to do if not. */
452 	if ((ilm->ilm_fmode == fmode) &&
453 	    !lists_are_different(ilm->ilm_filter, flist)) {
454 		l_free(flist);
455 		return (0);
456 	}
457 
458 	/* send the state change report */
459 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
460 		if (isv6)
461 			mld_statechange(ilm, fmode, flist);
462 		else
463 			igmp_statechange(ilm, fmode, flist);
464 	}
465 
466 	/* update the ilm state */
467 	ilm->ilm_fmode = fmode;
468 	if (flist->sl_numsrc > 0)
469 		l_copy(flist, ilm->ilm_filter);
470 	else
471 		CLEAR_SLIST(ilm->ilm_filter);
472 
473 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
474 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
475 
476 	l_free(flist);
477 	return (0);
478 }
479 
480 static int
481 ilm_update_del(ilm_t *ilm, boolean_t isv6)
482 {
483 	mcast_record_t fmode;
484 	slist_t *flist;
485 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
486 
487 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
488 	    ilm->ilm_refcnt));
489 
490 	if ((flist = l_alloc()) == NULL)
491 		return (ENOMEM);
492 
493 	/*
494 	 * If present, the ilg in question has already either been
495 	 * updated or removed from our list; so all we need to do
496 	 * now is walk the list to update the ilm filter state.
497 	 *
498 	 * Skip the list walk if we have any no-ilg joins, which
499 	 * cause the filter state to revert to (EXCLUDE, NULL).
500 	 */
501 	if (ilm->ilm_no_ilg_cnt != 0) {
502 		fmode = MODE_IS_EXCLUDE;
503 		flist->sl_numsrc = 0;
504 	} else {
505 		ilm_gen_filter(ilm, &fmode, flist);
506 	}
507 
508 	/* check to see if state needs to be updated */
509 	if ((ilm->ilm_fmode == fmode) &&
510 	    (!lists_are_different(ilm->ilm_filter, flist))) {
511 		l_free(flist);
512 		return (0);
513 	}
514 
515 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
516 		if (isv6)
517 			mld_statechange(ilm, fmode, flist);
518 		else
519 			igmp_statechange(ilm, fmode, flist);
520 	}
521 
522 	ilm->ilm_fmode = fmode;
523 	if (flist->sl_numsrc > 0) {
524 		if (ilm->ilm_filter == NULL) {
525 			ilm->ilm_filter = l_alloc();
526 			if (ilm->ilm_filter == NULL) {
527 				char buf[INET6_ADDRSTRLEN];
528 				ip1dbg(("ilm_update_del: failed to alloc ilm "
529 				    "filter; no source filtering for %s on %s",
530 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
531 				    buf, sizeof (buf)), ill->ill_name));
532 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
533 				l_free(flist);
534 				return (0);
535 			}
536 		}
537 		l_copy(flist, ilm->ilm_filter);
538 	} else {
539 		CLEAR_SLIST(ilm->ilm_filter);
540 	}
541 
542 	l_free(flist);
543 	return (0);
544 }
545 
546 /*
547  * INADDR_ANY means all multicast addresses. This is only used
548  * by the multicast router.
549  * INADDR_ANY is stored as IPv6 unspecified addr.
550  */
551 int
552 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
553     mcast_record_t ilg_fmode, slist_t *ilg_flist)
554 {
555 	ill_t	*ill = ipif->ipif_ill;
556 	ilm_t 	*ilm;
557 	in6_addr_t v6group;
558 	int	ret;
559 
560 	ASSERT(IAM_WRITER_IPIF(ipif));
561 
562 	if (!CLASSD(group) && group != INADDR_ANY)
563 		return (EINVAL);
564 
565 	/*
566 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
567 	 */
568 	if (group == INADDR_ANY)
569 		v6group = ipv6_all_zeros;
570 	else
571 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
572 
573 	ilm = ilm_lookup_ipif(ipif, group);
574 	if (ilm != NULL)
575 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
576 
577 	/*
578 	 * ilms are associated with ipifs in IPv4. It moves with the
579 	 * ipif if the ipif moves to a new ill when the interface
580 	 * fails. Thus we really don't check whether the ipif_ill
581 	 * has failed like in IPv6. If it has FAILED the ipif
582 	 * will move (daemon will move it) and hence the ilm, if the
583 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
584 	 * we continue to receive in the same place even if the
585 	 * interface fails.
586 	 */
587 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
588 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
589 	if (ilm == NULL)
590 		return (ENOMEM);
591 
592 	if (group == INADDR_ANY) {
593 		/*
594 		 * Check how many ipif's have members in this group -
595 		 * if more then one we should not tell the driver to join
596 		 * this time
597 		 */
598 		if (ilm_numentries_v6(ill, &v6group) > 1)
599 			return (0);
600 		if (ill->ill_group == NULL)
601 			ret = ip_join_allmulti(ipif);
602 		else
603 			ret = ill_nominate_mcast_rcv(ill->ill_group);
604 		if (ret != 0)
605 			ilm_delete(ilm);
606 		return (ret);
607 	}
608 
609 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
610 		igmp_joingroup(ilm);
611 
612 	if (ilm_numentries_v6(ill, &v6group) > 1)
613 		return (0);
614 
615 	ret = ip_ll_addmulti_v6(ipif, &v6group);
616 	if (ret != 0)
617 		ilm_delete(ilm);
618 	return (ret);
619 }
620 
621 /*
622  * The unspecified address means all multicast addresses.
623  * This is only used by the multicast router.
624  *
625  * ill identifies the interface to join on; it may not match the
626  * interface requested by the application of a failover has taken
627  * place.  orig_ifindex always identifies the interface requested
628  * by the app.
629  *
630  * ilgstat tells us if there's an ilg associated with this join,
631  * and if so, if it's a new ilg or a change to an existing one.
632  * ilg_fmode and ilg_flist give us the current filter state of
633  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
634  */
635 int
636 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
637     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
638     slist_t *ilg_flist)
639 {
640 	ilm_t	*ilm;
641 	int	ret;
642 
643 	ASSERT(IAM_WRITER_ILL(ill));
644 
645 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
646 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
647 		return (EINVAL);
648 	}
649 
650 	/*
651 	 * An ilm is uniquely identified by the tuple of (group, ill,
652 	 * orig_ill).  group is the multicast group address, ill is
653 	 * the interface on which it is currently joined, and orig_ill
654 	 * is the interface on which the application requested the
655 	 * join.  orig_ill and ill are the same unless orig_ill has
656 	 * failed over.
657 	 *
658 	 * Both orig_ill and ill are required, which means we may have
659 	 * 2 ilms on an ill for the same group, but with different
660 	 * orig_ills.  These must be kept separate, so that when failback
661 	 * occurs, the appropriate ilms are moved back to their orig_ill
662 	 * without disrupting memberships on the ill to which they had
663 	 * been moved.
664 	 *
665 	 * In order to track orig_ill, we store orig_ifindex in the
666 	 * ilm and ilg.
667 	 */
668 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
669 	if (ilm != NULL)
670 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
671 
672 	/*
673 	 * We need to remember where the application really wanted
674 	 * to join. This will be used later if we want to failback
675 	 * to the original interface.
676 	 */
677 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
678 	    ilg_flist, orig_ifindex, zoneid);
679 	if (ilm == NULL)
680 		return (ENOMEM);
681 
682 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
683 		/*
684 		 * Check how many ipif's that have members in this group -
685 		 * if more then one we should not tell the driver to join
686 		 * this time
687 		 */
688 		if (ilm_numentries_v6(ill, v6group) > 1)
689 			return (0);
690 		if (ill->ill_group == NULL)
691 			ret = ip_join_allmulti(ill->ill_ipif);
692 		else
693 			ret = ill_nominate_mcast_rcv(ill->ill_group);
694 
695 		if (ret != 0)
696 			ilm_delete(ilm);
697 		return (ret);
698 	}
699 
700 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
701 		mld_joingroup(ilm);
702 
703 	/*
704 	 * If we have more then one we should not tell the driver
705 	 * to join this time.
706 	 */
707 	if (ilm_numentries_v6(ill, v6group) > 1)
708 		return (0);
709 
710 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
711 	if (ret != 0)
712 		ilm_delete(ilm);
713 	return (ret);
714 }
715 
716 /*
717  * Send a multicast request to the driver for enabling multicast reception
718  * for v6groupp address. The caller has already checked whether it is
719  * appropriate to send one or not.
720  */
721 int
722 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
723 {
724 	mblk_t	*mp;
725 	uint32_t addrlen, addroff;
726 	char	group_buf[INET6_ADDRSTRLEN];
727 
728 	ASSERT(IAM_WRITER_ILL(ill));
729 
730 	/*
731 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
732 	 * on.
733 	 */
734 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
735 	    &addrlen, &addroff);
736 	if (!mp)
737 		return (ENOMEM);
738 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
739 		ipaddr_t v4group;
740 
741 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
742 		/*
743 		 * NOTE!!!
744 		 * The "addroff" passed in here was calculated by
745 		 * ill_create_dl(), and will be used by ill_create_squery()
746 		 * to perform some twisted coding magic. It is the offset
747 		 * into the dl_xxx_req of the hw addr. Here, it will be
748 		 * added to b_wptr - b_rptr to create a magic number that
749 		 * is not an offset into this squery mblk.
750 		 * The actual hardware address will be accessed only in the
751 		 * dl_xxx_req, not in the squery. More importantly,
752 		 * that hardware address can *only* be accessed in this
753 		 * mblk chain by calling mi_offset_param_c(), which uses
754 		 * the magic number in the squery hw offset field to go
755 		 * to the *next* mblk (the dl_xxx_req), subtract the
756 		 * (b_wptr - b_rptr), and find the actual offset into
757 		 * the dl_xxx_req.
758 		 * Any method that depends on using the
759 		 * offset field in the dl_disabmulti_req or squery
760 		 * to find either hardware address will similarly fail.
761 		 *
762 		 * Look in ar_entry_squery() in arp.c to see how this offset
763 		 * is used.
764 		 */
765 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
766 		if (!mp)
767 			return (ENOMEM);
768 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
769 		    inet_ntop(AF_INET6, v6groupp, group_buf,
770 		    sizeof (group_buf)),
771 		    ill->ill_name));
772 		putnext(ill->ill_rq, mp);
773 	} else {
774 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on"
775 		    " %s\n",
776 		    inet_ntop(AF_INET6, v6groupp, group_buf,
777 		    sizeof (group_buf)),
778 		    ill->ill_name));
779 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
780 	}
781 	return (0);
782 }
783 
784 /*
785  * Send a multicast request to the driver for enabling multicast
786  * membership for v6group if appropriate.
787  */
788 static int
789 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
790 {
791 	ill_t	*ill = ipif->ipif_ill;
792 
793 	ASSERT(IAM_WRITER_IPIF(ipif));
794 
795 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
796 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
797 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
798 		return (0);	/* Must be IRE_IF_NORESOLVER */
799 	}
800 
801 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
802 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
803 		return (0);
804 	}
805 	if (ill->ill_ipif_up_count == 0) {
806 		/*
807 		 * Nobody there. All multicast addresses will be re-joined
808 		 * when we get the DL_BIND_ACK bringing the interface up.
809 		 */
810 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
811 		return (0);
812 	}
813 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
814 }
815 
816 /*
817  * INADDR_ANY means all multicast addresses. This is only used
818  * by the multicast router.
819  * INADDR_ANY is stored as the IPv6 unspecifed addr.
820  */
821 int
822 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
823 {
824 	ill_t	*ill = ipif->ipif_ill;
825 	ilm_t *ilm;
826 	in6_addr_t v6group;
827 	int	ret;
828 
829 	ASSERT(IAM_WRITER_IPIF(ipif));
830 
831 	if (!CLASSD(group) && group != INADDR_ANY)
832 		return (EINVAL);
833 
834 	/*
835 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
836 	 */
837 	if (group == INADDR_ANY)
838 		v6group = ipv6_all_zeros;
839 	else
840 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
841 
842 	/*
843 	 * Look for a match on the ipif.
844 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
845 	 */
846 	ilm = ilm_lookup_ipif(ipif, group);
847 	if (ilm == NULL)
848 		return (ENOENT);
849 
850 	/* Update counters */
851 	if (no_ilg)
852 		ilm->ilm_no_ilg_cnt--;
853 
854 	if (leaving)
855 		ilm->ilm_refcnt--;
856 
857 	if (ilm->ilm_refcnt > 0)
858 		return (ilm_update_del(ilm, B_FALSE));
859 
860 	if (group == INADDR_ANY) {
861 		ilm_delete(ilm);
862 		/*
863 		 * Check how many ipif's that have members in this group -
864 		 * if there are still some left then don't tell the driver
865 		 * to drop it.
866 		 */
867 		if (ilm_numentries_v6(ill, &v6group) != 0)
868 			return (0);
869 
870 		/*
871 		 * If we never joined, then don't leave.  This can happen
872 		 * if we're in an IPMP group, since only one ill per IPMP
873 		 * group receives all multicast packets.
874 		 */
875 		if (!ill->ill_join_allmulti) {
876 			ASSERT(ill->ill_group != NULL);
877 			return (0);
878 		}
879 
880 		ret = ip_leave_allmulti(ipif);
881 		if (ill->ill_group != NULL)
882 			(void) ill_nominate_mcast_rcv(ill->ill_group);
883 		return (ret);
884 	}
885 
886 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
887 		igmp_leavegroup(ilm);
888 
889 	ilm_delete(ilm);
890 	/*
891 	 * Check how many ipif's that have members in this group -
892 	 * if there are still some left then don't tell the driver
893 	 * to drop it.
894 	 */
895 	if (ilm_numentries_v6(ill, &v6group) != 0)
896 		return (0);
897 	return (ip_ll_delmulti_v6(ipif, &v6group));
898 }
899 
900 /*
901  * The unspecified address means all multicast addresses.
902  * This is only used by the multicast router.
903  */
904 int
905 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
906     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
907 {
908 	ipif_t	*ipif;
909 	ilm_t *ilm;
910 	int	ret;
911 
912 	ASSERT(IAM_WRITER_ILL(ill));
913 
914 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
915 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
916 		return (EINVAL);
917 
918 	/*
919 	 * Look for a match on the ill.
920 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
921 	 *
922 	 * Similar to ip_addmulti_v6, we should always look using
923 	 * the orig_ifindex.
924 	 *
925 	 * 1) If orig_ifindex is different from ill's ifindex
926 	 *    we should have an ilm with orig_ifindex created in
927 	 *    ip_addmulti_v6. We should delete that here.
928 	 *
929 	 * 2) If orig_ifindex is same as ill's ifindex, we should
930 	 *    not delete the ilm that is temporarily here because of
931 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
932 	 *    different from ill's ifindex.
933 	 *
934 	 * Thus, always lookup using orig_ifindex.
935 	 */
936 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
937 	if (ilm == NULL)
938 		return (ENOENT);
939 
940 	ASSERT(ilm->ilm_ill == ill);
941 
942 	ipif = ill->ill_ipif;
943 
944 	/* Update counters */
945 	if (no_ilg)
946 		ilm->ilm_no_ilg_cnt--;
947 
948 	if (leaving)
949 		ilm->ilm_refcnt--;
950 
951 	if (ilm->ilm_refcnt > 0)
952 		return (ilm_update_del(ilm, B_TRUE));
953 
954 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
955 		ilm_delete(ilm);
956 		/*
957 		 * Check how many ipif's that have members in this group -
958 		 * if there are still some left then don't tell the driver
959 		 * to drop it.
960 		 */
961 		if (ilm_numentries_v6(ill, v6group) != 0)
962 			return (0);
963 
964 		/*
965 		 * If we never joined, then don't leave.  This can happen
966 		 * if we're in an IPMP group, since only one ill per IPMP
967 		 * group receives all multicast packets.
968 		 */
969 		if (!ill->ill_join_allmulti) {
970 			ASSERT(ill->ill_group != NULL);
971 			return (0);
972 		}
973 
974 		ret = ip_leave_allmulti(ipif);
975 		if (ill->ill_group != NULL)
976 			(void) ill_nominate_mcast_rcv(ill->ill_group);
977 		return (ret);
978 	}
979 
980 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
981 		mld_leavegroup(ilm);
982 
983 	ilm_delete(ilm);
984 	/*
985 	 * Check how many ipif's that have members in this group -
986 	 * if there are still some left then don't tell the driver
987 	 * to drop it.
988 	 */
989 	if (ilm_numentries_v6(ill, v6group) != 0)
990 		return (0);
991 	return (ip_ll_delmulti_v6(ipif, v6group));
992 }
993 
994 /*
995  * Send a multicast request to the driver for disabling multicast reception
996  * for v6groupp address. The caller has already checked whether it is
997  * appropriate to send one or not.
998  */
999 int
1000 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1001 {
1002 	mblk_t	*mp;
1003 	char	group_buf[INET6_ADDRSTRLEN];
1004 	uint32_t	addrlen, addroff;
1005 
1006 	ASSERT(IAM_WRITER_ILL(ill));
1007 	/*
1008 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1009 	 * on.
1010 	 */
1011 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1012 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1013 
1014 	if (!mp)
1015 		return (ENOMEM);
1016 
1017 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1018 		ipaddr_t v4group;
1019 
1020 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1021 		/*
1022 		 * NOTE!!!
1023 		 * The "addroff" passed in here was calculated by
1024 		 * ill_create_dl(), and will be used by ill_create_squery()
1025 		 * to perform some twisted coding magic. It is the offset
1026 		 * into the dl_xxx_req of the hw addr. Here, it will be
1027 		 * added to b_wptr - b_rptr to create a magic number that
1028 		 * is not an offset into this mblk.
1029 		 *
1030 		 * Please see the comment in ip_ll_send)enabmulti_req()
1031 		 * for a complete explanation.
1032 		 *
1033 		 * Look in ar_entry_squery() in arp.c to see how this offset
1034 		 * is used.
1035 		 */
1036 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1037 		if (!mp)
1038 			return (ENOMEM);
1039 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1040 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1041 		    sizeof (group_buf)),
1042 		    ill->ill_name));
1043 		putnext(ill->ill_rq, mp);
1044 	} else {
1045 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
1046 		    " %s\n",
1047 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1048 		    sizeof (group_buf)),
1049 		    ill->ill_name));
1050 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1051 	}
1052 	return (0);
1053 }
1054 
1055 /*
1056  * Send a multicast request to the driver for disabling multicast
1057  * membership for v6group if appropriate.
1058  */
1059 static int
1060 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1061 {
1062 	ill_t	*ill = ipif->ipif_ill;
1063 
1064 	ASSERT(IAM_WRITER_IPIF(ipif));
1065 
1066 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1067 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1068 		return (0);	/* Must be IRE_IF_NORESOLVER */
1069 	}
1070 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1071 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1072 		return (0);
1073 	}
1074 	if (ill->ill_ipif_up_count == 0) {
1075 		/*
1076 		 * Nobody there. All multicast addresses will be re-joined
1077 		 * when we get the DL_BIND_ACK bringing the interface up.
1078 		 */
1079 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1080 		return (0);
1081 	}
1082 	return (ip_ll_send_disabmulti_req(ill, v6group));
1083 }
1084 
1085 /*
1086  * Make the driver pass up all multicast packets
1087  *
1088  * With ill groups, the caller makes sure that there is only
1089  * one ill joining the allmulti group.
1090  */
1091 int
1092 ip_join_allmulti(ipif_t *ipif)
1093 {
1094 	ill_t	*ill = ipif->ipif_ill;
1095 	mblk_t	*mp;
1096 	uint32_t	addrlen, addroff;
1097 
1098 	ASSERT(IAM_WRITER_IPIF(ipif));
1099 
1100 	if (ill->ill_ipif_up_count == 0) {
1101 		/*
1102 		 * Nobody there. All multicast addresses will be re-joined
1103 		 * when we get the DL_BIND_ACK bringing the interface up.
1104 		 */
1105 		return (0);
1106 	}
1107 
1108 	ASSERT(!ill->ill_join_allmulti);
1109 
1110 	/*
1111 	 * Create a DL_PROMISCON_REQ message and send it directly to
1112 	 * the DLPI provider.  We don't need to do this for certain
1113 	 * media types for which we never need to turn promiscuous
1114 	 * mode on.
1115 	 */
1116 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1117 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1118 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1119 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1120 		if (mp == NULL)
1121 			return (ENOMEM);
1122 		putnext(ill->ill_wq, mp);
1123 	}
1124 
1125 	mutex_enter(&ill->ill_lock);
1126 	ill->ill_join_allmulti = B_TRUE;
1127 	mutex_exit(&ill->ill_lock);
1128 	return (0);
1129 }
1130 
1131 /*
1132  * Make the driver stop passing up all multicast packets
1133  *
1134  * With ill groups, we need to nominate some other ill as
1135  * this ipif->ipif_ill is leaving the group.
1136  */
1137 int
1138 ip_leave_allmulti(ipif_t *ipif)
1139 {
1140 	ill_t	*ill = ipif->ipif_ill;
1141 	mblk_t	*mp;
1142 	uint32_t	addrlen, addroff;
1143 
1144 	ASSERT(IAM_WRITER_IPIF(ipif));
1145 
1146 	if (ill->ill_ipif_up_count == 0) {
1147 		/*
1148 		 * Nobody there. All multicast addresses will be re-joined
1149 		 * when we get the DL_BIND_ACK bringing the interface up.
1150 		 */
1151 		return (0);
1152 	}
1153 
1154 	ASSERT(ill->ill_join_allmulti);
1155 
1156 	/*
1157 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1158 	 * the DLPI provider.  We don't need to do this for certain
1159 	 * media types for which we never need to turn promiscuous
1160 	 * mode on.
1161 	 */
1162 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1163 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1164 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1165 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1166 		if (mp == NULL)
1167 			return (ENOMEM);
1168 		putnext(ill->ill_wq, mp);
1169 	}
1170 
1171 	mutex_enter(&ill->ill_lock);
1172 	ill->ill_join_allmulti = B_FALSE;
1173 	mutex_exit(&ill->ill_lock);
1174 	return (0);
1175 }
1176 
1177 /*
1178  * Copy mp_orig and pass it in as a local message.
1179  */
1180 void
1181 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1182     zoneid_t zoneid)
1183 {
1184 	mblk_t	*mp;
1185 	mblk_t	*ipsec_mp;
1186 	ipha_t	*iph;
1187 	ip_stack_t *ipst = ill->ill_ipst;
1188 
1189 	if (DB_TYPE(mp_orig) == M_DATA &&
1190 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1191 		uint_t hdrsz;
1192 
1193 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1194 		    sizeof (udpha_t);
1195 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1196 
1197 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1198 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1199 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1200 			mp->b_wptr += hdrsz;
1201 			mp->b_cont = mp_orig;
1202 			mp_orig->b_rptr += hdrsz;
1203 			if (MBLKL(mp_orig) == 0) {
1204 				mp->b_cont = mp_orig->b_cont;
1205 				mp_orig->b_cont = NULL;
1206 				freeb(mp_orig);
1207 			}
1208 		} else if (mp != NULL) {
1209 			freeb(mp);
1210 			mp = NULL;
1211 		}
1212 	} else {
1213 		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1214 	}
1215 
1216 	if (mp == NULL)
1217 		return;
1218 	if (DB_TYPE(mp) == M_CTL) {
1219 		ipsec_mp = mp;
1220 		mp = mp->b_cont;
1221 	} else {
1222 		ipsec_mp = mp;
1223 	}
1224 
1225 	iph = (ipha_t *)mp->b_rptr;
1226 
1227 	DTRACE_PROBE4(ip4__loopback__out__start,
1228 	    ill_t *, NULL, ill_t *, ill,
1229 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1230 
1231 	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1232 	    ipst->ips_ipv4firewall_loopback_out,
1233 	    NULL, ill, iph, ipsec_mp, mp, ipst);
1234 
1235 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1236 
1237 	if (ipsec_mp != NULL)
1238 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1239 		    fanout_flags, zoneid);
1240 }
1241 
1242 static area_t	ip_aresq_template = {
1243 	AR_ENTRY_SQUERY,		/* cmd */
1244 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1245 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1246 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1247 	sizeof (area_t),			/* proto addr offset */
1248 	IP_ADDR_LEN,			/* proto addr_length */
1249 	0,				/* proto mask offset */
1250 	/* Rest is initialized when used */
1251 	0,				/* flags */
1252 	0,				/* hw addr offset */
1253 	0,				/* hw addr length */
1254 };
1255 
1256 static mblk_t *
1257 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1258     uint32_t addroff, mblk_t *mp_tail)
1259 {
1260 	mblk_t	*mp;
1261 	area_t	*area;
1262 
1263 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1264 				(caddr_t)&ipaddr);
1265 	if (!mp) {
1266 		freemsg(mp_tail);
1267 		return (NULL);
1268 	}
1269 	area = (area_t *)mp->b_rptr;
1270 	area->area_hw_addr_length = addrlen;
1271 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1272 	/*
1273 	 * NOTE!
1274 	 *
1275 	 * The area_hw_addr_offset, as can be seen, does not hold the
1276 	 * actual hardware address offset. Rather, it holds the offset
1277 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1278 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1279 	 * mi_offset_paramc() to find the hardware address in the
1280 	 * *second* mblk (dl_xxx_req), not this mblk.
1281 	 *
1282 	 * Using mi_offset_paramc() is thus the *only* way to access
1283 	 * the dl_xxx_hw address.
1284 	 *
1285 	 * The squery hw address should *not* be accessed.
1286 	 *
1287 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1288 	 */
1289 
1290 	mp->b_cont = mp_tail;
1291 	return (mp);
1292 }
1293 
1294 /*
1295  * Create a dlpi message with room for phys+sap. When we come back in
1296  * ip_wput_ctl() we will strip the sap for those primitives which
1297  * only need a physical address.
1298  */
1299 static mblk_t *
1300 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1301     uint32_t *addr_lenp, uint32_t *addr_offp)
1302 {
1303 	mblk_t	*mp;
1304 	uint32_t	hw_addr_length;
1305 	char		*cp;
1306 	uint32_t	offset;
1307 	uint32_t 	size;
1308 
1309 	*addr_lenp = *addr_offp = 0;
1310 
1311 	hw_addr_length = ill->ill_phys_addr_length;
1312 	if (!hw_addr_length) {
1313 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1314 		return (NULL);
1315 	}
1316 
1317 	size = length;
1318 	switch (dl_primitive) {
1319 	case DL_ENABMULTI_REQ:
1320 	case DL_DISABMULTI_REQ:
1321 		size += hw_addr_length;
1322 		break;
1323 	case DL_PROMISCON_REQ:
1324 	case DL_PROMISCOFF_REQ:
1325 		break;
1326 	default:
1327 		return (NULL);
1328 	}
1329 	mp = allocb(size, BPRI_HI);
1330 	if (!mp)
1331 		return (NULL);
1332 	mp->b_wptr += size;
1333 	mp->b_datap->db_type = M_PROTO;
1334 
1335 	cp = (char *)mp->b_rptr;
1336 	offset = length;
1337 
1338 	switch (dl_primitive) {
1339 	case DL_ENABMULTI_REQ: {
1340 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1341 
1342 		dl->dl_primitive = dl_primitive;
1343 		dl->dl_addr_offset = offset;
1344 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1345 		*addr_offp = offset;
1346 		break;
1347 	}
1348 	case DL_DISABMULTI_REQ: {
1349 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1350 
1351 		dl->dl_primitive = dl_primitive;
1352 		dl->dl_addr_offset = offset;
1353 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1354 		*addr_offp = offset;
1355 		break;
1356 	}
1357 	case DL_PROMISCON_REQ:
1358 	case DL_PROMISCOFF_REQ: {
1359 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1360 
1361 		dl->dl_primitive = dl_primitive;
1362 		dl->dl_level = DL_PROMISC_MULTI;
1363 		break;
1364 	}
1365 	}
1366 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1367 		*addr_lenp, *addr_offp));
1368 	return (mp);
1369 }
1370 
1371 void
1372 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1373 {
1374 	ill_t	*ill = (ill_t *)q->q_ptr;
1375 	mblk_t	*mp = mp_orig;
1376 	area_t	*area;
1377 
1378 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1379 	if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) ||
1380 	    mp->b_cont == NULL) {
1381 		putnext(q, mp);
1382 		return;
1383 	}
1384 	area = (area_t *)mp->b_rptr;
1385 	if (area->area_cmd != AR_ENTRY_SQUERY) {
1386 		putnext(q, mp);
1387 		return;
1388 	}
1389 	mp = mp->b_cont;
1390 	/*
1391 	 * Update dl_addr_length and dl_addr_offset for primitives that
1392 	 * have physical addresses as opposed to full saps
1393 	 */
1394 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1395 	case DL_ENABMULTI_REQ:
1396 		/* Track the state if this is the first enabmulti */
1397 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1398 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1399 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1400 		break;
1401 	case DL_DISABMULTI_REQ:
1402 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1403 		break;
1404 	default:
1405 		ip1dbg(("ip_wput_ctl: default\n"));
1406 		break;
1407 	}
1408 	freeb(mp_orig);
1409 	putnext(q, mp);
1410 }
1411 
1412 /*
1413  * Rejoin any groups which have been explicitly joined by the application (we
1414  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1415  * bringing the interface down).  Note that because groups can be joined and
1416  * left while an interface is down, this may not be the same set of groups
1417  * that we left in ill_leave_multicast().
1418  */
1419 void
1420 ill_recover_multicast(ill_t *ill)
1421 {
1422 	ilm_t	*ilm;
1423 	char    addrbuf[INET6_ADDRSTRLEN];
1424 
1425 	ASSERT(IAM_WRITER_ILL(ill));
1426 
1427 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1428 		/*
1429 		 * Check how many ipif's that have members in this group -
1430 		 * if more then one we make sure that this entry is first
1431 		 * in the list.
1432 		 */
1433 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1434 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1435 			continue;
1436 		ip1dbg(("ill_recover_multicast: %s\n",
1437 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1438 		    sizeof (addrbuf))));
1439 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1440 			if (ill->ill_group == NULL) {
1441 				(void) ip_join_allmulti(ill->ill_ipif);
1442 			} else {
1443 				/*
1444 				 * We don't want to join on this ill,
1445 				 * if somebody else in the group has
1446 				 * already been nominated.
1447 				 */
1448 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1449 			}
1450 		} else {
1451 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1452 			    &ilm->ilm_v6addr);
1453 		}
1454 	}
1455 }
1456 
1457 /*
1458  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1459  * that were explicitly joined.  Note that both these functions could be
1460  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1461  * and DL_ENABMULTI_REQ messages when an interface is down.
1462  */
1463 void
1464 ill_leave_multicast(ill_t *ill)
1465 {
1466 	ilm_t	*ilm;
1467 	char    addrbuf[INET6_ADDRSTRLEN];
1468 
1469 	ASSERT(IAM_WRITER_ILL(ill));
1470 
1471 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1472 		/*
1473 		 * Check how many ipif's that have members in this group -
1474 		 * if more then one we make sure that this entry is first
1475 		 * in the list.
1476 		 */
1477 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1478 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1479 			continue;
1480 		ip1dbg(("ill_leave_multicast: %s\n",
1481 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1482 		    sizeof (addrbuf))));
1483 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1484 			(void) ip_leave_allmulti(ill->ill_ipif);
1485 			/*
1486 			 * If we were part of an IPMP group, then
1487 			 * ill_handoff_responsibility() has already
1488 			 * nominated a new member (so we don't).
1489 			 */
1490 			ASSERT(ill->ill_group == NULL);
1491 		} else {
1492 			(void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr);
1493 		}
1494 	}
1495 }
1496 
1497 /*
1498  * Find an ilm for matching the ill and which has the source in its
1499  * INCLUDE list or does not have it in its EXCLUDE list
1500  */
1501 ilm_t *
1502 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src)
1503 {
1504 	in6_addr_t	v6group, v6src;
1505 
1506 	/*
1507 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1508 	 */
1509 	if (group == INADDR_ANY)
1510 		v6group = ipv6_all_zeros;
1511 	else
1512 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1513 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
1514 
1515 	return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src));
1516 }
1517 
1518 ilm_t *
1519 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group,
1520     const in6_addr_t *v6src)
1521 {
1522 	ilm_t	*ilm;
1523 	boolean_t isinlist;
1524 	int	i, numsrc;
1525 
1526 	/*
1527 	 * If the source is in any ilm's INCLUDE list, or if
1528 	 * it is not in any ilm's EXCLUDE list, we have a hit.
1529 	 */
1530 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1531 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1532 
1533 			isinlist = B_FALSE;
1534 			numsrc = (ilm->ilm_filter == NULL) ?
1535 			    0 : ilm->ilm_filter->sl_numsrc;
1536 			for (i = 0; i < numsrc; i++) {
1537 				if (IN6_ARE_ADDR_EQUAL(v6src,
1538 				    &ilm->ilm_filter->sl_addr[i])) {
1539 					isinlist = B_TRUE;
1540 					break;
1541 				}
1542 			}
1543 			if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) ||
1544 			    (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE))
1545 				return (ilm);
1546 			else
1547 				return (NULL);
1548 		}
1549 	}
1550 	return (NULL);
1551 }
1552 
1553 
1554 /* Find an ilm for matching the ill */
1555 ilm_t *
1556 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1557 {
1558 	in6_addr_t	v6group;
1559 
1560 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1561 	    IAM_WRITER_ILL(ill));
1562 	/*
1563 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1564 	 */
1565 	if (group == INADDR_ANY)
1566 		v6group = ipv6_all_zeros;
1567 	else
1568 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1569 
1570 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1571 }
1572 
1573 /*
1574  * Find an ilm for matching the ill. All the ilm lookup functions
1575  * ignore ILM_DELETED ilms. These have been logically deleted, and
1576  * igmp and linklayer disable multicast have been done. Only mi_free
1577  * yet to be done. Still there in the list due to ilm_walkers. The
1578  * last walker will release it.
1579  */
1580 ilm_t *
1581 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1582 {
1583 	ilm_t	*ilm;
1584 
1585 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1586 	    IAM_WRITER_ILL(ill));
1587 
1588 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1589 		if (ilm->ilm_flags & ILM_DELETED)
1590 			continue;
1591 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1592 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1593 			return (ilm);
1594 	}
1595 	return (NULL);
1596 }
1597 
1598 ilm_t *
1599 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1600     zoneid_t zoneid)
1601 {
1602 	ilm_t *ilm;
1603 
1604 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1605 	    IAM_WRITER_ILL(ill));
1606 
1607 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1608 		if (ilm->ilm_flags & ILM_DELETED)
1609 			continue;
1610 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1611 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1612 		    ilm->ilm_orig_ifindex == index) {
1613 			return (ilm);
1614 		}
1615 	}
1616 	return (NULL);
1617 }
1618 
1619 ilm_t *
1620 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1621 {
1622 	in6_addr_t	v6group;
1623 
1624 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1625 	    IAM_WRITER_ILL(ill));
1626 	/*
1627 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1628 	 */
1629 	if (group == INADDR_ANY)
1630 		v6group = ipv6_all_zeros;
1631 	else
1632 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1633 
1634 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1635 }
1636 
1637 /*
1638  * Found an ilm for the ipif. Only needed for IPv4 which does
1639  * ipif specific socket options.
1640  */
1641 ilm_t *
1642 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1643 {
1644 	ill_t	*ill = ipif->ipif_ill;
1645 	ilm_t	*ilm;
1646 	in6_addr_t	v6group;
1647 
1648 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1649 	    IAM_WRITER_ILL(ill));
1650 
1651 	/*
1652 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1653 	 */
1654 	if (group == INADDR_ANY)
1655 		v6group = ipv6_all_zeros;
1656 	else
1657 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1658 
1659 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1660 		if (ilm->ilm_flags & ILM_DELETED)
1661 			continue;
1662 		if (ilm->ilm_ipif == ipif &&
1663 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1664 			return (ilm);
1665 	}
1666 	return (NULL);
1667 }
1668 
1669 /*
1670  * How many members on this ill?
1671  */
1672 int
1673 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1674 {
1675 	ilm_t	*ilm;
1676 	int i = 0;
1677 
1678 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1679 	    IAM_WRITER_ILL(ill));
1680 
1681 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1682 		if (ilm->ilm_flags & ILM_DELETED)
1683 			continue;
1684 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1685 			i++;
1686 		}
1687 	}
1688 	return (i);
1689 }
1690 
1691 /* Caller guarantees that the group is not already on the list */
1692 static ilm_t *
1693 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1694     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1695     zoneid_t zoneid)
1696 {
1697 	ill_t	*ill = ipif->ipif_ill;
1698 	ilm_t	*ilm;
1699 	ilm_t	*ilm_cur;
1700 	ilm_t	**ilm_ptpn;
1701 
1702 	ASSERT(IAM_WRITER_IPIF(ipif));
1703 
1704 	ilm = GETSTRUCT(ilm_t, 1);
1705 	if (ilm == NULL)
1706 		return (NULL);
1707 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1708 		ilm->ilm_filter = l_alloc();
1709 		if (ilm->ilm_filter == NULL) {
1710 			mi_free(ilm);
1711 			return (NULL);
1712 		}
1713 	}
1714 	ilm->ilm_v6addr = *v6group;
1715 	ilm->ilm_refcnt = 1;
1716 	ilm->ilm_zoneid = zoneid;
1717 	ilm->ilm_timer = INFINITY;
1718 	ilm->ilm_rtx.rtx_timer = INFINITY;
1719 
1720 	/*
1721 	 * IPv4 Multicast groups are joined using ipif.
1722 	 * IPv6 Multicast groups are joined using ill.
1723 	 */
1724 	if (ill->ill_isv6) {
1725 		ilm->ilm_ill = ill;
1726 		ilm->ilm_ipif = NULL;
1727 	} else {
1728 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1729 		ilm->ilm_ipif = ipif;
1730 		ilm->ilm_ill = NULL;
1731 	}
1732 	ASSERT(ill->ill_ipst);
1733 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1734 
1735 	/*
1736 	 * After this if ilm moves to a new ill, we don't change
1737 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1738 	 * it has been moved. Indexes don't match even when the application
1739 	 * wants to join on a FAILED/INACTIVE interface because we choose
1740 	 * a new interface to join in. This is considered as an implicit
1741 	 * move.
1742 	 */
1743 	ilm->ilm_orig_ifindex = orig_ifindex;
1744 
1745 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1746 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1747 
1748 	/*
1749 	 * Grab lock to give consistent view to readers
1750 	 */
1751 	mutex_enter(&ill->ill_lock);
1752 	/*
1753 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1754 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1755 	 * sending duplicates up when two applications in the same zone join the
1756 	 * same group on different logical interfaces.
1757 	 */
1758 	ilm_cur = ill->ill_ilm;
1759 	ilm_ptpn = &ill->ill_ilm;
1760 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1761 		ilm_ptpn = &ilm_cur->ilm_next;
1762 		ilm_cur = ilm_cur->ilm_next;
1763 	}
1764 	ilm->ilm_next = ilm_cur;
1765 	*ilm_ptpn = ilm;
1766 
1767 	/*
1768 	 * If we have an associated ilg, use its filter state; if not,
1769 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1770 	 */
1771 	if (ilgstat != ILGSTAT_NONE) {
1772 		if (!SLIST_IS_EMPTY(ilg_flist))
1773 			l_copy(ilg_flist, ilm->ilm_filter);
1774 		ilm->ilm_fmode = ilg_fmode;
1775 	} else {
1776 		ilm->ilm_no_ilg_cnt = 1;
1777 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1778 	}
1779 
1780 	mutex_exit(&ill->ill_lock);
1781 	return (ilm);
1782 }
1783 
1784 void
1785 ilm_walker_cleanup(ill_t *ill)
1786 {
1787 	ilm_t	**ilmp;
1788 	ilm_t	*ilm;
1789 
1790 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1791 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1792 
1793 	ilmp = &ill->ill_ilm;
1794 	while (*ilmp != NULL) {
1795 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1796 			ilm = *ilmp;
1797 			*ilmp = ilm->ilm_next;
1798 			FREE_SLIST(ilm->ilm_filter);
1799 			FREE_SLIST(ilm->ilm_pendsrcs);
1800 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1801 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1802 			ilm->ilm_ipst = NULL;
1803 			mi_free((char *)ilm);
1804 		} else {
1805 			ilmp = &(*ilmp)->ilm_next;
1806 		}
1807 	}
1808 	ill->ill_ilm_cleanup_reqd = 0;
1809 }
1810 
1811 /*
1812  * Unlink ilm and free it.
1813  */
1814 static void
1815 ilm_delete(ilm_t *ilm)
1816 {
1817 	ill_t	*ill;
1818 	ilm_t	**ilmp;
1819 
1820 	if (ilm->ilm_ipif != NULL) {
1821 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1822 		ASSERT(ilm->ilm_ill == NULL);
1823 		ill = ilm->ilm_ipif->ipif_ill;
1824 		ASSERT(!ill->ill_isv6);
1825 	} else {
1826 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1827 		ASSERT(ilm->ilm_ipif == NULL);
1828 		ill = ilm->ilm_ill;
1829 		ASSERT(ill->ill_isv6);
1830 	}
1831 	/*
1832 	 * Delete under lock protection so that readers don't stumble
1833 	 * on bad ilm_next
1834 	 */
1835 	mutex_enter(&ill->ill_lock);
1836 	if (ill->ill_ilm_walker_cnt != 0) {
1837 		ilm->ilm_flags |= ILM_DELETED;
1838 		ill->ill_ilm_cleanup_reqd = 1;
1839 		mutex_exit(&ill->ill_lock);
1840 		return;
1841 	}
1842 
1843 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1844 				;
1845 	*ilmp = ilm->ilm_next;
1846 	mutex_exit(&ill->ill_lock);
1847 
1848 	FREE_SLIST(ilm->ilm_filter);
1849 	FREE_SLIST(ilm->ilm_pendsrcs);
1850 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1851 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1852 	ilm->ilm_ipst = NULL;
1853 	mi_free((char *)ilm);
1854 }
1855 
1856 /* Free all ilms for this ipif */
1857 void
1858 ilm_free(ipif_t *ipif)
1859 {
1860 	ill_t	*ill = ipif->ipif_ill;
1861 	ilm_t	*ilm;
1862 	ilm_t	 *next_ilm;
1863 
1864 	ASSERT(IAM_WRITER_IPIF(ipif));
1865 
1866 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1867 		next_ilm = ilm->ilm_next;
1868 		if (ilm->ilm_ipif == ipif)
1869 			ilm_delete(ilm);
1870 	}
1871 }
1872 
1873 /*
1874  * Looks up the appropriate ipif given a v4 multicast group and interface
1875  * address.  On success, returns 0, with *ipifpp pointing to the found
1876  * struct.  On failure, returns an errno and *ipifpp is NULL.
1877  */
1878 int
1879 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1880     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1881 {
1882 	ipif_t *ipif;
1883 	int err = 0;
1884 	zoneid_t zoneid;
1885 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1886 
1887 	if (!CLASSD(group) || CLASSD(src)) {
1888 		return (EINVAL);
1889 	}
1890 	*ipifpp = NULL;
1891 
1892 	zoneid = IPCL_ZONEID(connp);
1893 
1894 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1895 	if (ifaddr != INADDR_ANY) {
1896 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1897 			CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1898 		if (err != 0 && err != EINPROGRESS)
1899 			err = EADDRNOTAVAIL;
1900 	} else if (ifindexp != NULL && *ifindexp != 0) {
1901 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1902 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1903 	} else {
1904 		ipif = ipif_lookup_group(group, zoneid, ipst);
1905 		if (ipif == NULL)
1906 			return (EADDRNOTAVAIL);
1907 	}
1908 	if (ipif == NULL)
1909 		return (err);
1910 
1911 	*ipifpp = ipif;
1912 	return (0);
1913 }
1914 
1915 /*
1916  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1917  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1918  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1919  * an errno and *illpp and *ipifpp are undefined.
1920  */
1921 int
1922 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1923     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1924     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1925 {
1926 	boolean_t src_unspec;
1927 	ill_t *ill = NULL;
1928 	ipif_t *ipif = NULL;
1929 	int err;
1930 	zoneid_t zoneid = connp->conn_zoneid;
1931 	queue_t *wq = CONNP_TO_WQ(connp);
1932 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1933 
1934 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1935 
1936 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1937 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1938 			return (EINVAL);
1939 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1940 		if (src_unspec) {
1941 			*v4src = INADDR_ANY;
1942 		} else {
1943 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1944 		}
1945 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1946 			return (EINVAL);
1947 		*ipifpp = NULL;
1948 		*isv6 = B_FALSE;
1949 	} else {
1950 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1951 			return (EINVAL);
1952 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1953 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1954 			return (EINVAL);
1955 		}
1956 		*illpp = NULL;
1957 		*isv6 = B_TRUE;
1958 	}
1959 
1960 	if (ifindex == 0) {
1961 		if (*isv6)
1962 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1963 		else
1964 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1965 		if (ill == NULL && ipif == NULL)
1966 			return (EADDRNOTAVAIL);
1967 	} else {
1968 		if (*isv6) {
1969 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1970 			    wq, first_mp, func, &err, ipst);
1971 			if (ill != NULL &&
1972 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1973 				ill_refrele(ill);
1974 				ill = NULL;
1975 				err = EADDRNOTAVAIL;
1976 			}
1977 		} else {
1978 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1979 			    zoneid, wq, first_mp, func, &err, ipst);
1980 		}
1981 		if (ill == NULL && ipif == NULL)
1982 			return (err);
1983 	}
1984 
1985 	*ipifpp = ipif;
1986 	*illpp = ill;
1987 	return (0);
1988 }
1989 
1990 static int
1991 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1992     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1993 {
1994 	ilg_t *ilg;
1995 	int i, numsrc, fmode, outsrcs;
1996 	struct sockaddr_in *sin;
1997 	struct sockaddr_in6 *sin6;
1998 	struct in_addr *addrp;
1999 	slist_t *fp;
2000 	boolean_t is_v4only_api;
2001 
2002 	mutex_enter(&connp->conn_lock);
2003 
2004 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2005 	if (ilg == NULL) {
2006 		mutex_exit(&connp->conn_lock);
2007 		return (EADDRNOTAVAIL);
2008 	}
2009 
2010 	if (gf == NULL) {
2011 		ASSERT(imsf != NULL);
2012 		ASSERT(!isv4mapped);
2013 		is_v4only_api = B_TRUE;
2014 		outsrcs = imsf->imsf_numsrc;
2015 	} else {
2016 		ASSERT(imsf == NULL);
2017 		is_v4only_api = B_FALSE;
2018 		outsrcs = gf->gf_numsrc;
2019 	}
2020 
2021 	/*
2022 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2023 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2024 	 * So we need to translate here.
2025 	 */
2026 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2027 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2028 	if ((fp = ilg->ilg_filter) == NULL) {
2029 		numsrc = 0;
2030 	} else {
2031 		for (i = 0; i < outsrcs; i++) {
2032 			if (i == fp->sl_numsrc)
2033 				break;
2034 			if (isv4mapped) {
2035 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2036 				sin6->sin6_family = AF_INET6;
2037 				sin6->sin6_addr = fp->sl_addr[i];
2038 			} else {
2039 				if (is_v4only_api) {
2040 					addrp = &imsf->imsf_slist[i];
2041 				} else {
2042 					sin = (struct sockaddr_in *)
2043 					    &gf->gf_slist[i];
2044 					sin->sin_family = AF_INET;
2045 					addrp = &sin->sin_addr;
2046 				}
2047 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2048 			}
2049 		}
2050 		numsrc = fp->sl_numsrc;
2051 	}
2052 
2053 	if (is_v4only_api) {
2054 		imsf->imsf_numsrc = numsrc;
2055 		imsf->imsf_fmode = fmode;
2056 	} else {
2057 		gf->gf_numsrc = numsrc;
2058 		gf->gf_fmode = fmode;
2059 	}
2060 
2061 	mutex_exit(&connp->conn_lock);
2062 
2063 	return (0);
2064 }
2065 
2066 static int
2067 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2068     const struct in6_addr *grp, ill_t *ill)
2069 {
2070 	ilg_t *ilg;
2071 	int i;
2072 	struct sockaddr_storage *sl;
2073 	struct sockaddr_in6 *sin6;
2074 	slist_t *fp;
2075 
2076 	mutex_enter(&connp->conn_lock);
2077 
2078 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2079 	if (ilg == NULL) {
2080 		mutex_exit(&connp->conn_lock);
2081 		return (EADDRNOTAVAIL);
2082 	}
2083 
2084 	/*
2085 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2086 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2087 	 * So we need to translate here.
2088 	 */
2089 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2090 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2091 	if ((fp = ilg->ilg_filter) == NULL) {
2092 		gf->gf_numsrc = 0;
2093 	} else {
2094 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2095 			if (i == fp->sl_numsrc)
2096 				break;
2097 			sin6 = (struct sockaddr_in6 *)sl;
2098 			sin6->sin6_family = AF_INET6;
2099 			sin6->sin6_addr = fp->sl_addr[i];
2100 		}
2101 		gf->gf_numsrc = fp->sl_numsrc;
2102 	}
2103 
2104 	mutex_exit(&connp->conn_lock);
2105 
2106 	return (0);
2107 }
2108 
2109 static int
2110 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2111     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2112 {
2113 	ilg_t *ilg;
2114 	int i, err, insrcs, infmode, new_fmode;
2115 	struct sockaddr_in *sin;
2116 	struct sockaddr_in6 *sin6;
2117 	struct in_addr *addrp;
2118 	slist_t *orig_filter = NULL;
2119 	slist_t *new_filter = NULL;
2120 	mcast_record_t orig_fmode;
2121 	boolean_t leave_grp, is_v4only_api;
2122 	ilg_stat_t ilgstat;
2123 
2124 	if (gf == NULL) {
2125 		ASSERT(imsf != NULL);
2126 		ASSERT(!isv4mapped);
2127 		is_v4only_api = B_TRUE;
2128 		insrcs = imsf->imsf_numsrc;
2129 		infmode = imsf->imsf_fmode;
2130 	} else {
2131 		ASSERT(imsf == NULL);
2132 		is_v4only_api = B_FALSE;
2133 		insrcs = gf->gf_numsrc;
2134 		infmode = gf->gf_fmode;
2135 	}
2136 
2137 	/* Make sure we can handle the source list */
2138 	if (insrcs > MAX_FILTER_SIZE)
2139 		return (ENOBUFS);
2140 
2141 	/*
2142 	 * setting the filter to (INCLUDE, NULL) is treated
2143 	 * as a request to leave the group.
2144 	 */
2145 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2146 
2147 	ASSERT(IAM_WRITER_IPIF(ipif));
2148 
2149 	mutex_enter(&connp->conn_lock);
2150 
2151 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2152 	if (ilg == NULL) {
2153 		/*
2154 		 * if the request was actually to leave, and we
2155 		 * didn't find an ilg, there's nothing to do.
2156 		 */
2157 		if (!leave_grp)
2158 			ilg = conn_ilg_alloc(connp);
2159 		if (leave_grp || ilg == NULL) {
2160 			mutex_exit(&connp->conn_lock);
2161 			return (leave_grp ? 0 : ENOMEM);
2162 		}
2163 		ilgstat = ILGSTAT_NEW;
2164 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2165 		ilg->ilg_ipif = ipif;
2166 		ilg->ilg_ill = NULL;
2167 		ilg->ilg_orig_ifindex = 0;
2168 	} else if (leave_grp) {
2169 		ilg_delete(connp, ilg, NULL);
2170 		mutex_exit(&connp->conn_lock);
2171 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2172 		return (0);
2173 	} else {
2174 		ilgstat = ILGSTAT_CHANGE;
2175 		/* Preserve existing state in case ip_addmulti() fails */
2176 		orig_fmode = ilg->ilg_fmode;
2177 		if (ilg->ilg_filter == NULL) {
2178 			orig_filter = NULL;
2179 		} else {
2180 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2181 			if (orig_filter == NULL) {
2182 				mutex_exit(&connp->conn_lock);
2183 				return (ENOMEM);
2184 			}
2185 		}
2186 	}
2187 
2188 	/*
2189 	 * Alloc buffer to copy new state into (see below) before
2190 	 * we make any changes, so we can bail if it fails.
2191 	 */
2192 	if ((new_filter = l_alloc()) == NULL) {
2193 		mutex_exit(&connp->conn_lock);
2194 		err = ENOMEM;
2195 		goto free_and_exit;
2196 	}
2197 
2198 	if (insrcs == 0) {
2199 		CLEAR_SLIST(ilg->ilg_filter);
2200 	} else {
2201 		slist_t *fp;
2202 		if (ilg->ilg_filter == NULL) {
2203 			fp = l_alloc();
2204 			if (fp == NULL) {
2205 				if (ilgstat == ILGSTAT_NEW)
2206 					ilg_delete(connp, ilg, NULL);
2207 				mutex_exit(&connp->conn_lock);
2208 				err = ENOMEM;
2209 				goto free_and_exit;
2210 			}
2211 		} else {
2212 			fp = ilg->ilg_filter;
2213 		}
2214 		for (i = 0; i < insrcs; i++) {
2215 			if (isv4mapped) {
2216 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2217 				fp->sl_addr[i] = sin6->sin6_addr;
2218 			} else {
2219 				if (is_v4only_api) {
2220 					addrp = &imsf->imsf_slist[i];
2221 				} else {
2222 					sin = (struct sockaddr_in *)
2223 					    &gf->gf_slist[i];
2224 					addrp = &sin->sin_addr;
2225 				}
2226 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2227 			}
2228 		}
2229 		fp->sl_numsrc = insrcs;
2230 		ilg->ilg_filter = fp;
2231 	}
2232 	/*
2233 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2234 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2235 	 * So we need to translate here.
2236 	 */
2237 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2238 		    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2239 
2240 	/*
2241 	 * Save copy of ilg's filter state to pass to other functions,
2242 	 * so we can release conn_lock now.
2243 	 */
2244 	new_fmode = ilg->ilg_fmode;
2245 	l_copy(ilg->ilg_filter, new_filter);
2246 
2247 	mutex_exit(&connp->conn_lock);
2248 
2249 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2250 	if (err != 0) {
2251 		/*
2252 		 * Restore the original filter state, or delete the
2253 		 * newly-created ilg.  We need to look up the ilg
2254 		 * again, though, since we've not been holding the
2255 		 * conn_lock.
2256 		 */
2257 		mutex_enter(&connp->conn_lock);
2258 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2259 		ASSERT(ilg != NULL);
2260 		if (ilgstat == ILGSTAT_NEW) {
2261 			ilg_delete(connp, ilg, NULL);
2262 		} else {
2263 			ilg->ilg_fmode = orig_fmode;
2264 			if (SLIST_IS_EMPTY(orig_filter)) {
2265 				CLEAR_SLIST(ilg->ilg_filter);
2266 			} else {
2267 				/*
2268 				 * We didn't free the filter, even if we
2269 				 * were trying to make the source list empty;
2270 				 * so if orig_filter isn't empty, the ilg
2271 				 * must still have a filter alloc'd.
2272 				 */
2273 				l_copy(orig_filter, ilg->ilg_filter);
2274 			}
2275 		}
2276 		mutex_exit(&connp->conn_lock);
2277 	}
2278 
2279 free_and_exit:
2280 	l_free(orig_filter);
2281 	l_free(new_filter);
2282 
2283 	return (err);
2284 }
2285 
2286 static int
2287 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2288     const struct in6_addr *grp, ill_t *ill)
2289 {
2290 	ilg_t *ilg;
2291 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2292 	slist_t *orig_filter = NULL;
2293 	slist_t *new_filter = NULL;
2294 	struct sockaddr_storage *sl;
2295 	struct sockaddr_in6 *sin6;
2296 	boolean_t leave_grp;
2297 	ilg_stat_t ilgstat;
2298 
2299 	/* Make sure we can handle the source list */
2300 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2301 		return (ENOBUFS);
2302 
2303 	/*
2304 	 * setting the filter to (INCLUDE, NULL) is treated
2305 	 * as a request to leave the group.
2306 	 */
2307 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2308 
2309 	ASSERT(IAM_WRITER_ILL(ill));
2310 
2311 	/*
2312 	 * Use the ifindex to do the lookup.  We can't use the ill
2313 	 * directly because ilg_ill could point to a different ill
2314 	 * if things have moved.
2315 	 */
2316 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2317 
2318 	mutex_enter(&connp->conn_lock);
2319 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2320 	if (ilg == NULL) {
2321 		/*
2322 		 * if the request was actually to leave, and we
2323 		 * didn't find an ilg, there's nothing to do.
2324 		 */
2325 		if (!leave_grp)
2326 			ilg = conn_ilg_alloc(connp);
2327 		if (leave_grp || ilg == NULL) {
2328 			mutex_exit(&connp->conn_lock);
2329 			return (leave_grp ? 0 : ENOMEM);
2330 		}
2331 		ilgstat = ILGSTAT_NEW;
2332 		ilg->ilg_v6group = *grp;
2333 		ilg->ilg_ipif = NULL;
2334 		/*
2335 		 * Choose our target ill to join on. This might be
2336 		 * different from the ill we've been given if it's
2337 		 * currently down and part of a group.
2338 		 *
2339 		 * new ill is not refheld; we are writer.
2340 		 */
2341 		ill = ip_choose_multi_ill(ill, grp);
2342 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2343 		ilg->ilg_ill = ill;
2344 		/*
2345 		 * Remember the index that we joined on, so that we can
2346 		 * successfully delete them later on and also search for
2347 		 * duplicates if the application wants to join again.
2348 		 */
2349 		ilg->ilg_orig_ifindex = orig_ifindex;
2350 	} else if (leave_grp) {
2351 		/*
2352 		 * Use the ilg's current ill for the deletion,
2353 		 * we might have failed over.
2354 		 */
2355 		ill = ilg->ilg_ill;
2356 		ilg_delete(connp, ilg, NULL);
2357 		mutex_exit(&connp->conn_lock);
2358 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2359 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2360 		return (0);
2361 	} else {
2362 		ilgstat = ILGSTAT_CHANGE;
2363 		/*
2364 		 * The current ill might be different from the one we were
2365 		 * asked to join on (if failover has occurred); we should
2366 		 * join on the ill stored in the ilg.  The original ill
2367 		 * is noted in ilg_orig_ifindex, which matched our request.
2368 		 */
2369 		ill = ilg->ilg_ill;
2370 		/* preserve existing state in case ip_addmulti() fails */
2371 		orig_fmode = ilg->ilg_fmode;
2372 		if (ilg->ilg_filter == NULL) {
2373 			orig_filter = NULL;
2374 		} else {
2375 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2376 			if (orig_filter == NULL) {
2377 				mutex_exit(&connp->conn_lock);
2378 				return (ENOMEM);
2379 			}
2380 		}
2381 	}
2382 
2383 	/*
2384 	 * Alloc buffer to copy new state into (see below) before
2385 	 * we make any changes, so we can bail if it fails.
2386 	 */
2387 	if ((new_filter = l_alloc()) == NULL) {
2388 		mutex_exit(&connp->conn_lock);
2389 		err = ENOMEM;
2390 		goto free_and_exit;
2391 	}
2392 
2393 	if (gf->gf_numsrc == 0) {
2394 		CLEAR_SLIST(ilg->ilg_filter);
2395 	} else {
2396 		slist_t *fp;
2397 		if (ilg->ilg_filter == NULL) {
2398 			fp = l_alloc();
2399 			if (fp == NULL) {
2400 				if (ilgstat == ILGSTAT_NEW)
2401 					ilg_delete(connp, ilg, NULL);
2402 				mutex_exit(&connp->conn_lock);
2403 				err = ENOMEM;
2404 				goto free_and_exit;
2405 			}
2406 		} else {
2407 			fp = ilg->ilg_filter;
2408 		}
2409 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2410 			sin6 = (struct sockaddr_in6 *)sl;
2411 			fp->sl_addr[i] = sin6->sin6_addr;
2412 		}
2413 		fp->sl_numsrc = gf->gf_numsrc;
2414 		ilg->ilg_filter = fp;
2415 	}
2416 	/*
2417 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2418 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2419 	 * So we need to translate here.
2420 	 */
2421 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2422 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2423 
2424 	/*
2425 	 * Save copy of ilg's filter state to pass to other functions,
2426 	 * so we can release conn_lock now.
2427 	 */
2428 	new_fmode = ilg->ilg_fmode;
2429 	l_copy(ilg->ilg_filter, new_filter);
2430 
2431 	mutex_exit(&connp->conn_lock);
2432 
2433 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2434 	    ilgstat, new_fmode, new_filter);
2435 	if (err != 0) {
2436 		/*
2437 		 * Restore the original filter state, or delete the
2438 		 * newly-created ilg.  We need to look up the ilg
2439 		 * again, though, since we've not been holding the
2440 		 * conn_lock.
2441 		 */
2442 		mutex_enter(&connp->conn_lock);
2443 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2444 		ASSERT(ilg != NULL);
2445 		if (ilgstat == ILGSTAT_NEW) {
2446 			ilg_delete(connp, ilg, NULL);
2447 		} else {
2448 			ilg->ilg_fmode = orig_fmode;
2449 			if (SLIST_IS_EMPTY(orig_filter)) {
2450 				CLEAR_SLIST(ilg->ilg_filter);
2451 			} else {
2452 				/*
2453 				 * We didn't free the filter, even if we
2454 				 * were trying to make the source list empty;
2455 				 * so if orig_filter isn't empty, the ilg
2456 				 * must still have a filter alloc'd.
2457 				 */
2458 				l_copy(orig_filter, ilg->ilg_filter);
2459 			}
2460 		}
2461 		mutex_exit(&connp->conn_lock);
2462 	}
2463 
2464 free_and_exit:
2465 	l_free(orig_filter);
2466 	l_free(new_filter);
2467 
2468 	return (err);
2469 }
2470 
2471 /*
2472  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2473  */
2474 /* ARGSUSED */
2475 int
2476 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2477     ip_ioctl_cmd_t *ipip, void *ifreq)
2478 {
2479 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2480 	/* existence verified in ip_wput_nondata() */
2481 	mblk_t *data_mp = mp->b_cont->b_cont;
2482 	int datalen, err, cmd, minsize;
2483 	int expsize = 0;
2484 	conn_t *connp;
2485 	boolean_t isv6, is_v4only_api, getcmd;
2486 	struct sockaddr_in *gsin;
2487 	struct sockaddr_in6 *gsin6;
2488 	ipaddr_t v4grp;
2489 	in6_addr_t v6grp;
2490 	struct group_filter *gf = NULL;
2491 	struct ip_msfilter *imsf = NULL;
2492 	mblk_t *ndp;
2493 
2494 	if (data_mp->b_cont != NULL) {
2495 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2496 			return (ENOMEM);
2497 		freemsg(data_mp);
2498 		data_mp = ndp;
2499 		mp->b_cont->b_cont = data_mp;
2500 	}
2501 
2502 	cmd = iocp->ioc_cmd;
2503 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2504 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2505 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2506 	datalen = MBLKL(data_mp);
2507 
2508 	if (datalen < minsize)
2509 		return (EINVAL);
2510 
2511 	/*
2512 	 * now we know we have at least have the initial structure,
2513 	 * but need to check for the source list array.
2514 	 */
2515 	if (is_v4only_api) {
2516 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2517 		isv6 = B_FALSE;
2518 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2519 	} else {
2520 		gf = (struct group_filter *)data_mp->b_rptr;
2521 		if (gf->gf_group.ss_family == AF_INET6) {
2522 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2523 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2524 		} else {
2525 			isv6 = B_FALSE;
2526 		}
2527 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2528 	}
2529 	if (datalen < expsize)
2530 		return (EINVAL);
2531 
2532 	connp = Q_TO_CONN(q);
2533 
2534 	/* operation not supported on the virtual network interface */
2535 	if (IS_VNI(ipif->ipif_ill))
2536 		return (EINVAL);
2537 
2538 	if (isv6) {
2539 		ill_t *ill = ipif->ipif_ill;
2540 		ill_refhold(ill);
2541 
2542 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2543 		v6grp = gsin6->sin6_addr;
2544 		if (getcmd)
2545 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2546 		else
2547 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2548 
2549 		ill_refrele(ill);
2550 	} else {
2551 		boolean_t isv4mapped = B_FALSE;
2552 		if (is_v4only_api) {
2553 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2554 		} else {
2555 			if (gf->gf_group.ss_family == AF_INET) {
2556 				gsin = (struct sockaddr_in *)&gf->gf_group;
2557 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2558 			} else {
2559 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2560 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2561 				    v4grp);
2562 				isv4mapped = B_TRUE;
2563 			}
2564 		}
2565 		if (getcmd)
2566 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2567 			    isv4mapped);
2568 		else
2569 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2570 			    isv4mapped);
2571 	}
2572 
2573 	return (err);
2574 }
2575 
2576 /*
2577  * Finds the ipif based on information in the ioctl headers.  Needed to make
2578  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2579  * ioctls prior to calling the ioctl's handler function).  Somewhat analogous
2580  * to ip_extract_lifreq_cmn() and ip_extract_tunreq().
2581  */
2582 int
2583 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func)
2584 {
2585 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2586 	int cmd = iocp->ioc_cmd, err = 0;
2587 	conn_t *connp;
2588 	ipif_t *ipif;
2589 	/* caller has verified this mblk exists */
2590 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2591 	struct ip_msfilter *imsf;
2592 	struct group_filter *gf;
2593 	ipaddr_t v4addr, v4grp;
2594 	in6_addr_t v6grp;
2595 	uint32_t index;
2596 	zoneid_t zoneid;
2597 	ip_stack_t *ipst;
2598 
2599 	connp = Q_TO_CONN(q);
2600 	zoneid = connp->conn_zoneid;
2601 	ipst = connp->conn_netstack->netstack_ip;
2602 
2603 	/* don't allow multicast operations on a tcp conn */
2604 	if (IPCL_IS_TCP(connp))
2605 		return (ENOPROTOOPT);
2606 
2607 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2608 		/* don't allow v4-specific ioctls on v6 socket */
2609 		if (connp->conn_af_isv6)
2610 			return (EAFNOSUPPORT);
2611 
2612 		imsf = (struct ip_msfilter *)dbuf;
2613 		v4addr = imsf->imsf_interface.s_addr;
2614 		v4grp = imsf->imsf_multiaddr.s_addr;
2615 		if (v4addr == INADDR_ANY) {
2616 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2617 			if (ipif == NULL)
2618 				err = EADDRNOTAVAIL;
2619 		} else {
2620 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2621 						func, &err, ipst);
2622 		}
2623 	} else {
2624 		boolean_t isv6 = B_FALSE;
2625 		gf = (struct group_filter *)dbuf;
2626 		index = gf->gf_interface;
2627 		if (gf->gf_group.ss_family == AF_INET6) {
2628 			struct sockaddr_in6 *sin6;
2629 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2630 			v6grp = sin6->sin6_addr;
2631 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2632 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2633 			else
2634 				isv6 = B_TRUE;
2635 		} else if (gf->gf_group.ss_family == AF_INET) {
2636 			struct sockaddr_in *sin;
2637 			sin = (struct sockaddr_in *)&gf->gf_group;
2638 			v4grp = sin->sin_addr.s_addr;
2639 		} else {
2640 			return (EAFNOSUPPORT);
2641 		}
2642 		if (index == 0) {
2643 			if (isv6) {
2644 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2645 				    ipst);
2646 			} else {
2647 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2648 			}
2649 			if (ipif == NULL)
2650 				err = EADDRNOTAVAIL;
2651 		} else {
2652 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2653 			    q, mp, func, &err, ipst);
2654 		}
2655 	}
2656 
2657 	*ipifpp = ipif;
2658 	return (err);
2659 }
2660 
2661 /*
2662  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2663  * in in two stages, as the first copyin tells us the size of the attached
2664  * source buffer.  This function is called by ip_wput_nondata() after the
2665  * first copyin has completed; it figures out how big the second stage
2666  * needs to be, and kicks it off.
2667  *
2668  * In some cases (numsrc < 2), the second copyin is not needed as the
2669  * first one gets a complete structure containing 1 source addr.
2670  *
2671  * The function returns 0 if a second copyin has been started (i.e. there's
2672  * no more work to be done right now), or 1 if the second copyin is not
2673  * needed and ip_wput_nondata() can continue its processing.
2674  */
2675 int
2676 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2677 {
2678 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2679 	int cmd = iocp->ioc_cmd;
2680 	/* validity of this checked in ip_wput_nondata() */
2681 	mblk_t *mp1 = mp->b_cont->b_cont;
2682 	int copysize = 0;
2683 	int offset;
2684 
2685 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2686 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2687 		if (gf->gf_numsrc >= 2) {
2688 			offset = sizeof (struct group_filter);
2689 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2690 		}
2691 	} else {
2692 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2693 		if (imsf->imsf_numsrc >= 2) {
2694 			offset = sizeof (struct ip_msfilter);
2695 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2696 		}
2697 	}
2698 	if (copysize > 0) {
2699 		mi_copyin_n(q, mp, offset, copysize);
2700 		return (0);
2701 	}
2702 	return (1);
2703 }
2704 
2705 /*
2706  * Handle the following optmgmt:
2707  *	IP_ADD_MEMBERSHIP		must not have joined already
2708  *	MCAST_JOIN_GROUP		must not have joined already
2709  *	IP_BLOCK_SOURCE			must have joined already
2710  *	MCAST_BLOCK_SOURCE		must have joined already
2711  *	IP_JOIN_SOURCE_GROUP		may have joined already
2712  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2713  *
2714  * fmode and src parameters may be used to determine which option is
2715  * being set, as follows (the IP_* and MCAST_* versions of each option
2716  * are functionally equivalent):
2717  *	opt			fmode			src
2718  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2719  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2720  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2721  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2722  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2723  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2724  *
2725  * Changing the filter mode is not allowed; if a matching ilg already
2726  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2727  *
2728  * Verifies that there is a source address of appropriate scope for
2729  * the group; if not, EADDRNOTAVAIL is returned.
2730  *
2731  * The interface to be used may be identified by an address or by an
2732  * index.  A pointer to the index is passed; if it is NULL, use the
2733  * address, otherwise, use the index.
2734  */
2735 int
2736 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2737     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2738     mblk_t *first_mp)
2739 {
2740 	ipif_t	*ipif;
2741 	ipsq_t	*ipsq;
2742 	int err = 0;
2743 	ill_t	*ill;
2744 
2745 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2746 	    ip_restart_optmgmt, &ipif);
2747 	if (err != 0) {
2748 		if (err != EINPROGRESS) {
2749 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2750 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2751 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2752 		}
2753 		return (err);
2754 	}
2755 	ASSERT(ipif != NULL);
2756 
2757 	ill = ipif->ipif_ill;
2758 	/* Operation not supported on a virtual network interface */
2759 	if (IS_VNI(ill)) {
2760 		ipif_refrele(ipif);
2761 		return (EINVAL);
2762 	}
2763 
2764 	if (checkonly) {
2765 		/*
2766 		 * do not do operation, just pretend to - new T_CHECK
2767 		 * semantics. The error return case above if encountered
2768 		 * considered a good enough "check" here.
2769 		 */
2770 		ipif_refrele(ipif);
2771 		return (0);
2772 	}
2773 
2774 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2775 	    NEW_OP);
2776 
2777 	/* unspecified source addr => no source filtering */
2778 	err = ilg_add(connp, group, ipif, fmode, src);
2779 
2780 	IPSQ_EXIT(ipsq);
2781 
2782 	ipif_refrele(ipif);
2783 	return (err);
2784 }
2785 
2786 /*
2787  * Handle the following optmgmt:
2788  *	IPV6_JOIN_GROUP			must not have joined already
2789  *	MCAST_JOIN_GROUP		must not have joined already
2790  *	MCAST_BLOCK_SOURCE		must have joined already
2791  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2792  *
2793  * fmode and src parameters may be used to determine which option is
2794  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2795  * are functionally equivalent):
2796  *	opt			fmode			v6src
2797  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2798  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2799  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2800  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2801  *
2802  * Changing the filter mode is not allowed; if a matching ilg already
2803  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2804  *
2805  * Verifies that there is a source address of appropriate scope for
2806  * the group; if not, EADDRNOTAVAIL is returned.
2807  *
2808  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2809  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2810  * v6src is also v4-mapped.
2811  */
2812 int
2813 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2814     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2815     const in6_addr_t *v6src, mblk_t *first_mp)
2816 {
2817 	ill_t *ill;
2818 	ipif_t	*ipif;
2819 	char buf[INET6_ADDRSTRLEN];
2820 	ipaddr_t v4group, v4src;
2821 	boolean_t isv6;
2822 	ipsq_t	*ipsq;
2823 	int	err;
2824 
2825 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2826 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2827 	if (err != 0) {
2828 		if (err != EINPROGRESS) {
2829 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2830 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2831 			    sizeof (buf)), ifindex));
2832 		}
2833 		return (err);
2834 	}
2835 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2836 
2837 	/* operation is not supported on the virtual network interface */
2838 	if (isv6) {
2839 		if (IS_VNI(ill)) {
2840 			ill_refrele(ill);
2841 			return (EINVAL);
2842 		}
2843 	} else {
2844 		if (IS_VNI(ipif->ipif_ill)) {
2845 			ipif_refrele(ipif);
2846 			return (EINVAL);
2847 		}
2848 	}
2849 
2850 	if (checkonly) {
2851 		/*
2852 		 * do not do operation, just pretend to - new T_CHECK
2853 		 * semantics. The error return case above if encountered
2854 		 * considered a good enough "check" here.
2855 		 */
2856 		if (isv6)
2857 			ill_refrele(ill);
2858 		else
2859 			ipif_refrele(ipif);
2860 		return (0);
2861 	}
2862 
2863 	if (!isv6) {
2864 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2865 		    ipsq, NEW_OP);
2866 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2867 		IPSQ_EXIT(ipsq);
2868 		ipif_refrele(ipif);
2869 	} else {
2870 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2871 		    ipsq, NEW_OP);
2872 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2873 		IPSQ_EXIT(ipsq);
2874 		ill_refrele(ill);
2875 	}
2876 
2877 	return (err);
2878 }
2879 
2880 static int
2881 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2882     mcast_record_t fmode, ipaddr_t src)
2883 {
2884 	ilg_t	*ilg;
2885 	in6_addr_t v6src;
2886 	boolean_t leaving = B_FALSE;
2887 
2888 	ASSERT(IAM_WRITER_IPIF(ipif));
2889 
2890 	/*
2891 	 * The ilg is valid only while we hold the conn lock. Once we drop
2892 	 * the lock, another thread can locate another ilg on this connp,
2893 	 * but on a different ipif, and delete it, and cause the ilg array
2894 	 * to be reallocated and copied. Hence do the ilg_delete before
2895 	 * dropping the lock.
2896 	 */
2897 	mutex_enter(&connp->conn_lock);
2898 	ilg = ilg_lookup_ipif(connp, group, ipif);
2899 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2900 		mutex_exit(&connp->conn_lock);
2901 		return (EADDRNOTAVAIL);
2902 	}
2903 
2904 	/*
2905 	 * Decide if we're actually deleting the ilg or just removing a
2906 	 * source filter address; if just removing an addr, make sure we
2907 	 * aren't trying to change the filter mode, and that the addr is
2908 	 * actually in our filter list already.  If we're removing the
2909 	 * last src in an include list, just delete the ilg.
2910 	 */
2911 	if (src == INADDR_ANY) {
2912 		v6src = ipv6_all_zeros;
2913 		leaving = B_TRUE;
2914 	} else {
2915 		int err = 0;
2916 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2917 		if (fmode != ilg->ilg_fmode)
2918 			err = EINVAL;
2919 		else if (ilg->ilg_filter == NULL ||
2920 		    !list_has_addr(ilg->ilg_filter, &v6src))
2921 			err = EADDRNOTAVAIL;
2922 		if (err != 0) {
2923 			mutex_exit(&connp->conn_lock);
2924 			return (err);
2925 		}
2926 		if (fmode == MODE_IS_INCLUDE &&
2927 		    ilg->ilg_filter->sl_numsrc == 1) {
2928 			v6src = ipv6_all_zeros;
2929 			leaving = B_TRUE;
2930 		}
2931 	}
2932 
2933 	ilg_delete(connp, ilg, &v6src);
2934 	mutex_exit(&connp->conn_lock);
2935 
2936 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2937 	return (0);
2938 }
2939 
2940 static int
2941 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2942     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2943 {
2944 	ilg_t	*ilg;
2945 	ill_t	*ilg_ill;
2946 	uint_t	ilg_orig_ifindex;
2947 	boolean_t leaving = B_TRUE;
2948 
2949 	ASSERT(IAM_WRITER_ILL(ill));
2950 
2951 	/*
2952 	 * Use the index that we originally used to join. We can't
2953 	 * use the ill directly because ilg_ill could point to
2954 	 * a new ill if things have moved.
2955 	 */
2956 	mutex_enter(&connp->conn_lock);
2957 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2958 	    ill->ill_phyint->phyint_ifindex);
2959 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2960 		mutex_exit(&connp->conn_lock);
2961 		return (EADDRNOTAVAIL);
2962 	}
2963 
2964 	/*
2965 	 * Decide if we're actually deleting the ilg or just removing a
2966 	 * source filter address; if just removing an addr, make sure we
2967 	 * aren't trying to change the filter mode, and that the addr is
2968 	 * actually in our filter list already.  If we're removing the
2969 	 * last src in an include list, just delete the ilg.
2970 	 */
2971 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2972 		int err = 0;
2973 		if (fmode != ilg->ilg_fmode)
2974 			err = EINVAL;
2975 		else if (ilg->ilg_filter == NULL ||
2976 		    !list_has_addr(ilg->ilg_filter, v6src))
2977 			err = EADDRNOTAVAIL;
2978 		if (err != 0) {
2979 			mutex_exit(&connp->conn_lock);
2980 			return (err);
2981 		}
2982 		if (fmode == MODE_IS_INCLUDE &&
2983 		    ilg->ilg_filter->sl_numsrc == 1)
2984 			v6src = NULL;
2985 		else
2986 			leaving = B_FALSE;
2987 	}
2988 
2989 	ilg_ill = ilg->ilg_ill;
2990 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2991 	ilg_delete(connp, ilg, v6src);
2992 	mutex_exit(&connp->conn_lock);
2993 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2994 	    connp->conn_zoneid, B_FALSE, leaving);
2995 
2996 	return (0);
2997 }
2998 
2999 /*
3000  * Handle the following optmgmt:
3001  *	IP_DROP_MEMBERSHIP		will leave
3002  *	MCAST_LEAVE_GROUP		will leave
3003  *	IP_UNBLOCK_SOURCE		will not leave
3004  *	MCAST_UNBLOCK_SOURCE		will not leave
3005  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
3006  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3007  *
3008  * fmode and src parameters may be used to determine which option is
3009  * being set, as follows (the IP_* and MCAST_* versions of each option
3010  * are functionally equivalent):
3011  *	opt			 fmode			src
3012  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
3013  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3014  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3015  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3016  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3017  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3018  *
3019  * Changing the filter mode is not allowed; if a matching ilg already
3020  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3021  *
3022  * The interface to be used may be identified by an address or by an
3023  * index.  A pointer to the index is passed; if it is NULL, use the
3024  * address, otherwise, use the index.
3025  */
3026 int
3027 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3028     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3029     mblk_t *first_mp)
3030 {
3031 	ipif_t	*ipif;
3032 	ipsq_t	*ipsq;
3033 	int	err;
3034 	ill_t	*ill;
3035 
3036 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3037 	    ip_restart_optmgmt, &ipif);
3038 	if (err != 0) {
3039 		if (err != EINPROGRESS) {
3040 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3041 			    "0x%x, ifaddr 0x%x\n",
3042 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3043 		}
3044 		return (err);
3045 	}
3046 	ASSERT(ipif != NULL);
3047 
3048 	ill = ipif->ipif_ill;
3049 	/* Operation not supported on a virtual network interface */
3050 	if (IS_VNI(ill)) {
3051 		ipif_refrele(ipif);
3052 		return (EINVAL);
3053 	}
3054 
3055 	if (checkonly) {
3056 		/*
3057 		 * do not do operation, just pretend to - new T_CHECK
3058 		 * semantics. The error return case above if encountered
3059 		 * considered a good enough "check" here.
3060 		 */
3061 		ipif_refrele(ipif);
3062 		return (0);
3063 	}
3064 
3065 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3066 	    NEW_OP);
3067 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3068 	IPSQ_EXIT(ipsq);
3069 
3070 	ipif_refrele(ipif);
3071 	return (err);
3072 }
3073 
3074 /*
3075  * Handle the following optmgmt:
3076  *	IPV6_LEAVE_GROUP		will leave
3077  *	MCAST_LEAVE_GROUP		will leave
3078  *	MCAST_UNBLOCK_SOURCE		will not leave
3079  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3080  *
3081  * fmode and src parameters may be used to determine which option is
3082  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3083  * are functionally equivalent):
3084  *	opt			 fmode			v6src
3085  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3086  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3087  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3088  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3089  *
3090  * Changing the filter mode is not allowed; if a matching ilg already
3091  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3092  *
3093  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3094  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3095  * v6src is also v4-mapped.
3096  */
3097 int
3098 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3099     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3100     const in6_addr_t *v6src, mblk_t *first_mp)
3101 {
3102 	ill_t *ill;
3103 	ipif_t	*ipif;
3104 	char	buf[INET6_ADDRSTRLEN];
3105 	ipaddr_t v4group, v4src;
3106 	boolean_t isv6;
3107 	ipsq_t	*ipsq;
3108 	int	err;
3109 
3110 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3111 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3112 	if (err != 0) {
3113 		if (err != EINPROGRESS) {
3114 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3115 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3116 			    sizeof (buf)), ifindex));
3117 		}
3118 		return (err);
3119 	}
3120 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3121 
3122 	/* operation is not supported on the virtual network interface */
3123 	if (isv6) {
3124 		if (IS_VNI(ill)) {
3125 			ill_refrele(ill);
3126 			return (EINVAL);
3127 		}
3128 	} else {
3129 		if (IS_VNI(ipif->ipif_ill)) {
3130 			ipif_refrele(ipif);
3131 			return (EINVAL);
3132 		}
3133 	}
3134 
3135 	if (checkonly) {
3136 		/*
3137 		 * do not do operation, just pretend to - new T_CHECK
3138 		 * semantics. The error return case above if encountered
3139 		 * considered a good enough "check" here.
3140 		 */
3141 		if (isv6)
3142 			ill_refrele(ill);
3143 		else
3144 			ipif_refrele(ipif);
3145 		return (0);
3146 	}
3147 
3148 	if (!isv6) {
3149 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3150 		    ipsq, NEW_OP);
3151 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3152 		    v4src);
3153 		IPSQ_EXIT(ipsq);
3154 		ipif_refrele(ipif);
3155 	} else {
3156 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3157 		    ipsq, NEW_OP);
3158 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3159 		    v6src);
3160 		IPSQ_EXIT(ipsq);
3161 		ill_refrele(ill);
3162 	}
3163 
3164 	return (err);
3165 }
3166 
/*
 * Group management for an upper conn that passes things down
 * to the interface multicast list (and DLPI).
 * These routines can handle new-style options that specify an interface
 * name as opposed to an interface address (needed for general handling
 * of unnumbered interfaces).
 */
3174 
3175 /*
3176  * Add a group to an upper conn group data structure and pass things down
3177  * to the interface multicast list (and DLPI)
3178  */
3179 static int
3180 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3181     ipaddr_t src)
3182 {
3183 	int	error = 0;
3184 	ill_t	*ill;
3185 	ilg_t	*ilg;
3186 	ilg_stat_t ilgstat;
3187 	slist_t	*new_filter = NULL;
3188 	int	new_fmode;
3189 
3190 	ASSERT(IAM_WRITER_IPIF(ipif));
3191 
3192 	ill = ipif->ipif_ill;
3193 
3194 	if (!(ill->ill_flags & ILLF_MULTICAST))
3195 		return (EADDRNOTAVAIL);
3196 
3197 	/*
3198 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3199 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3200 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3201 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3202 	 * but both operations happen on the same conn.
3203 	 */
3204 	mutex_enter(&connp->conn_lock);
3205 	ilg = ilg_lookup_ipif(connp, group, ipif);
3206 
3207 	/*
3208 	 * Depending on the option we're handling, may or may not be okay
3209 	 * if group has already been added.  Figure out our rules based
3210 	 * on fmode and src params.  Also make sure there's enough room
3211 	 * in the filter if we're adding a source to an existing filter.
3212 	 */
3213 	if (src == INADDR_ANY) {
3214 		/* we're joining for all sources, must not have joined */
3215 		if (ilg != NULL)
3216 			error = EADDRINUSE;
3217 	} else {
3218 		if (fmode == MODE_IS_EXCLUDE) {
3219 			/* (excl {addr}) => block source, must have joined */
3220 			if (ilg == NULL)
3221 				error = EADDRNOTAVAIL;
3222 		}
3223 		/* (incl {addr}) => join source, may have joined */
3224 
3225 		if (ilg != NULL &&
3226 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3227 			error = ENOBUFS;
3228 	}
3229 	if (error != 0) {
3230 		mutex_exit(&connp->conn_lock);
3231 		return (error);
3232 	}
3233 
3234 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3235 
3236 	/*
3237 	 * Alloc buffer to copy new state into (see below) before
3238 	 * we make any changes, so we can bail if it fails.
3239 	 */
3240 	if ((new_filter = l_alloc()) == NULL) {
3241 		mutex_exit(&connp->conn_lock);
3242 		return (ENOMEM);
3243 	}
3244 
3245 	if (ilg == NULL) {
3246 		ilgstat = ILGSTAT_NEW;
3247 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3248 			mutex_exit(&connp->conn_lock);
3249 			l_free(new_filter);
3250 			return (ENOMEM);
3251 		}
3252 		if (src != INADDR_ANY) {
3253 			ilg->ilg_filter = l_alloc();
3254 			if (ilg->ilg_filter == NULL) {
3255 				ilg_delete(connp, ilg, NULL);
3256 				mutex_exit(&connp->conn_lock);
3257 				l_free(new_filter);
3258 				return (ENOMEM);
3259 			}
3260 			ilg->ilg_filter->sl_numsrc = 1;
3261 			IN6_IPADDR_TO_V4MAPPED(src,
3262 			    &ilg->ilg_filter->sl_addr[0]);
3263 		}
3264 		if (group == INADDR_ANY) {
3265 			ilg->ilg_v6group = ipv6_all_zeros;
3266 		} else {
3267 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3268 		}
3269 		ilg->ilg_ipif = ipif;
3270 		ilg->ilg_ill = NULL;
3271 		ilg->ilg_orig_ifindex = 0;
3272 		ilg->ilg_fmode = fmode;
3273 	} else {
3274 		int index;
3275 		in6_addr_t v6src;
3276 		ilgstat = ILGSTAT_CHANGE;
3277 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3278 			mutex_exit(&connp->conn_lock);
3279 			l_free(new_filter);
3280 			return (EINVAL);
3281 		}
3282 		if (ilg->ilg_filter == NULL) {
3283 			ilg->ilg_filter = l_alloc();
3284 			if (ilg->ilg_filter == NULL) {
3285 				mutex_exit(&connp->conn_lock);
3286 				l_free(new_filter);
3287 				return (ENOMEM);
3288 			}
3289 		}
3290 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3291 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3292 			mutex_exit(&connp->conn_lock);
3293 			l_free(new_filter);
3294 			return (EADDRNOTAVAIL);
3295 		}
3296 		index = ilg->ilg_filter->sl_numsrc++;
3297 		ilg->ilg_filter->sl_addr[index] = v6src;
3298 	}
3299 
3300 	/*
3301 	 * Save copy of ilg's filter state to pass to other functions,
3302 	 * so we can release conn_lock now.
3303 	 */
3304 	new_fmode = ilg->ilg_fmode;
3305 	l_copy(ilg->ilg_filter, new_filter);
3306 
3307 	mutex_exit(&connp->conn_lock);
3308 
3309 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3310 	if (error != 0) {
3311 		/*
3312 		 * Need to undo what we did before calling ip_addmulti()!
3313 		 * Must look up the ilg again since we've not been holding
3314 		 * conn_lock.
3315 		 */
3316 		in6_addr_t v6src;
3317 		if (ilgstat == ILGSTAT_NEW)
3318 			v6src = ipv6_all_zeros;
3319 		else
3320 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3321 		mutex_enter(&connp->conn_lock);
3322 		ilg = ilg_lookup_ipif(connp, group, ipif);
3323 		ASSERT(ilg != NULL);
3324 		ilg_delete(connp, ilg, &v6src);
3325 		mutex_exit(&connp->conn_lock);
3326 		l_free(new_filter);
3327 		return (error);
3328 	}
3329 
3330 	l_free(new_filter);
3331 	return (0);
3332 }
3333 
3334 static int
3335 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3336     mcast_record_t fmode, const in6_addr_t *v6src)
3337 {
3338 	int	error = 0;
3339 	int	orig_ifindex;
3340 	ilg_t	*ilg;
3341 	ilg_stat_t ilgstat;
3342 	slist_t	*new_filter = NULL;
3343 	int	new_fmode;
3344 
3345 	ASSERT(IAM_WRITER_ILL(ill));
3346 
3347 	if (!(ill->ill_flags & ILLF_MULTICAST))
3348 		return (EADDRNOTAVAIL);
3349 
3350 	/*
3351 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3352 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3353 	 * and hme1 map to different ipsq's, but both operations happen
3354 	 * on the same conn.
3355 	 */
3356 	mutex_enter(&connp->conn_lock);
3357 
3358 	/*
3359 	 * Use the ifindex to do the lookup. We can't use the ill
3360 	 * directly because ilg_ill could point to a different ill if
3361 	 * things have moved.
3362 	 */
3363 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3364 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3365 
3366 	/*
3367 	 * Depending on the option we're handling, may or may not be okay
3368 	 * if group has already been added.  Figure out our rules based
3369 	 * on fmode and src params.  Also make sure there's enough room
3370 	 * in the filter if we're adding a source to an existing filter.
3371 	 */
3372 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3373 		/* we're joining for all sources, must not have joined */
3374 		if (ilg != NULL)
3375 			error = EADDRINUSE;
3376 	} else {
3377 		if (fmode == MODE_IS_EXCLUDE) {
3378 			/* (excl {addr}) => block source, must have joined */
3379 			if (ilg == NULL)
3380 				error = EADDRNOTAVAIL;
3381 		}
3382 		/* (incl {addr}) => join source, may have joined */
3383 
3384 		if (ilg != NULL &&
3385 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3386 			error = ENOBUFS;
3387 	}
3388 	if (error != 0) {
3389 		mutex_exit(&connp->conn_lock);
3390 		return (error);
3391 	}
3392 
3393 	/*
3394 	 * Alloc buffer to copy new state into (see below) before
3395 	 * we make any changes, so we can bail if it fails.
3396 	 */
3397 	if ((new_filter = l_alloc()) == NULL) {
3398 		mutex_exit(&connp->conn_lock);
3399 		return (ENOMEM);
3400 	}
3401 
3402 	if (ilg == NULL) {
3403 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3404 			mutex_exit(&connp->conn_lock);
3405 			l_free(new_filter);
3406 			return (ENOMEM);
3407 		}
3408 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3409 			ilg->ilg_filter = l_alloc();
3410 			if (ilg->ilg_filter == NULL) {
3411 				ilg_delete(connp, ilg, NULL);
3412 				mutex_exit(&connp->conn_lock);
3413 				l_free(new_filter);
3414 				return (ENOMEM);
3415 			}
3416 			ilg->ilg_filter->sl_numsrc = 1;
3417 			ilg->ilg_filter->sl_addr[0] = *v6src;
3418 		}
3419 		ilgstat = ILGSTAT_NEW;
3420 		ilg->ilg_v6group = *v6group;
3421 		ilg->ilg_fmode = fmode;
3422 		ilg->ilg_ipif = NULL;
3423 		/*
3424 		 * Choose our target ill to join on. This might be different
3425 		 * from the ill we've been given if it's currently down and
3426 		 * part of a group.
3427 		 *
3428 		 * new ill is not refheld; we are writer.
3429 		 */
3430 		ill = ip_choose_multi_ill(ill, v6group);
3431 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3432 		ilg->ilg_ill = ill;
3433 		/*
3434 		 * Remember the orig_ifindex that we joined on, so that we
3435 		 * can successfully delete them later on and also search
3436 		 * for duplicates if the application wants to join again.
3437 		 */
3438 		ilg->ilg_orig_ifindex = orig_ifindex;
3439 	} else {
3440 		int index;
3441 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3442 			mutex_exit(&connp->conn_lock);
3443 			l_free(new_filter);
3444 			return (EINVAL);
3445 		}
3446 		if (ilg->ilg_filter == NULL) {
3447 			ilg->ilg_filter = l_alloc();
3448 			if (ilg->ilg_filter == NULL) {
3449 				mutex_exit(&connp->conn_lock);
3450 				l_free(new_filter);
3451 				return (ENOMEM);
3452 			}
3453 		}
3454 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3455 			mutex_exit(&connp->conn_lock);
3456 			l_free(new_filter);
3457 			return (EADDRNOTAVAIL);
3458 		}
3459 		ilgstat = ILGSTAT_CHANGE;
3460 		index = ilg->ilg_filter->sl_numsrc++;
3461 		ilg->ilg_filter->sl_addr[index] = *v6src;
3462 		/*
3463 		 * The current ill might be different from the one we were
3464 		 * asked to join on (if failover has occurred); we should
3465 		 * join on the ill stored in the ilg.  The original ill
3466 		 * is noted in ilg_orig_ifindex, which matched our request.
3467 		 */
3468 		ill = ilg->ilg_ill;
3469 	}
3470 
3471 	/*
3472 	 * Save copy of ilg's filter state to pass to other functions,
3473 	 * so we can release conn_lock now.
3474 	 */
3475 	new_fmode = ilg->ilg_fmode;
3476 	l_copy(ilg->ilg_filter, new_filter);
3477 
3478 	mutex_exit(&connp->conn_lock);
3479 
3480 	/*
3481 	 * Now update the ill. We wait to do this until after the ilg
3482 	 * has been updated because we need to update the src filter
3483 	 * info for the ill, which involves looking at the status of
3484 	 * all the ilgs associated with this group/interface pair.
3485 	 */
3486 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3487 	    ilgstat, new_fmode, new_filter);
3488 	if (error != 0) {
3489 		/*
3490 		 * But because we waited, we have to undo the ilg update
3491 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3492 		 * again, since we've not been holding conn_lock.
3493 		 */
3494 		in6_addr_t delsrc =
3495 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3496 		mutex_enter(&connp->conn_lock);
3497 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3498 		ASSERT(ilg != NULL);
3499 		ilg_delete(connp, ilg, &delsrc);
3500 		mutex_exit(&connp->conn_lock);
3501 		l_free(new_filter);
3502 		return (error);
3503 	}
3504 
3505 	l_free(new_filter);
3506 
3507 	return (0);
3508 }
3509 
3510 /*
3511  * Find an IPv4 ilg matching group, ill and source
3512  */
3513 ilg_t *
3514 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3515 {
3516 	in6_addr_t v6group, v6src;
3517 	int i;
3518 	boolean_t isinlist;
3519 	ilg_t *ilg;
3520 	ipif_t *ipif;
3521 	ill_t *ilg_ill;
3522 
3523 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3524 
3525 	/*
3526 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3527 	 */
3528 	if (group == INADDR_ANY)
3529 		v6group = ipv6_all_zeros;
3530 	else
3531 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3532 
3533 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3534 		/* ilg_ipif is NULL for v6; skip them */
3535 		ilg = &connp->conn_ilg[i];
3536 		if ((ipif = ilg->ilg_ipif) == NULL)
3537 			continue;
3538 		ASSERT(ilg->ilg_ill == NULL);
3539 		ilg_ill = ipif->ipif_ill;
3540 		ASSERT(!ilg_ill->ill_isv6);
3541 		if (ilg_ill == ill &&
3542 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3543 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3544 				/* no source filter, so this is a match */
3545 				return (ilg);
3546 			}
3547 			break;
3548 		}
3549 	}
3550 	if (i == connp->conn_ilg_inuse)
3551 		return (NULL);
3552 
3553 	/*
3554 	 * we have an ilg with matching ill and group; but
3555 	 * the ilg has a source list that we must check.
3556 	 */
3557 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3558 	isinlist = B_FALSE;
3559 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3560 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3561 			isinlist = B_TRUE;
3562 			break;
3563 		}
3564 	}
3565 
3566 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3567 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3568 		return (ilg);
3569 
3570 	return (NULL);
3571 }
3572 
3573 /*
3574  * Find an IPv6 ilg matching group, ill, and source
3575  */
3576 ilg_t *
3577 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3578     const in6_addr_t *v6src, ill_t *ill)
3579 {
3580 	int i;
3581 	boolean_t isinlist;
3582 	ilg_t *ilg;
3583 	ill_t *ilg_ill;
3584 
3585 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3586 
3587 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3588 		ilg = &connp->conn_ilg[i];
3589 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3590 			continue;
3591 		ASSERT(ilg->ilg_ipif == NULL);
3592 		ASSERT(ilg_ill->ill_isv6);
3593 		if (ilg_ill == ill &&
3594 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3595 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3596 				/* no source filter, so this is a match */
3597 				return (ilg);
3598 			}
3599 			break;
3600 		}
3601 	}
3602 	if (i == connp->conn_ilg_inuse)
3603 		return (NULL);
3604 
3605 	/*
3606 	 * we have an ilg with matching ill and group; but
3607 	 * the ilg has a source list that we must check.
3608 	 */
3609 	isinlist = B_FALSE;
3610 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3611 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3612 			isinlist = B_TRUE;
3613 			break;
3614 		}
3615 	}
3616 
3617 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3618 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3619 		return (ilg);
3620 
3621 	return (NULL);
3622 }
3623 
3624 /*
3625  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3626  * This is useful when the interface fails and we have moved
3627  * to a new ill, but still would like to locate using the index
3628  * that we originally used to join. Used only for IPv6 currently.
3629  */
3630 static ilg_t *
3631 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3632 {
3633 	ilg_t	*ilg;
3634 	int	i;
3635 
3636 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3637 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3638 		ilg = &connp->conn_ilg[i];
3639 		/* ilg_ill is NULL for V4. Skip them */
3640 		if (ilg->ilg_ill == NULL)
3641 			continue;
3642 		/* ilg_ipif is NULL for V6 */
3643 		ASSERT(ilg->ilg_ipif == NULL);
3644 		ASSERT(ilg->ilg_orig_ifindex != 0);
3645 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3646 		    ilg->ilg_orig_ifindex == ifindex) {
3647 			return (ilg);
3648 		}
3649 	}
3650 	return (NULL);
3651 }
3652 
3653 /*
3654  * Find an IPv6 ilg matching group and ill
3655  */
3656 ilg_t *
3657 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3658 {
3659 	ilg_t	*ilg;
3660 	int	i;
3661 	ill_t 	*mem_ill;
3662 
3663 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3664 
3665 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3666 		ilg = &connp->conn_ilg[i];
3667 		if ((mem_ill = ilg->ilg_ill) == NULL)
3668 			continue;
3669 		ASSERT(ilg->ilg_ipif == NULL);
3670 		ASSERT(mem_ill->ill_isv6);
3671 		if (mem_ill == ill &&
3672 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3673 			return (ilg);
3674 	}
3675 	return (NULL);
3676 }
3677 
3678 /*
3679  * Find an IPv4 ilg matching group and ipif
3680  */
3681 static ilg_t *
3682 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3683 {
3684 	in6_addr_t v6group;
3685 	int	i;
3686 
3687 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3688 	ASSERT(!ipif->ipif_ill->ill_isv6);
3689 
3690 	if (group == INADDR_ANY)
3691 		v6group = ipv6_all_zeros;
3692 	else
3693 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3694 
3695 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3696 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3697 		    &v6group) &&
3698 		    connp->conn_ilg[i].ilg_ipif == ipif)
3699 			return (&connp->conn_ilg[i]);
3700 	}
3701 	return (NULL);
3702 }
3703 
3704 /*
3705  * If a source address is passed in (src != NULL and src is not
3706  * unspecified), remove the specified src addr from the given ilg's
3707  * filter list, else delete the ilg.
3708  */
3709 static void
3710 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3711 {
3712 	int	i;
3713 
3714 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3715 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3716 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3717 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3718 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3719 
3720 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3721 		if (connp->conn_ilg_walker_cnt != 0) {
3722 			ilg->ilg_flags |= ILG_DELETED;
3723 			return;
3724 		}
3725 
3726 		FREE_SLIST(ilg->ilg_filter);
3727 
3728 		i = ilg - &connp->conn_ilg[0];
3729 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3730 
3731 		/* Move other entries up one step */
3732 		connp->conn_ilg_inuse--;
3733 		for (; i < connp->conn_ilg_inuse; i++)
3734 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3735 
3736 		if (connp->conn_ilg_inuse == 0) {
3737 			mi_free((char *)connp->conn_ilg);
3738 			connp->conn_ilg = NULL;
3739 			cv_broadcast(&connp->conn_refcv);
3740 		}
3741 	} else {
3742 		l_remove(ilg->ilg_filter, src);
3743 	}
3744 }
3745 
3746 /*
3747  * Called from conn close. No new ilg can be added or removed.
3748  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3749  * will return error if conn has started closing.
3750  */
3751 void
3752 ilg_delete_all(conn_t *connp)
3753 {
3754 	int	i;
3755 	ipif_t	*ipif = NULL;
3756 	ill_t	*ill = NULL;
3757 	ilg_t	*ilg;
3758 	in6_addr_t v6group;
3759 	boolean_t success;
3760 	ipsq_t	*ipsq;
3761 	int	orig_ifindex;
3762 
3763 	mutex_enter(&connp->conn_lock);
3764 retry:
3765 	ILG_WALKER_HOLD(connp);
3766 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3767 		ilg = &connp->conn_ilg[i];
3768 		/*
3769 		 * Since this walk is not atomic (we drop the
3770 		 * conn_lock and wait in ipsq_enter) we need
3771 		 * to check for the ILG_DELETED flag.
3772 		 */
3773 		if (ilg->ilg_flags & ILG_DELETED) {
3774 			/* Go to the next ilg */
3775 			i--;
3776 			continue;
3777 		}
3778 		v6group = ilg->ilg_v6group;
3779 
3780 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3781 			ipif = ilg->ilg_ipif;
3782 			ill = ipif->ipif_ill;
3783 		} else {
3784 			ipif = NULL;
3785 			ill = ilg->ilg_ill;
3786 		}
3787 		/*
3788 		 * We may not be able to refhold the ill if the ill/ipif
3789 		 * is changing. But we need to make sure that the ill will
3790 		 * not vanish. So we just bump up the ill_waiter count.
3791 		 * If we are unable to do even that, then the ill is closing,
3792 		 * in which case the unplumb thread will handle the cleanup,
3793 		 * and we move on to the next ilg.
3794 		 */
3795 		if (!ill_waiter_inc(ill)) {
3796 			/* Go to the next ilg */
3797 			i--;
3798 			continue;
3799 		}
3800 		mutex_exit(&connp->conn_lock);
3801 		/*
3802 		 * To prevent deadlock between ill close which waits inside
3803 		 * the perimeter, and conn close, ipsq_enter returns error,
3804 		 * the moment ILL_CONDEMNED is set, in which case ill close
3805 		 * takes responsibility to cleanup the ilgs. Note that we
3806 		 * have not yet set condemned flag, otherwise the conn can't
3807 		 * be refheld for cleanup by those routines and it would be
3808 		 * a mutual deadlock.
3809 		 */
3810 		success = ipsq_enter(ill, B_FALSE);
3811 		ipsq = ill->ill_phyint->phyint_ipsq;
3812 		ill_waiter_dcr(ill);
3813 		mutex_enter(&connp->conn_lock);
3814 		if (!success) {
3815 			/* Go to the next ilg */
3816 			i--;
3817 			continue;
3818 		}
3819 
3820 		/*
3821 		 * Make sure that nothing has changed under. For eg.
3822 		 * a failover/failback can change ilg_ill while we were
3823 		 * waiting to become exclusive above
3824 		 */
3825 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3826 			ipif = ilg->ilg_ipif;
3827 			ill = ipif->ipif_ill;
3828 		} else {
3829 			ipif = NULL;
3830 			ill = ilg->ilg_ill;
3831 		}
3832 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3833 			/*
3834 			 * The ilg has changed under us probably due
3835 			 * to a failover or unplumb. Retry on the same ilg.
3836 			 */
3837 			mutex_exit(&connp->conn_lock);
3838 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3839 			mutex_enter(&connp->conn_lock);
3840 			continue;
3841 		}
3842 		v6group = ilg->ilg_v6group;
3843 		orig_ifindex = ilg->ilg_orig_ifindex;
3844 		ilg_delete(connp, ilg, NULL);
3845 		mutex_exit(&connp->conn_lock);
3846 
3847 		if (ipif != NULL)
3848 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3849 			    B_FALSE, B_TRUE);
3850 
3851 		else
3852 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3853 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3854 
3855 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3856 		mutex_enter(&connp->conn_lock);
3857 		/* Go to the next ilg */
3858 		i--;
3859 	}
3860 	ILG_WALKER_RELE(connp);
3861 
3862 	/* If any ill was skipped above wait and retry */
3863 	if (connp->conn_ilg_inuse != 0) {
3864 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3865 		goto retry;
3866 	}
3867 	mutex_exit(&connp->conn_lock);
3868 }
3869 
3870 /*
3871  * Called from ill close by ipcl_walk for clearing conn_ilg and
3872  * conn_multicast_ipif for a given ipif. conn is held by caller.
3873  * Note that ipcl_walk only walks conns that are not yet condemned.
3874  * condemned conns can't be refheld. For this reason, conn must become clean
3875  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3876  * condemned flag.
3877  */
3878 static void
3879 conn_delete_ipif(conn_t *connp, caddr_t arg)
3880 {
3881 	ipif_t	*ipif = (ipif_t *)arg;
3882 	int	i;
3883 	char	group_buf1[INET6_ADDRSTRLEN];
3884 	char	group_buf2[INET6_ADDRSTRLEN];
3885 	ipaddr_t group;
3886 	ilg_t	*ilg;
3887 
3888 	/*
3889 	 * Even though conn_ilg_inuse can change while we are in this loop,
3890 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3891 	 * be created or deleted for this connp, on this ill, since this ill
3892 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3893 	 */
3894 	mutex_enter(&connp->conn_lock);
3895 
3896 	/*
3897 	 * Increment the walker count, so that ilg repacking does not
3898 	 * occur while we are in the loop.
3899 	 */
3900 	ILG_WALKER_HOLD(connp);
3901 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3902 		ilg = &connp->conn_ilg[i];
3903 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3904 			continue;
3905 		/*
3906 		 * ip_close cannot be cleaning this ilg at the same time.
3907 		 * since it also has to execute in this ill's perimeter which
3908 		 * we are now holding. Only a clean conn can be condemned.
3909 		 */
3910 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3911 
3912 		/* Blow away the membership */
3913 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3914 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3915 		    group_buf1, sizeof (group_buf1)),
3916 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3917 		    group_buf2, sizeof (group_buf2)),
3918 		    ipif->ipif_ill->ill_name));
3919 
3920 		/* ilg_ipif is NULL for V6, so we won't be here */
3921 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3922 
3923 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3924 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3925 		mutex_exit(&connp->conn_lock);
3926 
3927 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3928 		mutex_enter(&connp->conn_lock);
3929 	}
3930 
3931 	/*
3932 	 * If we are the last walker, need to physically delete the
3933 	 * ilgs and repack.
3934 	 */
3935 	ILG_WALKER_RELE(connp);
3936 
3937 	if (connp->conn_multicast_ipif == ipif) {
3938 		/* Revert to late binding */
3939 		connp->conn_multicast_ipif = NULL;
3940 	}
3941 	mutex_exit(&connp->conn_lock);
3942 
3943 	conn_delete_ire(connp, (caddr_t)ipif);
3944 }
3945 
3946 /*
3947  * Called from ill close by ipcl_walk for clearing conn_ilg and
3948  * conn_multicast_ill for a given ill. conn is held by caller.
3949  * Note that ipcl_walk only walks conns that are not yet condemned.
3950  * condemned conns can't be refheld. For this reason, conn must become clean
3951  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3952  * condemned flag.
3953  */
3954 static void
3955 conn_delete_ill(conn_t *connp, caddr_t arg)
3956 {
3957 	ill_t	*ill = (ill_t *)arg;
3958 	int	i;
3959 	char	group_buf[INET6_ADDRSTRLEN];
3960 	in6_addr_t v6group;
3961 	int	orig_ifindex;
3962 	ilg_t	*ilg;
3963 
3964 	/*
3965 	 * Even though conn_ilg_inuse can change while we are in this loop,
3966 	 * no new ilgs can be created/deleted for this connp, on this
3967 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3968 	 * in this cleanup.
3969 	 */
3970 	mutex_enter(&connp->conn_lock);
3971 
3972 	/*
3973 	 * Increment the walker count, so that ilg repacking does not
3974 	 * occur while we are in the loop.
3975 	 */
3976 	ILG_WALKER_HOLD(connp);
3977 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3978 		ilg = &connp->conn_ilg[i];
3979 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3980 			/*
3981 			 * ip_close cannot be cleaning this ilg at the same
3982 			 * time, since it also has to execute in this ill's
3983 			 * perimeter which we are now holding. Only a clean
3984 			 * conn can be condemned.
3985 			 */
3986 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3987 
3988 			/* Blow away the membership */
3989 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3990 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3991 			    group_buf, sizeof (group_buf)),
3992 			    ill->ill_name));
3993 
3994 			v6group = ilg->ilg_v6group;
3995 			orig_ifindex = ilg->ilg_orig_ifindex;
3996 			ilg_delete(connp, ilg, NULL);
3997 			mutex_exit(&connp->conn_lock);
3998 
3999 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
4000 			    connp->conn_zoneid, B_FALSE, B_TRUE);
4001 			mutex_enter(&connp->conn_lock);
4002 		}
4003 	}
4004 	/*
4005 	 * If we are the last walker, need to physically delete the
4006 	 * ilgs and repack.
4007 	 */
4008 	ILG_WALKER_RELE(connp);
4009 
4010 	if (connp->conn_multicast_ill == ill) {
4011 		/* Revert to late binding */
4012 		connp->conn_multicast_ill = NULL;
4013 		connp->conn_orig_multicast_ifindex = 0;
4014 	}
4015 	mutex_exit(&connp->conn_lock);
4016 }
4017 
4018 /*
4019  * Called when an ipif is unplumbed to make sure that there are no
4020  * dangling conn references to that ipif.
4021  * Handles ilg_ipif and conn_multicast_ipif
4022  */
4023 void
4024 reset_conn_ipif(ipif)
4025 	ipif_t	*ipif;
4026 {
4027 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4028 
4029 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4030 }
4031 
4032 /*
4033  * Called when an ill is unplumbed to make sure that there are no
4034  * dangling conn references to that ill.
4035  * Handles ilg_ill, conn_multicast_ill.
4036  */
4037 void
4038 reset_conn_ill(ill_t *ill)
4039 {
4040 	ip_stack_t	*ipst = ill->ill_ipst;
4041 
4042 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4043 }
4044 
4045 #ifdef DEBUG
4046 /*
4047  * Walk functions walk all the interfaces in the system to make
4048  * sure that there is no refernece to the ipif or ill that is
4049  * going away.
4050  */
4051 int
4052 ilm_walk_ill(ill_t *ill)
4053 {
4054 	int cnt = 0;
4055 	ill_t *till;
4056 	ilm_t *ilm;
4057 	ill_walk_context_t ctx;
4058 	ip_stack_t	*ipst = ill->ill_ipst;
4059 
4060 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4061 	till = ILL_START_WALK_ALL(&ctx, ipst);
4062 	for (; till != NULL; till = ill_next(&ctx, till)) {
4063 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4064 			if (ilm->ilm_ill == ill) {
4065 				cnt++;
4066 			}
4067 		}
4068 	}
4069 	rw_exit(&ipst->ips_ill_g_lock);
4070 
4071 	return (cnt);
4072 }
4073 
4074 /*
4075  * This function is called before the ipif is freed.
4076  */
4077 int
4078 ilm_walk_ipif(ipif_t *ipif)
4079 {
4080 	int cnt = 0;
4081 	ill_t *till;
4082 	ilm_t *ilm;
4083 	ill_walk_context_t ctx;
4084 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4085 
4086 	till = ILL_START_WALK_ALL(&ctx, ipst);
4087 	for (; till != NULL; till = ill_next(&ctx, till)) {
4088 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4089 			if (ilm->ilm_ipif == ipif) {
4090 					cnt++;
4091 			}
4092 		}
4093 	}
4094 	return (cnt);
4095 }
4096 #endif
4097