xref: /titanic_51/usr/src/uts/common/inet/ip/ip_multi.c (revision 16f94f589e38fe7d164f5a85c2e9425b18c2e28d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/systm.h>
44 #include <net/route.h>
45 #include <netinet/in.h>
46 #include <net/if_dl.h>
47 #include <netinet/ip6.h>
48 #include <netinet/icmp6.h>
49 
50 #include <inet/common.h>
51 #include <inet/mi.h>
52 #include <inet/nd.h>
53 #include <inet/arp.h>
54 #include <inet/ip.h>
55 #include <inet/ip6.h>
56 #include <inet/ip_if.h>
57 #include <inet/ip_ndp.h>
58 #include <inet/ip_multi.h>
59 #include <inet/ipclassifier.h>
60 #include <inet/ipsec_impl.h>
61 #include <inet/sctp_ip.h>
62 #include <inet/ip_listutils.h>
63 #include <inet/udp_impl.h>
64 
65 /* igmpv3/mldv2 source filter manipulation */
66 static void	ilm_bld_flists(conn_t *conn, void *arg);
67 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
68     slist_t *flist);
69 
70 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
71     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
72     int orig_ifindex, zoneid_t zoneid);
73 static void	ilm_delete(ilm_t *ilm);
74 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
75 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
76 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
77     const in6_addr_t *v6group, int index);
78 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
79     ipif_t *ipif);
80 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
81     mcast_record_t fmode, ipaddr_t src);
82 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
83     mcast_record_t fmode, const in6_addr_t *v6src);
84 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
85 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
86     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
87 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
88     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
89 static void	conn_ilg_reap(conn_t *connp);
90 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
91     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
92 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
93     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
94     const in6_addr_t *v6src);
95 
96 /*
97  * MT notes:
98  *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
110  *
111  * Multicast joins and leaves are single-threaded per phyint/IPMP group
112  * using the ipsq serialization mechanism.
113  *
114  * An ilm is an IP data structure used to track multicast join/leave.
115  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
116  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
117  * referencing the ilm. ilms are created / destroyed only as writer. ilms
118  * are not passed around, instead they are looked up and used under the
119  * ill_lock or as writer. So we don't need a dynamic refcount of the number
120  * of threads holding reference to an ilm.
121  *
122  * Multicast Join operation:
123  *
124  * The first step is to determine the ipif (v4) or ill (v6) on which
125  * the join operation is to be done. The join is done after becoming
126  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
127  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
128  * Multiple threads can attempt to join simultaneously on different ipif/ill
129  * on the same conn. In this case the ipsq serialization does not help in
130  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
131  * The conn_lock also protects all the ilg_t members.
132  *
133  * Leave operation.
134  *
135  * Similar to the join operation, the first step is to determine the ipif
136  * or ill (v6) on which the leave operation is to be done. The leave operation
137  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
138  * As with join ilg modification is done under the protection of the conn lock.
139  */
140 
/*
 * Attempt to enter the ipsq associated with `ipif' on behalf of `connp'.
 * The result of ipsq_try_enter() is stored in `ipsq'.  If that result is
 * NULL, ipif_refrele() is called on `ipif' and the ENCLOSING FUNCTION
 * returns EINPROGRESS (presumably the request has been queued and `func'
 * will be invoked later -- confirm against ipsq_try_enter()).
 *
 * This expands to several statements, one of which is a `return', so it
 * may only be used as a complete statement directly inside a braced block
 * of a function returning int.  Every parameter is parenthesized wherever
 * it appears inside an expression.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}
149 
/*
 * Attempt to enter the ipsq associated with `ill' on behalf of `connp'.
 * The result of ipsq_try_enter() is stored in `ipsq'.  If that result is
 * NULL, ill_refrele() is called on `ill' and the ENCLOSING FUNCTION
 * returns EINPROGRESS (presumably the request has been queued and `func'
 * will be invoked later -- confirm against ipsq_try_enter()).
 *
 * This expands to several statements, one of which is a `return', so it
 * may only be used as a complete statement directly inside a braced block
 * of a function returning int.  Every parameter is parenthesized wherever
 * it appears inside an expression.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}
158 
/*
 * Leave `ipsq' via ipsq_exit() if it is non-NULL; a NULL `ipsq' is a no-op.
 *
 * The expansion is a bare `if' statement that already ends in a semicolon,
 * so do not use it as the body of an outer if/else.
 */
#define	IPSQ_EXIT(ipsq)	\
	if ((ipsq) != NULL)	\
		ipsq_exit((ipsq), B_TRUE, B_TRUE);
162 
/*
 * Register one more walker of connp's ilg array by bumping
 * conn_ilg_walker_cnt.  Paired with ILG_WALKER_RELE().  The expansion is
 * fully parenthesized so it is safe inside a larger expression.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)
164 
/*
 * Drop one walker reference on connp's ilg array.  When the last walker
 * goes away, conn_ilg_reap() is called on connp.
 */
#define	ILG_WALKER_RELE(connp)					\
	{							\
		if (--(connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);			\
	}
171 
172 static void
173 conn_ilg_reap(conn_t *connp)
174 {
175 	int	to;
176 	int	from;
177 
178 	ASSERT(MUTEX_HELD(&connp->conn_lock));
179 
180 	to = 0;
181 	from = 0;
182 	while (from < connp->conn_ilg_inuse) {
183 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
184 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
185 			from++;
186 			continue;
187 		}
188 		if (to != from)
189 			connp->conn_ilg[to] = connp->conn_ilg[from];
190 		to++;
191 		from++;
192 	}
193 
194 	connp->conn_ilg_inuse = to;
195 
196 	if (connp->conn_ilg_inuse == 0) {
197 		mi_free((char *)connp->conn_ilg);
198 		connp->conn_ilg = NULL;
199 		cv_broadcast(&connp->conn_refcv);
200 	}
201 }
202 
/*
 * Allocate an array of `number' objects of type `structure' with
 * mi_zalloc() (zero-filled, going by the allocator's name) and yield it as
 * a `structure *'.  The result is whatever mi_zalloc() returns on failure
 * (callers in this file test it against NULL).
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc((number) * sizeof (structure)))

/* Number of ilg_t slots by which a conn's ilg array grows at a time. */
#define	ILG_ALLOC_CHUNK	16
207 
208 /*
209  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
210  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
211  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
212  * returned ilg).  Returns NULL on failure (ENOMEM).
213  *
214  * Assumes connp->conn_lock is held.
215  */
216 static ilg_t *
217 conn_ilg_alloc(conn_t *connp)
218 {
219 	ilg_t *new;
220 	int curcnt;
221 
222 	ASSERT(MUTEX_HELD(&connp->conn_lock));
223 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
224 
225 	if (connp->conn_ilg == NULL) {
226 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
227 		if (connp->conn_ilg == NULL)
228 			return (NULL);
229 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
230 		connp->conn_ilg_inuse = 0;
231 	}
232 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
233 		curcnt = connp->conn_ilg_allocated;
234 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
235 		if (new == NULL)
236 			return (NULL);
237 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
238 		mi_free((char *)connp->conn_ilg);
239 		connp->conn_ilg = new;
240 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
241 	}
242 
243 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
244 }
245 
/*
 * Scratch state threaded through ipcl_walk() by ilm_gen_filter() and
 * filled in by ilm_bld_flists() while the interface filter for one ilm
 * is being recomputed.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being built */
	int		fbld_in_cnt;	/* matching ilgs in INCLUDE mode */
	int		fbld_ex_cnt;	/* matching ilgs not in INCLUDE mode */
	slist_t		fbld_in;	/* union of the include filters */
	slist_t		fbld_ex;	/* intersection of exclude filters */
	boolean_t	fbld_in_overflow; /* set via l_union_in_a() */
} ilm_fbld_t;
254 
255 static void
256 ilm_bld_flists(conn_t *conn, void *arg)
257 {
258 	int i;
259 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
260 	ilm_t *ilm = fbld->fbld_ilm;
261 	in6_addr_t *v6group = &ilm->ilm_v6addr;
262 
263 	if (conn->conn_ilg_inuse == 0)
264 		return;
265 
266 	/*
267 	 * Since we can't break out of the ipcl_walk once started, we still
268 	 * have to look at every conn.  But if we've already found one
269 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
270 	 * ilgs--that will be our state.
271 	 */
272 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
273 		return;
274 
275 	/*
276 	 * Check this conn's ilgs to see if any are interested in our
277 	 * ilm (group, interface match).  If so, update the master
278 	 * include and exclude lists we're building in the fbld struct
279 	 * with this ilg's filter info.
280 	 */
281 	mutex_enter(&conn->conn_lock);
282 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
283 		ilg_t *ilg = &conn->conn_ilg[i];
284 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
285 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
286 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
287 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
288 				fbld->fbld_in_cnt++;
289 				if (!fbld->fbld_in_overflow)
290 					l_union_in_a(&fbld->fbld_in,
291 					    ilg->ilg_filter,
292 					    &fbld->fbld_in_overflow);
293 			} else {
294 				fbld->fbld_ex_cnt++;
295 				/*
296 				 * On the first exclude list, don't try to do
297 				 * an intersection, as the master exclude list
298 				 * is intentionally empty.  If the master list
299 				 * is still empty on later iterations, that
300 				 * means we have at least one ilg with an empty
301 				 * exclude list, so that should be reflected
302 				 * when we take the intersection.
303 				 */
304 				if (fbld->fbld_ex_cnt == 1) {
305 					if (ilg->ilg_filter != NULL)
306 						l_copy(ilg->ilg_filter,
307 						    &fbld->fbld_ex);
308 				} else {
309 					l_intersection_in_a(&fbld->fbld_ex,
310 					    ilg->ilg_filter);
311 				}
312 			}
313 			/* there will only be one match, so break now. */
314 			break;
315 		}
316 	}
317 	mutex_exit(&conn->conn_lock);
318 }
319 
320 static void
321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
322 {
323 	ilm_fbld_t fbld;
324 
325 	fbld.fbld_ilm = ilm;
326 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
327 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
328 	fbld.fbld_in_overflow = B_FALSE;
329 
330 	/* first, construct our master include and exclude lists */
331 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld);
332 
333 	/* now use those master lists to generate the interface filter */
334 
335 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
336 	if (fbld.fbld_in_overflow) {
337 		*fmode = MODE_IS_EXCLUDE;
338 		flist->sl_numsrc = 0;
339 		return;
340 	}
341 
342 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
343 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
344 		*fmode = MODE_IS_INCLUDE;
345 		flist->sl_numsrc = 0;
346 		return;
347 	}
348 
349 	/*
350 	 * If there are no exclude lists, then the interface filter
351 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
352 	 * exclude list makes the interface filter EXCLUDE, with its
353 	 * filter list equal to (fbld_ex - fbld_in).
354 	 */
355 	if (fbld.fbld_ex_cnt == 0) {
356 		*fmode = MODE_IS_INCLUDE;
357 		l_copy(&fbld.fbld_in, flist);
358 	} else {
359 		*fmode = MODE_IS_EXCLUDE;
360 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
361 	}
362 }
363 
364 /*
365  * If the given interface has failed, choose a new one to join on so
366  * that we continue to receive packets.  ilg_orig_ifindex remembers
367  * what the application used to join on so that we know the ilg to
368  * delete even though we change the ill here.  Callers will store the
369  * ilg returned from this function in ilg_ill.  Thus when we receive
370  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
371  *
372  * This function must be called as writer so we can walk the group
373  * list and examine flags without holding a lock.
374  */
375 ill_t *
376 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
377 {
378 	ill_t	*till;
379 	ill_group_t *illgrp = ill->ill_group;
380 
381 	ASSERT(IAM_WRITER_ILL(ill));
382 
383 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
384 		return (ill);
385 
386 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
387 		return (ill);
388 
389 	till = illgrp->illgrp_ill;
390 	while (till != NULL &&
391 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
392 		till = till->ill_group_next;
393 	}
394 	if (till != NULL)
395 		return (till);
396 
397 	return (ill);
398 }
399 
400 static int
401 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
402     boolean_t isv6)
403 {
404 	mcast_record_t fmode;
405 	slist_t *flist;
406 	boolean_t fdefault;
407 	char buf[INET6_ADDRSTRLEN];
408 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
409 
410 	/*
411 	 * There are several cases where the ilm's filter state
412 	 * defaults to (EXCLUDE, NULL):
413 	 *	- we've had previous joins without associated ilgs
414 	 *	- this join has no associated ilg
415 	 *	- the ilg's filter state is (EXCLUDE, NULL)
416 	 */
417 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
418 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
419 
420 	/* attempt mallocs (if needed) before doing anything else */
421 	if ((flist = l_alloc()) == NULL)
422 		return (ENOMEM);
423 	if (!fdefault && ilm->ilm_filter == NULL) {
424 		ilm->ilm_filter = l_alloc();
425 		if (ilm->ilm_filter == NULL) {
426 			l_free(flist);
427 			return (ENOMEM);
428 		}
429 	}
430 
431 	if (ilgstat != ILGSTAT_CHANGE)
432 		ilm->ilm_refcnt++;
433 
434 	if (ilgstat == ILGSTAT_NONE)
435 		ilm->ilm_no_ilg_cnt++;
436 
437 	/*
438 	 * Determine new filter state.  If it's not the default
439 	 * (EXCLUDE, NULL), we must walk the conn list to find
440 	 * any ilgs interested in this group, and re-build the
441 	 * ilm filter.
442 	 */
443 	if (fdefault) {
444 		fmode = MODE_IS_EXCLUDE;
445 		flist->sl_numsrc = 0;
446 	} else {
447 		ilm_gen_filter(ilm, &fmode, flist);
448 	}
449 
450 	/* make sure state actually changed; nothing to do if not. */
451 	if ((ilm->ilm_fmode == fmode) &&
452 	    !lists_are_different(ilm->ilm_filter, flist)) {
453 		l_free(flist);
454 		return (0);
455 	}
456 
457 	/* send the state change report */
458 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
459 		if (isv6)
460 			mld_statechange(ilm, fmode, flist);
461 		else
462 			igmp_statechange(ilm, fmode, flist);
463 	}
464 
465 	/* update the ilm state */
466 	ilm->ilm_fmode = fmode;
467 	if (flist->sl_numsrc > 0)
468 		l_copy(flist, ilm->ilm_filter);
469 	else
470 		CLEAR_SLIST(ilm->ilm_filter);
471 
472 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
473 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
474 
475 	l_free(flist);
476 	return (0);
477 }
478 
479 static int
480 ilm_update_del(ilm_t *ilm, boolean_t isv6)
481 {
482 	mcast_record_t fmode;
483 	slist_t *flist;
484 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
485 
486 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
487 	    ilm->ilm_refcnt));
488 
489 	if ((flist = l_alloc()) == NULL)
490 		return (ENOMEM);
491 
492 	/*
493 	 * If present, the ilg in question has already either been
494 	 * updated or removed from our list; so all we need to do
495 	 * now is walk the list to update the ilm filter state.
496 	 *
497 	 * Skip the list walk if we have any no-ilg joins, which
498 	 * cause the filter state to revert to (EXCLUDE, NULL).
499 	 */
500 	if (ilm->ilm_no_ilg_cnt != 0) {
501 		fmode = MODE_IS_EXCLUDE;
502 		flist->sl_numsrc = 0;
503 	} else {
504 		ilm_gen_filter(ilm, &fmode, flist);
505 	}
506 
507 	/* check to see if state needs to be updated */
508 	if ((ilm->ilm_fmode == fmode) &&
509 	    (!lists_are_different(ilm->ilm_filter, flist))) {
510 		l_free(flist);
511 		return (0);
512 	}
513 
514 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
515 		if (isv6)
516 			mld_statechange(ilm, fmode, flist);
517 		else
518 			igmp_statechange(ilm, fmode, flist);
519 	}
520 
521 	ilm->ilm_fmode = fmode;
522 	if (flist->sl_numsrc > 0) {
523 		if (ilm->ilm_filter == NULL) {
524 			ilm->ilm_filter = l_alloc();
525 			if (ilm->ilm_filter == NULL) {
526 				char buf[INET6_ADDRSTRLEN];
527 				ip1dbg(("ilm_update_del: failed to alloc ilm "
528 				    "filter; no source filtering for %s on %s",
529 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
530 				    buf, sizeof (buf)), ill->ill_name));
531 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
532 				l_free(flist);
533 				return (0);
534 			}
535 		}
536 		l_copy(flist, ilm->ilm_filter);
537 	} else {
538 		CLEAR_SLIST(ilm->ilm_filter);
539 	}
540 
541 	l_free(flist);
542 	return (0);
543 }
544 
545 /*
546  * INADDR_ANY means all multicast addresses. This is only used
547  * by the multicast router.
548  * INADDR_ANY is stored as IPv6 unspecified addr.
549  */
550 int
551 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
552     mcast_record_t ilg_fmode, slist_t *ilg_flist)
553 {
554 	ill_t	*ill = ipif->ipif_ill;
555 	ilm_t 	*ilm;
556 	in6_addr_t v6group;
557 	int	ret;
558 
559 	ASSERT(IAM_WRITER_IPIF(ipif));
560 
561 	if (!CLASSD(group) && group != INADDR_ANY)
562 		return (EINVAL);
563 
564 	/*
565 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
566 	 */
567 	if (group == INADDR_ANY)
568 		v6group = ipv6_all_zeros;
569 	else
570 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
571 
572 	ilm = ilm_lookup_ipif(ipif, group);
573 	if (ilm != NULL)
574 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
575 
576 	/*
577 	 * ilms are associated with ipifs in IPv4. It moves with the
578 	 * ipif if the ipif moves to a new ill when the interface
579 	 * fails. Thus we really don't check whether the ipif_ill
580 	 * has failed like in IPv6. If it has FAILED the ipif
581 	 * will move (daemon will move it) and hence the ilm, if the
582 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
583 	 * we continue to receive in the same place even if the
584 	 * interface fails.
585 	 */
586 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
587 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
588 	if (ilm == NULL)
589 		return (ENOMEM);
590 
591 	if (group == INADDR_ANY) {
592 		/*
593 		 * Check how many ipif's have members in this group -
594 		 * if more then one we should not tell the driver to join
595 		 * this time
596 		 */
597 		if (ilm_numentries_v6(ill, &v6group) > 1)
598 			return (0);
599 		if (ill->ill_group == NULL)
600 			ret = ip_join_allmulti(ipif);
601 		else
602 			ret = ill_nominate_mcast_rcv(ill->ill_group);
603 		if (ret != 0)
604 			ilm_delete(ilm);
605 		return (ret);
606 	}
607 
608 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
609 		igmp_joingroup(ilm);
610 
611 	if (ilm_numentries_v6(ill, &v6group) > 1)
612 		return (0);
613 
614 	ret = ip_ll_addmulti_v6(ipif, &v6group);
615 	if (ret != 0)
616 		ilm_delete(ilm);
617 	return (ret);
618 }
619 
620 /*
621  * The unspecified address means all multicast addresses.
622  * This is only used by the multicast router.
623  *
624  * ill identifies the interface to join on; it may not match the
625  * interface requested by the application of a failover has taken
626  * place.  orig_ifindex always identifies the interface requested
627  * by the app.
628  *
629  * ilgstat tells us if there's an ilg associated with this join,
630  * and if so, if it's a new ilg or a change to an existing one.
631  * ilg_fmode and ilg_flist give us the current filter state of
632  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
633  */
634 int
635 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
636     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
637     slist_t *ilg_flist)
638 {
639 	ilm_t	*ilm;
640 	int	ret;
641 
642 	ASSERT(IAM_WRITER_ILL(ill));
643 
644 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
645 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
646 		return (EINVAL);
647 	}
648 
649 	/*
650 	 * An ilm is uniquely identified by the tuple of (group, ill,
651 	 * orig_ill).  group is the multicast group address, ill is
652 	 * the interface on which it is currently joined, and orig_ill
653 	 * is the interface on which the application requested the
654 	 * join.  orig_ill and ill are the same unless orig_ill has
655 	 * failed over.
656 	 *
657 	 * Both orig_ill and ill are required, which means we may have
658 	 * 2 ilms on an ill for the same group, but with different
659 	 * orig_ills.  These must be kept separate, so that when failback
660 	 * occurs, the appropriate ilms are moved back to their orig_ill
661 	 * without disrupting memberships on the ill to which they had
662 	 * been moved.
663 	 *
664 	 * In order to track orig_ill, we store orig_ifindex in the
665 	 * ilm and ilg.
666 	 */
667 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
668 	if (ilm != NULL)
669 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
670 
671 	/*
672 	 * We need to remember where the application really wanted
673 	 * to join. This will be used later if we want to failback
674 	 * to the original interface.
675 	 */
676 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
677 	    ilg_flist, orig_ifindex, zoneid);
678 	if (ilm == NULL)
679 		return (ENOMEM);
680 
681 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
682 		/*
683 		 * Check how many ipif's that have members in this group -
684 		 * if more then one we should not tell the driver to join
685 		 * this time
686 		 */
687 		if (ilm_numentries_v6(ill, v6group) > 1)
688 			return (0);
689 		if (ill->ill_group == NULL)
690 			ret = ip_join_allmulti(ill->ill_ipif);
691 		else
692 			ret = ill_nominate_mcast_rcv(ill->ill_group);
693 
694 		if (ret != 0)
695 			ilm_delete(ilm);
696 		return (ret);
697 	}
698 
699 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
700 		mld_joingroup(ilm);
701 
702 	/*
703 	 * If we have more then one we should not tell the driver
704 	 * to join this time.
705 	 */
706 	if (ilm_numentries_v6(ill, v6group) > 1)
707 		return (0);
708 
709 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
710 	if (ret != 0)
711 		ilm_delete(ilm);
712 	return (ret);
713 }
714 
715 /*
716  * Send a multicast request to the driver for enabling multicast reception
717  * for v6groupp address. The caller has already checked whether it is
718  * appropriate to send one or not.
719  */
720 int
721 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
722 {
723 	mblk_t	*mp;
724 	uint32_t addrlen, addroff;
725 	char	group_buf[INET6_ADDRSTRLEN];
726 
727 	ASSERT(IAM_WRITER_ILL(ill));
728 
729 	/*
730 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
731 	 * on.
732 	 */
733 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
734 	    &addrlen, &addroff);
735 	if (!mp)
736 		return (ENOMEM);
737 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
738 		ipaddr_t v4group;
739 
740 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
741 		/*
742 		 * NOTE!!!
743 		 * The "addroff" passed in here was calculated by
744 		 * ill_create_dl(), and will be used by ill_create_squery()
745 		 * to perform some twisted coding magic. It is the offset
746 		 * into the dl_xxx_req of the hw addr. Here, it will be
747 		 * added to b_wptr - b_rptr to create a magic number that
748 		 * is not an offset into this squery mblk.
749 		 * The actual hardware address will be accessed only in the
750 		 * dl_xxx_req, not in the squery. More importantly,
751 		 * that hardware address can *only* be accessed in this
752 		 * mblk chain by calling mi_offset_param_c(), which uses
753 		 * the magic number in the squery hw offset field to go
754 		 * to the *next* mblk (the dl_xxx_req), subtract the
755 		 * (b_wptr - b_rptr), and find the actual offset into
756 		 * the dl_xxx_req.
757 		 * Any method that depends on using the
758 		 * offset field in the dl_disabmulti_req or squery
759 		 * to find either hardware address will similarly fail.
760 		 *
761 		 * Look in ar_entry_squery() in arp.c to see how this offset
762 		 * is used.
763 		 */
764 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
765 		if (!mp)
766 			return (ENOMEM);
767 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
768 		    inet_ntop(AF_INET6, v6groupp, group_buf,
769 		    sizeof (group_buf)),
770 		    ill->ill_name));
771 		putnext(ill->ill_rq, mp);
772 	} else {
773 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on"
774 		    " %s\n",
775 		    inet_ntop(AF_INET6, v6groupp, group_buf,
776 		    sizeof (group_buf)),
777 		    ill->ill_name));
778 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
779 	}
780 	return (0);
781 }
782 
783 /*
784  * Send a multicast request to the driver for enabling multicast
785  * membership for v6group if appropriate.
786  */
787 static int
788 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
789 {
790 	ill_t	*ill = ipif->ipif_ill;
791 
792 	ASSERT(IAM_WRITER_IPIF(ipif));
793 
794 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
795 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
796 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
797 		return (0);	/* Must be IRE_IF_NORESOLVER */
798 	}
799 
800 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
801 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
802 		return (0);
803 	}
804 	if (ill->ill_ipif_up_count == 0) {
805 		/*
806 		 * Nobody there. All multicast addresses will be re-joined
807 		 * when we get the DL_BIND_ACK bringing the interface up.
808 		 */
809 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
810 		return (0);
811 	}
812 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
813 }
814 
815 /*
816  * INADDR_ANY means all multicast addresses. This is only used
817  * by the multicast router.
818  * INADDR_ANY is stored as the IPv6 unspecifed addr.
819  */
820 int
821 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
822 {
823 	ill_t	*ill = ipif->ipif_ill;
824 	ilm_t *ilm;
825 	in6_addr_t v6group;
826 	int	ret;
827 
828 	ASSERT(IAM_WRITER_IPIF(ipif));
829 
830 	if (!CLASSD(group) && group != INADDR_ANY)
831 		return (EINVAL);
832 
833 	/*
834 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
835 	 */
836 	if (group == INADDR_ANY)
837 		v6group = ipv6_all_zeros;
838 	else
839 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
840 
841 	/*
842 	 * Look for a match on the ipif.
843 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
844 	 */
845 	ilm = ilm_lookup_ipif(ipif, group);
846 	if (ilm == NULL)
847 		return (ENOENT);
848 
849 	/* Update counters */
850 	if (no_ilg)
851 		ilm->ilm_no_ilg_cnt--;
852 
853 	if (leaving)
854 		ilm->ilm_refcnt--;
855 
856 	if (ilm->ilm_refcnt > 0)
857 		return (ilm_update_del(ilm, B_FALSE));
858 
859 	if (group == INADDR_ANY) {
860 		ilm_delete(ilm);
861 		/*
862 		 * Check how many ipif's that have members in this group -
863 		 * if there are still some left then don't tell the driver
864 		 * to drop it.
865 		 */
866 		if (ilm_numentries_v6(ill, &v6group) != 0)
867 			return (0);
868 
869 		/*
870 		 * If we never joined, then don't leave.  This can happen
871 		 * if we're in an IPMP group, since only one ill per IPMP
872 		 * group receives all multicast packets.
873 		 */
874 		if (!ill->ill_join_allmulti) {
875 			ASSERT(ill->ill_group != NULL);
876 			return (0);
877 		}
878 
879 		ret = ip_leave_allmulti(ipif);
880 		if (ill->ill_group != NULL)
881 			(void) ill_nominate_mcast_rcv(ill->ill_group);
882 		return (ret);
883 	}
884 
885 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
886 		igmp_leavegroup(ilm);
887 
888 	ilm_delete(ilm);
889 	/*
890 	 * Check how many ipif's that have members in this group -
891 	 * if there are still some left then don't tell the driver
892 	 * to drop it.
893 	 */
894 	if (ilm_numentries_v6(ill, &v6group) != 0)
895 		return (0);
896 	return (ip_ll_delmulti_v6(ipif, &v6group));
897 }
898 
899 /*
900  * The unspecified address means all multicast addresses.
901  * This is only used by the multicast router.
902  */
903 int
904 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
905     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
906 {
907 	ipif_t	*ipif;
908 	ilm_t *ilm;
909 	int	ret;
910 
911 	ASSERT(IAM_WRITER_ILL(ill));
912 
913 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
914 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
915 		return (EINVAL);
916 
917 	/*
918 	 * Look for a match on the ill.
919 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
920 	 *
921 	 * Similar to ip_addmulti_v6, we should always look using
922 	 * the orig_ifindex.
923 	 *
924 	 * 1) If orig_ifindex is different from ill's ifindex
925 	 *    we should have an ilm with orig_ifindex created in
926 	 *    ip_addmulti_v6. We should delete that here.
927 	 *
928 	 * 2) If orig_ifindex is same as ill's ifindex, we should
929 	 *    not delete the ilm that is temporarily here because of
930 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
931 	 *    different from ill's ifindex.
932 	 *
933 	 * Thus, always lookup using orig_ifindex.
934 	 */
935 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
936 	if (ilm == NULL)
937 		return (ENOENT);
938 
939 	ASSERT(ilm->ilm_ill == ill);
940 
941 	ipif = ill->ill_ipif;
942 
943 	/* Update counters */
944 	if (no_ilg)
945 		ilm->ilm_no_ilg_cnt--;
946 
947 	if (leaving)
948 		ilm->ilm_refcnt--;
949 
950 	if (ilm->ilm_refcnt > 0)
951 		return (ilm_update_del(ilm, B_TRUE));
952 
953 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
954 		ilm_delete(ilm);
955 		/*
956 		 * Check how many ipif's that have members in this group -
957 		 * if there are still some left then don't tell the driver
958 		 * to drop it.
959 		 */
960 		if (ilm_numentries_v6(ill, v6group) != 0)
961 			return (0);
962 
963 		/*
964 		 * If we never joined, then don't leave.  This can happen
965 		 * if we're in an IPMP group, since only one ill per IPMP
966 		 * group receives all multicast packets.
967 		 */
968 		if (!ill->ill_join_allmulti) {
969 			ASSERT(ill->ill_group != NULL);
970 			return (0);
971 		}
972 
973 		ret = ip_leave_allmulti(ipif);
974 		if (ill->ill_group != NULL)
975 			(void) ill_nominate_mcast_rcv(ill->ill_group);
976 		return (ret);
977 	}
978 
979 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
980 		mld_leavegroup(ilm);
981 
982 	ilm_delete(ilm);
983 	/*
984 	 * Check how many ipif's that have members in this group -
985 	 * if there are still some left then don't tell the driver
986 	 * to drop it.
987 	 */
988 	if (ilm_numentries_v6(ill, v6group) != 0)
989 		return (0);
990 	return (ip_ll_delmulti_v6(ipif, v6group));
991 }
992 
993 /*
994  * Send a multicast request to the driver for disabling multicast reception
995  * for v6groupp address. The caller has already checked whether it is
996  * appropriate to send one or not.
997  */
998 int
999 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1000 {
1001 	mblk_t	*mp;
1002 	char	group_buf[INET6_ADDRSTRLEN];
1003 	uint32_t	addrlen, addroff;
1004 
1005 	ASSERT(IAM_WRITER_ILL(ill));
1006 	/*
1007 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1008 	 * on.
1009 	 */
1010 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1011 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1012 
1013 	if (!mp)
1014 		return (ENOMEM);
1015 
1016 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1017 		ipaddr_t v4group;
1018 
1019 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1020 		/*
1021 		 * NOTE!!!
1022 		 * The "addroff" passed in here was calculated by
1023 		 * ill_create_dl(), and will be used by ill_create_squery()
1024 		 * to perform some twisted coding magic. It is the offset
1025 		 * into the dl_xxx_req of the hw addr. Here, it will be
1026 		 * added to b_wptr - b_rptr to create a magic number that
1027 		 * is not an offset into this mblk.
1028 		 *
1029 		 * Please see the comment in ip_ll_send)enabmulti_req()
1030 		 * for a complete explanation.
1031 		 *
1032 		 * Look in ar_entry_squery() in arp.c to see how this offset
1033 		 * is used.
1034 		 */
1035 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1036 		if (!mp)
1037 			return (ENOMEM);
1038 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1039 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1040 		    sizeof (group_buf)),
1041 		    ill->ill_name));
1042 		putnext(ill->ill_rq, mp);
1043 	} else {
1044 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
1045 		    " %s\n",
1046 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1047 		    sizeof (group_buf)),
1048 		    ill->ill_name));
1049 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1050 	}
1051 	return (0);
1052 }
1053 
1054 /*
1055  * Send a multicast request to the driver for disabling multicast
1056  * membership for v6group if appropriate.
1057  */
1058 static int
1059 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1060 {
1061 	ill_t	*ill = ipif->ipif_ill;
1062 
1063 	ASSERT(IAM_WRITER_IPIF(ipif));
1064 
1065 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1066 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1067 		return (0);	/* Must be IRE_IF_NORESOLVER */
1068 	}
1069 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1070 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1071 		return (0);
1072 	}
1073 	if (ill->ill_ipif_up_count == 0) {
1074 		/*
1075 		 * Nobody there. All multicast addresses will be re-joined
1076 		 * when we get the DL_BIND_ACK bringing the interface up.
1077 		 */
1078 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1079 		return (0);
1080 	}
1081 	return (ip_ll_send_disabmulti_req(ill, v6group));
1082 }
1083 
1084 /*
1085  * Make the driver pass up all multicast packets
1086  *
1087  * With ill groups, the caller makes sure that there is only
1088  * one ill joining the allmulti group.
1089  */
1090 int
1091 ip_join_allmulti(ipif_t *ipif)
1092 {
1093 	ill_t	*ill = ipif->ipif_ill;
1094 	mblk_t	*mp;
1095 	uint32_t	addrlen, addroff;
1096 
1097 	ASSERT(IAM_WRITER_IPIF(ipif));
1098 
1099 	if (ill->ill_ipif_up_count == 0) {
1100 		/*
1101 		 * Nobody there. All multicast addresses will be re-joined
1102 		 * when we get the DL_BIND_ACK bringing the interface up.
1103 		 */
1104 		return (0);
1105 	}
1106 
1107 	ASSERT(!ill->ill_join_allmulti);
1108 
1109 	/*
1110 	 * Create a DL_PROMISCON_REQ message and send it directly to
1111 	 * the DLPI provider.  We don't need to do this for certain
1112 	 * media types for which we never need to turn promiscuous
1113 	 * mode on.
1114 	 */
1115 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1116 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1117 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1118 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1119 		if (mp == NULL)
1120 			return (ENOMEM);
1121 		putnext(ill->ill_wq, mp);
1122 	}
1123 
1124 	mutex_enter(&ill->ill_lock);
1125 	ill->ill_join_allmulti = B_TRUE;
1126 	mutex_exit(&ill->ill_lock);
1127 	return (0);
1128 }
1129 
1130 /*
1131  * Make the driver stop passing up all multicast packets
1132  *
1133  * With ill groups, we need to nominate some other ill as
1134  * this ipif->ipif_ill is leaving the group.
1135  */
1136 int
1137 ip_leave_allmulti(ipif_t *ipif)
1138 {
1139 	ill_t	*ill = ipif->ipif_ill;
1140 	mblk_t	*mp;
1141 	uint32_t	addrlen, addroff;
1142 
1143 	ASSERT(IAM_WRITER_IPIF(ipif));
1144 
1145 	if (ill->ill_ipif_up_count == 0) {
1146 		/*
1147 		 * Nobody there. All multicast addresses will be re-joined
1148 		 * when we get the DL_BIND_ACK bringing the interface up.
1149 		 */
1150 		return (0);
1151 	}
1152 
1153 	ASSERT(ill->ill_join_allmulti);
1154 
1155 	/*
1156 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1157 	 * the DLPI provider.  We don't need to do this for certain
1158 	 * media types for which we never need to turn promiscuous
1159 	 * mode on.
1160 	 */
1161 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1162 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1163 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1164 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1165 		if (mp == NULL)
1166 			return (ENOMEM);
1167 		putnext(ill->ill_wq, mp);
1168 	}
1169 
1170 	mutex_enter(&ill->ill_lock);
1171 	ill->ill_join_allmulti = B_FALSE;
1172 	mutex_exit(&ill->ill_lock);
1173 	return (0);
1174 }
1175 
1176 /*
1177  * Copy mp_orig and pass it in as a local message.
1178  */
1179 void
1180 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1181     zoneid_t zoneid)
1182 {
1183 	mblk_t	*mp;
1184 	mblk_t	*ipsec_mp;
1185 	ipha_t	*iph;
1186 
1187 	if (DB_TYPE(mp_orig) == M_DATA &&
1188 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1189 		uint_t hdrsz;
1190 
1191 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1192 		    sizeof (udpha_t);
1193 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1194 
1195 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1196 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1197 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1198 			mp->b_wptr += hdrsz;
1199 			mp->b_cont = mp_orig;
1200 			mp_orig->b_rptr += hdrsz;
1201 			if (MBLKL(mp_orig) == 0) {
1202 				mp->b_cont = mp_orig->b_cont;
1203 				mp_orig->b_cont = NULL;
1204 				freeb(mp_orig);
1205 			}
1206 		} else if (mp != NULL) {
1207 			freeb(mp);
1208 			mp = NULL;
1209 		}
1210 	} else {
1211 		mp = ip_copymsg(mp_orig);
1212 	}
1213 
1214 	if (mp == NULL)
1215 		return;
1216 	if (DB_TYPE(mp) == M_CTL) {
1217 		ipsec_mp = mp;
1218 		mp = mp->b_cont;
1219 	} else {
1220 		ipsec_mp = mp;
1221 	}
1222 
1223 	iph = (ipha_t *)mp->b_rptr;
1224 
1225 	DTRACE_PROBE4(ip4__loopback__out__start,
1226 	    ill_t *, NULL, ill_t *, ill,
1227 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1228 
1229 	FW_HOOKS(ip4_loopback_out_event, ipv4firewall_loopback_out,
1230 	    MSG_FWCOOKED_OUT, NULL, ill, iph, ipsec_mp, mp);
1231 
1232 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1233 
1234 	if (ipsec_mp != NULL)
1235 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1236 		    fanout_flags, zoneid);
1237 }
1238 
1239 static area_t	ip_aresq_template = {
1240 	AR_ENTRY_SQUERY,		/* cmd */
1241 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1242 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1243 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1244 	sizeof (area_t),			/* proto addr offset */
1245 	IP_ADDR_LEN,			/* proto addr_length */
1246 	0,				/* proto mask offset */
1247 	/* Rest is initialized when used */
1248 	0,				/* flags */
1249 	0,				/* hw addr offset */
1250 	0,				/* hw addr length */
1251 };
1252 
1253 static mblk_t *
1254 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1255     uint32_t addroff, mblk_t *mp_tail)
1256 {
1257 	mblk_t	*mp;
1258 	area_t	*area;
1259 
1260 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1261 				(caddr_t)&ipaddr);
1262 	if (!mp) {
1263 		freemsg(mp_tail);
1264 		return (NULL);
1265 	}
1266 	area = (area_t *)mp->b_rptr;
1267 	area->area_hw_addr_length = addrlen;
1268 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1269 	/*
1270 	 * NOTE!
1271 	 *
1272 	 * The area_hw_addr_offset, as can be seen, does not hold the
1273 	 * actual hardware address offset. Rather, it holds the offset
1274 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1275 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1276 	 * mi_offset_paramc() to find the hardware address in the
1277 	 * *second* mblk (dl_xxx_req), not this mblk.
1278 	 *
1279 	 * Using mi_offset_paramc() is thus the *only* way to access
1280 	 * the dl_xxx_hw address.
1281 	 *
1282 	 * The squery hw address should *not* be accessed.
1283 	 *
1284 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1285 	 */
1286 
1287 	mp->b_cont = mp_tail;
1288 	return (mp);
1289 }
1290 
1291 /*
1292  * Create a dlpi message with room for phys+sap. When we come back in
1293  * ip_wput_ctl() we will strip the sap for those primitives which
1294  * only need a physical address.
1295  */
1296 static mblk_t *
1297 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1298     uint32_t *addr_lenp, uint32_t *addr_offp)
1299 {
1300 	mblk_t	*mp;
1301 	uint32_t	hw_addr_length;
1302 	char		*cp;
1303 	uint32_t	offset;
1304 	uint32_t 	size;
1305 
1306 	*addr_lenp = *addr_offp = 0;
1307 
1308 	hw_addr_length = ill->ill_phys_addr_length;
1309 	if (!hw_addr_length) {
1310 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1311 		return (NULL);
1312 	}
1313 
1314 	size = length;
1315 	switch (dl_primitive) {
1316 	case DL_ENABMULTI_REQ:
1317 	case DL_DISABMULTI_REQ:
1318 		size += hw_addr_length;
1319 		break;
1320 	case DL_PROMISCON_REQ:
1321 	case DL_PROMISCOFF_REQ:
1322 		break;
1323 	default:
1324 		return (NULL);
1325 	}
1326 	mp = allocb(size, BPRI_HI);
1327 	if (!mp)
1328 		return (NULL);
1329 	mp->b_wptr += size;
1330 	mp->b_datap->db_type = M_PROTO;
1331 
1332 	cp = (char *)mp->b_rptr;
1333 	offset = length;
1334 
1335 	switch (dl_primitive) {
1336 	case DL_ENABMULTI_REQ: {
1337 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1338 
1339 		dl->dl_primitive = dl_primitive;
1340 		dl->dl_addr_offset = offset;
1341 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1342 		*addr_offp = offset;
1343 		break;
1344 	}
1345 	case DL_DISABMULTI_REQ: {
1346 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1347 
1348 		dl->dl_primitive = dl_primitive;
1349 		dl->dl_addr_offset = offset;
1350 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1351 		*addr_offp = offset;
1352 		break;
1353 	}
1354 	case DL_PROMISCON_REQ:
1355 	case DL_PROMISCOFF_REQ: {
1356 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1357 
1358 		dl->dl_primitive = dl_primitive;
1359 		dl->dl_level = DL_PROMISC_MULTI;
1360 		break;
1361 	}
1362 	}
1363 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1364 		*addr_lenp, *addr_offp));
1365 	return (mp);
1366 }
1367 
1368 void
1369 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1370 {
1371 	ill_t	*ill = (ill_t *)q->q_ptr;
1372 	mblk_t	*mp = mp_orig;
1373 	area_t	*area;
1374 
1375 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1376 	if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) ||
1377 	    mp->b_cont == NULL) {
1378 		putnext(q, mp);
1379 		return;
1380 	}
1381 	area = (area_t *)mp->b_rptr;
1382 	if (area->area_cmd != AR_ENTRY_SQUERY) {
1383 		putnext(q, mp);
1384 		return;
1385 	}
1386 	mp = mp->b_cont;
1387 	/*
1388 	 * Update dl_addr_length and dl_addr_offset for primitives that
1389 	 * have physical addresses as opposed to full saps
1390 	 */
1391 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1392 	case DL_ENABMULTI_REQ:
1393 		/* Track the state if this is the first enabmulti */
1394 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1395 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1396 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1397 		break;
1398 	case DL_DISABMULTI_REQ:
1399 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1400 		break;
1401 	default:
1402 		ip1dbg(("ip_wput_ctl: default\n"));
1403 		break;
1404 	}
1405 	freeb(mp_orig);
1406 	putnext(q, mp);
1407 }
1408 
1409 /*
1410  * Rejoin any groups which have been explicitly joined by the application (we
1411  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1412  * bringing the interface down).  Note that because groups can be joined and
1413  * left while an interface is down, this may not be the same set of groups
1414  * that we left in ill_leave_multicast().
1415  */
1416 void
1417 ill_recover_multicast(ill_t *ill)
1418 {
1419 	ilm_t	*ilm;
1420 	char    addrbuf[INET6_ADDRSTRLEN];
1421 
1422 	ASSERT(IAM_WRITER_ILL(ill));
1423 
1424 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1425 		/*
1426 		 * Check how many ipif's that have members in this group -
1427 		 * if more then one we make sure that this entry is first
1428 		 * in the list.
1429 		 */
1430 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1431 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1432 			continue;
1433 		ip1dbg(("ill_recover_multicast: %s\n",
1434 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1435 		    sizeof (addrbuf))));
1436 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1437 			if (ill->ill_group == NULL) {
1438 				(void) ip_join_allmulti(ill->ill_ipif);
1439 			} else {
1440 				/*
1441 				 * We don't want to join on this ill,
1442 				 * if somebody else in the group has
1443 				 * already been nominated.
1444 				 */
1445 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1446 			}
1447 		} else {
1448 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1449 			    &ilm->ilm_v6addr);
1450 		}
1451 	}
1452 }
1453 
1454 /*
1455  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1456  * that were explicitly joined.  Note that both these functions could be
1457  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1458  * and DL_ENABMULTI_REQ messages when an interface is down.
1459  */
1460 void
1461 ill_leave_multicast(ill_t *ill)
1462 {
1463 	ilm_t	*ilm;
1464 	char    addrbuf[INET6_ADDRSTRLEN];
1465 
1466 	ASSERT(IAM_WRITER_ILL(ill));
1467 
1468 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1469 		/*
1470 		 * Check how many ipif's that have members in this group -
1471 		 * if more then one we make sure that this entry is first
1472 		 * in the list.
1473 		 */
1474 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1475 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1476 			continue;
1477 		ip1dbg(("ill_leave_multicast: %s\n",
1478 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1479 		    sizeof (addrbuf))));
1480 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1481 			(void) ip_leave_allmulti(ill->ill_ipif);
1482 			/*
1483 			 * If we were part of an IPMP group, then
1484 			 * ill_handoff_responsibility() has already
1485 			 * nominated a new member (so we don't).
1486 			 */
1487 			ASSERT(ill->ill_group == NULL);
1488 		} else {
1489 			(void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr);
1490 		}
1491 	}
1492 }
1493 
1494 /*
1495  * Find an ilm for matching the ill and which has the source in its
1496  * INCLUDE list or does not have it in its EXCLUDE list
1497  */
1498 ilm_t *
1499 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src)
1500 {
1501 	in6_addr_t	v6group, v6src;
1502 
1503 	/*
1504 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1505 	 */
1506 	if (group == INADDR_ANY)
1507 		v6group = ipv6_all_zeros;
1508 	else
1509 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1510 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
1511 
1512 	return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src));
1513 }
1514 
1515 ilm_t *
1516 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group,
1517     const in6_addr_t *v6src)
1518 {
1519 	ilm_t	*ilm;
1520 	boolean_t isinlist;
1521 	int	i, numsrc;
1522 
1523 	/*
1524 	 * If the source is in any ilm's INCLUDE list, or if
1525 	 * it is not in any ilm's EXCLUDE list, we have a hit.
1526 	 */
1527 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1528 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1529 
1530 			isinlist = B_FALSE;
1531 			numsrc = (ilm->ilm_filter == NULL) ?
1532 			    0 : ilm->ilm_filter->sl_numsrc;
1533 			for (i = 0; i < numsrc; i++) {
1534 				if (IN6_ARE_ADDR_EQUAL(v6src,
1535 				    &ilm->ilm_filter->sl_addr[i])) {
1536 					isinlist = B_TRUE;
1537 					break;
1538 				}
1539 			}
1540 			if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) ||
1541 			    (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE))
1542 				return (ilm);
1543 			else
1544 				return (NULL);
1545 		}
1546 	}
1547 	return (NULL);
1548 }
1549 
1550 
1551 /* Find an ilm for matching the ill */
1552 ilm_t *
1553 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1554 {
1555 	in6_addr_t	v6group;
1556 
1557 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1558 	    IAM_WRITER_ILL(ill));
1559 	/*
1560 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1561 	 */
1562 	if (group == INADDR_ANY)
1563 		v6group = ipv6_all_zeros;
1564 	else
1565 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1566 
1567 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1568 }
1569 
1570 /*
1571  * Find an ilm for matching the ill. All the ilm lookup functions
1572  * ignore ILM_DELETED ilms. These have been logically deleted, and
1573  * igmp and linklayer disable multicast have been done. Only mi_free
1574  * yet to be done. Still there in the list due to ilm_walkers. The
1575  * last walker will release it.
1576  */
1577 ilm_t *
1578 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1579 {
1580 	ilm_t	*ilm;
1581 
1582 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1583 	    IAM_WRITER_ILL(ill));
1584 
1585 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1586 		if (ilm->ilm_flags & ILM_DELETED)
1587 			continue;
1588 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1589 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1590 			return (ilm);
1591 	}
1592 	return (NULL);
1593 }
1594 
1595 ilm_t *
1596 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1597     zoneid_t zoneid)
1598 {
1599 	ilm_t *ilm;
1600 
1601 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1602 	    IAM_WRITER_ILL(ill));
1603 
1604 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1605 		if (ilm->ilm_flags & ILM_DELETED)
1606 			continue;
1607 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1608 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1609 		    ilm->ilm_orig_ifindex == index) {
1610 			return (ilm);
1611 		}
1612 	}
1613 	return (NULL);
1614 }
1615 
1616 ilm_t *
1617 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1618 {
1619 	in6_addr_t	v6group;
1620 
1621 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1622 	    IAM_WRITER_ILL(ill));
1623 	/*
1624 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1625 	 */
1626 	if (group == INADDR_ANY)
1627 		v6group = ipv6_all_zeros;
1628 	else
1629 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1630 
1631 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1632 }
1633 
1634 /*
1635  * Found an ilm for the ipif. Only needed for IPv4 which does
1636  * ipif specific socket options.
1637  */
1638 ilm_t *
1639 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1640 {
1641 	ill_t	*ill = ipif->ipif_ill;
1642 	ilm_t	*ilm;
1643 	in6_addr_t	v6group;
1644 
1645 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1646 	    IAM_WRITER_ILL(ill));
1647 
1648 	/*
1649 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1650 	 */
1651 	if (group == INADDR_ANY)
1652 		v6group = ipv6_all_zeros;
1653 	else
1654 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1655 
1656 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1657 		if (ilm->ilm_flags & ILM_DELETED)
1658 			continue;
1659 		if (ilm->ilm_ipif == ipif &&
1660 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1661 			return (ilm);
1662 	}
1663 	return (NULL);
1664 }
1665 
1666 /*
1667  * How many members on this ill?
1668  */
1669 int
1670 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1671 {
1672 	ilm_t	*ilm;
1673 	int i = 0;
1674 
1675 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1676 	    IAM_WRITER_ILL(ill));
1677 
1678 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1679 		if (ilm->ilm_flags & ILM_DELETED)
1680 			continue;
1681 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1682 			i++;
1683 		}
1684 	}
1685 	return (i);
1686 }
1687 
1688 /* Caller guarantees that the group is not already on the list */
1689 static ilm_t *
1690 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1691     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1692     zoneid_t zoneid)
1693 {
1694 	ill_t	*ill = ipif->ipif_ill;
1695 	ilm_t	*ilm;
1696 	ilm_t	*ilm_cur;
1697 	ilm_t	**ilm_ptpn;
1698 
1699 	ASSERT(IAM_WRITER_IPIF(ipif));
1700 
1701 	ilm = GETSTRUCT(ilm_t, 1);
1702 	if (ilm == NULL)
1703 		return (NULL);
1704 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1705 		ilm->ilm_filter = l_alloc();
1706 		if (ilm->ilm_filter == NULL) {
1707 			mi_free(ilm);
1708 			return (NULL);
1709 		}
1710 	}
1711 	ilm->ilm_v6addr = *v6group;
1712 	ilm->ilm_refcnt = 1;
1713 	ilm->ilm_zoneid = zoneid;
1714 	ilm->ilm_timer = INFINITY;
1715 	ilm->ilm_rtx.rtx_timer = INFINITY;
1716 
1717 	/*
1718 	 * IPv4 Multicast groups are joined using ipif.
1719 	 * IPv6 Multicast groups are joined using ill.
1720 	 */
1721 	if (ill->ill_isv6) {
1722 		ilm->ilm_ill = ill;
1723 		ilm->ilm_ipif = NULL;
1724 	} else {
1725 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1726 		ilm->ilm_ipif = ipif;
1727 		ilm->ilm_ill = NULL;
1728 	}
1729 	/*
1730 	 * After this if ilm moves to a new ill, we don't change
1731 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1732 	 * it has been moved. Indexes don't match even when the application
1733 	 * wants to join on a FAILED/INACTIVE interface because we choose
1734 	 * a new interface to join in. This is considered as an implicit
1735 	 * move.
1736 	 */
1737 	ilm->ilm_orig_ifindex = orig_ifindex;
1738 
1739 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1740 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1741 
1742 	/*
1743 	 * Grab lock to give consistent view to readers
1744 	 */
1745 	mutex_enter(&ill->ill_lock);
1746 	/*
1747 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1748 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1749 	 * sending duplicates up when two applications in the same zone join the
1750 	 * same group on different logical interfaces.
1751 	 */
1752 	ilm_cur = ill->ill_ilm;
1753 	ilm_ptpn = &ill->ill_ilm;
1754 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1755 		ilm_ptpn = &ilm_cur->ilm_next;
1756 		ilm_cur = ilm_cur->ilm_next;
1757 	}
1758 	ilm->ilm_next = ilm_cur;
1759 	*ilm_ptpn = ilm;
1760 
1761 	/*
1762 	 * If we have an associated ilg, use its filter state; if not,
1763 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1764 	 */
1765 	if (ilgstat != ILGSTAT_NONE) {
1766 		if (!SLIST_IS_EMPTY(ilg_flist))
1767 			l_copy(ilg_flist, ilm->ilm_filter);
1768 		ilm->ilm_fmode = ilg_fmode;
1769 	} else {
1770 		ilm->ilm_no_ilg_cnt = 1;
1771 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1772 	}
1773 
1774 	mutex_exit(&ill->ill_lock);
1775 	return (ilm);
1776 }
1777 
1778 void
1779 ilm_walker_cleanup(ill_t *ill)
1780 {
1781 	ilm_t	**ilmp;
1782 	ilm_t	*ilm;
1783 
1784 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1785 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1786 
1787 	ilmp = &ill->ill_ilm;
1788 	while (*ilmp != NULL) {
1789 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1790 			ilm = *ilmp;
1791 			*ilmp = ilm->ilm_next;
1792 			FREE_SLIST(ilm->ilm_filter);
1793 			FREE_SLIST(ilm->ilm_pendsrcs);
1794 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1795 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1796 			mi_free((char *)ilm);
1797 		} else {
1798 			ilmp = &(*ilmp)->ilm_next;
1799 		}
1800 	}
1801 	ill->ill_ilm_cleanup_reqd = 0;
1802 }
1803 
1804 /*
1805  * Unlink ilm and free it.
1806  */
1807 static void
1808 ilm_delete(ilm_t *ilm)
1809 {
1810 	ill_t	*ill;
1811 	ilm_t	**ilmp;
1812 
1813 	if (ilm->ilm_ipif != NULL) {
1814 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1815 		ASSERT(ilm->ilm_ill == NULL);
1816 		ill = ilm->ilm_ipif->ipif_ill;
1817 		ASSERT(!ill->ill_isv6);
1818 	} else {
1819 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1820 		ASSERT(ilm->ilm_ipif == NULL);
1821 		ill = ilm->ilm_ill;
1822 		ASSERT(ill->ill_isv6);
1823 	}
1824 	/*
1825 	 * Delete under lock protection so that readers don't stumble
1826 	 * on bad ilm_next
1827 	 */
1828 	mutex_enter(&ill->ill_lock);
1829 	if (ill->ill_ilm_walker_cnt != 0) {
1830 		ilm->ilm_flags |= ILM_DELETED;
1831 		ill->ill_ilm_cleanup_reqd = 1;
1832 		mutex_exit(&ill->ill_lock);
1833 		return;
1834 	}
1835 
1836 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1837 				;
1838 	*ilmp = ilm->ilm_next;
1839 	mutex_exit(&ill->ill_lock);
1840 
1841 	FREE_SLIST(ilm->ilm_filter);
1842 	FREE_SLIST(ilm->ilm_pendsrcs);
1843 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1844 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1845 	mi_free((char *)ilm);
1846 }
1847 
1848 /* Free all ilms for this ipif */
1849 void
1850 ilm_free(ipif_t *ipif)
1851 {
1852 	ill_t	*ill = ipif->ipif_ill;
1853 	ilm_t	*ilm;
1854 	ilm_t	 *next_ilm;
1855 
1856 	ASSERT(IAM_WRITER_IPIF(ipif));
1857 
1858 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1859 		next_ilm = ilm->ilm_next;
1860 		if (ilm->ilm_ipif == ipif)
1861 			ilm_delete(ilm);
1862 	}
1863 }
1864 
1865 /*
1866  * Looks up the appropriate ipif given a v4 multicast group and interface
1867  * address.  On success, returns 0, with *ipifpp pointing to the found
1868  * struct.  On failure, returns an errno and *ipifpp is NULL.
1869  */
1870 int
1871 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1872     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1873 {
1874 	ipif_t *ipif;
1875 	int err = 0;
1876 	zoneid_t zoneid;
1877 
1878 	if (!CLASSD(group) || CLASSD(src)) {
1879 		return (EINVAL);
1880 	}
1881 	*ipifpp = NULL;
1882 
1883 	zoneid = IPCL_ZONEID(connp);
1884 
1885 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1886 	if (ifaddr != INADDR_ANY) {
1887 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1888 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1889 		if (err != 0 && err != EINPROGRESS)
1890 			err = EADDRNOTAVAIL;
1891 	} else if (ifindexp != NULL && *ifindexp != 0) {
1892 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1893 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1894 	} else {
1895 		ipif = ipif_lookup_group(group, zoneid);
1896 		if (ipif == NULL)
1897 			return (EADDRNOTAVAIL);
1898 	}
1899 	if (ipif == NULL)
1900 		return (err);
1901 
1902 	*ipifpp = ipif;
1903 	return (0);
1904 }
1905 
1906 /*
1907  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1908  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1909  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1910  * an errno and *illpp and *ipifpp are undefined.
1911  */
1912 int
1913 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1914     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1915     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1916 {
1917 	boolean_t src_unspec;
1918 	ill_t *ill = NULL;
1919 	ipif_t *ipif = NULL;
1920 	int err;
1921 	zoneid_t zoneid = connp->conn_zoneid;
1922 	queue_t *wq = CONNP_TO_WQ(connp);
1923 
1924 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1925 
1926 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1927 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1928 			return (EINVAL);
1929 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1930 		if (src_unspec) {
1931 			*v4src = INADDR_ANY;
1932 		} else {
1933 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1934 		}
1935 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1936 			return (EINVAL);
1937 		*ipifpp = NULL;
1938 		*isv6 = B_FALSE;
1939 	} else {
1940 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1941 			return (EINVAL);
1942 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1943 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1944 			return (EINVAL);
1945 		}
1946 		*illpp = NULL;
1947 		*isv6 = B_TRUE;
1948 	}
1949 
1950 	if (ifindex == 0) {
1951 		if (*isv6)
1952 			ill = ill_lookup_group_v6(v6group, zoneid);
1953 		else
1954 			ipif = ipif_lookup_group(*v4group, zoneid);
1955 		if (ill == NULL && ipif == NULL)
1956 			return (EADDRNOTAVAIL);
1957 	} else {
1958 		if (*isv6) {
1959 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1960 			    wq, first_mp, func, &err);
1961 			if (ill != NULL &&
1962 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1963 				ill_refrele(ill);
1964 				ill = NULL;
1965 				err = EADDRNOTAVAIL;
1966 			}
1967 		} else {
1968 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1969 			    zoneid, wq, first_mp, func, &err);
1970 		}
1971 		if (ill == NULL && ipif == NULL)
1972 			return (err);
1973 	}
1974 
1975 	*ipifpp = ipif;
1976 	*illpp = ill;
1977 	return (0);
1978 }
1979 
1980 static int
1981 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1982     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1983 {
1984 	ilg_t *ilg;
1985 	int i, numsrc, fmode, outsrcs;
1986 	struct sockaddr_in *sin;
1987 	struct sockaddr_in6 *sin6;
1988 	struct in_addr *addrp;
1989 	slist_t *fp;
1990 	boolean_t is_v4only_api;
1991 
1992 	mutex_enter(&connp->conn_lock);
1993 
1994 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1995 	if (ilg == NULL) {
1996 		mutex_exit(&connp->conn_lock);
1997 		return (EADDRNOTAVAIL);
1998 	}
1999 
2000 	if (gf == NULL) {
2001 		ASSERT(imsf != NULL);
2002 		ASSERT(!isv4mapped);
2003 		is_v4only_api = B_TRUE;
2004 		outsrcs = imsf->imsf_numsrc;
2005 	} else {
2006 		ASSERT(imsf == NULL);
2007 		is_v4only_api = B_FALSE;
2008 		outsrcs = gf->gf_numsrc;
2009 	}
2010 
2011 	/*
2012 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2013 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2014 	 * So we need to translate here.
2015 	 */
2016 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2017 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2018 	if ((fp = ilg->ilg_filter) == NULL) {
2019 		numsrc = 0;
2020 	} else {
2021 		for (i = 0; i < outsrcs; i++) {
2022 			if (i == fp->sl_numsrc)
2023 				break;
2024 			if (isv4mapped) {
2025 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2026 				sin6->sin6_family = AF_INET6;
2027 				sin6->sin6_addr = fp->sl_addr[i];
2028 			} else {
2029 				if (is_v4only_api) {
2030 					addrp = &imsf->imsf_slist[i];
2031 				} else {
2032 					sin = (struct sockaddr_in *)
2033 					    &gf->gf_slist[i];
2034 					sin->sin_family = AF_INET;
2035 					addrp = &sin->sin_addr;
2036 				}
2037 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2038 			}
2039 		}
2040 		numsrc = fp->sl_numsrc;
2041 	}
2042 
2043 	if (is_v4only_api) {
2044 		imsf->imsf_numsrc = numsrc;
2045 		imsf->imsf_fmode = fmode;
2046 	} else {
2047 		gf->gf_numsrc = numsrc;
2048 		gf->gf_fmode = fmode;
2049 	}
2050 
2051 	mutex_exit(&connp->conn_lock);
2052 
2053 	return (0);
2054 }
2055 
2056 static int
2057 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2058     const struct in6_addr *grp, ill_t *ill)
2059 {
2060 	ilg_t *ilg;
2061 	int i;
2062 	struct sockaddr_storage *sl;
2063 	struct sockaddr_in6 *sin6;
2064 	slist_t *fp;
2065 
2066 	mutex_enter(&connp->conn_lock);
2067 
2068 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2069 	if (ilg == NULL) {
2070 		mutex_exit(&connp->conn_lock);
2071 		return (EADDRNOTAVAIL);
2072 	}
2073 
2074 	/*
2075 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2076 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2077 	 * So we need to translate here.
2078 	 */
2079 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2080 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2081 	if ((fp = ilg->ilg_filter) == NULL) {
2082 		gf->gf_numsrc = 0;
2083 	} else {
2084 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2085 			if (i == fp->sl_numsrc)
2086 				break;
2087 			sin6 = (struct sockaddr_in6 *)sl;
2088 			sin6->sin6_family = AF_INET6;
2089 			sin6->sin6_addr = fp->sl_addr[i];
2090 		}
2091 		gf->gf_numsrc = fp->sl_numsrc;
2092 	}
2093 
2094 	mutex_exit(&connp->conn_lock);
2095 
2096 	return (0);
2097 }
2098 
2099 static int
2100 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2101     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2102 {
2103 	ilg_t *ilg;
2104 	int i, err, insrcs, infmode, new_fmode;
2105 	struct sockaddr_in *sin;
2106 	struct sockaddr_in6 *sin6;
2107 	struct in_addr *addrp;
2108 	slist_t *orig_filter = NULL;
2109 	slist_t *new_filter = NULL;
2110 	mcast_record_t orig_fmode;
2111 	boolean_t leave_grp, is_v4only_api;
2112 	ilg_stat_t ilgstat;
2113 
2114 	if (gf == NULL) {
2115 		ASSERT(imsf != NULL);
2116 		ASSERT(!isv4mapped);
2117 		is_v4only_api = B_TRUE;
2118 		insrcs = imsf->imsf_numsrc;
2119 		infmode = imsf->imsf_fmode;
2120 	} else {
2121 		ASSERT(imsf == NULL);
2122 		is_v4only_api = B_FALSE;
2123 		insrcs = gf->gf_numsrc;
2124 		infmode = gf->gf_fmode;
2125 	}
2126 
2127 	/* Make sure we can handle the source list */
2128 	if (insrcs > MAX_FILTER_SIZE)
2129 		return (ENOBUFS);
2130 
2131 	/*
2132 	 * setting the filter to (INCLUDE, NULL) is treated
2133 	 * as a request to leave the group.
2134 	 */
2135 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2136 
2137 	ASSERT(IAM_WRITER_IPIF(ipif));
2138 
2139 	mutex_enter(&connp->conn_lock);
2140 
2141 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2142 	if (ilg == NULL) {
2143 		/*
2144 		 * if the request was actually to leave, and we
2145 		 * didn't find an ilg, there's nothing to do.
2146 		 */
2147 		if (!leave_grp)
2148 			ilg = conn_ilg_alloc(connp);
2149 		if (leave_grp || ilg == NULL) {
2150 			mutex_exit(&connp->conn_lock);
2151 			return (leave_grp ? 0 : ENOMEM);
2152 		}
2153 		ilgstat = ILGSTAT_NEW;
2154 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2155 		ilg->ilg_ipif = ipif;
2156 		ilg->ilg_ill = NULL;
2157 		ilg->ilg_orig_ifindex = 0;
2158 	} else if (leave_grp) {
2159 		ilg_delete(connp, ilg, NULL);
2160 		mutex_exit(&connp->conn_lock);
2161 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2162 		return (0);
2163 	} else {
2164 		ilgstat = ILGSTAT_CHANGE;
2165 		/* Preserve existing state in case ip_addmulti() fails */
2166 		orig_fmode = ilg->ilg_fmode;
2167 		if (ilg->ilg_filter == NULL) {
2168 			orig_filter = NULL;
2169 		} else {
2170 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2171 			if (orig_filter == NULL) {
2172 				mutex_exit(&connp->conn_lock);
2173 				return (ENOMEM);
2174 			}
2175 		}
2176 	}
2177 
2178 	/*
2179 	 * Alloc buffer to copy new state into (see below) before
2180 	 * we make any changes, so we can bail if it fails.
2181 	 */
2182 	if ((new_filter = l_alloc()) == NULL) {
2183 		mutex_exit(&connp->conn_lock);
2184 		err = ENOMEM;
2185 		goto free_and_exit;
2186 	}
2187 
2188 	if (insrcs == 0) {
2189 		CLEAR_SLIST(ilg->ilg_filter);
2190 	} else {
2191 		slist_t *fp;
2192 		if (ilg->ilg_filter == NULL) {
2193 			fp = l_alloc();
2194 			if (fp == NULL) {
2195 				if (ilgstat == ILGSTAT_NEW)
2196 					ilg_delete(connp, ilg, NULL);
2197 				mutex_exit(&connp->conn_lock);
2198 				err = ENOMEM;
2199 				goto free_and_exit;
2200 			}
2201 		} else {
2202 			fp = ilg->ilg_filter;
2203 		}
2204 		for (i = 0; i < insrcs; i++) {
2205 			if (isv4mapped) {
2206 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2207 				fp->sl_addr[i] = sin6->sin6_addr;
2208 			} else {
2209 				if (is_v4only_api) {
2210 					addrp = &imsf->imsf_slist[i];
2211 				} else {
2212 					sin = (struct sockaddr_in *)
2213 					    &gf->gf_slist[i];
2214 					addrp = &sin->sin_addr;
2215 				}
2216 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2217 			}
2218 		}
2219 		fp->sl_numsrc = insrcs;
2220 		ilg->ilg_filter = fp;
2221 	}
2222 	/*
2223 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2224 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2225 	 * So we need to translate here.
2226 	 */
2227 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2228 		    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2229 
2230 	/*
2231 	 * Save copy of ilg's filter state to pass to other functions,
2232 	 * so we can release conn_lock now.
2233 	 */
2234 	new_fmode = ilg->ilg_fmode;
2235 	l_copy(ilg->ilg_filter, new_filter);
2236 
2237 	mutex_exit(&connp->conn_lock);
2238 
2239 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2240 	if (err != 0) {
2241 		/*
2242 		 * Restore the original filter state, or delete the
2243 		 * newly-created ilg.  We need to look up the ilg
2244 		 * again, though, since we've not been holding the
2245 		 * conn_lock.
2246 		 */
2247 		mutex_enter(&connp->conn_lock);
2248 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2249 		ASSERT(ilg != NULL);
2250 		if (ilgstat == ILGSTAT_NEW) {
2251 			ilg_delete(connp, ilg, NULL);
2252 		} else {
2253 			ilg->ilg_fmode = orig_fmode;
2254 			if (SLIST_IS_EMPTY(orig_filter)) {
2255 				CLEAR_SLIST(ilg->ilg_filter);
2256 			} else {
2257 				/*
2258 				 * We didn't free the filter, even if we
2259 				 * were trying to make the source list empty;
2260 				 * so if orig_filter isn't empty, the ilg
2261 				 * must still have a filter alloc'd.
2262 				 */
2263 				l_copy(orig_filter, ilg->ilg_filter);
2264 			}
2265 		}
2266 		mutex_exit(&connp->conn_lock);
2267 	}
2268 
2269 free_and_exit:
2270 	l_free(orig_filter);
2271 	l_free(new_filter);
2272 
2273 	return (err);
2274 }
2275 
2276 static int
2277 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2278     const struct in6_addr *grp, ill_t *ill)
2279 {
2280 	ilg_t *ilg;
2281 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2282 	slist_t *orig_filter = NULL;
2283 	slist_t *new_filter = NULL;
2284 	struct sockaddr_storage *sl;
2285 	struct sockaddr_in6 *sin6;
2286 	boolean_t leave_grp;
2287 	ilg_stat_t ilgstat;
2288 
2289 	/* Make sure we can handle the source list */
2290 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2291 		return (ENOBUFS);
2292 
2293 	/*
2294 	 * setting the filter to (INCLUDE, NULL) is treated
2295 	 * as a request to leave the group.
2296 	 */
2297 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2298 
2299 	ASSERT(IAM_WRITER_ILL(ill));
2300 
2301 	/*
2302 	 * Use the ifindex to do the lookup.  We can't use the ill
2303 	 * directly because ilg_ill could point to a different ill
2304 	 * if things have moved.
2305 	 */
2306 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2307 
2308 	mutex_enter(&connp->conn_lock);
2309 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2310 	if (ilg == NULL) {
2311 		/*
2312 		 * if the request was actually to leave, and we
2313 		 * didn't find an ilg, there's nothing to do.
2314 		 */
2315 		if (!leave_grp)
2316 			ilg = conn_ilg_alloc(connp);
2317 		if (leave_grp || ilg == NULL) {
2318 			mutex_exit(&connp->conn_lock);
2319 			return (leave_grp ? 0 : ENOMEM);
2320 		}
2321 		ilgstat = ILGSTAT_NEW;
2322 		ilg->ilg_v6group = *grp;
2323 		ilg->ilg_ipif = NULL;
2324 		/*
2325 		 * Choose our target ill to join on. This might be
2326 		 * different from the ill we've been given if it's
2327 		 * currently down and part of a group.
2328 		 *
2329 		 * new ill is not refheld; we are writer.
2330 		 */
2331 		ill = ip_choose_multi_ill(ill, grp);
2332 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2333 		ilg->ilg_ill = ill;
2334 		/*
2335 		 * Remember the index that we joined on, so that we can
2336 		 * successfully delete them later on and also search for
2337 		 * duplicates if the application wants to join again.
2338 		 */
2339 		ilg->ilg_orig_ifindex = orig_ifindex;
2340 	} else if (leave_grp) {
2341 		/*
2342 		 * Use the ilg's current ill for the deletion,
2343 		 * we might have failed over.
2344 		 */
2345 		ill = ilg->ilg_ill;
2346 		ilg_delete(connp, ilg, NULL);
2347 		mutex_exit(&connp->conn_lock);
2348 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2349 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2350 		return (0);
2351 	} else {
2352 		ilgstat = ILGSTAT_CHANGE;
2353 		/*
2354 		 * The current ill might be different from the one we were
2355 		 * asked to join on (if failover has occurred); we should
2356 		 * join on the ill stored in the ilg.  The original ill
2357 		 * is noted in ilg_orig_ifindex, which matched our request.
2358 		 */
2359 		ill = ilg->ilg_ill;
2360 		/* preserve existing state in case ip_addmulti() fails */
2361 		orig_fmode = ilg->ilg_fmode;
2362 		if (ilg->ilg_filter == NULL) {
2363 			orig_filter = NULL;
2364 		} else {
2365 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2366 			if (orig_filter == NULL) {
2367 				mutex_exit(&connp->conn_lock);
2368 				return (ENOMEM);
2369 			}
2370 		}
2371 	}
2372 
2373 	/*
2374 	 * Alloc buffer to copy new state into (see below) before
2375 	 * we make any changes, so we can bail if it fails.
2376 	 */
2377 	if ((new_filter = l_alloc()) == NULL) {
2378 		mutex_exit(&connp->conn_lock);
2379 		err = ENOMEM;
2380 		goto free_and_exit;
2381 	}
2382 
2383 	if (gf->gf_numsrc == 0) {
2384 		CLEAR_SLIST(ilg->ilg_filter);
2385 	} else {
2386 		slist_t *fp;
2387 		if (ilg->ilg_filter == NULL) {
2388 			fp = l_alloc();
2389 			if (fp == NULL) {
2390 				if (ilgstat == ILGSTAT_NEW)
2391 					ilg_delete(connp, ilg, NULL);
2392 				mutex_exit(&connp->conn_lock);
2393 				err = ENOMEM;
2394 				goto free_and_exit;
2395 			}
2396 		} else {
2397 			fp = ilg->ilg_filter;
2398 		}
2399 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2400 			sin6 = (struct sockaddr_in6 *)sl;
2401 			fp->sl_addr[i] = sin6->sin6_addr;
2402 		}
2403 		fp->sl_numsrc = gf->gf_numsrc;
2404 		ilg->ilg_filter = fp;
2405 	}
2406 	/*
2407 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2408 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2409 	 * So we need to translate here.
2410 	 */
2411 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2412 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2413 
2414 	/*
2415 	 * Save copy of ilg's filter state to pass to other functions,
2416 	 * so we can release conn_lock now.
2417 	 */
2418 	new_fmode = ilg->ilg_fmode;
2419 	l_copy(ilg->ilg_filter, new_filter);
2420 
2421 	mutex_exit(&connp->conn_lock);
2422 
2423 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2424 	    ilgstat, new_fmode, new_filter);
2425 	if (err != 0) {
2426 		/*
2427 		 * Restore the original filter state, or delete the
2428 		 * newly-created ilg.  We need to look up the ilg
2429 		 * again, though, since we've not been holding the
2430 		 * conn_lock.
2431 		 */
2432 		mutex_enter(&connp->conn_lock);
2433 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2434 		ASSERT(ilg != NULL);
2435 		if (ilgstat == ILGSTAT_NEW) {
2436 			ilg_delete(connp, ilg, NULL);
2437 		} else {
2438 			ilg->ilg_fmode = orig_fmode;
2439 			if (SLIST_IS_EMPTY(orig_filter)) {
2440 				CLEAR_SLIST(ilg->ilg_filter);
2441 			} else {
2442 				/*
2443 				 * We didn't free the filter, even if we
2444 				 * were trying to make the source list empty;
2445 				 * so if orig_filter isn't empty, the ilg
2446 				 * must still have a filter alloc'd.
2447 				 */
2448 				l_copy(orig_filter, ilg->ilg_filter);
2449 			}
2450 		}
2451 		mutex_exit(&connp->conn_lock);
2452 	}
2453 
2454 free_and_exit:
2455 	l_free(orig_filter);
2456 	l_free(new_filter);
2457 
2458 	return (err);
2459 }
2460 
2461 /*
2462  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2463  */
2464 /* ARGSUSED */
2465 int
2466 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2467     ip_ioctl_cmd_t *ipip, void *ifreq)
2468 {
2469 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2470 	/* existence verified in ip_wput_nondata() */
2471 	mblk_t *data_mp = mp->b_cont->b_cont;
2472 	int datalen, err, cmd, minsize;
2473 	int expsize = 0;
2474 	conn_t *connp;
2475 	boolean_t isv6, is_v4only_api, getcmd;
2476 	struct sockaddr_in *gsin;
2477 	struct sockaddr_in6 *gsin6;
2478 	ipaddr_t v4grp;
2479 	in6_addr_t v6grp;
2480 	struct group_filter *gf = NULL;
2481 	struct ip_msfilter *imsf = NULL;
2482 	mblk_t *ndp;
2483 
2484 	if (data_mp->b_cont != NULL) {
2485 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2486 			return (ENOMEM);
2487 		freemsg(data_mp);
2488 		data_mp = ndp;
2489 		mp->b_cont->b_cont = data_mp;
2490 	}
2491 
2492 	cmd = iocp->ioc_cmd;
2493 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2494 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2495 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2496 	datalen = MBLKL(data_mp);
2497 
2498 	if (datalen < minsize)
2499 		return (EINVAL);
2500 
2501 	/*
2502 	 * now we know we have at least have the initial structure,
2503 	 * but need to check for the source list array.
2504 	 */
2505 	if (is_v4only_api) {
2506 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2507 		isv6 = B_FALSE;
2508 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2509 	} else {
2510 		gf = (struct group_filter *)data_mp->b_rptr;
2511 		if (gf->gf_group.ss_family == AF_INET6) {
2512 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2513 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2514 		} else {
2515 			isv6 = B_FALSE;
2516 		}
2517 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2518 	}
2519 	if (datalen < expsize)
2520 		return (EINVAL);
2521 
2522 	connp = Q_TO_CONN(q);
2523 
2524 	/* operation not supported on the virtual network interface */
2525 	if (IS_VNI(ipif->ipif_ill))
2526 		return (EINVAL);
2527 
2528 	if (isv6) {
2529 		ill_t *ill = ipif->ipif_ill;
2530 		ill_refhold(ill);
2531 
2532 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2533 		v6grp = gsin6->sin6_addr;
2534 		if (getcmd)
2535 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2536 		else
2537 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2538 
2539 		ill_refrele(ill);
2540 	} else {
2541 		boolean_t isv4mapped = B_FALSE;
2542 		if (is_v4only_api) {
2543 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2544 		} else {
2545 			if (gf->gf_group.ss_family == AF_INET) {
2546 				gsin = (struct sockaddr_in *)&gf->gf_group;
2547 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2548 			} else {
2549 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2550 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2551 				    v4grp);
2552 				isv4mapped = B_TRUE;
2553 			}
2554 		}
2555 		if (getcmd)
2556 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2557 			    isv4mapped);
2558 		else
2559 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2560 			    isv4mapped);
2561 	}
2562 
2563 	return (err);
2564 }
2565 
2566 /*
2567  * Finds the ipif based on information in the ioctl headers.  Needed to make
2568  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2569  * ioctls prior to calling the ioctl's handler function).  Somewhat analogous
2570  * to ip_extract_lifreq_cmn() and ip_extract_tunreq().
2571  */
2572 int
2573 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func)
2574 {
2575 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2576 	int cmd = iocp->ioc_cmd, err = 0;
2577 	conn_t *connp;
2578 	ipif_t *ipif;
2579 	/* caller has verified this mblk exists */
2580 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2581 	struct ip_msfilter *imsf;
2582 	struct group_filter *gf;
2583 	ipaddr_t v4addr, v4grp;
2584 	in6_addr_t v6grp;
2585 	uint32_t index;
2586 	zoneid_t zoneid;
2587 
2588 	connp = Q_TO_CONN(q);
2589 	zoneid = connp->conn_zoneid;
2590 
2591 	/* don't allow multicast operations on a tcp conn */
2592 	if (IPCL_IS_TCP(connp))
2593 		return (ENOPROTOOPT);
2594 
2595 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2596 		/* don't allow v4-specific ioctls on v6 socket */
2597 		if (connp->conn_af_isv6)
2598 			return (EAFNOSUPPORT);
2599 
2600 		imsf = (struct ip_msfilter *)dbuf;
2601 		v4addr = imsf->imsf_interface.s_addr;
2602 		v4grp = imsf->imsf_multiaddr.s_addr;
2603 		if (v4addr == INADDR_ANY) {
2604 			ipif = ipif_lookup_group(v4grp, zoneid);
2605 			if (ipif == NULL)
2606 				err = EADDRNOTAVAIL;
2607 		} else {
2608 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2609 			    func, &err);
2610 		}
2611 	} else {
2612 		boolean_t isv6 = B_FALSE;
2613 		gf = (struct group_filter *)dbuf;
2614 		index = gf->gf_interface;
2615 		if (gf->gf_group.ss_family == AF_INET6) {
2616 			struct sockaddr_in6 *sin6;
2617 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2618 			v6grp = sin6->sin6_addr;
2619 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2620 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2621 			else
2622 				isv6 = B_TRUE;
2623 		} else if (gf->gf_group.ss_family == AF_INET) {
2624 			struct sockaddr_in *sin;
2625 			sin = (struct sockaddr_in *)&gf->gf_group;
2626 			v4grp = sin->sin_addr.s_addr;
2627 		} else {
2628 			return (EAFNOSUPPORT);
2629 		}
2630 		if (index == 0) {
2631 			if (isv6)
2632 				ipif = ipif_lookup_group_v6(&v6grp, zoneid);
2633 			else
2634 				ipif = ipif_lookup_group(v4grp, zoneid);
2635 			if (ipif == NULL)
2636 				err = EADDRNOTAVAIL;
2637 		} else {
2638 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2639 			    q, mp, func, &err);
2640 		}
2641 	}
2642 
2643 	*ipifpp = ipif;
2644 	return (err);
2645 }
2646 
2647 /*
2648  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2649  * in in two stages, as the first copyin tells us the size of the attached
2650  * source buffer.  This function is called by ip_wput_nondata() after the
2651  * first copyin has completed; it figures out how big the second stage
2652  * needs to be, and kicks it off.
2653  *
2654  * In some cases (numsrc < 2), the second copyin is not needed as the
2655  * first one gets a complete structure containing 1 source addr.
2656  *
2657  * The function returns 0 if a second copyin has been started (i.e. there's
2658  * no more work to be done right now), or 1 if the second copyin is not
2659  * needed and ip_wput_nondata() can continue its processing.
2660  */
2661 int
2662 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2663 {
2664 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2665 	int cmd = iocp->ioc_cmd;
2666 	/* validity of this checked in ip_wput_nondata() */
2667 	mblk_t *mp1 = mp->b_cont->b_cont;
2668 	int copysize = 0;
2669 	int offset;
2670 
2671 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2672 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2673 		if (gf->gf_numsrc >= 2) {
2674 			offset = sizeof (struct group_filter);
2675 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2676 		}
2677 	} else {
2678 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2679 		if (imsf->imsf_numsrc >= 2) {
2680 			offset = sizeof (struct ip_msfilter);
2681 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2682 		}
2683 	}
2684 	if (copysize > 0) {
2685 		mi_copyin_n(q, mp, offset, copysize);
2686 		return (0);
2687 	}
2688 	return (1);
2689 }
2690 
2691 /*
2692  * Handle the following optmgmt:
2693  *	IP_ADD_MEMBERSHIP		must not have joined already
2694  *	MCAST_JOIN_GROUP		must not have joined already
2695  *	IP_BLOCK_SOURCE			must have joined already
2696  *	MCAST_BLOCK_SOURCE		must have joined already
2697  *	IP_JOIN_SOURCE_GROUP		may have joined already
2698  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2699  *
2700  * fmode and src parameters may be used to determine which option is
2701  * being set, as follows (the IP_* and MCAST_* versions of each option
2702  * are functionally equivalent):
2703  *	opt			fmode			src
2704  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2705  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2706  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2707  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2708  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2709  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2710  *
2711  * Changing the filter mode is not allowed; if a matching ilg already
2712  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2713  *
2714  * Verifies that there is a source address of appropriate scope for
2715  * the group; if not, EADDRNOTAVAIL is returned.
2716  *
2717  * The interface to be used may be identified by an address or by an
2718  * index.  A pointer to the index is passed; if it is NULL, use the
2719  * address, otherwise, use the index.
2720  */
2721 int
2722 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2723     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2724     mblk_t *first_mp)
2725 {
2726 	ipif_t	*ipif;
2727 	ipsq_t	*ipsq;
2728 	int err = 0;
2729 	ill_t	*ill;
2730 
2731 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2732 	    ip_restart_optmgmt, &ipif);
2733 	if (err != 0) {
2734 		if (err != EINPROGRESS) {
2735 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2736 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2737 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2738 		}
2739 		return (err);
2740 	}
2741 	ASSERT(ipif != NULL);
2742 
2743 	ill = ipif->ipif_ill;
2744 	/* Operation not supported on a virtual network interface */
2745 	if (IS_VNI(ill)) {
2746 		ipif_refrele(ipif);
2747 		return (EINVAL);
2748 	}
2749 
2750 	if (checkonly) {
2751 		/*
2752 		 * do not do operation, just pretend to - new T_CHECK
2753 		 * semantics. The error return case above if encountered
2754 		 * considered a good enough "check" here.
2755 		 */
2756 		ipif_refrele(ipif);
2757 		return (0);
2758 	}
2759 
2760 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2761 	    NEW_OP);
2762 
2763 	/* unspecified source addr => no source filtering */
2764 	err = ilg_add(connp, group, ipif, fmode, src);
2765 
2766 	IPSQ_EXIT(ipsq);
2767 
2768 	ipif_refrele(ipif);
2769 	return (err);
2770 }
2771 
2772 /*
2773  * Handle the following optmgmt:
2774  *	IPV6_JOIN_GROUP			must not have joined already
2775  *	MCAST_JOIN_GROUP		must not have joined already
2776  *	MCAST_BLOCK_SOURCE		must have joined already
2777  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2778  *
2779  * fmode and src parameters may be used to determine which option is
2780  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2781  * are functionally equivalent):
2782  *	opt			fmode			v6src
2783  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2784  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2785  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2786  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2787  *
2788  * Changing the filter mode is not allowed; if a matching ilg already
2789  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2790  *
2791  * Verifies that there is a source address of appropriate scope for
2792  * the group; if not, EADDRNOTAVAIL is returned.
2793  *
2794  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2795  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2796  * v6src is also v4-mapped.
2797  */
2798 int
2799 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2800     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2801     const in6_addr_t *v6src, mblk_t *first_mp)
2802 {
2803 	ill_t *ill;
2804 	ipif_t	*ipif;
2805 	char buf[INET6_ADDRSTRLEN];
2806 	ipaddr_t v4group, v4src;
2807 	boolean_t isv6;
2808 	ipsq_t	*ipsq;
2809 	int	err;
2810 
2811 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2812 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2813 	if (err != 0) {
2814 		if (err != EINPROGRESS) {
2815 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2816 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2817 			    sizeof (buf)), ifindex));
2818 		}
2819 		return (err);
2820 	}
2821 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2822 
2823 	/* operation is not supported on the virtual network interface */
2824 	if (isv6) {
2825 		if (IS_VNI(ill)) {
2826 			ill_refrele(ill);
2827 			return (EINVAL);
2828 		}
2829 	} else {
2830 		if (IS_VNI(ipif->ipif_ill)) {
2831 			ipif_refrele(ipif);
2832 			return (EINVAL);
2833 		}
2834 	}
2835 
2836 	if (checkonly) {
2837 		/*
2838 		 * do not do operation, just pretend to - new T_CHECK
2839 		 * semantics. The error return case above if encountered
2840 		 * considered a good enough "check" here.
2841 		 */
2842 		if (isv6)
2843 			ill_refrele(ill);
2844 		else
2845 			ipif_refrele(ipif);
2846 		return (0);
2847 	}
2848 
2849 	if (!isv6) {
2850 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2851 		    ipsq, NEW_OP);
2852 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2853 		IPSQ_EXIT(ipsq);
2854 		ipif_refrele(ipif);
2855 	} else {
2856 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2857 		    ipsq, NEW_OP);
2858 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2859 		IPSQ_EXIT(ipsq);
2860 		ill_refrele(ill);
2861 	}
2862 
2863 	return (err);
2864 }
2865 
2866 static int
2867 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2868     mcast_record_t fmode, ipaddr_t src)
2869 {
2870 	ilg_t	*ilg;
2871 	in6_addr_t v6src;
2872 	boolean_t leaving = B_FALSE;
2873 
2874 	ASSERT(IAM_WRITER_IPIF(ipif));
2875 
2876 	/*
2877 	 * The ilg is valid only while we hold the conn lock. Once we drop
2878 	 * the lock, another thread can locate another ilg on this connp,
2879 	 * but on a different ipif, and delete it, and cause the ilg array
2880 	 * to be reallocated and copied. Hence do the ilg_delete before
2881 	 * dropping the lock.
2882 	 */
2883 	mutex_enter(&connp->conn_lock);
2884 	ilg = ilg_lookup_ipif(connp, group, ipif);
2885 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2886 		mutex_exit(&connp->conn_lock);
2887 		return (EADDRNOTAVAIL);
2888 	}
2889 
2890 	/*
2891 	 * Decide if we're actually deleting the ilg or just removing a
2892 	 * source filter address; if just removing an addr, make sure we
2893 	 * aren't trying to change the filter mode, and that the addr is
2894 	 * actually in our filter list already.  If we're removing the
2895 	 * last src in an include list, just delete the ilg.
2896 	 */
2897 	if (src == INADDR_ANY) {
2898 		v6src = ipv6_all_zeros;
2899 		leaving = B_TRUE;
2900 	} else {
2901 		int err = 0;
2902 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2903 		if (fmode != ilg->ilg_fmode)
2904 			err = EINVAL;
2905 		else if (ilg->ilg_filter == NULL ||
2906 		    !list_has_addr(ilg->ilg_filter, &v6src))
2907 			err = EADDRNOTAVAIL;
2908 		if (err != 0) {
2909 			mutex_exit(&connp->conn_lock);
2910 			return (err);
2911 		}
2912 		if (fmode == MODE_IS_INCLUDE &&
2913 		    ilg->ilg_filter->sl_numsrc == 1) {
2914 			v6src = ipv6_all_zeros;
2915 			leaving = B_TRUE;
2916 		}
2917 	}
2918 
2919 	ilg_delete(connp, ilg, &v6src);
2920 	mutex_exit(&connp->conn_lock);
2921 
2922 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2923 	return (0);
2924 }
2925 
2926 static int
2927 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2928     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2929 {
2930 	ilg_t	*ilg;
2931 	ill_t	*ilg_ill;
2932 	uint_t	ilg_orig_ifindex;
2933 	boolean_t leaving = B_TRUE;
2934 
2935 	ASSERT(IAM_WRITER_ILL(ill));
2936 
2937 	/*
2938 	 * Use the index that we originally used to join. We can't
2939 	 * use the ill directly because ilg_ill could point to
2940 	 * a new ill if things have moved.
2941 	 */
2942 	mutex_enter(&connp->conn_lock);
2943 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2944 	    ill->ill_phyint->phyint_ifindex);
2945 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2946 		mutex_exit(&connp->conn_lock);
2947 		return (EADDRNOTAVAIL);
2948 	}
2949 
2950 	/*
2951 	 * Decide if we're actually deleting the ilg or just removing a
2952 	 * source filter address; if just removing an addr, make sure we
2953 	 * aren't trying to change the filter mode, and that the addr is
2954 	 * actually in our filter list already.  If we're removing the
2955 	 * last src in an include list, just delete the ilg.
2956 	 */
2957 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2958 		int err = 0;
2959 		if (fmode != ilg->ilg_fmode)
2960 			err = EINVAL;
2961 		else if (ilg->ilg_filter == NULL ||
2962 		    !list_has_addr(ilg->ilg_filter, v6src))
2963 			err = EADDRNOTAVAIL;
2964 		if (err != 0) {
2965 			mutex_exit(&connp->conn_lock);
2966 			return (err);
2967 		}
2968 		if (fmode == MODE_IS_INCLUDE &&
2969 		    ilg->ilg_filter->sl_numsrc == 1)
2970 			v6src = NULL;
2971 		else
2972 			leaving = B_FALSE;
2973 	}
2974 
2975 	ilg_ill = ilg->ilg_ill;
2976 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2977 	ilg_delete(connp, ilg, v6src);
2978 	mutex_exit(&connp->conn_lock);
2979 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2980 	    connp->conn_zoneid, B_FALSE, leaving);
2981 
2982 	return (0);
2983 }
2984 
2985 /*
2986  * Handle the following optmgmt:
2987  *	IP_DROP_MEMBERSHIP		will leave
2988  *	MCAST_LEAVE_GROUP		will leave
2989  *	IP_UNBLOCK_SOURCE		will not leave
2990  *	MCAST_UNBLOCK_SOURCE		will not leave
2991  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2992  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2993  *
2994  * fmode and src parameters may be used to determine which option is
2995  * being set, as follows (the IP_* and MCAST_* versions of each option
2996  * are functionally equivalent):
2997  *	opt			 fmode			src
2998  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2999  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3000  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3001  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3002  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3003  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3004  *
3005  * Changing the filter mode is not allowed; if a matching ilg already
3006  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3007  *
3008  * The interface to be used may be identified by an address or by an
3009  * index.  A pointer to the index is passed; if it is NULL, use the
3010  * address, otherwise, use the index.
3011  */
3012 int
3013 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3014     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3015     mblk_t *first_mp)
3016 {
3017 	ipif_t	*ipif;
3018 	ipsq_t	*ipsq;
3019 	int	err;
3020 	ill_t	*ill;
3021 
3022 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3023 	    ip_restart_optmgmt, &ipif);
3024 	if (err != 0) {
3025 		if (err != EINPROGRESS) {
3026 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3027 			    "0x%x, ifaddr 0x%x\n",
3028 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3029 		}
3030 		return (err);
3031 	}
3032 	ASSERT(ipif != NULL);
3033 
3034 	ill = ipif->ipif_ill;
3035 	/* Operation not supported on a virtual network interface */
3036 	if (IS_VNI(ill)) {
3037 		ipif_refrele(ipif);
3038 		return (EINVAL);
3039 	}
3040 
3041 	if (checkonly) {
3042 		/*
3043 		 * do not do operation, just pretend to - new T_CHECK
3044 		 * semantics. The error return case above if encountered
3045 		 * considered a good enough "check" here.
3046 		 */
3047 		ipif_refrele(ipif);
3048 		return (0);
3049 	}
3050 
3051 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3052 	    NEW_OP);
3053 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3054 	IPSQ_EXIT(ipsq);
3055 
3056 	ipif_refrele(ipif);
3057 	return (err);
3058 }
3059 
3060 /*
3061  * Handle the following optmgmt:
3062  *	IPV6_LEAVE_GROUP		will leave
3063  *	MCAST_LEAVE_GROUP		will leave
3064  *	MCAST_UNBLOCK_SOURCE		will not leave
3065  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3066  *
3067  * fmode and src parameters may be used to determine which option is
3068  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3069  * are functionally equivalent):
3070  *	opt			 fmode			v6src
3071  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3072  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3073  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3074  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3075  *
3076  * Changing the filter mode is not allowed; if a matching ilg already
3077  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3078  *
3079  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3080  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3081  * v6src is also v4-mapped.
3082  */
3083 int
3084 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3085     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3086     const in6_addr_t *v6src, mblk_t *first_mp)
3087 {
3088 	ill_t *ill;
3089 	ipif_t	*ipif;
3090 	char	buf[INET6_ADDRSTRLEN];
3091 	ipaddr_t v4group, v4src;
3092 	boolean_t isv6;
3093 	ipsq_t	*ipsq;
3094 	int	err;
3095 
3096 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3097 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3098 	if (err != 0) {
3099 		if (err != EINPROGRESS) {
3100 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3101 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3102 			    sizeof (buf)), ifindex));
3103 		}
3104 		return (err);
3105 	}
3106 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3107 
3108 	/* operation is not supported on the virtual network interface */
3109 	if (isv6) {
3110 		if (IS_VNI(ill)) {
3111 			ill_refrele(ill);
3112 			return (EINVAL);
3113 		}
3114 	} else {
3115 		if (IS_VNI(ipif->ipif_ill)) {
3116 			ipif_refrele(ipif);
3117 			return (EINVAL);
3118 		}
3119 	}
3120 
3121 	if (checkonly) {
3122 		/*
3123 		 * do not do operation, just pretend to - new T_CHECK
3124 		 * semantics. The error return case above if encountered
3125 		 * considered a good enough "check" here.
3126 		 */
3127 		if (isv6)
3128 			ill_refrele(ill);
3129 		else
3130 			ipif_refrele(ipif);
3131 		return (0);
3132 	}
3133 
3134 	if (!isv6) {
3135 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3136 		    ipsq, NEW_OP);
3137 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3138 		    v4src);
3139 		IPSQ_EXIT(ipsq);
3140 		ipif_refrele(ipif);
3141 	} else {
3142 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3143 		    ipsq, NEW_OP);
3144 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3145 		    v6src);
3146 		IPSQ_EXIT(ipsq);
3147 		ill_refrele(ill);
3148 	}
3149 
3150 	return (err);
3151 }
3152 
3153 /*
3154  * Group mgmt for upper conn that passes things down
3155  * to the interface multicast list (and DLPI)
3156  * These routines can handle new style options that specify an interface name
3157  * as opposed to an interface address (needed for general handling of
3158  * unnumbered interfaces.)
3159  */
3160 
3161 /*
3162  * Add a group to an upper conn group data structure and pass things down
3163  * to the interface multicast list (and DLPI)
3164  */
3165 static int
3166 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3167     ipaddr_t src)
3168 {
3169 	int	error = 0;
3170 	ill_t	*ill;
3171 	ilg_t	*ilg;
3172 	ilg_stat_t ilgstat;
3173 	slist_t	*new_filter = NULL;
3174 	int	new_fmode;
3175 
3176 	ASSERT(IAM_WRITER_IPIF(ipif));
3177 
3178 	ill = ipif->ipif_ill;
3179 
3180 	if (!(ill->ill_flags & ILLF_MULTICAST))
3181 		return (EADDRNOTAVAIL);
3182 
3183 	/*
3184 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3185 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3186 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3187 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3188 	 * but both operations happen on the same conn.
3189 	 */
3190 	mutex_enter(&connp->conn_lock);
3191 	ilg = ilg_lookup_ipif(connp, group, ipif);
3192 
3193 	/*
3194 	 * Depending on the option we're handling, may or may not be okay
3195 	 * if group has already been added.  Figure out our rules based
3196 	 * on fmode and src params.  Also make sure there's enough room
3197 	 * in the filter if we're adding a source to an existing filter.
3198 	 */
3199 	if (src == INADDR_ANY) {
3200 		/* we're joining for all sources, must not have joined */
3201 		if (ilg != NULL)
3202 			error = EADDRINUSE;
3203 	} else {
3204 		if (fmode == MODE_IS_EXCLUDE) {
3205 			/* (excl {addr}) => block source, must have joined */
3206 			if (ilg == NULL)
3207 				error = EADDRNOTAVAIL;
3208 		}
3209 		/* (incl {addr}) => join source, may have joined */
3210 
3211 		if (ilg != NULL &&
3212 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3213 			error = ENOBUFS;
3214 	}
3215 	if (error != 0) {
3216 		mutex_exit(&connp->conn_lock);
3217 		return (error);
3218 	}
3219 
3220 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3221 
3222 	/*
3223 	 * Alloc buffer to copy new state into (see below) before
3224 	 * we make any changes, so we can bail if it fails.
3225 	 */
3226 	if ((new_filter = l_alloc()) == NULL) {
3227 		mutex_exit(&connp->conn_lock);
3228 		return (ENOMEM);
3229 	}
3230 
3231 	if (ilg == NULL) {
3232 		ilgstat = ILGSTAT_NEW;
3233 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3234 			mutex_exit(&connp->conn_lock);
3235 			l_free(new_filter);
3236 			return (ENOMEM);
3237 		}
3238 		if (src != INADDR_ANY) {
3239 			ilg->ilg_filter = l_alloc();
3240 			if (ilg->ilg_filter == NULL) {
3241 				ilg_delete(connp, ilg, NULL);
3242 				mutex_exit(&connp->conn_lock);
3243 				l_free(new_filter);
3244 				return (ENOMEM);
3245 			}
3246 			ilg->ilg_filter->sl_numsrc = 1;
3247 			IN6_IPADDR_TO_V4MAPPED(src,
3248 			    &ilg->ilg_filter->sl_addr[0]);
3249 		}
3250 		if (group == INADDR_ANY) {
3251 			ilg->ilg_v6group = ipv6_all_zeros;
3252 		} else {
3253 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3254 		}
3255 		ilg->ilg_ipif = ipif;
3256 		ilg->ilg_ill = NULL;
3257 		ilg->ilg_orig_ifindex = 0;
3258 		ilg->ilg_fmode = fmode;
3259 	} else {
3260 		int index;
3261 		in6_addr_t v6src;
3262 		ilgstat = ILGSTAT_CHANGE;
3263 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3264 			mutex_exit(&connp->conn_lock);
3265 			l_free(new_filter);
3266 			return (EINVAL);
3267 		}
3268 		if (ilg->ilg_filter == NULL) {
3269 			ilg->ilg_filter = l_alloc();
3270 			if (ilg->ilg_filter == NULL) {
3271 				mutex_exit(&connp->conn_lock);
3272 				l_free(new_filter);
3273 				return (ENOMEM);
3274 			}
3275 		}
3276 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3277 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3278 			mutex_exit(&connp->conn_lock);
3279 			l_free(new_filter);
3280 			return (EADDRNOTAVAIL);
3281 		}
3282 		index = ilg->ilg_filter->sl_numsrc++;
3283 		ilg->ilg_filter->sl_addr[index] = v6src;
3284 	}
3285 
3286 	/*
3287 	 * Save copy of ilg's filter state to pass to other functions,
3288 	 * so we can release conn_lock now.
3289 	 */
3290 	new_fmode = ilg->ilg_fmode;
3291 	l_copy(ilg->ilg_filter, new_filter);
3292 
3293 	mutex_exit(&connp->conn_lock);
3294 
3295 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3296 	if (error != 0) {
3297 		/*
3298 		 * Need to undo what we did before calling ip_addmulti()!
3299 		 * Must look up the ilg again since we've not been holding
3300 		 * conn_lock.
3301 		 */
3302 		in6_addr_t v6src;
3303 		if (ilgstat == ILGSTAT_NEW)
3304 			v6src = ipv6_all_zeros;
3305 		else
3306 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3307 		mutex_enter(&connp->conn_lock);
3308 		ilg = ilg_lookup_ipif(connp, group, ipif);
3309 		ASSERT(ilg != NULL);
3310 		ilg_delete(connp, ilg, &v6src);
3311 		mutex_exit(&connp->conn_lock);
3312 		l_free(new_filter);
3313 		return (error);
3314 	}
3315 
3316 	l_free(new_filter);
3317 	return (0);
3318 }
3319 
3320 static int
3321 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3322     mcast_record_t fmode, const in6_addr_t *v6src)
3323 {
3324 	int	error = 0;
3325 	int	orig_ifindex;
3326 	ilg_t	*ilg;
3327 	ilg_stat_t ilgstat;
3328 	slist_t	*new_filter = NULL;
3329 	int	new_fmode;
3330 
3331 	ASSERT(IAM_WRITER_ILL(ill));
3332 
3333 	if (!(ill->ill_flags & ILLF_MULTICAST))
3334 		return (EADDRNOTAVAIL);
3335 
3336 	/*
3337 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3338 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3339 	 * and hme1 map to different ipsq's, but both operations happen
3340 	 * on the same conn.
3341 	 */
3342 	mutex_enter(&connp->conn_lock);
3343 
3344 	/*
3345 	 * Use the ifindex to do the lookup. We can't use the ill
3346 	 * directly because ilg_ill could point to a different ill if
3347 	 * things have moved.
3348 	 */
3349 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3350 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3351 
3352 	/*
3353 	 * Depending on the option we're handling, may or may not be okay
3354 	 * if group has already been added.  Figure out our rules based
3355 	 * on fmode and src params.  Also make sure there's enough room
3356 	 * in the filter if we're adding a source to an existing filter.
3357 	 */
3358 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3359 		/* we're joining for all sources, must not have joined */
3360 		if (ilg != NULL)
3361 			error = EADDRINUSE;
3362 	} else {
3363 		if (fmode == MODE_IS_EXCLUDE) {
3364 			/* (excl {addr}) => block source, must have joined */
3365 			if (ilg == NULL)
3366 				error = EADDRNOTAVAIL;
3367 		}
3368 		/* (incl {addr}) => join source, may have joined */
3369 
3370 		if (ilg != NULL &&
3371 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3372 			error = ENOBUFS;
3373 	}
3374 	if (error != 0) {
3375 		mutex_exit(&connp->conn_lock);
3376 		return (error);
3377 	}
3378 
3379 	/*
3380 	 * Alloc buffer to copy new state into (see below) before
3381 	 * we make any changes, so we can bail if it fails.
3382 	 */
3383 	if ((new_filter = l_alloc()) == NULL) {
3384 		mutex_exit(&connp->conn_lock);
3385 		return (ENOMEM);
3386 	}
3387 
3388 	if (ilg == NULL) {
3389 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3390 			mutex_exit(&connp->conn_lock);
3391 			l_free(new_filter);
3392 			return (ENOMEM);
3393 		}
3394 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3395 			ilg->ilg_filter = l_alloc();
3396 			if (ilg->ilg_filter == NULL) {
3397 				ilg_delete(connp, ilg, NULL);
3398 				mutex_exit(&connp->conn_lock);
3399 				l_free(new_filter);
3400 				return (ENOMEM);
3401 			}
3402 			ilg->ilg_filter->sl_numsrc = 1;
3403 			ilg->ilg_filter->sl_addr[0] = *v6src;
3404 		}
3405 		ilgstat = ILGSTAT_NEW;
3406 		ilg->ilg_v6group = *v6group;
3407 		ilg->ilg_fmode = fmode;
3408 		ilg->ilg_ipif = NULL;
3409 		/*
3410 		 * Choose our target ill to join on. This might be different
3411 		 * from the ill we've been given if it's currently down and
3412 		 * part of a group.
3413 		 *
3414 		 * new ill is not refheld; we are writer.
3415 		 */
3416 		ill = ip_choose_multi_ill(ill, v6group);
3417 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3418 		ilg->ilg_ill = ill;
3419 		/*
3420 		 * Remember the orig_ifindex that we joined on, so that we
3421 		 * can successfully delete them later on and also search
3422 		 * for duplicates if the application wants to join again.
3423 		 */
3424 		ilg->ilg_orig_ifindex = orig_ifindex;
3425 	} else {
3426 		int index;
3427 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3428 			mutex_exit(&connp->conn_lock);
3429 			l_free(new_filter);
3430 			return (EINVAL);
3431 		}
3432 		if (ilg->ilg_filter == NULL) {
3433 			ilg->ilg_filter = l_alloc();
3434 			if (ilg->ilg_filter == NULL) {
3435 				mutex_exit(&connp->conn_lock);
3436 				l_free(new_filter);
3437 				return (ENOMEM);
3438 			}
3439 		}
3440 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3441 			mutex_exit(&connp->conn_lock);
3442 			l_free(new_filter);
3443 			return (EADDRNOTAVAIL);
3444 		}
3445 		ilgstat = ILGSTAT_CHANGE;
3446 		index = ilg->ilg_filter->sl_numsrc++;
3447 		ilg->ilg_filter->sl_addr[index] = *v6src;
3448 		/*
3449 		 * The current ill might be different from the one we were
3450 		 * asked to join on (if failover has occurred); we should
3451 		 * join on the ill stored in the ilg.  The original ill
3452 		 * is noted in ilg_orig_ifindex, which matched our request.
3453 		 */
3454 		ill = ilg->ilg_ill;
3455 	}
3456 
3457 	/*
3458 	 * Save copy of ilg's filter state to pass to other functions,
3459 	 * so we can release conn_lock now.
3460 	 */
3461 	new_fmode = ilg->ilg_fmode;
3462 	l_copy(ilg->ilg_filter, new_filter);
3463 
3464 	mutex_exit(&connp->conn_lock);
3465 
3466 	/*
3467 	 * Now update the ill. We wait to do this until after the ilg
3468 	 * has been updated because we need to update the src filter
3469 	 * info for the ill, which involves looking at the status of
3470 	 * all the ilgs associated with this group/interface pair.
3471 	 */
3472 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3473 	    ilgstat, new_fmode, new_filter);
3474 	if (error != 0) {
3475 		/*
3476 		 * But because we waited, we have to undo the ilg update
3477 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3478 		 * again, since we've not been holding conn_lock.
3479 		 */
3480 		in6_addr_t delsrc =
3481 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3482 		mutex_enter(&connp->conn_lock);
3483 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3484 		ASSERT(ilg != NULL);
3485 		ilg_delete(connp, ilg, &delsrc);
3486 		mutex_exit(&connp->conn_lock);
3487 		l_free(new_filter);
3488 		return (error);
3489 	}
3490 
3491 	l_free(new_filter);
3492 
3493 	return (0);
3494 }
3495 
3496 /*
3497  * Find an IPv4 ilg matching group, ill and source
3498  */
3499 ilg_t *
3500 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3501 {
3502 	in6_addr_t v6group, v6src;
3503 	int i;
3504 	boolean_t isinlist;
3505 	ilg_t *ilg;
3506 	ipif_t *ipif;
3507 	ill_t *ilg_ill;
3508 
3509 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3510 
3511 	/*
3512 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3513 	 */
3514 	if (group == INADDR_ANY)
3515 		v6group = ipv6_all_zeros;
3516 	else
3517 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3518 
3519 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3520 		/* ilg_ipif is NULL for v6; skip them */
3521 		ilg = &connp->conn_ilg[i];
3522 		if ((ipif = ilg->ilg_ipif) == NULL)
3523 			continue;
3524 		ASSERT(ilg->ilg_ill == NULL);
3525 		ilg_ill = ipif->ipif_ill;
3526 		ASSERT(!ilg_ill->ill_isv6);
3527 		if (ilg_ill == ill &&
3528 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3529 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3530 				/* no source filter, so this is a match */
3531 				return (ilg);
3532 			}
3533 			break;
3534 		}
3535 	}
3536 	if (i == connp->conn_ilg_inuse)
3537 		return (NULL);
3538 
3539 	/*
3540 	 * we have an ilg with matching ill and group; but
3541 	 * the ilg has a source list that we must check.
3542 	 */
3543 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3544 	isinlist = B_FALSE;
3545 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3546 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3547 			isinlist = B_TRUE;
3548 			break;
3549 		}
3550 	}
3551 
3552 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3553 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3554 		return (ilg);
3555 
3556 	return (NULL);
3557 }
3558 
3559 /*
3560  * Find an IPv6 ilg matching group, ill, and source
3561  */
3562 ilg_t *
3563 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3564     const in6_addr_t *v6src, ill_t *ill)
3565 {
3566 	int i;
3567 	boolean_t isinlist;
3568 	ilg_t *ilg;
3569 	ill_t *ilg_ill;
3570 
3571 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3572 
3573 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3574 		ilg = &connp->conn_ilg[i];
3575 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3576 			continue;
3577 		ASSERT(ilg->ilg_ipif == NULL);
3578 		ASSERT(ilg_ill->ill_isv6);
3579 		if (ilg_ill == ill &&
3580 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3581 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3582 				/* no source filter, so this is a match */
3583 				return (ilg);
3584 			}
3585 			break;
3586 		}
3587 	}
3588 	if (i == connp->conn_ilg_inuse)
3589 		return (NULL);
3590 
3591 	/*
3592 	 * we have an ilg with matching ill and group; but
3593 	 * the ilg has a source list that we must check.
3594 	 */
3595 	isinlist = B_FALSE;
3596 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3597 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3598 			isinlist = B_TRUE;
3599 			break;
3600 		}
3601 	}
3602 
3603 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3604 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3605 		return (ilg);
3606 
3607 	return (NULL);
3608 }
3609 
3610 /*
3611  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3612  * This is useful when the interface fails and we have moved
3613  * to a new ill, but still would like to locate using the index
3614  * that we originally used to join. Used only for IPv6 currently.
3615  */
3616 static ilg_t *
3617 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3618 {
3619 	ilg_t	*ilg;
3620 	int	i;
3621 
3622 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3623 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3624 		ilg = &connp->conn_ilg[i];
3625 		/* ilg_ill is NULL for V4. Skip them */
3626 		if (ilg->ilg_ill == NULL)
3627 			continue;
3628 		/* ilg_ipif is NULL for V6 */
3629 		ASSERT(ilg->ilg_ipif == NULL);
3630 		ASSERT(ilg->ilg_orig_ifindex != 0);
3631 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3632 		    ilg->ilg_orig_ifindex == ifindex) {
3633 			return (ilg);
3634 		}
3635 	}
3636 	return (NULL);
3637 }
3638 
3639 /*
3640  * Find an IPv6 ilg matching group and ill
3641  */
3642 ilg_t *
3643 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3644 {
3645 	ilg_t	*ilg;
3646 	int	i;
3647 	ill_t 	*mem_ill;
3648 
3649 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3650 
3651 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3652 		ilg = &connp->conn_ilg[i];
3653 		if ((mem_ill = ilg->ilg_ill) == NULL)
3654 			continue;
3655 		ASSERT(ilg->ilg_ipif == NULL);
3656 		ASSERT(mem_ill->ill_isv6);
3657 		if (mem_ill == ill &&
3658 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3659 			return (ilg);
3660 	}
3661 	return (NULL);
3662 }
3663 
3664 /*
3665  * Find an IPv4 ilg matching group and ipif
3666  */
3667 static ilg_t *
3668 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3669 {
3670 	in6_addr_t v6group;
3671 	int	i;
3672 
3673 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3674 	ASSERT(!ipif->ipif_ill->ill_isv6);
3675 
3676 	if (group == INADDR_ANY)
3677 		v6group = ipv6_all_zeros;
3678 	else
3679 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3680 
3681 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3682 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3683 		    &v6group) &&
3684 		    connp->conn_ilg[i].ilg_ipif == ipif)
3685 			return (&connp->conn_ilg[i]);
3686 	}
3687 	return (NULL);
3688 }
3689 
3690 /*
3691  * If a source address is passed in (src != NULL and src is not
3692  * unspecified), remove the specified src addr from the given ilg's
3693  * filter list, else delete the ilg.
3694  */
3695 static void
3696 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3697 {
3698 	int	i;
3699 
3700 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3701 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3702 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3703 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3704 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3705 
3706 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3707 		if (connp->conn_ilg_walker_cnt != 0) {
3708 			ilg->ilg_flags |= ILG_DELETED;
3709 			return;
3710 		}
3711 
3712 		FREE_SLIST(ilg->ilg_filter);
3713 
3714 		i = ilg - &connp->conn_ilg[0];
3715 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3716 
3717 		/* Move other entries up one step */
3718 		connp->conn_ilg_inuse--;
3719 		for (; i < connp->conn_ilg_inuse; i++)
3720 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3721 
3722 		if (connp->conn_ilg_inuse == 0) {
3723 			mi_free((char *)connp->conn_ilg);
3724 			connp->conn_ilg = NULL;
3725 			cv_broadcast(&connp->conn_refcv);
3726 		}
3727 	} else {
3728 		l_remove(ilg->ilg_filter, src);
3729 	}
3730 }
3731 
3732 /*
3733  * Called from conn close. No new ilg can be added or removed.
3734  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3735  * will return error if conn has started closing.
3736  */
3737 void
3738 ilg_delete_all(conn_t *connp)
3739 {
3740 	int	i;
3741 	ipif_t	*ipif = NULL;
3742 	ill_t	*ill = NULL;
3743 	ilg_t	*ilg;
3744 	in6_addr_t v6group;
3745 	boolean_t success;
3746 	ipsq_t	*ipsq;
3747 	int	orig_ifindex;
3748 
3749 	mutex_enter(&connp->conn_lock);
3750 retry:
3751 	ILG_WALKER_HOLD(connp);
3752 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3753 		ilg = &connp->conn_ilg[i];
3754 		/*
3755 		 * Since this walk is not atomic (we drop the
3756 		 * conn_lock and wait in ipsq_enter) we need
3757 		 * to check for the ILG_DELETED flag.
3758 		 */
3759 		if (ilg->ilg_flags & ILG_DELETED) {
3760 			/* Go to the next ilg */
3761 			i--;
3762 			continue;
3763 		}
3764 		v6group = ilg->ilg_v6group;
3765 
3766 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3767 			ipif = ilg->ilg_ipif;
3768 			ill = ipif->ipif_ill;
3769 		} else {
3770 			ipif = NULL;
3771 			ill = ilg->ilg_ill;
3772 		}
3773 		/*
3774 		 * We may not be able to refhold the ill if the ill/ipif
3775 		 * is changing. But we need to make sure that the ill will
3776 		 * not vanish. So we just bump up the ill_waiter count.
3777 		 * If we are unable to do even that, then the ill is closing,
3778 		 * in which case the unplumb thread will handle the cleanup,
3779 		 * and we move on to the next ilg.
3780 		 */
3781 		if (!ill_waiter_inc(ill)) {
3782 			/* Go to the next ilg */
3783 			i--;
3784 			continue;
3785 		}
3786 		mutex_exit(&connp->conn_lock);
3787 		/*
3788 		 * To prevent deadlock between ill close which waits inside
3789 		 * the perimeter, and conn close, ipsq_enter returns error,
3790 		 * the moment ILL_CONDEMNED is set, in which case ill close
3791 		 * takes responsibility to cleanup the ilgs. Note that we
3792 		 * have not yet set condemned flag, otherwise the conn can't
3793 		 * be refheld for cleanup by those routines and it would be
3794 		 * a mutual deadlock.
3795 		 */
3796 		success = ipsq_enter(ill, B_FALSE);
3797 		ipsq = ill->ill_phyint->phyint_ipsq;
3798 		ill_waiter_dcr(ill);
3799 		mutex_enter(&connp->conn_lock);
3800 		if (!success) {
3801 			/* Go to the next ilg */
3802 			i--;
3803 			continue;
3804 		}
3805 
3806 		/*
3807 		 * Make sure that nothing has changed under. For eg.
3808 		 * a failover/failback can change ilg_ill while we were
3809 		 * waiting to become exclusive above
3810 		 */
3811 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3812 			ipif = ilg->ilg_ipif;
3813 			ill = ipif->ipif_ill;
3814 		} else {
3815 			ipif = NULL;
3816 			ill = ilg->ilg_ill;
3817 		}
3818 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3819 			/*
3820 			 * The ilg has changed under us probably due
3821 			 * to a failover or unplumb. Retry on the same ilg.
3822 			 */
3823 			mutex_exit(&connp->conn_lock);
3824 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3825 			mutex_enter(&connp->conn_lock);
3826 			continue;
3827 		}
3828 		v6group = ilg->ilg_v6group;
3829 		orig_ifindex = ilg->ilg_orig_ifindex;
3830 		ilg_delete(connp, ilg, NULL);
3831 		mutex_exit(&connp->conn_lock);
3832 
3833 		if (ipif != NULL)
3834 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3835 			    B_FALSE, B_TRUE);
3836 
3837 		else
3838 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3839 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3840 
3841 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3842 		mutex_enter(&connp->conn_lock);
3843 		/* Go to the next ilg */
3844 		i--;
3845 	}
3846 	ILG_WALKER_RELE(connp);
3847 
3848 	/* If any ill was skipped above wait and retry */
3849 	if (connp->conn_ilg_inuse != 0) {
3850 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3851 		goto retry;
3852 	}
3853 	mutex_exit(&connp->conn_lock);
3854 }
3855 
3856 /*
3857  * Called from ill close by ipcl_walk for clearing conn_ilg and
3858  * conn_multicast_ipif for a given ipif. conn is held by caller.
3859  * Note that ipcl_walk only walks conns that are not yet condemned.
3860  * condemned conns can't be refheld. For this reason, conn must become clean
3861  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3862  * condemned flag.
3863  */
3864 static void
3865 conn_delete_ipif(conn_t *connp, caddr_t arg)
3866 {
3867 	ipif_t	*ipif = (ipif_t *)arg;
3868 	int	i;
3869 	char	group_buf1[INET6_ADDRSTRLEN];
3870 	char	group_buf2[INET6_ADDRSTRLEN];
3871 	ipaddr_t group;
3872 	ilg_t	*ilg;
3873 
3874 	/*
3875 	 * Even though conn_ilg_inuse can change while we are in this loop,
3876 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3877 	 * be created or deleted for this connp, on this ill, since this ill
3878 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3879 	 */
3880 	mutex_enter(&connp->conn_lock);
3881 
3882 	/*
3883 	 * Increment the walker count, so that ilg repacking does not
3884 	 * occur while we are in the loop.
3885 	 */
3886 	ILG_WALKER_HOLD(connp);
3887 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3888 		ilg = &connp->conn_ilg[i];
3889 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3890 			continue;
3891 		/*
3892 		 * ip_close cannot be cleaning this ilg at the same time.
3893 		 * since it also has to execute in this ill's perimeter which
3894 		 * we are now holding. Only a clean conn can be condemned.
3895 		 */
3896 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3897 
3898 		/* Blow away the membership */
3899 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3900 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3901 		    group_buf1, sizeof (group_buf1)),
3902 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3903 		    group_buf2, sizeof (group_buf2)),
3904 		    ipif->ipif_ill->ill_name));
3905 
3906 		/* ilg_ipif is NULL for V6, so we won't be here */
3907 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3908 
3909 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3910 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3911 		mutex_exit(&connp->conn_lock);
3912 
3913 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3914 		mutex_enter(&connp->conn_lock);
3915 	}
3916 
3917 	/*
3918 	 * If we are the last walker, need to physically delete the
3919 	 * ilgs and repack.
3920 	 */
3921 	ILG_WALKER_RELE(connp);
3922 
3923 	if (connp->conn_multicast_ipif == ipif) {
3924 		/* Revert to late binding */
3925 		connp->conn_multicast_ipif = NULL;
3926 	}
3927 	mutex_exit(&connp->conn_lock);
3928 
3929 	conn_delete_ire(connp, (caddr_t)ipif);
3930 }
3931 
3932 /*
3933  * Called from ill close by ipcl_walk for clearing conn_ilg and
3934  * conn_multicast_ill for a given ill. conn is held by caller.
3935  * Note that ipcl_walk only walks conns that are not yet condemned.
3936  * condemned conns can't be refheld. For this reason, conn must become clean
3937  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3938  * condemned flag.
3939  */
3940 static void
3941 conn_delete_ill(conn_t *connp, caddr_t arg)
3942 {
3943 	ill_t	*ill = (ill_t *)arg;
3944 	int	i;
3945 	char	group_buf[INET6_ADDRSTRLEN];
3946 	in6_addr_t v6group;
3947 	int	orig_ifindex;
3948 	ilg_t	*ilg;
3949 
3950 	/*
3951 	 * Even though conn_ilg_inuse can change while we are in this loop,
3952 	 * no new ilgs can be created/deleted for this connp, on this
3953 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3954 	 * in this cleanup.
3955 	 */
3956 	mutex_enter(&connp->conn_lock);
3957 
3958 	/*
3959 	 * Increment the walker count, so that ilg repacking does not
3960 	 * occur while we are in the loop.
3961 	 */
3962 	ILG_WALKER_HOLD(connp);
3963 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3964 		ilg = &connp->conn_ilg[i];
3965 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3966 			/*
3967 			 * ip_close cannot be cleaning this ilg at the same
3968 			 * time, since it also has to execute in this ill's
3969 			 * perimeter which we are now holding. Only a clean
3970 			 * conn can be condemned.
3971 			 */
3972 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3973 
3974 			/* Blow away the membership */
3975 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3976 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3977 			    group_buf, sizeof (group_buf)),
3978 			    ill->ill_name));
3979 
3980 			v6group = ilg->ilg_v6group;
3981 			orig_ifindex = ilg->ilg_orig_ifindex;
3982 			ilg_delete(connp, ilg, NULL);
3983 			mutex_exit(&connp->conn_lock);
3984 
3985 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3986 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3987 			mutex_enter(&connp->conn_lock);
3988 		}
3989 	}
3990 	/*
3991 	 * If we are the last walker, need to physically delete the
3992 	 * ilgs and repack.
3993 	 */
3994 	ILG_WALKER_RELE(connp);
3995 
3996 	if (connp->conn_multicast_ill == ill) {
3997 		/* Revert to late binding */
3998 		connp->conn_multicast_ill = NULL;
3999 		connp->conn_orig_multicast_ifindex = 0;
4000 	}
4001 	mutex_exit(&connp->conn_lock);
4002 }
4003 
4004 /*
4005  * Called when an ipif is unplumbed to make sure that there are no
4006  * dangling conn references to that ipif.
4007  * Handles ilg_ipif and conn_multicast_ipif
4008  */
4009 void
4010 reset_conn_ipif(ipif)
4011 	ipif_t	*ipif;
4012 {
4013 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif);
4014 }
4015 
4016 /*
4017  * Called when an ill is unplumbed to make sure that there are no
4018  * dangling conn references to that ill.
4019  * Handles ilg_ill, conn_multicast_ill.
4020  */
4021 void
4022 reset_conn_ill(ill_t *ill)
4023 {
4024 	ipcl_walk(conn_delete_ill, (caddr_t)ill);
4025 }
4026 
4027 #ifdef DEBUG
4028 /*
4029  * Walk functions walk all the interfaces in the system to make
4030  * sure that there is no refernece to the ipif or ill that is
4031  * going away.
4032  */
4033 int
4034 ilm_walk_ill(ill_t *ill)
4035 {
4036 	int cnt = 0;
4037 	ill_t *till;
4038 	ilm_t *ilm;
4039 	ill_walk_context_t ctx;
4040 
4041 	rw_enter(&ill_g_lock, RW_READER);
4042 	till = ILL_START_WALK_ALL(&ctx);
4043 	for (; till != NULL; till = ill_next(&ctx, till)) {
4044 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4045 			if (ilm->ilm_ill == ill) {
4046 				cnt++;
4047 			}
4048 		}
4049 	}
4050 	rw_exit(&ill_g_lock);
4051 
4052 	return (cnt);
4053 }
4054 
4055 /*
4056  * This function is called before the ipif is freed.
4057  */
4058 int
4059 ilm_walk_ipif(ipif_t *ipif)
4060 {
4061 	int cnt = 0;
4062 	ill_t *till;
4063 	ilm_t *ilm;
4064 	ill_walk_context_t ctx;
4065 
4066 	till = ILL_START_WALK_ALL(&ctx);
4067 	for (; till != NULL; till = ill_next(&ctx, till)) {
4068 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4069 			if (ilm->ilm_ipif == ipif) {
4070 					cnt++;
4071 			}
4072 		}
4073 	}
4074 	return (cnt);
4075 }
4076 #endif
4077