xref: /titanic_51/usr/src/uts/common/inet/ip/ip_multi.c (revision 35551380472894a564e057962b701af78f719377)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/dlpi.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/strlog.h>
36 #include <sys/ddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/zone.h>
39 
40 #include <sys/param.h>
41 #include <sys/socket.h>
42 #define	_SUN_TPI_VERSION	2
43 #include <sys/tihdr.h>
44 #include <net/if.h>
45 #include <net/if_arp.h>
46 #include <sys/sockio.h>
47 #include <sys/systm.h>
48 #include <net/route.h>
49 #include <netinet/in.h>
50 #include <net/if_dl.h>
51 #include <netinet/ip6.h>
52 #include <netinet/icmp6.h>
53 
54 #include <inet/common.h>
55 #include <inet/mi.h>
56 #include <inet/nd.h>
57 #include <inet/arp.h>
58 #include <inet/ip.h>
59 #include <inet/ip6.h>
60 #include <inet/ip_if.h>
61 #include <inet/ip_ire.h>
62 #include <inet/ip_ndp.h>
63 #include <inet/ip_multi.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/ipsec_impl.h>
66 #include <inet/sctp_ip.h>
67 #include <inet/ip_listutils.h>
68 #include <inet/udp_impl.h>
69 
70 #include <netinet/igmp.h>
71 
72 /* igmpv3/mldv2 source filter manipulation */
73 static void	ilm_bld_flists(conn_t *conn, void *arg);
74 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
75     slist_t *flist);
76 
77 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
78     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
79     int orig_ifindex, zoneid_t zoneid);
80 static void	ilm_delete(ilm_t *ilm);
81 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
82 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
83 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
84     const in6_addr_t *v6group, int index);
85 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
86     ipif_t *ipif);
87 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
88     mcast_record_t fmode, ipaddr_t src);
89 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
90     mcast_record_t fmode, const in6_addr_t *v6src);
91 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
92 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
93     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
94 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
95     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
96 static void	conn_ilg_reap(conn_t *connp);
97 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
98     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
99 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
100     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
101     const in6_addr_t *v6src);
102 
103 /*
104  * MT notes:
105  *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
117  *
118  * Multicast joins and leaves are single-threaded per phyint/IPMP group
119  * using the ipsq serialization mechanism.
120  *
121  * An ilm is an IP data structure used to track multicast join/leave.
122  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
123  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
124  * referencing the ilm. ilms are created / destroyed only as writer. ilms
125  * are not passed around, instead they are looked up and used under the
126  * ill_lock or as writer. So we don't need a dynamic refcount of the number
127  * of threads holding reference to an ilm.
128  *
129  * Multicast Join operation:
130  *
131  * The first step is to determine the ipif (v4) or ill (v6) on which
132  * the join operation is to be done. The join is done after becoming
133  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
134  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
135  * Multiple threads can attempt to join simultaneously on different ipif/ill
136  * on the same conn. In this case the ipsq serialization does not help in
137  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
138  * The conn_lock also protects all the ilg_t members.
139  *
140  * Leave operation.
141  *
 * Similar to the join operation, the first step is to determine the ipif
 * (v4) or ill (v6) on which the leave operation is to be done. The leave
 * operation is done after becoming exclusive on the ipsq associated with
 * the ipif or ill. As with join, ilg modification is done under the
 * protection of the conn_lock.
146  */
147 
/*
 * Try to enter the ipsq associated with `ipif' on behalf of `connp',
 * storing the result of ipsq_try_enter() in `ipsq'.
 *
 * NOTE: this macro hides a `return'.  If ipsq_try_enter() yields NULL,
 * ipif_refrele(ipif) is called and the *enclosing function* returns
 * EINPROGRESS, so it may only be used in functions returning int.
 *
 * The expansion is a sequence of statements (kept that way so existing
 * call sites continue to compile); use it only as a stand-alone statement
 * inside a braced block, never as the unbraced body of an if/else or loop.
 * Every macro argument is parenthesized where it is used in an expression.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}
156 
/*
 * Try to enter the ipsq associated with `ill' on behalf of `connp',
 * storing the result of ipsq_try_enter() in `ipsq'.
 *
 * NOTE: this macro hides a `return'.  If ipsq_try_enter() yields NULL,
 * ill_refrele(ill) is called and the *enclosing function* returns
 * EINPROGRESS, so it may only be used in functions returning int.
 *
 * The expansion is a sequence of statements (kept that way so existing
 * call sites continue to compile); use it only as a stand-alone statement
 * inside a braced block, never as the unbraced body of an if/else or loop.
 * Every macro argument is parenthesized where it is used in an expression.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}
165 
/*
 * Leave the ipsq if one was entered; a NULL `ipsq' is a no-op.
 *
 * The expansion is an unbraced `if' statement that already carries its own
 * terminating semicolon (kept for compatibility with existing call sites),
 * so do not use it as the unbraced body of an if that has an else branch.
 */
#define	IPSQ_EXIT(ipsq)	\
	if ((ipsq) != NULL)	\
		ipsq_exit((ipsq), B_TRUE, B_TRUE);
169 
/*
 * Walker accounting for connp->conn_ilg.
 *
 * ILG_WALKER_HOLD() bumps conn_ilg_walker_cnt; ILG_WALKER_RELE() drops it
 * and, when the count reaches zero, calls conn_ilg_reap() to discard the
 * entries that were flagged ILG_DELETED in the meantime.  Because
 * conn_ilg_reap() asserts that conn_lock is held, ILG_WALKER_RELE() must be
 * invoked with connp->conn_lock held.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}
178 
179 static void
180 conn_ilg_reap(conn_t *connp)
181 {
182 	int	to;
183 	int	from;
184 
185 	ASSERT(MUTEX_HELD(&connp->conn_lock));
186 
187 	to = 0;
188 	from = 0;
189 	while (from < connp->conn_ilg_inuse) {
190 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
191 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
192 			from++;
193 			continue;
194 		}
195 		if (to != from)
196 			connp->conn_ilg[to] = connp->conn_ilg[from];
197 		to++;
198 		from++;
199 	}
200 
201 	connp->conn_ilg_inuse = to;
202 
203 	if (connp->conn_ilg_inuse == 0) {
204 		mi_free((char *)connp->conn_ilg);
205 		connp->conn_ilg = NULL;
206 		cv_broadcast(&connp->conn_refcv);
207 	}
208 }
209 
210 #define	GETSTRUCT(structure, number)	\
211 	((structure *)mi_zalloc(sizeof (structure) * (number)))
212 
213 #define	ILG_ALLOC_CHUNK	16
214 
215 /*
216  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
217  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
218  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
219  * returned ilg).  Returns NULL on failure (ENOMEM).
220  *
221  * Assumes connp->conn_lock is held.
222  */
223 static ilg_t *
224 conn_ilg_alloc(conn_t *connp)
225 {
226 	ilg_t *new;
227 	int curcnt;
228 
229 	ASSERT(MUTEX_HELD(&connp->conn_lock));
230 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
231 
232 	if (connp->conn_ilg == NULL) {
233 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
234 		if (connp->conn_ilg == NULL)
235 			return (NULL);
236 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
237 		connp->conn_ilg_inuse = 0;
238 	}
239 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
240 		curcnt = connp->conn_ilg_allocated;
241 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
242 		if (new == NULL)
243 			return (NULL);
244 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
245 		mi_free((char *)connp->conn_ilg);
246 		connp->conn_ilg = new;
247 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
248 	}
249 
250 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
251 }
252 
253 typedef struct ilm_fbld_s {
254 	ilm_t		*fbld_ilm;
255 	int		fbld_in_cnt;
256 	int		fbld_ex_cnt;
257 	slist_t		fbld_in;
258 	slist_t		fbld_ex;
259 	boolean_t	fbld_in_overflow;
260 } ilm_fbld_t;
261 
262 static void
263 ilm_bld_flists(conn_t *conn, void *arg)
264 {
265 	int i;
266 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
267 	ilm_t *ilm = fbld->fbld_ilm;
268 	in6_addr_t *v6group = &ilm->ilm_v6addr;
269 
270 	if (conn->conn_ilg_inuse == 0)
271 		return;
272 
273 	/*
274 	 * Since we can't break out of the ipcl_walk once started, we still
275 	 * have to look at every conn.  But if we've already found one
276 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
277 	 * ilgs--that will be our state.
278 	 */
279 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
280 		return;
281 
282 	/*
283 	 * Check this conn's ilgs to see if any are interested in our
284 	 * ilm (group, interface match).  If so, update the master
285 	 * include and exclude lists we're building in the fbld struct
286 	 * with this ilg's filter info.
287 	 */
288 	mutex_enter(&conn->conn_lock);
289 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
290 		ilg_t *ilg = &conn->conn_ilg[i];
291 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
292 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
293 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
294 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
295 				fbld->fbld_in_cnt++;
296 				if (!fbld->fbld_in_overflow)
297 					l_union_in_a(&fbld->fbld_in,
298 					    ilg->ilg_filter,
299 					    &fbld->fbld_in_overflow);
300 			} else {
301 				fbld->fbld_ex_cnt++;
302 				/*
303 				 * On the first exclude list, don't try to do
304 				 * an intersection, as the master exclude list
305 				 * is intentionally empty.  If the master list
306 				 * is still empty on later iterations, that
307 				 * means we have at least one ilg with an empty
308 				 * exclude list, so that should be reflected
309 				 * when we take the intersection.
310 				 */
311 				if (fbld->fbld_ex_cnt == 1) {
312 					if (ilg->ilg_filter != NULL)
313 						l_copy(ilg->ilg_filter,
314 						    &fbld->fbld_ex);
315 				} else {
316 					l_intersection_in_a(&fbld->fbld_ex,
317 					    ilg->ilg_filter);
318 				}
319 			}
320 			/* there will only be one match, so break now. */
321 			break;
322 		}
323 	}
324 	mutex_exit(&conn->conn_lock);
325 }
326 
327 static void
328 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
329 {
330 	ilm_fbld_t fbld;
331 
332 	fbld.fbld_ilm = ilm;
333 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
334 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
335 	fbld.fbld_in_overflow = B_FALSE;
336 
337 	/* first, construct our master include and exclude lists */
338 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld);
339 
340 	/* now use those master lists to generate the interface filter */
341 
342 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
343 	if (fbld.fbld_in_overflow) {
344 		*fmode = MODE_IS_EXCLUDE;
345 		flist->sl_numsrc = 0;
346 		return;
347 	}
348 
349 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
350 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
351 		*fmode = MODE_IS_INCLUDE;
352 		flist->sl_numsrc = 0;
353 		return;
354 	}
355 
356 	/*
357 	 * If there are no exclude lists, then the interface filter
358 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
359 	 * exclude list makes the interface filter EXCLUDE, with its
360 	 * filter list equal to (fbld_ex - fbld_in).
361 	 */
362 	if (fbld.fbld_ex_cnt == 0) {
363 		*fmode = MODE_IS_INCLUDE;
364 		l_copy(&fbld.fbld_in, flist);
365 	} else {
366 		*fmode = MODE_IS_EXCLUDE;
367 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
368 	}
369 }
370 
371 /*
372  * If the given interface has failed, choose a new one to join on so
373  * that we continue to receive packets.  ilg_orig_ifindex remembers
374  * what the application used to join on so that we know the ilg to
375  * delete even though we change the ill here.  Callers will store the
376  * ilg returned from this function in ilg_ill.  Thus when we receive
377  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
378  *
379  * This function must be called as writer so we can walk the group
380  * list and examine flags without holding a lock.
381  */
382 ill_t *
383 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
384 {
385 	ill_t	*till;
386 	ill_group_t *illgrp = ill->ill_group;
387 
388 	ASSERT(IAM_WRITER_ILL(ill));
389 
390 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
391 		return (ill);
392 
393 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
394 		return (ill);
395 
396 	till = illgrp->illgrp_ill;
397 	while (till != NULL &&
398 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
399 		till = till->ill_group_next;
400 	}
401 	if (till != NULL)
402 		return (till);
403 
404 	return (ill);
405 }
406 
407 static int
408 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
409     boolean_t isv6)
410 {
411 	mcast_record_t fmode;
412 	slist_t *flist;
413 	boolean_t fdefault;
414 	char buf[INET6_ADDRSTRLEN];
415 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
416 
417 	/*
418 	 * There are several cases where the ilm's filter state
419 	 * defaults to (EXCLUDE, NULL):
420 	 *	- we've had previous joins without associated ilgs
421 	 *	- this join has no associated ilg
422 	 *	- the ilg's filter state is (EXCLUDE, NULL)
423 	 */
424 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
425 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
426 
427 	/* attempt mallocs (if needed) before doing anything else */
428 	if ((flist = l_alloc()) == NULL)
429 		return (ENOMEM);
430 	if (!fdefault && ilm->ilm_filter == NULL) {
431 		ilm->ilm_filter = l_alloc();
432 		if (ilm->ilm_filter == NULL) {
433 			l_free(flist);
434 			return (ENOMEM);
435 		}
436 	}
437 
438 	if (ilgstat != ILGSTAT_CHANGE)
439 		ilm->ilm_refcnt++;
440 
441 	if (ilgstat == ILGSTAT_NONE)
442 		ilm->ilm_no_ilg_cnt++;
443 
444 	/*
445 	 * Determine new filter state.  If it's not the default
446 	 * (EXCLUDE, NULL), we must walk the conn list to find
447 	 * any ilgs interested in this group, and re-build the
448 	 * ilm filter.
449 	 */
450 	if (fdefault) {
451 		fmode = MODE_IS_EXCLUDE;
452 		flist->sl_numsrc = 0;
453 	} else {
454 		ilm_gen_filter(ilm, &fmode, flist);
455 	}
456 
457 	/* make sure state actually changed; nothing to do if not. */
458 	if ((ilm->ilm_fmode == fmode) &&
459 	    !lists_are_different(ilm->ilm_filter, flist)) {
460 		l_free(flist);
461 		return (0);
462 	}
463 
464 	/* send the state change report */
465 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
466 		if (isv6)
467 			mld_statechange(ilm, fmode, flist);
468 		else
469 			igmp_statechange(ilm, fmode, flist);
470 	}
471 
472 	/* update the ilm state */
473 	ilm->ilm_fmode = fmode;
474 	if (flist->sl_numsrc > 0)
475 		l_copy(flist, ilm->ilm_filter);
476 	else
477 		CLEAR_SLIST(ilm->ilm_filter);
478 
479 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
480 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
481 
482 	l_free(flist);
483 	return (0);
484 }
485 
486 static int
487 ilm_update_del(ilm_t *ilm, boolean_t isv6)
488 {
489 	mcast_record_t fmode;
490 	slist_t *flist;
491 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
492 
493 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
494 	    ilm->ilm_refcnt));
495 
496 	if ((flist = l_alloc()) == NULL)
497 		return (ENOMEM);
498 
499 	/*
500 	 * If present, the ilg in question has already either been
501 	 * updated or removed from our list; so all we need to do
502 	 * now is walk the list to update the ilm filter state.
503 	 *
504 	 * Skip the list walk if we have any no-ilg joins, which
505 	 * cause the filter state to revert to (EXCLUDE, NULL).
506 	 */
507 	if (ilm->ilm_no_ilg_cnt != 0) {
508 		fmode = MODE_IS_EXCLUDE;
509 		flist->sl_numsrc = 0;
510 	} else {
511 		ilm_gen_filter(ilm, &fmode, flist);
512 	}
513 
514 	/* check to see if state needs to be updated */
515 	if ((ilm->ilm_fmode == fmode) &&
516 	    (!lists_are_different(ilm->ilm_filter, flist))) {
517 		l_free(flist);
518 		return (0);
519 	}
520 
521 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
522 		if (isv6)
523 			mld_statechange(ilm, fmode, flist);
524 		else
525 			igmp_statechange(ilm, fmode, flist);
526 	}
527 
528 	ilm->ilm_fmode = fmode;
529 	if (flist->sl_numsrc > 0) {
530 		if (ilm->ilm_filter == NULL) {
531 			ilm->ilm_filter = l_alloc();
532 			if (ilm->ilm_filter == NULL) {
533 				char buf[INET6_ADDRSTRLEN];
534 				ip1dbg(("ilm_update_del: failed to alloc ilm "
535 				    "filter; no source filtering for %s on %s",
536 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
537 				    buf, sizeof (buf)), ill->ill_name));
538 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
539 				l_free(flist);
540 				return (0);
541 			}
542 		}
543 		l_copy(flist, ilm->ilm_filter);
544 	} else {
545 		CLEAR_SLIST(ilm->ilm_filter);
546 	}
547 
548 	l_free(flist);
549 	return (0);
550 }
551 
552 /*
553  * INADDR_ANY means all multicast addresses. This is only used
554  * by the multicast router.
555  * INADDR_ANY is stored as IPv6 unspecified addr.
556  */
557 int
558 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
559     mcast_record_t ilg_fmode, slist_t *ilg_flist)
560 {
561 	ill_t	*ill = ipif->ipif_ill;
562 	ilm_t 	*ilm;
563 	in6_addr_t v6group;
564 	int	ret;
565 
566 	ASSERT(IAM_WRITER_IPIF(ipif));
567 
568 	if (!CLASSD(group) && group != INADDR_ANY)
569 		return (EINVAL);
570 
571 	/*
572 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
573 	 */
574 	if (group == INADDR_ANY)
575 		v6group = ipv6_all_zeros;
576 	else
577 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
578 
579 	ilm = ilm_lookup_ipif(ipif, group);
580 	if (ilm != NULL)
581 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
582 
583 	/*
584 	 * ilms are associated with ipifs in IPv4. It moves with the
585 	 * ipif if the ipif moves to a new ill when the interface
586 	 * fails. Thus we really don't check whether the ipif_ill
587 	 * has failed like in IPv6. If it has FAILED the ipif
588 	 * will move (daemon will move it) and hence the ilm, if the
589 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
590 	 * we continue to receive in the same place even if the
591 	 * interface fails.
592 	 */
593 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
594 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
595 	if (ilm == NULL)
596 		return (ENOMEM);
597 
598 	if (group == INADDR_ANY) {
599 		/*
600 		 * Check how many ipif's have members in this group -
601 		 * if more then one we should not tell the driver to join
602 		 * this time
603 		 */
604 		if (ilm_numentries_v6(ill, &v6group) > 1)
605 			return (0);
606 		if (ill->ill_group == NULL)
607 			ret = ip_join_allmulti(ipif);
608 		else
609 			ret = ill_nominate_mcast_rcv(ill->ill_group);
610 		if (ret != 0)
611 			ilm_delete(ilm);
612 		return (ret);
613 	}
614 
615 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
616 		igmp_joingroup(ilm);
617 
618 	if (ilm_numentries_v6(ill, &v6group) > 1)
619 		return (0);
620 
621 	ret = ip_ll_addmulti_v6(ipif, &v6group);
622 	if (ret != 0)
623 		ilm_delete(ilm);
624 	return (ret);
625 }
626 
627 /*
628  * The unspecified address means all multicast addresses.
629  * This is only used by the multicast router.
630  *
631  * ill identifies the interface to join on; it may not match the
632  * interface requested by the application of a failover has taken
633  * place.  orig_ifindex always identifies the interface requested
634  * by the app.
635  *
636  * ilgstat tells us if there's an ilg associated with this join,
637  * and if so, if it's a new ilg or a change to an existing one.
638  * ilg_fmode and ilg_flist give us the current filter state of
639  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
640  */
641 int
642 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
643     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
644     slist_t *ilg_flist)
645 {
646 	ilm_t	*ilm;
647 	int	ret;
648 
649 	ASSERT(IAM_WRITER_ILL(ill));
650 
651 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
652 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
653 		return (EINVAL);
654 	}
655 
656 	/*
657 	 * An ilm is uniquely identified by the tuple of (group, ill,
658 	 * orig_ill).  group is the multicast group address, ill is
659 	 * the interface on which it is currently joined, and orig_ill
660 	 * is the interface on which the application requested the
661 	 * join.  orig_ill and ill are the same unless orig_ill has
662 	 * failed over.
663 	 *
664 	 * Both orig_ill and ill are required, which means we may have
665 	 * 2 ilms on an ill for the same group, but with different
666 	 * orig_ills.  These must be kept separate, so that when failback
667 	 * occurs, the appropriate ilms are moved back to their orig_ill
668 	 * without disrupting memberships on the ill to which they had
669 	 * been moved.
670 	 *
671 	 * In order to track orig_ill, we store orig_ifindex in the
672 	 * ilm and ilg.
673 	 */
674 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
675 	if (ilm != NULL)
676 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
677 
678 	/*
679 	 * We need to remember where the application really wanted
680 	 * to join. This will be used later if we want to failback
681 	 * to the original interface.
682 	 */
683 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
684 	    ilg_flist, orig_ifindex, zoneid);
685 	if (ilm == NULL)
686 		return (ENOMEM);
687 
688 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
689 		/*
690 		 * Check how many ipif's that have members in this group -
691 		 * if more then one we should not tell the driver to join
692 		 * this time
693 		 */
694 		if (ilm_numentries_v6(ill, v6group) > 1)
695 			return (0);
696 		if (ill->ill_group == NULL)
697 			ret = ip_join_allmulti(ill->ill_ipif);
698 		else
699 			ret = ill_nominate_mcast_rcv(ill->ill_group);
700 
701 		if (ret != 0)
702 			ilm_delete(ilm);
703 		return (ret);
704 	}
705 
706 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
707 		mld_joingroup(ilm);
708 
709 	/*
710 	 * If we have more then one we should not tell the driver
711 	 * to join this time.
712 	 */
713 	if (ilm_numentries_v6(ill, v6group) > 1)
714 		return (0);
715 
716 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
717 	if (ret != 0)
718 		ilm_delete(ilm);
719 	return (ret);
720 }
721 
722 /*
723  * Send a multicast request to the driver for enabling multicast reception
724  * for v6groupp address. The caller has already checked whether it is
725  * appropriate to send one or not.
726  */
727 int
728 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
729 {
730 	mblk_t	*mp;
731 	uint32_t addrlen, addroff;
732 	char	group_buf[INET6_ADDRSTRLEN];
733 
734 	ASSERT(IAM_WRITER_ILL(ill));
735 
736 	/*
737 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
738 	 * on.
739 	 */
740 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
741 	    &addrlen, &addroff);
742 	if (!mp)
743 		return (ENOMEM);
744 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
745 		ipaddr_t v4group;
746 
747 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
748 		/*
749 		 * NOTE!!!
750 		 * The "addroff" passed in here was calculated by
751 		 * ill_create_dl(), and will be used by ill_create_squery()
752 		 * to perform some twisted coding magic. It is the offset
753 		 * into the dl_xxx_req of the hw addr. Here, it will be
754 		 * added to b_wptr - b_rptr to create a magic number that
755 		 * is not an offset into this squery mblk.
756 		 * The actual hardware address will be accessed only in the
757 		 * dl_xxx_req, not in the squery. More importantly,
758 		 * that hardware address can *only* be accessed in this
759 		 * mblk chain by calling mi_offset_param_c(), which uses
760 		 * the magic number in the squery hw offset field to go
761 		 * to the *next* mblk (the dl_xxx_req), subtract the
762 		 * (b_wptr - b_rptr), and find the actual offset into
763 		 * the dl_xxx_req.
764 		 * Any method that depends on using the
765 		 * offset field in the dl_disabmulti_req or squery
766 		 * to find either hardware address will similarly fail.
767 		 *
768 		 * Look in ar_entry_squery() in arp.c to see how this offset
769 		 * is used.
770 		 */
771 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
772 		if (!mp)
773 			return (ENOMEM);
774 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
775 		    inet_ntop(AF_INET6, v6groupp, group_buf,
776 		    sizeof (group_buf)),
777 		    ill->ill_name));
778 		putnext(ill->ill_rq, mp);
779 	} else {
780 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on"
781 		    " %s\n",
782 		    inet_ntop(AF_INET6, v6groupp, group_buf,
783 		    sizeof (group_buf)),
784 		    ill->ill_name));
785 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
786 	}
787 	return (0);
788 }
789 
790 /*
791  * Send a multicast request to the driver for enabling multicast
792  * membership for v6group if appropriate.
793  */
794 static int
795 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
796 {
797 	ill_t	*ill = ipif->ipif_ill;
798 
799 	ASSERT(IAM_WRITER_IPIF(ipif));
800 
801 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
802 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
803 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
804 		return (0);	/* Must be IRE_IF_NORESOLVER */
805 	}
806 
807 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
808 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
809 		return (0);
810 	}
811 	if (ill->ill_ipif_up_count == 0) {
812 		/*
813 		 * Nobody there. All multicast addresses will be re-joined
814 		 * when we get the DL_BIND_ACK bringing the interface up.
815 		 */
816 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
817 		return (0);
818 	}
819 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
820 }
821 
822 /*
823  * INADDR_ANY means all multicast addresses. This is only used
824  * by the multicast router.
825  * INADDR_ANY is stored as the IPv6 unspecifed addr.
826  */
827 int
828 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
829 {
830 	ill_t	*ill = ipif->ipif_ill;
831 	ilm_t *ilm;
832 	in6_addr_t v6group;
833 	int	ret;
834 
835 	ASSERT(IAM_WRITER_IPIF(ipif));
836 
837 	if (!CLASSD(group) && group != INADDR_ANY)
838 		return (EINVAL);
839 
840 	/*
841 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
842 	 */
843 	if (group == INADDR_ANY)
844 		v6group = ipv6_all_zeros;
845 	else
846 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
847 
848 	/*
849 	 * Look for a match on the ipif.
850 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
851 	 */
852 	ilm = ilm_lookup_ipif(ipif, group);
853 	if (ilm == NULL)
854 		return (ENOENT);
855 
856 	/* Update counters */
857 	if (no_ilg)
858 		ilm->ilm_no_ilg_cnt--;
859 
860 	if (leaving)
861 		ilm->ilm_refcnt--;
862 
863 	if (ilm->ilm_refcnt > 0)
864 		return (ilm_update_del(ilm, B_FALSE));
865 
866 	if (group == INADDR_ANY) {
867 		ilm_delete(ilm);
868 		/*
869 		 * Check how many ipif's that have members in this group -
870 		 * if there are still some left then don't tell the driver
871 		 * to drop it.
872 		 */
873 		if (ilm_numentries_v6(ill, &v6group) != 0)
874 			return (0);
875 
876 		/*
877 		 * If we never joined, then don't leave.  This can happen
878 		 * if we're in an IPMP group, since only one ill per IPMP
879 		 * group receives all multicast packets.
880 		 */
881 		if (!ill->ill_join_allmulti) {
882 			ASSERT(ill->ill_group != NULL);
883 			return (0);
884 		}
885 
886 		ret = ip_leave_allmulti(ipif);
887 		if (ill->ill_group != NULL)
888 			(void) ill_nominate_mcast_rcv(ill->ill_group);
889 		return (ret);
890 	}
891 
892 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
893 		igmp_leavegroup(ilm);
894 
895 	ilm_delete(ilm);
896 	/*
897 	 * Check how many ipif's that have members in this group -
898 	 * if there are still some left then don't tell the driver
899 	 * to drop it.
900 	 */
901 	if (ilm_numentries_v6(ill, &v6group) != 0)
902 		return (0);
903 	return (ip_ll_delmulti_v6(ipif, &v6group));
904 }
905 
906 /*
907  * The unspecified address means all multicast addresses.
908  * This is only used by the multicast router.
909  */
910 int
911 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
912     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
913 {
914 	ipif_t	*ipif;
915 	ilm_t *ilm;
916 	int	ret;
917 
918 	ASSERT(IAM_WRITER_ILL(ill));
919 
920 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
921 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
922 		return (EINVAL);
923 
924 	/*
925 	 * Look for a match on the ill.
926 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
927 	 *
928 	 * Similar to ip_addmulti_v6, we should always look using
929 	 * the orig_ifindex.
930 	 *
931 	 * 1) If orig_ifindex is different from ill's ifindex
932 	 *    we should have an ilm with orig_ifindex created in
933 	 *    ip_addmulti_v6. We should delete that here.
934 	 *
935 	 * 2) If orig_ifindex is same as ill's ifindex, we should
936 	 *    not delete the ilm that is temporarily here because of
937 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
938 	 *    different from ill's ifindex.
939 	 *
940 	 * Thus, always lookup using orig_ifindex.
941 	 */
942 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
943 	if (ilm == NULL)
944 		return (ENOENT);
945 
946 	ASSERT(ilm->ilm_ill == ill);
947 
948 	ipif = ill->ill_ipif;
949 
950 	/* Update counters */
951 	if (no_ilg)
952 		ilm->ilm_no_ilg_cnt--;
953 
954 	if (leaving)
955 		ilm->ilm_refcnt--;
956 
957 	if (ilm->ilm_refcnt > 0)
958 		return (ilm_update_del(ilm, B_TRUE));
959 
960 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
961 		ilm_delete(ilm);
962 		/*
963 		 * Check how many ipif's that have members in this group -
964 		 * if there are still some left then don't tell the driver
965 		 * to drop it.
966 		 */
967 		if (ilm_numentries_v6(ill, v6group) != 0)
968 			return (0);
969 
970 		/*
971 		 * If we never joined, then don't leave.  This can happen
972 		 * if we're in an IPMP group, since only one ill per IPMP
973 		 * group receives all multicast packets.
974 		 */
975 		if (!ill->ill_join_allmulti) {
976 			ASSERT(ill->ill_group != NULL);
977 			return (0);
978 		}
979 
980 		ret = ip_leave_allmulti(ipif);
981 		if (ill->ill_group != NULL)
982 			(void) ill_nominate_mcast_rcv(ill->ill_group);
983 		return (ret);
984 	}
985 
986 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
987 		mld_leavegroup(ilm);
988 
989 	ilm_delete(ilm);
990 	/*
991 	 * Check how many ipif's that have members in this group -
992 	 * if there are still some left then don't tell the driver
993 	 * to drop it.
994 	 */
995 	if (ilm_numentries_v6(ill, v6group) != 0)
996 		return (0);
997 	return (ip_ll_delmulti_v6(ipif, v6group));
998 }
999 
1000 /*
1001  * Send a multicast request to the driver for disabling multicast reception
1002  * for v6groupp address. The caller has already checked whether it is
1003  * appropriate to send one or not.
1004  */
1005 int
1006 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1007 {
1008 	mblk_t	*mp;
1009 	char	group_buf[INET6_ADDRSTRLEN];
1010 	uint32_t	addrlen, addroff;
1011 
1012 	ASSERT(IAM_WRITER_ILL(ill));
1013 	/*
1014 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1015 	 * on.
1016 	 */
1017 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1018 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1019 
1020 	if (!mp)
1021 		return (ENOMEM);
1022 
1023 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1024 		ipaddr_t v4group;
1025 
1026 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1027 		/*
1028 		 * NOTE!!!
1029 		 * The "addroff" passed in here was calculated by
1030 		 * ill_create_dl(), and will be used by ill_create_squery()
1031 		 * to perform some twisted coding magic. It is the offset
1032 		 * into the dl_xxx_req of the hw addr. Here, it will be
1033 		 * added to b_wptr - b_rptr to create a magic number that
1034 		 * is not an offset into this mblk.
1035 		 *
1036 		 * Please see the comment in ip_ll_send)enabmulti_req()
1037 		 * for a complete explanation.
1038 		 *
1039 		 * Look in ar_entry_squery() in arp.c to see how this offset
1040 		 * is used.
1041 		 */
1042 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1043 		if (!mp)
1044 			return (ENOMEM);
1045 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1046 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1047 		    sizeof (group_buf)),
1048 		    ill->ill_name));
1049 		putnext(ill->ill_rq, mp);
1050 	} else {
1051 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
1052 		    " %s\n",
1053 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1054 		    sizeof (group_buf)),
1055 		    ill->ill_name));
1056 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1057 	}
1058 	return (0);
1059 }
1060 
1061 /*
1062  * Send a multicast request to the driver for disabling multicast
1063  * membership for v6group if appropriate.
1064  */
1065 static int
1066 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1067 {
1068 	ill_t	*ill = ipif->ipif_ill;
1069 
1070 	ASSERT(IAM_WRITER_IPIF(ipif));
1071 
1072 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1073 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1074 		return (0);	/* Must be IRE_IF_NORESOLVER */
1075 	}
1076 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1077 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1078 		return (0);
1079 	}
1080 	if (ill->ill_ipif_up_count == 0) {
1081 		/*
1082 		 * Nobody there. All multicast addresses will be re-joined
1083 		 * when we get the DL_BIND_ACK bringing the interface up.
1084 		 */
1085 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1086 		return (0);
1087 	}
1088 	return (ip_ll_send_disabmulti_req(ill, v6group));
1089 }
1090 
1091 /*
1092  * Make the driver pass up all multicast packets
1093  *
1094  * With ill groups, the caller makes sure that there is only
1095  * one ill joining the allmulti group.
1096  */
1097 int
1098 ip_join_allmulti(ipif_t *ipif)
1099 {
1100 	ill_t	*ill = ipif->ipif_ill;
1101 	mblk_t	*mp;
1102 	uint32_t	addrlen, addroff;
1103 
1104 	ASSERT(IAM_WRITER_IPIF(ipif));
1105 
1106 	if (ill->ill_ipif_up_count == 0) {
1107 		/*
1108 		 * Nobody there. All multicast addresses will be re-joined
1109 		 * when we get the DL_BIND_ACK bringing the interface up.
1110 		 */
1111 		return (0);
1112 	}
1113 
1114 	ASSERT(!ill->ill_join_allmulti);
1115 
1116 	/*
1117 	 * Create a DL_PROMISCON_REQ message and send it directly to
1118 	 * the DLPI provider.  We don't need to do this for certain
1119 	 * media types for which we never need to turn promiscuous
1120 	 * mode on.
1121 	 */
1122 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1123 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1124 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1125 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1126 		if (mp == NULL)
1127 			return (ENOMEM);
1128 		putnext(ill->ill_wq, mp);
1129 	}
1130 
1131 	mutex_enter(&ill->ill_lock);
1132 	ill->ill_join_allmulti = B_TRUE;
1133 	mutex_exit(&ill->ill_lock);
1134 	return (0);
1135 }
1136 
1137 /*
1138  * Make the driver stop passing up all multicast packets
1139  *
1140  * With ill groups, we need to nominate some other ill as
1141  * this ipif->ipif_ill is leaving the group.
1142  */
1143 int
1144 ip_leave_allmulti(ipif_t *ipif)
1145 {
1146 	ill_t	*ill = ipif->ipif_ill;
1147 	mblk_t	*mp;
1148 	uint32_t	addrlen, addroff;
1149 
1150 	ASSERT(IAM_WRITER_IPIF(ipif));
1151 
1152 	if (ill->ill_ipif_up_count == 0) {
1153 		/*
1154 		 * Nobody there. All multicast addresses will be re-joined
1155 		 * when we get the DL_BIND_ACK bringing the interface up.
1156 		 */
1157 		return (0);
1158 	}
1159 
1160 	ASSERT(ill->ill_join_allmulti);
1161 
1162 	/*
1163 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1164 	 * the DLPI provider.  We don't need to do this for certain
1165 	 * media types for which we never need to turn promiscuous
1166 	 * mode on.
1167 	 */
1168 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1169 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1170 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1171 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1172 		if (mp == NULL)
1173 			return (ENOMEM);
1174 		putnext(ill->ill_wq, mp);
1175 	}
1176 
1177 	mutex_enter(&ill->ill_lock);
1178 	ill->ill_join_allmulti = B_FALSE;
1179 	mutex_exit(&ill->ill_lock);
1180 	return (0);
1181 }
1182 
1183 /*
1184  * Copy mp_orig and pass it in as a local message.
1185  */
1186 void
1187 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1188     zoneid_t zoneid)
1189 {
1190 	mblk_t	*mp;
1191 	mblk_t	*ipsec_mp;
1192 
1193 	if (DB_TYPE(mp_orig) == M_DATA &&
1194 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1195 		uint_t hdrsz;
1196 
1197 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1198 		    sizeof (udpha_t);
1199 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1200 
1201 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1202 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1203 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1204 			mp->b_wptr += hdrsz;
1205 			mp->b_cont = mp_orig;
1206 			mp_orig->b_rptr += hdrsz;
1207 			if (MBLKL(mp_orig) == 0) {
1208 				mp->b_cont = mp_orig->b_cont;
1209 				mp_orig->b_cont = NULL;
1210 				freeb(mp_orig);
1211 			}
1212 		} else if (mp != NULL) {
1213 			freeb(mp);
1214 			mp = NULL;
1215 		}
1216 	} else {
1217 		mp = ip_copymsg(mp_orig);
1218 	}
1219 
1220 	if (mp == NULL)
1221 		return;
1222 	if (DB_TYPE(mp) == M_CTL) {
1223 		ipsec_mp = mp;
1224 		mp = mp->b_cont;
1225 	} else {
1226 		ipsec_mp = mp;
1227 	}
1228 	ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL,
1229 	    fanout_flags, zoneid);
1230 }
1231 
1232 static area_t	ip_aresq_template = {
1233 	AR_ENTRY_SQUERY,		/* cmd */
1234 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1235 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1236 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1237 	sizeof (area_t),			/* proto addr offset */
1238 	IP_ADDR_LEN,			/* proto addr_length */
1239 	0,				/* proto mask offset */
1240 	/* Rest is initialized when used */
1241 	0,				/* flags */
1242 	0,				/* hw addr offset */
1243 	0,				/* hw addr length */
1244 };
1245 
1246 static mblk_t *
1247 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1248     uint32_t addroff, mblk_t *mp_tail)
1249 {
1250 	mblk_t	*mp;
1251 	area_t	*area;
1252 
1253 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1254 				(caddr_t)&ipaddr);
1255 	if (!mp) {
1256 		freemsg(mp_tail);
1257 		return (NULL);
1258 	}
1259 	area = (area_t *)mp->b_rptr;
1260 	area->area_hw_addr_length = addrlen;
1261 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1262 	/*
1263 	 * NOTE!
1264 	 *
1265 	 * The area_hw_addr_offset, as can be seen, does not hold the
1266 	 * actual hardware address offset. Rather, it holds the offset
1267 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1268 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1269 	 * mi_offset_paramc() to find the hardware address in the
1270 	 * *second* mblk (dl_xxx_req), not this mblk.
1271 	 *
1272 	 * Using mi_offset_paramc() is thus the *only* way to access
1273 	 * the dl_xxx_hw address.
1274 	 *
1275 	 * The squery hw address should *not* be accessed.
1276 	 *
1277 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1278 	 */
1279 
1280 	mp->b_cont = mp_tail;
1281 	return (mp);
1282 }
1283 
1284 /*
1285  * Create a dlpi message with room for phys+sap. When we come back in
1286  * ip_wput_ctl() we will strip the sap for those primitives which
1287  * only need a physical address.
1288  */
1289 static mblk_t *
1290 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1291     uint32_t *addr_lenp, uint32_t *addr_offp)
1292 {
1293 	mblk_t	*mp;
1294 	uint32_t	hw_addr_length;
1295 	char		*cp;
1296 	uint32_t	offset;
1297 	uint32_t 	size;
1298 
1299 	*addr_lenp = *addr_offp = 0;
1300 
1301 	hw_addr_length = ill->ill_phys_addr_length;
1302 	if (!hw_addr_length) {
1303 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1304 		return (NULL);
1305 	}
1306 
1307 	size = length;
1308 	switch (dl_primitive) {
1309 	case DL_ENABMULTI_REQ:
1310 	case DL_DISABMULTI_REQ:
1311 		size += hw_addr_length;
1312 		break;
1313 	case DL_PROMISCON_REQ:
1314 	case DL_PROMISCOFF_REQ:
1315 		break;
1316 	default:
1317 		return (NULL);
1318 	}
1319 	mp = allocb(size, BPRI_HI);
1320 	if (!mp)
1321 		return (NULL);
1322 	mp->b_wptr += size;
1323 	mp->b_datap->db_type = M_PROTO;
1324 
1325 	cp = (char *)mp->b_rptr;
1326 	offset = length;
1327 
1328 	switch (dl_primitive) {
1329 	case DL_ENABMULTI_REQ: {
1330 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1331 
1332 		dl->dl_primitive = dl_primitive;
1333 		dl->dl_addr_offset = offset;
1334 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1335 		*addr_offp = offset;
1336 		break;
1337 	}
1338 	case DL_DISABMULTI_REQ: {
1339 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1340 
1341 		dl->dl_primitive = dl_primitive;
1342 		dl->dl_addr_offset = offset;
1343 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1344 		*addr_offp = offset;
1345 		break;
1346 	}
1347 	case DL_PROMISCON_REQ:
1348 	case DL_PROMISCOFF_REQ: {
1349 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1350 
1351 		dl->dl_primitive = dl_primitive;
1352 		dl->dl_level = DL_PROMISC_MULTI;
1353 		break;
1354 	}
1355 	}
1356 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1357 		*addr_lenp, *addr_offp));
1358 	return (mp);
1359 }
1360 
1361 void
1362 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1363 {
1364 	ill_t	*ill = (ill_t *)q->q_ptr;
1365 	mblk_t	*mp = mp_orig;
1366 	area_t	*area;
1367 
1368 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1369 	if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) ||
1370 	    mp->b_cont == NULL) {
1371 		putnext(q, mp);
1372 		return;
1373 	}
1374 	area = (area_t *)mp->b_rptr;
1375 	if (area->area_cmd != AR_ENTRY_SQUERY) {
1376 		putnext(q, mp);
1377 		return;
1378 	}
1379 	mp = mp->b_cont;
1380 	/*
1381 	 * Update dl_addr_length and dl_addr_offset for primitives that
1382 	 * have physical addresses as opposed to full saps
1383 	 */
1384 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1385 	case DL_ENABMULTI_REQ:
1386 		/* Track the state if this is the first enabmulti */
1387 		if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN)
1388 			ill->ill_dlpi_multicast_state = IDMS_INPROGRESS;
1389 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1390 		break;
1391 	case DL_DISABMULTI_REQ:
1392 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1393 		break;
1394 	default:
1395 		ip1dbg(("ip_wput_ctl: default\n"));
1396 		break;
1397 	}
1398 	freeb(mp_orig);
1399 	putnext(q, mp);
1400 }
1401 
1402 /*
1403  * Rejoin any groups which have been explicitly joined by the application (we
1404  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1405  * bringing the interface down).  Note that because groups can be joined and
1406  * left while an interface is down, this may not be the same set of groups
1407  * that we left in ill_leave_multicast().
1408  */
1409 void
1410 ill_recover_multicast(ill_t *ill)
1411 {
1412 	ilm_t	*ilm;
1413 	char    addrbuf[INET6_ADDRSTRLEN];
1414 
1415 	ASSERT(IAM_WRITER_ILL(ill));
1416 
1417 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1418 		/*
1419 		 * Check how many ipif's that have members in this group -
1420 		 * if more then one we make sure that this entry is first
1421 		 * in the list.
1422 		 */
1423 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1424 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1425 			continue;
1426 		ip1dbg(("ill_recover_multicast: %s\n",
1427 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1428 		    sizeof (addrbuf))));
1429 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1430 			if (ill->ill_group == NULL) {
1431 				(void) ip_join_allmulti(ill->ill_ipif);
1432 			} else {
1433 				/*
1434 				 * We don't want to join on this ill,
1435 				 * if somebody else in the group has
1436 				 * already been nominated.
1437 				 */
1438 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1439 			}
1440 		} else {
1441 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1442 			    &ilm->ilm_v6addr);
1443 		}
1444 	}
1445 }
1446 
1447 /*
1448  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1449  * that were explicitly joined.  Note that both these functions could be
1450  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1451  * and DL_ENABMULTI_REQ messages when an interface is down.
1452  */
1453 void
1454 ill_leave_multicast(ill_t *ill)
1455 {
1456 	ilm_t	*ilm;
1457 	char    addrbuf[INET6_ADDRSTRLEN];
1458 
1459 	ASSERT(IAM_WRITER_ILL(ill));
1460 
1461 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1462 		/*
1463 		 * Check how many ipif's that have members in this group -
1464 		 * if more then one we make sure that this entry is first
1465 		 * in the list.
1466 		 */
1467 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1468 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1469 			continue;
1470 		ip1dbg(("ill_leave_multicast: %s\n",
1471 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1472 		    sizeof (addrbuf))));
1473 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1474 			(void) ip_leave_allmulti(ill->ill_ipif);
1475 			/*
1476 			 * If we were part of an IPMP group, then
1477 			 * ill_handoff_responsibility() has already
1478 			 * nominated a new member (so we don't).
1479 			 */
1480 			ASSERT(ill->ill_group == NULL);
1481 		} else {
1482 			(void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr);
1483 		}
1484 	}
1485 }
1486 
1487 /*
1488  * Find an ilm for matching the ill and which has the source in its
1489  * INCLUDE list or does not have it in its EXCLUDE list
1490  */
1491 ilm_t *
1492 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src)
1493 {
1494 	in6_addr_t	v6group, v6src;
1495 
1496 	/*
1497 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1498 	 */
1499 	if (group == INADDR_ANY)
1500 		v6group = ipv6_all_zeros;
1501 	else
1502 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1503 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
1504 
1505 	return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src));
1506 }
1507 
1508 ilm_t *
1509 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group,
1510     const in6_addr_t *v6src)
1511 {
1512 	ilm_t	*ilm;
1513 	boolean_t isinlist;
1514 	int	i, numsrc;
1515 
1516 	/*
1517 	 * If the source is in any ilm's INCLUDE list, or if
1518 	 * it is not in any ilm's EXCLUDE list, we have a hit.
1519 	 */
1520 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1521 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1522 
1523 			isinlist = B_FALSE;
1524 			numsrc = (ilm->ilm_filter == NULL) ?
1525 			    0 : ilm->ilm_filter->sl_numsrc;
1526 			for (i = 0; i < numsrc; i++) {
1527 				if (IN6_ARE_ADDR_EQUAL(v6src,
1528 				    &ilm->ilm_filter->sl_addr[i])) {
1529 					isinlist = B_TRUE;
1530 					break;
1531 				}
1532 			}
1533 			if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) ||
1534 			    (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE))
1535 				return (ilm);
1536 			else
1537 				return (NULL);
1538 		}
1539 	}
1540 	return (NULL);
1541 }
1542 
1543 
1544 /* Find an ilm for matching the ill */
1545 ilm_t *
1546 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1547 {
1548 	in6_addr_t	v6group;
1549 
1550 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1551 	    IAM_WRITER_ILL(ill));
1552 	/*
1553 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1554 	 */
1555 	if (group == INADDR_ANY)
1556 		v6group = ipv6_all_zeros;
1557 	else
1558 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1559 
1560 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1561 }
1562 
1563 /*
1564  * Find an ilm for matching the ill. All the ilm lookup functions
1565  * ignore ILM_DELETED ilms. These have been logically deleted, and
1566  * igmp and linklayer disable multicast have been done. Only mi_free
1567  * yet to be done. Still there in the list due to ilm_walkers. The
1568  * last walker will release it.
1569  */
1570 ilm_t *
1571 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1572 {
1573 	ilm_t	*ilm;
1574 
1575 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1576 	    IAM_WRITER_ILL(ill));
1577 
1578 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1579 		if (ilm->ilm_flags & ILM_DELETED)
1580 			continue;
1581 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1582 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1583 			return (ilm);
1584 	}
1585 	return (NULL);
1586 }
1587 
1588 ilm_t *
1589 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1590     zoneid_t zoneid)
1591 {
1592 	ilm_t *ilm;
1593 
1594 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1595 	    IAM_WRITER_ILL(ill));
1596 
1597 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1598 		if (ilm->ilm_flags & ILM_DELETED)
1599 			continue;
1600 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1601 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1602 		    ilm->ilm_orig_ifindex == index) {
1603 			return (ilm);
1604 		}
1605 	}
1606 	return (NULL);
1607 }
1608 
1609 ilm_t *
1610 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1611 {
1612 	in6_addr_t	v6group;
1613 
1614 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1615 	    IAM_WRITER_ILL(ill));
1616 	/*
1617 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1618 	 */
1619 	if (group == INADDR_ANY)
1620 		v6group = ipv6_all_zeros;
1621 	else
1622 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1623 
1624 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1625 }
1626 
1627 /*
1628  * Found an ilm for the ipif. Only needed for IPv4 which does
1629  * ipif specific socket options.
1630  */
1631 ilm_t *
1632 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1633 {
1634 	ill_t	*ill = ipif->ipif_ill;
1635 	ilm_t	*ilm;
1636 	in6_addr_t	v6group;
1637 
1638 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1639 	    IAM_WRITER_ILL(ill));
1640 
1641 	/*
1642 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1643 	 */
1644 	if (group == INADDR_ANY)
1645 		v6group = ipv6_all_zeros;
1646 	else
1647 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1648 
1649 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1650 		if (ilm->ilm_flags & ILM_DELETED)
1651 			continue;
1652 		if (ilm->ilm_ipif == ipif &&
1653 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1654 			return (ilm);
1655 	}
1656 	return (NULL);
1657 }
1658 
1659 /*
1660  * How many members on this ill?
1661  */
1662 int
1663 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1664 {
1665 	ilm_t	*ilm;
1666 	int i = 0;
1667 
1668 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1669 	    IAM_WRITER_ILL(ill));
1670 
1671 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1672 		if (ilm->ilm_flags & ILM_DELETED)
1673 			continue;
1674 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1675 			i++;
1676 		}
1677 	}
1678 	return (i);
1679 }
1680 
1681 /* Caller guarantees that the group is not already on the list */
1682 static ilm_t *
1683 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1684     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1685     zoneid_t zoneid)
1686 {
1687 	ill_t	*ill = ipif->ipif_ill;
1688 	ilm_t	*ilm;
1689 	ilm_t	*ilm_cur;
1690 	ilm_t	**ilm_ptpn;
1691 
1692 	ASSERT(IAM_WRITER_IPIF(ipif));
1693 
1694 	ilm = GETSTRUCT(ilm_t, 1);
1695 	if (ilm == NULL)
1696 		return (NULL);
1697 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1698 		ilm->ilm_filter = l_alloc();
1699 		if (ilm->ilm_filter == NULL) {
1700 			mi_free(ilm);
1701 			return (NULL);
1702 		}
1703 	}
1704 	ilm->ilm_v6addr = *v6group;
1705 	ilm->ilm_refcnt = 1;
1706 	ilm->ilm_zoneid = zoneid;
1707 	ilm->ilm_timer = INFINITY;
1708 	ilm->ilm_rtx.rtx_timer = INFINITY;
1709 	/*
1710 	 * IPv4 Multicast groups are joined using ipif.
1711 	 * IPv6 Multicast groups are joined using ill.
1712 	 */
1713 	if (ill->ill_isv6) {
1714 		ilm->ilm_ill = ill;
1715 		ilm->ilm_ipif = NULL;
1716 	} else {
1717 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1718 		ilm->ilm_ipif = ipif;
1719 		ilm->ilm_ill = NULL;
1720 	}
1721 	/*
1722 	 * After this if ilm moves to a new ill, we don't change
1723 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1724 	 * it has been moved. Indexes don't match even when the application
1725 	 * wants to join on a FAILED/INACTIVE interface because we choose
1726 	 * a new interface to join in. This is considered as an implicit
1727 	 * move.
1728 	 */
1729 	ilm->ilm_orig_ifindex = orig_ifindex;
1730 
1731 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1732 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1733 
1734 	/*
1735 	 * Grab lock to give consistent view to readers
1736 	 */
1737 	mutex_enter(&ill->ill_lock);
1738 	/*
1739 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1740 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1741 	 * sending duplicates up when two applications in the same zone join the
1742 	 * same group on different logical interfaces.
1743 	 */
1744 	ilm_cur = ill->ill_ilm;
1745 	ilm_ptpn = &ill->ill_ilm;
1746 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1747 		ilm_ptpn = &ilm_cur->ilm_next;
1748 		ilm_cur = ilm_cur->ilm_next;
1749 	}
1750 	ilm->ilm_next = ilm_cur;
1751 	*ilm_ptpn = ilm;
1752 
1753 	/*
1754 	 * If we have an associated ilg, use its filter state; if not,
1755 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1756 	 */
1757 	if (ilgstat != ILGSTAT_NONE) {
1758 		if (!SLIST_IS_EMPTY(ilg_flist))
1759 			l_copy(ilg_flist, ilm->ilm_filter);
1760 		ilm->ilm_fmode = ilg_fmode;
1761 	} else {
1762 		ilm->ilm_no_ilg_cnt = 1;
1763 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1764 	}
1765 
1766 	mutex_exit(&ill->ill_lock);
1767 	return (ilm);
1768 }
1769 
1770 void
1771 ilm_walker_cleanup(ill_t *ill)
1772 {
1773 	ilm_t	**ilmp;
1774 	ilm_t	*ilm;
1775 
1776 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1777 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1778 
1779 	ilmp = &ill->ill_ilm;
1780 	while (*ilmp != NULL) {
1781 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1782 			ilm = *ilmp;
1783 			*ilmp = ilm->ilm_next;
1784 			FREE_SLIST(ilm->ilm_filter);
1785 			FREE_SLIST(ilm->ilm_pendsrcs);
1786 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1787 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1788 			mi_free((char *)ilm);
1789 		} else {
1790 			ilmp = &(*ilmp)->ilm_next;
1791 		}
1792 	}
1793 	ill->ill_ilm_cleanup_reqd = 0;
1794 }
1795 
1796 /*
1797  * Unlink ilm and free it.
1798  */
1799 static void
1800 ilm_delete(ilm_t *ilm)
1801 {
1802 	ill_t	*ill;
1803 	ilm_t	**ilmp;
1804 
1805 	if (ilm->ilm_ipif != NULL) {
1806 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1807 		ASSERT(ilm->ilm_ill == NULL);
1808 		ill = ilm->ilm_ipif->ipif_ill;
1809 		ASSERT(!ill->ill_isv6);
1810 	} else {
1811 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1812 		ASSERT(ilm->ilm_ipif == NULL);
1813 		ill = ilm->ilm_ill;
1814 		ASSERT(ill->ill_isv6);
1815 	}
1816 	/*
1817 	 * Delete under lock protection so that readers don't stumble
1818 	 * on bad ilm_next
1819 	 */
1820 	mutex_enter(&ill->ill_lock);
1821 	if (ill->ill_ilm_walker_cnt != 0) {
1822 		ilm->ilm_flags |= ILM_DELETED;
1823 		ill->ill_ilm_cleanup_reqd = 1;
1824 		mutex_exit(&ill->ill_lock);
1825 		return;
1826 	}
1827 
1828 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1829 				;
1830 	*ilmp = ilm->ilm_next;
1831 	mutex_exit(&ill->ill_lock);
1832 
1833 	FREE_SLIST(ilm->ilm_filter);
1834 	FREE_SLIST(ilm->ilm_pendsrcs);
1835 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1836 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1837 	mi_free((char *)ilm);
1838 }
1839 
1840 /* Free all ilms for this ipif */
1841 void
1842 ilm_free(ipif_t *ipif)
1843 {
1844 	ill_t	*ill = ipif->ipif_ill;
1845 	ilm_t	*ilm;
1846 	ilm_t	 *next_ilm;
1847 
1848 	ASSERT(IAM_WRITER_IPIF(ipif));
1849 
1850 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1851 		next_ilm = ilm->ilm_next;
1852 		if (ilm->ilm_ipif == ipif)
1853 			ilm_delete(ilm);
1854 	}
1855 }
1856 
1857 /*
1858  * Looks up the appropriate ipif given a v4 multicast group and interface
1859  * address.  On success, returns 0, with *ipifpp pointing to the found
1860  * struct.  On failure, returns an errno and *ipifpp is NULL.
1861  */
1862 int
1863 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1864     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1865 {
1866 	ipif_t *ipif;
1867 	int err = 0;
1868 	zoneid_t zoneid = connp->conn_zoneid;
1869 
1870 	if (!CLASSD(group) || CLASSD(src)) {
1871 		return (EINVAL);
1872 	}
1873 	*ipifpp = NULL;
1874 
1875 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1876 	if (ifaddr != INADDR_ANY) {
1877 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1878 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1879 		if (err != 0 && err != EINPROGRESS)
1880 			err = EADDRNOTAVAIL;
1881 	} else if (ifindexp != NULL && *ifindexp != 0) {
1882 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1883 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1884 	} else {
1885 		ipif = ipif_lookup_group(group, zoneid);
1886 		if (ipif == NULL)
1887 			return (EADDRNOTAVAIL);
1888 	}
1889 	if (ipif == NULL)
1890 		return (err);
1891 
1892 	*ipifpp = ipif;
1893 	return (0);
1894 }
1895 
1896 /*
1897  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1898  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1899  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1900  * an errno and *illpp and *ipifpp are undefined.
1901  */
1902 int
1903 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1904     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1905     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1906 {
1907 	boolean_t src_unspec;
1908 	ill_t *ill = NULL;
1909 	ipif_t *ipif = NULL;
1910 	int err;
1911 	zoneid_t zoneid = connp->conn_zoneid;
1912 	queue_t *wq = CONNP_TO_WQ(connp);
1913 
1914 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1915 
1916 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1917 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1918 			return (EINVAL);
1919 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1920 		if (src_unspec) {
1921 			*v4src = INADDR_ANY;
1922 		} else {
1923 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1924 		}
1925 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1926 			return (EINVAL);
1927 		*ipifpp = NULL;
1928 		*isv6 = B_FALSE;
1929 	} else {
1930 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1931 			return (EINVAL);
1932 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1933 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1934 			return (EINVAL);
1935 		}
1936 		*illpp = NULL;
1937 		*isv6 = B_TRUE;
1938 	}
1939 
1940 	if (ifindex == 0) {
1941 		if (*isv6)
1942 			ill = ill_lookup_group_v6(v6group, zoneid);
1943 		else
1944 			ipif = ipif_lookup_group(*v4group, zoneid);
1945 		if (ill == NULL && ipif == NULL)
1946 			return (EADDRNOTAVAIL);
1947 	} else {
1948 		if (*isv6) {
1949 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1950 			    wq, first_mp, func, &err);
1951 			if (ill != NULL &&
1952 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1953 				ill_refrele(ill);
1954 				ill = NULL;
1955 				err = EADDRNOTAVAIL;
1956 			}
1957 		} else {
1958 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1959 			    zoneid, wq, first_mp, func, &err);
1960 		}
1961 		if (ill == NULL && ipif == NULL)
1962 			return (err);
1963 	}
1964 
1965 	*ipifpp = ipif;
1966 	*illpp = ill;
1967 	return (0);
1968 }
1969 
1970 static int
1971 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1972     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1973 {
1974 	ilg_t *ilg;
1975 	int i, numsrc, fmode, outsrcs;
1976 	struct sockaddr_in *sin;
1977 	struct sockaddr_in6 *sin6;
1978 	struct in_addr *addrp;
1979 	slist_t *fp;
1980 	boolean_t is_v4only_api;
1981 
1982 	mutex_enter(&connp->conn_lock);
1983 
1984 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1985 	if (ilg == NULL) {
1986 		mutex_exit(&connp->conn_lock);
1987 		return (EADDRNOTAVAIL);
1988 	}
1989 
1990 	if (gf == NULL) {
1991 		ASSERT(imsf != NULL);
1992 		ASSERT(!isv4mapped);
1993 		is_v4only_api = B_TRUE;
1994 		outsrcs = imsf->imsf_numsrc;
1995 	} else {
1996 		ASSERT(imsf == NULL);
1997 		is_v4only_api = B_FALSE;
1998 		outsrcs = gf->gf_numsrc;
1999 	}
2000 
2001 	/*
2002 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2003 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2004 	 * So we need to translate here.
2005 	 */
2006 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2007 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2008 	if ((fp = ilg->ilg_filter) == NULL) {
2009 		numsrc = 0;
2010 	} else {
2011 		for (i = 0; i < outsrcs; i++) {
2012 			if (i == fp->sl_numsrc)
2013 				break;
2014 			if (isv4mapped) {
2015 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2016 				sin6->sin6_family = AF_INET6;
2017 				sin6->sin6_addr = fp->sl_addr[i];
2018 			} else {
2019 				if (is_v4only_api) {
2020 					addrp = &imsf->imsf_slist[i];
2021 				} else {
2022 					sin = (struct sockaddr_in *)
2023 					    &gf->gf_slist[i];
2024 					sin->sin_family = AF_INET;
2025 					addrp = &sin->sin_addr;
2026 				}
2027 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2028 			}
2029 		}
2030 		numsrc = fp->sl_numsrc;
2031 	}
2032 
2033 	if (is_v4only_api) {
2034 		imsf->imsf_numsrc = numsrc;
2035 		imsf->imsf_fmode = fmode;
2036 	} else {
2037 		gf->gf_numsrc = numsrc;
2038 		gf->gf_fmode = fmode;
2039 	}
2040 
2041 	mutex_exit(&connp->conn_lock);
2042 
2043 	return (0);
2044 }
2045 
2046 static int
2047 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2048     const struct in6_addr *grp, ill_t *ill)
2049 {
2050 	ilg_t *ilg;
2051 	int i;
2052 	struct sockaddr_storage *sl;
2053 	struct sockaddr_in6 *sin6;
2054 	slist_t *fp;
2055 
2056 	mutex_enter(&connp->conn_lock);
2057 
2058 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2059 	if (ilg == NULL) {
2060 		mutex_exit(&connp->conn_lock);
2061 		return (EADDRNOTAVAIL);
2062 	}
2063 
2064 	/*
2065 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2066 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2067 	 * So we need to translate here.
2068 	 */
2069 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2070 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2071 	if ((fp = ilg->ilg_filter) == NULL) {
2072 		gf->gf_numsrc = 0;
2073 	} else {
2074 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2075 			if (i == fp->sl_numsrc)
2076 				break;
2077 			sin6 = (struct sockaddr_in6 *)sl;
2078 			sin6->sin6_family = AF_INET6;
2079 			sin6->sin6_addr = fp->sl_addr[i];
2080 		}
2081 		gf->gf_numsrc = fp->sl_numsrc;
2082 	}
2083 
2084 	mutex_exit(&connp->conn_lock);
2085 
2086 	return (0);
2087 }
2088 
2089 static int
2090 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2091     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2092 {
2093 	ilg_t *ilg;
2094 	int i, err, insrcs, infmode, new_fmode;
2095 	struct sockaddr_in *sin;
2096 	struct sockaddr_in6 *sin6;
2097 	struct in_addr *addrp;
2098 	slist_t *orig_filter = NULL;
2099 	slist_t *new_filter = NULL;
2100 	mcast_record_t orig_fmode;
2101 	boolean_t leave_grp, is_v4only_api;
2102 	ilg_stat_t ilgstat;
2103 
2104 	if (gf == NULL) {
2105 		ASSERT(imsf != NULL);
2106 		ASSERT(!isv4mapped);
2107 		is_v4only_api = B_TRUE;
2108 		insrcs = imsf->imsf_numsrc;
2109 		infmode = imsf->imsf_fmode;
2110 	} else {
2111 		ASSERT(imsf == NULL);
2112 		is_v4only_api = B_FALSE;
2113 		insrcs = gf->gf_numsrc;
2114 		infmode = gf->gf_fmode;
2115 	}
2116 
2117 	/* Make sure we can handle the source list */
2118 	if (insrcs > MAX_FILTER_SIZE)
2119 		return (ENOBUFS);
2120 
2121 	/*
2122 	 * setting the filter to (INCLUDE, NULL) is treated
2123 	 * as a request to leave the group.
2124 	 */
2125 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2126 
2127 	ASSERT(IAM_WRITER_IPIF(ipif));
2128 
2129 	mutex_enter(&connp->conn_lock);
2130 
2131 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2132 	if (ilg == NULL) {
2133 		/*
2134 		 * if the request was actually to leave, and we
2135 		 * didn't find an ilg, there's nothing to do.
2136 		 */
2137 		if (!leave_grp)
2138 			ilg = conn_ilg_alloc(connp);
2139 		if (leave_grp || ilg == NULL) {
2140 			mutex_exit(&connp->conn_lock);
2141 			return (leave_grp ? 0 : ENOMEM);
2142 		}
2143 		ilgstat = ILGSTAT_NEW;
2144 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2145 		ilg->ilg_ipif = ipif;
2146 		ilg->ilg_ill = NULL;
2147 		ilg->ilg_orig_ifindex = 0;
2148 	} else if (leave_grp) {
2149 		ilg_delete(connp, ilg, NULL);
2150 		mutex_exit(&connp->conn_lock);
2151 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2152 		return (0);
2153 	} else {
2154 		ilgstat = ILGSTAT_CHANGE;
2155 		/* Preserve existing state in case ip_addmulti() fails */
2156 		orig_fmode = ilg->ilg_fmode;
2157 		if (ilg->ilg_filter == NULL) {
2158 			orig_filter = NULL;
2159 		} else {
2160 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2161 			if (orig_filter == NULL) {
2162 				mutex_exit(&connp->conn_lock);
2163 				return (ENOMEM);
2164 			}
2165 		}
2166 	}
2167 
2168 	/*
2169 	 * Alloc buffer to copy new state into (see below) before
2170 	 * we make any changes, so we can bail if it fails.
2171 	 */
2172 	if ((new_filter = l_alloc()) == NULL) {
2173 		mutex_exit(&connp->conn_lock);
2174 		err = ENOMEM;
2175 		goto free_and_exit;
2176 	}
2177 
2178 	if (insrcs == 0) {
2179 		CLEAR_SLIST(ilg->ilg_filter);
2180 	} else {
2181 		slist_t *fp;
2182 		if (ilg->ilg_filter == NULL) {
2183 			fp = l_alloc();
2184 			if (fp == NULL) {
2185 				if (ilgstat == ILGSTAT_NEW)
2186 					ilg_delete(connp, ilg, NULL);
2187 				mutex_exit(&connp->conn_lock);
2188 				err = ENOMEM;
2189 				goto free_and_exit;
2190 			}
2191 		} else {
2192 			fp = ilg->ilg_filter;
2193 		}
2194 		for (i = 0; i < insrcs; i++) {
2195 			if (isv4mapped) {
2196 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2197 				fp->sl_addr[i] = sin6->sin6_addr;
2198 			} else {
2199 				if (is_v4only_api) {
2200 					addrp = &imsf->imsf_slist[i];
2201 				} else {
2202 					sin = (struct sockaddr_in *)
2203 					    &gf->gf_slist[i];
2204 					addrp = &sin->sin_addr;
2205 				}
2206 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2207 			}
2208 		}
2209 		fp->sl_numsrc = insrcs;
2210 		ilg->ilg_filter = fp;
2211 	}
2212 	/*
2213 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2214 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2215 	 * So we need to translate here.
2216 	 */
2217 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2218 		    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2219 
2220 	/*
2221 	 * Save copy of ilg's filter state to pass to other functions,
2222 	 * so we can release conn_lock now.
2223 	 */
2224 	new_fmode = ilg->ilg_fmode;
2225 	l_copy(ilg->ilg_filter, new_filter);
2226 
2227 	mutex_exit(&connp->conn_lock);
2228 
2229 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2230 	if (err != 0) {
2231 		/*
2232 		 * Restore the original filter state, or delete the
2233 		 * newly-created ilg.  We need to look up the ilg
2234 		 * again, though, since we've not been holding the
2235 		 * conn_lock.
2236 		 */
2237 		mutex_enter(&connp->conn_lock);
2238 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2239 		ASSERT(ilg != NULL);
2240 		if (ilgstat == ILGSTAT_NEW) {
2241 			ilg_delete(connp, ilg, NULL);
2242 		} else {
2243 			ilg->ilg_fmode = orig_fmode;
2244 			if (SLIST_IS_EMPTY(orig_filter)) {
2245 				CLEAR_SLIST(ilg->ilg_filter);
2246 			} else {
2247 				/*
2248 				 * We didn't free the filter, even if we
2249 				 * were trying to make the source list empty;
2250 				 * so if orig_filter isn't empty, the ilg
2251 				 * must still have a filter alloc'd.
2252 				 */
2253 				l_copy(orig_filter, ilg->ilg_filter);
2254 			}
2255 		}
2256 		mutex_exit(&connp->conn_lock);
2257 	}
2258 
2259 free_and_exit:
2260 	l_free(orig_filter);
2261 	l_free(new_filter);
2262 
2263 	return (err);
2264 }
2265 
2266 static int
2267 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2268     const struct in6_addr *grp, ill_t *ill)
2269 {
2270 	ilg_t *ilg;
2271 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2272 	slist_t *orig_filter = NULL;
2273 	slist_t *new_filter = NULL;
2274 	struct sockaddr_storage *sl;
2275 	struct sockaddr_in6 *sin6;
2276 	boolean_t leave_grp;
2277 	ilg_stat_t ilgstat;
2278 
2279 	/* Make sure we can handle the source list */
2280 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2281 		return (ENOBUFS);
2282 
2283 	/*
2284 	 * setting the filter to (INCLUDE, NULL) is treated
2285 	 * as a request to leave the group.
2286 	 */
2287 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2288 
2289 	ASSERT(IAM_WRITER_ILL(ill));
2290 
2291 	/*
2292 	 * Use the ifindex to do the lookup.  We can't use the ill
2293 	 * directly because ilg_ill could point to a different ill
2294 	 * if things have moved.
2295 	 */
2296 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2297 
2298 	mutex_enter(&connp->conn_lock);
2299 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2300 	if (ilg == NULL) {
2301 		/*
2302 		 * if the request was actually to leave, and we
2303 		 * didn't find an ilg, there's nothing to do.
2304 		 */
2305 		if (!leave_grp)
2306 			ilg = conn_ilg_alloc(connp);
2307 		if (leave_grp || ilg == NULL) {
2308 			mutex_exit(&connp->conn_lock);
2309 			return (leave_grp ? 0 : ENOMEM);
2310 		}
2311 		ilgstat = ILGSTAT_NEW;
2312 		ilg->ilg_v6group = *grp;
2313 		ilg->ilg_ipif = NULL;
2314 		/*
2315 		 * Choose our target ill to join on. This might be
2316 		 * different from the ill we've been given if it's
2317 		 * currently down and part of a group.
2318 		 *
2319 		 * new ill is not refheld; we are writer.
2320 		 */
2321 		ill = ip_choose_multi_ill(ill, grp);
2322 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2323 		ilg->ilg_ill = ill;
2324 		/*
2325 		 * Remember the index that we joined on, so that we can
2326 		 * successfully delete them later on and also search for
2327 		 * duplicates if the application wants to join again.
2328 		 */
2329 		ilg->ilg_orig_ifindex = orig_ifindex;
2330 	} else if (leave_grp) {
2331 		/*
2332 		 * Use the ilg's current ill for the deletion,
2333 		 * we might have failed over.
2334 		 */
2335 		ill = ilg->ilg_ill;
2336 		ilg_delete(connp, ilg, NULL);
2337 		mutex_exit(&connp->conn_lock);
2338 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2339 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2340 		return (0);
2341 	} else {
2342 		ilgstat = ILGSTAT_CHANGE;
2343 		/*
2344 		 * The current ill might be different from the one we were
2345 		 * asked to join on (if failover has occurred); we should
2346 		 * join on the ill stored in the ilg.  The original ill
2347 		 * is noted in ilg_orig_ifindex, which matched our request.
2348 		 */
2349 		ill = ilg->ilg_ill;
2350 		/* preserve existing state in case ip_addmulti() fails */
2351 		orig_fmode = ilg->ilg_fmode;
2352 		if (ilg->ilg_filter == NULL) {
2353 			orig_filter = NULL;
2354 		} else {
2355 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2356 			if (orig_filter == NULL) {
2357 				mutex_exit(&connp->conn_lock);
2358 				return (ENOMEM);
2359 			}
2360 		}
2361 	}
2362 
2363 	/*
2364 	 * Alloc buffer to copy new state into (see below) before
2365 	 * we make any changes, so we can bail if it fails.
2366 	 */
2367 	if ((new_filter = l_alloc()) == NULL) {
2368 		mutex_exit(&connp->conn_lock);
2369 		err = ENOMEM;
2370 		goto free_and_exit;
2371 	}
2372 
2373 	if (gf->gf_numsrc == 0) {
2374 		CLEAR_SLIST(ilg->ilg_filter);
2375 	} else {
2376 		slist_t *fp;
2377 		if (ilg->ilg_filter == NULL) {
2378 			fp = l_alloc();
2379 			if (fp == NULL) {
2380 				if (ilgstat == ILGSTAT_NEW)
2381 					ilg_delete(connp, ilg, NULL);
2382 				mutex_exit(&connp->conn_lock);
2383 				err = ENOMEM;
2384 				goto free_and_exit;
2385 			}
2386 		} else {
2387 			fp = ilg->ilg_filter;
2388 		}
2389 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2390 			sin6 = (struct sockaddr_in6 *)sl;
2391 			fp->sl_addr[i] = sin6->sin6_addr;
2392 		}
2393 		fp->sl_numsrc = gf->gf_numsrc;
2394 		ilg->ilg_filter = fp;
2395 	}
2396 	/*
2397 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2398 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2399 	 * So we need to translate here.
2400 	 */
2401 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2402 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2403 
2404 	/*
2405 	 * Save copy of ilg's filter state to pass to other functions,
2406 	 * so we can release conn_lock now.
2407 	 */
2408 	new_fmode = ilg->ilg_fmode;
2409 	l_copy(ilg->ilg_filter, new_filter);
2410 
2411 	mutex_exit(&connp->conn_lock);
2412 
2413 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2414 	    ilgstat, new_fmode, new_filter);
2415 	if (err != 0) {
2416 		/*
2417 		 * Restore the original filter state, or delete the
2418 		 * newly-created ilg.  We need to look up the ilg
2419 		 * again, though, since we've not been holding the
2420 		 * conn_lock.
2421 		 */
2422 		mutex_enter(&connp->conn_lock);
2423 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2424 		ASSERT(ilg != NULL);
2425 		if (ilgstat == ILGSTAT_NEW) {
2426 			ilg_delete(connp, ilg, NULL);
2427 		} else {
2428 			ilg->ilg_fmode = orig_fmode;
2429 			if (SLIST_IS_EMPTY(orig_filter)) {
2430 				CLEAR_SLIST(ilg->ilg_filter);
2431 			} else {
2432 				/*
2433 				 * We didn't free the filter, even if we
2434 				 * were trying to make the source list empty;
2435 				 * so if orig_filter isn't empty, the ilg
2436 				 * must still have a filter alloc'd.
2437 				 */
2438 				l_copy(orig_filter, ilg->ilg_filter);
2439 			}
2440 		}
2441 		mutex_exit(&connp->conn_lock);
2442 	}
2443 
2444 free_and_exit:
2445 	l_free(orig_filter);
2446 	l_free(new_filter);
2447 
2448 	return (err);
2449 }
2450 
2451 /*
2452  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2453  */
2454 /* ARGSUSED */
2455 int
2456 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2457     ip_ioctl_cmd_t *ipip, void *ifreq)
2458 {
2459 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2460 	/* existence verified in ip_wput_nondata() */
2461 	mblk_t *data_mp = mp->b_cont->b_cont;
2462 	int datalen, err, cmd, minsize;
2463 	int expsize = 0;
2464 	conn_t *connp;
2465 	boolean_t isv6, is_v4only_api, getcmd;
2466 	struct sockaddr_in *gsin;
2467 	struct sockaddr_in6 *gsin6;
2468 	ipaddr_t v4grp;
2469 	in6_addr_t v6grp;
2470 	struct group_filter *gf = NULL;
2471 	struct ip_msfilter *imsf = NULL;
2472 	mblk_t *ndp;
2473 
2474 	if (data_mp->b_cont != NULL) {
2475 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2476 			return (ENOMEM);
2477 		freemsg(data_mp);
2478 		data_mp = ndp;
2479 		mp->b_cont->b_cont = data_mp;
2480 	}
2481 
2482 	cmd = iocp->ioc_cmd;
2483 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2484 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2485 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2486 	datalen = MBLKL(data_mp);
2487 
2488 	if (datalen < minsize)
2489 		return (EINVAL);
2490 
2491 	/*
2492 	 * now we know we have at least have the initial structure,
2493 	 * but need to check for the source list array.
2494 	 */
2495 	if (is_v4only_api) {
2496 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2497 		isv6 = B_FALSE;
2498 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2499 	} else {
2500 		gf = (struct group_filter *)data_mp->b_rptr;
2501 		if (gf->gf_group.ss_family == AF_INET6) {
2502 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2503 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2504 		} else {
2505 			isv6 = B_FALSE;
2506 		}
2507 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2508 	}
2509 	if (datalen < expsize)
2510 		return (EINVAL);
2511 
2512 	connp = Q_TO_CONN(q);
2513 
2514 	/* operation not supported on the virtual network interface */
2515 	if (IS_VNI(ipif->ipif_ill))
2516 		return (EINVAL);
2517 
2518 	if (isv6) {
2519 		ill_t *ill = ipif->ipif_ill;
2520 		ill_refhold(ill);
2521 
2522 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2523 		v6grp = gsin6->sin6_addr;
2524 		if (getcmd)
2525 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2526 		else
2527 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2528 
2529 		ill_refrele(ill);
2530 	} else {
2531 		boolean_t isv4mapped = B_FALSE;
2532 		if (is_v4only_api) {
2533 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2534 		} else {
2535 			if (gf->gf_group.ss_family == AF_INET) {
2536 				gsin = (struct sockaddr_in *)&gf->gf_group;
2537 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2538 			} else {
2539 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2540 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2541 				    v4grp);
2542 				isv4mapped = B_TRUE;
2543 			}
2544 		}
2545 		if (getcmd)
2546 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2547 			    isv4mapped);
2548 		else
2549 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2550 			    isv4mapped);
2551 	}
2552 
2553 	return (err);
2554 }
2555 
2556 /*
2557  * Finds the ipif based on information in the ioctl headers.  Needed to make
2558  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2559  * ioctls prior to calling the ioctl's handler function).  Somewhat analogous
2560  * to ip_extract_lifreq_cmn() and ip_extract_tunreq().
2561  */
2562 int
2563 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func)
2564 {
2565 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2566 	int cmd = iocp->ioc_cmd, err = 0;
2567 	conn_t *connp;
2568 	ipif_t *ipif;
2569 	/* caller has verified this mblk exists */
2570 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2571 	struct ip_msfilter *imsf;
2572 	struct group_filter *gf;
2573 	ipaddr_t v4addr, v4grp;
2574 	in6_addr_t v6grp;
2575 	uint32_t index;
2576 	zoneid_t zoneid;
2577 
2578 	connp = Q_TO_CONN(q);
2579 	zoneid = connp->conn_zoneid;
2580 
2581 	/* don't allow multicast operations on a tcp conn */
2582 	if (IPCL_IS_TCP(connp))
2583 		return (ENOPROTOOPT);
2584 
2585 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2586 		/* don't allow v4-specific ioctls on v6 socket */
2587 		if (connp->conn_af_isv6)
2588 			return (EAFNOSUPPORT);
2589 
2590 		imsf = (struct ip_msfilter *)dbuf;
2591 		v4addr = imsf->imsf_interface.s_addr;
2592 		v4grp = imsf->imsf_multiaddr.s_addr;
2593 		if (v4addr == INADDR_ANY) {
2594 			ipif = ipif_lookup_group(v4grp, zoneid);
2595 			if (ipif == NULL)
2596 				err = EADDRNOTAVAIL;
2597 		} else {
2598 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2599 			    func, &err);
2600 		}
2601 	} else {
2602 		boolean_t isv6 = B_FALSE;
2603 		gf = (struct group_filter *)dbuf;
2604 		index = gf->gf_interface;
2605 		if (gf->gf_group.ss_family == AF_INET6) {
2606 			struct sockaddr_in6 *sin6;
2607 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2608 			v6grp = sin6->sin6_addr;
2609 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2610 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2611 			else
2612 				isv6 = B_TRUE;
2613 		} else if (gf->gf_group.ss_family == AF_INET) {
2614 			struct sockaddr_in *sin;
2615 			sin = (struct sockaddr_in *)&gf->gf_group;
2616 			v4grp = sin->sin_addr.s_addr;
2617 		} else {
2618 			return (EAFNOSUPPORT);
2619 		}
2620 		if (index == 0) {
2621 			if (isv6)
2622 				ipif = ipif_lookup_group_v6(&v6grp, zoneid);
2623 			else
2624 				ipif = ipif_lookup_group(v4grp, zoneid);
2625 			if (ipif == NULL)
2626 				err = EADDRNOTAVAIL;
2627 		} else {
2628 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2629 			    q, mp, func, &err);
2630 		}
2631 	}
2632 
2633 	*ipifpp = ipif;
2634 	return (err);
2635 }
2636 
2637 /*
2638  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2639  * in in two stages, as the first copyin tells us the size of the attached
2640  * source buffer.  This function is called by ip_wput_nondata() after the
2641  * first copyin has completed; it figures out how big the second stage
2642  * needs to be, and kicks it off.
2643  *
2644  * In some cases (numsrc < 2), the second copyin is not needed as the
2645  * first one gets a complete structure containing 1 source addr.
2646  *
2647  * The function returns 0 if a second copyin has been started (i.e. there's
2648  * no more work to be done right now), or 1 if the second copyin is not
2649  * needed and ip_wput_nondata() can continue its processing.
2650  */
2651 int
2652 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2653 {
2654 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2655 	int cmd = iocp->ioc_cmd;
2656 	/* validity of this checked in ip_wput_nondata() */
2657 	mblk_t *mp1 = mp->b_cont->b_cont;
2658 	int copysize = 0;
2659 	int offset;
2660 
2661 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2662 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2663 		if (gf->gf_numsrc >= 2) {
2664 			offset = sizeof (struct group_filter);
2665 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2666 		}
2667 	} else {
2668 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2669 		if (imsf->imsf_numsrc >= 2) {
2670 			offset = sizeof (struct ip_msfilter);
2671 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2672 		}
2673 	}
2674 	if (copysize > 0) {
2675 		mi_copyin_n(q, mp, offset, copysize);
2676 		return (0);
2677 	}
2678 	return (1);
2679 }
2680 
2681 /*
2682  * Handle the following optmgmt:
2683  *	IP_ADD_MEMBERSHIP		must not have joined already
2684  *	MCAST_JOIN_GROUP		must not have joined already
2685  *	IP_BLOCK_SOURCE			must have joined already
2686  *	MCAST_BLOCK_SOURCE		must have joined already
2687  *	IP_JOIN_SOURCE_GROUP		may have joined already
2688  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2689  *
2690  * fmode and src parameters may be used to determine which option is
2691  * being set, as follows (the IP_* and MCAST_* versions of each option
2692  * are functionally equivalent):
2693  *	opt			fmode			src
2694  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2695  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2696  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2697  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2698  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2699  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2700  *
2701  * Changing the filter mode is not allowed; if a matching ilg already
2702  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2703  *
2704  * Verifies that there is a source address of appropriate scope for
2705  * the group; if not, EADDRNOTAVAIL is returned.
2706  *
2707  * The interface to be used may be identified by an address or by an
2708  * index.  A pointer to the index is passed; if it is NULL, use the
2709  * address, otherwise, use the index.
2710  */
2711 int
2712 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2713     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2714     mblk_t *first_mp)
2715 {
2716 	ipif_t	*ipif;
2717 	ipsq_t	*ipsq;
2718 	int err = 0;
2719 	ill_t	*ill;
2720 
2721 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2722 	    ip_restart_optmgmt, &ipif);
2723 	if (err != 0) {
2724 		if (err != EINPROGRESS) {
2725 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2726 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2727 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2728 		}
2729 		return (err);
2730 	}
2731 	ASSERT(ipif != NULL);
2732 
2733 	ill = ipif->ipif_ill;
2734 	/* Operation not supported on a virtual network interface */
2735 	if (IS_VNI(ill)) {
2736 		ipif_refrele(ipif);
2737 		return (EINVAL);
2738 	}
2739 
2740 	if (checkonly) {
2741 		/*
2742 		 * do not do operation, just pretend to - new T_CHECK
2743 		 * semantics. The error return case above if encountered
2744 		 * considered a good enough "check" here.
2745 		 */
2746 		ipif_refrele(ipif);
2747 		return (0);
2748 	}
2749 
2750 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2751 	    NEW_OP);
2752 
2753 	/* unspecified source addr => no source filtering */
2754 	err = ilg_add(connp, group, ipif, fmode, src);
2755 
2756 	IPSQ_EXIT(ipsq);
2757 
2758 	ipif_refrele(ipif);
2759 	return (err);
2760 }
2761 
2762 /*
2763  * Handle the following optmgmt:
2764  *	IPV6_JOIN_GROUP			must not have joined already
2765  *	MCAST_JOIN_GROUP		must not have joined already
2766  *	MCAST_BLOCK_SOURCE		must have joined already
2767  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2768  *
2769  * fmode and src parameters may be used to determine which option is
2770  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2771  * are functionally equivalent):
2772  *	opt			fmode			v6src
2773  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2774  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2775  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2776  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2777  *
2778  * Changing the filter mode is not allowed; if a matching ilg already
2779  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2780  *
2781  * Verifies that there is a source address of appropriate scope for
2782  * the group; if not, EADDRNOTAVAIL is returned.
2783  *
2784  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2785  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2786  * v6src is also v4-mapped.
2787  */
2788 int
2789 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2790     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2791     const in6_addr_t *v6src, mblk_t *first_mp)
2792 {
2793 	ill_t *ill;
2794 	ipif_t	*ipif;
2795 	char buf[INET6_ADDRSTRLEN];
2796 	ipaddr_t v4group, v4src;
2797 	boolean_t isv6;
2798 	ipsq_t	*ipsq;
2799 	int	err;
2800 
2801 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2802 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2803 	if (err != 0) {
2804 		if (err != EINPROGRESS) {
2805 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2806 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2807 			    sizeof (buf)), ifindex));
2808 		}
2809 		return (err);
2810 	}
2811 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2812 
2813 	/* operation is not supported on the virtual network interface */
2814 	if (isv6) {
2815 		if (IS_VNI(ill)) {
2816 			ill_refrele(ill);
2817 			return (EINVAL);
2818 		}
2819 	} else {
2820 		if (IS_VNI(ipif->ipif_ill)) {
2821 			ipif_refrele(ipif);
2822 			return (EINVAL);
2823 		}
2824 	}
2825 
2826 	if (checkonly) {
2827 		/*
2828 		 * do not do operation, just pretend to - new T_CHECK
2829 		 * semantics. The error return case above if encountered
2830 		 * considered a good enough "check" here.
2831 		 */
2832 		if (isv6)
2833 			ill_refrele(ill);
2834 		else
2835 			ipif_refrele(ipif);
2836 		return (0);
2837 	}
2838 
2839 	if (!isv6) {
2840 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2841 		    ipsq, NEW_OP);
2842 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2843 		IPSQ_EXIT(ipsq);
2844 		ipif_refrele(ipif);
2845 	} else {
2846 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2847 		    ipsq, NEW_OP);
2848 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2849 		IPSQ_EXIT(ipsq);
2850 		ill_refrele(ill);
2851 	}
2852 
2853 	return (err);
2854 }
2855 
2856 static int
2857 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2858     mcast_record_t fmode, ipaddr_t src)
2859 {
2860 	ilg_t	*ilg;
2861 	in6_addr_t v6src;
2862 	boolean_t leaving = B_FALSE;
2863 
2864 	ASSERT(IAM_WRITER_IPIF(ipif));
2865 
2866 	/*
2867 	 * The ilg is valid only while we hold the conn lock. Once we drop
2868 	 * the lock, another thread can locate another ilg on this connp,
2869 	 * but on a different ipif, and delete it, and cause the ilg array
2870 	 * to be reallocated and copied. Hence do the ilg_delete before
2871 	 * dropping the lock.
2872 	 */
2873 	mutex_enter(&connp->conn_lock);
2874 	ilg = ilg_lookup_ipif(connp, group, ipif);
2875 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2876 		mutex_exit(&connp->conn_lock);
2877 		return (EADDRNOTAVAIL);
2878 	}
2879 
2880 	/*
2881 	 * Decide if we're actually deleting the ilg or just removing a
2882 	 * source filter address; if just removing an addr, make sure we
2883 	 * aren't trying to change the filter mode, and that the addr is
2884 	 * actually in our filter list already.  If we're removing the
2885 	 * last src in an include list, just delete the ilg.
2886 	 */
2887 	if (src == INADDR_ANY) {
2888 		v6src = ipv6_all_zeros;
2889 		leaving = B_TRUE;
2890 	} else {
2891 		int err = 0;
2892 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2893 		if (fmode != ilg->ilg_fmode)
2894 			err = EINVAL;
2895 		else if (ilg->ilg_filter == NULL ||
2896 		    !list_has_addr(ilg->ilg_filter, &v6src))
2897 			err = EADDRNOTAVAIL;
2898 		if (err != 0) {
2899 			mutex_exit(&connp->conn_lock);
2900 			return (err);
2901 		}
2902 		if (fmode == MODE_IS_INCLUDE &&
2903 		    ilg->ilg_filter->sl_numsrc == 1) {
2904 			v6src = ipv6_all_zeros;
2905 			leaving = B_TRUE;
2906 		}
2907 	}
2908 
2909 	ilg_delete(connp, ilg, &v6src);
2910 	mutex_exit(&connp->conn_lock);
2911 
2912 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2913 	return (0);
2914 }
2915 
2916 static int
2917 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2918     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2919 {
2920 	ilg_t	*ilg;
2921 	ill_t	*ilg_ill;
2922 	uint_t	ilg_orig_ifindex;
2923 	boolean_t leaving = B_TRUE;
2924 
2925 	ASSERT(IAM_WRITER_ILL(ill));
2926 
2927 	/*
2928 	 * Use the index that we originally used to join. We can't
2929 	 * use the ill directly because ilg_ill could point to
2930 	 * a new ill if things have moved.
2931 	 */
2932 	mutex_enter(&connp->conn_lock);
2933 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2934 	    ill->ill_phyint->phyint_ifindex);
2935 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2936 		mutex_exit(&connp->conn_lock);
2937 		return (EADDRNOTAVAIL);
2938 	}
2939 
2940 	/*
2941 	 * Decide if we're actually deleting the ilg or just removing a
2942 	 * source filter address; if just removing an addr, make sure we
2943 	 * aren't trying to change the filter mode, and that the addr is
2944 	 * actually in our filter list already.  If we're removing the
2945 	 * last src in an include list, just delete the ilg.
2946 	 */
2947 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2948 		int err = 0;
2949 		if (fmode != ilg->ilg_fmode)
2950 			err = EINVAL;
2951 		else if (ilg->ilg_filter == NULL ||
2952 		    !list_has_addr(ilg->ilg_filter, v6src))
2953 			err = EADDRNOTAVAIL;
2954 		if (err != 0) {
2955 			mutex_exit(&connp->conn_lock);
2956 			return (err);
2957 		}
2958 		if (fmode == MODE_IS_INCLUDE &&
2959 		    ilg->ilg_filter->sl_numsrc == 1)
2960 			v6src = NULL;
2961 		else
2962 			leaving = B_FALSE;
2963 	}
2964 
2965 	ilg_ill = ilg->ilg_ill;
2966 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2967 	ilg_delete(connp, ilg, v6src);
2968 	mutex_exit(&connp->conn_lock);
2969 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2970 	    connp->conn_zoneid, B_FALSE, leaving);
2971 
2972 	return (0);
2973 }
2974 
2975 /*
2976  * Handle the following optmgmt:
2977  *	IP_DROP_MEMBERSHIP		will leave
2978  *	MCAST_LEAVE_GROUP		will leave
2979  *	IP_UNBLOCK_SOURCE		will not leave
2980  *	MCAST_UNBLOCK_SOURCE		will not leave
2981  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2982  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2983  *
2984  * fmode and src parameters may be used to determine which option is
2985  * being set, as follows (the IP_* and MCAST_* versions of each option
2986  * are functionally equivalent):
2987  *	opt			 fmode			src
2988  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2989  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2990  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2991  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2992  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
2993  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
2994  *
2995  * Changing the filter mode is not allowed; if a matching ilg already
2996  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2997  *
2998  * The interface to be used may be identified by an address or by an
2999  * index.  A pointer to the index is passed; if it is NULL, use the
3000  * address, otherwise, use the index.
3001  */
3002 int
3003 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3004     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3005     mblk_t *first_mp)
3006 {
3007 	ipif_t	*ipif;
3008 	ipsq_t	*ipsq;
3009 	int	err;
3010 	ill_t	*ill;
3011 
3012 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3013 	    ip_restart_optmgmt, &ipif);
3014 	if (err != 0) {
3015 		if (err != EINPROGRESS) {
3016 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3017 			    "0x%x, ifaddr 0x%x\n",
3018 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3019 		}
3020 		return (err);
3021 	}
3022 	ASSERT(ipif != NULL);
3023 
3024 	ill = ipif->ipif_ill;
3025 	/* Operation not supported on a virtual network interface */
3026 	if (IS_VNI(ill)) {
3027 		ipif_refrele(ipif);
3028 		return (EINVAL);
3029 	}
3030 
3031 	if (checkonly) {
3032 		/*
3033 		 * do not do operation, just pretend to - new T_CHECK
3034 		 * semantics. The error return case above if encountered
3035 		 * considered a good enough "check" here.
3036 		 */
3037 		ipif_refrele(ipif);
3038 		return (0);
3039 	}
3040 
3041 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3042 	    NEW_OP);
3043 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3044 	IPSQ_EXIT(ipsq);
3045 
3046 	ipif_refrele(ipif);
3047 	return (err);
3048 }
3049 
3050 /*
3051  * Handle the following optmgmt:
3052  *	IPV6_LEAVE_GROUP		will leave
3053  *	MCAST_LEAVE_GROUP		will leave
3054  *	MCAST_UNBLOCK_SOURCE		will not leave
3055  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3056  *
3057  * fmode and src parameters may be used to determine which option is
3058  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3059  * are functionally equivalent):
3060  *	opt			 fmode			v6src
3061  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3062  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3063  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3064  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3065  *
3066  * Changing the filter mode is not allowed; if a matching ilg already
3067  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3068  *
3069  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3070  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3071  * v6src is also v4-mapped.
3072  */
3073 int
3074 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3075     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3076     const in6_addr_t *v6src, mblk_t *first_mp)
3077 {
3078 	ill_t *ill;
3079 	ipif_t	*ipif;
3080 	char	buf[INET6_ADDRSTRLEN];
3081 	ipaddr_t v4group, v4src;
3082 	boolean_t isv6;
3083 	ipsq_t	*ipsq;
3084 	int	err;
3085 
3086 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3087 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3088 	if (err != 0) {
3089 		if (err != EINPROGRESS) {
3090 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3091 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3092 			    sizeof (buf)), ifindex));
3093 		}
3094 		return (err);
3095 	}
3096 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3097 
3098 	/* operation is not supported on the virtual network interface */
3099 	if (isv6) {
3100 		if (IS_VNI(ill)) {
3101 			ill_refrele(ill);
3102 			return (EINVAL);
3103 		}
3104 	} else {
3105 		if (IS_VNI(ipif->ipif_ill)) {
3106 			ipif_refrele(ipif);
3107 			return (EINVAL);
3108 		}
3109 	}
3110 
3111 	if (checkonly) {
3112 		/*
3113 		 * do not do operation, just pretend to - new T_CHECK
3114 		 * semantics. The error return case above if encountered
3115 		 * considered a good enough "check" here.
3116 		 */
3117 		if (isv6)
3118 			ill_refrele(ill);
3119 		else
3120 			ipif_refrele(ipif);
3121 		return (0);
3122 	}
3123 
3124 	if (!isv6) {
3125 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3126 		    ipsq, NEW_OP);
3127 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3128 		    v4src);
3129 		IPSQ_EXIT(ipsq);
3130 		ipif_refrele(ipif);
3131 	} else {
3132 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3133 		    ipsq, NEW_OP);
3134 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3135 		    v6src);
3136 		IPSQ_EXIT(ipsq);
3137 		ill_refrele(ill);
3138 	}
3139 
3140 	return (err);
3141 }
3142 
3143 /*
3144  * Group mgmt for upper conn that passes things down
3145  * to the interface multicast list (and DLPI)
3146  * These routines can handle new style options that specify an interface name
3147  * as opposed to an interface address (needed for general handling of
3148  * unnumbered interfaces.)
3149  */
3150 
3151 /*
3152  * Add a group to an upper conn group data structure and pass things down
3153  * to the interface multicast list (and DLPI)
3154  */
3155 static int
3156 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3157     ipaddr_t src)
3158 {
3159 	int	error = 0;
3160 	ill_t	*ill;
3161 	ilg_t	*ilg;
3162 	ilg_stat_t ilgstat;
3163 	slist_t	*new_filter = NULL;
3164 	int	new_fmode;
3165 
3166 	ASSERT(IAM_WRITER_IPIF(ipif));
3167 
3168 	ill = ipif->ipif_ill;
3169 
3170 	if (!(ill->ill_flags & ILLF_MULTICAST))
3171 		return (EADDRNOTAVAIL);
3172 
3173 	/*
3174 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3175 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3176 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3177 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3178 	 * but both operations happen on the same conn.
3179 	 */
3180 	mutex_enter(&connp->conn_lock);
3181 	ilg = ilg_lookup_ipif(connp, group, ipif);
3182 
3183 	/*
3184 	 * Depending on the option we're handling, may or may not be okay
3185 	 * if group has already been added.  Figure out our rules based
3186 	 * on fmode and src params.  Also make sure there's enough room
3187 	 * in the filter if we're adding a source to an existing filter.
3188 	 */
3189 	if (src == INADDR_ANY) {
3190 		/* we're joining for all sources, must not have joined */
3191 		if (ilg != NULL)
3192 			error = EADDRINUSE;
3193 	} else {
3194 		if (fmode == MODE_IS_EXCLUDE) {
3195 			/* (excl {addr}) => block source, must have joined */
3196 			if (ilg == NULL)
3197 				error = EADDRNOTAVAIL;
3198 		}
3199 		/* (incl {addr}) => join source, may have joined */
3200 
3201 		if (ilg != NULL &&
3202 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3203 			error = ENOBUFS;
3204 	}
3205 	if (error != 0) {
3206 		mutex_exit(&connp->conn_lock);
3207 		return (error);
3208 	}
3209 
3210 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3211 
3212 	/*
3213 	 * Alloc buffer to copy new state into (see below) before
3214 	 * we make any changes, so we can bail if it fails.
3215 	 */
3216 	if ((new_filter = l_alloc()) == NULL) {
3217 		mutex_exit(&connp->conn_lock);
3218 		return (ENOMEM);
3219 	}
3220 
3221 	if (ilg == NULL) {
3222 		ilgstat = ILGSTAT_NEW;
3223 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3224 			mutex_exit(&connp->conn_lock);
3225 			l_free(new_filter);
3226 			return (ENOMEM);
3227 		}
3228 		if (src != INADDR_ANY) {
3229 			ilg->ilg_filter = l_alloc();
3230 			if (ilg->ilg_filter == NULL) {
3231 				ilg_delete(connp, ilg, NULL);
3232 				mutex_exit(&connp->conn_lock);
3233 				l_free(new_filter);
3234 				return (ENOMEM);
3235 			}
3236 			ilg->ilg_filter->sl_numsrc = 1;
3237 			IN6_IPADDR_TO_V4MAPPED(src,
3238 			    &ilg->ilg_filter->sl_addr[0]);
3239 		}
3240 		if (group == INADDR_ANY) {
3241 			ilg->ilg_v6group = ipv6_all_zeros;
3242 		} else {
3243 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3244 		}
3245 		ilg->ilg_ipif = ipif;
3246 		ilg->ilg_ill = NULL;
3247 		ilg->ilg_orig_ifindex = 0;
3248 		ilg->ilg_fmode = fmode;
3249 	} else {
3250 		int index;
3251 		in6_addr_t v6src;
3252 		ilgstat = ILGSTAT_CHANGE;
3253 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3254 			mutex_exit(&connp->conn_lock);
3255 			l_free(new_filter);
3256 			return (EINVAL);
3257 		}
3258 		if (ilg->ilg_filter == NULL) {
3259 			ilg->ilg_filter = l_alloc();
3260 			if (ilg->ilg_filter == NULL) {
3261 				mutex_exit(&connp->conn_lock);
3262 				l_free(new_filter);
3263 				return (ENOMEM);
3264 			}
3265 		}
3266 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3267 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3268 			mutex_exit(&connp->conn_lock);
3269 			l_free(new_filter);
3270 			return (EADDRNOTAVAIL);
3271 		}
3272 		index = ilg->ilg_filter->sl_numsrc++;
3273 		ilg->ilg_filter->sl_addr[index] = v6src;
3274 	}
3275 
3276 	/*
3277 	 * Save copy of ilg's filter state to pass to other functions,
3278 	 * so we can release conn_lock now.
3279 	 */
3280 	new_fmode = ilg->ilg_fmode;
3281 	l_copy(ilg->ilg_filter, new_filter);
3282 
3283 	mutex_exit(&connp->conn_lock);
3284 
3285 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3286 	if (error != 0) {
3287 		/*
3288 		 * Need to undo what we did before calling ip_addmulti()!
3289 		 * Must look up the ilg again since we've not been holding
3290 		 * conn_lock.
3291 		 */
3292 		in6_addr_t v6src;
3293 		if (ilgstat == ILGSTAT_NEW)
3294 			v6src = ipv6_all_zeros;
3295 		else
3296 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3297 		mutex_enter(&connp->conn_lock);
3298 		ilg = ilg_lookup_ipif(connp, group, ipif);
3299 		ASSERT(ilg != NULL);
3300 		ilg_delete(connp, ilg, &v6src);
3301 		mutex_exit(&connp->conn_lock);
3302 		l_free(new_filter);
3303 		return (error);
3304 	}
3305 
3306 	l_free(new_filter);
3307 	return (0);
3308 }
3309 
3310 static int
3311 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3312     mcast_record_t fmode, const in6_addr_t *v6src)
3313 {
3314 	int	error = 0;
3315 	int	orig_ifindex;
3316 	ilg_t	*ilg;
3317 	ilg_stat_t ilgstat;
3318 	slist_t	*new_filter = NULL;
3319 	int	new_fmode;
3320 
3321 	ASSERT(IAM_WRITER_ILL(ill));
3322 
3323 	if (!(ill->ill_flags & ILLF_MULTICAST))
3324 		return (EADDRNOTAVAIL);
3325 
3326 	/*
3327 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3328 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3329 	 * and hme1 map to different ipsq's, but both operations happen
3330 	 * on the same conn.
3331 	 */
3332 	mutex_enter(&connp->conn_lock);
3333 
3334 	/*
3335 	 * Use the ifindex to do the lookup. We can't use the ill
3336 	 * directly because ilg_ill could point to a different ill if
3337 	 * things have moved.
3338 	 */
3339 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3340 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3341 
3342 	/*
3343 	 * Depending on the option we're handling, may or may not be okay
3344 	 * if group has already been added.  Figure out our rules based
3345 	 * on fmode and src params.  Also make sure there's enough room
3346 	 * in the filter if we're adding a source to an existing filter.
3347 	 */
3348 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3349 		/* we're joining for all sources, must not have joined */
3350 		if (ilg != NULL)
3351 			error = EADDRINUSE;
3352 	} else {
3353 		if (fmode == MODE_IS_EXCLUDE) {
3354 			/* (excl {addr}) => block source, must have joined */
3355 			if (ilg == NULL)
3356 				error = EADDRNOTAVAIL;
3357 		}
3358 		/* (incl {addr}) => join source, may have joined */
3359 
3360 		if (ilg != NULL &&
3361 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3362 			error = ENOBUFS;
3363 	}
3364 	if (error != 0) {
3365 		mutex_exit(&connp->conn_lock);
3366 		return (error);
3367 	}
3368 
3369 	/*
3370 	 * Alloc buffer to copy new state into (see below) before
3371 	 * we make any changes, so we can bail if it fails.
3372 	 */
3373 	if ((new_filter = l_alloc()) == NULL) {
3374 		mutex_exit(&connp->conn_lock);
3375 		return (ENOMEM);
3376 	}
3377 
3378 	if (ilg == NULL) {
3379 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3380 			mutex_exit(&connp->conn_lock);
3381 			l_free(new_filter);
3382 			return (ENOMEM);
3383 		}
3384 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3385 			ilg->ilg_filter = l_alloc();
3386 			if (ilg->ilg_filter == NULL) {
3387 				ilg_delete(connp, ilg, NULL);
3388 				mutex_exit(&connp->conn_lock);
3389 				l_free(new_filter);
3390 				return (ENOMEM);
3391 			}
3392 			ilg->ilg_filter->sl_numsrc = 1;
3393 			ilg->ilg_filter->sl_addr[0] = *v6src;
3394 		}
3395 		ilgstat = ILGSTAT_NEW;
3396 		ilg->ilg_v6group = *v6group;
3397 		ilg->ilg_fmode = fmode;
3398 		ilg->ilg_ipif = NULL;
3399 		/*
3400 		 * Choose our target ill to join on. This might be different
3401 		 * from the ill we've been given if it's currently down and
3402 		 * part of a group.
3403 		 *
3404 		 * new ill is not refheld; we are writer.
3405 		 */
3406 		ill = ip_choose_multi_ill(ill, v6group);
3407 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3408 		ilg->ilg_ill = ill;
3409 		/*
3410 		 * Remember the orig_ifindex that we joined on, so that we
3411 		 * can successfully delete them later on and also search
3412 		 * for duplicates if the application wants to join again.
3413 		 */
3414 		ilg->ilg_orig_ifindex = orig_ifindex;
3415 	} else {
3416 		int index;
3417 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3418 			mutex_exit(&connp->conn_lock);
3419 			l_free(new_filter);
3420 			return (EINVAL);
3421 		}
3422 		if (ilg->ilg_filter == NULL) {
3423 			ilg->ilg_filter = l_alloc();
3424 			if (ilg->ilg_filter == NULL) {
3425 				mutex_exit(&connp->conn_lock);
3426 				l_free(new_filter);
3427 				return (ENOMEM);
3428 			}
3429 		}
3430 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3431 			mutex_exit(&connp->conn_lock);
3432 			l_free(new_filter);
3433 			return (EADDRNOTAVAIL);
3434 		}
3435 		ilgstat = ILGSTAT_CHANGE;
3436 		index = ilg->ilg_filter->sl_numsrc++;
3437 		ilg->ilg_filter->sl_addr[index] = *v6src;
3438 		/*
3439 		 * The current ill might be different from the one we were
3440 		 * asked to join on (if failover has occurred); we should
3441 		 * join on the ill stored in the ilg.  The original ill
3442 		 * is noted in ilg_orig_ifindex, which matched our request.
3443 		 */
3444 		ill = ilg->ilg_ill;
3445 	}
3446 
3447 	/*
3448 	 * Save copy of ilg's filter state to pass to other functions,
3449 	 * so we can release conn_lock now.
3450 	 */
3451 	new_fmode = ilg->ilg_fmode;
3452 	l_copy(ilg->ilg_filter, new_filter);
3453 
3454 	mutex_exit(&connp->conn_lock);
3455 
3456 	/*
3457 	 * Now update the ill. We wait to do this until after the ilg
3458 	 * has been updated because we need to update the src filter
3459 	 * info for the ill, which involves looking at the status of
3460 	 * all the ilgs associated with this group/interface pair.
3461 	 */
3462 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3463 	    ilgstat, new_fmode, new_filter);
3464 	if (error != 0) {
3465 		/*
3466 		 * But because we waited, we have to undo the ilg update
3467 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3468 		 * again, since we've not been holding conn_lock.
3469 		 */
3470 		in6_addr_t delsrc =
3471 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3472 		mutex_enter(&connp->conn_lock);
3473 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3474 		ASSERT(ilg != NULL);
3475 		ilg_delete(connp, ilg, &delsrc);
3476 		mutex_exit(&connp->conn_lock);
3477 		l_free(new_filter);
3478 		return (error);
3479 	}
3480 
3481 	l_free(new_filter);
3482 
3483 	return (0);
3484 }
3485 
3486 /*
3487  * Find an IPv4 ilg matching group, ill and source
3488  */
3489 ilg_t *
3490 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3491 {
3492 	in6_addr_t v6group, v6src;
3493 	int i;
3494 	boolean_t isinlist;
3495 	ilg_t *ilg;
3496 	ipif_t *ipif;
3497 	ill_t *ilg_ill;
3498 
3499 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3500 
3501 	/*
3502 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3503 	 */
3504 	if (group == INADDR_ANY)
3505 		v6group = ipv6_all_zeros;
3506 	else
3507 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3508 
3509 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3510 		/* ilg_ipif is NULL for v6; skip them */
3511 		ilg = &connp->conn_ilg[i];
3512 		if ((ipif = ilg->ilg_ipif) == NULL)
3513 			continue;
3514 		ASSERT(ilg->ilg_ill == NULL);
3515 		ilg_ill = ipif->ipif_ill;
3516 		ASSERT(!ilg_ill->ill_isv6);
3517 		if (ilg_ill == ill &&
3518 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3519 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3520 				/* no source filter, so this is a match */
3521 				return (ilg);
3522 			}
3523 			break;
3524 		}
3525 	}
3526 	if (i == connp->conn_ilg_inuse)
3527 		return (NULL);
3528 
3529 	/*
3530 	 * we have an ilg with matching ill and group; but
3531 	 * the ilg has a source list that we must check.
3532 	 */
3533 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3534 	isinlist = B_FALSE;
3535 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3536 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3537 			isinlist = B_TRUE;
3538 			break;
3539 		}
3540 	}
3541 
3542 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3543 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3544 		return (ilg);
3545 
3546 	return (NULL);
3547 }
3548 
3549 /*
3550  * Find an IPv6 ilg matching group, ill, and source
3551  */
3552 ilg_t *
3553 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3554     const in6_addr_t *v6src, ill_t *ill)
3555 {
3556 	int i;
3557 	boolean_t isinlist;
3558 	ilg_t *ilg;
3559 	ill_t *ilg_ill;
3560 
3561 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3562 
3563 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3564 		ilg = &connp->conn_ilg[i];
3565 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3566 			continue;
3567 		ASSERT(ilg->ilg_ipif == NULL);
3568 		ASSERT(ilg_ill->ill_isv6);
3569 		if (ilg_ill == ill &&
3570 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3571 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3572 				/* no source filter, so this is a match */
3573 				return (ilg);
3574 			}
3575 			break;
3576 		}
3577 	}
3578 	if (i == connp->conn_ilg_inuse)
3579 		return (NULL);
3580 
3581 	/*
3582 	 * we have an ilg with matching ill and group; but
3583 	 * the ilg has a source list that we must check.
3584 	 */
3585 	isinlist = B_FALSE;
3586 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3587 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3588 			isinlist = B_TRUE;
3589 			break;
3590 		}
3591 	}
3592 
3593 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3594 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3595 		return (ilg);
3596 
3597 	return (NULL);
3598 }
3599 
3600 /*
3601  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3602  * This is useful when the interface fails and we have moved
3603  * to a new ill, but still would like to locate using the index
3604  * that we originally used to join. Used only for IPv6 currently.
3605  */
3606 static ilg_t *
3607 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3608 {
3609 	ilg_t	*ilg;
3610 	int	i;
3611 
3612 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3613 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3614 		ilg = &connp->conn_ilg[i];
3615 		/* ilg_ill is NULL for V4. Skip them */
3616 		if (ilg->ilg_ill == NULL)
3617 			continue;
3618 		/* ilg_ipif is NULL for V6 */
3619 		ASSERT(ilg->ilg_ipif == NULL);
3620 		ASSERT(ilg->ilg_orig_ifindex != 0);
3621 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3622 		    ilg->ilg_orig_ifindex == ifindex) {
3623 			return (ilg);
3624 		}
3625 	}
3626 	return (NULL);
3627 }
3628 
3629 /*
3630  * Find an IPv6 ilg matching group and ill
3631  */
3632 ilg_t *
3633 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3634 {
3635 	ilg_t	*ilg;
3636 	int	i;
3637 	ill_t 	*mem_ill;
3638 
3639 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3640 
3641 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3642 		ilg = &connp->conn_ilg[i];
3643 		if ((mem_ill = ilg->ilg_ill) == NULL)
3644 			continue;
3645 		ASSERT(ilg->ilg_ipif == NULL);
3646 		ASSERT(mem_ill->ill_isv6);
3647 		if (mem_ill == ill &&
3648 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3649 			return (ilg);
3650 	}
3651 	return (NULL);
3652 }
3653 
3654 /*
3655  * Find an IPv4 ilg matching group and ipif
3656  */
3657 static ilg_t *
3658 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3659 {
3660 	in6_addr_t v6group;
3661 	int	i;
3662 
3663 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3664 	ASSERT(!ipif->ipif_ill->ill_isv6);
3665 
3666 	if (group == INADDR_ANY)
3667 		v6group = ipv6_all_zeros;
3668 	else
3669 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3670 
3671 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3672 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3673 		    &v6group) &&
3674 		    connp->conn_ilg[i].ilg_ipif == ipif)
3675 			return (&connp->conn_ilg[i]);
3676 	}
3677 	return (NULL);
3678 }
3679 
3680 /*
3681  * If a source address is passed in (src != NULL and src is not
3682  * unspecified), remove the specified src addr from the given ilg's
3683  * filter list, else delete the ilg.
3684  */
3685 static void
3686 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3687 {
3688 	int	i;
3689 
3690 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3691 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3692 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3693 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3694 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3695 
3696 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3697 		if (connp->conn_ilg_walker_cnt != 0) {
3698 			ilg->ilg_flags |= ILG_DELETED;
3699 			return;
3700 		}
3701 
3702 		FREE_SLIST(ilg->ilg_filter);
3703 
3704 		i = ilg - &connp->conn_ilg[0];
3705 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3706 
3707 		/* Move other entries up one step */
3708 		connp->conn_ilg_inuse--;
3709 		for (; i < connp->conn_ilg_inuse; i++)
3710 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3711 
3712 		if (connp->conn_ilg_inuse == 0) {
3713 			mi_free((char *)connp->conn_ilg);
3714 			connp->conn_ilg = NULL;
3715 			cv_broadcast(&connp->conn_refcv);
3716 		}
3717 	} else {
3718 		l_remove(ilg->ilg_filter, src);
3719 	}
3720 }
3721 
3722 /*
3723  * Called from conn close. No new ilg can be added or removed.
3724  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3725  * will return error if conn has started closing.
3726  */
3727 void
3728 ilg_delete_all(conn_t *connp)
3729 {
3730 	int	i;
3731 	ipif_t	*ipif = NULL;
3732 	ill_t	*ill = NULL;
3733 	ilg_t	*ilg;
3734 	in6_addr_t v6group;
3735 	boolean_t success;
3736 	ipsq_t	*ipsq;
3737 	int	orig_ifindex;
3738 
3739 	mutex_enter(&connp->conn_lock);
3740 retry:
3741 	ILG_WALKER_HOLD(connp);
3742 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3743 		ilg = &connp->conn_ilg[i];
3744 		/*
3745 		 * Since this walk is not atomic (we drop the
3746 		 * conn_lock and wait in ipsq_enter) we need
3747 		 * to check for the ILG_DELETED flag.
3748 		 */
3749 		if (ilg->ilg_flags & ILG_DELETED) {
3750 			/* Go to the next ilg */
3751 			i--;
3752 			continue;
3753 		}
3754 		v6group = ilg->ilg_v6group;
3755 
3756 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3757 			ipif = ilg->ilg_ipif;
3758 			ill = ipif->ipif_ill;
3759 		} else {
3760 			ipif = NULL;
3761 			ill = ilg->ilg_ill;
3762 		}
3763 		/*
3764 		 * We may not be able to refhold the ill if the ill/ipif
3765 		 * is changing. But we need to make sure that the ill will
3766 		 * not vanish. So we just bump up the ill_waiter count.
3767 		 * If we are unable to do even that, then the ill is closing,
3768 		 * in which case the unplumb thread will handle the cleanup,
3769 		 * and we move on to the next ilg.
3770 		 */
3771 		if (!ill_waiter_inc(ill)) {
3772 			/* Go to the next ilg */
3773 			i--;
3774 			continue;
3775 		}
3776 		mutex_exit(&connp->conn_lock);
3777 		/*
3778 		 * To prevent deadlock between ill close which waits inside
3779 		 * the perimeter, and conn close, ipsq_enter returns error,
3780 		 * the moment ILL_CONDEMNED is set, in which case ill close
3781 		 * takes responsibility to cleanup the ilgs. Note that we
3782 		 * have not yet set condemned flag, otherwise the conn can't
3783 		 * be refheld for cleanup by those routines and it would be
3784 		 * a mutual deadlock.
3785 		 */
3786 		success = ipsq_enter(ill, B_FALSE);
3787 		ipsq = ill->ill_phyint->phyint_ipsq;
3788 		ill_waiter_dcr(ill);
3789 		mutex_enter(&connp->conn_lock);
3790 		if (!success) {
3791 			/* Go to the next ilg */
3792 			i--;
3793 			continue;
3794 		}
3795 
3796 		/*
3797 		 * Make sure that nothing has changed under. For eg.
3798 		 * a failover/failback can change ilg_ill while we were
3799 		 * waiting to become exclusive above
3800 		 */
3801 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3802 			ipif = ilg->ilg_ipif;
3803 			ill = ipif->ipif_ill;
3804 		} else {
3805 			ipif = NULL;
3806 			ill = ilg->ilg_ill;
3807 		}
3808 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3809 			/*
3810 			 * The ilg has changed under us probably due
3811 			 * to a failover or unplumb. Retry on the same ilg.
3812 			 */
3813 			mutex_exit(&connp->conn_lock);
3814 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3815 			mutex_enter(&connp->conn_lock);
3816 			continue;
3817 		}
3818 		v6group = ilg->ilg_v6group;
3819 		orig_ifindex = ilg->ilg_orig_ifindex;
3820 		ilg_delete(connp, ilg, NULL);
3821 		mutex_exit(&connp->conn_lock);
3822 
3823 		if (ipif != NULL)
3824 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3825 			    B_FALSE, B_TRUE);
3826 
3827 		else
3828 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3829 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3830 
3831 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3832 		mutex_enter(&connp->conn_lock);
3833 		/* Go to the next ilg */
3834 		i--;
3835 	}
3836 	ILG_WALKER_RELE(connp);
3837 
3838 	/* If any ill was skipped above wait and retry */
3839 	if (connp->conn_ilg_inuse != 0) {
3840 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3841 		goto retry;
3842 	}
3843 	mutex_exit(&connp->conn_lock);
3844 }
3845 
3846 /*
3847  * Called from ill close by ipcl_walk for clearing conn_ilg and
3848  * conn_multicast_ipif for a given ipif. conn is held by caller.
3849  * Note that ipcl_walk only walks conns that are not yet condemned.
3850  * condemned conns can't be refheld. For this reason, conn must become clean
3851  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3852  * condemned flag.
3853  */
3854 static void
3855 conn_delete_ipif(conn_t *connp, caddr_t arg)
3856 {
3857 	ipif_t	*ipif = (ipif_t *)arg;
3858 	int	i;
3859 	char	group_buf1[INET6_ADDRSTRLEN];
3860 	char	group_buf2[INET6_ADDRSTRLEN];
3861 	ipaddr_t group;
3862 	ilg_t	*ilg;
3863 
3864 	/*
3865 	 * Even though conn_ilg_inuse can change while we are in this loop,
3866 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3867 	 * be created or deleted for this connp, on this ill, since this ill
3868 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3869 	 */
3870 	mutex_enter(&connp->conn_lock);
3871 
3872 	/*
3873 	 * Increment the walker count, so that ilg repacking does not
3874 	 * occur while we are in the loop.
3875 	 */
3876 	ILG_WALKER_HOLD(connp);
3877 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3878 		ilg = &connp->conn_ilg[i];
3879 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3880 			continue;
3881 		/*
3882 		 * ip_close cannot be cleaning this ilg at the same time.
3883 		 * since it also has to execute in this ill's perimeter which
3884 		 * we are now holding. Only a clean conn can be condemned.
3885 		 */
3886 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3887 
3888 		/* Blow away the membership */
3889 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3890 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3891 		    group_buf1, sizeof (group_buf1)),
3892 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3893 		    group_buf2, sizeof (group_buf2)),
3894 		    ipif->ipif_ill->ill_name));
3895 
3896 		/* ilg_ipif is NULL for V6, so we won't be here */
3897 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3898 
3899 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3900 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3901 		mutex_exit(&connp->conn_lock);
3902 
3903 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3904 		mutex_enter(&connp->conn_lock);
3905 	}
3906 
3907 	/*
3908 	 * If we are the last walker, need to physically delete the
3909 	 * ilgs and repack.
3910 	 */
3911 	ILG_WALKER_RELE(connp);
3912 
3913 	if (connp->conn_multicast_ipif == ipif) {
3914 		/* Revert to late binding */
3915 		connp->conn_multicast_ipif = NULL;
3916 	}
3917 	mutex_exit(&connp->conn_lock);
3918 
3919 	conn_delete_ire(connp, (caddr_t)ipif);
3920 }
3921 
3922 /*
3923  * Called from ill close by ipcl_walk for clearing conn_ilg and
3924  * conn_multicast_ill for a given ill. conn is held by caller.
3925  * Note that ipcl_walk only walks conns that are not yet condemned.
3926  * condemned conns can't be refheld. For this reason, conn must become clean
3927  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3928  * condemned flag.
3929  */
3930 static void
3931 conn_delete_ill(conn_t *connp, caddr_t arg)
3932 {
3933 	ill_t	*ill = (ill_t *)arg;
3934 	int	i;
3935 	char	group_buf[INET6_ADDRSTRLEN];
3936 	in6_addr_t v6group;
3937 	int	orig_ifindex;
3938 	ilg_t	*ilg;
3939 
3940 	/*
3941 	 * Even though conn_ilg_inuse can change while we are in this loop,
3942 	 * no new ilgs can be created/deleted for this connp, on this
3943 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3944 	 * in this cleanup.
3945 	 */
3946 	mutex_enter(&connp->conn_lock);
3947 
3948 	/*
3949 	 * Increment the walker count, so that ilg repacking does not
3950 	 * occur while we are in the loop.
3951 	 */
3952 	ILG_WALKER_HOLD(connp);
3953 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3954 		ilg = &connp->conn_ilg[i];
3955 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3956 			/*
3957 			 * ip_close cannot be cleaning this ilg at the same
3958 			 * time, since it also has to execute in this ill's
3959 			 * perimeter which we are now holding. Only a clean
3960 			 * conn can be condemned.
3961 			 */
3962 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3963 
3964 			/* Blow away the membership */
3965 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3966 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3967 			    group_buf, sizeof (group_buf)),
3968 			    ill->ill_name));
3969 
3970 			v6group = ilg->ilg_v6group;
3971 			orig_ifindex = ilg->ilg_orig_ifindex;
3972 			ilg_delete(connp, ilg, NULL);
3973 			mutex_exit(&connp->conn_lock);
3974 
3975 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3976 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3977 			mutex_enter(&connp->conn_lock);
3978 		}
3979 	}
3980 	/*
3981 	 * If we are the last walker, need to physically delete the
3982 	 * ilgs and repack.
3983 	 */
3984 	ILG_WALKER_RELE(connp);
3985 
3986 	if (connp->conn_multicast_ill == ill) {
3987 		/* Revert to late binding */
3988 		connp->conn_multicast_ill = NULL;
3989 		connp->conn_orig_multicast_ifindex = 0;
3990 	}
3991 	mutex_exit(&connp->conn_lock);
3992 }
3993 
3994 /*
3995  * Called when an ipif is unplumbed to make sure that there are no
3996  * dangling conn references to that ipif.
3997  * Handles ilg_ipif and conn_multicast_ipif
3998  */
3999 void
4000 reset_conn_ipif(ipif)
4001 	ipif_t	*ipif;
4002 {
4003 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif);
4004 	/* flush the SCTP ire cache for this ipif */
4005 	sctp_ire_cache_flush(ipif);
4006 }
4007 
4008 /*
4009  * Called when an ill is unplumbed to make sure that there are no
4010  * dangling conn references to that ill.
4011  * Handles ilg_ill, conn_multicast_ill.
4012  */
4013 void
4014 reset_conn_ill(ill_t *ill)
4015 {
4016 	ipcl_walk(conn_delete_ill, (caddr_t)ill);
4017 }
4018 
4019 #ifdef DEBUG
4020 /*
4021  * Walk functions walk all the interfaces in the system to make
4022  * sure that there is no refernece to the ipif or ill that is
4023  * going away.
4024  */
4025 int
4026 ilm_walk_ill(ill_t *ill)
4027 {
4028 	int cnt = 0;
4029 	ill_t *till;
4030 	ilm_t *ilm;
4031 	ill_walk_context_t ctx;
4032 
4033 	rw_enter(&ill_g_lock, RW_READER);
4034 	till = ILL_START_WALK_ALL(&ctx);
4035 	for (; till != NULL; till = ill_next(&ctx, till)) {
4036 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4037 			if (ilm->ilm_ill == ill) {
4038 				cnt++;
4039 			}
4040 		}
4041 	}
4042 	rw_exit(&ill_g_lock);
4043 
4044 	return (cnt);
4045 }
4046 
4047 /*
4048  * This function is called before the ipif is freed.
4049  */
4050 int
4051 ilm_walk_ipif(ipif_t *ipif)
4052 {
4053 	int cnt = 0;
4054 	ill_t *till;
4055 	ilm_t *ilm;
4056 	ill_walk_context_t ctx;
4057 
4058 	till = ILL_START_WALK_ALL(&ctx);
4059 	for (; till != NULL; till = ill_next(&ctx, till)) {
4060 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4061 			if (ilm->ilm_ipif == ipif) {
4062 					cnt++;
4063 			}
4064 		}
4065 	}
4066 	return (cnt);
4067 }
4068 #endif
4069