xref: /titanic_44/usr/src/uts/common/inet/ip/ip_multi.c (revision 77ebe684ef29c4e071249d0fcb90f306d3aa1f12)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
/*
 * Forward declarations of file-local (static) helpers, so that they can be
 * referenced before the point in this file where they are defined.
 */

/* igmpv3/mldv2 source filter manipulation */
static void	ilm_bld_flists(conn_t *conn, void *arg);
static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
    slist_t *flist);

/* ilm handling and driver-level multicast requests */
static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int orig_ifindex, zoneid_t zoneid);
static void	ilm_delete(ilm_t *ilm);
static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);

/* per-conn ilg handling */
static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
    const in6_addr_t *v6group, int index);
static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
    ipif_t *ipif);
static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src);
static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src);
static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);

/* construction of the messages used for multicast enable/disable requests */
static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);

/* compacts conn_ilg once the last walker is gone; see ILG_WALKER_RELE() */
static void	conn_ilg_reap(conn_t *connp);

static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);
94 
95 /*
96  * MT notes:
97  *
98  * Multicast joins operate on both the ilg and ilm structures. Multiple
99  * threads operating on a conn (socket) trying to do multicast joins
100  * need to synchronize when operating on the ilg. Multiple threads
101  * potentially operating on different conns (socket endpoints) trying to
102  * do multicast joins could eventually end up trying to manipulate the
103  * ilm simultaneously and need to synchronize on the access to the ilm.
104  * Both are amenable to standard Solaris MT techniques, but it would be
105  * complex to handle a failover or failback which needs to manipulate
106  * ilg/ilms if applications can also simultaneously join/leave
107  * multicast groups. Hence multicast join/leave also go through the ipsq_t
108  * serialization.
109  *
110  * Multicast joins and leaves are single-threaded per phyint/IPMP group
111  * using the ipsq serialization mechanism.
112  *
113  * An ilm is an IP data structure used to track multicast join/leave.
114  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
115  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
116  * referencing the ilm. ilms are created / destroyed only as writer. ilms
117  * are not passed around, instead they are looked up and used under the
118  * ill_lock or as writer. So we don't need a dynamic refcount of the number
119  * of threads holding reference to an ilm.
120  *
121  * Multicast Join operation:
122  *
123  * The first step is to determine the ipif (v4) or ill (v6) on which
124  * the join operation is to be done. The join is done after becoming
125  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
126  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
127  * Multiple threads can attempt to join simultaneously on different ipif/ill
128  * on the same conn. In this case the ipsq serialization does not help in
129  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
130  * The conn_lock also protects all the ilg_t members.
131  *
132  * Leave operation.
133  *
134  * Similar to the join operation, the first step is to determine the ipif
135  * or ill (v6) on which the leave operation is to be done. The leave operation
136  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
137  * As with join ilg modification is done under the protection of the conn lock.
138  */
139 
/*
 * Try to enter the ipsq for `ipif' on behalf of `connp' by calling
 * ipsq_try_enter(); the result is stored in `ipsq'.  If the result is NULL,
 * the reference on `ipif' is dropped with ipif_refrele() and the *enclosing
 * function* returns EINPROGRESS.
 *
 * The expansion is a single do { } while (0) statement so that the macro is
 * safe inside an unbraced if/else; invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter((ipif), NULL,			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ipif_refrele(ipif);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
148 
/*
 * Try to enter the ipsq for `ill' on behalf of `connp' by calling
 * ipsq_try_enter(); the result is stored in `ipsq'.  If the result is NULL,
 * the reference on `ill' is dropped with ill_refrele() and the *enclosing
 * function* returns EINPROGRESS.
 *
 * The expansion is a single do { } while (0) statement so that the macro is
 * safe inside an unbraced if/else; invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter(NULL, (ill),			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ill_refrele(ill);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
157 
/*
 * Leave `ipsq' via ipsq_exit() unless it is NULL.
 *
 * Wrapped in do { } while (0) so that it behaves as one statement (no
 * dangling-else surprise when used under an unbraced if); invoke it with a
 * trailing semicolon.
 */
#define	IPSQ_EXIT(ipsq)				\
	do {					\
		if ((ipsq) != NULL)		\
			ipsq_exit(ipsq);	\
	} while (0)
161 
/*
 * Walker accounting for connp->conn_ilg.  ILG_WALKER_HOLD() bumps
 * conn_ilg_walker_cnt; ILG_WALKER_RELE() drops it and, when it reaches
 * zero, calls conn_ilg_reap().  conn_ilg_reap() asserts that conn_lock is
 * held, so ILG_WALKER_RELE() must be used with connp->conn_lock held.
 *
 * ILG_WALKER_RELE() expands to a single do { } while (0) statement; invoke
 * it with a trailing semicolon.  Both macros evaluate `connp' more than
 * once, so the argument must be free of side effects.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)					\
	do {							\
		(connp)->conn_ilg_walker_cnt--;			\
		if ((connp)->conn_ilg_walker_cnt == 0)		\
			conn_ilg_reap(connp);			\
	} while (0)
170 
171 static void
172 conn_ilg_reap(conn_t *connp)
173 {
174 	int	to;
175 	int	from;
176 	ilg_t	*ilg;
177 
178 	ASSERT(MUTEX_HELD(&connp->conn_lock));
179 
180 	to = 0;
181 	from = 0;
182 	while (from < connp->conn_ilg_inuse) {
183 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
184 			ilg = &connp->conn_ilg[from];
185 			FREE_SLIST(ilg->ilg_filter);
186 			ilg->ilg_flags &= ~ILG_DELETED;
187 			from++;
188 			continue;
189 		}
190 		if (to != from)
191 			connp->conn_ilg[to] = connp->conn_ilg[from];
192 		to++;
193 		from++;
194 	}
195 
196 	connp->conn_ilg_inuse = to;
197 
198 	if (connp->conn_ilg_inuse == 0) {
199 		mi_free((char *)connp->conn_ilg);
200 		connp->conn_ilg = NULL;
201 		cv_broadcast(&connp->conn_refcv);
202 	}
203 }
204 
/*
 * Allocate room for `number' objects of type `structure' with mi_zalloc()
 * and return the result cast to (structure *).  Callers in this file check
 * the result against NULL.
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

/* Number of ilg_t slots conn_ilg_alloc() allocates or adds at a time. */
#define	ILG_ALLOC_CHUNK	16
209 
210 /*
211  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
212  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
213  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
214  * returned ilg).  Returns NULL on failure (ENOMEM).
215  *
216  * Assumes connp->conn_lock is held.
217  */
218 static ilg_t *
219 conn_ilg_alloc(conn_t *connp)
220 {
221 	ilg_t *new, *ret;
222 	int curcnt;
223 
224 	ASSERT(MUTEX_HELD(&connp->conn_lock));
225 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
226 
227 	if (connp->conn_ilg == NULL) {
228 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
229 		if (connp->conn_ilg == NULL)
230 			return (NULL);
231 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
232 		connp->conn_ilg_inuse = 0;
233 	}
234 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
235 		if (connp->conn_ilg_walker_cnt != 0) {
236 			/*
237 			 * XXX We cannot grow the array at this point
238 			 * because a list walker could be in progress, and
239 			 * we cannot wipe out the existing array until the
240 			 * walker is done. Just return NULL for now.
241 			 * ilg_delete_all() will have to be changed when
242 			 * this logic is changed.
243 			 */
244 			return (NULL);
245 		}
246 		curcnt = connp->conn_ilg_allocated;
247 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
248 		if (new == NULL)
249 			return (NULL);
250 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
251 		mi_free((char *)connp->conn_ilg);
252 		connp->conn_ilg = new;
253 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
254 	}
255 
256 	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
257 	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
258 	bzero(ret, sizeof (*ret));
259 	return (ret);
260 }
261 
/*
 * Accumulator that ilm_gen_filter() hands to ilm_bld_flists() through
 * ipcl_walk() while rebuilding the filter state of one ilm.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being built */
	int		fbld_in_cnt;	/* matching ilgs in MODE_IS_INCLUDE */
	int		fbld_ex_cnt;	/* matching ilgs in any other mode */
	slist_t		fbld_in;	/* union of the include lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* set by l_union_in_a() on fbld_in */
} ilm_fbld_t;
270 
271 static void
272 ilm_bld_flists(conn_t *conn, void *arg)
273 {
274 	int i;
275 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
276 	ilm_t *ilm = fbld->fbld_ilm;
277 	in6_addr_t *v6group = &ilm->ilm_v6addr;
278 
279 	if (conn->conn_ilg_inuse == 0)
280 		return;
281 
282 	/*
283 	 * Since we can't break out of the ipcl_walk once started, we still
284 	 * have to look at every conn.  But if we've already found one
285 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
286 	 * ilgs--that will be our state.
287 	 */
288 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
289 		return;
290 
291 	/*
292 	 * Check this conn's ilgs to see if any are interested in our
293 	 * ilm (group, interface match).  If so, update the master
294 	 * include and exclude lists we're building in the fbld struct
295 	 * with this ilg's filter info.
296 	 */
297 	mutex_enter(&conn->conn_lock);
298 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
299 		ilg_t *ilg = &conn->conn_ilg[i];
300 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
301 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
302 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
303 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
304 				fbld->fbld_in_cnt++;
305 				if (!fbld->fbld_in_overflow)
306 					l_union_in_a(&fbld->fbld_in,
307 					    ilg->ilg_filter,
308 					    &fbld->fbld_in_overflow);
309 			} else {
310 				fbld->fbld_ex_cnt++;
311 				/*
312 				 * On the first exclude list, don't try to do
313 				 * an intersection, as the master exclude list
314 				 * is intentionally empty.  If the master list
315 				 * is still empty on later iterations, that
316 				 * means we have at least one ilg with an empty
317 				 * exclude list, so that should be reflected
318 				 * when we take the intersection.
319 				 */
320 				if (fbld->fbld_ex_cnt == 1) {
321 					if (ilg->ilg_filter != NULL)
322 						l_copy(ilg->ilg_filter,
323 						    &fbld->fbld_ex);
324 				} else {
325 					l_intersection_in_a(&fbld->fbld_ex,
326 					    ilg->ilg_filter);
327 				}
328 			}
329 			/* there will only be one match, so break now. */
330 			break;
331 		}
332 	}
333 	mutex_exit(&conn->conn_lock);
334 }
335 
336 static void
337 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
338 {
339 	ilm_fbld_t fbld;
340 	ip_stack_t *ipst = ilm->ilm_ipst;
341 
342 	fbld.fbld_ilm = ilm;
343 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
344 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
345 	fbld.fbld_in_overflow = B_FALSE;
346 
347 	/* first, construct our master include and exclude lists */
348 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
349 
350 	/* now use those master lists to generate the interface filter */
351 
352 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
353 	if (fbld.fbld_in_overflow) {
354 		*fmode = MODE_IS_EXCLUDE;
355 		flist->sl_numsrc = 0;
356 		return;
357 	}
358 
359 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
360 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
361 		*fmode = MODE_IS_INCLUDE;
362 		flist->sl_numsrc = 0;
363 		return;
364 	}
365 
366 	/*
367 	 * If there are no exclude lists, then the interface filter
368 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
369 	 * exclude list makes the interface filter EXCLUDE, with its
370 	 * filter list equal to (fbld_ex - fbld_in).
371 	 */
372 	if (fbld.fbld_ex_cnt == 0) {
373 		*fmode = MODE_IS_INCLUDE;
374 		l_copy(&fbld.fbld_in, flist);
375 	} else {
376 		*fmode = MODE_IS_EXCLUDE;
377 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
378 	}
379 }
380 
381 /*
382  * If the given interface has failed, choose a new one to join on so
383  * that we continue to receive packets.  ilg_orig_ifindex remembers
384  * what the application used to join on so that we know the ilg to
385  * delete even though we change the ill here.  Callers will store the
386  * ilg returned from this function in ilg_ill.  Thus when we receive
387  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
388  *
389  * This function must be called as writer so we can walk the group
390  * list and examine flags without holding a lock.
391  */
392 ill_t *
393 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
394 {
395 	ill_t	*till;
396 	ill_group_t *illgrp = ill->ill_group;
397 
398 	ASSERT(IAM_WRITER_ILL(ill));
399 
400 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
401 		return (ill);
402 
403 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
404 		return (ill);
405 
406 	till = illgrp->illgrp_ill;
407 	while (till != NULL &&
408 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
409 		till = till->ill_group_next;
410 	}
411 	if (till != NULL)
412 		return (till);
413 
414 	return (ill);
415 }
416 
417 static int
418 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
419     boolean_t isv6)
420 {
421 	mcast_record_t fmode;
422 	slist_t *flist;
423 	boolean_t fdefault;
424 	char buf[INET6_ADDRSTRLEN];
425 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
426 
427 	/*
428 	 * There are several cases where the ilm's filter state
429 	 * defaults to (EXCLUDE, NULL):
430 	 *	- we've had previous joins without associated ilgs
431 	 *	- this join has no associated ilg
432 	 *	- the ilg's filter state is (EXCLUDE, NULL)
433 	 */
434 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
435 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
436 
437 	/* attempt mallocs (if needed) before doing anything else */
438 	if ((flist = l_alloc()) == NULL)
439 		return (ENOMEM);
440 	if (!fdefault && ilm->ilm_filter == NULL) {
441 		ilm->ilm_filter = l_alloc();
442 		if (ilm->ilm_filter == NULL) {
443 			l_free(flist);
444 			return (ENOMEM);
445 		}
446 	}
447 
448 	if (ilgstat != ILGSTAT_CHANGE)
449 		ilm->ilm_refcnt++;
450 
451 	if (ilgstat == ILGSTAT_NONE)
452 		ilm->ilm_no_ilg_cnt++;
453 
454 	/*
455 	 * Determine new filter state.  If it's not the default
456 	 * (EXCLUDE, NULL), we must walk the conn list to find
457 	 * any ilgs interested in this group, and re-build the
458 	 * ilm filter.
459 	 */
460 	if (fdefault) {
461 		fmode = MODE_IS_EXCLUDE;
462 		flist->sl_numsrc = 0;
463 	} else {
464 		ilm_gen_filter(ilm, &fmode, flist);
465 	}
466 
467 	/* make sure state actually changed; nothing to do if not. */
468 	if ((ilm->ilm_fmode == fmode) &&
469 	    !lists_are_different(ilm->ilm_filter, flist)) {
470 		l_free(flist);
471 		return (0);
472 	}
473 
474 	/* send the state change report */
475 	if (!IS_LOOPBACK(ill)) {
476 		if (isv6)
477 			mld_statechange(ilm, fmode, flist);
478 		else
479 			igmp_statechange(ilm, fmode, flist);
480 	}
481 
482 	/* update the ilm state */
483 	ilm->ilm_fmode = fmode;
484 	if (flist->sl_numsrc > 0)
485 		l_copy(flist, ilm->ilm_filter);
486 	else
487 		CLEAR_SLIST(ilm->ilm_filter);
488 
489 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
490 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
491 
492 	l_free(flist);
493 	return (0);
494 }
495 
496 static int
497 ilm_update_del(ilm_t *ilm, boolean_t isv6)
498 {
499 	mcast_record_t fmode;
500 	slist_t *flist;
501 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
502 
503 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
504 	    ilm->ilm_refcnt));
505 
506 	if ((flist = l_alloc()) == NULL)
507 		return (ENOMEM);
508 
509 	/*
510 	 * If present, the ilg in question has already either been
511 	 * updated or removed from our list; so all we need to do
512 	 * now is walk the list to update the ilm filter state.
513 	 *
514 	 * Skip the list walk if we have any no-ilg joins, which
515 	 * cause the filter state to revert to (EXCLUDE, NULL).
516 	 */
517 	if (ilm->ilm_no_ilg_cnt != 0) {
518 		fmode = MODE_IS_EXCLUDE;
519 		flist->sl_numsrc = 0;
520 	} else {
521 		ilm_gen_filter(ilm, &fmode, flist);
522 	}
523 
524 	/* check to see if state needs to be updated */
525 	if ((ilm->ilm_fmode == fmode) &&
526 	    (!lists_are_different(ilm->ilm_filter, flist))) {
527 		l_free(flist);
528 		return (0);
529 	}
530 
531 	if (!IS_LOOPBACK(ill)) {
532 		if (isv6)
533 			mld_statechange(ilm, fmode, flist);
534 		else
535 			igmp_statechange(ilm, fmode, flist);
536 	}
537 
538 	ilm->ilm_fmode = fmode;
539 	if (flist->sl_numsrc > 0) {
540 		if (ilm->ilm_filter == NULL) {
541 			ilm->ilm_filter = l_alloc();
542 			if (ilm->ilm_filter == NULL) {
543 				char buf[INET6_ADDRSTRLEN];
544 				ip1dbg(("ilm_update_del: failed to alloc ilm "
545 				    "filter; no source filtering for %s on %s",
546 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
547 				    buf, sizeof (buf)), ill->ill_name));
548 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
549 				l_free(flist);
550 				return (0);
551 			}
552 		}
553 		l_copy(flist, ilm->ilm_filter);
554 	} else {
555 		CLEAR_SLIST(ilm->ilm_filter);
556 	}
557 
558 	l_free(flist);
559 	return (0);
560 }
561 
562 /*
563  * INADDR_ANY means all multicast addresses. This is only used
564  * by the multicast router.
565  * INADDR_ANY is stored as IPv6 unspecified addr.
566  */
567 int
568 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
569     mcast_record_t ilg_fmode, slist_t *ilg_flist)
570 {
571 	ill_t	*ill = ipif->ipif_ill;
572 	ilm_t 	*ilm;
573 	in6_addr_t v6group;
574 	int	ret;
575 
576 	ASSERT(IAM_WRITER_IPIF(ipif));
577 
578 	if (!CLASSD(group) && group != INADDR_ANY)
579 		return (EINVAL);
580 
581 	/*
582 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
583 	 */
584 	if (group == INADDR_ANY)
585 		v6group = ipv6_all_zeros;
586 	else
587 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
588 
589 	mutex_enter(&ill->ill_lock);
590 	ilm = ilm_lookup_ipif(ipif, group);
591 	mutex_exit(&ill->ill_lock);
592 	/*
593 	 * Since we are writer, we know the ilm_flags itself cannot
594 	 * change at this point, and ilm_lookup_ipif would not have
595 	 * returned a DELETED ilm. However, the data path can free
596 	 * ilm->next via ilm_walker_cleanup() so we can safely
597 	 * access anything in ilm except ilm_next (for safe access to
598 	 * ilm_next we'd have  to take the ill_lock).
599 	 */
600 	if (ilm != NULL)
601 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
602 
603 	/*
604 	 * ilms are associated with ipifs in IPv4. It moves with the
605 	 * ipif if the ipif moves to a new ill when the interface
606 	 * fails. Thus we really don't check whether the ipif_ill
607 	 * has failed like in IPv6. If it has FAILED the ipif
608 	 * will move (daemon will move it) and hence the ilm, if the
609 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
610 	 * we continue to receive in the same place even if the
611 	 * interface fails.
612 	 */
613 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
614 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
615 	if (ilm == NULL)
616 		return (ENOMEM);
617 
618 	if (group == INADDR_ANY) {
619 		/*
620 		 * Check how many ipif's have members in this group -
621 		 * if more then one we should not tell the driver to join
622 		 * this time
623 		 */
624 		if (ilm_numentries_v6(ill, &v6group) > 1)
625 			return (0);
626 		if (ill->ill_group == NULL)
627 			ret = ill_join_allmulti(ill);
628 		else
629 			ret = ill_nominate_mcast_rcv(ill->ill_group);
630 		if (ret != 0)
631 			ilm_delete(ilm);
632 		return (ret);
633 	}
634 
635 	if (!IS_LOOPBACK(ill))
636 		igmp_joingroup(ilm);
637 
638 	if (ilm_numentries_v6(ill, &v6group) > 1)
639 		return (0);
640 
641 	ret = ip_ll_addmulti_v6(ipif, &v6group);
642 	if (ret != 0)
643 		ilm_delete(ilm);
644 	return (ret);
645 }
646 
647 /*
648  * The unspecified address means all multicast addresses.
649  * This is only used by the multicast router.
650  *
651  * ill identifies the interface to join on; it may not match the
652  * interface requested by the application of a failover has taken
653  * place.  orig_ifindex always identifies the interface requested
654  * by the app.
655  *
656  * ilgstat tells us if there's an ilg associated with this join,
657  * and if so, if it's a new ilg or a change to an existing one.
658  * ilg_fmode and ilg_flist give us the current filter state of
659  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
660  */
661 int
662 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
663     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
664     slist_t *ilg_flist)
665 {
666 	ilm_t	*ilm;
667 	int	ret;
668 
669 	ASSERT(IAM_WRITER_ILL(ill));
670 
671 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
672 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
673 		return (EINVAL);
674 	}
675 
676 	/*
677 	 * An ilm is uniquely identified by the tuple of (group, ill,
678 	 * orig_ill).  group is the multicast group address, ill is
679 	 * the interface on which it is currently joined, and orig_ill
680 	 * is the interface on which the application requested the
681 	 * join.  orig_ill and ill are the same unless orig_ill has
682 	 * failed over.
683 	 *
684 	 * Both orig_ill and ill are required, which means we may have
685 	 * 2 ilms on an ill for the same group, but with different
686 	 * orig_ills.  These must be kept separate, so that when failback
687 	 * occurs, the appropriate ilms are moved back to their orig_ill
688 	 * without disrupting memberships on the ill to which they had
689 	 * been moved.
690 	 *
691 	 * In order to track orig_ill, we store orig_ifindex in the
692 	 * ilm and ilg.
693 	 */
694 	mutex_enter(&ill->ill_lock);
695 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
696 	mutex_exit(&ill->ill_lock);
697 	if (ilm != NULL)
698 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
699 
700 	/*
701 	 * We need to remember where the application really wanted
702 	 * to join. This will be used later if we want to failback
703 	 * to the original interface.
704 	 */
705 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
706 	    ilg_flist, orig_ifindex, zoneid);
707 	if (ilm == NULL)
708 		return (ENOMEM);
709 
710 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
711 		/*
712 		 * Check how many ipif's that have members in this group -
713 		 * if more then one we should not tell the driver to join
714 		 * this time
715 		 */
716 		if (ilm_numentries_v6(ill, v6group) > 1)
717 			return (0);
718 		if (ill->ill_group == NULL)
719 			ret = ill_join_allmulti(ill);
720 		else
721 			ret = ill_nominate_mcast_rcv(ill->ill_group);
722 
723 		if (ret != 0)
724 			ilm_delete(ilm);
725 		return (ret);
726 	}
727 
728 	if (!IS_LOOPBACK(ill))
729 		mld_joingroup(ilm);
730 
731 	/*
732 	 * If we have more then one we should not tell the driver
733 	 * to join this time.
734 	 */
735 	if (ilm_numentries_v6(ill, v6group) > 1)
736 		return (0);
737 
738 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
739 	if (ret != 0)
740 		ilm_delete(ilm);
741 	return (ret);
742 }
743 
744 /*
745  * Send a multicast request to the driver for enabling multicast reception
746  * for v6groupp address. The caller has already checked whether it is
747  * appropriate to send one or not.
748  */
749 int
750 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
751 {
752 	mblk_t	*mp;
753 	uint32_t addrlen, addroff;
754 	char	group_buf[INET6_ADDRSTRLEN];
755 
756 	ASSERT(IAM_WRITER_ILL(ill));
757 
758 	/*
759 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
760 	 * on.
761 	 */
762 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
763 	    &addrlen, &addroff);
764 	if (!mp)
765 		return (ENOMEM);
766 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
767 		ipaddr_t v4group;
768 
769 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
770 		/*
771 		 * NOTE!!!
772 		 * The "addroff" passed in here was calculated by
773 		 * ill_create_dl(), and will be used by ill_create_squery()
774 		 * to perform some twisted coding magic. It is the offset
775 		 * into the dl_xxx_req of the hw addr. Here, it will be
776 		 * added to b_wptr - b_rptr to create a magic number that
777 		 * is not an offset into this squery mblk.
778 		 * The actual hardware address will be accessed only in the
779 		 * dl_xxx_req, not in the squery. More importantly,
780 		 * that hardware address can *only* be accessed in this
781 		 * mblk chain by calling mi_offset_param_c(), which uses
782 		 * the magic number in the squery hw offset field to go
783 		 * to the *next* mblk (the dl_xxx_req), subtract the
784 		 * (b_wptr - b_rptr), and find the actual offset into
785 		 * the dl_xxx_req.
786 		 * Any method that depends on using the
787 		 * offset field in the dl_disabmulti_req or squery
788 		 * to find either hardware address will similarly fail.
789 		 *
790 		 * Look in ar_entry_squery() in arp.c to see how this offset
791 		 * is used.
792 		 */
793 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
794 		if (!mp)
795 			return (ENOMEM);
796 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
797 		    inet_ntop(AF_INET6, v6groupp, group_buf,
798 		    sizeof (group_buf)),
799 		    ill->ill_name));
800 		putnext(ill->ill_rq, mp);
801 	} else {
802 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
803 		    " %s\n",
804 		    inet_ntop(AF_INET6, v6groupp, group_buf,
805 		    sizeof (group_buf)),
806 		    ill->ill_name));
807 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
808 	}
809 	return (0);
810 }
811 
812 /*
813  * Send a multicast request to the driver for enabling multicast
814  * membership for v6group if appropriate.
815  */
816 static int
817 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
818 {
819 	ill_t	*ill = ipif->ipif_ill;
820 
821 	ASSERT(IAM_WRITER_IPIF(ipif));
822 
823 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
824 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
825 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
826 		return (0);	/* Must be IRE_IF_NORESOLVER */
827 	}
828 
829 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
830 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
831 		return (0);
832 	}
833 	if (!ill->ill_dl_up) {
834 		/*
835 		 * Nobody there. All multicast addresses will be re-joined
836 		 * when we get the DL_BIND_ACK bringing the interface up.
837 		 */
838 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
839 		return (0);
840 	}
841 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
842 }
843 
844 /*
845  * INADDR_ANY means all multicast addresses. This is only used
846  * by the multicast router.
847  * INADDR_ANY is stored as the IPv6 unspecifed addr.
848  */
849 int
850 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
851 {
852 	ill_t	*ill = ipif->ipif_ill;
853 	ilm_t *ilm;
854 	in6_addr_t v6group;
855 
856 	ASSERT(IAM_WRITER_IPIF(ipif));
857 
858 	if (!CLASSD(group) && group != INADDR_ANY)
859 		return (EINVAL);
860 
861 	/*
862 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
863 	 */
864 	if (group == INADDR_ANY)
865 		v6group = ipv6_all_zeros;
866 	else
867 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
868 
869 	/*
870 	 * Look for a match on the ipif.
871 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
872 	 */
873 	mutex_enter(&ill->ill_lock);
874 	ilm = ilm_lookup_ipif(ipif, group);
875 	mutex_exit(&ill->ill_lock);
876 	if (ilm == NULL)
877 		return (ENOENT);
878 
879 	/* Update counters */
880 	if (no_ilg)
881 		ilm->ilm_no_ilg_cnt--;
882 
883 	if (leaving)
884 		ilm->ilm_refcnt--;
885 
886 	if (ilm->ilm_refcnt > 0)
887 		return (ilm_update_del(ilm, B_FALSE));
888 
889 	if (group == INADDR_ANY) {
890 		ilm_delete(ilm);
891 		/*
892 		 * Check how many ipif's that have members in this group -
893 		 * if there are still some left then don't tell the driver
894 		 * to drop it.
895 		 */
896 		if (ilm_numentries_v6(ill, &v6group) != 0)
897 			return (0);
898 
899 		/* If we never joined, then don't leave. */
900 		if (ill->ill_join_allmulti) {
901 			ill_leave_allmulti(ill);
902 			if (ill->ill_group != NULL)
903 				(void) ill_nominate_mcast_rcv(ill->ill_group);
904 		}
905 		return (0);
906 	}
907 
908 	if (!IS_LOOPBACK(ill))
909 		igmp_leavegroup(ilm);
910 
911 	ilm_delete(ilm);
912 	/*
913 	 * Check how many ipif's that have members in this group -
914 	 * if there are still some left then don't tell the driver
915 	 * to drop it.
916 	 */
917 	if (ilm_numentries_v6(ill, &v6group) != 0)
918 		return (0);
919 	return (ip_ll_delmulti_v6(ipif, &v6group));
920 }
921 
922 /*
923  * The unspecified address means all multicast addresses.
924  * This is only used by the multicast router.
925  */
926 int
927 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
928     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
929 {
930 	ipif_t	*ipif;
931 	ilm_t *ilm;
932 
933 	ASSERT(IAM_WRITER_ILL(ill));
934 
935 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
936 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
937 		return (EINVAL);
938 
939 	/*
940 	 * Look for a match on the ill.
941 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
942 	 *
943 	 * Similar to ip_addmulti_v6, we should always look using
944 	 * the orig_ifindex.
945 	 *
946 	 * 1) If orig_ifindex is different from ill's ifindex
947 	 *    we should have an ilm with orig_ifindex created in
948 	 *    ip_addmulti_v6. We should delete that here.
949 	 *
950 	 * 2) If orig_ifindex is same as ill's ifindex, we should
951 	 *    not delete the ilm that is temporarily here because of
952 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
953 	 *    different from ill's ifindex.
954 	 *
955 	 * Thus, always lookup using orig_ifindex.
956 	 */
957 	mutex_enter(&ill->ill_lock);
958 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
959 	mutex_exit(&ill->ill_lock);
960 	if (ilm == NULL)
961 		return (ENOENT);
962 
963 	ASSERT(ilm->ilm_ill == ill);
964 
965 	ipif = ill->ill_ipif;
966 
967 	/* Update counters */
968 	if (no_ilg)
969 		ilm->ilm_no_ilg_cnt--;
970 
971 	if (leaving)
972 		ilm->ilm_refcnt--;
973 
974 	if (ilm->ilm_refcnt > 0)
975 		return (ilm_update_del(ilm, B_TRUE));
976 
977 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
978 		ilm_delete(ilm);
979 		/*
980 		 * Check how many ipif's that have members in this group -
981 		 * if there are still some left then don't tell the driver
982 		 * to drop it.
983 		 */
984 		if (ilm_numentries_v6(ill, v6group) != 0)
985 			return (0);
986 
987 		/* If we never joined, then don't leave. */
988 		if (ill->ill_join_allmulti) {
989 			ill_leave_allmulti(ill);
990 			if (ill->ill_group != NULL)
991 				(void) ill_nominate_mcast_rcv(ill->ill_group);
992 		}
993 		return (0);
994 	}
995 
996 	if (!IS_LOOPBACK(ill))
997 		mld_leavegroup(ilm);
998 
999 	ilm_delete(ilm);
1000 	/*
1001 	 * Check how many ipif's that have members in this group -
1002 	 * if there are still some left then don't tell the driver
1003 	 * to drop it.
1004 	 */
1005 	if (ilm_numentries_v6(ill, v6group) != 0)
1006 		return (0);
1007 	return (ip_ll_delmulti_v6(ipif, v6group));
1008 }
1009 
1010 /*
1011  * Send a multicast request to the driver for disabling multicast reception
1012  * for v6groupp address. The caller has already checked whether it is
1013  * appropriate to send one or not.
1014  */
1015 int
1016 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1017 {
1018 	mblk_t	*mp;
1019 	char	group_buf[INET6_ADDRSTRLEN];
1020 	uint32_t	addrlen, addroff;
1021 
1022 	ASSERT(IAM_WRITER_ILL(ill));
1023 	/*
1024 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1025 	 * on.
1026 	 */
1027 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1028 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1029 
1030 	if (!mp)
1031 		return (ENOMEM);
1032 
1033 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1034 		ipaddr_t v4group;
1035 
1036 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1037 		/*
1038 		 * NOTE!!!
1039 		 * The "addroff" passed in here was calculated by
1040 		 * ill_create_dl(), and will be used by ill_create_squery()
1041 		 * to perform some twisted coding magic. It is the offset
1042 		 * into the dl_xxx_req of the hw addr. Here, it will be
1043 		 * added to b_wptr - b_rptr to create a magic number that
1044 		 * is not an offset into this mblk.
1045 		 *
1046 		 * Please see the comment in ip_ll_send)enabmulti_req()
1047 		 * for a complete explanation.
1048 		 *
1049 		 * Look in ar_entry_squery() in arp.c to see how this offset
1050 		 * is used.
1051 		 */
1052 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1053 		if (!mp)
1054 			return (ENOMEM);
1055 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1056 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1057 		    sizeof (group_buf)),
1058 		    ill->ill_name));
1059 		putnext(ill->ill_rq, mp);
1060 	} else {
1061 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
1062 		    " %s\n",
1063 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1064 		    sizeof (group_buf)),
1065 		    ill->ill_name));
1066 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1067 	}
1068 	return (0);
1069 }
1070 
1071 /*
1072  * Send a multicast request to the driver for disabling multicast
1073  * membership for v6group if appropriate.
1074  */
1075 static int
1076 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1077 {
1078 	ill_t	*ill = ipif->ipif_ill;
1079 
1080 	ASSERT(IAM_WRITER_IPIF(ipif));
1081 
1082 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1083 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1084 		return (0);	/* Must be IRE_IF_NORESOLVER */
1085 	}
1086 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1087 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1088 		return (0);
1089 	}
1090 	if (!ill->ill_dl_up) {
1091 		/*
1092 		 * Nobody there. All multicast addresses will be re-joined
1093 		 * when we get the DL_BIND_ACK bringing the interface up.
1094 		 */
1095 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1096 		return (0);
1097 	}
1098 	return (ip_ll_send_disabmulti_req(ill, v6group));
1099 }
1100 
1101 /*
1102  * Make the driver pass up all multicast packets
1103  *
1104  * With ill groups, the caller makes sure that there is only
1105  * one ill joining the allmulti group.
1106  */
1107 int
1108 ill_join_allmulti(ill_t *ill)
1109 {
1110 	mblk_t		*promiscon_mp, *promiscoff_mp;
1111 	uint32_t	addrlen, addroff;
1112 
1113 	ASSERT(IAM_WRITER_ILL(ill));
1114 
1115 	if (!ill->ill_dl_up) {
1116 		/*
1117 		 * Nobody there. All multicast addresses will be re-joined
1118 		 * when we get the DL_BIND_ACK bringing the interface up.
1119 		 */
1120 		return (0);
1121 	}
1122 
1123 	ASSERT(!ill->ill_join_allmulti);
1124 
1125 	/*
1126 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
1127 	 * provider.  We don't need to do this for certain media types for
1128 	 * which we never need to turn promiscuous mode on.  While we're here,
1129 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
1130 	 * ill_leave_allmulti() will not fail due to low memory conditions.
1131 	 */
1132 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1133 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1134 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1135 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1136 		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1137 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1138 		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
1139 			freemsg(promiscon_mp);
1140 			freemsg(promiscoff_mp);
1141 			return (ENOMEM);
1142 		}
1143 		ill->ill_promiscoff_mp = promiscoff_mp;
1144 		ill_dlpi_send(ill, promiscon_mp);
1145 	}
1146 
1147 	ill->ill_join_allmulti = B_TRUE;
1148 	return (0);
1149 }
1150 
1151 /*
1152  * Make the driver stop passing up all multicast packets
1153  *
1154  * With ill groups, we need to nominate some other ill as
1155  * this ipif->ipif_ill is leaving the group.
1156  */
1157 void
1158 ill_leave_allmulti(ill_t *ill)
1159 {
1160 	mblk_t		*promiscoff_mp = ill->ill_promiscoff_mp;
1161 
1162 	ASSERT(IAM_WRITER_ILL(ill));
1163 
1164 	if (!ill->ill_dl_up) {
1165 		/*
1166 		 * Nobody there. All multicast addresses will be re-joined
1167 		 * when we get the DL_BIND_ACK bringing the interface up.
1168 		 */
1169 		return;
1170 	}
1171 
1172 	ASSERT(ill->ill_join_allmulti);
1173 
1174 	/*
1175 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1176 	 * the DLPI provider.  We don't need to do this for certain
1177 	 * media types for which we never need to turn promiscuous
1178 	 * mode on.
1179 	 */
1180 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1181 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1182 		ASSERT(promiscoff_mp != NULL);
1183 		ill->ill_promiscoff_mp = NULL;
1184 		ill_dlpi_send(ill, promiscoff_mp);
1185 	}
1186 
1187 	ill->ill_join_allmulti = B_FALSE;
1188 }
1189 
1190 static ill_t *
1191 ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1192 {
1193 	ill_t		*ill;
1194 	boolean_t	in_ipsq;
1195 
1196 	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
1197 	    ipst);
1198 	if (ill != NULL) {
1199 		if (!ill_waiter_inc(ill)) {
1200 			ill_refrele(ill);
1201 			return (NULL);
1202 		}
1203 		ill_refrele(ill);
1204 		in_ipsq = ipsq_enter(ill, B_FALSE);
1205 		ill_waiter_dcr(ill);
1206 		if (!in_ipsq)
1207 			ill = NULL;
1208 	}
1209 	return (ill);
1210 }
1211 
1212 int
1213 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1214 {
1215 	ill_t		*ill;
1216 	int		ret;
1217 
1218 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1219 		return (ENODEV);
1220 	if (isv6) {
1221 		ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ifindex,
1222 		    ill->ill_zoneid, ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL);
1223 	} else {
1224 		ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE,
1225 		    MODE_IS_EXCLUDE, NULL);
1226 	}
1227 	ill->ill_ipallmulti_cnt++;
1228 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1229 	return (ret);
1230 }
1231 
1232 int
1233 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1234 {
1235 	ill_t		*ill;
1236 
1237 	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
1238 		return (ENODEV);
1239 	ASSERT(ill->ill_ipallmulti_cnt != 0);
1240 	if (isv6) {
1241 		(void) ip_delmulti_v6(&ipv6_all_zeros, ill, ifindex,
1242 		    ill->ill_zoneid, B_TRUE, B_TRUE);
1243 	} else {
1244 		(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, B_TRUE);
1245 	}
1246 	ill->ill_ipallmulti_cnt--;
1247 	ipsq_exit(ill->ill_phyint->phyint_ipsq);
1248 	return (0);
1249 }
1250 
1251 /*
1252  * Delete the allmulti memberships that were added as part of
1253  * ip_join_allmulti().
1254  */
1255 void
1256 ip_purge_allmulti(ill_t *ill)
1257 {
1258 	ASSERT(IAM_WRITER_ILL(ill));
1259 
1260 	for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) {
1261 		if (ill->ill_isv6) {
1262 			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
1263 			    ill->ill_phyint->phyint_ifindex, ill->ill_zoneid,
1264 			    B_TRUE, B_TRUE);
1265 		} else {
1266 			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
1267 			    B_TRUE);
1268 		}
1269 	}
1270 }
1271 
1272 /*
1273  * Copy mp_orig and pass it in as a local message.
1274  */
1275 void
1276 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1277     zoneid_t zoneid)
1278 {
1279 	mblk_t	*mp;
1280 	mblk_t	*ipsec_mp;
1281 	ipha_t	*iph;
1282 	ip_stack_t *ipst = ill->ill_ipst;
1283 
1284 	if (DB_TYPE(mp_orig) == M_DATA &&
1285 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1286 		uint_t hdrsz;
1287 
1288 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1289 		    sizeof (udpha_t);
1290 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1291 
1292 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1293 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1294 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1295 			mp->b_wptr += hdrsz;
1296 			mp->b_cont = mp_orig;
1297 			mp_orig->b_rptr += hdrsz;
1298 			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
1299 				mblk_setcred(mp, DB_CRED(mp_orig));
1300 			if (MBLKL(mp_orig) == 0) {
1301 				mp->b_cont = mp_orig->b_cont;
1302 				mp_orig->b_cont = NULL;
1303 				freeb(mp_orig);
1304 			}
1305 		} else if (mp != NULL) {
1306 			freeb(mp);
1307 			mp = NULL;
1308 		}
1309 	} else {
1310 		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1311 	}
1312 
1313 	if (mp == NULL)
1314 		return;
1315 	if (DB_TYPE(mp) == M_CTL) {
1316 		ipsec_mp = mp;
1317 		mp = mp->b_cont;
1318 	} else {
1319 		ipsec_mp = mp;
1320 	}
1321 
1322 	iph = (ipha_t *)mp->b_rptr;
1323 
1324 	/*
1325 	 * DTrace this as ip:::send.  A blocked packet will fire the send
1326 	 * probe, but not the receive probe.
1327 	 */
1328 	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
1329 	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);
1330 
1331 	DTRACE_PROBE4(ip4__loopback__out__start,
1332 	    ill_t *, NULL, ill_t *, ill,
1333 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1334 
1335 	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1336 	    ipst->ips_ipv4firewall_loopback_out,
1337 	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);
1338 
1339 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1340 
1341 	if (ipsec_mp != NULL)
1342 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1343 		    fanout_flags, zoneid);
1344 }
1345 
1346 static area_t	ip_aresq_template = {
1347 	AR_ENTRY_SQUERY,		/* cmd */
1348 	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
1349 	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
1350 	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
1351 	sizeof (area_t),			/* proto addr offset */
1352 	IP_ADDR_LEN,			/* proto addr_length */
1353 	0,				/* proto mask offset */
1354 	/* Rest is initialized when used */
1355 	0,				/* flags */
1356 	0,				/* hw addr offset */
1357 	0,				/* hw addr length */
1358 };
1359 
1360 static mblk_t *
1361 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1362     uint32_t addroff, mblk_t *mp_tail)
1363 {
1364 	mblk_t	*mp;
1365 	area_t	*area;
1366 
1367 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1368 	    (caddr_t)&ipaddr);
1369 	if (!mp) {
1370 		freemsg(mp_tail);
1371 		return (NULL);
1372 	}
1373 	area = (area_t *)mp->b_rptr;
1374 	area->area_hw_addr_length = addrlen;
1375 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1376 	/*
1377 	 * NOTE!
1378 	 *
1379 	 * The area_hw_addr_offset, as can be seen, does not hold the
1380 	 * actual hardware address offset. Rather, it holds the offset
1381 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1382 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1383 	 * mi_offset_paramc() to find the hardware address in the
1384 	 * *second* mblk (dl_xxx_req), not this mblk.
1385 	 *
1386 	 * Using mi_offset_paramc() is thus the *only* way to access
1387 	 * the dl_xxx_hw address.
1388 	 *
1389 	 * The squery hw address should *not* be accessed.
1390 	 *
1391 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1392 	 */
1393 
1394 	mp->b_cont = mp_tail;
1395 	return (mp);
1396 }
1397 
1398 /*
1399  * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
1400  * the hardware address.
1401  */
1402 static mblk_t *
1403 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1404     uint32_t *addr_lenp, uint32_t *addr_offp)
1405 {
1406 	mblk_t	*mp;
1407 	uint32_t	hw_addr_length;
1408 	char		*cp;
1409 	uint32_t	offset;
1410 	uint32_t 	size;
1411 
1412 	*addr_lenp = *addr_offp = 0;
1413 
1414 	hw_addr_length = ill->ill_phys_addr_length;
1415 	if (!hw_addr_length) {
1416 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1417 		return (NULL);
1418 	}
1419 
1420 	size = length;
1421 	switch (dl_primitive) {
1422 	case DL_ENABMULTI_REQ:
1423 	case DL_DISABMULTI_REQ:
1424 		size += hw_addr_length;
1425 		break;
1426 	case DL_PROMISCON_REQ:
1427 	case DL_PROMISCOFF_REQ:
1428 		break;
1429 	default:
1430 		return (NULL);
1431 	}
1432 	mp = allocb(size, BPRI_HI);
1433 	if (!mp)
1434 		return (NULL);
1435 	mp->b_wptr += size;
1436 	mp->b_datap->db_type = M_PROTO;
1437 
1438 	cp = (char *)mp->b_rptr;
1439 	offset = length;
1440 
1441 	switch (dl_primitive) {
1442 	case DL_ENABMULTI_REQ: {
1443 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1444 
1445 		dl->dl_primitive = dl_primitive;
1446 		dl->dl_addr_offset = offset;
1447 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1448 		*addr_offp = offset;
1449 		break;
1450 	}
1451 	case DL_DISABMULTI_REQ: {
1452 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1453 
1454 		dl->dl_primitive = dl_primitive;
1455 		dl->dl_addr_offset = offset;
1456 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1457 		*addr_offp = offset;
1458 		break;
1459 	}
1460 	case DL_PROMISCON_REQ:
1461 	case DL_PROMISCOFF_REQ: {
1462 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1463 
1464 		dl->dl_primitive = dl_primitive;
1465 		dl->dl_level = DL_PROMISC_MULTI;
1466 		break;
1467 	}
1468 	}
1469 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1470 	    *addr_lenp, *addr_offp));
1471 	return (mp);
1472 }
1473 
1474 /*
1475  * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
1476  * messages that had been delayed until we'd heard back from ARP.  One catch:
1477  * we need to ensure that no one else becomes writer on the IPSQ before we've
1478  * received the replies, or they'll incorrectly process our replies as part of
1479  * their unrelated IPSQ operation.  To do this, we start a new IPSQ operation,
1480  * which will complete when we process the reply in ip_rput_dlpi_writer().
1481  */
1482 /* ARGSUSED */
1483 static void
1484 ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
1485 {
1486 	ill_t *ill = q->q_ptr;
1487 	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;
1488 
1489 	ASSERT(IAM_WRITER_ILL(ill));
1490 	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
1491 	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));
1492 
1493 	if (prim == DL_ENABMULTI_REQ) {
1494 		/* Track the state if this is the first enabmulti */
1495 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1496 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1497 	}
1498 
1499 	ipsq_current_start(ipsq, ill->ill_ipif, 0);
1500 	ill_dlpi_send(ill, mp);
1501 }
1502 
1503 void
1504 ip_wput_ctl(queue_t *q, mblk_t *mp)
1505 {
1506 	ill_t	*ill = q->q_ptr;
1507 	mblk_t	*dlmp = mp->b_cont;
1508 	area_t	*area = (area_t *)mp->b_rptr;
1509 	t_uscalar_t prim;
1510 
1511 	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
1512 	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
1513 	    dlmp == NULL) {
1514 		putnext(q, mp);
1515 		return;
1516 	}
1517 
1518 	/* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
1519 	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
1520 	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
1521 		putnext(q, mp);
1522 		return;
1523 	}
1524 	freeb(mp);
1525 
1526 	/* See comments above ip_wput_ctl_writer() for details */
1527 	ill_refhold(ill);
1528 	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
1529 }
1530 
1531 /*
1532  * Rejoin any groups which have been explicitly joined by the application (we
1533  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1534  * bringing the interface down).  Note that because groups can be joined and
1535  * left while an interface is down, this may not be the same set of groups
1536  * that we left in ill_leave_multicast().
1537  */
1538 void
1539 ill_recover_multicast(ill_t *ill)
1540 {
1541 	ilm_t	*ilm;
1542 	char    addrbuf[INET6_ADDRSTRLEN];
1543 
1544 	ASSERT(IAM_WRITER_ILL(ill));
1545 
1546 	ill->ill_need_recover_multicast = 0;
1547 
1548 	ILM_WALKER_HOLD(ill);
1549 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1550 		/*
1551 		 * Check how many ipif's that have members in this group -
1552 		 * if more then one we make sure that this entry is first
1553 		 * in the list.
1554 		 */
1555 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1556 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1557 			continue;
1558 		ip1dbg(("ill_recover_multicast: %s\n",
1559 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1560 		    sizeof (addrbuf))));
1561 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1562 			if (ill->ill_group == NULL) {
1563 				(void) ill_join_allmulti(ill);
1564 			} else {
1565 				/*
1566 				 * We don't want to join on this ill,
1567 				 * if somebody else in the group has
1568 				 * already been nominated.
1569 				 */
1570 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1571 			}
1572 		} else {
1573 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1574 			    &ilm->ilm_v6addr);
1575 		}
1576 	}
1577 	ILM_WALKER_RELE(ill);
1578 }
1579 
1580 /*
1581  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1582  * that were explicitly joined.  Note that both these functions could be
1583  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1584  * and DL_ENABMULTI_REQ messages when an interface is down.
1585  */
1586 void
1587 ill_leave_multicast(ill_t *ill)
1588 {
1589 	ilm_t	*ilm;
1590 	char    addrbuf[INET6_ADDRSTRLEN];
1591 
1592 	ASSERT(IAM_WRITER_ILL(ill));
1593 
1594 	ill->ill_need_recover_multicast = 1;
1595 
1596 	ILM_WALKER_HOLD(ill);
1597 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1598 		/*
1599 		 * Check how many ipif's that have members in this group -
1600 		 * if more then one we make sure that this entry is first
1601 		 * in the list.
1602 		 */
1603 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1604 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1605 			continue;
1606 		ip1dbg(("ill_leave_multicast: %s\n",
1607 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1608 		    sizeof (addrbuf))));
1609 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1610 			ill_leave_allmulti(ill);
1611 			/*
1612 			 * If we were part of an IPMP group, then
1613 			 * ill_handoff_responsibility() has already
1614 			 * nominated a new member (so we don't).
1615 			 */
1616 			ASSERT(ill->ill_group == NULL);
1617 		} else {
1618 			(void) ip_ll_delmulti_v6(ill->ill_ipif,
1619 			    &ilm->ilm_v6addr);
1620 		}
1621 	}
1622 	ILM_WALKER_RELE(ill);
1623 }
1624 
1625 /* Find an ilm for matching the ill */
1626 ilm_t *
1627 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1628 {
1629 	in6_addr_t	v6group;
1630 
1631 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1632 	/*
1633 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1634 	 */
1635 	if (group == INADDR_ANY)
1636 		v6group = ipv6_all_zeros;
1637 	else
1638 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1639 
1640 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1641 }
1642 
1643 /*
1644  * Find an ilm for matching the ill. All the ilm lookup functions
1645  * ignore ILM_DELETED ilms. These have been logically deleted, and
1646  * igmp and linklayer disable multicast have been done. Only mi_free
1647  * yet to be done. Still there in the list due to ilm_walkers. The
1648  * last walker will release it.
1649  */
1650 ilm_t *
1651 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1652 {
1653 	ilm_t	*ilm;
1654 
1655 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1656 
1657 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1658 		if (ilm->ilm_flags & ILM_DELETED)
1659 			continue;
1660 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1661 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1662 			return (ilm);
1663 	}
1664 	return (NULL);
1665 }
1666 
1667 ilm_t *
1668 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1669     zoneid_t zoneid)
1670 {
1671 	ilm_t *ilm;
1672 
1673 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1674 
1675 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1676 		if (ilm->ilm_flags & ILM_DELETED)
1677 			continue;
1678 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1679 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1680 		    ilm->ilm_orig_ifindex == index) {
1681 			return (ilm);
1682 		}
1683 	}
1684 	return (NULL);
1685 }
1686 
1687 
1688 /*
1689  * Found an ilm for the ipif. Only needed for IPv4 which does
1690  * ipif specific socket options.
1691  */
1692 ilm_t *
1693 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1694 {
1695 	ill_t	*ill = ipif->ipif_ill;
1696 	ilm_t	*ilm;
1697 	in6_addr_t	v6group;
1698 
1699 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
1700 	/*
1701 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1702 	 */
1703 	if (group == INADDR_ANY)
1704 		v6group = ipv6_all_zeros;
1705 	else
1706 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1707 
1708 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1709 		if (ilm->ilm_flags & ILM_DELETED)
1710 			continue;
1711 		if (ilm->ilm_ipif == ipif &&
1712 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1713 			return (ilm);
1714 	}
1715 	return (NULL);
1716 }
1717 
1718 /*
1719  * How many members on this ill?
1720  */
1721 int
1722 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1723 {
1724 	ilm_t	*ilm;
1725 	int i = 0;
1726 
1727 	mutex_enter(&ill->ill_lock);
1728 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1729 		if (ilm->ilm_flags & ILM_DELETED)
1730 			continue;
1731 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1732 			i++;
1733 		}
1734 	}
1735 	mutex_exit(&ill->ill_lock);
1736 	return (i);
1737 }
1738 
1739 /* Caller guarantees that the group is not already on the list */
1740 static ilm_t *
1741 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1742     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1743     zoneid_t zoneid)
1744 {
1745 	ill_t	*ill = ipif->ipif_ill;
1746 	ilm_t	*ilm;
1747 	ilm_t	*ilm_cur;
1748 	ilm_t	**ilm_ptpn;
1749 
1750 	ASSERT(IAM_WRITER_IPIF(ipif));
1751 
1752 	ilm = GETSTRUCT(ilm_t, 1);
1753 	if (ilm == NULL)
1754 		return (NULL);
1755 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1756 		ilm->ilm_filter = l_alloc();
1757 		if (ilm->ilm_filter == NULL) {
1758 			mi_free(ilm);
1759 			return (NULL);
1760 		}
1761 	}
1762 	ilm->ilm_v6addr = *v6group;
1763 	ilm->ilm_refcnt = 1;
1764 	ilm->ilm_zoneid = zoneid;
1765 	ilm->ilm_timer = INFINITY;
1766 	ilm->ilm_rtx.rtx_timer = INFINITY;
1767 
1768 	/*
1769 	 * IPv4 Multicast groups are joined using ipif.
1770 	 * IPv6 Multicast groups are joined using ill.
1771 	 */
1772 	if (ill->ill_isv6) {
1773 		ilm->ilm_ill = ill;
1774 		ilm->ilm_ipif = NULL;
1775 		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1776 		    (char *), "ilm", (void *), ilm);
1777 		ill->ill_ilm_cnt++;
1778 	} else {
1779 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1780 		ilm->ilm_ipif = ipif;
1781 		ilm->ilm_ill = NULL;
1782 		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
1783 		    (char *), "ilm", (void *), ilm);
1784 		ipif->ipif_ilm_cnt++;
1785 	}
1786 	ASSERT(ill->ill_ipst);
1787 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1788 
1789 	/*
1790 	 * After this if ilm moves to a new ill, we don't change
1791 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1792 	 * it has been moved. Indexes don't match even when the application
1793 	 * wants to join on a FAILED/INACTIVE interface because we choose
1794 	 * a new interface to join in. This is considered as an implicit
1795 	 * move.
1796 	 */
1797 	ilm->ilm_orig_ifindex = orig_ifindex;
1798 
1799 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1800 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1801 
1802 	/*
1803 	 * Grab lock to give consistent view to readers
1804 	 */
1805 	mutex_enter(&ill->ill_lock);
1806 	/*
1807 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1808 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1809 	 * sending duplicates up when two applications in the same zone join the
1810 	 * same group on different logical interfaces.
1811 	 */
1812 	ilm_cur = ill->ill_ilm;
1813 	ilm_ptpn = &ill->ill_ilm;
1814 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1815 		ilm_ptpn = &ilm_cur->ilm_next;
1816 		ilm_cur = ilm_cur->ilm_next;
1817 	}
1818 	ilm->ilm_next = ilm_cur;
1819 	*ilm_ptpn = ilm;
1820 
1821 	/*
1822 	 * If we have an associated ilg, use its filter state; if not,
1823 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1824 	 */
1825 	if (ilgstat != ILGSTAT_NONE) {
1826 		if (!SLIST_IS_EMPTY(ilg_flist))
1827 			l_copy(ilg_flist, ilm->ilm_filter);
1828 		ilm->ilm_fmode = ilg_fmode;
1829 	} else {
1830 		ilm->ilm_no_ilg_cnt = 1;
1831 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1832 	}
1833 
1834 	mutex_exit(&ill->ill_lock);
1835 	return (ilm);
1836 }
1837 
1838 void
1839 ilm_inactive(ilm_t *ilm)
1840 {
1841 	FREE_SLIST(ilm->ilm_filter);
1842 	FREE_SLIST(ilm->ilm_pendsrcs);
1843 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1844 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1845 	ilm->ilm_ipst = NULL;
1846 	mi_free((char *)ilm);
1847 }
1848 
1849 void
1850 ilm_walker_cleanup(ill_t *ill)
1851 {
1852 	ilm_t	**ilmp;
1853 	ilm_t	*ilm;
1854 	boolean_t need_wakeup = B_FALSE;
1855 
1856 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1857 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1858 
1859 	ilmp = &ill->ill_ilm;
1860 	while (*ilmp != NULL) {
1861 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1862 			ilm = *ilmp;
1863 			*ilmp = ilm->ilm_next;
1864 			/*
1865 			 * check if there are any pending FREE or unplumb
1866 			 * operations that need to be restarted.
1867 			 */
1868 			if (ilm->ilm_ipif != NULL) {
1869 				/*
1870 				 * IPv4 ilms hold a ref on the ipif.
1871 				 */
1872 				DTRACE_PROBE3(ipif__decr__cnt,
1873 				    (ipif_t *), ilm->ilm_ipif,
1874 				    (char *), "ilm", (void *), ilm);
1875 				ilm->ilm_ipif->ipif_ilm_cnt--;
1876 				if (IPIF_FREE_OK(ilm->ilm_ipif))
1877 					need_wakeup = B_TRUE;
1878 			} else {
1879 				/*
1880 				 * IPv6 ilms hold a ref on the ill.
1881 				 */
1882 				ASSERT(ilm->ilm_ill == ill);
1883 				DTRACE_PROBE3(ill__decr__cnt,
1884 				    (ill_t *), ill,
1885 				    (char *), "ilm", (void *), ilm);
1886 				ASSERT(ill->ill_ilm_cnt > 0);
1887 				ill->ill_ilm_cnt--;
1888 				if (ILL_FREE_OK(ill))
1889 					need_wakeup = B_TRUE;
1890 			}
1891 			ilm_inactive(ilm); /* frees ilm */
1892 		} else {
1893 			ilmp = &(*ilmp)->ilm_next;
1894 		}
1895 	}
1896 	ill->ill_ilm_cleanup_reqd = 0;
1897 	if (need_wakeup)
1898 		ipif_ill_refrele_tail(ill);
1899 	else
1900 		mutex_exit(&ill->ill_lock);
1901 }
1902 
1903 /*
1904  * Unlink ilm and free it.
1905  */
1906 static void
1907 ilm_delete(ilm_t *ilm)
1908 {
1909 	ill_t		*ill;
1910 	ilm_t		**ilmp;
1911 	boolean_t	need_wakeup;
1912 
1913 
1914 	if (ilm->ilm_ipif != NULL) {
1915 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1916 		ASSERT(ilm->ilm_ill == NULL);
1917 		ill = ilm->ilm_ipif->ipif_ill;
1918 		ASSERT(!ill->ill_isv6);
1919 	} else {
1920 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1921 		ASSERT(ilm->ilm_ipif == NULL);
1922 		ill = ilm->ilm_ill;
1923 		ASSERT(ill->ill_isv6);
1924 	}
1925 	/*
1926 	 * Delete under lock protection so that readers don't stumble
1927 	 * on bad ilm_next
1928 	 */
1929 	mutex_enter(&ill->ill_lock);
1930 	if (ill->ill_ilm_walker_cnt != 0) {
1931 		ilm->ilm_flags |= ILM_DELETED;
1932 		ill->ill_ilm_cleanup_reqd = 1;
1933 		mutex_exit(&ill->ill_lock);
1934 		return;
1935 	}
1936 
1937 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1938 				;
1939 	*ilmp = ilm->ilm_next;
1940 
1941 	/*
1942 	 * if we are the last reference to the ipif (for IPv4 ilms)
1943 	 * or the ill (for IPv6 ilms), we may need to wakeup any
1944 	 * pending FREE or unplumb operations.
1945 	 */
1946 	need_wakeup = B_FALSE;
1947 	if (ilm->ilm_ipif != NULL) {
1948 		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
1949 		    (char *), "ilm", (void *), ilm);
1950 		ilm->ilm_ipif->ipif_ilm_cnt--;
1951 		if (IPIF_FREE_OK(ilm->ilm_ipif))
1952 			need_wakeup = B_TRUE;
1953 	} else {
1954 		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1955 		    (char *), "ilm", (void *), ilm);
1956 		ASSERT(ill->ill_ilm_cnt > 0);
1957 		ill->ill_ilm_cnt--;
1958 		if (ILL_FREE_OK(ill))
1959 			need_wakeup = B_TRUE;
1960 	}
1961 
1962 	ilm_inactive(ilm); /* frees this ilm */
1963 
1964 	if (need_wakeup) {
1965 		/* drops ill lock */
1966 		ipif_ill_refrele_tail(ill);
1967 	} else {
1968 		mutex_exit(&ill->ill_lock);
1969 	}
1970 }
1971 
1972 
1973 /*
1974  * Looks up the appropriate ipif given a v4 multicast group and interface
1975  * address.  On success, returns 0, with *ipifpp pointing to the found
1976  * struct.  On failure, returns an errno and *ipifpp is NULL.
1977  */
1978 int
1979 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1980     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1981 {
1982 	ipif_t *ipif;
1983 	int err = 0;
1984 	zoneid_t zoneid;
1985 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1986 
1987 	if (!CLASSD(group) || CLASSD(src)) {
1988 		return (EINVAL);
1989 	}
1990 	*ipifpp = NULL;
1991 
1992 	zoneid = IPCL_ZONEID(connp);
1993 
1994 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1995 	if (ifaddr != INADDR_ANY) {
1996 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1997 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1998 		if (err != 0 && err != EINPROGRESS)
1999 			err = EADDRNOTAVAIL;
2000 	} else if (ifindexp != NULL && *ifindexp != 0) {
2001 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
2002 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
2003 	} else {
2004 		ipif = ipif_lookup_group(group, zoneid, ipst);
2005 		if (ipif == NULL)
2006 			return (EADDRNOTAVAIL);
2007 	}
2008 	if (ipif == NULL)
2009 		return (err);
2010 
2011 	*ipifpp = ipif;
2012 	return (0);
2013 }
2014 
2015 /*
2016  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
2017  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
2018  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
2019  * an errno and *illpp and *ipifpp are undefined.
2020  */
2021 int
2022 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
2023     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
2024     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
2025 {
2026 	boolean_t src_unspec;
2027 	ill_t *ill = NULL;
2028 	ipif_t *ipif = NULL;
2029 	int err;
2030 	zoneid_t zoneid = connp->conn_zoneid;
2031 	queue_t *wq = CONNP_TO_WQ(connp);
2032 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
2033 
2034 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
2035 
2036 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
2037 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
2038 			return (EINVAL);
2039 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
2040 		if (src_unspec) {
2041 			*v4src = INADDR_ANY;
2042 		} else {
2043 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
2044 		}
2045 		if (!CLASSD(*v4group) || CLASSD(*v4src))
2046 			return (EINVAL);
2047 		*ipifpp = NULL;
2048 		*isv6 = B_FALSE;
2049 	} else {
2050 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
2051 			return (EINVAL);
2052 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
2053 		    IN6_IS_ADDR_MULTICAST(v6src)) {
2054 			return (EINVAL);
2055 		}
2056 		*illpp = NULL;
2057 		*isv6 = B_TRUE;
2058 	}
2059 
2060 	if (ifindex == 0) {
2061 		if (*isv6)
2062 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
2063 		else
2064 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
2065 		if (ill == NULL && ipif == NULL)
2066 			return (EADDRNOTAVAIL);
2067 	} else {
2068 		if (*isv6) {
2069 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
2070 			    wq, first_mp, func, &err, ipst);
2071 			if (ill != NULL &&
2072 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
2073 				ill_refrele(ill);
2074 				ill = NULL;
2075 				err = EADDRNOTAVAIL;
2076 			}
2077 		} else {
2078 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
2079 			    zoneid, wq, first_mp, func, &err, ipst);
2080 		}
2081 		if (ill == NULL && ipif == NULL)
2082 			return (err);
2083 	}
2084 
2085 	*ipifpp = ipif;
2086 	*illpp = ill;
2087 	return (0);
2088 }
2089 
2090 static int
2091 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
2092     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2093 {
2094 	ilg_t *ilg;
2095 	int i, numsrc, fmode, outsrcs;
2096 	struct sockaddr_in *sin;
2097 	struct sockaddr_in6 *sin6;
2098 	struct in_addr *addrp;
2099 	slist_t *fp;
2100 	boolean_t is_v4only_api;
2101 
2102 	mutex_enter(&connp->conn_lock);
2103 
2104 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2105 	if (ilg == NULL) {
2106 		mutex_exit(&connp->conn_lock);
2107 		return (EADDRNOTAVAIL);
2108 	}
2109 
2110 	if (gf == NULL) {
2111 		ASSERT(imsf != NULL);
2112 		ASSERT(!isv4mapped);
2113 		is_v4only_api = B_TRUE;
2114 		outsrcs = imsf->imsf_numsrc;
2115 	} else {
2116 		ASSERT(imsf == NULL);
2117 		is_v4only_api = B_FALSE;
2118 		outsrcs = gf->gf_numsrc;
2119 	}
2120 
2121 	/*
2122 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2123 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2124 	 * So we need to translate here.
2125 	 */
2126 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2127 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2128 	if ((fp = ilg->ilg_filter) == NULL) {
2129 		numsrc = 0;
2130 	} else {
2131 		for (i = 0; i < outsrcs; i++) {
2132 			if (i == fp->sl_numsrc)
2133 				break;
2134 			if (isv4mapped) {
2135 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2136 				sin6->sin6_family = AF_INET6;
2137 				sin6->sin6_addr = fp->sl_addr[i];
2138 			} else {
2139 				if (is_v4only_api) {
2140 					addrp = &imsf->imsf_slist[i];
2141 				} else {
2142 					sin = (struct sockaddr_in *)
2143 					    &gf->gf_slist[i];
2144 					sin->sin_family = AF_INET;
2145 					addrp = &sin->sin_addr;
2146 				}
2147 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2148 			}
2149 		}
2150 		numsrc = fp->sl_numsrc;
2151 	}
2152 
2153 	if (is_v4only_api) {
2154 		imsf->imsf_numsrc = numsrc;
2155 		imsf->imsf_fmode = fmode;
2156 	} else {
2157 		gf->gf_numsrc = numsrc;
2158 		gf->gf_fmode = fmode;
2159 	}
2160 
2161 	mutex_exit(&connp->conn_lock);
2162 
2163 	return (0);
2164 }
2165 
2166 static int
2167 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2168     const struct in6_addr *grp, ill_t *ill)
2169 {
2170 	ilg_t *ilg;
2171 	int i;
2172 	struct sockaddr_storage *sl;
2173 	struct sockaddr_in6 *sin6;
2174 	slist_t *fp;
2175 
2176 	mutex_enter(&connp->conn_lock);
2177 
2178 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2179 	if (ilg == NULL) {
2180 		mutex_exit(&connp->conn_lock);
2181 		return (EADDRNOTAVAIL);
2182 	}
2183 
2184 	/*
2185 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2186 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2187 	 * So we need to translate here.
2188 	 */
2189 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2190 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2191 	if ((fp = ilg->ilg_filter) == NULL) {
2192 		gf->gf_numsrc = 0;
2193 	} else {
2194 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2195 			if (i == fp->sl_numsrc)
2196 				break;
2197 			sin6 = (struct sockaddr_in6 *)sl;
2198 			sin6->sin6_family = AF_INET6;
2199 			sin6->sin6_addr = fp->sl_addr[i];
2200 		}
2201 		gf->gf_numsrc = fp->sl_numsrc;
2202 	}
2203 
2204 	mutex_exit(&connp->conn_lock);
2205 
2206 	return (0);
2207 }
2208 
2209 static int
2210 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2211     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2212 {
2213 	ilg_t *ilg;
2214 	int i, err, infmode, new_fmode;
2215 	uint_t insrcs;
2216 	struct sockaddr_in *sin;
2217 	struct sockaddr_in6 *sin6;
2218 	struct in_addr *addrp;
2219 	slist_t *orig_filter = NULL;
2220 	slist_t *new_filter = NULL;
2221 	mcast_record_t orig_fmode;
2222 	boolean_t leave_grp, is_v4only_api;
2223 	ilg_stat_t ilgstat;
2224 
2225 	if (gf == NULL) {
2226 		ASSERT(imsf != NULL);
2227 		ASSERT(!isv4mapped);
2228 		is_v4only_api = B_TRUE;
2229 		insrcs = imsf->imsf_numsrc;
2230 		infmode = imsf->imsf_fmode;
2231 	} else {
2232 		ASSERT(imsf == NULL);
2233 		is_v4only_api = B_FALSE;
2234 		insrcs = gf->gf_numsrc;
2235 		infmode = gf->gf_fmode;
2236 	}
2237 
2238 	/* Make sure we can handle the source list */
2239 	if (insrcs > MAX_FILTER_SIZE)
2240 		return (ENOBUFS);
2241 
2242 	/*
2243 	 * setting the filter to (INCLUDE, NULL) is treated
2244 	 * as a request to leave the group.
2245 	 */
2246 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2247 
2248 	ASSERT(IAM_WRITER_IPIF(ipif));
2249 
2250 	mutex_enter(&connp->conn_lock);
2251 
2252 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2253 	if (ilg == NULL) {
2254 		/*
2255 		 * if the request was actually to leave, and we
2256 		 * didn't find an ilg, there's nothing to do.
2257 		 */
2258 		if (!leave_grp)
2259 			ilg = conn_ilg_alloc(connp);
2260 		if (leave_grp || ilg == NULL) {
2261 			mutex_exit(&connp->conn_lock);
2262 			return (leave_grp ? 0 : ENOMEM);
2263 		}
2264 		ilgstat = ILGSTAT_NEW;
2265 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2266 		ilg->ilg_ipif = ipif;
2267 		ilg->ilg_ill = NULL;
2268 		ilg->ilg_orig_ifindex = 0;
2269 	} else if (leave_grp) {
2270 		ilg_delete(connp, ilg, NULL);
2271 		mutex_exit(&connp->conn_lock);
2272 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2273 		return (0);
2274 	} else {
2275 		ilgstat = ILGSTAT_CHANGE;
2276 		/* Preserve existing state in case ip_addmulti() fails */
2277 		orig_fmode = ilg->ilg_fmode;
2278 		if (ilg->ilg_filter == NULL) {
2279 			orig_filter = NULL;
2280 		} else {
2281 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2282 			if (orig_filter == NULL) {
2283 				mutex_exit(&connp->conn_lock);
2284 				return (ENOMEM);
2285 			}
2286 		}
2287 	}
2288 
2289 	/*
2290 	 * Alloc buffer to copy new state into (see below) before
2291 	 * we make any changes, so we can bail if it fails.
2292 	 */
2293 	if ((new_filter = l_alloc()) == NULL) {
2294 		mutex_exit(&connp->conn_lock);
2295 		err = ENOMEM;
2296 		goto free_and_exit;
2297 	}
2298 
2299 	if (insrcs == 0) {
2300 		CLEAR_SLIST(ilg->ilg_filter);
2301 	} else {
2302 		slist_t *fp;
2303 		if (ilg->ilg_filter == NULL) {
2304 			fp = l_alloc();
2305 			if (fp == NULL) {
2306 				if (ilgstat == ILGSTAT_NEW)
2307 					ilg_delete(connp, ilg, NULL);
2308 				mutex_exit(&connp->conn_lock);
2309 				err = ENOMEM;
2310 				goto free_and_exit;
2311 			}
2312 		} else {
2313 			fp = ilg->ilg_filter;
2314 		}
2315 		for (i = 0; i < insrcs; i++) {
2316 			if (isv4mapped) {
2317 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2318 				fp->sl_addr[i] = sin6->sin6_addr;
2319 			} else {
2320 				if (is_v4only_api) {
2321 					addrp = &imsf->imsf_slist[i];
2322 				} else {
2323 					sin = (struct sockaddr_in *)
2324 					    &gf->gf_slist[i];
2325 					addrp = &sin->sin_addr;
2326 				}
2327 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2328 			}
2329 		}
2330 		fp->sl_numsrc = insrcs;
2331 		ilg->ilg_filter = fp;
2332 	}
2333 	/*
2334 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2335 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2336 	 * So we need to translate here.
2337 	 */
2338 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2339 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2340 
2341 	/*
2342 	 * Save copy of ilg's filter state to pass to other functions,
2343 	 * so we can release conn_lock now.
2344 	 */
2345 	new_fmode = ilg->ilg_fmode;
2346 	l_copy(ilg->ilg_filter, new_filter);
2347 
2348 	mutex_exit(&connp->conn_lock);
2349 
2350 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2351 	if (err != 0) {
2352 		/*
2353 		 * Restore the original filter state, or delete the
2354 		 * newly-created ilg.  We need to look up the ilg
2355 		 * again, though, since we've not been holding the
2356 		 * conn_lock.
2357 		 */
2358 		mutex_enter(&connp->conn_lock);
2359 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2360 		ASSERT(ilg != NULL);
2361 		if (ilgstat == ILGSTAT_NEW) {
2362 			ilg_delete(connp, ilg, NULL);
2363 		} else {
2364 			ilg->ilg_fmode = orig_fmode;
2365 			if (SLIST_IS_EMPTY(orig_filter)) {
2366 				CLEAR_SLIST(ilg->ilg_filter);
2367 			} else {
2368 				/*
2369 				 * We didn't free the filter, even if we
2370 				 * were trying to make the source list empty;
2371 				 * so if orig_filter isn't empty, the ilg
2372 				 * must still have a filter alloc'd.
2373 				 */
2374 				l_copy(orig_filter, ilg->ilg_filter);
2375 			}
2376 		}
2377 		mutex_exit(&connp->conn_lock);
2378 	}
2379 
2380 free_and_exit:
2381 	l_free(orig_filter);
2382 	l_free(new_filter);
2383 
2384 	return (err);
2385 }
2386 
2387 static int
2388 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2389     const struct in6_addr *grp, ill_t *ill)
2390 {
2391 	ilg_t *ilg;
2392 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2393 	slist_t *orig_filter = NULL;
2394 	slist_t *new_filter = NULL;
2395 	struct sockaddr_storage *sl;
2396 	struct sockaddr_in6 *sin6;
2397 	boolean_t leave_grp;
2398 	ilg_stat_t ilgstat;
2399 
2400 	/* Make sure we can handle the source list */
2401 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2402 		return (ENOBUFS);
2403 
2404 	/*
2405 	 * setting the filter to (INCLUDE, NULL) is treated
2406 	 * as a request to leave the group.
2407 	 */
2408 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2409 
2410 	ASSERT(IAM_WRITER_ILL(ill));
2411 
2412 	/*
2413 	 * Use the ifindex to do the lookup.  We can't use the ill
2414 	 * directly because ilg_ill could point to a different ill
2415 	 * if things have moved.
2416 	 */
2417 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2418 
2419 	mutex_enter(&connp->conn_lock);
2420 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2421 	if (ilg == NULL) {
2422 		/*
2423 		 * if the request was actually to leave, and we
2424 		 * didn't find an ilg, there's nothing to do.
2425 		 */
2426 		if (!leave_grp)
2427 			ilg = conn_ilg_alloc(connp);
2428 		if (leave_grp || ilg == NULL) {
2429 			mutex_exit(&connp->conn_lock);
2430 			return (leave_grp ? 0 : ENOMEM);
2431 		}
2432 		ilgstat = ILGSTAT_NEW;
2433 		ilg->ilg_v6group = *grp;
2434 		ilg->ilg_ipif = NULL;
2435 		/*
2436 		 * Choose our target ill to join on. This might be
2437 		 * different from the ill we've been given if it's
2438 		 * currently down and part of a group.
2439 		 *
2440 		 * new ill is not refheld; we are writer.
2441 		 */
2442 		ill = ip_choose_multi_ill(ill, grp);
2443 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2444 		ilg->ilg_ill = ill;
2445 		/*
2446 		 * Remember the index that we joined on, so that we can
2447 		 * successfully delete them later on and also search for
2448 		 * duplicates if the application wants to join again.
2449 		 */
2450 		ilg->ilg_orig_ifindex = orig_ifindex;
2451 	} else if (leave_grp) {
2452 		/*
2453 		 * Use the ilg's current ill for the deletion,
2454 		 * we might have failed over.
2455 		 */
2456 		ill = ilg->ilg_ill;
2457 		ilg_delete(connp, ilg, NULL);
2458 		mutex_exit(&connp->conn_lock);
2459 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2460 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2461 		return (0);
2462 	} else {
2463 		ilgstat = ILGSTAT_CHANGE;
2464 		/*
2465 		 * The current ill might be different from the one we were
2466 		 * asked to join on (if failover has occurred); we should
2467 		 * join on the ill stored in the ilg.  The original ill
2468 		 * is noted in ilg_orig_ifindex, which matched our request.
2469 		 */
2470 		ill = ilg->ilg_ill;
2471 		/* preserve existing state in case ip_addmulti() fails */
2472 		orig_fmode = ilg->ilg_fmode;
2473 		if (ilg->ilg_filter == NULL) {
2474 			orig_filter = NULL;
2475 		} else {
2476 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2477 			if (orig_filter == NULL) {
2478 				mutex_exit(&connp->conn_lock);
2479 				return (ENOMEM);
2480 			}
2481 		}
2482 	}
2483 
2484 	/*
2485 	 * Alloc buffer to copy new state into (see below) before
2486 	 * we make any changes, so we can bail if it fails.
2487 	 */
2488 	if ((new_filter = l_alloc()) == NULL) {
2489 		mutex_exit(&connp->conn_lock);
2490 		err = ENOMEM;
2491 		goto free_and_exit;
2492 	}
2493 
2494 	if (gf->gf_numsrc == 0) {
2495 		CLEAR_SLIST(ilg->ilg_filter);
2496 	} else {
2497 		slist_t *fp;
2498 		if (ilg->ilg_filter == NULL) {
2499 			fp = l_alloc();
2500 			if (fp == NULL) {
2501 				if (ilgstat == ILGSTAT_NEW)
2502 					ilg_delete(connp, ilg, NULL);
2503 				mutex_exit(&connp->conn_lock);
2504 				err = ENOMEM;
2505 				goto free_and_exit;
2506 			}
2507 		} else {
2508 			fp = ilg->ilg_filter;
2509 		}
2510 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2511 			sin6 = (struct sockaddr_in6 *)sl;
2512 			fp->sl_addr[i] = sin6->sin6_addr;
2513 		}
2514 		fp->sl_numsrc = gf->gf_numsrc;
2515 		ilg->ilg_filter = fp;
2516 	}
2517 	/*
2518 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2519 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2520 	 * So we need to translate here.
2521 	 */
2522 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2523 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2524 
2525 	/*
2526 	 * Save copy of ilg's filter state to pass to other functions,
2527 	 * so we can release conn_lock now.
2528 	 */
2529 	new_fmode = ilg->ilg_fmode;
2530 	l_copy(ilg->ilg_filter, new_filter);
2531 
2532 	mutex_exit(&connp->conn_lock);
2533 
2534 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2535 	    ilgstat, new_fmode, new_filter);
2536 	if (err != 0) {
2537 		/*
2538 		 * Restore the original filter state, or delete the
2539 		 * newly-created ilg.  We need to look up the ilg
2540 		 * again, though, since we've not been holding the
2541 		 * conn_lock.
2542 		 */
2543 		mutex_enter(&connp->conn_lock);
2544 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2545 		ASSERT(ilg != NULL);
2546 		if (ilgstat == ILGSTAT_NEW) {
2547 			ilg_delete(connp, ilg, NULL);
2548 		} else {
2549 			ilg->ilg_fmode = orig_fmode;
2550 			if (SLIST_IS_EMPTY(orig_filter)) {
2551 				CLEAR_SLIST(ilg->ilg_filter);
2552 			} else {
2553 				/*
2554 				 * We didn't free the filter, even if we
2555 				 * were trying to make the source list empty;
2556 				 * so if orig_filter isn't empty, the ilg
2557 				 * must still have a filter alloc'd.
2558 				 */
2559 				l_copy(orig_filter, ilg->ilg_filter);
2560 			}
2561 		}
2562 		mutex_exit(&connp->conn_lock);
2563 	}
2564 
2565 free_and_exit:
2566 	l_free(orig_filter);
2567 	l_free(new_filter);
2568 
2569 	return (err);
2570 }
2571 
2572 /*
2573  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2574  */
2575 /* ARGSUSED */
2576 int
2577 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2578     ip_ioctl_cmd_t *ipip, void *ifreq)
2579 {
2580 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2581 	/* existence verified in ip_wput_nondata() */
2582 	mblk_t *data_mp = mp->b_cont->b_cont;
2583 	int datalen, err, cmd, minsize;
2584 	uint_t expsize = 0;
2585 	conn_t *connp;
2586 	boolean_t isv6, is_v4only_api, getcmd;
2587 	struct sockaddr_in *gsin;
2588 	struct sockaddr_in6 *gsin6;
2589 	ipaddr_t v4grp;
2590 	in6_addr_t v6grp;
2591 	struct group_filter *gf = NULL;
2592 	struct ip_msfilter *imsf = NULL;
2593 	mblk_t *ndp;
2594 
2595 	if (data_mp->b_cont != NULL) {
2596 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2597 			return (ENOMEM);
2598 		freemsg(data_mp);
2599 		data_mp = ndp;
2600 		mp->b_cont->b_cont = data_mp;
2601 	}
2602 
2603 	cmd = iocp->ioc_cmd;
2604 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2605 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2606 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2607 	datalen = MBLKL(data_mp);
2608 
2609 	if (datalen < minsize)
2610 		return (EINVAL);
2611 
2612 	/*
2613 	 * now we know we have at least have the initial structure,
2614 	 * but need to check for the source list array.
2615 	 */
2616 	if (is_v4only_api) {
2617 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2618 		isv6 = B_FALSE;
2619 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2620 	} else {
2621 		gf = (struct group_filter *)data_mp->b_rptr;
2622 		if (gf->gf_group.ss_family == AF_INET6) {
2623 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2624 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2625 		} else {
2626 			isv6 = B_FALSE;
2627 		}
2628 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2629 	}
2630 	if (datalen < expsize)
2631 		return (EINVAL);
2632 
2633 	connp = Q_TO_CONN(q);
2634 
2635 	/* operation not supported on the virtual network interface */
2636 	if (IS_VNI(ipif->ipif_ill))
2637 		return (EINVAL);
2638 
2639 	if (isv6) {
2640 		ill_t *ill = ipif->ipif_ill;
2641 		ill_refhold(ill);
2642 
2643 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2644 		v6grp = gsin6->sin6_addr;
2645 		if (getcmd)
2646 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2647 		else
2648 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2649 
2650 		ill_refrele(ill);
2651 	} else {
2652 		boolean_t isv4mapped = B_FALSE;
2653 		if (is_v4only_api) {
2654 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2655 		} else {
2656 			if (gf->gf_group.ss_family == AF_INET) {
2657 				gsin = (struct sockaddr_in *)&gf->gf_group;
2658 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2659 			} else {
2660 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2661 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2662 				    v4grp);
2663 				isv4mapped = B_TRUE;
2664 			}
2665 		}
2666 		if (getcmd)
2667 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2668 			    isv4mapped);
2669 		else
2670 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2671 			    isv4mapped);
2672 	}
2673 
2674 	return (err);
2675 }
2676 
2677 /*
2678  * Finds the ipif based on information in the ioctl headers.  Needed to make
2679  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2680  * ioctls prior to calling the ioctl's handler function).
2681  */
2682 int
2683 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2684     cmd_info_t *ci, ipsq_func_t func)
2685 {
2686 	int cmd = ipip->ipi_cmd;
2687 	int err = 0;
2688 	conn_t *connp;
2689 	ipif_t *ipif;
2690 	/* caller has verified this mblk exists */
2691 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2692 	struct ip_msfilter *imsf;
2693 	struct group_filter *gf;
2694 	ipaddr_t v4addr, v4grp;
2695 	in6_addr_t v6grp;
2696 	uint32_t index;
2697 	zoneid_t zoneid;
2698 	ip_stack_t *ipst;
2699 
2700 	connp = Q_TO_CONN(q);
2701 	zoneid = connp->conn_zoneid;
2702 	ipst = connp->conn_netstack->netstack_ip;
2703 
2704 	/* don't allow multicast operations on a tcp conn */
2705 	if (IPCL_IS_TCP(connp))
2706 		return (ENOPROTOOPT);
2707 
2708 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2709 		/* don't allow v4-specific ioctls on v6 socket */
2710 		if (connp->conn_af_isv6)
2711 			return (EAFNOSUPPORT);
2712 
2713 		imsf = (struct ip_msfilter *)dbuf;
2714 		v4addr = imsf->imsf_interface.s_addr;
2715 		v4grp = imsf->imsf_multiaddr.s_addr;
2716 		if (v4addr == INADDR_ANY) {
2717 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2718 			if (ipif == NULL)
2719 				err = EADDRNOTAVAIL;
2720 		} else {
2721 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2722 			    func, &err, ipst);
2723 		}
2724 	} else {
2725 		boolean_t isv6 = B_FALSE;
2726 		gf = (struct group_filter *)dbuf;
2727 		index = gf->gf_interface;
2728 		if (gf->gf_group.ss_family == AF_INET6) {
2729 			struct sockaddr_in6 *sin6;
2730 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2731 			v6grp = sin6->sin6_addr;
2732 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2733 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2734 			else
2735 				isv6 = B_TRUE;
2736 		} else if (gf->gf_group.ss_family == AF_INET) {
2737 			struct sockaddr_in *sin;
2738 			sin = (struct sockaddr_in *)&gf->gf_group;
2739 			v4grp = sin->sin_addr.s_addr;
2740 		} else {
2741 			return (EAFNOSUPPORT);
2742 		}
2743 		if (index == 0) {
2744 			if (isv6) {
2745 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2746 				    ipst);
2747 			} else {
2748 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2749 			}
2750 			if (ipif == NULL)
2751 				err = EADDRNOTAVAIL;
2752 		} else {
2753 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2754 			    q, mp, func, &err, ipst);
2755 		}
2756 	}
2757 
2758 	ci->ci_ipif = ipif;
2759 	return (err);
2760 }
2761 
2762 /*
2763  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2764  * in in two stages, as the first copyin tells us the size of the attached
2765  * source buffer.  This function is called by ip_wput_nondata() after the
2766  * first copyin has completed; it figures out how big the second stage
2767  * needs to be, and kicks it off.
2768  *
2769  * In some cases (numsrc < 2), the second copyin is not needed as the
2770  * first one gets a complete structure containing 1 source addr.
2771  *
2772  * The function returns 0 if a second copyin has been started (i.e. there's
2773  * no more work to be done right now), or 1 if the second copyin is not
2774  * needed and ip_wput_nondata() can continue its processing.
2775  */
2776 int
2777 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2778 {
2779 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2780 	int cmd = iocp->ioc_cmd;
2781 	/* validity of this checked in ip_wput_nondata() */
2782 	mblk_t *mp1 = mp->b_cont->b_cont;
2783 	int copysize = 0;
2784 	int offset;
2785 
2786 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2787 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2788 		if (gf->gf_numsrc >= 2) {
2789 			offset = sizeof (struct group_filter);
2790 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2791 		}
2792 	} else {
2793 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2794 		if (imsf->imsf_numsrc >= 2) {
2795 			offset = sizeof (struct ip_msfilter);
2796 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2797 		}
2798 	}
2799 	if (copysize > 0) {
2800 		mi_copyin_n(q, mp, offset, copysize);
2801 		return (0);
2802 	}
2803 	return (1);
2804 }
2805 
2806 /*
2807  * Handle the following optmgmt:
2808  *	IP_ADD_MEMBERSHIP		must not have joined already
2809  *	MCAST_JOIN_GROUP		must not have joined already
2810  *	IP_BLOCK_SOURCE			must have joined already
2811  *	MCAST_BLOCK_SOURCE		must have joined already
2812  *	IP_JOIN_SOURCE_GROUP		may have joined already
2813  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2814  *
2815  * fmode and src parameters may be used to determine which option is
2816  * being set, as follows (the IP_* and MCAST_* versions of each option
2817  * are functionally equivalent):
2818  *	opt			fmode			src
2819  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2820  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2821  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2822  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2823  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2824  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2825  *
2826  * Changing the filter mode is not allowed; if a matching ilg already
2827  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2828  *
2829  * Verifies that there is a source address of appropriate scope for
2830  * the group; if not, EADDRNOTAVAIL is returned.
2831  *
2832  * The interface to be used may be identified by an address or by an
2833  * index.  A pointer to the index is passed; if it is NULL, use the
2834  * address, otherwise, use the index.
2835  */
2836 int
2837 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2838     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2839     mblk_t *first_mp)
2840 {
2841 	ipif_t	*ipif;
2842 	ipsq_t	*ipsq;
2843 	int err = 0;
2844 	ill_t	*ill;
2845 
2846 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2847 	    ip_restart_optmgmt, &ipif);
2848 	if (err != 0) {
2849 		if (err != EINPROGRESS) {
2850 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2851 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2852 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2853 		}
2854 		return (err);
2855 	}
2856 	ASSERT(ipif != NULL);
2857 
2858 	ill = ipif->ipif_ill;
2859 	/* Operation not supported on a virtual network interface */
2860 	if (IS_VNI(ill)) {
2861 		ipif_refrele(ipif);
2862 		return (EINVAL);
2863 	}
2864 
2865 	if (checkonly) {
2866 		/*
2867 		 * do not do operation, just pretend to - new T_CHECK
2868 		 * semantics. The error return case above if encountered
2869 		 * considered a good enough "check" here.
2870 		 */
2871 		ipif_refrele(ipif);
2872 		return (0);
2873 	}
2874 
2875 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2876 	    NEW_OP);
2877 
2878 	/* unspecified source addr => no source filtering */
2879 	err = ilg_add(connp, group, ipif, fmode, src);
2880 
2881 	IPSQ_EXIT(ipsq);
2882 
2883 	ipif_refrele(ipif);
2884 	return (err);
2885 }
2886 
2887 /*
2888  * Handle the following optmgmt:
2889  *	IPV6_JOIN_GROUP			must not have joined already
2890  *	MCAST_JOIN_GROUP		must not have joined already
2891  *	MCAST_BLOCK_SOURCE		must have joined already
2892  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2893  *
2894  * fmode and src parameters may be used to determine which option is
2895  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2896  * are functionally equivalent):
2897  *	opt			fmode			v6src
2898  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2899  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2900  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2901  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2902  *
2903  * Changing the filter mode is not allowed; if a matching ilg already
2904  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2905  *
2906  * Verifies that there is a source address of appropriate scope for
2907  * the group; if not, EADDRNOTAVAIL is returned.
2908  *
2909  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2910  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2911  * v6src is also v4-mapped.
2912  */
2913 int
2914 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2915     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2916     const in6_addr_t *v6src, mblk_t *first_mp)
2917 {
2918 	ill_t *ill;
2919 	ipif_t	*ipif;
2920 	char buf[INET6_ADDRSTRLEN];
2921 	ipaddr_t v4group, v4src;
2922 	boolean_t isv6;
2923 	ipsq_t	*ipsq;
2924 	int	err;
2925 
2926 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2927 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2928 	if (err != 0) {
2929 		if (err != EINPROGRESS) {
2930 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2931 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2932 			    sizeof (buf)), ifindex));
2933 		}
2934 		return (err);
2935 	}
2936 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2937 
2938 	/* operation is not supported on the virtual network interface */
2939 	if (isv6) {
2940 		if (IS_VNI(ill)) {
2941 			ill_refrele(ill);
2942 			return (EINVAL);
2943 		}
2944 	} else {
2945 		if (IS_VNI(ipif->ipif_ill)) {
2946 			ipif_refrele(ipif);
2947 			return (EINVAL);
2948 		}
2949 	}
2950 
2951 	if (checkonly) {
2952 		/*
2953 		 * do not do operation, just pretend to - new T_CHECK
2954 		 * semantics. The error return case above if encountered
2955 		 * considered a good enough "check" here.
2956 		 */
2957 		if (isv6)
2958 			ill_refrele(ill);
2959 		else
2960 			ipif_refrele(ipif);
2961 		return (0);
2962 	}
2963 
2964 	if (!isv6) {
2965 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2966 		    ipsq, NEW_OP);
2967 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2968 		IPSQ_EXIT(ipsq);
2969 		ipif_refrele(ipif);
2970 	} else {
2971 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2972 		    ipsq, NEW_OP);
2973 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2974 		IPSQ_EXIT(ipsq);
2975 		ill_refrele(ill);
2976 	}
2977 
2978 	return (err);
2979 }
2980 
2981 static int
2982 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2983     mcast_record_t fmode, ipaddr_t src)
2984 {
2985 	ilg_t	*ilg;
2986 	in6_addr_t v6src;
2987 	boolean_t leaving = B_FALSE;
2988 
2989 	ASSERT(IAM_WRITER_IPIF(ipif));
2990 
2991 	/*
2992 	 * The ilg is valid only while we hold the conn lock. Once we drop
2993 	 * the lock, another thread can locate another ilg on this connp,
2994 	 * but on a different ipif, and delete it, and cause the ilg array
2995 	 * to be reallocated and copied. Hence do the ilg_delete before
2996 	 * dropping the lock.
2997 	 */
2998 	mutex_enter(&connp->conn_lock);
2999 	ilg = ilg_lookup_ipif(connp, group, ipif);
3000 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
3001 		mutex_exit(&connp->conn_lock);
3002 		return (EADDRNOTAVAIL);
3003 	}
3004 
3005 	/*
3006 	 * Decide if we're actually deleting the ilg or just removing a
3007 	 * source filter address; if just removing an addr, make sure we
3008 	 * aren't trying to change the filter mode, and that the addr is
3009 	 * actually in our filter list already.  If we're removing the
3010 	 * last src in an include list, just delete the ilg.
3011 	 */
3012 	if (src == INADDR_ANY) {
3013 		v6src = ipv6_all_zeros;
3014 		leaving = B_TRUE;
3015 	} else {
3016 		int err = 0;
3017 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3018 		if (fmode != ilg->ilg_fmode)
3019 			err = EINVAL;
3020 		else if (ilg->ilg_filter == NULL ||
3021 		    !list_has_addr(ilg->ilg_filter, &v6src))
3022 			err = EADDRNOTAVAIL;
3023 		if (err != 0) {
3024 			mutex_exit(&connp->conn_lock);
3025 			return (err);
3026 		}
3027 		if (fmode == MODE_IS_INCLUDE &&
3028 		    ilg->ilg_filter->sl_numsrc == 1) {
3029 			v6src = ipv6_all_zeros;
3030 			leaving = B_TRUE;
3031 		}
3032 	}
3033 
3034 	ilg_delete(connp, ilg, &v6src);
3035 	mutex_exit(&connp->conn_lock);
3036 
3037 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
3038 	return (0);
3039 }
3040 
3041 static int
3042 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
3043     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
3044 {
3045 	ilg_t	*ilg;
3046 	ill_t	*ilg_ill;
3047 	uint_t	ilg_orig_ifindex;
3048 	boolean_t leaving = B_TRUE;
3049 
3050 	ASSERT(IAM_WRITER_ILL(ill));
3051 
3052 	/*
3053 	 * Use the index that we originally used to join. We can't
3054 	 * use the ill directly because ilg_ill could point to
3055 	 * a new ill if things have moved.
3056 	 */
3057 	mutex_enter(&connp->conn_lock);
3058 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
3059 	    ill->ill_phyint->phyint_ifindex);
3060 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
3061 		mutex_exit(&connp->conn_lock);
3062 		return (EADDRNOTAVAIL);
3063 	}
3064 
3065 	/*
3066 	 * Decide if we're actually deleting the ilg or just removing a
3067 	 * source filter address; if just removing an addr, make sure we
3068 	 * aren't trying to change the filter mode, and that the addr is
3069 	 * actually in our filter list already.  If we're removing the
3070 	 * last src in an include list, just delete the ilg.
3071 	 */
3072 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3073 		int err = 0;
3074 		if (fmode != ilg->ilg_fmode)
3075 			err = EINVAL;
3076 		else if (ilg->ilg_filter == NULL ||
3077 		    !list_has_addr(ilg->ilg_filter, v6src))
3078 			err = EADDRNOTAVAIL;
3079 		if (err != 0) {
3080 			mutex_exit(&connp->conn_lock);
3081 			return (err);
3082 		}
3083 		if (fmode == MODE_IS_INCLUDE &&
3084 		    ilg->ilg_filter->sl_numsrc == 1)
3085 			v6src = NULL;
3086 		else
3087 			leaving = B_FALSE;
3088 	}
3089 
3090 	ilg_ill = ilg->ilg_ill;
3091 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
3092 	ilg_delete(connp, ilg, v6src);
3093 	mutex_exit(&connp->conn_lock);
3094 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
3095 	    connp->conn_zoneid, B_FALSE, leaving);
3096 
3097 	return (0);
3098 }
3099 
3100 /*
3101  * Handle the following optmgmt:
3102  *	IP_DROP_MEMBERSHIP		will leave
3103  *	MCAST_LEAVE_GROUP		will leave
3104  *	IP_UNBLOCK_SOURCE		will not leave
3105  *	MCAST_UNBLOCK_SOURCE		will not leave
3106  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
3107  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3108  *
3109  * fmode and src parameters may be used to determine which option is
3110  * being set, as follows (the IP_* and MCAST_* versions of each option
3111  * are functionally equivalent):
3112  *	opt			 fmode			src
3113  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
3114  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
3115  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3116  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
3117  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
3118  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
3119  *
3120  * Changing the filter mode is not allowed; if a matching ilg already
3121  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3122  *
3123  * The interface to be used may be identified by an address or by an
3124  * index.  A pointer to the index is passed; if it is NULL, use the
3125  * address, otherwise, use the index.
3126  */
3127 int
3128 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
3129     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
3130     mblk_t *first_mp)
3131 {
3132 	ipif_t	*ipif;
3133 	ipsq_t	*ipsq;
3134 	int	err;
3135 	ill_t	*ill;
3136 
3137 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
3138 	    ip_restart_optmgmt, &ipif);
3139 	if (err != 0) {
3140 		if (err != EINPROGRESS) {
3141 			ip1dbg(("ip_opt_delete_group: no ipif for group "
3142 			    "0x%x, ifaddr 0x%x\n",
3143 			    (int)ntohl(group), (int)ntohl(ifaddr)));
3144 		}
3145 		return (err);
3146 	}
3147 	ASSERT(ipif != NULL);
3148 
3149 	ill = ipif->ipif_ill;
3150 	/* Operation not supported on a virtual network interface */
3151 	if (IS_VNI(ill)) {
3152 		ipif_refrele(ipif);
3153 		return (EINVAL);
3154 	}
3155 
3156 	if (checkonly) {
3157 		/*
3158 		 * do not do operation, just pretend to - new T_CHECK
3159 		 * semantics. The error return case above if encountered
3160 		 * considered a good enough "check" here.
3161 		 */
3162 		ipif_refrele(ipif);
3163 		return (0);
3164 	}
3165 
3166 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3167 	    NEW_OP);
3168 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3169 	IPSQ_EXIT(ipsq);
3170 
3171 	ipif_refrele(ipif);
3172 	return (err);
3173 }
3174 
3175 /*
3176  * Handle the following optmgmt:
3177  *	IPV6_LEAVE_GROUP		will leave
3178  *	MCAST_LEAVE_GROUP		will leave
3179  *	MCAST_UNBLOCK_SOURCE		will not leave
3180  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3181  *
3182  * fmode and src parameters may be used to determine which option is
3183  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3184  * are functionally equivalent):
3185  *	opt			 fmode			v6src
3186  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3187  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3188  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3189  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3190  *
3191  * Changing the filter mode is not allowed; if a matching ilg already
3192  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3193  *
3194  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3195  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3196  * v6src is also v4-mapped.
3197  */
3198 int
3199 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3200     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3201     const in6_addr_t *v6src, mblk_t *first_mp)
3202 {
3203 	ill_t *ill;
3204 	ipif_t	*ipif;
3205 	char	buf[INET6_ADDRSTRLEN];
3206 	ipaddr_t v4group, v4src;
3207 	boolean_t isv6;
3208 	ipsq_t	*ipsq;
3209 	int	err;
3210 
3211 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3212 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3213 	if (err != 0) {
3214 		if (err != EINPROGRESS) {
3215 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3216 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3217 			    sizeof (buf)), ifindex));
3218 		}
3219 		return (err);
3220 	}
3221 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3222 
3223 	/* operation is not supported on the virtual network interface */
3224 	if (isv6) {
3225 		if (IS_VNI(ill)) {
3226 			ill_refrele(ill);
3227 			return (EINVAL);
3228 		}
3229 	} else {
3230 		if (IS_VNI(ipif->ipif_ill)) {
3231 			ipif_refrele(ipif);
3232 			return (EINVAL);
3233 		}
3234 	}
3235 
3236 	if (checkonly) {
3237 		/*
3238 		 * do not do operation, just pretend to - new T_CHECK
3239 		 * semantics. The error return case above if encountered
3240 		 * considered a good enough "check" here.
3241 		 */
3242 		if (isv6)
3243 			ill_refrele(ill);
3244 		else
3245 			ipif_refrele(ipif);
3246 		return (0);
3247 	}
3248 
3249 	if (!isv6) {
3250 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3251 		    ipsq, NEW_OP);
3252 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3253 		    v4src);
3254 		IPSQ_EXIT(ipsq);
3255 		ipif_refrele(ipif);
3256 	} else {
3257 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3258 		    ipsq, NEW_OP);
3259 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3260 		    v6src);
3261 		IPSQ_EXIT(ipsq);
3262 		ill_refrele(ill);
3263 	}
3264 
3265 	return (err);
3266 }
3267 
/*
 * Group management for an upper conn, passing changes down to the
 * interface multicast list (and DLPI).
 * These routines can handle new-style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces).
 */
3275 
3276 /*
3277  * Add a group to an upper conn group data structure and pass things down
3278  * to the interface multicast list (and DLPI)
3279  */
3280 static int
3281 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3282     ipaddr_t src)
3283 {
3284 	int	error = 0;
3285 	ill_t	*ill;
3286 	ilg_t	*ilg;
3287 	ilg_stat_t ilgstat;
3288 	slist_t	*new_filter = NULL;
3289 	int	new_fmode;
3290 
3291 	ASSERT(IAM_WRITER_IPIF(ipif));
3292 
3293 	ill = ipif->ipif_ill;
3294 
3295 	if (!(ill->ill_flags & ILLF_MULTICAST))
3296 		return (EADDRNOTAVAIL);
3297 
3298 	/*
3299 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3300 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3301 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3302 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3303 	 * but both operations happen on the same conn.
3304 	 */
3305 	mutex_enter(&connp->conn_lock);
3306 	ilg = ilg_lookup_ipif(connp, group, ipif);
3307 
3308 	/*
3309 	 * Depending on the option we're handling, may or may not be okay
3310 	 * if group has already been added.  Figure out our rules based
3311 	 * on fmode and src params.  Also make sure there's enough room
3312 	 * in the filter if we're adding a source to an existing filter.
3313 	 */
3314 	if (src == INADDR_ANY) {
3315 		/* we're joining for all sources, must not have joined */
3316 		if (ilg != NULL)
3317 			error = EADDRINUSE;
3318 	} else {
3319 		if (fmode == MODE_IS_EXCLUDE) {
3320 			/* (excl {addr}) => block source, must have joined */
3321 			if (ilg == NULL)
3322 				error = EADDRNOTAVAIL;
3323 		}
3324 		/* (incl {addr}) => join source, may have joined */
3325 
3326 		if (ilg != NULL &&
3327 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3328 			error = ENOBUFS;
3329 	}
3330 	if (error != 0) {
3331 		mutex_exit(&connp->conn_lock);
3332 		return (error);
3333 	}
3334 
3335 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3336 
3337 	/*
3338 	 * Alloc buffer to copy new state into (see below) before
3339 	 * we make any changes, so we can bail if it fails.
3340 	 */
3341 	if ((new_filter = l_alloc()) == NULL) {
3342 		mutex_exit(&connp->conn_lock);
3343 		return (ENOMEM);
3344 	}
3345 
3346 	if (ilg == NULL) {
3347 		ilgstat = ILGSTAT_NEW;
3348 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3349 			mutex_exit(&connp->conn_lock);
3350 			l_free(new_filter);
3351 			return (ENOMEM);
3352 		}
3353 		if (src != INADDR_ANY) {
3354 			ilg->ilg_filter = l_alloc();
3355 			if (ilg->ilg_filter == NULL) {
3356 				ilg_delete(connp, ilg, NULL);
3357 				mutex_exit(&connp->conn_lock);
3358 				l_free(new_filter);
3359 				return (ENOMEM);
3360 			}
3361 			ilg->ilg_filter->sl_numsrc = 1;
3362 			IN6_IPADDR_TO_V4MAPPED(src,
3363 			    &ilg->ilg_filter->sl_addr[0]);
3364 		}
3365 		if (group == INADDR_ANY) {
3366 			ilg->ilg_v6group = ipv6_all_zeros;
3367 		} else {
3368 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3369 		}
3370 		ilg->ilg_ipif = ipif;
3371 		ilg->ilg_ill = NULL;
3372 		ilg->ilg_orig_ifindex = 0;
3373 		ilg->ilg_fmode = fmode;
3374 	} else {
3375 		int index;
3376 		in6_addr_t v6src;
3377 		ilgstat = ILGSTAT_CHANGE;
3378 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3379 			mutex_exit(&connp->conn_lock);
3380 			l_free(new_filter);
3381 			return (EINVAL);
3382 		}
3383 		if (ilg->ilg_filter == NULL) {
3384 			ilg->ilg_filter = l_alloc();
3385 			if (ilg->ilg_filter == NULL) {
3386 				mutex_exit(&connp->conn_lock);
3387 				l_free(new_filter);
3388 				return (ENOMEM);
3389 			}
3390 		}
3391 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3392 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3393 			mutex_exit(&connp->conn_lock);
3394 			l_free(new_filter);
3395 			return (EADDRNOTAVAIL);
3396 		}
3397 		index = ilg->ilg_filter->sl_numsrc++;
3398 		ilg->ilg_filter->sl_addr[index] = v6src;
3399 	}
3400 
3401 	/*
3402 	 * Save copy of ilg's filter state to pass to other functions,
3403 	 * so we can release conn_lock now.
3404 	 */
3405 	new_fmode = ilg->ilg_fmode;
3406 	l_copy(ilg->ilg_filter, new_filter);
3407 
3408 	mutex_exit(&connp->conn_lock);
3409 
3410 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3411 	if (error != 0) {
3412 		/*
3413 		 * Need to undo what we did before calling ip_addmulti()!
3414 		 * Must look up the ilg again since we've not been holding
3415 		 * conn_lock.
3416 		 */
3417 		in6_addr_t v6src;
3418 		if (ilgstat == ILGSTAT_NEW)
3419 			v6src = ipv6_all_zeros;
3420 		else
3421 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3422 		mutex_enter(&connp->conn_lock);
3423 		ilg = ilg_lookup_ipif(connp, group, ipif);
3424 		ASSERT(ilg != NULL);
3425 		ilg_delete(connp, ilg, &v6src);
3426 		mutex_exit(&connp->conn_lock);
3427 		l_free(new_filter);
3428 		return (error);
3429 	}
3430 
3431 	l_free(new_filter);
3432 	return (0);
3433 }
3434 
3435 static int
3436 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3437     mcast_record_t fmode, const in6_addr_t *v6src)
3438 {
3439 	int	error = 0;
3440 	int	orig_ifindex;
3441 	ilg_t	*ilg;
3442 	ilg_stat_t ilgstat;
3443 	slist_t	*new_filter = NULL;
3444 	int	new_fmode;
3445 
3446 	ASSERT(IAM_WRITER_ILL(ill));
3447 
3448 	if (!(ill->ill_flags & ILLF_MULTICAST))
3449 		return (EADDRNOTAVAIL);
3450 
3451 	/*
3452 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3453 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3454 	 * and hme1 map to different ipsq's, but both operations happen
3455 	 * on the same conn.
3456 	 */
3457 	mutex_enter(&connp->conn_lock);
3458 
3459 	/*
3460 	 * Use the ifindex to do the lookup. We can't use the ill
3461 	 * directly because ilg_ill could point to a different ill if
3462 	 * things have moved.
3463 	 */
3464 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3465 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3466 
3467 	/*
3468 	 * Depending on the option we're handling, may or may not be okay
3469 	 * if group has already been added.  Figure out our rules based
3470 	 * on fmode and src params.  Also make sure there's enough room
3471 	 * in the filter if we're adding a source to an existing filter.
3472 	 */
3473 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3474 		/* we're joining for all sources, must not have joined */
3475 		if (ilg != NULL)
3476 			error = EADDRINUSE;
3477 	} else {
3478 		if (fmode == MODE_IS_EXCLUDE) {
3479 			/* (excl {addr}) => block source, must have joined */
3480 			if (ilg == NULL)
3481 				error = EADDRNOTAVAIL;
3482 		}
3483 		/* (incl {addr}) => join source, may have joined */
3484 
3485 		if (ilg != NULL &&
3486 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3487 			error = ENOBUFS;
3488 	}
3489 	if (error != 0) {
3490 		mutex_exit(&connp->conn_lock);
3491 		return (error);
3492 	}
3493 
3494 	/*
3495 	 * Alloc buffer to copy new state into (see below) before
3496 	 * we make any changes, so we can bail if it fails.
3497 	 */
3498 	if ((new_filter = l_alloc()) == NULL) {
3499 		mutex_exit(&connp->conn_lock);
3500 		return (ENOMEM);
3501 	}
3502 
3503 	if (ilg == NULL) {
3504 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3505 			mutex_exit(&connp->conn_lock);
3506 			l_free(new_filter);
3507 			return (ENOMEM);
3508 		}
3509 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3510 			ilg->ilg_filter = l_alloc();
3511 			if (ilg->ilg_filter == NULL) {
3512 				ilg_delete(connp, ilg, NULL);
3513 				mutex_exit(&connp->conn_lock);
3514 				l_free(new_filter);
3515 				return (ENOMEM);
3516 			}
3517 			ilg->ilg_filter->sl_numsrc = 1;
3518 			ilg->ilg_filter->sl_addr[0] = *v6src;
3519 		}
3520 		ilgstat = ILGSTAT_NEW;
3521 		ilg->ilg_v6group = *v6group;
3522 		ilg->ilg_fmode = fmode;
3523 		ilg->ilg_ipif = NULL;
3524 		/*
3525 		 * Choose our target ill to join on. This might be different
3526 		 * from the ill we've been given if it's currently down and
3527 		 * part of a group.
3528 		 *
3529 		 * new ill is not refheld; we are writer.
3530 		 */
3531 		ill = ip_choose_multi_ill(ill, v6group);
3532 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3533 		ilg->ilg_ill = ill;
3534 		/*
3535 		 * Remember the orig_ifindex that we joined on, so that we
3536 		 * can successfully delete them later on and also search
3537 		 * for duplicates if the application wants to join again.
3538 		 */
3539 		ilg->ilg_orig_ifindex = orig_ifindex;
3540 	} else {
3541 		int index;
3542 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3543 			mutex_exit(&connp->conn_lock);
3544 			l_free(new_filter);
3545 			return (EINVAL);
3546 		}
3547 		if (ilg->ilg_filter == NULL) {
3548 			ilg->ilg_filter = l_alloc();
3549 			if (ilg->ilg_filter == NULL) {
3550 				mutex_exit(&connp->conn_lock);
3551 				l_free(new_filter);
3552 				return (ENOMEM);
3553 			}
3554 		}
3555 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3556 			mutex_exit(&connp->conn_lock);
3557 			l_free(new_filter);
3558 			return (EADDRNOTAVAIL);
3559 		}
3560 		ilgstat = ILGSTAT_CHANGE;
3561 		index = ilg->ilg_filter->sl_numsrc++;
3562 		ilg->ilg_filter->sl_addr[index] = *v6src;
3563 		/*
3564 		 * The current ill might be different from the one we were
3565 		 * asked to join on (if failover has occurred); we should
3566 		 * join on the ill stored in the ilg.  The original ill
3567 		 * is noted in ilg_orig_ifindex, which matched our request.
3568 		 */
3569 		ill = ilg->ilg_ill;
3570 	}
3571 
3572 	/*
3573 	 * Save copy of ilg's filter state to pass to other functions,
3574 	 * so we can release conn_lock now.
3575 	 */
3576 	new_fmode = ilg->ilg_fmode;
3577 	l_copy(ilg->ilg_filter, new_filter);
3578 
3579 	mutex_exit(&connp->conn_lock);
3580 
3581 	/*
3582 	 * Now update the ill. We wait to do this until after the ilg
3583 	 * has been updated because we need to update the src filter
3584 	 * info for the ill, which involves looking at the status of
3585 	 * all the ilgs associated with this group/interface pair.
3586 	 */
3587 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3588 	    ilgstat, new_fmode, new_filter);
3589 	if (error != 0) {
3590 		/*
3591 		 * But because we waited, we have to undo the ilg update
3592 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3593 		 * again, since we've not been holding conn_lock.
3594 		 */
3595 		in6_addr_t delsrc =
3596 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3597 		mutex_enter(&connp->conn_lock);
3598 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3599 		ASSERT(ilg != NULL);
3600 		ilg_delete(connp, ilg, &delsrc);
3601 		mutex_exit(&connp->conn_lock);
3602 		l_free(new_filter);
3603 		return (error);
3604 	}
3605 
3606 	l_free(new_filter);
3607 
3608 	return (0);
3609 }
3610 
3611 /*
3612  * Find an IPv4 ilg matching group, ill and source
3613  */
3614 ilg_t *
3615 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3616 {
3617 	in6_addr_t v6group, v6src;
3618 	int i;
3619 	boolean_t isinlist;
3620 	ilg_t *ilg;
3621 	ipif_t *ipif;
3622 	ill_t *ilg_ill;
3623 
3624 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3625 
3626 	/*
3627 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3628 	 */
3629 	if (group == INADDR_ANY)
3630 		v6group = ipv6_all_zeros;
3631 	else
3632 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3633 
3634 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3635 		ilg = &connp->conn_ilg[i];
3636 		if ((ipif = ilg->ilg_ipif) == NULL ||
3637 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3638 			continue;
3639 		ASSERT(ilg->ilg_ill == NULL);
3640 		ilg_ill = ipif->ipif_ill;
3641 		ASSERT(!ilg_ill->ill_isv6);
3642 		if (ilg_ill == ill &&
3643 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3644 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3645 				/* no source filter, so this is a match */
3646 				return (ilg);
3647 			}
3648 			break;
3649 		}
3650 	}
3651 	if (i == connp->conn_ilg_inuse)
3652 		return (NULL);
3653 
3654 	/*
3655 	 * we have an ilg with matching ill and group; but
3656 	 * the ilg has a source list that we must check.
3657 	 */
3658 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3659 	isinlist = B_FALSE;
3660 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3661 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3662 			isinlist = B_TRUE;
3663 			break;
3664 		}
3665 	}
3666 
3667 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3668 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3669 		return (ilg);
3670 
3671 	return (NULL);
3672 }
3673 
3674 /*
3675  * Find an IPv6 ilg matching group, ill, and source
3676  */
3677 ilg_t *
3678 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3679     const in6_addr_t *v6src, ill_t *ill)
3680 {
3681 	int i;
3682 	boolean_t isinlist;
3683 	ilg_t *ilg;
3684 	ill_t *ilg_ill;
3685 
3686 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3687 
3688 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3689 		ilg = &connp->conn_ilg[i];
3690 		if ((ilg_ill = ilg->ilg_ill) == NULL ||
3691 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3692 			continue;
3693 		ASSERT(ilg->ilg_ipif == NULL);
3694 		ASSERT(ilg_ill->ill_isv6);
3695 		if (ilg_ill == ill &&
3696 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3697 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3698 				/* no source filter, so this is a match */
3699 				return (ilg);
3700 			}
3701 			break;
3702 		}
3703 	}
3704 	if (i == connp->conn_ilg_inuse)
3705 		return (NULL);
3706 
3707 	/*
3708 	 * we have an ilg with matching ill and group; but
3709 	 * the ilg has a source list that we must check.
3710 	 */
3711 	isinlist = B_FALSE;
3712 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3713 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3714 			isinlist = B_TRUE;
3715 			break;
3716 		}
3717 	}
3718 
3719 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3720 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3721 		return (ilg);
3722 
3723 	return (NULL);
3724 }
3725 
3726 /*
3727  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3728  * This is useful when the interface fails and we have moved
3729  * to a new ill, but still would like to locate using the index
3730  * that we originally used to join. Used only for IPv6 currently.
3731  */
3732 static ilg_t *
3733 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3734 {
3735 	ilg_t	*ilg;
3736 	int	i;
3737 
3738 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3739 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3740 		ilg = &connp->conn_ilg[i];
3741 		if (ilg->ilg_ill == NULL ||
3742 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3743 			continue;
3744 		/* ilg_ipif is NULL for V6 */
3745 		ASSERT(ilg->ilg_ipif == NULL);
3746 		ASSERT(ilg->ilg_orig_ifindex != 0);
3747 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3748 		    ilg->ilg_orig_ifindex == ifindex) {
3749 			return (ilg);
3750 		}
3751 	}
3752 	return (NULL);
3753 }
3754 
3755 /*
3756  * Find an IPv6 ilg matching group and ill
3757  */
3758 ilg_t *
3759 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3760 {
3761 	ilg_t	*ilg;
3762 	int	i;
3763 	ill_t 	*mem_ill;
3764 
3765 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3766 
3767 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3768 		ilg = &connp->conn_ilg[i];
3769 		if ((mem_ill = ilg->ilg_ill) == NULL ||
3770 		    (ilg->ilg_flags & ILG_DELETED) != 0)
3771 			continue;
3772 		ASSERT(ilg->ilg_ipif == NULL);
3773 		ASSERT(mem_ill->ill_isv6);
3774 		if (mem_ill == ill &&
3775 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3776 			return (ilg);
3777 	}
3778 	return (NULL);
3779 }
3780 
3781 /*
3782  * Find an IPv4 ilg matching group and ipif
3783  */
3784 static ilg_t *
3785 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3786 {
3787 	in6_addr_t v6group;
3788 	int	i;
3789 	ilg_t	*ilg;
3790 
3791 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3792 	ASSERT(!ipif->ipif_ill->ill_isv6);
3793 
3794 	if (group == INADDR_ANY)
3795 		v6group = ipv6_all_zeros;
3796 	else
3797 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3798 
3799 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3800 		ilg = &connp->conn_ilg[i];
3801 		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
3802 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
3803 		    ilg->ilg_ipif == ipif)
3804 			return (ilg);
3805 	}
3806 	return (NULL);
3807 }
3808 
3809 /*
3810  * If a source address is passed in (src != NULL and src is not
3811  * unspecified), remove the specified src addr from the given ilg's
3812  * filter list, else delete the ilg.
3813  */
3814 static void
3815 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3816 {
3817 	int	i;
3818 
3819 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3820 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3821 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3822 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3823 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3824 
3825 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3826 		if (connp->conn_ilg_walker_cnt != 0) {
3827 			ilg->ilg_flags |= ILG_DELETED;
3828 			return;
3829 		}
3830 
3831 		FREE_SLIST(ilg->ilg_filter);
3832 
3833 		i = ilg - &connp->conn_ilg[0];
3834 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3835 
3836 		/* Move other entries up one step */
3837 		connp->conn_ilg_inuse--;
3838 		for (; i < connp->conn_ilg_inuse; i++)
3839 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3840 
3841 		if (connp->conn_ilg_inuse == 0) {
3842 			mi_free((char *)connp->conn_ilg);
3843 			connp->conn_ilg = NULL;
3844 			cv_broadcast(&connp->conn_refcv);
3845 		}
3846 	} else {
3847 		l_remove(ilg->ilg_filter, src);
3848 	}
3849 }
3850 
/*
 * Called from conn close to drop every multicast membership of connp.
 * No new ilg can be added or removed because CONN_CLOSING has been set
 * by ip_close.
 *
 * NOTE(review): the ilg_add() and ilg_delete() in this file do not
 * themselves test for a closing conn (ilg_delete() returns void);
 * presumably that check is made by their callers -- confirm.
 *
 * The ilg array is walked from the last entry to the first.  For each
 * live ilg, conn_lock is dropped, the perimeter of the ilg's ill is
 * entered, conn_lock is retaken and the ilg is revalidated before it is
 * deleted and the membership is removed with ip_delmulti() or
 * ip_delmulti_v6().  Entries whose ill cannot be waited on or entered
 * are skipped.  If entries remain after the walk, the function waits on
 * conn_refcv (broadcast by ilg_delete() when the last ilg goes away) and
 * walks again.
 */
void
ilg_delete_all(conn_t *connp)
{
	int	i;
	ipif_t	*ipif = NULL;
	ill_t	*ill = NULL;
	ilg_t	*ilg;
	in6_addr_t v6group;
	boolean_t success;
	ipsq_t	*ipsq;
	int	orig_ifindex;

	mutex_enter(&connp->conn_lock);
retry:
	/* Hold off repacking of conn_ilg[] while we walk it. */
	ILG_WALKER_HOLD(connp);
	/* Note: i is only decremented explicitly; "continue" can retry. */
	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
		ilg = &connp->conn_ilg[i];
		/*
		 * Since this walk is not atomic (we drop the
		 * conn_lock and wait in ipsq_enter) we need
		 * to check for the ILG_DELETED flag.
		 */
		if (ilg->ilg_flags & ILG_DELETED) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		v6group = ilg->ilg_v6group;

		/* A v4-mapped group means an IPv4 ilg, which hangs off an ipif. */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 * If we are unable to do even that, then the ill is closing,
		 * in which case the unplumb thread will handle the cleanup,
		 * and we move on to the next ilg.
		 */
		if (!ill_waiter_inc(ill)) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		mutex_exit(&connp->conn_lock);
		/*
		 * To prevent deadlock between ill close which waits inside
		 * the perimeter, and conn close, ipsq_enter returns error,
		 * the moment ILL_CONDEMNED is set, in which case ill close
		 * takes responsibility to cleanup the ilgs. Note that we
		 * have not yet set condemned flag, otherwise the conn can't
		 * be refheld for cleanup by those routines and it would be
		 * a mutual deadlock.
		 */
		success = ipsq_enter(ill, B_FALSE);
		ipsq = ill->ill_phyint->phyint_ipsq;
		ill_waiter_dcr(ill);
		mutex_enter(&connp->conn_lock);
		if (!success) {
			/* Go to the next ilg */
			i--;
			continue;
		}

		/*
		 * Make sure that nothing has changed under us. For example,
		 * a failover/failback can change ilg_ill while we were
		 * waiting to become exclusive above.
		 */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * The ilg has changed under us probably due
			 * to a failover or unplumb. Retry on the same ilg
			 * (i is left unchanged).
			 */
			mutex_exit(&connp->conn_lock);
			ipsq_exit(ipsq);
			mutex_enter(&connp->conn_lock);
			continue;
		}
		/* Save what the delmulti call needs before the ilg is deleted. */
		v6group = ilg->ilg_v6group;
		orig_ifindex = ilg->ilg_orig_ifindex;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);

		if (ipif != NULL)
			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
			    B_FALSE, B_TRUE);

		else
			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
			    connp->conn_zoneid, B_FALSE, B_TRUE);

		ipsq_exit(ipsq);
		mutex_enter(&connp->conn_lock);
		/* Go to the next ilg */
		i--;
	}
	ILG_WALKER_RELE(connp);

	/* If any ill was skipped above wait and retry */
	if (connp->conn_ilg_inuse != 0) {
		cv_wait(&connp->conn_refcv, &connp->conn_lock);
		goto retry;
	}
	mutex_exit(&connp->conn_lock);
}
3974 
3975 /*
3976  * Called from ill close by ipcl_walk for clearing conn_ilg and
3977  * conn_multicast_ipif for a given ipif. conn is held by caller.
3978  * Note that ipcl_walk only walks conns that are not yet condemned.
3979  * condemned conns can't be refheld. For this reason, conn must become clean
3980  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3981  * condemned flag.
3982  */
3983 static void
3984 conn_delete_ipif(conn_t *connp, caddr_t arg)
3985 {
3986 	ipif_t	*ipif = (ipif_t *)arg;
3987 	int	i;
3988 	char	group_buf1[INET6_ADDRSTRLEN];
3989 	char	group_buf2[INET6_ADDRSTRLEN];
3990 	ipaddr_t group;
3991 	ilg_t	*ilg;
3992 
3993 	/*
3994 	 * Even though conn_ilg_inuse can change while we are in this loop,
3995 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3996 	 * be created or deleted for this connp, on this ill, since this ill
3997 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3998 	 */
3999 	mutex_enter(&connp->conn_lock);
4000 
4001 	/*
4002 	 * Increment the walker count, so that ilg repacking does not
4003 	 * occur while we are in the loop.
4004 	 */
4005 	ILG_WALKER_HOLD(connp);
4006 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
4007 		ilg = &connp->conn_ilg[i];
4008 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
4009 			continue;
4010 		/*
4011 		 * ip_close cannot be cleaning this ilg at the same time.
4012 		 * since it also has to execute in this ill's perimeter which
4013 		 * we are now holding. Only a clean conn can be condemned.
4014 		 */
4015 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
4016 
4017 		/* Blow away the membership */
4018 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
4019 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
4020 		    group_buf1, sizeof (group_buf1)),
4021 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
4022 		    group_buf2, sizeof (group_buf2)),
4023 		    ipif->ipif_ill->ill_name));
4024 
4025 		/* ilg_ipif is NULL for V6, so we won't be here */
4026 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
4027 
4028 		group = V4_PART_OF_V6(ilg->ilg_v6group);
4029 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
4030 		mutex_exit(&connp->conn_lock);
4031 
4032 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
4033 		mutex_enter(&connp->conn_lock);
4034 	}
4035 
4036 	/*
4037 	 * If we are the last walker, need to physically delete the
4038 	 * ilgs and repack.
4039 	 */
4040 	ILG_WALKER_RELE(connp);
4041 
4042 	if (connp->conn_multicast_ipif == ipif) {
4043 		/* Revert to late binding */
4044 		connp->conn_multicast_ipif = NULL;
4045 	}
4046 	mutex_exit(&connp->conn_lock);
4047 
4048 	conn_delete_ire(connp, (caddr_t)ipif);
4049 }
4050 
4051 /*
4052  * Called from ill close by ipcl_walk for clearing conn_ilg and
4053  * conn_multicast_ill for a given ill. conn is held by caller.
4054  * Note that ipcl_walk only walks conns that are not yet condemned.
4055  * condemned conns can't be refheld. For this reason, conn must become clean
4056  * first, i.e. it must not refer to any ill/ire/ipif and then only set
4057  * condemned flag.
4058  */
4059 static void
4060 conn_delete_ill(conn_t *connp, caddr_t arg)
4061 {
4062 	ill_t	*ill = (ill_t *)arg;
4063 	int	i;
4064 	char	group_buf[INET6_ADDRSTRLEN];
4065 	in6_addr_t v6group;
4066 	int	orig_ifindex;
4067 	ilg_t	*ilg;
4068 
4069 	/*
4070 	 * Even though conn_ilg_inuse can change while we are in this loop,
4071 	 * no new ilgs can be created/deleted for this connp, on this
4072 	 * ill, since this ill is the perimeter. So we won't miss any ilg
4073 	 * in this cleanup.
4074 	 */
4075 	mutex_enter(&connp->conn_lock);
4076 
4077 	/*
4078 	 * Increment the walker count, so that ilg repacking does not
4079 	 * occur while we are in the loop.
4080 	 */
4081 	ILG_WALKER_HOLD(connp);
4082 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
4083 		ilg = &connp->conn_ilg[i];
4084 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
4085 			/*
4086 			 * ip_close cannot be cleaning this ilg at the same
4087 			 * time, since it also has to execute in this ill's
4088 			 * perimeter which we are now holding. Only a clean
4089 			 * conn can be condemned.
4090 			 */
4091 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
4092 
4093 			/* Blow away the membership */
4094 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
4095 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
4096 			    group_buf, sizeof (group_buf)),
4097 			    ill->ill_name));
4098 
4099 			v6group = ilg->ilg_v6group;
4100 			orig_ifindex = ilg->ilg_orig_ifindex;
4101 			ilg_delete(connp, ilg, NULL);
4102 			mutex_exit(&connp->conn_lock);
4103 
4104 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
4105 			    connp->conn_zoneid, B_FALSE, B_TRUE);
4106 			mutex_enter(&connp->conn_lock);
4107 		}
4108 	}
4109 	/*
4110 	 * If we are the last walker, need to physically delete the
4111 	 * ilgs and repack.
4112 	 */
4113 	ILG_WALKER_RELE(connp);
4114 
4115 	if (connp->conn_multicast_ill == ill) {
4116 		/* Revert to late binding */
4117 		connp->conn_multicast_ill = NULL;
4118 		connp->conn_orig_multicast_ifindex = 0;
4119 	}
4120 	mutex_exit(&connp->conn_lock);
4121 }
4122 
4123 /*
4124  * Called when an ipif is unplumbed to make sure that there are no
4125  * dangling conn references to that ipif.
4126  * Handles ilg_ipif and conn_multicast_ipif
4127  */
4128 void
4129 reset_conn_ipif(ipif)
4130 	ipif_t	*ipif;
4131 {
4132 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4133 
4134 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
4135 }
4136 
4137 /*
4138  * Called when an ill is unplumbed to make sure that there are no
4139  * dangling conn references to that ill.
4140  * Handles ilg_ill, conn_multicast_ill.
4141  */
4142 void
4143 reset_conn_ill(ill_t *ill)
4144 {
4145 	ip_stack_t	*ipst = ill->ill_ipst;
4146 
4147 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
4148 }
4149 
4150 #ifdef DEBUG
4151 /*
4152  * Walk functions walk all the interfaces in the system to make
4153  * sure that there is no refernece to the ipif or ill that is
4154  * going away.
4155  */
4156 int
4157 ilm_walk_ill(ill_t *ill)
4158 {
4159 	int cnt = 0;
4160 	ill_t *till;
4161 	ilm_t *ilm;
4162 	ill_walk_context_t ctx;
4163 	ip_stack_t	*ipst = ill->ill_ipst;
4164 
4165 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4166 	till = ILL_START_WALK_ALL(&ctx, ipst);
4167 	for (; till != NULL; till = ill_next(&ctx, till)) {
4168 		mutex_enter(&till->ill_lock);
4169 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4170 			if (ilm->ilm_ill == ill) {
4171 				cnt++;
4172 			}
4173 		}
4174 		mutex_exit(&till->ill_lock);
4175 	}
4176 	rw_exit(&ipst->ips_ill_g_lock);
4177 
4178 	return (cnt);
4179 }
4180 
4181 /*
4182  * This function is called before the ipif is freed.
4183  */
4184 int
4185 ilm_walk_ipif(ipif_t *ipif)
4186 {
4187 	int cnt = 0;
4188 	ill_t *till;
4189 	ilm_t *ilm;
4190 	ill_walk_context_t ctx;
4191 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4192 
4193 	till = ILL_START_WALK_ALL(&ctx, ipst);
4194 	for (; till != NULL; till = ill_next(&ctx, till)) {
4195 		mutex_enter(&till->ill_lock);
4196 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4197 			if (ilm->ilm_ipif == ipif) {
4198 					cnt++;
4199 			}
4200 		}
4201 		mutex_exit(&till->ill_lock);
4202 	}
4203 	return (cnt);
4204 }
4205 #endif
4206