xref: /titanic_44/usr/src/uts/common/inet/ip/ip_multi.c (revision d00756ccb34596a328f8a15d1965da5412d366d0)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/stream.h>
31 #include <sys/dlpi.h>
32 #include <sys/stropts.h>
33 #include <sys/strsun.h>
34 #include <sys/ddi.h>
35 #include <sys/cmn_err.h>
36 #include <sys/sdt.h>
37 #include <sys/zone.h>
38 
39 #include <sys/param.h>
40 #include <sys/socket.h>
41 #include <sys/sockio.h>
42 #include <net/if.h>
43 #include <sys/systm.h>
44 #include <sys/strsubr.h>
45 #include <net/route.h>
46 #include <netinet/in.h>
47 #include <net/if_dl.h>
48 #include <netinet/ip6.h>
49 #include <netinet/icmp6.h>
50 
51 #include <inet/common.h>
52 #include <inet/mi.h>
53 #include <inet/nd.h>
54 #include <inet/arp.h>
55 #include <inet/ip.h>
56 #include <inet/ip6.h>
57 #include <inet/ip_if.h>
58 #include <inet/ip_ndp.h>
59 #include <inet/ip_multi.h>
60 #include <inet/ipclassifier.h>
61 #include <inet/ipsec_impl.h>
62 #include <inet/sctp_ip.h>
63 #include <inet/ip_listutils.h>
64 #include <inet/udp_impl.h>
65 
66 /* igmpv3/mldv2 source filter manipulation */
67 static void	ilm_bld_flists(conn_t *conn, void *arg);
68 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
69     slist_t *flist);
70 
71 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
72     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
73     int orig_ifindex, zoneid_t zoneid);
74 static void	ilm_delete(ilm_t *ilm);
75 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
76 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
77 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
78     const in6_addr_t *v6group, int index);
79 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
80     ipif_t *ipif);
81 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
82     mcast_record_t fmode, ipaddr_t src);
83 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
84     mcast_record_t fmode, const in6_addr_t *v6src);
85 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
86 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
87     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
88 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
89     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
90 static void	conn_ilg_reap(conn_t *connp);
91 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
92     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
93 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
94     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
95     const in6_addr_t *v6src);
96 
97 /*
98  * MT notes:
99  *
100  * Multicast joins operate on both the ilg and ilm structures. Multiple
101  * threads operating on a conn (socket) trying to do multicast joins
102  * need to synchronize when operating on the ilg. Multiple threads
103  * potentially operating on different conns (socket endpoints) trying to
104  * do multicast joins could eventually end up trying to manipulate the
105  * ilm simultaneously and need to synchronize on the access to the ilm.
106  * Both are amenable to standard Solaris MT techniques, but it would be
107  * complex to handle a failover or failback which needs to manipulate
108  * ilg/ilms if applications can also simultaneously join/leave
109  * multicast groups. Hence multicast join/leave also go through the ipsq_t
110  * serialization.
111  *
112  * Multicast joins and leaves are single-threaded per phyint/IPMP group
113  * using the ipsq serialization mechanism.
114  *
115  * An ilm is an IP data structure used to track multicast join/leave.
116  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
117  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
118  * referencing the ilm. ilms are created / destroyed only as writer. ilms
119  * are not passed around, instead they are looked up and used under the
120  * ill_lock or as writer. So we don't need a dynamic refcount of the number
121  * of threads holding reference to an ilm.
122  *
123  * Multicast Join operation:
124  *
125  * The first step is to determine the ipif (v4) or ill (v6) on which
126  * the join operation is to be done. The join is done after becoming
127  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
128  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
129  * Multiple threads can attempt to join simultaneously on different ipif/ill
130  * on the same conn. In this case the ipsq serialization does not help in
131  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
132  * The conn_lock also protects all the ilg_t members.
133  *
134  * Leave operation.
135  *
136  * Similar to the join operation, the first step is to determine the ipif
137  * or ill (v6) on which the leave operation is to be done. The leave operation
138  * is done after becoming exclusive on the ipsq associated with the ipif or ill.
139  * As with join ilg modification is done under the protection of the conn lock.
140  */
141 
/*
 * Try to enter the ipsq associated with `ipif' on behalf of `connp' by
 * calling ipsq_try_enter(), storing its result in `ipsq'.  If the result
 * is NULL, the reference on `ipif' is dropped with ipif_refrele() and the
 * *calling function* returns EINPROGRESS; the macro may therefore only be
 * used inside functions that return int.
 *
 * The expansion is a single do { } while (0) statement with every argument
 * parenthesized, so it behaves correctly under an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter((ipif), NULL,			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ipif_refrele(ipif);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
150 
/*
 * Try to enter the ipsq associated with `ill' on behalf of `connp' by
 * calling ipsq_try_enter(), storing its result in `ipsq'.  If the result
 * is NULL, the reference on `ill' is dropped with ill_refrele() and the
 * *calling function* returns EINPROGRESS; the macro may therefore only be
 * used inside functions that return int.
 *
 * The expansion is a single do { } while (0) statement with every argument
 * parenthesized, so it behaves correctly under an unbraced if/else.
 * Invoke it with a trailing semicolon.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	do {								\
		ASSERT((connp) != NULL);				\
		(ipsq) = ipsq_try_enter(NULL, (ill),			\
		    CONNP_TO_WQ(connp), (first_mp), (func), (type),	\
		    B_TRUE);						\
		if ((ipsq) == NULL) {					\
			ill_refrele(ill);				\
			return (EINPROGRESS);				\
		}							\
	} while (0)
159 
/*
 * Leave the ipsq by calling ipsq_exit(ipsq, B_TRUE, B_TRUE), unless `ipsq'
 * is NULL, in which case nothing is done.
 *
 * Wrapped in do { } while (0) with the argument parenthesized so that the
 * internal `if' cannot capture a following `else' at the call site.
 * Invoke it with a trailing semicolon.
 */
#define	IPSQ_EXIT(ipsq)						\
	do {							\
		if ((ipsq) != NULL)				\
			ipsq_exit((ipsq), B_TRUE, B_TRUE);	\
	} while (0)
163 
/*
 * ILG_WALKER_HOLD() bumps connp->conn_ilg_walker_cnt; ILG_WALKER_RELE()
 * drops it again and, when the count reaches zero, calls conn_ilg_reap()
 * on the conn.
 *
 * ILG_WALKER_RELE() is wrapped in do { } while (0) so that
 * "ILG_WALKER_RELE(connp);" is a single statement even directly before an
 * `else'.  Invoke both macros with a trailing semicolon.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)

#define	ILG_WALKER_RELE(connp)					\
	do {							\
		(connp)->conn_ilg_walker_cnt--;			\
		if ((connp)->conn_ilg_walker_cnt == 0)		\
			conn_ilg_reap(connp);			\
	} while (0)
172 
173 static void
174 conn_ilg_reap(conn_t *connp)
175 {
176 	int	to;
177 	int	from;
178 
179 	ASSERT(MUTEX_HELD(&connp->conn_lock));
180 
181 	to = 0;
182 	from = 0;
183 	while (from < connp->conn_ilg_inuse) {
184 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
185 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
186 			from++;
187 			continue;
188 		}
189 		if (to != from)
190 			connp->conn_ilg[to] = connp->conn_ilg[from];
191 		to++;
192 		from++;
193 	}
194 
195 	connp->conn_ilg_inuse = to;
196 
197 	if (connp->conn_ilg_inuse == 0) {
198 		mi_free((char *)connp->conn_ilg);
199 		connp->conn_ilg = NULL;
200 		cv_broadcast(&connp->conn_refcv);
201 	}
202 }
203 
204 #define	GETSTRUCT(structure, number)	\
205 	((structure *)mi_zalloc(sizeof (structure) * (number)))
206 
207 #define	ILG_ALLOC_CHUNK	16
208 
209 /*
210  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
211  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
212  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
213  * returned ilg).  Returns NULL on failure (ENOMEM).
214  *
215  * Assumes connp->conn_lock is held.
216  */
217 static ilg_t *
218 conn_ilg_alloc(conn_t *connp)
219 {
220 	ilg_t *new;
221 	int curcnt;
222 
223 	ASSERT(MUTEX_HELD(&connp->conn_lock));
224 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
225 
226 	if (connp->conn_ilg == NULL) {
227 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
228 		if (connp->conn_ilg == NULL)
229 			return (NULL);
230 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
231 		connp->conn_ilg_inuse = 0;
232 	}
233 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
234 		curcnt = connp->conn_ilg_allocated;
235 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
236 		if (new == NULL)
237 			return (NULL);
238 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
239 		mi_free((char *)connp->conn_ilg);
240 		connp->conn_ilg = new;
241 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
242 	}
243 
244 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
245 }
246 
247 typedef struct ilm_fbld_s {
248 	ilm_t		*fbld_ilm;
249 	int		fbld_in_cnt;
250 	int		fbld_ex_cnt;
251 	slist_t		fbld_in;
252 	slist_t		fbld_ex;
253 	boolean_t	fbld_in_overflow;
254 } ilm_fbld_t;
255 
256 static void
257 ilm_bld_flists(conn_t *conn, void *arg)
258 {
259 	int i;
260 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
261 	ilm_t *ilm = fbld->fbld_ilm;
262 	in6_addr_t *v6group = &ilm->ilm_v6addr;
263 
264 	if (conn->conn_ilg_inuse == 0)
265 		return;
266 
267 	/*
268 	 * Since we can't break out of the ipcl_walk once started, we still
269 	 * have to look at every conn.  But if we've already found one
270 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
271 	 * ilgs--that will be our state.
272 	 */
273 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
274 		return;
275 
276 	/*
277 	 * Check this conn's ilgs to see if any are interested in our
278 	 * ilm (group, interface match).  If so, update the master
279 	 * include and exclude lists we're building in the fbld struct
280 	 * with this ilg's filter info.
281 	 */
282 	mutex_enter(&conn->conn_lock);
283 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
284 		ilg_t *ilg = &conn->conn_ilg[i];
285 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
286 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
287 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
288 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
289 				fbld->fbld_in_cnt++;
290 				if (!fbld->fbld_in_overflow)
291 					l_union_in_a(&fbld->fbld_in,
292 					    ilg->ilg_filter,
293 					    &fbld->fbld_in_overflow);
294 			} else {
295 				fbld->fbld_ex_cnt++;
296 				/*
297 				 * On the first exclude list, don't try to do
298 				 * an intersection, as the master exclude list
299 				 * is intentionally empty.  If the master list
300 				 * is still empty on later iterations, that
301 				 * means we have at least one ilg with an empty
302 				 * exclude list, so that should be reflected
303 				 * when we take the intersection.
304 				 */
305 				if (fbld->fbld_ex_cnt == 1) {
306 					if (ilg->ilg_filter != NULL)
307 						l_copy(ilg->ilg_filter,
308 						    &fbld->fbld_ex);
309 				} else {
310 					l_intersection_in_a(&fbld->fbld_ex,
311 					    ilg->ilg_filter);
312 				}
313 			}
314 			/* there will only be one match, so break now. */
315 			break;
316 		}
317 	}
318 	mutex_exit(&conn->conn_lock);
319 }
320 
321 static void
322 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
323 {
324 	ilm_fbld_t fbld;
325 	ip_stack_t *ipst = ilm->ilm_ipst;
326 
327 	fbld.fbld_ilm = ilm;
328 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
329 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
330 	fbld.fbld_in_overflow = B_FALSE;
331 
332 	/* first, construct our master include and exclude lists */
333 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
334 
335 	/* now use those master lists to generate the interface filter */
336 
337 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
338 	if (fbld.fbld_in_overflow) {
339 		*fmode = MODE_IS_EXCLUDE;
340 		flist->sl_numsrc = 0;
341 		return;
342 	}
343 
344 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
345 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
346 		*fmode = MODE_IS_INCLUDE;
347 		flist->sl_numsrc = 0;
348 		return;
349 	}
350 
351 	/*
352 	 * If there are no exclude lists, then the interface filter
353 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
354 	 * exclude list makes the interface filter EXCLUDE, with its
355 	 * filter list equal to (fbld_ex - fbld_in).
356 	 */
357 	if (fbld.fbld_ex_cnt == 0) {
358 		*fmode = MODE_IS_INCLUDE;
359 		l_copy(&fbld.fbld_in, flist);
360 	} else {
361 		*fmode = MODE_IS_EXCLUDE;
362 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
363 	}
364 }
365 
366 /*
367  * If the given interface has failed, choose a new one to join on so
368  * that we continue to receive packets.  ilg_orig_ifindex remembers
369  * what the application used to join on so that we know the ilg to
370  * delete even though we change the ill here.  Callers will store the
371  * ilg returned from this function in ilg_ill.  Thus when we receive
372  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
373  *
374  * This function must be called as writer so we can walk the group
375  * list and examine flags without holding a lock.
376  */
377 ill_t *
378 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
379 {
380 	ill_t	*till;
381 	ill_group_t *illgrp = ill->ill_group;
382 
383 	ASSERT(IAM_WRITER_ILL(ill));
384 
385 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
386 		return (ill);
387 
388 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
389 		return (ill);
390 
391 	till = illgrp->illgrp_ill;
392 	while (till != NULL &&
393 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
394 		till = till->ill_group_next;
395 	}
396 	if (till != NULL)
397 		return (till);
398 
399 	return (ill);
400 }
401 
402 static int
403 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
404     boolean_t isv6)
405 {
406 	mcast_record_t fmode;
407 	slist_t *flist;
408 	boolean_t fdefault;
409 	char buf[INET6_ADDRSTRLEN];
410 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
411 
412 	/*
413 	 * There are several cases where the ilm's filter state
414 	 * defaults to (EXCLUDE, NULL):
415 	 *	- we've had previous joins without associated ilgs
416 	 *	- this join has no associated ilg
417 	 *	- the ilg's filter state is (EXCLUDE, NULL)
418 	 */
419 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
420 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
421 
422 	/* attempt mallocs (if needed) before doing anything else */
423 	if ((flist = l_alloc()) == NULL)
424 		return (ENOMEM);
425 	if (!fdefault && ilm->ilm_filter == NULL) {
426 		ilm->ilm_filter = l_alloc();
427 		if (ilm->ilm_filter == NULL) {
428 			l_free(flist);
429 			return (ENOMEM);
430 		}
431 	}
432 
433 	if (ilgstat != ILGSTAT_CHANGE)
434 		ilm->ilm_refcnt++;
435 
436 	if (ilgstat == ILGSTAT_NONE)
437 		ilm->ilm_no_ilg_cnt++;
438 
439 	/*
440 	 * Determine new filter state.  If it's not the default
441 	 * (EXCLUDE, NULL), we must walk the conn list to find
442 	 * any ilgs interested in this group, and re-build the
443 	 * ilm filter.
444 	 */
445 	if (fdefault) {
446 		fmode = MODE_IS_EXCLUDE;
447 		flist->sl_numsrc = 0;
448 	} else {
449 		ilm_gen_filter(ilm, &fmode, flist);
450 	}
451 
452 	/* make sure state actually changed; nothing to do if not. */
453 	if ((ilm->ilm_fmode == fmode) &&
454 	    !lists_are_different(ilm->ilm_filter, flist)) {
455 		l_free(flist);
456 		return (0);
457 	}
458 
459 	/* send the state change report */
460 	if (!IS_LOOPBACK(ill)) {
461 		if (isv6)
462 			mld_statechange(ilm, fmode, flist);
463 		else
464 			igmp_statechange(ilm, fmode, flist);
465 	}
466 
467 	/* update the ilm state */
468 	ilm->ilm_fmode = fmode;
469 	if (flist->sl_numsrc > 0)
470 		l_copy(flist, ilm->ilm_filter);
471 	else
472 		CLEAR_SLIST(ilm->ilm_filter);
473 
474 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
475 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
476 
477 	l_free(flist);
478 	return (0);
479 }
480 
481 static int
482 ilm_update_del(ilm_t *ilm, boolean_t isv6)
483 {
484 	mcast_record_t fmode;
485 	slist_t *flist;
486 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
487 
488 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
489 	    ilm->ilm_refcnt));
490 
491 	if ((flist = l_alloc()) == NULL)
492 		return (ENOMEM);
493 
494 	/*
495 	 * If present, the ilg in question has already either been
496 	 * updated or removed from our list; so all we need to do
497 	 * now is walk the list to update the ilm filter state.
498 	 *
499 	 * Skip the list walk if we have any no-ilg joins, which
500 	 * cause the filter state to revert to (EXCLUDE, NULL).
501 	 */
502 	if (ilm->ilm_no_ilg_cnt != 0) {
503 		fmode = MODE_IS_EXCLUDE;
504 		flist->sl_numsrc = 0;
505 	} else {
506 		ilm_gen_filter(ilm, &fmode, flist);
507 	}
508 
509 	/* check to see if state needs to be updated */
510 	if ((ilm->ilm_fmode == fmode) &&
511 	    (!lists_are_different(ilm->ilm_filter, flist))) {
512 		l_free(flist);
513 		return (0);
514 	}
515 
516 	if (!IS_LOOPBACK(ill)) {
517 		if (isv6)
518 			mld_statechange(ilm, fmode, flist);
519 		else
520 			igmp_statechange(ilm, fmode, flist);
521 	}
522 
523 	ilm->ilm_fmode = fmode;
524 	if (flist->sl_numsrc > 0) {
525 		if (ilm->ilm_filter == NULL) {
526 			ilm->ilm_filter = l_alloc();
527 			if (ilm->ilm_filter == NULL) {
528 				char buf[INET6_ADDRSTRLEN];
529 				ip1dbg(("ilm_update_del: failed to alloc ilm "
530 				    "filter; no source filtering for %s on %s",
531 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
532 				    buf, sizeof (buf)), ill->ill_name));
533 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
534 				l_free(flist);
535 				return (0);
536 			}
537 		}
538 		l_copy(flist, ilm->ilm_filter);
539 	} else {
540 		CLEAR_SLIST(ilm->ilm_filter);
541 	}
542 
543 	l_free(flist);
544 	return (0);
545 }
546 
547 /*
548  * INADDR_ANY means all multicast addresses. This is only used
549  * by the multicast router.
550  * INADDR_ANY is stored as IPv6 unspecified addr.
551  */
552 int
553 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
554     mcast_record_t ilg_fmode, slist_t *ilg_flist)
555 {
556 	ill_t	*ill = ipif->ipif_ill;
557 	ilm_t 	*ilm;
558 	in6_addr_t v6group;
559 	int	ret;
560 
561 	ASSERT(IAM_WRITER_IPIF(ipif));
562 
563 	if (!CLASSD(group) && group != INADDR_ANY)
564 		return (EINVAL);
565 
566 	/*
567 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
568 	 */
569 	if (group == INADDR_ANY)
570 		v6group = ipv6_all_zeros;
571 	else
572 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
573 
574 	ilm = ilm_lookup_ipif(ipif, group);
575 	if (ilm != NULL)
576 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
577 
578 	/*
579 	 * ilms are associated with ipifs in IPv4. It moves with the
580 	 * ipif if the ipif moves to a new ill when the interface
581 	 * fails. Thus we really don't check whether the ipif_ill
582 	 * has failed like in IPv6. If it has FAILED the ipif
583 	 * will move (daemon will move it) and hence the ilm, if the
584 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
585 	 * we continue to receive in the same place even if the
586 	 * interface fails.
587 	 */
588 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
589 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
590 	if (ilm == NULL)
591 		return (ENOMEM);
592 
593 	if (group == INADDR_ANY) {
594 		/*
595 		 * Check how many ipif's have members in this group -
596 		 * if more then one we should not tell the driver to join
597 		 * this time
598 		 */
599 		if (ilm_numentries_v6(ill, &v6group) > 1)
600 			return (0);
601 		if (ill->ill_group == NULL)
602 			ret = ip_join_allmulti(ipif);
603 		else
604 			ret = ill_nominate_mcast_rcv(ill->ill_group);
605 		if (ret != 0)
606 			ilm_delete(ilm);
607 		return (ret);
608 	}
609 
610 	if (!IS_LOOPBACK(ill))
611 		igmp_joingroup(ilm);
612 
613 	if (ilm_numentries_v6(ill, &v6group) > 1)
614 		return (0);
615 
616 	ret = ip_ll_addmulti_v6(ipif, &v6group);
617 	if (ret != 0)
618 		ilm_delete(ilm);
619 	return (ret);
620 }
621 
622 /*
623  * The unspecified address means all multicast addresses.
624  * This is only used by the multicast router.
625  *
626  * ill identifies the interface to join on; it may not match the
627  * interface requested by the application of a failover has taken
628  * place.  orig_ifindex always identifies the interface requested
629  * by the app.
630  *
631  * ilgstat tells us if there's an ilg associated with this join,
632  * and if so, if it's a new ilg or a change to an existing one.
633  * ilg_fmode and ilg_flist give us the current filter state of
634  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
635  */
636 int
637 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
638     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
639     slist_t *ilg_flist)
640 {
641 	ilm_t	*ilm;
642 	int	ret;
643 
644 	ASSERT(IAM_WRITER_ILL(ill));
645 
646 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
647 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
648 		return (EINVAL);
649 	}
650 
651 	/*
652 	 * An ilm is uniquely identified by the tuple of (group, ill,
653 	 * orig_ill).  group is the multicast group address, ill is
654 	 * the interface on which it is currently joined, and orig_ill
655 	 * is the interface on which the application requested the
656 	 * join.  orig_ill and ill are the same unless orig_ill has
657 	 * failed over.
658 	 *
659 	 * Both orig_ill and ill are required, which means we may have
660 	 * 2 ilms on an ill for the same group, but with different
661 	 * orig_ills.  These must be kept separate, so that when failback
662 	 * occurs, the appropriate ilms are moved back to their orig_ill
663 	 * without disrupting memberships on the ill to which they had
664 	 * been moved.
665 	 *
666 	 * In order to track orig_ill, we store orig_ifindex in the
667 	 * ilm and ilg.
668 	 */
669 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
670 	if (ilm != NULL)
671 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
672 
673 	/*
674 	 * We need to remember where the application really wanted
675 	 * to join. This will be used later if we want to failback
676 	 * to the original interface.
677 	 */
678 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
679 	    ilg_flist, orig_ifindex, zoneid);
680 	if (ilm == NULL)
681 		return (ENOMEM);
682 
683 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
684 		/*
685 		 * Check how many ipif's that have members in this group -
686 		 * if more then one we should not tell the driver to join
687 		 * this time
688 		 */
689 		if (ilm_numentries_v6(ill, v6group) > 1)
690 			return (0);
691 		if (ill->ill_group == NULL)
692 			ret = ip_join_allmulti(ill->ill_ipif);
693 		else
694 			ret = ill_nominate_mcast_rcv(ill->ill_group);
695 
696 		if (ret != 0)
697 			ilm_delete(ilm);
698 		return (ret);
699 	}
700 
701 	if (!IS_LOOPBACK(ill))
702 		mld_joingroup(ilm);
703 
704 	/*
705 	 * If we have more then one we should not tell the driver
706 	 * to join this time.
707 	 */
708 	if (ilm_numentries_v6(ill, v6group) > 1)
709 		return (0);
710 
711 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
712 	if (ret != 0)
713 		ilm_delete(ilm);
714 	return (ret);
715 }
716 
717 /*
718  * Send a multicast request to the driver for enabling multicast reception
719  * for v6groupp address. The caller has already checked whether it is
720  * appropriate to send one or not.
721  */
722 int
723 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
724 {
725 	mblk_t	*mp;
726 	uint32_t addrlen, addroff;
727 	char	group_buf[INET6_ADDRSTRLEN];
728 
729 	ASSERT(IAM_WRITER_ILL(ill));
730 
731 	/*
732 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
733 	 * on.
734 	 */
735 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
736 	    &addrlen, &addroff);
737 	if (!mp)
738 		return (ENOMEM);
739 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
740 		ipaddr_t v4group;
741 
742 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
743 		/*
744 		 * NOTE!!!
745 		 * The "addroff" passed in here was calculated by
746 		 * ill_create_dl(), and will be used by ill_create_squery()
747 		 * to perform some twisted coding magic. It is the offset
748 		 * into the dl_xxx_req of the hw addr. Here, it will be
749 		 * added to b_wptr - b_rptr to create a magic number that
750 		 * is not an offset into this squery mblk.
751 		 * The actual hardware address will be accessed only in the
752 		 * dl_xxx_req, not in the squery. More importantly,
753 		 * that hardware address can *only* be accessed in this
754 		 * mblk chain by calling mi_offset_param_c(), which uses
755 		 * the magic number in the squery hw offset field to go
756 		 * to the *next* mblk (the dl_xxx_req), subtract the
757 		 * (b_wptr - b_rptr), and find the actual offset into
758 		 * the dl_xxx_req.
759 		 * Any method that depends on using the
760 		 * offset field in the dl_disabmulti_req or squery
761 		 * to find either hardware address will similarly fail.
762 		 *
763 		 * Look in ar_entry_squery() in arp.c to see how this offset
764 		 * is used.
765 		 */
766 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
767 		if (!mp)
768 			return (ENOMEM);
769 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
770 		    inet_ntop(AF_INET6, v6groupp, group_buf,
771 		    sizeof (group_buf)),
772 		    ill->ill_name));
773 		putnext(ill->ill_rq, mp);
774 	} else {
775 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
776 		    " %s\n",
777 		    inet_ntop(AF_INET6, v6groupp, group_buf,
778 		    sizeof (group_buf)),
779 		    ill->ill_name));
780 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
781 	}
782 	return (0);
783 }
784 
785 /*
786  * Send a multicast request to the driver for enabling multicast
787  * membership for v6group if appropriate.
788  */
789 static int
790 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
791 {
792 	ill_t	*ill = ipif->ipif_ill;
793 
794 	ASSERT(IAM_WRITER_IPIF(ipif));
795 
796 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
797 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
798 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
799 		return (0);	/* Must be IRE_IF_NORESOLVER */
800 	}
801 
802 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
803 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
804 		return (0);
805 	}
806 	if (!ill->ill_dl_up) {
807 		/*
808 		 * Nobody there. All multicast addresses will be re-joined
809 		 * when we get the DL_BIND_ACK bringing the interface up.
810 		 */
811 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
812 		return (0);
813 	}
814 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
815 }
816 
817 /*
818  * INADDR_ANY means all multicast addresses. This is only used
819  * by the multicast router.
820  * INADDR_ANY is stored as the IPv6 unspecifed addr.
821  */
822 int
823 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
824 {
825 	ill_t	*ill = ipif->ipif_ill;
826 	ilm_t *ilm;
827 	in6_addr_t v6group;
828 	int	ret;
829 
830 	ASSERT(IAM_WRITER_IPIF(ipif));
831 
832 	if (!CLASSD(group) && group != INADDR_ANY)
833 		return (EINVAL);
834 
835 	/*
836 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
837 	 */
838 	if (group == INADDR_ANY)
839 		v6group = ipv6_all_zeros;
840 	else
841 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
842 
843 	/*
844 	 * Look for a match on the ipif.
845 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
846 	 */
847 	ilm = ilm_lookup_ipif(ipif, group);
848 	if (ilm == NULL)
849 		return (ENOENT);
850 
851 	/* Update counters */
852 	if (no_ilg)
853 		ilm->ilm_no_ilg_cnt--;
854 
855 	if (leaving)
856 		ilm->ilm_refcnt--;
857 
858 	if (ilm->ilm_refcnt > 0)
859 		return (ilm_update_del(ilm, B_FALSE));
860 
861 	if (group == INADDR_ANY) {
862 		ilm_delete(ilm);
863 		/*
864 		 * Check how many ipif's that have members in this group -
865 		 * if there are still some left then don't tell the driver
866 		 * to drop it.
867 		 */
868 		if (ilm_numentries_v6(ill, &v6group) != 0)
869 			return (0);
870 
871 		/*
872 		 * If we never joined, then don't leave.  This can happen
873 		 * if we're in an IPMP group, since only one ill per IPMP
874 		 * group receives all multicast packets.
875 		 */
876 		if (!ill->ill_join_allmulti) {
877 			ASSERT(ill->ill_group != NULL);
878 			return (0);
879 		}
880 
881 		ret = ip_leave_allmulti(ipif);
882 		if (ill->ill_group != NULL)
883 			(void) ill_nominate_mcast_rcv(ill->ill_group);
884 		return (ret);
885 	}
886 
887 	if (!IS_LOOPBACK(ill))
888 		igmp_leavegroup(ilm);
889 
890 	ilm_delete(ilm);
891 	/*
892 	 * Check how many ipif's that have members in this group -
893 	 * if there are still some left then don't tell the driver
894 	 * to drop it.
895 	 */
896 	if (ilm_numentries_v6(ill, &v6group) != 0)
897 		return (0);
898 	return (ip_ll_delmulti_v6(ipif, &v6group));
899 }
900 
901 /*
902  * The unspecified address means all multicast addresses.
903  * This is only used by the multicast router.
904  */
905 int
906 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
907     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
908 {
909 	ipif_t	*ipif;
910 	ilm_t *ilm;
911 	int	ret;
912 
913 	ASSERT(IAM_WRITER_ILL(ill));
914 
915 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
916 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
917 		return (EINVAL);
918 
919 	/*
920 	 * Look for a match on the ill.
921 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
922 	 *
923 	 * Similar to ip_addmulti_v6, we should always look using
924 	 * the orig_ifindex.
925 	 *
926 	 * 1) If orig_ifindex is different from ill's ifindex
927 	 *    we should have an ilm with orig_ifindex created in
928 	 *    ip_addmulti_v6. We should delete that here.
929 	 *
930 	 * 2) If orig_ifindex is same as ill's ifindex, we should
931 	 *    not delete the ilm that is temporarily here because of
932 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
933 	 *    different from ill's ifindex.
934 	 *
935 	 * Thus, always lookup using orig_ifindex.
936 	 */
937 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
938 	if (ilm == NULL)
939 		return (ENOENT);
940 
941 	ASSERT(ilm->ilm_ill == ill);
942 
943 	ipif = ill->ill_ipif;
944 
945 	/* Update counters */
946 	if (no_ilg)
947 		ilm->ilm_no_ilg_cnt--;
948 
949 	if (leaving)
950 		ilm->ilm_refcnt--;
951 
952 	if (ilm->ilm_refcnt > 0)
953 		return (ilm_update_del(ilm, B_TRUE));
954 
955 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
956 		ilm_delete(ilm);
957 		/*
958 		 * Check how many ipif's that have members in this group -
959 		 * if there are still some left then don't tell the driver
960 		 * to drop it.
961 		 */
962 		if (ilm_numentries_v6(ill, v6group) != 0)
963 			return (0);
964 
965 		/*
966 		 * If we never joined, then don't leave.  This can happen
967 		 * if we're in an IPMP group, since only one ill per IPMP
968 		 * group receives all multicast packets.
969 		 */
970 		if (!ill->ill_join_allmulti) {
971 			ASSERT(ill->ill_group != NULL);
972 			return (0);
973 		}
974 
975 		ret = ip_leave_allmulti(ipif);
976 		if (ill->ill_group != NULL)
977 			(void) ill_nominate_mcast_rcv(ill->ill_group);
978 		return (ret);
979 	}
980 
981 	if (!IS_LOOPBACK(ill))
982 		mld_leavegroup(ilm);
983 
984 	ilm_delete(ilm);
985 	/*
986 	 * Check how many ipif's that have members in this group -
987 	 * if there are still some left then don't tell the driver
988 	 * to drop it.
989 	 */
990 	if (ilm_numentries_v6(ill, v6group) != 0)
991 		return (0);
992 	return (ip_ll_delmulti_v6(ipif, v6group));
993 }
994 
995 /*
996  * Send a multicast request to the driver for disabling multicast reception
997  * for v6groupp address. The caller has already checked whether it is
998  * appropriate to send one or not.
999  */
1000 int
1001 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1002 {
1003 	mblk_t	*mp;
1004 	char	group_buf[INET6_ADDRSTRLEN];
1005 	uint32_t	addrlen, addroff;
1006 
1007 	ASSERT(IAM_WRITER_ILL(ill));
1008 	/*
1009 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1010 	 * on.
1011 	 */
1012 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1013 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1014 
1015 	if (!mp)
1016 		return (ENOMEM);
1017 
1018 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1019 		ipaddr_t v4group;
1020 
1021 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1022 		/*
1023 		 * NOTE!!!
1024 		 * The "addroff" passed in here was calculated by
1025 		 * ill_create_dl(), and will be used by ill_create_squery()
1026 		 * to perform some twisted coding magic. It is the offset
1027 		 * into the dl_xxx_req of the hw addr. Here, it will be
1028 		 * added to b_wptr - b_rptr to create a magic number that
1029 		 * is not an offset into this mblk.
1030 		 *
1031 		 * Please see the comment in ip_ll_send)enabmulti_req()
1032 		 * for a complete explanation.
1033 		 *
1034 		 * Look in ar_entry_squery() in arp.c to see how this offset
1035 		 * is used.
1036 		 */
1037 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1038 		if (!mp)
1039 			return (ENOMEM);
1040 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1041 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1042 		    sizeof (group_buf)),
1043 		    ill->ill_name));
1044 		putnext(ill->ill_rq, mp);
1045 	} else {
1046 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
1047 		    " %s\n",
1048 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1049 		    sizeof (group_buf)),
1050 		    ill->ill_name));
1051 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1052 	}
1053 	return (0);
1054 }
1055 
1056 /*
1057  * Send a multicast request to the driver for disabling multicast
1058  * membership for v6group if appropriate.
1059  */
1060 static int
1061 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1062 {
1063 	ill_t	*ill = ipif->ipif_ill;
1064 
1065 	ASSERT(IAM_WRITER_IPIF(ipif));
1066 
1067 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1068 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1069 		return (0);	/* Must be IRE_IF_NORESOLVER */
1070 	}
1071 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1072 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1073 		return (0);
1074 	}
1075 	if (!ill->ill_dl_up) {
1076 		/*
1077 		 * Nobody there. All multicast addresses will be re-joined
1078 		 * when we get the DL_BIND_ACK bringing the interface up.
1079 		 */
1080 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1081 		return (0);
1082 	}
1083 	return (ip_ll_send_disabmulti_req(ill, v6group));
1084 }
1085 
1086 /*
1087  * Make the driver pass up all multicast packets
1088  *
1089  * With ill groups, the caller makes sure that there is only
1090  * one ill joining the allmulti group.
1091  */
1092 int
1093 ip_join_allmulti(ipif_t *ipif)
1094 {
1095 	ill_t	*ill = ipif->ipif_ill;
1096 	mblk_t	*mp;
1097 	uint32_t	addrlen, addroff;
1098 
1099 	ASSERT(IAM_WRITER_IPIF(ipif));
1100 
1101 	if (!ill->ill_dl_up) {
1102 		/*
1103 		 * Nobody there. All multicast addresses will be re-joined
1104 		 * when we get the DL_BIND_ACK bringing the interface up.
1105 		 */
1106 		return (0);
1107 	}
1108 
1109 	ASSERT(!ill->ill_join_allmulti);
1110 
1111 	/*
1112 	 * Create a DL_PROMISCON_REQ message and send it directly to
1113 	 * the DLPI provider.  We don't need to do this for certain
1114 	 * media types for which we never need to turn promiscuous
1115 	 * mode on.
1116 	 */
1117 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1118 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1119 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1120 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1121 		if (mp == NULL)
1122 			return (ENOMEM);
1123 		ill_dlpi_send(ill, mp);
1124 	}
1125 
1126 	mutex_enter(&ill->ill_lock);
1127 	ill->ill_join_allmulti = B_TRUE;
1128 	mutex_exit(&ill->ill_lock);
1129 	return (0);
1130 }
1131 
1132 /*
1133  * Make the driver stop passing up all multicast packets
1134  *
1135  * With ill groups, we need to nominate some other ill as
1136  * this ipif->ipif_ill is leaving the group.
1137  */
1138 int
1139 ip_leave_allmulti(ipif_t *ipif)
1140 {
1141 	ill_t	*ill = ipif->ipif_ill;
1142 	mblk_t	*mp;
1143 	uint32_t	addrlen, addroff;
1144 
1145 	ASSERT(IAM_WRITER_IPIF(ipif));
1146 
1147 	if (!ill->ill_dl_up) {
1148 		/*
1149 		 * Nobody there. All multicast addresses will be re-joined
1150 		 * when we get the DL_BIND_ACK bringing the interface up.
1151 		 */
1152 		return (0);
1153 	}
1154 
1155 	ASSERT(ill->ill_join_allmulti);
1156 
1157 	/*
1158 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1159 	 * the DLPI provider.  We don't need to do this for certain
1160 	 * media types for which we never need to turn promiscuous
1161 	 * mode on.
1162 	 */
1163 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1164 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1165 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1166 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1167 		if (mp == NULL)
1168 			return (ENOMEM);
1169 		ill_dlpi_send(ill, mp);
1170 	}
1171 
1172 	mutex_enter(&ill->ill_lock);
1173 	ill->ill_join_allmulti = B_FALSE;
1174 	mutex_exit(&ill->ill_lock);
1175 	return (0);
1176 }
1177 
1178 /*
1179  * Copy mp_orig and pass it in as a local message.
1180  */
1181 void
1182 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1183     zoneid_t zoneid)
1184 {
1185 	mblk_t	*mp;
1186 	mblk_t	*ipsec_mp;
1187 	ipha_t	*iph;
1188 	ip_stack_t *ipst = ill->ill_ipst;
1189 
1190 	if (DB_TYPE(mp_orig) == M_DATA &&
1191 	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
1192 		uint_t hdrsz;
1193 
1194 		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
1195 		    sizeof (udpha_t);
1196 		ASSERT(MBLKL(mp_orig) >= hdrsz);
1197 
1198 		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
1199 		    (mp_orig = dupmsg(mp_orig)) != NULL) {
1200 			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
1201 			mp->b_wptr += hdrsz;
1202 			mp->b_cont = mp_orig;
1203 			mp_orig->b_rptr += hdrsz;
1204 			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
1205 				mblk_setcred(mp, DB_CRED(mp_orig));
1206 			if (MBLKL(mp_orig) == 0) {
1207 				mp->b_cont = mp_orig->b_cont;
1208 				mp_orig->b_cont = NULL;
1209 				freeb(mp_orig);
1210 			}
1211 		} else if (mp != NULL) {
1212 			freeb(mp);
1213 			mp = NULL;
1214 		}
1215 	} else {
1216 		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
1217 	}
1218 
1219 	if (mp == NULL)
1220 		return;
1221 	if (DB_TYPE(mp) == M_CTL) {
1222 		ipsec_mp = mp;
1223 		mp = mp->b_cont;
1224 	} else {
1225 		ipsec_mp = mp;
1226 	}
1227 
1228 	iph = (ipha_t *)mp->b_rptr;
1229 
1230 	DTRACE_PROBE4(ip4__loopback__out__start,
1231 	    ill_t *, NULL, ill_t *, ill,
1232 	    ipha_t *, iph, mblk_t *, ipsec_mp);
1233 
1234 	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
1235 	    ipst->ips_ipv4firewall_loopback_out,
1236 	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);
1237 
1238 	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);
1239 
1240 	if (ipsec_mp != NULL)
1241 		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
1242 		    fanout_flags, zoneid);
1243 }
1244 
/*
 * Template for the AR_ENTRY_SQUERY message that ill_create_squery() hands
 * to ill_arp_alloc().  The hardware address offset and length are left
 * zero here; ill_create_squery() fills them in after allocation.
 */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
	sizeof (area_t),			/* proto addr offset */
	IP_ADDR_LEN,			/* proto addr_length */
	0,				/* proto mask offset */
	/* Rest is initialized when used */
	0,				/* flags */
	0,				/* hw addr offset */
	0,				/* hw addr length */
};
1258 
1259 static mblk_t *
1260 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1261     uint32_t addroff, mblk_t *mp_tail)
1262 {
1263 	mblk_t	*mp;
1264 	area_t	*area;
1265 
1266 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1267 	    (caddr_t)&ipaddr);
1268 	if (!mp) {
1269 		freemsg(mp_tail);
1270 		return (NULL);
1271 	}
1272 	area = (area_t *)mp->b_rptr;
1273 	area->area_hw_addr_length = addrlen;
1274 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1275 	/*
1276 	 * NOTE!
1277 	 *
1278 	 * The area_hw_addr_offset, as can be seen, does not hold the
1279 	 * actual hardware address offset. Rather, it holds the offset
1280 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1281 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1282 	 * mi_offset_paramc() to find the hardware address in the
1283 	 * *second* mblk (dl_xxx_req), not this mblk.
1284 	 *
1285 	 * Using mi_offset_paramc() is thus the *only* way to access
1286 	 * the dl_xxx_hw address.
1287 	 *
1288 	 * The squery hw address should *not* be accessed.
1289 	 *
1290 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1291 	 */
1292 
1293 	mp->b_cont = mp_tail;
1294 	return (mp);
1295 }
1296 
1297 /*
1298  * Create a dlpi message with room for phys+sap. When we come back in
1299  * ip_wput_ctl() we will strip the sap for those primitives which
1300  * only need a physical address.
1301  */
1302 static mblk_t *
1303 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1304     uint32_t *addr_lenp, uint32_t *addr_offp)
1305 {
1306 	mblk_t	*mp;
1307 	uint32_t	hw_addr_length;
1308 	char		*cp;
1309 	uint32_t	offset;
1310 	uint32_t 	size;
1311 
1312 	*addr_lenp = *addr_offp = 0;
1313 
1314 	hw_addr_length = ill->ill_phys_addr_length;
1315 	if (!hw_addr_length) {
1316 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1317 		return (NULL);
1318 	}
1319 
1320 	size = length;
1321 	switch (dl_primitive) {
1322 	case DL_ENABMULTI_REQ:
1323 	case DL_DISABMULTI_REQ:
1324 		size += hw_addr_length;
1325 		break;
1326 	case DL_PROMISCON_REQ:
1327 	case DL_PROMISCOFF_REQ:
1328 		break;
1329 	default:
1330 		return (NULL);
1331 	}
1332 	mp = allocb(size, BPRI_HI);
1333 	if (!mp)
1334 		return (NULL);
1335 	mp->b_wptr += size;
1336 	mp->b_datap->db_type = M_PROTO;
1337 
1338 	cp = (char *)mp->b_rptr;
1339 	offset = length;
1340 
1341 	switch (dl_primitive) {
1342 	case DL_ENABMULTI_REQ: {
1343 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1344 
1345 		dl->dl_primitive = dl_primitive;
1346 		dl->dl_addr_offset = offset;
1347 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1348 		*addr_offp = offset;
1349 		break;
1350 	}
1351 	case DL_DISABMULTI_REQ: {
1352 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1353 
1354 		dl->dl_primitive = dl_primitive;
1355 		dl->dl_addr_offset = offset;
1356 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1357 		*addr_offp = offset;
1358 		break;
1359 	}
1360 	case DL_PROMISCON_REQ:
1361 	case DL_PROMISCOFF_REQ: {
1362 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1363 
1364 		dl->dl_primitive = dl_primitive;
1365 		dl->dl_level = DL_PROMISC_MULTI;
1366 		break;
1367 	}
1368 	}
1369 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1370 	    *addr_lenp, *addr_offp));
1371 	return (mp);
1372 }
1373 
1374 void
1375 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1376 {
1377 	ill_t	*ill = (ill_t *)q->q_ptr;
1378 	mblk_t	*mp = mp_orig;
1379 	area_t	*area = (area_t *)mp->b_rptr;
1380 
1381 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1382 	if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL ||
1383 	    area->area_cmd != AR_ENTRY_SQUERY) {
1384 		putnext(q, mp);
1385 		return;
1386 	}
1387 	mp = mp->b_cont;
1388 
1389 	/*
1390 	 * Update dl_addr_length and dl_addr_offset for primitives that
1391 	 * have physical addresses as opposed to full saps
1392 	 */
1393 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1394 	case DL_ENABMULTI_REQ:
1395 		/* Track the state if this is the first enabmulti */
1396 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
1397 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
1398 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1399 		break;
1400 	case DL_DISABMULTI_REQ:
1401 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1402 		break;
1403 	default:
1404 		ip1dbg(("ip_wput_ctl: default\n"));
1405 		break;
1406 	}
1407 	freeb(mp_orig);
1408 	ill_dlpi_send(ill, mp);
1409 }
1410 
1411 /*
1412  * Rejoin any groups which have been explicitly joined by the application (we
1413  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1414  * bringing the interface down).  Note that because groups can be joined and
1415  * left while an interface is down, this may not be the same set of groups
1416  * that we left in ill_leave_multicast().
1417  */
1418 void
1419 ill_recover_multicast(ill_t *ill)
1420 {
1421 	ilm_t	*ilm;
1422 	char    addrbuf[INET6_ADDRSTRLEN];
1423 
1424 	ASSERT(IAM_WRITER_ILL(ill));
1425 
1426 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1427 		/*
1428 		 * Check how many ipif's that have members in this group -
1429 		 * if more then one we make sure that this entry is first
1430 		 * in the list.
1431 		 */
1432 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1433 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1434 			continue;
1435 		ip1dbg(("ill_recover_multicast: %s\n",
1436 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1437 		    sizeof (addrbuf))));
1438 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1439 			if (ill->ill_group == NULL) {
1440 				(void) ip_join_allmulti(ill->ill_ipif);
1441 			} else {
1442 				/*
1443 				 * We don't want to join on this ill,
1444 				 * if somebody else in the group has
1445 				 * already been nominated.
1446 				 */
1447 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1448 			}
1449 		} else {
1450 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1451 			    &ilm->ilm_v6addr);
1452 		}
1453 	}
1454 }
1455 
1456 /*
1457  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1458  * that were explicitly joined.  Note that both these functions could be
1459  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1460  * and DL_ENABMULTI_REQ messages when an interface is down.
1461  */
1462 void
1463 ill_leave_multicast(ill_t *ill)
1464 {
1465 	ilm_t	*ilm;
1466 	char    addrbuf[INET6_ADDRSTRLEN];
1467 
1468 	ASSERT(IAM_WRITER_ILL(ill));
1469 
1470 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1471 		/*
1472 		 * Check how many ipif's that have members in this group -
1473 		 * if more then one we make sure that this entry is first
1474 		 * in the list.
1475 		 */
1476 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1477 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1478 			continue;
1479 		ip1dbg(("ill_leave_multicast: %s\n",
1480 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1481 		    sizeof (addrbuf))));
1482 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1483 			(void) ip_leave_allmulti(ill->ill_ipif);
1484 			/*
1485 			 * If we were part of an IPMP group, then
1486 			 * ill_handoff_responsibility() has already
1487 			 * nominated a new member (so we don't).
1488 			 */
1489 			ASSERT(ill->ill_group == NULL);
1490 		} else {
1491 			(void) ip_ll_delmulti_v6(ill->ill_ipif,
1492 			    &ilm->ilm_v6addr);
1493 		}
1494 	}
1495 }
1496 
1497 /* Find an ilm for matching the ill */
1498 ilm_t *
1499 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1500 {
1501 	in6_addr_t	v6group;
1502 
1503 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1504 	    IAM_WRITER_ILL(ill));
1505 	/*
1506 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1507 	 */
1508 	if (group == INADDR_ANY)
1509 		v6group = ipv6_all_zeros;
1510 	else
1511 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1512 
1513 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1514 }
1515 
1516 /*
1517  * Find an ilm for matching the ill. All the ilm lookup functions
1518  * ignore ILM_DELETED ilms. These have been logically deleted, and
1519  * igmp and linklayer disable multicast have been done. Only mi_free
1520  * yet to be done. Still there in the list due to ilm_walkers. The
1521  * last walker will release it.
1522  */
1523 ilm_t *
1524 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1525 {
1526 	ilm_t	*ilm;
1527 
1528 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1529 	    IAM_WRITER_ILL(ill));
1530 
1531 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1532 		if (ilm->ilm_flags & ILM_DELETED)
1533 			continue;
1534 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1535 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1536 			return (ilm);
1537 	}
1538 	return (NULL);
1539 }
1540 
1541 ilm_t *
1542 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1543     zoneid_t zoneid)
1544 {
1545 	ilm_t *ilm;
1546 
1547 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1548 	    IAM_WRITER_ILL(ill));
1549 
1550 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1551 		if (ilm->ilm_flags & ILM_DELETED)
1552 			continue;
1553 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1554 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1555 		    ilm->ilm_orig_ifindex == index) {
1556 			return (ilm);
1557 		}
1558 	}
1559 	return (NULL);
1560 }
1561 
1562 ilm_t *
1563 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1564 {
1565 	in6_addr_t	v6group;
1566 
1567 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1568 	    IAM_WRITER_ILL(ill));
1569 	/*
1570 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1571 	 */
1572 	if (group == INADDR_ANY)
1573 		v6group = ipv6_all_zeros;
1574 	else
1575 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1576 
1577 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1578 }
1579 
1580 /*
1581  * Found an ilm for the ipif. Only needed for IPv4 which does
1582  * ipif specific socket options.
1583  */
1584 ilm_t *
1585 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1586 {
1587 	ill_t	*ill = ipif->ipif_ill;
1588 	ilm_t	*ilm;
1589 	in6_addr_t	v6group;
1590 
1591 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1592 	    IAM_WRITER_ILL(ill));
1593 
1594 	/*
1595 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1596 	 */
1597 	if (group == INADDR_ANY)
1598 		v6group = ipv6_all_zeros;
1599 	else
1600 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1601 
1602 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1603 		if (ilm->ilm_flags & ILM_DELETED)
1604 			continue;
1605 		if (ilm->ilm_ipif == ipif &&
1606 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1607 			return (ilm);
1608 	}
1609 	return (NULL);
1610 }
1611 
1612 /*
1613  * How many members on this ill?
1614  */
1615 int
1616 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1617 {
1618 	ilm_t	*ilm;
1619 	int i = 0;
1620 
1621 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1622 	    IAM_WRITER_ILL(ill));
1623 
1624 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1625 		if (ilm->ilm_flags & ILM_DELETED)
1626 			continue;
1627 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1628 			i++;
1629 		}
1630 	}
1631 	return (i);
1632 }
1633 
1634 /* Caller guarantees that the group is not already on the list */
1635 static ilm_t *
1636 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1637     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1638     zoneid_t zoneid)
1639 {
1640 	ill_t	*ill = ipif->ipif_ill;
1641 	ilm_t	*ilm;
1642 	ilm_t	*ilm_cur;
1643 	ilm_t	**ilm_ptpn;
1644 
1645 	ASSERT(IAM_WRITER_IPIF(ipif));
1646 
1647 	ilm = GETSTRUCT(ilm_t, 1);
1648 	if (ilm == NULL)
1649 		return (NULL);
1650 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1651 		ilm->ilm_filter = l_alloc();
1652 		if (ilm->ilm_filter == NULL) {
1653 			mi_free(ilm);
1654 			return (NULL);
1655 		}
1656 	}
1657 	ilm->ilm_v6addr = *v6group;
1658 	ilm->ilm_refcnt = 1;
1659 	ilm->ilm_zoneid = zoneid;
1660 	ilm->ilm_timer = INFINITY;
1661 	ilm->ilm_rtx.rtx_timer = INFINITY;
1662 
1663 	/*
1664 	 * IPv4 Multicast groups are joined using ipif.
1665 	 * IPv6 Multicast groups are joined using ill.
1666 	 */
1667 	if (ill->ill_isv6) {
1668 		ilm->ilm_ill = ill;
1669 		ilm->ilm_ipif = NULL;
1670 	} else {
1671 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1672 		ilm->ilm_ipif = ipif;
1673 		ilm->ilm_ill = NULL;
1674 	}
1675 	ASSERT(ill->ill_ipst);
1676 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1677 
1678 	/*
1679 	 * After this if ilm moves to a new ill, we don't change
1680 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1681 	 * it has been moved. Indexes don't match even when the application
1682 	 * wants to join on a FAILED/INACTIVE interface because we choose
1683 	 * a new interface to join in. This is considered as an implicit
1684 	 * move.
1685 	 */
1686 	ilm->ilm_orig_ifindex = orig_ifindex;
1687 
1688 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1689 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1690 
1691 	/*
1692 	 * Grab lock to give consistent view to readers
1693 	 */
1694 	mutex_enter(&ill->ill_lock);
1695 	/*
1696 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1697 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1698 	 * sending duplicates up when two applications in the same zone join the
1699 	 * same group on different logical interfaces.
1700 	 */
1701 	ilm_cur = ill->ill_ilm;
1702 	ilm_ptpn = &ill->ill_ilm;
1703 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1704 		ilm_ptpn = &ilm_cur->ilm_next;
1705 		ilm_cur = ilm_cur->ilm_next;
1706 	}
1707 	ilm->ilm_next = ilm_cur;
1708 	*ilm_ptpn = ilm;
1709 
1710 	/*
1711 	 * If we have an associated ilg, use its filter state; if not,
1712 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1713 	 */
1714 	if (ilgstat != ILGSTAT_NONE) {
1715 		if (!SLIST_IS_EMPTY(ilg_flist))
1716 			l_copy(ilg_flist, ilm->ilm_filter);
1717 		ilm->ilm_fmode = ilg_fmode;
1718 	} else {
1719 		ilm->ilm_no_ilg_cnt = 1;
1720 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1721 	}
1722 
1723 	mutex_exit(&ill->ill_lock);
1724 	return (ilm);
1725 }
1726 
1727 void
1728 ilm_walker_cleanup(ill_t *ill)
1729 {
1730 	ilm_t	**ilmp;
1731 	ilm_t	*ilm;
1732 
1733 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1734 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1735 
1736 	ilmp = &ill->ill_ilm;
1737 	while (*ilmp != NULL) {
1738 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1739 			ilm = *ilmp;
1740 			*ilmp = ilm->ilm_next;
1741 			FREE_SLIST(ilm->ilm_filter);
1742 			FREE_SLIST(ilm->ilm_pendsrcs);
1743 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1744 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1745 			ilm->ilm_ipst = NULL;
1746 			mi_free((char *)ilm);
1747 		} else {
1748 			ilmp = &(*ilmp)->ilm_next;
1749 		}
1750 	}
1751 	ill->ill_ilm_cleanup_reqd = 0;
1752 }
1753 
1754 /*
1755  * Unlink ilm and free it.
1756  */
1757 static void
1758 ilm_delete(ilm_t *ilm)
1759 {
1760 	ill_t	*ill;
1761 	ilm_t	**ilmp;
1762 
1763 	if (ilm->ilm_ipif != NULL) {
1764 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1765 		ASSERT(ilm->ilm_ill == NULL);
1766 		ill = ilm->ilm_ipif->ipif_ill;
1767 		ASSERT(!ill->ill_isv6);
1768 	} else {
1769 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1770 		ASSERT(ilm->ilm_ipif == NULL);
1771 		ill = ilm->ilm_ill;
1772 		ASSERT(ill->ill_isv6);
1773 	}
1774 	/*
1775 	 * Delete under lock protection so that readers don't stumble
1776 	 * on bad ilm_next
1777 	 */
1778 	mutex_enter(&ill->ill_lock);
1779 	if (ill->ill_ilm_walker_cnt != 0) {
1780 		ilm->ilm_flags |= ILM_DELETED;
1781 		ill->ill_ilm_cleanup_reqd = 1;
1782 		mutex_exit(&ill->ill_lock);
1783 		return;
1784 	}
1785 
1786 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1787 				;
1788 	*ilmp = ilm->ilm_next;
1789 	mutex_exit(&ill->ill_lock);
1790 
1791 	FREE_SLIST(ilm->ilm_filter);
1792 	FREE_SLIST(ilm->ilm_pendsrcs);
1793 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1794 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1795 	ilm->ilm_ipst = NULL;
1796 	mi_free((char *)ilm);
1797 }
1798 
1799 /* Free all ilms for this ipif */
1800 void
1801 ilm_free(ipif_t *ipif)
1802 {
1803 	ill_t	*ill = ipif->ipif_ill;
1804 	ilm_t	*ilm;
1805 	ilm_t	 *next_ilm;
1806 
1807 	ASSERT(IAM_WRITER_IPIF(ipif));
1808 
1809 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1810 		next_ilm = ilm->ilm_next;
1811 		if (ilm->ilm_ipif == ipif)
1812 			ilm_delete(ilm);
1813 	}
1814 }
1815 
1816 /*
1817  * Looks up the appropriate ipif given a v4 multicast group and interface
1818  * address.  On success, returns 0, with *ipifpp pointing to the found
1819  * struct.  On failure, returns an errno and *ipifpp is NULL.
1820  */
1821 int
1822 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1823     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1824 {
1825 	ipif_t *ipif;
1826 	int err = 0;
1827 	zoneid_t zoneid;
1828 	ip_stack_t	*ipst =  connp->conn_netstack->netstack_ip;
1829 
1830 	if (!CLASSD(group) || CLASSD(src)) {
1831 		return (EINVAL);
1832 	}
1833 	*ipifpp = NULL;
1834 
1835 	zoneid = IPCL_ZONEID(connp);
1836 
1837 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1838 	if (ifaddr != INADDR_ANY) {
1839 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1840 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1841 		if (err != 0 && err != EINPROGRESS)
1842 			err = EADDRNOTAVAIL;
1843 	} else if (ifindexp != NULL && *ifindexp != 0) {
1844 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1845 		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
1846 	} else {
1847 		ipif = ipif_lookup_group(group, zoneid, ipst);
1848 		if (ipif == NULL)
1849 			return (EADDRNOTAVAIL);
1850 	}
1851 	if (ipif == NULL)
1852 		return (err);
1853 
1854 	*ipifpp = ipif;
1855 	return (0);
1856 }
1857 
1858 /*
1859  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1860  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1861  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1862  * an errno and *illpp and *ipifpp are undefined.
1863  */
1864 int
1865 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1866     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1867     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1868 {
1869 	boolean_t src_unspec;
1870 	ill_t *ill = NULL;
1871 	ipif_t *ipif = NULL;
1872 	int err;
1873 	zoneid_t zoneid = connp->conn_zoneid;
1874 	queue_t *wq = CONNP_TO_WQ(connp);
1875 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1876 
1877 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1878 
1879 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1880 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1881 			return (EINVAL);
1882 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1883 		if (src_unspec) {
1884 			*v4src = INADDR_ANY;
1885 		} else {
1886 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1887 		}
1888 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1889 			return (EINVAL);
1890 		*ipifpp = NULL;
1891 		*isv6 = B_FALSE;
1892 	} else {
1893 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1894 			return (EINVAL);
1895 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1896 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1897 			return (EINVAL);
1898 		}
1899 		*illpp = NULL;
1900 		*isv6 = B_TRUE;
1901 	}
1902 
1903 	if (ifindex == 0) {
1904 		if (*isv6)
1905 			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
1906 		else
1907 			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
1908 		if (ill == NULL && ipif == NULL)
1909 			return (EADDRNOTAVAIL);
1910 	} else {
1911 		if (*isv6) {
1912 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1913 			    wq, first_mp, func, &err, ipst);
1914 			if (ill != NULL &&
1915 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1916 				ill_refrele(ill);
1917 				ill = NULL;
1918 				err = EADDRNOTAVAIL;
1919 			}
1920 		} else {
1921 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1922 			    zoneid, wq, first_mp, func, &err, ipst);
1923 		}
1924 		if (ill == NULL && ipif == NULL)
1925 			return (err);
1926 	}
1927 
1928 	*ipifpp = ipif;
1929 	*illpp = ill;
1930 	return (0);
1931 }
1932 
1933 static int
1934 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1935     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1936 {
1937 	ilg_t *ilg;
1938 	int i, numsrc, fmode, outsrcs;
1939 	struct sockaddr_in *sin;
1940 	struct sockaddr_in6 *sin6;
1941 	struct in_addr *addrp;
1942 	slist_t *fp;
1943 	boolean_t is_v4only_api;
1944 
1945 	mutex_enter(&connp->conn_lock);
1946 
1947 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1948 	if (ilg == NULL) {
1949 		mutex_exit(&connp->conn_lock);
1950 		return (EADDRNOTAVAIL);
1951 	}
1952 
1953 	if (gf == NULL) {
1954 		ASSERT(imsf != NULL);
1955 		ASSERT(!isv4mapped);
1956 		is_v4only_api = B_TRUE;
1957 		outsrcs = imsf->imsf_numsrc;
1958 	} else {
1959 		ASSERT(imsf == NULL);
1960 		is_v4only_api = B_FALSE;
1961 		outsrcs = gf->gf_numsrc;
1962 	}
1963 
1964 	/*
1965 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1966 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1967 	 * So we need to translate here.
1968 	 */
1969 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1970 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1971 	if ((fp = ilg->ilg_filter) == NULL) {
1972 		numsrc = 0;
1973 	} else {
1974 		for (i = 0; i < outsrcs; i++) {
1975 			if (i == fp->sl_numsrc)
1976 				break;
1977 			if (isv4mapped) {
1978 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1979 				sin6->sin6_family = AF_INET6;
1980 				sin6->sin6_addr = fp->sl_addr[i];
1981 			} else {
1982 				if (is_v4only_api) {
1983 					addrp = &imsf->imsf_slist[i];
1984 				} else {
1985 					sin = (struct sockaddr_in *)
1986 					    &gf->gf_slist[i];
1987 					sin->sin_family = AF_INET;
1988 					addrp = &sin->sin_addr;
1989 				}
1990 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1991 			}
1992 		}
1993 		numsrc = fp->sl_numsrc;
1994 	}
1995 
1996 	if (is_v4only_api) {
1997 		imsf->imsf_numsrc = numsrc;
1998 		imsf->imsf_fmode = fmode;
1999 	} else {
2000 		gf->gf_numsrc = numsrc;
2001 		gf->gf_fmode = fmode;
2002 	}
2003 
2004 	mutex_exit(&connp->conn_lock);
2005 
2006 	return (0);
2007 }
2008 
2009 static int
2010 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2011     const struct in6_addr *grp, ill_t *ill)
2012 {
2013 	ilg_t *ilg;
2014 	int i;
2015 	struct sockaddr_storage *sl;
2016 	struct sockaddr_in6 *sin6;
2017 	slist_t *fp;
2018 
2019 	mutex_enter(&connp->conn_lock);
2020 
2021 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2022 	if (ilg == NULL) {
2023 		mutex_exit(&connp->conn_lock);
2024 		return (EADDRNOTAVAIL);
2025 	}
2026 
2027 	/*
2028 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2029 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2030 	 * So we need to translate here.
2031 	 */
2032 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2033 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2034 	if ((fp = ilg->ilg_filter) == NULL) {
2035 		gf->gf_numsrc = 0;
2036 	} else {
2037 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2038 			if (i == fp->sl_numsrc)
2039 				break;
2040 			sin6 = (struct sockaddr_in6 *)sl;
2041 			sin6->sin6_family = AF_INET6;
2042 			sin6->sin6_addr = fp->sl_addr[i];
2043 		}
2044 		gf->gf_numsrc = fp->sl_numsrc;
2045 	}
2046 
2047 	mutex_exit(&connp->conn_lock);
2048 
2049 	return (0);
2050 }
2051 
2052 static int
2053 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2054     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2055 {
2056 	ilg_t *ilg;
2057 	int i, err, insrcs, infmode, new_fmode;
2058 	struct sockaddr_in *sin;
2059 	struct sockaddr_in6 *sin6;
2060 	struct in_addr *addrp;
2061 	slist_t *orig_filter = NULL;
2062 	slist_t *new_filter = NULL;
2063 	mcast_record_t orig_fmode;
2064 	boolean_t leave_grp, is_v4only_api;
2065 	ilg_stat_t ilgstat;
2066 
2067 	if (gf == NULL) {
2068 		ASSERT(imsf != NULL);
2069 		ASSERT(!isv4mapped);
2070 		is_v4only_api = B_TRUE;
2071 		insrcs = imsf->imsf_numsrc;
2072 		infmode = imsf->imsf_fmode;
2073 	} else {
2074 		ASSERT(imsf == NULL);
2075 		is_v4only_api = B_FALSE;
2076 		insrcs = gf->gf_numsrc;
2077 		infmode = gf->gf_fmode;
2078 	}
2079 
2080 	/* Make sure we can handle the source list */
2081 	if (insrcs > MAX_FILTER_SIZE)
2082 		return (ENOBUFS);
2083 
2084 	/*
2085 	 * setting the filter to (INCLUDE, NULL) is treated
2086 	 * as a request to leave the group.
2087 	 */
2088 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2089 
2090 	ASSERT(IAM_WRITER_IPIF(ipif));
2091 
2092 	mutex_enter(&connp->conn_lock);
2093 
2094 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2095 	if (ilg == NULL) {
2096 		/*
2097 		 * if the request was actually to leave, and we
2098 		 * didn't find an ilg, there's nothing to do.
2099 		 */
2100 		if (!leave_grp)
2101 			ilg = conn_ilg_alloc(connp);
2102 		if (leave_grp || ilg == NULL) {
2103 			mutex_exit(&connp->conn_lock);
2104 			return (leave_grp ? 0 : ENOMEM);
2105 		}
2106 		ilgstat = ILGSTAT_NEW;
2107 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2108 		ilg->ilg_ipif = ipif;
2109 		ilg->ilg_ill = NULL;
2110 		ilg->ilg_orig_ifindex = 0;
2111 	} else if (leave_grp) {
2112 		ilg_delete(connp, ilg, NULL);
2113 		mutex_exit(&connp->conn_lock);
2114 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2115 		return (0);
2116 	} else {
2117 		ilgstat = ILGSTAT_CHANGE;
2118 		/* Preserve existing state in case ip_addmulti() fails */
2119 		orig_fmode = ilg->ilg_fmode;
2120 		if (ilg->ilg_filter == NULL) {
2121 			orig_filter = NULL;
2122 		} else {
2123 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2124 			if (orig_filter == NULL) {
2125 				mutex_exit(&connp->conn_lock);
2126 				return (ENOMEM);
2127 			}
2128 		}
2129 	}
2130 
2131 	/*
2132 	 * Alloc buffer to copy new state into (see below) before
2133 	 * we make any changes, so we can bail if it fails.
2134 	 */
2135 	if ((new_filter = l_alloc()) == NULL) {
2136 		mutex_exit(&connp->conn_lock);
2137 		err = ENOMEM;
2138 		goto free_and_exit;
2139 	}
2140 
2141 	if (insrcs == 0) {
2142 		CLEAR_SLIST(ilg->ilg_filter);
2143 	} else {
2144 		slist_t *fp;
2145 		if (ilg->ilg_filter == NULL) {
2146 			fp = l_alloc();
2147 			if (fp == NULL) {
2148 				if (ilgstat == ILGSTAT_NEW)
2149 					ilg_delete(connp, ilg, NULL);
2150 				mutex_exit(&connp->conn_lock);
2151 				err = ENOMEM;
2152 				goto free_and_exit;
2153 			}
2154 		} else {
2155 			fp = ilg->ilg_filter;
2156 		}
2157 		for (i = 0; i < insrcs; i++) {
2158 			if (isv4mapped) {
2159 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2160 				fp->sl_addr[i] = sin6->sin6_addr;
2161 			} else {
2162 				if (is_v4only_api) {
2163 					addrp = &imsf->imsf_slist[i];
2164 				} else {
2165 					sin = (struct sockaddr_in *)
2166 					    &gf->gf_slist[i];
2167 					addrp = &sin->sin_addr;
2168 				}
2169 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2170 			}
2171 		}
2172 		fp->sl_numsrc = insrcs;
2173 		ilg->ilg_filter = fp;
2174 	}
2175 	/*
2176 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2177 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2178 	 * So we need to translate here.
2179 	 */
2180 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2181 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2182 
2183 	/*
2184 	 * Save copy of ilg's filter state to pass to other functions,
2185 	 * so we can release conn_lock now.
2186 	 */
2187 	new_fmode = ilg->ilg_fmode;
2188 	l_copy(ilg->ilg_filter, new_filter);
2189 
2190 	mutex_exit(&connp->conn_lock);
2191 
2192 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2193 	if (err != 0) {
2194 		/*
2195 		 * Restore the original filter state, or delete the
2196 		 * newly-created ilg.  We need to look up the ilg
2197 		 * again, though, since we've not been holding the
2198 		 * conn_lock.
2199 		 */
2200 		mutex_enter(&connp->conn_lock);
2201 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2202 		ASSERT(ilg != NULL);
2203 		if (ilgstat == ILGSTAT_NEW) {
2204 			ilg_delete(connp, ilg, NULL);
2205 		} else {
2206 			ilg->ilg_fmode = orig_fmode;
2207 			if (SLIST_IS_EMPTY(orig_filter)) {
2208 				CLEAR_SLIST(ilg->ilg_filter);
2209 			} else {
2210 				/*
2211 				 * We didn't free the filter, even if we
2212 				 * were trying to make the source list empty;
2213 				 * so if orig_filter isn't empty, the ilg
2214 				 * must still have a filter alloc'd.
2215 				 */
2216 				l_copy(orig_filter, ilg->ilg_filter);
2217 			}
2218 		}
2219 		mutex_exit(&connp->conn_lock);
2220 	}
2221 
2222 free_and_exit:
2223 	l_free(orig_filter);
2224 	l_free(new_filter);
2225 
2226 	return (err);
2227 }
2228 
2229 static int
2230 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2231     const struct in6_addr *grp, ill_t *ill)
2232 {
2233 	ilg_t *ilg;
2234 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2235 	slist_t *orig_filter = NULL;
2236 	slist_t *new_filter = NULL;
2237 	struct sockaddr_storage *sl;
2238 	struct sockaddr_in6 *sin6;
2239 	boolean_t leave_grp;
2240 	ilg_stat_t ilgstat;
2241 
2242 	/* Make sure we can handle the source list */
2243 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2244 		return (ENOBUFS);
2245 
2246 	/*
2247 	 * setting the filter to (INCLUDE, NULL) is treated
2248 	 * as a request to leave the group.
2249 	 */
2250 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2251 
2252 	ASSERT(IAM_WRITER_ILL(ill));
2253 
2254 	/*
2255 	 * Use the ifindex to do the lookup.  We can't use the ill
2256 	 * directly because ilg_ill could point to a different ill
2257 	 * if things have moved.
2258 	 */
2259 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2260 
2261 	mutex_enter(&connp->conn_lock);
2262 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2263 	if (ilg == NULL) {
2264 		/*
2265 		 * if the request was actually to leave, and we
2266 		 * didn't find an ilg, there's nothing to do.
2267 		 */
2268 		if (!leave_grp)
2269 			ilg = conn_ilg_alloc(connp);
2270 		if (leave_grp || ilg == NULL) {
2271 			mutex_exit(&connp->conn_lock);
2272 			return (leave_grp ? 0 : ENOMEM);
2273 		}
2274 		ilgstat = ILGSTAT_NEW;
2275 		ilg->ilg_v6group = *grp;
2276 		ilg->ilg_ipif = NULL;
2277 		/*
2278 		 * Choose our target ill to join on. This might be
2279 		 * different from the ill we've been given if it's
2280 		 * currently down and part of a group.
2281 		 *
2282 		 * new ill is not refheld; we are writer.
2283 		 */
2284 		ill = ip_choose_multi_ill(ill, grp);
2285 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2286 		ilg->ilg_ill = ill;
2287 		/*
2288 		 * Remember the index that we joined on, so that we can
2289 		 * successfully delete them later on and also search for
2290 		 * duplicates if the application wants to join again.
2291 		 */
2292 		ilg->ilg_orig_ifindex = orig_ifindex;
2293 	} else if (leave_grp) {
2294 		/*
2295 		 * Use the ilg's current ill for the deletion,
2296 		 * we might have failed over.
2297 		 */
2298 		ill = ilg->ilg_ill;
2299 		ilg_delete(connp, ilg, NULL);
2300 		mutex_exit(&connp->conn_lock);
2301 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2302 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2303 		return (0);
2304 	} else {
2305 		ilgstat = ILGSTAT_CHANGE;
2306 		/*
2307 		 * The current ill might be different from the one we were
2308 		 * asked to join on (if failover has occurred); we should
2309 		 * join on the ill stored in the ilg.  The original ill
2310 		 * is noted in ilg_orig_ifindex, which matched our request.
2311 		 */
2312 		ill = ilg->ilg_ill;
2313 		/* preserve existing state in case ip_addmulti() fails */
2314 		orig_fmode = ilg->ilg_fmode;
2315 		if (ilg->ilg_filter == NULL) {
2316 			orig_filter = NULL;
2317 		} else {
2318 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2319 			if (orig_filter == NULL) {
2320 				mutex_exit(&connp->conn_lock);
2321 				return (ENOMEM);
2322 			}
2323 		}
2324 	}
2325 
2326 	/*
2327 	 * Alloc buffer to copy new state into (see below) before
2328 	 * we make any changes, so we can bail if it fails.
2329 	 */
2330 	if ((new_filter = l_alloc()) == NULL) {
2331 		mutex_exit(&connp->conn_lock);
2332 		err = ENOMEM;
2333 		goto free_and_exit;
2334 	}
2335 
2336 	if (gf->gf_numsrc == 0) {
2337 		CLEAR_SLIST(ilg->ilg_filter);
2338 	} else {
2339 		slist_t *fp;
2340 		if (ilg->ilg_filter == NULL) {
2341 			fp = l_alloc();
2342 			if (fp == NULL) {
2343 				if (ilgstat == ILGSTAT_NEW)
2344 					ilg_delete(connp, ilg, NULL);
2345 				mutex_exit(&connp->conn_lock);
2346 				err = ENOMEM;
2347 				goto free_and_exit;
2348 			}
2349 		} else {
2350 			fp = ilg->ilg_filter;
2351 		}
2352 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2353 			sin6 = (struct sockaddr_in6 *)sl;
2354 			fp->sl_addr[i] = sin6->sin6_addr;
2355 		}
2356 		fp->sl_numsrc = gf->gf_numsrc;
2357 		ilg->ilg_filter = fp;
2358 	}
2359 	/*
2360 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2361 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2362 	 * So we need to translate here.
2363 	 */
2364 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2365 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2366 
2367 	/*
2368 	 * Save copy of ilg's filter state to pass to other functions,
2369 	 * so we can release conn_lock now.
2370 	 */
2371 	new_fmode = ilg->ilg_fmode;
2372 	l_copy(ilg->ilg_filter, new_filter);
2373 
2374 	mutex_exit(&connp->conn_lock);
2375 
2376 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2377 	    ilgstat, new_fmode, new_filter);
2378 	if (err != 0) {
2379 		/*
2380 		 * Restore the original filter state, or delete the
2381 		 * newly-created ilg.  We need to look up the ilg
2382 		 * again, though, since we've not been holding the
2383 		 * conn_lock.
2384 		 */
2385 		mutex_enter(&connp->conn_lock);
2386 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2387 		ASSERT(ilg != NULL);
2388 		if (ilgstat == ILGSTAT_NEW) {
2389 			ilg_delete(connp, ilg, NULL);
2390 		} else {
2391 			ilg->ilg_fmode = orig_fmode;
2392 			if (SLIST_IS_EMPTY(orig_filter)) {
2393 				CLEAR_SLIST(ilg->ilg_filter);
2394 			} else {
2395 				/*
2396 				 * We didn't free the filter, even if we
2397 				 * were trying to make the source list empty;
2398 				 * so if orig_filter isn't empty, the ilg
2399 				 * must still have a filter alloc'd.
2400 				 */
2401 				l_copy(orig_filter, ilg->ilg_filter);
2402 			}
2403 		}
2404 		mutex_exit(&connp->conn_lock);
2405 	}
2406 
2407 free_and_exit:
2408 	l_free(orig_filter);
2409 	l_free(new_filter);
2410 
2411 	return (err);
2412 }
2413 
2414 /*
2415  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2416  */
2417 /* ARGSUSED */
2418 int
2419 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2420     ip_ioctl_cmd_t *ipip, void *ifreq)
2421 {
2422 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2423 	/* existence verified in ip_wput_nondata() */
2424 	mblk_t *data_mp = mp->b_cont->b_cont;
2425 	int datalen, err, cmd, minsize;
2426 	int expsize = 0;
2427 	conn_t *connp;
2428 	boolean_t isv6, is_v4only_api, getcmd;
2429 	struct sockaddr_in *gsin;
2430 	struct sockaddr_in6 *gsin6;
2431 	ipaddr_t v4grp;
2432 	in6_addr_t v6grp;
2433 	struct group_filter *gf = NULL;
2434 	struct ip_msfilter *imsf = NULL;
2435 	mblk_t *ndp;
2436 
2437 	if (data_mp->b_cont != NULL) {
2438 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2439 			return (ENOMEM);
2440 		freemsg(data_mp);
2441 		data_mp = ndp;
2442 		mp->b_cont->b_cont = data_mp;
2443 	}
2444 
2445 	cmd = iocp->ioc_cmd;
2446 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2447 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2448 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2449 	datalen = MBLKL(data_mp);
2450 
2451 	if (datalen < minsize)
2452 		return (EINVAL);
2453 
2454 	/*
2455 	 * now we know we have at least have the initial structure,
2456 	 * but need to check for the source list array.
2457 	 */
2458 	if (is_v4only_api) {
2459 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2460 		isv6 = B_FALSE;
2461 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2462 	} else {
2463 		gf = (struct group_filter *)data_mp->b_rptr;
2464 		if (gf->gf_group.ss_family == AF_INET6) {
2465 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2466 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2467 		} else {
2468 			isv6 = B_FALSE;
2469 		}
2470 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2471 	}
2472 	if (datalen < expsize)
2473 		return (EINVAL);
2474 
2475 	connp = Q_TO_CONN(q);
2476 
2477 	/* operation not supported on the virtual network interface */
2478 	if (IS_VNI(ipif->ipif_ill))
2479 		return (EINVAL);
2480 
2481 	if (isv6) {
2482 		ill_t *ill = ipif->ipif_ill;
2483 		ill_refhold(ill);
2484 
2485 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2486 		v6grp = gsin6->sin6_addr;
2487 		if (getcmd)
2488 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2489 		else
2490 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2491 
2492 		ill_refrele(ill);
2493 	} else {
2494 		boolean_t isv4mapped = B_FALSE;
2495 		if (is_v4only_api) {
2496 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2497 		} else {
2498 			if (gf->gf_group.ss_family == AF_INET) {
2499 				gsin = (struct sockaddr_in *)&gf->gf_group;
2500 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2501 			} else {
2502 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2503 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2504 				    v4grp);
2505 				isv4mapped = B_TRUE;
2506 			}
2507 		}
2508 		if (getcmd)
2509 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2510 			    isv4mapped);
2511 		else
2512 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2513 			    isv4mapped);
2514 	}
2515 
2516 	return (err);
2517 }
2518 
2519 /*
2520  * Finds the ipif based on information in the ioctl headers.  Needed to make
2521  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2522  * ioctls prior to calling the ioctl's handler function).
2523  */
2524 int
2525 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2526     cmd_info_t *ci, ipsq_func_t func)
2527 {
2528 	int cmd = ipip->ipi_cmd;
2529 	int err = 0;
2530 	conn_t *connp;
2531 	ipif_t *ipif;
2532 	/* caller has verified this mblk exists */
2533 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2534 	struct ip_msfilter *imsf;
2535 	struct group_filter *gf;
2536 	ipaddr_t v4addr, v4grp;
2537 	in6_addr_t v6grp;
2538 	uint32_t index;
2539 	zoneid_t zoneid;
2540 	ip_stack_t *ipst;
2541 
2542 	connp = Q_TO_CONN(q);
2543 	zoneid = connp->conn_zoneid;
2544 	ipst = connp->conn_netstack->netstack_ip;
2545 
2546 	/* don't allow multicast operations on a tcp conn */
2547 	if (IPCL_IS_TCP(connp))
2548 		return (ENOPROTOOPT);
2549 
2550 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2551 		/* don't allow v4-specific ioctls on v6 socket */
2552 		if (connp->conn_af_isv6)
2553 			return (EAFNOSUPPORT);
2554 
2555 		imsf = (struct ip_msfilter *)dbuf;
2556 		v4addr = imsf->imsf_interface.s_addr;
2557 		v4grp = imsf->imsf_multiaddr.s_addr;
2558 		if (v4addr == INADDR_ANY) {
2559 			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2560 			if (ipif == NULL)
2561 				err = EADDRNOTAVAIL;
2562 		} else {
2563 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2564 			    func, &err, ipst);
2565 		}
2566 	} else {
2567 		boolean_t isv6 = B_FALSE;
2568 		gf = (struct group_filter *)dbuf;
2569 		index = gf->gf_interface;
2570 		if (gf->gf_group.ss_family == AF_INET6) {
2571 			struct sockaddr_in6 *sin6;
2572 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2573 			v6grp = sin6->sin6_addr;
2574 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2575 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2576 			else
2577 				isv6 = B_TRUE;
2578 		} else if (gf->gf_group.ss_family == AF_INET) {
2579 			struct sockaddr_in *sin;
2580 			sin = (struct sockaddr_in *)&gf->gf_group;
2581 			v4grp = sin->sin_addr.s_addr;
2582 		} else {
2583 			return (EAFNOSUPPORT);
2584 		}
2585 		if (index == 0) {
2586 			if (isv6) {
2587 				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
2588 				    ipst);
2589 			} else {
2590 				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
2591 			}
2592 			if (ipif == NULL)
2593 				err = EADDRNOTAVAIL;
2594 		} else {
2595 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2596 			    q, mp, func, &err, ipst);
2597 		}
2598 	}
2599 
2600 	ci->ci_ipif = ipif;
2601 	return (err);
2602 }
2603 
2604 /*
2605  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2606  * in in two stages, as the first copyin tells us the size of the attached
2607  * source buffer.  This function is called by ip_wput_nondata() after the
2608  * first copyin has completed; it figures out how big the second stage
2609  * needs to be, and kicks it off.
2610  *
2611  * In some cases (numsrc < 2), the second copyin is not needed as the
2612  * first one gets a complete structure containing 1 source addr.
2613  *
2614  * The function returns 0 if a second copyin has been started (i.e. there's
2615  * no more work to be done right now), or 1 if the second copyin is not
2616  * needed and ip_wput_nondata() can continue its processing.
2617  */
2618 int
2619 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2620 {
2621 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2622 	int cmd = iocp->ioc_cmd;
2623 	/* validity of this checked in ip_wput_nondata() */
2624 	mblk_t *mp1 = mp->b_cont->b_cont;
2625 	int copysize = 0;
2626 	int offset;
2627 
2628 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2629 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2630 		if (gf->gf_numsrc >= 2) {
2631 			offset = sizeof (struct group_filter);
2632 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2633 		}
2634 	} else {
2635 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2636 		if (imsf->imsf_numsrc >= 2) {
2637 			offset = sizeof (struct ip_msfilter);
2638 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2639 		}
2640 	}
2641 	if (copysize > 0) {
2642 		mi_copyin_n(q, mp, offset, copysize);
2643 		return (0);
2644 	}
2645 	return (1);
2646 }
2647 
2648 /*
2649  * Handle the following optmgmt:
2650  *	IP_ADD_MEMBERSHIP		must not have joined already
2651  *	MCAST_JOIN_GROUP		must not have joined already
2652  *	IP_BLOCK_SOURCE			must have joined already
2653  *	MCAST_BLOCK_SOURCE		must have joined already
2654  *	IP_JOIN_SOURCE_GROUP		may have joined already
2655  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2656  *
2657  * fmode and src parameters may be used to determine which option is
2658  * being set, as follows (the IP_* and MCAST_* versions of each option
2659  * are functionally equivalent):
2660  *	opt			fmode			src
2661  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2662  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2663  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2664  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2665  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2666  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2667  *
2668  * Changing the filter mode is not allowed; if a matching ilg already
2669  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2670  *
2671  * Verifies that there is a source address of appropriate scope for
2672  * the group; if not, EADDRNOTAVAIL is returned.
2673  *
2674  * The interface to be used may be identified by an address or by an
2675  * index.  A pointer to the index is passed; if it is NULL, use the
2676  * address, otherwise, use the index.
2677  */
2678 int
2679 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2680     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2681     mblk_t *first_mp)
2682 {
2683 	ipif_t	*ipif;
2684 	ipsq_t	*ipsq;
2685 	int err = 0;
2686 	ill_t	*ill;
2687 
2688 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2689 	    ip_restart_optmgmt, &ipif);
2690 	if (err != 0) {
2691 		if (err != EINPROGRESS) {
2692 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2693 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2694 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2695 		}
2696 		return (err);
2697 	}
2698 	ASSERT(ipif != NULL);
2699 
2700 	ill = ipif->ipif_ill;
2701 	/* Operation not supported on a virtual network interface */
2702 	if (IS_VNI(ill)) {
2703 		ipif_refrele(ipif);
2704 		return (EINVAL);
2705 	}
2706 
2707 	if (checkonly) {
2708 		/*
2709 		 * do not do operation, just pretend to - new T_CHECK
2710 		 * semantics. The error return case above if encountered
2711 		 * considered a good enough "check" here.
2712 		 */
2713 		ipif_refrele(ipif);
2714 		return (0);
2715 	}
2716 
2717 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2718 	    NEW_OP);
2719 
2720 	/* unspecified source addr => no source filtering */
2721 	err = ilg_add(connp, group, ipif, fmode, src);
2722 
2723 	IPSQ_EXIT(ipsq);
2724 
2725 	ipif_refrele(ipif);
2726 	return (err);
2727 }
2728 
2729 /*
2730  * Handle the following optmgmt:
2731  *	IPV6_JOIN_GROUP			must not have joined already
2732  *	MCAST_JOIN_GROUP		must not have joined already
2733  *	MCAST_BLOCK_SOURCE		must have joined already
2734  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2735  *
2736  * fmode and src parameters may be used to determine which option is
2737  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2738  * are functionally equivalent):
2739  *	opt			fmode			v6src
2740  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2741  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2742  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2743  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2744  *
2745  * Changing the filter mode is not allowed; if a matching ilg already
2746  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2747  *
2748  * Verifies that there is a source address of appropriate scope for
2749  * the group; if not, EADDRNOTAVAIL is returned.
2750  *
2751  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2752  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2753  * v6src is also v4-mapped.
2754  */
2755 int
2756 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2757     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2758     const in6_addr_t *v6src, mblk_t *first_mp)
2759 {
2760 	ill_t *ill;
2761 	ipif_t	*ipif;
2762 	char buf[INET6_ADDRSTRLEN];
2763 	ipaddr_t v4group, v4src;
2764 	boolean_t isv6;
2765 	ipsq_t	*ipsq;
2766 	int	err;
2767 
2768 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2769 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2770 	if (err != 0) {
2771 		if (err != EINPROGRESS) {
2772 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2773 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2774 			    sizeof (buf)), ifindex));
2775 		}
2776 		return (err);
2777 	}
2778 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2779 
2780 	/* operation is not supported on the virtual network interface */
2781 	if (isv6) {
2782 		if (IS_VNI(ill)) {
2783 			ill_refrele(ill);
2784 			return (EINVAL);
2785 		}
2786 	} else {
2787 		if (IS_VNI(ipif->ipif_ill)) {
2788 			ipif_refrele(ipif);
2789 			return (EINVAL);
2790 		}
2791 	}
2792 
2793 	if (checkonly) {
2794 		/*
2795 		 * do not do operation, just pretend to - new T_CHECK
2796 		 * semantics. The error return case above if encountered
2797 		 * considered a good enough "check" here.
2798 		 */
2799 		if (isv6)
2800 			ill_refrele(ill);
2801 		else
2802 			ipif_refrele(ipif);
2803 		return (0);
2804 	}
2805 
2806 	if (!isv6) {
2807 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2808 		    ipsq, NEW_OP);
2809 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2810 		IPSQ_EXIT(ipsq);
2811 		ipif_refrele(ipif);
2812 	} else {
2813 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2814 		    ipsq, NEW_OP);
2815 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2816 		IPSQ_EXIT(ipsq);
2817 		ill_refrele(ill);
2818 	}
2819 
2820 	return (err);
2821 }
2822 
2823 static int
2824 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2825     mcast_record_t fmode, ipaddr_t src)
2826 {
2827 	ilg_t	*ilg;
2828 	in6_addr_t v6src;
2829 	boolean_t leaving = B_FALSE;
2830 
2831 	ASSERT(IAM_WRITER_IPIF(ipif));
2832 
2833 	/*
2834 	 * The ilg is valid only while we hold the conn lock. Once we drop
2835 	 * the lock, another thread can locate another ilg on this connp,
2836 	 * but on a different ipif, and delete it, and cause the ilg array
2837 	 * to be reallocated and copied. Hence do the ilg_delete before
2838 	 * dropping the lock.
2839 	 */
2840 	mutex_enter(&connp->conn_lock);
2841 	ilg = ilg_lookup_ipif(connp, group, ipif);
2842 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2843 		mutex_exit(&connp->conn_lock);
2844 		return (EADDRNOTAVAIL);
2845 	}
2846 
2847 	/*
2848 	 * Decide if we're actually deleting the ilg or just removing a
2849 	 * source filter address; if just removing an addr, make sure we
2850 	 * aren't trying to change the filter mode, and that the addr is
2851 	 * actually in our filter list already.  If we're removing the
2852 	 * last src in an include list, just delete the ilg.
2853 	 */
2854 	if (src == INADDR_ANY) {
2855 		v6src = ipv6_all_zeros;
2856 		leaving = B_TRUE;
2857 	} else {
2858 		int err = 0;
2859 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2860 		if (fmode != ilg->ilg_fmode)
2861 			err = EINVAL;
2862 		else if (ilg->ilg_filter == NULL ||
2863 		    !list_has_addr(ilg->ilg_filter, &v6src))
2864 			err = EADDRNOTAVAIL;
2865 		if (err != 0) {
2866 			mutex_exit(&connp->conn_lock);
2867 			return (err);
2868 		}
2869 		if (fmode == MODE_IS_INCLUDE &&
2870 		    ilg->ilg_filter->sl_numsrc == 1) {
2871 			v6src = ipv6_all_zeros;
2872 			leaving = B_TRUE;
2873 		}
2874 	}
2875 
2876 	ilg_delete(connp, ilg, &v6src);
2877 	mutex_exit(&connp->conn_lock);
2878 
2879 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2880 	return (0);
2881 }
2882 
2883 static int
2884 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2885     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2886 {
2887 	ilg_t	*ilg;
2888 	ill_t	*ilg_ill;
2889 	uint_t	ilg_orig_ifindex;
2890 	boolean_t leaving = B_TRUE;
2891 
2892 	ASSERT(IAM_WRITER_ILL(ill));
2893 
2894 	/*
2895 	 * Use the index that we originally used to join. We can't
2896 	 * use the ill directly because ilg_ill could point to
2897 	 * a new ill if things have moved.
2898 	 */
2899 	mutex_enter(&connp->conn_lock);
2900 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2901 	    ill->ill_phyint->phyint_ifindex);
2902 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2903 		mutex_exit(&connp->conn_lock);
2904 		return (EADDRNOTAVAIL);
2905 	}
2906 
2907 	/*
2908 	 * Decide if we're actually deleting the ilg or just removing a
2909 	 * source filter address; if just removing an addr, make sure we
2910 	 * aren't trying to change the filter mode, and that the addr is
2911 	 * actually in our filter list already.  If we're removing the
2912 	 * last src in an include list, just delete the ilg.
2913 	 */
2914 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2915 		int err = 0;
2916 		if (fmode != ilg->ilg_fmode)
2917 			err = EINVAL;
2918 		else if (ilg->ilg_filter == NULL ||
2919 		    !list_has_addr(ilg->ilg_filter, v6src))
2920 			err = EADDRNOTAVAIL;
2921 		if (err != 0) {
2922 			mutex_exit(&connp->conn_lock);
2923 			return (err);
2924 		}
2925 		if (fmode == MODE_IS_INCLUDE &&
2926 		    ilg->ilg_filter->sl_numsrc == 1)
2927 			v6src = NULL;
2928 		else
2929 			leaving = B_FALSE;
2930 	}
2931 
2932 	ilg_ill = ilg->ilg_ill;
2933 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2934 	ilg_delete(connp, ilg, v6src);
2935 	mutex_exit(&connp->conn_lock);
2936 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2937 	    connp->conn_zoneid, B_FALSE, leaving);
2938 
2939 	return (0);
2940 }
2941 
2942 /*
2943  * Handle the following optmgmt:
2944  *	IP_DROP_MEMBERSHIP		will leave
2945  *	MCAST_LEAVE_GROUP		will leave
2946  *	IP_UNBLOCK_SOURCE		will not leave
2947  *	MCAST_UNBLOCK_SOURCE		will not leave
2948  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2949  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2950  *
2951  * fmode and src parameters may be used to determine which option is
2952  * being set, as follows (the IP_* and MCAST_* versions of each option
2953  * are functionally equivalent):
2954  *	opt			 fmode			src
2955  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2956  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2957  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2958  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2959  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
2960  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
2961  *
2962  * Changing the filter mode is not allowed; if a matching ilg already
2963  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2964  *
2965  * The interface to be used may be identified by an address or by an
2966  * index.  A pointer to the index is passed; if it is NULL, use the
2967  * address, otherwise, use the index.
2968  */
2969 int
2970 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2971     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2972     mblk_t *first_mp)
2973 {
2974 	ipif_t	*ipif;
2975 	ipsq_t	*ipsq;
2976 	int	err;
2977 	ill_t	*ill;
2978 
2979 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2980 	    ip_restart_optmgmt, &ipif);
2981 	if (err != 0) {
2982 		if (err != EINPROGRESS) {
2983 			ip1dbg(("ip_opt_delete_group: no ipif for group "
2984 			    "0x%x, ifaddr 0x%x\n",
2985 			    (int)ntohl(group), (int)ntohl(ifaddr)));
2986 		}
2987 		return (err);
2988 	}
2989 	ASSERT(ipif != NULL);
2990 
2991 	ill = ipif->ipif_ill;
2992 	/* Operation not supported on a virtual network interface */
2993 	if (IS_VNI(ill)) {
2994 		ipif_refrele(ipif);
2995 		return (EINVAL);
2996 	}
2997 
2998 	if (checkonly) {
2999 		/*
3000 		 * do not do operation, just pretend to - new T_CHECK
3001 		 * semantics. The error return case above if encountered
3002 		 * considered a good enough "check" here.
3003 		 */
3004 		ipif_refrele(ipif);
3005 		return (0);
3006 	}
3007 
3008 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3009 	    NEW_OP);
3010 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3011 	IPSQ_EXIT(ipsq);
3012 
3013 	ipif_refrele(ipif);
3014 	return (err);
3015 }
3016 
3017 /*
3018  * Handle the following optmgmt:
3019  *	IPV6_LEAVE_GROUP		will leave
3020  *	MCAST_LEAVE_GROUP		will leave
3021  *	MCAST_UNBLOCK_SOURCE		will not leave
3022  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3023  *
3024  * fmode and src parameters may be used to determine which option is
3025  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3026  * are functionally equivalent):
3027  *	opt			 fmode			v6src
3028  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3029  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3030  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3031  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3032  *
3033  * Changing the filter mode is not allowed; if a matching ilg already
3034  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3035  *
3036  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3037  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3038  * v6src is also v4-mapped.
3039  */
3040 int
3041 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3042     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3043     const in6_addr_t *v6src, mblk_t *first_mp)
3044 {
3045 	ill_t *ill;
3046 	ipif_t	*ipif;
3047 	char	buf[INET6_ADDRSTRLEN];
3048 	ipaddr_t v4group, v4src;
3049 	boolean_t isv6;
3050 	ipsq_t	*ipsq;
3051 	int	err;
3052 
3053 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3054 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3055 	if (err != 0) {
3056 		if (err != EINPROGRESS) {
3057 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3058 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3059 			    sizeof (buf)), ifindex));
3060 		}
3061 		return (err);
3062 	}
3063 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3064 
3065 	/* operation is not supported on the virtual network interface */
3066 	if (isv6) {
3067 		if (IS_VNI(ill)) {
3068 			ill_refrele(ill);
3069 			return (EINVAL);
3070 		}
3071 	} else {
3072 		if (IS_VNI(ipif->ipif_ill)) {
3073 			ipif_refrele(ipif);
3074 			return (EINVAL);
3075 		}
3076 	}
3077 
3078 	if (checkonly) {
3079 		/*
3080 		 * do not do operation, just pretend to - new T_CHECK
3081 		 * semantics. The error return case above if encountered
3082 		 * considered a good enough "check" here.
3083 		 */
3084 		if (isv6)
3085 			ill_refrele(ill);
3086 		else
3087 			ipif_refrele(ipif);
3088 		return (0);
3089 	}
3090 
3091 	if (!isv6) {
3092 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3093 		    ipsq, NEW_OP);
3094 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3095 		    v4src);
3096 		IPSQ_EXIT(ipsq);
3097 		ipif_refrele(ipif);
3098 	} else {
3099 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3100 		    ipsq, NEW_OP);
3101 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3102 		    v6src);
3103 		IPSQ_EXIT(ipsq);
3104 		ill_refrele(ill);
3105 	}
3106 
3107 	return (err);
3108 }
3109 
3110 /*
3111  * Group mgmt for upper conn that passes things down
3112  * to the interface multicast list (and DLPI)
3113  * These routines can handle new style options that specify an interface name
3114  * as opposed to an interface address (needed for general handling of
3115  * unnumbered interfaces.)
3116  */
3117 
3118 /*
3119  * Add a group to an upper conn group data structure and pass things down
3120  * to the interface multicast list (and DLPI)
3121  */
3122 static int
3123 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3124     ipaddr_t src)
3125 {
3126 	int	error = 0;
3127 	ill_t	*ill;
3128 	ilg_t	*ilg;
3129 	ilg_stat_t ilgstat;
3130 	slist_t	*new_filter = NULL;
3131 	int	new_fmode;
3132 
3133 	ASSERT(IAM_WRITER_IPIF(ipif));
3134 
3135 	ill = ipif->ipif_ill;
3136 
3137 	if (!(ill->ill_flags & ILLF_MULTICAST))
3138 		return (EADDRNOTAVAIL);
3139 
3140 	/*
3141 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3142 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3143 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3144 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3145 	 * but both operations happen on the same conn.
3146 	 */
3147 	mutex_enter(&connp->conn_lock);
3148 	ilg = ilg_lookup_ipif(connp, group, ipif);
3149 
3150 	/*
3151 	 * Depending on the option we're handling, may or may not be okay
3152 	 * if group has already been added.  Figure out our rules based
3153 	 * on fmode and src params.  Also make sure there's enough room
3154 	 * in the filter if we're adding a source to an existing filter.
3155 	 */
3156 	if (src == INADDR_ANY) {
3157 		/* we're joining for all sources, must not have joined */
3158 		if (ilg != NULL)
3159 			error = EADDRINUSE;
3160 	} else {
3161 		if (fmode == MODE_IS_EXCLUDE) {
3162 			/* (excl {addr}) => block source, must have joined */
3163 			if (ilg == NULL)
3164 				error = EADDRNOTAVAIL;
3165 		}
3166 		/* (incl {addr}) => join source, may have joined */
3167 
3168 		if (ilg != NULL &&
3169 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3170 			error = ENOBUFS;
3171 	}
3172 	if (error != 0) {
3173 		mutex_exit(&connp->conn_lock);
3174 		return (error);
3175 	}
3176 
3177 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3178 
3179 	/*
3180 	 * Alloc buffer to copy new state into (see below) before
3181 	 * we make any changes, so we can bail if it fails.
3182 	 */
3183 	if ((new_filter = l_alloc()) == NULL) {
3184 		mutex_exit(&connp->conn_lock);
3185 		return (ENOMEM);
3186 	}
3187 
3188 	if (ilg == NULL) {
3189 		ilgstat = ILGSTAT_NEW;
3190 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3191 			mutex_exit(&connp->conn_lock);
3192 			l_free(new_filter);
3193 			return (ENOMEM);
3194 		}
3195 		if (src != INADDR_ANY) {
3196 			ilg->ilg_filter = l_alloc();
3197 			if (ilg->ilg_filter == NULL) {
3198 				ilg_delete(connp, ilg, NULL);
3199 				mutex_exit(&connp->conn_lock);
3200 				l_free(new_filter);
3201 				return (ENOMEM);
3202 			}
3203 			ilg->ilg_filter->sl_numsrc = 1;
3204 			IN6_IPADDR_TO_V4MAPPED(src,
3205 			    &ilg->ilg_filter->sl_addr[0]);
3206 		}
3207 		if (group == INADDR_ANY) {
3208 			ilg->ilg_v6group = ipv6_all_zeros;
3209 		} else {
3210 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3211 		}
3212 		ilg->ilg_ipif = ipif;
3213 		ilg->ilg_ill = NULL;
3214 		ilg->ilg_orig_ifindex = 0;
3215 		ilg->ilg_fmode = fmode;
3216 	} else {
3217 		int index;
3218 		in6_addr_t v6src;
3219 		ilgstat = ILGSTAT_CHANGE;
3220 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3221 			mutex_exit(&connp->conn_lock);
3222 			l_free(new_filter);
3223 			return (EINVAL);
3224 		}
3225 		if (ilg->ilg_filter == NULL) {
3226 			ilg->ilg_filter = l_alloc();
3227 			if (ilg->ilg_filter == NULL) {
3228 				mutex_exit(&connp->conn_lock);
3229 				l_free(new_filter);
3230 				return (ENOMEM);
3231 			}
3232 		}
3233 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3234 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3235 			mutex_exit(&connp->conn_lock);
3236 			l_free(new_filter);
3237 			return (EADDRNOTAVAIL);
3238 		}
3239 		index = ilg->ilg_filter->sl_numsrc++;
3240 		ilg->ilg_filter->sl_addr[index] = v6src;
3241 	}
3242 
3243 	/*
3244 	 * Save copy of ilg's filter state to pass to other functions,
3245 	 * so we can release conn_lock now.
3246 	 */
3247 	new_fmode = ilg->ilg_fmode;
3248 	l_copy(ilg->ilg_filter, new_filter);
3249 
3250 	mutex_exit(&connp->conn_lock);
3251 
3252 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3253 	if (error != 0) {
3254 		/*
3255 		 * Need to undo what we did before calling ip_addmulti()!
3256 		 * Must look up the ilg again since we've not been holding
3257 		 * conn_lock.
3258 		 */
3259 		in6_addr_t v6src;
3260 		if (ilgstat == ILGSTAT_NEW)
3261 			v6src = ipv6_all_zeros;
3262 		else
3263 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3264 		mutex_enter(&connp->conn_lock);
3265 		ilg = ilg_lookup_ipif(connp, group, ipif);
3266 		ASSERT(ilg != NULL);
3267 		ilg_delete(connp, ilg, &v6src);
3268 		mutex_exit(&connp->conn_lock);
3269 		l_free(new_filter);
3270 		return (error);
3271 	}
3272 
3273 	l_free(new_filter);
3274 	return (0);
3275 }
3276 
3277 static int
3278 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3279     mcast_record_t fmode, const in6_addr_t *v6src)
3280 {
3281 	int	error = 0;
3282 	int	orig_ifindex;
3283 	ilg_t	*ilg;
3284 	ilg_stat_t ilgstat;
3285 	slist_t	*new_filter = NULL;
3286 	int	new_fmode;
3287 
3288 	ASSERT(IAM_WRITER_ILL(ill));
3289 
3290 	if (!(ill->ill_flags & ILLF_MULTICAST))
3291 		return (EADDRNOTAVAIL);
3292 
3293 	/*
3294 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3295 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3296 	 * and hme1 map to different ipsq's, but both operations happen
3297 	 * on the same conn.
3298 	 */
3299 	mutex_enter(&connp->conn_lock);
3300 
3301 	/*
3302 	 * Use the ifindex to do the lookup. We can't use the ill
3303 	 * directly because ilg_ill could point to a different ill if
3304 	 * things have moved.
3305 	 */
3306 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3307 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3308 
3309 	/*
3310 	 * Depending on the option we're handling, may or may not be okay
3311 	 * if group has already been added.  Figure out our rules based
3312 	 * on fmode and src params.  Also make sure there's enough room
3313 	 * in the filter if we're adding a source to an existing filter.
3314 	 */
3315 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3316 		/* we're joining for all sources, must not have joined */
3317 		if (ilg != NULL)
3318 			error = EADDRINUSE;
3319 	} else {
3320 		if (fmode == MODE_IS_EXCLUDE) {
3321 			/* (excl {addr}) => block source, must have joined */
3322 			if (ilg == NULL)
3323 				error = EADDRNOTAVAIL;
3324 		}
3325 		/* (incl {addr}) => join source, may have joined */
3326 
3327 		if (ilg != NULL &&
3328 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3329 			error = ENOBUFS;
3330 	}
3331 	if (error != 0) {
3332 		mutex_exit(&connp->conn_lock);
3333 		return (error);
3334 	}
3335 
3336 	/*
3337 	 * Alloc buffer to copy new state into (see below) before
3338 	 * we make any changes, so we can bail if it fails.
3339 	 */
3340 	if ((new_filter = l_alloc()) == NULL) {
3341 		mutex_exit(&connp->conn_lock);
3342 		return (ENOMEM);
3343 	}
3344 
3345 	if (ilg == NULL) {
3346 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3347 			mutex_exit(&connp->conn_lock);
3348 			l_free(new_filter);
3349 			return (ENOMEM);
3350 		}
3351 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3352 			ilg->ilg_filter = l_alloc();
3353 			if (ilg->ilg_filter == NULL) {
3354 				ilg_delete(connp, ilg, NULL);
3355 				mutex_exit(&connp->conn_lock);
3356 				l_free(new_filter);
3357 				return (ENOMEM);
3358 			}
3359 			ilg->ilg_filter->sl_numsrc = 1;
3360 			ilg->ilg_filter->sl_addr[0] = *v6src;
3361 		}
3362 		ilgstat = ILGSTAT_NEW;
3363 		ilg->ilg_v6group = *v6group;
3364 		ilg->ilg_fmode = fmode;
3365 		ilg->ilg_ipif = NULL;
3366 		/*
3367 		 * Choose our target ill to join on. This might be different
3368 		 * from the ill we've been given if it's currently down and
3369 		 * part of a group.
3370 		 *
3371 		 * new ill is not refheld; we are writer.
3372 		 */
3373 		ill = ip_choose_multi_ill(ill, v6group);
3374 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3375 		ilg->ilg_ill = ill;
3376 		/*
3377 		 * Remember the orig_ifindex that we joined on, so that we
3378 		 * can successfully delete them later on and also search
3379 		 * for duplicates if the application wants to join again.
3380 		 */
3381 		ilg->ilg_orig_ifindex = orig_ifindex;
3382 	} else {
3383 		int index;
3384 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3385 			mutex_exit(&connp->conn_lock);
3386 			l_free(new_filter);
3387 			return (EINVAL);
3388 		}
3389 		if (ilg->ilg_filter == NULL) {
3390 			ilg->ilg_filter = l_alloc();
3391 			if (ilg->ilg_filter == NULL) {
3392 				mutex_exit(&connp->conn_lock);
3393 				l_free(new_filter);
3394 				return (ENOMEM);
3395 			}
3396 		}
3397 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3398 			mutex_exit(&connp->conn_lock);
3399 			l_free(new_filter);
3400 			return (EADDRNOTAVAIL);
3401 		}
3402 		ilgstat = ILGSTAT_CHANGE;
3403 		index = ilg->ilg_filter->sl_numsrc++;
3404 		ilg->ilg_filter->sl_addr[index] = *v6src;
3405 		/*
3406 		 * The current ill might be different from the one we were
3407 		 * asked to join on (if failover has occurred); we should
3408 		 * join on the ill stored in the ilg.  The original ill
3409 		 * is noted in ilg_orig_ifindex, which matched our request.
3410 		 */
3411 		ill = ilg->ilg_ill;
3412 	}
3413 
3414 	/*
3415 	 * Save copy of ilg's filter state to pass to other functions,
3416 	 * so we can release conn_lock now.
3417 	 */
3418 	new_fmode = ilg->ilg_fmode;
3419 	l_copy(ilg->ilg_filter, new_filter);
3420 
3421 	mutex_exit(&connp->conn_lock);
3422 
3423 	/*
3424 	 * Now update the ill. We wait to do this until after the ilg
3425 	 * has been updated because we need to update the src filter
3426 	 * info for the ill, which involves looking at the status of
3427 	 * all the ilgs associated with this group/interface pair.
3428 	 */
3429 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3430 	    ilgstat, new_fmode, new_filter);
3431 	if (error != 0) {
3432 		/*
3433 		 * But because we waited, we have to undo the ilg update
3434 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3435 		 * again, since we've not been holding conn_lock.
3436 		 */
3437 		in6_addr_t delsrc =
3438 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3439 		mutex_enter(&connp->conn_lock);
3440 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3441 		ASSERT(ilg != NULL);
3442 		ilg_delete(connp, ilg, &delsrc);
3443 		mutex_exit(&connp->conn_lock);
3444 		l_free(new_filter);
3445 		return (error);
3446 	}
3447 
3448 	l_free(new_filter);
3449 
3450 	return (0);
3451 }
3452 
3453 /*
3454  * Find an IPv4 ilg matching group, ill and source
3455  */
3456 ilg_t *
3457 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3458 {
3459 	in6_addr_t v6group, v6src;
3460 	int i;
3461 	boolean_t isinlist;
3462 	ilg_t *ilg;
3463 	ipif_t *ipif;
3464 	ill_t *ilg_ill;
3465 
3466 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3467 
3468 	/*
3469 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3470 	 */
3471 	if (group == INADDR_ANY)
3472 		v6group = ipv6_all_zeros;
3473 	else
3474 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3475 
3476 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3477 		/* ilg_ipif is NULL for v6; skip them */
3478 		ilg = &connp->conn_ilg[i];
3479 		if ((ipif = ilg->ilg_ipif) == NULL)
3480 			continue;
3481 		ASSERT(ilg->ilg_ill == NULL);
3482 		ilg_ill = ipif->ipif_ill;
3483 		ASSERT(!ilg_ill->ill_isv6);
3484 		if (ilg_ill == ill &&
3485 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3486 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3487 				/* no source filter, so this is a match */
3488 				return (ilg);
3489 			}
3490 			break;
3491 		}
3492 	}
3493 	if (i == connp->conn_ilg_inuse)
3494 		return (NULL);
3495 
3496 	/*
3497 	 * we have an ilg with matching ill and group; but
3498 	 * the ilg has a source list that we must check.
3499 	 */
3500 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3501 	isinlist = B_FALSE;
3502 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3503 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3504 			isinlist = B_TRUE;
3505 			break;
3506 		}
3507 	}
3508 
3509 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3510 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3511 		return (ilg);
3512 
3513 	return (NULL);
3514 }
3515 
3516 /*
3517  * Find an IPv6 ilg matching group, ill, and source
3518  */
3519 ilg_t *
3520 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3521     const in6_addr_t *v6src, ill_t *ill)
3522 {
3523 	int i;
3524 	boolean_t isinlist;
3525 	ilg_t *ilg;
3526 	ill_t *ilg_ill;
3527 
3528 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3529 
3530 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3531 		ilg = &connp->conn_ilg[i];
3532 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3533 			continue;
3534 		ASSERT(ilg->ilg_ipif == NULL);
3535 		ASSERT(ilg_ill->ill_isv6);
3536 		if (ilg_ill == ill &&
3537 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3538 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3539 				/* no source filter, so this is a match */
3540 				return (ilg);
3541 			}
3542 			break;
3543 		}
3544 	}
3545 	if (i == connp->conn_ilg_inuse)
3546 		return (NULL);
3547 
3548 	/*
3549 	 * we have an ilg with matching ill and group; but
3550 	 * the ilg has a source list that we must check.
3551 	 */
3552 	isinlist = B_FALSE;
3553 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3554 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3555 			isinlist = B_TRUE;
3556 			break;
3557 		}
3558 	}
3559 
3560 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3561 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3562 		return (ilg);
3563 
3564 	return (NULL);
3565 }
3566 
3567 /*
3568  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3569  * This is useful when the interface fails and we have moved
3570  * to a new ill, but still would like to locate using the index
3571  * that we originally used to join. Used only for IPv6 currently.
3572  */
3573 static ilg_t *
3574 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3575 {
3576 	ilg_t	*ilg;
3577 	int	i;
3578 
3579 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3580 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3581 		ilg = &connp->conn_ilg[i];
3582 		/* ilg_ill is NULL for V4. Skip them */
3583 		if (ilg->ilg_ill == NULL)
3584 			continue;
3585 		/* ilg_ipif is NULL for V6 */
3586 		ASSERT(ilg->ilg_ipif == NULL);
3587 		ASSERT(ilg->ilg_orig_ifindex != 0);
3588 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3589 		    ilg->ilg_orig_ifindex == ifindex) {
3590 			return (ilg);
3591 		}
3592 	}
3593 	return (NULL);
3594 }
3595 
3596 /*
3597  * Find an IPv6 ilg matching group and ill
3598  */
3599 ilg_t *
3600 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3601 {
3602 	ilg_t	*ilg;
3603 	int	i;
3604 	ill_t 	*mem_ill;
3605 
3606 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3607 
3608 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3609 		ilg = &connp->conn_ilg[i];
3610 		if ((mem_ill = ilg->ilg_ill) == NULL)
3611 			continue;
3612 		ASSERT(ilg->ilg_ipif == NULL);
3613 		ASSERT(mem_ill->ill_isv6);
3614 		if (mem_ill == ill &&
3615 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3616 			return (ilg);
3617 	}
3618 	return (NULL);
3619 }
3620 
3621 /*
3622  * Find an IPv4 ilg matching group and ipif
3623  */
3624 static ilg_t *
3625 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3626 {
3627 	in6_addr_t v6group;
3628 	int	i;
3629 
3630 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3631 	ASSERT(!ipif->ipif_ill->ill_isv6);
3632 
3633 	if (group == INADDR_ANY)
3634 		v6group = ipv6_all_zeros;
3635 	else
3636 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3637 
3638 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3639 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3640 		    &v6group) &&
3641 		    connp->conn_ilg[i].ilg_ipif == ipif)
3642 			return (&connp->conn_ilg[i]);
3643 	}
3644 	return (NULL);
3645 }
3646 
3647 /*
3648  * If a source address is passed in (src != NULL and src is not
3649  * unspecified), remove the specified src addr from the given ilg's
3650  * filter list, else delete the ilg.
3651  */
3652 static void
3653 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3654 {
3655 	int	i;
3656 
3657 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3658 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3659 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3660 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3661 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3662 
3663 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3664 		if (connp->conn_ilg_walker_cnt != 0) {
3665 			ilg->ilg_flags |= ILG_DELETED;
3666 			return;
3667 		}
3668 
3669 		FREE_SLIST(ilg->ilg_filter);
3670 
3671 		i = ilg - &connp->conn_ilg[0];
3672 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3673 
3674 		/* Move other entries up one step */
3675 		connp->conn_ilg_inuse--;
3676 		for (; i < connp->conn_ilg_inuse; i++)
3677 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3678 
3679 		if (connp->conn_ilg_inuse == 0) {
3680 			mi_free((char *)connp->conn_ilg);
3681 			connp->conn_ilg = NULL;
3682 			cv_broadcast(&connp->conn_refcv);
3683 		}
3684 	} else {
3685 		l_remove(ilg->ilg_filter, src);
3686 	}
3687 }
3688 
3689 /*
3690  * Called from conn close. No new ilg can be added or removed.
3691  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3692  * will return error if conn has started closing.
3693  */
3694 void
3695 ilg_delete_all(conn_t *connp)
3696 {
3697 	int	i;
3698 	ipif_t	*ipif = NULL;
3699 	ill_t	*ill = NULL;
3700 	ilg_t	*ilg;
3701 	in6_addr_t v6group;
3702 	boolean_t success;
3703 	ipsq_t	*ipsq;
3704 	int	orig_ifindex;
3705 
3706 	mutex_enter(&connp->conn_lock);
3707 retry:
3708 	ILG_WALKER_HOLD(connp);
3709 	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
3710 		ilg = &connp->conn_ilg[i];
3711 		/*
3712 		 * Since this walk is not atomic (we drop the
3713 		 * conn_lock and wait in ipsq_enter) we need
3714 		 * to check for the ILG_DELETED flag.
3715 		 */
3716 		if (ilg->ilg_flags & ILG_DELETED) {
3717 			/* Go to the next ilg */
3718 			i--;
3719 			continue;
3720 		}
3721 		v6group = ilg->ilg_v6group;
3722 
3723 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3724 			ipif = ilg->ilg_ipif;
3725 			ill = ipif->ipif_ill;
3726 		} else {
3727 			ipif = NULL;
3728 			ill = ilg->ilg_ill;
3729 		}
3730 		/*
3731 		 * We may not be able to refhold the ill if the ill/ipif
3732 		 * is changing. But we need to make sure that the ill will
3733 		 * not vanish. So we just bump up the ill_waiter count.
3734 		 * If we are unable to do even that, then the ill is closing,
3735 		 * in which case the unplumb thread will handle the cleanup,
3736 		 * and we move on to the next ilg.
3737 		 */
3738 		if (!ill_waiter_inc(ill)) {
3739 			/* Go to the next ilg */
3740 			i--;
3741 			continue;
3742 		}
3743 		mutex_exit(&connp->conn_lock);
3744 		/*
3745 		 * To prevent deadlock between ill close which waits inside
3746 		 * the perimeter, and conn close, ipsq_enter returns error,
3747 		 * the moment ILL_CONDEMNED is set, in which case ill close
3748 		 * takes responsibility to cleanup the ilgs. Note that we
3749 		 * have not yet set condemned flag, otherwise the conn can't
3750 		 * be refheld for cleanup by those routines and it would be
3751 		 * a mutual deadlock.
3752 		 */
3753 		success = ipsq_enter(ill, B_FALSE);
3754 		ipsq = ill->ill_phyint->phyint_ipsq;
3755 		ill_waiter_dcr(ill);
3756 		mutex_enter(&connp->conn_lock);
3757 		if (!success) {
3758 			/* Go to the next ilg */
3759 			i--;
3760 			continue;
3761 		}
3762 
3763 		/*
3764 		 * Make sure that nothing has changed under. For eg.
3765 		 * a failover/failback can change ilg_ill while we were
3766 		 * waiting to become exclusive above
3767 		 */
3768 		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
3769 			ipif = ilg->ilg_ipif;
3770 			ill = ipif->ipif_ill;
3771 		} else {
3772 			ipif = NULL;
3773 			ill = ilg->ilg_ill;
3774 		}
3775 		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
3776 			/*
3777 			 * The ilg has changed under us probably due
3778 			 * to a failover or unplumb. Retry on the same ilg.
3779 			 */
3780 			mutex_exit(&connp->conn_lock);
3781 			ipsq_exit(ipsq, B_TRUE, B_TRUE);
3782 			mutex_enter(&connp->conn_lock);
3783 			continue;
3784 		}
3785 		v6group = ilg->ilg_v6group;
3786 		orig_ifindex = ilg->ilg_orig_ifindex;
3787 		ilg_delete(connp, ilg, NULL);
3788 		mutex_exit(&connp->conn_lock);
3789 
3790 		if (ipif != NULL)
3791 			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
3792 			    B_FALSE, B_TRUE);
3793 
3794 		else
3795 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3796 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3797 
3798 		ipsq_exit(ipsq, B_TRUE, B_TRUE);
3799 		mutex_enter(&connp->conn_lock);
3800 		/* Go to the next ilg */
3801 		i--;
3802 	}
3803 	ILG_WALKER_RELE(connp);
3804 
3805 	/* If any ill was skipped above wait and retry */
3806 	if (connp->conn_ilg_inuse != 0) {
3807 		cv_wait(&connp->conn_refcv, &connp->conn_lock);
3808 		goto retry;
3809 	}
3810 	mutex_exit(&connp->conn_lock);
3811 }
3812 
3813 /*
3814  * Called from ill close by ipcl_walk for clearing conn_ilg and
3815  * conn_multicast_ipif for a given ipif. conn is held by caller.
3816  * Note that ipcl_walk only walks conns that are not yet condemned.
3817  * condemned conns can't be refheld. For this reason, conn must become clean
3818  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3819  * condemned flag.
3820  */
3821 static void
3822 conn_delete_ipif(conn_t *connp, caddr_t arg)
3823 {
3824 	ipif_t	*ipif = (ipif_t *)arg;
3825 	int	i;
3826 	char	group_buf1[INET6_ADDRSTRLEN];
3827 	char	group_buf2[INET6_ADDRSTRLEN];
3828 	ipaddr_t group;
3829 	ilg_t	*ilg;
3830 
3831 	/*
3832 	 * Even though conn_ilg_inuse can change while we are in this loop,
3833 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3834 	 * be created or deleted for this connp, on this ill, since this ill
3835 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3836 	 */
3837 	mutex_enter(&connp->conn_lock);
3838 
3839 	/*
3840 	 * Increment the walker count, so that ilg repacking does not
3841 	 * occur while we are in the loop.
3842 	 */
3843 	ILG_WALKER_HOLD(connp);
3844 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3845 		ilg = &connp->conn_ilg[i];
3846 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3847 			continue;
3848 		/*
3849 		 * ip_close cannot be cleaning this ilg at the same time.
3850 		 * since it also has to execute in this ill's perimeter which
3851 		 * we are now holding. Only a clean conn can be condemned.
3852 		 */
3853 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3854 
3855 		/* Blow away the membership */
3856 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3857 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3858 		    group_buf1, sizeof (group_buf1)),
3859 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3860 		    group_buf2, sizeof (group_buf2)),
3861 		    ipif->ipif_ill->ill_name));
3862 
3863 		/* ilg_ipif is NULL for V6, so we won't be here */
3864 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3865 
3866 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3867 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3868 		mutex_exit(&connp->conn_lock);
3869 
3870 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3871 		mutex_enter(&connp->conn_lock);
3872 	}
3873 
3874 	/*
3875 	 * If we are the last walker, need to physically delete the
3876 	 * ilgs and repack.
3877 	 */
3878 	ILG_WALKER_RELE(connp);
3879 
3880 	if (connp->conn_multicast_ipif == ipif) {
3881 		/* Revert to late binding */
3882 		connp->conn_multicast_ipif = NULL;
3883 	}
3884 	mutex_exit(&connp->conn_lock);
3885 
3886 	conn_delete_ire(connp, (caddr_t)ipif);
3887 }
3888 
3889 /*
3890  * Called from ill close by ipcl_walk for clearing conn_ilg and
3891  * conn_multicast_ill for a given ill. conn is held by caller.
3892  * Note that ipcl_walk only walks conns that are not yet condemned.
3893  * condemned conns can't be refheld. For this reason, conn must become clean
3894  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3895  * condemned flag.
3896  */
3897 static void
3898 conn_delete_ill(conn_t *connp, caddr_t arg)
3899 {
3900 	ill_t	*ill = (ill_t *)arg;
3901 	int	i;
3902 	char	group_buf[INET6_ADDRSTRLEN];
3903 	in6_addr_t v6group;
3904 	int	orig_ifindex;
3905 	ilg_t	*ilg;
3906 
3907 	/*
3908 	 * Even though conn_ilg_inuse can change while we are in this loop,
3909 	 * no new ilgs can be created/deleted for this connp, on this
3910 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3911 	 * in this cleanup.
3912 	 */
3913 	mutex_enter(&connp->conn_lock);
3914 
3915 	/*
3916 	 * Increment the walker count, so that ilg repacking does not
3917 	 * occur while we are in the loop.
3918 	 */
3919 	ILG_WALKER_HOLD(connp);
3920 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3921 		ilg = &connp->conn_ilg[i];
3922 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3923 			/*
3924 			 * ip_close cannot be cleaning this ilg at the same
3925 			 * time, since it also has to execute in this ill's
3926 			 * perimeter which we are now holding. Only a clean
3927 			 * conn can be condemned.
3928 			 */
3929 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3930 
3931 			/* Blow away the membership */
3932 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3933 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3934 			    group_buf, sizeof (group_buf)),
3935 			    ill->ill_name));
3936 
3937 			v6group = ilg->ilg_v6group;
3938 			orig_ifindex = ilg->ilg_orig_ifindex;
3939 			ilg_delete(connp, ilg, NULL);
3940 			mutex_exit(&connp->conn_lock);
3941 
3942 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3943 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3944 			mutex_enter(&connp->conn_lock);
3945 		}
3946 	}
3947 	/*
3948 	 * If we are the last walker, need to physically delete the
3949 	 * ilgs and repack.
3950 	 */
3951 	ILG_WALKER_RELE(connp);
3952 
3953 	if (connp->conn_multicast_ill == ill) {
3954 		/* Revert to late binding */
3955 		connp->conn_multicast_ill = NULL;
3956 		connp->conn_orig_multicast_ifindex = 0;
3957 	}
3958 	mutex_exit(&connp->conn_lock);
3959 }
3960 
3961 /*
3962  * Called when an ipif is unplumbed to make sure that there are no
3963  * dangling conn references to that ipif.
3964  * Handles ilg_ipif and conn_multicast_ipif
3965  */
3966 void
3967 reset_conn_ipif(ipif)
3968 	ipif_t	*ipif;
3969 {
3970 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
3971 
3972 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst);
3973 }
3974 
3975 /*
3976  * Called when an ill is unplumbed to make sure that there are no
3977  * dangling conn references to that ill.
3978  * Handles ilg_ill, conn_multicast_ill.
3979  */
3980 void
3981 reset_conn_ill(ill_t *ill)
3982 {
3983 	ip_stack_t	*ipst = ill->ill_ipst;
3984 
3985 	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
3986 }
3987 
3988 #ifdef DEBUG
3989 /*
3990  * Walk functions walk all the interfaces in the system to make
3991  * sure that there is no refernece to the ipif or ill that is
3992  * going away.
3993  */
3994 int
3995 ilm_walk_ill(ill_t *ill)
3996 {
3997 	int cnt = 0;
3998 	ill_t *till;
3999 	ilm_t *ilm;
4000 	ill_walk_context_t ctx;
4001 	ip_stack_t	*ipst = ill->ill_ipst;
4002 
4003 	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
4004 	till = ILL_START_WALK_ALL(&ctx, ipst);
4005 	for (; till != NULL; till = ill_next(&ctx, till)) {
4006 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4007 			if (ilm->ilm_ill == ill) {
4008 				cnt++;
4009 			}
4010 		}
4011 	}
4012 	rw_exit(&ipst->ips_ill_g_lock);
4013 
4014 	return (cnt);
4015 }
4016 
4017 /*
4018  * This function is called before the ipif is freed.
4019  */
4020 int
4021 ilm_walk_ipif(ipif_t *ipif)
4022 {
4023 	int cnt = 0;
4024 	ill_t *till;
4025 	ilm_t *ilm;
4026 	ill_walk_context_t ctx;
4027 	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;
4028 
4029 	till = ILL_START_WALK_ALL(&ctx, ipst);
4030 	for (; till != NULL; till = ill_next(&ctx, till)) {
4031 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4032 			if (ilm->ilm_ipif == ipif) {
4033 					cnt++;
4034 			}
4035 		}
4036 	}
4037 	return (cnt);
4038 }
4039 #endif
4040