xref: /titanic_50/usr/src/uts/common/inet/ip/ip_multi.c (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 /* Copyright (c) 1990 Mentat Inc. */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <sys/stream.h>
32 #include <sys/dlpi.h>
33 #include <sys/stropts.h>
34 #include <sys/strsun.h>
35 #include <sys/strlog.h>
36 #include <sys/ddi.h>
37 #include <sys/cmn_err.h>
38 #include <sys/zone.h>
39 
40 #include <sys/param.h>
41 #include <sys/socket.h>
42 #define	_SUN_TPI_VERSION	2
43 #include <sys/tihdr.h>
44 #include <net/if.h>
45 #include <net/if_arp.h>
46 #include <sys/sockio.h>
47 #include <sys/systm.h>
48 #include <net/route.h>
49 #include <netinet/in.h>
50 #include <net/if_dl.h>
51 #include <netinet/ip6.h>
52 #include <netinet/icmp6.h>
53 
54 #include <inet/common.h>
55 #include <inet/mi.h>
56 #include <inet/nd.h>
57 #include <inet/arp.h>
58 #include <inet/ip.h>
59 #include <inet/ip6.h>
60 #include <inet/ip_if.h>
61 #include <inet/ip_ire.h>
62 #include <inet/ip_ndp.h>
63 #include <inet/ip_multi.h>
64 #include <inet/ipclassifier.h>
65 #include <inet/ipsec_impl.h>
66 #include <inet/sctp_ip.h>
67 #include <inet/ip_listutils.h>
68 
69 #include <netinet/igmp.h>
70 
71 /* igmpv3/mldv2 source filter manipulation */
72 static void	ilm_bld_flists(conn_t *conn, void *arg);
73 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
74     slist_t *flist);
75 
76 static ilm_t	*ilm_add_v6(ipif_t *ipif, const in6_addr_t *group,
77     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
78     int orig_ifindex, zoneid_t zoneid);
79 static void	ilm_delete(ilm_t *ilm);
80 static int	ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group);
81 static int	ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group);
82 static ilg_t	*ilg_lookup_ill_index_v6(conn_t *connp,
83     const in6_addr_t *v6group, int index);
84 static ilg_t	*ilg_lookup_ipif(conn_t *connp, ipaddr_t group,
85     ipif_t *ipif);
86 static int	ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif,
87     mcast_record_t fmode, ipaddr_t src);
88 static int	ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill,
89     mcast_record_t fmode, const in6_addr_t *v6src);
90 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
91 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
92     uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
93 static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
94     uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
95 static void	conn_ilg_reap(conn_t *connp);
96 static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
97     ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
98 static int	ip_opt_delete_group_excl_v6(conn_t *connp,
99     const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
100     const in6_addr_t *v6src);
101 
102 /*
103  * MT notes:
104  *
105  * Multicast joins operate on both the ilg and ilm structures. Multiple
106  * threads operating on a conn (socket) trying to do multicast joins
107  * need to synchronize when operating on the ilg. Multiple threads
108  * potentially operating on different conns (socket endpoints) trying to
109  * do multicast joins could eventually end up trying to manipulate the
110  * ilm simultaneously and need to synchronize on the access to the ilm.
111  * Both are amenable to standard Solaris MT techniques, but it would be
112  * complex to handle a failover or failback which needs to manipulate
113  * ilg/ilms if an application can also simultaneously join/leave
114  * multicast groups. Hence multicast join/leave also go through the ipsq_t
115  * serialization.
116  *
117  * Multicast joins and leaves are single-threaded per phyint/IPMP group
118  * using the ipsq serialization mechanism.
119  *
120  * An ilm is an IP data structure used to track multicast join/leave.
121  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
122  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
123  * referencing the ilm. ilms are created / destroyed only as writer. ilms
124  * are not passed around, instead they are looked up and used under the
125  * ill_lock or as writer. So we don't need a dynamic refcount of the number
126  * of threads holding reference to an ilm.
127  *
128  * Multicast Join operation:
129  *
130  * The first step is to determine the ipif (v4) or ill (v6) on which
131  * the join operation is to be done. The join is done after becoming
132  * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
133  * and ill->ill_ilm are thus accessed and modified exclusively per ill.
134  * Multiple threads can attempt to join simultaneously on different ipif/ill
135  * on the same conn. In this case the ipsq serialization does not help in
136  * protecting the ilg. It is the conn_lock that is used to protect the ilg.
137  * The conn_lock also protects all the ilg_t members.
138  *
139  * Leave operation.
140  *
141  * Similar to the join operation, the first step is to determine the ipif
142  * (v4) or ill (v6) on which the leave operation is to be done. The leave
143  * operation is done after becoming exclusive on the ipsq associated with the
144  * ipif or ill. As with join, ilg modification is done under the conn_lock.
145  */
146 
/*
 * Try to become exclusive on the ipsq associated with `ipif'.  On success
 * `ipsq' holds the value returned by ipsq_try_enter() and execution
 * continues after the macro.  If ipsq_try_enter() returns NULL, the
 * reference on `ipif' is released and the ENCLOSING FUNCTION returns
 * EINPROGRESS.
 *
 * This expands to several statements, one of which is a hidden `return',
 * so it may only be used as a stand-alone statement inside a function
 * returning int; never use it as the unbraced body of an if/else or loop.
 * All macro arguments are parenthesized where they appear in expressions.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}
155 
/*
 * Try to become exclusive on the ipsq associated with `ill'.  On success
 * `ipsq' holds the value returned by ipsq_try_enter() and execution
 * continues after the macro.  If ipsq_try_enter() returns NULL, the
 * reference on `ill' is released and the ENCLOSING FUNCTION returns
 * EINPROGRESS.
 *
 * This expands to several statements, one of which is a hidden `return',
 * so it may only be used as a stand-alone statement inside a function
 * returning int; never use it as the unbraced body of an if/else or loop.
 * All macro arguments are parenthesized where they appear in expressions.
 */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT((connp) != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, (ill), CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}
164 
/*
 * Leave the ipsq entered via IPSQ_ENTER_IPIF/IPSQ_ENTER_ILL; a NULL
 * `ipsq' is tolerated and ignored.
 *
 * The expansion is a bare `if' statement that carries its own trailing
 * semicolon, so do not use it as the unbraced body of an if that has an
 * else branch.
 */
#define	IPSQ_EXIT(ipsq)	\
	if ((ipsq) != NULL)	\
		ipsq_exit((ipsq), B_TRUE, B_TRUE);
168 
/*
 * Note that a walker is traversing connp's ilg array by bumping
 * conn_ilg_walker_cnt.  Paired with ILG_WALKER_RELE(), which reaps
 * ILG_DELETED entries once the count drops back to zero.
 */
#define	ILG_WALKER_HOLD(connp)	((connp)->conn_ilg_walker_cnt++)
170 
/*
 * Drop a walker hold taken with ILG_WALKER_HOLD().  When the last walker
 * goes away, conn_ilg_reap() is called to reclaim the ilgs that were
 * marked ILG_DELETED (conn_ilg_reap() asserts that conn_lock is held).
 */
#define	ILG_WALKER_RELE(connp)					\
	{							\
		if (--(connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);			\
	}
177 
178 static void
179 conn_ilg_reap(conn_t *connp)
180 {
181 	int	to;
182 	int	from;
183 
184 	ASSERT(MUTEX_HELD(&connp->conn_lock));
185 
186 	to = 0;
187 	from = 0;
188 	while (from < connp->conn_ilg_inuse) {
189 		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
190 			FREE_SLIST(connp->conn_ilg[from].ilg_filter);
191 			from++;
192 			continue;
193 		}
194 		if (to != from)
195 			connp->conn_ilg[to] = connp->conn_ilg[from];
196 		to++;
197 		from++;
198 	}
199 
200 	connp->conn_ilg_inuse = to;
201 
202 	if (connp->conn_ilg_inuse == 0) {
203 		mi_free((char *)connp->conn_ilg);
204 		connp->conn_ilg = NULL;
205 		cv_broadcast(&connp->conn_refcv);
206 	}
207 }
208 
/*
 * Allocate a zero-filled array of `number' objects of type `structure'
 * with mi_zalloc(); evaluates to a `structure *' (NULL if the allocation
 * fails).
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

/* Number of ilg_t slots by which a conn's ilg array is grown at a time. */
#define	ILG_ALLOC_CHUNK	16
213 
214 /*
215  * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
216  * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
217  * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
218  * returned ilg).  Returns NULL on failure (ENOMEM).
219  *
220  * Assumes connp->conn_lock is held.
221  */
222 static ilg_t *
223 conn_ilg_alloc(conn_t *connp)
224 {
225 	ilg_t *new;
226 	int curcnt;
227 
228 	ASSERT(MUTEX_HELD(&connp->conn_lock));
229 	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);
230 
231 	if (connp->conn_ilg == NULL) {
232 		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
233 		if (connp->conn_ilg == NULL)
234 			return (NULL);
235 		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
236 		connp->conn_ilg_inuse = 0;
237 	}
238 	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
239 		curcnt = connp->conn_ilg_allocated;
240 		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
241 		if (new == NULL)
242 			return (NULL);
243 		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
244 		mi_free((char *)connp->conn_ilg);
245 		connp->conn_ilg = new;
246 		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
247 	}
248 
249 	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
250 }
251 
252 typedef struct ilm_fbld_s {
253 	ilm_t		*fbld_ilm;
254 	int		fbld_in_cnt;
255 	int		fbld_ex_cnt;
256 	slist_t		fbld_in;
257 	slist_t		fbld_ex;
258 	boolean_t	fbld_in_overflow;
259 } ilm_fbld_t;
260 
261 static void
262 ilm_bld_flists(conn_t *conn, void *arg)
263 {
264 	int i;
265 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
266 	ilm_t *ilm = fbld->fbld_ilm;
267 	in6_addr_t *v6group = &ilm->ilm_v6addr;
268 
269 	if (conn->conn_ilg_inuse == 0)
270 		return;
271 
272 	/*
273 	 * Since we can't break out of the ipcl_walk once started, we still
274 	 * have to look at every conn.  But if we've already found one
275 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
276 	 * ilgs--that will be our state.
277 	 */
278 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
279 		return;
280 
281 	/*
282 	 * Check this conn's ilgs to see if any are interested in our
283 	 * ilm (group, interface match).  If so, update the master
284 	 * include and exclude lists we're building in the fbld struct
285 	 * with this ilg's filter info.
286 	 */
287 	mutex_enter(&conn->conn_lock);
288 	for (i = 0; i < conn->conn_ilg_inuse; i++) {
289 		ilg_t *ilg = &conn->conn_ilg[i];
290 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
291 		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
292 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
293 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
294 				fbld->fbld_in_cnt++;
295 				if (!fbld->fbld_in_overflow)
296 					l_union_in_a(&fbld->fbld_in,
297 					    ilg->ilg_filter,
298 					    &fbld->fbld_in_overflow);
299 			} else {
300 				fbld->fbld_ex_cnt++;
301 				/*
302 				 * On the first exclude list, don't try to do
303 				 * an intersection, as the master exclude list
304 				 * is intentionally empty.  If the master list
305 				 * is still empty on later iterations, that
306 				 * means we have at least one ilg with an empty
307 				 * exclude list, so that should be reflected
308 				 * when we take the intersection.
309 				 */
310 				if (fbld->fbld_ex_cnt == 1) {
311 					if (ilg->ilg_filter != NULL)
312 						l_copy(ilg->ilg_filter,
313 						    &fbld->fbld_ex);
314 				} else {
315 					l_intersection_in_a(&fbld->fbld_ex,
316 					    ilg->ilg_filter);
317 				}
318 			}
319 			/* there will only be one match, so break now. */
320 			break;
321 		}
322 	}
323 	mutex_exit(&conn->conn_lock);
324 }
325 
326 static void
327 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
328 {
329 	ilm_fbld_t fbld;
330 
331 	fbld.fbld_ilm = ilm;
332 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
333 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
334 	fbld.fbld_in_overflow = B_FALSE;
335 
336 	/* first, construct our master include and exclude lists */
337 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld);
338 
339 	/* now use those master lists to generate the interface filter */
340 
341 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
342 	if (fbld.fbld_in_overflow) {
343 		*fmode = MODE_IS_EXCLUDE;
344 		flist->sl_numsrc = 0;
345 		return;
346 	}
347 
348 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
349 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
350 		*fmode = MODE_IS_INCLUDE;
351 		flist->sl_numsrc = 0;
352 		return;
353 	}
354 
355 	/*
356 	 * If there are no exclude lists, then the interface filter
357 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
358 	 * exclude list makes the interface filter EXCLUDE, with its
359 	 * filter list equal to (fbld_ex - fbld_in).
360 	 */
361 	if (fbld.fbld_ex_cnt == 0) {
362 		*fmode = MODE_IS_INCLUDE;
363 		l_copy(&fbld.fbld_in, flist);
364 	} else {
365 		*fmode = MODE_IS_EXCLUDE;
366 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
367 	}
368 }
369 
370 /*
371  * If the given interface has failed, choose a new one to join on so
372  * that we continue to receive packets.  ilg_orig_ifindex remembers
373  * what the application used to join on so that we know the ilg to
374  * delete even though we change the ill here.  Callers will store the
375  * ilg returned from this function in ilg_ill.  Thus when we receive
376  * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
377  *
378  * This function must be called as writer so we can walk the group
379  * list and examine flags without holding a lock.
380  */
381 ill_t *
382 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
383 {
384 	ill_t	*till;
385 	ill_group_t *illgrp = ill->ill_group;
386 
387 	ASSERT(IAM_WRITER_ILL(ill));
388 
389 	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
390 		return (ill);
391 
392 	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
393 		return (ill);
394 
395 	till = illgrp->illgrp_ill;
396 	while (till != NULL &&
397 	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
398 		till = till->ill_group_next;
399 	}
400 	if (till != NULL)
401 		return (till);
402 
403 	return (ill);
404 }
405 
406 static int
407 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
408     boolean_t isv6)
409 {
410 	mcast_record_t fmode;
411 	slist_t *flist;
412 	boolean_t fdefault;
413 	char buf[INET6_ADDRSTRLEN];
414 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
415 
416 	/*
417 	 * There are several cases where the ilm's filter state
418 	 * defaults to (EXCLUDE, NULL):
419 	 *	- we've had previous joins without associated ilgs
420 	 *	- this join has no associated ilg
421 	 *	- the ilg's filter state is (EXCLUDE, NULL)
422 	 */
423 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
424 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
425 
426 	/* attempt mallocs (if needed) before doing anything else */
427 	if ((flist = l_alloc()) == NULL)
428 		return (ENOMEM);
429 	if (!fdefault && ilm->ilm_filter == NULL) {
430 		ilm->ilm_filter = l_alloc();
431 		if (ilm->ilm_filter == NULL) {
432 			l_free(flist);
433 			return (ENOMEM);
434 		}
435 	}
436 
437 	if (ilgstat != ILGSTAT_CHANGE)
438 		ilm->ilm_refcnt++;
439 
440 	if (ilgstat == ILGSTAT_NONE)
441 		ilm->ilm_no_ilg_cnt++;
442 
443 	/*
444 	 * Determine new filter state.  If it's not the default
445 	 * (EXCLUDE, NULL), we must walk the conn list to find
446 	 * any ilgs interested in this group, and re-build the
447 	 * ilm filter.
448 	 */
449 	if (fdefault) {
450 		fmode = MODE_IS_EXCLUDE;
451 		flist->sl_numsrc = 0;
452 	} else {
453 		ilm_gen_filter(ilm, &fmode, flist);
454 	}
455 
456 	/* make sure state actually changed; nothing to do if not. */
457 	if ((ilm->ilm_fmode == fmode) &&
458 	    !lists_are_different(ilm->ilm_filter, flist)) {
459 		l_free(flist);
460 		return (0);
461 	}
462 
463 	/* send the state change report */
464 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
465 		if (isv6)
466 			mld_statechange(ilm, fmode, flist);
467 		else
468 			igmp_statechange(ilm, fmode, flist);
469 	}
470 
471 	/* update the ilm state */
472 	ilm->ilm_fmode = fmode;
473 	if (flist->sl_numsrc > 0)
474 		l_copy(flist, ilm->ilm_filter);
475 	else
476 		CLEAR_SLIST(ilm->ilm_filter);
477 
478 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
479 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
480 
481 	l_free(flist);
482 	return (0);
483 }
484 
485 static int
486 ilm_update_del(ilm_t *ilm, boolean_t isv6)
487 {
488 	mcast_record_t fmode;
489 	slist_t *flist;
490 	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;
491 
492 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
493 	    ilm->ilm_refcnt));
494 
495 	if ((flist = l_alloc()) == NULL)
496 		return (ENOMEM);
497 
498 	/*
499 	 * If present, the ilg in question has already either been
500 	 * updated or removed from our list; so all we need to do
501 	 * now is walk the list to update the ilm filter state.
502 	 *
503 	 * Skip the list walk if we have any no-ilg joins, which
504 	 * cause the filter state to revert to (EXCLUDE, NULL).
505 	 */
506 	if (ilm->ilm_no_ilg_cnt != 0) {
507 		fmode = MODE_IS_EXCLUDE;
508 		flist->sl_numsrc = 0;
509 	} else {
510 		ilm_gen_filter(ilm, &fmode, flist);
511 	}
512 
513 	/* check to see if state needs to be updated */
514 	if ((ilm->ilm_fmode == fmode) &&
515 	    (!lists_are_different(ilm->ilm_filter, flist))) {
516 		l_free(flist);
517 		return (0);
518 	}
519 
520 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) {
521 		if (isv6)
522 			mld_statechange(ilm, fmode, flist);
523 		else
524 			igmp_statechange(ilm, fmode, flist);
525 	}
526 
527 	ilm->ilm_fmode = fmode;
528 	if (flist->sl_numsrc > 0) {
529 		if (ilm->ilm_filter == NULL) {
530 			ilm->ilm_filter = l_alloc();
531 			if (ilm->ilm_filter == NULL) {
532 				char buf[INET6_ADDRSTRLEN];
533 				ip1dbg(("ilm_update_del: failed to alloc ilm "
534 				    "filter; no source filtering for %s on %s",
535 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
536 				    buf, sizeof (buf)), ill->ill_name));
537 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
538 				l_free(flist);
539 				return (0);
540 			}
541 		}
542 		l_copy(flist, ilm->ilm_filter);
543 	} else {
544 		CLEAR_SLIST(ilm->ilm_filter);
545 	}
546 
547 	l_free(flist);
548 	return (0);
549 }
550 
551 /*
552  * INADDR_ANY means all multicast addresses. This is only used
553  * by the multicast router.
554  * INADDR_ANY is stored as IPv6 unspecified addr.
555  */
556 int
557 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
558     mcast_record_t ilg_fmode, slist_t *ilg_flist)
559 {
560 	ill_t	*ill = ipif->ipif_ill;
561 	ilm_t 	*ilm;
562 	in6_addr_t v6group;
563 	int	ret;
564 
565 	ASSERT(IAM_WRITER_IPIF(ipif));
566 
567 	if (!CLASSD(group) && group != INADDR_ANY)
568 		return (EINVAL);
569 
570 	/*
571 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
572 	 */
573 	if (group == INADDR_ANY)
574 		v6group = ipv6_all_zeros;
575 	else
576 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
577 
578 	ilm = ilm_lookup_ipif(ipif, group);
579 	if (ilm != NULL)
580 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));
581 
582 	/*
583 	 * ilms are associated with ipifs in IPv4. It moves with the
584 	 * ipif if the ipif moves to a new ill when the interface
585 	 * fails. Thus we really don't check whether the ipif_ill
586 	 * has failed like in IPv6. If it has FAILED the ipif
587 	 * will move (daemon will move it) and hence the ilm, if the
588 	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
589 	 * we continue to receive in the same place even if the
590 	 * interface fails.
591 	 */
592 	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
593 	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
594 	if (ilm == NULL)
595 		return (ENOMEM);
596 
597 	if (group == INADDR_ANY) {
598 		/*
599 		 * Check how many ipif's have members in this group -
600 		 * if more then one we should not tell the driver to join
601 		 * this time
602 		 */
603 		if (ilm_numentries_v6(ill, &v6group) > 1)
604 			return (0);
605 		if (ill->ill_group == NULL)
606 			ret = ip_join_allmulti(ipif);
607 		else
608 			ret = ill_nominate_mcast_rcv(ill->ill_group);
609 		if (ret != 0)
610 			ilm_delete(ilm);
611 		return (ret);
612 	}
613 
614 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
615 		igmp_joingroup(ilm);
616 
617 	if (ilm_numentries_v6(ill, &v6group) > 1)
618 		return (0);
619 
620 	ret = ip_ll_addmulti_v6(ipif, &v6group);
621 	if (ret != 0)
622 		ilm_delete(ilm);
623 	return (ret);
624 }
625 
626 /*
627  * The unspecified address means all multicast addresses.
628  * This is only used by the multicast router.
629  *
630  * ill identifies the interface to join on; it may not match the
631  * interface requested by the application of a failover has taken
632  * place.  orig_ifindex always identifies the interface requested
633  * by the app.
634  *
635  * ilgstat tells us if there's an ilg associated with this join,
636  * and if so, if it's a new ilg or a change to an existing one.
637  * ilg_fmode and ilg_flist give us the current filter state of
638  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
639  */
640 int
641 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
642     zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
643     slist_t *ilg_flist)
644 {
645 	ilm_t	*ilm;
646 	int	ret;
647 
648 	ASSERT(IAM_WRITER_ILL(ill));
649 
650 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
651 	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
652 		return (EINVAL);
653 	}
654 
655 	/*
656 	 * An ilm is uniquely identified by the tuple of (group, ill,
657 	 * orig_ill).  group is the multicast group address, ill is
658 	 * the interface on which it is currently joined, and orig_ill
659 	 * is the interface on which the application requested the
660 	 * join.  orig_ill and ill are the same unless orig_ill has
661 	 * failed over.
662 	 *
663 	 * Both orig_ill and ill are required, which means we may have
664 	 * 2 ilms on an ill for the same group, but with different
665 	 * orig_ills.  These must be kept separate, so that when failback
666 	 * occurs, the appropriate ilms are moved back to their orig_ill
667 	 * without disrupting memberships on the ill to which they had
668 	 * been moved.
669 	 *
670 	 * In order to track orig_ill, we store orig_ifindex in the
671 	 * ilm and ilg.
672 	 */
673 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
674 	if (ilm != NULL)
675 		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));
676 
677 	/*
678 	 * We need to remember where the application really wanted
679 	 * to join. This will be used later if we want to failback
680 	 * to the original interface.
681 	 */
682 	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
683 	    ilg_flist, orig_ifindex, zoneid);
684 	if (ilm == NULL)
685 		return (ENOMEM);
686 
687 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
688 		/*
689 		 * Check how many ipif's that have members in this group -
690 		 * if more then one we should not tell the driver to join
691 		 * this time
692 		 */
693 		if (ilm_numentries_v6(ill, v6group) > 1)
694 			return (0);
695 		if (ill->ill_group == NULL)
696 			ret = ip_join_allmulti(ill->ill_ipif);
697 		else
698 			ret = ill_nominate_mcast_rcv(ill->ill_group);
699 
700 		if (ret != 0)
701 			ilm_delete(ilm);
702 		return (ret);
703 	}
704 
705 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
706 		mld_joingroup(ilm);
707 
708 	/*
709 	 * If we have more then one we should not tell the driver
710 	 * to join this time.
711 	 */
712 	if (ilm_numentries_v6(ill, v6group) > 1)
713 		return (0);
714 
715 	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
716 	if (ret != 0)
717 		ilm_delete(ilm);
718 	return (ret);
719 }
720 
721 /*
722  * Send a multicast request to the driver for enabling multicast reception
723  * for v6groupp address. The caller has already checked whether it is
724  * appropriate to send one or not.
725  */
726 int
727 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
728 {
729 	mblk_t	*mp;
730 	uint32_t addrlen, addroff;
731 	char	group_buf[INET6_ADDRSTRLEN];
732 
733 	ASSERT(IAM_WRITER_ILL(ill));
734 
735 	/*
736 	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
737 	 * on.
738 	 */
739 	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
740 	    &addrlen, &addroff);
741 	if (!mp)
742 		return (ENOMEM);
743 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
744 		ipaddr_t v4group;
745 
746 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
747 		/*
748 		 * NOTE!!!
749 		 * The "addroff" passed in here was calculated by
750 		 * ill_create_dl(), and will be used by ill_create_squery()
751 		 * to perform some twisted coding magic. It is the offset
752 		 * into the dl_xxx_req of the hw addr. Here, it will be
753 		 * added to b_wptr - b_rptr to create a magic number that
754 		 * is not an offset into this squery mblk.
755 		 * The actual hardware address will be accessed only in the
756 		 * dl_xxx_req, not in the squery. More importantly,
757 		 * that hardware address can *only* be accessed in this
758 		 * mblk chain by calling mi_offset_param_c(), which uses
759 		 * the magic number in the squery hw offset field to go
760 		 * to the *next* mblk (the dl_xxx_req), subtract the
761 		 * (b_wptr - b_rptr), and find the actual offset into
762 		 * the dl_xxx_req.
763 		 * Any method that depends on using the
764 		 * offset field in the dl_disabmulti_req or squery
765 		 * to find either hardware address will similarly fail.
766 		 *
767 		 * Look in ar_entry_squery() in arp.c to see how this offset
768 		 * is used.
769 		 */
770 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
771 		if (!mp)
772 			return (ENOMEM);
773 		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
774 		    inet_ntop(AF_INET6, v6groupp, group_buf,
775 		    sizeof (group_buf)),
776 		    ill->ill_name));
777 		putnext(ill->ill_rq, mp);
778 	} else {
779 		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on"
780 		    " %s\n",
781 		    inet_ntop(AF_INET6, v6groupp, group_buf,
782 		    sizeof (group_buf)),
783 		    ill->ill_name));
784 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
785 	}
786 	return (0);
787 }
788 
789 /*
790  * Send a multicast request to the driver for enabling multicast
791  * membership for v6group if appropriate.
792  */
793 static int
794 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
795 {
796 	ill_t	*ill = ipif->ipif_ill;
797 
798 	ASSERT(IAM_WRITER_IPIF(ipif));
799 
800 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
801 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
802 		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
803 		return (0);	/* Must be IRE_IF_NORESOLVER */
804 	}
805 
806 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
807 		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
808 		return (0);
809 	}
810 	if (ill->ill_ipif_up_count == 0) {
811 		/*
812 		 * Nobody there. All multicast addresses will be re-joined
813 		 * when we get the DL_BIND_ACK bringing the interface up.
814 		 */
815 		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
816 		return (0);
817 	}
818 	return (ip_ll_send_enabmulti_req(ill, v6groupp));
819 }
820 
821 /*
822  * INADDR_ANY means all multicast addresses. This is only used
823  * by the multicast router.
824  * INADDR_ANY is stored as the IPv6 unspecifed addr.
825  */
826 int
827 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
828 {
829 	ill_t	*ill = ipif->ipif_ill;
830 	ilm_t *ilm;
831 	in6_addr_t v6group;
832 	int	ret;
833 
834 	ASSERT(IAM_WRITER_IPIF(ipif));
835 
836 	if (!CLASSD(group) && group != INADDR_ANY)
837 		return (EINVAL);
838 
839 	/*
840 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
841 	 */
842 	if (group == INADDR_ANY)
843 		v6group = ipv6_all_zeros;
844 	else
845 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
846 
847 	/*
848 	 * Look for a match on the ipif.
849 	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
850 	 */
851 	ilm = ilm_lookup_ipif(ipif, group);
852 	if (ilm == NULL)
853 		return (ENOENT);
854 
855 	/* Update counters */
856 	if (no_ilg)
857 		ilm->ilm_no_ilg_cnt--;
858 
859 	if (leaving)
860 		ilm->ilm_refcnt--;
861 
862 	if (ilm->ilm_refcnt > 0)
863 		return (ilm_update_del(ilm, B_FALSE));
864 
865 	if (group == INADDR_ANY) {
866 		ilm_delete(ilm);
867 		/*
868 		 * Check how many ipif's that have members in this group -
869 		 * if there are still some left then don't tell the driver
870 		 * to drop it.
871 		 */
872 		if (ilm_numentries_v6(ill, &v6group) != 0)
873 			return (0);
874 
875 		/*
876 		 * If we never joined, then don't leave.  This can happen
877 		 * if we're in an IPMP group, since only one ill per IPMP
878 		 * group receives all multicast packets.
879 		 */
880 		if (!ill->ill_join_allmulti) {
881 			ASSERT(ill->ill_group != NULL);
882 			return (0);
883 		}
884 
885 		ret = ip_leave_allmulti(ipif);
886 		if (ill->ill_group != NULL)
887 			(void) ill_nominate_mcast_rcv(ill->ill_group);
888 		return (ret);
889 	}
890 
891 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
892 		igmp_leavegroup(ilm);
893 
894 	ilm_delete(ilm);
895 	/*
896 	 * Check how many ipif's that have members in this group -
897 	 * if there are still some left then don't tell the driver
898 	 * to drop it.
899 	 */
900 	if (ilm_numentries_v6(ill, &v6group) != 0)
901 		return (0);
902 	return (ip_ll_delmulti_v6(ipif, &v6group));
903 }
904 
905 /*
906  * The unspecified address means all multicast addresses.
907  * This is only used by the multicast router.
908  */
909 int
910 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
911     zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
912 {
913 	ipif_t	*ipif;
914 	ilm_t *ilm;
915 	int	ret;
916 
917 	ASSERT(IAM_WRITER_ILL(ill));
918 
919 	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
920 	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
921 		return (EINVAL);
922 
923 	/*
924 	 * Look for a match on the ill.
925 	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
926 	 *
927 	 * Similar to ip_addmulti_v6, we should always look using
928 	 * the orig_ifindex.
929 	 *
930 	 * 1) If orig_ifindex is different from ill's ifindex
931 	 *    we should have an ilm with orig_ifindex created in
932 	 *    ip_addmulti_v6. We should delete that here.
933 	 *
934 	 * 2) If orig_ifindex is same as ill's ifindex, we should
935 	 *    not delete the ilm that is temporarily here because of
936 	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
937 	 *    different from ill's ifindex.
938 	 *
939 	 * Thus, always lookup using orig_ifindex.
940 	 */
941 	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
942 	if (ilm == NULL)
943 		return (ENOENT);
944 
945 	ASSERT(ilm->ilm_ill == ill);
946 
947 	ipif = ill->ill_ipif;
948 
949 	/* Update counters */
950 	if (no_ilg)
951 		ilm->ilm_no_ilg_cnt--;
952 
953 	if (leaving)
954 		ilm->ilm_refcnt--;
955 
956 	if (ilm->ilm_refcnt > 0)
957 		return (ilm_update_del(ilm, B_TRUE));
958 
959 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
960 		ilm_delete(ilm);
961 		/*
962 		 * Check how many ipif's that have members in this group -
963 		 * if there are still some left then don't tell the driver
964 		 * to drop it.
965 		 */
966 		if (ilm_numentries_v6(ill, v6group) != 0)
967 			return (0);
968 
969 		/*
970 		 * If we never joined, then don't leave.  This can happen
971 		 * if we're in an IPMP group, since only one ill per IPMP
972 		 * group receives all multicast packets.
973 		 */
974 		if (!ill->ill_join_allmulti) {
975 			ASSERT(ill->ill_group != NULL);
976 			return (0);
977 		}
978 
979 		ret = ip_leave_allmulti(ipif);
980 		if (ill->ill_group != NULL)
981 			(void) ill_nominate_mcast_rcv(ill->ill_group);
982 		return (ret);
983 	}
984 
985 	if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
986 		mld_leavegroup(ilm);
987 
988 	ilm_delete(ilm);
989 	/*
990 	 * Check how many ipif's that have members in this group -
991 	 * if there are still some left then don't tell the driver
992 	 * to drop it.
993 	 */
994 	if (ilm_numentries_v6(ill, v6group) != 0)
995 		return (0);
996 	return (ip_ll_delmulti_v6(ipif, v6group));
997 }
998 
999 /*
1000  * Send a multicast request to the driver for disabling multicast reception
1001  * for v6groupp address. The caller has already checked whether it is
1002  * appropriate to send one or not.
1003  */
1004 int
1005 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
1006 {
1007 	mblk_t	*mp;
1008 	char	group_buf[INET6_ADDRSTRLEN];
1009 	uint32_t	addrlen, addroff;
1010 
1011 	ASSERT(IAM_WRITER_ILL(ill));
1012 	/*
1013 	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
1014 	 * on.
1015 	 */
1016 	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
1017 	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);
1018 
1019 	if (!mp)
1020 		return (ENOMEM);
1021 
1022 	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
1023 		ipaddr_t v4group;
1024 
1025 		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
1026 		/*
1027 		 * NOTE!!!
1028 		 * The "addroff" passed in here was calculated by
1029 		 * ill_create_dl(), and will be used by ill_create_squery()
1030 		 * to perform some twisted coding magic. It is the offset
1031 		 * into the dl_xxx_req of the hw addr. Here, it will be
1032 		 * added to b_wptr - b_rptr to create a magic number that
1033 		 * is not an offset into this mblk.
1034 		 *
1035 		 * Please see the comment in ip_ll_send)enabmulti_req()
1036 		 * for a complete explanation.
1037 		 *
1038 		 * Look in ar_entry_squery() in arp.c to see how this offset
1039 		 * is used.
1040 		 */
1041 		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
1042 		if (!mp)
1043 			return (ENOMEM);
1044 		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
1045 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1046 		    sizeof (group_buf)),
1047 		    ill->ill_name));
1048 		putnext(ill->ill_rq, mp);
1049 	} else {
1050 		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
1051 		    " %s\n",
1052 		    inet_ntop(AF_INET6, v6groupp, group_buf,
1053 		    sizeof (group_buf)),
1054 		    ill->ill_name));
1055 		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
1056 	}
1057 	return (0);
1058 }
1059 
1060 /*
1061  * Send a multicast request to the driver for disabling multicast
1062  * membership for v6group if appropriate.
1063  */
1064 static int
1065 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
1066 {
1067 	ill_t	*ill = ipif->ipif_ill;
1068 
1069 	ASSERT(IAM_WRITER_IPIF(ipif));
1070 
1071 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
1072 	    ipif->ipif_flags & IPIF_POINTOPOINT) {
1073 		return (0);	/* Must be IRE_IF_NORESOLVER */
1074 	}
1075 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
1076 		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
1077 		return (0);
1078 	}
1079 	if (ill->ill_ipif_up_count == 0) {
1080 		/*
1081 		 * Nobody there. All multicast addresses will be re-joined
1082 		 * when we get the DL_BIND_ACK bringing the interface up.
1083 		 */
1084 		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
1085 		return (0);
1086 	}
1087 	return (ip_ll_send_disabmulti_req(ill, v6group));
1088 }
1089 
1090 /*
1091  * Make the driver pass up all multicast packets
1092  *
1093  * With ill groups, the caller makes sure that there is only
1094  * one ill joining the allmulti group.
1095  */
1096 int
1097 ip_join_allmulti(ipif_t *ipif)
1098 {
1099 	ill_t	*ill = ipif->ipif_ill;
1100 	mblk_t	*mp;
1101 	uint32_t	addrlen, addroff;
1102 
1103 	ASSERT(IAM_WRITER_IPIF(ipif));
1104 
1105 	if (ill->ill_ipif_up_count == 0) {
1106 		/*
1107 		 * Nobody there. All multicast addresses will be re-joined
1108 		 * when we get the DL_BIND_ACK bringing the interface up.
1109 		 */
1110 		return (0);
1111 	}
1112 
1113 	ASSERT(!ill->ill_join_allmulti);
1114 
1115 	/*
1116 	 * Create a DL_PROMISCON_REQ message and send it directly to
1117 	 * the DLPI provider.  We don't need to do this for certain
1118 	 * media types for which we never need to turn promiscuous
1119 	 * mode on.
1120 	 */
1121 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1122 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1123 		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
1124 		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
1125 		if (mp == NULL)
1126 			return (ENOMEM);
1127 		putnext(ill->ill_wq, mp);
1128 	}
1129 
1130 	mutex_enter(&ill->ill_lock);
1131 	ill->ill_join_allmulti = B_TRUE;
1132 	mutex_exit(&ill->ill_lock);
1133 	return (0);
1134 }
1135 
1136 /*
1137  * Make the driver stop passing up all multicast packets
1138  *
1139  * With ill groups, we need to nominate some other ill as
1140  * this ipif->ipif_ill is leaving the group.
1141  */
1142 int
1143 ip_leave_allmulti(ipif_t *ipif)
1144 {
1145 	ill_t	*ill = ipif->ipif_ill;
1146 	mblk_t	*mp;
1147 	uint32_t	addrlen, addroff;
1148 
1149 	ASSERT(IAM_WRITER_IPIF(ipif));
1150 
1151 	if (ill->ill_ipif_up_count == 0) {
1152 		/*
1153 		 * Nobody there. All multicast addresses will be re-joined
1154 		 * when we get the DL_BIND_ACK bringing the interface up.
1155 		 */
1156 		return (0);
1157 	}
1158 
1159 	ASSERT(ill->ill_join_allmulti);
1160 
1161 	/*
1162 	 * Create a DL_PROMISCOFF_REQ message and send it directly to
1163 	 * the DLPI provider.  We don't need to do this for certain
1164 	 * media types for which we never need to turn promiscuous
1165 	 * mode on.
1166 	 */
1167 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
1168 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
1169 		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
1170 		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
1171 		if (mp == NULL)
1172 			return (ENOMEM);
1173 		putnext(ill->ill_wq, mp);
1174 	}
1175 
1176 	mutex_enter(&ill->ill_lock);
1177 	ill->ill_join_allmulti = B_FALSE;
1178 	mutex_exit(&ill->ill_lock);
1179 	return (0);
1180 }
1181 
1182 /*
1183  * Copy mp_orig and pass it in as a local message.
1184  */
1185 void
1186 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
1187     zoneid_t zoneid)
1188 {
1189 	mblk_t		*mp;
1190 	mblk_t		*ipsec_mp;
1191 
1192 	/* TODO this could use dup'ed messages except for the IP header. */
1193 	mp = ip_copymsg(mp_orig);
1194 	if (mp == NULL)
1195 		return;
1196 	if (mp->b_datap->db_type == M_CTL) {
1197 		ipsec_mp = mp;
1198 		mp = mp->b_cont;
1199 	} else {
1200 		ipsec_mp = mp;
1201 	}
1202 	ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL,
1203 	    fanout_flags, zoneid);
1204 }
1205 
/*
 * Template for the AR_ENTRY_SQUERY message that ill_create_squery()
 * builds by passing this structure to ill_arp_alloc().  The flags and
 * hardware address fields are zero here; ill_create_squery() stores the
 * hardware address length and offset after the message is allocated.
 */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
	sizeof (area_t),			/* proto addr offset */
	IP_ADDR_LEN,			/* proto addr_length */
	0,				/* proto mask offset */
	/* Rest is initialized when used */
	0,				/* flags */
	0,				/* hw addr offset */
	0,				/* hw addr length */
};
1219 
1220 static mblk_t *
1221 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
1222     uint32_t addroff, mblk_t *mp_tail)
1223 {
1224 	mblk_t	*mp;
1225 	area_t	*area;
1226 
1227 	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
1228 				(caddr_t)&ipaddr);
1229 	if (!mp) {
1230 		freemsg(mp_tail);
1231 		return (NULL);
1232 	}
1233 	area = (area_t *)mp->b_rptr;
1234 	area->area_hw_addr_length = addrlen;
1235 	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
1236 	/*
1237 	 * NOTE!
1238 	 *
1239 	 * The area_hw_addr_offset, as can be seen, does not hold the
1240 	 * actual hardware address offset. Rather, it holds the offset
1241 	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
1242 	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
1243 	 * mi_offset_paramc() to find the hardware address in the
1244 	 * *second* mblk (dl_xxx_req), not this mblk.
1245 	 *
1246 	 * Using mi_offset_paramc() is thus the *only* way to access
1247 	 * the dl_xxx_hw address.
1248 	 *
1249 	 * The squery hw address should *not* be accessed.
1250 	 *
1251 	 * See ar_entry_squery() in arp.c for an example of how all this works.
1252 	 */
1253 
1254 	mp->b_cont = mp_tail;
1255 	return (mp);
1256 }
1257 
1258 /*
1259  * Create a dlpi message with room for phys+sap. When we come back in
1260  * ip_wput_ctl() we will strip the sap for those primitives which
1261  * only need a physical address.
1262  */
1263 static mblk_t *
1264 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
1265     uint32_t *addr_lenp, uint32_t *addr_offp)
1266 {
1267 	mblk_t	*mp;
1268 	uint32_t	hw_addr_length;
1269 	char		*cp;
1270 	uint32_t	offset;
1271 	uint32_t 	size;
1272 
1273 	*addr_lenp = *addr_offp = 0;
1274 
1275 	hw_addr_length = ill->ill_phys_addr_length;
1276 	if (!hw_addr_length) {
1277 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1278 		return (NULL);
1279 	}
1280 
1281 	size = length;
1282 	switch (dl_primitive) {
1283 	case DL_ENABMULTI_REQ:
1284 	case DL_DISABMULTI_REQ:
1285 		size += hw_addr_length;
1286 		break;
1287 	case DL_PROMISCON_REQ:
1288 	case DL_PROMISCOFF_REQ:
1289 		break;
1290 	default:
1291 		return (NULL);
1292 	}
1293 	mp = allocb(size, BPRI_HI);
1294 	if (!mp)
1295 		return (NULL);
1296 	mp->b_wptr += size;
1297 	mp->b_datap->db_type = M_PROTO;
1298 
1299 	cp = (char *)mp->b_rptr;
1300 	offset = length;
1301 
1302 	switch (dl_primitive) {
1303 	case DL_ENABMULTI_REQ: {
1304 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1305 
1306 		dl->dl_primitive = dl_primitive;
1307 		dl->dl_addr_offset = offset;
1308 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1309 		*addr_offp = offset;
1310 		break;
1311 	}
1312 	case DL_DISABMULTI_REQ: {
1313 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1314 
1315 		dl->dl_primitive = dl_primitive;
1316 		dl->dl_addr_offset = offset;
1317 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1318 		*addr_offp = offset;
1319 		break;
1320 	}
1321 	case DL_PROMISCON_REQ:
1322 	case DL_PROMISCOFF_REQ: {
1323 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1324 
1325 		dl->dl_primitive = dl_primitive;
1326 		dl->dl_level = DL_PROMISC_MULTI;
1327 		break;
1328 	}
1329 	}
1330 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1331 		*addr_lenp, *addr_offp));
1332 	return (mp);
1333 }
1334 
1335 void
1336 ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
1337 {
1338 	ill_t	*ill = (ill_t *)q->q_ptr;
1339 	mblk_t	*mp = mp_orig;
1340 	area_t	*area;
1341 
1342 	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
1343 	if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) ||
1344 	    mp->b_cont == NULL) {
1345 		putnext(q, mp);
1346 		return;
1347 	}
1348 	area = (area_t *)mp->b_rptr;
1349 	if (area->area_cmd != AR_ENTRY_SQUERY) {
1350 		putnext(q, mp);
1351 		return;
1352 	}
1353 	mp = mp->b_cont;
1354 	/*
1355 	 * Update dl_addr_length and dl_addr_offset for primitives that
1356 	 * have physical addresses as opposed to full saps
1357 	 */
1358 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
1359 	case DL_ENABMULTI_REQ:
1360 		/* Track the state if this is the first enabmulti */
1361 		if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN)
1362 			ill->ill_dlpi_multicast_state = IDMS_INPROGRESS;
1363 		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
1364 		break;
1365 	case DL_DISABMULTI_REQ:
1366 		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
1367 		break;
1368 	default:
1369 		ip1dbg(("ip_wput_ctl: default\n"));
1370 		break;
1371 	}
1372 	freeb(mp_orig);
1373 	putnext(q, mp);
1374 }
1375 
1376 /*
1377  * Rejoin any groups which have been explicitly joined by the application (we
1378  * left all explicitly joined groups as part of ill_leave_multicast() prior to
1379  * bringing the interface down).  Note that because groups can be joined and
1380  * left while an interface is down, this may not be the same set of groups
1381  * that we left in ill_leave_multicast().
1382  */
1383 void
1384 ill_recover_multicast(ill_t *ill)
1385 {
1386 	ilm_t	*ilm;
1387 	char    addrbuf[INET6_ADDRSTRLEN];
1388 
1389 	ASSERT(IAM_WRITER_ILL(ill));
1390 
1391 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1392 		/*
1393 		 * Check how many ipif's that have members in this group -
1394 		 * if more then one we make sure that this entry is first
1395 		 * in the list.
1396 		 */
1397 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1398 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1399 			continue;
1400 		ip1dbg(("ill_recover_multicast: %s\n",
1401 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1402 		    sizeof (addrbuf))));
1403 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1404 			if (ill->ill_group == NULL) {
1405 				(void) ip_join_allmulti(ill->ill_ipif);
1406 			} else {
1407 				/*
1408 				 * We don't want to join on this ill,
1409 				 * if somebody else in the group has
1410 				 * already been nominated.
1411 				 */
1412 				(void) ill_nominate_mcast_rcv(ill->ill_group);
1413 			}
1414 		} else {
1415 			(void) ip_ll_addmulti_v6(ill->ill_ipif,
1416 			    &ilm->ilm_v6addr);
1417 		}
1418 	}
1419 }
1420 
1421 /*
1422  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1423  * that were explicitly joined.  Note that both these functions could be
1424  * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
1425  * and DL_ENABMULTI_REQ messages when an interface is down.
1426  */
1427 void
1428 ill_leave_multicast(ill_t *ill)
1429 {
1430 	ilm_t	*ilm;
1431 	char    addrbuf[INET6_ADDRSTRLEN];
1432 
1433 	ASSERT(IAM_WRITER_ILL(ill));
1434 
1435 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1436 		/*
1437 		 * Check how many ipif's that have members in this group -
1438 		 * if more then one we make sure that this entry is first
1439 		 * in the list.
1440 		 */
1441 		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
1442 		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
1443 			continue;
1444 		ip1dbg(("ill_leave_multicast: %s\n",
1445 		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
1446 		    sizeof (addrbuf))));
1447 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1448 			(void) ip_leave_allmulti(ill->ill_ipif);
1449 			/*
1450 			 * If we were part of an IPMP group, then
1451 			 * ill_handoff_responsibility() has already
1452 			 * nominated a new member (so we don't).
1453 			 */
1454 			ASSERT(ill->ill_group == NULL);
1455 		} else {
1456 			(void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr);
1457 		}
1458 	}
1459 }
1460 
1461 /*
1462  * Find an ilm for matching the ill and which has the source in its
1463  * INCLUDE list or does not have it in its EXCLUDE list
1464  */
1465 ilm_t *
1466 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src)
1467 {
1468 	in6_addr_t	v6group, v6src;
1469 
1470 	/*
1471 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
1472 	 */
1473 	if (group == INADDR_ANY)
1474 		v6group = ipv6_all_zeros;
1475 	else
1476 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1477 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
1478 
1479 	return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src));
1480 }
1481 
1482 ilm_t *
1483 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group,
1484     const in6_addr_t *v6src)
1485 {
1486 	ilm_t	*ilm;
1487 	boolean_t isinlist;
1488 	int	i, numsrc;
1489 
1490 	/*
1491 	 * If the source is in any ilm's INCLUDE list, or if
1492 	 * it is not in any ilm's EXCLUDE list, we have a hit.
1493 	 */
1494 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1495 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1496 
1497 			isinlist = B_FALSE;
1498 			numsrc = (ilm->ilm_filter == NULL) ?
1499 			    0 : ilm->ilm_filter->sl_numsrc;
1500 			for (i = 0; i < numsrc; i++) {
1501 				if (IN6_ARE_ADDR_EQUAL(v6src,
1502 				    &ilm->ilm_filter->sl_addr[i])) {
1503 					isinlist = B_TRUE;
1504 					break;
1505 				}
1506 			}
1507 			if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) ||
1508 			    (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE))
1509 				return (ilm);
1510 			else
1511 				return (NULL);
1512 		}
1513 	}
1514 	return (NULL);
1515 }
1516 
1517 
1518 /* Find an ilm for matching the ill */
1519 ilm_t *
1520 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1521 {
1522 	in6_addr_t	v6group;
1523 
1524 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1525 	    IAM_WRITER_ILL(ill));
1526 	/*
1527 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1528 	 */
1529 	if (group == INADDR_ANY)
1530 		v6group = ipv6_all_zeros;
1531 	else
1532 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1533 
1534 	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
1535 }
1536 
1537 /*
1538  * Find an ilm for matching the ill. All the ilm lookup functions
1539  * ignore ILM_DELETED ilms. These have been logically deleted, and
1540  * igmp and linklayer disable multicast have been done. Only mi_free
1541  * yet to be done. Still there in the list due to ilm_walkers. The
1542  * last walker will release it.
1543  */
1544 ilm_t *
1545 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1546 {
1547 	ilm_t	*ilm;
1548 
1549 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1550 	    IAM_WRITER_ILL(ill));
1551 
1552 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1553 		if (ilm->ilm_flags & ILM_DELETED)
1554 			continue;
1555 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1556 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
1557 			return (ilm);
1558 	}
1559 	return (NULL);
1560 }
1561 
1562 ilm_t *
1563 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
1564     zoneid_t zoneid)
1565 {
1566 	ilm_t *ilm;
1567 
1568 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1569 	    IAM_WRITER_ILL(ill));
1570 
1571 	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
1572 		if (ilm->ilm_flags & ILM_DELETED)
1573 			continue;
1574 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1575 		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
1576 		    ilm->ilm_orig_ifindex == index) {
1577 			return (ilm);
1578 		}
1579 	}
1580 	return (NULL);
1581 }
1582 
1583 ilm_t *
1584 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid)
1585 {
1586 	in6_addr_t	v6group;
1587 
1588 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1589 	    IAM_WRITER_ILL(ill));
1590 	/*
1591 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1592 	 */
1593 	if (group == INADDR_ANY)
1594 		v6group = ipv6_all_zeros;
1595 	else
1596 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1597 
1598 	return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid));
1599 }
1600 
1601 /*
1602  * Found an ilm for the ipif. Only needed for IPv4 which does
1603  * ipif specific socket options.
1604  */
1605 ilm_t *
1606 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
1607 {
1608 	ill_t	*ill = ipif->ipif_ill;
1609 	ilm_t	*ilm;
1610 	in6_addr_t	v6group;
1611 
1612 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1613 	    IAM_WRITER_ILL(ill));
1614 
1615 	/*
1616 	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
1617 	 */
1618 	if (group == INADDR_ANY)
1619 		v6group = ipv6_all_zeros;
1620 	else
1621 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1622 
1623 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1624 		if (ilm->ilm_flags & ILM_DELETED)
1625 			continue;
1626 		if (ilm->ilm_ipif == ipif &&
1627 		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
1628 			return (ilm);
1629 	}
1630 	return (NULL);
1631 }
1632 
1633 /*
1634  * How many members on this ill?
1635  */
1636 int
1637 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
1638 {
1639 	ilm_t	*ilm;
1640 	int i = 0;
1641 
1642 	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) ||
1643 	    IAM_WRITER_ILL(ill));
1644 
1645 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1646 		if (ilm->ilm_flags & ILM_DELETED)
1647 			continue;
1648 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1649 			i++;
1650 		}
1651 	}
1652 	return (i);
1653 }
1654 
1655 /* Caller guarantees that the group is not already on the list */
1656 static ilm_t *
1657 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1658     mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
1659     zoneid_t zoneid)
1660 {
1661 	ill_t	*ill = ipif->ipif_ill;
1662 	ilm_t	*ilm;
1663 	ilm_t	*ilm_cur;
1664 	ilm_t	**ilm_ptpn;
1665 
1666 	ASSERT(IAM_WRITER_IPIF(ipif));
1667 
1668 	ilm = GETSTRUCT(ilm_t, 1);
1669 	if (ilm == NULL)
1670 		return (NULL);
1671 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1672 		ilm->ilm_filter = l_alloc();
1673 		if (ilm->ilm_filter == NULL) {
1674 			mi_free(ilm);
1675 			return (NULL);
1676 		}
1677 	}
1678 	ilm->ilm_v6addr = *v6group;
1679 	ilm->ilm_refcnt = 1;
1680 	ilm->ilm_zoneid = zoneid;
1681 	ilm->ilm_timer = INFINITY;
1682 	ilm->ilm_rtx.rtx_timer = INFINITY;
1683 	/*
1684 	 * IPv4 Multicast groups are joined using ipif.
1685 	 * IPv6 Multicast groups are joined using ill.
1686 	 */
1687 	if (ill->ill_isv6) {
1688 		ilm->ilm_ill = ill;
1689 		ilm->ilm_ipif = NULL;
1690 	} else {
1691 		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
1692 		ilm->ilm_ipif = ipif;
1693 		ilm->ilm_ill = NULL;
1694 	}
1695 	/*
1696 	 * After this if ilm moves to a new ill, we don't change
1697 	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
1698 	 * it has been moved. Indexes don't match even when the application
1699 	 * wants to join on a FAILED/INACTIVE interface because we choose
1700 	 * a new interface to join in. This is considered as an implicit
1701 	 * move.
1702 	 */
1703 	ilm->ilm_orig_ifindex = orig_ifindex;
1704 
1705 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
1706 	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
1707 
1708 	/*
1709 	 * Grab lock to give consistent view to readers
1710 	 */
1711 	mutex_enter(&ill->ill_lock);
1712 	/*
1713 	 * All ilms in the same zone are contiguous in the ill_ilm list.
1714 	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
1715 	 * sending duplicates up when two applications in the same zone join the
1716 	 * same group on different logical interfaces.
1717 	 */
1718 	ilm_cur = ill->ill_ilm;
1719 	ilm_ptpn = &ill->ill_ilm;
1720 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
1721 		ilm_ptpn = &ilm_cur->ilm_next;
1722 		ilm_cur = ilm_cur->ilm_next;
1723 	}
1724 	ilm->ilm_next = ilm_cur;
1725 	*ilm_ptpn = ilm;
1726 
1727 	/*
1728 	 * If we have an associated ilg, use its filter state; if not,
1729 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1730 	 */
1731 	if (ilgstat != ILGSTAT_NONE) {
1732 		if (!SLIST_IS_EMPTY(ilg_flist))
1733 			l_copy(ilg_flist, ilm->ilm_filter);
1734 		ilm->ilm_fmode = ilg_fmode;
1735 	} else {
1736 		ilm->ilm_no_ilg_cnt = 1;
1737 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1738 	}
1739 
1740 	mutex_exit(&ill->ill_lock);
1741 	return (ilm);
1742 }
1743 
1744 void
1745 ilm_walker_cleanup(ill_t *ill)
1746 {
1747 	ilm_t	**ilmp;
1748 	ilm_t	*ilm;
1749 
1750 	ASSERT(MUTEX_HELD(&ill->ill_lock));
1751 	ASSERT(ill->ill_ilm_walker_cnt == 0);
1752 
1753 	ilmp = &ill->ill_ilm;
1754 	while (*ilmp != NULL) {
1755 		if ((*ilmp)->ilm_flags & ILM_DELETED) {
1756 			ilm = *ilmp;
1757 			*ilmp = ilm->ilm_next;
1758 			FREE_SLIST(ilm->ilm_filter);
1759 			FREE_SLIST(ilm->ilm_pendsrcs);
1760 			FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1761 			FREE_SLIST(ilm->ilm_rtx.rtx_block);
1762 			mi_free((char *)ilm);
1763 		} else {
1764 			ilmp = &(*ilmp)->ilm_next;
1765 		}
1766 	}
1767 	ill->ill_ilm_cleanup_reqd = 0;
1768 }
1769 
1770 /*
1771  * Unlink ilm and free it.
1772  */
1773 static void
1774 ilm_delete(ilm_t *ilm)
1775 {
1776 	ill_t	*ill;
1777 	ilm_t	**ilmp;
1778 
1779 	if (ilm->ilm_ipif != NULL) {
1780 		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
1781 		ASSERT(ilm->ilm_ill == NULL);
1782 		ill = ilm->ilm_ipif->ipif_ill;
1783 		ASSERT(!ill->ill_isv6);
1784 	} else {
1785 		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
1786 		ASSERT(ilm->ilm_ipif == NULL);
1787 		ill = ilm->ilm_ill;
1788 		ASSERT(ill->ill_isv6);
1789 	}
1790 	/*
1791 	 * Delete under lock protection so that readers don't stumble
1792 	 * on bad ilm_next
1793 	 */
1794 	mutex_enter(&ill->ill_lock);
1795 	if (ill->ill_ilm_walker_cnt != 0) {
1796 		ilm->ilm_flags |= ILM_DELETED;
1797 		ill->ill_ilm_cleanup_reqd = 1;
1798 		mutex_exit(&ill->ill_lock);
1799 		return;
1800 	}
1801 
1802 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1803 				;
1804 	*ilmp = ilm->ilm_next;
1805 	mutex_exit(&ill->ill_lock);
1806 
1807 	FREE_SLIST(ilm->ilm_filter);
1808 	FREE_SLIST(ilm->ilm_pendsrcs);
1809 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1810 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1811 	mi_free((char *)ilm);
1812 }
1813 
1814 /* Free all ilms for this ipif */
1815 void
1816 ilm_free(ipif_t *ipif)
1817 {
1818 	ill_t	*ill = ipif->ipif_ill;
1819 	ilm_t	*ilm;
1820 	ilm_t	 *next_ilm;
1821 
1822 	ASSERT(IAM_WRITER_IPIF(ipif));
1823 
1824 	for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) {
1825 		next_ilm = ilm->ilm_next;
1826 		if (ilm->ilm_ipif == ipif)
1827 			ilm_delete(ilm);
1828 	}
1829 }
1830 
1831 /*
1832  * Looks up the appropriate ipif given a v4 multicast group and interface
1833  * address.  On success, returns 0, with *ipifpp pointing to the found
1834  * struct.  On failure, returns an errno and *ipifpp is NULL.
1835  */
1836 int
1837 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
1838     uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
1839 {
1840 	ipif_t *ipif;
1841 	int err = 0;
1842 	zoneid_t zoneid = connp->conn_zoneid;
1843 
1844 	if (!CLASSD(group) || CLASSD(src)) {
1845 		return (EINVAL);
1846 	}
1847 	*ipifpp = NULL;
1848 
1849 	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
1850 	if (ifaddr != INADDR_ANY) {
1851 		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
1852 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1853 		if (err != 0 && err != EINPROGRESS)
1854 			err = EADDRNOTAVAIL;
1855 	} else if (ifindexp != NULL && *ifindexp != 0) {
1856 		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
1857 		    CONNP_TO_WQ(connp), first_mp, func, &err);
1858 	} else {
1859 		ipif = ipif_lookup_group(group, zoneid);
1860 		if (ipif == NULL)
1861 			return (EADDRNOTAVAIL);
1862 	}
1863 	if (ipif == NULL)
1864 		return (err);
1865 
1866 	*ipifpp = ipif;
1867 	return (0);
1868 }
1869 
1870 /*
1871  * Looks up the appropriate ill (or ipif if v4mapped) given an interface
1872  * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
1873  * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
1874  * an errno and *illpp and *ipifpp are undefined.
1875  */
1876 int
1877 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
1878     const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
1879     mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
1880 {
1881 	boolean_t src_unspec;
1882 	ill_t *ill = NULL;
1883 	ipif_t *ipif = NULL;
1884 	int err;
1885 	zoneid_t zoneid = connp->conn_zoneid;
1886 	queue_t *wq = CONNP_TO_WQ(connp);
1887 
1888 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1889 
1890 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1891 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1892 			return (EINVAL);
1893 		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
1894 		if (src_unspec) {
1895 			*v4src = INADDR_ANY;
1896 		} else {
1897 			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
1898 		}
1899 		if (!CLASSD(*v4group) || CLASSD(*v4src))
1900 			return (EINVAL);
1901 		*ipifpp = NULL;
1902 		*isv6 = B_FALSE;
1903 	} else {
1904 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1905 			return (EINVAL);
1906 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1907 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1908 			return (EINVAL);
1909 		}
1910 		*illpp = NULL;
1911 		*isv6 = B_TRUE;
1912 	}
1913 
1914 	if (ifindex == 0) {
1915 		if (*isv6)
1916 			ill = ill_lookup_group_v6(v6group, zoneid);
1917 		else
1918 			ipif = ipif_lookup_group(*v4group, zoneid);
1919 		if (ill == NULL && ipif == NULL)
1920 			return (EADDRNOTAVAIL);
1921 	} else {
1922 		if (*isv6) {
1923 			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
1924 			    wq, first_mp, func, &err);
1925 			if (ill != NULL &&
1926 			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
1927 				ill_refrele(ill);
1928 				ill = NULL;
1929 				err = EADDRNOTAVAIL;
1930 			}
1931 		} else {
1932 			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
1933 			    zoneid, wq, first_mp, func, &err);
1934 		}
1935 		if (ill == NULL && ipif == NULL)
1936 			return (err);
1937 	}
1938 
1939 	*ipifpp = ipif;
1940 	*illpp = ill;
1941 	return (0);
1942 }
1943 
1944 static int
1945 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1946     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
1947 {
1948 	ilg_t *ilg;
1949 	int i, numsrc, fmode, outsrcs;
1950 	struct sockaddr_in *sin;
1951 	struct sockaddr_in6 *sin6;
1952 	struct in_addr *addrp;
1953 	slist_t *fp;
1954 	boolean_t is_v4only_api;
1955 
1956 	mutex_enter(&connp->conn_lock);
1957 
1958 	ilg = ilg_lookup_ipif(connp, grp, ipif);
1959 	if (ilg == NULL) {
1960 		mutex_exit(&connp->conn_lock);
1961 		return (EADDRNOTAVAIL);
1962 	}
1963 
1964 	if (gf == NULL) {
1965 		ASSERT(imsf != NULL);
1966 		ASSERT(!isv4mapped);
1967 		is_v4only_api = B_TRUE;
1968 		outsrcs = imsf->imsf_numsrc;
1969 	} else {
1970 		ASSERT(imsf == NULL);
1971 		is_v4only_api = B_FALSE;
1972 		outsrcs = gf->gf_numsrc;
1973 	}
1974 
1975 	/*
1976 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1977 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1978 	 * So we need to translate here.
1979 	 */
1980 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1981 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1982 	if ((fp = ilg->ilg_filter) == NULL) {
1983 		numsrc = 0;
1984 	} else {
1985 		for (i = 0; i < outsrcs; i++) {
1986 			if (i == fp->sl_numsrc)
1987 				break;
1988 			if (isv4mapped) {
1989 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1990 				sin6->sin6_family = AF_INET6;
1991 				sin6->sin6_addr = fp->sl_addr[i];
1992 			} else {
1993 				if (is_v4only_api) {
1994 					addrp = &imsf->imsf_slist[i];
1995 				} else {
1996 					sin = (struct sockaddr_in *)
1997 					    &gf->gf_slist[i];
1998 					sin->sin_family = AF_INET;
1999 					addrp = &sin->sin_addr;
2000 				}
2001 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
2002 			}
2003 		}
2004 		numsrc = fp->sl_numsrc;
2005 	}
2006 
2007 	if (is_v4only_api) {
2008 		imsf->imsf_numsrc = numsrc;
2009 		imsf->imsf_fmode = fmode;
2010 	} else {
2011 		gf->gf_numsrc = numsrc;
2012 		gf->gf_fmode = fmode;
2013 	}
2014 
2015 	mutex_exit(&connp->conn_lock);
2016 
2017 	return (0);
2018 }
2019 
2020 static int
2021 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2022     const struct in6_addr *grp, ill_t *ill)
2023 {
2024 	ilg_t *ilg;
2025 	int i;
2026 	struct sockaddr_storage *sl;
2027 	struct sockaddr_in6 *sin6;
2028 	slist_t *fp;
2029 
2030 	mutex_enter(&connp->conn_lock);
2031 
2032 	ilg = ilg_lookup_ill_v6(connp, grp, ill);
2033 	if (ilg == NULL) {
2034 		mutex_exit(&connp->conn_lock);
2035 		return (EADDRNOTAVAIL);
2036 	}
2037 
2038 	/*
2039 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2040 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2041 	 * So we need to translate here.
2042 	 */
2043 	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
2044 	    MCAST_INCLUDE : MCAST_EXCLUDE;
2045 	if ((fp = ilg->ilg_filter) == NULL) {
2046 		gf->gf_numsrc = 0;
2047 	} else {
2048 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2049 			if (i == fp->sl_numsrc)
2050 				break;
2051 			sin6 = (struct sockaddr_in6 *)sl;
2052 			sin6->sin6_family = AF_INET6;
2053 			sin6->sin6_addr = fp->sl_addr[i];
2054 		}
2055 		gf->gf_numsrc = fp->sl_numsrc;
2056 	}
2057 
2058 	mutex_exit(&connp->conn_lock);
2059 
2060 	return (0);
2061 }
2062 
2063 static int
2064 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
2065     struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
2066 {
2067 	ilg_t *ilg;
2068 	int i, err, insrcs, infmode, new_fmode;
2069 	struct sockaddr_in *sin;
2070 	struct sockaddr_in6 *sin6;
2071 	struct in_addr *addrp;
2072 	slist_t *orig_filter = NULL;
2073 	slist_t *new_filter = NULL;
2074 	mcast_record_t orig_fmode;
2075 	boolean_t leave_grp, is_v4only_api;
2076 	ilg_stat_t ilgstat;
2077 
2078 	if (gf == NULL) {
2079 		ASSERT(imsf != NULL);
2080 		ASSERT(!isv4mapped);
2081 		is_v4only_api = B_TRUE;
2082 		insrcs = imsf->imsf_numsrc;
2083 		infmode = imsf->imsf_fmode;
2084 	} else {
2085 		ASSERT(imsf == NULL);
2086 		is_v4only_api = B_FALSE;
2087 		insrcs = gf->gf_numsrc;
2088 		infmode = gf->gf_fmode;
2089 	}
2090 
2091 	/* Make sure we can handle the source list */
2092 	if (insrcs > MAX_FILTER_SIZE)
2093 		return (ENOBUFS);
2094 
2095 	/*
2096 	 * setting the filter to (INCLUDE, NULL) is treated
2097 	 * as a request to leave the group.
2098 	 */
2099 	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);
2100 
2101 	ASSERT(IAM_WRITER_IPIF(ipif));
2102 
2103 	mutex_enter(&connp->conn_lock);
2104 
2105 	ilg = ilg_lookup_ipif(connp, grp, ipif);
2106 	if (ilg == NULL) {
2107 		/*
2108 		 * if the request was actually to leave, and we
2109 		 * didn't find an ilg, there's nothing to do.
2110 		 */
2111 		if (!leave_grp)
2112 			ilg = conn_ilg_alloc(connp);
2113 		if (leave_grp || ilg == NULL) {
2114 			mutex_exit(&connp->conn_lock);
2115 			return (leave_grp ? 0 : ENOMEM);
2116 		}
2117 		ilgstat = ILGSTAT_NEW;
2118 		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
2119 		ilg->ilg_ipif = ipif;
2120 		ilg->ilg_ill = NULL;
2121 		ilg->ilg_orig_ifindex = 0;
2122 	} else if (leave_grp) {
2123 		ilg_delete(connp, ilg, NULL);
2124 		mutex_exit(&connp->conn_lock);
2125 		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
2126 		return (0);
2127 	} else {
2128 		ilgstat = ILGSTAT_CHANGE;
2129 		/* Preserve existing state in case ip_addmulti() fails */
2130 		orig_fmode = ilg->ilg_fmode;
2131 		if (ilg->ilg_filter == NULL) {
2132 			orig_filter = NULL;
2133 		} else {
2134 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2135 			if (orig_filter == NULL) {
2136 				mutex_exit(&connp->conn_lock);
2137 				return (ENOMEM);
2138 			}
2139 		}
2140 	}
2141 
2142 	/*
2143 	 * Alloc buffer to copy new state into (see below) before
2144 	 * we make any changes, so we can bail if it fails.
2145 	 */
2146 	if ((new_filter = l_alloc()) == NULL) {
2147 		mutex_exit(&connp->conn_lock);
2148 		err = ENOMEM;
2149 		goto free_and_exit;
2150 	}
2151 
2152 	if (insrcs == 0) {
2153 		CLEAR_SLIST(ilg->ilg_filter);
2154 	} else {
2155 		slist_t *fp;
2156 		if (ilg->ilg_filter == NULL) {
2157 			fp = l_alloc();
2158 			if (fp == NULL) {
2159 				if (ilgstat == ILGSTAT_NEW)
2160 					ilg_delete(connp, ilg, NULL);
2161 				mutex_exit(&connp->conn_lock);
2162 				err = ENOMEM;
2163 				goto free_and_exit;
2164 			}
2165 		} else {
2166 			fp = ilg->ilg_filter;
2167 		}
2168 		for (i = 0; i < insrcs; i++) {
2169 			if (isv4mapped) {
2170 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
2171 				fp->sl_addr[i] = sin6->sin6_addr;
2172 			} else {
2173 				if (is_v4only_api) {
2174 					addrp = &imsf->imsf_slist[i];
2175 				} else {
2176 					sin = (struct sockaddr_in *)
2177 					    &gf->gf_slist[i];
2178 					addrp = &sin->sin_addr;
2179 				}
2180 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
2181 			}
2182 		}
2183 		fp->sl_numsrc = insrcs;
2184 		ilg->ilg_filter = fp;
2185 	}
2186 	/*
2187 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2188 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2189 	 * So we need to translate here.
2190 	 */
2191 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
2192 		    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2193 
2194 	/*
2195 	 * Save copy of ilg's filter state to pass to other functions,
2196 	 * so we can release conn_lock now.
2197 	 */
2198 	new_fmode = ilg->ilg_fmode;
2199 	l_copy(ilg->ilg_filter, new_filter);
2200 
2201 	mutex_exit(&connp->conn_lock);
2202 
2203 	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
2204 	if (err != 0) {
2205 		/*
2206 		 * Restore the original filter state, or delete the
2207 		 * newly-created ilg.  We need to look up the ilg
2208 		 * again, though, since we've not been holding the
2209 		 * conn_lock.
2210 		 */
2211 		mutex_enter(&connp->conn_lock);
2212 		ilg = ilg_lookup_ipif(connp, grp, ipif);
2213 		ASSERT(ilg != NULL);
2214 		if (ilgstat == ILGSTAT_NEW) {
2215 			ilg_delete(connp, ilg, NULL);
2216 		} else {
2217 			ilg->ilg_fmode = orig_fmode;
2218 			if (SLIST_IS_EMPTY(orig_filter)) {
2219 				CLEAR_SLIST(ilg->ilg_filter);
2220 			} else {
2221 				/*
2222 				 * We didn't free the filter, even if we
2223 				 * were trying to make the source list empty;
2224 				 * so if orig_filter isn't empty, the ilg
2225 				 * must still have a filter alloc'd.
2226 				 */
2227 				l_copy(orig_filter, ilg->ilg_filter);
2228 			}
2229 		}
2230 		mutex_exit(&connp->conn_lock);
2231 	}
2232 
2233 free_and_exit:
2234 	l_free(orig_filter);
2235 	l_free(new_filter);
2236 
2237 	return (err);
2238 }
2239 
2240 static int
2241 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
2242     const struct in6_addr *grp, ill_t *ill)
2243 {
2244 	ilg_t *ilg;
2245 	int i, orig_ifindex, orig_fmode, new_fmode, err;
2246 	slist_t *orig_filter = NULL;
2247 	slist_t *new_filter = NULL;
2248 	struct sockaddr_storage *sl;
2249 	struct sockaddr_in6 *sin6;
2250 	boolean_t leave_grp;
2251 	ilg_stat_t ilgstat;
2252 
2253 	/* Make sure we can handle the source list */
2254 	if (gf->gf_numsrc > MAX_FILTER_SIZE)
2255 		return (ENOBUFS);
2256 
2257 	/*
2258 	 * setting the filter to (INCLUDE, NULL) is treated
2259 	 * as a request to leave the group.
2260 	 */
2261 	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);
2262 
2263 	ASSERT(IAM_WRITER_ILL(ill));
2264 
2265 	/*
2266 	 * Use the ifindex to do the lookup.  We can't use the ill
2267 	 * directly because ilg_ill could point to a different ill
2268 	 * if things have moved.
2269 	 */
2270 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
2271 
2272 	mutex_enter(&connp->conn_lock);
2273 	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2274 	if (ilg == NULL) {
2275 		/*
2276 		 * if the request was actually to leave, and we
2277 		 * didn't find an ilg, there's nothing to do.
2278 		 */
2279 		if (!leave_grp)
2280 			ilg = conn_ilg_alloc(connp);
2281 		if (leave_grp || ilg == NULL) {
2282 			mutex_exit(&connp->conn_lock);
2283 			return (leave_grp ? 0 : ENOMEM);
2284 		}
2285 		ilgstat = ILGSTAT_NEW;
2286 		ilg->ilg_v6group = *grp;
2287 		ilg->ilg_ipif = NULL;
2288 		/*
2289 		 * Choose our target ill to join on. This might be
2290 		 * different from the ill we've been given if it's
2291 		 * currently down and part of a group.
2292 		 *
2293 		 * new ill is not refheld; we are writer.
2294 		 */
2295 		ill = ip_choose_multi_ill(ill, grp);
2296 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
2297 		ilg->ilg_ill = ill;
2298 		/*
2299 		 * Remember the index that we joined on, so that we can
2300 		 * successfully delete them later on and also search for
2301 		 * duplicates if the application wants to join again.
2302 		 */
2303 		ilg->ilg_orig_ifindex = orig_ifindex;
2304 	} else if (leave_grp) {
2305 		/*
2306 		 * Use the ilg's current ill for the deletion,
2307 		 * we might have failed over.
2308 		 */
2309 		ill = ilg->ilg_ill;
2310 		ilg_delete(connp, ilg, NULL);
2311 		mutex_exit(&connp->conn_lock);
2312 		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
2313 		    connp->conn_zoneid, B_FALSE, B_TRUE);
2314 		return (0);
2315 	} else {
2316 		ilgstat = ILGSTAT_CHANGE;
2317 		/*
2318 		 * The current ill might be different from the one we were
2319 		 * asked to join on (if failover has occurred); we should
2320 		 * join on the ill stored in the ilg.  The original ill
2321 		 * is noted in ilg_orig_ifindex, which matched our request.
2322 		 */
2323 		ill = ilg->ilg_ill;
2324 		/* preserve existing state in case ip_addmulti() fails */
2325 		orig_fmode = ilg->ilg_fmode;
2326 		if (ilg->ilg_filter == NULL) {
2327 			orig_filter = NULL;
2328 		} else {
2329 			orig_filter = l_alloc_copy(ilg->ilg_filter);
2330 			if (orig_filter == NULL) {
2331 				mutex_exit(&connp->conn_lock);
2332 				return (ENOMEM);
2333 			}
2334 		}
2335 	}
2336 
2337 	/*
2338 	 * Alloc buffer to copy new state into (see below) before
2339 	 * we make any changes, so we can bail if it fails.
2340 	 */
2341 	if ((new_filter = l_alloc()) == NULL) {
2342 		mutex_exit(&connp->conn_lock);
2343 		err = ENOMEM;
2344 		goto free_and_exit;
2345 	}
2346 
2347 	if (gf->gf_numsrc == 0) {
2348 		CLEAR_SLIST(ilg->ilg_filter);
2349 	} else {
2350 		slist_t *fp;
2351 		if (ilg->ilg_filter == NULL) {
2352 			fp = l_alloc();
2353 			if (fp == NULL) {
2354 				if (ilgstat == ILGSTAT_NEW)
2355 					ilg_delete(connp, ilg, NULL);
2356 				mutex_exit(&connp->conn_lock);
2357 				err = ENOMEM;
2358 				goto free_and_exit;
2359 			}
2360 		} else {
2361 			fp = ilg->ilg_filter;
2362 		}
2363 		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
2364 			sin6 = (struct sockaddr_in6 *)sl;
2365 			fp->sl_addr[i] = sin6->sin6_addr;
2366 		}
2367 		fp->sl_numsrc = gf->gf_numsrc;
2368 		ilg->ilg_filter = fp;
2369 	}
2370 	/*
2371 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
2372 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
2373 	 * So we need to translate here.
2374 	 */
2375 	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
2376 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
2377 
2378 	/*
2379 	 * Save copy of ilg's filter state to pass to other functions,
2380 	 * so we can release conn_lock now.
2381 	 */
2382 	new_fmode = ilg->ilg_fmode;
2383 	l_copy(ilg->ilg_filter, new_filter);
2384 
2385 	mutex_exit(&connp->conn_lock);
2386 
2387 	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
2388 	    ilgstat, new_fmode, new_filter);
2389 	if (err != 0) {
2390 		/*
2391 		 * Restore the original filter state, or delete the
2392 		 * newly-created ilg.  We need to look up the ilg
2393 		 * again, though, since we've not been holding the
2394 		 * conn_lock.
2395 		 */
2396 		mutex_enter(&connp->conn_lock);
2397 		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
2398 		ASSERT(ilg != NULL);
2399 		if (ilgstat == ILGSTAT_NEW) {
2400 			ilg_delete(connp, ilg, NULL);
2401 		} else {
2402 			ilg->ilg_fmode = orig_fmode;
2403 			if (SLIST_IS_EMPTY(orig_filter)) {
2404 				CLEAR_SLIST(ilg->ilg_filter);
2405 			} else {
2406 				/*
2407 				 * We didn't free the filter, even if we
2408 				 * were trying to make the source list empty;
2409 				 * so if orig_filter isn't empty, the ilg
2410 				 * must still have a filter alloc'd.
2411 				 */
2412 				l_copy(orig_filter, ilg->ilg_filter);
2413 			}
2414 		}
2415 		mutex_exit(&connp->conn_lock);
2416 	}
2417 
2418 free_and_exit:
2419 	l_free(orig_filter);
2420 	l_free(new_filter);
2421 
2422 	return (err);
2423 }
2424 
2425 /*
2426  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2427  */
2428 /* ARGSUSED */
2429 int
2430 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2431     ip_ioctl_cmd_t *ipip, void *ifreq)
2432 {
2433 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2434 	/* existence verified in ip_wput_nondata() */
2435 	mblk_t *data_mp = mp->b_cont->b_cont;
2436 	int datalen, err, cmd, minsize;
2437 	int expsize = 0;
2438 	conn_t *connp;
2439 	boolean_t isv6, is_v4only_api, getcmd;
2440 	struct sockaddr_in *gsin;
2441 	struct sockaddr_in6 *gsin6;
2442 	ipaddr_t v4grp;
2443 	in6_addr_t v6grp;
2444 	struct group_filter *gf = NULL;
2445 	struct ip_msfilter *imsf = NULL;
2446 	mblk_t *ndp;
2447 
2448 	if (data_mp->b_cont != NULL) {
2449 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2450 			return (ENOMEM);
2451 		freemsg(data_mp);
2452 		data_mp = ndp;
2453 		mp->b_cont->b_cont = data_mp;
2454 	}
2455 
2456 	cmd = iocp->ioc_cmd;
2457 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2458 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2459 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2460 	datalen = MBLKL(data_mp);
2461 
2462 	if (datalen < minsize)
2463 		return (EINVAL);
2464 
2465 	/*
2466 	 * now we know we have at least have the initial structure,
2467 	 * but need to check for the source list array.
2468 	 */
2469 	if (is_v4only_api) {
2470 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2471 		isv6 = B_FALSE;
2472 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2473 	} else {
2474 		gf = (struct group_filter *)data_mp->b_rptr;
2475 		if (gf->gf_group.ss_family == AF_INET6) {
2476 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2477 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2478 		} else {
2479 			isv6 = B_FALSE;
2480 		}
2481 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2482 	}
2483 	if (datalen < expsize)
2484 		return (EINVAL);
2485 
2486 	connp = Q_TO_CONN(q);
2487 
2488 	/* operation not supported on the virtual network interface */
2489 	if (IS_VNI(ipif->ipif_ill))
2490 		return (EINVAL);
2491 
2492 	if (isv6) {
2493 		ill_t *ill = ipif->ipif_ill;
2494 		ill_refhold(ill);
2495 
2496 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2497 		v6grp = gsin6->sin6_addr;
2498 		if (getcmd)
2499 			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
2500 		else
2501 			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);
2502 
2503 		ill_refrele(ill);
2504 	} else {
2505 		boolean_t isv4mapped = B_FALSE;
2506 		if (is_v4only_api) {
2507 			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2508 		} else {
2509 			if (gf->gf_group.ss_family == AF_INET) {
2510 				gsin = (struct sockaddr_in *)&gf->gf_group;
2511 				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
2512 			} else {
2513 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2514 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2515 				    v4grp);
2516 				isv4mapped = B_TRUE;
2517 			}
2518 		}
2519 		if (getcmd)
2520 			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
2521 			    isv4mapped);
2522 		else
2523 			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
2524 			    isv4mapped);
2525 	}
2526 
2527 	return (err);
2528 }
2529 
2530 /*
2531  * Finds the ipif based on information in the ioctl headers.  Needed to make
2532  * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
2533  * ioctls prior to calling the ioctl's handler function).  Somewhat analogous
2534  * to ip_extract_lifreq_cmn() and ip_extract_tunreq().
2535  */
2536 int
2537 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func)
2538 {
2539 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2540 	int cmd = iocp->ioc_cmd, err = 0;
2541 	conn_t *connp;
2542 	ipif_t *ipif;
2543 	/* caller has verified this mblk exists */
2544 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2545 	struct ip_msfilter *imsf;
2546 	struct group_filter *gf;
2547 	ipaddr_t v4addr, v4grp;
2548 	in6_addr_t v6grp;
2549 	uint32_t index;
2550 	zoneid_t zoneid;
2551 
2552 	connp = Q_TO_CONN(q);
2553 	zoneid = connp->conn_zoneid;
2554 
2555 	/* don't allow multicast operations on a tcp conn */
2556 	if (IS_TCP_CONN(connp))
2557 		return (ENOPROTOOPT);
2558 
2559 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2560 		/* don't allow v4-specific ioctls on v6 socket */
2561 		if (connp->conn_af_isv6)
2562 			return (EAFNOSUPPORT);
2563 
2564 		imsf = (struct ip_msfilter *)dbuf;
2565 		v4addr = imsf->imsf_interface.s_addr;
2566 		v4grp = imsf->imsf_multiaddr.s_addr;
2567 		if (v4addr == INADDR_ANY) {
2568 			ipif = ipif_lookup_group(v4grp, zoneid);
2569 			if (ipif == NULL)
2570 				err = EADDRNOTAVAIL;
2571 		} else {
2572 			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
2573 			    func, &err);
2574 		}
2575 	} else {
2576 		boolean_t isv6 = B_FALSE;
2577 		gf = (struct group_filter *)dbuf;
2578 		index = gf->gf_interface;
2579 		if (gf->gf_group.ss_family == AF_INET6) {
2580 			struct sockaddr_in6 *sin6;
2581 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2582 			v6grp = sin6->sin6_addr;
2583 			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
2584 				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
2585 			else
2586 				isv6 = B_TRUE;
2587 		} else if (gf->gf_group.ss_family == AF_INET) {
2588 			struct sockaddr_in *sin;
2589 			sin = (struct sockaddr_in *)&gf->gf_group;
2590 			v4grp = sin->sin_addr.s_addr;
2591 		} else {
2592 			return (EAFNOSUPPORT);
2593 		}
2594 		if (index == 0) {
2595 			if (isv6)
2596 				ipif = ipif_lookup_group_v6(&v6grp, zoneid);
2597 			else
2598 				ipif = ipif_lookup_group(v4grp, zoneid);
2599 			if (ipif == NULL)
2600 				err = EADDRNOTAVAIL;
2601 		} else {
2602 			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
2603 			    q, mp, func, &err);
2604 		}
2605 	}
2606 
2607 	*ipifpp = ipif;
2608 	return (err);
2609 }
2610 
2611 /*
2612  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2613  * in in two stages, as the first copyin tells us the size of the attached
2614  * source buffer.  This function is called by ip_wput_nondata() after the
2615  * first copyin has completed; it figures out how big the second stage
2616  * needs to be, and kicks it off.
2617  *
2618  * In some cases (numsrc < 2), the second copyin is not needed as the
2619  * first one gets a complete structure containing 1 source addr.
2620  *
2621  * The function returns 0 if a second copyin has been started (i.e. there's
2622  * no more work to be done right now), or 1 if the second copyin is not
2623  * needed and ip_wput_nondata() can continue its processing.
2624  */
2625 int
2626 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2627 {
2628 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2629 	int cmd = iocp->ioc_cmd;
2630 	/* validity of this checked in ip_wput_nondata() */
2631 	mblk_t *mp1 = mp->b_cont->b_cont;
2632 	int copysize = 0;
2633 	int offset;
2634 
2635 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2636 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2637 		if (gf->gf_numsrc >= 2) {
2638 			offset = sizeof (struct group_filter);
2639 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2640 		}
2641 	} else {
2642 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2643 		if (imsf->imsf_numsrc >= 2) {
2644 			offset = sizeof (struct ip_msfilter);
2645 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2646 		}
2647 	}
2648 	if (copysize > 0) {
2649 		mi_copyin_n(q, mp, offset, copysize);
2650 		return (0);
2651 	}
2652 	return (1);
2653 }
2654 
2655 /*
2656  * Handle the following optmgmt:
2657  *	IP_ADD_MEMBERSHIP		must not have joined already
2658  *	MCAST_JOIN_GROUP		must not have joined already
2659  *	IP_BLOCK_SOURCE			must have joined already
2660  *	MCAST_BLOCK_SOURCE		must have joined already
2661  *	IP_JOIN_SOURCE_GROUP		may have joined already
2662  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2663  *
2664  * fmode and src parameters may be used to determine which option is
2665  * being set, as follows (the IP_* and MCAST_* versions of each option
2666  * are functionally equivalent):
2667  *	opt			fmode			src
2668  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
2669  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
2670  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
2671  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
2672  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2673  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
2674  *
2675  * Changing the filter mode is not allowed; if a matching ilg already
2676  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2677  *
2678  * Verifies that there is a source address of appropriate scope for
2679  * the group; if not, EADDRNOTAVAIL is returned.
2680  *
2681  * The interface to be used may be identified by an address or by an
2682  * index.  A pointer to the index is passed; if it is NULL, use the
2683  * address, otherwise, use the index.
2684  */
2685 int
2686 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2687     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2688     mblk_t *first_mp)
2689 {
2690 	ipif_t	*ipif;
2691 	ipsq_t	*ipsq;
2692 	int err = 0;
2693 	ill_t	*ill;
2694 
2695 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2696 	    ip_restart_optmgmt, &ipif);
2697 	if (err != 0) {
2698 		if (err != EINPROGRESS) {
2699 			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
2700 			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
2701 			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
2702 		}
2703 		return (err);
2704 	}
2705 	ASSERT(ipif != NULL);
2706 
2707 	ill = ipif->ipif_ill;
2708 	/* Operation not supported on a virtual network interface */
2709 	if (IS_VNI(ill)) {
2710 		ipif_refrele(ipif);
2711 		return (EINVAL);
2712 	}
2713 
2714 	if (checkonly) {
2715 		/*
2716 		 * do not do operation, just pretend to - new T_CHECK
2717 		 * semantics. The error return case above if encountered
2718 		 * considered a good enough "check" here.
2719 		 */
2720 		ipif_refrele(ipif);
2721 		return (0);
2722 	}
2723 
2724 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
2725 	    NEW_OP);
2726 
2727 	/* unspecified source addr => no source filtering */
2728 	err = ilg_add(connp, group, ipif, fmode, src);
2729 
2730 	IPSQ_EXIT(ipsq);
2731 
2732 	ipif_refrele(ipif);
2733 	return (err);
2734 }
2735 
2736 /*
2737  * Handle the following optmgmt:
2738  *	IPV6_JOIN_GROUP			must not have joined already
2739  *	MCAST_JOIN_GROUP		must not have joined already
2740  *	MCAST_BLOCK_SOURCE		must have joined already
2741  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2742  *
2743  * fmode and src parameters may be used to determine which option is
2744  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2745  * are functionally equivalent):
2746  *	opt			fmode			v6src
2747  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2748  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2749  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2750  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2751  *
2752  * Changing the filter mode is not allowed; if a matching ilg already
2753  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2754  *
2755  * Verifies that there is a source address of appropriate scope for
2756  * the group; if not, EADDRNOTAVAIL is returned.
2757  *
2758  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2759  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
2760  * v6src is also v4-mapped.
2761  */
2762 int
2763 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
2764     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
2765     const in6_addr_t *v6src, mblk_t *first_mp)
2766 {
2767 	ill_t *ill;
2768 	ipif_t	*ipif;
2769 	char buf[INET6_ADDRSTRLEN];
2770 	ipaddr_t v4group, v4src;
2771 	boolean_t isv6;
2772 	ipsq_t	*ipsq;
2773 	int	err;
2774 
2775 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
2776 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
2777 	if (err != 0) {
2778 		if (err != EINPROGRESS) {
2779 			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
2780 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2781 			    sizeof (buf)), ifindex));
2782 		}
2783 		return (err);
2784 	}
2785 	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));
2786 
2787 	/* operation is not supported on the virtual network interface */
2788 	if (isv6) {
2789 		if (IS_VNI(ill)) {
2790 			ill_refrele(ill);
2791 			return (EINVAL);
2792 		}
2793 	} else {
2794 		if (IS_VNI(ipif->ipif_ill)) {
2795 			ipif_refrele(ipif);
2796 			return (EINVAL);
2797 		}
2798 	}
2799 
2800 	if (checkonly) {
2801 		/*
2802 		 * do not do operation, just pretend to - new T_CHECK
2803 		 * semantics. The error return case above if encountered
2804 		 * considered a good enough "check" here.
2805 		 */
2806 		if (isv6)
2807 			ill_refrele(ill);
2808 		else
2809 			ipif_refrele(ipif);
2810 		return (0);
2811 	}
2812 
2813 	if (!isv6) {
2814 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
2815 		    ipsq, NEW_OP);
2816 		err = ilg_add(connp, v4group, ipif, fmode, v4src);
2817 		IPSQ_EXIT(ipsq);
2818 		ipif_refrele(ipif);
2819 	} else {
2820 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
2821 		    ipsq, NEW_OP);
2822 		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
2823 		IPSQ_EXIT(ipsq);
2824 		ill_refrele(ill);
2825 	}
2826 
2827 	return (err);
2828 }
2829 
2830 static int
2831 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
2832     mcast_record_t fmode, ipaddr_t src)
2833 {
2834 	ilg_t	*ilg;
2835 	in6_addr_t v6src;
2836 	boolean_t leaving = B_FALSE;
2837 
2838 	ASSERT(IAM_WRITER_IPIF(ipif));
2839 
2840 	/*
2841 	 * The ilg is valid only while we hold the conn lock. Once we drop
2842 	 * the lock, another thread can locate another ilg on this connp,
2843 	 * but on a different ipif, and delete it, and cause the ilg array
2844 	 * to be reallocated and copied. Hence do the ilg_delete before
2845 	 * dropping the lock.
2846 	 */
2847 	mutex_enter(&connp->conn_lock);
2848 	ilg = ilg_lookup_ipif(connp, group, ipif);
2849 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2850 		mutex_exit(&connp->conn_lock);
2851 		return (EADDRNOTAVAIL);
2852 	}
2853 
2854 	/*
2855 	 * Decide if we're actually deleting the ilg or just removing a
2856 	 * source filter address; if just removing an addr, make sure we
2857 	 * aren't trying to change the filter mode, and that the addr is
2858 	 * actually in our filter list already.  If we're removing the
2859 	 * last src in an include list, just delete the ilg.
2860 	 */
2861 	if (src == INADDR_ANY) {
2862 		v6src = ipv6_all_zeros;
2863 		leaving = B_TRUE;
2864 	} else {
2865 		int err = 0;
2866 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2867 		if (fmode != ilg->ilg_fmode)
2868 			err = EINVAL;
2869 		else if (ilg->ilg_filter == NULL ||
2870 		    !list_has_addr(ilg->ilg_filter, &v6src))
2871 			err = EADDRNOTAVAIL;
2872 		if (err != 0) {
2873 			mutex_exit(&connp->conn_lock);
2874 			return (err);
2875 		}
2876 		if (fmode == MODE_IS_INCLUDE &&
2877 		    ilg->ilg_filter->sl_numsrc == 1) {
2878 			v6src = ipv6_all_zeros;
2879 			leaving = B_TRUE;
2880 		}
2881 	}
2882 
2883 	ilg_delete(connp, ilg, &v6src);
2884 	mutex_exit(&connp->conn_lock);
2885 
2886 	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
2887 	return (0);
2888 }
2889 
2890 static int
2891 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
2892     ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2893 {
2894 	ilg_t	*ilg;
2895 	ill_t	*ilg_ill;
2896 	uint_t	ilg_orig_ifindex;
2897 	boolean_t leaving = B_TRUE;
2898 
2899 	ASSERT(IAM_WRITER_ILL(ill));
2900 
2901 	/*
2902 	 * Use the index that we originally used to join. We can't
2903 	 * use the ill directly because ilg_ill could point to
2904 	 * a new ill if things have moved.
2905 	 */
2906 	mutex_enter(&connp->conn_lock);
2907 	ilg = ilg_lookup_ill_index_v6(connp, v6group,
2908 	    ill->ill_phyint->phyint_ifindex);
2909 	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
2910 		mutex_exit(&connp->conn_lock);
2911 		return (EADDRNOTAVAIL);
2912 	}
2913 
2914 	/*
2915 	 * Decide if we're actually deleting the ilg or just removing a
2916 	 * source filter address; if just removing an addr, make sure we
2917 	 * aren't trying to change the filter mode, and that the addr is
2918 	 * actually in our filter list already.  If we're removing the
2919 	 * last src in an include list, just delete the ilg.
2920 	 */
2921 	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2922 		int err = 0;
2923 		if (fmode != ilg->ilg_fmode)
2924 			err = EINVAL;
2925 		else if (ilg->ilg_filter == NULL ||
2926 		    !list_has_addr(ilg->ilg_filter, v6src))
2927 			err = EADDRNOTAVAIL;
2928 		if (err != 0) {
2929 			mutex_exit(&connp->conn_lock);
2930 			return (err);
2931 		}
2932 		if (fmode == MODE_IS_INCLUDE &&
2933 		    ilg->ilg_filter->sl_numsrc == 1)
2934 			v6src = NULL;
2935 		else
2936 			leaving = B_FALSE;
2937 	}
2938 
2939 	ilg_ill = ilg->ilg_ill;
2940 	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
2941 	ilg_delete(connp, ilg, v6src);
2942 	mutex_exit(&connp->conn_lock);
2943 	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
2944 	    connp->conn_zoneid, B_FALSE, leaving);
2945 
2946 	return (0);
2947 }
2948 
2949 /*
2950  * Handle the following optmgmt:
2951  *	IP_DROP_MEMBERSHIP		will leave
2952  *	MCAST_LEAVE_GROUP		will leave
2953  *	IP_UNBLOCK_SOURCE		will not leave
2954  *	MCAST_UNBLOCK_SOURCE		will not leave
2955  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2956  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2957  *
2958  * fmode and src parameters may be used to determine which option is
2959  * being set, as follows (the IP_* and MCAST_* versions of each option
2960  * are functionally equivalent):
2961  *	opt			 fmode			src
2962  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
2963  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
2964  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2965  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
2966  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
2967  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
2968  *
2969  * Changing the filter mode is not allowed; if a matching ilg already
2970  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2971  *
2972  * The interface to be used may be identified by an address or by an
2973  * index.  A pointer to the index is passed; if it is NULL, use the
2974  * address, otherwise, use the index.
2975  */
2976 int
2977 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
2978     ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
2979     mblk_t *first_mp)
2980 {
2981 	ipif_t	*ipif;
2982 	ipsq_t	*ipsq;
2983 	int	err;
2984 	ill_t	*ill;
2985 
2986 	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
2987 	    ip_restart_optmgmt, &ipif);
2988 	if (err != 0) {
2989 		if (err != EINPROGRESS) {
2990 			ip1dbg(("ip_opt_delete_group: no ipif for group "
2991 			    "0x%x, ifaddr 0x%x\n",
2992 			    (int)ntohl(group), (int)ntohl(ifaddr)));
2993 		}
2994 		return (err);
2995 	}
2996 	ASSERT(ipif != NULL);
2997 
2998 	ill = ipif->ipif_ill;
2999 	/* Operation not supported on a virtual network interface */
3000 	if (IS_VNI(ill)) {
3001 		ipif_refrele(ipif);
3002 		return (EINVAL);
3003 	}
3004 
3005 	if (checkonly) {
3006 		/*
3007 		 * do not do operation, just pretend to - new T_CHECK
3008 		 * semantics. The error return case above if encountered
3009 		 * considered a good enough "check" here.
3010 		 */
3011 		ipif_refrele(ipif);
3012 		return (0);
3013 	}
3014 
3015 	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
3016 	    NEW_OP);
3017 	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
3018 	IPSQ_EXIT(ipsq);
3019 
3020 	ipif_refrele(ipif);
3021 	return (err);
3022 }
3023 
3024 /*
3025  * Handle the following optmgmt:
3026  *	IPV6_LEAVE_GROUP		will leave
3027  *	MCAST_LEAVE_GROUP		will leave
3028  *	MCAST_UNBLOCK_SOURCE		will not leave
3029  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
3030  *
3031  * fmode and src parameters may be used to determine which option is
3032  * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
3033  * are functionally equivalent):
3034  *	opt			 fmode			v6src
3035  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3036  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
3037  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
3038  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
3039  *
3040  * Changing the filter mode is not allowed; if a matching ilg already
3041  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
3042  *
3043  * Handles IPv4-mapped IPv6 multicast addresses by associating them
3044  * with the link-local ipif.  Assumes that if v6group is v4-mapped,
3045  * v6src is also v4-mapped.
3046  */
3047 int
3048 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
3049     const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
3050     const in6_addr_t *v6src, mblk_t *first_mp)
3051 {
3052 	ill_t *ill;
3053 	ipif_t	*ipif;
3054 	char	buf[INET6_ADDRSTRLEN];
3055 	ipaddr_t v4group, v4src;
3056 	boolean_t isv6;
3057 	ipsq_t	*ipsq;
3058 	int	err;
3059 
3060 	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
3061 	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
3062 	if (err != 0) {
3063 		if (err != EINPROGRESS) {
3064 			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
3065 			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
3066 			    sizeof (buf)), ifindex));
3067 		}
3068 		return (err);
3069 	}
3070 	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));
3071 
3072 	/* operation is not supported on the virtual network interface */
3073 	if (isv6) {
3074 		if (IS_VNI(ill)) {
3075 			ill_refrele(ill);
3076 			return (EINVAL);
3077 		}
3078 	} else {
3079 		if (IS_VNI(ipif->ipif_ill)) {
3080 			ipif_refrele(ipif);
3081 			return (EINVAL);
3082 		}
3083 	}
3084 
3085 	if (checkonly) {
3086 		/*
3087 		 * do not do operation, just pretend to - new T_CHECK
3088 		 * semantics. The error return case above if encountered
3089 		 * considered a good enough "check" here.
3090 		 */
3091 		if (isv6)
3092 			ill_refrele(ill);
3093 		else
3094 			ipif_refrele(ipif);
3095 		return (0);
3096 	}
3097 
3098 	if (!isv6) {
3099 		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
3100 		    ipsq, NEW_OP);
3101 		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
3102 		    v4src);
3103 		IPSQ_EXIT(ipsq);
3104 		ipif_refrele(ipif);
3105 	} else {
3106 		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
3107 		    ipsq, NEW_OP);
3108 		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
3109 		    v6src);
3110 		IPSQ_EXIT(ipsq);
3111 		ill_refrele(ill);
3112 	}
3113 
3114 	return (err);
3115 }
3116 
3117 /*
3118  * Group mgmt for upper conn that passes things down
3119  * to the interface multicast list (and DLPI)
3120  * These routines can handle new style options that specify an interface name
3121  * as opposed to an interface address (needed for general handling of
3122  * unnumbered interfaces.)
3123  */
3124 
3125 /*
3126  * Add a group to an upper conn group data structure and pass things down
3127  * to the interface multicast list (and DLPI)
3128  */
3129 static int
3130 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
3131     ipaddr_t src)
3132 {
3133 	int	error = 0;
3134 	ill_t	*ill;
3135 	ilg_t	*ilg;
3136 	ilg_stat_t ilgstat;
3137 	slist_t	*new_filter = NULL;
3138 	int	new_fmode;
3139 
3140 	ASSERT(IAM_WRITER_IPIF(ipif));
3141 
3142 	ill = ipif->ipif_ill;
3143 
3144 	if (!(ill->ill_flags & ILLF_MULTICAST))
3145 		return (EADDRNOTAVAIL);
3146 
3147 	/*
3148 	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
3149 	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
3150 	 * serialize 2 threads doing join (sock, group1, hme0:0) and
3151 	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
3152 	 * but both operations happen on the same conn.
3153 	 */
3154 	mutex_enter(&connp->conn_lock);
3155 	ilg = ilg_lookup_ipif(connp, group, ipif);
3156 
3157 	/*
3158 	 * Depending on the option we're handling, may or may not be okay
3159 	 * if group has already been added.  Figure out our rules based
3160 	 * on fmode and src params.  Also make sure there's enough room
3161 	 * in the filter if we're adding a source to an existing filter.
3162 	 */
3163 	if (src == INADDR_ANY) {
3164 		/* we're joining for all sources, must not have joined */
3165 		if (ilg != NULL)
3166 			error = EADDRINUSE;
3167 	} else {
3168 		if (fmode == MODE_IS_EXCLUDE) {
3169 			/* (excl {addr}) => block source, must have joined */
3170 			if (ilg == NULL)
3171 				error = EADDRNOTAVAIL;
3172 		}
3173 		/* (incl {addr}) => join source, may have joined */
3174 
3175 		if (ilg != NULL &&
3176 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3177 			error = ENOBUFS;
3178 	}
3179 	if (error != 0) {
3180 		mutex_exit(&connp->conn_lock);
3181 		return (error);
3182 	}
3183 
3184 	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
3185 
3186 	/*
3187 	 * Alloc buffer to copy new state into (see below) before
3188 	 * we make any changes, so we can bail if it fails.
3189 	 */
3190 	if ((new_filter = l_alloc()) == NULL) {
3191 		mutex_exit(&connp->conn_lock);
3192 		return (ENOMEM);
3193 	}
3194 
3195 	if (ilg == NULL) {
3196 		ilgstat = ILGSTAT_NEW;
3197 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3198 			mutex_exit(&connp->conn_lock);
3199 			l_free(new_filter);
3200 			return (ENOMEM);
3201 		}
3202 		if (src != INADDR_ANY) {
3203 			ilg->ilg_filter = l_alloc();
3204 			if (ilg->ilg_filter == NULL) {
3205 				ilg_delete(connp, ilg, NULL);
3206 				mutex_exit(&connp->conn_lock);
3207 				l_free(new_filter);
3208 				return (ENOMEM);
3209 			}
3210 			ilg->ilg_filter->sl_numsrc = 1;
3211 			IN6_IPADDR_TO_V4MAPPED(src,
3212 			    &ilg->ilg_filter->sl_addr[0]);
3213 		}
3214 		if (group == INADDR_ANY) {
3215 			ilg->ilg_v6group = ipv6_all_zeros;
3216 		} else {
3217 			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
3218 		}
3219 		ilg->ilg_ipif = ipif;
3220 		ilg->ilg_ill = NULL;
3221 		ilg->ilg_orig_ifindex = 0;
3222 		ilg->ilg_fmode = fmode;
3223 	} else {
3224 		int index;
3225 		in6_addr_t v6src;
3226 		ilgstat = ILGSTAT_CHANGE;
3227 		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
3228 			mutex_exit(&connp->conn_lock);
3229 			l_free(new_filter);
3230 			return (EINVAL);
3231 		}
3232 		if (ilg->ilg_filter == NULL) {
3233 			ilg->ilg_filter = l_alloc();
3234 			if (ilg->ilg_filter == NULL) {
3235 				mutex_exit(&connp->conn_lock);
3236 				l_free(new_filter);
3237 				return (ENOMEM);
3238 			}
3239 		}
3240 		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3241 		if (list_has_addr(ilg->ilg_filter, &v6src)) {
3242 			mutex_exit(&connp->conn_lock);
3243 			l_free(new_filter);
3244 			return (EADDRNOTAVAIL);
3245 		}
3246 		index = ilg->ilg_filter->sl_numsrc++;
3247 		ilg->ilg_filter->sl_addr[index] = v6src;
3248 	}
3249 
3250 	/*
3251 	 * Save copy of ilg's filter state to pass to other functions,
3252 	 * so we can release conn_lock now.
3253 	 */
3254 	new_fmode = ilg->ilg_fmode;
3255 	l_copy(ilg->ilg_filter, new_filter);
3256 
3257 	mutex_exit(&connp->conn_lock);
3258 
3259 	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
3260 	if (error != 0) {
3261 		/*
3262 		 * Need to undo what we did before calling ip_addmulti()!
3263 		 * Must look up the ilg again since we've not been holding
3264 		 * conn_lock.
3265 		 */
3266 		in6_addr_t v6src;
3267 		if (ilgstat == ILGSTAT_NEW)
3268 			v6src = ipv6_all_zeros;
3269 		else
3270 			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3271 		mutex_enter(&connp->conn_lock);
3272 		ilg = ilg_lookup_ipif(connp, group, ipif);
3273 		ASSERT(ilg != NULL);
3274 		ilg_delete(connp, ilg, &v6src);
3275 		mutex_exit(&connp->conn_lock);
3276 		l_free(new_filter);
3277 		return (error);
3278 	}
3279 
3280 	l_free(new_filter);
3281 	return (0);
3282 }
3283 
3284 static int
3285 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
3286     mcast_record_t fmode, const in6_addr_t *v6src)
3287 {
3288 	int	error = 0;
3289 	int	orig_ifindex;
3290 	ilg_t	*ilg;
3291 	ilg_stat_t ilgstat;
3292 	slist_t	*new_filter = NULL;
3293 	int	new_fmode;
3294 
3295 	ASSERT(IAM_WRITER_ILL(ill));
3296 
3297 	if (!(ill->ill_flags & ILLF_MULTICAST))
3298 		return (EADDRNOTAVAIL);
3299 
3300 	/*
3301 	 * conn_lock protects the ilg list.  Serializes 2 threads doing
3302 	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
3303 	 * and hme1 map to different ipsq's, but both operations happen
3304 	 * on the same conn.
3305 	 */
3306 	mutex_enter(&connp->conn_lock);
3307 
3308 	/*
3309 	 * Use the ifindex to do the lookup. We can't use the ill
3310 	 * directly because ilg_ill could point to a different ill if
3311 	 * things have moved.
3312 	 */
3313 	orig_ifindex = ill->ill_phyint->phyint_ifindex;
3314 	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3315 
3316 	/*
3317 	 * Depending on the option we're handling, may or may not be okay
3318 	 * if group has already been added.  Figure out our rules based
3319 	 * on fmode and src params.  Also make sure there's enough room
3320 	 * in the filter if we're adding a source to an existing filter.
3321 	 */
3322 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3323 		/* we're joining for all sources, must not have joined */
3324 		if (ilg != NULL)
3325 			error = EADDRINUSE;
3326 	} else {
3327 		if (fmode == MODE_IS_EXCLUDE) {
3328 			/* (excl {addr}) => block source, must have joined */
3329 			if (ilg == NULL)
3330 				error = EADDRNOTAVAIL;
3331 		}
3332 		/* (incl {addr}) => join source, may have joined */
3333 
3334 		if (ilg != NULL &&
3335 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
3336 			error = ENOBUFS;
3337 	}
3338 	if (error != 0) {
3339 		mutex_exit(&connp->conn_lock);
3340 		return (error);
3341 	}
3342 
3343 	/*
3344 	 * Alloc buffer to copy new state into (see below) before
3345 	 * we make any changes, so we can bail if it fails.
3346 	 */
3347 	if ((new_filter = l_alloc()) == NULL) {
3348 		mutex_exit(&connp->conn_lock);
3349 		return (ENOMEM);
3350 	}
3351 
3352 	if (ilg == NULL) {
3353 		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
3354 			mutex_exit(&connp->conn_lock);
3355 			l_free(new_filter);
3356 			return (ENOMEM);
3357 		}
3358 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3359 			ilg->ilg_filter = l_alloc();
3360 			if (ilg->ilg_filter == NULL) {
3361 				ilg_delete(connp, ilg, NULL);
3362 				mutex_exit(&connp->conn_lock);
3363 				l_free(new_filter);
3364 				return (ENOMEM);
3365 			}
3366 			ilg->ilg_filter->sl_numsrc = 1;
3367 			ilg->ilg_filter->sl_addr[0] = *v6src;
3368 		}
3369 		ilgstat = ILGSTAT_NEW;
3370 		ilg->ilg_v6group = *v6group;
3371 		ilg->ilg_fmode = fmode;
3372 		ilg->ilg_ipif = NULL;
3373 		/*
3374 		 * Choose our target ill to join on. This might be different
3375 		 * from the ill we've been given if it's currently down and
3376 		 * part of a group.
3377 		 *
3378 		 * new ill is not refheld; we are writer.
3379 		 */
3380 		ill = ip_choose_multi_ill(ill, v6group);
3381 		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
3382 		ilg->ilg_ill = ill;
3383 		/*
3384 		 * Remember the orig_ifindex that we joined on, so that we
3385 		 * can successfully delete them later on and also search
3386 		 * for duplicates if the application wants to join again.
3387 		 */
3388 		ilg->ilg_orig_ifindex = orig_ifindex;
3389 	} else {
3390 		int index;
3391 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
3392 			mutex_exit(&connp->conn_lock);
3393 			l_free(new_filter);
3394 			return (EINVAL);
3395 		}
3396 		if (ilg->ilg_filter == NULL) {
3397 			ilg->ilg_filter = l_alloc();
3398 			if (ilg->ilg_filter == NULL) {
3399 				mutex_exit(&connp->conn_lock);
3400 				l_free(new_filter);
3401 				return (ENOMEM);
3402 			}
3403 		}
3404 		if (list_has_addr(ilg->ilg_filter, v6src)) {
3405 			mutex_exit(&connp->conn_lock);
3406 			l_free(new_filter);
3407 			return (EADDRNOTAVAIL);
3408 		}
3409 		ilgstat = ILGSTAT_CHANGE;
3410 		index = ilg->ilg_filter->sl_numsrc++;
3411 		ilg->ilg_filter->sl_addr[index] = *v6src;
3412 		/*
3413 		 * The current ill might be different from the one we were
3414 		 * asked to join on (if failover has occurred); we should
3415 		 * join on the ill stored in the ilg.  The original ill
3416 		 * is noted in ilg_orig_ifindex, which matched our request.
3417 		 */
3418 		ill = ilg->ilg_ill;
3419 	}
3420 
3421 	/*
3422 	 * Save copy of ilg's filter state to pass to other functions,
3423 	 * so we can release conn_lock now.
3424 	 */
3425 	new_fmode = ilg->ilg_fmode;
3426 	l_copy(ilg->ilg_filter, new_filter);
3427 
3428 	mutex_exit(&connp->conn_lock);
3429 
3430 	/*
3431 	 * Now update the ill. We wait to do this until after the ilg
3432 	 * has been updated because we need to update the src filter
3433 	 * info for the ill, which involves looking at the status of
3434 	 * all the ilgs associated with this group/interface pair.
3435 	 */
3436 	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
3437 	    ilgstat, new_fmode, new_filter);
3438 	if (error != 0) {
3439 		/*
3440 		 * But because we waited, we have to undo the ilg update
3441 		 * if ip_addmulti_v6() fails.  We also must lookup ilg
3442 		 * again, since we've not been holding conn_lock.
3443 		 */
3444 		in6_addr_t delsrc =
3445 		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
3446 		mutex_enter(&connp->conn_lock);
3447 		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
3448 		ASSERT(ilg != NULL);
3449 		ilg_delete(connp, ilg, &delsrc);
3450 		mutex_exit(&connp->conn_lock);
3451 		l_free(new_filter);
3452 		return (error);
3453 	}
3454 
3455 	l_free(new_filter);
3456 
3457 	return (0);
3458 }
3459 
3460 /*
3461  * Find an IPv4 ilg matching group, ill and source
3462  */
3463 ilg_t *
3464 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
3465 {
3466 	in6_addr_t v6group, v6src;
3467 	int i;
3468 	boolean_t isinlist;
3469 	ilg_t *ilg;
3470 	ipif_t *ipif;
3471 	ill_t *ilg_ill;
3472 
3473 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3474 
3475 	/*
3476 	 * INADDR_ANY is represented as the IPv6 unspecified addr.
3477 	 */
3478 	if (group == INADDR_ANY)
3479 		v6group = ipv6_all_zeros;
3480 	else
3481 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3482 
3483 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3484 		/* ilg_ipif is NULL for v6; skip them */
3485 		ilg = &connp->conn_ilg[i];
3486 		if ((ipif = ilg->ilg_ipif) == NULL)
3487 			continue;
3488 		ASSERT(ilg->ilg_ill == NULL);
3489 		ilg_ill = ipif->ipif_ill;
3490 		ASSERT(!ilg_ill->ill_isv6);
3491 		if (ilg_ill == ill &&
3492 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
3493 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3494 				/* no source filter, so this is a match */
3495 				return (ilg);
3496 			}
3497 			break;
3498 		}
3499 	}
3500 	if (i == connp->conn_ilg_inuse)
3501 		return (NULL);
3502 
3503 	/*
3504 	 * we have an ilg with matching ill and group; but
3505 	 * the ilg has a source list that we must check.
3506 	 */
3507 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
3508 	isinlist = B_FALSE;
3509 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3510 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
3511 			isinlist = B_TRUE;
3512 			break;
3513 		}
3514 	}
3515 
3516 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3517 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3518 		return (ilg);
3519 
3520 	return (NULL);
3521 }
3522 
3523 /*
3524  * Find an IPv6 ilg matching group, ill, and source
3525  */
3526 ilg_t *
3527 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
3528     const in6_addr_t *v6src, ill_t *ill)
3529 {
3530 	int i;
3531 	boolean_t isinlist;
3532 	ilg_t *ilg;
3533 	ill_t *ilg_ill;
3534 
3535 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3536 
3537 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3538 		ilg = &connp->conn_ilg[i];
3539 		if ((ilg_ill = ilg->ilg_ill) == NULL)
3540 			continue;
3541 		ASSERT(ilg->ilg_ipif == NULL);
3542 		ASSERT(ilg_ill->ill_isv6);
3543 		if (ilg_ill == ill &&
3544 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
3545 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
3546 				/* no source filter, so this is a match */
3547 				return (ilg);
3548 			}
3549 			break;
3550 		}
3551 	}
3552 	if (i == connp->conn_ilg_inuse)
3553 		return (NULL);
3554 
3555 	/*
3556 	 * we have an ilg with matching ill and group; but
3557 	 * the ilg has a source list that we must check.
3558 	 */
3559 	isinlist = B_FALSE;
3560 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
3561 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
3562 			isinlist = B_TRUE;
3563 			break;
3564 		}
3565 	}
3566 
3567 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
3568 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
3569 		return (ilg);
3570 
3571 	return (NULL);
3572 }
3573 
3574 /*
3575  * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
3576  * This is useful when the interface fails and we have moved
3577  * to a new ill, but still would like to locate using the index
3578  * that we originally used to join. Used only for IPv6 currently.
3579  */
3580 static ilg_t *
3581 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
3582 {
3583 	ilg_t	*ilg;
3584 	int	i;
3585 
3586 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3587 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3588 		ilg = &connp->conn_ilg[i];
3589 		/* ilg_ill is NULL for V4. Skip them */
3590 		if (ilg->ilg_ill == NULL)
3591 			continue;
3592 		/* ilg_ipif is NULL for V6 */
3593 		ASSERT(ilg->ilg_ipif == NULL);
3594 		ASSERT(ilg->ilg_orig_ifindex != 0);
3595 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
3596 		    ilg->ilg_orig_ifindex == ifindex) {
3597 			return (ilg);
3598 		}
3599 	}
3600 	return (NULL);
3601 }
3602 
3603 /*
3604  * Find an IPv6 ilg matching group and ill
3605  */
3606 ilg_t *
3607 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
3608 {
3609 	ilg_t	*ilg;
3610 	int	i;
3611 	ill_t 	*mem_ill;
3612 
3613 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3614 
3615 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3616 		ilg = &connp->conn_ilg[i];
3617 		if ((mem_ill = ilg->ilg_ill) == NULL)
3618 			continue;
3619 		ASSERT(ilg->ilg_ipif == NULL);
3620 		ASSERT(mem_ill->ill_isv6);
3621 		if (mem_ill == ill &&
3622 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3623 			return (ilg);
3624 	}
3625 	return (NULL);
3626 }
3627 
3628 /*
3629  * Find an IPv4 ilg matching group and ipif
3630  */
3631 static ilg_t *
3632 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
3633 {
3634 	in6_addr_t v6group;
3635 	int	i;
3636 
3637 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3638 	ASSERT(!ipif->ipif_ill->ill_isv6);
3639 
3640 	if (group == INADDR_ANY)
3641 		v6group = ipv6_all_zeros;
3642 	else
3643 		IN6_IPADDR_TO_V4MAPPED(group, &v6group);
3644 
3645 	for (i = 0; i < connp->conn_ilg_inuse; i++) {
3646 		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
3647 		    &v6group) &&
3648 		    connp->conn_ilg[i].ilg_ipif == ipif)
3649 			return (&connp->conn_ilg[i]);
3650 	}
3651 	return (NULL);
3652 }
3653 
3654 /*
3655  * If a source address is passed in (src != NULL and src is not
3656  * unspecified), remove the specified src addr from the given ilg's
3657  * filter list, else delete the ilg.
3658  */
3659 static void
3660 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3661 {
3662 	int	i;
3663 
3664 	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
3665 	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
3666 	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
3667 	ASSERT(MUTEX_HELD(&connp->conn_lock));
3668 	ASSERT(!(ilg->ilg_flags & ILG_DELETED));
3669 
3670 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3671 		if (connp->conn_ilg_walker_cnt != 0) {
3672 			ilg->ilg_flags |= ILG_DELETED;
3673 			return;
3674 		}
3675 
3676 		FREE_SLIST(ilg->ilg_filter);
3677 
3678 		i = ilg - &connp->conn_ilg[0];
3679 		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);
3680 
3681 		/* Move other entries up one step */
3682 		connp->conn_ilg_inuse--;
3683 		for (; i < connp->conn_ilg_inuse; i++)
3684 			connp->conn_ilg[i] = connp->conn_ilg[i+1];
3685 
3686 		if (connp->conn_ilg_inuse == 0) {
3687 			mi_free((char *)connp->conn_ilg);
3688 			connp->conn_ilg = NULL;
3689 			cv_broadcast(&connp->conn_refcv);
3690 		}
3691 	} else {
3692 		l_remove(ilg->ilg_filter, src);
3693 	}
3694 }
3695 
/*
 * Called from conn close to drop every multicast membership (ilg) held
 * by connp.  According to the original authors no new ilg can be added
 * or removed at this point because CONN_CLOSING has been set by
 * ip_close (that check is not visible in this function).
 *
 * For each ilg, walking from the last entry to the first, this routine
 * becomes exclusive on the ilg's ill via ipsq_enter(), deletes the ilg
 * and leaves the group on the interface.  ilgs whose ill cannot be
 * entered are skipped; if any remain after the walk, we sleep on
 * conn_refcv (which ilg_delete() broadcasts when the last ilg goes
 * away) and start over.
 *
 * NOTE(review): the v4/v6 distinction below is made with
 * IN6_IS_ADDR_V4MAPPED() on the group.  ilg_add() stores an IPv4 group
 * of INADDR_ANY as the unspecified IPv6 address with ilg_ill == NULL;
 * such an ilg would take the IPv6 branch here and hand a NULL ill to
 * ill_waiter_inc().  Confirm that callers never create such an ilg.
 */
void
ilg_delete_all(conn_t *connp)
{
	int	i;
	ipif_t	*ipif = NULL;
	ill_t	*ill = NULL;
	ilg_t	*ilg;
	in6_addr_t v6group;
	boolean_t success;
	ipsq_t	*ipsq;
	int	orig_ifindex;

	mutex_enter(&connp->conn_lock);
retry:
	/*
	 * Hold off physical deletion/repacking of conn_ilg[] while we
	 * walk it; ilg_delete() only flags entries ILG_DELETED as long
	 * as the walker count is non-zero.
	 */
	ILG_WALKER_HOLD(connp);
	/* Note: i is decremented explicitly; some paths retry the same i */
	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
		ilg = &connp->conn_ilg[i];
		/*
		 * Since this walk is not atomic (we drop the
		 * conn_lock and wait in ipsq_enter) we need
		 * to check for the ILG_DELETED flag.
		 */
		if (ilg->ilg_flags & ILG_DELETED) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		v6group = ilg->ilg_v6group;

		/* v4-mapped group => IPv4 ilg (by ipif), else IPv6 (by ill) */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 * If we are unable to do even that, then the ill is closing,
		 * in which case the unplumb thread will handle the cleanup,
		 * and we move on to the next ilg.
		 */
		if (!ill_waiter_inc(ill)) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		/* conn_lock is dropped across ipsq_enter() */
		mutex_exit(&connp->conn_lock);
		/*
		 * To prevent deadlock between ill close which waits inside
		 * the perimeter, and conn close, ipsq_enter returns error,
		 * the moment ILL_CONDEMNED is set, in which case ill close
		 * takes responsibility to cleanup the ilgs. Note that we
		 * have not yet set condemned flag, otherwise the conn can't
		 * be refheld for cleanup by those routines and it would be
		 * a mutual deadlock.
		 */
		success = ipsq_enter(ill, B_FALSE);
		/* Remember which ipsq we (may have) entered for ipsq_exit() */
		ipsq = ill->ill_phyint->phyint_ipsq;
		ill_waiter_dcr(ill);
		mutex_enter(&connp->conn_lock);
		if (!success) {
			/* Go to the next ilg */
			i--;
			continue;
		}

		/*
		 * Make sure that nothing has changed under. For eg.
		 * a failover/failback can change ilg_ill while we were
		 * waiting to become exclusive above
		 */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * The ilg has changed under us probably due
			 * to a failover or unplumb. Retry on the same ilg.
			 */
			mutex_exit(&connp->conn_lock);
			ipsq_exit(ipsq, B_TRUE, B_TRUE);
			mutex_enter(&connp->conn_lock);
			/* i is deliberately left unchanged here */
			continue;
		}
		/*
		 * Capture what ip_delmulti*() needs before ilg_delete(),
		 * since conn_lock is dropped again right after it.
		 */
		v6group = ilg->ilg_v6group;
		orig_ifindex = ilg->ilg_orig_ifindex;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);

		/* Leave the group on the interface; errors are ignored */
		if (ipif != NULL)
			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
			    B_FALSE, B_TRUE);

		else
			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
			    connp->conn_zoneid, B_FALSE, B_TRUE);

		ipsq_exit(ipsq, B_TRUE, B_TRUE);
		mutex_enter(&connp->conn_lock);
		/* Go to the next ilg */
		i--;
	}
	ILG_WALKER_RELE(connp);

	/* If any ill was skipped above wait and retry */
	if (connp->conn_ilg_inuse != 0) {
		cv_wait(&connp->conn_refcv, &connp->conn_lock);
		goto retry;
	}
	mutex_exit(&connp->conn_lock);
}
3819 
3820 /*
3821  * Called from ill close by ipcl_walk for clearing conn_ilg and
3822  * conn_multicast_ipif for a given ipif. conn is held by caller.
3823  * Note that ipcl_walk only walks conns that are not yet condemned.
3824  * condemned conns can't be refheld. For this reason, conn must become clean
3825  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3826  * condemned flag.
3827  */
3828 static void
3829 conn_delete_ipif(conn_t *connp, caddr_t arg)
3830 {
3831 	ipif_t	*ipif = (ipif_t *)arg;
3832 	int	i;
3833 	char	group_buf1[INET6_ADDRSTRLEN];
3834 	char	group_buf2[INET6_ADDRSTRLEN];
3835 	ipaddr_t group;
3836 	ilg_t	*ilg;
3837 
3838 	/*
3839 	 * Even though conn_ilg_inuse can change while we are in this loop,
3840 	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
3841 	 * be created or deleted for this connp, on this ill, since this ill
3842 	 * is the perimeter. So we won't miss any ilg in this cleanup.
3843 	 */
3844 	mutex_enter(&connp->conn_lock);
3845 
3846 	/*
3847 	 * Increment the walker count, so that ilg repacking does not
3848 	 * occur while we are in the loop.
3849 	 */
3850 	ILG_WALKER_HOLD(connp);
3851 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3852 		ilg = &connp->conn_ilg[i];
3853 		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
3854 			continue;
3855 		/*
3856 		 * ip_close cannot be cleaning this ilg at the same time.
3857 		 * since it also has to execute in this ill's perimeter which
3858 		 * we are now holding. Only a clean conn can be condemned.
3859 		 */
3860 		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3861 
3862 		/* Blow away the membership */
3863 		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
3864 		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
3865 		    group_buf1, sizeof (group_buf1)),
3866 		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
3867 		    group_buf2, sizeof (group_buf2)),
3868 		    ipif->ipif_ill->ill_name));
3869 
3870 		/* ilg_ipif is NULL for V6, so we won't be here */
3871 		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));
3872 
3873 		group = V4_PART_OF_V6(ilg->ilg_v6group);
3874 		ilg_delete(connp, &connp->conn_ilg[i], NULL);
3875 		mutex_exit(&connp->conn_lock);
3876 
3877 		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
3878 		mutex_enter(&connp->conn_lock);
3879 	}
3880 
3881 	/*
3882 	 * If we are the last walker, need to physically delete the
3883 	 * ilgs and repack.
3884 	 */
3885 	ILG_WALKER_RELE(connp);
3886 
3887 	if (connp->conn_multicast_ipif == ipif) {
3888 		/* Revert to late binding */
3889 		connp->conn_multicast_ipif = NULL;
3890 	}
3891 	mutex_exit(&connp->conn_lock);
3892 
3893 	conn_delete_ire(connp, (caddr_t)ipif);
3894 }
3895 
3896 /*
3897  * Called from ill close by ipcl_walk for clearing conn_ilg and
3898  * conn_multicast_ill for a given ill. conn is held by caller.
3899  * Note that ipcl_walk only walks conns that are not yet condemned.
3900  * condemned conns can't be refheld. For this reason, conn must become clean
3901  * first, i.e. it must not refer to any ill/ire/ipif and then only set
3902  * condemned flag.
3903  */
3904 static void
3905 conn_delete_ill(conn_t *connp, caddr_t arg)
3906 {
3907 	ill_t	*ill = (ill_t *)arg;
3908 	int	i;
3909 	char	group_buf[INET6_ADDRSTRLEN];
3910 	in6_addr_t v6group;
3911 	int	orig_ifindex;
3912 	ilg_t	*ilg;
3913 
3914 	/*
3915 	 * Even though conn_ilg_inuse can change while we are in this loop,
3916 	 * no new ilgs can be created/deleted for this connp, on this
3917 	 * ill, since this ill is the perimeter. So we won't miss any ilg
3918 	 * in this cleanup.
3919 	 */
3920 	mutex_enter(&connp->conn_lock);
3921 
3922 	/*
3923 	 * Increment the walker count, so that ilg repacking does not
3924 	 * occur while we are in the loop.
3925 	 */
3926 	ILG_WALKER_HOLD(connp);
3927 	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
3928 		ilg = &connp->conn_ilg[i];
3929 		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
3930 			/*
3931 			 * ip_close cannot be cleaning this ilg at the same
3932 			 * time, since it also has to execute in this ill's
3933 			 * perimeter which we are now holding. Only a clean
3934 			 * conn can be condemned.
3935 			 */
3936 			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));
3937 
3938 			/* Blow away the membership */
3939 			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
3940 			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3941 			    group_buf, sizeof (group_buf)),
3942 			    ill->ill_name));
3943 
3944 			v6group = ilg->ilg_v6group;
3945 			orig_ifindex = ilg->ilg_orig_ifindex;
3946 			ilg_delete(connp, ilg, NULL);
3947 			mutex_exit(&connp->conn_lock);
3948 
3949 			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
3950 			    connp->conn_zoneid, B_FALSE, B_TRUE);
3951 			mutex_enter(&connp->conn_lock);
3952 		}
3953 	}
3954 	/*
3955 	 * If we are the last walker, need to physically delete the
3956 	 * ilgs and repack.
3957 	 */
3958 	ILG_WALKER_RELE(connp);
3959 
3960 	if (connp->conn_multicast_ill == ill) {
3961 		/* Revert to late binding */
3962 		connp->conn_multicast_ill = NULL;
3963 		connp->conn_orig_multicast_ifindex = 0;
3964 	}
3965 	mutex_exit(&connp->conn_lock);
3966 }
3967 
3968 /*
3969  * Called when an ipif is unplumbed to make sure that there are no
3970  * dangling conn references to that ipif.
3971  * Handles ilg_ipif and conn_multicast_ipif
3972  */
3973 void
3974 reset_conn_ipif(ipif)
3975 	ipif_t	*ipif;
3976 {
3977 	ipcl_walk(conn_delete_ipif, (caddr_t)ipif);
3978 	/* flush the SCTP ire cache for this ipif */
3979 	sctp_ire_cache_flush(ipif);
3980 }
3981 
3982 /*
3983  * Called when an ill is unplumbed to make sure that there are no
3984  * dangling conn references to that ill.
3985  * Handles ilg_ill, conn_multicast_ill.
3986  */
3987 void
3988 reset_conn_ill(ill_t *ill)
3989 {
3990 	ipcl_walk(conn_delete_ill, (caddr_t)ill);
3991 }
3992 
3993 #ifdef DEBUG
3994 /*
3995  * Walk functions walk all the interfaces in the system to make
3996  * sure that there is no refernece to the ipif or ill that is
3997  * going away.
3998  */
3999 int
4000 ilm_walk_ill(ill_t *ill)
4001 {
4002 	int cnt = 0;
4003 	ill_t *till;
4004 	ilm_t *ilm;
4005 	ill_walk_context_t ctx;
4006 
4007 	rw_enter(&ill_g_lock, RW_READER);
4008 	till = ILL_START_WALK_ALL(&ctx);
4009 	for (; till != NULL; till = ill_next(&ctx, till)) {
4010 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4011 			if (ilm->ilm_ill == ill) {
4012 				cnt++;
4013 			}
4014 		}
4015 	}
4016 	rw_exit(&ill_g_lock);
4017 
4018 	return (cnt);
4019 }
4020 
4021 /*
4022  * This function is called before the ipif is freed.
4023  */
4024 int
4025 ilm_walk_ipif(ipif_t *ipif)
4026 {
4027 	int cnt = 0;
4028 	ill_t *till;
4029 	ilm_t *ilm;
4030 	ill_walk_context_t ctx;
4031 
4032 	till = ILL_START_WALK_ALL(&ctx);
4033 	for (; till != NULL; till = ill_next(&ctx, till)) {
4034 		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
4035 			if (ilm->ilm_ipif == ipif) {
4036 					cnt++;
4037 			}
4038 		}
4039 	}
4040 	return (cnt);
4041 }
4042 #endif
4043