xref: /titanic_50/usr/src/uts/common/inet/ip/ip_multi.c (revision 3ae945c326c1fc078149f2c8b11fac0cc8f6d1d6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ilm_numentries(ill_t *, const in6_addr_t *);
74 
75 static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
76     ilg_stat_t, mcast_record_t, slist_t *, int *);
77 static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
78     zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
79 static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
80 static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
81 
82 static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
83     t_uscalar_t);
84 static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
85     uint_t ifindex);
86 
87 static int	ilg_add(conn_t *connp, const in6_addr_t *group,
88     ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
89     const in6_addr_t *v6src);
90 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
91 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
92     uint32_t *addr_lenp, uint32_t *addr_offp);
93 static int	ip_opt_delete_group_excl(conn_t *connp,
94     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
95     mcast_record_t fmode, const in6_addr_t *v6src);
96 
97 static	ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
98 
99 static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
100     ill_t **);
101 
102 static void	ilg_check_detach(conn_t *, ill_t *);
103 static void	ilg_check_reattach(conn_t *);
104 
105 /*
106  * MT notes:
107  *
108  * Multicast joins operate on both the ilg and ilm structures. Multiple
109  * threads operating on a conn (socket) trying to do multicast joins
110  * need to synchronize when operating on the ilg. Multiple threads
111  * potentially operating on different conns (socket endpoints) trying to
112  * do multicast joins could eventually end up trying to manipulate the
113  * ilm simultaneously and need to synchronize their access to the ilm.
114  * The access and lookup of the ilm, as well as other ill multicast state,
115  * is under ill_mcast_lock.
116  * Modifications and lookups of ilg entries are serialized using the
117  * conn_ilg_lock rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
118  *
119  * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
120  * never the other way around.
121  *
122  * An ilm is an IP data structure used to track multicast join/leave.
123  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
124  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
125  * referencing the ilm.
126  * Modifications and lookups of ilm entries are serialized using the
127  * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
128  * of the ilm state.
129  * ilms are created / destroyed only as writer. ilms
130  * are not passed around. The datapath (anything outside of this file
131  * and igmp.c) uses functions that do not return ilms - just the number
132  * of members. So we don't need a dynamic refcount of the number
133  * of threads holding a reference to an ilm.
134  *
135  * In the cases where we serially access the ilg and ilm, which happens when
136  * we handle the applications requests to join or leave groups and sources,
137  * we use the ill_mcast_serializer mutex to ensure that a multithreaded
138  * application which does concurrent joins and/or leaves on the same group on
139  * the same socket always results in a consistent order for the ilg and ilm
140  * modifications.
141  *
142  * When a multicast operation results in needing to send a message to
143  * the driver (to join/leave an L2 multicast address), we use ill_dlpi_queue(),
144  * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
145  * to queue IGMP/MLD IP packets rather than drop the lock just to send a packet.
146  */
147 
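/*
 * Minimal standalone sketch of the lock ordering described above, using
 * pthread rwlocks in place of the kernel ones; mcast_lock and ilg_lock are
 * hypothetical stand-ins for ill_mcast_lock and conn_ilg_lock.  Not part of
 * this file; included only to illustrate the "ill_mcast_lock before
 * conn_ilg_lock, never the other way around" rule.
 */
#if 0
#include <pthread.h>

/* Hypothetical stand-ins for ill_mcast_lock and conn_ilg_lock. */
static pthread_rwlock_t mcast_lock = PTHREAD_RWLOCK_INITIALIZER;
static pthread_rwlock_t ilg_lock = PTHREAD_RWLOCK_INITIALIZER;

static void
update_membership(void)
{
	/* Always take the "ill" level lock first... */
	pthread_rwlock_wrlock(&mcast_lock);
	/* ...and only then the per-conn lock. */
	pthread_rwlock_wrlock(&ilg_lock);

	/* modify ilm state, then the matching ilg state */

	pthread_rwlock_unlock(&ilg_lock);
	pthread_rwlock_unlock(&mcast_lock);
}

int
main(void)
{
	update_membership();
	return (0);
}
#endif
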
148 #define	GETSTRUCT(structure, number)	\
149 	((structure *)mi_zalloc(sizeof (structure) * (number)))
150 
151 /*
152  * Caller must ensure that the ilg has not been condemned.
153  * The condemned flag is only set in ilg_delete under conn_ilg_lock.
154  *
155  * The caller must hold conn_ilg_lock as writer.
156  */
157 static void
158 ilg_refhold(ilg_t *ilg)
159 {
160 	ASSERT(ilg->ilg_refcnt != 0);
161 	ASSERT(!ilg->ilg_condemned);
162 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
163 
164 	ilg->ilg_refcnt++;
165 }
166 
167 static void
168 ilg_inactive(ilg_t *ilg)
169 {
170 	ASSERT(ilg->ilg_ill == NULL);
171 	ASSERT(ilg->ilg_ilm == NULL);
172 	ASSERT(ilg->ilg_filter == NULL);
173 	ASSERT(ilg->ilg_condemned);
174 
175 	/* Unlink from list */
176 	*ilg->ilg_ptpn = ilg->ilg_next;
177 	if (ilg->ilg_next != NULL)
178 		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
179 	ilg->ilg_next = NULL;
180 	ilg->ilg_ptpn = NULL;
181 
182 	ilg->ilg_connp = NULL;
183 	kmem_free(ilg, sizeof (*ilg));
184 }
185 
186 /*
187  * The caller must hold conn_ilg_lock as writer.
188  */
189 static void
190 ilg_refrele(ilg_t *ilg)
191 {
192 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
193 	ASSERT(ilg->ilg_refcnt != 0);
194 	if (--ilg->ilg_refcnt == 0)
195 		ilg_inactive(ilg);
196 }
197 
198 /*
199  * Acquire reference on ilg and drop reference on held_ilg.
200  * In the case when held_ilg is the same as ilg we already have
201  * a reference, but the held_ilg might be condemned. In that case
202  * we avoid the ilg_refhold/rele so that we can assert in ilg_refhold()
203  * that the ilg isn't condemned.
204  */
205 static void
206 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
207 {
208 	if (held_ilg == ilg)
209 		return;
210 
211 	ilg_refhold(ilg);
212 	if (held_ilg != NULL)
213 		ilg_refrele(held_ilg);
214 }
215 
216 /*
217  * Allocate a new ilg_t and link it into conn_ilg.
218  * Returns NULL on failure, in which case `*errp' will be
219  * filled in with the reason.
220  *
221  * Assumes connp->conn_ilg_lock is held.
222  */
223 static ilg_t *
224 conn_ilg_alloc(conn_t *connp, int *errp)
225 {
226 	ilg_t *ilg;
227 
228 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
229 
230 	/*
231 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
232 	 * create any ilgs.
233 	 */
234 	if (connp->conn_state_flags & CONN_CLOSING) {
235 		*errp = EINVAL;
236 		return (NULL);
237 	}
238 
239 	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
240 	if (ilg == NULL) {
241 		*errp = ENOMEM;
242 		return (NULL);
243 	}
244 
245 	ilg->ilg_refcnt = 1;
246 
247 	/* Insert at head */
248 	if (connp->conn_ilg != NULL)
249 		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
250 	ilg->ilg_next = connp->conn_ilg;
251 	ilg->ilg_ptpn = &connp->conn_ilg;
252 	connp->conn_ilg = ilg;
253 
254 	ilg->ilg_connp = connp;
255 	return (ilg);
256 }
257 
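/*
 * Standalone sketch (not part of this file) of the back-pointer ("ptpn")
 * list linkage used by conn_ilg_alloc() and ilg_inactive() above: each node
 * remembers the address of the pointer that points at it, so it can be
 * unlinked in O(1) without knowing the list head.  node_t and its fields
 * are hypothetical simplifications of ilg_next/ilg_ptpn.
 */
#if 0
#include <stdio.h>

typedef struct node_s {
	struct node_s	*next;
	struct node_s	**ptpn;		/* address of whatever points at us */
	int		val;
} node_t;

static void
insert_head(node_t **headp, node_t *n)
{
	if (*headp != NULL)
		(*headp)->ptpn = &n->next;
	n->next = *headp;
	n->ptpn = headp;
	*headp = n;
}

static void
unlink_node(node_t *n)
{
	*n->ptpn = n->next;
	if (n->next != NULL)
		n->next->ptpn = n->ptpn;
	n->next = NULL;
	n->ptpn = NULL;
}

int
main(void)
{
	node_t *head = NULL;
	node_t a = { NULL, NULL, 1 };
	node_t b = { NULL, NULL, 2 };

	insert_head(&head, &a);
	insert_head(&head, &b);		/* list is now b, a */
	unlink_node(&b);		/* list is now just a */
	printf("head->val = %d\n", head->val);	/* prints 1 */
	return (0);
}
#endif
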
258 typedef struct ilm_fbld_s {
259 	ilm_t		*fbld_ilm;
260 	int		fbld_in_cnt;
261 	int		fbld_ex_cnt;
262 	slist_t		fbld_in;
263 	slist_t		fbld_ex;
264 	boolean_t	fbld_in_overflow;
265 } ilm_fbld_t;
266 
267 /*
268  * Caller must hold ill_mcast_lock
269  */
270 static void
271 ilm_bld_flists(conn_t *connp, void *arg)
272 {
273 	ilg_t *ilg;
274 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
275 	ilm_t *ilm = fbld->fbld_ilm;
276 	in6_addr_t *v6group = &ilm->ilm_v6addr;
277 
278 	if (connp->conn_ilg == NULL)
279 		return;
280 
281 	/*
282 	 * Since we can't break out of the ipcl_walk once started, we still
283 	 * have to look at every conn.  But if we've already found one
284 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
285 	 * ilgs--that will be our state.
286 	 */
287 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
288 		return;
289 
290 	/*
291 	 * Check this conn's ilgs to see if any are interested in our
292 	 * ilm (group, interface match).  If so, update the master
293 	 * include and exclude lists we're building in the fbld struct
294 	 * with this ilg's filter info.
295 	 *
296 	 * Note that the caller has already serialized on the ill we care
297 	 * about.
298 	 */
299 	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));
300 
301 	rw_enter(&connp->conn_ilg_lock, RW_READER);
302 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
303 		if (ilg->ilg_condemned)
304 			continue;
305 
306 		/*
307 		 * Since we are under the ill_mcast_serializer we know
308 		 * that any ilg+ilm operations on this ilm have either
309 		 * not started or completed, except for the last ilg
310 		 * (the one that caused us to be called) which doesn't
311 		 * have ilg_ilm set yet. Hence we compare using ilg_ill
312 		 * and the address.
313 		 */
314 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
315 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
316 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
317 				fbld->fbld_in_cnt++;
318 				if (!fbld->fbld_in_overflow)
319 					l_union_in_a(&fbld->fbld_in,
320 					    ilg->ilg_filter,
321 					    &fbld->fbld_in_overflow);
322 			} else {
323 				fbld->fbld_ex_cnt++;
324 				/*
325 				 * On the first exclude list, don't try to do
326 				 * an intersection, as the master exclude list
327 				 * is intentionally empty.  If the master list
328 				 * is still empty on later iterations, that
329 				 * means we have at least one ilg with an empty
330 				 * exclude list, so that should be reflected
331 				 * when we take the intersection.
332 				 */
333 				if (fbld->fbld_ex_cnt == 1) {
334 					if (ilg->ilg_filter != NULL)
335 						l_copy(ilg->ilg_filter,
336 						    &fbld->fbld_ex);
337 				} else {
338 					l_intersection_in_a(&fbld->fbld_ex,
339 					    ilg->ilg_filter);
340 				}
341 			}
342 			/* there will only be one match, so break now. */
343 			break;
344 		}
345 	}
346 	rw_exit(&connp->conn_ilg_lock);
347 }
348 
349 /*
350  * Caller must hold ill_mcast_lock
351  */
352 static void
353 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
354 {
355 	ilm_fbld_t fbld;
356 	ip_stack_t *ipst = ilm->ilm_ipst;
357 
358 	fbld.fbld_ilm = ilm;
359 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
360 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
361 	fbld.fbld_in_overflow = B_FALSE;
362 
363 	/* first, construct our master include and exclude lists */
364 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
365 
366 	/* now use those master lists to generate the interface filter */
367 
368 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
369 	if (fbld.fbld_in_overflow) {
370 		*fmode = MODE_IS_EXCLUDE;
371 		flist->sl_numsrc = 0;
372 		return;
373 	}
374 
375 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
376 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
377 		*fmode = MODE_IS_INCLUDE;
378 		flist->sl_numsrc = 0;
379 		return;
380 	}
381 
382 	/*
383 	 * If there are no exclude lists, then the interface filter
384 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
385 	 * exclude list makes the interface filter EXCLUDE, with its
386 	 * filter list equal to (fbld_ex - fbld_in).
387 	 */
388 	if (fbld.fbld_ex_cnt == 0) {
389 		*fmode = MODE_IS_INCLUDE;
390 		l_copy(&fbld.fbld_in, flist);
391 	} else {
392 		*fmode = MODE_IS_EXCLUDE;
393 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
394 	}
395 }
396 
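/*
 * Standalone sketch (not part of this file) of the merge performed by
 * ilm_bld_flists()/ilm_gen_filter() above: include-mode sockets are
 * unioned, exclude-mode sockets are intersected, and the interface filter
 * becomes INCLUDE(union) if there are no excluders, otherwise
 * EXCLUDE(intersection - union).  Sources are represented as bits in a
 * mask for brevity; filt_t and merge_filters() are hypothetical
 * simplifications of the slist_t based code above.
 */
#if 0
#include <stdio.h>

typedef enum { INCLUDE_MODE, EXCLUDE_MODE } fmode_t;

typedef struct {
	fmode_t		mode;
	unsigned int	srcs;		/* bitmask of sources */
} filt_t;

static filt_t
merge_filters(const filt_t *f, int n)
{
	unsigned int in_union = 0, ex_isect = ~0u;
	int ex_cnt = 0, i;
	filt_t res;

	for (i = 0; i < n; i++) {
		if (f[i].mode == INCLUDE_MODE) {
			in_union |= f[i].srcs;
		} else {
			ex_isect &= f[i].srcs;
			ex_cnt++;
		}
	}
	if (ex_cnt == 0) {
		res.mode = INCLUDE_MODE;	/* INCLUDE(union of includes) */
		res.srcs = in_union;
	} else {
		res.mode = EXCLUDE_MODE;	/* EXCLUDE(intersection - union) */
		res.srcs = ex_isect & ~in_union;
	}
	return (res);
}

int
main(void)
{
	filt_t socks[3] = {
		{ INCLUDE_MODE, 0x1 },	/* include {s0} */
		{ EXCLUDE_MODE, 0x6 },	/* exclude {s1, s2} */
		{ EXCLUDE_MODE, 0x5 },	/* exclude {s0, s2} */
	};
	filt_t ifstate = merge_filters(socks, 3);

	/* intersection {s2} minus union {s0} => EXCLUDE, srcs = 0x4 */
	printf("mode=%s srcs=0x%x\n",
	    ifstate.mode == INCLUDE_MODE ? "INCLUDE" : "EXCLUDE",
	    ifstate.srcs);
	return (0);
}
#endif
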
397 /*
398  * Caller must hold ill_mcast_lock
399  */
400 static int
401 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
402 {
403 	mcast_record_t fmode;
404 	slist_t *flist;
405 	boolean_t fdefault;
406 	char buf[INET6_ADDRSTRLEN];
407 	ill_t *ill = ilm->ilm_ill;
408 
409 	/*
410 	 * There are several cases where the ilm's filter state
411 	 * defaults to (EXCLUDE, NULL):
412 	 *	- we've had previous joins without associated ilgs
413 	 *	- this join has no associated ilg
414 	 *	- the ilg's filter state is (EXCLUDE, NULL)
415 	 */
416 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
417 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
418 
419 	/* attempt mallocs (if needed) before doing anything else */
420 	if ((flist = l_alloc()) == NULL)
421 		return (ENOMEM);
422 	if (!fdefault && ilm->ilm_filter == NULL) {
423 		ilm->ilm_filter = l_alloc();
424 		if (ilm->ilm_filter == NULL) {
425 			l_free(flist);
426 			return (ENOMEM);
427 		}
428 	}
429 
430 	if (ilgstat != ILGSTAT_CHANGE)
431 		ilm->ilm_refcnt++;
432 
433 	if (ilgstat == ILGSTAT_NONE)
434 		ilm->ilm_no_ilg_cnt++;
435 
436 	/*
437 	 * Determine new filter state.  If it's not the default
438 	 * (EXCLUDE, NULL), we must walk the conn list to find
439 	 * any ilgs interested in this group, and re-build the
440 	 * ilm filter.
441 	 */
442 	if (fdefault) {
443 		fmode = MODE_IS_EXCLUDE;
444 		flist->sl_numsrc = 0;
445 	} else {
446 		ilm_gen_filter(ilm, &fmode, flist);
447 	}
448 
449 	/* make sure state actually changed; nothing to do if not. */
450 	if ((ilm->ilm_fmode == fmode) &&
451 	    !lists_are_different(ilm->ilm_filter, flist)) {
452 		l_free(flist);
453 		return (0);
454 	}
455 
456 	/* send the state change report */
457 	if (!IS_LOOPBACK(ill)) {
458 		if (ill->ill_isv6)
459 			mld_statechange(ilm, fmode, flist);
460 		else
461 			igmp_statechange(ilm, fmode, flist);
462 	}
463 
464 	/* update the ilm state */
465 	ilm->ilm_fmode = fmode;
466 	if (flist->sl_numsrc > 0)
467 		l_copy(flist, ilm->ilm_filter);
468 	else
469 		CLEAR_SLIST(ilm->ilm_filter);
470 
471 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
472 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
473 
474 	l_free(flist);
475 	return (0);
476 }
477 
478 /*
479  * Caller must hold ill_mcast_lock
480  */
481 static int
482 ilm_update_del(ilm_t *ilm)
483 {
484 	mcast_record_t fmode;
485 	slist_t *flist;
486 	ill_t *ill = ilm->ilm_ill;
487 
488 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
489 	    ilm->ilm_refcnt));
490 
491 	if ((flist = l_alloc()) == NULL)
492 		return (ENOMEM);
493 
494 	/*
495 	 * If present, the ilg in question has already either been
496 	 * updated or removed from our list; so all we need to do
497 	 * now is walk the list to update the ilm filter state.
498 	 *
499 	 * Skip the list walk if we have any no-ilg joins, which
500 	 * cause the filter state to revert to (EXCLUDE, NULL).
501 	 */
502 	if (ilm->ilm_no_ilg_cnt != 0) {
503 		fmode = MODE_IS_EXCLUDE;
504 		flist->sl_numsrc = 0;
505 	} else {
506 		ilm_gen_filter(ilm, &fmode, flist);
507 	}
508 
509 	/* check to see if state needs to be updated */
510 	if ((ilm->ilm_fmode == fmode) &&
511 	    (!lists_are_different(ilm->ilm_filter, flist))) {
512 		l_free(flist);
513 		return (0);
514 	}
515 
516 	if (!IS_LOOPBACK(ill)) {
517 		if (ill->ill_isv6)
518 			mld_statechange(ilm, fmode, flist);
519 		else
520 			igmp_statechange(ilm, fmode, flist);
521 	}
522 
523 	ilm->ilm_fmode = fmode;
524 	if (flist->sl_numsrc > 0) {
525 		if (ilm->ilm_filter == NULL) {
526 			ilm->ilm_filter = l_alloc();
527 			if (ilm->ilm_filter == NULL) {
528 				char buf[INET6_ADDRSTRLEN];
529 				ip1dbg(("ilm_update_del: failed to alloc ilm "
530 				    "filter; no source filtering for %s on %s",
531 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
532 				    buf, sizeof (buf)), ill->ill_name));
533 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
534 				l_free(flist);
535 				return (0);
536 			}
537 		}
538 		l_copy(flist, ilm->ilm_filter);
539 	} else {
540 		CLEAR_SLIST(ilm->ilm_filter);
541 	}
542 
543 	l_free(flist);
544 	return (0);
545 }
546 
547 /*
548  * Create/update the ilm for the group/ill. Used by other parts of IP to
549  * do an ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE join with no slist.
550  * Returns with a refhold on the ilm.
551  *
552  * The unspecified address means all multicast addresses, for both
553  * IPv4 and IPv6.
554  *
555  * The caller should have already mapped an IPMP underlying ill to the upper ill.
556  */
557 ilm_t *
558 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
559     int *errorp)
560 {
561 	ilm_t *ilm;
562 
563 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
564 	mutex_enter(&ill->ill_mcast_serializer);
565 	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
566 	    MODE_IS_EXCLUDE, NULL, errorp);
567 	mutex_exit(&ill->ill_mcast_serializer);
568 	return (ilm);
569 }
570 
571 /*
572  * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
573  * then this returns with a refhold on the ilm.
574  *
575  * Internal routine which assumes the caller has already acquired
576  * ill_mcast_serializer.
577  *
578  * The unspecified address means all multicast addresses, for both
579  * IPv4 and IPv6.
580  *
581  * ilgstat tells us if there's an ilg associated with this join,
582  * and if so, if it's a new ilg or a change to an existing one.
583  * ilg_fmode and ilg_flist give us the current filter state of
584  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
585  *
586  * The caller should have already mapped an IPMP underlying ill to the upper ill.
587  */
588 static ilm_t *
589 ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
590     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
591     int *errorp)
592 {
593 	ilm_t *ilm;
594 
595 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
596 
597 	if (ill->ill_isv6) {
598 		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
599 		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
600 			*errorp = EINVAL;
601 			return (NULL);
602 		}
603 	} else {
604 		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
605 			ipaddr_t v4group;
606 
607 			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
608 			if (!CLASSD(v4group)) {
609 				*errorp = EINVAL;
610 				return (NULL);
611 			}
612 		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
613 			*errorp = EINVAL;
614 			return (NULL);
615 		}
616 	}
617 
618 	if (IS_UNDER_IPMP(ill)) {
619 		*errorp = EINVAL;
620 		return (NULL);
621 	}
622 
623 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
624 	/*
625 	 * We do the equivalent of a lookup by checking after we get the lock.
626 	 * This is needed since the ill could have been condemned after
627 	 * we looked it up, and we need to check condemned after we hold
628 	 * ill_mcast_lock to synchronize with the unplumb code.
629 	 */
630 	if (ill->ill_state_flags & ILL_CONDEMNED) {
631 		rw_exit(&ill->ill_mcast_lock);
632 		*errorp = ENXIO;
633 		return (NULL);
634 	}
635 	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
636 	    ilg_flist, errorp);
637 	rw_exit(&ill->ill_mcast_lock);
638 
639 	/* Send any deferred/queued DLPI or IP packets */
640 	ill_mcast_send_queued(ill);
641 	ill_dlpi_send_queued(ill);
642 	ill_mcast_timer_start(ill->ill_ipst);
643 	return (ilm);
644 }
645 
646 static ilm_t *
647 ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
648     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
649     int *errorp)
650 {
651 	ilm_t	*ilm;
652 	int	ret = 0;
653 
654 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
655 	*errorp = 0;
656 
657 	/*
658 	 * An ilm is uniquely identified by the tuple of (group, ill) where
659 	 * `group' is the multicast group address, and `ill' is the interface
660 	 * on which it is currently joined.
661 	 */
662 
663 	ilm = ilm_lookup(ill, v6group, zoneid);
664 	if (ilm != NULL) {
665 		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
666 		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
667 		if (ret == 0)
668 			return (ilm);
669 
670 		*errorp = ret;
671 		return (NULL);
672 	}
673 
674 	/*
675 	 * The caller's checks on the ilg and the ilg+ilm consistency under
676 	 * ill_mcast_serializer ensure that we cannot have ILGSTAT_CHANGE
677 	 * and no ilm.
678 	 */
679 	ASSERT(ilgstat != ILGSTAT_CHANGE);
680 	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
681 	if (ilm == NULL) {
682 		*errorp = ENOMEM;
683 		return (NULL);
684 	}
685 
686 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
687 		/*
688 		 * If we have more than one we should not tell the driver
689 		 * to join this time.
690 		 */
691 		if (ilm_numentries(ill, v6group) == 1) {
692 			ret = ill_join_allmulti(ill);
693 		}
694 	} else {
695 		if (!IS_LOOPBACK(ill)) {
696 			if (ill->ill_isv6)
697 				mld_joingroup(ilm);
698 			else
699 				igmp_joingroup(ilm);
700 		}
701 
702 		/*
703 		 * If we have more than one we should not tell the driver
704 		 * to join this time.
705 		 */
706 		if (ilm_numentries(ill, v6group) == 1) {
707 			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
708 		}
709 	}
710 	if (ret != 0) {
711 		if (ret == ENETDOWN) {
712 			char buf[INET6_ADDRSTRLEN];
713 
714 			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
715 			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
716 			    buf, sizeof (buf)), ill->ill_name));
717 		}
718 		ilm_delete(ilm);
719 		*errorp = ret;
720 		return (NULL);
721 	} else {
722 		return (ilm);
723 	}
724 }
725 
726 /*
727  * Send a multicast request to the driver for enabling or disabling
728  * multicast reception for the v6groupp address. The caller has already
729  * checked whether it is appropriate to send one or not.
730  *
731  * For IPMP we switch to the cast_ill since it has the right hardware
732  * information.
733  */
734 static int
735 ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
736 {
737 	mblk_t	*mp;
738 	uint32_t addrlen, addroff;
739 	ill_t *release_ill = NULL;
740 	int err = 0;
741 
742 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
743 
744 	if (IS_IPMP(ill)) {
745 		/* On the upper IPMP ill. */
746 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
747 		if (release_ill == NULL) {
748 			/*
749 			 * Avoid sending it down to the ipmpstub.
750 			 * We will be called again once the members of the
751 			 * group are in place
752 			 */
753 			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
754 			    ill->ill_name, ill->ill_isv6));
755 			return (0);
756 		}
757 		ill = release_ill;
758 	}
759 	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
760 	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
761 	if (mp == NULL) {
762 		err = ENOMEM;
763 		goto done;
764 	}
765 
766 	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
767 	if (mp == NULL) {
768 		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
769 		err = ENOMEM;
770 		goto done;
771 	}
772 
773 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
774 	case DL_ENABMULTI_REQ:
775 		mutex_enter(&ill->ill_lock);
776 		/* Track the state if this is the first enabmulti */
777 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
778 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
779 		mutex_exit(&ill->ill_lock);
780 		break;
781 	}
782 	ill_dlpi_queue(ill, mp);
783 done:
784 	if (release_ill != NULL)
785 		ill_refrele(release_ill);
786 	return (err);
787 }
788 
789 /*
790  * Send a multicast request to the driver for enabling multicast
791  * membership for v6group if appropriate.
792  */
793 static int
794 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
795 {
796 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
797 	    ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
798 		ip1dbg(("ip_ll_multireq: not resolver\n"));
799 		return (0);	/* Must be IRE_IF_NORESOLVER */
800 	}
801 
802 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
803 		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
804 		return (0);
805 	}
806 	return (ip_ll_send_multireq(ill, v6groupp, prim));
807 }
808 
809 /*
810  * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
811  * being true.
812  */
813 int
814 ip_delmulti(ilm_t *ilm)
815 {
816 	ill_t *ill = ilm->ilm_ill;
817 	int error;
818 
819 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
820 	mutex_enter(&ill->ill_mcast_serializer);
821 	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
822 	mutex_exit(&ill->ill_mcast_serializer);
823 	return (error);
824 }
825 
826 
827 /*
828  * Delete the ilm.
829  * Assumes ill_mcast_serializer is held by the caller.
830  */
831 static int
832 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
833 {
834 	ill_t *ill = ilm->ilm_ill;
835 	int ret;
836 
837 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
838 	ASSERT(!(IS_UNDER_IPMP(ill)));
839 
840 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
841 	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
842 	rw_exit(&ill->ill_mcast_lock);
843 	/* Send any deferred/queued DLPI or IP packets */
844 	ill_mcast_send_queued(ill);
845 	ill_dlpi_send_queued(ill);
846 	ill_mcast_timer_start(ill->ill_ipst);
847 
848 	return (ret);
849 }
850 
851 static int
852 ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
853 {
854 	ill_t *ill = ilm->ilm_ill;
855 	int error;
856 	in6_addr_t v6group;
857 
858 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
859 
860 	/* Update counters */
861 	if (no_ilg)
862 		ilm->ilm_no_ilg_cnt--;
863 
864 	if (leaving)
865 		ilm->ilm_refcnt--;
866 
867 	if (ilm->ilm_refcnt > 0)
868 		return (ilm_update_del(ilm));
869 
870 	v6group = ilm->ilm_v6addr;
871 
872 	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
873 		ilm_delete(ilm);
874 		/*
875 		 * If we have some left then we should not tell the driver
876 		 * to leave.
877 		 */
878 		if (ilm_numentries(ill, &v6group) != 0)
879 			return (0);
880 
881 		ill_leave_allmulti(ill);
882 
883 		return (0);
884 	}
885 
886 	if (!IS_LOOPBACK(ill)) {
887 		if (ill->ill_isv6)
888 			mld_leavegroup(ilm);
889 		else
890 			igmp_leavegroup(ilm);
891 	}
892 
893 	ilm_delete(ilm);
894 	/*
895 	 * If we have some left then we should not tell the driver
896 	 * to leave.
897 	 */
898 	if (ilm_numentries(ill, &v6group) != 0)
899 		return (0);
900 
901 	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
902 	/* We ignore the case when ill_dl_up is not set */
903 	if (error == ENETDOWN) {
904 		char buf[INET6_ADDRSTRLEN];
905 
906 		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
907 		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
908 		    ill->ill_name));
909 	}
910 	return (error);
911 }
912 
913 /*
914  * Make the driver pass up all multicast packets.
915  */
916 int
917 ill_join_allmulti(ill_t *ill)
918 {
919 	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
920 	uint32_t	addrlen, addroff;
921 	ill_t		*release_ill = NULL;
922 
923 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
924 
925 	if (IS_LOOPBACK(ill))
926 		return (0);
927 
928 	if (!ill->ill_dl_up) {
929 		/*
930 		 * Nobody there. All multicast addresses will be re-joined
931 		 * when we get the DL_BIND_ACK bringing the interface up.
932 		 */
933 		return (ENETDOWN);
934 	}
935 
936 	if (IS_IPMP(ill)) {
937 		/* On the upper IPMP ill. */
938 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
939 		if (release_ill == NULL) {
940 			/*
941 			 * Avoid sending it down to the ipmpstub.
942 			 * We will be called again once the members of the
943 			 * group are in place
944 			 */
945 			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
946 			    ill->ill_name, ill->ill_isv6));
947 			return (0);
948 		}
949 		ill = release_ill;
950 		if (!ill->ill_dl_up) {
951 			ill_refrele(ill);
952 			return (ENETDOWN);
953 		}
954 	}
955 
956 	/*
957 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
958 	 * provider.  We don't need to do this for certain media types for
959 	 * which we never need to turn promiscuous mode on.  While we're here,
960 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
961 	 * ill_leave_allmulti() will not fail due to low memory conditions.
962 	 */
963 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
964 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
965 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
966 		    &addrlen, &addroff);
967 		if (ill->ill_promiscoff_mp == NULL)
968 			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
969 			    &addrlen, &addroff);
970 		if (promiscon_mp == NULL ||
971 		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
972 			freemsg(promiscon_mp);
973 			freemsg(promiscoff_mp);
974 			if (release_ill != NULL)
975 				ill_refrele(release_ill);
976 			return (ENOMEM);
977 		}
978 		if (ill->ill_promiscoff_mp == NULL)
979 			ill->ill_promiscoff_mp = promiscoff_mp;
980 		ill_dlpi_queue(ill, promiscon_mp);
981 	}
982 	if (release_ill != NULL)
983 		ill_refrele(release_ill);
984 	return (0);
985 }
986 
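/*
 * Standalone sketch (not part of this file) of the pre-allocation pattern
 * used above: the DL_PROMISCOFF_REQ is allocated while turning promiscuous
 * mode on, so that ill_leave_allmulti() can never fail for lack of memory.
 * msg_t, devq_send() and the promisc_on()/promisc_off() names are
 * hypothetical.
 */
#if 0
#include <errno.h>
#include <stdlib.h>

typedef struct { int cmd; } msg_t;

static msg_t *saved_off_msg;

static void
devq_send(msg_t *m)		/* stand-in for ill_dlpi_queue() */
{
	free(m);
}

static int
promisc_on(void)
{
	msg_t *on = malloc(sizeof (msg_t));
	msg_t *off = (saved_off_msg != NULL) ? NULL : malloc(sizeof (msg_t));

	/* Fail now, while the caller can still be told about it... */
	if (on == NULL || (saved_off_msg == NULL && off == NULL)) {
		free(on);
		free(off);
		return (ENOMEM);
	}
	/* ...so that turning promiscuous mode off below cannot fail. */
	if (saved_off_msg == NULL)
		saved_off_msg = off;
	on->cmd = 1;
	devq_send(on);
	return (0);
}

static void
promisc_off(void)
{
	msg_t *off = saved_off_msg;

	if (off != NULL) {
		saved_off_msg = NULL;
		off->cmd = 0;
		devq_send(off);
	}
}

int
main(void)
{
	if (promisc_on() == 0)
		promisc_off();
	return (0);
}
#endif
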
987 /*
988  * Make the driver stop passing up all multicast packets
989  */
990 void
991 ill_leave_allmulti(ill_t *ill)
992 {
993 	mblk_t	*promiscoff_mp;
994 	ill_t	*release_ill = NULL;
995 
996 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
997 
998 	if (IS_LOOPBACK(ill))
999 		return;
1000 
1001 	if (!ill->ill_dl_up) {
1002 		/*
1003 		 * Nobody there. All multicast addresses will be re-joined
1004 		 * when we get the DL_BIND_ACK bringing the interface up.
1005 		 */
1006 		return;
1007 	}
1008 
1009 	if (IS_IPMP(ill)) {
1010 		/* On the upper IPMP ill. */
1011 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1012 		if (release_ill == NULL) {
1013 			/*
1014 			 * Avoid sending it down to the ipmpstub.
1015 			 * We will be called again once the members of the
1016 			 * group are in place
1017 			 */
1018 			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
1019 			    ill->ill_name, ill->ill_isv6));
1020 			return;
1021 		}
1022 		ill = release_ill;
1023 		if (!ill->ill_dl_up)
1024 			goto done;
1025 	}
1026 
1027 	/*
1028 	 * In the case of IPMP and ill_dl_up not being set when we joined,
1029 	 * we didn't allocate a promiscoff_mp. In that case we have
1030 	 * nothing to do when we leave.
1031 	 * Ditto for PHYI_MULTI_BCAST
1032 	 */
1033 	promiscoff_mp = ill->ill_promiscoff_mp;
1034 	if (promiscoff_mp != NULL) {
1035 		ill->ill_promiscoff_mp = NULL;
1036 		ill_dlpi_queue(ill, promiscoff_mp);
1037 	}
1038 done:
1039 	if (release_ill != NULL)
1040 		ill_refrele(release_ill);
1041 }
1042 
1043 int
1044 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1045 {
1046 	ill_t		*ill;
1047 	int		ret;
1048 	ilm_t		*ilm;
1049 
1050 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1051 	if (ill == NULL)
1052 		return (ENODEV);
1053 
1054 	/*
1055 	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1056 	 * to join allmulti since only the nominated underlying interface in
1057 	 * the group should receive multicast.  We silently succeed to avoid
1058 	 * having to teach IPobs (currently the only caller of this routine)
1059 	 * to ignore failures in this case.
1060 	 */
1061 	if (IS_UNDER_IPMP(ill)) {
1062 		ill_refrele(ill);
1063 		return (0);
1064 	}
1065 	mutex_enter(&ill->ill_lock);
1066 	if (ill->ill_ipallmulti_cnt > 0) {
1067 		/* Already joined */
1068 		ASSERT(ill->ill_ipallmulti_ilm != NULL);
1069 		ill->ill_ipallmulti_cnt++;
1070 		mutex_exit(&ill->ill_lock);
1071 		goto done;
1072 	}
1073 	mutex_exit(&ill->ill_lock);
1074 
1075 	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1076 	if (ilm == NULL) {
1077 		ASSERT(ret != 0);
1078 		ill_refrele(ill);
1079 		return (ret);
1080 	}
1081 
1082 	mutex_enter(&ill->ill_lock);
1083 	if (ill->ill_ipallmulti_cnt > 0) {
1084 		/* Another thread added it concurrently */
1085 		(void) ip_delmulti(ilm);
1086 		mutex_exit(&ill->ill_lock);
1087 		goto done;
1088 	}
1089 	ASSERT(ill->ill_ipallmulti_ilm == NULL);
1090 	ill->ill_ipallmulti_ilm = ilm;
1091 	ill->ill_ipallmulti_cnt++;
1092 	mutex_exit(&ill->ill_lock);
1093 done:
1094 	ill_refrele(ill);
1095 	return (0);
1096 }
1097 
1098 int
1099 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1100 {
1101 	ill_t		*ill;
1102 	ilm_t		*ilm;
1103 
1104 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1105 	if (ill == NULL)
1106 		return (ENODEV);
1107 
1108 	if (IS_UNDER_IPMP(ill)) {
1109 		ill_refrele(ill);
1110 		return (0);
1111 	}
1112 
1113 	mutex_enter(&ill->ill_lock);
1114 	if (ill->ill_ipallmulti_cnt == 0) {
1115 		/* ip_purge_allmulti could have removed them all */
1116 		mutex_exit(&ill->ill_lock);
1117 		goto done;
1118 	}
1119 	ill->ill_ipallmulti_cnt--;
1120 	if (ill->ill_ipallmulti_cnt == 0) {
1121 		/* Last one */
1122 		ilm = ill->ill_ipallmulti_ilm;
1123 		ill->ill_ipallmulti_ilm = NULL;
1124 	} else {
1125 		ilm = NULL;
1126 	}
1127 	mutex_exit(&ill->ill_lock);
1128 	if (ilm != NULL)
1129 		(void) ip_delmulti(ilm);
1130 
1131 done:
1132 	ill_refrele(ill);
1133 	return (0);
1134 }
1135 
1136 /*
1137  * Delete the allmulti memberships that were added as part of
1138  * ip_join_allmulti().
1139  */
1140 void
1141 ip_purge_allmulti(ill_t *ill)
1142 {
1143 	ilm_t	*ilm;
1144 
1145 	ASSERT(IAM_WRITER_ILL(ill));
1146 
1147 	mutex_enter(&ill->ill_lock);
1148 	ilm = ill->ill_ipallmulti_ilm;
1149 	ill->ill_ipallmulti_ilm = NULL;
1150 	ill->ill_ipallmulti_cnt = 0;
1151 	mutex_exit(&ill->ill_lock);
1152 
1153 	if (ilm != NULL)
1154 		(void) ip_delmulti(ilm);
1155 }
1156 
1157 /*
1158  * Create a dlpi message with room for phys+sap. Later
1159  * we will strip the sap for those primitives which
1160  * only need a physical address.
1161  */
1162 static mblk_t *
1163 ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1164     uint32_t *addr_lenp, uint32_t *addr_offp)
1165 {
1166 	mblk_t	*mp;
1167 	uint32_t	hw_addr_length;
1168 	char		*cp;
1169 	uint32_t	offset;
1170 	uint32_t	length;
1171 	uint32_t 	size;
1172 
1173 	*addr_lenp = *addr_offp = 0;
1174 
1175 	hw_addr_length = ill->ill_phys_addr_length;
1176 	if (!hw_addr_length) {
1177 		ip0dbg(("ill_create_dl: hw addr length = 0\n"));
1178 		return (NULL);
1179 	}
1180 
1181 	switch (dl_primitive) {
1182 	case DL_ENABMULTI_REQ:
1183 		length = sizeof (dl_enabmulti_req_t);
1184 		size = length + hw_addr_length;
1185 		break;
1186 	case DL_DISABMULTI_REQ:
1187 		length = sizeof (dl_disabmulti_req_t);
1188 		size = length + hw_addr_length;
1189 		break;
1190 	case DL_PROMISCON_REQ:
1191 	case DL_PROMISCOFF_REQ:
1192 		size = length = sizeof (dl_promiscon_req_t);
1193 		break;
1194 	default:
1195 		return (NULL);
1196 	}
1197 	mp = allocb(size, BPRI_HI);
1198 	if (!mp)
1199 		return (NULL);
1200 	mp->b_wptr += size;
1201 	mp->b_datap->db_type = M_PROTO;
1202 
1203 	cp = (char *)mp->b_rptr;
1204 	offset = length;
1205 
1206 	switch (dl_primitive) {
1207 	case DL_ENABMULTI_REQ: {
1208 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1209 
1210 		dl->dl_primitive = dl_primitive;
1211 		dl->dl_addr_offset = offset;
1212 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1213 		*addr_offp = offset;
1214 		break;
1215 	}
1216 	case DL_DISABMULTI_REQ: {
1217 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1218 
1219 		dl->dl_primitive = dl_primitive;
1220 		dl->dl_addr_offset = offset;
1221 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1222 		*addr_offp = offset;
1223 		break;
1224 	}
1225 	case DL_PROMISCON_REQ:
1226 	case DL_PROMISCOFF_REQ: {
1227 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1228 
1229 		dl->dl_primitive = dl_primitive;
1230 		dl->dl_level = DL_PROMISC_MULTI;
1231 		break;
1232 	}
1233 	}
1234 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1235 	    *addr_lenp, *addr_offp));
1236 	return (mp);
1237 }
1238 
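/*
 * Standalone sketch (not part of this file) of the message layout that
 * ill_create_dl() sets up: a fixed request header followed by the hardware
 * address, with the header recording the address length and its offset from
 * the start of the message.  req_t and build_req() are hypothetical; in the
 * real code the address bytes are filled in later by ndp_mcastreq().
 */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	uint32_t	prim;		/* e.g. enable/disable multicast */
	uint32_t	addr_length;
	uint32_t	addr_offset;	/* offset of the address in the message */
} req_t;

static unsigned char *
build_req(uint32_t prim, const unsigned char *addr, uint32_t addrlen)
{
	unsigned char *msg = malloc(sizeof (req_t) + addrlen);
	req_t *req = (req_t *)msg;

	if (msg == NULL)
		return (NULL);
	req->prim = prim;
	req->addr_length = addrlen;
	req->addr_offset = sizeof (req_t);	/* address follows the header */
	memcpy(msg + req->addr_offset, addr, addrlen);
	return (msg);
}

int
main(void)
{
	unsigned char mac[6] = { 0x01, 0x00, 0x5e, 0x00, 0x00, 0x01 };
	unsigned char *msg = build_req(1, mac, sizeof (mac));

	if (msg != NULL) {
		printf("addr at offset %u, length %u\n",
		    ((req_t *)msg)->addr_offset, ((req_t *)msg)->addr_length);
		free(msg);
	}
	return (0);
}
#endif
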
1239 /*
1240  * Rejoin any groups for which we have ilms.
1241  *
1242  * This is only needed for IPMP when the cast_ill changes since that
1243  * change is invisible to the ilm. Other interface changes are handled
1244  * by conn_update_ill.
1245  */
1246 void
1247 ill_recover_multicast(ill_t *ill)
1248 {
1249 	ilm_t	*ilm;
1250 	char    addrbuf[INET6_ADDRSTRLEN];
1251 
1252 	ill->ill_need_recover_multicast = 0;
1253 
1254 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1255 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1256 		/*
1257 		 * If we have more than one ilm for the group (e.g., with
1258 		 * different zoneid) then we should not tell the driver
1259 		 * to join unless this is the first ilm for the group.
1260 		 */
1261 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1262 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1263 			continue;
1264 		}
1265 
1266 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1267 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1268 
1269 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1270 			(void) ill_join_allmulti(ill);
1271 		} else {
1272 			if (ill->ill_isv6)
1273 				mld_joingroup(ilm);
1274 			else
1275 				igmp_joingroup(ilm);
1276 
1277 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1278 			    DL_ENABMULTI_REQ);
1279 		}
1280 	}
1281 	rw_exit(&ill->ill_mcast_lock);
1282 	/* Send any deferred/queued DLPI or IP packets */
1283 	ill_mcast_send_queued(ill);
1284 	ill_dlpi_send_queued(ill);
1285 	ill_mcast_timer_start(ill->ill_ipst);
1286 }
1287 
1288 /*
1289  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1290  * that were explicitly joined.
1291  *
1292  * This is only needed for IPMP when the cast_ill changes since that
1293  * change is invisible to the ilm. Other interface changes are handled
1294  * by conn_update_ill.
1295  */
1296 void
1297 ill_leave_multicast(ill_t *ill)
1298 {
1299 	ilm_t	*ilm;
1300 	char    addrbuf[INET6_ADDRSTRLEN];
1301 
1302 	ill->ill_need_recover_multicast = 1;
1303 
1304 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1305 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1306 		/*
1307 		 * If we have more than one ilm for the group (e.g., with
1308 		 * different zoneid) then we should not tell the driver
1309 		 * to leave unless this is the first ilm for the group.
1310 		 */
1311 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1312 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1313 			continue;
1314 		}
1315 
1316 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1317 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1318 
1319 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1320 			ill_leave_allmulti(ill);
1321 		} else {
1322 			if (ill->ill_isv6)
1323 				mld_leavegroup(ilm);
1324 			else
1325 				igmp_leavegroup(ilm);
1326 
1327 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1328 			    DL_DISABMULTI_REQ);
1329 		}
1330 	}
1331 	rw_exit(&ill->ill_mcast_lock);
1332 	/* Send any deferred/queued DLPI or IP packets */
1333 	ill_mcast_send_queued(ill);
1334 	ill_dlpi_send_queued(ill);
1335 	ill_mcast_timer_start(ill->ill_ipst);
1336 }
1337 
1338 /*
1339  * Interface used by IP input/output.
1340  * Returns true if there is a member on the ill for any zoneid.
1341  */
1342 boolean_t
1343 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1344 {
1345 	ilm_t		*ilm;
1346 
1347 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1348 	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1349 	rw_exit(&ill->ill_mcast_lock);
1350 	return (ilm != NULL);
1351 }
1352 
1353 /*
1354  * Interface used by IP input/output.
1355  * Returns true if there is a member on the ill for any zoneid.
1356  *
1357  * The group and source can't be INADDR_ANY here so no need to translate to
1358  * the unspecified IPv6 address.
1359  */
1360 boolean_t
1361 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1362 {
1363 	in6_addr_t	v6group;
1364 
1365 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1366 	return (ill_hasmembers_v6(ill, &v6group));
1367 }
1368 
1369 /*
1370  * Interface used by IP input/output.
1371  * Returns true if there is a member on the ill for any zoneid except skipzone.
1372  */
1373 boolean_t
1374 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1375     zoneid_t skipzone)
1376 {
1377 	ilm_t		*ilm;
1378 
1379 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1380 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1381 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1382 		    ilm->ilm_zoneid != skipzone) {
1383 			rw_exit(&ill->ill_mcast_lock);
1384 			return (B_TRUE);
1385 		}
1386 	}
1387 	rw_exit(&ill->ill_mcast_lock);
1388 	return (B_FALSE);
1389 }
1390 
1391 /*
1392  * Interface used by IP input/output.
1393  * Returns true if there is a member on the ill for any zoneid except skipzone.
1394  *
1395  * The group and source can't be INADDR_ANY here so no need to translate to
1396  * the unspecified IPv6 address.
1397  */
1398 boolean_t
1399 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1400 {
1401 	in6_addr_t	v6group;
1402 
1403 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1404 	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1405 }
1406 
1407 /*
1408  * Interface used by IP input.
1409  * Returns the next numerically larger zoneid that has a member. If none exist
1410  * then returns -1 (ALL_ZONES).
1411  * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1412  * to find the first zoneid which has a member, and then pass that in for
1413  * subsequent calls until ALL_ZONES is returned.
1414  *
1415  * The implementation of ill_hasmembers_nextzone() assumes the ilms
1416  * are sorted by zoneid for efficiency.
1417  */
1418 zoneid_t
1419 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1420     zoneid_t zoneid)
1421 {
1422 	ilm_t		*ilm;
1423 
1424 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1425 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1426 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1427 		    ilm->ilm_zoneid > zoneid) {
1428 			zoneid = ilm->ilm_zoneid;
1429 			rw_exit(&ill->ill_mcast_lock);
1430 			return (zoneid);
1431 		}
1432 	}
1433 	rw_exit(&ill->ill_mcast_lock);
1434 	return (ALL_ZONES);
1435 }
1436 
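/*
 * Hypothetical caller of ill_hasmembers_nextzone_v6(), illustrating the
 * ALL_ZONES start/stop convention described above; "ill" and "group" are
 * assumed to already be in scope.
 */
#if 0
	zoneid_t zoneid = ALL_ZONES;

	while ((zoneid = ill_hasmembers_nextzone_v6(ill, &group, zoneid)) !=
	    ALL_ZONES) {
		/* deliver a copy of the packet to this zone */
	}
#endif
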
1437 /*
1438  * Interface used by IP input.
1439  * Returns the next numerically larger zoneid that has a member. If none exist
1440  * then returns -1 (ALL_ZONES).
1441  *
1442  * The group and source can't be INADDR_ANY here so no need to translate to
1443  * the unspecified IPv6 address.
1444  */
1445 zoneid_t
1446 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1447 {
1448 	in6_addr_t	v6group;
1449 
1450 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1451 
1452 	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1453 }
1454 
1455 /*
1456  * Find an ilm matching the ill, group, and zoneid.
1457  */
1458 static ilm_t *
1459 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1460 {
1461 	ilm_t	*ilm;
1462 
1463 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1464 
1465 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1466 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1467 			continue;
1468 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1469 			continue;
1470 
1471 		ASSERT(ilm->ilm_ill == ill);
1472 		return (ilm);
1473 	}
1474 	return (NULL);
1475 }
1476 
1477 /*
1478  * How many members on this ill?
1479  * Since each shared-IP zone has a separate ilm for the same group/ill
1480  * we can have several.
1481  */
1482 static int
1483 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1484 {
1485 	ilm_t	*ilm;
1486 	int i = 0;
1487 
1488 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1489 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1490 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1491 			i++;
1492 		}
1493 	}
1494 	return (i);
1495 }
1496 
1497 /* Caller guarantees that the group is not already on the list */
1498 static ilm_t *
1499 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1500     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1501 {
1502 	ilm_t	*ilm;
1503 	ilm_t	*ilm_cur;
1504 	ilm_t	**ilm_ptpn;
1505 
1506 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1507 	ilm = GETSTRUCT(ilm_t, 1);
1508 	if (ilm == NULL)
1509 		return (NULL);
1510 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1511 		ilm->ilm_filter = l_alloc();
1512 		if (ilm->ilm_filter == NULL) {
1513 			mi_free(ilm);
1514 			return (NULL);
1515 		}
1516 	}
1517 	ilm->ilm_v6addr = *v6group;
1518 	ilm->ilm_refcnt = 1;
1519 	ilm->ilm_zoneid = zoneid;
1520 	ilm->ilm_timer = INFINITY;
1521 	ilm->ilm_rtx.rtx_timer = INFINITY;
1522 
1523 	ilm->ilm_ill = ill;
1524 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1525 	    (char *), "ilm", (void *), ilm);
1526 	ill->ill_ilm_cnt++;
1527 
1528 	ASSERT(ill->ill_ipst);
1529 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1530 
1531 	/* The ill/ipif could have just been marked as condemned */
1532 
1533 	/*
1534 	 * To make ill_hasmembers_nextzone_v6 work we keep the list
1535 	 * sorted by zoneid.
1536 	 */
1537 	ilm_cur = ill->ill_ilm;
1538 	ilm_ptpn = &ill->ill_ilm;
1539 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
1540 		ilm_ptpn = &ilm_cur->ilm_next;
1541 		ilm_cur = ilm_cur->ilm_next;
1542 	}
1543 	ilm->ilm_next = ilm_cur;
1544 	*ilm_ptpn = ilm;
1545 
1546 	/*
1547 	 * If we have an associated ilg, use its filter state; if not,
1548 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1549 	 */
1550 	if (ilgstat != ILGSTAT_NONE) {
1551 		if (!SLIST_IS_EMPTY(ilg_flist))
1552 			l_copy(ilg_flist, ilm->ilm_filter);
1553 		ilm->ilm_fmode = ilg_fmode;
1554 	} else {
1555 		ilm->ilm_no_ilg_cnt = 1;
1556 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1557 	}
1558 
1559 	return (ilm);
1560 }
1561 
1562 void
1563 ilm_inactive(ilm_t *ilm)
1564 {
1565 	FREE_SLIST(ilm->ilm_filter);
1566 	FREE_SLIST(ilm->ilm_pendsrcs);
1567 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1568 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1569 	ilm->ilm_ipst = NULL;
1570 	mi_free((char *)ilm);
1571 }
1572 
1573 /*
1574  * Unlink ilm and free it.
1575  */
1576 static void
1577 ilm_delete(ilm_t *ilm)
1578 {
1579 	ill_t		*ill = ilm->ilm_ill;
1580 	ilm_t		**ilmp;
1581 	boolean_t	need_wakeup;
1582 
1583 	/*
1584 	 * Delete under lock protection so that readers don't stumble
1585 	 * on bad ilm_next
1586 	 */
1587 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1588 
1589 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1590 		;
1591 
1592 	*ilmp = ilm->ilm_next;
1593 
1594 	mutex_enter(&ill->ill_lock);
1595 	/*
1596 	 * If we are the last reference to the ill, we may need to wake up any
1597 	 * pending FREE or unplumb operations. This is because conn_update_ill
1598 	 * bails if there is an ilg_delete_all in progress.
1599 	 */
1600 	need_wakeup = B_FALSE;
1601 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1602 	    (char *), "ilm", (void *), ilm);
1603 	ASSERT(ill->ill_ilm_cnt > 0);
1604 	ill->ill_ilm_cnt--;
1605 	if (ILL_FREE_OK(ill))
1606 		need_wakeup = B_TRUE;
1607 
1608 	ilm_inactive(ilm); /* frees this ilm */
1609 
1610 	if (need_wakeup) {
1611 		/* drops ill lock */
1612 		ipif_ill_refrele_tail(ill);
1613 	} else {
1614 		mutex_exit(&ill->ill_lock);
1615 	}
1616 }
1617 
1618 /*
1619  * Look up an ill based on the group, ifindex, ifaddr, and zoneid.
1620  * Applies to both IPv4 and IPv6, although ifaddr is only used with
1621  * IPv4.
1622  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1623  * On error it sets *errorp.
1624  */
1625 static ill_t *
1626 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1627     zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1628 {
1629 	ill_t *ill;
1630 	ipaddr_t v4group;
1631 
1632 	if (IN6_IS_ADDR_V4MAPPED(group)) {
1633 		IN6_V4MAPPED_TO_IPADDR(group, v4group);
1634 
1635 		if (ifindex != 0) {
1636 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1637 			    B_FALSE, ipst);
1638 		} else if (ifaddr != INADDR_ANY) {
1639 			ipif_t *ipif;
1640 
1641 			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1642 			if (ipif == NULL) {
1643 				ill = NULL;
1644 			} else {
1645 				ill = ipif->ipif_ill;
1646 				ill_refhold(ill);
1647 				ipif_refrele(ipif);
1648 			}
1649 		} else {
1650 			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
1651 			    NULL);
1652 		}
1653 	} else {
1654 		if (ifindex != 0) {
1655 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1656 			    B_TRUE, ipst);
1657 		} else {
1658 			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
1659 			    NULL);
1660 		}
1661 	}
1662 	if (ill == NULL) {
1663 		if (ifindex != 0)
1664 			*errorp = ENXIO;
1665 		else
1666 			*errorp = EADDRNOTAVAIL;
1667 		return (NULL);
1668 	}
1669 	/* operation not supported on the virtual network interface */
1670 	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1671 		ill_refrele(ill);
1672 		*errorp = EINVAL;
1673 		return (NULL);
1674 	}
1675 	return (ill);
1676 }
1677 
1678 /*
1679  * Looks up the appropriate ill given an interface index (or interface address)
1680  * and multicast group.  On success, returns 0, with *illpp pointing to the
1681  * found struct.  On failure, returns an errno and *illpp is set to NULL.
1682  *
1683  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1684  *
1685  * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1686  * case of IPv4.
1687  */
1688 int
1689 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1690     const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1691 {
1692 	boolean_t src_unspec;
1693 	ill_t *ill = NULL;
1694 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1695 	int error = 0;
1696 
1697 	*illpp = NULL;
1698 
1699 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1700 
1701 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1702 		ipaddr_t v4group;
1703 		ipaddr_t v4src;
1704 
1705 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1706 			return (EINVAL);
1707 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1708 		if (src_unspec) {
1709 			v4src = INADDR_ANY;
1710 		} else {
1711 			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1712 		}
1713 		if (!CLASSD(v4group) || CLASSD(v4src))
1714 			return (EINVAL);
1715 	} else {
1716 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1717 			return (EINVAL);
1718 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1719 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1720 			return (EINVAL);
1721 		}
1722 	}
1723 
1724 	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1725 	    ipst, &error);
1726 	*illpp = ill;
1727 	return (error);
1728 }
1729 
1730 static int
1731 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1732     struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
1733 {
1734 	ilg_t *ilg;
1735 	int i, numsrc, fmode, outsrcs;
1736 	struct sockaddr_in *sin;
1737 	struct sockaddr_in6 *sin6;
1738 	struct in_addr *addrp;
1739 	slist_t *fp;
1740 	boolean_t is_v4only_api;
1741 	ipaddr_t ifaddr;
1742 	uint_t ifindex;
1743 
1744 	if (gf == NULL) {
1745 		ASSERT(imsf != NULL);
1746 		ASSERT(!issin6);
1747 		is_v4only_api = B_TRUE;
1748 		outsrcs = imsf->imsf_numsrc;
1749 		ifaddr = imsf->imsf_interface.s_addr;
1750 		ifindex = 0;
1751 	} else {
1752 		ASSERT(imsf == NULL);
1753 		is_v4only_api = B_FALSE;
1754 		outsrcs = gf->gf_numsrc;
1755 		ifaddr = INADDR_ANY;
1756 		ifindex = gf->gf_interface;
1757 	}
1758 
1759 	/* No need to use ill_mcast_serializer for the reader */
1760 	rw_enter(&connp->conn_ilg_lock, RW_READER);
1761 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1762 	if (ilg == NULL) {
1763 		rw_exit(&connp->conn_ilg_lock);
1764 		return (EADDRNOTAVAIL);
1765 	}
1766 
1767 	/*
1768 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1769 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1770 	 * So we need to translate here.
1771 	 */
1772 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1773 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1774 	if ((fp = ilg->ilg_filter) == NULL) {
1775 		numsrc = 0;
1776 	} else {
1777 		for (i = 0; i < outsrcs; i++) {
1778 			if (i == fp->sl_numsrc)
1779 				break;
1780 			if (issin6) {
1781 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1782 				sin6->sin6_family = AF_INET6;
1783 				sin6->sin6_addr = fp->sl_addr[i];
1784 			} else {
1785 				if (is_v4only_api) {
1786 					addrp = &imsf->imsf_slist[i];
1787 				} else {
1788 					sin = (struct sockaddr_in *)
1789 					    &gf->gf_slist[i];
1790 					sin->sin_family = AF_INET;
1791 					addrp = &sin->sin_addr;
1792 				}
1793 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1794 			}
1795 		}
1796 		numsrc = fp->sl_numsrc;
1797 	}
1798 
1799 	if (is_v4only_api) {
1800 		imsf->imsf_numsrc = numsrc;
1801 		imsf->imsf_fmode = fmode;
1802 	} else {
1803 		gf->gf_numsrc = numsrc;
1804 		gf->gf_fmode = fmode;
1805 	}
1806 
1807 	rw_exit(&connp->conn_ilg_lock);
1808 
1809 	return (0);
1810 }
1811 
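/*
 * Hedged userland sketch (not part of this file) of the RFC 3678 full-state
 * API that ip_get_srcfilter()/ip_set_srcfilter() back.  The struct
 * group_filter layout and the MCAST_INCLUDE/MCAST_EXCLUDE constants are from
 * RFC 3678; the exact way the request reaches the kernel differs by platform
 * (a MCAST_MSFILTER socket option or the SIOCSMSFILTER ioctl), so the
 * setsockopt() call below is illustrative only.
 */
#if 0
#include <string.h>
#include <stdint.h>
#include <sys/socket.h>
#include <netinet/in.h>

/*
 * Restrict membership in "group" on interface "ifindex" to the single
 * source "src"; "s" is an existing IPv4 UDP socket.
 */
static int
set_single_source(int s, uint32_t ifindex, const struct sockaddr_in *group,
    const struct sockaddr_in *src)
{
	struct group_filter gf;

	memset(&gf, 0, sizeof (gf));
	gf.gf_interface = ifindex;
	memcpy(&gf.gf_group, group, sizeof (*group));
	gf.gf_fmode = MCAST_INCLUDE;	/* kernel maps this to MODE_IS_INCLUDE */
	gf.gf_numsrc = 1;
	memcpy(&gf.gf_slist[0], src, sizeof (*src));

	/*
	 * Note: MCAST_INCLUDE with gf_numsrc == 0 is instead treated as a
	 * request to leave the group (see ip_set_srcfilter() below).
	 * MCAST_MSFILTER is the RFC 3678 option name and may not exist
	 * under that spelling on every platform.
	 */
	return (setsockopt(s, IPPROTO_IP, MCAST_MSFILTER, &gf, sizeof (gf)));
}
#endif
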
1812 /*
1813  * Common for IPv4 and IPv6.
1814  */
1815 static int
1816 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1817     struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1818     boolean_t issin6)
1819 {
1820 	ilg_t *ilg;
1821 	int i, err, infmode, new_fmode;
1822 	uint_t insrcs;
1823 	struct sockaddr_in *sin;
1824 	struct sockaddr_in6 *sin6;
1825 	struct in_addr *addrp;
1826 	slist_t *orig_filter = NULL;
1827 	slist_t *new_filter = NULL;
1828 	mcast_record_t orig_fmode;
1829 	boolean_t leave_group, is_v4only_api;
1830 	ilg_stat_t ilgstat;
1831 	ilm_t *ilm;
1832 	ipaddr_t ifaddr;
1833 	uint_t ifindex;
1834 
1835 	if (gf == NULL) {
1836 		ASSERT(imsf != NULL);
1837 		ASSERT(!issin6);
1838 		is_v4only_api = B_TRUE;
1839 		insrcs = imsf->imsf_numsrc;
1840 		infmode = imsf->imsf_fmode;
1841 		ifaddr = imsf->imsf_interface.s_addr;
1842 		ifindex = 0;
1843 	} else {
1844 		ASSERT(imsf == NULL);
1845 		is_v4only_api = B_FALSE;
1846 		insrcs = gf->gf_numsrc;
1847 		infmode = gf->gf_fmode;
1848 		ifaddr = INADDR_ANY;
1849 		ifindex = gf->gf_interface;
1850 	}
1851 
1852 	/* Make sure we can handle the source list */
1853 	if (insrcs > MAX_FILTER_SIZE)
1854 		return (ENOBUFS);
1855 
1856 	/*
1857 	 * setting the filter to (INCLUDE, NULL) is treated
1858 	 * as a request to leave the group.
1859 	 */
1860 	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1861 
1862 	mutex_enter(&ill->ill_mcast_serializer);
1863 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1864 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1865 	if (ilg == NULL) {
1866 		/*
1867 		 * if the request was actually to leave, and we
1868 		 * didn't find an ilg, there's nothing to do.
1869 		 */
1870 		if (leave_group) {
1871 			rw_exit(&connp->conn_ilg_lock);
1872 			mutex_exit(&ill->ill_mcast_serializer);
1873 			return (0);
1874 		}
1875 		ilg = conn_ilg_alloc(connp, &err);
1876 		if (ilg == NULL) {
1877 			rw_exit(&connp->conn_ilg_lock);
1878 			mutex_exit(&ill->ill_mcast_serializer);
1879 			return (err);
1880 		}
1881 		ilgstat = ILGSTAT_NEW;
1882 		ilg->ilg_v6group = *group;
1883 		ilg->ilg_ill = ill;
1884 		ilg->ilg_ifaddr = ifaddr;
1885 		ilg->ilg_ifindex = ifindex;
1886 	} else if (leave_group) {
1887 		/*
1888 		 * Make sure we have the correct serializer. The ill argument
1889 		 * might not match ilg_ill.
1890 		 */
1891 		ilg_refhold(ilg);
1892 		mutex_exit(&ill->ill_mcast_serializer);
1893 		ill = ilg->ilg_ill;
1894 		rw_exit(&connp->conn_ilg_lock);
1895 
1896 		mutex_enter(&ill->ill_mcast_serializer);
1897 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1898 		ilm = ilg->ilg_ilm;
1899 		ilg->ilg_ilm = NULL;
1900 		ilg_delete(connp, ilg, NULL);
1901 		ilg_refrele(ilg);
1902 		rw_exit(&connp->conn_ilg_lock);
1903 		if (ilm != NULL)
1904 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
1905 		mutex_exit(&ill->ill_mcast_serializer);
1906 		return (0);
1907 	} else {
1908 		ilgstat = ILGSTAT_CHANGE;
1909 		/* Preserve existing state in case ip_addmulti() fails */
1910 		orig_fmode = ilg->ilg_fmode;
1911 		if (ilg->ilg_filter == NULL) {
1912 			orig_filter = NULL;
1913 		} else {
1914 			orig_filter = l_alloc_copy(ilg->ilg_filter);
1915 			if (orig_filter == NULL) {
1916 				rw_exit(&connp->conn_ilg_lock);
1917 				mutex_exit(&ill->ill_mcast_serializer);
1918 				return (ENOMEM);
1919 			}
1920 		}
1921 	}
1922 
1923 	/*
1924 	 * Alloc buffer to copy new state into (see below) before
1925 	 * we make any changes, so we can bail if it fails.
1926 	 */
1927 	if ((new_filter = l_alloc()) == NULL) {
1928 		rw_exit(&connp->conn_ilg_lock);
1929 		err = ENOMEM;
1930 		goto free_and_exit;
1931 	}
1932 
1933 	if (insrcs == 0) {
1934 		CLEAR_SLIST(ilg->ilg_filter);
1935 	} else {
1936 		slist_t *fp;
1937 		if (ilg->ilg_filter == NULL) {
1938 			fp = l_alloc();
1939 			if (fp == NULL) {
1940 				if (ilgstat == ILGSTAT_NEW)
1941 					ilg_delete(connp, ilg, NULL);
1942 				rw_exit(&connp->conn_ilg_lock);
1943 				err = ENOMEM;
1944 				goto free_and_exit;
1945 			}
1946 		} else {
1947 			fp = ilg->ilg_filter;
1948 		}
1949 		for (i = 0; i < insrcs; i++) {
1950 			if (issin6) {
1951 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1952 				fp->sl_addr[i] = sin6->sin6_addr;
1953 			} else {
1954 				if (is_v4only_api) {
1955 					addrp = &imsf->imsf_slist[i];
1956 				} else {
1957 					sin = (struct sockaddr_in *)
1958 					    &gf->gf_slist[i];
1959 					addrp = &sin->sin_addr;
1960 				}
1961 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
1962 			}
1963 		}
1964 		fp->sl_numsrc = insrcs;
1965 		ilg->ilg_filter = fp;
1966 	}
1967 	/*
1968 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1969 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1970 	 * So we need to translate here.
1971 	 */
1972 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
1973 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
1974 
1975 	/*
1976 	 * Save copy of ilg's filter state to pass to other functions,
1977 	 * so we can release conn_ilg_lock now.
1978 	 */
1979 	new_fmode = ilg->ilg_fmode;
1980 	l_copy(ilg->ilg_filter, new_filter);
1981 
1982 	rw_exit(&connp->conn_ilg_lock);
1983 
1984 	/*
1985 	 * Now update the ill. We wait to do this until after the ilg
1986 	 * has been updated because we need to update the src filter
1987 	 * info for the ill, which involves looking at the status of
1988 	 * all the ilgs associated with this group/interface pair.
1989 	 */
1990 	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
1991 	    new_fmode, new_filter, &err);
1992 
1993 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1994 	/*
1995 	 * Must look up the ilg again since we've not been holding
1996 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
1997 	 * having called conn_update_ill, which can run once we dropped the
1998 	 * conn_ilg_lock above.
1999 	 */
2000 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
2001 	if (ilg == NULL) {
2002 		rw_exit(&connp->conn_ilg_lock);
2003 		if (ilm != NULL) {
2004 			(void) ip_delmulti_serial(ilm, B_FALSE,
2005 			    (ilgstat == ILGSTAT_NEW));
2006 		}
2007 		err = ENXIO;
2008 		goto free_and_exit;
2009 	}
2010 
2011 	if (ilm != NULL) {
2012 		/* Succeeded. Update the ilg to point at the ilm */
2013 		if (ilgstat == ILGSTAT_NEW) {
2014 			ASSERT(ilg->ilg_ilm == NULL);
2015 			ilg->ilg_ilm = ilm;
2016 			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
2017 		} else {
2018 			/*
2019 			 * ip_addmulti didn't get a held ilm for
2020 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2021 			 */
2022 			ASSERT(ilg->ilg_ilm == ilm);
2023 		}
2024 	} else {
2025 		ASSERT(err != 0);
2026 		/*
2027 		 * Failed to allocate the ilm.
2028 		 * Restore the original filter state, or delete the
2029 		 * newly-created ilg.
2030 		 * If the error is ENETDOWN, just clear ilg_ill so that we
2031 		 * will rejoin when the ill comes back; don't report ENETDOWN
2032 		 * to the application.
2033 		 */
2034 		if (ilgstat == ILGSTAT_NEW) {
2035 			if (err == ENETDOWN) {
2036 				ilg->ilg_ill = NULL;
2037 				err = 0;
2038 			} else {
2039 				ilg_delete(connp, ilg, NULL);
2040 			}
2041 		} else {
2042 			ilg->ilg_fmode = orig_fmode;
2043 			if (SLIST_IS_EMPTY(orig_filter)) {
2044 				CLEAR_SLIST(ilg->ilg_filter);
2045 			} else {
2046 				/*
2047 				 * We didn't free the filter, even if we
2048 				 * were trying to make the source list empty;
2049 				 * so if orig_filter isn't empty, the ilg
2050 				 * must still have a filter alloc'd.
2051 				 */
2052 				l_copy(orig_filter, ilg->ilg_filter);
2053 			}
2054 		}
2055 	}
2056 	rw_exit(&connp->conn_ilg_lock);
2057 
2058 free_and_exit:
2059 	mutex_exit(&ill->ill_mcast_serializer);
2060 	l_free(orig_filter);
2061 	l_free(new_filter);
2062 
2063 	return (err);
2064 }
2065 
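/*
 * Illustrative sketch (not part of the original file): as noted in
 * ip_set_srcfilter() above, an (MCAST_INCLUDE, empty) filter is treated
 * as a request to leave the group, so a userland caller can drop a
 * membership through SIOCSMSFILTER as well as through the LEAVE options.
 * Error handling is omitted and "net0" is only a placeholder name.
 *
 *	struct group_filter gf;
 *	struct sockaddr_in *grp = (struct sockaddr_in *)&gf.gf_group;
 *
 *	(void) memset(&gf, 0, sizeof (gf));
 *	gf.gf_interface = if_nametoindex("net0");
 *	grp->sin_family = AF_INET;
 *	grp->sin_addr.s_addr = inet_addr("224.1.2.3");
 *	gf.gf_fmode = MCAST_INCLUDE;
 *	gf.gf_numsrc = 0;
 *	(void) ioctl(s, SIOCSMSFILTER, &gf);
 */
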
2066 /*
2067  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2068  */
2069 /* ARGSUSED */
2070 int
2071 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2072     ip_ioctl_cmd_t *ipip, void *ifreq)
2073 {
2074 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2075 	/* existence verified in ip_wput_nondata() */
2076 	mblk_t *data_mp = mp->b_cont->b_cont;
2077 	int datalen, err, cmd, minsize;
2078 	uint_t expsize = 0;
2079 	conn_t *connp;
2080 	boolean_t isv6, is_v4only_api, getcmd;
2081 	struct sockaddr_in *gsin;
2082 	struct sockaddr_in6 *gsin6;
2083 	ipaddr_t v4group;
2084 	in6_addr_t v6group;
2085 	struct group_filter *gf = NULL;
2086 	struct ip_msfilter *imsf = NULL;
2087 	mblk_t *ndp;
2088 	ill_t *ill;
2089 
2090 	connp = Q_TO_CONN(q);
2091 	err = ip_msfilter_ill(connp, mp, ipip, &ill);
2092 	if (err != 0)
2093 		return (err);
2094 
2095 	if (data_mp->b_cont != NULL) {
2096 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2097 			return (ENOMEM);
2098 		freemsg(data_mp);
2099 		data_mp = ndp;
2100 		mp->b_cont->b_cont = data_mp;
2101 	}
2102 
2103 	cmd = iocp->ioc_cmd;
2104 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2105 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2106 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2107 	datalen = MBLKL(data_mp);
2108 
2109 	if (datalen < minsize)
2110 		return (EINVAL);
2111 
2112 	/*
2113 	 * Now we know we have at least the initial structure,
2114 	 * but we still need to check for the source list array.
2115 	 */
2116 	if (is_v4only_api) {
2117 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2118 		isv6 = B_FALSE;
2119 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2120 	} else {
2121 		gf = (struct group_filter *)data_mp->b_rptr;
2122 		if (gf->gf_group.ss_family == AF_INET6) {
2123 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2124 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2125 		} else {
2126 			isv6 = B_FALSE;
2127 		}
2128 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2129 	}
2130 	if (datalen < expsize)
2131 		return (EINVAL);
2132 
2133 	if (isv6) {
2134 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2135 		v6group = gsin6->sin6_addr;
2136 		if (getcmd) {
2137 			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2138 			    B_TRUE);
2139 		} else {
2140 			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2141 			    B_TRUE);
2142 		}
2143 	} else {
2144 		boolean_t issin6 = B_FALSE;
2145 		if (is_v4only_api) {
2146 			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2147 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2148 		} else {
2149 			if (gf->gf_group.ss_family == AF_INET) {
2150 				gsin = (struct sockaddr_in *)&gf->gf_group;
2151 				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2152 				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2153 			} else {
2154 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2155 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2156 				    v4group);
2157 				issin6 = B_TRUE;
2158 			}
2159 		}
2160 		/*
2161 		 * INADDR_ANY is represented as the IPv6 unspecified addr.
2162 		 */
2163 		if (v4group == INADDR_ANY)
2164 			v6group = ipv6_all_zeros;
2165 		else
2166 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2167 
2168 		if (getcmd) {
2169 			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2170 			    issin6);
2171 		} else {
2172 			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2173 			    issin6);
2174 		}
2175 	}
2176 	ill_refrele(ill);
2177 
2178 	return (err);
2179 }
2180 
2181 /*
2182  * Determine the ill for the SIOC*MSFILTER ioctls
2183  *
2184  * Returns an error for IS_UNDER_IPMP interfaces.
2185  *
2186  * Finds the ill based on information in the ioctl headers.
2187  */
2188 static int
2189 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2190     ill_t **illp)
2191 {
2192 	int cmd = ipip->ipi_cmd;
2193 	int err = 0;
2194 	ill_t *ill;
2195 	/* caller has verified this mblk exists */
2196 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2197 	struct ip_msfilter *imsf;
2198 	struct group_filter *gf;
2199 	ipaddr_t v4addr, v4group;
2200 	in6_addr_t v6group;
2201 	uint32_t index;
2202 	ip_stack_t *ipst;
2203 
2204 	ipst = connp->conn_netstack->netstack_ip;
2205 
2206 	*illp = NULL;
2207 
2208 	/* don't allow multicast operations on a tcp conn */
2209 	if (IPCL_IS_TCP(connp))
2210 		return (ENOPROTOOPT);
2211 
2212 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2213 		/* don't allow v4-specific ioctls on v6 socket */
2214 		if (connp->conn_family == AF_INET6)
2215 			return (EAFNOSUPPORT);
2216 
2217 		imsf = (struct ip_msfilter *)dbuf;
2218 		v4addr = imsf->imsf_interface.s_addr;
2219 		v4group = imsf->imsf_multiaddr.s_addr;
2220 		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2221 		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2222 		    ipst, &err);
2223 		if (ill == NULL && v4addr != INADDR_ANY)
2224 			err = ENXIO;
2225 	} else {
2226 		gf = (struct group_filter *)dbuf;
2227 		index = gf->gf_interface;
2228 		if (gf->gf_group.ss_family == AF_INET6) {
2229 			struct sockaddr_in6 *sin6;
2230 
2231 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2232 			v6group = sin6->sin6_addr;
2233 		} else if (gf->gf_group.ss_family == AF_INET) {
2234 			struct sockaddr_in *sin;
2235 
2236 			sin = (struct sockaddr_in *)&gf->gf_group;
2237 			v4group = sin->sin_addr.s_addr;
2238 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2239 		} else {
2240 			return (EAFNOSUPPORT);
2241 		}
2242 		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2243 		    IPCL_ZONEID(connp), ipst, &err);
2244 	}
2245 	*illp = ill;
2246 	return (err);
2247 }
2248 
2249 /*
2250  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2251  * in two stages, as the first copyin tells us the size of the attached
2252  * source buffer.  This function is called by ip_wput_nondata() after the
2253  * first copyin has completed; it figures out how big the second stage
2254  * needs to be, and kicks it off.
2255  *
2256  * In some cases (numsrc < 2), the second copyin is not needed as the
2257  * first one gets a complete structure containing 1 source addr.
2258  *
2259  * The function returns 0 if a second copyin has been started (i.e. there's
2260  * no more work to be done right now), or 1 if the second copyin is not
2261  * needed and ip_wput_nondata() can continue its processing.
2262  */
2263 int
2264 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2265 {
2266 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2267 	int cmd = iocp->ioc_cmd;
2268 	/* validity of this checked in ip_wput_nondata() */
2269 	mblk_t *mp1 = mp->b_cont->b_cont;
2270 	int copysize = 0;
2271 	int offset;
2272 
2273 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2274 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2275 		if (gf->gf_numsrc >= 2) {
2276 			offset = sizeof (struct group_filter);
2277 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2278 		}
2279 	} else {
2280 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2281 		if (imsf->imsf_numsrc >= 2) {
2282 			offset = sizeof (struct ip_msfilter);
2283 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2284 		}
2285 	}
2286 	if (copysize > 0) {
2287 		mi_copyin_n(q, mp, offset, copysize);
2288 		return (0);
2289 	}
2290 	return (1);
2291 }
2292 
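/*
 * For reference, the size arithmetic that the two-stage copyin relies on
 * follows the RFC 3678 style macros (shown here for illustration; the
 * authoritative definitions live in <netinet/in.h>):
 *
 *	IP_MSFILTER_SIZE(n)  == sizeof (struct ip_msfilter) +
 *	    ((n) - 1) * sizeof (struct in_addr)
 *	GROUP_FILTER_SIZE(n) == sizeof (struct group_filter) +
 *	    ((n) - 1) * sizeof (struct sockaddr_storage)
 *
 * Each base structure already has room for one source address, so for
 * numsrc < 2 the first copyin delivers the complete request and
 * ip_copyin_msfilter() returns 1; only for numsrc >= 2 does it start a
 * second copyin of the remaining source slots via mi_copyin_n().
 */
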
2293 /*
2294  * Handle the following optmgmt:
2295  *	IP_ADD_MEMBERSHIP		must not have joined already
2296  *	IPV6_JOIN_GROUP			must not have joined already
2297  *	MCAST_JOIN_GROUP		must not have joined already
2298  *	IP_BLOCK_SOURCE			must have joined already
2299  *	MCAST_BLOCK_SOURCE		must have joined already
2300  *	IP_JOIN_SOURCE_GROUP		may have joined already
2301  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2302  *
2303  * fmode and src parameters may be used to determine which option is
2304  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2305  * are functionally equivalent):
2306  *	opt			fmode			v6src
2307  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		unspecified
2308  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2309  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2310  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		IPv4-mapped addr
2311  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2312  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		IPv4-mapped addr
2313  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2314  *
2315  * Changing the filter mode is not allowed; if a matching ilg already
2316  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2317  *
2318  * Verifies that there is a source address of appropriate scope for
2319  * the group; if not, EADDRNOTAVAIL is returned.
2320  *
2321  * The interface to be used may be identified by an IPv4 address or by an
2322  * interface index.
2323  *
2324  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2325  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2326  * v6src is also v4-mapped.
2327  */
2328 int
2329 ip_opt_add_group(conn_t *connp, boolean_t checkonly,
2330     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2331     mcast_record_t fmode, const in6_addr_t *v6src)
2332 {
2333 	ill_t *ill;
2334 	char buf[INET6_ADDRSTRLEN];
2335 	int	err;
2336 
2337 	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
2338 	if (err != 0) {
2339 		ip1dbg(("ip_opt_add_group: no ill for group %s/"
2340 		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2341 		    sizeof (buf)), ifindex));
2342 		return (err);
2343 	}
2344 
2345 	if (checkonly) {
2346 		/*
2347 		 * Do not do the operation, just pretend to - new T_CHECK
2348 		 * semantics. The error return case above, if encountered,
2349 		 * is considered a good enough "check" here.
2350 		 */
2351 		ill_refrele(ill);
2352 		return (0);
2353 	}
2354 
2355 	mutex_enter(&ill->ill_mcast_serializer);
2356 	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
2357 	mutex_exit(&ill->ill_mcast_serializer);
2358 	ill_refrele(ill);
2359 	return (err);
2360 }
2361 
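/*
 * Illustrative sketch (not part of the original file): the option to
 * (fmode, v6src) mapping in the table above corresponds to userland
 * setsockopt() calls such as the following, using the RFC 3678 request
 * structures.  Error handling is omitted and "net0" is only a
 * placeholder interface name.
 *
 *	struct group_req gr;
 *	struct group_source_req gsr;
 *	struct sockaddr_in *sa;
 *
 *	(void) memset(&gr, 0, sizeof (gr));
 *	gr.gr_interface = if_nametoindex("net0");
 *	sa = (struct sockaddr_in *)&gr.gr_group;
 *	sa->sin_family = AF_INET;
 *	sa->sin_addr.s_addr = inet_addr("224.1.2.3");
 *	(void) setsockopt(s, IPPROTO_IP, MCAST_JOIN_GROUP, &gr, sizeof (gr));
 *
 *	(void) memset(&gsr, 0, sizeof (gsr));
 *	gsr.gsr_interface = if_nametoindex("net0");
 *	sa = (struct sockaddr_in *)&gsr.gsr_group;
 *	sa->sin_family = AF_INET;
 *	sa->sin_addr.s_addr = inet_addr("224.1.2.3");
 *	sa = (struct sockaddr_in *)&gsr.gsr_source;
 *	sa->sin_family = AF_INET;
 *	sa->sin_addr.s_addr = inet_addr("10.0.0.1");
 *	(void) setsockopt(s, IPPROTO_IP, MCAST_BLOCK_SOURCE, &gsr,
 *	    sizeof (gsr));
 *
 * MCAST_JOIN_GROUP arrives here as (MODE_IS_EXCLUDE, unspecified) and
 * MCAST_BLOCK_SOURCE as (MODE_IS_EXCLUDE, v4-mapped 10.0.0.1), matching
 * the table.
 */
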
2362 /*
2363  * Common for IPv6 and IPv4.
2364  * Here we handle ilgs that are still attached to their original ill
2365  * (the one ifaddr/ifindex points at), as well as detached ones.
2366  * The detached ones might have been attached to some other ill.
2367  */
2368 static int
2369 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2370     ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2371     const in6_addr_t *v6src)
2372 {
2373 	ilg_t	*ilg;
2374 	boolean_t leaving;
2375 	ilm_t *ilm;
2376 	ill_t *ill;
2377 	int err = 0;
2378 
2379 retry:
2380 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2381 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2382 	if (ilg == NULL) {
2383 		rw_exit(&connp->conn_ilg_lock);
2384 		/*
2385 		 * Since we didn't have any ilg we now do the error checks
2386 		 * to determine the best errno.
2387 		 */
2388 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2389 		    &ill);
2390 		if (ill != NULL) {
2391 			/* The only error was a missing ilg for the group */
2392 			ill_refrele(ill);
2393 			err = EADDRNOTAVAIL;
2394 		}
2395 		return (err);
2396 	}
2397 
2398 	/* If the ilg is attached then we serialize using that ill */
2399 	ill = ilg->ilg_ill;
2400 	if (ill != NULL) {
2401 		/* Prevent the ill and ilg from being freed */
2402 		ill_refhold(ill);
2403 		ilg_refhold(ilg);
2404 		rw_exit(&connp->conn_ilg_lock);
2405 		mutex_enter(&ill->ill_mcast_serializer);
2406 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2407 		if (ilg->ilg_condemned) {
2408 			/* Disappeared */
2409 			ilg_refrele(ilg);
2410 			rw_exit(&connp->conn_ilg_lock);
2411 			mutex_exit(&ill->ill_mcast_serializer);
2412 			ill_refrele(ill);
2413 			goto retry;
2414 		}
2415 	}
2416 
2417 	/*
2418 	 * Decide if we're actually deleting the ilg or just removing a
2419 	 * source filter address; if just removing an addr, make sure we
2420 	 * aren't trying to change the filter mode, and that the addr is
2421 	 * actually in our filter list already.  If we're removing the
2422 	 * last src in an include list, just delete the ilg.
2423 	 */
2424 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2425 		leaving = B_TRUE;
2426 	} else {
2427 		if (fmode != ilg->ilg_fmode)
2428 			err = EINVAL;
2429 		else if (ilg->ilg_filter == NULL ||
2430 		    !list_has_addr(ilg->ilg_filter, v6src))
2431 			err = EADDRNOTAVAIL;
2432 		if (err != 0) {
2433 			if (ill != NULL)
2434 				ilg_refrele(ilg);
2435 			rw_exit(&connp->conn_ilg_lock);
2436 			goto done;
2437 		}
2438 		if (fmode == MODE_IS_INCLUDE &&
2439 		    ilg->ilg_filter->sl_numsrc == 1) {
2440 			leaving = B_TRUE;
2441 			v6src = NULL;
2442 		} else {
2443 			leaving = B_FALSE;
2444 		}
2445 	}
2446 	ilm = ilg->ilg_ilm;
2447 	if (leaving)
2448 		ilg->ilg_ilm = NULL;
2449 
2450 	ilg_delete(connp, ilg, v6src);
2451 	if (ill != NULL)
2452 		ilg_refrele(ilg);
2453 	rw_exit(&connp->conn_ilg_lock);
2454 
2455 	if (ilm != NULL) {
2456 		ASSERT(ill != NULL);
2457 		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2458 	}
2459 done:
2460 	if (ill != NULL) {
2461 		mutex_exit(&ill->ill_mcast_serializer);
2462 		ill_refrele(ill);
2463 	}
2464 	return (err);
2465 }
2466 
2467 /*
2468  * Handle the following optmgmt:
2469  *	IP_DROP_MEMBERSHIP		will leave
2470  *	IPV6_LEAVE_GROUP		will leave
2471  *	MCAST_LEAVE_GROUP		will leave
2472  *	IP_UNBLOCK_SOURCE		will not leave
2473  *	MCAST_UNBLOCK_SOURCE		will not leave
2474  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2475  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2476  *
2477  * fmode and src parameters may be used to determine which option is
2478  * being set, as follows:
2479  *	opt			 fmode			v6src
2480  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	unspecified
2481  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2482  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2483  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	IPv4-mapped addr
2484  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
2485  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	IPv4-mapped addr
2486  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
2487  *
2488  * Changing the filter mode is not allowed; if a matching ilg already
2489  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2490  *
2491  * The interface to be used may be identified by an IPv4 address or by an
2492  * interface index.
2493  *
2494  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2495  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2496  * v6src is also v4-mapped.
2497  */
2498 int
2499 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2500     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2501     mcast_record_t fmode, const in6_addr_t *v6src)
2502 {
2503 
2504 	/*
2505 	 * In the normal case below we don't check whether the ill exists.
2506 	 * Instead we look for an existing ilg in ip_opt_delete_group_excl().
2507 	 * If checkonly is set, we just sanity-check the arguments.
2508 	 */
2509 	if (checkonly) {
2510 		ill_t	*ill;
2511 		int	err;
2512 
2513 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2514 		    &ill);
2515 		/*
2516 		 * Do not do the operation, just pretend to - new T_CHECK semantics.
2517 		 * ip_opt_check is considered a good enough "check" here.
2518 		 */
2519 		if (ill != NULL)
2520 			ill_refrele(ill);
2521 		return (err);
2522 	}
2523 	return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2524 	    fmode, v6src));
2525 }
2526 
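/*
 * Illustrative sketch (not part of the original file): the "may leave"
 * rows in the table above come from the last-source check in
 * ip_opt_delete_group_excl().  A conn that joined a single source with
 * MCAST_JOIN_SOURCE_GROUP and later leaves that same source drops the
 * whole membership.  Error handling is omitted and "net0" is only a
 * placeholder interface name.
 *
 *	struct group_source_req gsr;
 *	struct sockaddr_in *sa;
 *
 *	(void) memset(&gsr, 0, sizeof (gsr));
 *	gsr.gsr_interface = if_nametoindex("net0");
 *	sa = (struct sockaddr_in *)&gsr.gsr_group;
 *	sa->sin_family = AF_INET;
 *	sa->sin_addr.s_addr = inet_addr("224.1.2.3");
 *	sa = (struct sockaddr_in *)&gsr.gsr_source;
 *	sa->sin_family = AF_INET;
 *	sa->sin_addr.s_addr = inet_addr("10.0.0.1");
 *	(void) setsockopt(s, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP, &gsr,
 *	    sizeof (gsr));
 *
 *	(void) setsockopt(s, IPPROTO_IP, MCAST_LEAVE_SOURCE_GROUP, &gsr,
 *	    sizeof (gsr));
 */
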
2527 /*
2528  * Group mgmt for an upper conn that passes things down
2529  * to the interface multicast list (and DLPI).
2530  * These routines can handle new-style options that specify an interface name
2531  * as opposed to an interface address (needed for general handling of
2532  * unnumbered interfaces).
2533  */
2534 
2535 /*
2536  * Add a group to an upper conn group data structure and pass things down
2537  * to the interface multicast list (and DLPI)
2538  * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2539  */
2540 static int
2541 ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2542     uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2543 {
2544 	int	error = 0;
2545 	ilg_t	*ilg;
2546 	ilg_stat_t ilgstat;
2547 	slist_t	*new_filter = NULL;
2548 	int	new_fmode;
2549 	ilm_t *ilm;
2550 
2551 	if (!(ill->ill_flags & ILLF_MULTICAST))
2552 		return (EADDRNOTAVAIL);
2553 
2554 	/* conn_ilg_lock protects the ilg list. */
2555 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2556 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2557 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2558 
2559 	/*
2560 	 * Depending on the option we're handling, it may or may not be okay
2561 	 * if the group has already been added.  Figure out our rules based
2562 	 * on fmode and src params.  Also make sure there's enough room
2563 	 * in the filter if we're adding a source to an existing filter.
2564 	 */
2565 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2566 		/* we're joining for all sources, must not have joined */
2567 		if (ilg != NULL)
2568 			error = EADDRINUSE;
2569 	} else {
2570 		if (fmode == MODE_IS_EXCLUDE) {
2571 			/* (excl {addr}) => block source, must have joined */
2572 			if (ilg == NULL)
2573 				error = EADDRNOTAVAIL;
2574 		}
2575 		/* (incl {addr}) => join source, may have joined */
2576 
2577 		if (ilg != NULL &&
2578 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2579 			error = ENOBUFS;
2580 	}
2581 	if (error != 0) {
2582 		rw_exit(&connp->conn_ilg_lock);
2583 		return (error);
2584 	}
2585 
2586 	/*
2587 	 * Alloc buffer to copy new state into (see below) before
2588 	 * we make any changes, so we can bail if it fails.
2589 	 */
2590 	if ((new_filter = l_alloc()) == NULL) {
2591 		rw_exit(&connp->conn_ilg_lock);
2592 		return (ENOMEM);
2593 	}
2594 
2595 	if (ilg == NULL) {
2596 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2597 			rw_exit(&connp->conn_ilg_lock);
2598 			l_free(new_filter);
2599 			return (error);
2600 		}
2601 		ilg->ilg_ifindex = ifindex;
2602 		ilg->ilg_ifaddr = ifaddr;
2603 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2604 			ilg->ilg_filter = l_alloc();
2605 			if (ilg->ilg_filter == NULL) {
2606 				ilg_delete(connp, ilg, NULL);
2607 				rw_exit(&connp->conn_ilg_lock);
2608 				l_free(new_filter);
2609 				return (ENOMEM);
2610 			}
2611 			ilg->ilg_filter->sl_numsrc = 1;
2612 			ilg->ilg_filter->sl_addr[0] = *v6src;
2613 		}
2614 		ilgstat = ILGSTAT_NEW;
2615 		ilg->ilg_v6group = *v6group;
2616 		ilg->ilg_fmode = fmode;
2617 		ilg->ilg_ill = ill;
2618 	} else {
2619 		int index;
2620 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2621 			rw_exit(&connp->conn_ilg_lock);
2622 			l_free(new_filter);
2623 			return (EINVAL);
2624 		}
2625 		if (ilg->ilg_filter == NULL) {
2626 			ilg->ilg_filter = l_alloc();
2627 			if (ilg->ilg_filter == NULL) {
2628 				rw_exit(&connp->conn_ilg_lock);
2629 				l_free(new_filter);
2630 				return (ENOMEM);
2631 			}
2632 		}
2633 		if (list_has_addr(ilg->ilg_filter, v6src)) {
2634 			rw_exit(&connp->conn_ilg_lock);
2635 			l_free(new_filter);
2636 			return (EADDRNOTAVAIL);
2637 		}
2638 		ilgstat = ILGSTAT_CHANGE;
2639 		index = ilg->ilg_filter->sl_numsrc++;
2640 		ilg->ilg_filter->sl_addr[index] = *v6src;
2641 	}
2642 
2643 	/*
2644 	 * Save copy of ilg's filter state to pass to other functions,
2645 	 * so we can release conn_ilg_lock now.
2646 	 */
2647 	new_fmode = ilg->ilg_fmode;
2648 	l_copy(ilg->ilg_filter, new_filter);
2649 
2650 	rw_exit(&connp->conn_ilg_lock);
2651 
2652 	/*
2653 	 * Now update the ill. We wait to do this until after the ilg
2654 	 * has been updated because we need to update the src filter
2655 	 * info for the ill, which involves looking at the status of
2656 	 * all the ilgs associated with this group/interface pair.
2657 	 */
2658 	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2659 	    new_fmode, new_filter, &error);
2660 
2661 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2662 	/*
2663 	 * Must look up the ilg again since we've not been holding
2664 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2665 	 * having called conn_update_ill, which can run once we dropped the
2666 	 * conn_ilg_lock above.
2667 	 */
2668 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2669 	if (ilg == NULL) {
2670 		rw_exit(&connp->conn_ilg_lock);
2671 		if (ilm != NULL) {
2672 			(void) ip_delmulti_serial(ilm, B_FALSE,
2673 			    (ilgstat == ILGSTAT_NEW));
2674 		}
2675 		error = ENXIO;
2676 		goto free_and_exit;
2677 	}
2678 
2679 	if (ilm != NULL) {
2680 		/* Succeeded. Update the ilg to point at the ilm */
2681 		if (ilgstat == ILGSTAT_NEW) {
2682 			ASSERT(ilg->ilg_ilm == NULL);
2683 			ilg->ilg_ilm = ilm;
2684 			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
2685 		} else {
2686 			/*
2687 			 * ip_addmulti didn't get a held ilm for
2688 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2689 			 */
2690 			ASSERT(ilg->ilg_ilm == ilm);
2691 		}
2692 	} else {
2693 		ASSERT(error != 0);
2694 		/*
2695 		 * Failed to allocate the ilm.
2696 		 * Need to undo what we did before calling ip_addmulti().
2697 		 * If the error is ENETDOWN, just clear ilg_ill so that we
2698 		 * will rejoin when the ill comes back; don't report ENETDOWN
2699 		 * to the application.
2700 		 */
2701 		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2702 			ilg->ilg_ill = NULL;
2703 			error = 0;
2704 		} else {
2705 			in6_addr_t delsrc =
2706 			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2707 
2708 			ilg_delete(connp, ilg, &delsrc);
2709 		}
2710 	}
2711 	rw_exit(&connp->conn_ilg_lock);
2712 
2713 free_and_exit:
2714 	l_free(new_filter);
2715 	return (error);
2716 }
2717 
2718 /*
2719  * Find an IPv4 ilg matching group, ill and source.
2720  * The group and source can't be INADDR_ANY here so no need to translate to
2721  * the unspecified IPv6 address.
2722  */
2723 boolean_t
2724 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2725     ill_t *ill)
2726 {
2727 	in6_addr_t v6group, v6src;
2728 	int i;
2729 	boolean_t isinlist;
2730 	ilg_t *ilg;
2731 
2732 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2733 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2734 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2735 		if (ilg->ilg_condemned)
2736 			continue;
2737 
2738 		/* ilg_ill could be NULL if an add is in progress */
2739 		if (ilg->ilg_ill != ill)
2740 			continue;
2741 
2742 		/* The callers use upper ill for IPMP */
2743 		ASSERT(!IS_UNDER_IPMP(ill));
2744 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2745 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2746 				/* no source filter, so this is a match */
2747 				rw_exit(&connp->conn_ilg_lock);
2748 				return (B_TRUE);
2749 			}
2750 			break;
2751 		}
2752 	}
2753 	if (ilg == NULL) {
2754 		rw_exit(&connp->conn_ilg_lock);
2755 		return (B_FALSE);
2756 	}
2757 
2758 	/*
2759 	 * we have an ilg with matching ill and group; but
2760 	 * the ilg has a source list that we must check.
2761 	 */
2762 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2763 	isinlist = B_FALSE;
2764 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2765 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2766 			isinlist = B_TRUE;
2767 			break;
2768 		}
2769 	}
2770 
2771 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2772 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2773 		rw_exit(&connp->conn_ilg_lock);
2774 		return (B_TRUE);
2775 	}
2776 	rw_exit(&connp->conn_ilg_lock);
2777 	return (B_FALSE);
2778 }
2779 
2780 /*
2781  * Find an IPv6 ilg matching group, ill, and source
2782  */
2783 boolean_t
2784 conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2785     const in6_addr_t *v6src, ill_t *ill)
2786 {
2787 	int i;
2788 	boolean_t isinlist;
2789 	ilg_t *ilg;
2790 
2791 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2792 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2793 		if (ilg->ilg_condemned)
2794 			continue;
2795 
2796 		/* ilg_ill could be NULL if an add is in progress */
2797 		if (ilg->ilg_ill != ill)
2798 			continue;
2799 
2800 		/* The callers use upper ill for IPMP */
2801 		ASSERT(!IS_UNDER_IPMP(ill));
2802 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2803 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2804 				/* no source filter, so this is a match */
2805 				rw_exit(&connp->conn_ilg_lock);
2806 				return (B_TRUE);
2807 			}
2808 			break;
2809 		}
2810 	}
2811 	if (ilg == NULL) {
2812 		rw_exit(&connp->conn_ilg_lock);
2813 		return (B_FALSE);
2814 	}
2815 
2816 	/*
2817 	 * we have an ilg with matching ill and group; but
2818 	 * the ilg has a source list that we must check.
2819 	 */
2820 	isinlist = B_FALSE;
2821 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2822 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2823 			isinlist = B_TRUE;
2824 			break;
2825 		}
2826 	}
2827 
2828 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2829 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2830 		rw_exit(&connp->conn_ilg_lock);
2831 		return (B_TRUE);
2832 	}
2833 	rw_exit(&connp->conn_ilg_lock);
2834 	return (B_FALSE);
2835 }
2836 
2837 /*
2838  * Find an ilg matching group and ifaddr/ifindex.
2839  * We check both ifaddr and ifindex even though at most one of them
2840  * will be non-zero; that way we always find the right one.
2841  */
2842 static ilg_t *
2843 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2844     uint_t ifindex)
2845 {
2846 	ilg_t	*ilg;
2847 
2848 	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
2849 
2850 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2851 		if (ilg->ilg_condemned)
2852 			continue;
2853 
2854 		if (ilg->ilg_ifaddr == ifaddr &&
2855 		    ilg->ilg_ifindex == ifindex &&
2856 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
2857 			return (ilg);
2858 	}
2859 	return (NULL);
2860 }
2861 
2862 /*
2863  * If a source address is passed in (src != NULL and src is not
2864  * unspecified), remove the specified src addr from the given ilg's
2865  * filter list, else delete the ilg.
2866  */
2867 static void
2868 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
2869 {
2870 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
2871 	ASSERT(ilg->ilg_ptpn != NULL);
2872 	ASSERT(!ilg->ilg_condemned);
2873 
2874 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
2875 		FREE_SLIST(ilg->ilg_filter);
2876 		ilg->ilg_filter = NULL;
2877 
2878 		ASSERT(ilg->ilg_ilm == NULL);
2879 		ilg->ilg_ill = NULL;
2880 		ilg->ilg_condemned = B_TRUE;
2881 
2882 		/* ilg_inactive will unlink from the list */
2883 		ilg_refrele(ilg);
2884 	} else {
2885 		l_remove(ilg->ilg_filter, src);
2886 	}
2887 }
2888 
2889 /*
2890  * Called from conn close. No new ilg can be added or removed
2891  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
2892  * will return an error if the conn has started closing.
2893  *
2894  * We handle locking as follows.
2895  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
2896  * proceed with the ilm part of the delete we hold a reference on both the ill
2897  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
2898  * being deleted.
2899  *
2900  * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
2901  * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
2902  * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
2903  * that case we delete the ilg here, which makes ilg_add discover that the ilg
2904  * has disappeared when ip_addmulti returns, so it will discard the ilm it just
2905  * added.
2906  */
2907 void
2908 ilg_delete_all(conn_t *connp)
2909 {
2910 	ilg_t	*ilg, *next_ilg, *held_ilg;
2911 	ilm_t	*ilm;
2912 	ill_t	*ill;
2913 	boolean_t need_refrele;
2914 
2915 	/*
2916 	 * Can not run if there is a conn_update_ill already running.
2917 	 * Wait for it to complete. Caller should have already set CONN_CLOSING
2918 	 * which prevents any new threads from running in conn_update_ill.
2919 	 */
2920 	mutex_enter(&connp->conn_lock);
2921 	ASSERT(connp->conn_state_flags & CONN_CLOSING);
2922 	while (connp->conn_state_flags & CONN_UPDATE_ILL)
2923 		cv_wait(&connp->conn_cv, &connp->conn_lock);
2924 	mutex_exit(&connp->conn_lock);
2925 
2926 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2927 	ilg = connp->conn_ilg;
2928 	held_ilg = NULL;
2929 	while (ilg != NULL) {
2930 		if (ilg->ilg_condemned) {
2931 			ilg = ilg->ilg_next;
2932 			continue;
2933 		}
2934 		/* If the ilg is detached then no need to serialize */
2935 		if (ilg->ilg_ilm == NULL) {
2936 			next_ilg = ilg->ilg_next;
2937 			ilg_delete(connp, ilg, NULL);
2938 			ilg = next_ilg;
2939 			continue;
2940 		}
2941 		ill = ilg->ilg_ilm->ilm_ill;
2942 
2943 		/*
2944 		 * In order to serialize on the ill we try to enter
2945 		 * and if that fails we unlock and relock and then
2946 		 * check that we still have an ilm.
2947 		 */
2948 		need_refrele = B_FALSE;
2949 		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
2950 			ill_refhold(ill);
2951 			need_refrele = B_TRUE;
2952 			ilg_refhold(ilg);
2953 			if (held_ilg != NULL)
2954 				ilg_refrele(held_ilg);
2955 			held_ilg = ilg;
2956 			rw_exit(&connp->conn_ilg_lock);
2957 			mutex_enter(&ill->ill_mcast_serializer);
2958 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2959 			if (ilg->ilg_condemned) {
2960 				ilg = ilg->ilg_next;
2961 				goto next;
2962 			}
2963 		}
2964 		ilm = ilg->ilg_ilm;
2965 		ilg->ilg_ilm = NULL;
2966 		next_ilg = ilg->ilg_next;
2967 		ilg_delete(connp, ilg, NULL);
2968 		ilg = next_ilg;
2969 		rw_exit(&connp->conn_ilg_lock);
2970 
2971 		if (ilm != NULL)
2972 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2973 
2974 	next:
2975 		mutex_exit(&ill->ill_mcast_serializer);
2976 		if (need_refrele) {
2977 			/* Drop ill reference while we hold no locks */
2978 			ill_refrele(ill);
2979 		}
2980 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2981 	}
2982 	if (held_ilg != NULL)
2983 		ilg_refrele(held_ilg);
2984 	rw_exit(&connp->conn_ilg_lock);
2985 }
2986 
2987 /*
2988  * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
2989  * that a subsequent attempt can attach it.
2990  * Drops and reacquires conn_ilg_lock.
2991  */
2992 static void
2993 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
2994 {
2995 	ilg_stat_t	ilgstat;
2996 	slist_t		*new_filter;
2997 	int		new_fmode;
2998 	in6_addr_t	v6group;
2999 	ipaddr_t	ifaddr;
3000 	uint_t		ifindex;
3001 	ilm_t		*ilm;
3002 	int		error = 0;
3003 
3004 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3005 	/*
3006 	 * Alloc buffer to copy new state into (see below) before
3007 	 * we make any changes, so we can bail if it fails.
3008 	 */
3009 	if ((new_filter = l_alloc()) == NULL)
3010 		return;
3011 
3012 	/*
3013 	 * Save copy of ilg's filter state to pass to other functions, so
3014 	 * we can release conn_ilg_lock now.
3015 	 * Set ilg_ill so that an unplumb can find us.
3016 	 */
3017 	new_fmode = ilg->ilg_fmode;
3018 	l_copy(ilg->ilg_filter, new_filter);
3019 	v6group = ilg->ilg_v6group;
3020 	ifaddr = ilg->ilg_ifaddr;
3021 	ifindex = ilg->ilg_ifindex;
3022 	ilgstat = ILGSTAT_NEW;
3023 
3024 	ilg->ilg_ill = ill;
3025 	ASSERT(ilg->ilg_ilm == NULL);
3026 	rw_exit(&connp->conn_ilg_lock);
3027 
3028 	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3029 	    new_fmode, new_filter, &error);
3030 	l_free(new_filter);
3031 
3032 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3033 	/*
3034 	 * Must look up the ilg again since we've not been holding
3035 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3036 	 * having called conn_update_ill, which can run once we dropped the
3037 	 * conn_ilg_lock above.
3038 	 */
3039 	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3040 	if (ilg == NULL) {
3041 		if (ilm != NULL) {
3042 			rw_exit(&connp->conn_ilg_lock);
3043 			(void) ip_delmulti_serial(ilm, B_FALSE,
3044 			    (ilgstat == ILGSTAT_NEW));
3045 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3046 		}
3047 		return;
3048 	}
3049 	if (ilm == NULL) {
3050 		ilg->ilg_ill = NULL;
3051 		return;
3052 	}
3053 	ASSERT(ilg->ilg_ilm == NULL);
3054 	ilg->ilg_ilm = ilm;
3055 	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
3056 }
3057 
3058 /*
3059  * Called when an ill is unplumbed to make sure that there are no
3060  * dangling conn references to that ill. In that case ill is non-NULL and
3061  * we make sure we remove all references to it.
3062  * Also called when we should revisit the ilg_ill used for multicast
3063  * memberships, in which case ill is NULL.
3064  *
3065  * conn is held by caller.
3066  *
3067  * Note that ipcl_walk only walks conns that are not yet condemned.
3068  * Condemned conns can't be refheld. For this reason, a conn must become
3069  * clean first, i.e., it must stop referring to any ill/ire, and only then
3070  * set the condemned flag.
3071  *
3072  * We leave ixa_multicast_ifindex in place. We prefer dropping
3073  * packets instead of sending them out the wrong interface.
3074  *
3075  * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3076  * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3077  * ilg_ifindex are zero, indicating that the system should pick the interface,
3078  * then we attempt to reselect the ill and join on it.
3079  *
3080  * Locking notes:
3081  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3082  * proceed with the ilm part of the delete we hold a reference on both the ill
3083  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3084  * being deleted.
3085  *
3086  * Note: if this function is called when new ill/ipif's arrive or change status
3087  * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3088  * a NULL ilg_ill to an ill/ilm.
3089  */
3090 static void
3091 conn_update_ill(conn_t *connp, caddr_t arg)
3092 {
3093 	ill_t	*ill = (ill_t *)arg;
3094 
3095 	/*
3096 	 * We have to prevent ip_close/ilg_delete_all from running at
3097  * the same time. ip_close sets CONN_CLOSING before calling ilg_delete_all,
3098  * and we set CONN_UPDATE_ILL. That ensures that only one of
3099 	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3100 	 * If ilg_delete_all got here first, then we have nothing to do.
3101 	 */
3102 	mutex_enter(&connp->conn_lock);
3103 	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3104 		/* Caller has to wait for ill_ilm_cnt to drop to zero */
3105 		mutex_exit(&connp->conn_lock);
3106 		return;
3107 	}
3108 	connp->conn_state_flags |= CONN_UPDATE_ILL;
3109 	mutex_exit(&connp->conn_lock);
3110 
3111 	if (ill != NULL)
3112 		ilg_check_detach(connp, ill);
3113 
3114 	ilg_check_reattach(connp);
3115 
3116 	/* Do we need to wake up a thread in ilg_delete_all? */
3117 	mutex_enter(&connp->conn_lock);
3118 	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3119 	if (connp->conn_state_flags & CONN_CLOSING)
3120 		cv_broadcast(&connp->conn_cv);
3121 	mutex_exit(&connp->conn_lock);
3122 }
3123 
3124 /* Detach from an ill that is going away */
3125 static void
3126 ilg_check_detach(conn_t *connp, ill_t *ill)
3127 {
3128 	char	group_buf[INET6_ADDRSTRLEN];
3129 	ilg_t	*ilg, *held_ilg;
3130 	ilm_t	*ilm;
3131 
3132 	mutex_enter(&ill->ill_mcast_serializer);
3133 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3134 	held_ilg = NULL;
3135 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3136 		if (ilg->ilg_condemned)
3137 			continue;
3138 
3139 		if (ilg->ilg_ill != ill)
3140 			continue;
3141 
3142 		/* Detach from current ill */
3143 		ip1dbg(("ilg_check_detach: detach %s on %s\n",
3144 		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3145 		    group_buf, sizeof (group_buf)),
3146 		    ilg->ilg_ill->ill_name));
3147 
3148 		/* Detach this ilg from the ill/ilm */
3149 		ilm = ilg->ilg_ilm;
3150 		ilg->ilg_ilm = NULL;
3151 		ilg->ilg_ill = NULL;
3152 		if (ilm == NULL)
3153 			continue;
3154 
3155 		/* Prevent ilg from disappearing */
3156 		ilg_transfer_hold(held_ilg, ilg);
3157 		held_ilg = ilg;
3158 		rw_exit(&connp->conn_ilg_lock);
3159 
3160 		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3161 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3162 	}
3163 	if (held_ilg != NULL)
3164 		ilg_refrele(held_ilg);
3165 	rw_exit(&connp->conn_ilg_lock);
3166 	mutex_exit(&ill->ill_mcast_serializer);
3167 }
3168 
3169 /*
3170  * Check if there is a place to attach the conn_ilgs. We do this for both
3171  * detached ilgs and attached ones, since for the latter there could be
3172  * a better ill to attach them to.
3173  */
3174 static void
3175 ilg_check_reattach(conn_t *connp)
3176 {
3177 	ill_t	*ill;
3178 	char	group_buf[INET6_ADDRSTRLEN];
3179 	ilg_t	*ilg, *held_ilg;
3180 	ilm_t	*ilm;
3181 	zoneid_t zoneid = IPCL_ZONEID(connp);
3182 	int	error;
3183 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3184 
3185 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3186 	held_ilg = NULL;
3187 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3188 		if (ilg->ilg_condemned)
3189 			continue;
3190 
3191 		/* Check if the conn_ill matches what we would pick now */
3192 		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
3193 		    ilg->ilg_ifindex, zoneid, ipst, &error);
3194 
3195 		/*
3196 		 * Make sure the ill is usable for multicast and that
3197 		 * we can send the DL_ADDMULTI_REQ before we create an
3198 		 * ilm.
3199 		 */
3200 		if (ill != NULL &&
3201 		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3202 			/* Drop locks across ill_refrele */
3203 			ilg_transfer_hold(held_ilg, ilg);
3204 			held_ilg = ilg;
3205 			rw_exit(&connp->conn_ilg_lock);
3206 			ill_refrele(ill);
3207 			ill = NULL;
3208 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3209 			/* Note that ilg could have become condemned */
3210 		}
3211 
3212 		/* Is the ill unchanged, even if both are NULL? */
3213 		if (ill == ilg->ilg_ill) {
3214 			if (ill != NULL) {
3215 				/* Drop locks across ill_refrele */
3216 				ilg_transfer_hold(held_ilg, ilg);
3217 				held_ilg = ilg;
3218 				rw_exit(&connp->conn_ilg_lock);
3219 				ill_refrele(ill);
3220 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3221 			}
3222 			continue;
3223 		}
3224 
3225 		/* Something changed; detach from old first if needed */
3226 		if (ilg->ilg_ill != NULL) {
3227 			ill_t *ill2 = ilg->ilg_ill;
3228 			boolean_t need_refrele = B_FALSE;
3229 
3230 			/*
3231 			 * In order to serialize on the ill we try to enter
3232 			 * and if that fails we unlock and relock.
3233 			 */
3234 			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
3235 				ill_refhold(ill2);
3236 				need_refrele = B_TRUE;
3237 				ilg_transfer_hold(held_ilg, ilg);
3238 				held_ilg = ilg;
3239 				rw_exit(&connp->conn_ilg_lock);
3240 				mutex_enter(&ill2->ill_mcast_serializer);
3241 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3242 				/* Note that ilg could have become condemned */
3243 			}
3244 			/*
3245 			 * Check that nobody else re-attached the ilg while we
3246 			 * dropped the lock.
3247 			 */
3248 			if (ilg->ilg_ill == ill2) {
3249 				ASSERT(!ilg->ilg_condemned);
3250 				/* Detach from current ill */
3251 				ip1dbg(("ilg_check_reattach: detach %s/%s\n",
3252 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3253 				    group_buf, sizeof (group_buf)),
3254 				    ill2->ill_name));
3255 
3256 				ilm = ilg->ilg_ilm;
3257 				ilg->ilg_ilm = NULL;
3258 				ilg->ilg_ill = NULL;
3259 			} else {
3260 				ilm = NULL;
3261 			}
3262 			rw_exit(&connp->conn_ilg_lock);
3263 			if (ilm != NULL)
3264 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3265 			mutex_exit(&ill2->ill_mcast_serializer);
3266 			if (need_refrele) {
3267 				/* Drop ill reference while we hold no locks */
3268 				ill_refrele(ill2);
3269 			}
3270 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3271 			/*
3272 			 * While we dropped conn_ilg_lock some other thread
3273 			 * could have attached this ilg, thus we check again.
3274 			 */
3275 			if (ilg->ilg_ill != NULL) {
3276 				if (ill != NULL) {
3277 					/* Drop locks across ill_refrele */
3278 					ilg_transfer_hold(held_ilg, ilg);
3279 					held_ilg = ilg;
3280 					rw_exit(&connp->conn_ilg_lock);
3281 					ill_refrele(ill);
3282 					rw_enter(&connp->conn_ilg_lock,
3283 					    RW_WRITER);
3284 				}
3285 				continue;
3286 			}
3287 		}
3288 		if (ill != NULL) {
3289 			/*
3290 			 * In order to serialize on the ill we try to enter
3291 			 * and if that fails we unlock and relock.
3292 			 */
3293 			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3294 				/* Already have a refhold on ill */
3295 				ilg_transfer_hold(held_ilg, ilg);
3296 				held_ilg = ilg;
3297 				rw_exit(&connp->conn_ilg_lock);
3298 				mutex_enter(&ill->ill_mcast_serializer);
3299 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3300 				/* Note that ilg could have become condemned */
3301 			}
3302 
3303 			/*
3304 			 * Check that nobody else attached the ilg and that
3305 			 * it wasn't condemned while we dropped the lock.
3306 			 */
3307 			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3308 				/*
3309 				 * Attach to the new ill. Can fail in which
3310 				 * case ilg_ill will remain NULL. ilg_attach
3311 				 * drops and reacquires conn_ilg_lock.
3312 				 */
3313 				ip1dbg(("ilg_check_reattach: attach %s/%s\n",
3314 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3315 				    group_buf, sizeof (group_buf)),
3316 				    ill->ill_name));
3317 				ilg_attach(connp, ilg, ill);
3318 				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3319 			}
3320 			mutex_exit(&ill->ill_mcast_serializer);
3321 			/* Drop locks across ill_refrele */
3322 			ilg_transfer_hold(held_ilg, ilg);
3323 			held_ilg = ilg;
3324 			rw_exit(&connp->conn_ilg_lock);
3325 			ill_refrele(ill);
3326 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3327 		}
3328 	}
3329 	if (held_ilg != NULL)
3330 		ilg_refrele(held_ilg);
3331 	rw_exit(&connp->conn_ilg_lock);
3332 }
3333 
3334 /*
3335  * Called when an ill is unplumbed to make sure that there are no
3336  * dangling conn references to that ill. In that case ill is non-NULL and
3337  * we make sure we remove all references to it.
3338  * Also called when we should revisit the ilg_ill used for multicast
3339  * memberships, in which case ill is NULL.
3340  */
3341 void
3342 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3343 {
3344 	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3345 }
3346