xref: /titanic_50/usr/src/uts/common/inet/ip/ip_multi.c (revision d3d50737e566cade9a08d73d2af95105ac7cd960)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ilm_numentries(ill_t *, const in6_addr_t *);
74 
75 static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
76     ilg_stat_t, mcast_record_t, slist_t *, int *);
77 static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
78     zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
79 static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
80 static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
81 
82 static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
83     t_uscalar_t);
84 static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
85     uint_t ifindex);
86 
87 static int	ilg_add(conn_t *connp, const in6_addr_t *group,
88     ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
89     const in6_addr_t *v6src);
90 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
91 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
92     uint32_t *addr_lenp, uint32_t *addr_offp);
93 static int	ip_opt_delete_group_excl(conn_t *connp,
94     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
95     mcast_record_t fmode, const in6_addr_t *v6src);
96 
97 static	ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
98 
99 static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
100     ill_t **);
101 
102 static void	ilg_check_detach(conn_t *, ill_t *);
103 static void	ilg_check_reattach(conn_t *);
104 
105 /*
106  * MT notes:
107  *
108  * Multicast joins operate on both the ilg and ilm structures. Multiple
109  * threads operating on a conn (socket) trying to do multicast joins
110  * need to synchronize when operating on the ilg. Multiple threads
111  * potentially operating on different conns (socket endpoints) trying to
112  * do multicast joins could eventually end up trying to manipulate the
113  * ilm simultaneously and need to synchronize on the access to the ilm.
114  * The access and lookup of the ilm, as well as other ill multicast state,
115  * is under ill_mcast_lock.
116  * The modifications and lookup of ilg entries are serialized using the
117  * conn_ilg_lock rwlock. An ilg is not freed until ilg_refcnt drops to zero.
118  *
119  * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
120  * never the other way around.
121  *
122  * An ilm is an IP data structure used to track multicast join/leave.
123  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
124  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
125  * referencing the ilm.
126  * The modifications and lookup of ilm entries are serialized using the
127  * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
128  * of the ilm state.
129  * ilms are created/destroyed only as writer. ilms are not passed around.
130  * The datapath (anything outside of this file and igmp.c) uses functions
131  * that do not return ilms - just the number of members. So we don't need
132  * a dynamic refcount of the number of threads holding a reference to an
133  * ilm.
134  *
135  * In the cases where we serially access the ilg and ilm, which happens when
136  * we handle the applications requests to join or leave groups and sources,
137  * we use the ill_mcast_serializer mutex to ensure that a multithreaded
138  * application which does concurrent joins and/or leaves on the same group on
139  * the same socket always results in a consistent order for the ilg and ilm
140  * modifications.
141  *
142  * When a multicast operation results in needing to send a message to
143  * the driver (to join/leave an L2 multicast address), we use ill_dlpi_queue()
144  * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
145  * to send IGMP/MLD IP packets to avoid dropping the lock just to send a packet.
146  */
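
/*
 * Illustrative sketch (editorial; not compiled, locals connp/ill assumed):
 * the serial application join/leave path described above acquires the
 * locks in roughly this order (cf. ip_set_srcfilter() and
 * ip_addmulti_serial() below):
 *
 *	mutex_enter(&ill->ill_mcast_serializer);
 *	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
 *	... look up/modify the ilg ...
 *	rw_exit(&connp->conn_ilg_lock);
 *	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
 *	... look up/modify the ilm; queue IGMP/MLD and DLPI messages ...
 *	rw_exit(&ill->ill_mcast_lock);
 *	ill_mcast_send_queued(ill);
 *	ill_dlpi_send_queued(ill);
 *	mutex_exit(&ill->ill_mcast_serializer);
 */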
147 
148 #define	GETSTRUCT(structure, number)	\
149 	((structure *)mi_zalloc(sizeof (structure) * (number)))
150 
151 /*
152  * Caller must ensure that the ilg has not been condemned.
153  * The condemned flag is only set in ilg_delete under conn_ilg_lock.
154  *
155  * The caller must hold conn_ilg_lock as writer.
156  */
157 static void
158 ilg_refhold(ilg_t *ilg)
159 {
160 	ASSERT(ilg->ilg_refcnt != 0);
161 	ASSERT(!ilg->ilg_condemned);
162 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
163 
164 	ilg->ilg_refcnt++;
165 }
166 
167 static void
168 ilg_inactive(ilg_t *ilg)
169 {
170 	ASSERT(ilg->ilg_ill == NULL);
171 	ASSERT(ilg->ilg_ilm == NULL);
172 	ASSERT(ilg->ilg_filter == NULL);
173 	ASSERT(ilg->ilg_condemned);
174 
175 	/* Unlink from list */
176 	*ilg->ilg_ptpn = ilg->ilg_next;
177 	if (ilg->ilg_next != NULL)
178 		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
179 	ilg->ilg_next = NULL;
180 	ilg->ilg_ptpn = NULL;
181 
182 	ilg->ilg_connp = NULL;
183 	kmem_free(ilg, sizeof (*ilg));
184 }
185 
186 /*
187  * The caller must hold conn_ilg_lock as writer.
188  */
189 static void
190 ilg_refrele(ilg_t *ilg)
191 {
192 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
193 	ASSERT(ilg->ilg_refcnt != 0);
194 	if (--ilg->ilg_refcnt == 0)
195 		ilg_inactive(ilg);
196 }
197 
198 /*
199  * Acquire reference on ilg and drop reference on held_ilg.
200  * In the case when held_ilg is the same as ilg we already have
201  * a reference, but the held_ilg might be condemned. In that case
202  * we avoid the ilg_refhold/rele so that we can assert in ilg_refhold
203  * that the ilg isn't condemned.
204  */
205 static void
206 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
207 {
208 	if (held_ilg == ilg)
209 		return;
210 
211 	ilg_refhold(ilg);
212 	if (held_ilg != NULL)
213 		ilg_refrele(held_ilg);
214 }
215 
216 /*
217  * Allocates a new ilg_t and links it into conn_ilg.
218  * Returns NULL on failure, in which case `*errp' will be
219  * filled in with the reason.
220  *
221  * Assumes connp->conn_ilg_lock is held.
222  */
223 static ilg_t *
224 conn_ilg_alloc(conn_t *connp, int *errp)
225 {
226 	ilg_t *ilg;
227 
228 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
229 
230 	/*
231 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
232 	 * create any ilgs.
233 	 */
234 	if (connp->conn_state_flags & CONN_CLOSING) {
235 		*errp = EINVAL;
236 		return (NULL);
237 	}
238 
239 	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
240 	if (ilg == NULL) {
241 		*errp = ENOMEM;
242 		return (NULL);
243 	}
244 
245 	ilg->ilg_refcnt = 1;
246 
247 	/* Insert at head */
248 	if (connp->conn_ilg != NULL)
249 		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
250 	ilg->ilg_next = connp->conn_ilg;
251 	ilg->ilg_ptpn = &connp->conn_ilg;
252 	connp->conn_ilg = ilg;
253 
254 	ilg->ilg_connp = connp;
255 	return (ilg);
256 }
257 
258 typedef struct ilm_fbld_s {
259 	ilm_t		*fbld_ilm;
260 	int		fbld_in_cnt;
261 	int		fbld_ex_cnt;
262 	slist_t		fbld_in;
263 	slist_t		fbld_ex;
264 	boolean_t	fbld_in_overflow;
265 } ilm_fbld_t;
266 
267 /*
268  * Caller must hold ill_mcast_lock
269  */
270 static void
271 ilm_bld_flists(conn_t *connp, void *arg)
272 {
273 	ilg_t *ilg;
274 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
275 	ilm_t *ilm = fbld->fbld_ilm;
276 	in6_addr_t *v6group = &ilm->ilm_v6addr;
277 
278 	if (connp->conn_ilg == NULL)
279 		return;
280 
281 	/*
282 	 * Since we can't break out of the ipcl_walk once started, we still
283 	 * have to look at every conn.  But if we've already found one
284 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
285 	 * ilgs--that will be our state.
286 	 */
287 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
288 		return;
289 
290 	/*
291 	 * Check this conn's ilgs to see if any are interested in our
292 	 * ilm (group, interface match).  If so, update the master
293 	 * include and exclude lists we're building in the fbld struct
294 	 * with this ilg's filter info.
295 	 *
296 	 * Note that the caller has already serialized on the ill we care
297 	 * about.
298 	 */
299 	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));
300 
301 	rw_enter(&connp->conn_ilg_lock, RW_READER);
302 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
303 		if (ilg->ilg_condemned)
304 			continue;
305 
306 		/*
307 		 * Since we are under the ill_mcast_serializer we know
308 		 * that any ilg+ilm operations on this ilm have either
309 		 * not started or completed, except for the last ilg
310 		 * (the one that caused us to be called) which doesn't
311 		 * have ilg_ilm set yet. Hence we compare using ilg_ill
312 		 * and the address.
313 		 */
314 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
315 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
316 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
317 				fbld->fbld_in_cnt++;
318 				if (!fbld->fbld_in_overflow)
319 					l_union_in_a(&fbld->fbld_in,
320 					    ilg->ilg_filter,
321 					    &fbld->fbld_in_overflow);
322 			} else {
323 				fbld->fbld_ex_cnt++;
324 				/*
325 				 * On the first exclude list, don't try to do
326 				 * an intersection, as the master exclude list
327 				 * is intentionally empty.  If the master list
328 				 * is still empty on later iterations, that
329 				 * means we have at least one ilg with an empty
330 				 * exclude list, so that should be reflected
331 				 * when we take the intersection.
332 				 */
333 				if (fbld->fbld_ex_cnt == 1) {
334 					if (ilg->ilg_filter != NULL)
335 						l_copy(ilg->ilg_filter,
336 						    &fbld->fbld_ex);
337 				} else {
338 					l_intersection_in_a(&fbld->fbld_ex,
339 					    ilg->ilg_filter);
340 				}
341 			}
342 			/* there will only be one match, so break now. */
343 			break;
344 		}
345 	}
346 	rw_exit(&connp->conn_ilg_lock);
347 }
348 
349 /*
350  * Caller must hold ill_mcast_lock
351  */
352 static void
353 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
354 {
355 	ilm_fbld_t fbld;
356 	ip_stack_t *ipst = ilm->ilm_ipst;
357 
358 	fbld.fbld_ilm = ilm;
359 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
360 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
361 	fbld.fbld_in_overflow = B_FALSE;
362 
363 	/* first, construct our master include and exclude lists */
364 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
365 
366 	/* now use those master lists to generate the interface filter */
367 
368 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
369 	if (fbld.fbld_in_overflow) {
370 		*fmode = MODE_IS_EXCLUDE;
371 		flist->sl_numsrc = 0;
372 		return;
373 	}
374 
375 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
376 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
377 		*fmode = MODE_IS_INCLUDE;
378 		flist->sl_numsrc = 0;
379 		return;
380 	}
381 
382 	/*
383 	 * If there are no exclude lists, then the interface filter
384 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
385 	 * exclude list makes the interface filter EXCLUDE, with its
386 	 * filter list equal to (fbld_ex - fbld_in).
387 	 */
388 	if (fbld.fbld_ex_cnt == 0) {
389 		*fmode = MODE_IS_INCLUDE;
390 		l_copy(&fbld.fbld_in, flist);
391 	} else {
392 		*fmode = MODE_IS_EXCLUDE;
393 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
394 	}
395 }
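
/*
 * Worked example (editorial, derived from the two functions above): if
 * three conns hold ilgs for this group on this ill with filter state
 * EXCLUDE {A,B}, EXCLUDE {B,C} and INCLUDE {D}, then ilm_bld_flists()
 * yields fbld_in = {D} (union of the includes) and fbld_ex = {B}
 * (intersection of the excludes), and ilm_gen_filter() produces the
 * interface filter EXCLUDE ({B} - {D}) = EXCLUDE {B}.
 */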
396 
397 /*
398  * Caller must hold ill_mcast_lock
399  */
400 static int
401 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
402 {
403 	mcast_record_t fmode;
404 	slist_t *flist;
405 	boolean_t fdefault;
406 	char buf[INET6_ADDRSTRLEN];
407 	ill_t *ill = ilm->ilm_ill;
408 
409 	/*
410 	 * There are several cases where the ilm's filter state
411 	 * defaults to (EXCLUDE, NULL):
412 	 *	- we've had previous joins without associated ilgs
413 	 *	- this join has no associated ilg
414 	 *	- the ilg's filter state is (EXCLUDE, NULL)
415 	 */
416 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
417 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
418 
419 	/* attempt mallocs (if needed) before doing anything else */
420 	if ((flist = l_alloc()) == NULL)
421 		return (ENOMEM);
422 	if (!fdefault && ilm->ilm_filter == NULL) {
423 		ilm->ilm_filter = l_alloc();
424 		if (ilm->ilm_filter == NULL) {
425 			l_free(flist);
426 			return (ENOMEM);
427 		}
428 	}
429 
430 	if (ilgstat != ILGSTAT_CHANGE)
431 		ilm->ilm_refcnt++;
432 
433 	if (ilgstat == ILGSTAT_NONE)
434 		ilm->ilm_no_ilg_cnt++;
435 
436 	/*
437 	 * Determine new filter state.  If it's not the default
438 	 * (EXCLUDE, NULL), we must walk the conn list to find
439 	 * any ilgs interested in this group, and re-build the
440 	 * ilm filter.
441 	 */
442 	if (fdefault) {
443 		fmode = MODE_IS_EXCLUDE;
444 		flist->sl_numsrc = 0;
445 	} else {
446 		ilm_gen_filter(ilm, &fmode, flist);
447 	}
448 
449 	/* make sure state actually changed; nothing to do if not. */
450 	if ((ilm->ilm_fmode == fmode) &&
451 	    !lists_are_different(ilm->ilm_filter, flist)) {
452 		l_free(flist);
453 		return (0);
454 	}
455 
456 	/* send the state change report */
457 	if (!IS_LOOPBACK(ill)) {
458 		if (ill->ill_isv6)
459 			mld_statechange(ilm, fmode, flist);
460 		else
461 			igmp_statechange(ilm, fmode, flist);
462 	}
463 
464 	/* update the ilm state */
465 	ilm->ilm_fmode = fmode;
466 	if (flist->sl_numsrc > 0)
467 		l_copy(flist, ilm->ilm_filter);
468 	else
469 		CLEAR_SLIST(ilm->ilm_filter);
470 
471 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
472 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
473 
474 	l_free(flist);
475 	return (0);
476 }
477 
478 /*
479  * Caller must hold ill_mcast_lock
480  */
481 static int
482 ilm_update_del(ilm_t *ilm)
483 {
484 	mcast_record_t fmode;
485 	slist_t *flist;
486 	ill_t *ill = ilm->ilm_ill;
487 
488 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
489 	    ilm->ilm_refcnt));
490 
491 	if ((flist = l_alloc()) == NULL)
492 		return (ENOMEM);
493 
494 	/*
495 	 * If present, the ilg in question has already either been
496 	 * updated or removed from our list; so all we need to do
497 	 * now is walk the list to update the ilm filter state.
498 	 *
499 	 * Skip the list walk if we have any no-ilg joins, which
500 	 * cause the filter state to revert to (EXCLUDE, NULL).
501 	 */
502 	if (ilm->ilm_no_ilg_cnt != 0) {
503 		fmode = MODE_IS_EXCLUDE;
504 		flist->sl_numsrc = 0;
505 	} else {
506 		ilm_gen_filter(ilm, &fmode, flist);
507 	}
508 
509 	/* check to see if state needs to be updated */
510 	if ((ilm->ilm_fmode == fmode) &&
511 	    (!lists_are_different(ilm->ilm_filter, flist))) {
512 		l_free(flist);
513 		return (0);
514 	}
515 
516 	if (!IS_LOOPBACK(ill)) {
517 		if (ill->ill_isv6)
518 			mld_statechange(ilm, fmode, flist);
519 		else
520 			igmp_statechange(ilm, fmode, flist);
521 	}
522 
523 	ilm->ilm_fmode = fmode;
524 	if (flist->sl_numsrc > 0) {
525 		if (ilm->ilm_filter == NULL) {
526 			ilm->ilm_filter = l_alloc();
527 			if (ilm->ilm_filter == NULL) {
528 				char buf[INET6_ADDRSTRLEN];
529 				ip1dbg(("ilm_update_del: failed to alloc ilm "
530 				    "filter; no source filtering for %s on %s",
531 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
532 				    buf, sizeof (buf)), ill->ill_name));
533 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
534 				l_free(flist);
535 				return (0);
536 			}
537 		}
538 		l_copy(flist, ilm->ilm_filter);
539 	} else {
540 		CLEAR_SLIST(ilm->ilm_filter);
541 	}
542 
543 	l_free(flist);
544 	return (0);
545 }
546 
547 /*
548  * Create/update the ilm for the group/ill. Used by other parts of IP to
549  * do an ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE join with no slist.
550  * Returns with a refhold on the ilm.
551  *
552  * The unspecified address means all multicast addresses, in both the
553  * IPv4 and the IPv6 case.
554  *
555  * The caller should have already mapped an IPMP under-ill to the upper ill.
556  */
557 ilm_t *
558 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
559     int *errorp)
560 {
561 	ilm_t *ilm;
562 
563 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
564 	mutex_enter(&ill->ill_mcast_serializer);
565 	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
566 	    MODE_IS_EXCLUDE, NULL, errorp);
567 	mutex_exit(&ill->ill_mcast_serializer);
568 	return (ilm);
569 }
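
/*
 * Illustrative sketch (editorial; a hypothetical caller, not part of this
 * file): a kernel consumer holding a refheld, non-IPMP-underlying ill
 * would typically pair the calls as follows (v6group and zoneid assumed):
 *
 *	int error;
 *	ilm_t *ilm;
 *
 *	ilm = ip_addmulti(&v6group, ill, zoneid, &error);
 *	if (ilm == NULL)
 *		return (error);
 *	...
 *	(void) ip_delmulti(ilm);
 *
 * ip_join_allmulti()/ip_leave_allmulti() below use this pattern with the
 * unspecified address to receive all multicast traffic.
 */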
570 
571 /*
572  * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
573  * then this returns with a refhold on the ilm.
574  *
575  * Internal routine which assumes the caller has already acquired
576  * ill_mcast_serializer.
577  *
578  * The unspecified address means all multicast addresses, in both the
579  * IPv4 and the IPv6 case.
580  *
581  * ilgstat tells us if there's an ilg associated with this join,
582  * and if so, if it's a new ilg or a change to an existing one.
583  * ilg_fmode and ilg_flist give us the current filter state of
584  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
585  *
586  * The caller should have already mapped an IPMP under-ill to the upper ill.
587  */
588 static ilm_t *
589 ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
590     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
591     int *errorp)
592 {
593 	ilm_t *ilm;
594 
595 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
596 
597 	if (ill->ill_isv6) {
598 		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
599 		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
600 			*errorp = EINVAL;
601 			return (NULL);
602 		}
603 	} else {
604 		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
605 			ipaddr_t v4group;
606 
607 			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
608 			if (!CLASSD(v4group)) {
609 				*errorp = EINVAL;
610 				return (NULL);
611 			}
612 		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
613 			*errorp = EINVAL;
614 			return (NULL);
615 		}
616 	}
617 
618 	if (IS_UNDER_IPMP(ill)) {
619 		*errorp = EINVAL;
620 		return (NULL);
621 	}
622 
623 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
624 	/*
625 	 * We do the equivalent of a lookup by checking after we get the lock.
626 	 * This is needed since the ill could have been condemned after
627 	 * we looked it up, and we need to check condemned after we hold
628 	 * ill_mcast_lock to synchronize with the unplumb code.
629 	 */
630 	if (ill->ill_state_flags & ILL_CONDEMNED) {
631 		rw_exit(&ill->ill_mcast_lock);
632 		*errorp = ENXIO;
633 		return (NULL);
634 	}
635 	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
636 	    ilg_flist, errorp);
637 	rw_exit(&ill->ill_mcast_lock);
638 
639 	/* Send any deferred/queued DLPI or IP packets */
640 	ill_mcast_send_queued(ill);
641 	ill_dlpi_send_queued(ill);
642 	ill_mcast_timer_start(ill->ill_ipst);
643 	return (ilm);
644 }
645 
646 static ilm_t *
647 ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
648     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
649     int *errorp)
650 {
651 	ilm_t	*ilm;
652 	int	ret = 0;
653 
654 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
655 	*errorp = 0;
656 
657 	/*
658 	 * An ilm is uniquely identified by the tuple of (group, ill) where
659 	 * `group' is the multicast group address, and `ill' is the interface
660 	 * on which it is currently joined.
661 	 */
662 
663 	ilm = ilm_lookup(ill, v6group, zoneid);
664 	if (ilm != NULL) {
665 		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
666 		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
667 		if (ret == 0)
668 			return (ilm);
669 
670 		*errorp = ret;
671 		return (NULL);
672 	}
673 
674 	/*
675 	 * The caller's checks on the ilg and the ilg+ilm consistency under
676 	 * ill_mcast_serializer ensure that we cannot have ILGSTAT_CHANGE
677 	 * and no ilm.
678 	 */
679 	ASSERT(ilgstat != ILGSTAT_CHANGE);
680 	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
681 	if (ilm == NULL) {
682 		*errorp = ENOMEM;
683 		return (NULL);
684 	}
685 
686 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
687 		/*
688 		 * If we have more than one we should not tell the driver
689 		 * to join this time.
690 		 */
691 		if (ilm_numentries(ill, v6group) == 1) {
692 			ret = ill_join_allmulti(ill);
693 		}
694 	} else {
695 		if (!IS_LOOPBACK(ill)) {
696 			if (ill->ill_isv6)
697 				mld_joingroup(ilm);
698 			else
699 				igmp_joingroup(ilm);
700 		}
701 
702 		/*
703 		 * If we have more than one we should not tell the driver
704 		 * to join this time.
705 		 */
706 		if (ilm_numentries(ill, v6group) == 1) {
707 			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
708 		}
709 	}
710 	if (ret != 0) {
711 		if (ret == ENETDOWN) {
712 			char buf[INET6_ADDRSTRLEN];
713 
714 			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
715 			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
716 			    buf, sizeof (buf)), ill->ill_name));
717 		}
718 		ilm_delete(ilm);
719 		*errorp = ret;
720 		return (NULL);
721 	} else {
722 		return (ilm);
723 	}
724 }
725 
726 /*
727  * Send a multicast request to the driver for enabling or disabling
728  * multicast reception for the v6groupp address. The caller has already
729  * checked whether it is appropriate to send one or not.
730  *
731  * For IPMP we switch to the cast_ill since it has the right hardware
732  * information.
733  */
734 static int
735 ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
736 {
737 	mblk_t	*mp;
738 	uint32_t addrlen, addroff;
739 	ill_t *release_ill = NULL;
740 	int err = 0;
741 
742 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
743 
744 	if (IS_IPMP(ill)) {
745 		/* On the upper IPMP ill. */
746 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
747 		if (release_ill == NULL) {
748 			/*
749 			 * Avoid sending it down to the ipmpstub.
750 			 * We will be called again once the members of the
751 			 * group are in place
752 			 */
753 			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
754 			    ill->ill_name, ill->ill_isv6));
755 			return (0);
756 		}
757 		ill = release_ill;
758 	}
759 	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
760 	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
761 	if (mp == NULL) {
762 		err = ENOMEM;
763 		goto done;
764 	}
765 
766 	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
767 	if (mp == NULL) {
768 		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
769 		err = ENOMEM;
770 		goto done;
771 	}
772 
773 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
774 	case DL_ENABMULTI_REQ:
775 		mutex_enter(&ill->ill_lock);
776 		/* Track the state if this is the first enabmulti */
777 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
778 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
779 		mutex_exit(&ill->ill_lock);
780 		break;
781 	}
782 	ill_dlpi_queue(ill, mp);
783 done:
784 	if (release_ill != NULL)
785 		ill_refrele(release_ill);
786 	return (err);
787 }
788 
789 /*
790  * Send a multicast request to the driver for enabling multicast
791  * membership for v6group if appropriate.
792  */
793 static int
794 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
795 {
796 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
797 	    ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
798 		ip1dbg(("ip_ll_multireq: not resolver\n"));
799 		return (0);	/* Must be IRE_IF_NORESOLVER */
800 	}
801 
802 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
803 		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
804 		return (0);
805 	}
806 	return (ip_ll_send_multireq(ill, v6groupp, prim));
807 }
808 
809 /*
810  * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
811  * being true.
812  */
813 int
814 ip_delmulti(ilm_t *ilm)
815 {
816 	ill_t *ill = ilm->ilm_ill;
817 	int error;
818 
819 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
820 	mutex_enter(&ill->ill_mcast_serializer);
821 	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
822 	mutex_exit(&ill->ill_mcast_serializer);
823 	return (error);
824 }
825 
826 
827 /*
828  * Delete the ilm.
829  * Assumes ill_mcast_serializer is held by the caller.
830  */
831 static int
832 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
833 {
834 	ill_t *ill = ilm->ilm_ill;
835 	int ret;
836 
837 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
838 	ASSERT(!(IS_UNDER_IPMP(ill)));
839 
840 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
841 	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
842 	rw_exit(&ill->ill_mcast_lock);
843 	/* Send any deferred/queued DLPI or IP packets */
844 	ill_mcast_send_queued(ill);
845 	ill_dlpi_send_queued(ill);
846 	ill_mcast_timer_start(ill->ill_ipst);
847 
848 	return (ret);
849 }
850 
851 static int
852 ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
853 {
854 	ill_t *ill = ilm->ilm_ill;
855 	int error;
856 	in6_addr_t v6group;
857 
858 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
859 
860 	/* Update counters */
861 	if (no_ilg)
862 		ilm->ilm_no_ilg_cnt--;
863 
864 	if (leaving)
865 		ilm->ilm_refcnt--;
866 
867 	if (ilm->ilm_refcnt > 0)
868 		return (ilm_update_del(ilm));
869 
870 	v6group = ilm->ilm_v6addr;
871 
872 	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
873 		ilm_delete(ilm);
874 		/*
875 		 * If we have some left then we should not tell the driver
876 		 * to leave.
877 		 */
878 		if (ilm_numentries(ill, &v6group) != 0)
879 			return (0);
880 
881 		ill_leave_allmulti(ill);
882 
883 		return (0);
884 	}
885 
886 	if (!IS_LOOPBACK(ill)) {
887 		if (ill->ill_isv6)
888 			mld_leavegroup(ilm);
889 		else
890 			igmp_leavegroup(ilm);
891 	}
892 
893 	ilm_delete(ilm);
894 	/*
895 	 * If we have some left then we should not tell the driver
896 	 * to leave.
897 	 */
898 	if (ilm_numentries(ill, &v6group) != 0)
899 		return (0);
900 
901 	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
902 	/* We ignore the case when ill_dl_up is not set */
903 	if (error == ENETDOWN) {
904 		char buf[INET6_ADDRSTRLEN];
905 
906 		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
907 		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
908 		    ill->ill_name));
909 	}
910 	return (error);
911 }
912 
913 /*
914  * Make the driver pass up all multicast packets.
915  */
916 int
917 ill_join_allmulti(ill_t *ill)
918 {
919 	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
920 	uint32_t	addrlen, addroff;
921 	ill_t		*release_ill = NULL;
922 
923 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
924 
925 	if (!ill->ill_dl_up) {
926 		/*
927 		 * Nobody there. All multicast addresses will be re-joined
928 		 * when we get the DL_BIND_ACK bringing the interface up.
929 		 */
930 		return (ENETDOWN);
931 	}
932 
933 	if (IS_IPMP(ill)) {
934 		/* On the upper IPMP ill. */
935 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
936 		if (release_ill == NULL) {
937 			/*
938 			 * Avoid sending it down to the ipmpstub.
939 			 * We will be called again once the members of the
940 			 * group are in place
941 			 */
942 			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
943 			    ill->ill_name, ill->ill_isv6));
944 			return (0);
945 		}
946 		ill = release_ill;
947 		if (!ill->ill_dl_up) {
948 			ill_refrele(ill);
949 			return (ENETDOWN);
950 		}
951 	}
952 
953 	/*
954 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
955 	 * provider.  We don't need to do this for certain media types for
956 	 * which we never need to turn promiscuous mode on.  While we're here,
957 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
958 	 * ill_leave_allmulti() will not fail due to low memory conditions.
959 	 */
960 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
961 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
962 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
963 		    &addrlen, &addroff);
964 		if (ill->ill_promiscoff_mp == NULL)
965 			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
966 			    &addrlen, &addroff);
967 		if (promiscon_mp == NULL ||
968 		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
969 			freemsg(promiscon_mp);
970 			freemsg(promiscoff_mp);
971 			if (release_ill != NULL)
972 				ill_refrele(release_ill);
973 			return (ENOMEM);
974 		}
975 		if (ill->ill_promiscoff_mp == NULL)
976 			ill->ill_promiscoff_mp = promiscoff_mp;
977 		ill_dlpi_queue(ill, promiscon_mp);
978 	}
979 	if (release_ill != NULL)
980 		ill_refrele(release_ill);
981 	return (0);
982 }
983 
984 /*
985  * Make the driver stop passing up all multicast packets
986  */
987 void
988 ill_leave_allmulti(ill_t *ill)
989 {
990 	mblk_t	*promiscoff_mp;
991 	ill_t	*release_ill = NULL;
992 
993 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
994 
995 	if (!ill->ill_dl_up) {
996 		/*
997 		 * Nobody there. All multicast addresses will be re-joined
998 		 * when we get the DL_BIND_ACK bringing the interface up.
999 		 */
1000 		return;
1001 	}
1002 
1003 	if (IS_IPMP(ill)) {
1004 		/* On the upper IPMP ill. */
1005 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1006 		if (release_ill == NULL) {
1007 			/*
1008 			 * Avoid sending it down to the ipmpstub.
1009 			 * We will be called again once the members of the
1010 			 * group are in place
1011 			 */
1012 			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
1013 			    ill->ill_name, ill->ill_isv6));
1014 			return;
1015 		}
1016 		ill = release_ill;
1017 		if (!ill->ill_dl_up)
1018 			goto done;
1019 	}
1020 
1021 	/*
1022 	 * In the case of IPMP and ill_dl_up not being set when we joined,
1023 	 * we didn't allocate a promiscoff_mp. In that case we have
1024 	 * nothing to do when we leave.
1025 	 * Ditto for PHYI_MULTI_BCAST.
1026 	 */
1027 	promiscoff_mp = ill->ill_promiscoff_mp;
1028 	if (promiscoff_mp != NULL) {
1029 		ill->ill_promiscoff_mp = NULL;
1030 		ill_dlpi_queue(ill, promiscoff_mp);
1031 	}
1032 done:
1033 	if (release_ill != NULL)
1034 		ill_refrele(release_ill);
1035 }
1036 
1037 int
1038 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1039 {
1040 	ill_t		*ill;
1041 	int		ret;
1042 	ilm_t		*ilm;
1043 
1044 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1045 	if (ill == NULL)
1046 		return (ENODEV);
1047 
1048 	/*
1049 	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1050 	 * to join allmulti since only the nominated underlying interface in
1051 	 * the group should receive multicast.  We silently succeed to avoid
1052 	 * having to teach IPobs (currently the only caller of this routine)
1053 	 * to ignore failures in this case.
1054 	 */
1055 	if (IS_UNDER_IPMP(ill)) {
1056 		ill_refrele(ill);
1057 		return (0);
1058 	}
1059 	mutex_enter(&ill->ill_lock);
1060 	if (ill->ill_ipallmulti_cnt > 0) {
1061 		/* Already joined */
1062 		ASSERT(ill->ill_ipallmulti_ilm != NULL);
1063 		ill->ill_ipallmulti_cnt++;
1064 		mutex_exit(&ill->ill_lock);
1065 		goto done;
1066 	}
1067 	mutex_exit(&ill->ill_lock);
1068 
1069 	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1070 	if (ilm == NULL) {
1071 		ASSERT(ret != 0);
1072 		ill_refrele(ill);
1073 		return (ret);
1074 	}
1075 
1076 	mutex_enter(&ill->ill_lock);
1077 	if (ill->ill_ipallmulti_cnt > 0) {
1078 		/* Another thread added it concurrently */
1079 		(void) ip_delmulti(ilm);
1080 		mutex_exit(&ill->ill_lock);
1081 		goto done;
1082 	}
1083 	ASSERT(ill->ill_ipallmulti_ilm == NULL);
1084 	ill->ill_ipallmulti_ilm = ilm;
1085 	ill->ill_ipallmulti_cnt++;
1086 	mutex_exit(&ill->ill_lock);
1087 done:
1088 	ill_refrele(ill);
1089 	return (0);
1090 }
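
/*
 * Editorial note on the locking in ip_join_allmulti() above: the join path
 * itself takes ill_lock (see ip_ll_send_multireq()), so ill_lock cannot be
 * held across the ip_addmulti() call. Hence the function drops ill_lock,
 * joins, and then re-checks ill_ipallmulti_cnt; if another thread raced in
 * and already recorded an allmulti ilm, the extra one is released with
 * ip_delmulti().
 */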
1091 
1092 int
1093 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1094 {
1095 	ill_t		*ill;
1096 	ilm_t		*ilm;
1097 
1098 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1099 	if (ill == NULL)
1100 		return (ENODEV);
1101 
1102 	if (IS_UNDER_IPMP(ill)) {
1103 		ill_refrele(ill);
1104 		return (0);
1105 	}
1106 
1107 	mutex_enter(&ill->ill_lock);
1108 	if (ill->ill_ipallmulti_cnt == 0) {
1109 		/* ip_purge_allmulti could have removed them all */
1110 		mutex_exit(&ill->ill_lock);
1111 		goto done;
1112 	}
1113 	ill->ill_ipallmulti_cnt--;
1114 	if (ill->ill_ipallmulti_cnt == 0) {
1115 		/* Last one */
1116 		ilm = ill->ill_ipallmulti_ilm;
1117 		ill->ill_ipallmulti_ilm = NULL;
1118 	} else {
1119 		ilm = NULL;
1120 	}
1121 	mutex_exit(&ill->ill_lock);
1122 	if (ilm != NULL)
1123 		(void) ip_delmulti(ilm);
1124 
1125 done:
1126 	ill_refrele(ill);
1127 	return (0);
1128 }
1129 
1130 /*
1131  * Delete the allmulti memberships that were added as part of
1132  * ip_join_allmulti().
1133  */
1134 void
1135 ip_purge_allmulti(ill_t *ill)
1136 {
1137 	ilm_t	*ilm;
1138 
1139 	ASSERT(IAM_WRITER_ILL(ill));
1140 
1141 	mutex_enter(&ill->ill_lock);
1142 	ilm = ill->ill_ipallmulti_ilm;
1143 	ill->ill_ipallmulti_ilm = NULL;
1144 	ill->ill_ipallmulti_cnt = 0;
1145 	mutex_exit(&ill->ill_lock);
1146 
1147 	if (ilm != NULL)
1148 		(void) ip_delmulti(ilm);
1149 }
1150 
1151 /*
1152  * Create a dlpi message with room for phys+sap. Later
1153  * we will strip the sap for those primitives which
1154  * only need a physical address.
1155  */
1156 static mblk_t *
1157 ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1158     uint32_t *addr_lenp, uint32_t *addr_offp)
1159 {
1160 	mblk_t	*mp;
1161 	uint32_t	hw_addr_length;
1162 	char		*cp;
1163 	uint32_t	offset;
1164 	uint32_t	length;
1165 	uint32_t 	size;
1166 
1167 	*addr_lenp = *addr_offp = 0;
1168 
1169 	hw_addr_length = ill->ill_phys_addr_length;
1170 	if (!hw_addr_length) {
1171 		ip0dbg(("ill_create_dl: hw addr length = 0\n"));
1172 		return (NULL);
1173 	}
1174 
1175 	switch (dl_primitive) {
1176 	case DL_ENABMULTI_REQ:
1177 		length = sizeof (dl_enabmulti_req_t);
1178 		size = length + hw_addr_length;
1179 		break;
1180 	case DL_DISABMULTI_REQ:
1181 		length = sizeof (dl_disabmulti_req_t);
1182 		size = length + hw_addr_length;
1183 		break;
1184 	case DL_PROMISCON_REQ:
1185 	case DL_PROMISCOFF_REQ:
1186 		size = length = sizeof (dl_promiscon_req_t);
1187 		break;
1188 	default:
1189 		return (NULL);
1190 	}
1191 	mp = allocb(size, BPRI_HI);
1192 	if (!mp)
1193 		return (NULL);
1194 	mp->b_wptr += size;
1195 	mp->b_datap->db_type = M_PROTO;
1196 
1197 	cp = (char *)mp->b_rptr;
1198 	offset = length;
1199 
1200 	switch (dl_primitive) {
1201 	case DL_ENABMULTI_REQ: {
1202 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1203 
1204 		dl->dl_primitive = dl_primitive;
1205 		dl->dl_addr_offset = offset;
1206 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1207 		*addr_offp = offset;
1208 		break;
1209 	}
1210 	case DL_DISABMULTI_REQ: {
1211 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1212 
1213 		dl->dl_primitive = dl_primitive;
1214 		dl->dl_addr_offset = offset;
1215 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1216 		*addr_offp = offset;
1217 		break;
1218 	}
1219 	case DL_PROMISCON_REQ:
1220 	case DL_PROMISCOFF_REQ: {
1221 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1222 
1223 		dl->dl_primitive = dl_primitive;
1224 		dl->dl_level = DL_PROMISC_MULTI;
1225 		break;
1226 	}
1227 	}
1228 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1229 	    *addr_lenp, *addr_offp));
1230 	return (mp);
1231 }
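
/*
 * For reference (editorial, derived from the code above), a
 * DL_ENABMULTI_REQ or DL_DISABMULTI_REQ built here is laid out as:
 *
 *	+---------------------+------------------------------------------+
 *	| dl_*multi_req_t     | hardware address (dl_addr_length bytes)  |
 *	+---------------------+------------------------------------------+
 *	  dl_addr_offset == sizeof (dl_*multi_req_t)
 *	  dl_addr_length == ill_phys_addr_length
 *
 * The address bytes themselves are not filled in here;
 * ip_ll_send_multireq() passes the offset and length to ndp_mcastreq(),
 * which is expected to supply the mapped hardware multicast address.
 */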
1232 
1233 /*
1234  * Rejoin any groups for which we have ilms.
1235  *
1236  * This is only needed for IPMP when the cast_ill changes since that
1237  * change is invisible to the ilm. Other interface changes are handled
1238  * by conn_update_ill.
1239  */
1240 void
1241 ill_recover_multicast(ill_t *ill)
1242 {
1243 	ilm_t	*ilm;
1244 	char    addrbuf[INET6_ADDRSTRLEN];
1245 
1246 	ill->ill_need_recover_multicast = 0;
1247 
1248 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1249 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1250 		/*
1251 		 * If we have more than one ilm for the group (e.g., with
1252 		 * different zoneids) then we should not tell the driver
1253 		 * to join unless this is the first ilm for the group.
1254 		 */
1255 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1256 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1257 			continue;
1258 		}
1259 
1260 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1261 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1262 
1263 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1264 			(void) ill_join_allmulti(ill);
1265 		} else {
1266 			if (ill->ill_isv6)
1267 				mld_joingroup(ilm);
1268 			else
1269 				igmp_joingroup(ilm);
1270 
1271 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1272 			    DL_ENABMULTI_REQ);
1273 		}
1274 	}
1275 	rw_exit(&ill->ill_mcast_lock);
1276 	/* Send any deferred/queued DLPI or IP packets */
1277 	ill_mcast_send_queued(ill);
1278 	ill_dlpi_send_queued(ill);
1279 	ill_mcast_timer_start(ill->ill_ipst);
1280 }
1281 
1282 /*
1283  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1284  * that were explicitly joined.
1285  *
1286  * This is only needed for IPMP when the cast_ill changes since that
1287  * change is invisible to the ilm. Other interface changes are handled
1288  * by conn_update_ill.
1289  */
1290 void
1291 ill_leave_multicast(ill_t *ill)
1292 {
1293 	ilm_t	*ilm;
1294 	char    addrbuf[INET6_ADDRSTRLEN];
1295 
1296 	ill->ill_need_recover_multicast = 1;
1297 
1298 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1299 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1300 		/*
1301 		 * If we have more than one ilm for the group (e.g., with
1302 		 * different zoneids) then we should not tell the driver
1303 		 * to leave unless this is the first ilm for the group.
1304 		 */
1305 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1306 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1307 			continue;
1308 		}
1309 
1310 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1311 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1312 
1313 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1314 			ill_leave_allmulti(ill);
1315 		} else {
1316 			if (ill->ill_isv6)
1317 				mld_leavegroup(ilm);
1318 			else
1319 				igmp_leavegroup(ilm);
1320 
1321 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1322 			    DL_DISABMULTI_REQ);
1323 		}
1324 	}
1325 	rw_exit(&ill->ill_mcast_lock);
1326 	/* Send any deferred/queued DLPI or IP packets */
1327 	ill_mcast_send_queued(ill);
1328 	ill_dlpi_send_queued(ill);
1329 	ill_mcast_timer_start(ill->ill_ipst);
1330 }
1331 
1332 /*
1333  * Interface used by IP input/output.
1334  * Returns true if there is a member on the ill for any zoneid.
1335  */
1336 boolean_t
1337 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1338 {
1339 	ilm_t		*ilm;
1340 
1341 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1342 	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1343 	rw_exit(&ill->ill_mcast_lock);
1344 	return (ilm != NULL);
1345 }
1346 
1347 /*
1348  * Interface used by IP input/output.
1349  * Returns true if there is a member on the ill for any zoneid.
1350  *
1351  * The group and source can't be INADDR_ANY here so no need to translate to
1352  * the unspecified IPv6 address.
1353  */
1354 boolean_t
1355 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1356 {
1357 	in6_addr_t	v6group;
1358 
1359 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1360 	return (ill_hasmembers_v6(ill, &v6group));
1361 }
1362 
1363 /*
1364  * Interface used by IP input/output.
1365  * Returns true if there is a member on the ill for any zoneid except skipzone.
1366  */
1367 boolean_t
1368 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1369     zoneid_t skipzone)
1370 {
1371 	ilm_t		*ilm;
1372 
1373 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1374 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1375 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1376 		    ilm->ilm_zoneid != skipzone) {
1377 			rw_exit(&ill->ill_mcast_lock);
1378 			return (B_TRUE);
1379 		}
1380 	}
1381 	rw_exit(&ill->ill_mcast_lock);
1382 	return (B_FALSE);
1383 }
1384 
1385 /*
1386  * Interface used by IP input/output.
1387  * Returns true if there is a member on the ill for any zoneid except skipzone.
1388  *
1389  * The group and source can't be INADDR_ANY here so no need to translate to
1390  * the unspecified IPv6 address.
1391  */
1392 boolean_t
1393 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1394 {
1395 	in6_addr_t	v6group;
1396 
1397 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1398 	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1399 }
1400 
1401 /*
1402  * Interface used by IP input.
1403  * Returns the next numerically larger zoneid that has a member. If none exist
1404  * then returns -1 (ALL_ZONES).
1405  * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1406  * to find the first zoneid which has a member, and then pass that in for
1407  * subsequent calls until ALL_ZONES is returned.
1408  *
1409  * The implementation of ill_hasmembers_nextzone() assumes the ilms
1410  * are sorted by zoneid for efficiency.
1411  */
1412 zoneid_t
1413 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1414     zoneid_t zoneid)
1415 {
1416 	ilm_t		*ilm;
1417 
1418 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1419 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1420 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1421 		    ilm->ilm_zoneid > zoneid) {
1422 			zoneid = ilm->ilm_zoneid;
1423 			rw_exit(&ill->ill_mcast_lock);
1424 			return (zoneid);
1425 		}
1426 	}
1427 	rw_exit(&ill->ill_mcast_lock);
1428 	return (ALL_ZONES);
1429 }
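
/*
 * Illustrative caller pattern for the above (editorial sketch only;
 * "deliver" is a placeholder):
 *
 *	zoneid_t zoneid = ALL_ZONES;
 *
 *	while ((zoneid = ill_hasmembers_nextzone_v6(ill, v6group,
 *	    zoneid)) != ALL_ZONES) {
 *		deliver a copy of the packet to zoneid;
 *	}
 */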
1430 
1431 /*
1432  * Interface used by IP input.
1433  * Returns the next numerically larger zoneid that has a member. If none exist
1434  * then returns -1 (ALL_ZONES).
1435  *
1436  * The group and source can't be INADDR_ANY here so no need to translate to
1437  * the unspecified IPv6 address.
1438  */
1439 zoneid_t
1440 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1441 {
1442 	in6_addr_t	v6group;
1443 
1444 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1445 
1446 	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1447 }
1448 
1449 /*
1450  * Find an ilm matching the ill, group, and zoneid.
1451  */
1452 static ilm_t *
1453 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1454 {
1455 	ilm_t	*ilm;
1456 
1457 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1458 
1459 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1460 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1461 			continue;
1462 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1463 			continue;
1464 
1465 		ASSERT(ilm->ilm_ill == ill);
1466 		return (ilm);
1467 	}
1468 	return (NULL);
1469 }
1470 
1471 /*
1472  * How many members on this ill?
1473  * Since each shared-IP zone has a separate ilm for the same group/ill
1474  * we can have several.
1475  */
1476 static int
1477 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1478 {
1479 	ilm_t	*ilm;
1480 	int i = 0;
1481 
1482 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1483 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1484 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1485 			i++;
1486 		}
1487 	}
1488 	return (i);
1489 }
1490 
1491 /* Caller guarantees that the group is not already on the list */
1492 static ilm_t *
1493 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1494     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1495 {
1496 	ilm_t	*ilm;
1497 	ilm_t	*ilm_cur;
1498 	ilm_t	**ilm_ptpn;
1499 
1500 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1501 	ilm = GETSTRUCT(ilm_t, 1);
1502 	if (ilm == NULL)
1503 		return (NULL);
1504 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1505 		ilm->ilm_filter = l_alloc();
1506 		if (ilm->ilm_filter == NULL) {
1507 			mi_free(ilm);
1508 			return (NULL);
1509 		}
1510 	}
1511 	ilm->ilm_v6addr = *v6group;
1512 	ilm->ilm_refcnt = 1;
1513 	ilm->ilm_zoneid = zoneid;
1514 	ilm->ilm_timer = INFINITY;
1515 	ilm->ilm_rtx.rtx_timer = INFINITY;
1516 
1517 	ilm->ilm_ill = ill;
1518 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1519 	    (char *), "ilm", (void *), ilm);
1520 	ill->ill_ilm_cnt++;
1521 
1522 	ASSERT(ill->ill_ipst);
1523 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1524 
1525 	/* The ill/ipif could have just been marked as condemned */
1526 
1527 	/*
1528 	 * To make ill_hasmembers_nextzone_v6 work we keep the list
1529 	 * sorted by zoneid.
1530 	 */
1531 	ilm_cur = ill->ill_ilm;
1532 	ilm_ptpn = &ill->ill_ilm;
1533 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
1534 		ilm_ptpn = &ilm_cur->ilm_next;
1535 		ilm_cur = ilm_cur->ilm_next;
1536 	}
1537 	ilm->ilm_next = ilm_cur;
1538 	*ilm_ptpn = ilm;
1539 
1540 	/*
1541 	 * If we have an associated ilg, use its filter state; if not,
1542 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1543 	 */
1544 	if (ilgstat != ILGSTAT_NONE) {
1545 		if (!SLIST_IS_EMPTY(ilg_flist))
1546 			l_copy(ilg_flist, ilm->ilm_filter);
1547 		ilm->ilm_fmode = ilg_fmode;
1548 	} else {
1549 		ilm->ilm_no_ilg_cnt = 1;
1550 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1551 	}
1552 
1553 	return (ilm);
1554 }
1555 
1556 void
1557 ilm_inactive(ilm_t *ilm)
1558 {
1559 	FREE_SLIST(ilm->ilm_filter);
1560 	FREE_SLIST(ilm->ilm_pendsrcs);
1561 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1562 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1563 	ilm->ilm_ipst = NULL;
1564 	mi_free((char *)ilm);
1565 }
1566 
1567 /*
1568  * Unlink ilm and free it.
1569  */
1570 static void
1571 ilm_delete(ilm_t *ilm)
1572 {
1573 	ill_t		*ill = ilm->ilm_ill;
1574 	ilm_t		**ilmp;
1575 	boolean_t	need_wakeup;
1576 
1577 	/*
1578 	 * Delete under lock protection so that readers don't stumble
1579 	 * on bad ilm_next
1580 	 */
1581 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1582 
1583 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1584 		;
1585 
1586 	*ilmp = ilm->ilm_next;
1587 
1588 	mutex_enter(&ill->ill_lock);
1589 	/*
1590 	 * If we are the last reference to the ill, we may need to wake up any
1591 	 * pending FREE or unplumb operations. This is because conn_update_ill
1592 	 * bails if there is an ilg_delete_all in progress.
1593 	 */
1594 	need_wakeup = B_FALSE;
1595 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1596 	    (char *), "ilm", (void *), ilm);
1597 	ASSERT(ill->ill_ilm_cnt > 0);
1598 	ill->ill_ilm_cnt--;
1599 	if (ILL_FREE_OK(ill))
1600 		need_wakeup = B_TRUE;
1601 
1602 	ilm_inactive(ilm); /* frees this ilm */
1603 
1604 	if (need_wakeup) {
1605 		/* drops ill lock */
1606 		ipif_ill_refrele_tail(ill);
1607 	} else {
1608 		mutex_exit(&ill->ill_lock);
1609 	}
1610 }
1611 
1612 /*
1613  * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1614  * Applies to both IPv4 and IPv6, although ifaddr is only used with
1615  * IPv4.
1616  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1617  * On error it sets *errorp.
1618  */
1619 static ill_t *
1620 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1621     zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1622 {
1623 	ill_t *ill;
1624 	ipaddr_t v4group;
1625 
1626 	if (IN6_IS_ADDR_V4MAPPED(group)) {
1627 		IN6_V4MAPPED_TO_IPADDR(group, v4group);
1628 
1629 		if (ifindex != 0) {
1630 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1631 			    B_FALSE, ipst);
1632 		} else if (ifaddr != INADDR_ANY) {
1633 			ipif_t *ipif;
1634 
1635 			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1636 			if (ipif == NULL) {
1637 				ill = NULL;
1638 			} else {
1639 				ill = ipif->ipif_ill;
1640 				ill_refhold(ill);
1641 				ipif_refrele(ipif);
1642 			}
1643 		} else {
1644 			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
1645 			    NULL);
1646 		}
1647 	} else {
1648 		if (ifindex != 0) {
1649 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1650 			    B_TRUE, ipst);
1651 		} else {
1652 			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
1653 			    NULL);
1654 		}
1655 	}
1656 	if (ill == NULL) {
1657 		if (ifindex != 0)
1658 			*errorp = ENXIO;
1659 		else
1660 			*errorp = EADDRNOTAVAIL;
1661 		return (NULL);
1662 	}
1663 	/* operation not supported on the virtual network interface */
1664 	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1665 		ill_refrele(ill);
1666 		*errorp = EINVAL;
1667 		return (NULL);
1668 	}
1669 	return (ill);
1670 }
1671 
1672 /*
1673  * Looks up the appropriate ill given an interface index (or interface address)
1674  * and multicast group.  On success, returns 0, with *illpp pointing to the
1675  * found struct.  On failure, returns an errno and *illpp is set to NULL.
1676  *
1677  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1678  *
1679  * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1680  * case of IPv4.
1681  */
1682 int
1683 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1684     const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1685 {
1686 	boolean_t src_unspec;
1687 	ill_t *ill = NULL;
1688 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1689 	int error = 0;
1690 
1691 	*illpp = NULL;
1692 
1693 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1694 
1695 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1696 		ipaddr_t v4group;
1697 		ipaddr_t v4src;
1698 
1699 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1700 			return (EINVAL);
1701 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1702 		if (src_unspec) {
1703 			v4src = INADDR_ANY;
1704 		} else {
1705 			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1706 		}
1707 		if (!CLASSD(v4group) || CLASSD(v4src))
1708 			return (EINVAL);
1709 	} else {
1710 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1711 			return (EINVAL);
1712 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1713 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1714 			return (EINVAL);
1715 		}
1716 	}
1717 
1718 	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1719 	    ipst, &error);
1720 	*illpp = ill;
1721 	return (error);
1722 }
1723 
1724 static int
1725 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1726     struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
1727 {
1728 	ilg_t *ilg;
1729 	int i, numsrc, fmode, outsrcs;
1730 	struct sockaddr_in *sin;
1731 	struct sockaddr_in6 *sin6;
1732 	struct in_addr *addrp;
1733 	slist_t *fp;
1734 	boolean_t is_v4only_api;
1735 	ipaddr_t ifaddr;
1736 	uint_t ifindex;
1737 
1738 	if (gf == NULL) {
1739 		ASSERT(imsf != NULL);
1740 		ASSERT(!issin6);
1741 		is_v4only_api = B_TRUE;
1742 		outsrcs = imsf->imsf_numsrc;
1743 		ifaddr = imsf->imsf_interface.s_addr;
1744 		ifindex = 0;
1745 	} else {
1746 		ASSERT(imsf == NULL);
1747 		is_v4only_api = B_FALSE;
1748 		outsrcs = gf->gf_numsrc;
1749 		ifaddr = INADDR_ANY;
1750 		ifindex = gf->gf_interface;
1751 	}
1752 
1753 	/* No need to use ill_mcast_serializer for the reader */
1754 	rw_enter(&connp->conn_ilg_lock, RW_READER);
1755 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1756 	if (ilg == NULL) {
1757 		rw_exit(&connp->conn_ilg_lock);
1758 		return (EADDRNOTAVAIL);
1759 	}
1760 
1761 	/*
1762 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1763 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1764 	 * So we need to translate here.
1765 	 */
1766 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1767 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1768 	if ((fp = ilg->ilg_filter) == NULL) {
1769 		numsrc = 0;
1770 	} else {
1771 		for (i = 0; i < outsrcs; i++) {
1772 			if (i == fp->sl_numsrc)
1773 				break;
1774 			if (issin6) {
1775 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1776 				sin6->sin6_family = AF_INET6;
1777 				sin6->sin6_addr = fp->sl_addr[i];
1778 			} else {
1779 				if (is_v4only_api) {
1780 					addrp = &imsf->imsf_slist[i];
1781 				} else {
1782 					sin = (struct sockaddr_in *)
1783 					    &gf->gf_slist[i];
1784 					sin->sin_family = AF_INET;
1785 					addrp = &sin->sin_addr;
1786 				}
1787 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1788 			}
1789 		}
1790 		numsrc = fp->sl_numsrc;
1791 	}
1792 
1793 	if (is_v4only_api) {
1794 		imsf->imsf_numsrc = numsrc;
1795 		imsf->imsf_fmode = fmode;
1796 	} else {
1797 		gf->gf_numsrc = numsrc;
1798 		gf->gf_fmode = fmode;
1799 	}
1800 
1801 	rw_exit(&connp->conn_ilg_lock);
1802 
1803 	return (0);
1804 }
1805 
1806 /*
1807  * Common for IPv4 and IPv6.
1808  */
1809 static int
1810 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1811     struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1812     boolean_t issin6)
1813 {
1814 	ilg_t *ilg;
1815 	int i, err, infmode, new_fmode;
1816 	uint_t insrcs;
1817 	struct sockaddr_in *sin;
1818 	struct sockaddr_in6 *sin6;
1819 	struct in_addr *addrp;
1820 	slist_t *orig_filter = NULL;
1821 	slist_t *new_filter = NULL;
1822 	mcast_record_t orig_fmode;
1823 	boolean_t leave_group, is_v4only_api;
1824 	ilg_stat_t ilgstat;
1825 	ilm_t *ilm;
1826 	ipaddr_t ifaddr;
1827 	uint_t ifindex;
1828 
1829 	if (gf == NULL) {
1830 		ASSERT(imsf != NULL);
1831 		ASSERT(!issin6);
1832 		is_v4only_api = B_TRUE;
1833 		insrcs = imsf->imsf_numsrc;
1834 		infmode = imsf->imsf_fmode;
1835 		ifaddr = imsf->imsf_interface.s_addr;
1836 		ifindex = 0;
1837 	} else {
1838 		ASSERT(imsf == NULL);
1839 		is_v4only_api = B_FALSE;
1840 		insrcs = gf->gf_numsrc;
1841 		infmode = gf->gf_fmode;
1842 		ifaddr = INADDR_ANY;
1843 		ifindex = gf->gf_interface;
1844 	}
1845 
1846 	/* Make sure we can handle the source list */
1847 	if (insrcs > MAX_FILTER_SIZE)
1848 		return (ENOBUFS);
1849 
1850 	/*
1851 	 * setting the filter to (INCLUDE, NULL) is treated
1852 	 * as a request to leave the group.
1853 	 */
1854 	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1855 
1856 	mutex_enter(&ill->ill_mcast_serializer);
1857 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1858 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1859 	if (ilg == NULL) {
1860 		/*
1861 		 * if the request was actually to leave, and we
1862 		 * didn't find an ilg, there's nothing to do.
1863 		 */
1864 		if (leave_group) {
1865 			rw_exit(&connp->conn_ilg_lock);
1866 			mutex_exit(&ill->ill_mcast_serializer);
1867 			return (0);
1868 		}
1869 		ilg = conn_ilg_alloc(connp, &err);
1870 		if (ilg == NULL) {
1871 			rw_exit(&connp->conn_ilg_lock);
1872 			mutex_exit(&ill->ill_mcast_serializer);
1873 			return (err);
1874 		}
1875 		ilgstat = ILGSTAT_NEW;
1876 		ilg->ilg_v6group = *group;
1877 		ilg->ilg_ill = ill;
1878 		ilg->ilg_ifaddr = ifaddr;
1879 		ilg->ilg_ifindex = ifindex;
1880 	} else if (leave_group) {
1881 		/*
1882 		 * Make sure we have the correct serializer. The ill argument
1883 		 * might not match ilg_ill.
1884 		 */
1885 		ilg_refhold(ilg);
1886 		mutex_exit(&ill->ill_mcast_serializer);
1887 		ill = ilg->ilg_ill;
1888 		rw_exit(&connp->conn_ilg_lock);
1889 
1890 		mutex_enter(&ill->ill_mcast_serializer);
1891 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1892 		ilm = ilg->ilg_ilm;
1893 		ilg->ilg_ilm = NULL;
1894 		ilg_delete(connp, ilg, NULL);
1895 		ilg_refrele(ilg);
1896 		rw_exit(&connp->conn_ilg_lock);
1897 		if (ilm != NULL)
1898 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
1899 		mutex_exit(&ill->ill_mcast_serializer);
1900 		return (0);
1901 	} else {
1902 		ilgstat = ILGSTAT_CHANGE;
1903 		/* Preserve existing state in case ip_addmulti() fails */
1904 		orig_fmode = ilg->ilg_fmode;
1905 		if (ilg->ilg_filter == NULL) {
1906 			orig_filter = NULL;
1907 		} else {
1908 			orig_filter = l_alloc_copy(ilg->ilg_filter);
1909 			if (orig_filter == NULL) {
1910 				rw_exit(&connp->conn_ilg_lock);
1911 				mutex_exit(&ill->ill_mcast_serializer);
1912 				return (ENOMEM);
1913 			}
1914 		}
1915 	}
1916 
1917 	/*
1918 	 * Alloc buffer to copy new state into (see below) before
1919 	 * we make any changes, so we can bail if it fails.
1920 	 */
1921 	if ((new_filter = l_alloc()) == NULL) {
1922 		rw_exit(&connp->conn_ilg_lock);
1923 		err = ENOMEM;
1924 		goto free_and_exit;
1925 	}
1926 
1927 	if (insrcs == 0) {
1928 		CLEAR_SLIST(ilg->ilg_filter);
1929 	} else {
1930 		slist_t *fp;
1931 		if (ilg->ilg_filter == NULL) {
1932 			fp = l_alloc();
1933 			if (fp == NULL) {
1934 				if (ilgstat == ILGSTAT_NEW)
1935 					ilg_delete(connp, ilg, NULL);
1936 				rw_exit(&connp->conn_ilg_lock);
1937 				err = ENOMEM;
1938 				goto free_and_exit;
1939 			}
1940 		} else {
1941 			fp = ilg->ilg_filter;
1942 		}
1943 		for (i = 0; i < insrcs; i++) {
1944 			if (issin6) {
1945 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1946 				fp->sl_addr[i] = sin6->sin6_addr;
1947 			} else {
1948 				if (is_v4only_api) {
1949 					addrp = &imsf->imsf_slist[i];
1950 				} else {
1951 					sin = (struct sockaddr_in *)
1952 					    &gf->gf_slist[i];
1953 					addrp = &sin->sin_addr;
1954 				}
1955 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
1956 			}
1957 		}
1958 		fp->sl_numsrc = insrcs;
1959 		ilg->ilg_filter = fp;
1960 	}
1961 	/*
1962 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1963 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1964 	 * So we need to translate here.
1965 	 */
1966 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
1967 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
1968 
1969 	/*
1970 	 * Save copy of ilg's filter state to pass to other functions,
1971 	 * so we can release conn_ilg_lock now.
1972 	 */
1973 	new_fmode = ilg->ilg_fmode;
1974 	l_copy(ilg->ilg_filter, new_filter);
1975 
1976 	rw_exit(&connp->conn_ilg_lock);
1977 
1978 	/*
1979 	 * Now update the ill. We wait to do this until after the ilg
1980 	 * has been updated because we need to update the src filter
1981 	 * info for the ill, which involves looking at the status of
1982 	 * all the ilgs associated with this group/interface pair.
1983 	 */
1984 	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
1985 	    new_fmode, new_filter, &err);
1986 
1987 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1988 	/*
1989 	 * Must look up the ilg again since we've not been holding
1990 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
1991 	 * having called conn_update_ill, which can run once we dropped the
1992 	 * conn_ilg_lock above.
1993 	 */
1994 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1995 	if (ilg == NULL) {
1996 		rw_exit(&connp->conn_ilg_lock);
1997 		if (ilm != NULL) {
1998 			(void) ip_delmulti_serial(ilm, B_FALSE,
1999 			    (ilgstat == ILGSTAT_NEW));
2000 		}
2001 		err = ENXIO;
2002 		goto free_and_exit;
2003 	}
2004 
2005 	if (ilm != NULL) {
2006 		/* Succeeded. Update the ilg to point at the ilm */
2007 		if (ilgstat == ILGSTAT_NEW) {
2008 			ASSERT(ilg->ilg_ilm == NULL);
2009 			ilg->ilg_ilm = ilm;
2010 			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
2011 		} else {
2012 			/*
2013 			 * ip_addmulti didn't get a held ilm for
2014 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2015 			 */
2016 			ASSERT(ilg->ilg_ilm == ilm);
2017 		}
2018 	} else {
2019 		ASSERT(err != 0);
2020 		/*
2021 		 * Failed to allocate the ilm.
2022 		 * Restore the original filter state, or delete the
2023 		 * newly-created ilg.
2024 		 * If ENETDOWN, just clear ilg_ill so that we will rejoin
2025 		 * when the ill comes back; don't report ENETDOWN to the
2026 		 * application.
2027 		 */
2028 		if (ilgstat == ILGSTAT_NEW) {
2029 			if (err == ENETDOWN) {
2030 				ilg->ilg_ill = NULL;
2031 				err = 0;
2032 			} else {
2033 				ilg_delete(connp, ilg, NULL);
2034 			}
2035 		} else {
2036 			ilg->ilg_fmode = orig_fmode;
2037 			if (SLIST_IS_EMPTY(orig_filter)) {
2038 				CLEAR_SLIST(ilg->ilg_filter);
2039 			} else {
2040 				/*
2041 				 * We didn't free the filter, even if we
2042 				 * were trying to make the source list empty;
2043 				 * so if orig_filter isn't empty, the ilg
2044 				 * must still have a filter alloc'd.
2045 				 */
2046 				l_copy(orig_filter, ilg->ilg_filter);
2047 			}
2048 		}
2049 	}
2050 	rw_exit(&connp->conn_ilg_lock);
2051 
2052 free_and_exit:
2053 	mutex_exit(&ill->ill_mcast_serializer);
2054 	l_free(orig_filter);
2055 	l_free(new_filter);
2056 
2057 	return (err);
2058 }
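/*
 * Illustrative userland sketch (not part of this file): the (INCLUDE, empty)
 * convention handled above is what a caller of the RFC 3678 full-state API
 * relies on to leave a group.  This assumes the setsourcefilter(3SOCKET)
 * interface; the helper name below is hypothetical.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *
 *	int
 *	leave_via_empty_include(int sock, uint32_t ifindex,
 *	    struct sockaddr *group, socklen_t grouplen)
 *	{
 *		return (setsourcefilter(sock, ifindex, group, grouplen,
 *		    MCAST_INCLUDE, 0, NULL));
 *	}
 */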
2059 
2060 /*
2061  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2062  */
2063 /* ARGSUSED */
2064 int
2065 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2066     ip_ioctl_cmd_t *ipip, void *ifreq)
2067 {
2068 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2069 	/* existence verified in ip_wput_nondata() */
2070 	mblk_t *data_mp = mp->b_cont->b_cont;
2071 	int datalen, err, cmd, minsize;
2072 	uint_t expsize = 0;
2073 	conn_t *connp;
2074 	boolean_t isv6, is_v4only_api, getcmd;
2075 	struct sockaddr_in *gsin;
2076 	struct sockaddr_in6 *gsin6;
2077 	ipaddr_t v4group;
2078 	in6_addr_t v6group;
2079 	struct group_filter *gf = NULL;
2080 	struct ip_msfilter *imsf = NULL;
2081 	mblk_t *ndp;
2082 	ill_t *ill;
2083 
2084 	connp = Q_TO_CONN(q);
2085 	err = ip_msfilter_ill(connp, mp, ipip, &ill);
2086 	if (err != 0)
2087 		return (err);
2088 
2089 	if (data_mp->b_cont != NULL) {
2090 		if ((ndp = msgpullup(data_mp, -1)) == NULL) {
			ill_refrele(ill);
2091 			return (ENOMEM);
		}
2092 		freemsg(data_mp);
2093 		data_mp = ndp;
2094 		mp->b_cont->b_cont = data_mp;
2095 	}
2096 
2097 	cmd = iocp->ioc_cmd;
2098 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2099 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2100 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2101 	datalen = MBLKL(data_mp);
2102 
2103 	if (datalen < minsize) {
		ill_refrele(ill);
2104 		return (EINVAL);
	}
2105 
2106 	/*
2107 	 * now we know we have at least the initial structure,
2108 	 * but need to check for the source list array.
2109 	 */
2110 	if (is_v4only_api) {
2111 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2112 		isv6 = B_FALSE;
2113 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2114 	} else {
2115 		gf = (struct group_filter *)data_mp->b_rptr;
2116 		if (gf->gf_group.ss_family == AF_INET6) {
2117 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2118 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2119 		} else {
2120 			isv6 = B_FALSE;
2121 		}
2122 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2123 	}
2124 	if (datalen < expsize) {
		ill_refrele(ill);
2125 		return (EINVAL);
	}
2126 
2127 	if (isv6) {
2128 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2129 		v6group = gsin6->sin6_addr;
2130 		if (getcmd) {
2131 			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2132 			    B_TRUE);
2133 		} else {
2134 			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2135 			    B_TRUE);
2136 		}
2137 	} else {
2138 		boolean_t issin6 = B_FALSE;
2139 		if (is_v4only_api) {
2140 			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2141 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2142 		} else {
2143 			if (gf->gf_group.ss_family == AF_INET) {
2144 				gsin = (struct sockaddr_in *)&gf->gf_group;
2145 				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2146 				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2147 			} else {
2148 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2149 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2150 				    v4group);
2151 				issin6 = B_TRUE;
2152 			}
2153 		}
2154 		/*
2155 		 * INADDR_ANY is represented as the IPv6 unspecified addr.
2156 		 */
2157 		if (v4group == INADDR_ANY)
2158 			v6group = ipv6_all_zeros;
2159 		else
2160 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2161 
2162 		if (getcmd) {
2163 			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2164 			    issin6);
2165 		} else {
2166 			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2167 			    issin6);
2168 		}
2169 	}
2170 	ill_refrele(ill);
2171 
2172 	return (err);
2173 }
2174 
2175 /*
2176  * Determine the ill for the SIOC*MSFILTER ioctls
2177  *
2178  * Returns an error for IS_UNDER_IPMP interfaces.
2179  *
2180  * Finds the ill based on information in the ioctl headers.
2181  */
2182 static int
2183 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2184     ill_t **illp)
2185 {
2186 	int cmd = ipip->ipi_cmd;
2187 	int err = 0;
2188 	ill_t *ill;
2189 	/* caller has verified this mblk exists */
2190 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2191 	struct ip_msfilter *imsf;
2192 	struct group_filter *gf;
2193 	ipaddr_t v4addr, v4group;
2194 	in6_addr_t v6group;
2195 	uint32_t index;
2196 	ip_stack_t *ipst;
2197 
2198 	ipst = connp->conn_netstack->netstack_ip;
2199 
2200 	*illp = NULL;
2201 
2202 	/* don't allow multicast operations on a tcp conn */
2203 	if (IPCL_IS_TCP(connp))
2204 		return (ENOPROTOOPT);
2205 
2206 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2207 		/* don't allow v4-specific ioctls on v6 socket */
2208 		if (connp->conn_family == AF_INET6)
2209 			return (EAFNOSUPPORT);
2210 
2211 		imsf = (struct ip_msfilter *)dbuf;
2212 		v4addr = imsf->imsf_interface.s_addr;
2213 		v4group = imsf->imsf_multiaddr.s_addr;
2214 		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2215 		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2216 		    ipst, &err);
2217 		if (ill == NULL && v4addr != INADDR_ANY)
2218 			err = ENXIO;
2219 	} else {
2220 		gf = (struct group_filter *)dbuf;
2221 		index = gf->gf_interface;
2222 		if (gf->gf_group.ss_family == AF_INET6) {
2223 			struct sockaddr_in6 *sin6;
2224 
2225 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2226 			v6group = sin6->sin6_addr;
2227 		} else if (gf->gf_group.ss_family == AF_INET) {
2228 			struct sockaddr_in *sin;
2229 
2230 			sin = (struct sockaddr_in *)&gf->gf_group;
2231 			v4group = sin->sin_addr.s_addr;
2232 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2233 		} else {
2234 			return (EAFNOSUPPORT);
2235 		}
2236 		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2237 		    IPCL_ZONEID(connp), ipst, &err);
2238 	}
2239 	*illp = ill;
2240 	return (err);
2241 }
2242 
2243 /*
2244  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2245  * in using two stages, as the first copyin tells us the size of the attached
2246  * source buffer.  This function is called by ip_wput_nondata() after the
2247  * first copyin has completed; it figures out how big the second stage
2248  * needs to be, and kicks it off.
2249  *
2250  * In some cases (numsrc < 2), the second copyin is not needed as the
2251  * first one gets a complete structure containing 1 source addr.
2252  *
2253  * The function returns 0 if a second copyin has been started (i.e. there's
2254  * no more work to be done right now), or 1 if the second copyin is not
2255  * needed and ip_wput_nondata() can continue its processing.
2256  */
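/*
 * A worked example of the sizing (illustrative only, assuming the usual
 * definition of GROUP_FILTER_SIZE()): for SIOCSMSFILTER with gf_numsrc == 4,
 * the first copyin delivers sizeof (struct group_filter) bytes, which already
 * has room for one source address.  The second stage then starts at offset
 * sizeof (struct group_filter) and copies
 * GROUP_FILTER_SIZE(4) - sizeof (struct group_filter) bytes, i.e. space for
 * the remaining three sockaddr_storage entries.
 */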
2257 int
2258 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2259 {
2260 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2261 	int cmd = iocp->ioc_cmd;
2262 	/* validity of this checked in ip_wput_nondata() */
2263 	mblk_t *mp1 = mp->b_cont->b_cont;
2264 	int copysize = 0;
2265 	int offset;
2266 
2267 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2268 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2269 		if (gf->gf_numsrc >= 2) {
2270 			offset = sizeof (struct group_filter);
2271 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2272 		}
2273 	} else {
2274 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2275 		if (imsf->imsf_numsrc >= 2) {
2276 			offset = sizeof (struct ip_msfilter);
2277 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2278 		}
2279 	}
2280 	if (copysize > 0) {
2281 		mi_copyin_n(q, mp, offset, copysize);
2282 		return (0);
2283 	}
2284 	return (1);
2285 }
2286 
2287 /*
2288  * Handle the following optmgmt:
2289  *	IP_ADD_MEMBERSHIP		must not have joined already
2290  *	IPV6_JOIN_GROUP			must not have joined already
2291  *	MCAST_JOIN_GROUP		must not have joined already
2292  *	IP_BLOCK_SOURCE			must have joined already
2293  *	MCAST_BLOCK_SOURCE		must have joined already
2294  *	IP_JOIN_SOURCE_GROUP		may have joined already
2295  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2296  *
2297  * fmode and src parameters may be used to determine which option is
2298  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2299  * are functionally equivalent):
2300  *	opt			fmode			v6src
2301  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		unspecified
2302  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2303  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2304  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		IPv4-mapped addr
2305  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2306  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		IPv4-mapped addr
2307  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2308  *
2309  * Changing the filter mode is not allowed; if a matching ilg already
2310  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2311  *
2312  * Verifies that there is a source address of appropriate scope for
2313  * the group; if not, EADDRNOTAVAIL is returned.
2314  *
2315  * The interface to be used may be identified by an IPv4 address or by an
2316  * interface index.
2317  *
2318  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2319  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2320  * v6src is also v4-mapped.
2321  */
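/*
 * Illustrative userland sketch (not part of this file): the table above is
 * the kernel side of the RFC 3678 delta-style socket options.  For example,
 * joining an SSM channel on an IPv6 socket arrives here as MODE_IS_INCLUDE
 * with a specific v6src.  The helper name is hypothetical; the structure and
 * option names are assumed to be the standard <netinet/in.h> definitions.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <string.h>
 *
 *	int
 *	join_ssm_channel(int sock, uint32_t ifindex,
 *	    const struct sockaddr_in6 *group, const struct sockaddr_in6 *src)
 *	{
 *		struct group_source_req gsr;
 *
 *		(void) memset(&gsr, 0, sizeof (gsr));
 *		gsr.gsr_interface = ifindex;
 *		(void) memcpy(&gsr.gsr_group, group, sizeof (*group));
 *		(void) memcpy(&gsr.gsr_source, src, sizeof (*src));
 *		return (setsockopt(sock, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP,
 *		    &gsr, sizeof (gsr)));
 *	}
 */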
2322 int
2323 ip_opt_add_group(conn_t *connp, boolean_t checkonly,
2324     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2325     mcast_record_t fmode, const in6_addr_t *v6src)
2326 {
2327 	ill_t *ill;
2328 	char buf[INET6_ADDRSTRLEN];
2329 	int	err;
2330 
2331 	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
2332 	if (err != 0) {
2333 		ip1dbg(("ip_opt_add_group: no ill for group %s/"
2334 		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2335 		    sizeof (buf)), ifindex));
2336 		return (err);
2337 	}
2338 
2339 	if (checkonly) {
2340 		/*
2341 		 * Do not do the operation, just pretend to - new T_CHECK
2342 		 * semantics. The error return case above, if encountered, is
2343 		 * considered a good enough "check" here.
2344 		 */
2345 		ill_refrele(ill);
2346 		return (0);
2347 	}
2348 
2349 	mutex_enter(&ill->ill_mcast_serializer);
2350 	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
2351 	mutex_exit(&ill->ill_mcast_serializer);
2352 	ill_refrele(ill);
2353 	return (err);
2354 }
2355 
2356 /*
2357  * Common for IPv6 and IPv4.
2358  * Here we handle ilgs that are still attached to their original ill
2359  * (the one ifaddr/ifindex points at), as well as detached ones.
2360  * The detached ones might have been attached to some other ill.
2361  */
2362 static int
2363 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2364     ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2365     const in6_addr_t *v6src)
2366 {
2367 	ilg_t	*ilg;
2368 	boolean_t leaving;
2369 	ilm_t *ilm;
2370 	ill_t *ill;
2371 	int err = 0;
2372 
2373 retry:
2374 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2375 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2376 	if (ilg == NULL) {
2377 		rw_exit(&connp->conn_ilg_lock);
2378 		/*
2379 		 * Since we didn't have any ilg we now do the error checks
2380 		 * to determine the best errno.
2381 		 */
2382 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2383 		    &ill);
2384 		if (ill != NULL) {
2385 			/* The only error was a missing ilg for the group */
2386 			ill_refrele(ill);
2387 			err = EADDRNOTAVAIL;
2388 		}
2389 		return (err);
2390 	}
2391 
2392 	/* If the ilg is attached then we serialize using that ill */
2393 	ill = ilg->ilg_ill;
2394 	if (ill != NULL) {
2395 		/* Prevent the ill and ilg from being freed */
2396 		ill_refhold(ill);
2397 		ilg_refhold(ilg);
2398 		rw_exit(&connp->conn_ilg_lock);
2399 		mutex_enter(&ill->ill_mcast_serializer);
2400 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2401 		if (ilg->ilg_condemned) {
2402 			/* Disappeared */
2403 			ilg_refrele(ilg);
2404 			rw_exit(&connp->conn_ilg_lock);
2405 			mutex_exit(&ill->ill_mcast_serializer);
2406 			ill_refrele(ill);
2407 			goto retry;
2408 		}
2409 	}
2410 
2411 	/*
2412 	 * Decide if we're actually deleting the ilg or just removing a
2413 	 * source filter address; if just removing an addr, make sure we
2414 	 * aren't trying to change the filter mode, and that the addr is
2415 	 * actually in our filter list already.  If we're removing the
2416 	 * last src in an include list, just delete the ilg.
2417 	 */
2418 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2419 		leaving = B_TRUE;
2420 	} else {
2421 		if (fmode != ilg->ilg_fmode)
2422 			err = EINVAL;
2423 		else if (ilg->ilg_filter == NULL ||
2424 		    !list_has_addr(ilg->ilg_filter, v6src))
2425 			err = EADDRNOTAVAIL;
2426 		if (err != 0) {
2427 			if (ill != NULL)
2428 				ilg_refrele(ilg);
2429 			rw_exit(&connp->conn_ilg_lock);
2430 			goto done;
2431 		}
2432 		if (fmode == MODE_IS_INCLUDE &&
2433 		    ilg->ilg_filter->sl_numsrc == 1) {
2434 			leaving = B_TRUE;
2435 			v6src = NULL;
2436 		} else {
2437 			leaving = B_FALSE;
2438 		}
2439 	}
2440 	ilm = ilg->ilg_ilm;
2441 	if (leaving)
2442 		ilg->ilg_ilm = NULL;
2443 
2444 	ilg_delete(connp, ilg, v6src);
2445 	if (ill != NULL)
2446 		ilg_refrele(ilg);
2447 	rw_exit(&connp->conn_ilg_lock);
2448 
2449 	if (ilm != NULL) {
2450 		ASSERT(ill != NULL);
2451 		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2452 	}
2453 done:
2454 	if (ill != NULL) {
2455 		mutex_exit(&ill->ill_mcast_serializer);
2456 		ill_refrele(ill);
2457 	}
2458 	return (err);
2459 }
2460 
2461 /*
2462  * Handle the following optmgmt:
2463  *	IP_DROP_MEMBERSHIP		will leave
2464  *	IPV6_LEAVE_GROUP		will leave
2465  *	MCAST_LEAVE_GROUP		will leave
2466  *	IP_UNBLOCK_SOURCE		will not leave
2467  *	MCAST_UNBLOCK_SOURCE		will not leave
2468  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2469  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2470  *
2471  * fmode and src parameters may be used to determine which option is
2472  * being set, as follows:
2473  *	opt			 fmode			v6src
2474  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	unspecified
2475  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2476  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2477  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	IPv4-mapped addr
2478  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
2479  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	IPv4-mapped addr
2480  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
2481  *
2482  * Changing the filter mode is not allowed; if a matching ilg already
2483  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2484  *
2485  * The interface to be used may be identified by an IPv4 address or by an
2486  * interface index.
2487  *
2488  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2489  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2490  * v6src is also v4-mapped.
2491  */
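/*
 * Illustrative userland sketch (not part of this file, helper name
 * hypothetical): undoing an earlier block with MCAST_UNBLOCK_SOURCE, which
 * reaches this code as MODE_IS_EXCLUDE with a specific v6src and therefore
 * does not leave the group.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <string.h>
 *
 *	int
 *	unblock_source(int sock, uint32_t ifindex,
 *	    const struct sockaddr_in *group, const struct sockaddr_in *src)
 *	{
 *		struct group_source_req gsr;
 *
 *		(void) memset(&gsr, 0, sizeof (gsr));
 *		gsr.gsr_interface = ifindex;
 *		(void) memcpy(&gsr.gsr_group, group, sizeof (*group));
 *		(void) memcpy(&gsr.gsr_source, src, sizeof (*src));
 *		return (setsockopt(sock, IPPROTO_IP, MCAST_UNBLOCK_SOURCE,
 *		    &gsr, sizeof (gsr)));
 *	}
 */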
2492 int
2493 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2494     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2495     mcast_record_t fmode, const in6_addr_t *v6src)
2496 {
2497 
2498 	/*
2499 	 * In the normal case below we don't check for the ill existing.
2500 	 * Instead we look for an existing ilg in ip_opt_delete_group_excl.
2501 	 * If checkonly we just sanity check the arguments.
2502 	 */
2503 	if (checkonly) {
2504 		ill_t	*ill;
2505 		int	err;
2506 
2507 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2508 		    &ill);
2509 		/*
2510 		 * Do not do the operation, just pretend to - new T_CHECK semantics.
2511 		 * ip_opt_check is considered a good enough "check" here.
2512 		 */
2513 		if (ill != NULL)
2514 			ill_refrele(ill);
2515 		return (err);
2516 	}
2517 	return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2518 	    fmode, v6src));
2519 }
2520 
2521 /*
2522  * Group mgmt for upper conn that passes things down
2523  * to the interface multicast list (and DLPI).
2524  * These routines can handle new-style options that specify an interface name
2525  * as opposed to an interface address (needed for general handling of
2526  * unnumbered interfaces).
2527  */
2528 
2529 /*
2530  * Add a group to an upper conn group data structure and pass things down
2531  * to the interface multicast list (and DLPI)
2532  * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2533  */
2534 static int
2535 ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2536     uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2537 {
2538 	int	error = 0;
2539 	ilg_t	*ilg;
2540 	ilg_stat_t ilgstat;
2541 	slist_t	*new_filter = NULL;
2542 	int	new_fmode;
2543 	ilm_t *ilm;
2544 
2545 	if (!(ill->ill_flags & ILLF_MULTICAST))
2546 		return (EADDRNOTAVAIL);
2547 
2548 	/* conn_ilg_lock protects the ilg list. */
2549 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2550 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2551 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2552 
2553 	/*
2554 	 * Depending on the option we're handling, it may or may not be okay
2555 	 * if the group has already been added.  Figure out our rules based
2556 	 * on fmode and src params.  Also make sure there's enough room
2557 	 * in the filter if we're adding a source to an existing filter.
2558 	 */
2559 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2560 		/* we're joining for all sources, must not have joined */
2561 		if (ilg != NULL)
2562 			error = EADDRINUSE;
2563 	} else {
2564 		if (fmode == MODE_IS_EXCLUDE) {
2565 			/* (excl {addr}) => block source, must have joined */
2566 			if (ilg == NULL)
2567 				error = EADDRNOTAVAIL;
2568 		}
2569 		/* (incl {addr}) => join source, may have joined */
2570 
2571 		if (ilg != NULL &&
2572 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2573 			error = ENOBUFS;
2574 	}
2575 	if (error != 0) {
2576 		rw_exit(&connp->conn_ilg_lock);
2577 		return (error);
2578 	}
2579 
2580 	/*
2581 	 * Alloc buffer to copy new state into (see below) before
2582 	 * we make any changes, so we can bail if it fails.
2583 	 */
2584 	if ((new_filter = l_alloc()) == NULL) {
2585 		rw_exit(&connp->conn_ilg_lock);
2586 		return (ENOMEM);
2587 	}
2588 
2589 	if (ilg == NULL) {
2590 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2591 			rw_exit(&connp->conn_ilg_lock);
2592 			l_free(new_filter);
2593 			return (error);
2594 		}
2595 		ilg->ilg_ifindex = ifindex;
2596 		ilg->ilg_ifaddr = ifaddr;
2597 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2598 			ilg->ilg_filter = l_alloc();
2599 			if (ilg->ilg_filter == NULL) {
2600 				ilg_delete(connp, ilg, NULL);
2601 				rw_exit(&connp->conn_ilg_lock);
2602 				l_free(new_filter);
2603 				return (ENOMEM);
2604 			}
2605 			ilg->ilg_filter->sl_numsrc = 1;
2606 			ilg->ilg_filter->sl_addr[0] = *v6src;
2607 		}
2608 		ilgstat = ILGSTAT_NEW;
2609 		ilg->ilg_v6group = *v6group;
2610 		ilg->ilg_fmode = fmode;
2611 		ilg->ilg_ill = ill;
2612 	} else {
2613 		int index;
2614 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2615 			rw_exit(&connp->conn_ilg_lock);
2616 			l_free(new_filter);
2617 			return (EINVAL);
2618 		}
2619 		if (ilg->ilg_filter == NULL) {
2620 			ilg->ilg_filter = l_alloc();
2621 			if (ilg->ilg_filter == NULL) {
2622 				rw_exit(&connp->conn_ilg_lock);
2623 				l_free(new_filter);
2624 				return (ENOMEM);
2625 			}
2626 		}
2627 		if (list_has_addr(ilg->ilg_filter, v6src)) {
2628 			rw_exit(&connp->conn_ilg_lock);
2629 			l_free(new_filter);
2630 			return (EADDRNOTAVAIL);
2631 		}
2632 		ilgstat = ILGSTAT_CHANGE;
2633 		index = ilg->ilg_filter->sl_numsrc++;
2634 		ilg->ilg_filter->sl_addr[index] = *v6src;
2635 	}
2636 
2637 	/*
2638 	 * Save copy of ilg's filter state to pass to other functions,
2639 	 * so we can release conn_ilg_lock now.
2640 	 */
2641 	new_fmode = ilg->ilg_fmode;
2642 	l_copy(ilg->ilg_filter, new_filter);
2643 
2644 	rw_exit(&connp->conn_ilg_lock);
2645 
2646 	/*
2647 	 * Now update the ill. We wait to do this until after the ilg
2648 	 * has been updated because we need to update the src filter
2649 	 * info for the ill, which involves looking at the status of
2650 	 * all the ilgs associated with this group/interface pair.
2651 	 */
2652 	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2653 	    new_fmode, new_filter, &error);
2654 
2655 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2656 	/*
2657 	 * Must look up the ilg again since we've not been holding
2658 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2659 	 * having called conn_update_ill, which can run once we dropped the
2660 	 * conn_ilg_lock above.
2661 	 */
2662 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2663 	if (ilg == NULL) {
2664 		rw_exit(&connp->conn_ilg_lock);
2665 		if (ilm != NULL) {
2666 			(void) ip_delmulti_serial(ilm, B_FALSE,
2667 			    (ilgstat == ILGSTAT_NEW));
2668 		}
2669 		error = ENXIO;
2670 		goto free_and_exit;
2671 	}
2672 
2673 	if (ilm != NULL) {
2674 		/* Succeeded. Update the ilg to point at the ilm */
2675 		if (ilgstat == ILGSTAT_NEW) {
2676 			ASSERT(ilg->ilg_ilm == NULL);
2677 			ilg->ilg_ilm = ilm;
2678 			ilm->ilm_ifaddr = ifaddr;	/* For netstat */
2679 		} else {
2680 			/*
2681 			 * ip_addmulti didn't get a held ilm for
2682 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2683 			 */
2684 			ASSERT(ilg->ilg_ilm == ilm);
2685 		}
2686 	} else {
2687 		ASSERT(error != 0);
2688 		/*
2689 		 * Failed to allocate the ilm.
2690 		 * Need to undo what we did before calling ip_addmulti().
2691 		 * If ENETDOWN, just clear ilg_ill so that we will rejoin
2692 		 * when the ill comes back; don't report ENETDOWN to the
2693 		 * application.
2694 		 */
2695 		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2696 			ilg->ilg_ill = NULL;
2697 			error = 0;
2698 		} else {
2699 			in6_addr_t delsrc =
2700 			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2701 
2702 			ilg_delete(connp, ilg, &delsrc);
2703 		}
2704 	}
2705 	rw_exit(&connp->conn_ilg_lock);
2706 
2707 free_and_exit:
2708 	l_free(new_filter);
2709 	return (error);
2710 }
2711 
2712 /*
2713  * Find an IPv4 ilg matching group, ill and source.
2714  * The group and source can't be INADDR_ANY here so no need to translate to
2715  * the unspecified IPv6 address.
2716  */
2717 boolean_t
2718 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2719     ill_t *ill)
2720 {
2721 	in6_addr_t v6group, v6src;
2722 	int i;
2723 	boolean_t isinlist;
2724 	ilg_t *ilg;
2725 
2726 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2727 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2728 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2729 		if (ilg->ilg_condemned)
2730 			continue;
2731 
2732 		/* ilg_ill could be NULL if an add is in progress */
2733 		if (ilg->ilg_ill != ill)
2734 			continue;
2735 
2736 		/* The callers use upper ill for IPMP */
2737 		ASSERT(!IS_UNDER_IPMP(ill));
2738 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2739 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2740 				/* no source filter, so this is a match */
2741 				rw_exit(&connp->conn_ilg_lock);
2742 				return (B_TRUE);
2743 			}
2744 			break;
2745 		}
2746 	}
2747 	if (ilg == NULL) {
2748 		rw_exit(&connp->conn_ilg_lock);
2749 		return (B_FALSE);
2750 	}
2751 
2752 	/*
2753 	 * we have an ilg with matching ill and group; but
2754 	 * the ilg has a source list that we must check.
2755 	 */
2756 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2757 	isinlist = B_FALSE;
2758 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2759 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2760 			isinlist = B_TRUE;
2761 			break;
2762 		}
2763 	}
2764 
2765 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2766 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2767 		rw_exit(&connp->conn_ilg_lock);
2768 		return (B_TRUE);
2769 	}
2770 	rw_exit(&connp->conn_ilg_lock);
2771 	return (B_FALSE);
2772 }
2773 
2774 /*
2775  * Find an IPv6 ilg matching group, ill, and source
2776  */
2777 boolean_t
2778 conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2779     const in6_addr_t *v6src, ill_t *ill)
2780 {
2781 	int i;
2782 	boolean_t isinlist;
2783 	ilg_t *ilg;
2784 
2785 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2786 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2787 		if (ilg->ilg_condemned)
2788 			continue;
2789 
2790 		/* ilg_ill could be NULL if an add is in progress */
2791 		if (ilg->ilg_ill != ill)
2792 			continue;
2793 
2794 		/* The callers use upper ill for IPMP */
2795 		ASSERT(!IS_UNDER_IPMP(ill));
2796 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2797 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2798 				/* no source filter, so this is a match */
2799 				rw_exit(&connp->conn_ilg_lock);
2800 				return (B_TRUE);
2801 			}
2802 			break;
2803 		}
2804 	}
2805 	if (ilg == NULL) {
2806 		rw_exit(&connp->conn_ilg_lock);
2807 		return (B_FALSE);
2808 	}
2809 
2810 	/*
2811 	 * we have an ilg with matching ill and group; but
2812 	 * the ilg has a source list that we must check.
2813 	 */
2814 	isinlist = B_FALSE;
2815 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2816 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2817 			isinlist = B_TRUE;
2818 			break;
2819 		}
2820 	}
2821 
2822 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2823 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2824 		rw_exit(&connp->conn_ilg_lock);
2825 		return (B_TRUE);
2826 	}
2827 	rw_exit(&connp->conn_ilg_lock);
2828 	return (B_FALSE);
2829 }
2830 
2831 /*
2832  * Find an ilg matching group and ifaddr/ifindex.
2833  * We check both ifaddr and ifindex even though at most one of them
2834  * will be non-zero; that way we always find the right one.
2835  */
2836 static ilg_t *
2837 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2838     uint_t ifindex)
2839 {
2840 	ilg_t	*ilg;
2841 
2842 	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
2843 
2844 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2845 		if (ilg->ilg_condemned)
2846 			continue;
2847 
2848 		if (ilg->ilg_ifaddr == ifaddr &&
2849 		    ilg->ilg_ifindex == ifindex &&
2850 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
2851 			return (ilg);
2852 	}
2853 	return (NULL);
2854 }
2855 
2856 /*
2857  * If a source address is passed in (src != NULL and src is not
2858  * unspecified), remove the specified src addr from the given ilg's
2859  * filter list, else delete the ilg.
2860  */
2861 static void
2862 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
2863 {
2864 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
2865 	ASSERT(ilg->ilg_ptpn != NULL);
2866 	ASSERT(!ilg->ilg_condemned);
2867 
2868 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
2869 		FREE_SLIST(ilg->ilg_filter);
2870 		ilg->ilg_filter = NULL;
2871 
2872 		ASSERT(ilg->ilg_ilm == NULL);
2873 		ilg->ilg_ill = NULL;
2874 		ilg->ilg_condemned = B_TRUE;
2875 
2876 		/* ilg_inactive will unlink from the list */
2877 		ilg_refrele(ilg);
2878 	} else {
2879 		l_remove(ilg->ilg_filter, src);
2880 	}
2881 }
2882 
2883 /*
2884  * Called from conn close. No new ilg can be added or removed
2885  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
2886  * will return error if conn has started closing.
2886  * will return an error if the conn has started closing.
2888  * We handle locking as follows.
2889  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
2890  * proceed with the ilm part of the delete we hold a reference on both the ill
2891  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
2892  * being deleted.
2893  *
2894  * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
2895  * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
2896  * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
2897  * that case we delete the ilg here, which makes ilg_add discover that the ilg
2898  * has disappeared when ip_addmulti returns, so it will discard the ilm it just
2899  * added.
2900  */
2901 void
2902 ilg_delete_all(conn_t *connp)
2903 {
2904 	ilg_t	*ilg, *next_ilg, *held_ilg;
2905 	ilm_t	*ilm;
2906 	ill_t	*ill;
2907 	boolean_t need_refrele;
2908 
2909 	/*
2910 	 * Cannot run if there is a conn_update_ill already running.
2911 	 * Wait for it to complete. Caller should have already set CONN_CLOSING,
2912 	 * which prevents any new threads from running in conn_update_ill.
2913 	 */
2914 	mutex_enter(&connp->conn_lock);
2915 	ASSERT(connp->conn_state_flags & CONN_CLOSING);
2916 	while (connp->conn_state_flags & CONN_UPDATE_ILL)
2917 		cv_wait(&connp->conn_cv, &connp->conn_lock);
2918 	mutex_exit(&connp->conn_lock);
2919 
2920 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2921 	ilg = connp->conn_ilg;
2922 	held_ilg = NULL;
2923 	while (ilg != NULL) {
2924 		if (ilg->ilg_condemned) {
2925 			ilg = ilg->ilg_next;
2926 			continue;
2927 		}
2928 		/* If the ilg is detached then no need to serialize */
2929 		if (ilg->ilg_ilm == NULL) {
2930 			next_ilg = ilg->ilg_next;
2931 			ilg_delete(connp, ilg, NULL);
2932 			ilg = next_ilg;
2933 			continue;
2934 		}
2935 		ill = ilg->ilg_ilm->ilm_ill;
2936 
2937 		/*
2938 		 * In order to serialize on the ill we try to enter the
2939 		 * serializer; if that fails we unlock and relock, and then
2940 		 * check that we still have an ilm.
2941 		 */
2942 		need_refrele = B_FALSE;
2943 		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
2944 			ill_refhold(ill);
2945 			need_refrele = B_TRUE;
2946 			ilg_refhold(ilg);
2947 			if (held_ilg != NULL)
2948 				ilg_refrele(held_ilg);
2949 			held_ilg = ilg;
2950 			rw_exit(&connp->conn_ilg_lock);
2951 			mutex_enter(&ill->ill_mcast_serializer);
2952 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2953 			if (ilg->ilg_condemned) {
2954 				ilg = ilg->ilg_next;
2955 				goto next;
2956 			}
2957 		}
2958 		ilm = ilg->ilg_ilm;
2959 		ilg->ilg_ilm = NULL;
2960 		next_ilg = ilg->ilg_next;
2961 		ilg_delete(connp, ilg, NULL);
2962 		ilg = next_ilg;
2963 		rw_exit(&connp->conn_ilg_lock);
2964 
2965 		if (ilm != NULL)
2966 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2967 
2968 	next:
2969 		mutex_exit(&ill->ill_mcast_serializer);
2970 		if (need_refrele) {
2971 			/* Drop ill reference while we hold no locks */
2972 			ill_refrele(ill);
2973 		}
2974 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2975 	}
2976 	if (held_ilg != NULL)
2977 		ilg_refrele(held_ilg);
2978 	rw_exit(&connp->conn_ilg_lock);
2979 }
2980 
2981 /*
2982  * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
2983  * that a subsequent attempt can attach it.
2984  * Drops and reacquires conn_ilg_lock.
2985  */
2986 static void
2987 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
2988 {
2989 	ilg_stat_t	ilgstat;
2990 	slist_t		*new_filter;
2991 	int		new_fmode;
2992 	in6_addr_t	v6group;
2993 	ipaddr_t	ifaddr;
2994 	uint_t		ifindex;
2995 	ilm_t		*ilm;
2996 	int		error = 0;
2997 
2998 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
2999 	/*
3000 	 * Alloc buffer to copy new state into (see below) before
3001 	 * we make any changes, so we can bail if it fails.
3002 	 */
3003 	if ((new_filter = l_alloc()) == NULL)
3004 		return;
3005 
3006 	/*
3007 	 * Save copy of ilg's filter state to pass to other functions, so
3008 	 * we can release conn_ilg_lock now.
3009 	 * Set ilg_ill so that an unplumb can find us.
3010 	 */
3011 	new_fmode = ilg->ilg_fmode;
3012 	l_copy(ilg->ilg_filter, new_filter);
3013 	v6group = ilg->ilg_v6group;
3014 	ifaddr = ilg->ilg_ifaddr;
3015 	ifindex = ilg->ilg_ifindex;
3016 	ilgstat = ILGSTAT_NEW;
3017 
3018 	ilg->ilg_ill = ill;
3019 	ASSERT(ilg->ilg_ilm == NULL);
3020 	rw_exit(&connp->conn_ilg_lock);
3021 
3022 	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3023 	    new_fmode, new_filter, &error);
3024 	l_free(new_filter);
3025 
3026 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3027 	/*
3028 	 * Must look up the ilg again since we've not been holding
3029 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3030 	 * having called conn_update_ill, which can run once we dropped the
3031 	 * conn_ilg_lock above.
3032 	 */
3033 	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3034 	if (ilg == NULL) {
3035 		if (ilm != NULL) {
3036 			rw_exit(&connp->conn_ilg_lock);
3037 			(void) ip_delmulti_serial(ilm, B_FALSE,
3038 			    (ilgstat == ILGSTAT_NEW));
3039 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3040 		}
3041 		return;
3042 	}
3043 	if (ilm == NULL) {
3044 		ilg->ilg_ill = NULL;
3045 		return;
3046 	}
3047 	ASSERT(ilg->ilg_ilm == NULL);
3048 	ilg->ilg_ilm = ilm;
3049 	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
3050 }
3051 
3052 /*
3053  * Called when an ill is unplumbed to make sure that there are no
3054  * dangling conn references to that ill. In that case ill is non-NULL and
3055  * we make sure we remove all references to it.
3056  * Also called when we should revisit the ilg_ill used for multicast
3057  * memberships, in which case ill is NULL.
3058  *
3059  * conn is held by caller.
3060  *
3061  * Note that ipcl_walk only walks conns that are not yet condemned.
3062  * Condemned conns can't be refheld. For this reason, a conn must become clean
3063  * first, i.e. it must not refer to any ill/ire, and only then set the
3064  * condemned flag.
3065  *
3066  * We leave ixa_multicast_ifindex in place. We prefer dropping
3067  * packets instead of sending them out the wrong interface.
3068  *
3069  * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3070  * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3071  * ilg_ifindex are zero, indicating that the system should pick the interface,
3072  * then we attempt to reselect the ill and join on it.
3073  *
3074  * Locking notes:
3075  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3076  * proceed with the ilm part of the delete we hold a reference on both the ill
3077  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3078  * being deleted.
3079  *
3080  * Note: if this function is called when new ill/ipif's arrive or change status
3081  * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3082  * a NULL ilg_ill to an ill/ilm.
3083  */
3084 static void
3085 conn_update_ill(conn_t *connp, caddr_t arg)
3086 {
3087 	ill_t	*ill = (ill_t *)arg;
3088 
3089 	/*
3090 	 * We have to prevent ip_close/ilg_delete_all from running at
3091 	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
3092 	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
3093 	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3094 	 * If ilg_delete_all got here first, then we have nothing to do.
3095 	 */
3096 	mutex_enter(&connp->conn_lock);
3097 	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3098 		/* Caller has to wait for ill_ilm_cnt to drop to zero */
3099 		mutex_exit(&connp->conn_lock);
3100 		return;
3101 	}
3102 	connp->conn_state_flags |= CONN_UPDATE_ILL;
3103 	mutex_exit(&connp->conn_lock);
3104 
3105 	if (ill != NULL)
3106 		ilg_check_detach(connp, ill);
3107 
3108 	ilg_check_reattach(connp);
3109 
3110 	/* Do we need to wake up a thread in ilg_delete_all? */
3111 	mutex_enter(&connp->conn_lock);
3112 	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3113 	if (connp->conn_state_flags & CONN_CLOSING)
3114 		cv_broadcast(&connp->conn_cv);
3115 	mutex_exit(&connp->conn_lock);
3116 }
3117 
3118 /* Detach from an ill that is going away */
3119 static void
3120 ilg_check_detach(conn_t *connp, ill_t *ill)
3121 {
3122 	char	group_buf[INET6_ADDRSTRLEN];
3123 	ilg_t	*ilg, *held_ilg;
3124 	ilm_t	*ilm;
3125 
3126 	mutex_enter(&ill->ill_mcast_serializer);
3127 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3128 	held_ilg = NULL;
3129 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3130 		if (ilg->ilg_condemned)
3131 			continue;
3132 
3133 		if (ilg->ilg_ill != ill)
3134 			continue;
3135 
3136 		/* Detach from current ill */
3137 		ip1dbg(("ilg_check_detach: detach %s on %s\n",
3138 		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3139 		    group_buf, sizeof (group_buf)),
3140 		    ilg->ilg_ill->ill_name));
3141 
3142 		/* Detach this ilg from the ill/ilm */
3143 		ilm = ilg->ilg_ilm;
3144 		ilg->ilg_ilm = NULL;
3145 		ilg->ilg_ill = NULL;
3146 		if (ilm == NULL)
3147 			continue;
3148 
3149 		/* Prevent ilg from disappearing */
3150 		ilg_transfer_hold(held_ilg, ilg);
3151 		held_ilg = ilg;
3152 		rw_exit(&connp->conn_ilg_lock);
3153 
3154 		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3155 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3156 	}
3157 	if (held_ilg != NULL)
3158 		ilg_refrele(held_ilg);
3159 	rw_exit(&connp->conn_ilg_lock);
3160 	mutex_exit(&ill->ill_mcast_serializer);
3161 }
3162 
3163 /*
3164  * Check if there is a place to attach the conn_ilgs. We do this for both
3165  * detached ilgs and attached ones, since for the latter there could be
3166  * a better ill to attach them to.
3167  */
3168 static void
3169 ilg_check_reattach(conn_t *connp)
3170 {
3171 	ill_t	*ill;
3172 	char	group_buf[INET6_ADDRSTRLEN];
3173 	ilg_t	*ilg, *held_ilg;
3174 	ilm_t	*ilm;
3175 	zoneid_t zoneid = IPCL_ZONEID(connp);
3176 	int	error;
3177 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3178 
3179 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3180 	held_ilg = NULL;
3181 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3182 		if (ilg->ilg_condemned)
3183 			continue;
3184 
3185 		/* Check if the conn_ill matches what we would pick now */
3186 		/* Check if the ilg_ill matches what we would pick now */
3187 		    ilg->ilg_ifindex, zoneid, ipst, &error);
3188 
3189 		/*
3190 		 * Make sure the ill is usable for multicast and that
3191 		 * we can send the DL_ADDMULTI_REQ before we create an
3192 		 * ilm.
3193 		 */
3194 		if (ill != NULL &&
3195 		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3196 			/* Drop locks across ill_refrele */
3197 			ilg_transfer_hold(held_ilg, ilg);
3198 			held_ilg = ilg;
3199 			rw_exit(&connp->conn_ilg_lock);
3200 			ill_refrele(ill);
3201 			ill = NULL;
3202 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3203 			/* Note that ilg could have become condemned */
3204 		}
3205 
3206 		/* Is the ill unchanged, even if both are NULL? */
3207 		if (ill == ilg->ilg_ill) {
3208 			if (ill != NULL) {
3209 				/* Drop locks across ill_refrele */
3210 				ilg_transfer_hold(held_ilg, ilg);
3211 				held_ilg = ilg;
3212 				rw_exit(&connp->conn_ilg_lock);
3213 				ill_refrele(ill);
3214 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3215 			}
3216 			continue;
3217 		}
3218 
3219 		/* Something changed; detach from old first if needed */
3220 		if (ilg->ilg_ill != NULL) {
3221 			ill_t *ill2 = ilg->ilg_ill;
3222 			boolean_t need_refrele = B_FALSE;
3223 
3224 			/*
3225 			 * In order to serialize on the ill we try to enter
3226 			 * and if that fails we unlock and relock.
3227 			 */
3228 			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
3229 				ill_refhold(ill2);
3230 				need_refrele = B_TRUE;
3231 				ilg_transfer_hold(held_ilg, ilg);
3232 				held_ilg = ilg;
3233 				rw_exit(&connp->conn_ilg_lock);
3234 				mutex_enter(&ill2->ill_mcast_serializer);
3235 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3236 				/* Note that ilg could have become condemned */
3237 			}
3238 			/*
3239 			 * Check that nobody else re-attached the ilg while we
3240 			 * dropped the lock.
3241 			 */
3242 			if (ilg->ilg_ill == ill2) {
3243 				ASSERT(!ilg->ilg_condemned);
3244 				/* Detach from current ill */
3245 				ip1dbg(("ilg_check_reattach: detach %s/%s\n",
3246 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3247 				    group_buf, sizeof (group_buf)),
3248 				    ill2->ill_name));
3249 
3250 				ilm = ilg->ilg_ilm;
3251 				ilg->ilg_ilm = NULL;
3252 				ilg->ilg_ill = NULL;
3253 			} else {
3254 				ilm = NULL;
3255 			}
3256 			rw_exit(&connp->conn_ilg_lock);
3257 			if (ilm != NULL)
3258 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3259 			mutex_exit(&ill2->ill_mcast_serializer);
3260 			if (need_refrele) {
3261 				/* Drop ill reference while we hold no locks */
3262 				ill_refrele(ill2);
3263 			}
3264 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3265 			/*
3266 			 * While we dropped conn_ilg_lock some other thread
3267 			 * could have attached this ilg, thus we check again.
3268 			 */
3269 			if (ilg->ilg_ill != NULL) {
3270 				if (ill != NULL) {
3271 					/* Drop locks across ill_refrele */
3272 					ilg_transfer_hold(held_ilg, ilg);
3273 					held_ilg = ilg;
3274 					rw_exit(&connp->conn_ilg_lock);
3275 					ill_refrele(ill);
3276 					rw_enter(&connp->conn_ilg_lock,
3277 					    RW_WRITER);
3278 				}
3279 				continue;
3280 			}
3281 		}
3282 		if (ill != NULL) {
3283 			/*
3284 			 * In order to serialize on the ill we try to enter
3285 			 * and if that fails we unlock and relock.
3286 			 */
3287 			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3288 				/* Already have a refhold on ill */
3289 				ilg_transfer_hold(held_ilg, ilg);
3290 				held_ilg = ilg;
3291 				rw_exit(&connp->conn_ilg_lock);
3292 				mutex_enter(&ill->ill_mcast_serializer);
3293 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3294 				/* Note that ilg could have become condemned */
3295 			}
3296 
3297 			/*
3298 			 * Check that nobody else attached the ilg and that
3299 			 * it wasn't condemned while we dropped the lock.
3300 			 */
3301 			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3302 				/*
3303 				 * Attach to the new ill. Can fail in which
3304 				 * case ilg_ill will remain NULL. ilg_attach
3305 				 * drops and reacquires conn_ilg_lock.
3306 				 */
3307 				ip1dbg(("ilg_check_reattach: attach %s/%s\n",
3308 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3309 				    group_buf, sizeof (group_buf)),
3310 				    ill->ill_name));
3311 				ilg_attach(connp, ilg, ill);
3312 				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3313 			}
3314 			mutex_exit(&ill->ill_mcast_serializer);
3315 			/* Drop locks across ill_refrele */
3316 			ilg_transfer_hold(held_ilg, ilg);
3317 			held_ilg = ilg;
3318 			rw_exit(&connp->conn_ilg_lock);
3319 			ill_refrele(ill);
3320 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3321 		}
3322 	}
3323 	if (held_ilg != NULL)
3324 		ilg_refrele(held_ilg);
3325 	rw_exit(&connp->conn_ilg_lock);
3326 }
3327 
3328 /*
3329  * Called when an ill is unplumbed to make sure that there are no
3330  * dangling conn references to that ill. In that case ill is non-NULL and
3331  * we make sure we remove all references to it.
3332  * Also called when we should revisit the ilg_ill used for multicast
3333  * memberships, in which case ill is NULL.
3334  */
3335 void
3336 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3337 {
3338 	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3339 }
3340