xref: /titanic_50/usr/src/uts/common/inet/ip/ip_multi.c (revision 36802407db97b1bcd32a63b16112e95ffcc5bb98)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 /* Copyright (c) 1990 Mentat Inc. */
26 
27 #include <sys/types.h>
28 #include <sys/stream.h>
29 #include <sys/dlpi.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/ddi.h>
33 #include <sys/cmn_err.h>
34 #include <sys/sdt.h>
35 #include <sys/zone.h>
36 
37 #include <sys/param.h>
38 #include <sys/socket.h>
39 #include <sys/sockio.h>
40 #include <net/if.h>
41 #include <sys/systm.h>
42 #include <sys/strsubr.h>
43 #include <net/route.h>
44 #include <netinet/in.h>
45 #include <net/if_dl.h>
46 #include <netinet/ip6.h>
47 #include <netinet/icmp6.h>
48 
49 #include <inet/common.h>
50 #include <inet/mi.h>
51 #include <inet/nd.h>
52 #include <inet/arp.h>
53 #include <inet/ip.h>
54 #include <inet/ip6.h>
55 #include <inet/ip_if.h>
56 #include <inet/ip_ndp.h>
57 #include <inet/ip_multi.h>
58 #include <inet/ipclassifier.h>
59 #include <inet/ipsec_impl.h>
60 #include <inet/sctp_ip.h>
61 #include <inet/ip_listutils.h>
62 #include <inet/udp_impl.h>
63 
64 /* igmpv3/mldv2 source filter manipulation */
65 static void	ilm_bld_flists(conn_t *conn, void *arg);
66 static void	ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
67     slist_t *flist);
68 
69 static ilm_t	*ilm_add(ill_t *ill, const in6_addr_t *group,
70     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
71     zoneid_t zoneid);
72 static void	ilm_delete(ilm_t *ilm);
73 static int	ilm_numentries(ill_t *, const in6_addr_t *);
74 
75 static ilm_t	*ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
76     ilg_stat_t, mcast_record_t, slist_t *, int *);
77 static ilm_t	*ip_addmulti_impl(const in6_addr_t *, ill_t *,
78     zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
79 static int	ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
80 static int	ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
81 
82 static int	ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
83     t_uscalar_t);
84 static ilg_t	*ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
85     uint_t ifindex);
86 
87 static int	ilg_add(conn_t *connp, const in6_addr_t *group,
88     ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
89     const in6_addr_t *v6src);
90 static void	ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
91 static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
92     uint32_t *addr_lenp, uint32_t *addr_offp);
93 static int	ip_opt_delete_group_excl(conn_t *connp,
94     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
95     mcast_record_t fmode, const in6_addr_t *v6src);
96 
97 static	ilm_t	*ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
98 
99 static int	ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
100     ill_t **);
101 
102 static void	ilg_check_detach(conn_t *, ill_t *);
103 static void	ilg_check_reattach(conn_t *, ill_t *);
104 
105 /*
106  * MT notes:
107  *
108  * Multicast joins operate on both the ilg and ilm structures. Multiple
109  * threads operating on a conn (socket) trying to do multicast joins
110  * need to synchronize when operating on the ilg. Multiple threads
111  * potentially operating on different conn (socket endpoints) trying to
112  * do multicast joins could eventually end up trying to manipulate the
113  * ilm simultaneously and need to synchronize on the access to the ilm.
114  * The access and lookup of the ilm, as well as other ill multicast state,
115  * is under ill_mcast_lock.
116  * The modifications and lookup of ilg entries is serialized using conn_ilg_lock
117  * rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
118  *
119  * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
120  * never the other way around.
121  *
122  * An ilm is an IP data structure used to track multicast join/leave.
123  * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
124  * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
125  * referencing the ilm.
126  * The modifications and lookup of ilm entries is serialized using the
127  * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
128  * of the ilm state.
129  * ilms are created / destroyed only as writer. ilms
130  * are not passed around. The datapath (anything outside of this file
131  * and igmp.c) use functions that do not return ilms - just the number
132  * of members. So we don't need a dynamic refcount of the number
133  * of threads holding reference to an ilm.
134  *
135  * In the cases where we serially access the ilg and ilm, which happens when
136  * we handle the applications requests to join or leave groups and sources,
137  * we use the ill_mcast_serializer mutex to ensure that a multithreaded
138  * application which does concurrent joins and/or leaves on the same group on
139  * the same socket always results in a consistent order for the ilg and ilm
140  * modifications.
141  *
142  * When a multicast operation results in needing to send a message to
143  * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
144  * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
145  * to send IGMP/MLD IP packets to avoid dropping the lock just to send a packet.
146  */
147 
/*
 * Obtain room for `number' objects of type `structure' from mi_zalloc()
 * and hand the result back as a pointer to that type.
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc((number) * sizeof (structure)))
150 
151 /*
152  * Caller must ensure that the ilg has not been condemned
153  * The condemned flag is only set in ilg_delete under conn_ilg_lock.
154  *
155  * The caller must hold conn_ilg_lock as writer.
156  */
157 static void
158 ilg_refhold(ilg_t *ilg)
159 {
160 	ASSERT(ilg->ilg_refcnt != 0);
161 	ASSERT(!ilg->ilg_condemned);
162 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
163 
164 	ilg->ilg_refcnt++;
165 }
166 
167 static void
168 ilg_inactive(ilg_t *ilg)
169 {
170 	ASSERT(ilg->ilg_ill == NULL);
171 	ASSERT(ilg->ilg_ilm == NULL);
172 	ASSERT(ilg->ilg_filter == NULL);
173 	ASSERT(ilg->ilg_condemned);
174 
175 	/* Unlink from list */
176 	*ilg->ilg_ptpn = ilg->ilg_next;
177 	if (ilg->ilg_next != NULL)
178 		ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
179 	ilg->ilg_next = NULL;
180 	ilg->ilg_ptpn = NULL;
181 
182 	ilg->ilg_connp = NULL;
183 	kmem_free(ilg, sizeof (*ilg));
184 }
185 
186 /*
187  * The caller must hold conn_ilg_lock as writer.
188  */
189 static void
190 ilg_refrele(ilg_t *ilg)
191 {
192 	ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
193 	ASSERT(ilg->ilg_refcnt != 0);
194 	if (--ilg->ilg_refcnt == 0)
195 		ilg_inactive(ilg);
196 }
197 
198 /*
199  * Acquire reference on ilg and drop reference on held_ilg.
200  * In the case when held_ilg is the same as ilg we already have
201  * a reference, but the held_ilg might be condemned. In that case
202  * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
203  * that the ilg isn't condemned.
204  */
205 static void
206 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
207 {
208 	if (held_ilg == ilg)
209 		return;
210 
211 	ilg_refhold(ilg);
212 	if (held_ilg != NULL)
213 		ilg_refrele(held_ilg);
214 }
215 
216 /*
217  * Allocate a new ilg_t and links it into conn_ilg.
218  * Returns NULL on failure, in which case `*errp' will be
219  * filled in with the reason.
220  *
221  * Assumes connp->conn_ilg_lock is held.
222  */
223 static ilg_t *
224 conn_ilg_alloc(conn_t *connp, int *errp)
225 {
226 	ilg_t *ilg;
227 
228 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
229 
230 	/*
231 	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
232 	 * create any ilgs.
233 	 */
234 	if (connp->conn_state_flags & CONN_CLOSING) {
235 		*errp = EINVAL;
236 		return (NULL);
237 	}
238 
239 	ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
240 	if (ilg == NULL) {
241 		*errp = ENOMEM;
242 		return (NULL);
243 	}
244 
245 	ilg->ilg_refcnt = 1;
246 
247 	/* Insert at head */
248 	if (connp->conn_ilg != NULL)
249 		connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
250 	ilg->ilg_next = connp->conn_ilg;
251 	ilg->ilg_ptpn = &connp->conn_ilg;
252 	connp->conn_ilg = ilg;
253 
254 	ilg->ilg_connp = connp;
255 	return (ilg);
256 }
257 
258 typedef struct ilm_fbld_s {
259 	ilm_t		*fbld_ilm;
260 	int		fbld_in_cnt;
261 	int		fbld_ex_cnt;
262 	slist_t		fbld_in;
263 	slist_t		fbld_ex;
264 	boolean_t	fbld_in_overflow;
265 } ilm_fbld_t;
266 
267 /*
268  * Caller must hold ill_mcast_lock
269  */
270 static void
271 ilm_bld_flists(conn_t *connp, void *arg)
272 {
273 	ilg_t *ilg;
274 	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
275 	ilm_t *ilm = fbld->fbld_ilm;
276 	in6_addr_t *v6group = &ilm->ilm_v6addr;
277 
278 	if (connp->conn_ilg == NULL)
279 		return;
280 
281 	/*
282 	 * Since we can't break out of the ipcl_walk once started, we still
283 	 * have to look at every conn.  But if we've already found one
284 	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
285 	 * ilgs--that will be our state.
286 	 */
287 	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
288 		return;
289 
290 	/*
291 	 * Check this conn's ilgs to see if any are interested in our
292 	 * ilm (group, interface match).  If so, update the master
293 	 * include and exclude lists we're building in the fbld struct
294 	 * with this ilg's filter info.
295 	 *
296 	 * Note that the caller has already serialized on the ill we care
297 	 * about.
298 	 */
299 	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));
300 
301 	rw_enter(&connp->conn_ilg_lock, RW_READER);
302 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
303 		if (ilg->ilg_condemned)
304 			continue;
305 
306 		/*
307 		 * Since we are under the ill_mcast_serializer we know
308 		 * that any ilg+ilm operations on this ilm have either
309 		 * not started or completed, except for the last ilg
310 		 * (the one that caused us to be called) which doesn't
311 		 * have ilg_ilm set yet. Hence we compare using ilg_ill
312 		 * and the address.
313 		 */
314 		if ((ilg->ilg_ill == ilm->ilm_ill) &&
315 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
316 			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
317 				fbld->fbld_in_cnt++;
318 				if (!fbld->fbld_in_overflow)
319 					l_union_in_a(&fbld->fbld_in,
320 					    ilg->ilg_filter,
321 					    &fbld->fbld_in_overflow);
322 			} else {
323 				fbld->fbld_ex_cnt++;
324 				/*
325 				 * On the first exclude list, don't try to do
326 				 * an intersection, as the master exclude list
327 				 * is intentionally empty.  If the master list
328 				 * is still empty on later iterations, that
329 				 * means we have at least one ilg with an empty
330 				 * exclude list, so that should be reflected
331 				 * when we take the intersection.
332 				 */
333 				if (fbld->fbld_ex_cnt == 1) {
334 					if (ilg->ilg_filter != NULL)
335 						l_copy(ilg->ilg_filter,
336 						    &fbld->fbld_ex);
337 				} else {
338 					l_intersection_in_a(&fbld->fbld_ex,
339 					    ilg->ilg_filter);
340 				}
341 			}
342 			/* there will only be one match, so break now. */
343 			break;
344 		}
345 	}
346 	rw_exit(&connp->conn_ilg_lock);
347 }
348 
349 /*
350  * Caller must hold ill_mcast_lock
351  */
352 static void
353 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
354 {
355 	ilm_fbld_t fbld;
356 	ip_stack_t *ipst = ilm->ilm_ipst;
357 
358 	fbld.fbld_ilm = ilm;
359 	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
360 	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
361 	fbld.fbld_in_overflow = B_FALSE;
362 
363 	/* first, construct our master include and exclude lists */
364 	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
365 
366 	/* now use those master lists to generate the interface filter */
367 
368 	/* if include list overflowed, filter is (EXCLUDE, NULL) */
369 	if (fbld.fbld_in_overflow) {
370 		*fmode = MODE_IS_EXCLUDE;
371 		flist->sl_numsrc = 0;
372 		return;
373 	}
374 
375 	/* if nobody interested, interface filter is (INCLUDE, NULL) */
376 	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
377 		*fmode = MODE_IS_INCLUDE;
378 		flist->sl_numsrc = 0;
379 		return;
380 	}
381 
382 	/*
383 	 * If there are no exclude lists, then the interface filter
384 	 * is INCLUDE, with its filter list equal to fbld_in.  A single
385 	 * exclude list makes the interface filter EXCLUDE, with its
386 	 * filter list equal to (fbld_ex - fbld_in).
387 	 */
388 	if (fbld.fbld_ex_cnt == 0) {
389 		*fmode = MODE_IS_INCLUDE;
390 		l_copy(&fbld.fbld_in, flist);
391 	} else {
392 		*fmode = MODE_IS_EXCLUDE;
393 		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
394 	}
395 }
396 
397 /*
398  * Caller must hold ill_mcast_lock
399  */
400 static int
401 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
402 {
403 	mcast_record_t fmode;
404 	slist_t *flist;
405 	boolean_t fdefault;
406 	char buf[INET6_ADDRSTRLEN];
407 	ill_t *ill = ilm->ilm_ill;
408 
409 	/*
410 	 * There are several cases where the ilm's filter state
411 	 * defaults to (EXCLUDE, NULL):
412 	 *	- we've had previous joins without associated ilgs
413 	 *	- this join has no associated ilg
414 	 *	- the ilg's filter state is (EXCLUDE, NULL)
415 	 */
416 	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
417 	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);
418 
419 	/* attempt mallocs (if needed) before doing anything else */
420 	if ((flist = l_alloc()) == NULL)
421 		return (ENOMEM);
422 	if (!fdefault && ilm->ilm_filter == NULL) {
423 		ilm->ilm_filter = l_alloc();
424 		if (ilm->ilm_filter == NULL) {
425 			l_free(flist);
426 			return (ENOMEM);
427 		}
428 	}
429 
430 	if (ilgstat != ILGSTAT_CHANGE)
431 		ilm->ilm_refcnt++;
432 
433 	if (ilgstat == ILGSTAT_NONE)
434 		ilm->ilm_no_ilg_cnt++;
435 
436 	/*
437 	 * Determine new filter state.  If it's not the default
438 	 * (EXCLUDE, NULL), we must walk the conn list to find
439 	 * any ilgs interested in this group, and re-build the
440 	 * ilm filter.
441 	 */
442 	if (fdefault) {
443 		fmode = MODE_IS_EXCLUDE;
444 		flist->sl_numsrc = 0;
445 	} else {
446 		ilm_gen_filter(ilm, &fmode, flist);
447 	}
448 
449 	/* make sure state actually changed; nothing to do if not. */
450 	if ((ilm->ilm_fmode == fmode) &&
451 	    !lists_are_different(ilm->ilm_filter, flist)) {
452 		l_free(flist);
453 		return (0);
454 	}
455 
456 	/* send the state change report */
457 	if (!IS_LOOPBACK(ill)) {
458 		if (ill->ill_isv6)
459 			mld_statechange(ilm, fmode, flist);
460 		else
461 			igmp_statechange(ilm, fmode, flist);
462 	}
463 
464 	/* update the ilm state */
465 	ilm->ilm_fmode = fmode;
466 	if (flist->sl_numsrc > 0)
467 		l_copy(flist, ilm->ilm_filter);
468 	else
469 		CLEAR_SLIST(ilm->ilm_filter);
470 
471 	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
472 	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));
473 
474 	l_free(flist);
475 	return (0);
476 }
477 
478 /*
479  * Caller must hold ill_mcast_lock
480  */
481 static int
482 ilm_update_del(ilm_t *ilm)
483 {
484 	mcast_record_t fmode;
485 	slist_t *flist;
486 	ill_t *ill = ilm->ilm_ill;
487 
488 	ip1dbg(("ilm_update_del: still %d left; updating state\n",
489 	    ilm->ilm_refcnt));
490 
491 	if ((flist = l_alloc()) == NULL)
492 		return (ENOMEM);
493 
494 	/*
495 	 * If present, the ilg in question has already either been
496 	 * updated or removed from our list; so all we need to do
497 	 * now is walk the list to update the ilm filter state.
498 	 *
499 	 * Skip the list walk if we have any no-ilg joins, which
500 	 * cause the filter state to revert to (EXCLUDE, NULL).
501 	 */
502 	if (ilm->ilm_no_ilg_cnt != 0) {
503 		fmode = MODE_IS_EXCLUDE;
504 		flist->sl_numsrc = 0;
505 	} else {
506 		ilm_gen_filter(ilm, &fmode, flist);
507 	}
508 
509 	/* check to see if state needs to be updated */
510 	if ((ilm->ilm_fmode == fmode) &&
511 	    (!lists_are_different(ilm->ilm_filter, flist))) {
512 		l_free(flist);
513 		return (0);
514 	}
515 
516 	if (!IS_LOOPBACK(ill)) {
517 		if (ill->ill_isv6)
518 			mld_statechange(ilm, fmode, flist);
519 		else
520 			igmp_statechange(ilm, fmode, flist);
521 	}
522 
523 	ilm->ilm_fmode = fmode;
524 	if (flist->sl_numsrc > 0) {
525 		if (ilm->ilm_filter == NULL) {
526 			ilm->ilm_filter = l_alloc();
527 			if (ilm->ilm_filter == NULL) {
528 				char buf[INET6_ADDRSTRLEN];
529 				ip1dbg(("ilm_update_del: failed to alloc ilm "
530 				    "filter; no source filtering for %s on %s",
531 				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
532 				    buf, sizeof (buf)), ill->ill_name));
533 				ilm->ilm_fmode = MODE_IS_EXCLUDE;
534 				l_free(flist);
535 				return (0);
536 			}
537 		}
538 		l_copy(flist, ilm->ilm_filter);
539 	} else {
540 		CLEAR_SLIST(ilm->ilm_filter);
541 	}
542 
543 	l_free(flist);
544 	return (0);
545 }
546 
547 /*
548  * Create/update the ilm for the group/ill. Used by other parts of IP to
549  * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
550  * Returns with a refhold on the ilm.
551  *
552  * The unspecified address means all multicast addresses for in both the
553  * case of IPv4 and IPv6.
554  *
555  * The caller should have already mapped an IPMP under ill to the upper.
556  */
557 ilm_t *
558 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
559     int *errorp)
560 {
561 	ilm_t *ilm;
562 
563 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
564 	mutex_enter(&ill->ill_mcast_serializer);
565 	ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
566 	    MODE_IS_EXCLUDE, NULL, errorp);
567 	mutex_exit(&ill->ill_mcast_serializer);
568 	/*
569 	 * Now that all locks have been dropped, we can send any
570 	 * deferred/queued DLPI or IP packets
571 	 */
572 	ill_mcast_send_queued(ill);
573 	ill_dlpi_send_queued(ill);
574 	return (ilm);
575 }
576 
577 /*
578  * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
579  * then this returns with a refhold on the ilm.
580  *
581  * Internal routine which assumes the caller has already acquired
582  * ill_mcast_serializer. It is the caller's responsibility to send out
583  * queued DLPI/multicast packets after all locks are dropped.
584  *
585  * The unspecified address means all multicast addresses for in both the
586  * case of IPv4 and IPv6.
587  *
588  * ilgstat tells us if there's an ilg associated with this join,
589  * and if so, if it's a new ilg or a change to an existing one.
590  * ilg_fmode and ilg_flist give us the current filter state of
591  * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
592  *
593  * The caller should have already mapped an IPMP under ill to the upper.
594  */
595 static ilm_t *
596 ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
597     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
598     int *errorp)
599 {
600 	ilm_t *ilm;
601 
602 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
603 
604 	if (ill->ill_isv6) {
605 		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
606 		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
607 			*errorp = EINVAL;
608 			return (NULL);
609 		}
610 	} else {
611 		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
612 			ipaddr_t v4group;
613 
614 			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
615 			ASSERT(!IS_UNDER_IPMP(ill));
616 			if (!CLASSD(v4group)) {
617 				*errorp = EINVAL;
618 				return (NULL);
619 			}
620 		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
621 			*errorp = EINVAL;
622 			return (NULL);
623 		}
624 	}
625 
626 	if (IS_UNDER_IPMP(ill)) {
627 		*errorp = EINVAL;
628 		return (NULL);
629 	}
630 
631 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
632 	/*
633 	 * We do the equivalent of a lookup by checking after we get the lock
634 	 * This is needed since the ill could have been condemned after
635 	 * we looked it up, and we need to check condemned after we hold
636 	 * ill_mcast_lock to synchronize with the unplumb code.
637 	 */
638 	if (ill->ill_state_flags & ILL_CONDEMNED) {
639 		rw_exit(&ill->ill_mcast_lock);
640 		*errorp = ENXIO;
641 		return (NULL);
642 	}
643 	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
644 	    ilg_flist, errorp);
645 	rw_exit(&ill->ill_mcast_lock);
646 
647 	ill_mcast_timer_start(ill->ill_ipst);
648 	return (ilm);
649 }
650 
651 static ilm_t *
652 ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
653     ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
654     int *errorp)
655 {
656 	ilm_t	*ilm;
657 	int	ret = 0;
658 
659 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
660 	*errorp = 0;
661 
662 	/*
663 	 * An ilm is uniquely identified by the tuple of (group, ill) where
664 	 * `group' is the multicast group address, and `ill' is the interface
665 	 * on which it is currently joined.
666 	 */
667 
668 	ilm = ilm_lookup(ill, v6group, zoneid);
669 	if (ilm != NULL) {
670 		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
671 		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
672 		if (ret == 0)
673 			return (ilm);
674 
675 		*errorp = ret;
676 		return (NULL);
677 	}
678 
679 	/*
680 	 * The callers checks on the ilg and the ilg+ilm consistency under
681 	 * ill_mcast_serializer ensures that we can not have ILGSTAT_CHANGE
682 	 * and no ilm.
683 	 */
684 	ASSERT(ilgstat != ILGSTAT_CHANGE);
685 	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
686 	if (ilm == NULL) {
687 		*errorp = ENOMEM;
688 		return (NULL);
689 	}
690 
691 	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
692 		/*
693 		 * If we have more then one we should not tell the driver
694 		 * to join this time.
695 		 */
696 		if (ilm_numentries(ill, v6group) == 1) {
697 			ret = ill_join_allmulti(ill);
698 		}
699 	} else {
700 		if (!IS_LOOPBACK(ill)) {
701 			if (ill->ill_isv6)
702 				mld_joingroup(ilm);
703 			else
704 				igmp_joingroup(ilm);
705 		}
706 
707 		/*
708 		 * If we have more then one we should not tell the driver
709 		 * to join this time.
710 		 */
711 		if (ilm_numentries(ill, v6group) == 1) {
712 			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
713 		}
714 	}
715 	if (ret != 0) {
716 		if (ret == ENETDOWN) {
717 			char buf[INET6_ADDRSTRLEN];
718 
719 			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
720 			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
721 			    buf, sizeof (buf)), ill->ill_name));
722 		}
723 		ilm_delete(ilm);
724 		*errorp = ret;
725 		return (NULL);
726 	} else {
727 		return (ilm);
728 	}
729 }
730 
731 /*
732  * Send a multicast request to the driver for enabling or disabling
733  * multicast reception for v6groupp address. The caller has already
734  * checked whether it is appropriate to send one or not.
735  *
736  * For IPMP we switch to the cast_ill since it has the right hardware
737  * information.
738  */
739 static int
740 ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
741 {
742 	mblk_t	*mp;
743 	uint32_t addrlen, addroff;
744 	ill_t *release_ill = NULL;
745 	int err = 0;
746 
747 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
748 
749 	if (IS_IPMP(ill)) {
750 		/* On the upper IPMP ill. */
751 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
752 		if (release_ill == NULL) {
753 			/*
754 			 * Avoid sending it down to the ipmpstub.
755 			 * We will be called again once the members of the
756 			 * group are in place
757 			 */
758 			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
759 			    ill->ill_name, ill->ill_isv6));
760 			return (0);
761 		}
762 		ill = release_ill;
763 	}
764 	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
765 	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
766 	if (mp == NULL) {
767 		err = ENOMEM;
768 		goto done;
769 	}
770 
771 	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
772 	if (mp == NULL) {
773 		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
774 		err = ENOMEM;
775 		goto done;
776 	}
777 
778 	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
779 	case DL_ENABMULTI_REQ:
780 		mutex_enter(&ill->ill_lock);
781 		/* Track the state if this is the first enabmulti */
782 		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
783 			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
784 		mutex_exit(&ill->ill_lock);
785 		break;
786 	}
787 	ill_dlpi_queue(ill, mp);
788 done:
789 	if (release_ill != NULL)
790 		ill_refrele(release_ill);
791 	return (err);
792 }
793 
794 /*
795  * Send a multicast request to the driver for enabling multicast
796  * membership for v6group if appropriate.
797  */
798 static int
799 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
800 {
801 	if (ill->ill_net_type != IRE_IF_RESOLVER ||
802 	    ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
803 		ip1dbg(("ip_ll_multireq: not resolver\n"));
804 		return (0);	/* Must be IRE_IF_NORESOLVER */
805 	}
806 
807 	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
808 		ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
809 		return (0);
810 	}
811 	return (ip_ll_send_multireq(ill, v6groupp, prim));
812 }
813 
814 /*
815  * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
816  * being true.
817  */
818 int
819 ip_delmulti(ilm_t *ilm)
820 {
821 	ill_t *ill = ilm->ilm_ill;
822 	int error;
823 
824 	/* Acquire serializer to keep assert in ilm_bld_flists happy */
825 	mutex_enter(&ill->ill_mcast_serializer);
826 	error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
827 	mutex_exit(&ill->ill_mcast_serializer);
828 	/*
829 	 * Now that all locks have been dropped, we can send any
830 	 * deferred/queued DLPI or IP packets
831 	 */
832 	ill_mcast_send_queued(ill);
833 	ill_dlpi_send_queued(ill);
834 	return (error);
835 }
836 
837 
838 /*
839  * Delete the ilm.
840  * Assumes ill_mcast_serializer is held by the caller.
841  * Caller must send out queued dlpi/multicast packets after dropping
842  * all locks.
843  */
844 static int
845 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
846 {
847 	ill_t *ill = ilm->ilm_ill;
848 	int ret;
849 
850 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
851 	ASSERT(!(IS_UNDER_IPMP(ill)));
852 
853 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
854 	ret = ip_delmulti_impl(ilm, no_ilg, leaving);
855 	rw_exit(&ill->ill_mcast_lock);
856 	ill_mcast_timer_start(ill->ill_ipst);
857 	return (ret);
858 }
859 
860 static int
861 ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
862 {
863 	ill_t *ill = ilm->ilm_ill;
864 	int error;
865 	in6_addr_t v6group;
866 
867 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
868 
869 	/* Update counters */
870 	if (no_ilg)
871 		ilm->ilm_no_ilg_cnt--;
872 
873 	if (leaving)
874 		ilm->ilm_refcnt--;
875 
876 	if (ilm->ilm_refcnt > 0)
877 		return (ilm_update_del(ilm));
878 
879 	v6group = ilm->ilm_v6addr;
880 
881 	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
882 		ilm_delete(ilm);
883 		/*
884 		 * If we have some left then one we should not tell the driver
885 		 * to leave.
886 		 */
887 		if (ilm_numentries(ill, &v6group) != 0)
888 			return (0);
889 
890 		ill_leave_allmulti(ill);
891 
892 		return (0);
893 	}
894 
895 	if (!IS_LOOPBACK(ill)) {
896 		if (ill->ill_isv6)
897 			mld_leavegroup(ilm);
898 		else
899 			igmp_leavegroup(ilm);
900 	}
901 
902 	ilm_delete(ilm);
903 	/*
904 	 * If we have some left then one we should not tell the driver
905 	 * to leave.
906 	 */
907 	if (ilm_numentries(ill, &v6group) != 0)
908 		return (0);
909 
910 	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
911 	/* We ignore the case when ill_dl_up is not set */
912 	if (error == ENETDOWN) {
913 		char buf[INET6_ADDRSTRLEN];
914 
915 		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
916 		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
917 		    ill->ill_name));
918 	}
919 	return (error);
920 }
921 
922 /*
923  * Make the driver pass up all multicast packets.
924  */
925 int
926 ill_join_allmulti(ill_t *ill)
927 {
928 	mblk_t		*promiscon_mp, *promiscoff_mp = NULL;
929 	uint32_t	addrlen, addroff;
930 	ill_t		*release_ill = NULL;
931 
932 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
933 
934 	if (IS_LOOPBACK(ill))
935 		return (0);
936 
937 	if (!ill->ill_dl_up) {
938 		/*
939 		 * Nobody there. All multicast addresses will be re-joined
940 		 * when we get the DL_BIND_ACK bringing the interface up.
941 		 */
942 		return (ENETDOWN);
943 	}
944 
945 	if (IS_IPMP(ill)) {
946 		/* On the upper IPMP ill. */
947 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
948 		if (release_ill == NULL) {
949 			/*
950 			 * Avoid sending it down to the ipmpstub.
951 			 * We will be called again once the members of the
952 			 * group are in place
953 			 */
954 			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
955 			    ill->ill_name, ill->ill_isv6));
956 			return (0);
957 		}
958 		ill = release_ill;
959 		if (!ill->ill_dl_up) {
960 			ill_refrele(ill);
961 			return (ENETDOWN);
962 		}
963 	}
964 
965 	/*
966 	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
967 	 * provider.  We don't need to do this for certain media types for
968 	 * which we never need to turn promiscuous mode on.  While we're here,
969 	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
970 	 * ill_leave_allmulti() will not fail due to low memory conditions.
971 	 */
972 	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
973 	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
974 		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
975 		    &addrlen, &addroff);
976 		if (ill->ill_promiscoff_mp == NULL)
977 			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
978 			    &addrlen, &addroff);
979 		if (promiscon_mp == NULL ||
980 		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
981 			freemsg(promiscon_mp);
982 			freemsg(promiscoff_mp);
983 			if (release_ill != NULL)
984 				ill_refrele(release_ill);
985 			return (ENOMEM);
986 		}
987 		if (ill->ill_promiscoff_mp == NULL)
988 			ill->ill_promiscoff_mp = promiscoff_mp;
989 		ill_dlpi_queue(ill, promiscon_mp);
990 	}
991 	if (release_ill != NULL)
992 		ill_refrele(release_ill);
993 	return (0);
994 }
995 
996 /*
997  * Make the driver stop passing up all multicast packets
998  */
999 void
1000 ill_leave_allmulti(ill_t *ill)
1001 {
1002 	mblk_t	*promiscoff_mp;
1003 	ill_t	*release_ill = NULL;
1004 
1005 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1006 
1007 	if (IS_LOOPBACK(ill))
1008 		return;
1009 
1010 	if (!ill->ill_dl_up) {
1011 		/*
1012 		 * Nobody there. All multicast addresses will be re-joined
1013 		 * when we get the DL_BIND_ACK bringing the interface up.
1014 		 */
1015 		return;
1016 	}
1017 
1018 	if (IS_IPMP(ill)) {
1019 		/* On the upper IPMP ill. */
1020 		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
1021 		if (release_ill == NULL) {
1022 			/*
1023 			 * Avoid sending it down to the ipmpstub.
1024 			 * We will be called again once the members of the
1025 			 * group are in place
1026 			 */
1027 			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
1028 			    ill->ill_name, ill->ill_isv6));
1029 			return;
1030 		}
1031 		ill = release_ill;
1032 		if (!ill->ill_dl_up)
1033 			goto done;
1034 	}
1035 
1036 	/*
1037 	 * In the case of IPMP and ill_dl_up not being set when we joined
1038 	 * we didn't allocate a promiscoff_mp. In that case we have
1039 	 * nothing to do when we leave.
1040 	 * Ditto for PHYI_MULTI_BCAST
1041 	 */
1042 	promiscoff_mp = ill->ill_promiscoff_mp;
1043 	if (promiscoff_mp != NULL) {
1044 		ill->ill_promiscoff_mp = NULL;
1045 		ill_dlpi_queue(ill, promiscoff_mp);
1046 	}
1047 done:
1048 	if (release_ill != NULL)
1049 		ill_refrele(release_ill);
1050 }
1051 
1052 int
1053 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1054 {
1055 	ill_t		*ill;
1056 	int		ret;
1057 	ilm_t		*ilm;
1058 
1059 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1060 	if (ill == NULL)
1061 		return (ENODEV);
1062 
1063 	/*
1064 	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
1065 	 * to join allmulti since only the nominated underlying interface in
1066 	 * the group should receive multicast.  We silently succeed to avoid
1067 	 * having to teach IPobs (currently the only caller of this routine)
1068 	 * to ignore failures in this case.
1069 	 */
1070 	if (IS_UNDER_IPMP(ill)) {
1071 		ill_refrele(ill);
1072 		return (0);
1073 	}
1074 	mutex_enter(&ill->ill_lock);
1075 	if (ill->ill_ipallmulti_cnt > 0) {
1076 		/* Already joined */
1077 		ASSERT(ill->ill_ipallmulti_ilm != NULL);
1078 		ill->ill_ipallmulti_cnt++;
1079 		mutex_exit(&ill->ill_lock);
1080 		goto done;
1081 	}
1082 	mutex_exit(&ill->ill_lock);
1083 
1084 	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
1085 	if (ilm == NULL) {
1086 		ASSERT(ret != 0);
1087 		ill_refrele(ill);
1088 		return (ret);
1089 	}
1090 
1091 	mutex_enter(&ill->ill_lock);
1092 	if (ill->ill_ipallmulti_cnt > 0) {
1093 		/* Another thread added it concurrently */
1094 		(void) ip_delmulti(ilm);
1095 		mutex_exit(&ill->ill_lock);
1096 		goto done;
1097 	}
1098 	ASSERT(ill->ill_ipallmulti_ilm == NULL);
1099 	ill->ill_ipallmulti_ilm = ilm;
1100 	ill->ill_ipallmulti_cnt++;
1101 	mutex_exit(&ill->ill_lock);
1102 done:
1103 	ill_refrele(ill);
1104 	return (0);
1105 }
1106 
1107 int
1108 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
1109 {
1110 	ill_t		*ill;
1111 	ilm_t		*ilm;
1112 
1113 	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
1114 	if (ill == NULL)
1115 		return (ENODEV);
1116 
1117 	if (IS_UNDER_IPMP(ill)) {
1118 		ill_refrele(ill);
1119 		return (0);
1120 	}
1121 
1122 	mutex_enter(&ill->ill_lock);
1123 	if (ill->ill_ipallmulti_cnt == 0) {
1124 		/* ip_purge_allmulti could have removed them all */
1125 		mutex_exit(&ill->ill_lock);
1126 		goto done;
1127 	}
1128 	ill->ill_ipallmulti_cnt--;
1129 	if (ill->ill_ipallmulti_cnt == 0) {
1130 		/* Last one */
1131 		ilm = ill->ill_ipallmulti_ilm;
1132 		ill->ill_ipallmulti_ilm = NULL;
1133 	} else {
1134 		ilm = NULL;
1135 	}
1136 	mutex_exit(&ill->ill_lock);
1137 	if (ilm != NULL)
1138 		(void) ip_delmulti(ilm);
1139 
1140 done:
1141 	ill_refrele(ill);
1142 	return (0);
1143 }
1144 
1145 /*
1146  * Delete the allmulti memberships that were added as part of
1147  * ip_join_allmulti().
1148  */
1149 void
1150 ip_purge_allmulti(ill_t *ill)
1151 {
1152 	ilm_t	*ilm;
1153 
1154 	ASSERT(IAM_WRITER_ILL(ill));
1155 
1156 	mutex_enter(&ill->ill_lock);
1157 	ilm = ill->ill_ipallmulti_ilm;
1158 	ill->ill_ipallmulti_ilm = NULL;
1159 	ill->ill_ipallmulti_cnt = 0;
1160 	mutex_exit(&ill->ill_lock);
1161 
1162 	if (ilm != NULL)
1163 		(void) ip_delmulti(ilm);
1164 }
1165 
1166 /*
1167  * Create a dlpi message with room for phys+sap. Later
1168  * we will strip the sap for those primitives which
1169  * only need a physical address.
1170  */
1171 static mblk_t *
1172 ill_create_dl(ill_t *ill, uint32_t dl_primitive,
1173     uint32_t *addr_lenp, uint32_t *addr_offp)
1174 {
1175 	mblk_t	*mp;
1176 	uint32_t	hw_addr_length;
1177 	char		*cp;
1178 	uint32_t	offset;
1179 	uint32_t	length;
1180 	uint32_t 	size;
1181 
1182 	*addr_lenp = *addr_offp = 0;
1183 
1184 	hw_addr_length = ill->ill_phys_addr_length;
1185 	if (!hw_addr_length) {
1186 		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
1187 		return (NULL);
1188 	}
1189 
1190 	switch (dl_primitive) {
1191 	case DL_ENABMULTI_REQ:
1192 		length = sizeof (dl_enabmulti_req_t);
1193 		size = length + hw_addr_length;
1194 		break;
1195 	case DL_DISABMULTI_REQ:
1196 		length = sizeof (dl_disabmulti_req_t);
1197 		size = length + hw_addr_length;
1198 		break;
1199 	case DL_PROMISCON_REQ:
1200 	case DL_PROMISCOFF_REQ:
1201 		size = length = sizeof (dl_promiscon_req_t);
1202 		break;
1203 	default:
1204 		return (NULL);
1205 	}
1206 	mp = allocb(size, BPRI_HI);
1207 	if (!mp)
1208 		return (NULL);
1209 	mp->b_wptr += size;
1210 	mp->b_datap->db_type = M_PROTO;
1211 
1212 	cp = (char *)mp->b_rptr;
1213 	offset = length;
1214 
1215 	switch (dl_primitive) {
1216 	case DL_ENABMULTI_REQ: {
1217 		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;
1218 
1219 		dl->dl_primitive = dl_primitive;
1220 		dl->dl_addr_offset = offset;
1221 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1222 		*addr_offp = offset;
1223 		break;
1224 	}
1225 	case DL_DISABMULTI_REQ: {
1226 		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;
1227 
1228 		dl->dl_primitive = dl_primitive;
1229 		dl->dl_addr_offset = offset;
1230 		*addr_lenp = dl->dl_addr_length = hw_addr_length;
1231 		*addr_offp = offset;
1232 		break;
1233 	}
1234 	case DL_PROMISCON_REQ:
1235 	case DL_PROMISCOFF_REQ: {
1236 		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;
1237 
1238 		dl->dl_primitive = dl_primitive;
1239 		dl->dl_level = DL_PROMISC_MULTI;
1240 		break;
1241 	}
1242 	}
1243 	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
1244 	    *addr_lenp, *addr_offp));
1245 	return (mp);
1246 }
1247 
1248 /*
1249  * Rejoin any groups for which we have ilms.
1250  *
1251  * This is only needed for IPMP when the cast_ill changes since that
1252  * change is invisible to the ilm. Other interface changes are handled
1253  * by conn_update_ill.
1254  */
1255 void
1256 ill_recover_multicast(ill_t *ill)
1257 {
1258 	ilm_t	*ilm;
1259 	char    addrbuf[INET6_ADDRSTRLEN];
1260 
1261 	ill->ill_need_recover_multicast = 0;
1262 
1263 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1264 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1265 		/*
1266 		 * If we have more then one ilm for the group (e.g., with
1267 		 * different zoneid) then we should not tell the driver
1268 		 * to join unless this is the first ilm for the group.
1269 		 */
1270 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1271 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1272 			continue;
1273 		}
1274 
1275 		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
1276 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1277 
1278 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1279 			(void) ill_join_allmulti(ill);
1280 		} else {
1281 			if (ill->ill_isv6)
1282 				mld_joingroup(ilm);
1283 			else
1284 				igmp_joingroup(ilm);
1285 
1286 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1287 			    DL_ENABMULTI_REQ);
1288 		}
1289 	}
1290 	rw_exit(&ill->ill_mcast_lock);
1291 	/* Send any deferred/queued DLPI or IP packets */
1292 	ill_mcast_send_queued(ill);
1293 	ill_dlpi_send_queued(ill);
1294 	ill_mcast_timer_start(ill->ill_ipst);
1295 }
1296 
1297 /*
1298  * The opposite of ill_recover_multicast() -- leaves all multicast groups
1299  * that were explicitly joined.
1300  *
1301  * This is only needed for IPMP when the cast_ill changes since that
1302  * change is invisible to the ilm. Other interface changes are handled
1303  * by conn_update_ill.
1304  */
1305 void
1306 ill_leave_multicast(ill_t *ill)
1307 {
1308 	ilm_t	*ilm;
1309 	char    addrbuf[INET6_ADDRSTRLEN];
1310 
1311 	ill->ill_need_recover_multicast = 1;
1312 
1313 	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
1314 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1315 		/*
1316 		 * If we have more then one ilm for the group (e.g., with
1317 		 * different zoneid) then we should not tell the driver
1318 		 * to leave unless this is the first ilm for the group.
1319 		 */
1320 		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
1321 		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
1322 			continue;
1323 		}
1324 
1325 		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
1326 		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));
1327 
1328 		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
1329 			ill_leave_allmulti(ill);
1330 		} else {
1331 			if (ill->ill_isv6)
1332 				mld_leavegroup(ilm);
1333 			else
1334 				igmp_leavegroup(ilm);
1335 
1336 			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
1337 			    DL_DISABMULTI_REQ);
1338 		}
1339 	}
1340 	rw_exit(&ill->ill_mcast_lock);
1341 	/* Send any deferred/queued DLPI or IP packets */
1342 	ill_mcast_send_queued(ill);
1343 	ill_dlpi_send_queued(ill);
1344 	ill_mcast_timer_start(ill->ill_ipst);
1345 }
1346 
1347 /*
1348  * Interface used by IP input/output.
1349  * Returns true if there is a member on the ill for any zoneid.
1350  */
1351 boolean_t
1352 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1353 {
1354 	ilm_t		*ilm;
1355 
1356 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1357 	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1358 	rw_exit(&ill->ill_mcast_lock);
1359 	return (ilm != NULL);
1360 }
1361 
1362 /*
1363  * Interface used by IP input/output.
1364  * Returns true if there is a member on the ill for any zoneid.
1365  *
1366  * The group and source can't be INADDR_ANY here so no need to translate to
1367  * the unspecified IPv6 address.
1368  */
1369 boolean_t
1370 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1371 {
1372 	in6_addr_t	v6group;
1373 
1374 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1375 	return (ill_hasmembers_v6(ill, &v6group));
1376 }
1377 
1378 /*
1379  * Interface used by IP input/output.
1380  * Returns true if there is a member on the ill for any zoneid except skipzone.
1381  */
1382 boolean_t
1383 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1384     zoneid_t skipzone)
1385 {
1386 	ilm_t		*ilm;
1387 
1388 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1389 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1390 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1391 		    ilm->ilm_zoneid != skipzone) {
1392 			rw_exit(&ill->ill_mcast_lock);
1393 			return (B_TRUE);
1394 		}
1395 	}
1396 	rw_exit(&ill->ill_mcast_lock);
1397 	return (B_FALSE);
1398 }
1399 
1400 /*
1401  * Interface used by IP input/output.
1402  * Returns true if there is a member on the ill for any zoneid except skipzone.
1403  *
1404  * The group and source can't be INADDR_ANY here so no need to translate to
1405  * the unspecified IPv6 address.
1406  */
1407 boolean_t
1408 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1409 {
1410 	in6_addr_t	v6group;
1411 
1412 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1413 	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1414 }
1415 
1416 /*
1417  * Interface used by IP input.
1418  * Returns the next numerically larger zoneid that has a member. If none exist
1419  * then returns -1 (ALL_ZONES).
1420  * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1421  * to find the first zoneid which has a member, and then pass that in for
1422  * subsequent calls until ALL_ZONES is returned.
1423  *
1424  * The implementation of ill_hasmembers_nextzone() assumes the ilms
1425  * are sorted by zoneid for efficiency.
1426  */
1427 zoneid_t
1428 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1429     zoneid_t zoneid)
1430 {
1431 	ilm_t		*ilm;
1432 
1433 	rw_enter(&ill->ill_mcast_lock, RW_READER);
1434 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1435 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1436 		    ilm->ilm_zoneid > zoneid) {
1437 			zoneid = ilm->ilm_zoneid;
1438 			rw_exit(&ill->ill_mcast_lock);
1439 			return (zoneid);
1440 		}
1441 	}
1442 	rw_exit(&ill->ill_mcast_lock);
1443 	return (ALL_ZONES);
1444 }
1445 
1446 /*
1447  * Interface used by IP input.
1448  * Returns the next numerically larger zoneid that has a member. If none exist
1449  * then returns -1 (ALL_ZONES).
1450  *
1451  * The group and source can't be INADDR_ANY here so no need to translate to
1452  * the unspecified IPv6 address.
1453  */
1454 zoneid_t
1455 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1456 {
1457 	in6_addr_t	v6group;
1458 
1459 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1460 
1461 	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1462 }
1463 
1464 /*
1465  * Find an ilm matching the ill, group, and zoneid.
1466  */
1467 static ilm_t *
1468 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1469 {
1470 	ilm_t	*ilm;
1471 
1472 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1473 
1474 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1475 		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1476 			continue;
1477 		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1478 			continue;
1479 
1480 		ASSERT(ilm->ilm_ill == ill);
1481 		return (ilm);
1482 	}
1483 	return (NULL);
1484 }
1485 
1486 /*
1487  * How many members on this ill?
1488  * Since each shared-IP zone has a separate ilm for the same group/ill
1489  * we can have several.
1490  */
1491 static int
1492 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1493 {
1494 	ilm_t	*ilm;
1495 	int i = 0;
1496 
1497 	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1498 	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1499 		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1500 			i++;
1501 		}
1502 	}
1503 	return (i);
1504 }
1505 
1506 /* Caller guarantees that the group is not already on the list */
1507 static ilm_t *
1508 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
1509     mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
1510 {
1511 	ilm_t	*ilm;
1512 	ilm_t	*ilm_cur;
1513 	ilm_t	**ilm_ptpn;
1514 
1515 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1516 	ilm = GETSTRUCT(ilm_t, 1);
1517 	if (ilm == NULL)
1518 		return (NULL);
1519 	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
1520 		ilm->ilm_filter = l_alloc();
1521 		if (ilm->ilm_filter == NULL) {
1522 			mi_free(ilm);
1523 			return (NULL);
1524 		}
1525 	}
1526 	ilm->ilm_v6addr = *v6group;
1527 	ilm->ilm_refcnt = 1;
1528 	ilm->ilm_zoneid = zoneid;
1529 	ilm->ilm_timer = INFINITY;
1530 	ilm->ilm_rtx.rtx_timer = INFINITY;
1531 
1532 	ilm->ilm_ill = ill;
1533 	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
1534 	    (char *), "ilm", (void *), ilm);
1535 	ill->ill_ilm_cnt++;
1536 
1537 	ASSERT(ill->ill_ipst);
1538 	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */
1539 
1540 	/* The ill/ipif could have just been marked as condemned */
1541 
1542 	/*
1543 	 * To make ill_hasmembers_nextzone_v6 work we keep the list
1544 	 * sorted by zoneid.
1545 	 */
1546 	ilm_cur = ill->ill_ilm;
1547 	ilm_ptpn = &ill->ill_ilm;
1548 	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
1549 		ilm_ptpn = &ilm_cur->ilm_next;
1550 		ilm_cur = ilm_cur->ilm_next;
1551 	}
1552 	ilm->ilm_next = ilm_cur;
1553 	*ilm_ptpn = ilm;
1554 
1555 	/*
1556 	 * If we have an associated ilg, use its filter state; if not,
1557 	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
1558 	 */
1559 	if (ilgstat != ILGSTAT_NONE) {
1560 		if (!SLIST_IS_EMPTY(ilg_flist))
1561 			l_copy(ilg_flist, ilm->ilm_filter);
1562 		ilm->ilm_fmode = ilg_fmode;
1563 	} else {
1564 		ilm->ilm_no_ilg_cnt = 1;
1565 		ilm->ilm_fmode = MODE_IS_EXCLUDE;
1566 	}
1567 
1568 	return (ilm);
1569 }
1570 
1571 void
1572 ilm_inactive(ilm_t *ilm)
1573 {
1574 	FREE_SLIST(ilm->ilm_filter);
1575 	FREE_SLIST(ilm->ilm_pendsrcs);
1576 	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1577 	FREE_SLIST(ilm->ilm_rtx.rtx_block);
1578 	ilm->ilm_ipst = NULL;
1579 	mi_free((char *)ilm);
1580 }
1581 
1582 /*
1583  * Unlink ilm and free it.
1584  */
1585 static void
1586 ilm_delete(ilm_t *ilm)
1587 {
1588 	ill_t		*ill = ilm->ilm_ill;
1589 	ilm_t		**ilmp;
1590 	boolean_t	need_wakeup;
1591 
1592 	/*
1593 	 * Delete under lock protection so that readers don't stumble
1594 	 * on bad ilm_next
1595 	 */
1596 	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
1597 
1598 	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
1599 		;
1600 
1601 	*ilmp = ilm->ilm_next;
1602 
1603 	mutex_enter(&ill->ill_lock);
1604 	/*
1605 	 * if we are the last reference to the ill, we may need to wakeup any
1606 	 * pending FREE or unplumb operations. This is because conn_update_ill
1607 	 * bails if there is a ilg_delete_all in progress.
1608 	 */
1609 	need_wakeup = B_FALSE;
1610 	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
1611 	    (char *), "ilm", (void *), ilm);
1612 	ASSERT(ill->ill_ilm_cnt > 0);
1613 	ill->ill_ilm_cnt--;
1614 	if (ILL_FREE_OK(ill))
1615 		need_wakeup = B_TRUE;
1616 
1617 	ilm_inactive(ilm); /* frees this ilm */
1618 
1619 	if (need_wakeup) {
1620 		/* drops ill lock */
1621 		ipif_ill_refrele_tail(ill);
1622 	} else {
1623 		mutex_exit(&ill->ill_lock);
1624 	}
1625 }
1626 
1627 /*
1628  * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1629  * Applies to both IPv4 and IPv6, although ifaddr is only used with
1630  * IPv4.
1631  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1632  * On error it sets *errorp.
1633  */
1634 static ill_t *
1635 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
1636     zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
1637 {
1638 	ill_t *ill;
1639 	ipaddr_t v4group;
1640 
1641 	if (IN6_IS_ADDR_V4MAPPED(group)) {
1642 		IN6_V4MAPPED_TO_IPADDR(group, v4group);
1643 
1644 		if (ifindex != 0) {
1645 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1646 			    B_FALSE, ipst);
1647 		} else if (ifaddr != INADDR_ANY) {
1648 			ipif_t *ipif;
1649 
1650 			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
1651 			if (ipif == NULL) {
1652 				ill = NULL;
1653 			} else {
1654 				ill = ipif->ipif_ill;
1655 				ill_refhold(ill);
1656 				ipif_refrele(ipif);
1657 			}
1658 		} else {
1659 			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
1660 			    NULL);
1661 		}
1662 	} else {
1663 		if (ifindex != 0) {
1664 			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
1665 			    B_TRUE, ipst);
1666 		} else {
1667 			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
1668 			    NULL);
1669 		}
1670 	}
1671 	if (ill == NULL) {
1672 		if (ifindex != 0)
1673 			*errorp = ENXIO;
1674 		else
1675 			*errorp = EADDRNOTAVAIL;
1676 		return (NULL);
1677 	}
1678 	/* operation not supported on the virtual network interface */
1679 	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
1680 		ill_refrele(ill);
1681 		*errorp = EINVAL;
1682 		return (NULL);
1683 	}
1684 	return (ill);
1685 }
1686 
1687 /*
1688  * Looks up the appropriate ill given an interface index (or interface address)
1689  * and multicast group.  On success, returns 0, with *illpp pointing to the
1690  * found struct.  On failure, returns an errno and *illpp is set to NULL.
1691  *
1692  * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1693  *
1694  * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1695  * case of IPv4.
1696  */
1697 int
1698 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1699     const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1700 {
1701 	boolean_t src_unspec;
1702 	ill_t *ill = NULL;
1703 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1704 	int error = 0;
1705 
1706 	*illpp = NULL;
1707 
1708 	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1709 
1710 	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1711 		ipaddr_t v4group;
1712 		ipaddr_t v4src;
1713 
1714 		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1715 			return (EINVAL);
1716 		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1717 		if (src_unspec) {
1718 			v4src = INADDR_ANY;
1719 		} else {
1720 			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1721 		}
1722 		if (!CLASSD(v4group) || CLASSD(v4src))
1723 			return (EINVAL);
1724 	} else {
1725 		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1726 			return (EINVAL);
1727 		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1728 		    IN6_IS_ADDR_MULTICAST(v6src)) {
1729 			return (EINVAL);
1730 		}
1731 	}
1732 
1733 	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1734 	    ipst, &error);
1735 	*illpp = ill;
1736 	return (error);
1737 }
1738 
1739 static int
1740 ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
1741     struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
1742 {
1743 	ilg_t *ilg;
1744 	int i, numsrc, fmode, outsrcs;
1745 	struct sockaddr_in *sin;
1746 	struct sockaddr_in6 *sin6;
1747 	struct in_addr *addrp;
1748 	slist_t *fp;
1749 	boolean_t is_v4only_api;
1750 	ipaddr_t ifaddr;
1751 	uint_t ifindex;
1752 
1753 	if (gf == NULL) {
1754 		ASSERT(imsf != NULL);
1755 		ASSERT(!issin6);
1756 		is_v4only_api = B_TRUE;
1757 		outsrcs = imsf->imsf_numsrc;
1758 		ifaddr = imsf->imsf_interface.s_addr;
1759 		ifindex = 0;
1760 	} else {
1761 		ASSERT(imsf == NULL);
1762 		is_v4only_api = B_FALSE;
1763 		outsrcs = gf->gf_numsrc;
1764 		ifaddr = INADDR_ANY;
1765 		ifindex = gf->gf_interface;
1766 	}
1767 
1768 	/* No need to use ill_mcast_serializer for the reader */
1769 	rw_enter(&connp->conn_ilg_lock, RW_READER);
1770 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1771 	if (ilg == NULL) {
1772 		rw_exit(&connp->conn_ilg_lock);
1773 		return (EADDRNOTAVAIL);
1774 	}
1775 
1776 	/*
1777 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1778 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1779 	 * So we need to translate here.
1780 	 */
1781 	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
1782 	    MCAST_INCLUDE : MCAST_EXCLUDE;
1783 	if ((fp = ilg->ilg_filter) == NULL) {
1784 		numsrc = 0;
1785 	} else {
1786 		for (i = 0; i < outsrcs; i++) {
1787 			if (i == fp->sl_numsrc)
1788 				break;
1789 			if (issin6) {
1790 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1791 				sin6->sin6_family = AF_INET6;
1792 				sin6->sin6_addr = fp->sl_addr[i];
1793 			} else {
1794 				if (is_v4only_api) {
1795 					addrp = &imsf->imsf_slist[i];
1796 				} else {
1797 					sin = (struct sockaddr_in *)
1798 					    &gf->gf_slist[i];
1799 					sin->sin_family = AF_INET;
1800 					addrp = &sin->sin_addr;
1801 				}
1802 				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
1803 			}
1804 		}
1805 		numsrc = fp->sl_numsrc;
1806 	}
1807 
1808 	if (is_v4only_api) {
1809 		imsf->imsf_numsrc = numsrc;
1810 		imsf->imsf_fmode = fmode;
1811 	} else {
1812 		gf->gf_numsrc = numsrc;
1813 		gf->gf_fmode = fmode;
1814 	}
1815 
1816 	rw_exit(&connp->conn_ilg_lock);
1817 
1818 	return (0);
1819 }
1820 
1821 /*
1822  * Common for IPv4 and IPv6.
1823  */
1824 static int
1825 ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
1826     struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
1827     boolean_t issin6)
1828 {
1829 	ilg_t *ilg;
1830 	int i, err, infmode, new_fmode;
1831 	uint_t insrcs;
1832 	struct sockaddr_in *sin;
1833 	struct sockaddr_in6 *sin6;
1834 	struct in_addr *addrp;
1835 	slist_t *orig_filter = NULL;
1836 	slist_t *new_filter = NULL;
1837 	mcast_record_t orig_fmode;
1838 	boolean_t leave_group, is_v4only_api;
1839 	ilg_stat_t ilgstat;
1840 	ilm_t *ilm;
1841 	ipaddr_t ifaddr;
1842 	uint_t ifindex;
1843 
1844 	if (gf == NULL) {
1845 		ASSERT(imsf != NULL);
1846 		ASSERT(!issin6);
1847 		is_v4only_api = B_TRUE;
1848 		insrcs = imsf->imsf_numsrc;
1849 		infmode = imsf->imsf_fmode;
1850 		ifaddr = imsf->imsf_interface.s_addr;
1851 		ifindex = 0;
1852 	} else {
1853 		ASSERT(imsf == NULL);
1854 		is_v4only_api = B_FALSE;
1855 		insrcs = gf->gf_numsrc;
1856 		infmode = gf->gf_fmode;
1857 		ifaddr = INADDR_ANY;
1858 		ifindex = gf->gf_interface;
1859 	}
1860 
1861 	/* Make sure we can handle the source list */
1862 	if (insrcs > MAX_FILTER_SIZE)
1863 		return (ENOBUFS);
1864 
1865 	/*
1866 	 * setting the filter to (INCLUDE, NULL) is treated
1867 	 * as a request to leave the group.
1868 	 */
1869 	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);
1870 
1871 	mutex_enter(&ill->ill_mcast_serializer);
1872 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1873 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
1874 	if (ilg == NULL) {
1875 		/*
1876 		 * if the request was actually to leave, and we
1877 		 * didn't find an ilg, there's nothing to do.
1878 		 */
1879 		if (leave_group) {
1880 			rw_exit(&connp->conn_ilg_lock);
1881 			mutex_exit(&ill->ill_mcast_serializer);
1882 			return (0);
1883 		}
1884 		ilg = conn_ilg_alloc(connp, &err);
1885 		if (ilg == NULL) {
1886 			rw_exit(&connp->conn_ilg_lock);
1887 			mutex_exit(&ill->ill_mcast_serializer);
1888 			return (err);
1889 		}
1890 		ilgstat = ILGSTAT_NEW;
1891 		ilg->ilg_v6group = *group;
1892 		ilg->ilg_ill = ill;
1893 		ilg->ilg_ifaddr = ifaddr;
1894 		ilg->ilg_ifindex = ifindex;
1895 	} else if (leave_group) {
1896 		/*
1897 		 * Make sure we have the correct serializer. The ill argument
1898 		 * might not match ilg_ill.
1899 		 */
1900 		ilg_refhold(ilg);
1901 		mutex_exit(&ill->ill_mcast_serializer);
1902 		ill = ilg->ilg_ill;
1903 		rw_exit(&connp->conn_ilg_lock);
1904 
1905 		mutex_enter(&ill->ill_mcast_serializer);
1906 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
1907 		ilm = ilg->ilg_ilm;
1908 		ilg->ilg_ilm = NULL;
1909 		ilg_delete(connp, ilg, NULL);
1910 		ilg_refrele(ilg);
1911 		rw_exit(&connp->conn_ilg_lock);
1912 		if (ilm != NULL)
1913 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
1914 		mutex_exit(&ill->ill_mcast_serializer);
1915 		/*
1916 		 * Now that all locks have been dropped, we can send any
1917 		 * deferred/queued DLPI or IP packets
1918 		 */
1919 		ill_mcast_send_queued(ill);
1920 		ill_dlpi_send_queued(ill);
1921 		return (0);
1922 	} else {
1923 		ilgstat = ILGSTAT_CHANGE;
1924 		/* Preserve existing state in case ip_addmulti() fails */
1925 		orig_fmode = ilg->ilg_fmode;
1926 		if (ilg->ilg_filter == NULL) {
1927 			orig_filter = NULL;
1928 		} else {
1929 			orig_filter = l_alloc_copy(ilg->ilg_filter);
1930 			if (orig_filter == NULL) {
1931 				rw_exit(&connp->conn_ilg_lock);
1932 				mutex_exit(&ill->ill_mcast_serializer);
1933 				return (ENOMEM);
1934 			}
1935 		}
1936 	}
1937 
1938 	/*
1939 	 * Alloc buffer to copy new state into (see below) before
1940 	 * we make any changes, so we can bail if it fails.
1941 	 */
1942 	if ((new_filter = l_alloc()) == NULL) {
1943 		rw_exit(&connp->conn_ilg_lock);
1944 		err = ENOMEM;
1945 		goto free_and_exit;
1946 	}
1947 
1948 	if (insrcs == 0) {
1949 		CLEAR_SLIST(ilg->ilg_filter);
1950 	} else {
1951 		slist_t *fp;
1952 		if (ilg->ilg_filter == NULL) {
1953 			fp = l_alloc();
1954 			if (fp == NULL) {
1955 				if (ilgstat == ILGSTAT_NEW)
1956 					ilg_delete(connp, ilg, NULL);
1957 				rw_exit(&connp->conn_ilg_lock);
1958 				err = ENOMEM;
1959 				goto free_and_exit;
1960 			}
1961 		} else {
1962 			fp = ilg->ilg_filter;
1963 		}
1964 		for (i = 0; i < insrcs; i++) {
1965 			if (issin6) {
1966 				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
1967 				fp->sl_addr[i] = sin6->sin6_addr;
1968 			} else {
1969 				if (is_v4only_api) {
1970 					addrp = &imsf->imsf_slist[i];
1971 				} else {
1972 					sin = (struct sockaddr_in *)
1973 					    &gf->gf_slist[i];
1974 					addrp = &sin->sin_addr;
1975 				}
1976 				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
1977 			}
1978 		}
1979 		fp->sl_numsrc = insrcs;
1980 		ilg->ilg_filter = fp;
1981 	}
1982 	/*
1983 	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
1984 	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
1985 	 * So we need to translate here.
1986 	 */
1987 	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
1988 	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;
1989 
1990 	/*
1991 	 * Save copy of ilg's filter state to pass to other functions,
1992 	 * so we can release conn_ilg_lock now.
1993 	 */
1994 	new_fmode = ilg->ilg_fmode;
1995 	l_copy(ilg->ilg_filter, new_filter);
1996 
1997 	rw_exit(&connp->conn_ilg_lock);
1998 
1999 	/*
2000 	 * Now update the ill. We wait to do this until after the ilg
2001 	 * has been updated because we need to update the src filter
2002 	 * info for the ill, which involves looking at the status of
2003 	 * all the ilgs associated with this group/interface pair.
2004 	 */
2005 	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
2006 	    new_fmode, new_filter, &err);
2007 
2008 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2009 	/*
2010 	 * Must look up the ilg again since we've not been holding
2011 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2012 	 * having called conn_update_ill, which can run once we dropped the
2013 	 * conn_ilg_lock above.
2014 	 */
2015 	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
2016 	if (ilg == NULL) {
2017 		rw_exit(&connp->conn_ilg_lock);
2018 		if (ilm != NULL) {
2019 			(void) ip_delmulti_serial(ilm, B_FALSE,
2020 			    (ilgstat == ILGSTAT_NEW));
2021 		}
2022 		err = ENXIO;
2023 		goto free_and_exit;
2024 	}
2025 
2026 	if (ilm != NULL) {
2027 		if (ilg->ilg_ill == NULL) {
2028 			/* some other thread is re-attaching this.  */
2029 			rw_exit(&connp->conn_ilg_lock);
2030 			(void) ip_delmulti_serial(ilm, B_FALSE,
2031 			    (ilgstat == ILGSTAT_NEW));
2032 			err = 0;
2033 			goto free_and_exit;
2034 		}
2035 		/* Succeeded. Update the ilg to point at the ilm */
2036 		if (ilgstat == ILGSTAT_NEW) {
2037 			if (ilg->ilg_ilm == NULL) {
2038 				ilg->ilg_ilm = ilm;
2039 				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2040 			} else {
2041 				/* some other thread is re-attaching this. */
2042 				rw_exit(&connp->conn_ilg_lock);
2043 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2044 				err = 0;
2045 				goto free_and_exit;
2046 			}
2047 		} else {
2048 			/*
2049 			 * ip_addmulti didn't get a held ilm for
2050 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2051 			 */
2052 			ASSERT(ilg->ilg_ilm == ilm);
2053 		}
2054 	} else {
2055 		ASSERT(err != 0);
2056 		/*
2057 		 * Failed to allocate the ilm.
2058 		 * Restore the original filter state, or delete the
2059 		 * newly-created ilg.
2060 		 * If ENETDOWN just clear ill_ilg since so that we
2061 		 * will rejoin when the ill comes back; don't report ENETDOWN
2062 		 * to application.
2063 		 */
2064 		if (ilgstat == ILGSTAT_NEW) {
2065 			if (err == ENETDOWN) {
2066 				ilg->ilg_ill = NULL;
2067 				err = 0;
2068 			} else {
2069 				ilg_delete(connp, ilg, NULL);
2070 			}
2071 		} else {
2072 			ilg->ilg_fmode = orig_fmode;
2073 			if (SLIST_IS_EMPTY(orig_filter)) {
2074 				CLEAR_SLIST(ilg->ilg_filter);
2075 			} else {
2076 				/*
2077 				 * We didn't free the filter, even if we
2078 				 * were trying to make the source list empty;
2079 				 * so if orig_filter isn't empty, the ilg
2080 				 * must still have a filter alloc'd.
2081 				 */
2082 				l_copy(orig_filter, ilg->ilg_filter);
2083 			}
2084 		}
2085 	}
2086 	rw_exit(&connp->conn_ilg_lock);
2087 
2088 free_and_exit:
2089 	mutex_exit(&ill->ill_mcast_serializer);
2090 	ill_mcast_send_queued(ill);
2091 	ill_dlpi_send_queued(ill);
2092 	l_free(orig_filter);
2093 	l_free(new_filter);
2094 
2095 	return (err);
2096 }
2097 
2098 /*
2099  * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2100  */
2101 /* ARGSUSED */
2102 int
2103 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2104     ip_ioctl_cmd_t *ipip, void *ifreq)
2105 {
2106 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2107 	/* existence verified in ip_wput_nondata() */
2108 	mblk_t *data_mp = mp->b_cont->b_cont;
2109 	int datalen, err, cmd, minsize;
2110 	uint_t expsize = 0;
2111 	conn_t *connp;
2112 	boolean_t isv6, is_v4only_api, getcmd;
2113 	struct sockaddr_in *gsin;
2114 	struct sockaddr_in6 *gsin6;
2115 	ipaddr_t v4group;
2116 	in6_addr_t v6group;
2117 	struct group_filter *gf = NULL;
2118 	struct ip_msfilter *imsf = NULL;
2119 	mblk_t *ndp;
2120 	ill_t *ill;
2121 
2122 	connp = Q_TO_CONN(q);
2123 	err = ip_msfilter_ill(connp, mp, ipip, &ill);
2124 	if (err != 0)
2125 		return (err);
2126 
2127 	if (data_mp->b_cont != NULL) {
2128 		if ((ndp = msgpullup(data_mp, -1)) == NULL)
2129 			return (ENOMEM);
2130 		freemsg(data_mp);
2131 		data_mp = ndp;
2132 		mp->b_cont->b_cont = data_mp;
2133 	}
2134 
2135 	cmd = iocp->ioc_cmd;
2136 	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2137 	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2138 	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2139 	datalen = MBLKL(data_mp);
2140 
2141 	if (datalen < minsize)
2142 		return (EINVAL);
2143 
2144 	/*
2145 	 * now we know we have at least have the initial structure,
2146 	 * but need to check for the source list array.
2147 	 */
2148 	if (is_v4only_api) {
2149 		imsf = (struct ip_msfilter *)data_mp->b_rptr;
2150 		isv6 = B_FALSE;
2151 		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2152 	} else {
2153 		gf = (struct group_filter *)data_mp->b_rptr;
2154 		if (gf->gf_group.ss_family == AF_INET6) {
2155 			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2156 			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2157 		} else {
2158 			isv6 = B_FALSE;
2159 		}
2160 		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2161 	}
2162 	if (datalen < expsize)
2163 		return (EINVAL);
2164 
2165 	if (isv6) {
2166 		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2167 		v6group = gsin6->sin6_addr;
2168 		if (getcmd) {
2169 			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2170 			    B_TRUE);
2171 		} else {
2172 			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2173 			    B_TRUE);
2174 		}
2175 	} else {
2176 		boolean_t issin6 = B_FALSE;
2177 		if (is_v4only_api) {
2178 			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2179 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2180 		} else {
2181 			if (gf->gf_group.ss_family == AF_INET) {
2182 				gsin = (struct sockaddr_in *)&gf->gf_group;
2183 				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2184 				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2185 			} else {
2186 				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2187 				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2188 				    v4group);
2189 				issin6 = B_TRUE;
2190 			}
2191 		}
2192 		/*
2193 		 * INADDR_ANY is represented as the IPv6 unspecifed addr.
2194 		 */
2195 		if (v4group == INADDR_ANY)
2196 			v6group = ipv6_all_zeros;
2197 		else
2198 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2199 
2200 		if (getcmd) {
2201 			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2202 			    issin6);
2203 		} else {
2204 			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2205 			    issin6);
2206 		}
2207 	}
2208 	ill_refrele(ill);
2209 
2210 	return (err);
2211 }
2212 
2213 /*
2214  * Determine the ill for the SIOC*MSFILTER ioctls
2215  *
2216  * Returns an error for IS_UNDER_IPMP interfaces.
2217  *
2218  * Finds the ill based on information in the ioctl headers.
2219  */
2220 static int
2221 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
2222     ill_t **illp)
2223 {
2224 	int cmd = ipip->ipi_cmd;
2225 	int err = 0;
2226 	ill_t *ill;
2227 	/* caller has verified this mblk exists */
2228 	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
2229 	struct ip_msfilter *imsf;
2230 	struct group_filter *gf;
2231 	ipaddr_t v4addr, v4group;
2232 	in6_addr_t v6group;
2233 	uint32_t index;
2234 	ip_stack_t *ipst;
2235 
2236 	ipst = connp->conn_netstack->netstack_ip;
2237 
2238 	*illp = NULL;
2239 
2240 	/* don't allow multicast operations on a tcp conn */
2241 	if (IPCL_IS_TCP(connp))
2242 		return (ENOPROTOOPT);
2243 
2244 	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
2245 		/* don't allow v4-specific ioctls on v6 socket */
2246 		if (connp->conn_family == AF_INET6)
2247 			return (EAFNOSUPPORT);
2248 
2249 		imsf = (struct ip_msfilter *)dbuf;
2250 		v4addr = imsf->imsf_interface.s_addr;
2251 		v4group = imsf->imsf_multiaddr.s_addr;
2252 		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2253 		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
2254 		    ipst, &err);
2255 		if (ill == NULL && v4addr != INADDR_ANY)
2256 			err = ENXIO;
2257 	} else {
2258 		gf = (struct group_filter *)dbuf;
2259 		index = gf->gf_interface;
2260 		if (gf->gf_group.ss_family == AF_INET6) {
2261 			struct sockaddr_in6 *sin6;
2262 
2263 			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
2264 			v6group = sin6->sin6_addr;
2265 		} else if (gf->gf_group.ss_family == AF_INET) {
2266 			struct sockaddr_in *sin;
2267 
2268 			sin = (struct sockaddr_in *)&gf->gf_group;
2269 			v4group = sin->sin_addr.s_addr;
2270 			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2271 		} else {
2272 			return (EAFNOSUPPORT);
2273 		}
2274 		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
2275 		    IPCL_ZONEID(connp), ipst, &err);
2276 	}
2277 	*illp = ill;
2278 	return (err);
2279 }
2280 
2281 /*
2282  * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2283  * in in two stages, as the first copyin tells us the size of the attached
2284  * source buffer.  This function is called by ip_wput_nondata() after the
2285  * first copyin has completed; it figures out how big the second stage
2286  * needs to be, and kicks it off.
2287  *
2288  * In some cases (numsrc < 2), the second copyin is not needed as the
2289  * first one gets a complete structure containing 1 source addr.
2290  *
2291  * The function returns 0 if a second copyin has been started (i.e. there's
2292  * no more work to be done right now), or 1 if the second copyin is not
2293  * needed and ip_wput_nondata() can continue its processing.
2294  */
2295 int
2296 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2297 {
2298 	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2299 	int cmd = iocp->ioc_cmd;
2300 	/* validity of this checked in ip_wput_nondata() */
2301 	mblk_t *mp1 = mp->b_cont->b_cont;
2302 	int copysize = 0;
2303 	int offset;
2304 
2305 	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2306 		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2307 		if (gf->gf_numsrc >= 2) {
2308 			offset = sizeof (struct group_filter);
2309 			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2310 		}
2311 	} else {
2312 		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2313 		if (imsf->imsf_numsrc >= 2) {
2314 			offset = sizeof (struct ip_msfilter);
2315 			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2316 		}
2317 	}
2318 	if (copysize > 0) {
2319 		mi_copyin_n(q, mp, offset, copysize);
2320 		return (0);
2321 	}
2322 	return (1);
2323 }
2324 
2325 /*
2326  * Handle the following optmgmt:
2327  *	IP_ADD_MEMBERSHIP		must not have joined already
2328  *	IPV6_JOIN_GROUP			must not have joined already
2329  *	MCAST_JOIN_GROUP		must not have joined already
2330  *	IP_BLOCK_SOURCE			must have joined already
2331  *	MCAST_BLOCK_SOURCE		must have joined already
2332  *	IP_JOIN_SOURCE_GROUP		may have joined already
2333  *	MCAST_JOIN_SOURCE_GROUP		may have joined already
2334  *
2335  * fmode and src parameters may be used to determine which option is
2336  * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2337  * are functionally equivalent):
2338  *	opt			fmode			v6src
2339  *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		unspecified
2340  *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
2341  *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
2342  *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		IPv4-mapped addr
2343  *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
2344  *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		IPv4-mapped addr
2345  *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
2346  *
2347  * Changing the filter mode is not allowed; if a matching ilg already
2348  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2349  *
2350  * Verifies that there is a source address of appropriate scope for
2351  * the group; if not, EADDRNOTAVAIL is returned.
2352  *
2353  * The interface to be used may be identified by an IPv4 address or by an
2354  * interface index.
2355  *
2356  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2357  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2358  * v6src is also v4-mapped.
2359  */
2360 int
2361 ip_opt_add_group(conn_t *connp, boolean_t checkonly,
2362     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2363     mcast_record_t fmode, const in6_addr_t *v6src)
2364 {
2365 	ill_t *ill;
2366 	char buf[INET6_ADDRSTRLEN];
2367 	int	err;
2368 
2369 	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
2370 	if (err != 0) {
2371 		ip1dbg(("ip_opt_add_group: no ill for group %s/"
2372 		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
2373 		    sizeof (buf)), ifindex));
2374 		return (err);
2375 	}
2376 
2377 	if (checkonly) {
2378 		/*
2379 		 * do not do operation, just pretend to - new T_CHECK
2380 		 * semantics. The error return case above if encountered
2381 		 * considered a good enough "check" here.
2382 		 */
2383 		ill_refrele(ill);
2384 		return (0);
2385 	}
2386 	mutex_enter(&ill->ill_mcast_serializer);
2387 	/*
2388 	 * Multicast groups may not be joined on interfaces that are either
2389 	 * already underlying interfaces in an IPMP group, or in the process
2390 	 * of joining the IPMP group. The latter condition is enforced by
2391 	 * checking the value of ill->ill_grp_pending under the
2392 	 * ill_mcast_serializer lock.  We cannot serialize the
2393 	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
2394 	 *  ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
2395 	 * will take the ill_g_lock itself. Instead, we hold the
2396 	 * ill_mcast_serializer.
2397 	 */
2398 	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
2399 		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
2400 		    in6_addr_t *, v6group);
2401 		mutex_exit(&ill->ill_mcast_serializer);
2402 		ill_refrele(ill);
2403 		return (EADDRNOTAVAIL);
2404 	}
2405 	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
2406 	mutex_exit(&ill->ill_mcast_serializer);
2407 	/*
2408 	 * We have done an addmulti_impl and/or delmulti_impl.
2409 	 * All locks have been dropped, we can send any
2410 	 * deferred/queued DLPI or IP packets
2411 	 */
2412 	ill_mcast_send_queued(ill);
2413 	ill_dlpi_send_queued(ill);
2414 	ill_refrele(ill);
2415 	return (err);
2416 }
2417 
2418 /*
2419  * Common for IPv6 and IPv4.
2420  * Here we handle ilgs that are still attached to their original ill
2421  * (the one ifaddr/ifindex points at), as well as detached ones.
2422  * The detached ones might have been attached to some other ill.
2423  */
2424 static int
2425 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
2426     ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
2427     const in6_addr_t *v6src)
2428 {
2429 	ilg_t	*ilg;
2430 	boolean_t leaving;
2431 	ilm_t *ilm;
2432 	ill_t *ill;
2433 	int err = 0;
2434 
2435 retry:
2436 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2437 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2438 	if (ilg == NULL) {
2439 		rw_exit(&connp->conn_ilg_lock);
2440 		/*
2441 		 * Since we didn't have any ilg we now do the error checks
2442 		 * to determine the best errno.
2443 		 */
2444 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2445 		    &ill);
2446 		if (ill != NULL) {
2447 			/* The only error was a missing ilg for the group */
2448 			ill_refrele(ill);
2449 			err = EADDRNOTAVAIL;
2450 		}
2451 		return (err);
2452 	}
2453 
2454 	/* If the ilg is attached then we serialize using that ill */
2455 	ill = ilg->ilg_ill;
2456 	if (ill != NULL) {
2457 		/* Prevent the ill and ilg from being freed */
2458 		ill_refhold(ill);
2459 		ilg_refhold(ilg);
2460 		rw_exit(&connp->conn_ilg_lock);
2461 		mutex_enter(&ill->ill_mcast_serializer);
2462 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2463 		if (ilg->ilg_condemned) {
2464 			/* Disappeared */
2465 			ilg_refrele(ilg);
2466 			rw_exit(&connp->conn_ilg_lock);
2467 			mutex_exit(&ill->ill_mcast_serializer);
2468 			ill_refrele(ill);
2469 			goto retry;
2470 		}
2471 	}
2472 
2473 	/*
2474 	 * Decide if we're actually deleting the ilg or just removing a
2475 	 * source filter address; if just removing an addr, make sure we
2476 	 * aren't trying to change the filter mode, and that the addr is
2477 	 * actually in our filter list already.  If we're removing the
2478 	 * last src in an include list, just delete the ilg.
2479 	 */
2480 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2481 		leaving = B_TRUE;
2482 	} else {
2483 		if (fmode != ilg->ilg_fmode)
2484 			err = EINVAL;
2485 		else if (ilg->ilg_filter == NULL ||
2486 		    !list_has_addr(ilg->ilg_filter, v6src))
2487 			err = EADDRNOTAVAIL;
2488 		if (err != 0) {
2489 			if (ill != NULL)
2490 				ilg_refrele(ilg);
2491 			rw_exit(&connp->conn_ilg_lock);
2492 			goto done;
2493 		}
2494 		if (fmode == MODE_IS_INCLUDE &&
2495 		    ilg->ilg_filter->sl_numsrc == 1) {
2496 			leaving = B_TRUE;
2497 			v6src = NULL;
2498 		} else {
2499 			leaving = B_FALSE;
2500 		}
2501 	}
2502 	ilm = ilg->ilg_ilm;
2503 	if (leaving)
2504 		ilg->ilg_ilm = NULL;
2505 
2506 	ilg_delete(connp, ilg, v6src);
2507 	if (ill != NULL)
2508 		ilg_refrele(ilg);
2509 	rw_exit(&connp->conn_ilg_lock);
2510 
2511 	if (ilm != NULL) {
2512 		ASSERT(ill != NULL);
2513 		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
2514 	}
2515 done:
2516 	if (ill != NULL) {
2517 		mutex_exit(&ill->ill_mcast_serializer);
2518 		/*
2519 		 * Now that all locks have been dropped, we can
2520 		 * send any deferred/queued DLPI or IP packets
2521 		 */
2522 		ill_mcast_send_queued(ill);
2523 		ill_dlpi_send_queued(ill);
2524 		ill_refrele(ill);
2525 	}
2526 	return (err);
2527 }
2528 
2529 /*
2530  * Handle the following optmgmt:
2531  *	IP_DROP_MEMBERSHIP		will leave
2532  *	IPV6_LEAVE_GROUP		will leave
2533  *	MCAST_LEAVE_GROUP		will leave
2534  *	IP_UNBLOCK_SOURCE		will not leave
2535  *	MCAST_UNBLOCK_SOURCE		will not leave
2536  *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
2537  *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
2538  *
2539  * fmode and src parameters may be used to determine which option is
2540  * being set, as follows:
2541  *	opt			 fmode			v6src
2542  *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	unspecified
2543  *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2544  *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
2545  *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	IPv4-mapped addr
2546  *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
2547  *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	IPv4-mapped addr
2548  *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
2549  *
2550  * Changing the filter mode is not allowed; if a matching ilg already
2551  * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2552  *
2553  * The interface to be used may be identified by an IPv4 address or by an
2554  * interface index.
2555  *
2556  * Handles IPv4-mapped IPv6 multicast addresses by associating them
2557  * with the IPv4 address.  Assumes that if v6group is v4-mapped,
2558  * v6src is also v4-mapped.
2559  */
2560 int
2561 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2562     const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2563     mcast_record_t fmode, const in6_addr_t *v6src)
2564 {
2565 
2566 	/*
2567 	 * In the normal case below we don't check for the ill existing.
2568 	 * Instead we look for an existing ilg in _excl.
2569 	 * If checkonly we sanity check the arguments
2570 	 */
2571 	if (checkonly) {
2572 		ill_t	*ill;
2573 		int	err;
2574 
2575 		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2576 		    &ill);
2577 		/*
2578 		 * do not do operation, just pretend to - new T_CHECK semantics.
2579 		 * ip_opt_check is considered a good enough "check" here.
2580 		 */
2581 		if (ill != NULL)
2582 			ill_refrele(ill);
2583 		return (err);
2584 	}
2585 	return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2586 	    fmode, v6src));
2587 }
2588 
2589 /*
2590  * Group mgmt for upper conn that passes things down
2591  * to the interface multicast list (and DLPI)
2592  * These routines can handle new style options that specify an interface name
2593  * as opposed to an interface address (needed for general handling of
2594  * unnumbered interfaces.)
2595  */
2596 
2597 /*
2598  * Add a group to an upper conn group data structure and pass things down
2599  * to the interface multicast list (and DLPI)
2600  * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2601  */
2602 static int
2603 ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2604     uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
2605 {
2606 	int	error = 0;
2607 	ilg_t	*ilg;
2608 	ilg_stat_t ilgstat;
2609 	slist_t	*new_filter = NULL;
2610 	int	new_fmode;
2611 	ilm_t *ilm;
2612 
2613 	if (!(ill->ill_flags & ILLF_MULTICAST))
2614 		return (EADDRNOTAVAIL);
2615 
2616 	/* conn_ilg_lock protects the ilg list. */
2617 	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
2618 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2619 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2620 
2621 	/*
2622 	 * Depending on the option we're handling, may or may not be okay
2623 	 * if group has already been added.  Figure out our rules based
2624 	 * on fmode and src params.  Also make sure there's enough room
2625 	 * in the filter if we're adding a source to an existing filter.
2626 	 */
2627 	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2628 		/* we're joining for all sources, must not have joined */
2629 		if (ilg != NULL)
2630 			error = EADDRINUSE;
2631 	} else {
2632 		if (fmode == MODE_IS_EXCLUDE) {
2633 			/* (excl {addr}) => block source, must have joined */
2634 			if (ilg == NULL)
2635 				error = EADDRNOTAVAIL;
2636 		}
2637 		/* (incl {addr}) => join source, may have joined */
2638 
2639 		if (ilg != NULL &&
2640 		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
2641 			error = ENOBUFS;
2642 	}
2643 	if (error != 0) {
2644 		rw_exit(&connp->conn_ilg_lock);
2645 		return (error);
2646 	}
2647 
2648 	/*
2649 	 * Alloc buffer to copy new state into (see below) before
2650 	 * we make any changes, so we can bail if it fails.
2651 	 */
2652 	if ((new_filter = l_alloc()) == NULL) {
2653 		rw_exit(&connp->conn_ilg_lock);
2654 		return (ENOMEM);
2655 	}
2656 
2657 	if (ilg == NULL) {
2658 		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
2659 			rw_exit(&connp->conn_ilg_lock);
2660 			l_free(new_filter);
2661 			return (error);
2662 		}
2663 		ilg->ilg_ifindex = ifindex;
2664 		ilg->ilg_ifaddr = ifaddr;
2665 		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2666 			ilg->ilg_filter = l_alloc();
2667 			if (ilg->ilg_filter == NULL) {
2668 				ilg_delete(connp, ilg, NULL);
2669 				rw_exit(&connp->conn_ilg_lock);
2670 				l_free(new_filter);
2671 				return (ENOMEM);
2672 			}
2673 			ilg->ilg_filter->sl_numsrc = 1;
2674 			ilg->ilg_filter->sl_addr[0] = *v6src;
2675 		}
2676 		ilgstat = ILGSTAT_NEW;
2677 		ilg->ilg_v6group = *v6group;
2678 		ilg->ilg_fmode = fmode;
2679 		ilg->ilg_ill = ill;
2680 	} else {
2681 		int index;
2682 
2683 		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
2684 			rw_exit(&connp->conn_ilg_lock);
2685 			l_free(new_filter);
2686 			return (EINVAL);
2687 		}
2688 		if (ilg->ilg_filter == NULL) {
2689 			ilg->ilg_filter = l_alloc();
2690 			if (ilg->ilg_filter == NULL) {
2691 				rw_exit(&connp->conn_ilg_lock);
2692 				l_free(new_filter);
2693 				return (ENOMEM);
2694 			}
2695 		}
2696 		if (list_has_addr(ilg->ilg_filter, v6src)) {
2697 			rw_exit(&connp->conn_ilg_lock);
2698 			l_free(new_filter);
2699 			return (EADDRNOTAVAIL);
2700 		}
2701 		ilgstat = ILGSTAT_CHANGE;
2702 		index = ilg->ilg_filter->sl_numsrc++;
2703 		ilg->ilg_filter->sl_addr[index] = *v6src;
2704 	}
2705 
2706 	/*
2707 	 * Save copy of ilg's filter state to pass to other functions,
2708 	 * so we can release conn_ilg_lock now.
2709 	 */
2710 	new_fmode = ilg->ilg_fmode;
2711 	l_copy(ilg->ilg_filter, new_filter);
2712 
2713 	rw_exit(&connp->conn_ilg_lock);
2714 
2715 	/*
2716 	 * Now update the ill. We wait to do this until after the ilg
2717 	 * has been updated because we need to update the src filter
2718 	 * info for the ill, which involves looking at the status of
2719 	 * all the ilgs associated with this group/interface pair.
2720 	 */
2721 	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
2722 	    new_fmode, new_filter, &error);
2723 
2724 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
2725 	/*
2726 	 * Must look up the ilg again since we've not been holding
2727 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
2728 	 * having called conn_update_ill, which can run once we dropped the
2729 	 * conn_ilg_lock above.
2730 	 */
2731 	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
2732 	if (ilg == NULL) {
2733 		rw_exit(&connp->conn_ilg_lock);
2734 		if (ilm != NULL) {
2735 			(void) ip_delmulti_serial(ilm, B_FALSE,
2736 			    (ilgstat == ILGSTAT_NEW));
2737 		}
2738 		error = ENXIO;
2739 		goto free_and_exit;
2740 	}
2741 	if (ilm != NULL) {
2742 		if (ilg->ilg_ill == NULL) {
2743 			/* some other thread is re-attaching this.  */
2744 			rw_exit(&connp->conn_ilg_lock);
2745 			(void) ip_delmulti_serial(ilm, B_FALSE,
2746 			    (ilgstat == ILGSTAT_NEW));
2747 			error = 0;
2748 			goto free_and_exit;
2749 		}
2750 		/* Succeeded. Update the ilg to point at the ilm */
2751 		if (ilgstat == ILGSTAT_NEW) {
2752 			if (ilg->ilg_ilm == NULL) {
2753 				ilg->ilg_ilm = ilm;
2754 				ilm->ilm_ifaddr = ifaddr; /* For netstat */
2755 			} else {
2756 				/* some other thread is re-attaching this. */
2757 				rw_exit(&connp->conn_ilg_lock);
2758 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
2759 				error = 0;
2760 				goto free_and_exit;
2761 			}
2762 		} else {
2763 			/*
2764 			 * ip_addmulti didn't get a held ilm for
2765 			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
2766 			 */
2767 			ASSERT(ilg->ilg_ilm == ilm);
2768 		}
2769 	} else {
2770 		ASSERT(error != 0);
2771 		/*
2772 		 * Failed to allocate the ilm.
2773 		 * Need to undo what we did before calling ip_addmulti()
2774 		 * If ENETDOWN just clear ill_ilg since so that we
2775 		 * will rejoin when the ill comes back; don't report ENETDOWN
2776 		 * to application.
2777 		 */
2778 		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
2779 			ilg->ilg_ill = NULL;
2780 			error = 0;
2781 		} else {
2782 			in6_addr_t delsrc =
2783 			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
2784 
2785 			ilg_delete(connp, ilg, &delsrc);
2786 		}
2787 	}
2788 	rw_exit(&connp->conn_ilg_lock);
2789 
2790 free_and_exit:
2791 	l_free(new_filter);
2792 	return (error);
2793 }
2794 
2795 /*
2796  * Find an IPv4 ilg matching group, ill and source.
2797  * The group and source can't be INADDR_ANY here so no need to translate to
2798  * the unspecified IPv6 address.
2799  */
2800 boolean_t
2801 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2802     ill_t *ill)
2803 {
2804 	in6_addr_t v6group, v6src;
2805 	int i;
2806 	boolean_t isinlist;
2807 	ilg_t *ilg;
2808 
2809 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2810 	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2811 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2812 		if (ilg->ilg_condemned)
2813 			continue;
2814 
2815 		/* ilg_ill could be NULL if an add is in progress */
2816 		if (ilg->ilg_ill != ill)
2817 			continue;
2818 
2819 		/* The callers use upper ill for IPMP */
2820 		ASSERT(!IS_UNDER_IPMP(ill));
2821 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2822 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2823 				/* no source filter, so this is a match */
2824 				rw_exit(&connp->conn_ilg_lock);
2825 				return (B_TRUE);
2826 			}
2827 			break;
2828 		}
2829 	}
2830 	if (ilg == NULL) {
2831 		rw_exit(&connp->conn_ilg_lock);
2832 		return (B_FALSE);
2833 	}
2834 
2835 	/*
2836 	 * we have an ilg with matching ill and group; but
2837 	 * the ilg has a source list that we must check.
2838 	 */
2839 	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2840 	isinlist = B_FALSE;
2841 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2842 		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2843 			isinlist = B_TRUE;
2844 			break;
2845 		}
2846 	}
2847 
2848 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2849 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2850 		rw_exit(&connp->conn_ilg_lock);
2851 		return (B_TRUE);
2852 	}
2853 	rw_exit(&connp->conn_ilg_lock);
2854 	return (B_FALSE);
2855 }
2856 
2857 /*
2858  * Find an IPv6 ilg matching group, ill, and source
2859  */
2860 boolean_t
2861 conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
2862     const in6_addr_t *v6src, ill_t *ill)
2863 {
2864 	int i;
2865 	boolean_t isinlist;
2866 	ilg_t *ilg;
2867 
2868 	rw_enter(&connp->conn_ilg_lock, RW_READER);
2869 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2870 		if (ilg->ilg_condemned)
2871 			continue;
2872 
2873 		/* ilg_ill could be NULL if an add is in progress */
2874 		if (ilg->ilg_ill != ill)
2875 			continue;
2876 
2877 		/* The callers use upper ill for IPMP */
2878 		ASSERT(!IS_UNDER_IPMP(ill));
2879 		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
2880 			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2881 				/* no source filter, so this is a match */
2882 				rw_exit(&connp->conn_ilg_lock);
2883 				return (B_TRUE);
2884 			}
2885 			break;
2886 		}
2887 	}
2888 	if (ilg == NULL) {
2889 		rw_exit(&connp->conn_ilg_lock);
2890 		return (B_FALSE);
2891 	}
2892 
2893 	/*
2894 	 * we have an ilg with matching ill and group; but
2895 	 * the ilg has a source list that we must check.
2896 	 */
2897 	isinlist = B_FALSE;
2898 	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2899 		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
2900 			isinlist = B_TRUE;
2901 			break;
2902 		}
2903 	}
2904 
2905 	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2906 	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2907 		rw_exit(&connp->conn_ilg_lock);
2908 		return (B_TRUE);
2909 	}
2910 	rw_exit(&connp->conn_ilg_lock);
2911 	return (B_FALSE);
2912 }
2913 
2914 /*
2915  * Find an ilg matching group and ifaddr/ifindex.
2916  * We check both ifaddr and ifindex even though at most one of them
2917  * will be non-zero; that way we always find the right one.
2918  */
2919 static ilg_t *
2920 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
2921     uint_t ifindex)
2922 {
2923 	ilg_t	*ilg;
2924 
2925 	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
2926 
2927 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2928 		if (ilg->ilg_condemned)
2929 			continue;
2930 
2931 		if (ilg->ilg_ifaddr == ifaddr &&
2932 		    ilg->ilg_ifindex == ifindex &&
2933 		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
2934 			return (ilg);
2935 	}
2936 	return (NULL);
2937 }
2938 
2939 /*
2940  * If a source address is passed in (src != NULL and src is not
2941  * unspecified), remove the specified src addr from the given ilg's
2942  * filter list, else delete the ilg.
2943  */
2944 static void
2945 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
2946 {
2947 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
2948 	ASSERT(ilg->ilg_ptpn != NULL);
2949 	ASSERT(!ilg->ilg_condemned);
2950 
2951 	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
2952 		FREE_SLIST(ilg->ilg_filter);
2953 		ilg->ilg_filter = NULL;
2954 
2955 		ASSERT(ilg->ilg_ilm == NULL);
2956 		ilg->ilg_ill = NULL;
2957 		ilg->ilg_condemned = B_TRUE;
2958 
2959 		/* ilg_inactive will unlink from the list */
2960 		ilg_refrele(ilg);
2961 	} else {
2962 		l_remove(ilg->ilg_filter, src);
2963 	}
2964 }
2965 
2966 /*
2967  * Called from conn close. No new ilg can be added or removed
2968  * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
2969  * will return error if conn has started closing.
2970  *
2971  * We handle locking as follows.
2972  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
2973  * proceed with the ilm part of the delete we hold a reference on both the ill
2974  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
2975  * being deleted.
2976  *
2977  * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
2978  * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
2979  * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
2980  * that case we delete the ilg here, which makes ilg_add discover that the ilg
2981  * has disappeared when ip_addmulti returns, so it will discard the ilm it just
2982  * added.
2983  */
2984 void
2985 ilg_delete_all(conn_t *connp)
2986 {
2987 	ilg_t	*ilg, *next_ilg, *held_ilg;
2988 	ilm_t	*ilm;
2989 	ill_t	*ill;
2990 	boolean_t need_refrele;
2991 
2992 	/*
2993 	 * Can not run if there is a conn_update_ill already running.
2994 	 * Wait for it to complete. Caller should have already set CONN_CLOSING
2995 	 * which prevents any new threads to run in conn_update_ill.
2996 	 */
2997 	mutex_enter(&connp->conn_lock);
2998 	ASSERT(connp->conn_state_flags & CONN_CLOSING);
2999 	while (connp->conn_state_flags & CONN_UPDATE_ILL)
3000 		cv_wait(&connp->conn_cv, &connp->conn_lock);
3001 	mutex_exit(&connp->conn_lock);
3002 
3003 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3004 	ilg = connp->conn_ilg;
3005 	held_ilg = NULL;
3006 	while (ilg != NULL) {
3007 		if (ilg->ilg_condemned) {
3008 			ilg = ilg->ilg_next;
3009 			continue;
3010 		}
3011 		/* If the ilg is detached then no need to serialize */
3012 		if (ilg->ilg_ilm == NULL) {
3013 			next_ilg = ilg->ilg_next;
3014 			ilg_delete(connp, ilg, NULL);
3015 			ilg = next_ilg;
3016 			continue;
3017 		}
3018 		ill = ilg->ilg_ilm->ilm_ill;
3019 
3020 		/*
3021 		 * In order to serialize on the ill we try to enter
3022 		 * and if that fails we unlock and relock and then
3023 		 * check that we still have an ilm.
3024 		 */
3025 		need_refrele = B_FALSE;
3026 		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3027 			ill_refhold(ill);
3028 			need_refrele = B_TRUE;
3029 			ilg_refhold(ilg);
3030 			if (held_ilg != NULL)
3031 				ilg_refrele(held_ilg);
3032 			held_ilg = ilg;
3033 			rw_exit(&connp->conn_ilg_lock);
3034 			mutex_enter(&ill->ill_mcast_serializer);
3035 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3036 			if (ilg->ilg_condemned) {
3037 				ilg = ilg->ilg_next;
3038 				goto next;
3039 			}
3040 		}
3041 		ilm = ilg->ilg_ilm;
3042 		ilg->ilg_ilm = NULL;
3043 		next_ilg = ilg->ilg_next;
3044 		ilg_delete(connp, ilg, NULL);
3045 		ilg = next_ilg;
3046 		rw_exit(&connp->conn_ilg_lock);
3047 
3048 		if (ilm != NULL)
3049 			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3050 
3051 	next:
3052 		mutex_exit(&ill->ill_mcast_serializer);
3053 		/*
3054 		 * Now that all locks have been dropped, we can send any
3055 		 * deferred/queued DLPI or IP packets
3056 		 */
3057 		ill_mcast_send_queued(ill);
3058 		ill_dlpi_send_queued(ill);
3059 		if (need_refrele) {
3060 			/* Drop ill reference while we hold no locks */
3061 			ill_refrele(ill);
3062 		}
3063 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3064 	}
3065 	if (held_ilg != NULL)
3066 		ilg_refrele(held_ilg);
3067 	rw_exit(&connp->conn_ilg_lock);
3068 }
3069 
3070 /*
3071  * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
3072  * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
3073  */
3074 static void
3075 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
3076 {
3077 	ilg_stat_t	ilgstat;
3078 	slist_t		*new_filter;
3079 	int		new_fmode;
3080 	in6_addr_t	v6group;
3081 	ipaddr_t	ifaddr;
3082 	uint_t		ifindex;
3083 	ilm_t		*ilm;
3084 	int		error = 0;
3085 
3086 	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3087 	/*
3088 	 * Alloc buffer to copy new state into (see below) before
3089 	 * we make any changes, so we can bail if it fails.
3090 	 */
3091 	if ((new_filter = l_alloc()) == NULL)
3092 		return;
3093 
3094 	/*
3095 	 * Save copy of ilg's filter state to pass to other functions, so
3096 	 * we can release conn_ilg_lock now.
3097 	 * Set ilg_ill so that an unplumb can find us.
3098 	 */
3099 	new_fmode = ilg->ilg_fmode;
3100 	l_copy(ilg->ilg_filter, new_filter);
3101 	v6group = ilg->ilg_v6group;
3102 	ifaddr = ilg->ilg_ifaddr;
3103 	ifindex = ilg->ilg_ifindex;
3104 	ilgstat = ILGSTAT_NEW;
3105 
3106 	ilg->ilg_ill = ill;
3107 	ASSERT(ilg->ilg_ilm == NULL);
3108 	rw_exit(&connp->conn_ilg_lock);
3109 
3110 	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
3111 	    new_fmode, new_filter, &error);
3112 	l_free(new_filter);
3113 
3114 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3115 	/*
3116 	 * Must look up the ilg again since we've not been holding
3117 	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
3118 	 * having called conn_update_ill, which can run once we dropped the
3119 	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
3120 	 * when the lock was dropped
3121 	 */
3122 	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
3123 	if (ilg == NULL || ilg->ilg_ilm != NULL) {
3124 		if (ilm != NULL) {
3125 			rw_exit(&connp->conn_ilg_lock);
3126 			(void) ip_delmulti_serial(ilm, B_FALSE,
3127 			    (ilgstat == ILGSTAT_NEW));
3128 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3129 		}
3130 		return;
3131 	}
3132 	if (ilm == NULL) {
3133 		ilg->ilg_ill = NULL;
3134 		return;
3135 	}
3136 	ilg->ilg_ilm = ilm;
3137 	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
3138 }
3139 
3140 /*
3141  * Called when an ill is unplumbed to make sure that there are no
3142  * dangling conn references to that ill. In that case ill is non-NULL and
3143  * we make sure we remove all references to it.
3144  * Also called when we should revisit the ilg_ill used for multicast
3145  * memberships, in which case ill is NULL.
3146  *
3147  * conn is held by caller.
3148  *
3149  * Note that ipcl_walk only walks conns that are not yet condemned.
3150  * condemned conns can't be refheld. For this reason, conn must become clean
3151  * first, i.e. it must not refer to any ill/ire and then only set
3152  * condemned flag.
3153  *
3154  * We leave ixa_multicast_ifindex in place. We prefer dropping
3155  * packets instead of sending them out the wrong interface.
3156  *
3157  * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
3158  * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
3159  * ilg_ifindex are zero, indicating that the system should pick the interface,
3160  * then we attempt to reselect the ill and join on it.
3161  *
3162  * Locking notes:
3163  * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3164  * proceed with the ilm part of the delete we hold a reference on both the ill
3165  * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3166  * being deleted.
3167  *
3168  * Note: if this function is called when new ill/ipif's arrive or change status
3169  * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
3170  * a NULL ilg_ill to an ill/ilm.
3171  */
3172 static void
3173 conn_update_ill(conn_t *connp, caddr_t arg)
3174 {
3175 	ill_t	*ill = (ill_t *)arg;
3176 
3177 	/*
3178 	 * We have to prevent ip_close/ilg_delete_all from running at
3179 	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
3180 	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
3181 	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
3182 	 * If ilg_delete_all got here first, then we have nothing to do.
3183 	 */
3184 	mutex_enter(&connp->conn_lock);
3185 	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
3186 		/* Caller has to wait for ill_ilm_cnt to drop to zero */
3187 		mutex_exit(&connp->conn_lock);
3188 		return;
3189 	}
3190 	connp->conn_state_flags |= CONN_UPDATE_ILL;
3191 	mutex_exit(&connp->conn_lock);
3192 
3193 	if (ill != NULL)
3194 		ilg_check_detach(connp, ill);
3195 
3196 	ilg_check_reattach(connp, ill);
3197 
3198 	/* Do we need to wake up a thread in ilg_delete_all? */
3199 	mutex_enter(&connp->conn_lock);
3200 	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
3201 	if (connp->conn_state_flags & CONN_CLOSING)
3202 		cv_broadcast(&connp->conn_cv);
3203 	mutex_exit(&connp->conn_lock);
3204 }
3205 
3206 /* Detach from an ill that is going away */
3207 static void
3208 ilg_check_detach(conn_t *connp, ill_t *ill)
3209 {
3210 	char	group_buf[INET6_ADDRSTRLEN];
3211 	ilg_t	*ilg, *held_ilg;
3212 	ilm_t	*ilm;
3213 
3214 	mutex_enter(&ill->ill_mcast_serializer);
3215 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3216 	held_ilg = NULL;
3217 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3218 		if (ilg->ilg_condemned)
3219 			continue;
3220 
3221 		if (ilg->ilg_ill != ill)
3222 			continue;
3223 
3224 		/* Detach from current ill */
3225 		ip1dbg(("ilg_check_detach: detach %s on %s\n",
3226 		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3227 		    group_buf, sizeof (group_buf)),
3228 		    ilg->ilg_ill->ill_name));
3229 
3230 		/* Detach this ilg from the ill/ilm */
3231 		ilm = ilg->ilg_ilm;
3232 		ilg->ilg_ilm = NULL;
3233 		ilg->ilg_ill = NULL;
3234 		if (ilm == NULL)
3235 			continue;
3236 
3237 		/* Prevent ilg from disappearing */
3238 		ilg_transfer_hold(held_ilg, ilg);
3239 		held_ilg = ilg;
3240 		rw_exit(&connp->conn_ilg_lock);
3241 
3242 		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3243 		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3244 	}
3245 	if (held_ilg != NULL)
3246 		ilg_refrele(held_ilg);
3247 	rw_exit(&connp->conn_ilg_lock);
3248 	mutex_exit(&ill->ill_mcast_serializer);
3249 	/*
3250 	 * Now that all locks have been dropped, we can send any
3251 	 * deferred/queued DLPI or IP packets
3252 	 */
3253 	ill_mcast_send_queued(ill);
3254 	ill_dlpi_send_queued(ill);
3255 }
3256 
3257 /*
3258  * Check if there is a place to attach the conn_ilgs. We do this for both
3259  * detached ilgs and attached ones, since for the latter there could be
3260  * a better ill to attach them to. oill is non-null if we just detached from
3261  * that ill.
3262  */
3263 static void
3264 ilg_check_reattach(conn_t *connp, ill_t *oill)
3265 {
3266 	ill_t	*ill;
3267 	char	group_buf[INET6_ADDRSTRLEN];
3268 	ilg_t	*ilg, *held_ilg;
3269 	ilm_t	*ilm;
3270 	zoneid_t zoneid = IPCL_ZONEID(connp);
3271 	int	error;
3272 	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
3273 
3274 	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3275 	held_ilg = NULL;
3276 	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3277 		if (ilg->ilg_condemned)
3278 			continue;
3279 
3280 		/* Check if the conn_ill matches what we would pick now */
3281 		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
3282 		    ilg->ilg_ifindex, zoneid, ipst, &error);
3283 
3284 		/*
3285 		 * Make sure the ill is usable for multicast and that
3286 		 * we can send the DL_ADDMULTI_REQ before we create an
3287 		 * ilm.
3288 		 */
3289 		if (ill != NULL &&
3290 		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
3291 			/* Drop locks across ill_refrele */
3292 			ilg_transfer_hold(held_ilg, ilg);
3293 			held_ilg = ilg;
3294 			rw_exit(&connp->conn_ilg_lock);
3295 			ill_refrele(ill);
3296 			ill = NULL;
3297 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3298 			/* Note that ilg could have become condemned */
3299 		}
3300 
3301 		/*
3302 		 * Is the ill unchanged, even if both are NULL?
3303 		 * Did we just detach from that ill?
3304 		 */
3305 		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
3306 			if (ill != NULL) {
3307 				/* Drop locks across ill_refrele */
3308 				ilg_transfer_hold(held_ilg, ilg);
3309 				held_ilg = ilg;
3310 				rw_exit(&connp->conn_ilg_lock);
3311 				ill_refrele(ill);
3312 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3313 			}
3314 			continue;
3315 		}
3316 
3317 		/* Something changed; detach from old first if needed */
3318 		if (ilg->ilg_ill != NULL) {
3319 			ill_t *ill2 = ilg->ilg_ill;
3320 			boolean_t need_refrele = B_FALSE;
3321 
3322 			/*
3323 			 * In order to serialize on the ill we try to enter
3324 			 * and if that fails we unlock and relock.
3325 			 */
3326 			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
3327 				ill_refhold(ill2);
3328 				need_refrele = B_TRUE;
3329 				ilg_transfer_hold(held_ilg, ilg);
3330 				held_ilg = ilg;
3331 				rw_exit(&connp->conn_ilg_lock);
3332 				mutex_enter(&ill2->ill_mcast_serializer);
3333 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3334 				/* Note that ilg could have become condemned */
3335 			}
3336 			/*
3337 			 * Check that nobody else re-attached the ilg while we
3338 			 * dropped the lock.
3339 			 */
3340 			if (ilg->ilg_ill == ill2) {
3341 				ASSERT(!ilg->ilg_condemned);
3342 				/* Detach from current ill */
3343 				ip1dbg(("conn_check_reattach: detach %s/%s\n",
3344 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3345 				    group_buf, sizeof (group_buf)),
3346 				    ill2->ill_name));
3347 
3348 				ilm = ilg->ilg_ilm;
3349 				ilg->ilg_ilm = NULL;
3350 				ilg->ilg_ill = NULL;
3351 			} else {
3352 				ilm = NULL;
3353 			}
3354 			ilg_transfer_hold(held_ilg, ilg);
3355 			held_ilg = ilg;
3356 			rw_exit(&connp->conn_ilg_lock);
3357 			if (ilm != NULL)
3358 				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
3359 			mutex_exit(&ill2->ill_mcast_serializer);
3360 			/*
3361 			 * Now that all locks have been dropped, we can send any
3362 			 * deferred/queued DLPI or IP packets
3363 			 */
3364 			ill_mcast_send_queued(ill2);
3365 			ill_dlpi_send_queued(ill2);
3366 			if (need_refrele) {
3367 				/* Drop ill reference while we hold no locks */
3368 				ill_refrele(ill2);
3369 			}
3370 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3371 			/*
3372 			 * While we dropped conn_ilg_lock some other thread
3373 			 * could have attached this ilg, thus we check again.
3374 			 */
3375 			if (ilg->ilg_ill != NULL) {
3376 				if (ill != NULL) {
3377 					/* Drop locks across ill_refrele */
3378 					ilg_transfer_hold(held_ilg, ilg);
3379 					held_ilg = ilg;
3380 					rw_exit(&connp->conn_ilg_lock);
3381 					ill_refrele(ill);
3382 					rw_enter(&connp->conn_ilg_lock,
3383 					    RW_WRITER);
3384 				}
3385 				continue;
3386 			}
3387 		}
3388 		if (ill != NULL) {
3389 			/*
3390 			 * In order to serialize on the ill we try to enter
3391 			 * and if that fails we unlock and relock.
3392 			 */
3393 			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
3394 				/* Already have a refhold on ill */
3395 				ilg_transfer_hold(held_ilg, ilg);
3396 				held_ilg = ilg;
3397 				rw_exit(&connp->conn_ilg_lock);
3398 				mutex_enter(&ill->ill_mcast_serializer);
3399 				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3400 				/* Note that ilg could have become condemned */
3401 			}
3402 			ilg_transfer_hold(held_ilg, ilg);
3403 			held_ilg = ilg;
3404 			/*
3405 			 * Check that nobody else attached the ilg and that
3406 			 * it wasn't condemned while we dropped the lock.
3407 			 */
3408 			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
3409 				/*
3410 				 * Attach to the new ill. Can fail in which
3411 				 * case ilg_ill will remain NULL. ilg_attach
3412 				 * drops and reacquires conn_ilg_lock.
3413 				 */
3414 				ip1dbg(("conn_check_reattach: attach %s/%s\n",
3415 				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
3416 				    group_buf, sizeof (group_buf)),
3417 				    ill->ill_name));
3418 				ilg_attach(connp, ilg, ill);
3419 				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3420 			}
3421 			/* Drop locks across ill_refrele */
3422 			rw_exit(&connp->conn_ilg_lock);
3423 			mutex_exit(&ill->ill_mcast_serializer);
3424 			/*
3425 			 * Now that all locks have been
3426 			 * dropped, we can send any
3427 			 * deferred/queued DLPI or IP packets
3428 			 */
3429 			ill_mcast_send_queued(ill);
3430 			ill_dlpi_send_queued(ill);
3431 			ill_refrele(ill);
3432 			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
3433 		}
3434 	}
3435 	if (held_ilg != NULL)
3436 		ilg_refrele(held_ilg);
3437 	rw_exit(&connp->conn_ilg_lock);
3438 }
3439 
3440 /*
3441  * Called when an ill is unplumbed to make sure that there are no
3442  * dangling conn references to that ill. In that case ill is non-NULL and
3443  * we make sure we remove all references to it.
3444  * Also called when we should revisit the ilg_ill used for multicast
3445  * memberships, in which case ill is NULL.
3446  */
3447 void
3448 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3449 {
3450 	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3451 }
3452