1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright (c) 1990 Mentat Inc.
24 */
25
26 #include <sys/types.h>
27 #include <sys/stream.h>
28 #include <sys/dlpi.h>
29 #include <sys/stropts.h>
30 #include <sys/strsun.h>
31 #include <sys/ddi.h>
32 #include <sys/cmn_err.h>
33 #include <sys/sdt.h>
34 #include <sys/zone.h>
35
36 #include <sys/param.h>
37 #include <sys/socket.h>
38 #include <sys/sockio.h>
39 #include <net/if.h>
40 #include <sys/systm.h>
41 #include <sys/strsubr.h>
42 #include <net/route.h>
43 #include <netinet/in.h>
44 #include <net/if_dl.h>
45 #include <netinet/ip6.h>
46 #include <netinet/icmp6.h>
47
48 #include <inet/common.h>
49 #include <inet/mi.h>
50 #include <inet/nd.h>
51 #include <inet/arp.h>
52 #include <inet/ip.h>
53 #include <inet/ip6.h>
54 #include <inet/ip_if.h>
55 #include <inet/ip_ndp.h>
56 #include <inet/ip_multi.h>
57 #include <inet/ipclassifier.h>
58 #include <inet/ipsec_impl.h>
59 #include <inet/sctp_ip.h>
60 #include <inet/ip_listutils.h>
61 #include <inet/udp_impl.h>
62
63 /* igmpv3/mldv2 source filter manipulation */
64 static void ilm_bld_flists(conn_t *conn, void *arg);
65 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode,
66 slist_t *flist);
67
68 static ilm_t *ilm_add(ill_t *ill, const in6_addr_t *group,
69 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
70 zoneid_t zoneid);
71 static void ilm_delete(ilm_t *ilm);
72 static int ilm_numentries(ill_t *, const in6_addr_t *);
73
74 static ilm_t *ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t,
75 ilg_stat_t, mcast_record_t, slist_t *, int *);
76 static ilm_t *ip_addmulti_impl(const in6_addr_t *, ill_t *,
77 zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *);
78 static int ip_delmulti_serial(ilm_t *, boolean_t, boolean_t);
79 static int ip_delmulti_impl(ilm_t *, boolean_t, boolean_t);
80
81 static int ip_ll_multireq(ill_t *ill, const in6_addr_t *group,
82 t_uscalar_t);
83 static ilg_t *ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr,
84 uint_t ifindex);
85
86 static int ilg_add(conn_t *connp, const in6_addr_t *group,
87 ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode,
88 const in6_addr_t *v6src);
89 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src);
90 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive,
91 uint32_t *addr_lenp, uint32_t *addr_offp);
92 static int ip_opt_delete_group_excl(conn_t *connp,
93 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
94 mcast_record_t fmode, const in6_addr_t *v6src);
95
96 static ilm_t *ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t);
97
98 static int ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *,
99 ill_t **);
100
101 static void ilg_check_detach(conn_t *, ill_t *);
102 static void ilg_check_reattach(conn_t *, ill_t *);
103
104 /*
105 * MT notes:
106 *
107 * Multicast joins operate on both the ilg and ilm structures. Multiple
108 * threads operating on an conn (socket) trying to do multicast joins
109 * need to synchronize when operating on the ilg. Multiple threads
110 * potentially operating on different conn (socket endpoints) trying to
111 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
113 * The access and lookup of the ilm, as well as other ill multicast state,
114 * is under ill_mcast_lock.
115 * The modifications and lookup of ilg entries is serialized using conn_ilg_lock
116 * rwlock. An ilg will not be freed until ilg_refcnt drops to zero.
117 *
118 * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
119 * never the other way around.
120 *
121 * An ilm is an IP data structure used to track multicast join/leave.
122 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
123 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
124 * referencing the ilm.
125 * The modifications and lookup of ilm entries is serialized using the
126 * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
127 * of the ilm state.
128 * ilms are created / destroyed only as writer. ilms
129 * are not passed around. The datapath (anything outside of this file
130 * and igmp.c) use functions that do not return ilms - just the number
131 * of members. So we don't need a dynamic refcount of the number
132 * of threads holding reference to an ilm.
133 *
134 * In the cases where we serially access the ilg and ilm, which happens when
135 * we handle the applications requests to join or leave groups and sources,
136 * we use the ill_mcast_serializer mutex to ensure that a multithreaded
137 * application which does concurrent joins and/or leaves on the same group on
138 * the same socket always results in a consistent order for the ilg and ilm
139 * modifications.
140 *
141 * When a multicast operation results in needing to send a message to
142 * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
 * which serializes the DLPI requests. The IGMP/MLD code uses ill_mcast_queue()
144 * to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet.
145 */
146
/*
 * Allocate a zeroed, contiguous array of `number' instances of `structure'
 * via mi_zalloc().
 */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))
149
150 /*
151 * Caller must ensure that the ilg has not been condemned
152 * The condemned flag is only set in ilg_delete under conn_ilg_lock.
153 *
154 * The caller must hold conn_ilg_lock as writer.
155 */
156 static void
ilg_refhold(ilg_t * ilg)157 ilg_refhold(ilg_t *ilg)
158 {
159 ASSERT(ilg->ilg_refcnt != 0);
160 ASSERT(!ilg->ilg_condemned);
161 ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
162
163 ilg->ilg_refcnt++;
164 }
165
166 static void
ilg_inactive(ilg_t * ilg)167 ilg_inactive(ilg_t *ilg)
168 {
169 ASSERT(ilg->ilg_ill == NULL);
170 ASSERT(ilg->ilg_ilm == NULL);
171 ASSERT(ilg->ilg_filter == NULL);
172 ASSERT(ilg->ilg_condemned);
173
174 /* Unlink from list */
175 *ilg->ilg_ptpn = ilg->ilg_next;
176 if (ilg->ilg_next != NULL)
177 ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
178 ilg->ilg_next = NULL;
179 ilg->ilg_ptpn = NULL;
180
181 ilg->ilg_connp = NULL;
182 kmem_free(ilg, sizeof (*ilg));
183 }
184
185 /*
186 * The caller must hold conn_ilg_lock as writer.
187 */
188 static void
ilg_refrele(ilg_t * ilg)189 ilg_refrele(ilg_t *ilg)
190 {
191 ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
192 ASSERT(ilg->ilg_refcnt != 0);
193 if (--ilg->ilg_refcnt == 0)
194 ilg_inactive(ilg);
195 }
196
197 /*
198 * Acquire reference on ilg and drop reference on held_ilg.
199 * In the case when held_ilg is the same as ilg we already have
200 * a reference, but the held_ilg might be condemned. In that case
201 * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
202 * that the ilg isn't condemned.
203 */
204 static void
ilg_transfer_hold(ilg_t * held_ilg,ilg_t * ilg)205 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
206 {
207 if (held_ilg == ilg)
208 return;
209
210 ilg_refhold(ilg);
211 if (held_ilg != NULL)
212 ilg_refrele(held_ilg);
213 }
214
215 /*
216 * Allocate a new ilg_t and links it into conn_ilg.
217 * Returns NULL on failure, in which case `*errp' will be
218 * filled in with the reason.
219 *
220 * Assumes connp->conn_ilg_lock is held.
221 */
222 static ilg_t *
conn_ilg_alloc(conn_t * connp,int * errp)223 conn_ilg_alloc(conn_t *connp, int *errp)
224 {
225 ilg_t *ilg;
226
227 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
228
229 /*
230 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
231 * create any ilgs.
232 */
233 if (connp->conn_state_flags & CONN_CLOSING) {
234 *errp = EINVAL;
235 return (NULL);
236 }
237
238 ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
239 if (ilg == NULL) {
240 *errp = ENOMEM;
241 return (NULL);
242 }
243
244 ilg->ilg_refcnt = 1;
245
246 /* Insert at head */
247 if (connp->conn_ilg != NULL)
248 connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
249 ilg->ilg_next = connp->conn_ilg;
250 ilg->ilg_ptpn = &connp->conn_ilg;
251 connp->conn_ilg = ilg;
252
253 ilg->ilg_connp = connp;
254 return (ilg);
255 }
256
/*
 * Scratch state used by ilm_gen_filter()/ilm_bld_flists() while walking
 * all conns to build the master include/exclude source lists for one ilm.
 */
typedef struct ilm_fbld_s {
	ilm_t *fbld_ilm;		/* the ilm the filter is built for */
	int fbld_in_cnt;		/* # of MODE_IS_INCLUDE ilgs seen */
	int fbld_ex_cnt;		/* # of exclude-mode ilgs seen */
	slist_t fbld_in;		/* union of include source lists */
	slist_t fbld_ex;		/* intersection of exclude lists */
	boolean_t fbld_in_overflow;	/* fbld_in overflowed the slist */
} ilm_fbld_t;
265
/*
 * ipcl_walk() callback used by ilm_gen_filter(): if `connp' has an ilg
 * joined to the same group and ill as the ilm in `arg' (an ilm_fbld_t),
 * fold that ilg's filter mode and source list into the master include
 * and exclude lists being accumulated.
 *
 * Caller must hold ill_mcast_lock
 */
static void
ilm_bld_flists(conn_t *connp, void *arg)
{
	ilg_t *ilg;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (connp->conn_ilg == NULL)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn.  But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match).  If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 *
	 * Note that the caller has already serialized on the ill we care
	 * about.
	 */
	ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/*
		 * Since we are under the ill_mcast_serializer we know
		 * that any ilg+ilm operations on this ilm have either
		 * not started or completed, except for the last ilg
		 * (the one that caused us to be called) which doesn't
		 * have ilg_ilm set yet. Hence we compare using ilg_ill
		 * and the address.
		 */
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				/* Include source lists are unioned. */
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty.  If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	rw_exit(&connp->conn_ilg_lock);
}
347
348 /*
349 * Caller must hold ill_mcast_lock
350 */
351 static void
ilm_gen_filter(ilm_t * ilm,mcast_record_t * fmode,slist_t * flist)352 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
353 {
354 ilm_fbld_t fbld;
355 ip_stack_t *ipst = ilm->ilm_ipst;
356
357 fbld.fbld_ilm = ilm;
358 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
359 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
360 fbld.fbld_in_overflow = B_FALSE;
361
362 /* first, construct our master include and exclude lists */
363 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);
364
365 /* now use those master lists to generate the interface filter */
366
367 /* if include list overflowed, filter is (EXCLUDE, NULL) */
368 if (fbld.fbld_in_overflow) {
369 *fmode = MODE_IS_EXCLUDE;
370 flist->sl_numsrc = 0;
371 return;
372 }
373
374 /* if nobody interested, interface filter is (INCLUDE, NULL) */
375 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
376 *fmode = MODE_IS_INCLUDE;
377 flist->sl_numsrc = 0;
378 return;
379 }
380
381 /*
382 * If there are no exclude lists, then the interface filter
383 * is INCLUDE, with its filter list equal to fbld_in. A single
384 * exclude list makes the interface filter EXCLUDE, with its
385 * filter list equal to (fbld_ex - fbld_in).
386 */
387 if (fbld.fbld_ex_cnt == 0) {
388 *fmode = MODE_IS_INCLUDE;
389 l_copy(&fbld.fbld_in, flist);
390 } else {
391 *fmode = MODE_IS_EXCLUDE;
392 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
393 }
394 }
395
/*
 * Account for an additional join (or an ilg filter change when ilgstat is
 * ILGSTAT_CHANGE) on an existing ilm: recompute the interface filter,
 * send an IGMP/MLD state change report if it changed, and record the new
 * state in the ilm.  Returns 0 on success or ENOMEM.
 *
 * Caller must hold ill_mcast_lock
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = ilm->ilm_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 * - we've had previous joins without associated ilgs
	 * - this join has no associated ilg
	 * - the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* A filter change on an existing ilg is not a new membership. */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	/* Joins without an ilg pin the filter at the (EXCLUDE, NULL) default. */
	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state. If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report (not needed on loopback) */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}
476
/*
 * Account for a member leaving (or a no-ilg join going away) on an ilm
 * that still has other references: recompute the interface filter, send
 * an IGMP/MLD state change report if it changed, and record the new
 * state in the ilm.  Returns 0 on success or ENOMEM.
 *
 * Caller must hold ill_mcast_lock
 */
static int
ilm_update_del(ilm_t *ilm)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = ilm->ilm_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	/* send the state change report (not needed on loopback) */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				/* Degrade to unfiltered EXCLUDE mode. */
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}
545
546 /*
547 * Create/update the ilm for the group/ill. Used by other parts of IP to
548 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
549 * Returns with a refhold on the ilm.
550 *
551 * The unspecified address means all multicast addresses for in both the
552 * case of IPv4 and IPv6.
553 *
554 * The caller should have already mapped an IPMP under ill to the upper.
555 */
556 ilm_t *
ip_addmulti(const in6_addr_t * v6group,ill_t * ill,zoneid_t zoneid,int * errorp)557 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
558 int *errorp)
559 {
560 ilm_t *ilm;
561
562 /* Acquire serializer to keep assert in ilm_bld_flists happy */
563 mutex_enter(&ill->ill_mcast_serializer);
564 ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
565 MODE_IS_EXCLUDE, NULL, errorp);
566 mutex_exit(&ill->ill_mcast_serializer);
567 /*
568 * Now that all locks have been dropped, we can send any
569 * deferred/queued DLPI or IP packets
570 */
571 ill_mcast_send_queued(ill);
572 ill_dlpi_send_queued(ill);
573 return (ilm);
574 }
575
/*
 * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
 * then this returns with a refhold on the ilm.
 *
 * Internal routine which assumes the caller has already acquired
 * ill_mcast_serializer. It is the caller's responsibility to send out
 * queued DLPI/multicast packets after all locks are dropped.
 *
 * The unspecified group address stands for all multicast addresses, for
 * both IPv4 and IPv6.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 * On failure returns NULL and sets *errorp (EINVAL, ENXIO, or whatever
 * ip_addmulti_impl reports).
 */
static ilm_t *
ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t *ilm;

	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));

	/* Validate the group address against the ill's address family. */
	if (ill->ill_isv6) {
		if (!IN6_IS_ADDR_MULTICAST(v6group) &&
		    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	} else {
		if (IN6_IS_ADDR_V4MAPPED(v6group)) {
			ipaddr_t v4group;

			IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
			ASSERT(!IS_UNDER_IPMP(ill));
			/* The IPv4 group must be a class D address. */
			if (!CLASSD(v4group)) {
				*errorp = EINVAL;
				return (NULL);
			}
		} else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
			*errorp = EINVAL;
			return (NULL);
		}
	}

	/* Joins are done on the upper IPMP ill, never on an under ill. */
	if (IS_UNDER_IPMP(ill)) {
		*errorp = EINVAL;
		return (NULL);
	}

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	/*
	 * We do the equivalent of a lookup by checking after we get the lock
	 * This is needed since the ill could have been condemned after
	 * we looked it up, and we need to check condemned after we hold
	 * ill_mcast_lock to synchronize with the unplumb code.
	 */
	if (ill->ill_state_flags & ILL_CONDEMNED) {
		rw_exit(&ill->ill_mcast_lock);
		*errorp = ENXIO;
		return (NULL);
	}
	ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
	    ilg_flist, errorp);
	rw_exit(&ill->ill_mcast_lock);

	ill_mcast_timer_start(ill->ill_ipst);
	return (ilm);
}
649
/*
 * Guts of ip_addmulti_serial(): look up or create the ilm for
 * (group, ill, zone), update its filter state, and when this is the
 * first ilm for the group on the ill, tell the driver (or send the
 * IGMP/MLD report) as appropriate.  On failure returns NULL and sets
 * *errorp.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
static ilm_t *
ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
	ilm_t *ilm;
	int ret = 0;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	*errorp = 0;

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill) where
	 * `group' is the multicast group address, and `ill' is the interface
	 * on which it is currently joined.
	 */

	ilm = ilm_lookup(ill, v6group, zoneid);
	if (ilm != NULL) {
		/* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
		ret = ilm_update_add(ilm, ilgstat, ilg_flist);
		if (ret == 0)
			return (ilm);

		*errorp = ret;
		return (NULL);
	}

	/*
	 * The caller's checks on the ilg and the ilg+ilm consistency under
	 * ill_mcast_serializer ensure that we can not have ILGSTAT_CHANGE
	 * and no ilm.
	 */
	ASSERT(ilgstat != ILGSTAT_CHANGE);
	ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
	if (ilm == NULL) {
		*errorp = ENOMEM;
		return (NULL);
	}

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * If we have more than one such ilm we should not tell
		 * the driver to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ill_join_allmulti(ill);
		}
	} else {
		/* Send the membership report (not needed on loopback). */
		if (!IS_LOOPBACK(ill)) {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);
		}

		/*
		 * If we have more than one ilm for the group we should not
		 * tell the driver to join this time.
		 */
		if (ilm_numentries(ill, v6group) == 1) {
			ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
		}
	}
	if (ret != 0) {
		if (ret == ENETDOWN) {
			char buf[INET6_ADDRSTRLEN];

			ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
			    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
			    buf, sizeof (buf)), ill->ill_name));
		}
		/* Undo the ilm creation; the join did not take effect. */
		ilm_delete(ilm);
		*errorp = ret;
		return (NULL);
	} else {
		return (ilm);
	}
}
729
730 /*
731 * Looks up the list of multicast physical addresses this interface
732 * listens to. Add to the list if not present already.
733 */
734 boolean_t
ip_mphysaddr_add(ill_t * ill,uchar_t * hw_addr)735 ip_mphysaddr_add(ill_t *ill, uchar_t *hw_addr)
736 {
737 multiphysaddr_t *mpa = NULL;
738 int hw_addr_length = ill->ill_phys_addr_length;
739
740 mutex_enter(&ill->ill_lock);
741 for (mpa = ill->ill_mphysaddr_list; mpa != NULL; mpa = mpa->mpa_next) {
742 if (bcmp(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length) == 0) {
743 mpa->mpa_refcnt++;
744 mutex_exit(&ill->ill_lock);
745 return (B_FALSE);
746 }
747 }
748
749 mpa = kmem_zalloc(sizeof (multiphysaddr_t), KM_NOSLEEP);
750 if (mpa == NULL) {
751 /*
752 * We risk not having the multiphysadd structure. At this
753 * point we can't fail. We can't afford to not send a
754 * DL_ENABMULTI_REQ also. It is better than pre-allocating
755 * the structure and having the code to track it also.
756 */
757 ip0dbg(("ip_mphysaddr_add: ENOMEM. Some multicast apps"
758 " may have issues. hw_addr: %p ill_name: %s\n",
759 (void *)hw_addr, ill->ill_name));
760 mutex_exit(&ill->ill_lock);
761 return (B_TRUE);
762 }
763 bcopy(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length);
764 mpa->mpa_refcnt = 1;
765 mpa->mpa_next = ill->ill_mphysaddr_list;
766 ill->ill_mphysaddr_list = mpa;
767 mutex_exit(&ill->ill_lock);
768 return (B_TRUE);
769 }
770
771 /*
772 * Look up hw_addr from the list of physical multicast addresses this interface
773 * listens to.
774 * Remove the entry if the refcnt is 0
775 */
776 boolean_t
ip_mphysaddr_del(ill_t * ill,uchar_t * hw_addr)777 ip_mphysaddr_del(ill_t *ill, uchar_t *hw_addr)
778 {
779 multiphysaddr_t *mpap = NULL, **mpapp = NULL;
780 int hw_addr_length = ill->ill_phys_addr_length;
781 boolean_t ret = B_FALSE;
782
783 mutex_enter(&ill->ill_lock);
784 for (mpapp = &ill->ill_mphysaddr_list; (mpap = *mpapp) != NULL;
785 mpapp = &(mpap->mpa_next)) {
786 if (bcmp(hw_addr, &(mpap->mpa_addr[0]), hw_addr_length) == 0)
787 break;
788 }
789 if (mpap == NULL) {
790 /*
791 * Should be coming here only when there was a memory
792 * exhaustion and we were not able to allocate
793 * a multiphysaddr_t. We still send a DL_DISABMULTI_REQ down.
794 */
795
796 ip0dbg(("ip_mphysaddr_del: No entry for this addr. Some "
797 "multicast apps might have had issues. hw_addr: %p "
798 " ill_name: %s\n", (void *)hw_addr, ill->ill_name));
799 ret = B_TRUE;
800 } else if (--mpap->mpa_refcnt == 0) {
801 *mpapp = mpap->mpa_next;
802 kmem_free(mpap, sizeof (multiphysaddr_t));
803 ret = B_TRUE;
804 }
805 mutex_exit(&ill->ill_lock);
806 return (ret);
807 }
808
/*
 * Send a multicast request to the driver for enabling or disabling
 * multicast reception for v6groupp address. The caller has already
 * checked whether it is appropriate to send one or not.
 *
 * For IPMP we switch to the cast_ill since it has the right hardware
 * information.
 *
 * Returns 0 on success (including the cases where the request is
 * intentionally suppressed) or ENOMEM.
 */
static int
ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
	mblk_t *mp;
	uint32_t addrlen, addroff;
	ill_t *release_ill = NULL;
	uchar_t *cp;
	int err = 0;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		ill = release_ill;
	}
	/* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
	mp = ill_create_dl(ill, prim, &addrlen, &addroff);
	if (mp == NULL) {
		err = ENOMEM;
		goto done;
	}

	/* Fill in the link-layer multicast address for the group. */
	mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
	if (mp == NULL) {
		ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
		err = ENOMEM;
		goto done;
	}
	cp = mp->b_rptr;

	/*
	 * Keep the per-ill physical multicast address list in sync; when
	 * the address is already enabled (add) or still referenced (del),
	 * drop the message instead of sending a redundant request.
	 */
	switch (((union DL_primitives *)cp)->dl_primitive) {
	case DL_ENABMULTI_REQ:
		cp += ((dl_enabmulti_req_t *)cp)->dl_addr_offset;
		if (!ip_mphysaddr_add(ill, cp)) {
			freemsg(mp);
			err = 0;
			goto done;
		}
		mutex_enter(&ill->ill_lock);
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
		mutex_exit(&ill->ill_lock);
		break;
	case DL_DISABMULTI_REQ:
		cp += ((dl_disabmulti_req_t *)cp)->dl_addr_offset;
		if (!ip_mphysaddr_del(ill, cp)) {
			freemsg(mp);
			err = 0;
			goto done;
		}
	}
	/* Queue the request for serialized delivery to the driver. */
	ill_dlpi_queue(ill, mp);
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (err);
}
886
887 /*
888 * Send a multicast request to the driver for enabling multicast
889 * membership for v6group if appropriate.
890 */
891 static int
ip_ll_multireq(ill_t * ill,const in6_addr_t * v6groupp,t_uscalar_t prim)892 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
893 {
894 if (ill->ill_net_type != IRE_IF_RESOLVER ||
895 ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
896 ip1dbg(("ip_ll_multireq: not resolver\n"));
897 return (0); /* Must be IRE_IF_NORESOLVER */
898 }
899
900 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
901 ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
902 return (0);
903 }
904 return (ip_ll_send_multireq(ill, v6groupp, prim));
905 }
906
907 /*
908 * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
909 * being true.
910 */
911 int
ip_delmulti(ilm_t * ilm)912 ip_delmulti(ilm_t *ilm)
913 {
914 ill_t *ill = ilm->ilm_ill;
915 int error;
916
917 /* Acquire serializer to keep assert in ilm_bld_flists happy */
918 mutex_enter(&ill->ill_mcast_serializer);
919 error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
920 mutex_exit(&ill->ill_mcast_serializer);
921 /*
922 * Now that all locks have been dropped, we can send any
923 * deferred/queued DLPI or IP packets
924 */
925 ill_mcast_send_queued(ill);
926 ill_dlpi_send_queued(ill);
927 return (error);
928 }
929
930
931 /*
932 * Delete the ilm.
933 * Assumes ill_mcast_serializer is held by the caller.
934 * Caller must send out queued dlpi/multicast packets after dropping
935 * all locks.
936 */
937 static int
ip_delmulti_serial(ilm_t * ilm,boolean_t no_ilg,boolean_t leaving)938 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
939 {
940 ill_t *ill = ilm->ilm_ill;
941 int ret;
942
943 ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
944 ASSERT(!(IS_UNDER_IPMP(ill)));
945
946 rw_enter(&ill->ill_mcast_lock, RW_WRITER);
947 ret = ip_delmulti_impl(ilm, no_ilg, leaving);
948 rw_exit(&ill->ill_mcast_lock);
949 ill_mcast_timer_start(ill->ill_ipst);
950 return (ret);
951 }
952
/*
 * Guts of ip_delmulti_serial(): drop the membership counts on the ilm
 * and, when the last reference goes away, delete the ilm and tell the
 * driver (or send the IGMP/MLD leave) as appropriate.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
static int
ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
	ill_t *ill = ilm->ilm_ill;
	int error;
	in6_addr_t v6group;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other members remain; just recompute the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm));

	/* Copy the group before ilm_delete frees the ilm. */
	v6group = ilm->ilm_v6addr;

	if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
		ilm_delete(ilm);
		/*
		 * If other ilms for this group remain on the ill, we
		 * should not tell the driver to leave.
		 */
		if (ilm_numentries(ill, &v6group) != 0)
			return (0);

		ill_leave_allmulti(ill);

		return (0);
	}

	/* Send the leave report (not needed on loopback). */
	if (!IS_LOOPBACK(ill)) {
		if (ill->ill_isv6)
			mld_leavegroup(ilm);
		else
			igmp_leavegroup(ilm);
	}

	ilm_delete(ilm);
	/*
	 * If other ilms for this group remain on the ill, we should not
	 * tell the driver to leave.
	 */
	if (ilm_numentries(ill, &v6group) != 0)
		return (0);

	error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
	/* We ignore the case when ill_dl_up is not set */
	if (error == ENETDOWN) {
		char buf[INET6_ADDRSTRLEN];

		ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
		    inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
		    ill->ill_name));
	}
	return (error);
}
1014
/*
 * Make the driver pass up all multicast packets by queueing a
 * DL_PROMISCON_REQ for multicast promiscuity.  Returns 0, ENETDOWN when
 * the interface is not bound, or ENOMEM.
 *
 * Caller must hold ill_mcast_lock as writer.
 */
int
ill_join_allmulti(ill_t *ill)
{
	mblk_t *promiscon_mp, *promiscoff_mp = NULL;
	uint32_t addrlen, addroff;
	ill_t *release_ill = NULL;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	if (IS_LOOPBACK(ill))
		return (0);

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (ENETDOWN);
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return (0);
		}
		ill = release_ill;
		if (!ill->ill_dl_up) {
			ill_refrele(ill);
			return (ENETDOWN);
		}
	}

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
	 * provider. We don't need to do this for certain media types for
	 * which we never need to turn promiscuous mode on. While we're here,
	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
	 * ill_leave_allmulti() will not fail due to low memory conditions.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    &addrlen, &addroff);
		/* Only pre-allocate the off message if we don't have one. */
		if (ill->ill_promiscoff_mp == NULL)
			promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
			    &addrlen, &addroff);
		if (promiscon_mp == NULL ||
		    (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
			freemsg(promiscon_mp);
			freemsg(promiscoff_mp);
			if (release_ill != NULL)
				ill_refrele(release_ill);
			return (ENOMEM);
		}
		if (ill->ill_promiscoff_mp == NULL)
			ill->ill_promiscoff_mp = promiscoff_mp;
		ill_dlpi_queue(ill, promiscon_mp);
	}
	if (release_ill != NULL)
		ill_refrele(release_ill);
	return (0);
}
1088
1089 /*
1090 * Make the driver stop passing up all multicast packets
1091 */
void
ill_leave_allmulti(ill_t *ill)
{
	mblk_t *promiscoff_mp;
	ill_t *release_ill = NULL;

	/* Caller must hold the multicast lock as writer. */
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Loopback never had promiscuous mode enabled. */
	if (IS_LOOPBACK(ill))
		return;

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return;
	}

	if (IS_IPMP(ill)) {
		/* On the upper IPMP ill. */
		release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
		if (release_ill == NULL) {
			/*
			 * Avoid sending it down to the ipmpstub.
			 * We will be called again once the members of the
			 * group are in place
			 */
			ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
			    ill->ill_name, ill->ill_isv6));
			return;
		}
		/* Operate on the nominated cast ill from here on. */
		ill = release_ill;
		if (!ill->ill_dl_up)
			goto done;
	}

	/*
	 * In the case of IPMP and ill_dl_up not being set when we joined
	 * we didn't allocate a promiscoff_mp. In that case we have
	 * nothing to do when we leave.
	 * Ditto for PHYI_MULTI_BCAST
	 */
	promiscoff_mp = ill->ill_promiscoff_mp;
	if (promiscoff_mp != NULL) {
		/* Consume the pre-allocated DL_PROMISCOFF_REQ message. */
		ill->ill_promiscoff_mp = NULL;
		ill_dlpi_queue(ill, promiscoff_mp);
	}
done:
	if (release_ill != NULL)
		ill_refrele(release_ill);
}
1144
/*
 * Join allmulti on the interface with the given index, maintaining a
 * reference count (ill_ipallmulti_cnt) so that only the first join
 * creates the ilm. Returns zero or an errno.
 */
int
ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t *ill;
	int ret;
	ilm_t *ilm;

	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
	if (ill == NULL)
		return (ENODEV);

	/*
	 * The ip_addmulti() function doesn't allow IPMP underlying interfaces
	 * to join allmulti since only the nominated underlying interface in
	 * the group should receive multicast. We silently succeed to avoid
	 * having to teach IPobs (currently the only caller of this routine)
	 * to ignore failures in this case.
	 */
	if (IS_UNDER_IPMP(ill)) {
		ill_refrele(ill);
		return (0);
	}
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt > 0) {
		/* Already joined */
		ASSERT(ill->ill_ipallmulti_ilm != NULL);
		ill->ill_ipallmulti_cnt++;
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	mutex_exit(&ill->ill_lock);

	/*
	 * ill_lock dropped: ip_addmulti() acquires other locks, so this is
	 * a check-then-act window that is closed by the re-check below.
	 * The all-zeros group address denotes allmulti.
	 */
	ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
	if (ilm == NULL) {
		ASSERT(ret != 0);
		ill_refrele(ill);
		return (ret);
	}

	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt > 0) {
		/* Another thread added it concurrently */
		(void) ip_delmulti(ilm);
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	ASSERT(ill->ill_ipallmulti_ilm == NULL);
	ill->ill_ipallmulti_ilm = ilm;
	ill->ill_ipallmulti_cnt++;
	mutex_exit(&ill->ill_lock);
done:
	ill_refrele(ill);
	return (0);
}
1199
/*
 * Undo one ip_join_allmulti(). Only the last leave (when
 * ill_ipallmulti_cnt drops to zero) removes the ilm.
 * Returns zero or an errno.
 */
int
ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t *ill;
	ilm_t *ilm;

	ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
	if (ill == NULL)
		return (ENODEV);

	/* Mirror ip_join_allmulti(): silently succeed for IPMP members. */
	if (IS_UNDER_IPMP(ill)) {
		ill_refrele(ill);
		return (0);
	}

	mutex_enter(&ill->ill_lock);
	if (ill->ill_ipallmulti_cnt == 0) {
		/* ip_purge_allmulti could have removed them all */
		mutex_exit(&ill->ill_lock);
		goto done;
	}
	ill->ill_ipallmulti_cnt--;
	if (ill->ill_ipallmulti_cnt == 0) {
		/* Last one */
		ilm = ill->ill_ipallmulti_ilm;
		ill->ill_ipallmulti_ilm = NULL;
	} else {
		ilm = NULL;
	}
	mutex_exit(&ill->ill_lock);
	/* Delete outside ill_lock since ip_delmulti takes other locks */
	if (ilm != NULL)
		(void) ip_delmulti(ilm);

done:
	ill_refrele(ill);
	return (0);
}
1237
1238 /*
1239 * Delete the allmulti memberships that were added as part of
1240 * ip_join_allmulti().
1241 */
void
ip_purge_allmulti(ill_t *ill)
{
	ilm_t *ilm;

	/* Must be the exclusive writer for this ill */
	ASSERT(IAM_WRITER_ILL(ill));

	/* Detach the allmulti state under ill_lock... */
	mutex_enter(&ill->ill_lock);
	ilm = ill->ill_ipallmulti_ilm;
	ill->ill_ipallmulti_ilm = NULL;
	ill->ill_ipallmulti_cnt = 0;
	mutex_exit(&ill->ill_lock);

	/* ... and delete the ilm after dropping the lock. */
	if (ilm != NULL)
		(void) ip_delmulti(ilm);
}
1258
1259 /*
1260 * Create a dlpi message with room for phys+sap. Later
1261 * we will strip the sap for those primitives which
1262 * only need a physical address.
1263 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t *mp;
	uint32_t hw_addr_length;
	char *cp;
	uint32_t offset;
	uint32_t length;	/* size of the fixed request structure */
	uint32_t size;		/* total allocation incl. trailing address */

	/* Default outputs: no address area (promisc requests have none). */
	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	/* Size the message: multi requests carry the hw address inline. */
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
		length = sizeof (dl_enabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_DISABMULTI_REQ:
		length = sizeof (dl_disabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		size = length = sizeof (dl_promiscon_req_t);
		break;
	default:
		/* Unsupported primitive */
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	/* The address area (if any) immediately follows the fixed part. */
	offset = length;

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		/* on/off requests share the same layout; level is MULTI */
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}
1340
1341 /*
1342 * Rejoin any groups for which we have ilms.
1343 *
1344 * This is only needed for IPMP when the cast_ill changes since that
1345 * change is invisible to the ilm. Other interface changes are handled
1346 * by conn_update_ill.
1347 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t *ilm;
	char addrbuf[INET6_ADDRSTRLEN];

	/* We are handling the recovery now; clear the pending flag. */
	ill->ill_need_recover_multicast = 0;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more then one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to join unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		/* The unspecified group address denotes allmulti. */
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			(void) ill_join_allmulti(ill);
		} else {
			/* Restart the MLD/IGMP state machine for the group */
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);

			/* Re-program the driver's multicast filter */
			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_ENABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}
1389
1390 /*
1391 * The opposite of ill_recover_multicast() -- leaves all multicast groups
1392 * that were explicitly joined.
1393 *
1394 * This is only needed for IPMP when the cast_ill changes since that
1395 * change is invisible to the ilm. Other interface changes are handled
1396 * by conn_update_ill.
1397 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t *ilm;
	char addrbuf[INET6_ADDRSTRLEN];

	/* Flag that ill_recover_multicast() must run later to re-join. */
	ill->ill_need_recover_multicast = 1;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more then one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to leave unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		/* The unspecified group address denotes allmulti. */
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			ill_leave_allmulti(ill);
		} else {
			/* Stop the MLD/IGMP state machine for the group */
			if (ill->ill_isv6)
				mld_leavegroup(ilm);
			else
				igmp_leavegroup(ilm);

			/* Remove the group from the driver's filter */
			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_DISABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}
1439
1440 /*
1441 * Interface used by IP input/output.
1442 * Returns true if there is a member on the ill for any zoneid.
1443 */
1444 boolean_t
ill_hasmembers_v6(ill_t * ill,const in6_addr_t * v6group)1445 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
1446 {
1447 ilm_t *ilm;
1448
1449 rw_enter(&ill->ill_mcast_lock, RW_READER);
1450 ilm = ilm_lookup(ill, v6group, ALL_ZONES);
1451 rw_exit(&ill->ill_mcast_lock);
1452 return (ilm != NULL);
1453 }
1454
1455 /*
1456 * Interface used by IP input/output.
1457 * Returns true if there is a member on the ill for any zoneid.
1458 *
1459 * The group and source can't be INADDR_ANY here so no need to translate to
1460 * the unspecified IPv6 address.
1461 */
1462 boolean_t
ill_hasmembers_v4(ill_t * ill,ipaddr_t group)1463 ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
1464 {
1465 in6_addr_t v6group;
1466
1467 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1468 return (ill_hasmembers_v6(ill, &v6group));
1469 }
1470
1471 /*
1472 * Interface used by IP input/output.
1473 * Returns true if there is a member on the ill for any zoneid except skipzone.
1474 */
1475 boolean_t
ill_hasmembers_otherzones_v6(ill_t * ill,const in6_addr_t * v6group,zoneid_t skipzone)1476 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
1477 zoneid_t skipzone)
1478 {
1479 ilm_t *ilm;
1480
1481 rw_enter(&ill->ill_mcast_lock, RW_READER);
1482 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1483 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1484 ilm->ilm_zoneid != skipzone) {
1485 rw_exit(&ill->ill_mcast_lock);
1486 return (B_TRUE);
1487 }
1488 }
1489 rw_exit(&ill->ill_mcast_lock);
1490 return (B_FALSE);
1491 }
1492
1493 /*
1494 * Interface used by IP input/output.
1495 * Returns true if there is a member on the ill for any zoneid except skipzone.
1496 *
1497 * The group and source can't be INADDR_ANY here so no need to translate to
1498 * the unspecified IPv6 address.
1499 */
1500 boolean_t
ill_hasmembers_otherzones_v4(ill_t * ill,ipaddr_t group,zoneid_t skipzone)1501 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
1502 {
1503 in6_addr_t v6group;
1504
1505 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1506 return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
1507 }
1508
1509 /*
1510 * Interface used by IP input.
1511 * Returns the next numerically larger zoneid that has a member. If none exist
1512 * then returns -1 (ALL_ZONES).
1513 * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
1514 * to find the first zoneid which has a member, and then pass that in for
1515 * subsequent calls until ALL_ZONES is returned.
1516 *
1517 * The implementation of ill_hasmembers_nextzone() assumes the ilms
1518 * are sorted by zoneid for efficiency.
1519 */
1520 zoneid_t
ill_hasmembers_nextzone_v6(ill_t * ill,const in6_addr_t * v6group,zoneid_t zoneid)1521 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
1522 zoneid_t zoneid)
1523 {
1524 ilm_t *ilm;
1525
1526 rw_enter(&ill->ill_mcast_lock, RW_READER);
1527 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1528 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
1529 ilm->ilm_zoneid > zoneid) {
1530 zoneid = ilm->ilm_zoneid;
1531 rw_exit(&ill->ill_mcast_lock);
1532 return (zoneid);
1533 }
1534 }
1535 rw_exit(&ill->ill_mcast_lock);
1536 return (ALL_ZONES);
1537 }
1538
1539 /*
1540 * Interface used by IP input.
1541 * Returns the next numerically larger zoneid that has a member. If none exist
1542 * then returns -1 (ALL_ZONES).
1543 *
1544 * The group and source can't be INADDR_ANY here so no need to translate to
1545 * the unspecified IPv6 address.
1546 */
1547 zoneid_t
ill_hasmembers_nextzone_v4(ill_t * ill,ipaddr_t group,zoneid_t zoneid)1548 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
1549 {
1550 in6_addr_t v6group;
1551
1552 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
1553
1554 return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
1555 }
1556
1557 /*
1558 * Find an ilm matching the ill, group, and zoneid.
1559 */
1560 static ilm_t *
ilm_lookup(ill_t * ill,const in6_addr_t * v6group,zoneid_t zoneid)1561 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
1562 {
1563 ilm_t *ilm;
1564
1565 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1566
1567 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1568 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
1569 continue;
1570 if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
1571 continue;
1572
1573 ASSERT(ilm->ilm_ill == ill);
1574 return (ilm);
1575 }
1576 return (NULL);
1577 }
1578
1579 /*
1580 * How many members on this ill?
1581 * Since each shared-IP zone has a separate ilm for the same group/ill
1582 * we can have several.
1583 */
1584 static int
ilm_numentries(ill_t * ill,const in6_addr_t * v6group)1585 ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
1586 {
1587 ilm_t *ilm;
1588 int i = 0;
1589
1590 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
1591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
1592 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
1593 i++;
1594 }
1595 }
1596 return (i);
1597 }
1598
1599 /* Caller guarantees that the group is not already on the list */
/*
 * Allocate a new ilm for (ill, v6group, zoneid), initialize its filter
 * state from the ilg (if any), and insert it into the ill's list keeping
 * the list sorted by zoneid. Returns NULL on allocation failure.
 */
static ilm_t *
ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
{
	ilm_t *ilm;
	ilm_t *ilm_cur;
	ilm_t **ilm_ptpn;	/* pointer to the link we'll insert after */

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter when there is a non-empty source list. */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;	/* no report timer pending */
	ilm->ilm_rtx.rtx_timer = INFINITY;	/* no retransmit pending */

	ilm->ilm_ill = ill;
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	/* Account for this ilm in the ill's reference counting */
	ill->ill_ilm_cnt++;

	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/* The ill/ipif could have just been marked as condemned */

	/*
	 * To make ill_hasmembers_nextzone_v6 work we keep the list
	 * sorted by zoneid.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	return (ilm);
}
1663
1664 void
ilm_inactive(ilm_t * ilm)1665 ilm_inactive(ilm_t *ilm)
1666 {
1667 FREE_SLIST(ilm->ilm_filter);
1668 FREE_SLIST(ilm->ilm_pendsrcs);
1669 FREE_SLIST(ilm->ilm_rtx.rtx_allow);
1670 FREE_SLIST(ilm->ilm_rtx.rtx_block);
1671 ilm->ilm_ipst = NULL;
1672 mi_free((char *)ilm);
1673 }
1674
1675 /*
1676 * Unlink ilm and free it.
1677 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t *ill = ilm->ilm_ill;
	ilm_t **ilmp;
	boolean_t need_wakeup;

	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Find the link pointing at this ilm; it must be on the list. */
	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;

	*ilmp = ilm->ilm_next;

	mutex_enter(&ill->ill_lock);
	/*
	 * if we are the last reference to the ill, we may need to wakeup any
	 * pending FREE or unplumb operations. This is because conn_update_ill
	 * bails if there is a ilg_delete_all in progress.
	 */
	need_wakeup = B_FALSE;
	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	ASSERT(ill->ill_ilm_cnt > 0);
	ill->ill_ilm_cnt--;
	if (ILL_FREE_OK(ill))
		need_wakeup = B_TRUE;

	ilm_inactive(ilm); /* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}
1719
1720 /*
1721 * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
1722 * Applies to both IPv4 and IPv6, although ifaddr is only used with
1723 * IPv4.
1724 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1725 * On error it sets *errorp.
1726 */
static ill_t *
ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
    zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
{
	ill_t *ill;
	ipaddr_t v4group;

	if (IN6_IS_ADDR_V4MAPPED(group)) {
		/* IPv4 group: prefer ifindex, then ifaddr, then routing. */
		IN6_V4MAPPED_TO_IPADDR(group, v4group);

		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_FALSE, ipst);
		} else if (ifaddr != INADDR_ANY) {
			ipif_t *ipif;

			/* Translate the interface address to its ill */
			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
			if (ipif == NULL) {
				ill = NULL;
			} else {
				ill = ipif->ipif_ill;
				/* Hold the ill before releasing the ipif */
				ill_refhold(ill);
				ipif_refrele(ipif);
			}
		} else {
			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
			    NULL);
		}
	} else {
		/* IPv6 group: ifaddr does not apply, only ifindex. */
		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_TRUE, ipst);
		} else {
			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
			    NULL);
		}
	}
	if (ill == NULL) {
		/* Distinguish "no such interface" from "no such address" */
		if (ifindex != 0)
			*errorp = ENXIO;
		else
			*errorp = EADDRNOTAVAIL;
		return (NULL);
	}
	/* operation not supported on the virtual network interface */
	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
		ill_refrele(ill);
		*errorp = EINVAL;
		return (NULL);
	}
	return (ill);
}
1779
1780 /*
1781 * Looks up the appropriate ill given an interface index (or interface address)
1782 * and multicast group. On success, returns 0, with *illpp pointing to the
1783 * found struct. On failure, returns an errno and *illpp is set to NULL.
1784 *
1785 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
1786 *
1787 * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
1788 * case of IPv4.
1789 */
1790 int
ip_opt_check(conn_t * connp,const in6_addr_t * v6group,const in6_addr_t * v6src,ipaddr_t ifaddr,uint_t ifindex,ill_t ** illpp)1791 ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
1792 const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
1793 {
1794 boolean_t src_unspec;
1795 ill_t *ill = NULL;
1796 ip_stack_t *ipst = connp->conn_netstack->netstack_ip;
1797 int error = 0;
1798
1799 *illpp = NULL;
1800
1801 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);
1802
1803 if (IN6_IS_ADDR_V4MAPPED(v6group)) {
1804 ipaddr_t v4group;
1805 ipaddr_t v4src;
1806
1807 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1808 return (EINVAL);
1809 IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
1810 if (src_unspec) {
1811 v4src = INADDR_ANY;
1812 } else {
1813 IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
1814 }
1815 if (!CLASSD(v4group) || CLASSD(v4src))
1816 return (EINVAL);
1817 } else {
1818 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
1819 return (EINVAL);
1820 if (!IN6_IS_ADDR_MULTICAST(v6group) ||
1821 IN6_IS_ADDR_MULTICAST(v6src)) {
1822 return (EINVAL);
1823 }
1824 }
1825
1826 ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
1827 ipst, &error);
1828 *illpp = ill;
1829 return (error);
1830 }
1831
/*
 * Retrieve the source filter (mode and source list) for the conn's
 * membership in 'group', copying out at most the caller-supplied number
 * of sources into gf (protocol-independent API) or imsf (v4-only API).
 * Exactly one of gf/imsf is non-NULL.
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;
	ipaddr_t ifaddr;
	uint_t ifindex;

	/* Normalize the two API flavors into common locals. */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* No need to use ill_mcast_serializer for the reader */
	rw_enter(&connp->conn_ilg_lock, RW_READER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy out min(outsrcs, sl_numsrc) source addresses. */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (issin6) {
				/* v6 API: fill a sockaddr_in6 per source */
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				/* Internal storage is v4-mapped IPv6 */
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full count even if we truncated the copy. */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	rw_exit(&connp->conn_ilg_lock);

	return (0);
}
1913
1914 /*
1915 * Common for IPv4 and IPv6.
1916 */
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
    boolean_t issin6)
{
	ilg_t *ilg;
	int i, err, infmode, new_fmode;
	uint_t insrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_group, is_v4only_api;
	ilg_stat_t ilgstat;
	ilm_t *ilm;
	ipaddr_t ifaddr;
	uint_t ifindex;

	/*
	 * Normalize the two API flavors (protocol-independent gf vs.
	 * v4-only imsf) into common locals; exactly one is non-NULL.
	 */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);

	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (leave_group) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (0);
		}
		/* No existing membership: create a new ilg. */
		ilg = conn_ilg_alloc(connp, &err);
		if (ilg == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (err);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *group;
		ilg->ilg_ill = ill;
		ilg->ilg_ifaddr = ifaddr;
		ilg->ilg_ifindex = ifindex;
	} else if (leave_group) {
		/*
		 * Make sure we have the correct serializer. The ill argument
		 * might not match ilg_ill.
		 */
		ilg_refhold(ilg);
		mutex_exit(&ill->ill_mcast_serializer);
		ill = ilg->ilg_ill;
		rw_exit(&connp->conn_ilg_lock);

		/* Re-acquire in the required order: serializer, then lock. */
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg_delete(connp, ilg, NULL);
		ilg_refrele(ilg);
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can send any
		 * deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				mutex_exit(&ill->ill_mcast_serializer);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	/* Install the requested source list into the ilg. */
	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		for (i = 0; i < insrcs; i++) {
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				/* Store v4 sources as v4-mapped IPv6 */
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &err);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL) {
			/* Undo the membership we just added */
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		err = ENXIO;
		goto free_and_exit;
	}

	if (ilm != NULL) {
		if (ilg->ilg_ill == NULL) {
			/* some other thread is re-attaching this. */
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			err = 0;
			goto free_and_exit;
		}
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			if (ilg->ilg_ilm == NULL) {
				ilg->ilg_ilm = ilm;
				ilm->ilm_ifaddr = ifaddr; /* For netstat */
			} else {
				/* some other thread is re-attaching this. */
				rw_exit(&connp->conn_ilg_lock);
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
				err = 0;
				goto free_and_exit;
			}
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(err != 0);
		/*
		 * Failed to allocate the ilm.
		 * Restore the original filter state, or delete the
		 * newly-created ilg.
		 * If ENETDOWN just clear ill_ilg since so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW) {
			if (err == ENETDOWN) {
				ilg->ilg_ill = NULL;
				err = 0;
			} else {
				ilg_delete(connp, ilg, NULL);
			}
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	mutex_exit(&ill->ill_mcast_serializer);
	/* Locks dropped: flush any deferred DLPI/IP traffic. */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}
2190
2191 /*
2192 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
2193 */
2194 /* ARGSUSED */
2195 int
ip_sioctl_msfilter(ipif_t * ipif,sin_t * dummy_sin,queue_t * q,mblk_t * mp,ip_ioctl_cmd_t * ipip,void * ifreq)2196 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
2197 ip_ioctl_cmd_t *ipip, void *ifreq)
2198 {
2199 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2200 /* existence verified in ip_wput_nondata() */
2201 mblk_t *data_mp = mp->b_cont->b_cont;
2202 int datalen, err, cmd, minsize;
2203 uint_t expsize = 0;
2204 conn_t *connp;
2205 boolean_t isv6, is_v4only_api, getcmd;
2206 struct sockaddr_in *gsin;
2207 struct sockaddr_in6 *gsin6;
2208 ipaddr_t v4group;
2209 in6_addr_t v6group;
2210 struct group_filter *gf = NULL;
2211 struct ip_msfilter *imsf = NULL;
2212 mblk_t *ndp;
2213 ill_t *ill;
2214
2215 connp = Q_TO_CONN(q);
2216 err = ip_msfilter_ill(connp, mp, ipip, &ill);
2217 if (err != 0)
2218 return (err);
2219
2220 if (data_mp->b_cont != NULL) {
2221 if ((ndp = msgpullup(data_mp, -1)) == NULL)
2222 return (ENOMEM);
2223 freemsg(data_mp);
2224 data_mp = ndp;
2225 mp->b_cont->b_cont = data_mp;
2226 }
2227
2228 cmd = iocp->ioc_cmd;
2229 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
2230 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
2231 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
2232 datalen = MBLKL(data_mp);
2233
2234 if (datalen < minsize)
2235 return (EINVAL);
2236
2237 /*
2238 * now we know we have at least have the initial structure,
2239 * but need to check for the source list array.
2240 */
2241 if (is_v4only_api) {
2242 imsf = (struct ip_msfilter *)data_mp->b_rptr;
2243 isv6 = B_FALSE;
2244 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
2245 } else {
2246 gf = (struct group_filter *)data_mp->b_rptr;
2247 if (gf->gf_group.ss_family == AF_INET6) {
2248 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2249 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
2250 } else {
2251 isv6 = B_FALSE;
2252 }
2253 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
2254 }
2255 if (datalen < expsize)
2256 return (EINVAL);
2257
2258 if (isv6) {
2259 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2260 v6group = gsin6->sin6_addr;
2261 if (getcmd) {
2262 err = ip_get_srcfilter(connp, gf, NULL, &v6group,
2263 B_TRUE);
2264 } else {
2265 err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
2266 B_TRUE);
2267 }
2268 } else {
2269 boolean_t issin6 = B_FALSE;
2270 if (is_v4only_api) {
2271 v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
2272 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2273 } else {
2274 if (gf->gf_group.ss_family == AF_INET) {
2275 gsin = (struct sockaddr_in *)&gf->gf_group;
2276 v4group = (ipaddr_t)gsin->sin_addr.s_addr;
2277 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2278 } else {
2279 gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
2280 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
2281 v4group);
2282 issin6 = B_TRUE;
2283 }
2284 }
2285 /*
2286 * INADDR_ANY is represented as the IPv6 unspecifed addr.
2287 */
2288 if (v4group == INADDR_ANY)
2289 v6group = ipv6_all_zeros;
2290 else
2291 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
2292
2293 if (getcmd) {
2294 err = ip_get_srcfilter(connp, gf, imsf, &v6group,
2295 issin6);
2296 } else {
2297 err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
2298 issin6);
2299 }
2300 }
2301 ill_refrele(ill);
2302
2303 return (err);
2304 }
2305
2306 /*
2307 * Determine the ill for the SIOC*MSFILTER ioctls
2308 *
2309 * Returns an error for IS_UNDER_IPMP interfaces.
2310 *
2311 * Finds the ill based on information in the ioctl headers.
2312 */
static int
ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    ill_t **illp)
{
	int cmd = ipip->ipi_cmd;
	int err = 0;
	ill_t *ill;
	/* caller has verified this mblk exists */
	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4group;
	in6_addr_t v6group;
	uint32_t index;
	ip_stack_t *ipst;

	ipst = connp->conn_netstack->netstack_ip;

	/* Default to "no ill" so that error paths leave *illp sane. */
	*illp = NULL;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_family == AF_INET6)
			return (EAFNOSUPPORT);

		/*
		 * The v4-only API identifies the interface by an IPv4
		 * address (imsf_interface); map the group to v4-mapped
		 * IPv6 form for the common lookup routine.
		 */
		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4group = imsf->imsf_multiaddr.s_addr;
		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
		    ipst, &err);
		/*
		 * A specific interface address that resolved to no ill is
		 * reported as ENXIO; an INADDR_ANY interface keeps whatever
		 * error ill_mcast_lookup() chose.
		 */
		if (ill == NULL && v4addr != INADDR_ANY)
			err = ENXIO;
	} else {
		/*
		 * The protocol-independent API (SIOC[GS]MSFILTER)
		 * identifies the interface by index, and the group by a
		 * sockaddr_storage that may be AF_INET or AF_INET6.
		 */
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6group = sin6->sin6_addr;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&gf->gf_group;
			v4group = sin->sin_addr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		} else {
			return (EAFNOSUPPORT);
		}
		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
		    IPCL_ZONEID(connp), ipst, &err);
	}
	/*
	 * NOTE(review): the ill returned by ill_mcast_lookup() is
	 * presumably refheld — the callers here release it with
	 * ill_refrele() — confirm against ill_mcast_lookup().
	 */
	*illp = ill;
	return (err);
}
2373
2374 /*
2375 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
2376 * in in two stages, as the first copyin tells us the size of the attached
2377 * source buffer. This function is called by ip_wput_nondata() after the
2378 * first copyin has completed; it figures out how big the second stage
2379 * needs to be, and kicks it off.
2380 *
2381 * In some cases (numsrc < 2), the second copyin is not needed as the
2382 * first one gets a complete structure containing 1 source addr.
2383 *
2384 * The function returns 0 if a second copyin has been started (i.e. there's
2385 * no more work to be done right now), or 1 if the second copyin is not
2386 * needed and ip_wput_nondata() can continue its processing.
2387 */
2388 int
ip_copyin_msfilter(queue_t * q,mblk_t * mp)2389 ip_copyin_msfilter(queue_t *q, mblk_t *mp)
2390 {
2391 struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
2392 int cmd = iocp->ioc_cmd;
2393 /* validity of this checked in ip_wput_nondata() */
2394 mblk_t *mp1 = mp->b_cont->b_cont;
2395 int copysize = 0;
2396 int offset;
2397
2398 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
2399 struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
2400 if (gf->gf_numsrc >= 2) {
2401 offset = sizeof (struct group_filter);
2402 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
2403 }
2404 } else {
2405 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
2406 if (imsf->imsf_numsrc >= 2) {
2407 offset = sizeof (struct ip_msfilter);
2408 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
2409 }
2410 }
2411 if (copysize > 0) {
2412 mi_copyin_n(q, mp, offset, copysize);
2413 return (0);
2414 }
2415 return (1);
2416 }
2417
2418 /*
2419 * Handle the following optmgmt:
2420 * IP_ADD_MEMBERSHIP must not have joined already
2421 * IPV6_JOIN_GROUP must not have joined already
2422 * MCAST_JOIN_GROUP must not have joined already
2423 * IP_BLOCK_SOURCE must have joined already
2424 * MCAST_BLOCK_SOURCE must have joined already
2425 * IP_JOIN_SOURCE_GROUP may have joined already
2426 * MCAST_JOIN_SOURCE_GROUP may have joined already
2427 *
2428 * fmode and src parameters may be used to determine which option is
2429 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
2430 * are functionally equivalent):
2431 * opt fmode v6src
2432 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE unspecified
2433 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified
2434 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified
2435 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
2436 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
2437 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
2438 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
2439 *
2440 * Changing the filter mode is not allowed; if a matching ilg already
2441 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2442 *
2443 * Verifies that there is a source address of appropriate scope for
2444 * the group; if not, EADDRNOTAVAIL is returned.
2445 *
2446 * The interface to be used may be identified by an IPv4 address or by an
2447 * interface index.
2448 *
2449 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2450 * with the IPv4 address. Assumes that if v6group is v4-mapped,
2451 * v6src is also v4-mapped.
2452 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t *ill;
	char buf[INET6_ADDRSTRLEN];
	int err;

	/* Validate the arguments and resolve them to a refheld ill. */
	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (err != 0) {
		ip1dbg(("ip_opt_add_group: no ill for group %s/"
		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
		    sizeof (buf)), ifindex));
		return (err);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ill_refrele(ill);
		return (0);
	}
	mutex_enter(&ill->ill_mcast_serializer);
	/*
	 * Multicast groups may not be joined on interfaces that are either
	 * already underlying interfaces in an IPMP group, or in the process
	 * of joining the IPMP group. The latter condition is enforced by
	 * checking the value of ill->ill_grp_pending under the
	 * ill_mcast_serializer lock. We cannot serialize the
	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
	 * ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
	 * will take the ill_g_lock itself. Instead, we hold the
	 * ill_mcast_serializer.
	 */
	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
		    in6_addr_t *, v6group);
		mutex_exit(&ill->ill_mcast_serializer);
		ill_refrele(ill);
		return (EADDRNOTAVAIL);
	}
	/* Perform the actual join (and any source-filter setup). */
	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * We have done an addmulti_impl and/or delmulti_impl.
	 * All locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_refrele(ill);
	return (err);
}
2510
2511 /*
2512 * Common for IPv6 and IPv4.
2513 * Here we handle ilgs that are still attached to their original ill
2514 * (the one ifaddr/ifindex points at), as well as detached ones.
2515 * The detached ones might have been attached to some other ill.
2516 */
static int
ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
    ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src)
{
	ilg_t *ilg;
	boolean_t leaving;
	ilm_t *ilm;
	ill_t *ill;
	int err = 0;

retry:
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		/*
		 * Since we didn't have any ilg we now do the error checks
		 * to determine the best errno.
		 */
		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
		    &ill);
		if (ill != NULL) {
			/* The only error was a missing ilg for the group */
			ill_refrele(ill);
			err = EADDRNOTAVAIL;
		}
		return (err);
	}

	/* If the ilg is attached then we serialize using that ill */
	ill = ilg->ilg_ill;
	if (ill != NULL) {
		/* Prevent the ill and ilg from being freed */
		ill_refhold(ill);
		ilg_refhold(ilg);
		rw_exit(&connp->conn_ilg_lock);
		/*
		 * Lock order: ill_mcast_serializer must be taken without
		 * holding conn_ilg_lock, so drop and re-take it, then
		 * re-validate the ilg.
		 */
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		if (ilg->ilg_condemned) {
			/* Disappeared */
			ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			ill_refrele(ill);
			goto retry;
		}
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already. If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		leaving = B_TRUE;
	} else {
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			/* ilg was only refheld when we serialized on ill */
			if (ill != NULL)
				ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			goto done;
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			/* Removing last include source: full leave */
			leaving = B_TRUE;
			v6src = NULL;
		} else {
			leaving = B_FALSE;
		}
	}
	/* Detach the ilm from the ilg before the ilg goes away */
	ilm = ilg->ilg_ilm;
	if (leaving)
		ilg->ilg_ilm = NULL;

	ilg_delete(connp, ilg, v6src);
	if (ill != NULL)
		ilg_refrele(ilg);
	rw_exit(&connp->conn_ilg_lock);

	if (ilm != NULL) {
		ASSERT(ill != NULL);
		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
	}
done:
	if (ill != NULL) {
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can
		 * send any deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		ill_refrele(ill);
	}
	return (err);
}
2621
2622 /*
2623 * Handle the following optmgmt:
2624 * IP_DROP_MEMBERSHIP will leave
2625 * IPV6_LEAVE_GROUP will leave
2626 * MCAST_LEAVE_GROUP will leave
2627 * IP_UNBLOCK_SOURCE will not leave
2628 * MCAST_UNBLOCK_SOURCE will not leave
2629 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source)
2630 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source)
2631 *
2632 * fmode and src parameters may be used to determine which option is
2633 * being set, as follows:
2634 * opt fmode v6src
2635 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE unspecified
2636 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified
2637 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified
2638 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr
2639 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr
2640 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr
2641 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
2642 *
2643 * Changing the filter mode is not allowed; if a matching ilg already
2644 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
2645 *
2646 * The interface to be used may be identified by an IPv4 address or by an
2647 * interface index.
2648 *
2649 * Handles IPv4-mapped IPv6 multicast addresses by associating them
2650 * with the IPv4 address. Assumes that if v6group is v4-mapped,
2651 * v6src is also v4-mapped.
2652 */
2653 int
ip_opt_delete_group(conn_t * connp,boolean_t checkonly,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex,mcast_record_t fmode,const in6_addr_t * v6src)2654 ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
2655 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
2656 mcast_record_t fmode, const in6_addr_t *v6src)
2657 {
2658
2659 /*
2660 * In the normal case below we don't check for the ill existing.
2661 * Instead we look for an existing ilg in _excl.
2662 * If checkonly we sanity check the arguments
2663 */
2664 if (checkonly) {
2665 ill_t *ill;
2666 int err;
2667
2668 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
2669 &ill);
2670 /*
2671 * do not do operation, just pretend to - new T_CHECK semantics.
2672 * ip_opt_check is considered a good enough "check" here.
2673 */
2674 if (ill != NULL)
2675 ill_refrele(ill);
2676 return (err);
2677 }
2678 return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex,
2679 fmode, v6src));
2680 }
2681
2682 /*
2683 * Group mgmt for upper conn that passes things down
2684 * to the interface multicast list (and DLPI)
2685 * These routines can handle new style options that specify an interface name
2686 * as opposed to an interface address (needed for general handling of
2687 * unnumbered interfaces.)
2688 */
2689
2690 /*
2691 * Add a group to an upper conn group data structure and pass things down
2692 * to the interface multicast list (and DLPI)
2693 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
2694 */
static int
ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	int error = 0;
	ilg_t *ilg;
	ilg_stat_t ilgstat;
	slist_t *new_filter = NULL;
	int new_fmode;
	ilm_t *ilm;

	/* Joining is meaningless on a non-multicast-capable interface */
	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/* conn_ilg_lock protects the ilg list. */
	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added. Figure out our rules based
	 * on fmode and src params. Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		rw_exit(&connp->conn_ilg_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join on this group/interface: create a new ilg */
		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (error);
		}
		ilg->ilg_ifindex = ifindex;
		ilg->ilg_ifaddr = ifaddr;
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			/* Source-specific join: seed a one-entry filter */
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ill = ill;
	} else {
		int index;

		/* Existing join: only a compatible source add is allowed */
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			/* Duplicate source in the filter list */
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL) {
			/* Undo the join we just did; nobody references it */
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		error = ENXIO;
		goto free_and_exit;
	}
	if (ilm != NULL) {
		if (ilg->ilg_ill == NULL) {
			/* some other thread is re-attaching this. */
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			error = 0;
			goto free_and_exit;
		}
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			if (ilg->ilg_ilm == NULL) {
				ilg->ilg_ilm = ilm;
				ilm->ilm_ifaddr = ifaddr; /* For netstat */
			} else {
				/* some other thread is re-attaching this. */
				rw_exit(&connp->conn_ilg_lock);
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
				error = 0;
				goto free_and_exit;
			}
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(error != 0);
		/*
		 * Failed to allocate the ilm.
		 * Need to undo what we did before calling ip_addmulti()
		 * If ENETDOWN just clear ill_ilg since so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
			ilg->ilg_ill = NULL;
			error = 0;
		} else {
			/*
			 * For a NEW join, delete the whole ilg; for a
			 * CHANGE, remove only the source we just added.
			 */
			in6_addr_t delsrc =
			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;

			ilg_delete(connp, ilg, &delsrc);
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	l_free(new_filter);
	return (error);
}
2887
2888 /*
2889 * Find an IPv4 ilg matching group, ill and source.
2890 * The group and source can't be INADDR_ANY here so no need to translate to
2891 * the unspecified IPv6 address.
2892 */
2893 boolean_t
conn_hasmembers_ill_withsrc_v4(conn_t * connp,ipaddr_t group,ipaddr_t src,ill_t * ill)2894 conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
2895 ill_t *ill)
2896 {
2897 in6_addr_t v6group, v6src;
2898 int i;
2899 boolean_t isinlist;
2900 ilg_t *ilg;
2901
2902 rw_enter(&connp->conn_ilg_lock, RW_READER);
2903 IN6_IPADDR_TO_V4MAPPED(group, &v6group);
2904 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
2905 if (ilg->ilg_condemned)
2906 continue;
2907
2908 /* ilg_ill could be NULL if an add is in progress */
2909 if (ilg->ilg_ill != ill)
2910 continue;
2911
2912 /* The callers use upper ill for IPMP */
2913 ASSERT(!IS_UNDER_IPMP(ill));
2914 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
2915 if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
2916 /* no source filter, so this is a match */
2917 rw_exit(&connp->conn_ilg_lock);
2918 return (B_TRUE);
2919 }
2920 break;
2921 }
2922 }
2923 if (ilg == NULL) {
2924 rw_exit(&connp->conn_ilg_lock);
2925 return (B_FALSE);
2926 }
2927
2928 /*
2929 * we have an ilg with matching ill and group; but
2930 * the ilg has a source list that we must check.
2931 */
2932 IN6_IPADDR_TO_V4MAPPED(src, &v6src);
2933 isinlist = B_FALSE;
2934 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
2935 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
2936 isinlist = B_TRUE;
2937 break;
2938 }
2939 }
2940
2941 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
2942 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
2943 rw_exit(&connp->conn_ilg_lock);
2944 return (B_TRUE);
2945 }
2946 rw_exit(&connp->conn_ilg_lock);
2947 return (B_FALSE);
2948 }
2949
2950 /*
2951 * Find an IPv6 ilg matching group, ill, and source
2952 */
boolean_t
conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int i;
	boolean_t isinlist;
	ilg_t *ilg;

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;

		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				rw_exit(&connp->conn_ilg_lock);
				return (B_TRUE);
			}
			/* Matching group found; check its source list below */
			break;
		}
	}
	if (ilg == NULL) {
		/* No ilg for this group/ill pair at all */
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* Source matches if listed in INCLUDE mode or absent in EXCLUDE */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}
	rw_exit(&connp->conn_ilg_lock);
	return (B_FALSE);
}
3006
3007 /*
3008 * Find an ilg matching group and ifaddr/ifindex.
3009 * We check both ifaddr and ifindex even though at most one of them
3010 * will be non-zero; that way we always find the right one.
3011 */
3012 static ilg_t *
ilg_lookup(conn_t * connp,const in6_addr_t * v6group,ipaddr_t ifaddr,uint_t ifindex)3013 ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
3014 uint_t ifindex)
3015 {
3016 ilg_t *ilg;
3017
3018 ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));
3019
3020 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
3021 if (ilg->ilg_condemned)
3022 continue;
3023
3024 if (ilg->ilg_ifaddr == ifaddr &&
3025 ilg->ilg_ifindex == ifindex &&
3026 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
3027 return (ilg);
3028 }
3029 return (NULL);
3030 }
3031
3032 /*
3033 * If a source address is passed in (src != NULL and src is not
3034 * unspecified), remove the specified src addr from the given ilg's
3035 * filter list, else delete the ilg.
3036 */
3037 static void
ilg_delete(conn_t * connp,ilg_t * ilg,const in6_addr_t * src)3038 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
3039 {
3040 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
3041 ASSERT(ilg->ilg_ptpn != NULL);
3042 ASSERT(!ilg->ilg_condemned);
3043
3044 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
3045 FREE_SLIST(ilg->ilg_filter);
3046 ilg->ilg_filter = NULL;
3047
3048 ASSERT(ilg->ilg_ilm == NULL);
3049 ilg->ilg_ill = NULL;
3050 ilg->ilg_condemned = B_TRUE;
3051
3052 /* ilg_inactive will unlink from the list */
3053 ilg_refrele(ilg);
3054 } else {
3055 l_remove(ilg->ilg_filter, src);
3056 }
3057 }
3058
3059 /*
3060 * Called from conn close. No new ilg can be added or removed
3061 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
3062 * will return error if conn has started closing.
3063 *
3064 * We handle locking as follows.
3065 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
3066 * proceed with the ilm part of the delete we hold a reference on both the ill
3067 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
3068 * being deleted.
3069 *
3070 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
3071 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
3072 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
3073 * that case we delete the ilg here, which makes ilg_add discover that the ilg
3074 * has disappeared when ip_addmulti returns, so it will discard the ilm it just
3075 * added.
3076 */
void
ilg_delete_all(conn_t *connp)
{
	ilg_t *ilg, *next_ilg, *held_ilg;
	ilm_t *ilm;
	ill_t *ill;
	boolean_t need_refrele;

	/*
	 * Can not run if there is a conn_update_ill already running.
	 * Wait for it to complete. Caller should have already set CONN_CLOSING
	 * which prevents any new threads to run in conn_update_ill.
	 */
	mutex_enter(&connp->conn_lock);
	ASSERT(connp->conn_state_flags & CONN_CLOSING);
	while (connp->conn_state_flags & CONN_UPDATE_ILL)
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	mutex_exit(&connp->conn_lock);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = connp->conn_ilg;
	held_ilg = NULL;
	while (ilg != NULL) {
		if (ilg->ilg_condemned) {
			/* Another thread is already tearing this one down */
			ilg = ilg->ilg_next;
			continue;
		}
		/* If the ilg is detached then no need to serialize */
		if (ilg->ilg_ilm == NULL) {
			next_ilg = ilg->ilg_next;
			ilg_delete(connp, ilg, NULL);
			ilg = next_ilg;
			continue;
		}
		ill = ilg->ilg_ilm->ilm_ill;

		/*
		 * In order to serialize on the ill we try to enter
		 * and if that fails we unlock and relock and then
		 * check that we still have an ilm.
		 */
		need_refrele = B_FALSE;
		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
			/*
			 * Blocking acquire: hold the ill and ilg so they
			 * survive while we drop conn_ilg_lock, then
			 * re-validate the ilg after reacquiring.
			 */
			ill_refhold(ill);
			need_refrele = B_TRUE;
			ilg_refhold(ilg);
			if (held_ilg != NULL)
				ilg_refrele(held_ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			mutex_enter(&ill->ill_mcast_serializer);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			if (ilg->ilg_condemned) {
				/* Lost the race; move to the next entry */
				ilg = ilg->ilg_next;
				goto next;
			}
		}
		/* Detach the ilm, then delete the ilg under the lock */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		next_ilg = ilg->ilg_next;
		ilg_delete(connp, ilg, NULL);
		ilg = next_ilg;
		rw_exit(&connp->conn_ilg_lock);

		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);

next:
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can send any
		 * deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		if (need_refrele) {
			/* Drop ill reference while we hold no locks */
			ill_refrele(ill);
		}
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}
3162
/*
 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
 * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
 */
static void
ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
{
	ilg_stat_t	ilgstat;
	slist_t		*new_filter;
	int		new_fmode;
	in6_addr_t	v6group;
	ipaddr_t	ifaddr;
	uint_t		ifindex;
	ilm_t		*ilm;
	int		error = 0;

	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL)
		return;

	/*
	 * Save copy of ilg's filter state to pass to other functions, so
	 * we can release conn_ilg_lock now.
	 * Set ilg_ill so that an unplumb can find us.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);
	v6group = ilg->ilg_v6group;
	ifaddr = ilg->ilg_ifaddr;
	ifindex = ilg->ilg_ifindex;
	ilgstat = ILGSTAT_NEW;

	ilg->ilg_ill = ill;
	ASSERT(ilg->ilg_ilm == NULL);
	rw_exit(&connp->conn_ilg_lock);

	/* Join the group; may fail, in which case ilm is NULL */
	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);
	l_free(new_filter);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
	 * when the lock was dropped
	 */
	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
	if (ilg == NULL || ilg->ilg_ilm != NULL) {
		/* ilg gone or already attached; discard the ilm we created */
		if (ilm != NULL) {
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
		return;
	}
	if (ilm == NULL) {
		/* Join failed; clear ilg_ill so a later attempt can retry */
		ilg->ilg_ill = NULL;
		return;
	}
	ilg->ilg_ilm = ilm;
	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
}
3232
/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 *
 * conn is held by caller.
 *
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire and then only set
 * condemned flag.
 *
 * We leave ixa_multicast_ifindex in place. We prefer dropping
 * packets instead of sending them out the wrong interface.
 *
 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
 * ilg_ifindex are zero, indicating that the system should pick the interface,
 * then we attempt to reselect the ill and join on it.
 *
 * Locking notes:
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Note: if this function is called when new ill/ipif's arrive or change status
 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
 * a NULL ilg_ill to an ill/ilm.
 */
static void
conn_update_ill(conn_t *connp, caddr_t arg)
{
	ill_t	*ill = (ill_t *)arg;

	/*
	 * We have to prevent ip_close/ilg_delete_all from running at
	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
	 * If ilg_delete_all got here first, then we have nothing to do.
	 */
	mutex_enter(&connp->conn_lock);
	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
		/* Caller has to wait for ill_ilm_cnt to drop to zero */
		mutex_exit(&connp->conn_lock);
		return;
	}
	connp->conn_state_flags |= CONN_UPDATE_ILL;
	mutex_exit(&connp->conn_lock);

	/* Non-NULL ill means that ill is going away: drop references to it */
	if (ill != NULL)
		ilg_check_detach(connp, ill);

	/* Try to (re)attach any detached ilgs to a usable ill */
	ilg_check_reattach(connp, ill);

	/* Do we need to wake up a thread in ilg_delete_all? */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
	if (connp->conn_state_flags & CONN_CLOSING)
		cv_broadcast(&connp->conn_cv);
	mutex_exit(&connp->conn_lock);
}
3298
/* Detach from an ill that is going away */
static void
ilg_check_detach(conn_t *connp, ill_t *ill)
{
	char	group_buf[INET6_ADDRSTRLEN];
	ilg_t	*ilg, *held_ilg;
	ilm_t	*ilm;

	/* Serialize with other multicast operations on this ill */
	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* Only interested in ilgs attached to the departing ill */
		if (ilg->ilg_ill != ill)
			continue;

		/* Detach from current ill */
		ip1dbg(("ilg_check_detach: detach %s on %s\n",
		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
		    group_buf, sizeof (group_buf)),
		    ilg->ilg_ill->ill_name));

		/* Detach this ilg from the ill/ilm */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg->ilg_ill = NULL;
		if (ilm == NULL)
			continue;

		/* Prevent ilg from disappearing */
		ilg_transfer_hold(held_ilg, ilg);
		held_ilg = ilg;
		rw_exit(&connp->conn_ilg_lock);

		/* Leave the group; drop conn_ilg_lock across the call */
		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * Now that all locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
}
3349
/*
 * Check if there is a place to attach the conn_ilgs. We do this for both
 * detached ilgs and attached ones, since for the latter there could be
 * a better ill to attach them to. oill is non-null if we just detached from
 * that ill.
 */
static void
ilg_check_reattach(conn_t *connp, ill_t *oill)
{
	ill_t	*ill;
	char	group_buf[INET6_ADDRSTRLEN];
	ilg_t	*ilg, *held_ilg;
	ilm_t	*ilm;
	zoneid_t zoneid = IPCL_ZONEID(connp);
	int	error;
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* Check if the conn_ill matches what we would pick now */
		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
		    ilg->ilg_ifindex, zoneid, ipst, &error);

		/*
		 * Make sure the ill is usable for multicast and that
		 * we can send the DL_ADDMULTI_REQ before we create an
		 * ilm.
		 */
		if (ill != NULL &&
		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
			/* Drop locks across ill_refrele */
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			ill_refrele(ill);
			ill = NULL;
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/* Note that ilg could have become condemned */
		}

		/*
		 * Is the ill unchanged, even if both are NULL?
		 * Did we just detach from that ill?
		 */
		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
			if (ill != NULL) {
				/* Drop locks across ill_refrele */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				ill_refrele(ill);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			}
			continue;
		}

		/* Something changed; detach from old first if needed */
		if (ilg->ilg_ill != NULL) {
			ill_t *ill2 = ilg->ilg_ill;
			boolean_t need_refrele = B_FALSE;

			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 */
			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
				ill_refhold(ill2);
				need_refrele = B_TRUE;
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill2->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			/*
			 * Check that nobody else re-attached the ilg while we
			 * dropped the lock.
			 */
			if (ilg->ilg_ill == ill2) {
				ASSERT(!ilg->ilg_condemned);
				/* Detach from current ill */
				ip1dbg(("conn_check_reattach: detach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill2->ill_name));

				ilm = ilg->ilg_ilm;
				ilg->ilg_ilm = NULL;
				ilg->ilg_ill = NULL;
			} else {
				/* Re-attached elsewhere; nothing to delete */
				ilm = NULL;
			}
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			if (ilm != NULL)
				(void) ip_delmulti_serial(ilm, B_FALSE,
				    B_TRUE);
			mutex_exit(&ill2->ill_mcast_serializer);
			/*
			 * Now that all locks have been dropped, we can send any
			 * deferred/queued DLPI or IP packets
			 */
			ill_mcast_send_queued(ill2);
			ill_dlpi_send_queued(ill2);
			if (need_refrele) {
				/* Drop ill reference while we hold no locks */
				ill_refrele(ill2);
			}
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/*
			 * While we dropped conn_ilg_lock some other thread
			 * could have attached this ilg, thus we check again.
			 */
			if (ilg->ilg_ill != NULL) {
				if (ill != NULL) {
					/* Drop locks across ill_refrele */
					ilg_transfer_hold(held_ilg, ilg);
					held_ilg = ilg;
					rw_exit(&connp->conn_ilg_lock);
					ill_refrele(ill);
					rw_enter(&connp->conn_ilg_lock,
					    RW_WRITER);
				}
				continue;
			}
		}
		if (ill != NULL) {
			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 */
			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
				/* Already have a refhold on ill */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			/*
			 * Check that nobody else attached the ilg and that
			 * it wasn't condemned while we dropped the lock.
			 */
			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
				/*
				 * Attach to the new ill. Can fail in which
				 * case ilg_ill will remain NULL. ilg_attach
				 * drops and reacquires conn_ilg_lock.
				 */
				ip1dbg(("conn_check_reattach: attach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill->ill_name));
				ilg_attach(connp, ilg, ill);
				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
			}
			/* Drop locks across ill_refrele */
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			/*
			 * Now that all locks have been
			 * dropped, we can send any
			 * deferred/queued DLPI or IP packets
			 */
			ill_mcast_send_queued(ill);
			ill_dlpi_send_queued(ill);
			ill_refrele(ill);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}
3532
3533 /*
3534 * Called when an ill is unplumbed to make sure that there are no
3535 * dangling conn references to that ill. In that case ill is non-NULL and
3536 * we make sure we remove all references to it.
3537 * Also called when we should revisit the ilg_ill used for multicast
3538 * memberships, in which case ill is NULL.
3539 */
3540 void
update_conn_ill(ill_t * ill,ip_stack_t * ipst)3541 update_conn_ill(ill_t *ill, ip_stack_t *ipst)
3542 {
3543 ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
3544 }
3545