1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2010 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/ddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sdt.h> 35 #include <sys/zone.h> 36 37 #include <sys/param.h> 38 #include <sys/socket.h> 39 #include <sys/sockio.h> 40 #include <net/if.h> 41 #include <sys/systm.h> 42 #include <sys/strsubr.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add(ill_t *ill, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ilm_numentries(ill_t *, const in6_addr_t *); 74 75 static ilm_t *ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t, 76 ilg_stat_t, mcast_record_t, slist_t *, int *); 77 static ilm_t *ip_addmulti_impl(const in6_addr_t *, ill_t *, 78 zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *); 79 static int ip_delmulti_serial(ilm_t *, boolean_t, boolean_t); 80 static int ip_delmulti_impl(ilm_t *, boolean_t, boolean_t); 81 82 static int ip_ll_multireq(ill_t *ill, const in6_addr_t *group, 83 t_uscalar_t); 84 static ilg_t *ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr, 85 uint_t ifindex); 86 87 static int ilg_add(conn_t *connp, const in6_addr_t 
*group, 88 ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode, 89 const in6_addr_t *v6src); 90 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 91 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 92 uint32_t *addr_lenp, uint32_t *addr_offp); 93 static int ip_opt_delete_group_excl(conn_t *connp, 94 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex, 95 mcast_record_t fmode, const in6_addr_t *v6src); 96 97 static ilm_t *ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t); 98 99 static int ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *, 100 ill_t **); 101 102 static void ilg_check_detach(conn_t *, ill_t *); 103 static void ilg_check_reattach(conn_t *, ill_t *); 104 105 /* 106 * MT notes: 107 * 108 * Multicast joins operate on both the ilg and ilm structures. Multiple 109 * threads operating on an conn (socket) trying to do multicast joins 110 * need to synchronize when operating on the ilg. Multiple threads 111 * potentially operating on different conn (socket endpoints) trying to 112 * do multicast joins could eventually end up trying to manipulate the 113 * ilm simulatenously and need to synchronize on the access to the ilm. 114 * The access and lookup of the ilm, as well as other ill multicast state, 115 * is under ill_mcast_lock. 116 * The modifications and lookup of ilg entries is serialized using conn_ilg_lock 117 * rwlock. An ilg will not be freed until ilg_refcnt drops to zero. 118 * 119 * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but 120 * never the other way around. 121 * 122 * An ilm is an IP data structure used to track multicast join/leave. 123 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 124 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 125 * referencing the ilm. 
126 * The modifications and lookup of ilm entries is serialized using the 127 * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications 128 * of the ilm state. 129 * ilms are created / destroyed only as writer. ilms 130 * are not passed around. The datapath (anything outside of this file 131 * and igmp.c) use functions that do not return ilms - just the number 132 * of members. So we don't need a dynamic refcount of the number 133 * of threads holding reference to an ilm. 134 * 135 * In the cases where we serially access the ilg and ilm, which happens when 136 * we handle the applications requests to join or leave groups and sources, 137 * we use the ill_mcast_serializer mutex to ensure that a multithreaded 138 * application which does concurrent joins and/or leaves on the same group on 139 * the same socket always results in a consistent order for the ilg and ilm 140 * modifications. 141 * 142 * When a multicast operation results in needing to send a message to 143 * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue() 144 * which serialized the DLPI requests. The IGMP/MLD code uses ill_mcast_queue() 145 * to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet. 146 */ 147 148 #define GETSTRUCT(structure, number) \ 149 ((structure *)mi_zalloc(sizeof (structure) * (number))) 150 151 /* 152 * Caller must ensure that the ilg has not been condemned 153 * The condemned flag is only set in ilg_delete under conn_ilg_lock. 154 * 155 * The caller must hold conn_ilg_lock as writer. 
 */
static void
ilg_refhold(ilg_t *ilg)
{
    ASSERT(ilg->ilg_refcnt != 0);
    ASSERT(!ilg->ilg_condemned);
    ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));

    ilg->ilg_refcnt++;
}

/*
 * Last reference dropped: unlink the ilg from its conn's conn_ilg list
 * and free it.  By this point ilg_delete has already cleared the
 * ill/ilm/filter state and marked the ilg condemned.
 *
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_inactive(ilg_t *ilg)
{
    ASSERT(ilg->ilg_ill == NULL);
    ASSERT(ilg->ilg_ilm == NULL);
    ASSERT(ilg->ilg_filter == NULL);
    ASSERT(ilg->ilg_condemned);

    /* Unlink from list */
    *ilg->ilg_ptpn = ilg->ilg_next;
    if (ilg->ilg_next != NULL)
        ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn;
    ilg->ilg_next = NULL;
    ilg->ilg_ptpn = NULL;

    ilg->ilg_connp = NULL;
    kmem_free(ilg, sizeof (*ilg));
}

/*
 * Drop a reference on the ilg; the last release unlinks and frees it
 * via ilg_inactive.
 *
 * The caller must hold conn_ilg_lock as writer.
 */
static void
ilg_refrele(ilg_t *ilg)
{
    ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock));
    ASSERT(ilg->ilg_refcnt != 0);
    if (--ilg->ilg_refcnt == 0)
        ilg_inactive(ilg);
}

/*
 * Acquire reference on ilg and drop reference on held_ilg.
 * In the case when held_ilg is the same as ilg we already have
 * a reference, but the held_ilg might be condemned. In that case
 * we avoid the ilg_refhold/rele so that we can assert in ire_refhold
 * that the ilg isn't condemned.
 */
static void
ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg)
{
    if (held_ilg == ilg)
        return;

    ilg_refhold(ilg);
    if (held_ilg != NULL)
        ilg_refrele(held_ilg);
}

/*
 * Allocate a new ilg_t and links it into conn_ilg (at the head of the
 * list, with an initial refcnt of 1).
 * Returns NULL on failure, in which case `*errp' will be
 * filled in with the reason (EINVAL if the conn is closing, else ENOMEM).
 *
 * Assumes connp->conn_ilg_lock is held (as writer).
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp, int *errp)
{
    ilg_t *ilg;

    ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));

    /*
     * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
     * create any ilgs.
     */
    if (connp->conn_state_flags & CONN_CLOSING) {
        *errp = EINVAL;
        return (NULL);
    }

    ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP);
    if (ilg == NULL) {
        *errp = ENOMEM;
        return (NULL);
    }

    ilg->ilg_refcnt = 1;

    /* Insert at head */
    if (connp->conn_ilg != NULL)
        connp->conn_ilg->ilg_ptpn = &ilg->ilg_next;
    ilg->ilg_next = connp->conn_ilg;
    ilg->ilg_ptpn = &connp->conn_ilg;
    connp->conn_ilg = ilg;

    ilg->ilg_connp = connp;
    return (ilg);
}

/*
 * Scratch state used by ilm_gen_filter/ilm_bld_flists while walking all
 * conns to compute the interface-wide source filter for one ilm.
 */
typedef struct ilm_fbld_s {
    ilm_t       *fbld_ilm;          /* ilm whose filter is being rebuilt */
    int         fbld_in_cnt;        /* # of INCLUDE-mode ilgs seen */
    int         fbld_ex_cnt;        /* # of EXCLUDE-mode ilgs seen */
    slist_t     fbld_in;            /* union of all include lists */
    slist_t     fbld_ex;            /* intersection of all exclude lists */
    boolean_t   fbld_in_overflow;   /* include union overflowed the slist */
} ilm_fbld_t;

/*
 * ipcl_walk callback: fold one conn's matching ilg filter state into the
 * master include/exclude lists being built in the ilm_fbld_t passed via
 * `arg'.
 *
 * Caller must hold ill_mcast_lock
 */
static void
ilm_bld_flists(conn_t *connp, void *arg)
{
    ilg_t *ilg;
    ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
    ilm_t *ilm = fbld->fbld_ilm;
    in6_addr_t *v6group = &ilm->ilm_v6addr;

    if (connp->conn_ilg == NULL)
        return;

    /*
     * Since we can't break out of the ipcl_walk once started, we still
     * have to look at every conn. But if we've already found one
     * (EXCLUDE, NULL) list, there's no need to keep checking individual
     * ilgs--that will be our state.
     */
    if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
        return;

    /*
     * Check this conn's ilgs to see if any are interested in our
     * ilm (group, interface match). If so, update the master
     * include and exclude lists we're building in the fbld struct
     * with this ilg's filter info.
     *
     * Note that the caller has already serialized on the ill we care
     * about.
     */
    ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer));

    rw_enter(&connp->conn_ilg_lock, RW_READER);
    for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
        if (ilg->ilg_condemned)
            continue;

        /*
         * Since we are under the ill_mcast_serializer we know
         * that any ilg+ilm operations on this ilm have either
         * not started or completed, except for the last ilg
         * (the one that caused us to be called) which doesn't
         * have ilg_ilm set yet. Hence we compare using ilg_ill
         * and the address.
         */
        if ((ilg->ilg_ill == ilm->ilm_ill) &&
            IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
            if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
                fbld->fbld_in_cnt++;
                if (!fbld->fbld_in_overflow)
                    l_union_in_a(&fbld->fbld_in, ilg->ilg_filter,
                        &fbld->fbld_in_overflow);
            } else {
                fbld->fbld_ex_cnt++;
                /*
                 * On the first exclude list, don't try to do
                 * an intersection, as the master exclude list
                 * is intentionally empty. If the master list
                 * is still empty on later iterations, that
                 * means we have at least one ilg with an empty
                 * exclude list, so that should be reflected
                 * when we take the intersection.
                 */
                if (fbld->fbld_ex_cnt == 1) {
                    if (ilg->ilg_filter != NULL)
                        l_copy(ilg->ilg_filter, &fbld->fbld_ex);
                } else {
                    l_intersection_in_a(&fbld->fbld_ex,
                        ilg->ilg_filter);
                }
            }
            /* there will only be one match, so break now. */
            break;
        }
    }
    rw_exit(&connp->conn_ilg_lock);
}

/*
 * Compute the interface-wide filter (*fmode + *flist) for `ilm' by
 * walking every conn in the stack and merging the filter state of each
 * matching ilg (see ilm_bld_flists above).
 *
 * Caller must hold ill_mcast_lock
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
    ilm_fbld_t fbld;
    ip_stack_t *ipst = ilm->ilm_ipst;

    fbld.fbld_ilm = ilm;
    fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
    fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
    fbld.fbld_in_overflow = B_FALSE;

    /* first, construct our master include and exclude lists */
    ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

    /* now use those master lists to generate the interface filter */

    /* if include list overflowed, filter is (EXCLUDE, NULL) */
    if (fbld.fbld_in_overflow) {
        *fmode = MODE_IS_EXCLUDE;
        flist->sl_numsrc = 0;
        return;
    }

    /* if nobody interested, interface filter is (INCLUDE, NULL) */
    if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
        *fmode = MODE_IS_INCLUDE;
        flist->sl_numsrc = 0;
        return;
    }

    /*
     * If there are no exclude lists, then the interface filter
     * is INCLUDE, with its filter list equal to fbld_in. A single
     * exclude list makes the interface filter EXCLUDE, with its
     * filter list equal to (fbld_ex - fbld_in).
     */
    if (fbld.fbld_ex_cnt == 0) {
        *fmode = MODE_IS_INCLUDE;
        l_copy(&fbld.fbld_in, flist);
    } else {
        *fmode = MODE_IS_EXCLUDE;
        l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
    }
}

/*
 * Apply a join (or filter change) to an existing ilm: bump ilm_refcnt
 * (unless ilgstat is ILGSTAT_CHANGE), recompute the interface filter,
 * and if it changed send an IGMP/MLD state change report and install
 * the new filter.  Returns 0 on success or ENOMEM.
 *
 * Caller must hold ill_mcast_lock
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist)
{
    mcast_record_t fmode;
    slist_t *flist;
    boolean_t fdefault;
    char buf[INET6_ADDRSTRLEN];
    ill_t *ill = ilm->ilm_ill;

    /*
     * There are several cases where the ilm's filter state
     * defaults to (EXCLUDE, NULL):
     * - we've had previous joins without associated ilgs
     * - this join has no associated ilg
     * - the ilg's filter state is (EXCLUDE, NULL)
     */
    fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
        (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

    /* attempt mallocs (if needed) before doing anything else */
    if ((flist = l_alloc()) == NULL)
        return (ENOMEM);
    if (!fdefault && ilm->ilm_filter == NULL) {
        ilm->ilm_filter = l_alloc();
        if (ilm->ilm_filter == NULL) {
            l_free(flist);
            return (ENOMEM);
        }
    }

    if (ilgstat != ILGSTAT_CHANGE)
        ilm->ilm_refcnt++;

    if (ilgstat == ILGSTAT_NONE)
        ilm->ilm_no_ilg_cnt++;

    /*
     * Determine new filter state. If it's not the default
     * (EXCLUDE, NULL), we must walk the conn list to find
     * any ilgs interested in this group, and re-build the
     * ilm filter.
     */
    if (fdefault) {
        fmode = MODE_IS_EXCLUDE;
        flist->sl_numsrc = 0;
    } else {
        ilm_gen_filter(ilm, &fmode, flist);
    }

    /* make sure state actually changed; nothing to do if not. */
    if ((ilm->ilm_fmode == fmode) &&
        !lists_are_different(ilm->ilm_filter, flist)) {
        l_free(flist);
        return (0);
    }

    /* send the state change report */
    if (!IS_LOOPBACK(ill)) {
        if (ill->ill_isv6)
            mld_statechange(ilm, fmode, flist);
        else
            igmp_statechange(ilm, fmode, flist);
    }

    /* update the ilm state */
    ilm->ilm_fmode = fmode;
    if (flist->sl_numsrc > 0)
        l_copy(flist, ilm->ilm_filter);
    else
        CLEAR_SLIST(ilm->ilm_filter);

    ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
        inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

    l_free(flist);
    return (0);
}

/*
 * Recompute and apply the interface filter after a leave/update has
 * already adjusted membership counters on an ilm that still has members
 * (ilm_refcnt > 0).  Returns 0 on success or ENOMEM.
 *
 * Caller must hold ill_mcast_lock
 */
static int
ilm_update_del(ilm_t *ilm)
{
    mcast_record_t fmode;
    slist_t *flist;
    ill_t *ill = ilm->ilm_ill;

    ip1dbg(("ilm_update_del: still %d left; updating state\n",
        ilm->ilm_refcnt));

    if ((flist = l_alloc()) == NULL)
        return (ENOMEM);

    /*
     * If present, the ilg in question has already either been
     * updated or removed from our list; so all we need to do
     * now is walk the list to update the ilm filter state.
     *
     * Skip the list walk if we have any no-ilg joins, which
     * cause the filter state to revert to (EXCLUDE, NULL).
     */
    if (ilm->ilm_no_ilg_cnt != 0) {
        fmode = MODE_IS_EXCLUDE;
        flist->sl_numsrc = 0;
    } else {
        ilm_gen_filter(ilm, &fmode, flist);
    }

    /* check to see if state needs to be updated */
    if ((ilm->ilm_fmode == fmode) &&
        (!lists_are_different(ilm->ilm_filter, flist))) {
        l_free(flist);
        return (0);
    }

    if (!IS_LOOPBACK(ill)) {
        if (ill->ill_isv6)
            mld_statechange(ilm, fmode, flist);
        else
            igmp_statechange(ilm, fmode, flist);
    }

    ilm->ilm_fmode = fmode;
    if (flist->sl_numsrc > 0) {
        if (ilm->ilm_filter == NULL) {
            ilm->ilm_filter = l_alloc();
            if (ilm->ilm_filter == NULL) {
                char buf[INET6_ADDRSTRLEN];
                /*
                 * Allocation failure: deliberately degrade to the
                 * default (EXCLUDE, NULL) filter and return success
                 * rather than failing the leave.
                 */
                ip1dbg(("ilm_update_del: failed to alloc ilm "
                    "filter; no source filtering for %s on %s",
                    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
                    buf, sizeof (buf)), ill->ill_name));
                ilm->ilm_fmode = MODE_IS_EXCLUDE;
                l_free(flist);
                return (0);
            }
        }
        l_copy(flist, ilm->ilm_filter);
    } else {
        CLEAR_SLIST(ilm->ilm_filter);
    }

    l_free(flist);
    return (0);
}

/*
 * Create/update the ilm for the group/ill. Used by other parts of IP to
 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join.
 * Returns with a refhold on the ilm.
 *
 * The unspecified address means all multicast addresses, in both the
 * IPv4 and IPv6 cases.
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
ilm_t *
ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    int *errorp)
{
    ilm_t *ilm;

    /* Acquire serializer to keep assert in ilm_bld_flists happy */
    mutex_enter(&ill->ill_mcast_serializer);
    ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE,
        MODE_IS_EXCLUDE, NULL, errorp);
    mutex_exit(&ill->ill_mcast_serializer);
    /*
     * Now that all locks have been dropped, we can send any
     * deferred/queued DLPI or IP packets
     */
    ill_mcast_send_queued(ill);
    ill_dlpi_send_queued(ill);
    return (ilm);
}

/*
 * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set
 * then this returns with a refhold on the ilm.
 *
 * Internal routine which assumes the caller has already acquired
 * ill_mcast_serializer. It is the caller's responsibility to send out
 * queued DLPI/multicast packets after all locks are dropped.
 *
 * The unspecified address means all multicast addresses, in both the
 * IPv4 and IPv6 cases.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 *
 * The caller should have already mapped an IPMP under ill to the upper.
 */
static ilm_t *
ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
    ilm_t *ilm;

    ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));

    /* Validate the group address for the ill's address family. */
    if (ill->ill_isv6) {
        if (!IN6_IS_ADDR_MULTICAST(v6group) &&
            !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
            *errorp = EINVAL;
            return (NULL);
        }
    } else {
        if (IN6_IS_ADDR_V4MAPPED(v6group)) {
            ipaddr_t v4group;

            IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
            ASSERT(!IS_UNDER_IPMP(ill));
            if (!CLASSD(v4group)) {
                *errorp = EINVAL;
                return (NULL);
            }
        } else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) {
            *errorp = EINVAL;
            return (NULL);
        }
    }

    /* Joins on IPMP underlying interfaces are not allowed. */
    if (IS_UNDER_IPMP(ill)) {
        *errorp = EINVAL;
        return (NULL);
    }

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    /*
     * We do the equivalent of a lookup by checking after we get the lock
     * This is needed since the ill could have been condemned after
     * we looked it up, and we need to check condemned after we hold
     * ill_mcast_lock to synchronize with the unplumb code.
     */
    if (ill->ill_state_flags & ILL_CONDEMNED) {
        rw_exit(&ill->ill_mcast_lock);
        *errorp = ENXIO;
        return (NULL);
    }
    ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode,
        ilg_flist, errorp);
    rw_exit(&ill->ill_mcast_lock);

    /* Arm the IGMP/MLD timer now that ill_mcast_lock is dropped. */
    ill_mcast_timer_start(ill->ill_ipst);
    return (ilm);
}

/*
 * Guts of the join: look up or create the ilm, update its filter state,
 * and on a first join of the group ask the driver to enable reception
 * (allmulti for the unspecified group, DL_ENABMULTI_REQ otherwise).
 * On failure returns NULL with *errorp set and the ilm undone.
 *
 * Caller holds ill_mcast_lock as writer (and ill_mcast_serializer).
 */
static ilm_t *
ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist,
    int *errorp)
{
    ilm_t *ilm;
    int ret = 0;

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
    *errorp = 0;

    /*
     * An ilm is uniquely identified by the tuple of (group, ill) where
     * `group' is the multicast group address, and `ill' is the interface
     * on which it is currently joined.
     */

    ilm = ilm_lookup(ill, v6group, zoneid);
    if (ilm != NULL) {
        /* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */
        ret = ilm_update_add(ilm, ilgstat, ilg_flist);
        if (ret == 0)
            return (ilm);

        *errorp = ret;
        return (NULL);
    }

    /*
     * The caller's checks on the ilg and the ilg+ilm consistency under
     * ill_mcast_serializer ensure that we can not have ILGSTAT_CHANGE
     * and no ilm.
     */
    ASSERT(ilgstat != ILGSTAT_CHANGE);
    ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid);
    if (ilm == NULL) {
        *errorp = ENOMEM;
        return (NULL);
    }

    if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
        /*
         * If we have more than one we should not tell the driver
         * to join this time.
         */
        if (ilm_numentries(ill, v6group) == 1) {
            ret = ill_join_allmulti(ill);
        }
    } else {
        if (!IS_LOOPBACK(ill)) {
            if (ill->ill_isv6)
                mld_joingroup(ilm);
            else
                igmp_joingroup(ilm);
        }

        /*
         * If we have more than one we should not tell the driver
         * to join this time.
         */
        if (ilm_numentries(ill, v6group) == 1) {
            ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ);
        }
    }
    if (ret != 0) {
        if (ret == ENETDOWN) {
            char buf[INET6_ADDRSTRLEN];

            ip0dbg(("ip_addmulti: ENETDOWN for %s on %s",
                inet_ntop(AF_INET6, &ilm->ilm_v6addr,
                buf, sizeof (buf)), ill->ill_name));
        }
        /* Undo the ilm_add so the join leaves no state behind. */
        ilm_delete(ilm);
        *errorp = ret;
        return (NULL);
    } else {
        return (ilm);
    }
}

/*
 * Send a multicast request to the driver for enabling or disabling
 * multicast reception for v6groupp address. The caller has already
 * checked whether it is appropriate to send one or not.
 *
 * For IPMP we switch to the cast_ill since it has the right hardware
 * information.
 */
static int
ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
    mblk_t *mp;
    uint32_t addrlen, addroff;
    ill_t *release_ill = NULL;
    int err = 0;

    ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

    if (IS_IPMP(ill)) {
        /* On the upper IPMP ill. */
        release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
        if (release_ill == NULL) {
            /*
             * Avoid sending it down to the ipmpstub.
             * We will be called again once the members of the
             * group are in place
             */
            ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n",
                ill->ill_name, ill->ill_isv6));
            return (0);
        }
        ill = release_ill;
    }
    /* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. */
    mp = ill_create_dl(ill, prim, &addrlen, &addroff);
    if (mp == NULL) {
        err = ENOMEM;
        goto done;
    }

    /*
     * Map the group address to a hardware address in the message.
     * NOTE(review): assumes ndp_mcastreq disposes of `mp' when it
     * fails and returns NULL -- confirm, otherwise this path leaks mp.
     */
    mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp);
    if (mp == NULL) {
        ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name));
        err = ENOMEM;
        goto done;
    }

    switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
    case DL_ENABMULTI_REQ:
        mutex_enter(&ill->ill_lock);
        /* Track the state if this is the first enabmulti */
        if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
            ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
        mutex_exit(&ill->ill_lock);
        break;
    }
    /* Queue for serialized transmission; sent after locks are dropped. */
    ill_dlpi_queue(ill, mp);
done:
    if (release_ill != NULL)
        ill_refrele(release_ill);
    return (err);
}

/*
 * Send a multicast request to the driver for enabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim)
{
    /* Only resolver (non point-to-point) interfaces need a DLPI request. */
    if (ill->ill_net_type != IRE_IF_RESOLVER ||
        ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) {
        ip1dbg(("ip_ll_multireq: not resolver\n"));
        return (0);     /* Must be IRE_IF_NORESOLVER */
    }

    if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
        ip1dbg(("ip_ll_multireq: MULTI_BCAST\n"));
        return (0);
    }
    return (ip_ll_send_multireq(ill, v6groupp, prim));
}

/*
 * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving
 * being true.
 */
int
ip_delmulti(ilm_t *ilm)
{
    ill_t *ill = ilm->ilm_ill;
    int error;

    /* Acquire serializer to keep assert in ilm_bld_flists happy */
    mutex_enter(&ill->ill_mcast_serializer);
    error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE);
    mutex_exit(&ill->ill_mcast_serializer);
    /*
     * Now that all locks have been dropped, we can send any
     * deferred/queued DLPI or IP packets
     */
    ill_mcast_send_queued(ill);
    ill_dlpi_send_queued(ill);
    return (error);
}


/*
 * Delete the ilm.
 * Assumes ill_mcast_serializer is held by the caller.
 * Caller must send out queued dlpi/multicast packets after dropping
 * all locks.
 */
static int
ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
    ill_t *ill = ilm->ilm_ill;
    int ret;

    ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
    ASSERT(!(IS_UNDER_IPMP(ill)));

    rw_enter(&ill->ill_mcast_lock, RW_WRITER);
    ret = ip_delmulti_impl(ilm, no_ilg, leaving);
    rw_exit(&ill->ill_mcast_lock);
    /* Arm the IGMP/MLD timer now that ill_mcast_lock is dropped. */
    ill_mcast_timer_start(ill->ill_ipst);
    return (ret);
}

/*
 * Guts of the leave: drop the membership counters; if members remain,
 * just recompute the filter (ilm_update_del).  Otherwise send the
 * IGMP/MLD leave, delete the ilm, and when it was the last ilm for the
 * group tell the driver to disable reception (allmulti for the
 * unspecified group, DL_DISABMULTI_REQ otherwise).
 *
 * Caller holds ill_mcast_lock as writer (and ill_mcast_serializer).
 */
static int
ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving)
{
    ill_t *ill = ilm->ilm_ill;
    int error;
    in6_addr_t v6group;

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    /* Update counters */
    if (no_ilg)
        ilm->ilm_no_ilg_cnt--;

    if (leaving)
        ilm->ilm_refcnt--;

    if (ilm->ilm_refcnt > 0)
        return (ilm_update_del(ilm));

    /* Copy the group before ilm_delete frees the ilm. */
    v6group = ilm->ilm_v6addr;

    if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
        ilm_delete(ilm);
        /*
         * If we have some left then we should not tell the driver
         * to leave.
         */
        if (ilm_numentries(ill, &v6group) != 0)
            return (0);

        ill_leave_allmulti(ill);

        return (0);
    }

    if (!IS_LOOPBACK(ill)) {
        if (ill->ill_isv6)
            mld_leavegroup(ilm);
        else
            igmp_leavegroup(ilm);
    }

    ilm_delete(ilm);
    /*
     * If we have some left then we should not tell the driver
     * to leave.
     */
    if (ilm_numentries(ill, &v6group) != 0)
        return (0);

    error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ);
    /* We ignore the case when ill_dl_up is not set */
    if (error == ENETDOWN) {
        char buf[INET6_ADDRSTRLEN];

        ip0dbg(("ip_delmulti: ENETDOWN for %s on %s",
            inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)),
            ill->ill_name));
    }
    return (error);
}

/*
 * Make the driver pass up all multicast packets.
 */
int
ill_join_allmulti(ill_t *ill)
{
    mblk_t *promiscon_mp, *promiscoff_mp = NULL;
    uint32_t addrlen, addroff;
    ill_t *release_ill = NULL;

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (IS_LOOPBACK(ill))
        return (0);

    if (!ill->ill_dl_up) {
        /*
         * Nobody there. All multicast addresses will be re-joined
         * when we get the DL_BIND_ACK bringing the interface up.
         */
        return (ENETDOWN);
    }

    if (IS_IPMP(ill)) {
        /* On the upper IPMP ill. */
        release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
        if (release_ill == NULL) {
            /*
             * Avoid sending it down to the ipmpstub.
             * We will be called again once the members of the
             * group are in place
             */
            ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n",
                ill->ill_name, ill->ill_isv6));
            return (0);
        }
        ill = release_ill;
        if (!ill->ill_dl_up) {
            ill_refrele(ill);
            return (ENETDOWN);
        }
    }

    /*
     * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
     * provider. We don't need to do this for certain media types for
     * which we never need to turn promiscuous mode on. While we're here,
     * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
     * ill_leave_allmulti() will not fail due to low memory conditions.
     */
    if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
        !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
        promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
            &addrlen, &addroff);
        if (ill->ill_promiscoff_mp == NULL)
            promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
                &addrlen, &addroff);
        if (promiscon_mp == NULL ||
            (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) {
            /* freemsg is a no-op on NULL, so both frees are safe */
            freemsg(promiscon_mp);
            freemsg(promiscoff_mp);
            if (release_ill != NULL)
                ill_refrele(release_ill);
            return (ENOMEM);
        }
        if (ill->ill_promiscoff_mp == NULL)
            ill->ill_promiscoff_mp = promiscoff_mp;
        ill_dlpi_queue(ill, promiscon_mp);
    }
    if (release_ill != NULL)
        ill_refrele(release_ill);
    return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 */
void
ill_leave_allmulti(ill_t *ill)
{
    mblk_t *promiscoff_mp;
    ill_t *release_ill = NULL;

    ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

    if (IS_LOOPBACK(ill))
        return;

    if (!ill->ill_dl_up) {
        /*
         * Nobody there. All multicast addresses will be re-joined
         * when we get the DL_BIND_ACK bringing the interface up.
         */
        return;
    }

    if (IS_IPMP(ill)) {
        /* On the upper IPMP ill. */
        release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp);
        if (release_ill == NULL) {
            /*
             * Avoid sending it down to the ipmpstub.
             * We will be called again once the members of the
             * group are in place
             */
            ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n",
                ill->ill_name, ill->ill_isv6));
            return;
        }
        ill = release_ill;
        if (!ill->ill_dl_up)
            goto done;
    }

    /*
     * In the case of IPMP and ill_dl_up not being set when we joined
     * we didn't allocate a promiscoff_mp. In that case we have
     * nothing to do when we leave.
     * Ditto for PHYI_MULTI_BCAST
     */
    promiscoff_mp = ill->ill_promiscoff_mp;
    if (promiscoff_mp != NULL) {
        ill->ill_promiscoff_mp = NULL;
        ill_dlpi_queue(ill, promiscoff_mp);
    }
done:
    if (release_ill != NULL)
        ill_refrele(release_ill);
}

/*
 * Join allmulti on the interface with the given ifindex, counting
 * repeated joins in ill_ipallmulti_cnt.  Returns 0 or an errno.
 */
int
ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
    ill_t *ill;
    int ret;
    ilm_t *ilm;

    ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
    if (ill == NULL)
        return (ENODEV);

    /*
     * The ip_addmulti() function doesn't allow IPMP underlying interfaces
     * to join allmulti since only the nominated underlying interface in
     * the group should receive multicast. We silently succeed to avoid
     * having to teach IPobs (currently the only caller of this routine)
     * to ignore failures in this case.
     */
    if (IS_UNDER_IPMP(ill)) {
        ill_refrele(ill);
        return (0);
    }
    mutex_enter(&ill->ill_lock);
    if (ill->ill_ipallmulti_cnt > 0) {
        /* Already joined */
        ASSERT(ill->ill_ipallmulti_ilm != NULL);
        ill->ill_ipallmulti_cnt++;
        mutex_exit(&ill->ill_lock);
        goto done;
    }
    mutex_exit(&ill->ill_lock);

    /* Join outside ill_lock; we re-check for a racing join below. */
    ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret);
    if (ilm == NULL) {
        ASSERT(ret != 0);
        ill_refrele(ill);
        return (ret);
    }

    mutex_enter(&ill->ill_lock);
    if (ill->ill_ipallmulti_cnt > 0) {
        /*
         * Another thread added it concurrently.
         * NOTE(review): ip_delmulti is invoked here while ill_lock is
         * held; confirm this lock ordering is intended.
         */
        (void) ip_delmulti(ilm);
        mutex_exit(&ill->ill_lock);
        goto done;
    }
    ASSERT(ill->ill_ipallmulti_ilm == NULL);
    ill->ill_ipallmulti_ilm = ilm;
    ill->ill_ipallmulti_cnt++;
    mutex_exit(&ill->ill_lock);
done:
    ill_refrele(ill);
    return (0);
}

/*
 * Undo one ip_join_allmulti(); the last leave releases the allmulti ilm.
 * Returns 0 or ENODEV if the ifindex cannot be found.
 */
int
ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
    ill_t *ill;
    ilm_t *ilm;

    ill = ill_lookup_on_ifindex(ifindex, isv6, ipst);
    if (ill == NULL)
        return (ENODEV);

    /* Mirrors the silent success for IPMP in ip_join_allmulti(). */
    if (IS_UNDER_IPMP(ill)) {
        ill_refrele(ill);
        return (0);
    }

    mutex_enter(&ill->ill_lock);
    if (ill->ill_ipallmulti_cnt == 0) {
        /* ip_purge_allmulti could have removed them all */
        mutex_exit(&ill->ill_lock);
        goto done;
    }
    ill->ill_ipallmulti_cnt--;
    if (ill->ill_ipallmulti_cnt == 0) {
        /* Last one */
        ilm = ill->ill_ipallmulti_ilm;
        ill->ill_ipallmulti_ilm = NULL;
    } else {
        ilm = NULL;
    }
    mutex_exit(&ill->ill_lock);
    if (ilm != NULL)
        (void) ip_delmulti(ilm);

done:
    ill_refrele(ill);
    return (0);
}

/*
 * Delete the allmulti memberships that were added as part of
 * ip_join_allmulti().
 */
void
ip_purge_allmulti(ill_t *ill)
{
    ilm_t *ilm;

    ASSERT(IAM_WRITER_ILL(ill));

    mutex_enter(&ill->ill_lock);
    ilm = ill->ill_ipallmulti_ilm;
    ill->ill_ipallmulti_ilm = NULL;
    ill->ill_ipallmulti_cnt = 0;
    mutex_exit(&ill->ill_lock);

    if (ilm != NULL)
        (void) ip_delmulti(ilm);
}

/*
 * Create a dlpi message with room for phys+sap. Later
 * we will strip the sap for those primitives which
 * only need a physical address.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t hw_addr_length;
	char	*cp;
	uint32_t offset;
	uint32_t length;
	uint32_t size;

	/* Report no address length/offset in the failure cases below */
	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	/* Compute the allocation size for the requested primitive */
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
		length = sizeof (dl_enabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_DISABMULTI_REQ:
		length = sizeof (dl_disabmulti_req_t);
		size = length + hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		/* No link-layer address follows these primitives */
		size = length = sizeof (dl_promiscon_req_t);
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	offset = length;	/* the address (if any) follows the header */

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		/*
		 * DL_PROMISCOFF_REQ has the same layout as DL_PROMISCON_REQ,
		 * so the latter's structure is used for both primitives.
		 */
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}

/*
 * Rejoin any groups for which we have ilms.
 *
 * This is only needed for IPMP when the cast_ill changes since that
 * change is invisible to the ilm. Other interface changes are handled
 * by conn_update_ill.
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char	addrbuf[INET6_ADDRSTRLEN];

	ill->ill_need_recover_multicast = 0;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more than one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to join unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			/* The unspecified address denotes allmulti */
			(void) ill_join_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);

			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_ENABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}

/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined.
 *
 * This is only needed for IPMP when the cast_ill changes since that
 * change is invisible to the ilm. Other interface changes are handled
 * by conn_update_ill.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char	addrbuf[INET6_ADDRSTRLEN];

	/* Remember to rejoin (see ill_recover_multicast) */
	ill->ill_need_recover_multicast = 1;

	rw_enter(&ill->ill_mcast_lock, RW_WRITER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * If we have more than one ilm for the group (e.g., with
		 * different zoneid) then we should not tell the driver
		 * to leave unless this is the first ilm for the group.
		 */
		if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			/* The unspecified address denotes allmulti */
			ill_leave_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_leavegroup(ilm);
			else
				igmp_leavegroup(ilm);

			(void) ip_ll_multireq(ill, &ilm->ilm_v6addr,
			    DL_DISABMULTI_REQ);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_mcast_timer_start(ill->ill_ipst);
}

/*
 * Interface used by IP input/output.
 * Returns true if there is a member on the ill for any zoneid.
 */
boolean_t
ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;

	rw_enter(&ill->ill_mcast_lock, RW_READER);
	ilm = ilm_lookup(ill, v6group, ALL_ZONES);
	rw_exit(&ill->ill_mcast_lock);
	return (ilm != NULL);
}

/*
 * Interface used by IP input/output.
 * Returns true if there is a member on the ill for any zoneid.
 *
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 */
boolean_t
ill_hasmembers_v4(ill_t *ill, ipaddr_t group)
{
	in6_addr_t v6group;

	/* Map the IPv4 group into IPv6 form and share the v6 lookup */
	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
	return (ill_hasmembers_v6(ill, &v6group));
}

/*
 * Interface used by IP input/output.
 * Returns true if there is a member on the ill for any zoneid except skipzone.
 */
boolean_t
ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group,
    zoneid_t skipzone)
{
	ilm_t	*ilm;

	rw_enter(&ill->ill_mcast_lock, RW_READER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    ilm->ilm_zoneid != skipzone) {
			rw_exit(&ill->ill_mcast_lock);
			return (B_TRUE);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	return (B_FALSE);
}

/*
 * Interface used by IP input/output.
 * Returns true if there is a member on the ill for any zoneid except skipzone.
 *
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 */
boolean_t
ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone)
{
	in6_addr_t v6group;

	/* Map the IPv4 group into IPv6 form and share the v6 lookup */
	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
	return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone));
}

/*
 * Interface used by IP input.
 * Returns the next numerically larger zoneid that has a member. If none exist
 * then returns -1 (ALL_ZONES).
 * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES)
 * to find the first zoneid which has a member, and then pass that in for
 * subsequent calls until ALL_ZONES is returned.
 *
 * The implementation of ill_hasmembers_nextzone() assumes the ilms
 * are sorted by zoneid for efficiency.
 */
zoneid_t
ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group,
    zoneid_t zoneid)
{
	ilm_t	*ilm;

	rw_enter(&ill->ill_mcast_lock, RW_READER);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    ilm->ilm_zoneid > zoneid) {
			/*
			 * List is sorted by zoneid (see ilm_add), so the
			 * first match is the next larger zoneid.
			 */
			zoneid = ilm->ilm_zoneid;
			rw_exit(&ill->ill_mcast_lock);
			return (zoneid);
		}
	}
	rw_exit(&ill->ill_mcast_lock);
	return (ALL_ZONES);
}

/*
 * Interface used by IP input.
 * Returns the next numerically larger zoneid that has a member. If none exist
 * then returns -1 (ALL_ZONES).
 *
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 */
zoneid_t
ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t v6group;

	/* Map the IPv4 group into IPv6 form and share the v6 lookup */
	IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid));
}

/*
 * Find an ilm matching the ill, group, and zoneid.
 * Pass ALL_ZONES as zoneid to match any zone.
 */
static ilm_t *
ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
{
	ilm_t	*ilm;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));

	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
			continue;
		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
			continue;

		ASSERT(ilm->ilm_ill == ill);
		return (ilm);
	}
	return (NULL);
}

/*
 * How many members on this ill?
 * Since each shared-IP zone has a separate ilm for the same group/ill
 * we can have several.
 */
static int
ilm_numentries(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;
	int	i = 0;

	ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock));
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			i++;
		}
	}
	return (i);
}

/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
{
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));
	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter list when there is ilg state to copy in */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	ilm->ilm_ill = ill;
	DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	ill->ill_ilm_cnt++;

	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/* The ill/ipif could have just been marked as condemned */

	/*
	 * To make ill_hasmembers_nextzone_v6 work we keep the list
	 * sorted by zoneid.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	return (ilm);
}

/*
 * Release all storage attached to an ilm and the ilm itself.
 * FREE_SLIST handles NULL lists, so unallocated members are safe.
 */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}

/*
 * Unlink ilm and free it.
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill = ilm->ilm_ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;

	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock));

	/* Walk to the list link that points at this ilm, then unlink it */
	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;

	*ilmp = ilm->ilm_next;

	mutex_enter(&ill->ill_lock);
	/*
	 * if we are the last reference to the ill, we may need to wakeup any
	 * pending FREE or unplumb operations. This is because conn_update_ill
	 * bails if there is a ilg_delete_all in progress.
	 */
	need_wakeup = B_FALSE;
	DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
	    (char *), "ilm", (void *), ilm);
	ASSERT(ill->ill_ilm_cnt > 0);
	ill->ill_ilm_cnt--;
	if (ILL_FREE_OK(ill))
		need_wakeup = B_TRUE;

	ilm_inactive(ilm);	/* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}

/*
 * Lookup an ill based on the group, ifindex, ifaddr, and zoneid.
 * Applies to both IPv4 and IPv6, although ifaddr is only used with
 * IPv4.
 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
 * On error it sets *errorp.
 */
static ill_t *
ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex,
    zoneid_t zoneid, ip_stack_t *ipst, int *errorp)
{
	ill_t	*ill;
	ipaddr_t v4group;

	if (IN6_IS_ADDR_V4MAPPED(group)) {
		IN6_V4MAPPED_TO_IPADDR(group, v4group);

		/* Preference order: explicit ifindex, then ifaddr, then group */
		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_FALSE, ipst);
		} else if (ifaddr != INADDR_ANY) {
			ipif_t *ipif;

			ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst);
			if (ipif == NULL) {
				ill = NULL;
			} else {
				/* Hold the ill before releasing the ipif */
				ill = ipif->ipif_ill;
				ill_refhold(ill);
				ipif_refrele(ipif);
			}
		} else {
			ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL,
			    NULL);
		}
	} else {
		if (ifindex != 0) {
			ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid,
			    B_TRUE, ipst);
		} else {
			ill = ill_lookup_group_v6(group, zoneid, ipst, NULL,
			    NULL);
		}
	}
	if (ill == NULL) {
		/* ENXIO when a specific interface was named, else no address */
		if (ifindex != 0)
			*errorp = ENXIO;
		else
			*errorp = EADDRNOTAVAIL;
		return (NULL);
	}
	/* operation not supported on the virtual network interface */
	if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) {
		ill_refrele(ill);
		*errorp = EINVAL;
		return (NULL);
	}
	return (ill);
}

/*
 * Looks up the appropriate ill given an interface index (or interface address)
 * and multicast group. On success, returns 0, with *illpp pointing to the
 * found struct. On failure, returns an errno and *illpp is set to NULL.
 *
 * Returns an error for IS_UNDER_IPMP and VNI interfaces.
 *
 * Handles both IPv4 and IPv6. The ifaddr argument only applies in the
 * case of IPv4.
 */
int
ip_opt_check(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp)
{
	boolean_t	src_unspec;
	ill_t		*ill = NULL;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;
	int		error = 0;

	*illpp = NULL;

	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);

	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
		ipaddr_t v4group;
		ipaddr_t v4src;

		/* Group and source must belong to the same address family */
		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		IN6_V4MAPPED_TO_IPADDR(v6group, v4group);
		if (src_unspec) {
			v4src = INADDR_ANY;
		} else {
			IN6_V4MAPPED_TO_IPADDR(v6src, v4src);
		}
		/* Group must be multicast; source must not be */
		if (!CLASSD(v4group) || CLASSD(v4src))
			return (EINVAL);
	} else {
		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
		    IN6_IS_ADDR_MULTICAST(v6src)) {
			return (EINVAL);
		}
	}

	ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp),
	    ipst, &error);
	*illpp = ill;
	return (error);
}

/*
 * Fetch the conn's current source filter state for a group into the
 * caller-supplied structure: gf for the [v4/v6] group_filter API, or
 * imsf for the v4-only ip_msfilter API (exactly one of them is non-NULL).
 * issin6 selects sockaddr_in6 formatting of the returned source list.
 * Returns 0 or EADDRNOTAVAIL if the conn has no matching ilg.
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6)
{
	ilg_t	*ilg;
	int	i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t	*fp;
	boolean_t is_v4only_api;
	ipaddr_t ifaddr;
	uint_t	ifindex;

	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* No need to use ill_mcast_serializer for the reader */
	rw_enter(&connp->conn_ilg_lock, RW_READER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy out at most outsrcs addresses (caller's buffer size) */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full source count, even if truncated above */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	rw_exit(&connp->conn_ilg_lock);

	return (0);
}

/*
 * Common for IPv4 and IPv6.
 */
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill,
    boolean_t issin6)
{
	ilg_t	*ilg;
	int	i, err, infmode, new_fmode;
	uint_t	insrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t	*orig_filter = NULL;
	slist_t	*new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_group, is_v4only_api;
	ilg_stat_t ilgstat;
	ilm_t	*ilm;
	ipaddr_t ifaddr;
	uint_t	ifindex;

	/* Exactly one of gf (group_filter API) and imsf (v4-only API) is set */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!issin6);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
		ifaddr = imsf->imsf_interface.s_addr;
		ifindex = 0;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
		ifaddr = INADDR_ANY;
		ifindex = gf->gf_interface;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_group = (infmode == MCAST_INCLUDE && insrcs == 0);

	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (leave_group) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (0);
		}
		ilg = conn_ilg_alloc(connp, &err);
		if (ilg == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			return (err);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *group;
		ilg->ilg_ill = ill;
		ilg->ilg_ifaddr = ifaddr;
		ilg->ilg_ifindex = ifindex;
	} else if (leave_group) {
		/*
		 * Make sure we have the correct serializer. The ill argument
		 * might not match ilg_ill.
		 */
		ilg_refhold(ilg);
		mutex_exit(&ill->ill_mcast_serializer);
		ill = ilg->ilg_ill;
		rw_exit(&connp->conn_ilg_lock);

		/* Re-acquire both locks against the ilg's own ill */
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg_delete(connp, ilg, NULL);
		ilg_refrele(ilg);
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can send any
		 * deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				mutex_exit(&ill->ill_mcast_serializer);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t	*fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		/* Copy the caller's source list into the ilg filter */
		for (i = 0; i < insrcs; i++) {
			if (issin6) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &err);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL) {
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		err = ENXIO;
		goto free_and_exit;
	}

	if (ilm != NULL) {
		if (ilg->ilg_ill == NULL) {
			/* some other thread is re-attaching this. */
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			err = 0;
			goto free_and_exit;
		}
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			if (ilg->ilg_ilm == NULL) {
				ilg->ilg_ilm = ilm;
				ilm->ilm_ifaddr = ifaddr;	/* For netstat */
			} else {
				/* some other thread is re-attaching this. */
				rw_exit(&connp->conn_ilg_lock);
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
				err = 0;
				goto free_and_exit;
			}
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(err != 0);
		/*
		 * Failed to allocate the ilm.
		 * Restore the original filter state, or delete the
		 * newly-created ilg.
		 * If ENETDOWN just clear ill_ilg since so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW) {
			if (err == ENETDOWN) {
				ilg->ilg_ill = NULL;
				err = 0;
			} else {
				ilg_delete(connp, ilg, NULL);
			}
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	mutex_exit(&ill->ill_mcast_serializer);
	/* Send any deferred/queued DLPI or IP packets */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
 */
/* ARGSUSED */
int
ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *ifreq)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* existence verified in ip_wput_nondata() */
	mblk_t *data_mp = mp->b_cont->b_cont;
	int datalen, err, cmd, minsize;
	uint_t expsize = 0;
	conn_t	*connp;
	boolean_t isv6, is_v4only_api, getcmd;
	struct sockaddr_in *gsin;
	struct sockaddr_in6 *gsin6;
	ipaddr_t v4group;
	in6_addr_t v6group;
	struct group_filter *gf = NULL;
	struct ip_msfilter *imsf = NULL;
	mblk_t	*ndp;
	ill_t	*ill;

	connp = Q_TO_CONN(q);
	err = ip_msfilter_ill(connp, mp, ipip, &ill);
	if (err != 0)
		return (err);

	/* Pull the (possibly multi-mblk) user data into one contiguous mblk */
	if (data_mp->b_cont != NULL) {
		if ((ndp = msgpullup(data_mp, -1)) == NULL)
			return (ENOMEM);
		freemsg(data_mp);
		data_mp = ndp;
		mp->b_cont->b_cont = data_mp;
	}

	cmd = iocp->ioc_cmd;
	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
	datalen = MBLKL(data_mp);

	if (datalen < minsize)
		return (EINVAL);

	/*
	 * now we know we have at least have the initial structure,
	 * but need to check for the source list array.
	 */
	if (is_v4only_api) {
		imsf = (struct ip_msfilter *)data_mp->b_rptr;
		isv6 = B_FALSE;
		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
	} else {
		gf = (struct group_filter *)data_mp->b_rptr;
		if (gf->gf_group.ss_family == AF_INET6) {
			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
			/* A v4-mapped group is treated as IPv4 */
			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
		} else {
			isv6 = B_FALSE;
		}
		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
	}
	if (datalen < expsize)
		return (EINVAL);

	if (isv6) {
		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
		v6group = gsin6->sin6_addr;
		if (getcmd) {
			err = ip_get_srcfilter(connp, gf, NULL, &v6group,
			    B_TRUE);
		} else {
			err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill,
			    B_TRUE);
		}
	} else {
		boolean_t issin6 = B_FALSE;
		/* Extract the IPv4 group from whichever API variant was used */
		if (is_v4only_api) {
			v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		} else {
			if (gf->gf_group.ss_family == AF_INET) {
				gsin = (struct sockaddr_in *)&gf->gf_group;
				v4group = (ipaddr_t)gsin->sin_addr.s_addr;
				IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
			} else {
				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
				    v4group);
				issin6 = B_TRUE;
			}
		}
		/*
		 * INADDR_ANY is represented as the IPv6 unspecified addr.
		 */
		if (v4group == INADDR_ANY)
			v6group = ipv6_all_zeros;
		else
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);

		if (getcmd) {
			err = ip_get_srcfilter(connp, gf, imsf, &v6group,
			    issin6);
		} else {
			err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill,
			    issin6);
		}
	}
	ill_refrele(ill);

	return (err);
}

/*
 * Determine the ill for the SIOC*MSFILTER ioctls
 *
 * Returns an error for IS_UNDER_IPMP interfaces.
 *
 * Finds the ill based on information in the ioctl headers.
 */
static int
ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    ill_t **illp)
{
	int	cmd = ipip->ipi_cmd;
	int	err = 0;
	ill_t	*ill;
	/* caller has verified this mblk exists */
	char	*dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4group;
	in6_addr_t v6group;
	uint32_t index;
	ip_stack_t *ipst;

	ipst = connp->conn_netstack->netstack_ip;

	*illp = NULL;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_family == AF_INET6)
			return (EAFNOSUPPORT);

		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4group = imsf->imsf_multiaddr.s_addr;
		IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp),
		    ipst, &err);
		/*
		 * A specific interface address that didn't resolve is
		 * reported as ENXIO rather than the lookup's errno.
		 */
		if (ill == NULL && v4addr != INADDR_ANY)
			err = ENXIO;
	} else {
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;

			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6group = sin6->sin6_addr;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;

			sin = (struct sockaddr_in *)&gf->gf_group;
			v4group = sin->sin_addr.s_addr;
			IN6_IPADDR_TO_V4MAPPED(v4group, &v6group);
		} else {
			return (EAFNOSUPPORT);
		}
		ill = ill_mcast_lookup(&v6group, INADDR_ANY, index,
		    IPCL_ZONEID(connp), ipst, &err);
	}
	*illp = ill;
	return (err);
}

/*
 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
 * in in two stages, as the first copyin tells us the size of the attached
 * source buffer. This function is called by ip_wput_nondata() after the
 * first copyin has completed; it figures out how big the second stage
 * needs to be, and kicks it off.
 *
 * In some cases (numsrc < 2), the second copyin is not needed as the
 * first one gets a complete structure containing 1 source addr.
 *
 * The function returns 0 if a second copyin has been started (i.e. there's
 * no more work to be done right now), or 1 if the second copyin is not
 * needed and ip_wput_nondata() can continue its processing.
2294 */ 2295 int 2296 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2297 { 2298 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2299 int cmd = iocp->ioc_cmd; 2300 /* validity of this checked in ip_wput_nondata() */ 2301 mblk_t *mp1 = mp->b_cont->b_cont; 2302 int copysize = 0; 2303 int offset; 2304 2305 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2306 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2307 if (gf->gf_numsrc >= 2) { 2308 offset = sizeof (struct group_filter); 2309 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2310 } 2311 } else { 2312 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2313 if (imsf->imsf_numsrc >= 2) { 2314 offset = sizeof (struct ip_msfilter); 2315 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2316 } 2317 } 2318 if (copysize > 0) { 2319 mi_copyin_n(q, mp, offset, copysize); 2320 return (0); 2321 } 2322 return (1); 2323 } 2324 2325 /* 2326 * Handle the following optmgmt: 2327 * IP_ADD_MEMBERSHIP must not have joined already 2328 * IPV6_JOIN_GROUP must not have joined already 2329 * MCAST_JOIN_GROUP must not have joined already 2330 * IP_BLOCK_SOURCE must have joined already 2331 * MCAST_BLOCK_SOURCE must have joined already 2332 * IP_JOIN_SOURCE_GROUP may have joined already 2333 * MCAST_JOIN_SOURCE_GROUP may have joined already 2334 * 2335 * fmode and src parameters may be used to determine which option is 2336 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2337 * are functionally equivalent): 2338 * opt fmode v6src 2339 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE unspecified 2340 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2341 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2342 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr 2343 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2344 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr 2345 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2346 * 2347 * Changing the filter mode is not allowed; if a matching ilg 
already 2348 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2349 * 2350 * Verifies that there is a source address of appropriate scope for 2351 * the group; if not, EADDRNOTAVAIL is returned. 2352 * 2353 * The interface to be used may be identified by an IPv4 address or by an 2354 * interface index. 2355 * 2356 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2357 * with the IPv4 address. Assumes that if v6group is v4-mapped, 2358 * v6src is also v4-mapped. 2359 */ 2360 int 2361 ip_opt_add_group(conn_t *connp, boolean_t checkonly, 2362 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex, 2363 mcast_record_t fmode, const in6_addr_t *v6src) 2364 { 2365 ill_t *ill; 2366 char buf[INET6_ADDRSTRLEN]; 2367 int err; 2368 2369 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill); 2370 if (err != 0) { 2371 ip1dbg(("ip_opt_add_group: no ill for group %s/" 2372 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2373 sizeof (buf)), ifindex)); 2374 return (err); 2375 } 2376 2377 if (checkonly) { 2378 /* 2379 * do not do operation, just pretend to - new T_CHECK 2380 * semantics. The error return case above if encountered 2381 * considered a good enough "check" here. 2382 */ 2383 ill_refrele(ill); 2384 return (0); 2385 } 2386 mutex_enter(&ill->ill_mcast_serializer); 2387 /* 2388 * Multicast groups may not be joined on interfaces that are either 2389 * already underlying interfaces in an IPMP group, or in the process 2390 * of joining the IPMP group. The latter condition is enforced by 2391 * checking the value of ill->ill_grp_pending under the 2392 * ill_mcast_serializer lock. We cannot serialize the 2393 * ill_grp_pending check on the ill_g_lock across ilg_add() because 2394 * ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex 2395 * will take the ill_g_lock itself. Instead, we hold the 2396 * ill_mcast_serializer. 
2397 */ 2398 if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) { 2399 DTRACE_PROBE2(group__add__on__under, ill_t *, ill, 2400 in6_addr_t *, v6group); 2401 mutex_exit(&ill->ill_mcast_serializer); 2402 ill_refrele(ill); 2403 return (EADDRNOTAVAIL); 2404 } 2405 err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src); 2406 mutex_exit(&ill->ill_mcast_serializer); 2407 /* 2408 * We have done an addmulti_impl and/or delmulti_impl. 2409 * All locks have been dropped, we can send any 2410 * deferred/queued DLPI or IP packets 2411 */ 2412 ill_mcast_send_queued(ill); 2413 ill_dlpi_send_queued(ill); 2414 ill_refrele(ill); 2415 return (err); 2416 } 2417 2418 /* 2419 * Common for IPv6 and IPv4. 2420 * Here we handle ilgs that are still attached to their original ill 2421 * (the one ifaddr/ifindex points at), as well as detached ones. 2422 * The detached ones might have been attached to some other ill. 2423 */ 2424 static int 2425 ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group, 2426 ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode, 2427 const in6_addr_t *v6src) 2428 { 2429 ilg_t *ilg; 2430 boolean_t leaving; 2431 ilm_t *ilm; 2432 ill_t *ill; 2433 int err = 0; 2434 2435 retry: 2436 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 2437 ilg = ilg_lookup(connp, v6group, ifaddr, ifindex); 2438 if (ilg == NULL) { 2439 rw_exit(&connp->conn_ilg_lock); 2440 /* 2441 * Since we didn't have any ilg we now do the error checks 2442 * to determine the best errno. 
		 */
		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
		    &ill);
		if (ill != NULL) {
			/* The only error was a missing ilg for the group */
			ill_refrele(ill);
			err = EADDRNOTAVAIL;
		}
		return (err);
	}

	/* If the ilg is attached then we serialize using that ill */
	ill = ilg->ilg_ill;
	if (ill != NULL) {
		/* Prevent the ill and ilg from being freed */
		ill_refhold(ill);
		ilg_refhold(ilg);
		rw_exit(&connp->conn_ilg_lock);
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		if (ilg->ilg_condemned) {
			/* Disappeared while we dropped the lock; start over */
			ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			ill_refrele(ill);
			goto retry;
		}
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		leaving = B_TRUE;
	} else {
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			if (ill != NULL)
				ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			goto done;
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			/* Removing the last include source => full leave */
			leaving = B_TRUE;
			v6src = NULL;
		} else {
			leaving = B_FALSE;
		}
	}
	ilm = ilg->ilg_ilm;
	if (leaving)
		ilg->ilg_ilm = NULL;

	ilg_delete(connp, ilg, v6src);
	if (ill != NULL)
		ilg_refrele(ilg);
	rw_exit(&connp->conn_ilg_lock);

	if (ilm != NULL) {
		ASSERT(ill != NULL);
		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
	}
done:
	if (ill != NULL) {
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can
		 * send any deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		ill_refrele(ill);
	}
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IP_DROP_MEMBERSHIP		will leave
 *	IPV6_LEAVE_GROUP		will leave
 *	MCAST_LEAVE_GROUP		will leave
 *	IP_UNBLOCK_SOURCE		will not leave
 *	MCAST_UNBLOCK_SOURCE		will not leave
 *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows:
 *	opt			fmode			v6src
 *	IP_DROP_MEMBERSHIP	MODE_IS_INCLUDE		unspecified
 *	IPV6_LEAVE_GROUP	MODE_IS_INCLUDE		unspecified
 *	MCAST_LEAVE_GROUP	MODE_IS_INCLUDE		unspecified
 *	IP_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		IPv4-mapped addr
 *	MCAST_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
 *	IP_LEAVE_SOURCE_GROUP	MODE_IS_INCLUDE
IPv4-mapped addr
 *	MCAST_LEAVE_SOURCE_GROUP	MODE_IS_INCLUDE	v6 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * The interface to be used may be identified by an IPv4 address or by an
 * interface index.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the IPv4 address.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_delete_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t	*ill;
	int	err;

	/*
	 * In the normal case we don't check for the ill existing here;
	 * instead ip_opt_delete_group_excl() looks for an existing ilg.
	 */
	if (!checkonly) {
		return (ip_opt_delete_group_excl(connp, v6group, ifaddr,
		    ifindex, fmode, v6src));
	}

	/*
	 * Do not do the operation, just pretend to - new T_CHECK semantics.
	 * ip_opt_check() is considered a good enough "check" here; we only
	 * sanity check the arguments.
	 */
	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (ill != NULL)
		ill_refrele(ill);
	return (err);
}

/*
 * Group mgmt for upper conn that passes things down
 * to the interface multicast list (and DLPI)
 * These routines can handle new style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces.)
 */

/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI)
 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
 */
static int
ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	int error = 0;
	ilg_t *ilg;
	ilg_stat_t ilgstat;
	slist_t *new_filter = NULL;
	int new_fmode;
	ilm_t *ilm;

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/* conn_ilg_lock protects the ilg list. */
	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.  Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		rw_exit(&connp->conn_ilg_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join on this group/interface for this conn */
		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (error);
		}
		ilg->ilg_ifindex = ifindex;
		ilg->ilg_ifaddr = ifaddr;
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ill = ill;
	} else {
		int index;

		/* Adding a source to an existing membership */
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill.  We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		if (ilm != NULL) {
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		error = ENXIO;
		goto free_and_exit;
	}
	if (ilm != NULL) {
		if (ilg->ilg_ill == NULL) {
			/* some other thread is re-attaching this. */
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			error = 0;
			goto free_and_exit;
		}
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			if (ilg->ilg_ilm == NULL) {
				ilg->ilg_ilm = ilm;
				ilm->ilm_ifaddr = ifaddr;	/* For netstat */
			} else {
				/* some other thread is re-attaching this. */
				rw_exit(&connp->conn_ilg_lock);
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
				error = 0;
				goto free_and_exit;
			}
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(error != 0);
		/*
		 * Failed to allocate the ilm.
		 * Need to undo what we did before calling ip_addmulti().
		 * If ENETDOWN, just clear ilg_ill so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
			ilg->ilg_ill = NULL;
			error = 0;
		} else {
			/* Undo the whole join, or just the source we added */
			in6_addr_t delsrc =
			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;

			ilg_delete(connp, ilg, &delsrc);
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	l_free(new_filter);
	return (error);
}

/*
 * Find an IPv4 ilg matching group, ill and source.
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 */
boolean_t
conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
    ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int i;
	boolean_t isinlist;
	ilg_t *ilg;

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;

		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				rw_exit(&connp->conn_ilg_lock);
				return (B_TRUE);
			}
			break;
		}
	}
	if (ilg == NULL) {
		/* No membership for this group on this ill */
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* Match if the source is included, or not excluded */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}
	rw_exit(&connp->conn_ilg_lock);
	return (B_FALSE);
}

/*
 * Find an IPv6 ilg matching group, ill, and source
 */
boolean_t
conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int i;
	boolean_t isinlist;
	ilg_t *ilg;

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;

		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				rw_exit(&connp->conn_ilg_lock);
				return (B_TRUE);
			}
			break;
		}
	}
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* Match if the source is included, or not excluded */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}
	rw_exit(&connp->conn_ilg_lock);
	return (B_FALSE);
}

/*
 * Find an ilg matching group and ifaddr/ifindex.
 * We check both ifaddr and ifindex even though at most one of them
 * will be non-zero; that way we always find the right one.
 */
static ilg_t *
ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex)
{
	ilg_t	*ilg;

	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));

	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		if (ilg->ilg_ifaddr == ifaddr &&
		    ilg->ilg_ifindex == ifindex &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
			return (ilg);
	}
	return (NULL);
}

/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	ASSERT(ilg->ilg_ptpn != NULL);
	ASSERT(!ilg->ilg_condemned);

	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
		/* Full delete: free the filter and condemn the ilg */
		FREE_SLIST(ilg->ilg_filter);
		ilg->ilg_filter = NULL;

		ASSERT(ilg->ilg_ilm == NULL);
		ilg->ilg_ill = NULL;
		ilg->ilg_condemned = B_TRUE;

		/* ilg_inactive will unlink from the list */
		ilg_refrele(ilg);
	} else {
		l_remove(ilg->ilg_filter, src);
	}
}

/*
 * Called from conn close.
No new ilg can be added or removed 2968 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 2969 * will return error if conn has started closing. 2970 * 2971 * We handle locking as follows. 2972 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to 2973 * proceed with the ilm part of the delete we hold a reference on both the ill 2974 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from 2975 * being deleted. 2976 * 2977 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part, 2978 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between 2979 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In 2980 * that case we delete the ilg here, which makes ilg_add discover that the ilg 2981 * has disappeared when ip_addmulti returns, so it will discard the ilm it just 2982 * added. 2983 */ 2984 void 2985 ilg_delete_all(conn_t *connp) 2986 { 2987 ilg_t *ilg, *next_ilg, *held_ilg; 2988 ilm_t *ilm; 2989 ill_t *ill; 2990 boolean_t need_refrele; 2991 2992 /* 2993 * Can not run if there is a conn_update_ill already running. 2994 * Wait for it to complete. Caller should have already set CONN_CLOSING 2995 * which prevents any new threads to run in conn_update_ill. 
2996 */ 2997 mutex_enter(&connp->conn_lock); 2998 ASSERT(connp->conn_state_flags & CONN_CLOSING); 2999 while (connp->conn_state_flags & CONN_UPDATE_ILL) 3000 cv_wait(&connp->conn_cv, &connp->conn_lock); 3001 mutex_exit(&connp->conn_lock); 3002 3003 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3004 ilg = connp->conn_ilg; 3005 held_ilg = NULL; 3006 while (ilg != NULL) { 3007 if (ilg->ilg_condemned) { 3008 ilg = ilg->ilg_next; 3009 continue; 3010 } 3011 /* If the ilg is detached then no need to serialize */ 3012 if (ilg->ilg_ilm == NULL) { 3013 next_ilg = ilg->ilg_next; 3014 ilg_delete(connp, ilg, NULL); 3015 ilg = next_ilg; 3016 continue; 3017 } 3018 ill = ilg->ilg_ilm->ilm_ill; 3019 3020 /* 3021 * In order to serialize on the ill we try to enter 3022 * and if that fails we unlock and relock and then 3023 * check that we still have an ilm. 3024 */ 3025 need_refrele = B_FALSE; 3026 if (!mutex_tryenter(&ill->ill_mcast_serializer)) { 3027 ill_refhold(ill); 3028 need_refrele = B_TRUE; 3029 ilg_refhold(ilg); 3030 if (held_ilg != NULL) 3031 ilg_refrele(held_ilg); 3032 held_ilg = ilg; 3033 rw_exit(&connp->conn_ilg_lock); 3034 mutex_enter(&ill->ill_mcast_serializer); 3035 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3036 if (ilg->ilg_condemned) { 3037 ilg = ilg->ilg_next; 3038 goto next; 3039 } 3040 } 3041 ilm = ilg->ilg_ilm; 3042 ilg->ilg_ilm = NULL; 3043 next_ilg = ilg->ilg_next; 3044 ilg_delete(connp, ilg, NULL); 3045 ilg = next_ilg; 3046 rw_exit(&connp->conn_ilg_lock); 3047 3048 if (ilm != NULL) 3049 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE); 3050 3051 next: 3052 mutex_exit(&ill->ill_mcast_serializer); 3053 /* 3054 * Now that all locks have been dropped, we can send any 3055 * deferred/queued DLPI or IP packets 3056 */ 3057 ill_mcast_send_queued(ill); 3058 ill_dlpi_send_queued(ill); 3059 if (need_refrele) { 3060 /* Drop ill reference while we hold no locks */ 3061 ill_refrele(ill); 3062 } 3063 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3064 } 3065 if (held_ilg != 
NULL) 3066 ilg_refrele(held_ilg); 3067 rw_exit(&connp->conn_ilg_lock); 3068 } 3069 3070 /* 3071 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so 3072 * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock. 3073 */ 3074 static void 3075 ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill) 3076 { 3077 ilg_stat_t ilgstat; 3078 slist_t *new_filter; 3079 int new_fmode; 3080 in6_addr_t v6group; 3081 ipaddr_t ifaddr; 3082 uint_t ifindex; 3083 ilm_t *ilm; 3084 int error = 0; 3085 3086 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock)); 3087 /* 3088 * Alloc buffer to copy new state into (see below) before 3089 * we make any changes, so we can bail if it fails. 3090 */ 3091 if ((new_filter = l_alloc()) == NULL) 3092 return; 3093 3094 /* 3095 * Save copy of ilg's filter state to pass to other functions, so 3096 * we can release conn_ilg_lock now. 3097 * Set ilg_ill so that an unplumb can find us. 3098 */ 3099 new_fmode = ilg->ilg_fmode; 3100 l_copy(ilg->ilg_filter, new_filter); 3101 v6group = ilg->ilg_v6group; 3102 ifaddr = ilg->ilg_ifaddr; 3103 ifindex = ilg->ilg_ifindex; 3104 ilgstat = ILGSTAT_NEW; 3105 3106 ilg->ilg_ill = ill; 3107 ASSERT(ilg->ilg_ilm == NULL); 3108 rw_exit(&connp->conn_ilg_lock); 3109 3110 ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat, 3111 new_fmode, new_filter, &error); 3112 l_free(new_filter); 3113 3114 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3115 /* 3116 * Must look up the ilg again since we've not been holding 3117 * conn_ilg_lock. The ilg could have disappeared due to an unplumb 3118 * having called conn_update_ill, which can run once we dropped the 3119 * conn_ilg_lock above. 
Alternatively, the ilg could have been attached
	 * when the lock was dropped.
	 */
	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
	if (ilg == NULL || ilg->ilg_ilm != NULL) {
		/* Gone, or already attached; discard the ilm we just added */
		if (ilm != NULL) {
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
		return;
	}
	if (ilm == NULL) {
		/* Attach failed; leave ilg_ill NULL for a later retry */
		ilg->ilg_ill = NULL;
		return;
	}
	ilg->ilg_ilm = ilm;
	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
}

/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 *
 * conn is held by caller.
 *
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire and then only set
 * condemned flag.
 *
 * We leave ixa_multicast_ifindex in place. We prefer dropping
 * packets instead of sending them out the wrong interface.
 *
 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
 * ilg_ifindex are zero, indicating that the system should pick the interface,
 * then we attempt to reselect the ill and join on it.
 *
 * Locking notes:
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Note: if this function is called when new ill/ipif's arrive or change status
 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
 * a NULL ilg_ill to an ill/ilm.
 */
static void
conn_update_ill(conn_t *connp, caddr_t arg)
{
	ill_t	*ill = (ill_t *)arg;

	/*
	 * We have to prevent ip_close/ilg_delete_all from running at
	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
	 * If ilg_delete_all got here first, then we have nothing to do.
	 */
	mutex_enter(&connp->conn_lock);
	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
		/* Caller has to wait for ill_ilm_cnt to drop to zero */
		mutex_exit(&connp->conn_lock);
		return;
	}
	connp->conn_state_flags |= CONN_UPDATE_ILL;
	mutex_exit(&connp->conn_lock);

	if (ill != NULL)
		ilg_check_detach(connp, ill);

	ilg_check_reattach(connp, ill);

	/* Do we need to wake up a thread in ilg_delete_all? */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
	if (connp->conn_state_flags & CONN_CLOSING)
		cv_broadcast(&connp->conn_cv);
	mutex_exit(&connp->conn_lock);
}

/* Detach from an ill that is going away */
static void
ilg_check_detach(conn_t *connp, ill_t *ill)
{
	char	group_buf[INET6_ADDRSTRLEN];
	ilg_t	*ilg, *held_ilg;
	ilm_t	*ilm;

	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		if (ilg->ilg_ill != ill)
			continue;

		/* Detach from current ill */
		ip1dbg(("ilg_check_detach: detach %s on %s\n",
		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
		    group_buf, sizeof (group_buf)),
		    ilg->ilg_ill->ill_name));

		/* Detach this ilg from the ill/ilm */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg->ilg_ill = NULL;
		if (ilm == NULL)
			continue;

		/* Prevent ilg from disappearing */
		ilg_transfer_hold(held_ilg, ilg);
		held_ilg = ilg;
		rw_exit(&connp->conn_ilg_lock);

		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * Now that all locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
}

/*
 * Check if there is a place to attach the conn_ilgs. We do this for both
 * detached ilgs and attached ones, since for the latter there could be
 * a better ill to attach them to. oill is non-null if we just detached from
 * that ill.
 */
static void
ilg_check_reattach(conn_t *connp, ill_t *oill)
{
	ill_t		*ill;
	char		group_buf[INET6_ADDRSTRLEN];
	ilg_t		*ilg, *held_ilg;	/* held_ilg: ilg we hold a ref on */
	ilm_t		*ilm;
	zoneid_t	zoneid = IPCL_ZONEID(connp);
	int		error;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* Check if the conn_ill matches what we would pick now */
		ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr,
		    ilg->ilg_ifindex, zoneid, ipst, &error);

		/*
		 * Make sure the ill is usable for multicast and that
		 * we can send the DL_ADDMULTI_REQ before we create an
		 * ilm.
		 */
		if (ill != NULL &&
		    (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) {
			/* Drop locks across ill_refrele */
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			ill_refrele(ill);
			ill = NULL;
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/* Note that ilg could have become condemned */
		}

		/*
		 * Is the ill unchanged, even if both are NULL?
		 * Did we just detach from that ill?
		 */
		if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) {
			if (ill != NULL) {
				/* Drop locks across ill_refrele */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				ill_refrele(ill);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			}
			continue;
		}

		/* Something changed; detach from old first if needed */
		if (ilg->ilg_ill != NULL) {
			ill_t *ill2 = ilg->ilg_ill;
			boolean_t need_refrele = B_FALSE;

			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 * (Unconditionally blocking here while holding
			 * conn_ilg_lock could deadlock against the
			 * serializer-then-conn_ilg_lock order used above.)
			 */
			if (!mutex_tryenter(&ill2->ill_mcast_serializer)) {
				ill_refhold(ill2);
				need_refrele = B_TRUE;
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill2->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			/*
			 * Check that nobody else re-attached the ilg while we
			 * dropped the lock.
			 */
			if (ilg->ilg_ill == ill2) {
				ASSERT(!ilg->ilg_condemned);
				/* Detach from current ill */
				ip1dbg(("conn_check_reattach: detach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill2->ill_name));

				ilm = ilg->ilg_ilm;
				ilg->ilg_ilm = NULL;
				ilg->ilg_ill = NULL;
			} else {
				ilm = NULL;
			}
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			if (ilm != NULL)
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
			mutex_exit(&ill2->ill_mcast_serializer);
			/*
			 * Now that all locks have been dropped, we can send any
			 * deferred/queued DLPI or IP packets
			 */
			ill_mcast_send_queued(ill2);
			ill_dlpi_send_queued(ill2);
			if (need_refrele) {
				/* Drop ill reference while we hold no locks */
				ill_refrele(ill2);
			}
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			/*
			 * While we dropped conn_ilg_lock some other thread
			 * could have attached this ilg, thus we check again.
			 */
			if (ilg->ilg_ill != NULL) {
				if (ill != NULL) {
					/* Drop locks across ill_refrele */
					ilg_transfer_hold(held_ilg, ilg);
					held_ilg = ilg;
					rw_exit(&connp->conn_ilg_lock);
					ill_refrele(ill);
					rw_enter(&connp->conn_ilg_lock,
					    RW_WRITER);
				}
				continue;
			}
		}
		if (ill != NULL) {
			/*
			 * In order to serialize on the ill we try to enter
			 * and if that fails we unlock and relock.
			 */
			if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
				/* Already have a refhold on ill */
				ilg_transfer_hold(held_ilg, ilg);
				held_ilg = ilg;
				rw_exit(&connp->conn_ilg_lock);
				mutex_enter(&ill->ill_mcast_serializer);
				rw_enter(&connp->conn_ilg_lock, RW_WRITER);
				/* Note that ilg could have become condemned */
			}
			ilg_transfer_hold(held_ilg, ilg);
			held_ilg = ilg;
			/*
			 * Check that nobody else attached the ilg and that
			 * it wasn't condemned while we dropped the lock.
			 */
			if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) {
				/*
				 * Attach to the new ill. Can fail in which
				 * case ilg_ill will remain NULL. ilg_attach
				 * drops and reacquires conn_ilg_lock.
				 */
				ip1dbg(("conn_check_reattach: attach %s/%s\n",
				    inet_ntop(AF_INET6, &ilg->ilg_v6group,
				    group_buf, sizeof (group_buf)),
				    ill->ill_name));
				ilg_attach(connp, ilg, ill);
				ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
			}
			/* Drop locks across ill_refrele */
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			/*
			 * Now that all locks have been
			 * dropped, we can send any
			 * deferred/queued DLPI or IP packets
			 */
			ill_mcast_send_queued(ill);
			ill_dlpi_send_queued(ill);
			ill_refrele(ill);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}

/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 *
 * Walks every conn in the stack, applying conn_update_ill to each.
 */
void
update_conn_ill(ill_t *ill, ip_stack_t *ipst)
{
	ipcl_walk(conn_update_ill, (caddr_t)ill, ipst);
}