1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright (c) 1991, 2010, Oracle and/or its affiliates. All rights reserved. 23 * Copyright (c) 1990 Mentat Inc. 24 */ 25 26 #include <sys/types.h> 27 #include <sys/stream.h> 28 #include <sys/dlpi.h> 29 #include <sys/stropts.h> 30 #include <sys/strsun.h> 31 #include <sys/ddi.h> 32 #include <sys/cmn_err.h> 33 #include <sys/sdt.h> 34 #include <sys/zone.h> 35 36 #include <sys/param.h> 37 #include <sys/socket.h> 38 #include <sys/sockio.h> 39 #include <net/if.h> 40 #include <sys/systm.h> 41 #include <sys/strsubr.h> 42 #include <net/route.h> 43 #include <netinet/in.h> 44 #include <net/if_dl.h> 45 #include <netinet/ip6.h> 46 #include <netinet/icmp6.h> 47 48 #include <inet/common.h> 49 #include <inet/mi.h> 50 #include <inet/nd.h> 51 #include <inet/arp.h> 52 #include <inet/ip.h> 53 #include <inet/ip6.h> 54 #include <inet/ip_if.h> 55 #include <inet/ip_ndp.h> 56 #include <inet/ip_multi.h> 57 #include <inet/ipclassifier.h> 58 #include <inet/ipsec_impl.h> 59 #include <inet/sctp_ip.h> 60 #include <inet/ip_listutils.h> 61 #include <inet/udp_impl.h> 62 63 /* igmpv3/mldv2 source filter manipulation */ 64 static void 
ilm_bld_flists(conn_t *conn, void *arg); 65 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 66 slist_t *flist); 67 68 static ilm_t *ilm_add(ill_t *ill, const in6_addr_t *group, 69 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 70 zoneid_t zoneid); 71 static void ilm_delete(ilm_t *ilm); 72 static int ilm_numentries(ill_t *, const in6_addr_t *); 73 74 static ilm_t *ip_addmulti_serial(const in6_addr_t *, ill_t *, zoneid_t, 75 ilg_stat_t, mcast_record_t, slist_t *, int *); 76 static ilm_t *ip_addmulti_impl(const in6_addr_t *, ill_t *, 77 zoneid_t, ilg_stat_t, mcast_record_t, slist_t *, int *); 78 static int ip_delmulti_serial(ilm_t *, boolean_t, boolean_t); 79 static int ip_delmulti_impl(ilm_t *, boolean_t, boolean_t); 80 81 static int ip_ll_multireq(ill_t *ill, const in6_addr_t *group, 82 t_uscalar_t); 83 static ilg_t *ilg_lookup(conn_t *, const in6_addr_t *, ipaddr_t ifaddr, 84 uint_t ifindex); 85 86 static int ilg_add(conn_t *connp, const in6_addr_t *group, 87 ipaddr_t ifaddr, uint_t ifindex, ill_t *ill, mcast_record_t fmode, 88 const in6_addr_t *v6src); 89 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 90 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 91 uint32_t *addr_lenp, uint32_t *addr_offp); 92 static int ip_opt_delete_group_excl(conn_t *connp, 93 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex, 94 mcast_record_t fmode, const in6_addr_t *v6src); 95 96 static ilm_t *ilm_lookup(ill_t *, const in6_addr_t *, zoneid_t); 97 98 static int ip_msfilter_ill(conn_t *, mblk_t *, const ip_ioctl_cmd_t *, 99 ill_t **); 100 101 static void ilg_check_detach(conn_t *, ill_t *); 102 static void ilg_check_reattach(conn_t *, ill_t *); 103 104 /* 105 * MT notes: 106 * 107 * Multicast joins operate on both the ilg and ilm structures. Multiple 108 * threads operating on an conn (socket) trying to do multicast joins 109 * need to synchronize when operating on the ilg. 
 * Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * The access and lookup of the ilm, as well as other ill multicast state,
 * is under ill_mcast_lock.
 * The modifications and lookups of ilg entries are serialized using the
 * conn_ilg_lock rwlock. An ilg will not be freed until ilg_refcnt drops
 * to zero.
 *
 * In some cases we hold ill_mcast_lock and then acquire conn_ilg_lock, but
 * never the other way around.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm.
 * The modifications and lookups of ilm entries are serialized using the
 * ill_mcast_lock rwlock; that lock handles all the igmp/mld modifications
 * of the ilm state.
 * ilms are created / destroyed only as writer. ilms
 * are not passed around. The datapath (anything outside of this file
 * and igmp.c) uses functions that do not return ilms - just the number
 * of members. So we don't need a dynamic refcount of the number
 * of threads holding a reference to an ilm.
 *
 * In the cases where we serially access the ilg and ilm, which happens when
 * we handle the application's requests to join or leave groups and sources,
 * we use the ill_mcast_serializer mutex to ensure that a multithreaded
 * application which does concurrent joins and/or leaves on the same group on
 * the same socket always results in a consistent order for the ilg and ilm
 * modifications.
 *
 * When a multicast operation results in needing to send a message to
 * the driver (to join/leave a L2 multicast address), we use ill_dlpi_queue()
 * which serializes the DLPI requests.
The IGMP/MLD code uses ill_mcast_queue() 144 * to send IGMP/MLD IP packet to avoid dropping the lock just to send a packet. 145 */ 146 147 #define GETSTRUCT(structure, number) \ 148 ((structure *)mi_zalloc(sizeof (structure) * (number))) 149 150 /* 151 * Caller must ensure that the ilg has not been condemned 152 * The condemned flag is only set in ilg_delete under conn_ilg_lock. 153 * 154 * The caller must hold conn_ilg_lock as writer. 155 */ 156 static void 157 ilg_refhold(ilg_t *ilg) 158 { 159 ASSERT(ilg->ilg_refcnt != 0); 160 ASSERT(!ilg->ilg_condemned); 161 ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock)); 162 163 ilg->ilg_refcnt++; 164 } 165 166 static void 167 ilg_inactive(ilg_t *ilg) 168 { 169 ASSERT(ilg->ilg_ill == NULL); 170 ASSERT(ilg->ilg_ilm == NULL); 171 ASSERT(ilg->ilg_filter == NULL); 172 ASSERT(ilg->ilg_condemned); 173 174 /* Unlink from list */ 175 *ilg->ilg_ptpn = ilg->ilg_next; 176 if (ilg->ilg_next != NULL) 177 ilg->ilg_next->ilg_ptpn = ilg->ilg_ptpn; 178 ilg->ilg_next = NULL; 179 ilg->ilg_ptpn = NULL; 180 181 ilg->ilg_connp = NULL; 182 kmem_free(ilg, sizeof (*ilg)); 183 } 184 185 /* 186 * The caller must hold conn_ilg_lock as writer. 187 */ 188 static void 189 ilg_refrele(ilg_t *ilg) 190 { 191 ASSERT(RW_WRITE_HELD(&ilg->ilg_connp->conn_ilg_lock)); 192 ASSERT(ilg->ilg_refcnt != 0); 193 if (--ilg->ilg_refcnt == 0) 194 ilg_inactive(ilg); 195 } 196 197 /* 198 * Acquire reference on ilg and drop reference on held_ilg. 199 * In the case when held_ilg is the same as ilg we already have 200 * a reference, but the held_ilg might be condemned. In that case 201 * we avoid the ilg_refhold/rele so that we can assert in ire_refhold 202 * that the ilg isn't condemned. 203 */ 204 static void 205 ilg_transfer_hold(ilg_t *held_ilg, ilg_t *ilg) 206 { 207 if (held_ilg == ilg) 208 return; 209 210 ilg_refhold(ilg); 211 if (held_ilg != NULL) 212 ilg_refrele(held_ilg); 213 } 214 215 /* 216 * Allocate a new ilg_t and links it into conn_ilg. 
217 * Returns NULL on failure, in which case `*errp' will be 218 * filled in with the reason. 219 * 220 * Assumes connp->conn_ilg_lock is held. 221 */ 222 static ilg_t * 223 conn_ilg_alloc(conn_t *connp, int *errp) 224 { 225 ilg_t *ilg; 226 227 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock)); 228 229 /* 230 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not 231 * create any ilgs. 232 */ 233 if (connp->conn_state_flags & CONN_CLOSING) { 234 *errp = EINVAL; 235 return (NULL); 236 } 237 238 ilg = kmem_zalloc(sizeof (ilg_t), KM_NOSLEEP); 239 if (ilg == NULL) { 240 *errp = ENOMEM; 241 return (NULL); 242 } 243 244 ilg->ilg_refcnt = 1; 245 246 /* Insert at head */ 247 if (connp->conn_ilg != NULL) 248 connp->conn_ilg->ilg_ptpn = &ilg->ilg_next; 249 ilg->ilg_next = connp->conn_ilg; 250 ilg->ilg_ptpn = &connp->conn_ilg; 251 connp->conn_ilg = ilg; 252 253 ilg->ilg_connp = connp; 254 return (ilg); 255 } 256 257 typedef struct ilm_fbld_s { 258 ilm_t *fbld_ilm; 259 int fbld_in_cnt; 260 int fbld_ex_cnt; 261 slist_t fbld_in; 262 slist_t fbld_ex; 263 boolean_t fbld_in_overflow; 264 } ilm_fbld_t; 265 266 /* 267 * Caller must hold ill_mcast_lock 268 */ 269 static void 270 ilm_bld_flists(conn_t *connp, void *arg) 271 { 272 ilg_t *ilg; 273 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 274 ilm_t *ilm = fbld->fbld_ilm; 275 in6_addr_t *v6group = &ilm->ilm_v6addr; 276 277 if (connp->conn_ilg == NULL) 278 return; 279 280 /* 281 * Since we can't break out of the ipcl_walk once started, we still 282 * have to look at every conn. But if we've already found one 283 * (EXCLUDE, NULL) list, there's no need to keep checking individual 284 * ilgs--that will be our state. 285 */ 286 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 287 return; 288 289 /* 290 * Check this conn's ilgs to see if any are interested in our 291 * ilm (group, interface match). If so, update the master 292 * include and exclude lists we're building in the fbld struct 293 * with this ilg's filter info. 
294 * 295 * Note that the caller has already serialized on the ill we care 296 * about. 297 */ 298 ASSERT(MUTEX_HELD(&ilm->ilm_ill->ill_mcast_serializer)); 299 300 rw_enter(&connp->conn_ilg_lock, RW_READER); 301 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) { 302 if (ilg->ilg_condemned) 303 continue; 304 305 /* 306 * Since we are under the ill_mcast_serializer we know 307 * that any ilg+ilm operations on this ilm have either 308 * not started or completed, except for the last ilg 309 * (the one that caused us to be called) which doesn't 310 * have ilg_ilm set yet. Hence we compare using ilg_ill 311 * and the address. 312 */ 313 if ((ilg->ilg_ill == ilm->ilm_ill) && 314 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 315 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 316 fbld->fbld_in_cnt++; 317 if (!fbld->fbld_in_overflow) 318 l_union_in_a(&fbld->fbld_in, 319 ilg->ilg_filter, 320 &fbld->fbld_in_overflow); 321 } else { 322 fbld->fbld_ex_cnt++; 323 /* 324 * On the first exclude list, don't try to do 325 * an intersection, as the master exclude list 326 * is intentionally empty. If the master list 327 * is still empty on later iterations, that 328 * means we have at least one ilg with an empty 329 * exclude list, so that should be reflected 330 * when we take the intersection. 331 */ 332 if (fbld->fbld_ex_cnt == 1) { 333 if (ilg->ilg_filter != NULL) 334 l_copy(ilg->ilg_filter, 335 &fbld->fbld_ex); 336 } else { 337 l_intersection_in_a(&fbld->fbld_ex, 338 ilg->ilg_filter); 339 } 340 } 341 /* there will only be one match, so break now. 
*/ 342 break; 343 } 344 } 345 rw_exit(&connp->conn_ilg_lock); 346 } 347 348 /* 349 * Caller must hold ill_mcast_lock 350 */ 351 static void 352 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 353 { 354 ilm_fbld_t fbld; 355 ip_stack_t *ipst = ilm->ilm_ipst; 356 357 fbld.fbld_ilm = ilm; 358 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 359 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 360 fbld.fbld_in_overflow = B_FALSE; 361 362 /* first, construct our master include and exclude lists */ 363 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 364 365 /* now use those master lists to generate the interface filter */ 366 367 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 368 if (fbld.fbld_in_overflow) { 369 *fmode = MODE_IS_EXCLUDE; 370 flist->sl_numsrc = 0; 371 return; 372 } 373 374 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 375 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 376 *fmode = MODE_IS_INCLUDE; 377 flist->sl_numsrc = 0; 378 return; 379 } 380 381 /* 382 * If there are no exclude lists, then the interface filter 383 * is INCLUDE, with its filter list equal to fbld_in. A single 384 * exclude list makes the interface filter EXCLUDE, with its 385 * filter list equal to (fbld_ex - fbld_in). 
386 */ 387 if (fbld.fbld_ex_cnt == 0) { 388 *fmode = MODE_IS_INCLUDE; 389 l_copy(&fbld.fbld_in, flist); 390 } else { 391 *fmode = MODE_IS_EXCLUDE; 392 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 393 } 394 } 395 396 /* 397 * Caller must hold ill_mcast_lock 398 */ 399 static int 400 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist) 401 { 402 mcast_record_t fmode; 403 slist_t *flist; 404 boolean_t fdefault; 405 char buf[INET6_ADDRSTRLEN]; 406 ill_t *ill = ilm->ilm_ill; 407 408 /* 409 * There are several cases where the ilm's filter state 410 * defaults to (EXCLUDE, NULL): 411 * - we've had previous joins without associated ilgs 412 * - this join has no associated ilg 413 * - the ilg's filter state is (EXCLUDE, NULL) 414 */ 415 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 416 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 417 418 /* attempt mallocs (if needed) before doing anything else */ 419 if ((flist = l_alloc()) == NULL) 420 return (ENOMEM); 421 if (!fdefault && ilm->ilm_filter == NULL) { 422 ilm->ilm_filter = l_alloc(); 423 if (ilm->ilm_filter == NULL) { 424 l_free(flist); 425 return (ENOMEM); 426 } 427 } 428 429 if (ilgstat != ILGSTAT_CHANGE) 430 ilm->ilm_refcnt++; 431 432 if (ilgstat == ILGSTAT_NONE) 433 ilm->ilm_no_ilg_cnt++; 434 435 /* 436 * Determine new filter state. If it's not the default 437 * (EXCLUDE, NULL), we must walk the conn list to find 438 * any ilgs interested in this group, and re-build the 439 * ilm filter. 440 */ 441 if (fdefault) { 442 fmode = MODE_IS_EXCLUDE; 443 flist->sl_numsrc = 0; 444 } else { 445 ilm_gen_filter(ilm, &fmode, flist); 446 } 447 448 /* make sure state actually changed; nothing to do if not. 
*/ 449 if ((ilm->ilm_fmode == fmode) && 450 !lists_are_different(ilm->ilm_filter, flist)) { 451 l_free(flist); 452 return (0); 453 } 454 455 /* send the state change report */ 456 if (!IS_LOOPBACK(ill)) { 457 if (ill->ill_isv6) 458 mld_statechange(ilm, fmode, flist); 459 else 460 igmp_statechange(ilm, fmode, flist); 461 } 462 463 /* update the ilm state */ 464 ilm->ilm_fmode = fmode; 465 if (flist->sl_numsrc > 0) 466 l_copy(flist, ilm->ilm_filter); 467 else 468 CLEAR_SLIST(ilm->ilm_filter); 469 470 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 471 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 472 473 l_free(flist); 474 return (0); 475 } 476 477 /* 478 * Caller must hold ill_mcast_lock 479 */ 480 static int 481 ilm_update_del(ilm_t *ilm) 482 { 483 mcast_record_t fmode; 484 slist_t *flist; 485 ill_t *ill = ilm->ilm_ill; 486 487 ip1dbg(("ilm_update_del: still %d left; updating state\n", 488 ilm->ilm_refcnt)); 489 490 if ((flist = l_alloc()) == NULL) 491 return (ENOMEM); 492 493 /* 494 * If present, the ilg in question has already either been 495 * updated or removed from our list; so all we need to do 496 * now is walk the list to update the ilm filter state. 497 * 498 * Skip the list walk if we have any no-ilg joins, which 499 * cause the filter state to revert to (EXCLUDE, NULL). 
500 */ 501 if (ilm->ilm_no_ilg_cnt != 0) { 502 fmode = MODE_IS_EXCLUDE; 503 flist->sl_numsrc = 0; 504 } else { 505 ilm_gen_filter(ilm, &fmode, flist); 506 } 507 508 /* check to see if state needs to be updated */ 509 if ((ilm->ilm_fmode == fmode) && 510 (!lists_are_different(ilm->ilm_filter, flist))) { 511 l_free(flist); 512 return (0); 513 } 514 515 if (!IS_LOOPBACK(ill)) { 516 if (ill->ill_isv6) 517 mld_statechange(ilm, fmode, flist); 518 else 519 igmp_statechange(ilm, fmode, flist); 520 } 521 522 ilm->ilm_fmode = fmode; 523 if (flist->sl_numsrc > 0) { 524 if (ilm->ilm_filter == NULL) { 525 ilm->ilm_filter = l_alloc(); 526 if (ilm->ilm_filter == NULL) { 527 char buf[INET6_ADDRSTRLEN]; 528 ip1dbg(("ilm_update_del: failed to alloc ilm " 529 "filter; no source filtering for %s on %s", 530 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 531 buf, sizeof (buf)), ill->ill_name)); 532 ilm->ilm_fmode = MODE_IS_EXCLUDE; 533 l_free(flist); 534 return (0); 535 } 536 } 537 l_copy(flist, ilm->ilm_filter); 538 } else { 539 CLEAR_SLIST(ilm->ilm_filter); 540 } 541 542 l_free(flist); 543 return (0); 544 } 545 546 /* 547 * Create/update the ilm for the group/ill. Used by other parts of IP to 548 * do the ILGSTAT_NONE (no ilg), MODE_IS_EXCLUDE, with no slist join. 549 * Returns with a refhold on the ilm. 550 * 551 * The unspecified address means all multicast addresses for in both the 552 * case of IPv4 and IPv6. 553 * 554 * The caller should have already mapped an IPMP under ill to the upper. 
555 */ 556 ilm_t * 557 ip_addmulti(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 558 int *errorp) 559 { 560 ilm_t *ilm; 561 562 /* Acquire serializer to keep assert in ilm_bld_flists happy */ 563 mutex_enter(&ill->ill_mcast_serializer); 564 ilm = ip_addmulti_serial(v6group, ill, zoneid, ILGSTAT_NONE, 565 MODE_IS_EXCLUDE, NULL, errorp); 566 mutex_exit(&ill->ill_mcast_serializer); 567 /* 568 * Now that all locks have been dropped, we can send any 569 * deferred/queued DLPI or IP packets 570 */ 571 ill_mcast_send_queued(ill); 572 ill_dlpi_send_queued(ill); 573 return (ilm); 574 } 575 576 /* 577 * Create/update the ilm for the group/ill. If ILGSTAT_CHANGE is not set 578 * then this returns with a refhold on the ilm. 579 * 580 * Internal routine which assumes the caller has already acquired 581 * ill_mcast_serializer. It is the caller's responsibility to send out 582 * queued DLPI/multicast packets after all locks are dropped. 583 * 584 * The unspecified address means all multicast addresses for in both the 585 * case of IPv4 and IPv6. 586 * 587 * ilgstat tells us if there's an ilg associated with this join, 588 * and if so, if it's a new ilg or a change to an existing one. 589 * ilg_fmode and ilg_flist give us the current filter state of 590 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 591 * 592 * The caller should have already mapped an IPMP under ill to the upper. 
593 */ 594 static ilm_t * 595 ip_addmulti_serial(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 596 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 597 int *errorp) 598 { 599 ilm_t *ilm; 600 601 ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer)); 602 603 if (ill->ill_isv6) { 604 if (!IN6_IS_ADDR_MULTICAST(v6group) && 605 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 606 *errorp = EINVAL; 607 return (NULL); 608 } 609 } else { 610 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 611 ipaddr_t v4group; 612 613 IN6_V4MAPPED_TO_IPADDR(v6group, v4group); 614 ASSERT(!IS_UNDER_IPMP(ill)); 615 if (!CLASSD(v4group)) { 616 *errorp = EINVAL; 617 return (NULL); 618 } 619 } else if (!IN6_IS_ADDR_UNSPECIFIED(v6group)) { 620 *errorp = EINVAL; 621 return (NULL); 622 } 623 } 624 625 if (IS_UNDER_IPMP(ill)) { 626 *errorp = EINVAL; 627 return (NULL); 628 } 629 630 rw_enter(&ill->ill_mcast_lock, RW_WRITER); 631 /* 632 * We do the equivalent of a lookup by checking after we get the lock 633 * This is needed since the ill could have been condemned after 634 * we looked it up, and we need to check condemned after we hold 635 * ill_mcast_lock to synchronize with the unplumb code. 636 */ 637 if (ill->ill_state_flags & ILL_CONDEMNED) { 638 rw_exit(&ill->ill_mcast_lock); 639 *errorp = ENXIO; 640 return (NULL); 641 } 642 ilm = ip_addmulti_impl(v6group, ill, zoneid, ilgstat, ilg_fmode, 643 ilg_flist, errorp); 644 rw_exit(&ill->ill_mcast_lock); 645 646 ill_mcast_timer_start(ill->ill_ipst); 647 return (ilm); 648 } 649 650 static ilm_t * 651 ip_addmulti_impl(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 652 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 653 int *errorp) 654 { 655 ilm_t *ilm; 656 int ret = 0; 657 658 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 659 *errorp = 0; 660 661 /* 662 * An ilm is uniquely identified by the tuple of (group, ill) where 663 * `group' is the multicast group address, and `ill' is the interface 664 * on which it is currently joined. 
665 */ 666 667 ilm = ilm_lookup(ill, v6group, zoneid); 668 if (ilm != NULL) { 669 /* ilm_update_add bumps ilm_refcnt unless ILGSTAT_CHANGE */ 670 ret = ilm_update_add(ilm, ilgstat, ilg_flist); 671 if (ret == 0) 672 return (ilm); 673 674 *errorp = ret; 675 return (NULL); 676 } 677 678 /* 679 * The callers checks on the ilg and the ilg+ilm consistency under 680 * ill_mcast_serializer ensures that we can not have ILGSTAT_CHANGE 681 * and no ilm. 682 */ 683 ASSERT(ilgstat != ILGSTAT_CHANGE); 684 ilm = ilm_add(ill, v6group, ilgstat, ilg_fmode, ilg_flist, zoneid); 685 if (ilm == NULL) { 686 *errorp = ENOMEM; 687 return (NULL); 688 } 689 690 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 691 /* 692 * If we have more then one we should not tell the driver 693 * to join this time. 694 */ 695 if (ilm_numentries(ill, v6group) == 1) { 696 ret = ill_join_allmulti(ill); 697 } 698 } else { 699 if (!IS_LOOPBACK(ill)) { 700 if (ill->ill_isv6) 701 mld_joingroup(ilm); 702 else 703 igmp_joingroup(ilm); 704 } 705 706 /* 707 * If we have more then one we should not tell the driver 708 * to join this time. 709 */ 710 if (ilm_numentries(ill, v6group) == 1) { 711 ret = ip_ll_multireq(ill, v6group, DL_ENABMULTI_REQ); 712 } 713 } 714 if (ret != 0) { 715 if (ret == ENETDOWN) { 716 char buf[INET6_ADDRSTRLEN]; 717 718 ip0dbg(("ip_addmulti: ENETDOWN for %s on %s", 719 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 720 buf, sizeof (buf)), ill->ill_name)); 721 } 722 ilm_delete(ilm); 723 *errorp = ret; 724 return (NULL); 725 } else { 726 return (ilm); 727 } 728 } 729 730 /* 731 * Looks up the list of multicast physical addresses this interface 732 * listens to. Add to the list if not present already. 
733 */ 734 boolean_t 735 ip_mphysaddr_add(ill_t *ill, uchar_t *hw_addr) 736 { 737 multiphysaddr_t *mpa = NULL; 738 int hw_addr_length = ill->ill_phys_addr_length; 739 740 mutex_enter(&ill->ill_lock); 741 for (mpa = ill->ill_mphysaddr_list; mpa != NULL; mpa = mpa->mpa_next) { 742 if (bcmp(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length) == 0) { 743 mpa->mpa_refcnt++; 744 mutex_exit(&ill->ill_lock); 745 return (B_FALSE); 746 } 747 } 748 749 mpa = kmem_zalloc(sizeof (multiphysaddr_t), KM_NOSLEEP); 750 if (mpa == NULL) { 751 /* 752 * We risk not having the multiphysadd structure. At this 753 * point we can't fail. We can't afford to not send a 754 * DL_ENABMULTI_REQ also. It is better than pre-allocating 755 * the structure and having the code to track it also. 756 */ 757 ip0dbg(("ip_mphysaddr_add: ENOMEM. Some multicast apps" 758 " may have issues. hw_addr: %p ill_name: %s\n", 759 (void *)hw_addr, ill->ill_name)); 760 mutex_exit(&ill->ill_lock); 761 return (B_TRUE); 762 } 763 bcopy(hw_addr, &(mpa->mpa_addr[0]), hw_addr_length); 764 mpa->mpa_refcnt = 1; 765 mpa->mpa_next = ill->ill_mphysaddr_list; 766 ill->ill_mphysaddr_list = mpa; 767 mutex_exit(&ill->ill_lock); 768 return (B_TRUE); 769 } 770 771 /* 772 * Look up hw_addr from the list of physical multicast addresses this interface 773 * listens to. 774 * Remove the entry if the refcnt is 0 775 */ 776 boolean_t 777 ip_mphysaddr_del(ill_t *ill, uchar_t *hw_addr) 778 { 779 multiphysaddr_t *mpap = NULL, **mpapp = NULL; 780 int hw_addr_length = ill->ill_phys_addr_length; 781 boolean_t ret = B_FALSE; 782 783 mutex_enter(&ill->ill_lock); 784 for (mpapp = &ill->ill_mphysaddr_list; (mpap = *mpapp) != NULL; 785 mpapp = &(mpap->mpa_next)) { 786 if (bcmp(hw_addr, &(mpap->mpa_addr[0]), hw_addr_length) == 0) 787 break; 788 } 789 if (mpap == NULL) { 790 /* 791 * Should be coming here only when there was a memory 792 * exhaustion and we were not able to allocate 793 * a multiphysaddr_t. We still send a DL_DISABMULTI_REQ down. 
794 */ 795 796 ip0dbg(("ip_mphysaddr_del: No entry for this addr. Some " 797 "multicast apps might have had issues. hw_addr: %p " 798 " ill_name: %s\n", (void *)hw_addr, ill->ill_name)); 799 ret = B_TRUE; 800 } else if (--mpap->mpa_refcnt == 0) { 801 *mpapp = mpap->mpa_next; 802 kmem_free(mpap, sizeof (multiphysaddr_t)); 803 ret = B_TRUE; 804 } 805 mutex_exit(&ill->ill_lock); 806 return (ret); 807 } 808 809 /* 810 * Send a multicast request to the driver for enabling or disabling 811 * multicast reception for v6groupp address. The caller has already 812 * checked whether it is appropriate to send one or not. 813 * 814 * For IPMP we switch to the cast_ill since it has the right hardware 815 * information. 816 */ 817 static int 818 ip_ll_send_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim) 819 { 820 mblk_t *mp; 821 uint32_t addrlen, addroff; 822 ill_t *release_ill = NULL; 823 uchar_t *cp; 824 int err = 0; 825 826 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock)); 827 828 if (IS_IPMP(ill)) { 829 /* On the upper IPMP ill. */ 830 release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp); 831 if (release_ill == NULL) { 832 /* 833 * Avoid sending it down to the ipmpstub. 834 * We will be called again once the members of the 835 * group are in place 836 */ 837 ip1dbg(("ip_ll_send_multireq: no cast_ill for %s %d\n", 838 ill->ill_name, ill->ill_isv6)); 839 return (0); 840 } 841 ill = release_ill; 842 } 843 /* Create a DL_ENABMULTI_REQ or DL_DISABMULTI_REQ message. 
*/ 844 mp = ill_create_dl(ill, prim, &addrlen, &addroff); 845 if (mp == NULL) { 846 err = ENOMEM; 847 goto done; 848 } 849 850 mp = ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp); 851 if (mp == NULL) { 852 ip0dbg(("null from ndp_mcastreq(ill %s)\n", ill->ill_name)); 853 err = ENOMEM; 854 goto done; 855 } 856 cp = mp->b_rptr; 857 858 switch (((union DL_primitives *)cp)->dl_primitive) { 859 case DL_ENABMULTI_REQ: 860 cp += ((dl_enabmulti_req_t *)cp)->dl_addr_offset; 861 if (!ip_mphysaddr_add(ill, cp)) { 862 freemsg(mp); 863 err = 0; 864 goto done; 865 } 866 mutex_enter(&ill->ill_lock); 867 /* Track the state if this is the first enabmulti */ 868 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 869 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 870 mutex_exit(&ill->ill_lock); 871 break; 872 case DL_DISABMULTI_REQ: 873 cp += ((dl_disabmulti_req_t *)cp)->dl_addr_offset; 874 if (!ip_mphysaddr_del(ill, cp)) { 875 freemsg(mp); 876 err = 0; 877 goto done; 878 } 879 } 880 ill_dlpi_queue(ill, mp); 881 done: 882 if (release_ill != NULL) 883 ill_refrele(release_ill); 884 return (err); 885 } 886 887 /* 888 * Send a multicast request to the driver for enabling multicast 889 * membership for v6group if appropriate. 890 */ 891 static int 892 ip_ll_multireq(ill_t *ill, const in6_addr_t *v6groupp, t_uscalar_t prim) 893 { 894 if (ill->ill_net_type != IRE_IF_RESOLVER || 895 ill->ill_ipif->ipif_flags & IPIF_POINTOPOINT) { 896 ip1dbg(("ip_ll_multireq: not resolver\n")); 897 return (0); /* Must be IRE_IF_NORESOLVER */ 898 } 899 900 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 901 ip1dbg(("ip_ll_multireq: MULTI_BCAST\n")); 902 return (0); 903 } 904 return (ip_ll_send_multireq(ill, v6groupp, prim)); 905 } 906 907 /* 908 * Delete the ilm. Used by other parts of IP for the case of no_ilg/leaving 909 * being true. 
910 */ 911 int 912 ip_delmulti(ilm_t *ilm) 913 { 914 ill_t *ill = ilm->ilm_ill; 915 int error; 916 917 /* Acquire serializer to keep assert in ilm_bld_flists happy */ 918 mutex_enter(&ill->ill_mcast_serializer); 919 error = ip_delmulti_serial(ilm, B_TRUE, B_TRUE); 920 mutex_exit(&ill->ill_mcast_serializer); 921 /* 922 * Now that all locks have been dropped, we can send any 923 * deferred/queued DLPI or IP packets 924 */ 925 ill_mcast_send_queued(ill); 926 ill_dlpi_send_queued(ill); 927 return (error); 928 } 929 930 931 /* 932 * Delete the ilm. 933 * Assumes ill_mcast_serializer is held by the caller. 934 * Caller must send out queued dlpi/multicast packets after dropping 935 * all locks. 936 */ 937 static int 938 ip_delmulti_serial(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving) 939 { 940 ill_t *ill = ilm->ilm_ill; 941 int ret; 942 943 ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer)); 944 ASSERT(!(IS_UNDER_IPMP(ill))); 945 946 rw_enter(&ill->ill_mcast_lock, RW_WRITER); 947 ret = ip_delmulti_impl(ilm, no_ilg, leaving); 948 rw_exit(&ill->ill_mcast_lock); 949 ill_mcast_timer_start(ill->ill_ipst); 950 return (ret); 951 } 952 953 static int 954 ip_delmulti_impl(ilm_t *ilm, boolean_t no_ilg, boolean_t leaving) 955 { 956 ill_t *ill = ilm->ilm_ill; 957 int error; 958 in6_addr_t v6group; 959 960 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 961 962 /* Update counters */ 963 if (no_ilg) 964 ilm->ilm_no_ilg_cnt--; 965 966 if (leaving) 967 ilm->ilm_refcnt--; 968 969 if (ilm->ilm_refcnt > 0) 970 return (ilm_update_del(ilm)); 971 972 v6group = ilm->ilm_v6addr; 973 974 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 975 ilm_delete(ilm); 976 /* 977 * If we have some left then one we should not tell the driver 978 * to leave. 
979 */ 980 if (ilm_numentries(ill, &v6group) != 0) 981 return (0); 982 983 ill_leave_allmulti(ill); 984 985 return (0); 986 } 987 988 if (!IS_LOOPBACK(ill)) { 989 if (ill->ill_isv6) 990 mld_leavegroup(ilm); 991 else 992 igmp_leavegroup(ilm); 993 } 994 995 ilm_delete(ilm); 996 /* 997 * If we have some left then one we should not tell the driver 998 * to leave. 999 */ 1000 if (ilm_numentries(ill, &v6group) != 0) 1001 return (0); 1002 1003 error = ip_ll_multireq(ill, &v6group, DL_DISABMULTI_REQ); 1004 /* We ignore the case when ill_dl_up is not set */ 1005 if (error == ENETDOWN) { 1006 char buf[INET6_ADDRSTRLEN]; 1007 1008 ip0dbg(("ip_delmulti: ENETDOWN for %s on %s", 1009 inet_ntop(AF_INET6, &v6group, buf, sizeof (buf)), 1010 ill->ill_name)); 1011 } 1012 return (error); 1013 } 1014 1015 /* 1016 * Make the driver pass up all multicast packets. 1017 */ 1018 int 1019 ill_join_allmulti(ill_t *ill) 1020 { 1021 mblk_t *promiscon_mp, *promiscoff_mp = NULL; 1022 uint32_t addrlen, addroff; 1023 ill_t *release_ill = NULL; 1024 1025 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 1026 1027 if (IS_LOOPBACK(ill)) 1028 return (0); 1029 1030 if (!ill->ill_dl_up) { 1031 /* 1032 * Nobody there. All multicast addresses will be re-joined 1033 * when we get the DL_BIND_ACK bringing the interface up. 1034 */ 1035 return (ENETDOWN); 1036 } 1037 1038 if (IS_IPMP(ill)) { 1039 /* On the upper IPMP ill. */ 1040 release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp); 1041 if (release_ill == NULL) { 1042 /* 1043 * Avoid sending it down to the ipmpstub. 1044 * We will be called again once the members of the 1045 * group are in place 1046 */ 1047 ip1dbg(("ill_join_allmulti: no cast_ill for %s %d\n", 1048 ill->ill_name, ill->ill_isv6)); 1049 return (0); 1050 } 1051 ill = release_ill; 1052 if (!ill->ill_dl_up) { 1053 ill_refrele(ill); 1054 return (ENETDOWN); 1055 } 1056 } 1057 1058 /* 1059 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI 1060 * provider. 
We don't need to do this for certain media types for 1061 * which we never need to turn promiscuous mode on. While we're here, 1062 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that 1063 * ill_leave_allmulti() will not fail due to low memory conditions. 1064 */ 1065 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1066 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1067 promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1068 &addrlen, &addroff); 1069 if (ill->ill_promiscoff_mp == NULL) 1070 promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1071 &addrlen, &addroff); 1072 if (promiscon_mp == NULL || 1073 (ill->ill_promiscoff_mp == NULL && promiscoff_mp == NULL)) { 1074 freemsg(promiscon_mp); 1075 freemsg(promiscoff_mp); 1076 if (release_ill != NULL) 1077 ill_refrele(release_ill); 1078 return (ENOMEM); 1079 } 1080 if (ill->ill_promiscoff_mp == NULL) 1081 ill->ill_promiscoff_mp = promiscoff_mp; 1082 ill_dlpi_queue(ill, promiscon_mp); 1083 } 1084 if (release_ill != NULL) 1085 ill_refrele(release_ill); 1086 return (0); 1087 } 1088 1089 /* 1090 * Make the driver stop passing up all multicast packets 1091 */ 1092 void 1093 ill_leave_allmulti(ill_t *ill) 1094 { 1095 mblk_t *promiscoff_mp; 1096 ill_t *release_ill = NULL; 1097 1098 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 1099 1100 if (IS_LOOPBACK(ill)) 1101 return; 1102 1103 if (!ill->ill_dl_up) { 1104 /* 1105 * Nobody there. All multicast addresses will be re-joined 1106 * when we get the DL_BIND_ACK bringing the interface up. 1107 */ 1108 return; 1109 } 1110 1111 if (IS_IPMP(ill)) { 1112 /* On the upper IPMP ill. */ 1113 release_ill = ipmp_illgrp_hold_cast_ill(ill->ill_grp); 1114 if (release_ill == NULL) { 1115 /* 1116 * Avoid sending it down to the ipmpstub. 
1117 * We will be called again once the members of the 1118 * group are in place 1119 */ 1120 ip1dbg(("ill_leave_allmulti: no cast_ill on %s %d\n", 1121 ill->ill_name, ill->ill_isv6)); 1122 return; 1123 } 1124 ill = release_ill; 1125 if (!ill->ill_dl_up) 1126 goto done; 1127 } 1128 1129 /* 1130 * In the case of IPMP and ill_dl_up not being set when we joined 1131 * we didn't allocate a promiscoff_mp. In that case we have 1132 * nothing to do when we leave. 1133 * Ditto for PHYI_MULTI_BCAST 1134 */ 1135 promiscoff_mp = ill->ill_promiscoff_mp; 1136 if (promiscoff_mp != NULL) { 1137 ill->ill_promiscoff_mp = NULL; 1138 ill_dlpi_queue(ill, promiscoff_mp); 1139 } 1140 done: 1141 if (release_ill != NULL) 1142 ill_refrele(release_ill); 1143 } 1144 1145 int 1146 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1147 { 1148 ill_t *ill; 1149 int ret; 1150 ilm_t *ilm; 1151 1152 ill = ill_lookup_on_ifindex(ifindex, isv6, ipst); 1153 if (ill == NULL) 1154 return (ENODEV); 1155 1156 /* 1157 * The ip_addmulti() function doesn't allow IPMP underlying interfaces 1158 * to join allmulti since only the nominated underlying interface in 1159 * the group should receive multicast. We silently succeed to avoid 1160 * having to teach IPobs (currently the only caller of this routine) 1161 * to ignore failures in this case. 
1162 */ 1163 if (IS_UNDER_IPMP(ill)) { 1164 ill_refrele(ill); 1165 return (0); 1166 } 1167 mutex_enter(&ill->ill_lock); 1168 if (ill->ill_ipallmulti_cnt > 0) { 1169 /* Already joined */ 1170 ASSERT(ill->ill_ipallmulti_ilm != NULL); 1171 ill->ill_ipallmulti_cnt++; 1172 mutex_exit(&ill->ill_lock); 1173 goto done; 1174 } 1175 mutex_exit(&ill->ill_lock); 1176 1177 ilm = ip_addmulti(&ipv6_all_zeros, ill, ill->ill_zoneid, &ret); 1178 if (ilm == NULL) { 1179 ASSERT(ret != 0); 1180 ill_refrele(ill); 1181 return (ret); 1182 } 1183 1184 mutex_enter(&ill->ill_lock); 1185 if (ill->ill_ipallmulti_cnt > 0) { 1186 /* Another thread added it concurrently */ 1187 (void) ip_delmulti(ilm); 1188 mutex_exit(&ill->ill_lock); 1189 goto done; 1190 } 1191 ASSERT(ill->ill_ipallmulti_ilm == NULL); 1192 ill->ill_ipallmulti_ilm = ilm; 1193 ill->ill_ipallmulti_cnt++; 1194 mutex_exit(&ill->ill_lock); 1195 done: 1196 ill_refrele(ill); 1197 return (0); 1198 } 1199 1200 int 1201 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1202 { 1203 ill_t *ill; 1204 ilm_t *ilm; 1205 1206 ill = ill_lookup_on_ifindex(ifindex, isv6, ipst); 1207 if (ill == NULL) 1208 return (ENODEV); 1209 1210 if (IS_UNDER_IPMP(ill)) { 1211 ill_refrele(ill); 1212 return (0); 1213 } 1214 1215 mutex_enter(&ill->ill_lock); 1216 if (ill->ill_ipallmulti_cnt == 0) { 1217 /* ip_purge_allmulti could have removed them all */ 1218 mutex_exit(&ill->ill_lock); 1219 goto done; 1220 } 1221 ill->ill_ipallmulti_cnt--; 1222 if (ill->ill_ipallmulti_cnt == 0) { 1223 /* Last one */ 1224 ilm = ill->ill_ipallmulti_ilm; 1225 ill->ill_ipallmulti_ilm = NULL; 1226 } else { 1227 ilm = NULL; 1228 } 1229 mutex_exit(&ill->ill_lock); 1230 if (ilm != NULL) 1231 (void) ip_delmulti(ilm); 1232 1233 done: 1234 ill_refrele(ill); 1235 return (0); 1236 } 1237 1238 /* 1239 * Delete the allmulti memberships that were added as part of 1240 * ip_join_allmulti(). 
1241 */ 1242 void 1243 ip_purge_allmulti(ill_t *ill) 1244 { 1245 ilm_t *ilm; 1246 1247 ASSERT(IAM_WRITER_ILL(ill)); 1248 1249 mutex_enter(&ill->ill_lock); 1250 ilm = ill->ill_ipallmulti_ilm; 1251 ill->ill_ipallmulti_ilm = NULL; 1252 ill->ill_ipallmulti_cnt = 0; 1253 mutex_exit(&ill->ill_lock); 1254 1255 if (ilm != NULL) 1256 (void) ip_delmulti(ilm); 1257 } 1258 1259 /* 1260 * Create a dlpi message with room for phys+sap. Later 1261 * we will strip the sap for those primitives which 1262 * only need a physical address. 1263 */ 1264 static mblk_t * 1265 ill_create_dl(ill_t *ill, uint32_t dl_primitive, 1266 uint32_t *addr_lenp, uint32_t *addr_offp) 1267 { 1268 mblk_t *mp; 1269 uint32_t hw_addr_length; 1270 char *cp; 1271 uint32_t offset; 1272 uint32_t length; 1273 uint32_t size; 1274 1275 *addr_lenp = *addr_offp = 0; 1276 1277 hw_addr_length = ill->ill_phys_addr_length; 1278 if (!hw_addr_length) { 1279 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1280 return (NULL); 1281 } 1282 1283 switch (dl_primitive) { 1284 case DL_ENABMULTI_REQ: 1285 length = sizeof (dl_enabmulti_req_t); 1286 size = length + hw_addr_length; 1287 break; 1288 case DL_DISABMULTI_REQ: 1289 length = sizeof (dl_disabmulti_req_t); 1290 size = length + hw_addr_length; 1291 break; 1292 case DL_PROMISCON_REQ: 1293 case DL_PROMISCOFF_REQ: 1294 size = length = sizeof (dl_promiscon_req_t); 1295 break; 1296 default: 1297 return (NULL); 1298 } 1299 mp = allocb(size, BPRI_HI); 1300 if (!mp) 1301 return (NULL); 1302 mp->b_wptr += size; 1303 mp->b_datap->db_type = M_PROTO; 1304 1305 cp = (char *)mp->b_rptr; 1306 offset = length; 1307 1308 switch (dl_primitive) { 1309 case DL_ENABMULTI_REQ: { 1310 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1311 1312 dl->dl_primitive = dl_primitive; 1313 dl->dl_addr_offset = offset; 1314 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1315 *addr_offp = offset; 1316 break; 1317 } 1318 case DL_DISABMULTI_REQ: { 1319 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 
1320 1321 dl->dl_primitive = dl_primitive; 1322 dl->dl_addr_offset = offset; 1323 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1324 *addr_offp = offset; 1325 break; 1326 } 1327 case DL_PROMISCON_REQ: 1328 case DL_PROMISCOFF_REQ: { 1329 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1330 1331 dl->dl_primitive = dl_primitive; 1332 dl->dl_level = DL_PROMISC_MULTI; 1333 break; 1334 } 1335 } 1336 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1337 *addr_lenp, *addr_offp)); 1338 return (mp); 1339 } 1340 1341 /* 1342 * Rejoin any groups for which we have ilms. 1343 * 1344 * This is only needed for IPMP when the cast_ill changes since that 1345 * change is invisible to the ilm. Other interface changes are handled 1346 * by conn_update_ill. 1347 */ 1348 void 1349 ill_recover_multicast(ill_t *ill) 1350 { 1351 ilm_t *ilm; 1352 char addrbuf[INET6_ADDRSTRLEN]; 1353 1354 ill->ill_need_recover_multicast = 0; 1355 1356 rw_enter(&ill->ill_mcast_lock, RW_WRITER); 1357 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1358 /* 1359 * If we have more then one ilm for the group (e.g., with 1360 * different zoneid) then we should not tell the driver 1361 * to join unless this is the first ilm for the group. 
1362 */ 1363 if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 && 1364 ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) { 1365 continue; 1366 } 1367 1368 ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6, 1369 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1370 1371 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1372 (void) ill_join_allmulti(ill); 1373 } else { 1374 if (ill->ill_isv6) 1375 mld_joingroup(ilm); 1376 else 1377 igmp_joingroup(ilm); 1378 1379 (void) ip_ll_multireq(ill, &ilm->ilm_v6addr, 1380 DL_ENABMULTI_REQ); 1381 } 1382 } 1383 rw_exit(&ill->ill_mcast_lock); 1384 /* Send any deferred/queued DLPI or IP packets */ 1385 ill_mcast_send_queued(ill); 1386 ill_dlpi_send_queued(ill); 1387 ill_mcast_timer_start(ill->ill_ipst); 1388 } 1389 1390 /* 1391 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1392 * that were explicitly joined. 1393 * 1394 * This is only needed for IPMP when the cast_ill changes since that 1395 * change is invisible to the ilm. Other interface changes are handled 1396 * by conn_update_ill. 1397 */ 1398 void 1399 ill_leave_multicast(ill_t *ill) 1400 { 1401 ilm_t *ilm; 1402 char addrbuf[INET6_ADDRSTRLEN]; 1403 1404 ill->ill_need_recover_multicast = 1; 1405 1406 rw_enter(&ill->ill_mcast_lock, RW_WRITER); 1407 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1408 /* 1409 * If we have more then one ilm for the group (e.g., with 1410 * different zoneid) then we should not tell the driver 1411 * to leave unless this is the first ilm for the group. 
1412 */ 1413 if (ilm_numentries(ill, &ilm->ilm_v6addr) > 1 && 1414 ilm_lookup(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) { 1415 continue; 1416 } 1417 1418 ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6, 1419 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1420 1421 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1422 ill_leave_allmulti(ill); 1423 } else { 1424 if (ill->ill_isv6) 1425 mld_leavegroup(ilm); 1426 else 1427 igmp_leavegroup(ilm); 1428 1429 (void) ip_ll_multireq(ill, &ilm->ilm_v6addr, 1430 DL_DISABMULTI_REQ); 1431 } 1432 } 1433 rw_exit(&ill->ill_mcast_lock); 1434 /* Send any deferred/queued DLPI or IP packets */ 1435 ill_mcast_send_queued(ill); 1436 ill_dlpi_send_queued(ill); 1437 ill_mcast_timer_start(ill->ill_ipst); 1438 } 1439 1440 /* 1441 * Interface used by IP input/output. 1442 * Returns true if there is a member on the ill for any zoneid. 1443 */ 1444 boolean_t 1445 ill_hasmembers_v6(ill_t *ill, const in6_addr_t *v6group) 1446 { 1447 ilm_t *ilm; 1448 1449 rw_enter(&ill->ill_mcast_lock, RW_READER); 1450 ilm = ilm_lookup(ill, v6group, ALL_ZONES); 1451 rw_exit(&ill->ill_mcast_lock); 1452 return (ilm != NULL); 1453 } 1454 1455 /* 1456 * Interface used by IP input/output. 1457 * Returns true if there is a member on the ill for any zoneid. 1458 * 1459 * The group and source can't be INADDR_ANY here so no need to translate to 1460 * the unspecified IPv6 address. 1461 */ 1462 boolean_t 1463 ill_hasmembers_v4(ill_t *ill, ipaddr_t group) 1464 { 1465 in6_addr_t v6group; 1466 1467 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1468 return (ill_hasmembers_v6(ill, &v6group)); 1469 } 1470 1471 /* 1472 * Interface used by IP input/output. 1473 * Returns true if there is a member on the ill for any zoneid except skipzone. 
1474 */ 1475 boolean_t 1476 ill_hasmembers_otherzones_v6(ill_t *ill, const in6_addr_t *v6group, 1477 zoneid_t skipzone) 1478 { 1479 ilm_t *ilm; 1480 1481 rw_enter(&ill->ill_mcast_lock, RW_READER); 1482 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1483 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1484 ilm->ilm_zoneid != skipzone) { 1485 rw_exit(&ill->ill_mcast_lock); 1486 return (B_TRUE); 1487 } 1488 } 1489 rw_exit(&ill->ill_mcast_lock); 1490 return (B_FALSE); 1491 } 1492 1493 /* 1494 * Interface used by IP input/output. 1495 * Returns true if there is a member on the ill for any zoneid except skipzone. 1496 * 1497 * The group and source can't be INADDR_ANY here so no need to translate to 1498 * the unspecified IPv6 address. 1499 */ 1500 boolean_t 1501 ill_hasmembers_otherzones_v4(ill_t *ill, ipaddr_t group, zoneid_t skipzone) 1502 { 1503 in6_addr_t v6group; 1504 1505 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1506 return (ill_hasmembers_otherzones_v6(ill, &v6group, skipzone)); 1507 } 1508 1509 /* 1510 * Interface used by IP input. 1511 * Returns the next numerically larger zoneid that has a member. If none exist 1512 * then returns -1 (ALL_ZONES). 1513 * The normal usage is for the caller to start with a -1 zoneid (ALL_ZONES) 1514 * to find the first zoneid which has a member, and then pass that in for 1515 * subsequent calls until ALL_ZONES is returned. 1516 * 1517 * The implementation of ill_hasmembers_nextzone() assumes the ilms 1518 * are sorted by zoneid for efficiency. 
1519 */ 1520 zoneid_t 1521 ill_hasmembers_nextzone_v6(ill_t *ill, const in6_addr_t *v6group, 1522 zoneid_t zoneid) 1523 { 1524 ilm_t *ilm; 1525 1526 rw_enter(&ill->ill_mcast_lock, RW_READER); 1527 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1528 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1529 ilm->ilm_zoneid > zoneid) { 1530 zoneid = ilm->ilm_zoneid; 1531 rw_exit(&ill->ill_mcast_lock); 1532 return (zoneid); 1533 } 1534 } 1535 rw_exit(&ill->ill_mcast_lock); 1536 return (ALL_ZONES); 1537 } 1538 1539 /* 1540 * Interface used by IP input. 1541 * Returns the next numerically larger zoneid that has a member. If none exist 1542 * then returns -1 (ALL_ZONES). 1543 * 1544 * The group and source can't be INADDR_ANY here so no need to translate to 1545 * the unspecified IPv6 address. 1546 */ 1547 zoneid_t 1548 ill_hasmembers_nextzone_v4(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1549 { 1550 in6_addr_t v6group; 1551 1552 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1553 1554 return (ill_hasmembers_nextzone_v6(ill, &v6group, zoneid)); 1555 } 1556 1557 /* 1558 * Find an ilm matching the ill, group, and zoneid. 1559 */ 1560 static ilm_t * 1561 ilm_lookup(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1562 { 1563 ilm_t *ilm; 1564 1565 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock)); 1566 1567 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1568 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) 1569 continue; 1570 if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid) 1571 continue; 1572 1573 ASSERT(ilm->ilm_ill == ill); 1574 return (ilm); 1575 } 1576 return (NULL); 1577 } 1578 1579 /* 1580 * How many members on this ill? 1581 * Since each shared-IP zone has a separate ilm for the same group/ill 1582 * we can have several. 
1583 */ 1584 static int 1585 ilm_numentries(ill_t *ill, const in6_addr_t *v6group) 1586 { 1587 ilm_t *ilm; 1588 int i = 0; 1589 1590 ASSERT(RW_LOCK_HELD(&ill->ill_mcast_lock)); 1591 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1592 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1593 i++; 1594 } 1595 } 1596 return (i); 1597 } 1598 1599 /* Caller guarantees that the group is not already on the list */ 1600 static ilm_t * 1601 ilm_add(ill_t *ill, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1602 mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid) 1603 { 1604 ilm_t *ilm; 1605 ilm_t *ilm_cur; 1606 ilm_t **ilm_ptpn; 1607 1608 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 1609 ilm = GETSTRUCT(ilm_t, 1); 1610 if (ilm == NULL) 1611 return (NULL); 1612 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1613 ilm->ilm_filter = l_alloc(); 1614 if (ilm->ilm_filter == NULL) { 1615 mi_free(ilm); 1616 return (NULL); 1617 } 1618 } 1619 ilm->ilm_v6addr = *v6group; 1620 ilm->ilm_refcnt = 1; 1621 ilm->ilm_zoneid = zoneid; 1622 ilm->ilm_timer = INFINITY; 1623 ilm->ilm_rtx.rtx_timer = INFINITY; 1624 1625 ilm->ilm_ill = ill; 1626 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 1627 (char *), "ilm", (void *), ilm); 1628 ill->ill_ilm_cnt++; 1629 1630 ASSERT(ill->ill_ipst); 1631 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1632 1633 /* The ill/ipif could have just been marked as condemned */ 1634 1635 /* 1636 * To make ill_hasmembers_nextzone_v6 work we keep the list 1637 * sorted by zoneid. 1638 */ 1639 ilm_cur = ill->ill_ilm; 1640 ilm_ptpn = &ill->ill_ilm; 1641 while (ilm_cur != NULL && ilm_cur->ilm_zoneid < ilm->ilm_zoneid) { 1642 ilm_ptpn = &ilm_cur->ilm_next; 1643 ilm_cur = ilm_cur->ilm_next; 1644 } 1645 ilm->ilm_next = ilm_cur; 1646 *ilm_ptpn = ilm; 1647 1648 /* 1649 * If we have an associated ilg, use its filter state; if not, 1650 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
1651 */ 1652 if (ilgstat != ILGSTAT_NONE) { 1653 if (!SLIST_IS_EMPTY(ilg_flist)) 1654 l_copy(ilg_flist, ilm->ilm_filter); 1655 ilm->ilm_fmode = ilg_fmode; 1656 } else { 1657 ilm->ilm_no_ilg_cnt = 1; 1658 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1659 } 1660 1661 return (ilm); 1662 } 1663 1664 void 1665 ilm_inactive(ilm_t *ilm) 1666 { 1667 FREE_SLIST(ilm->ilm_filter); 1668 FREE_SLIST(ilm->ilm_pendsrcs); 1669 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1670 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1671 ilm->ilm_ipst = NULL; 1672 mi_free((char *)ilm); 1673 } 1674 1675 /* 1676 * Unlink ilm and free it. 1677 */ 1678 static void 1679 ilm_delete(ilm_t *ilm) 1680 { 1681 ill_t *ill = ilm->ilm_ill; 1682 ilm_t **ilmp; 1683 boolean_t need_wakeup; 1684 1685 /* 1686 * Delete under lock protection so that readers don't stumble 1687 * on bad ilm_next 1688 */ 1689 ASSERT(RW_WRITE_HELD(&ill->ill_mcast_lock)); 1690 1691 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1692 ; 1693 1694 *ilmp = ilm->ilm_next; 1695 1696 mutex_enter(&ill->ill_lock); 1697 /* 1698 * if we are the last reference to the ill, we may need to wakeup any 1699 * pending FREE or unplumb operations. This is because conn_update_ill 1700 * bails if there is a ilg_delete_all in progress. 1701 */ 1702 need_wakeup = B_FALSE; 1703 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 1704 (char *), "ilm", (void *), ilm); 1705 ASSERT(ill->ill_ilm_cnt > 0); 1706 ill->ill_ilm_cnt--; 1707 if (ILL_FREE_OK(ill)) 1708 need_wakeup = B_TRUE; 1709 1710 ilm_inactive(ilm); /* frees this ilm */ 1711 1712 if (need_wakeup) { 1713 /* drops ill lock */ 1714 ipif_ill_refrele_tail(ill); 1715 } else { 1716 mutex_exit(&ill->ill_lock); 1717 } 1718 } 1719 1720 /* 1721 * Lookup an ill based on the group, ifindex, ifaddr, and zoneid. 1722 * Applies to both IPv4 and IPv6, although ifaddr is only used with 1723 * IPv4. 1724 * Returns an error for IS_UNDER_IPMP and VNI interfaces. 1725 * On error it sets *errorp. 
1726 */ 1727 static ill_t * 1728 ill_mcast_lookup(const in6_addr_t *group, ipaddr_t ifaddr, uint_t ifindex, 1729 zoneid_t zoneid, ip_stack_t *ipst, int *errorp) 1730 { 1731 ill_t *ill; 1732 ipaddr_t v4group; 1733 1734 if (IN6_IS_ADDR_V4MAPPED(group)) { 1735 IN6_V4MAPPED_TO_IPADDR(group, v4group); 1736 1737 if (ifindex != 0) { 1738 ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid, 1739 B_FALSE, ipst); 1740 } else if (ifaddr != INADDR_ANY) { 1741 ipif_t *ipif; 1742 1743 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, ipst); 1744 if (ipif == NULL) { 1745 ill = NULL; 1746 } else { 1747 ill = ipif->ipif_ill; 1748 ill_refhold(ill); 1749 ipif_refrele(ipif); 1750 } 1751 } else { 1752 ill = ill_lookup_group_v4(v4group, zoneid, ipst, NULL, 1753 NULL); 1754 } 1755 } else { 1756 if (ifindex != 0) { 1757 ill = ill_lookup_on_ifindex_zoneid(ifindex, zoneid, 1758 B_TRUE, ipst); 1759 } else { 1760 ill = ill_lookup_group_v6(group, zoneid, ipst, NULL, 1761 NULL); 1762 } 1763 } 1764 if (ill == NULL) { 1765 if (ifindex != 0) 1766 *errorp = ENXIO; 1767 else 1768 *errorp = EADDRNOTAVAIL; 1769 return (NULL); 1770 } 1771 /* operation not supported on the virtual network interface */ 1772 if (IS_UNDER_IPMP(ill) || IS_VNI(ill)) { 1773 ill_refrele(ill); 1774 *errorp = EINVAL; 1775 return (NULL); 1776 } 1777 return (ill); 1778 } 1779 1780 /* 1781 * Looks up the appropriate ill given an interface index (or interface address) 1782 * and multicast group. On success, returns 0, with *illpp pointing to the 1783 * found struct. On failure, returns an errno and *illpp is set to NULL. 1784 * 1785 * Returns an error for IS_UNDER_IPMP and VNI interfaces. 1786 * 1787 * Handles both IPv4 and IPv6. The ifaddr argument only applies in the 1788 * case of IPv4. 
1789 */ 1790 int 1791 ip_opt_check(conn_t *connp, const in6_addr_t *v6group, 1792 const in6_addr_t *v6src, ipaddr_t ifaddr, uint_t ifindex, ill_t **illpp) 1793 { 1794 boolean_t src_unspec; 1795 ill_t *ill = NULL; 1796 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1797 int error = 0; 1798 1799 *illpp = NULL; 1800 1801 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1802 1803 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1804 ipaddr_t v4group; 1805 ipaddr_t v4src; 1806 1807 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1808 return (EINVAL); 1809 IN6_V4MAPPED_TO_IPADDR(v6group, v4group); 1810 if (src_unspec) { 1811 v4src = INADDR_ANY; 1812 } else { 1813 IN6_V4MAPPED_TO_IPADDR(v6src, v4src); 1814 } 1815 if (!CLASSD(v4group) || CLASSD(v4src)) 1816 return (EINVAL); 1817 } else { 1818 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1819 return (EINVAL); 1820 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1821 IN6_IS_ADDR_MULTICAST(v6src)) { 1822 return (EINVAL); 1823 } 1824 } 1825 1826 ill = ill_mcast_lookup(v6group, ifaddr, ifindex, IPCL_ZONEID(connp), 1827 ipst, &error); 1828 *illpp = ill; 1829 return (error); 1830 } 1831 1832 static int 1833 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1834 struct ip_msfilter *imsf, const struct in6_addr *group, boolean_t issin6) 1835 { 1836 ilg_t *ilg; 1837 int i, numsrc, fmode, outsrcs; 1838 struct sockaddr_in *sin; 1839 struct sockaddr_in6 *sin6; 1840 struct in_addr *addrp; 1841 slist_t *fp; 1842 boolean_t is_v4only_api; 1843 ipaddr_t ifaddr; 1844 uint_t ifindex; 1845 1846 if (gf == NULL) { 1847 ASSERT(imsf != NULL); 1848 ASSERT(!issin6); 1849 is_v4only_api = B_TRUE; 1850 outsrcs = imsf->imsf_numsrc; 1851 ifaddr = imsf->imsf_interface.s_addr; 1852 ifindex = 0; 1853 } else { 1854 ASSERT(imsf == NULL); 1855 is_v4only_api = B_FALSE; 1856 outsrcs = gf->gf_numsrc; 1857 ifaddr = INADDR_ANY; 1858 ifindex = gf->gf_interface; 1859 } 1860 1861 /* No need to use ill_mcast_serializer for the reader */ 1862 rw_enter(&connp->conn_ilg_lock, 
RW_READER); 1863 ilg = ilg_lookup(connp, group, ifaddr, ifindex); 1864 if (ilg == NULL) { 1865 rw_exit(&connp->conn_ilg_lock); 1866 return (EADDRNOTAVAIL); 1867 } 1868 1869 /* 1870 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 1871 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 1872 * So we need to translate here. 1873 */ 1874 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 1875 MCAST_INCLUDE : MCAST_EXCLUDE; 1876 if ((fp = ilg->ilg_filter) == NULL) { 1877 numsrc = 0; 1878 } else { 1879 for (i = 0; i < outsrcs; i++) { 1880 if (i == fp->sl_numsrc) 1881 break; 1882 if (issin6) { 1883 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 1884 sin6->sin6_family = AF_INET6; 1885 sin6->sin6_addr = fp->sl_addr[i]; 1886 } else { 1887 if (is_v4only_api) { 1888 addrp = &imsf->imsf_slist[i]; 1889 } else { 1890 sin = (struct sockaddr_in *) 1891 &gf->gf_slist[i]; 1892 sin->sin_family = AF_INET; 1893 addrp = &sin->sin_addr; 1894 } 1895 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 1896 } 1897 } 1898 numsrc = fp->sl_numsrc; 1899 } 1900 1901 if (is_v4only_api) { 1902 imsf->imsf_numsrc = numsrc; 1903 imsf->imsf_fmode = fmode; 1904 } else { 1905 gf->gf_numsrc = numsrc; 1906 gf->gf_fmode = fmode; 1907 } 1908 1909 rw_exit(&connp->conn_ilg_lock); 1910 1911 return (0); 1912 } 1913 1914 /* 1915 * Common for IPv4 and IPv6. 
1916 */ 1917 static int 1918 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 1919 struct ip_msfilter *imsf, const struct in6_addr *group, ill_t *ill, 1920 boolean_t issin6) 1921 { 1922 ilg_t *ilg; 1923 int i, err, infmode, new_fmode; 1924 uint_t insrcs; 1925 struct sockaddr_in *sin; 1926 struct sockaddr_in6 *sin6; 1927 struct in_addr *addrp; 1928 slist_t *orig_filter = NULL; 1929 slist_t *new_filter = NULL; 1930 mcast_record_t orig_fmode; 1931 boolean_t leave_group, is_v4only_api; 1932 ilg_stat_t ilgstat; 1933 ilm_t *ilm; 1934 ipaddr_t ifaddr; 1935 uint_t ifindex; 1936 1937 if (gf == NULL) { 1938 ASSERT(imsf != NULL); 1939 ASSERT(!issin6); 1940 is_v4only_api = B_TRUE; 1941 insrcs = imsf->imsf_numsrc; 1942 infmode = imsf->imsf_fmode; 1943 ifaddr = imsf->imsf_interface.s_addr; 1944 ifindex = 0; 1945 } else { 1946 ASSERT(imsf == NULL); 1947 is_v4only_api = B_FALSE; 1948 insrcs = gf->gf_numsrc; 1949 infmode = gf->gf_fmode; 1950 ifaddr = INADDR_ANY; 1951 ifindex = gf->gf_interface; 1952 } 1953 1954 /* Make sure we can handle the source list */ 1955 if (insrcs > MAX_FILTER_SIZE) 1956 return (ENOBUFS); 1957 1958 /* 1959 * setting the filter to (INCLUDE, NULL) is treated 1960 * as a request to leave the group. 1961 */ 1962 leave_group = (infmode == MCAST_INCLUDE && insrcs == 0); 1963 1964 mutex_enter(&ill->ill_mcast_serializer); 1965 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 1966 ilg = ilg_lookup(connp, group, ifaddr, ifindex); 1967 if (ilg == NULL) { 1968 /* 1969 * if the request was actually to leave, and we 1970 * didn't find an ilg, there's nothing to do. 
1971 */ 1972 if (leave_group) { 1973 rw_exit(&connp->conn_ilg_lock); 1974 mutex_exit(&ill->ill_mcast_serializer); 1975 return (0); 1976 } 1977 ilg = conn_ilg_alloc(connp, &err); 1978 if (ilg == NULL) { 1979 rw_exit(&connp->conn_ilg_lock); 1980 mutex_exit(&ill->ill_mcast_serializer); 1981 return (err); 1982 } 1983 ilgstat = ILGSTAT_NEW; 1984 ilg->ilg_v6group = *group; 1985 ilg->ilg_ill = ill; 1986 ilg->ilg_ifaddr = ifaddr; 1987 ilg->ilg_ifindex = ifindex; 1988 } else if (leave_group) { 1989 /* 1990 * Make sure we have the correct serializer. The ill argument 1991 * might not match ilg_ill. 1992 */ 1993 ilg_refhold(ilg); 1994 mutex_exit(&ill->ill_mcast_serializer); 1995 ill = ilg->ilg_ill; 1996 rw_exit(&connp->conn_ilg_lock); 1997 1998 mutex_enter(&ill->ill_mcast_serializer); 1999 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 2000 ilm = ilg->ilg_ilm; 2001 ilg->ilg_ilm = NULL; 2002 ilg_delete(connp, ilg, NULL); 2003 ilg_refrele(ilg); 2004 rw_exit(&connp->conn_ilg_lock); 2005 if (ilm != NULL) 2006 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE); 2007 mutex_exit(&ill->ill_mcast_serializer); 2008 /* 2009 * Now that all locks have been dropped, we can send any 2010 * deferred/queued DLPI or IP packets 2011 */ 2012 ill_mcast_send_queued(ill); 2013 ill_dlpi_send_queued(ill); 2014 return (0); 2015 } else { 2016 ilgstat = ILGSTAT_CHANGE; 2017 /* Preserve existing state in case ip_addmulti() fails */ 2018 orig_fmode = ilg->ilg_fmode; 2019 if (ilg->ilg_filter == NULL) { 2020 orig_filter = NULL; 2021 } else { 2022 orig_filter = l_alloc_copy(ilg->ilg_filter); 2023 if (orig_filter == NULL) { 2024 rw_exit(&connp->conn_ilg_lock); 2025 mutex_exit(&ill->ill_mcast_serializer); 2026 return (ENOMEM); 2027 } 2028 } 2029 } 2030 2031 /* 2032 * Alloc buffer to copy new state into (see below) before 2033 * we make any changes, so we can bail if it fails. 
2034 */ 2035 if ((new_filter = l_alloc()) == NULL) { 2036 rw_exit(&connp->conn_ilg_lock); 2037 err = ENOMEM; 2038 goto free_and_exit; 2039 } 2040 2041 if (insrcs == 0) { 2042 CLEAR_SLIST(ilg->ilg_filter); 2043 } else { 2044 slist_t *fp; 2045 if (ilg->ilg_filter == NULL) { 2046 fp = l_alloc(); 2047 if (fp == NULL) { 2048 if (ilgstat == ILGSTAT_NEW) 2049 ilg_delete(connp, ilg, NULL); 2050 rw_exit(&connp->conn_ilg_lock); 2051 err = ENOMEM; 2052 goto free_and_exit; 2053 } 2054 } else { 2055 fp = ilg->ilg_filter; 2056 } 2057 for (i = 0; i < insrcs; i++) { 2058 if (issin6) { 2059 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2060 fp->sl_addr[i] = sin6->sin6_addr; 2061 } else { 2062 if (is_v4only_api) { 2063 addrp = &imsf->imsf_slist[i]; 2064 } else { 2065 sin = (struct sockaddr_in *) 2066 &gf->gf_slist[i]; 2067 addrp = &sin->sin_addr; 2068 } 2069 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2070 } 2071 } 2072 fp->sl_numsrc = insrcs; 2073 ilg->ilg_filter = fp; 2074 } 2075 /* 2076 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2077 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2078 * So we need to translate here. 2079 */ 2080 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2081 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2082 2083 /* 2084 * Save copy of ilg's filter state to pass to other functions, 2085 * so we can release conn_ilg_lock now. 2086 */ 2087 new_fmode = ilg->ilg_fmode; 2088 l_copy(ilg->ilg_filter, new_filter); 2089 2090 rw_exit(&connp->conn_ilg_lock); 2091 2092 /* 2093 * Now update the ill. We wait to do this until after the ilg 2094 * has been updated because we need to update the src filter 2095 * info for the ill, which involves looking at the status of 2096 * all the ilgs associated with this group/interface pair. 
2097 */ 2098 ilm = ip_addmulti_serial(group, ill, connp->conn_zoneid, ilgstat, 2099 new_fmode, new_filter, &err); 2100 2101 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 2102 /* 2103 * Must look up the ilg again since we've not been holding 2104 * conn_ilg_lock. The ilg could have disappeared due to an unplumb 2105 * having called conn_update_ill, which can run once we dropped the 2106 * conn_ilg_lock above. 2107 */ 2108 ilg = ilg_lookup(connp, group, ifaddr, ifindex); 2109 if (ilg == NULL) { 2110 rw_exit(&connp->conn_ilg_lock); 2111 if (ilm != NULL) { 2112 (void) ip_delmulti_serial(ilm, B_FALSE, 2113 (ilgstat == ILGSTAT_NEW)); 2114 } 2115 err = ENXIO; 2116 goto free_and_exit; 2117 } 2118 2119 if (ilm != NULL) { 2120 if (ilg->ilg_ill == NULL) { 2121 /* some other thread is re-attaching this. */ 2122 rw_exit(&connp->conn_ilg_lock); 2123 (void) ip_delmulti_serial(ilm, B_FALSE, 2124 (ilgstat == ILGSTAT_NEW)); 2125 err = 0; 2126 goto free_and_exit; 2127 } 2128 /* Succeeded. Update the ilg to point at the ilm */ 2129 if (ilgstat == ILGSTAT_NEW) { 2130 if (ilg->ilg_ilm == NULL) { 2131 ilg->ilg_ilm = ilm; 2132 ilm->ilm_ifaddr = ifaddr; /* For netstat */ 2133 } else { 2134 /* some other thread is re-attaching this. */ 2135 rw_exit(&connp->conn_ilg_lock); 2136 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE); 2137 err = 0; 2138 goto free_and_exit; 2139 } 2140 } else { 2141 /* 2142 * ip_addmulti didn't get a held ilm for 2143 * ILGSTAT_CHANGE; ilm_refcnt was unchanged. 2144 */ 2145 ASSERT(ilg->ilg_ilm == ilm); 2146 } 2147 } else { 2148 ASSERT(err != 0); 2149 /* 2150 * Failed to allocate the ilm. 2151 * Restore the original filter state, or delete the 2152 * newly-created ilg. 2153 * If ENETDOWN just clear ill_ilg since so that we 2154 * will rejoin when the ill comes back; don't report ENETDOWN 2155 * to application. 
2156 */ 2157 if (ilgstat == ILGSTAT_NEW) { 2158 if (err == ENETDOWN) { 2159 ilg->ilg_ill = NULL; 2160 err = 0; 2161 } else { 2162 ilg_delete(connp, ilg, NULL); 2163 } 2164 } else { 2165 ilg->ilg_fmode = orig_fmode; 2166 if (SLIST_IS_EMPTY(orig_filter)) { 2167 CLEAR_SLIST(ilg->ilg_filter); 2168 } else { 2169 /* 2170 * We didn't free the filter, even if we 2171 * were trying to make the source list empty; 2172 * so if orig_filter isn't empty, the ilg 2173 * must still have a filter alloc'd. 2174 */ 2175 l_copy(orig_filter, ilg->ilg_filter); 2176 } 2177 } 2178 } 2179 rw_exit(&connp->conn_ilg_lock); 2180 2181 free_and_exit: 2182 mutex_exit(&ill->ill_mcast_serializer); 2183 ill_mcast_send_queued(ill); 2184 ill_dlpi_send_queued(ill); 2185 l_free(orig_filter); 2186 l_free(new_filter); 2187 2188 return (err); 2189 } 2190 2191 /* 2192 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2193 */ 2194 /* ARGSUSED */ 2195 int 2196 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2197 ip_ioctl_cmd_t *ipip, void *ifreq) 2198 { 2199 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2200 /* existence verified in ip_wput_nondata() */ 2201 mblk_t *data_mp = mp->b_cont->b_cont; 2202 int datalen, err, cmd, minsize; 2203 uint_t expsize = 0; 2204 conn_t *connp; 2205 boolean_t isv6, is_v4only_api, getcmd; 2206 struct sockaddr_in *gsin; 2207 struct sockaddr_in6 *gsin6; 2208 ipaddr_t v4group; 2209 in6_addr_t v6group; 2210 struct group_filter *gf = NULL; 2211 struct ip_msfilter *imsf = NULL; 2212 mblk_t *ndp; 2213 ill_t *ill; 2214 2215 connp = Q_TO_CONN(q); 2216 err = ip_msfilter_ill(connp, mp, ipip, &ill); 2217 if (err != 0) 2218 return (err); 2219 2220 if (data_mp->b_cont != NULL) { 2221 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2222 return (ENOMEM); 2223 freemsg(data_mp); 2224 data_mp = ndp; 2225 mp->b_cont->b_cont = data_mp; 2226 } 2227 2228 cmd = iocp->ioc_cmd; 2229 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2230 is_v4only_api = 
(cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2231 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2232 datalen = MBLKL(data_mp); 2233 2234 if (datalen < minsize) 2235 return (EINVAL); 2236 2237 /* 2238 * now we know we have at least have the initial structure, 2239 * but need to check for the source list array. 2240 */ 2241 if (is_v4only_api) { 2242 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2243 isv6 = B_FALSE; 2244 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2245 } else { 2246 gf = (struct group_filter *)data_mp->b_rptr; 2247 if (gf->gf_group.ss_family == AF_INET6) { 2248 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2249 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2250 } else { 2251 isv6 = B_FALSE; 2252 } 2253 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2254 } 2255 if (datalen < expsize) 2256 return (EINVAL); 2257 2258 if (isv6) { 2259 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2260 v6group = gsin6->sin6_addr; 2261 if (getcmd) { 2262 err = ip_get_srcfilter(connp, gf, NULL, &v6group, 2263 B_TRUE); 2264 } else { 2265 err = ip_set_srcfilter(connp, gf, NULL, &v6group, ill, 2266 B_TRUE); 2267 } 2268 } else { 2269 boolean_t issin6 = B_FALSE; 2270 if (is_v4only_api) { 2271 v4group = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2272 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group); 2273 } else { 2274 if (gf->gf_group.ss_family == AF_INET) { 2275 gsin = (struct sockaddr_in *)&gf->gf_group; 2276 v4group = (ipaddr_t)gsin->sin_addr.s_addr; 2277 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group); 2278 } else { 2279 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2280 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2281 v4group); 2282 issin6 = B_TRUE; 2283 } 2284 } 2285 /* 2286 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
2287 */ 2288 if (v4group == INADDR_ANY) 2289 v6group = ipv6_all_zeros; 2290 else 2291 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group); 2292 2293 if (getcmd) { 2294 err = ip_get_srcfilter(connp, gf, imsf, &v6group, 2295 issin6); 2296 } else { 2297 err = ip_set_srcfilter(connp, gf, imsf, &v6group, ill, 2298 issin6); 2299 } 2300 } 2301 ill_refrele(ill); 2302 2303 return (err); 2304 } 2305 2306 /* 2307 * Determine the ill for the SIOC*MSFILTER ioctls 2308 * 2309 * Returns an error for IS_UNDER_IPMP interfaces. 2310 * 2311 * Finds the ill based on information in the ioctl headers. 2312 */ 2313 static int 2314 ip_msfilter_ill(conn_t *connp, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2315 ill_t **illp) 2316 { 2317 int cmd = ipip->ipi_cmd; 2318 int err = 0; 2319 ill_t *ill; 2320 /* caller has verified this mblk exists */ 2321 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2322 struct ip_msfilter *imsf; 2323 struct group_filter *gf; 2324 ipaddr_t v4addr, v4group; 2325 in6_addr_t v6group; 2326 uint32_t index; 2327 ip_stack_t *ipst; 2328 2329 ipst = connp->conn_netstack->netstack_ip; 2330 2331 *illp = NULL; 2332 2333 /* don't allow multicast operations on a tcp conn */ 2334 if (IPCL_IS_TCP(connp)) 2335 return (ENOPROTOOPT); 2336 2337 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2338 /* don't allow v4-specific ioctls on v6 socket */ 2339 if (connp->conn_family == AF_INET6) 2340 return (EAFNOSUPPORT); 2341 2342 imsf = (struct ip_msfilter *)dbuf; 2343 v4addr = imsf->imsf_interface.s_addr; 2344 v4group = imsf->imsf_multiaddr.s_addr; 2345 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group); 2346 ill = ill_mcast_lookup(&v6group, v4addr, 0, IPCL_ZONEID(connp), 2347 ipst, &err); 2348 if (ill == NULL && v4addr != INADDR_ANY) 2349 err = ENXIO; 2350 } else { 2351 gf = (struct group_filter *)dbuf; 2352 index = gf->gf_interface; 2353 if (gf->gf_group.ss_family == AF_INET6) { 2354 struct sockaddr_in6 *sin6; 2355 2356 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2357 v6group = sin6->sin6_addr; 
2358 } else if (gf->gf_group.ss_family == AF_INET) { 2359 struct sockaddr_in *sin; 2360 2361 sin = (struct sockaddr_in *)&gf->gf_group; 2362 v4group = sin->sin_addr.s_addr; 2363 IN6_IPADDR_TO_V4MAPPED(v4group, &v6group); 2364 } else { 2365 return (EAFNOSUPPORT); 2366 } 2367 ill = ill_mcast_lookup(&v6group, INADDR_ANY, index, 2368 IPCL_ZONEID(connp), ipst, &err); 2369 } 2370 *illp = ill; 2371 return (err); 2372 } 2373 2374 /* 2375 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2376 * in in two stages, as the first copyin tells us the size of the attached 2377 * source buffer. This function is called by ip_wput_nondata() after the 2378 * first copyin has completed; it figures out how big the second stage 2379 * needs to be, and kicks it off. 2380 * 2381 * In some cases (numsrc < 2), the second copyin is not needed as the 2382 * first one gets a complete structure containing 1 source addr. 2383 * 2384 * The function returns 0 if a second copyin has been started (i.e. there's 2385 * no more work to be done right now), or 1 if the second copyin is not 2386 * needed and ip_wput_nondata() can continue its processing. 
2387 */ 2388 int 2389 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2390 { 2391 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2392 int cmd = iocp->ioc_cmd; 2393 /* validity of this checked in ip_wput_nondata() */ 2394 mblk_t *mp1 = mp->b_cont->b_cont; 2395 int copysize = 0; 2396 int offset; 2397 2398 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2399 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2400 if (gf->gf_numsrc >= 2) { 2401 offset = sizeof (struct group_filter); 2402 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2403 } 2404 } else { 2405 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2406 if (imsf->imsf_numsrc >= 2) { 2407 offset = sizeof (struct ip_msfilter); 2408 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2409 } 2410 } 2411 if (copysize > 0) { 2412 mi_copyin_n(q, mp, offset, copysize); 2413 return (0); 2414 } 2415 return (1); 2416 } 2417 2418 /* 2419 * Handle the following optmgmt: 2420 * IP_ADD_MEMBERSHIP must not have joined already 2421 * IPV6_JOIN_GROUP must not have joined already 2422 * MCAST_JOIN_GROUP must not have joined already 2423 * IP_BLOCK_SOURCE must have joined already 2424 * MCAST_BLOCK_SOURCE must have joined already 2425 * IP_JOIN_SOURCE_GROUP may have joined already 2426 * MCAST_JOIN_SOURCE_GROUP may have joined already 2427 * 2428 * fmode and src parameters may be used to determine which option is 2429 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2430 * are functionally equivalent): 2431 * opt fmode v6src 2432 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE unspecified 2433 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2434 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2435 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE IPv4-mapped addr 2436 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2437 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE IPv4-mapped addr 2438 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2439 * 2440 * Changing the filter mode is not allowed; if a matching ilg 
/*
 * Handle the following optmgmt:
 *	IP_ADD_MEMBERSHIP		must not have joined already
 *	IPV6_JOIN_GROUP			must not have joined already
 *	MCAST_JOIN_GROUP		must not have joined already
 *	IP_BLOCK_SOURCE			must have joined already
 *	MCAST_BLOCK_SOURCE		must have joined already
 *	IP_JOIN_SOURCE_GROUP		may have joined already
 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
 * are functionally equivalent):
 *	opt				fmode			v6src
 *	IP_ADD_MEMBERSHIP		MODE_IS_EXCLUDE		unspecified
 *	IPV6_JOIN_GROUP			MODE_IS_EXCLUDE		unspecified
 *	MCAST_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
 *	IP_BLOCK_SOURCE			MODE_IS_EXCLUDE		IPv4-mapped addr
 *	MCAST_BLOCK_SOURCE		MODE_IS_EXCLUDE		v6 addr
 *	IP_JOIN_SOURCE_GROUP		MODE_IS_INCLUDE		IPv4-mapped addr
 *	MCAST_JOIN_SOURCE_GROUP		MODE_IS_INCLUDE		v6 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * The interface to be used may be identified by an IPv4 address or by an
 * interface index.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the IPv4 address.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 *
 * If checkonly is set, only the argument check (ip_opt_check) is done and
 * no membership state is changed.
 *
 * Returns 0 or an errno value: the error from ip_opt_check(), EADDRNOTAVAIL
 * if the ill is under (or joining) an IPMP group, or the result of ilg_add().
 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	ill_t	*ill;
	char	buf[INET6_ADDRSTRLEN];
	int	err;

	/* On success this hands back an ill that we must ill_refrele() */
	err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, &ill);
	if (err != 0) {
		ip1dbg(("ip_opt_add_group: no ill for group %s/"
		    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
		    sizeof (buf)), ifindex));
		return (err);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ill_refrele(ill);
		return (0);
	}
	mutex_enter(&ill->ill_mcast_serializer);
	/*
	 * Multicast groups may not be joined on interfaces that are either
	 * already underlying interfaces in an IPMP group, or in the process
	 * of joining the IPMP group. The latter condition is enforced by
	 * checking the value of ill->ill_grp_pending under the
	 * ill_mcast_serializer lock. We cannot serialize the
	 * ill_grp_pending check on the ill_g_lock across ilg_add() because
	 * ill_mcast_send_queued -> ip_output_simple -> ill_lookup_on_ifindex
	 * will take the ill_g_lock itself. Instead, we hold the
	 * ill_mcast_serializer.
	 */
	if (ill->ill_grp_pending || IS_UNDER_IPMP(ill)) {
		DTRACE_PROBE2(group__add__on__under, ill_t *, ill,
		    in6_addr_t *, v6group);
		mutex_exit(&ill->ill_mcast_serializer);
		ill_refrele(ill);
		return (EADDRNOTAVAIL);
	}
	/* ilg_add() asserts that ill_mcast_serializer is held */
	err = ilg_add(connp, v6group, ifaddr, ifindex, ill, fmode, v6src);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * We have done an addmulti_impl and/or delmulti_impl.
	 * All locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
	ill_refrele(ill);
	return (err);
}

/*
 * Common for IPv6 and IPv4.
 * Here we handle ilgs that are still attached to their original ill
 * (the one ifaddr/ifindex points at), as well as detached ones.
 * The detached ones might have been attached to some other ill.
 *
 * Either deletes the matching ilg (when v6src is unspecified, or when the
 * last source of an include-mode filter is being removed) or just removes
 * v6src from the ilg's filter list.
 *
 * Returns 0 on success.  When there is no matching ilg, returns the error
 * from ip_opt_check() or, if the arguments were otherwise fine,
 * EADDRNOTAVAIL.  When removing a source, returns EINVAL if fmode does not
 * match the ilg's mode and EADDRNOTAVAIL if v6src is not in the filter.
 */
static int
ip_opt_delete_group_excl(conn_t *connp, const in6_addr_t *v6group,
    ipaddr_t ifaddr, uint_t ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src)
{
	ilg_t	*ilg;
	boolean_t leaving;
	ilm_t *ilm;
	ill_t	*ill;
	int err = 0;

retry:
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		/*
		 * Since we didn't have any ilg we now do the error checks
		 * to determine the best errno.
		 */
		err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex,
		    &ill);
		if (ill != NULL) {
			/* The only error was a missing ilg for the group */
			ill_refrele(ill);
			err = EADDRNOTAVAIL;
		}
		return (err);
	}

	/* If the ilg is attached then we serialize using that ill */
	ill = ilg->ilg_ill;
	if (ill != NULL) {
		/* Prevent the ill and ilg from being freed */
		ill_refhold(ill);
		ilg_refhold(ilg);
		/*
		 * conn_ilg_lock is dropped before blocking on the
		 * serializer and re-taken afterwards, so the ilg has to be
		 * re-validated once both are held.
		 */
		rw_exit(&connp->conn_ilg_lock);
		mutex_enter(&ill->ill_mcast_serializer);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		if (ilg->ilg_condemned) {
			/* Disappeared */
			ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			mutex_exit(&ill->ill_mcast_serializer);
			ill_refrele(ill);
			goto retry;
		}
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		leaving = B_TRUE;
	} else {
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			/* The ilg hold was only taken in the attached case */
			if (ill != NULL)
				ilg_refrele(ilg);
			rw_exit(&connp->conn_ilg_lock);
			goto done;
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			leaving = B_TRUE;
			/* NULL src makes ilg_delete() remove the whole ilg */
			v6src = NULL;
		} else {
			leaving = B_FALSE;
		}
	}
	/*
	 * Pick up the ilm before ilg_delete(); when leaving, the ilg's
	 * pointer must be cleared first (ilg_delete asserts that).
	 */
	ilm = ilg->ilg_ilm;
	if (leaving)
		ilg->ilg_ilm = NULL;

	ilg_delete(connp, ilg, v6src);
	if (ill != NULL)
		ilg_refrele(ilg);
	rw_exit(&connp->conn_ilg_lock);

	if (ilm != NULL) {
		ASSERT(ill != NULL);
		(void) ip_delmulti_serial(ilm, B_FALSE, leaving);
	}
done:
	if (ill != NULL) {
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can
		 * send any deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		ill_refrele(ill);
	}
	return (err);
}
IPv4-mapped addr 2641 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2642 * 2643 * Changing the filter mode is not allowed; if a matching ilg already 2644 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2645 * 2646 * The interface to be used may be identified by an IPv4 address or by an 2647 * interface index. 2648 * 2649 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2650 * with the IPv4 address. Assumes that if v6group is v4-mapped, 2651 * v6src is also v4-mapped. 2652 */ 2653 int 2654 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, 2655 const in6_addr_t *v6group, ipaddr_t ifaddr, uint_t ifindex, 2656 mcast_record_t fmode, const in6_addr_t *v6src) 2657 { 2658 2659 /* 2660 * In the normal case below we don't check for the ill existing. 2661 * Instead we look for an existing ilg in _excl. 2662 * If checkonly we sanity check the arguments 2663 */ 2664 if (checkonly) { 2665 ill_t *ill; 2666 int err; 2667 2668 err = ip_opt_check(connp, v6group, v6src, ifaddr, ifindex, 2669 &ill); 2670 /* 2671 * do not do operation, just pretend to - new T_CHECK semantics. 2672 * ip_opt_check is considered a good enough "check" here. 2673 */ 2674 if (ill != NULL) 2675 ill_refrele(ill); 2676 return (err); 2677 } 2678 return (ip_opt_delete_group_excl(connp, v6group, ifaddr, ifindex, 2679 fmode, v6src)); 2680 } 2681 2682 /* 2683 * Group mgmt for upper conn that passes things down 2684 * to the interface multicast list (and DLPI) 2685 * These routines can handle new style options that specify an interface name 2686 * as opposed to an interface address (needed for general handling of 2687 * unnumbered interfaces.) 2688 */ 2689 2690 /* 2691 * Add a group to an upper conn group data structure and pass things down 2692 * to the interface multicast list (and DLPI) 2693 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr. 
/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI)
 * Common for IPv4 and IPv6; for IPv4 we can have an ifaddr.
 *
 * The caller must hold ill->ill_mcast_serializer (asserted below).
 * conn_ilg_lock is taken here, dropped around ip_addmulti_serial(), and
 * taken again to tie the resulting ilm to the ilg (or to undo the ilg
 * change if no ilm could be obtained).
 *
 * Returns 0 or an errno value:
 *	EADDRNOTAVAIL	ill is not multicast capable, blocking a source on
 *			a group that has not been joined, or the source is
 *			already in the filter
 *	EADDRINUSE	joining all sources on a group already joined
 *	ENOBUFS		the existing filter is already at MAX_FILTER_SIZE
 *	EINVAL		fmode differs from the existing ilg's mode
 *	ENOMEM		a filter list could not be allocated
 *	ENXIO		the ilg went away while conn_ilg_lock was dropped
 * or the error from conn_ilg_alloc()/ip_addmulti_serial().
 */
static int
ilg_add(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex, ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	int	error = 0;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;
	ilm_t	*ilm;

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/* conn_ilg_lock protects the ilg list. */
	ASSERT(MUTEX_HELD(&ill->ill_mcast_serializer));
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.  Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		rw_exit(&connp->conn_ilg_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First membership for this group/interface on the conn */
		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (error);
		}
		ilg->ilg_ifindex = ifindex;
		ilg->ilg_ifaddr = ifaddr;
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ill = ill;
	} else {
		/* Existing membership: append v6src to its filter */
		int index;

		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				rw_exit(&connp->conn_ilg_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			rw_exit(&connp->conn_ilg_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		/* Room for one more was verified against MAX_FILTER_SIZE */
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_ilg_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	rw_exit(&connp->conn_ilg_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	ilm = ip_addmulti_serial(v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above.
	 */
	ilg = ilg_lookup(connp, v6group, ifaddr, ifindex);
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		/* Give back the ilm we just got, if any */
		if (ilm != NULL) {
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
		}
		error = ENXIO;
		goto free_and_exit;
	}
	if (ilm != NULL) {
		if (ilg->ilg_ill == NULL) {
			/* some other thread is re-attaching this.  */
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			error = 0;
			goto free_and_exit;
		}
		/* Succeeded. Update the ilg to point at the ilm */
		if (ilgstat == ILGSTAT_NEW) {
			if (ilg->ilg_ilm == NULL) {
				ilg->ilg_ilm = ilm;
				ilm->ilm_ifaddr = ifaddr;	/* For netstat */
			} else {
				/* some other thread is re-attaching this. */
				rw_exit(&connp->conn_ilg_lock);
				(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
				error = 0;
				goto free_and_exit;
			}
		} else {
			/*
			 * ip_addmulti didn't get a held ilm for
			 * ILGSTAT_CHANGE; ilm_refcnt was unchanged.
			 */
			ASSERT(ilg->ilg_ilm == ilm);
		}
	} else {
		ASSERT(error != 0);
		/*
		 * Failed to allocate the ilm.
		 * Need to undo what we did before calling ip_addmulti().
		 * If ENETDOWN just clear ilg_ill so that we
		 * will rejoin when the ill comes back; don't report ENETDOWN
		 * to application.
		 */
		if (ilgstat == ILGSTAT_NEW && error == ENETDOWN) {
			ilg->ilg_ill = NULL;
			error = 0;
		} else {
			/*
			 * An unspecified address makes ilg_delete() remove
			 * the whole (new) ilg; otherwise only the source we
			 * just appended is taken back out.
			 */
			in6_addr_t delsrc =
			    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;

			ilg_delete(connp, ilg, &delsrc);
		}
	}
	rw_exit(&connp->conn_ilg_lock);

free_and_exit:
	l_free(new_filter);
	return (error);
}

/*
 * Find an IPv4 ilg matching group, ill and source.
 * The group and source can't be INADDR_ANY here so no need to translate to
 * the unspecified IPv6 address.
 *
 * Returns B_TRUE if the conn has a non-condemned ilg for the group on ill
 * whose source filter admits src: either the filter is empty, or src is
 * listed and the mode is MODE_IS_INCLUDE, or src is not listed and the mode
 * is MODE_IS_EXCLUDE.  Returns B_FALSE otherwise.
 * Takes conn_ilg_lock as reader for the duration of the lookup.
 */
boolean_t
conn_hasmembers_ill_withsrc_v4(conn_t *connp, ipaddr_t group, ipaddr_t src,
    ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int i;
	boolean_t isinlist;
	ilg_t *ilg;

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	/* ilgs store IPv4 groups in v4-mapped form */
	IN6_IPADDR_TO_V4MAPPED(group, &v6group);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;

		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				rw_exit(&connp->conn_ilg_lock);
				return (B_TRUE);
			}
			break;
		}
	}
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}
	rw_exit(&connp->conn_ilg_lock);
	return (B_FALSE);
}

/*
 * Find an IPv6 ilg matching group, ill, and source
 *
 * Same matching rules as conn_hasmembers_ill_withsrc_v4(), with the group
 * and source passed directly as IPv6 addresses: B_TRUE when the filter is
 * empty, when v6src is listed in an include-mode filter, or when it is
 * absent from an exclude-mode filter.
 * Takes conn_ilg_lock as reader for the duration of the lookup.
 */
boolean_t
conn_hasmembers_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int i;
	boolean_t isinlist;
	ilg_t *ilg;

	rw_enter(&connp->conn_ilg_lock, RW_READER);
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		/* ilg_ill could be NULL if an add is in progress */
		if (ilg->ilg_ill != ill)
			continue;

		/* The callers use upper ill for IPMP */
		ASSERT(!IS_UNDER_IPMP(ill));
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				rw_exit(&connp->conn_ilg_lock);
				return (B_TRUE);
			}
			break;
		}
	}
	if (ilg == NULL) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_FALSE);
	}

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) {
		rw_exit(&connp->conn_ilg_lock);
		return (B_TRUE);
	}
	rw_exit(&connp->conn_ilg_lock);
	return (B_FALSE);
}

/*
 * Find an ilg matching group and ifaddr/ifindex.
 * We check both ifaddr and ifindex even though at most one of them
 * will be non-zero; that way we always find the right one.
 *
 * Condemned ilgs are skipped.  The caller must hold conn_ilg_lock
 * (asserted).  Returns the matching ilg or NULL; no reference is taken
 * on it here.
 */
static ilg_t *
ilg_lookup(conn_t *connp, const in6_addr_t *v6group, ipaddr_t ifaddr,
    uint_t ifindex)
{
	ilg_t	*ilg;

	ASSERT(RW_LOCK_HELD(&connp->conn_ilg_lock));

	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		if (ilg->ilg_ifaddr == ifaddr &&
		    ilg->ilg_ifindex == ifindex &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
			return (ilg);
	}
	return (NULL);
}

/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 *
 * The caller must hold conn_ilg_lock as writer.  When deleting the ilg the
 * caller must already have cleared ilg_ilm (asserted); the ilg is marked
 * condemned and a reference on it is dropped here.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	ASSERT(ilg->ilg_ptpn != NULL);
	ASSERT(!ilg->ilg_condemned);

	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
		FREE_SLIST(ilg->ilg_filter);
		ilg->ilg_filter = NULL;

		ASSERT(ilg->ilg_ilm == NULL);
		ilg->ilg_ill = NULL;
		ilg->ilg_condemned = B_TRUE;

		/* ilg_inactive will unlink from the list */
		ilg_refrele(ilg);
	} else {
		l_remove(ilg->ilg_filter, src);
	}
}
/*
 * Called from conn close. No new ilg can be added or removed
 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
 * will return error if conn has started closing.
 *
 * We handle locking as follows.
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Since the ilg_add code path uses two locks (conn_ilg_lock for the ilg part,
 * and ill_mcast_lock for the ip_addmulti part) we can run at a point between
 * the two. At that point ilg_ill is set, but ilg_ilm hasn't yet been set. In
 * that case we delete the ilg here, which makes ilg_add discover that the ilg
 * has disappeared when ip_addmulti returns, so it will discard the ilm it just
 * added.
 */
void
ilg_delete_all(conn_t *connp)
{
	ilg_t	*ilg, *next_ilg, *held_ilg;
	ilm_t	*ilm;
	ill_t	*ill;
	boolean_t need_refrele;

	/*
	 * Can not run if there is a conn_update_ill already running.
	 * Wait for it to complete. Caller should have already set CONN_CLOSING
	 * which prevents any new threads to run in conn_update_ill.
	 */
	mutex_enter(&connp->conn_lock);
	ASSERT(connp->conn_state_flags & CONN_CLOSING);
	while (connp->conn_state_flags & CONN_UPDATE_ILL)
		cv_wait(&connp->conn_cv, &connp->conn_lock);
	mutex_exit(&connp->conn_lock);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	ilg = connp->conn_ilg;
	/* held_ilg keeps our place in the list while the lock is dropped */
	held_ilg = NULL;
	while (ilg != NULL) {
		if (ilg->ilg_condemned) {
			ilg = ilg->ilg_next;
			continue;
		}
		/* If the ilg is detached then no need to serialize */
		if (ilg->ilg_ilm == NULL) {
			next_ilg = ilg->ilg_next;
			ilg_delete(connp, ilg, NULL);
			ilg = next_ilg;
			continue;
		}
		ill = ilg->ilg_ilm->ilm_ill;

		/*
		 * In order to serialize on the ill we try to enter
		 * and if that fails we unlock and relock and then
		 * check that we still have an ilm.
		 */
		need_refrele = B_FALSE;
		if (!mutex_tryenter(&ill->ill_mcast_serializer)) {
			/*
			 * Hold the ill and the ilg so both are still there
			 * after we have blocked without conn_ilg_lock.
			 */
			ill_refhold(ill);
			need_refrele = B_TRUE;
			ilg_refhold(ilg);
			if (held_ilg != NULL)
				ilg_refrele(held_ilg);
			held_ilg = ilg;
			rw_exit(&connp->conn_ilg_lock);
			mutex_enter(&ill->ill_mcast_serializer);
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
			if (ilg->ilg_condemned) {
				/* Deleted while we were blocked; move on */
				ilg = ilg->ilg_next;
				goto next;
			}
		}
		/* ilg_delete() requires ilg_ilm to be cleared first */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		next_ilg = ilg->ilg_next;
		ilg_delete(connp, ilg, NULL);
		ilg = next_ilg;
		rw_exit(&connp->conn_ilg_lock);

		if (ilm != NULL)
			(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);

	next:
		mutex_exit(&ill->ill_mcast_serializer);
		/*
		 * Now that all locks have been dropped, we can send any
		 * deferred/queued DLPI or IP packets
		 */
		ill_mcast_send_queued(ill);
		ill_dlpi_send_queued(ill);
		if (need_refrele) {
			/* Drop ill reference while we hold no locks */
			ill_refrele(ill);
		}
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
}

/*
 * Attach the ilg to an ilm on the ill. If it fails we leave ilg_ill as NULL so
 * that a subsequent attempt can attach it. Drops and reacquires conn_ilg_lock.
 *
 * Called with conn_ilg_lock held as writer (asserted) and returns with it
 * held.  If the temporary filter buffer cannot be allocated nothing is
 * changed and the lock is not dropped.
 */
static void
ilg_attach(conn_t *connp, ilg_t *ilg, ill_t *ill)
{
	ilg_stat_t	ilgstat;
	slist_t		*new_filter;
	int		new_fmode;
	in6_addr_t	v6group;
	ipaddr_t	ifaddr;
	uint_t		ifindex;
	ilm_t		*ilm;
	int		error = 0;

	ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock));
	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL)
		return;

	/*
	 * Save copy of ilg's filter state to pass to other functions, so
	 * we can release conn_ilg_lock now.
	 * Set ilg_ill so that an unplumb can find us.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);
	v6group = ilg->ilg_v6group;
	ifaddr = ilg->ilg_ifaddr;
	ifindex = ilg->ilg_ifindex;
	ilgstat = ILGSTAT_NEW;

	ilg->ilg_ill = ill;
	ASSERT(ilg->ilg_ilm == NULL);
	rw_exit(&connp->conn_ilg_lock);

	ilm = ip_addmulti_serial(&v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter, &error);
	l_free(new_filter);

	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	/*
	 * Must look up the ilg again since we've not been holding
	 * conn_ilg_lock. The ilg could have disappeared due to an unplumb
	 * having called conn_update_ill, which can run once we dropped the
	 * conn_ilg_lock above. Alternatively, the ilg could have been attached
	 * when the lock was dropped
	 */
	ilg = ilg_lookup(connp, &v6group, ifaddr, ifindex);
	if (ilg == NULL || ilg->ilg_ilm != NULL) {
		/* Our ilm is not needed; release it without conn_ilg_lock */
		if (ilm != NULL) {
			rw_exit(&connp->conn_ilg_lock);
			(void) ip_delmulti_serial(ilm, B_FALSE,
			    (ilgstat == ILGSTAT_NEW));
			rw_enter(&connp->conn_ilg_lock, RW_WRITER);
		}
		return;
	}
	if (ilm == NULL) {
		/* Join failed: leave the ilg detached for a later attempt */
		ilg->ilg_ill = NULL;
		return;
	}
	ilg->ilg_ilm = ilm;
	ilm->ilm_ifaddr = ifaddr;	/* For netstat */
}
/*
 * Called when an ill is unplumbed to make sure that there are no
 * dangling conn references to that ill. In that case ill is non-NULL and
 * we make sure we remove all references to it.
 * Also called when we should revisit the ilg_ill used for multicast
 * memberships, in which case ill is NULL.
 *
 * conn is held by caller.
 *
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire and then only set
 * condemned flag.
 *
 * We leave ixa_multicast_ifindex in place. We prefer dropping
 * packets instead of sending them out the wrong interface.
 *
 * We keep the ilg around in a detached state (with ilg_ill and ilg_ilm being
 * NULL) so that the application can leave it later. Also, if ilg_ifaddr and
 * ilg_ifindex are zero, indicating that the system should pick the interface,
 * then we attempt to reselect the ill and join on it.
 *
 * Locking notes:
 * Under conn_ilg_lock we get the first ilg. As we drop the conn_ilg_lock to
 * proceed with the ilm part of the delete we hold a reference on both the ill
 * and the ilg. This doesn't prevent changes to the ilg, but prevents it from
 * being deleted.
 *
 * Note: if this function is called when new ill/ipif's arrive or change status
 * (SIOCSLIFINDEX, SIOCSLIFADDR) then we will attempt to attach any ilgs with
 * a NULL ilg_ill to an ill/ilm.
 */
static void
conn_update_ill(conn_t *connp, caddr_t arg)
{
	/* arg is the ill being unplumbed, or NULL for a plain re-evaluation */
	ill_t	*ill = (ill_t *)arg;

	/*
	 * We have to prevent ip_close/ilg_delete_all from running at
	 * the same time. ip_close sets CONN_CLOSING before doing the ilg_delete
	 * all, and we set CONN_UPDATE_ILL. That ensures that only one of
	 * ilg_delete_all and conn_update_ill run at a time for a given conn.
	 * If ilg_delete_all got here first, then we have nothing to do.
	 */
	mutex_enter(&connp->conn_lock);
	if (connp->conn_state_flags & (CONN_CLOSING|CONN_UPDATE_ILL)) {
		/* Caller has to wait for ill_ilm_cnt to drop to zero */
		mutex_exit(&connp->conn_lock);
		return;
	}
	connp->conn_state_flags |= CONN_UPDATE_ILL;
	mutex_exit(&connp->conn_lock);

	if (ill != NULL)
		ilg_check_detach(connp, ill);

	ilg_check_reattach(connp, ill);

	/* Do we need to wake up a thread in ilg_delete_all? */
	mutex_enter(&connp->conn_lock);
	connp->conn_state_flags &= ~CONN_UPDATE_ILL;
	if (connp->conn_state_flags & CONN_CLOSING)
		cv_broadcast(&connp->conn_cv);
	mutex_exit(&connp->conn_lock);
}

/*
 * Detach from an ill that is going away
 *
 * Every non-condemned ilg of the conn that points at ill gets its ilg_ill
 * and ilg_ilm cleared, and the ilm (if there was one) is released with
 * ip_delmulti_serial().  The ilgs themselves stay on the conn.
 * Runs under ill_mcast_serializer; conn_ilg_lock is dropped around each
 * ip_delmulti_serial() call.
 */
static void
ilg_check_detach(conn_t *connp, ill_t *ill)
{
	char	group_buf[INET6_ADDRSTRLEN];
	ilg_t	*ilg, *held_ilg;
	ilm_t	*ilm;

	mutex_enter(&ill->ill_mcast_serializer);
	rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	held_ilg = NULL;
	for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) {
		if (ilg->ilg_condemned)
			continue;

		if (ilg->ilg_ill != ill)
			continue;

		/* Detach from current ill */
		ip1dbg(("ilg_check_detach: detach %s on %s\n",
		    inet_ntop(AF_INET6, &ilg->ilg_v6group,
		    group_buf, sizeof (group_buf)),
		    ilg->ilg_ill->ill_name));

		/* Detach this ilg from the ill/ilm */
		ilm = ilg->ilg_ilm;
		ilg->ilg_ilm = NULL;
		ilg->ilg_ill = NULL;
		if (ilm == NULL)
			continue;

		/* Prevent ilg from disappearing */
		ilg_transfer_hold(held_ilg, ilg);
		held_ilg = ilg;
		rw_exit(&connp->conn_ilg_lock);

		(void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE);
		rw_enter(&connp->conn_ilg_lock, RW_WRITER);
	}
	if (held_ilg != NULL)
		ilg_refrele(held_ilg);
	rw_exit(&connp->conn_ilg_lock);
	mutex_exit(&ill->ill_mcast_serializer);
	/*
	 * Now that all locks have been dropped, we can send any
	 * deferred/queued DLPI or IP packets
	 */
	ill_mcast_send_queued(ill);
	ill_dlpi_send_queued(ill);
}

/*
 * Check if there is a place to attach the conn_ilgs. We do this for both
 * detached ilgs and attached ones, since for the latter there could be
 * a better ill to attach them to. oill is non-null if we just detached from
 * that ill.
3355 */ 3356 static void 3357 ilg_check_reattach(conn_t *connp, ill_t *oill) 3358 { 3359 ill_t *ill; 3360 char group_buf[INET6_ADDRSTRLEN]; 3361 ilg_t *ilg, *held_ilg; 3362 ilm_t *ilm; 3363 zoneid_t zoneid = IPCL_ZONEID(connp); 3364 int error; 3365 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 3366 3367 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3368 held_ilg = NULL; 3369 for (ilg = connp->conn_ilg; ilg != NULL; ilg = ilg->ilg_next) { 3370 if (ilg->ilg_condemned) 3371 continue; 3372 3373 /* Check if the conn_ill matches what we would pick now */ 3374 ill = ill_mcast_lookup(&ilg->ilg_v6group, ilg->ilg_ifaddr, 3375 ilg->ilg_ifindex, zoneid, ipst, &error); 3376 3377 /* 3378 * Make sure the ill is usable for multicast and that 3379 * we can send the DL_ADDMULTI_REQ before we create an 3380 * ilm. 3381 */ 3382 if (ill != NULL && 3383 (!(ill->ill_flags & ILLF_MULTICAST) || !ill->ill_dl_up)) { 3384 /* Drop locks across ill_refrele */ 3385 ilg_transfer_hold(held_ilg, ilg); 3386 held_ilg = ilg; 3387 rw_exit(&connp->conn_ilg_lock); 3388 ill_refrele(ill); 3389 ill = NULL; 3390 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3391 /* Note that ilg could have become condemned */ 3392 } 3393 3394 /* 3395 * Is the ill unchanged, even if both are NULL? 3396 * Did we just detach from that ill? 3397 */ 3398 if (ill == ilg->ilg_ill || (ill != NULL && ill == oill)) { 3399 if (ill != NULL) { 3400 /* Drop locks across ill_refrele */ 3401 ilg_transfer_hold(held_ilg, ilg); 3402 held_ilg = ilg; 3403 rw_exit(&connp->conn_ilg_lock); 3404 ill_refrele(ill); 3405 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3406 } 3407 continue; 3408 } 3409 3410 /* Something changed; detach from old first if needed */ 3411 if (ilg->ilg_ill != NULL) { 3412 ill_t *ill2 = ilg->ilg_ill; 3413 boolean_t need_refrele = B_FALSE; 3414 3415 /* 3416 * In order to serialize on the ill we try to enter 3417 * and if that fails we unlock and relock. 
3418 */ 3419 if (!mutex_tryenter(&ill2->ill_mcast_serializer)) { 3420 ill_refhold(ill2); 3421 need_refrele = B_TRUE; 3422 ilg_transfer_hold(held_ilg, ilg); 3423 held_ilg = ilg; 3424 rw_exit(&connp->conn_ilg_lock); 3425 mutex_enter(&ill2->ill_mcast_serializer); 3426 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3427 /* Note that ilg could have become condemned */ 3428 } 3429 /* 3430 * Check that nobody else re-attached the ilg while we 3431 * dropped the lock. 3432 */ 3433 if (ilg->ilg_ill == ill2) { 3434 ASSERT(!ilg->ilg_condemned); 3435 /* Detach from current ill */ 3436 ip1dbg(("conn_check_reattach: detach %s/%s\n", 3437 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3438 group_buf, sizeof (group_buf)), 3439 ill2->ill_name)); 3440 3441 ilm = ilg->ilg_ilm; 3442 ilg->ilg_ilm = NULL; 3443 ilg->ilg_ill = NULL; 3444 } else { 3445 ilm = NULL; 3446 } 3447 ilg_transfer_hold(held_ilg, ilg); 3448 held_ilg = ilg; 3449 rw_exit(&connp->conn_ilg_lock); 3450 if (ilm != NULL) 3451 (void) ip_delmulti_serial(ilm, B_FALSE, B_TRUE); 3452 mutex_exit(&ill2->ill_mcast_serializer); 3453 /* 3454 * Now that all locks have been dropped, we can send any 3455 * deferred/queued DLPI or IP packets 3456 */ 3457 ill_mcast_send_queued(ill2); 3458 ill_dlpi_send_queued(ill2); 3459 if (need_refrele) { 3460 /* Drop ill reference while we hold no locks */ 3461 ill_refrele(ill2); 3462 } 3463 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3464 /* 3465 * While we dropped conn_ilg_lock some other thread 3466 * could have attached this ilg, thus we check again. 3467 */ 3468 if (ilg->ilg_ill != NULL) { 3469 if (ill != NULL) { 3470 /* Drop locks across ill_refrele */ 3471 ilg_transfer_hold(held_ilg, ilg); 3472 held_ilg = ilg; 3473 rw_exit(&connp->conn_ilg_lock); 3474 ill_refrele(ill); 3475 rw_enter(&connp->conn_ilg_lock, 3476 RW_WRITER); 3477 } 3478 continue; 3479 } 3480 } 3481 if (ill != NULL) { 3482 /* 3483 * In order to serialize on the ill we try to enter 3484 * and if that fails we unlock and relock. 
3485 */ 3486 if (!mutex_tryenter(&ill->ill_mcast_serializer)) { 3487 /* Already have a refhold on ill */ 3488 ilg_transfer_hold(held_ilg, ilg); 3489 held_ilg = ilg; 3490 rw_exit(&connp->conn_ilg_lock); 3491 mutex_enter(&ill->ill_mcast_serializer); 3492 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3493 /* Note that ilg could have become condemned */ 3494 } 3495 ilg_transfer_hold(held_ilg, ilg); 3496 held_ilg = ilg; 3497 /* 3498 * Check that nobody else attached the ilg and that 3499 * it wasn't condemned while we dropped the lock. 3500 */ 3501 if (ilg->ilg_ill == NULL && !ilg->ilg_condemned) { 3502 /* 3503 * Attach to the new ill. Can fail in which 3504 * case ilg_ill will remain NULL. ilg_attach 3505 * drops and reacquires conn_ilg_lock. 3506 */ 3507 ip1dbg(("conn_check_reattach: attach %s/%s\n", 3508 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3509 group_buf, sizeof (group_buf)), 3510 ill->ill_name)); 3511 ilg_attach(connp, ilg, ill); 3512 ASSERT(RW_WRITE_HELD(&connp->conn_ilg_lock)); 3513 } 3514 /* Drop locks across ill_refrele */ 3515 rw_exit(&connp->conn_ilg_lock); 3516 mutex_exit(&ill->ill_mcast_serializer); 3517 /* 3518 * Now that all locks have been 3519 * dropped, we can send any 3520 * deferred/queued DLPI or IP packets 3521 */ 3522 ill_mcast_send_queued(ill); 3523 ill_dlpi_send_queued(ill); 3524 ill_refrele(ill); 3525 rw_enter(&connp->conn_ilg_lock, RW_WRITER); 3526 } 3527 } 3528 if (held_ilg != NULL) 3529 ilg_refrele(held_ilg); 3530 rw_exit(&connp->conn_ilg_lock); 3531 } 3532 3533 /* 3534 * Called when an ill is unplumbed to make sure that there are no 3535 * dangling conn references to that ill. In that case ill is non-NULL and 3536 * we make sure we remove all references to it. 3537 * Also called when we should revisit the ilg_ill used for multicast 3538 * memberships, in which case ill is NULL. 3539 */ 3540 void 3541 update_conn_ill(ill_t *ill, ip_stack_t *ipst) 3542 { 3543 ipcl_walk(conn_update_ill, (caddr_t)ill, ipst); 3544 } 3545