1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/ddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sdt.h> 35 #include <sys/zone.h> 36 37 #include <sys/param.h> 38 #include <sys/socket.h> 39 #include <sys/sockio.h> 40 #include <net/if.h> 41 #include <sys/systm.h> 42 #include <sys/strsubr.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 74 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 76 ipif_t *ipif); 77 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 78 mcast_record_t fmode, ipaddr_t src); 79 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 80 mcast_record_t fmode, const in6_addr_t *v6src); 81 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 82 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 83 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 84 static void conn_ilg_reap(conn_t *connp); 85 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 86 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 87 static int ip_opt_delete_group_excl_v6(conn_t *connp, 88 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 89 const in6_addr_t *v6src); 90 static void ill_ilm_walker_hold(ill_t *ill); 91 static void ill_ilm_walker_rele(ill_t *ill); 92 93 /* 94 * MT notes: 95 * 96 * Multicast joins operate on both the ilg and ilm structures. Multiple 97 * threads operating on an conn (socket) trying to do multicast joins 98 * need to synchronize when operating on the ilg. Multiple threads 99 * potentially operating on different conn (socket endpoints) trying to 100 * do multicast joins could eventually end up trying to manipulate the 101 * ilm simultaneously and need to synchronize access to the ilm. Currently, 102 * this is done by synchronizing join/leave via per-phyint ipsq_t 103 * serialization. 104 * 105 * An ilm is an IP data structure used to track multicast join/leave. 106 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 107 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 108 * referencing the ilm. ilms are created / destroyed only as writer. ilms 109 * are not passed around, instead they are looked up and used under the 110 * ill_lock or as writer. So we don't need a dynamic refcount of the number 111 * of threads holding reference to an ilm. 112 * 113 * Multicast Join operation: 114 * 115 * The first step is to determine the ipif (v4) or ill (v6) on which 116 * the join operation is to be done. The join is done after becoming 117 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 118 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 119 * Multiple threads can attempt to join simultaneously on different ipif/ill 120 * on the same conn. In this case the ipsq serialization does not help in 121 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 122 * The conn_lock also protects all the ilg_t members. 123 * 124 * Leave operation. 125 * 126 * Similar to the join operation, the first step is to determine the ipif 127 * or ill (v6) on which the leave operation is to be done. The leave operation 128 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 129 * As with join ilg modification is done under the protection of the conn lock. 130 */ 131 132 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 133 ASSERT(connp != NULL); \ 134 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 135 (first_mp), (func), (type), B_TRUE); \ 136 if ((ipsq) == NULL) { \ 137 ipif_refrele(ipif); \ 138 return (EINPROGRESS); \ 139 } 140 141 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 142 ASSERT(connp != NULL); \ 143 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 144 (first_mp), (func), (type), B_TRUE); \ 145 if ((ipsq) == NULL) { \ 146 ill_refrele(ill); \ 147 return (EINPROGRESS); \ 148 } 149 150 #define IPSQ_EXIT(ipsq) \ 151 if (ipsq != NULL) \ 152 ipsq_exit(ipsq); 153 154 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 155 156 #define ILG_WALKER_RELE(connp) \ 157 { \ 158 (connp)->conn_ilg_walker_cnt--; \ 159 if ((connp)->conn_ilg_walker_cnt == 0) \ 160 conn_ilg_reap(connp); \ 161 } 162 163 static void 164 conn_ilg_reap(conn_t *connp) 165 { 166 int to; 167 int from; 168 ilg_t *ilg; 169 170 ASSERT(MUTEX_HELD(&connp->conn_lock)); 171 172 to = 0; 173 from = 0; 174 while (from < connp->conn_ilg_inuse) { 175 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 176 ilg = &connp->conn_ilg[from]; 177 FREE_SLIST(ilg->ilg_filter); 178 ilg->ilg_flags &= ~ILG_DELETED; 179 from++; 180 continue; 181 } 182 if (to != from) 183 connp->conn_ilg[to] = connp->conn_ilg[from]; 184 to++; 185 from++; 186 } 187 188 connp->conn_ilg_inuse = to; 189 190 if (connp->conn_ilg_inuse == 0) { 191 mi_free((char *)connp->conn_ilg); 192 connp->conn_ilg = NULL; 193 cv_broadcast(&connp->conn_refcv); 194 } 195 } 196 197 #define GETSTRUCT(structure, number) \ 198 ((structure *)mi_zalloc(sizeof (structure) * (number))) 199 200 #define ILG_ALLOC_CHUNK 16 201 202 /* 203 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 204 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 205 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 206 * returned ilg). Returns NULL on failure, in which case `*errp' will be 207 * filled in with the reason. 208 * 209 * Assumes connp->conn_lock is held. 210 */ 211 static ilg_t * 212 conn_ilg_alloc(conn_t *connp, int *errp) 213 { 214 ilg_t *new, *ret; 215 int curcnt; 216 217 ASSERT(MUTEX_HELD(&connp->conn_lock)); 218 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 219 220 /* 221 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not 222 * create any ilgs. 223 */ 224 if (connp->conn_state_flags & CONN_CLOSING) { 225 *errp = EINVAL; 226 return (NULL); 227 } 228 229 if (connp->conn_ilg == NULL) { 230 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 231 if (connp->conn_ilg == NULL) { 232 *errp = ENOMEM; 233 return (NULL); 234 } 235 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 236 connp->conn_ilg_inuse = 0; 237 } 238 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 239 if (connp->conn_ilg_walker_cnt != 0) { 240 /* 241 * XXX We cannot grow the array at this point 242 * because a list walker could be in progress, and 243 * we cannot wipe out the existing array until the 244 * walker is done. Just return NULL for now. 245 * ilg_delete_all() will have to be changed when 246 * this logic is changed. 247 */ 248 *errp = EBUSY; 249 return (NULL); 250 } 251 curcnt = connp->conn_ilg_allocated; 252 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 253 if (new == NULL) { 254 *errp = ENOMEM; 255 return (NULL); 256 } 257 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 258 mi_free((char *)connp->conn_ilg); 259 connp->conn_ilg = new; 260 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 261 } 262 263 ret = &connp->conn_ilg[connp->conn_ilg_inuse++]; 264 ASSERT((ret->ilg_flags & ILG_DELETED) == 0); 265 bzero(ret, sizeof (*ret)); 266 return (ret); 267 } 268 269 typedef struct ilm_fbld_s { 270 ilm_t *fbld_ilm; 271 int fbld_in_cnt; 272 int fbld_ex_cnt; 273 slist_t fbld_in; 274 slist_t fbld_ex; 275 boolean_t fbld_in_overflow; 276 } ilm_fbld_t; 277 278 static void 279 ilm_bld_flists(conn_t *conn, void *arg) 280 { 281 int i; 282 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 283 ilm_t *ilm = fbld->fbld_ilm; 284 in6_addr_t *v6group = &ilm->ilm_v6addr; 285 286 if (conn->conn_ilg_inuse == 0) 287 return; 288 289 /* 290 * Since we can't break out of the ipcl_walk once started, we still 291 * have to look at every conn. But if we've already found one 292 * (EXCLUDE, NULL) list, there's no need to keep checking individual 293 * ilgs--that will be our state. 294 */ 295 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 296 return; 297 298 /* 299 * Check this conn's ilgs to see if any are interested in our 300 * ilm (group, interface match). If so, update the master 301 * include and exclude lists we're building in the fbld struct 302 * with this ilg's filter info. 303 */ 304 mutex_enter(&conn->conn_lock); 305 for (i = 0; i < conn->conn_ilg_inuse; i++) { 306 ilg_t *ilg = &conn->conn_ilg[i]; 307 if ((ilg->ilg_ill == ilm->ilm_ill) && 308 (ilg->ilg_ipif == ilm->ilm_ipif) && 309 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 310 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 311 fbld->fbld_in_cnt++; 312 if (!fbld->fbld_in_overflow) 313 l_union_in_a(&fbld->fbld_in, 314 ilg->ilg_filter, 315 &fbld->fbld_in_overflow); 316 } else { 317 fbld->fbld_ex_cnt++; 318 /* 319 * On the first exclude list, don't try to do 320 * an intersection, as the master exclude list 321 * is intentionally empty. If the master list 322 * is still empty on later iterations, that 323 * means we have at least one ilg with an empty 324 * exclude list, so that should be reflected 325 * when we take the intersection. 326 */ 327 if (fbld->fbld_ex_cnt == 1) { 328 if (ilg->ilg_filter != NULL) 329 l_copy(ilg->ilg_filter, 330 &fbld->fbld_ex); 331 } else { 332 l_intersection_in_a(&fbld->fbld_ex, 333 ilg->ilg_filter); 334 } 335 } 336 /* there will only be one match, so break now. */ 337 break; 338 } 339 } 340 mutex_exit(&conn->conn_lock); 341 } 342 343 static void 344 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 345 { 346 ilm_fbld_t fbld; 347 ip_stack_t *ipst = ilm->ilm_ipst; 348 349 fbld.fbld_ilm = ilm; 350 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 351 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 352 fbld.fbld_in_overflow = B_FALSE; 353 354 /* first, construct our master include and exclude lists */ 355 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 356 357 /* now use those master lists to generate the interface filter */ 358 359 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 360 if (fbld.fbld_in_overflow) { 361 *fmode = MODE_IS_EXCLUDE; 362 flist->sl_numsrc = 0; 363 return; 364 } 365 366 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 367 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 368 *fmode = MODE_IS_INCLUDE; 369 flist->sl_numsrc = 0; 370 return; 371 } 372 373 /* 374 * If there are no exclude lists, then the interface filter 375 * is INCLUDE, with its filter list equal to fbld_in. A single 376 * exclude list makes the interface filter EXCLUDE, with its 377 * filter list equal to (fbld_ex - fbld_in). 378 */ 379 if (fbld.fbld_ex_cnt == 0) { 380 *fmode = MODE_IS_INCLUDE; 381 l_copy(&fbld.fbld_in, flist); 382 } else { 383 *fmode = MODE_IS_EXCLUDE; 384 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 385 } 386 } 387 388 static int 389 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 390 boolean_t isv6) 391 { 392 mcast_record_t fmode; 393 slist_t *flist; 394 boolean_t fdefault; 395 char buf[INET6_ADDRSTRLEN]; 396 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 397 398 /* 399 * There are several cases where the ilm's filter state 400 * defaults to (EXCLUDE, NULL): 401 * - we've had previous joins without associated ilgs 402 * - this join has no associated ilg 403 * - the ilg's filter state is (EXCLUDE, NULL) 404 */ 405 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 406 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 407 408 /* attempt mallocs (if needed) before doing anything else */ 409 if ((flist = l_alloc()) == NULL) 410 return (ENOMEM); 411 if (!fdefault && ilm->ilm_filter == NULL) { 412 ilm->ilm_filter = l_alloc(); 413 if (ilm->ilm_filter == NULL) { 414 l_free(flist); 415 return (ENOMEM); 416 } 417 } 418 419 if (ilgstat != ILGSTAT_CHANGE) 420 ilm->ilm_refcnt++; 421 422 if (ilgstat == ILGSTAT_NONE) 423 ilm->ilm_no_ilg_cnt++; 424 425 /* 426 * Determine new filter state. If it's not the default 427 * (EXCLUDE, NULL), we must walk the conn list to find 428 * any ilgs interested in this group, and re-build the 429 * ilm filter. 430 */ 431 if (fdefault) { 432 fmode = MODE_IS_EXCLUDE; 433 flist->sl_numsrc = 0; 434 } else { 435 ilm_gen_filter(ilm, &fmode, flist); 436 } 437 438 /* make sure state actually changed; nothing to do if not. */ 439 if ((ilm->ilm_fmode == fmode) && 440 !lists_are_different(ilm->ilm_filter, flist)) { 441 l_free(flist); 442 return (0); 443 } 444 445 /* send the state change report */ 446 if (!IS_LOOPBACK(ill)) { 447 if (isv6) 448 mld_statechange(ilm, fmode, flist); 449 else 450 igmp_statechange(ilm, fmode, flist); 451 } 452 453 /* update the ilm state */ 454 ilm->ilm_fmode = fmode; 455 if (flist->sl_numsrc > 0) 456 l_copy(flist, ilm->ilm_filter); 457 else 458 CLEAR_SLIST(ilm->ilm_filter); 459 460 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 461 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 462 463 l_free(flist); 464 return (0); 465 } 466 467 static int 468 ilm_update_del(ilm_t *ilm, boolean_t isv6) 469 { 470 mcast_record_t fmode; 471 slist_t *flist; 472 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 473 474 ip1dbg(("ilm_update_del: still %d left; updating state\n", 475 ilm->ilm_refcnt)); 476 477 if ((flist = l_alloc()) == NULL) 478 return (ENOMEM); 479 480 /* 481 * If present, the ilg in question has already either been 482 * updated or removed from our list; so all we need to do 483 * now is walk the list to update the ilm filter state. 484 * 485 * Skip the list walk if we have any no-ilg joins, which 486 * cause the filter state to revert to (EXCLUDE, NULL). 487 */ 488 if (ilm->ilm_no_ilg_cnt != 0) { 489 fmode = MODE_IS_EXCLUDE; 490 flist->sl_numsrc = 0; 491 } else { 492 ilm_gen_filter(ilm, &fmode, flist); 493 } 494 495 /* check to see if state needs to be updated */ 496 if ((ilm->ilm_fmode == fmode) && 497 (!lists_are_different(ilm->ilm_filter, flist))) { 498 l_free(flist); 499 return (0); 500 } 501 502 if (!IS_LOOPBACK(ill)) { 503 if (isv6) 504 mld_statechange(ilm, fmode, flist); 505 else 506 igmp_statechange(ilm, fmode, flist); 507 } 508 509 ilm->ilm_fmode = fmode; 510 if (flist->sl_numsrc > 0) { 511 if (ilm->ilm_filter == NULL) { 512 ilm->ilm_filter = l_alloc(); 513 if (ilm->ilm_filter == NULL) { 514 char buf[INET6_ADDRSTRLEN]; 515 ip1dbg(("ilm_update_del: failed to alloc ilm " 516 "filter; no source filtering for %s on %s", 517 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 518 buf, sizeof (buf)), ill->ill_name)); 519 ilm->ilm_fmode = MODE_IS_EXCLUDE; 520 l_free(flist); 521 return (0); 522 } 523 } 524 l_copy(flist, ilm->ilm_filter); 525 } else { 526 CLEAR_SLIST(ilm->ilm_filter); 527 } 528 529 l_free(flist); 530 return (0); 531 } 532 533 /* 534 * INADDR_ANY means all multicast addresses. 535 * INADDR_ANY is stored as IPv6 unspecified addr. 536 */ 537 int 538 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 539 mcast_record_t ilg_fmode, slist_t *ilg_flist) 540 { 541 ill_t *ill = ipif->ipif_ill; 542 ilm_t *ilm; 543 in6_addr_t v6group; 544 int ret; 545 546 ASSERT(IAM_WRITER_IPIF(ipif)); 547 548 if (!CLASSD(group) && group != INADDR_ANY) 549 return (EINVAL); 550 551 if (IS_UNDER_IPMP(ill)) 552 return (EINVAL); 553 554 /* 555 * INADDR_ANY is represented as the IPv6 unspecified addr. 556 */ 557 if (group == INADDR_ANY) 558 v6group = ipv6_all_zeros; 559 else 560 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 561 562 ilm = ilm_lookup_ipif(ipif, group); 563 /* 564 * Since we are writer, we know the ilm_flags itself cannot 565 * change at this point, and ilm_lookup_ipif would not have 566 * returned a DELETED ilm. However, the data path can free 567 * ilm->ilm_next via ilm_walker_cleanup() so we can safely 568 * access anything in ilm except ilm_next (for safe access to 569 * ilm_next we'd have to take the ill_lock). 570 */ 571 if (ilm != NULL) 572 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 573 574 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 575 ipif->ipif_zoneid); 576 if (ilm == NULL) 577 return (ENOMEM); 578 579 if (group == INADDR_ANY) { 580 /* 581 * Check how many ipif's have members in this group - 582 * if more then one we should not tell the driver to join 583 * this time 584 */ 585 if (ilm_numentries_v6(ill, &v6group) > 1) 586 return (0); 587 ret = ill_join_allmulti(ill); 588 if (ret != 0) 589 ilm_delete(ilm); 590 return (ret); 591 } 592 593 if (!IS_LOOPBACK(ill)) 594 igmp_joingroup(ilm); 595 596 if (ilm_numentries_v6(ill, &v6group) > 1) 597 return (0); 598 599 ret = ip_ll_addmulti_v6(ipif, &v6group); 600 if (ret != 0) 601 ilm_delete(ilm); 602 return (ret); 603 } 604 605 /* 606 * The unspecified address means all multicast addresses. 607 * 608 * ill identifies the interface to join on. 609 * 610 * ilgstat tells us if there's an ilg associated with this join, 611 * and if so, if it's a new ilg or a change to an existing one. 612 * ilg_fmode and ilg_flist give us the current filter state of 613 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 614 */ 615 int 616 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 617 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist) 618 { 619 ilm_t *ilm; 620 int ret; 621 622 ASSERT(IAM_WRITER_ILL(ill)); 623 624 if (!IN6_IS_ADDR_MULTICAST(v6group) && 625 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 626 return (EINVAL); 627 } 628 629 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_MC_SOLICITEDNODE(v6group)) 630 return (EINVAL); 631 632 /* 633 * An ilm is uniquely identified by the tuple of (group, ill) where 634 * `group' is the multicast group address, and `ill' is the interface 635 * on which it is currently joined. 636 */ 637 ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid); 638 if (ilm != NULL) 639 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 640 641 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 642 ilg_flist, zoneid); 643 if (ilm == NULL) 644 return (ENOMEM); 645 646 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 647 /* 648 * Check how many ipif's that have members in this group - 649 * if more then one we should not tell the driver to join 650 * this time 651 */ 652 if (ilm_numentries_v6(ill, v6group) > 1) 653 return (0); 654 ret = ill_join_allmulti(ill); 655 if (ret != 0) 656 ilm_delete(ilm); 657 return (ret); 658 } 659 660 if (!IS_LOOPBACK(ill)) 661 mld_joingroup(ilm); 662 663 /* 664 * If we have more then one we should not tell the driver 665 * to join this time. 666 */ 667 if (ilm_numentries_v6(ill, v6group) > 1) 668 return (0); 669 670 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 671 if (ret != 0) 672 ilm_delete(ilm); 673 return (ret); 674 } 675 676 /* 677 * Mapping the given IP multicast address to the L2 multicast mac address. 678 */ 679 static void 680 ill_multicast_mapping(ill_t *ill, ipaddr_t ip_addr, uint8_t *hw_addr, 681 uint32_t hw_addrlen) 682 { 683 dl_unitdata_req_t *dlur; 684 ipaddr_t proto_extract_mask; 685 uint8_t *from, *bcast_addr; 686 uint32_t hw_extract_start; 687 int len; 688 689 ASSERT(IN_CLASSD(ntohl(ip_addr))); 690 ASSERT(hw_addrlen == ill->ill_phys_addr_length); 691 ASSERT((ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) == 0); 692 ASSERT((ill->ill_flags & ILLF_MULTICAST) != 0); 693 694 /* 695 * Find the physical broadcast address. 696 */ 697 dlur = (dl_unitdata_req_t *)ill->ill_bcast_mp->b_rptr; 698 bcast_addr = (uint8_t *)dlur + dlur->dl_dest_addr_offset; 699 if (ill->ill_sap_length > 0) 700 bcast_addr += ill->ill_sap_length; 701 702 VERIFY(MEDIA_V4MINFO(ill->ill_media, hw_addrlen, bcast_addr, 703 hw_addr, &hw_extract_start, &proto_extract_mask)); 704 705 len = MIN((int)hw_addrlen - hw_extract_start, IP_ADDR_LEN); 706 ip_addr &= proto_extract_mask; 707 from = (uint8_t *)&ip_addr; 708 while (len-- > 0) 709 hw_addr[hw_extract_start + len] |= from[len]; 710 } 711 712 /* 713 * Send a multicast request to the driver for enabling multicast reception 714 * for v6groupp address. The caller has already checked whether it is 715 * appropriate to send one or not. 716 */ 717 int 718 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 719 { 720 mblk_t *mp; 721 uint32_t addrlen, addroff; 722 char group_buf[INET6_ADDRSTRLEN]; 723 724 ASSERT(IAM_WRITER_ILL(ill)); 725 726 /* 727 * If we're on the IPMP ill, use the nominated multicast interface to 728 * send and receive DLPI messages, if one exists. (If none exists, 729 * there are no usable interfaces and thus nothing to do.) 730 */ 731 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 732 return (0); 733 734 /* 735 * Create a DL_ENABMULTI_REQ. 736 */ 737 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 738 &addrlen, &addroff); 739 if (!mp) 740 return (ENOMEM); 741 742 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 743 ipaddr_t v4group; 744 745 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 746 747 ill_multicast_mapping(ill, v4group, 748 mp->b_rptr + addroff, addrlen); 749 750 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 %s on %s\n", 751 inet_ntop(AF_INET6, v6groupp, group_buf, 752 sizeof (group_buf)), 753 ill->ill_name)); 754 755 /* Track the state if this is the first enabmulti */ 756 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 757 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 758 ill_dlpi_send(ill, mp); 759 } else { 760 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 761 " %s\n", 762 inet_ntop(AF_INET6, v6groupp, group_buf, 763 sizeof (group_buf)), 764 ill->ill_name)); 765 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 766 } 767 return (0); 768 } 769 770 /* 771 * Send a multicast request to the driver for enabling multicast 772 * membership for v6group if appropriate. 773 */ 774 static int 775 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 776 { 777 ill_t *ill = ipif->ipif_ill; 778 779 ASSERT(IAM_WRITER_IPIF(ipif)); 780 781 if (ill->ill_net_type != IRE_IF_RESOLVER || 782 ipif->ipif_flags & IPIF_POINTOPOINT) { 783 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 784 return (0); /* Must be IRE_IF_NORESOLVER */ 785 } 786 787 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 788 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 789 return (0); 790 } 791 if (!ill->ill_dl_up) { 792 /* 793 * Nobody there. All multicast addresses will be re-joined 794 * when we get the DL_BIND_ACK bringing the interface up. 795 */ 796 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 797 return (0); 798 } 799 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 800 } 801 802 /* 803 * INADDR_ANY means all multicast addresses. 804 * INADDR_ANY is stored as the IPv6 unspecified addr. 805 */ 806 int 807 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 808 { 809 ill_t *ill = ipif->ipif_ill; 810 ilm_t *ilm; 811 in6_addr_t v6group; 812 813 ASSERT(IAM_WRITER_IPIF(ipif)); 814 815 if (!CLASSD(group) && group != INADDR_ANY) 816 return (EINVAL); 817 818 /* 819 * INADDR_ANY is represented as the IPv6 unspecified addr. 820 */ 821 if (group == INADDR_ANY) 822 v6group = ipv6_all_zeros; 823 else 824 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 825 826 /* 827 * Look for a match on the ipif. 828 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 829 */ 830 ilm = ilm_lookup_ipif(ipif, group); 831 if (ilm == NULL) 832 return (ENOENT); 833 834 /* Update counters */ 835 if (no_ilg) 836 ilm->ilm_no_ilg_cnt--; 837 838 if (leaving) 839 ilm->ilm_refcnt--; 840 841 if (ilm->ilm_refcnt > 0) 842 return (ilm_update_del(ilm, B_FALSE)); 843 844 if (group == INADDR_ANY) { 845 ilm_delete(ilm); 846 /* 847 * Check how many ipif's that have members in this group - 848 * if there are still some left then don't tell the driver 849 * to drop it. 850 */ 851 if (ilm_numentries_v6(ill, &v6group) != 0) 852 return (0); 853 854 /* If we never joined, then don't leave. */ 855 if (ill->ill_join_allmulti) 856 ill_leave_allmulti(ill); 857 858 return (0); 859 } 860 861 if (!IS_LOOPBACK(ill)) 862 igmp_leavegroup(ilm); 863 864 ilm_delete(ilm); 865 /* 866 * Check how many ipif's that have members in this group - 867 * if there are still some left then don't tell the driver 868 * to drop it. 869 */ 870 if (ilm_numentries_v6(ill, &v6group) != 0) 871 return (0); 872 return (ip_ll_delmulti_v6(ipif, &v6group)); 873 } 874 875 /* 876 * The unspecified address means all multicast addresses. 877 */ 878 int 879 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 880 boolean_t no_ilg, boolean_t leaving) 881 { 882 ipif_t *ipif; 883 ilm_t *ilm; 884 885 ASSERT(IAM_WRITER_ILL(ill)); 886 887 if (!IN6_IS_ADDR_MULTICAST(v6group) && 888 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 889 return (EINVAL); 890 891 /* 892 * Look for a match on the ill. 893 */ 894 ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid); 895 if (ilm == NULL) 896 return (ENOENT); 897 898 ASSERT(ilm->ilm_ill == ill); 899 900 ipif = ill->ill_ipif; 901 902 /* Update counters */ 903 if (no_ilg) 904 ilm->ilm_no_ilg_cnt--; 905 906 if (leaving) 907 ilm->ilm_refcnt--; 908 909 if (ilm->ilm_refcnt > 0) 910 return (ilm_update_del(ilm, B_TRUE)); 911 912 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 913 ilm_delete(ilm); 914 /* 915 * Check how many ipif's that have members in this group - 916 * if there are still some left then don't tell the driver 917 * to drop it. 918 */ 919 if (ilm_numentries_v6(ill, v6group) != 0) 920 return (0); 921 922 /* If we never joined, then don't leave. */ 923 if (ill->ill_join_allmulti) 924 ill_leave_allmulti(ill); 925 926 return (0); 927 } 928 929 if (!IS_LOOPBACK(ill)) 930 mld_leavegroup(ilm); 931 932 ilm_delete(ilm); 933 /* 934 * Check how many ipif's that have members in this group - 935 * if there are still some left then don't tell the driver 936 * to drop it. 937 */ 938 if (ilm_numentries_v6(ill, v6group) != 0) 939 return (0); 940 return (ip_ll_delmulti_v6(ipif, v6group)); 941 } 942 943 /* 944 * Send a multicast request to the driver for disabling multicast reception 945 * for v6groupp address. The caller has already checked whether it is 946 * appropriate to send one or not. 947 */ 948 int 949 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 950 { 951 mblk_t *mp; 952 char group_buf[INET6_ADDRSTRLEN]; 953 uint32_t addrlen, addroff; 954 955 ASSERT(IAM_WRITER_ILL(ill)); 956 957 /* 958 * See comment in ip_ll_send_enabmulti_req(). 959 */ 960 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 961 return (0); 962 963 /* 964 * Create a DL_DISABMULTI_REQ. 965 */ 966 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 967 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 968 if (!mp) 969 return (ENOMEM); 970 971 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 972 ipaddr_t v4group; 973 974 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 975 976 ill_multicast_mapping(ill, v4group, 977 mp->b_rptr + addroff, addrlen); 978 979 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 %s on %s\n", 980 inet_ntop(AF_INET6, v6groupp, group_buf, 981 sizeof (group_buf)), 982 ill->ill_name)); 983 ill_dlpi_send(ill, mp); 984 } else { 985 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" 986 " %s\n", 987 inet_ntop(AF_INET6, v6groupp, group_buf, 988 sizeof (group_buf)), 989 ill->ill_name)); 990 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 991 } 992 return (0); 993 } 994 995 /* 996 * Send a multicast request to the driver for disabling multicast 997 * membership for v6group if appropriate. 998 */ 999 static int 1000 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1001 { 1002 ill_t *ill = ipif->ipif_ill; 1003 1004 ASSERT(IAM_WRITER_IPIF(ipif)); 1005 1006 if (ill->ill_net_type != IRE_IF_RESOLVER || 1007 ipif->ipif_flags & IPIF_POINTOPOINT) { 1008 return (0); /* Must be IRE_IF_NORESOLVER */ 1009 } 1010 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1011 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1012 return (0); 1013 } 1014 if (!ill->ill_dl_up) { 1015 /* 1016 * Nobody there. All multicast addresses will be re-joined 1017 * when we get the DL_BIND_ACK bringing the interface up. 1018 */ 1019 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1020 return (0); 1021 } 1022 return (ip_ll_send_disabmulti_req(ill, v6group)); 1023 } 1024 1025 /* 1026 * Make the driver pass up all multicast packets. NOTE: to keep callers 1027 * IPMP-unaware, if an IPMP ill is passed in, the ill_join_allmulti flag is 1028 * set on it (rather than the cast ill). 1029 */ 1030 int 1031 ill_join_allmulti(ill_t *ill) 1032 { 1033 mblk_t *promiscon_mp, *promiscoff_mp; 1034 uint32_t addrlen, addroff; 1035 ill_t *join_ill = ill; 1036 1037 ASSERT(IAM_WRITER_ILL(ill)); 1038 1039 if (!ill->ill_dl_up) { 1040 /* 1041 * Nobody there. All multicast addresses will be re-joined 1042 * when we get the DL_BIND_ACK bringing the interface up. 1043 */ 1044 return (0); 1045 } 1046 1047 /* 1048 * See comment in ip_ll_send_enabmulti_req(). 1049 */ 1050 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 1051 return (0); 1052 1053 ASSERT(!join_ill->ill_join_allmulti); 1054 1055 /* 1056 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI 1057 * provider. We don't need to do this for certain media types for 1058 * which we never need to turn promiscuous mode on. While we're here, 1059 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that 1060 * ill_leave_allmulti() will not fail due to low memory conditions. 1061 */ 1062 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1063 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1064 promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1065 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1066 promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1067 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1068 if (promiscon_mp == NULL || promiscoff_mp == NULL) { 1069 freemsg(promiscon_mp); 1070 freemsg(promiscoff_mp); 1071 return (ENOMEM); 1072 } 1073 ill->ill_promiscoff_mp = promiscoff_mp; 1074 ill_dlpi_send(ill, promiscon_mp); 1075 } 1076 1077 join_ill->ill_join_allmulti = B_TRUE; 1078 return (0); 1079 } 1080 1081 /* 1082 * Make the driver stop passing up all multicast packets 1083 */ 1084 void 1085 ill_leave_allmulti(ill_t *ill) 1086 { 1087 mblk_t *promiscoff_mp; 1088 ill_t *leave_ill = ill; 1089 1090 ASSERT(IAM_WRITER_ILL(ill)); 1091 1092 if (!ill->ill_dl_up) { 1093 /* 1094 * Nobody there. All multicast addresses will be re-joined 1095 * when we get the DL_BIND_ACK bringing the interface up. 1096 */ 1097 return; 1098 } 1099 1100 /* 1101 * See comment in ip_ll_send_enabmulti_req(). 1102 */ 1103 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 1104 return; 1105 1106 ASSERT(leave_ill->ill_join_allmulti); 1107 1108 /* 1109 * Create a DL_PROMISCOFF_REQ message and send it directly to 1110 * the DLPI provider. We don't need to do this for certain 1111 * media types for which we never need to turn promiscuous 1112 * mode on. 1113 */ 1114 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1115 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1116 promiscoff_mp = ill->ill_promiscoff_mp; 1117 ASSERT(promiscoff_mp != NULL); 1118 ill->ill_promiscoff_mp = NULL; 1119 ill_dlpi_send(ill, promiscoff_mp); 1120 } 1121 1122 leave_ill->ill_join_allmulti = B_FALSE; 1123 } 1124 1125 static ill_t * 1126 ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1127 { 1128 ill_t *ill; 1129 boolean_t in_ipsq; 1130 1131 ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, 1132 ipst); 1133 if (ill != NULL) { 1134 if (!ill_waiter_inc(ill)) { 1135 ill_refrele(ill); 1136 return (NULL); 1137 } 1138 ill_refrele(ill); 1139 in_ipsq = ipsq_enter(ill, B_FALSE, NEW_OP); 1140 ill_waiter_dcr(ill); 1141 if (!in_ipsq) 1142 ill = NULL; 1143 } 1144 return (ill); 1145 } 1146 1147 int 1148 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1149 { 1150 ill_t *ill; 1151 int ret = 0; 1152 1153 if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) 1154 return (ENODEV); 1155 1156 /* 1157 * The ip_addmulti*() functions won't allow IPMP underlying interfaces 1158 * to join allmulti since only the nominated underlying interface in 1159 * the group should receive multicast. We silently succeed to avoid 1160 * having to teach IPobs (currently the only caller of this routine) 1161 * to ignore failures in this case. 1162 */ 1163 if (IS_UNDER_IPMP(ill)) 1164 goto out; 1165 1166 if (isv6) { 1167 ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ill->ill_zoneid, 1168 ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL); 1169 } else { 1170 ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE, 1171 MODE_IS_EXCLUDE, NULL); 1172 } 1173 ill->ill_ipallmulti_cnt++; 1174 out: 1175 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1176 return (ret); 1177 } 1178 1179 1180 int 1181 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1182 { 1183 ill_t *ill; 1184 1185 if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) 1186 return (ENODEV); 1187 1188 if (ill->ill_ipallmulti_cnt > 0) { 1189 if (isv6) { 1190 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1191 ill->ill_zoneid, B_TRUE, B_TRUE); 1192 } else { 1193 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1194 B_TRUE); 1195 } 1196 ill->ill_ipallmulti_cnt--; 1197 } 1198 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1199 return (0); 1200 } 1201 1202 /* 1203 * Delete the allmulti memberships that were added as part of 1204 * ip_join_allmulti(). 1205 */ 1206 void 1207 ip_purge_allmulti(ill_t *ill) 1208 { 1209 ASSERT(IAM_WRITER_ILL(ill)); 1210 1211 for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) { 1212 if (ill->ill_isv6) { 1213 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1214 ill->ill_zoneid, B_TRUE, B_TRUE); 1215 } else { 1216 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1217 B_TRUE); 1218 } 1219 } 1220 } 1221 1222 /* 1223 * Copy mp_orig and pass it in as a local message. 1224 */ 1225 void 1226 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1227 zoneid_t zoneid) 1228 { 1229 mblk_t *mp; 1230 mblk_t *ipsec_mp; 1231 ipha_t *iph; 1232 ip_stack_t *ipst = ill->ill_ipst; 1233 1234 if (DB_TYPE(mp_orig) == M_DATA && 1235 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1236 uint_t hdrsz; 1237 1238 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1239 sizeof (udpha_t); 1240 ASSERT(MBLKL(mp_orig) >= hdrsz); 1241 1242 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1243 (mp_orig = dupmsg(mp_orig)) != NULL) { 1244 cred_t *cr; 1245 1246 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1247 mp->b_wptr += hdrsz; 1248 mp->b_cont = mp_orig; 1249 mp_orig->b_rptr += hdrsz; 1250 if (is_system_labeled() && 1251 (cr = msg_getcred(mp_orig, NULL)) != NULL) 1252 mblk_setcred(mp, cr, NOPID); 1253 if (MBLKL(mp_orig) == 0) { 1254 mp->b_cont = mp_orig->b_cont; 1255 mp_orig->b_cont = NULL; 1256 freeb(mp_orig); 1257 } 1258 } else if (mp != NULL) { 1259 freeb(mp); 1260 mp = NULL; 1261 } 1262 } else { 1263 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1264 } 1265 1266 if (mp == NULL) 1267 return; 1268 if (DB_TYPE(mp) == M_CTL) { 1269 ipsec_mp = mp; 1270 mp = mp->b_cont; 1271 } else { 1272 ipsec_mp = mp; 1273 } 1274 1275 iph = (ipha_t *)mp->b_rptr; 1276 1277 /* 1278 * DTrace this as ip:::send. A blocked packet will fire the send 1279 * probe, but not the receive probe. 1280 */ 1281 DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph, 1282 __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1); 1283 1284 DTRACE_PROBE4(ip4__loopback__out__start, 1285 ill_t *, NULL, ill_t *, ill, 1286 ipha_t *, iph, mblk_t *, ipsec_mp); 1287 1288 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1289 ipst->ips_ipv4firewall_loopback_out, 1290 NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst); 1291 1292 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1293 1294 if (ipsec_mp != NULL) 1295 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1296 fanout_flags, zoneid); 1297 } 1298 1299 /* 1300 * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for 1301 * the hardware address. 1302 */ 1303 static mblk_t * 1304 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1305 uint32_t *addr_lenp, uint32_t *addr_offp) 1306 { 1307 mblk_t *mp; 1308 uint32_t hw_addr_length; 1309 char *cp; 1310 uint32_t offset; 1311 uint32_t size; 1312 1313 *addr_lenp = *addr_offp = 0; 1314 1315 hw_addr_length = ill->ill_phys_addr_length; 1316 if (!hw_addr_length) { 1317 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1318 return (NULL); 1319 } 1320 1321 size = length; 1322 switch (dl_primitive) { 1323 case DL_ENABMULTI_REQ: 1324 case DL_DISABMULTI_REQ: 1325 size += hw_addr_length; 1326 break; 1327 case DL_PROMISCON_REQ: 1328 case DL_PROMISCOFF_REQ: 1329 break; 1330 default: 1331 return (NULL); 1332 } 1333 mp = allocb(size, BPRI_HI); 1334 if (!mp) 1335 return (NULL); 1336 mp->b_wptr += size; 1337 mp->b_datap->db_type = M_PROTO; 1338 1339 cp = (char *)mp->b_rptr; 1340 offset = length; 1341 1342 switch (dl_primitive) { 1343 case DL_ENABMULTI_REQ: { 1344 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1345 1346 dl->dl_primitive = dl_primitive; 1347 dl->dl_addr_offset = offset; 1348 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1349 *addr_offp = offset; 1350 break; 1351 } 1352 case DL_DISABMULTI_REQ: { 1353 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1354 1355 dl->dl_primitive = dl_primitive; 1356 dl->dl_addr_offset = offset; 1357 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1358 *addr_offp = offset; 1359 break; 1360 } 1361 case DL_PROMISCON_REQ: 1362 case DL_PROMISCOFF_REQ: { 1363 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1364 1365 dl->dl_primitive = dl_primitive; 1366 dl->dl_level = DL_PROMISC_MULTI; 1367 break; 1368 } 1369 } 1370 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1371 *addr_lenp, *addr_offp)); 1372 return (mp); 1373 } 1374 1375 /* 1376 * Rejoin any groups which have been explicitly joined by the application (we 1377 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1378 * bringing the interface down). Note that because groups can be joined and 1379 * left while an interface is down, this may not be the same set of groups 1380 * that we left in ill_leave_multicast(). 1381 */ 1382 void 1383 ill_recover_multicast(ill_t *ill) 1384 { 1385 ilm_t *ilm; 1386 ipif_t *ipif = ill->ill_ipif; 1387 char addrbuf[INET6_ADDRSTRLEN]; 1388 1389 ASSERT(IAM_WRITER_ILL(ill)); 1390 1391 ill->ill_need_recover_multicast = 0; 1392 1393 ill_ilm_walker_hold(ill); 1394 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1395 /* 1396 * Check how many ipif's that have members in this group - 1397 * if more then one we make sure that this entry is first 1398 * in the list. 1399 */ 1400 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1401 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE, 1402 ALL_ZONES) != ilm) { 1403 continue; 1404 } 1405 1406 ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6, 1407 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1408 1409 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1410 (void) ill_join_allmulti(ill); 1411 } else { 1412 if (ill->ill_isv6) 1413 mld_joingroup(ilm); 1414 else 1415 igmp_joingroup(ilm); 1416 1417 (void) ip_ll_addmulti_v6(ipif, &ilm->ilm_v6addr); 1418 } 1419 } 1420 ill_ilm_walker_rele(ill); 1421 1422 } 1423 1424 /* 1425 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1426 * that were explicitly joined. 1427 */ 1428 void 1429 ill_leave_multicast(ill_t *ill) 1430 { 1431 ilm_t *ilm; 1432 ipif_t *ipif = ill->ill_ipif; 1433 char addrbuf[INET6_ADDRSTRLEN]; 1434 1435 ASSERT(IAM_WRITER_ILL(ill)); 1436 1437 ill->ill_need_recover_multicast = 1; 1438 1439 ill_ilm_walker_hold(ill); 1440 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1441 /* 1442 * Check how many ipif's that have members in this group - 1443 * if more then one we make sure that this entry is first 1444 * in the list. 1445 */ 1446 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1447 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE, 1448 ALL_ZONES) != ilm) { 1449 continue; 1450 } 1451 1452 ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6, 1453 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1454 1455 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1456 ill_leave_allmulti(ill); 1457 } else { 1458 if (ill->ill_isv6) 1459 mld_leavegroup(ilm); 1460 else 1461 igmp_leavegroup(ilm); 1462 1463 (void) ip_ll_delmulti_v6(ipif, &ilm->ilm_v6addr); 1464 } 1465 } 1466 ill_ilm_walker_rele(ill); 1467 } 1468 1469 /* Find an ilm for matching the ill */ 1470 ilm_t * 1471 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1472 { 1473 in6_addr_t v6group; 1474 1475 /* 1476 * INADDR_ANY is represented as the IPv6 unspecified addr. 1477 */ 1478 if (group == INADDR_ANY) 1479 v6group = ipv6_all_zeros; 1480 else 1481 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1482 1483 return (ilm_lookup_ill_v6(ill, &v6group, B_TRUE, zoneid)); 1484 } 1485 1486 /* 1487 * Find an ilm for address `v6group' on `ill' and zone `zoneid' (which may be 1488 * ALL_ZONES). In general, if `ill' is in an IPMP group, we will match 1489 * against any ill in the group. However, if `restrict_solicited' is set, 1490 * then specifically for IPv6 solicited-node multicast, the match will be 1491 * restricted to the specified `ill'. 1492 */ 1493 ilm_t * 1494 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, 1495 boolean_t restrict_solicited, zoneid_t zoneid) 1496 { 1497 ilm_t *ilm; 1498 ilm_walker_t ilw; 1499 boolean_t restrict_ill = B_FALSE; 1500 1501 /* 1502 * In general, underlying interfaces cannot have multicast memberships 1503 * and thus lookups always match across the illgrp. However, we must 1504 * allow IPv6 solicited-node multicast memberships on underlying 1505 * interfaces, and thus an IPMP meta-interface and one of its 1506 * underlying ills may have the same solicited-node multicast address. 1507 * In that case, we need to restrict the lookup to the requested ill. 1508 * However, we may receive packets on an underlying interface that 1509 * are for the corresponding IPMP interface's solicited-node multicast 1510 * address, and thus in that case we need to match across the group -- 1511 * hence the unfortunate `restrict_solicited' argument. 1512 */ 1513 if (IN6_IS_ADDR_MC_SOLICITEDNODE(v6group) && restrict_solicited) 1514 restrict_ill = (IS_IPMP(ill) || IS_UNDER_IPMP(ill)); 1515 1516 ilm = ilm_walker_start(&ilw, ill); 1517 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 1518 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) 1519 continue; 1520 if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid) 1521 continue; 1522 if (!restrict_ill || ill == (ill->ill_isv6 ? 1523 ilm->ilm_ill : ilm->ilm_ipif->ipif_ill)) { 1524 break; 1525 } 1526 } 1527 ilm_walker_finish(&ilw); 1528 return (ilm); 1529 } 1530 1531 /* 1532 * Find an ilm for the ipif. Only needed for IPv4 which does 1533 * ipif specific socket options. 1534 */ 1535 ilm_t * 1536 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1537 { 1538 ilm_t *ilm; 1539 ilm_walker_t ilw; 1540 1541 ilm = ilm_walker_start(&ilw, ipif->ipif_ill); 1542 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 1543 if (ilm->ilm_ipif == ipif && ilm->ilm_addr == group) 1544 break; 1545 } 1546 ilm_walker_finish(&ilw); 1547 return (ilm); 1548 } 1549 1550 /* 1551 * How many members on this ill? 1552 */ 1553 int 1554 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1555 { 1556 ilm_t *ilm; 1557 int i = 0; 1558 1559 mutex_enter(&ill->ill_lock); 1560 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1561 if (ilm->ilm_flags & ILM_DELETED) 1562 continue; 1563 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1564 i++; 1565 } 1566 } 1567 mutex_exit(&ill->ill_lock); 1568 return (i); 1569 } 1570 1571 /* Caller guarantees that the group is not already on the list */ 1572 static ilm_t * 1573 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1574 mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid) 1575 { 1576 ill_t *ill = ipif->ipif_ill; 1577 ilm_t *ilm; 1578 ilm_t *ilm_cur; 1579 ilm_t **ilm_ptpn; 1580 1581 ASSERT(IAM_WRITER_IPIF(ipif)); 1582 1583 ilm = GETSTRUCT(ilm_t, 1); 1584 if (ilm == NULL) 1585 return (NULL); 1586 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1587 ilm->ilm_filter = l_alloc(); 1588 if (ilm->ilm_filter == NULL) { 1589 mi_free(ilm); 1590 return (NULL); 1591 } 1592 } 1593 ilm->ilm_v6addr = *v6group; 1594 ilm->ilm_refcnt = 1; 1595 ilm->ilm_zoneid = zoneid; 1596 ilm->ilm_timer = INFINITY; 1597 ilm->ilm_rtx.rtx_timer = INFINITY; 1598 1599 /* 1600 * IPv4 Multicast groups are joined using ipif. 1601 * IPv6 Multicast groups are joined using ill. 1602 */ 1603 if (ill->ill_isv6) { 1604 ilm->ilm_ill = ill; 1605 ilm->ilm_ipif = NULL; 1606 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 1607 (char *), "ilm", (void *), ilm); 1608 ill->ill_ilm_cnt++; 1609 } else { 1610 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1611 ilm->ilm_ipif = ipif; 1612 ilm->ilm_ill = NULL; 1613 DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif, 1614 (char *), "ilm", (void *), ilm); 1615 ipif->ipif_ilm_cnt++; 1616 } 1617 1618 ASSERT(ill->ill_ipst); 1619 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1620 1621 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1622 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1623 1624 /* 1625 * Grab lock to give consistent view to readers 1626 */ 1627 mutex_enter(&ill->ill_lock); 1628 /* 1629 * All ilms in the same zone are contiguous in the ill_ilm list. 1630 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1631 * sending duplicates up when two applications in the same zone join the 1632 * same group on different logical interfaces. 1633 */ 1634 ilm_cur = ill->ill_ilm; 1635 ilm_ptpn = &ill->ill_ilm; 1636 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1637 ilm_ptpn = &ilm_cur->ilm_next; 1638 ilm_cur = ilm_cur->ilm_next; 1639 } 1640 ilm->ilm_next = ilm_cur; 1641 *ilm_ptpn = ilm; 1642 1643 /* 1644 * If we have an associated ilg, use its filter state; if not, 1645 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 1646 */ 1647 if (ilgstat != ILGSTAT_NONE) { 1648 if (!SLIST_IS_EMPTY(ilg_flist)) 1649 l_copy(ilg_flist, ilm->ilm_filter); 1650 ilm->ilm_fmode = ilg_fmode; 1651 } else { 1652 ilm->ilm_no_ilg_cnt = 1; 1653 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1654 } 1655 1656 mutex_exit(&ill->ill_lock); 1657 return (ilm); 1658 } 1659 1660 void 1661 ilm_inactive(ilm_t *ilm) 1662 { 1663 FREE_SLIST(ilm->ilm_filter); 1664 FREE_SLIST(ilm->ilm_pendsrcs); 1665 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1666 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1667 ilm->ilm_ipst = NULL; 1668 mi_free((char *)ilm); 1669 } 1670 1671 void 1672 ilm_walker_cleanup(ill_t *ill) 1673 { 1674 ilm_t **ilmp; 1675 ilm_t *ilm; 1676 boolean_t need_wakeup = B_FALSE; 1677 1678 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1679 ASSERT(ill->ill_ilm_walker_cnt == 0); 1680 1681 ilmp = &ill->ill_ilm; 1682 while (*ilmp != NULL) { 1683 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1684 ilm = *ilmp; 1685 *ilmp = ilm->ilm_next; 1686 /* 1687 * check if there are any pending FREE or unplumb 1688 * operations that need to be restarted. 1689 */ 1690 if (ilm->ilm_ipif != NULL) { 1691 /* 1692 * IPv4 ilms hold a ref on the ipif. 1693 */ 1694 DTRACE_PROBE3(ipif__decr__cnt, 1695 (ipif_t *), ilm->ilm_ipif, 1696 (char *), "ilm", (void *), ilm); 1697 ilm->ilm_ipif->ipif_ilm_cnt--; 1698 if (IPIF_FREE_OK(ilm->ilm_ipif)) 1699 need_wakeup = B_TRUE; 1700 } else { 1701 /* 1702 * IPv6 ilms hold a ref on the ill. 1703 */ 1704 ASSERT(ilm->ilm_ill == ill); 1705 DTRACE_PROBE3(ill__decr__cnt, 1706 (ill_t *), ill, 1707 (char *), "ilm", (void *), ilm); 1708 ASSERT(ill->ill_ilm_cnt > 0); 1709 ill->ill_ilm_cnt--; 1710 if (ILL_FREE_OK(ill)) 1711 need_wakeup = B_TRUE; 1712 } 1713 ilm_inactive(ilm); /* frees ilm */ 1714 } else { 1715 ilmp = &(*ilmp)->ilm_next; 1716 } 1717 } 1718 ill->ill_ilm_cleanup_reqd = 0; 1719 if (need_wakeup) 1720 ipif_ill_refrele_tail(ill); 1721 else 1722 mutex_exit(&ill->ill_lock); 1723 } 1724 1725 /* 1726 * Unlink ilm and free it. 1727 */ 1728 static void 1729 ilm_delete(ilm_t *ilm) 1730 { 1731 ill_t *ill; 1732 ilm_t **ilmp; 1733 boolean_t need_wakeup; 1734 1735 1736 if (ilm->ilm_ipif != NULL) { 1737 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1738 ASSERT(ilm->ilm_ill == NULL); 1739 ill = ilm->ilm_ipif->ipif_ill; 1740 ASSERT(!ill->ill_isv6); 1741 } else { 1742 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1743 ASSERT(ilm->ilm_ipif == NULL); 1744 ill = ilm->ilm_ill; 1745 ASSERT(ill->ill_isv6); 1746 } 1747 /* 1748 * Delete under lock protection so that readers don't stumble 1749 * on bad ilm_next 1750 */ 1751 mutex_enter(&ill->ill_lock); 1752 if (ill->ill_ilm_walker_cnt != 0) { 1753 ilm->ilm_flags |= ILM_DELETED; 1754 ill->ill_ilm_cleanup_reqd = 1; 1755 mutex_exit(&ill->ill_lock); 1756 return; 1757 } 1758 1759 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1760 ; 1761 *ilmp = ilm->ilm_next; 1762 1763 /* 1764 * if we are the last reference to the ipif (for IPv4 ilms) 1765 * or the ill (for IPv6 ilms), we may need to wakeup any 1766 * pending FREE or unplumb operations. 1767 */ 1768 need_wakeup = B_FALSE; 1769 if (ilm->ilm_ipif != NULL) { 1770 DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif, 1771 (char *), "ilm", (void *), ilm); 1772 ilm->ilm_ipif->ipif_ilm_cnt--; 1773 if (IPIF_FREE_OK(ilm->ilm_ipif)) 1774 need_wakeup = B_TRUE; 1775 } else { 1776 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 1777 (char *), "ilm", (void *), ilm); 1778 ASSERT(ill->ill_ilm_cnt > 0); 1779 ill->ill_ilm_cnt--; 1780 if (ILL_FREE_OK(ill)) 1781 need_wakeup = B_TRUE; 1782 } 1783 1784 ilm_inactive(ilm); /* frees this ilm */ 1785 1786 if (need_wakeup) { 1787 /* drops ill lock */ 1788 ipif_ill_refrele_tail(ill); 1789 } else { 1790 mutex_exit(&ill->ill_lock); 1791 } 1792 } 1793 1794 /* Increment the ILM walker count for `ill' */ 1795 static void 1796 ill_ilm_walker_hold(ill_t *ill) 1797 { 1798 mutex_enter(&ill->ill_lock); 1799 ill->ill_ilm_walker_cnt++; 1800 mutex_exit(&ill->ill_lock); 1801 } 1802 1803 /* Decrement the ILM walker count for `ill' */ 1804 static void 1805 ill_ilm_walker_rele(ill_t *ill) 1806 { 1807 mutex_enter(&ill->ill_lock); 1808 ill->ill_ilm_walker_cnt--; 1809 if (ill->ill_ilm_walker_cnt == 0 && ill->ill_ilm_cleanup_reqd) 1810 ilm_walker_cleanup(ill); /* drops ill_lock */ 1811 else 1812 mutex_exit(&ill->ill_lock); 1813 } 1814 1815 /* 1816 * Start walking the ILMs associated with `ill'; the first ILM in the walk 1817 * (if any) is returned. State associated with the walk is stored in `ilw'. 1818 * Note that walks associated with interfaces under IPMP also walk the ILMs 1819 * on the associated IPMP interface; this is handled transparently to callers 1820 * via ilm_walker_step(). (Usually with IPMP all ILMs will be on the IPMP 1821 * interface; the only exception is to support IPv6 test addresses, which 1822 * require ILMs for their associated solicited-node multicast addresses.) 1823 */ 1824 ilm_t * 1825 ilm_walker_start(ilm_walker_t *ilw, ill_t *ill) 1826 { 1827 ilw->ilw_ill = ill; 1828 if (IS_UNDER_IPMP(ill)) 1829 ilw->ilw_ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 1830 else 1831 ilw->ilw_ipmp_ill = NULL; 1832 1833 ill_ilm_walker_hold(ill); 1834 if (ilw->ilw_ipmp_ill != NULL) 1835 ill_ilm_walker_hold(ilw->ilw_ipmp_ill); 1836 1837 if (ilw->ilw_ipmp_ill != NULL && ilw->ilw_ipmp_ill->ill_ilm != NULL) 1838 ilw->ilw_walk_ill = ilw->ilw_ipmp_ill; 1839 else 1840 ilw->ilw_walk_ill = ilw->ilw_ill; 1841 1842 return (ilm_walker_step(ilw, NULL)); 1843 } 1844 1845 /* 1846 * Helper function for ilm_walker_step() that returns the next ILM 1847 * associated with `ilw', regardless of whether it's deleted. 1848 */ 1849 static ilm_t * 1850 ilm_walker_step_all(ilm_walker_t *ilw, ilm_t *ilm) 1851 { 1852 if (ilm == NULL) 1853 return (ilw->ilw_walk_ill->ill_ilm); 1854 1855 if (ilm->ilm_next != NULL) 1856 return (ilm->ilm_next); 1857 1858 if (ilw->ilw_ipmp_ill != NULL && IS_IPMP(ilw->ilw_walk_ill)) { 1859 ilw->ilw_walk_ill = ilw->ilw_ill; 1860 /* 1861 * It's possible that ilw_ill left the group during our walk, 1862 * so we can't ASSERT() that it's under IPMP. Callers that 1863 * care will be writer on the IPSQ anyway. 1864 */ 1865 return (ilw->ilw_walk_ill->ill_ilm); 1866 } 1867 return (NULL); 1868 } 1869 1870 /* 1871 * Step to the next ILM associated with `ilw'. 1872 */ 1873 ilm_t * 1874 ilm_walker_step(ilm_walker_t *ilw, ilm_t *ilm) 1875 { 1876 while ((ilm = ilm_walker_step_all(ilw, ilm)) != NULL) { 1877 if (!(ilm->ilm_flags & ILM_DELETED)) 1878 break; 1879 } 1880 return (ilm); 1881 } 1882 1883 /* 1884 * Finish the ILM walk associated with `ilw'. 1885 */ 1886 void 1887 ilm_walker_finish(ilm_walker_t *ilw) 1888 { 1889 ill_ilm_walker_rele(ilw->ilw_ill); 1890 if (ilw->ilw_ipmp_ill != NULL) { 1891 ill_ilm_walker_rele(ilw->ilw_ipmp_ill); 1892 ill_refrele(ilw->ilw_ipmp_ill); 1893 } 1894 bzero(&ilw, sizeof (ilw)); 1895 } 1896 1897 /* 1898 * Looks up the appropriate ipif given a v4 multicast group and interface 1899 * address. On success, returns 0, with *ipifpp pointing to the found 1900 * struct. On failure, returns an errno and *ipifpp is NULL. 1901 */ 1902 int 1903 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1904 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1905 { 1906 ipif_t *ipif; 1907 int err = 0; 1908 zoneid_t zoneid; 1909 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1910 1911 if (!CLASSD(group) || CLASSD(src)) { 1912 return (EINVAL); 1913 } 1914 *ipifpp = NULL; 1915 1916 zoneid = IPCL_ZONEID(connp); 1917 1918 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1919 if (ifaddr != INADDR_ANY) { 1920 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1921 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1922 if (err != 0 && err != EINPROGRESS) 1923 err = EADDRNOTAVAIL; 1924 } else if (ifindexp != NULL && *ifindexp != 0) { 1925 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1926 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1927 } else { 1928 ipif = ipif_lookup_group(group, zoneid, ipst); 1929 if (ipif == NULL) 1930 return (EADDRNOTAVAIL); 1931 } 1932 if (ipif == NULL) 1933 return (err); 1934 1935 *ipifpp = ipif; 1936 return (0); 1937 } 1938 1939 /* 1940 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1941 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 1942 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1943 * an errno and *illpp and *ipifpp are undefined. 1944 */ 1945 int 1946 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1947 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1948 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1949 { 1950 boolean_t src_unspec; 1951 ill_t *ill = NULL; 1952 ipif_t *ipif = NULL; 1953 int err; 1954 zoneid_t zoneid = connp->conn_zoneid; 1955 queue_t *wq = CONNP_TO_WQ(connp); 1956 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1957 1958 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1959 1960 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1961 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1962 return (EINVAL); 1963 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1964 if (src_unspec) { 1965 *v4src = INADDR_ANY; 1966 } else { 1967 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1968 } 1969 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1970 return (EINVAL); 1971 *ipifpp = NULL; 1972 *isv6 = B_FALSE; 1973 } else { 1974 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1975 return (EINVAL); 1976 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1977 IN6_IS_ADDR_MULTICAST(v6src)) { 1978 return (EINVAL); 1979 } 1980 *illpp = NULL; 1981 *isv6 = B_TRUE; 1982 } 1983 1984 if (ifindex == 0) { 1985 if (*isv6) 1986 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 1987 else 1988 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 1989 if (ill == NULL && ipif == NULL) 1990 return (EADDRNOTAVAIL); 1991 } else { 1992 if (*isv6) { 1993 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1994 wq, first_mp, func, &err, ipst); 1995 if (ill != NULL && 1996 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1997 ill_refrele(ill); 1998 ill = NULL; 1999 err = EADDRNOTAVAIL; 2000 } 2001 } else { 2002 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 2003 zoneid, wq, first_mp, func, &err, ipst); 2004 } 2005 if (ill == NULL && ipif == NULL) 2006 return (err); 2007 } 2008 2009 *ipifpp = ipif; 2010 *illpp = ill; 2011 return (0); 2012 } 2013 2014 static int 2015 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 2016 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2017 { 2018 ilg_t *ilg; 2019 int i, numsrc, fmode, outsrcs; 2020 struct sockaddr_in *sin; 2021 struct sockaddr_in6 *sin6; 2022 struct in_addr *addrp; 2023 slist_t *fp; 2024 boolean_t is_v4only_api; 2025 2026 mutex_enter(&connp->conn_lock); 2027 2028 ilg = ilg_lookup_ipif(connp, grp, ipif); 2029 if (ilg == NULL) { 2030 mutex_exit(&connp->conn_lock); 2031 return (EADDRNOTAVAIL); 2032 } 2033 2034 if (gf == NULL) { 2035 ASSERT(imsf != NULL); 2036 ASSERT(!isv4mapped); 2037 is_v4only_api = B_TRUE; 2038 outsrcs = imsf->imsf_numsrc; 2039 } else { 2040 ASSERT(imsf == NULL); 2041 is_v4only_api = B_FALSE; 2042 outsrcs = gf->gf_numsrc; 2043 } 2044 2045 /* 2046 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2047 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2048 * So we need to translate here. 2049 */ 2050 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2051 MCAST_INCLUDE : MCAST_EXCLUDE; 2052 if ((fp = ilg->ilg_filter) == NULL) { 2053 numsrc = 0; 2054 } else { 2055 for (i = 0; i < outsrcs; i++) { 2056 if (i == fp->sl_numsrc) 2057 break; 2058 if (isv4mapped) { 2059 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2060 sin6->sin6_family = AF_INET6; 2061 sin6->sin6_addr = fp->sl_addr[i]; 2062 } else { 2063 if (is_v4only_api) { 2064 addrp = &imsf->imsf_slist[i]; 2065 } else { 2066 sin = (struct sockaddr_in *) 2067 &gf->gf_slist[i]; 2068 sin->sin_family = AF_INET; 2069 addrp = &sin->sin_addr; 2070 } 2071 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2072 } 2073 } 2074 numsrc = fp->sl_numsrc; 2075 } 2076 2077 if (is_v4only_api) { 2078 imsf->imsf_numsrc = numsrc; 2079 imsf->imsf_fmode = fmode; 2080 } else { 2081 gf->gf_numsrc = numsrc; 2082 gf->gf_fmode = fmode; 2083 } 2084 2085 mutex_exit(&connp->conn_lock); 2086 2087 return (0); 2088 } 2089 2090 static int 2091 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2092 const struct in6_addr *grp, ill_t *ill) 2093 { 2094 ilg_t *ilg; 2095 int i; 2096 struct sockaddr_storage *sl; 2097 struct sockaddr_in6 *sin6; 2098 slist_t *fp; 2099 2100 mutex_enter(&connp->conn_lock); 2101 2102 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2103 if (ilg == NULL) { 2104 mutex_exit(&connp->conn_lock); 2105 return (EADDRNOTAVAIL); 2106 } 2107 2108 /* 2109 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2110 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2111 * So we need to translate here. 2112 */ 2113 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2114 MCAST_INCLUDE : MCAST_EXCLUDE; 2115 if ((fp = ilg->ilg_filter) == NULL) { 2116 gf->gf_numsrc = 0; 2117 } else { 2118 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2119 if (i == fp->sl_numsrc) 2120 break; 2121 sin6 = (struct sockaddr_in6 *)sl; 2122 sin6->sin6_family = AF_INET6; 2123 sin6->sin6_addr = fp->sl_addr[i]; 2124 } 2125 gf->gf_numsrc = fp->sl_numsrc; 2126 } 2127 2128 mutex_exit(&connp->conn_lock); 2129 2130 return (0); 2131 } 2132 2133 static int 2134 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2135 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2136 { 2137 ilg_t *ilg; 2138 int i, err, infmode, new_fmode; 2139 uint_t insrcs; 2140 struct sockaddr_in *sin; 2141 struct sockaddr_in6 *sin6; 2142 struct in_addr *addrp; 2143 slist_t *orig_filter = NULL; 2144 slist_t *new_filter = NULL; 2145 mcast_record_t orig_fmode; 2146 boolean_t leave_grp, is_v4only_api; 2147 ilg_stat_t ilgstat; 2148 2149 if (gf == NULL) { 2150 ASSERT(imsf != NULL); 2151 ASSERT(!isv4mapped); 2152 is_v4only_api = B_TRUE; 2153 insrcs = imsf->imsf_numsrc; 2154 infmode = imsf->imsf_fmode; 2155 } else { 2156 ASSERT(imsf == NULL); 2157 is_v4only_api = B_FALSE; 2158 insrcs = gf->gf_numsrc; 2159 infmode = gf->gf_fmode; 2160 } 2161 2162 /* Make sure we can handle the source list */ 2163 if (insrcs > MAX_FILTER_SIZE) 2164 return (ENOBUFS); 2165 2166 /* 2167 * setting the filter to (INCLUDE, NULL) is treated 2168 * as a request to leave the group. 2169 */ 2170 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2171 2172 ASSERT(IAM_WRITER_IPIF(ipif)); 2173 2174 mutex_enter(&connp->conn_lock); 2175 2176 ilg = ilg_lookup_ipif(connp, grp, ipif); 2177 if (ilg == NULL) { 2178 /* 2179 * if the request was actually to leave, and we 2180 * didn't find an ilg, there's nothing to do. 2181 */ 2182 if (!leave_grp) 2183 ilg = conn_ilg_alloc(connp, &err); 2184 if (leave_grp || ilg == NULL) { 2185 mutex_exit(&connp->conn_lock); 2186 return (leave_grp ? 0 : err); 2187 } 2188 ilgstat = ILGSTAT_NEW; 2189 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2190 ilg->ilg_ipif = ipif; 2191 ilg->ilg_ill = NULL; 2192 } else if (leave_grp) { 2193 ilg_delete(connp, ilg, NULL); 2194 mutex_exit(&connp->conn_lock); 2195 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2196 return (0); 2197 } else { 2198 ilgstat = ILGSTAT_CHANGE; 2199 /* Preserve existing state in case ip_addmulti() fails */ 2200 orig_fmode = ilg->ilg_fmode; 2201 if (ilg->ilg_filter == NULL) { 2202 orig_filter = NULL; 2203 } else { 2204 orig_filter = l_alloc_copy(ilg->ilg_filter); 2205 if (orig_filter == NULL) { 2206 mutex_exit(&connp->conn_lock); 2207 return (ENOMEM); 2208 } 2209 } 2210 } 2211 2212 /* 2213 * Alloc buffer to copy new state into (see below) before 2214 * we make any changes, so we can bail if it fails. 2215 */ 2216 if ((new_filter = l_alloc()) == NULL) { 2217 mutex_exit(&connp->conn_lock); 2218 err = ENOMEM; 2219 goto free_and_exit; 2220 } 2221 2222 if (insrcs == 0) { 2223 CLEAR_SLIST(ilg->ilg_filter); 2224 } else { 2225 slist_t *fp; 2226 if (ilg->ilg_filter == NULL) { 2227 fp = l_alloc(); 2228 if (fp == NULL) { 2229 if (ilgstat == ILGSTAT_NEW) 2230 ilg_delete(connp, ilg, NULL); 2231 mutex_exit(&connp->conn_lock); 2232 err = ENOMEM; 2233 goto free_and_exit; 2234 } 2235 } else { 2236 fp = ilg->ilg_filter; 2237 } 2238 for (i = 0; i < insrcs; i++) { 2239 if (isv4mapped) { 2240 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2241 fp->sl_addr[i] = sin6->sin6_addr; 2242 } else { 2243 if (is_v4only_api) { 2244 addrp = &imsf->imsf_slist[i]; 2245 } else { 2246 sin = (struct sockaddr_in *) 2247 &gf->gf_slist[i]; 2248 addrp = &sin->sin_addr; 2249 } 2250 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2251 } 2252 } 2253 fp->sl_numsrc = insrcs; 2254 ilg->ilg_filter = fp; 2255 } 2256 /* 2257 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2258 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2259 * So we need to translate here. 2260 */ 2261 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2262 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2263 2264 /* 2265 * Save copy of ilg's filter state to pass to other functions, 2266 * so we can release conn_lock now. 2267 */ 2268 new_fmode = ilg->ilg_fmode; 2269 l_copy(ilg->ilg_filter, new_filter); 2270 2271 mutex_exit(&connp->conn_lock); 2272 2273 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2274 if (err != 0) { 2275 /* 2276 * Restore the original filter state, or delete the 2277 * newly-created ilg. We need to look up the ilg 2278 * again, though, since we've not been holding the 2279 * conn_lock. 2280 */ 2281 mutex_enter(&connp->conn_lock); 2282 ilg = ilg_lookup_ipif(connp, grp, ipif); 2283 ASSERT(ilg != NULL); 2284 if (ilgstat == ILGSTAT_NEW) { 2285 ilg_delete(connp, ilg, NULL); 2286 } else { 2287 ilg->ilg_fmode = orig_fmode; 2288 if (SLIST_IS_EMPTY(orig_filter)) { 2289 CLEAR_SLIST(ilg->ilg_filter); 2290 } else { 2291 /* 2292 * We didn't free the filter, even if we 2293 * were trying to make the source list empty; 2294 * so if orig_filter isn't empty, the ilg 2295 * must still have a filter alloc'd. 2296 */ 2297 l_copy(orig_filter, ilg->ilg_filter); 2298 } 2299 } 2300 mutex_exit(&connp->conn_lock); 2301 } 2302 2303 free_and_exit: 2304 l_free(orig_filter); 2305 l_free(new_filter); 2306 2307 return (err); 2308 } 2309 2310 static int 2311 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2312 const struct in6_addr *grp, ill_t *ill) 2313 { 2314 ilg_t *ilg; 2315 int i, orig_fmode, new_fmode, err; 2316 slist_t *orig_filter = NULL; 2317 slist_t *new_filter = NULL; 2318 struct sockaddr_storage *sl; 2319 struct sockaddr_in6 *sin6; 2320 boolean_t leave_grp; 2321 ilg_stat_t ilgstat; 2322 2323 /* Make sure we can handle the source list */ 2324 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2325 return (ENOBUFS); 2326 2327 /* 2328 * setting the filter to (INCLUDE, NULL) is treated 2329 * as a request to leave the group. 2330 */ 2331 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2332 2333 ASSERT(IAM_WRITER_ILL(ill)); 2334 2335 mutex_enter(&connp->conn_lock); 2336 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2337 if (ilg == NULL) { 2338 /* 2339 * if the request was actually to leave, and we 2340 * didn't find an ilg, there's nothing to do. 2341 */ 2342 if (!leave_grp) 2343 ilg = conn_ilg_alloc(connp, &err); 2344 if (leave_grp || ilg == NULL) { 2345 mutex_exit(&connp->conn_lock); 2346 return (leave_grp ? 0 : err); 2347 } 2348 ilgstat = ILGSTAT_NEW; 2349 ilg->ilg_v6group = *grp; 2350 ilg->ilg_ipif = NULL; 2351 ilg->ilg_ill = ill; 2352 } else if (leave_grp) { 2353 ilg_delete(connp, ilg, NULL); 2354 mutex_exit(&connp->conn_lock); 2355 (void) ip_delmulti_v6(grp, ill, connp->conn_zoneid, B_FALSE, 2356 B_TRUE); 2357 return (0); 2358 } else { 2359 ilgstat = ILGSTAT_CHANGE; 2360 /* preserve existing state in case ip_addmulti() fails */ 2361 orig_fmode = ilg->ilg_fmode; 2362 if (ilg->ilg_filter == NULL) { 2363 orig_filter = NULL; 2364 } else { 2365 orig_filter = l_alloc_copy(ilg->ilg_filter); 2366 if (orig_filter == NULL) { 2367 mutex_exit(&connp->conn_lock); 2368 return (ENOMEM); 2369 } 2370 } 2371 } 2372 2373 /* 2374 * Alloc buffer to copy new state into (see below) before 2375 * we make any changes, so we can bail if it fails. 2376 */ 2377 if ((new_filter = l_alloc()) == NULL) { 2378 mutex_exit(&connp->conn_lock); 2379 err = ENOMEM; 2380 goto free_and_exit; 2381 } 2382 2383 if (gf->gf_numsrc == 0) { 2384 CLEAR_SLIST(ilg->ilg_filter); 2385 } else { 2386 slist_t *fp; 2387 if (ilg->ilg_filter == NULL) { 2388 fp = l_alloc(); 2389 if (fp == NULL) { 2390 if (ilgstat == ILGSTAT_NEW) 2391 ilg_delete(connp, ilg, NULL); 2392 mutex_exit(&connp->conn_lock); 2393 err = ENOMEM; 2394 goto free_and_exit; 2395 } 2396 } else { 2397 fp = ilg->ilg_filter; 2398 } 2399 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2400 sin6 = (struct sockaddr_in6 *)sl; 2401 fp->sl_addr[i] = sin6->sin6_addr; 2402 } 2403 fp->sl_numsrc = gf->gf_numsrc; 2404 ilg->ilg_filter = fp; 2405 } 2406 /* 2407 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2408 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2409 * So we need to translate here. 2410 */ 2411 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2412 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2413 2414 /* 2415 * Save copy of ilg's filter state to pass to other functions, 2416 * so we can release conn_lock now. 2417 */ 2418 new_fmode = ilg->ilg_fmode; 2419 l_copy(ilg->ilg_filter, new_filter); 2420 2421 mutex_exit(&connp->conn_lock); 2422 2423 err = ip_addmulti_v6(grp, ill, connp->conn_zoneid, ilgstat, new_fmode, 2424 new_filter); 2425 if (err != 0) { 2426 /* 2427 * Restore the original filter state, or delete the 2428 * newly-created ilg. We need to look up the ilg 2429 * again, though, since we've not been holding the 2430 * conn_lock. 2431 */ 2432 mutex_enter(&connp->conn_lock); 2433 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2434 ASSERT(ilg != NULL); 2435 if (ilgstat == ILGSTAT_NEW) { 2436 ilg_delete(connp, ilg, NULL); 2437 } else { 2438 ilg->ilg_fmode = orig_fmode; 2439 if (SLIST_IS_EMPTY(orig_filter)) { 2440 CLEAR_SLIST(ilg->ilg_filter); 2441 } else { 2442 /* 2443 * We didn't free the filter, even if we 2444 * were trying to make the source list empty; 2445 * so if orig_filter isn't empty, the ilg 2446 * must still have a filter alloc'd. 2447 */ 2448 l_copy(orig_filter, ilg->ilg_filter); 2449 } 2450 } 2451 mutex_exit(&connp->conn_lock); 2452 } 2453 2454 free_and_exit: 2455 l_free(orig_filter); 2456 l_free(new_filter); 2457 2458 return (err); 2459 } 2460 2461 /* 2462 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2463 */ 2464 /* ARGSUSED */ 2465 int 2466 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2467 ip_ioctl_cmd_t *ipip, void *ifreq) 2468 { 2469 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2470 /* existence verified in ip_wput_nondata() */ 2471 mblk_t *data_mp = mp->b_cont->b_cont; 2472 int datalen, err, cmd, minsize; 2473 uint_t expsize = 0; 2474 conn_t *connp; 2475 boolean_t isv6, is_v4only_api, getcmd; 2476 struct sockaddr_in *gsin; 2477 struct sockaddr_in6 *gsin6; 2478 ipaddr_t v4grp; 2479 in6_addr_t v6grp; 2480 struct group_filter *gf = NULL; 2481 struct ip_msfilter *imsf = NULL; 2482 mblk_t *ndp; 2483 2484 if (data_mp->b_cont != NULL) { 2485 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2486 return (ENOMEM); 2487 freemsg(data_mp); 2488 data_mp = ndp; 2489 mp->b_cont->b_cont = data_mp; 2490 } 2491 2492 cmd = iocp->ioc_cmd; 2493 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2494 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2495 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2496 datalen = MBLKL(data_mp); 2497 2498 if (datalen < minsize) 2499 return (EINVAL); 2500 2501 /* 2502 * now we know we have at least have the initial structure, 2503 * but need to check for the source list array. 2504 */ 2505 if (is_v4only_api) { 2506 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2507 isv6 = B_FALSE; 2508 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2509 } else { 2510 gf = (struct group_filter *)data_mp->b_rptr; 2511 if (gf->gf_group.ss_family == AF_INET6) { 2512 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2513 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2514 } else { 2515 isv6 = B_FALSE; 2516 } 2517 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2518 } 2519 if (datalen < expsize) 2520 return (EINVAL); 2521 2522 connp = Q_TO_CONN(q); 2523 2524 /* operation not supported on the virtual network interface */ 2525 if (IS_VNI(ipif->ipif_ill)) 2526 return (EINVAL); 2527 2528 if (isv6) { 2529 ill_t *ill = ipif->ipif_ill; 2530 ill_refhold(ill); 2531 2532 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2533 v6grp = gsin6->sin6_addr; 2534 if (getcmd) 2535 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2536 else 2537 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2538 2539 ill_refrele(ill); 2540 } else { 2541 boolean_t isv4mapped = B_FALSE; 2542 if (is_v4only_api) { 2543 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2544 } else { 2545 if (gf->gf_group.ss_family == AF_INET) { 2546 gsin = (struct sockaddr_in *)&gf->gf_group; 2547 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2548 } else { 2549 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2550 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2551 v4grp); 2552 isv4mapped = B_TRUE; 2553 } 2554 } 2555 if (getcmd) 2556 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2557 isv4mapped); 2558 else 2559 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2560 isv4mapped); 2561 } 2562 2563 return (err); 2564 } 2565 2566 /* 2567 * Finds the ipif based on information in the ioctl headers. Needed to make 2568 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2569 * ioctls prior to calling the ioctl's handler function). 2570 */ 2571 int 2572 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2573 cmd_info_t *ci, ipsq_func_t func) 2574 { 2575 int cmd = ipip->ipi_cmd; 2576 int err = 0; 2577 conn_t *connp; 2578 ipif_t *ipif; 2579 /* caller has verified this mblk exists */ 2580 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2581 struct ip_msfilter *imsf; 2582 struct group_filter *gf; 2583 ipaddr_t v4addr, v4grp; 2584 in6_addr_t v6grp; 2585 uint32_t index; 2586 zoneid_t zoneid; 2587 ip_stack_t *ipst; 2588 2589 connp = Q_TO_CONN(q); 2590 zoneid = connp->conn_zoneid; 2591 ipst = connp->conn_netstack->netstack_ip; 2592 2593 /* don't allow multicast operations on a tcp conn */ 2594 if (IPCL_IS_TCP(connp)) 2595 return (ENOPROTOOPT); 2596 2597 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2598 /* don't allow v4-specific ioctls on v6 socket */ 2599 if (connp->conn_af_isv6) 2600 return (EAFNOSUPPORT); 2601 2602 imsf = (struct ip_msfilter *)dbuf; 2603 v4addr = imsf->imsf_interface.s_addr; 2604 v4grp = imsf->imsf_multiaddr.s_addr; 2605 if (v4addr == INADDR_ANY) { 2606 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2607 if (ipif == NULL) 2608 err = EADDRNOTAVAIL; 2609 } else { 2610 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2611 func, &err, ipst); 2612 } 2613 } else { 2614 boolean_t isv6 = B_FALSE; 2615 gf = (struct group_filter *)dbuf; 2616 index = gf->gf_interface; 2617 if (gf->gf_group.ss_family == AF_INET6) { 2618 struct sockaddr_in6 *sin6; 2619 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2620 v6grp = sin6->sin6_addr; 2621 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2622 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2623 else 2624 isv6 = B_TRUE; 2625 } else if (gf->gf_group.ss_family == AF_INET) { 2626 struct sockaddr_in *sin; 2627 sin = (struct sockaddr_in *)&gf->gf_group; 2628 v4grp = sin->sin_addr.s_addr; 2629 } else { 2630 return (EAFNOSUPPORT); 2631 } 2632 if (index == 0) { 2633 if (isv6) { 2634 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2635 ipst); 2636 } else { 2637 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2638 } 2639 if (ipif == NULL) 2640 err = EADDRNOTAVAIL; 2641 } else { 2642 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2643 q, mp, func, &err, ipst); 2644 } 2645 } 2646 2647 ci->ci_ipif = ipif; 2648 return (err); 2649 } 2650 2651 /* 2652 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2653 * in in two stages, as the first copyin tells us the size of the attached 2654 * source buffer. This function is called by ip_wput_nondata() after the 2655 * first copyin has completed; it figures out how big the second stage 2656 * needs to be, and kicks it off. 2657 * 2658 * In some cases (numsrc < 2), the second copyin is not needed as the 2659 * first one gets a complete structure containing 1 source addr. 2660 * 2661 * The function returns 0 if a second copyin has been started (i.e. there's 2662 * no more work to be done right now), or 1 if the second copyin is not 2663 * needed and ip_wput_nondata() can continue its processing. 2664 */ 2665 int 2666 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2667 { 2668 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2669 int cmd = iocp->ioc_cmd; 2670 /* validity of this checked in ip_wput_nondata() */ 2671 mblk_t *mp1 = mp->b_cont->b_cont; 2672 int copysize = 0; 2673 int offset; 2674 2675 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2676 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2677 if (gf->gf_numsrc >= 2) { 2678 offset = sizeof (struct group_filter); 2679 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2680 } 2681 } else { 2682 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2683 if (imsf->imsf_numsrc >= 2) { 2684 offset = sizeof (struct ip_msfilter); 2685 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2686 } 2687 } 2688 if (copysize > 0) { 2689 mi_copyin_n(q, mp, offset, copysize); 2690 return (0); 2691 } 2692 return (1); 2693 } 2694 2695 /* 2696 * Handle the following optmgmt: 2697 * IP_ADD_MEMBERSHIP must not have joined already 2698 * MCAST_JOIN_GROUP must not have joined already 2699 * IP_BLOCK_SOURCE must have joined already 2700 * MCAST_BLOCK_SOURCE must have joined already 2701 * IP_JOIN_SOURCE_GROUP may have joined already 2702 * MCAST_JOIN_SOURCE_GROUP may have joined already 2703 * 2704 * fmode and src parameters may be used to determine which option is 2705 * being set, as follows (the IP_* and MCAST_* versions of each option 2706 * are functionally equivalent): 2707 * opt fmode src 2708 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2709 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2710 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2711 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2712 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2713 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2714 * 2715 * Changing the filter mode is not allowed; if a matching ilg already 2716 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2717 * 2718 * Verifies that there is a source address of appropriate scope for 2719 * the group; if not, EADDRNOTAVAIL is returned. 2720 * 2721 * The interface to be used may be identified by an address or by an 2722 * index. A pointer to the index is passed; if it is NULL, use the 2723 * address, otherwise, use the index. 2724 */ 2725 int 2726 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2727 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2728 mblk_t *first_mp) 2729 { 2730 ipif_t *ipif; 2731 ipsq_t *ipsq; 2732 int err = 0; 2733 ill_t *ill; 2734 2735 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2736 ip_restart_optmgmt, &ipif); 2737 if (err != 0) { 2738 if (err != EINPROGRESS) { 2739 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2740 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2741 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2742 } 2743 return (err); 2744 } 2745 ASSERT(ipif != NULL); 2746 2747 ill = ipif->ipif_ill; 2748 /* Operation not supported on a virtual network interface */ 2749 if (IS_VNI(ill)) { 2750 ipif_refrele(ipif); 2751 return (EINVAL); 2752 } 2753 2754 if (checkonly) { 2755 /* 2756 * do not do operation, just pretend to - new T_CHECK 2757 * semantics. The error return case above if encountered 2758 * considered a good enough "check" here. 2759 */ 2760 ipif_refrele(ipif); 2761 return (0); 2762 } 2763 2764 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2765 NEW_OP); 2766 2767 /* unspecified source addr => no source filtering */ 2768 err = ilg_add(connp, group, ipif, fmode, src); 2769 2770 IPSQ_EXIT(ipsq); 2771 2772 ipif_refrele(ipif); 2773 return (err); 2774 } 2775 2776 /* 2777 * Handle the following optmgmt: 2778 * IPV6_JOIN_GROUP must not have joined already 2779 * MCAST_JOIN_GROUP must not have joined already 2780 * MCAST_BLOCK_SOURCE must have joined already 2781 * MCAST_JOIN_SOURCE_GROUP may have joined already 2782 * 2783 * fmode and src parameters may be used to determine which option is 2784 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2785 * are functionally equivalent): 2786 * opt fmode v6src 2787 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2788 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2789 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2790 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2791 * 2792 * Changing the filter mode is not allowed; if a matching ilg already 2793 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2794 * 2795 * Verifies that there is a source address of appropriate scope for 2796 * the group; if not, EADDRNOTAVAIL is returned. 2797 * 2798 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2799 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2800 * v6src is also v4-mapped. 2801 */ 2802 int 2803 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2804 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2805 const in6_addr_t *v6src, mblk_t *first_mp) 2806 { 2807 ill_t *ill; 2808 ipif_t *ipif; 2809 char buf[INET6_ADDRSTRLEN]; 2810 ipaddr_t v4group, v4src; 2811 boolean_t isv6; 2812 ipsq_t *ipsq; 2813 int err; 2814 2815 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2816 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2817 if (err != 0) { 2818 if (err != EINPROGRESS) { 2819 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2820 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2821 sizeof (buf)), ifindex)); 2822 } 2823 return (err); 2824 } 2825 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2826 2827 /* operation is not supported on the virtual network interface */ 2828 if (isv6) { 2829 if (IS_VNI(ill)) { 2830 ill_refrele(ill); 2831 return (EINVAL); 2832 } 2833 } else { 2834 if (IS_VNI(ipif->ipif_ill)) { 2835 ipif_refrele(ipif); 2836 return (EINVAL); 2837 } 2838 } 2839 2840 if (checkonly) { 2841 /* 2842 * do not do operation, just pretend to - new T_CHECK 2843 * semantics. The error return case above if encountered 2844 * considered a good enough "check" here. 2845 */ 2846 if (isv6) 2847 ill_refrele(ill); 2848 else 2849 ipif_refrele(ipif); 2850 return (0); 2851 } 2852 2853 if (!isv6) { 2854 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2855 ipsq, NEW_OP); 2856 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2857 IPSQ_EXIT(ipsq); 2858 ipif_refrele(ipif); 2859 } else { 2860 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2861 ipsq, NEW_OP); 2862 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2863 IPSQ_EXIT(ipsq); 2864 ill_refrele(ill); 2865 } 2866 2867 return (err); 2868 } 2869 2870 static int 2871 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2872 mcast_record_t fmode, ipaddr_t src) 2873 { 2874 ilg_t *ilg; 2875 in6_addr_t v6src; 2876 boolean_t leaving = B_FALSE; 2877 2878 ASSERT(IAM_WRITER_IPIF(ipif)); 2879 2880 /* 2881 * The ilg is valid only while we hold the conn lock. Once we drop 2882 * the lock, another thread can locate another ilg on this connp, 2883 * but on a different ipif, and delete it, and cause the ilg array 2884 * to be reallocated and copied. Hence do the ilg_delete before 2885 * dropping the lock. 2886 */ 2887 mutex_enter(&connp->conn_lock); 2888 ilg = ilg_lookup_ipif(connp, group, ipif); 2889 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2890 mutex_exit(&connp->conn_lock); 2891 return (EADDRNOTAVAIL); 2892 } 2893 2894 /* 2895 * Decide if we're actually deleting the ilg or just removing a 2896 * source filter address; if just removing an addr, make sure we 2897 * aren't trying to change the filter mode, and that the addr is 2898 * actually in our filter list already. If we're removing the 2899 * last src in an include list, just delete the ilg. 2900 */ 2901 if (src == INADDR_ANY) { 2902 v6src = ipv6_all_zeros; 2903 leaving = B_TRUE; 2904 } else { 2905 int err = 0; 2906 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2907 if (fmode != ilg->ilg_fmode) 2908 err = EINVAL; 2909 else if (ilg->ilg_filter == NULL || 2910 !list_has_addr(ilg->ilg_filter, &v6src)) 2911 err = EADDRNOTAVAIL; 2912 if (err != 0) { 2913 mutex_exit(&connp->conn_lock); 2914 return (err); 2915 } 2916 if (fmode == MODE_IS_INCLUDE && 2917 ilg->ilg_filter->sl_numsrc == 1) { 2918 v6src = ipv6_all_zeros; 2919 leaving = B_TRUE; 2920 } 2921 } 2922 2923 ilg_delete(connp, ilg, &v6src); 2924 mutex_exit(&connp->conn_lock); 2925 2926 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2927 return (0); 2928 } 2929 2930 static int 2931 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2932 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2933 { 2934 ilg_t *ilg; 2935 boolean_t leaving = B_TRUE; 2936 2937 ASSERT(IAM_WRITER_ILL(ill)); 2938 2939 mutex_enter(&connp->conn_lock); 2940 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 2941 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2942 mutex_exit(&connp->conn_lock); 2943 return (EADDRNOTAVAIL); 2944 } 2945 2946 /* 2947 * Decide if we're actually deleting the ilg or just removing a 2948 * source filter address; if just removing an addr, make sure we 2949 * aren't trying to change the filter mode, and that the addr is 2950 * actually in our filter list already. If we're removing the 2951 * last src in an include list, just delete the ilg. 2952 */ 2953 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2954 int err = 0; 2955 if (fmode != ilg->ilg_fmode) 2956 err = EINVAL; 2957 else if (ilg->ilg_filter == NULL || 2958 !list_has_addr(ilg->ilg_filter, v6src)) 2959 err = EADDRNOTAVAIL; 2960 if (err != 0) { 2961 mutex_exit(&connp->conn_lock); 2962 return (err); 2963 } 2964 if (fmode == MODE_IS_INCLUDE && 2965 ilg->ilg_filter->sl_numsrc == 1) 2966 v6src = NULL; 2967 else 2968 leaving = B_FALSE; 2969 } 2970 2971 ilg_delete(connp, ilg, v6src); 2972 mutex_exit(&connp->conn_lock); 2973 (void) ip_delmulti_v6(v6group, ill, connp->conn_zoneid, B_FALSE, 2974 leaving); 2975 2976 return (0); 2977 } 2978 2979 /* 2980 * Handle the following optmgmt: 2981 * IP_DROP_MEMBERSHIP will leave 2982 * MCAST_LEAVE_GROUP will leave 2983 * IP_UNBLOCK_SOURCE will not leave 2984 * MCAST_UNBLOCK_SOURCE will not leave 2985 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2986 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2987 * 2988 * fmode and src parameters may be used to determine which option is 2989 * being set, as follows (the IP_* and MCAST_* versions of each option 2990 * are functionally equivalent): 2991 * opt fmode src 2992 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2993 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2994 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2995 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2996 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2997 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2998 * 2999 * Changing the filter mode is not allowed; if a matching ilg already 3000 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3001 * 3002 * The interface to be used may be identified by an address or by an 3003 * index. A pointer to the index is passed; if it is NULL, use the 3004 * address, otherwise, use the index. 3005 */ 3006 int 3007 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3008 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3009 mblk_t *first_mp) 3010 { 3011 ipif_t *ipif; 3012 ipsq_t *ipsq; 3013 int err; 3014 ill_t *ill; 3015 3016 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3017 ip_restart_optmgmt, &ipif); 3018 if (err != 0) { 3019 if (err != EINPROGRESS) { 3020 ip1dbg(("ip_opt_delete_group: no ipif for group " 3021 "0x%x, ifaddr 0x%x\n", 3022 (int)ntohl(group), (int)ntohl(ifaddr))); 3023 } 3024 return (err); 3025 } 3026 ASSERT(ipif != NULL); 3027 3028 ill = ipif->ipif_ill; 3029 /* Operation not supported on a virtual network interface */ 3030 if (IS_VNI(ill)) { 3031 ipif_refrele(ipif); 3032 return (EINVAL); 3033 } 3034 3035 if (checkonly) { 3036 /* 3037 * do not do operation, just pretend to - new T_CHECK 3038 * semantics. The error return case above if encountered 3039 * considered a good enough "check" here. 3040 */ 3041 ipif_refrele(ipif); 3042 return (0); 3043 } 3044 3045 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3046 NEW_OP); 3047 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3048 IPSQ_EXIT(ipsq); 3049 3050 ipif_refrele(ipif); 3051 return (err); 3052 } 3053 3054 /* 3055 * Handle the following optmgmt: 3056 * IPV6_LEAVE_GROUP will leave 3057 * MCAST_LEAVE_GROUP will leave 3058 * MCAST_UNBLOCK_SOURCE will not leave 3059 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3060 * 3061 * fmode and src parameters may be used to determine which option is 3062 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3063 * are functionally equivalent): 3064 * opt fmode v6src 3065 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3066 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3067 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3068 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3069 * 3070 * Changing the filter mode is not allowed; if a matching ilg already 3071 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3072 * 3073 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3074 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3075 * v6src is also v4-mapped. 3076 */ 3077 int 3078 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3079 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3080 const in6_addr_t *v6src, mblk_t *first_mp) 3081 { 3082 ill_t *ill; 3083 ipif_t *ipif; 3084 char buf[INET6_ADDRSTRLEN]; 3085 ipaddr_t v4group, v4src; 3086 boolean_t isv6; 3087 ipsq_t *ipsq; 3088 int err; 3089 3090 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3091 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3092 if (err != 0) { 3093 if (err != EINPROGRESS) { 3094 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3095 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3096 sizeof (buf)), ifindex)); 3097 } 3098 return (err); 3099 } 3100 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3101 3102 /* operation is not supported on the virtual network interface */ 3103 if (isv6) { 3104 if (IS_VNI(ill)) { 3105 ill_refrele(ill); 3106 return (EINVAL); 3107 } 3108 } else { 3109 if (IS_VNI(ipif->ipif_ill)) { 3110 ipif_refrele(ipif); 3111 return (EINVAL); 3112 } 3113 } 3114 3115 if (checkonly) { 3116 /* 3117 * do not do operation, just pretend to - new T_CHECK 3118 * semantics. The error return case above if encountered 3119 * considered a good enough "check" here. 3120 */ 3121 if (isv6) 3122 ill_refrele(ill); 3123 else 3124 ipif_refrele(ipif); 3125 return (0); 3126 } 3127 3128 if (!isv6) { 3129 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3130 ipsq, NEW_OP); 3131 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3132 v4src); 3133 IPSQ_EXIT(ipsq); 3134 ipif_refrele(ipif); 3135 } else { 3136 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3137 ipsq, NEW_OP); 3138 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3139 v6src); 3140 IPSQ_EXIT(ipsq); 3141 ill_refrele(ill); 3142 } 3143 3144 return (err); 3145 } 3146 3147 /* 3148 * Group mgmt for upper conn that passes things down 3149 * to the interface multicast list (and DLPI) 3150 * These routines can handle new style options that specify an interface name 3151 * as opposed to an interface address (needed for general handling of 3152 * unnumbered interfaces.) 3153 */ 3154 3155 /* 3156 * Add a group to an upper conn group data structure and pass things down 3157 * to the interface multicast list (and DLPI) 3158 */ 3159 static int 3160 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3161 ipaddr_t src) 3162 { 3163 int error = 0; 3164 ill_t *ill; 3165 ilg_t *ilg; 3166 ilg_stat_t ilgstat; 3167 slist_t *new_filter = NULL; 3168 int new_fmode; 3169 3170 ASSERT(IAM_WRITER_IPIF(ipif)); 3171 3172 ill = ipif->ipif_ill; 3173 3174 if (!(ill->ill_flags & ILLF_MULTICAST)) 3175 return (EADDRNOTAVAIL); 3176 3177 /* 3178 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3179 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3180 * serialize 2 threads doing join (sock, group1, hme0:0) and 3181 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3182 * but both operations happen on the same conn. 3183 */ 3184 mutex_enter(&connp->conn_lock); 3185 ilg = ilg_lookup_ipif(connp, group, ipif); 3186 3187 /* 3188 * Depending on the option we're handling, may or may not be okay 3189 * if group has already been added. Figure out our rules based 3190 * on fmode and src params. Also make sure there's enough room 3191 * in the filter if we're adding a source to an existing filter. 3192 */ 3193 if (src == INADDR_ANY) { 3194 /* we're joining for all sources, must not have joined */ 3195 if (ilg != NULL) 3196 error = EADDRINUSE; 3197 } else { 3198 if (fmode == MODE_IS_EXCLUDE) { 3199 /* (excl {addr}) => block source, must have joined */ 3200 if (ilg == NULL) 3201 error = EADDRNOTAVAIL; 3202 } 3203 /* (incl {addr}) => join source, may have joined */ 3204 3205 if (ilg != NULL && 3206 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3207 error = ENOBUFS; 3208 } 3209 if (error != 0) { 3210 mutex_exit(&connp->conn_lock); 3211 return (error); 3212 } 3213 3214 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3215 3216 /* 3217 * Alloc buffer to copy new state into (see below) before 3218 * we make any changes, so we can bail if it fails. 3219 */ 3220 if ((new_filter = l_alloc()) == NULL) { 3221 mutex_exit(&connp->conn_lock); 3222 return (ENOMEM); 3223 } 3224 3225 if (ilg == NULL) { 3226 ilgstat = ILGSTAT_NEW; 3227 if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) { 3228 mutex_exit(&connp->conn_lock); 3229 l_free(new_filter); 3230 return (error); 3231 } 3232 if (src != INADDR_ANY) { 3233 ilg->ilg_filter = l_alloc(); 3234 if (ilg->ilg_filter == NULL) { 3235 ilg_delete(connp, ilg, NULL); 3236 mutex_exit(&connp->conn_lock); 3237 l_free(new_filter); 3238 return (ENOMEM); 3239 } 3240 ilg->ilg_filter->sl_numsrc = 1; 3241 IN6_IPADDR_TO_V4MAPPED(src, 3242 &ilg->ilg_filter->sl_addr[0]); 3243 } 3244 if (group == INADDR_ANY) { 3245 ilg->ilg_v6group = ipv6_all_zeros; 3246 } else { 3247 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3248 } 3249 ilg->ilg_ipif = ipif; 3250 ilg->ilg_ill = NULL; 3251 ilg->ilg_fmode = fmode; 3252 } else { 3253 int index; 3254 in6_addr_t v6src; 3255 ilgstat = ILGSTAT_CHANGE; 3256 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3257 mutex_exit(&connp->conn_lock); 3258 l_free(new_filter); 3259 return (EINVAL); 3260 } 3261 if (ilg->ilg_filter == NULL) { 3262 ilg->ilg_filter = l_alloc(); 3263 if (ilg->ilg_filter == NULL) { 3264 mutex_exit(&connp->conn_lock); 3265 l_free(new_filter); 3266 return (ENOMEM); 3267 } 3268 } 3269 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3270 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3271 mutex_exit(&connp->conn_lock); 3272 l_free(new_filter); 3273 return (EADDRNOTAVAIL); 3274 } 3275 index = ilg->ilg_filter->sl_numsrc++; 3276 ilg->ilg_filter->sl_addr[index] = v6src; 3277 } 3278 3279 /* 3280 * Save copy of ilg's filter state to pass to other functions, 3281 * so we can release conn_lock now. 3282 */ 3283 new_fmode = ilg->ilg_fmode; 3284 l_copy(ilg->ilg_filter, new_filter); 3285 3286 mutex_exit(&connp->conn_lock); 3287 3288 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3289 if (error != 0) { 3290 /* 3291 * Need to undo what we did before calling ip_addmulti()! 3292 * Must look up the ilg again since we've not been holding 3293 * conn_lock. 3294 */ 3295 in6_addr_t v6src; 3296 if (ilgstat == ILGSTAT_NEW) 3297 v6src = ipv6_all_zeros; 3298 else 3299 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3300 mutex_enter(&connp->conn_lock); 3301 ilg = ilg_lookup_ipif(connp, group, ipif); 3302 ASSERT(ilg != NULL); 3303 ilg_delete(connp, ilg, &v6src); 3304 mutex_exit(&connp->conn_lock); 3305 l_free(new_filter); 3306 return (error); 3307 } 3308 3309 l_free(new_filter); 3310 return (0); 3311 } 3312 3313 static int 3314 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3315 mcast_record_t fmode, const in6_addr_t *v6src) 3316 { 3317 int error = 0; 3318 ilg_t *ilg; 3319 ilg_stat_t ilgstat; 3320 slist_t *new_filter = NULL; 3321 int new_fmode; 3322 3323 ASSERT(IAM_WRITER_ILL(ill)); 3324 3325 if (!(ill->ill_flags & ILLF_MULTICAST)) 3326 return (EADDRNOTAVAIL); 3327 3328 /* 3329 * conn_lock protects the ilg list. Serializes 2 threads doing 3330 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3331 * and hme1 map to different ipsq's, but both operations happen 3332 * on the same conn. 3333 */ 3334 mutex_enter(&connp->conn_lock); 3335 3336 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3337 3338 /* 3339 * Depending on the option we're handling, may or may not be okay 3340 * if group has already been added. Figure out our rules based 3341 * on fmode and src params. Also make sure there's enough room 3342 * in the filter if we're adding a source to an existing filter. 3343 */ 3344 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3345 /* we're joining for all sources, must not have joined */ 3346 if (ilg != NULL) 3347 error = EADDRINUSE; 3348 } else { 3349 if (fmode == MODE_IS_EXCLUDE) { 3350 /* (excl {addr}) => block source, must have joined */ 3351 if (ilg == NULL) 3352 error = EADDRNOTAVAIL; 3353 } 3354 /* (incl {addr}) => join source, may have joined */ 3355 3356 if (ilg != NULL && 3357 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3358 error = ENOBUFS; 3359 } 3360 if (error != 0) { 3361 mutex_exit(&connp->conn_lock); 3362 return (error); 3363 } 3364 3365 /* 3366 * Alloc buffer to copy new state into (see below) before 3367 * we make any changes, so we can bail if it fails. 3368 */ 3369 if ((new_filter = l_alloc()) == NULL) { 3370 mutex_exit(&connp->conn_lock); 3371 return (ENOMEM); 3372 } 3373 3374 if (ilg == NULL) { 3375 if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) { 3376 mutex_exit(&connp->conn_lock); 3377 l_free(new_filter); 3378 return (error); 3379 } 3380 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3381 ilg->ilg_filter = l_alloc(); 3382 if (ilg->ilg_filter == NULL) { 3383 ilg_delete(connp, ilg, NULL); 3384 mutex_exit(&connp->conn_lock); 3385 l_free(new_filter); 3386 return (ENOMEM); 3387 } 3388 ilg->ilg_filter->sl_numsrc = 1; 3389 ilg->ilg_filter->sl_addr[0] = *v6src; 3390 } 3391 ilgstat = ILGSTAT_NEW; 3392 ilg->ilg_v6group = *v6group; 3393 ilg->ilg_fmode = fmode; 3394 ilg->ilg_ipif = NULL; 3395 ilg->ilg_ill = ill; 3396 } else { 3397 int index; 3398 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3399 mutex_exit(&connp->conn_lock); 3400 l_free(new_filter); 3401 return (EINVAL); 3402 } 3403 if (ilg->ilg_filter == NULL) { 3404 ilg->ilg_filter = l_alloc(); 3405 if (ilg->ilg_filter == NULL) { 3406 mutex_exit(&connp->conn_lock); 3407 l_free(new_filter); 3408 return (ENOMEM); 3409 } 3410 } 3411 if (list_has_addr(ilg->ilg_filter, v6src)) { 3412 mutex_exit(&connp->conn_lock); 3413 l_free(new_filter); 3414 return (EADDRNOTAVAIL); 3415 } 3416 ilgstat = ILGSTAT_CHANGE; 3417 index = ilg->ilg_filter->sl_numsrc++; 3418 ilg->ilg_filter->sl_addr[index] = *v6src; 3419 } 3420 3421 /* 3422 * Save copy of ilg's filter state to pass to other functions, 3423 * so we can release conn_lock now. 3424 */ 3425 new_fmode = ilg->ilg_fmode; 3426 l_copy(ilg->ilg_filter, new_filter); 3427 3428 mutex_exit(&connp->conn_lock); 3429 3430 /* 3431 * Now update the ill. We wait to do this until after the ilg 3432 * has been updated because we need to update the src filter 3433 * info for the ill, which involves looking at the status of 3434 * all the ilgs associated with this group/interface pair. 3435 */ 3436 error = ip_addmulti_v6(v6group, ill, connp->conn_zoneid, ilgstat, 3437 new_fmode, new_filter); 3438 if (error != 0) { 3439 /* 3440 * But because we waited, we have to undo the ilg update 3441 * if ip_addmulti_v6() fails. We also must lookup ilg 3442 * again, since we've not been holding conn_lock. 3443 */ 3444 in6_addr_t delsrc = 3445 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3446 mutex_enter(&connp->conn_lock); 3447 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3448 ASSERT(ilg != NULL); 3449 ilg_delete(connp, ilg, &delsrc); 3450 mutex_exit(&connp->conn_lock); 3451 l_free(new_filter); 3452 return (error); 3453 } 3454 3455 l_free(new_filter); 3456 3457 return (0); 3458 } 3459 3460 /* 3461 * Find an IPv4 ilg matching group, ill and source 3462 */ 3463 ilg_t * 3464 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3465 { 3466 in6_addr_t v6group, v6src; 3467 int i; 3468 boolean_t isinlist; 3469 ilg_t *ilg; 3470 ipif_t *ipif; 3471 ill_t *ilg_ill; 3472 3473 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3474 3475 /* 3476 * INADDR_ANY is represented as the IPv6 unspecified addr. 3477 */ 3478 if (group == INADDR_ANY) 3479 v6group = ipv6_all_zeros; 3480 else 3481 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3482 3483 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3484 ilg = &connp->conn_ilg[i]; 3485 if ((ipif = ilg->ilg_ipif) == NULL || 3486 (ilg->ilg_flags & ILG_DELETED) != 0) 3487 continue; 3488 ASSERT(ilg->ilg_ill == NULL); 3489 ilg_ill = ipif->ipif_ill; 3490 ASSERT(!ilg_ill->ill_isv6); 3491 if (IS_ON_SAME_LAN(ilg_ill, ill) && 3492 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3493 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3494 /* no source filter, so this is a match */ 3495 return (ilg); 3496 } 3497 break; 3498 } 3499 } 3500 if (i == connp->conn_ilg_inuse) 3501 return (NULL); 3502 3503 /* 3504 * we have an ilg with matching ill and group; but 3505 * the ilg has a source list that we must check. 3506 */ 3507 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3508 isinlist = B_FALSE; 3509 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3510 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3511 isinlist = B_TRUE; 3512 break; 3513 } 3514 } 3515 3516 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3517 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3518 return (ilg); 3519 3520 return (NULL); 3521 } 3522 3523 /* 3524 * Find an IPv6 ilg matching group, ill, and source 3525 */ 3526 ilg_t * 3527 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3528 const in6_addr_t *v6src, ill_t *ill) 3529 { 3530 int i; 3531 boolean_t isinlist; 3532 ilg_t *ilg; 3533 ill_t *ilg_ill; 3534 3535 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3536 3537 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3538 ilg = &connp->conn_ilg[i]; 3539 if ((ilg_ill = ilg->ilg_ill) == NULL || 3540 (ilg->ilg_flags & ILG_DELETED) != 0) 3541 continue; 3542 ASSERT(ilg->ilg_ipif == NULL); 3543 ASSERT(ilg_ill->ill_isv6); 3544 if (IS_ON_SAME_LAN(ilg_ill, ill) && 3545 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3546 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3547 /* no source filter, so this is a match */ 3548 return (ilg); 3549 } 3550 break; 3551 } 3552 } 3553 if (i == connp->conn_ilg_inuse) 3554 return (NULL); 3555 3556 /* 3557 * we have an ilg with matching ill and group; but 3558 * the ilg has a source list that we must check. 3559 */ 3560 isinlist = B_FALSE; 3561 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3562 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3563 isinlist = B_TRUE; 3564 break; 3565 } 3566 } 3567 3568 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3569 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3570 return (ilg); 3571 3572 return (NULL); 3573 } 3574 3575 /* 3576 * Find an IPv6 ilg matching group and ill 3577 */ 3578 ilg_t * 3579 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3580 { 3581 ilg_t *ilg; 3582 int i; 3583 ill_t *mem_ill; 3584 3585 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3586 3587 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3588 ilg = &connp->conn_ilg[i]; 3589 if ((mem_ill = ilg->ilg_ill) == NULL || 3590 (ilg->ilg_flags & ILG_DELETED) != 0) 3591 continue; 3592 ASSERT(ilg->ilg_ipif == NULL); 3593 ASSERT(mem_ill->ill_isv6); 3594 if (mem_ill == ill && 3595 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3596 return (ilg); 3597 } 3598 return (NULL); 3599 } 3600 3601 /* 3602 * Find an IPv4 ilg matching group and ipif 3603 */ 3604 static ilg_t * 3605 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3606 { 3607 in6_addr_t v6group; 3608 int i; 3609 ilg_t *ilg; 3610 3611 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3612 ASSERT(!ipif->ipif_ill->ill_isv6); 3613 3614 if (group == INADDR_ANY) 3615 v6group = ipv6_all_zeros; 3616 else 3617 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3618 3619 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3620 ilg = &connp->conn_ilg[i]; 3621 if ((ilg->ilg_flags & ILG_DELETED) == 0 && 3622 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) && 3623 ilg->ilg_ipif == ipif) 3624 return (ilg); 3625 } 3626 return (NULL); 3627 } 3628 3629 /* 3630 * If a source address is passed in (src != NULL and src is not 3631 * unspecified), remove the specified src addr from the given ilg's 3632 * filter list, else delete the ilg. 3633 */ 3634 static void 3635 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3636 { 3637 int i; 3638 3639 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3640 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3641 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3642 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3643 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3644 3645 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3646 if (connp->conn_ilg_walker_cnt != 0) { 3647 ilg->ilg_flags |= ILG_DELETED; 3648 return; 3649 } 3650 3651 FREE_SLIST(ilg->ilg_filter); 3652 3653 i = ilg - &connp->conn_ilg[0]; 3654 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3655 3656 /* Move other entries up one step */ 3657 connp->conn_ilg_inuse--; 3658 for (; i < connp->conn_ilg_inuse; i++) 3659 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3660 3661 if (connp->conn_ilg_inuse == 0) { 3662 mi_free((char *)connp->conn_ilg); 3663 connp->conn_ilg = NULL; 3664 cv_broadcast(&connp->conn_refcv); 3665 } 3666 } else { 3667 l_remove(ilg->ilg_filter, src); 3668 } 3669 } 3670 3671 /* 3672 * Called from conn close. No new ilg can be added or removed. 3673 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3674 * will return error if conn has started closing. 3675 */ 3676 void 3677 ilg_delete_all(conn_t *connp) 3678 { 3679 int i; 3680 ipif_t *ipif = NULL; 3681 ill_t *ill = NULL; 3682 ilg_t *ilg; 3683 in6_addr_t v6group; 3684 boolean_t success; 3685 ipsq_t *ipsq; 3686 3687 mutex_enter(&connp->conn_lock); 3688 retry: 3689 ILG_WALKER_HOLD(connp); 3690 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3691 ilg = &connp->conn_ilg[i]; 3692 /* 3693 * Since this walk is not atomic (we drop the 3694 * conn_lock and wait in ipsq_enter) we need 3695 * to check for the ILG_DELETED flag. 3696 */ 3697 if (ilg->ilg_flags & ILG_DELETED) 3698 continue; 3699 3700 if (IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)) { 3701 ipif = ilg->ilg_ipif; 3702 ill = ipif->ipif_ill; 3703 } else { 3704 ipif = NULL; 3705 ill = ilg->ilg_ill; 3706 } 3707 3708 /* 3709 * We may not be able to refhold the ill if the ill/ipif 3710 * is changing. But we need to make sure that the ill will 3711 * not vanish. So we just bump up the ill_waiter count. 3712 * If we are unable to do even that, then the ill is closing, 3713 * in which case the unplumb thread will handle the cleanup, 3714 * and we move on to the next ilg. 3715 */ 3716 if (!ill_waiter_inc(ill)) 3717 continue; 3718 3719 mutex_exit(&connp->conn_lock); 3720 /* 3721 * To prevent deadlock between ill close which waits inside 3722 * the perimeter, and conn close, ipsq_enter returns error, 3723 * the moment ILL_CONDEMNED is set, in which case ill close 3724 * takes responsibility to cleanup the ilgs. Note that we 3725 * have not yet set condemned flag, otherwise the conn can't 3726 * be refheld for cleanup by those routines and it would be 3727 * a mutual deadlock. 3728 */ 3729 success = ipsq_enter(ill, B_FALSE, NEW_OP); 3730 ipsq = ill->ill_phyint->phyint_ipsq; 3731 ill_waiter_dcr(ill); 3732 mutex_enter(&connp->conn_lock); 3733 if (!success) 3734 continue; 3735 3736 /* 3737 * Move on if the ilg was deleted while conn_lock was dropped. 3738 */ 3739 if (ilg->ilg_flags & ILG_DELETED) { 3740 mutex_exit(&connp->conn_lock); 3741 ipsq_exit(ipsq); 3742 mutex_enter(&connp->conn_lock); 3743 continue; 3744 } 3745 v6group = ilg->ilg_v6group; 3746 ilg_delete(connp, ilg, NULL); 3747 mutex_exit(&connp->conn_lock); 3748 3749 if (ipif != NULL) { 3750 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3751 B_FALSE, B_TRUE); 3752 } else { 3753 (void) ip_delmulti_v6(&v6group, ill, 3754 connp->conn_zoneid, B_FALSE, B_TRUE); 3755 } 3756 ipsq_exit(ipsq); 3757 mutex_enter(&connp->conn_lock); 3758 } 3759 ILG_WALKER_RELE(connp); 3760 3761 /* If any ill was skipped above wait and retry */ 3762 if (connp->conn_ilg_inuse != 0) { 3763 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3764 goto retry; 3765 } 3766 mutex_exit(&connp->conn_lock); 3767 } 3768 3769 /* 3770 * Called from ill close by ipcl_walk for clearing conn_ilg and 3771 * conn_multicast_ipif for a given ipif. conn is held by caller. 3772 * Note that ipcl_walk only walks conns that are not yet condemned. 3773 * condemned conns can't be refheld. For this reason, conn must become clean 3774 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3775 * condemned flag. 3776 */ 3777 static void 3778 conn_delete_ipif(conn_t *connp, caddr_t arg) 3779 { 3780 ipif_t *ipif = (ipif_t *)arg; 3781 int i; 3782 char group_buf1[INET6_ADDRSTRLEN]; 3783 char group_buf2[INET6_ADDRSTRLEN]; 3784 ipaddr_t group; 3785 ilg_t *ilg; 3786 3787 /* 3788 * Even though conn_ilg_inuse can change while we are in this loop, 3789 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3790 * be created or deleted for this connp, on this ill, since this ill 3791 * is the perimeter. So we won't miss any ilg in this cleanup. 3792 */ 3793 mutex_enter(&connp->conn_lock); 3794 3795 /* 3796 * Increment the walker count, so that ilg repacking does not 3797 * occur while we are in the loop. 3798 */ 3799 ILG_WALKER_HOLD(connp); 3800 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3801 ilg = &connp->conn_ilg[i]; 3802 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3803 continue; 3804 /* 3805 * ip_close cannot be cleaning this ilg at the same time. 3806 * since it also has to execute in this ill's perimeter which 3807 * we are now holding. Only a clean conn can be condemned. 3808 */ 3809 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3810 3811 /* Blow away the membership */ 3812 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3813 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3814 group_buf1, sizeof (group_buf1)), 3815 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3816 group_buf2, sizeof (group_buf2)), 3817 ipif->ipif_ill->ill_name)); 3818 3819 /* ilg_ipif is NULL for V6, so we won't be here */ 3820 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3821 3822 group = V4_PART_OF_V6(ilg->ilg_v6group); 3823 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3824 mutex_exit(&connp->conn_lock); 3825 3826 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3827 mutex_enter(&connp->conn_lock); 3828 } 3829 3830 /* 3831 * If we are the last walker, need to physically delete the 3832 * ilgs and repack. 3833 */ 3834 ILG_WALKER_RELE(connp); 3835 3836 if (connp->conn_multicast_ipif == ipif) { 3837 /* Revert to late binding */ 3838 connp->conn_multicast_ipif = NULL; 3839 } 3840 mutex_exit(&connp->conn_lock); 3841 3842 conn_delete_ire(connp, (caddr_t)ipif); 3843 } 3844 3845 /* 3846 * Called from ill close by ipcl_walk for clearing conn_ilg and 3847 * conn_multicast_ill for a given ill. conn is held by caller. 3848 * Note that ipcl_walk only walks conns that are not yet condemned. 3849 * condemned conns can't be refheld. For this reason, conn must become clean 3850 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3851 * condemned flag. 3852 */ 3853 static void 3854 conn_delete_ill(conn_t *connp, caddr_t arg) 3855 { 3856 ill_t *ill = (ill_t *)arg; 3857 int i; 3858 char group_buf[INET6_ADDRSTRLEN]; 3859 in6_addr_t v6group; 3860 ilg_t *ilg; 3861 3862 /* 3863 * Even though conn_ilg_inuse can change while we are in this loop, 3864 * no new ilgs can be created/deleted for this connp, on this 3865 * ill, since this ill is the perimeter. So we won't miss any ilg 3866 * in this cleanup. 3867 */ 3868 mutex_enter(&connp->conn_lock); 3869 3870 /* 3871 * Increment the walker count, so that ilg repacking does not 3872 * occur while we are in the loop. 3873 */ 3874 ILG_WALKER_HOLD(connp); 3875 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3876 ilg = &connp->conn_ilg[i]; 3877 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3878 /* 3879 * ip_close cannot be cleaning this ilg at the same 3880 * time, since it also has to execute in this ill's 3881 * perimeter which we are now holding. Only a clean 3882 * conn can be condemned. 3883 */ 3884 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3885 3886 /* Blow away the membership */ 3887 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3888 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3889 group_buf, sizeof (group_buf)), 3890 ill->ill_name)); 3891 3892 v6group = ilg->ilg_v6group; 3893 ilg_delete(connp, ilg, NULL); 3894 mutex_exit(&connp->conn_lock); 3895 3896 (void) ip_delmulti_v6(&v6group, ill, 3897 connp->conn_zoneid, B_FALSE, B_TRUE); 3898 mutex_enter(&connp->conn_lock); 3899 } 3900 } 3901 /* 3902 * If we are the last walker, need to physically delete the 3903 * ilgs and repack. 3904 */ 3905 ILG_WALKER_RELE(connp); 3906 3907 if (connp->conn_multicast_ill == ill) { 3908 /* Revert to late binding */ 3909 connp->conn_multicast_ill = NULL; 3910 } 3911 mutex_exit(&connp->conn_lock); 3912 } 3913 3914 /* 3915 * Called when an ipif is unplumbed to make sure that there are no 3916 * dangling conn references to that ipif. 3917 * Handles ilg_ipif and conn_multicast_ipif 3918 */ 3919 void 3920 reset_conn_ipif(ipif) 3921 ipif_t *ipif; 3922 { 3923 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 3924 3925 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 3926 } 3927 3928 /* 3929 * Called when an ill is unplumbed to make sure that there are no 3930 * dangling conn references to that ill. 3931 * Handles ilg_ill, conn_multicast_ill. 3932 */ 3933 void 3934 reset_conn_ill(ill_t *ill) 3935 { 3936 ip_stack_t *ipst = ill->ill_ipst; 3937 3938 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 3939 } 3940 3941 #ifdef DEBUG 3942 /* 3943 * Walk functions walk all the interfaces in the system to make 3944 * sure that there is no refernece to the ipif or ill that is 3945 * going away. 3946 */ 3947 int 3948 ilm_walk_ill(ill_t *ill) 3949 { 3950 int cnt = 0; 3951 ill_t *till; 3952 ilm_t *ilm; 3953 ill_walk_context_t ctx; 3954 ip_stack_t *ipst = ill->ill_ipst; 3955 3956 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 3957 till = ILL_START_WALK_ALL(&ctx, ipst); 3958 for (; till != NULL; till = ill_next(&ctx, till)) { 3959 mutex_enter(&till->ill_lock); 3960 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 3961 if (ilm->ilm_ill == ill) { 3962 cnt++; 3963 } 3964 } 3965 mutex_exit(&till->ill_lock); 3966 } 3967 rw_exit(&ipst->ips_ill_g_lock); 3968 3969 return (cnt); 3970 } 3971 3972 /* 3973 * This function is called before the ipif is freed. 3974 */ 3975 int 3976 ilm_walk_ipif(ipif_t *ipif) 3977 { 3978 int cnt = 0; 3979 ill_t *till; 3980 ilm_t *ilm; 3981 ill_walk_context_t ctx; 3982 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 3983 3984 till = ILL_START_WALK_ALL(&ctx, ipst); 3985 for (; till != NULL; till = ill_next(&ctx, till)) { 3986 mutex_enter(&till->ill_lock); 3987 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 3988 if (ilm->ilm_ipif == ipif) { 3989 cnt++; 3990 } 3991 } 3992 mutex_exit(&till->ill_lock); 3993 } 3994 return (cnt); 3995 } 3996 #endif 3997