1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/ddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sdt.h> 35 #include <sys/zone.h> 36 37 #include <sys/param.h> 38 #include <sys/socket.h> 39 #include <sys/sockio.h> 40 #include <net/if.h> 41 #include <sys/systm.h> 42 #include <sys/strsubr.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 74 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 76 ipif_t *ipif); 77 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 78 mcast_record_t fmode, ipaddr_t src); 79 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 80 mcast_record_t fmode, const in6_addr_t *v6src); 81 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 82 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 83 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 84 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 85 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 86 static void conn_ilg_reap(conn_t *connp); 87 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 88 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 89 static int ip_opt_delete_group_excl_v6(conn_t *connp, 90 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 91 const in6_addr_t *v6src); 92 static void ill_ilm_walker_hold(ill_t *ill); 93 static void ill_ilm_walker_rele(ill_t *ill); 94 95 /* 96 * MT notes: 97 * 98 * Multicast joins operate on both the ilg and ilm structures. Multiple 99 * threads operating on an conn (socket) trying to do multicast joins 100 * need to synchronize when operating on the ilg. Multiple threads 101 * potentially operating on different conn (socket endpoints) trying to 102 * do multicast joins could eventually end up trying to manipulate the 103 * ilm simultaneously and need to synchronize access to the ilm. Currently, 104 * this is done by synchronizing join/leave via per-phyint ipsq_t 105 * serialization. 106 * 107 * An ilm is an IP data structure used to track multicast join/leave. 108 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 109 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 110 * referencing the ilm. ilms are created / destroyed only as writer. ilms 111 * are not passed around, instead they are looked up and used under the 112 * ill_lock or as writer. So we don't need a dynamic refcount of the number 113 * of threads holding reference to an ilm. 114 * 115 * Multicast Join operation: 116 * 117 * The first step is to determine the ipif (v4) or ill (v6) on which 118 * the join operation is to be done. The join is done after becoming 119 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 120 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 121 * Multiple threads can attempt to join simultaneously on different ipif/ill 122 * on the same conn. In this case the ipsq serialization does not help in 123 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 124 * The conn_lock also protects all the ilg_t members. 125 * 126 * Leave operation. 127 * 128 * Similar to the join operation, the first step is to determine the ipif 129 * or ill (v6) on which the leave operation is to be done. The leave operation 130 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 131 * As with join ilg modification is done under the protection of the conn lock. 132 */ 133 134 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 135 ASSERT(connp != NULL); \ 136 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 137 (first_mp), (func), (type), B_TRUE); \ 138 if ((ipsq) == NULL) { \ 139 ipif_refrele(ipif); \ 140 return (EINPROGRESS); \ 141 } 142 143 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 144 ASSERT(connp != NULL); \ 145 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 146 (first_mp), (func), (type), B_TRUE); \ 147 if ((ipsq) == NULL) { \ 148 ill_refrele(ill); \ 149 return (EINPROGRESS); \ 150 } 151 152 #define IPSQ_EXIT(ipsq) \ 153 if (ipsq != NULL) \ 154 ipsq_exit(ipsq); 155 156 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 157 158 #define ILG_WALKER_RELE(connp) \ 159 { \ 160 (connp)->conn_ilg_walker_cnt--; \ 161 if ((connp)->conn_ilg_walker_cnt == 0) \ 162 conn_ilg_reap(connp); \ 163 } 164 165 static void 166 conn_ilg_reap(conn_t *connp) 167 { 168 int to; 169 int from; 170 ilg_t *ilg; 171 172 ASSERT(MUTEX_HELD(&connp->conn_lock)); 173 174 to = 0; 175 from = 0; 176 while (from < connp->conn_ilg_inuse) { 177 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 178 ilg = &connp->conn_ilg[from]; 179 FREE_SLIST(ilg->ilg_filter); 180 ilg->ilg_flags &= ~ILG_DELETED; 181 from++; 182 continue; 183 } 184 if (to != from) 185 connp->conn_ilg[to] = connp->conn_ilg[from]; 186 to++; 187 from++; 188 } 189 190 connp->conn_ilg_inuse = to; 191 192 if (connp->conn_ilg_inuse == 0) { 193 mi_free((char *)connp->conn_ilg); 194 connp->conn_ilg = NULL; 195 cv_broadcast(&connp->conn_refcv); 196 } 197 } 198 199 #define GETSTRUCT(structure, number) \ 200 ((structure *)mi_zalloc(sizeof (structure) * (number))) 201 202 #define ILG_ALLOC_CHUNK 16 203 204 /* 205 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 206 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 207 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 208 * returned ilg). Returns NULL on failure, in which case `*errp' will be 209 * filled in with the reason. 210 * 211 * Assumes connp->conn_lock is held. 212 */ 213 static ilg_t * 214 conn_ilg_alloc(conn_t *connp, int *errp) 215 { 216 ilg_t *new, *ret; 217 int curcnt; 218 219 ASSERT(MUTEX_HELD(&connp->conn_lock)); 220 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 221 222 /* 223 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not 224 * create any ilgs. 225 */ 226 if (connp->conn_state_flags & CONN_CLOSING) { 227 *errp = EINVAL; 228 return (NULL); 229 } 230 231 if (connp->conn_ilg == NULL) { 232 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 233 if (connp->conn_ilg == NULL) { 234 *errp = ENOMEM; 235 return (NULL); 236 } 237 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 238 connp->conn_ilg_inuse = 0; 239 } 240 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 241 if (connp->conn_ilg_walker_cnt != 0) { 242 /* 243 * XXX We cannot grow the array at this point 244 * because a list walker could be in progress, and 245 * we cannot wipe out the existing array until the 246 * walker is done. Just return NULL for now. 247 * ilg_delete_all() will have to be changed when 248 * this logic is changed. 249 */ 250 *errp = EBUSY; 251 return (NULL); 252 } 253 curcnt = connp->conn_ilg_allocated; 254 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 255 if (new == NULL) { 256 *errp = ENOMEM; 257 return (NULL); 258 } 259 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 260 mi_free((char *)connp->conn_ilg); 261 connp->conn_ilg = new; 262 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 263 } 264 265 ret = &connp->conn_ilg[connp->conn_ilg_inuse++]; 266 ASSERT((ret->ilg_flags & ILG_DELETED) == 0); 267 bzero(ret, sizeof (*ret)); 268 return (ret); 269 } 270 271 typedef struct ilm_fbld_s { 272 ilm_t *fbld_ilm; 273 int fbld_in_cnt; 274 int fbld_ex_cnt; 275 slist_t fbld_in; 276 slist_t fbld_ex; 277 boolean_t fbld_in_overflow; 278 } ilm_fbld_t; 279 280 static void 281 ilm_bld_flists(conn_t *conn, void *arg) 282 { 283 int i; 284 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 285 ilm_t *ilm = fbld->fbld_ilm; 286 in6_addr_t *v6group = &ilm->ilm_v6addr; 287 288 if (conn->conn_ilg_inuse == 0) 289 return; 290 291 /* 292 * Since we can't break out of the ipcl_walk once started, we still 293 * have to look at every conn. But if we've already found one 294 * (EXCLUDE, NULL) list, there's no need to keep checking individual 295 * ilgs--that will be our state. 296 */ 297 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 298 return; 299 300 /* 301 * Check this conn's ilgs to see if any are interested in our 302 * ilm (group, interface match). If so, update the master 303 * include and exclude lists we're building in the fbld struct 304 * with this ilg's filter info. 305 */ 306 mutex_enter(&conn->conn_lock); 307 for (i = 0; i < conn->conn_ilg_inuse; i++) { 308 ilg_t *ilg = &conn->conn_ilg[i]; 309 if ((ilg->ilg_ill == ilm->ilm_ill) && 310 (ilg->ilg_ipif == ilm->ilm_ipif) && 311 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 312 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 313 fbld->fbld_in_cnt++; 314 if (!fbld->fbld_in_overflow) 315 l_union_in_a(&fbld->fbld_in, 316 ilg->ilg_filter, 317 &fbld->fbld_in_overflow); 318 } else { 319 fbld->fbld_ex_cnt++; 320 /* 321 * On the first exclude list, don't try to do 322 * an intersection, as the master exclude list 323 * is intentionally empty. If the master list 324 * is still empty on later iterations, that 325 * means we have at least one ilg with an empty 326 * exclude list, so that should be reflected 327 * when we take the intersection. 328 */ 329 if (fbld->fbld_ex_cnt == 1) { 330 if (ilg->ilg_filter != NULL) 331 l_copy(ilg->ilg_filter, 332 &fbld->fbld_ex); 333 } else { 334 l_intersection_in_a(&fbld->fbld_ex, 335 ilg->ilg_filter); 336 } 337 } 338 /* there will only be one match, so break now. */ 339 break; 340 } 341 } 342 mutex_exit(&conn->conn_lock); 343 } 344 345 static void 346 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 347 { 348 ilm_fbld_t fbld; 349 ip_stack_t *ipst = ilm->ilm_ipst; 350 351 fbld.fbld_ilm = ilm; 352 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 353 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 354 fbld.fbld_in_overflow = B_FALSE; 355 356 /* first, construct our master include and exclude lists */ 357 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 358 359 /* now use those master lists to generate the interface filter */ 360 361 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 362 if (fbld.fbld_in_overflow) { 363 *fmode = MODE_IS_EXCLUDE; 364 flist->sl_numsrc = 0; 365 return; 366 } 367 368 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 369 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 370 *fmode = MODE_IS_INCLUDE; 371 flist->sl_numsrc = 0; 372 return; 373 } 374 375 /* 376 * If there are no exclude lists, then the interface filter 377 * is INCLUDE, with its filter list equal to fbld_in. A single 378 * exclude list makes the interface filter EXCLUDE, with its 379 * filter list equal to (fbld_ex - fbld_in). 380 */ 381 if (fbld.fbld_ex_cnt == 0) { 382 *fmode = MODE_IS_INCLUDE; 383 l_copy(&fbld.fbld_in, flist); 384 } else { 385 *fmode = MODE_IS_EXCLUDE; 386 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 387 } 388 } 389 390 static int 391 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 392 boolean_t isv6) 393 { 394 mcast_record_t fmode; 395 slist_t *flist; 396 boolean_t fdefault; 397 char buf[INET6_ADDRSTRLEN]; 398 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 399 400 /* 401 * There are several cases where the ilm's filter state 402 * defaults to (EXCLUDE, NULL): 403 * - we've had previous joins without associated ilgs 404 * - this join has no associated ilg 405 * - the ilg's filter state is (EXCLUDE, NULL) 406 */ 407 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 408 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 409 410 /* attempt mallocs (if needed) before doing anything else */ 411 if ((flist = l_alloc()) == NULL) 412 return (ENOMEM); 413 if (!fdefault && ilm->ilm_filter == NULL) { 414 ilm->ilm_filter = l_alloc(); 415 if (ilm->ilm_filter == NULL) { 416 l_free(flist); 417 return (ENOMEM); 418 } 419 } 420 421 if (ilgstat != ILGSTAT_CHANGE) 422 ilm->ilm_refcnt++; 423 424 if (ilgstat == ILGSTAT_NONE) 425 ilm->ilm_no_ilg_cnt++; 426 427 /* 428 * Determine new filter state. If it's not the default 429 * (EXCLUDE, NULL), we must walk the conn list to find 430 * any ilgs interested in this group, and re-build the 431 * ilm filter. 432 */ 433 if (fdefault) { 434 fmode = MODE_IS_EXCLUDE; 435 flist->sl_numsrc = 0; 436 } else { 437 ilm_gen_filter(ilm, &fmode, flist); 438 } 439 440 /* make sure state actually changed; nothing to do if not. */ 441 if ((ilm->ilm_fmode == fmode) && 442 !lists_are_different(ilm->ilm_filter, flist)) { 443 l_free(flist); 444 return (0); 445 } 446 447 /* send the state change report */ 448 if (!IS_LOOPBACK(ill)) { 449 if (isv6) 450 mld_statechange(ilm, fmode, flist); 451 else 452 igmp_statechange(ilm, fmode, flist); 453 } 454 455 /* update the ilm state */ 456 ilm->ilm_fmode = fmode; 457 if (flist->sl_numsrc > 0) 458 l_copy(flist, ilm->ilm_filter); 459 else 460 CLEAR_SLIST(ilm->ilm_filter); 461 462 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 463 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 464 465 l_free(flist); 466 return (0); 467 } 468 469 static int 470 ilm_update_del(ilm_t *ilm, boolean_t isv6) 471 { 472 mcast_record_t fmode; 473 slist_t *flist; 474 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 475 476 ip1dbg(("ilm_update_del: still %d left; updating state\n", 477 ilm->ilm_refcnt)); 478 479 if ((flist = l_alloc()) == NULL) 480 return (ENOMEM); 481 482 /* 483 * If present, the ilg in question has already either been 484 * updated or removed from our list; so all we need to do 485 * now is walk the list to update the ilm filter state. 486 * 487 * Skip the list walk if we have any no-ilg joins, which 488 * cause the filter state to revert to (EXCLUDE, NULL). 489 */ 490 if (ilm->ilm_no_ilg_cnt != 0) { 491 fmode = MODE_IS_EXCLUDE; 492 flist->sl_numsrc = 0; 493 } else { 494 ilm_gen_filter(ilm, &fmode, flist); 495 } 496 497 /* check to see if state needs to be updated */ 498 if ((ilm->ilm_fmode == fmode) && 499 (!lists_are_different(ilm->ilm_filter, flist))) { 500 l_free(flist); 501 return (0); 502 } 503 504 if (!IS_LOOPBACK(ill)) { 505 if (isv6) 506 mld_statechange(ilm, fmode, flist); 507 else 508 igmp_statechange(ilm, fmode, flist); 509 } 510 511 ilm->ilm_fmode = fmode; 512 if (flist->sl_numsrc > 0) { 513 if (ilm->ilm_filter == NULL) { 514 ilm->ilm_filter = l_alloc(); 515 if (ilm->ilm_filter == NULL) { 516 char buf[INET6_ADDRSTRLEN]; 517 ip1dbg(("ilm_update_del: failed to alloc ilm " 518 "filter; no source filtering for %s on %s", 519 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 520 buf, sizeof (buf)), ill->ill_name)); 521 ilm->ilm_fmode = MODE_IS_EXCLUDE; 522 l_free(flist); 523 return (0); 524 } 525 } 526 l_copy(flist, ilm->ilm_filter); 527 } else { 528 CLEAR_SLIST(ilm->ilm_filter); 529 } 530 531 l_free(flist); 532 return (0); 533 } 534 535 /* 536 * INADDR_ANY means all multicast addresses. 537 * INADDR_ANY is stored as IPv6 unspecified addr. 538 */ 539 int 540 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 541 mcast_record_t ilg_fmode, slist_t *ilg_flist) 542 { 543 ill_t *ill = ipif->ipif_ill; 544 ilm_t *ilm; 545 in6_addr_t v6group; 546 int ret; 547 548 ASSERT(IAM_WRITER_IPIF(ipif)); 549 550 if (!CLASSD(group) && group != INADDR_ANY) 551 return (EINVAL); 552 553 if (IS_UNDER_IPMP(ill)) 554 return (EINVAL); 555 556 /* 557 * INADDR_ANY is represented as the IPv6 unspecified addr. 558 */ 559 if (group == INADDR_ANY) 560 v6group = ipv6_all_zeros; 561 else 562 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 563 564 ilm = ilm_lookup_ipif(ipif, group); 565 /* 566 * Since we are writer, we know the ilm_flags itself cannot 567 * change at this point, and ilm_lookup_ipif would not have 568 * returned a DELETED ilm. However, the data path can free 569 * ilm->ilm_next via ilm_walker_cleanup() so we can safely 570 * access anything in ilm except ilm_next (for safe access to 571 * ilm_next we'd have to take the ill_lock). 572 */ 573 if (ilm != NULL) 574 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 575 576 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 577 ipif->ipif_zoneid); 578 if (ilm == NULL) 579 return (ENOMEM); 580 581 if (group == INADDR_ANY) { 582 /* 583 * Check how many ipif's have members in this group - 584 * if more then one we should not tell the driver to join 585 * this time 586 */ 587 if (ilm_numentries_v6(ill, &v6group) > 1) 588 return (0); 589 ret = ill_join_allmulti(ill); 590 if (ret != 0) 591 ilm_delete(ilm); 592 return (ret); 593 } 594 595 if (!IS_LOOPBACK(ill)) 596 igmp_joingroup(ilm); 597 598 if (ilm_numentries_v6(ill, &v6group) > 1) 599 return (0); 600 601 ret = ip_ll_addmulti_v6(ipif, &v6group); 602 if (ret != 0) 603 ilm_delete(ilm); 604 return (ret); 605 } 606 607 /* 608 * The unspecified address means all multicast addresses. 609 * 610 * ill identifies the interface to join on. 611 * 612 * ilgstat tells us if there's an ilg associated with this join, 613 * and if so, if it's a new ilg or a change to an existing one. 614 * ilg_fmode and ilg_flist give us the current filter state of 615 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 616 */ 617 int 618 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 619 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist) 620 { 621 ilm_t *ilm; 622 int ret; 623 624 ASSERT(IAM_WRITER_ILL(ill)); 625 626 if (!IN6_IS_ADDR_MULTICAST(v6group) && 627 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 628 return (EINVAL); 629 } 630 631 if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_MC_SOLICITEDNODE(v6group)) 632 return (EINVAL); 633 634 /* 635 * An ilm is uniquely identified by the tuple of (group, ill) where 636 * `group' is the multicast group address, and `ill' is the interface 637 * on which it is currently joined. 638 */ 639 ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid); 640 if (ilm != NULL) 641 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 642 643 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 644 ilg_flist, zoneid); 645 if (ilm == NULL) 646 return (ENOMEM); 647 648 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 649 /* 650 * Check how many ipif's that have members in this group - 651 * if more then one we should not tell the driver to join 652 * this time 653 */ 654 if (ilm_numentries_v6(ill, v6group) > 1) 655 return (0); 656 ret = ill_join_allmulti(ill); 657 if (ret != 0) 658 ilm_delete(ilm); 659 return (ret); 660 } 661 662 if (!IS_LOOPBACK(ill)) 663 mld_joingroup(ilm); 664 665 /* 666 * If we have more then one we should not tell the driver 667 * to join this time. 668 */ 669 if (ilm_numentries_v6(ill, v6group) > 1) 670 return (0); 671 672 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 673 if (ret != 0) 674 ilm_delete(ilm); 675 return (ret); 676 } 677 678 /* 679 * Send a multicast request to the driver for enabling multicast reception 680 * for v6groupp address. The caller has already checked whether it is 681 * appropriate to send one or not. 682 */ 683 int 684 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 685 { 686 mblk_t *mp; 687 uint32_t addrlen, addroff; 688 char group_buf[INET6_ADDRSTRLEN]; 689 690 ASSERT(IAM_WRITER_ILL(ill)); 691 692 /* 693 * If we're on the IPMP ill, use the nominated multicast interface to 694 * send and receive DLPI messages, if one exists. (If none exists, 695 * there are no usable interfaces and thus nothing to do.) 696 */ 697 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 698 return (0); 699 700 /* 701 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 702 * on. 703 */ 704 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 705 &addrlen, &addroff); 706 if (!mp) 707 return (ENOMEM); 708 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 709 ipaddr_t v4group; 710 711 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 712 /* 713 * NOTE!!! 714 * The "addroff" passed in here was calculated by 715 * ill_create_dl(), and will be used by ill_create_squery() 716 * to perform some twisted coding magic. It is the offset 717 * into the dl_xxx_req of the hw addr. Here, it will be 718 * added to b_wptr - b_rptr to create a magic number that 719 * is not an offset into this squery mblk. 720 * The actual hardware address will be accessed only in the 721 * dl_xxx_req, not in the squery. More importantly, 722 * that hardware address can *only* be accessed in this 723 * mblk chain by calling mi_offset_param_c(), which uses 724 * the magic number in the squery hw offset field to go 725 * to the *next* mblk (the dl_xxx_req), subtract the 726 * (b_wptr - b_rptr), and find the actual offset into 727 * the dl_xxx_req. 728 * Any method that depends on using the 729 * offset field in the dl_disabmulti_req or squery 730 * to find either hardware address will similarly fail. 731 * 732 * Look in ar_entry_squery() in arp.c to see how this offset 733 * is used. 734 */ 735 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 736 if (!mp) 737 return (ENOMEM); 738 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 739 inet_ntop(AF_INET6, v6groupp, group_buf, 740 sizeof (group_buf)), 741 ill->ill_name)); 742 putnext(ill->ill_rq, mp); 743 } else { 744 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 745 " %s\n", 746 inet_ntop(AF_INET6, v6groupp, group_buf, 747 sizeof (group_buf)), 748 ill->ill_name)); 749 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 750 } 751 return (0); 752 } 753 754 /* 755 * Send a multicast request to the driver for enabling multicast 756 * membership for v6group if appropriate. 757 */ 758 static int 759 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 760 { 761 ill_t *ill = ipif->ipif_ill; 762 763 ASSERT(IAM_WRITER_IPIF(ipif)); 764 765 if (ill->ill_net_type != IRE_IF_RESOLVER || 766 ipif->ipif_flags & IPIF_POINTOPOINT) { 767 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 768 return (0); /* Must be IRE_IF_NORESOLVER */ 769 } 770 771 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 772 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 773 return (0); 774 } 775 if (!ill->ill_dl_up) { 776 /* 777 * Nobody there. All multicast addresses will be re-joined 778 * when we get the DL_BIND_ACK bringing the interface up. 779 */ 780 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 781 return (0); 782 } 783 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 784 } 785 786 /* 787 * INADDR_ANY means all multicast addresses. 788 * INADDR_ANY is stored as the IPv6 unspecified addr. 789 */ 790 int 791 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 792 { 793 ill_t *ill = ipif->ipif_ill; 794 ilm_t *ilm; 795 in6_addr_t v6group; 796 797 ASSERT(IAM_WRITER_IPIF(ipif)); 798 799 if (!CLASSD(group) && group != INADDR_ANY) 800 return (EINVAL); 801 802 /* 803 * INADDR_ANY is represented as the IPv6 unspecified addr. 804 */ 805 if (group == INADDR_ANY) 806 v6group = ipv6_all_zeros; 807 else 808 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 809 810 /* 811 * Look for a match on the ipif. 812 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 813 */ 814 ilm = ilm_lookup_ipif(ipif, group); 815 if (ilm == NULL) 816 return (ENOENT); 817 818 /* Update counters */ 819 if (no_ilg) 820 ilm->ilm_no_ilg_cnt--; 821 822 if (leaving) 823 ilm->ilm_refcnt--; 824 825 if (ilm->ilm_refcnt > 0) 826 return (ilm_update_del(ilm, B_FALSE)); 827 828 if (group == INADDR_ANY) { 829 ilm_delete(ilm); 830 /* 831 * Check how many ipif's that have members in this group - 832 * if there are still some left then don't tell the driver 833 * to drop it. 834 */ 835 if (ilm_numentries_v6(ill, &v6group) != 0) 836 return (0); 837 838 /* If we never joined, then don't leave. */ 839 if (ill->ill_join_allmulti) 840 ill_leave_allmulti(ill); 841 842 return (0); 843 } 844 845 if (!IS_LOOPBACK(ill)) 846 igmp_leavegroup(ilm); 847 848 ilm_delete(ilm); 849 /* 850 * Check how many ipif's that have members in this group - 851 * if there are still some left then don't tell the driver 852 * to drop it. 853 */ 854 if (ilm_numentries_v6(ill, &v6group) != 0) 855 return (0); 856 return (ip_ll_delmulti_v6(ipif, &v6group)); 857 } 858 859 /* 860 * The unspecified address means all multicast addresses. 861 */ 862 int 863 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid, 864 boolean_t no_ilg, boolean_t leaving) 865 { 866 ipif_t *ipif; 867 ilm_t *ilm; 868 869 ASSERT(IAM_WRITER_ILL(ill)); 870 871 if (!IN6_IS_ADDR_MULTICAST(v6group) && 872 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 873 return (EINVAL); 874 875 /* 876 * Look for a match on the ill. 877 */ 878 ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid); 879 if (ilm == NULL) 880 return (ENOENT); 881 882 ASSERT(ilm->ilm_ill == ill); 883 884 ipif = ill->ill_ipif; 885 886 /* Update counters */ 887 if (no_ilg) 888 ilm->ilm_no_ilg_cnt--; 889 890 if (leaving) 891 ilm->ilm_refcnt--; 892 893 if (ilm->ilm_refcnt > 0) 894 return (ilm_update_del(ilm, B_TRUE)); 895 896 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 897 ilm_delete(ilm); 898 /* 899 * Check how many ipif's that have members in this group - 900 * if there are still some left then don't tell the driver 901 * to drop it. 902 */ 903 if (ilm_numentries_v6(ill, v6group) != 0) 904 return (0); 905 906 /* If we never joined, then don't leave. */ 907 if (ill->ill_join_allmulti) 908 ill_leave_allmulti(ill); 909 910 return (0); 911 } 912 913 if (!IS_LOOPBACK(ill)) 914 mld_leavegroup(ilm); 915 916 ilm_delete(ilm); 917 /* 918 * Check how many ipif's that have members in this group - 919 * if there are still some left then don't tell the driver 920 * to drop it. 921 */ 922 if (ilm_numentries_v6(ill, v6group) != 0) 923 return (0); 924 return (ip_ll_delmulti_v6(ipif, v6group)); 925 } 926 927 /* 928 * Send a multicast request to the driver for disabling multicast reception 929 * for v6groupp address. The caller has already checked whether it is 930 * appropriate to send one or not. 931 */ 932 int 933 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 934 { 935 mblk_t *mp; 936 char group_buf[INET6_ADDRSTRLEN]; 937 uint32_t addrlen, addroff; 938 939 ASSERT(IAM_WRITER_ILL(ill)); 940 941 /* 942 * See comment in ip_ll_send_enabmulti_req(). 943 */ 944 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 945 return (0); 946 947 /* 948 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 949 * on. 950 */ 951 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 952 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 953 954 if (!mp) 955 return (ENOMEM); 956 957 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 958 ipaddr_t v4group; 959 960 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 961 /* 962 * NOTE!!! 963 * The "addroff" passed in here was calculated by 964 * ill_create_dl(), and will be used by ill_create_squery() 965 * to perform some twisted coding magic. It is the offset 966 * into the dl_xxx_req of the hw addr. Here, it will be 967 * added to b_wptr - b_rptr to create a magic number that 968 * is not an offset into this mblk. 969 * 970 * Please see the comment in ip_ll_send)enabmulti_req() 971 * for a complete explanation. 972 * 973 * Look in ar_entry_squery() in arp.c to see how this offset 974 * is used. 975 */ 976 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 977 if (!mp) 978 return (ENOMEM); 979 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 980 inet_ntop(AF_INET6, v6groupp, group_buf, 981 sizeof (group_buf)), 982 ill->ill_name)); 983 putnext(ill->ill_rq, mp); 984 } else { 985 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" 986 " %s\n", 987 inet_ntop(AF_INET6, v6groupp, group_buf, 988 sizeof (group_buf)), 989 ill->ill_name)); 990 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 991 } 992 return (0); 993 } 994 995 /* 996 * Send a multicast request to the driver for disabling multicast 997 * membership for v6group if appropriate. 998 */ 999 static int 1000 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1001 { 1002 ill_t *ill = ipif->ipif_ill; 1003 1004 ASSERT(IAM_WRITER_IPIF(ipif)); 1005 1006 if (ill->ill_net_type != IRE_IF_RESOLVER || 1007 ipif->ipif_flags & IPIF_POINTOPOINT) { 1008 return (0); /* Must be IRE_IF_NORESOLVER */ 1009 } 1010 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1011 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1012 return (0); 1013 } 1014 if (!ill->ill_dl_up) { 1015 /* 1016 * Nobody there. All multicast addresses will be re-joined 1017 * when we get the DL_BIND_ACK bringing the interface up. 1018 */ 1019 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1020 return (0); 1021 } 1022 return (ip_ll_send_disabmulti_req(ill, v6group)); 1023 } 1024 1025 /* 1026 * Make the driver pass up all multicast packets. NOTE: to keep callers 1027 * IPMP-unaware, if an IPMP ill is passed in, the ill_join_allmulti flag is 1028 * set on it (rather than the cast ill). 1029 */ 1030 int 1031 ill_join_allmulti(ill_t *ill) 1032 { 1033 mblk_t *promiscon_mp, *promiscoff_mp; 1034 uint32_t addrlen, addroff; 1035 ill_t *join_ill = ill; 1036 1037 ASSERT(IAM_WRITER_ILL(ill)); 1038 1039 if (!ill->ill_dl_up) { 1040 /* 1041 * Nobody there. All multicast addresses will be re-joined 1042 * when we get the DL_BIND_ACK bringing the interface up. 1043 */ 1044 return (0); 1045 } 1046 1047 /* 1048 * See comment in ip_ll_send_enabmulti_req(). 1049 */ 1050 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 1051 return (0); 1052 1053 ASSERT(!join_ill->ill_join_allmulti); 1054 1055 /* 1056 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI 1057 * provider. We don't need to do this for certain media types for 1058 * which we never need to turn promiscuous mode on. While we're here, 1059 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that 1060 * ill_leave_allmulti() will not fail due to low memory conditions. 1061 */ 1062 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1063 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1064 promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1065 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1066 promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1067 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1068 if (promiscon_mp == NULL || promiscoff_mp == NULL) { 1069 freemsg(promiscon_mp); 1070 freemsg(promiscoff_mp); 1071 return (ENOMEM); 1072 } 1073 ill->ill_promiscoff_mp = promiscoff_mp; 1074 ill_dlpi_send(ill, promiscon_mp); 1075 } 1076 1077 join_ill->ill_join_allmulti = B_TRUE; 1078 return (0); 1079 } 1080 1081 /* 1082 * Make the driver stop passing up all multicast packets 1083 */ 1084 void 1085 ill_leave_allmulti(ill_t *ill) 1086 { 1087 mblk_t *promiscoff_mp; 1088 ill_t *leave_ill = ill; 1089 1090 ASSERT(IAM_WRITER_ILL(ill)); 1091 1092 if (!ill->ill_dl_up) { 1093 /* 1094 * Nobody there. All multicast addresses will be re-joined 1095 * when we get the DL_BIND_ACK bringing the interface up. 1096 */ 1097 return; 1098 } 1099 1100 /* 1101 * See comment in ip_ll_send_enabmulti_req(). 1102 */ 1103 if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL) 1104 return; 1105 1106 ASSERT(leave_ill->ill_join_allmulti); 1107 1108 /* 1109 * Create a DL_PROMISCOFF_REQ message and send it directly to 1110 * the DLPI provider. We don't need to do this for certain 1111 * media types for which we never need to turn promiscuous 1112 * mode on. 1113 */ 1114 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1115 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1116 promiscoff_mp = ill->ill_promiscoff_mp; 1117 ASSERT(promiscoff_mp != NULL); 1118 ill->ill_promiscoff_mp = NULL; 1119 ill_dlpi_send(ill, promiscoff_mp); 1120 } 1121 1122 leave_ill->ill_join_allmulti = B_FALSE; 1123 } 1124 1125 static ill_t * 1126 ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1127 { 1128 ill_t *ill; 1129 boolean_t in_ipsq; 1130 1131 ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL, 1132 ipst); 1133 if (ill != NULL) { 1134 if (!ill_waiter_inc(ill)) { 1135 ill_refrele(ill); 1136 return (NULL); 1137 } 1138 ill_refrele(ill); 1139 in_ipsq = ipsq_enter(ill, B_FALSE, NEW_OP); 1140 ill_waiter_dcr(ill); 1141 if (!in_ipsq) 1142 ill = NULL; 1143 } 1144 return (ill); 1145 } 1146 1147 int 1148 ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1149 { 1150 ill_t *ill; 1151 int ret = 0; 1152 1153 if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) 1154 return (ENODEV); 1155 1156 /* 1157 * The ip_addmulti*() functions won't allow IPMP underlying interfaces 1158 * to join allmulti since only the nominated underlying interface in 1159 * the group should receive multicast. We silently succeed to avoid 1160 * having to teach IPobs (currently the only caller of this routine) 1161 * to ignore failures in this case. 1162 */ 1163 if (IS_UNDER_IPMP(ill)) 1164 goto out; 1165 1166 if (isv6) { 1167 ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ill->ill_zoneid, 1168 ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL); 1169 } else { 1170 ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE, 1171 MODE_IS_EXCLUDE, NULL); 1172 } 1173 ill->ill_ipallmulti_cnt++; 1174 out: 1175 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1176 return (ret); 1177 } 1178 1179 1180 int 1181 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1182 { 1183 ill_t *ill; 1184 1185 if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) 1186 return (ENODEV); 1187 1188 if (ill->ill_ipallmulti_cnt > 0) { 1189 if (isv6) { 1190 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1191 ill->ill_zoneid, B_TRUE, B_TRUE); 1192 } else { 1193 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1194 B_TRUE); 1195 } 1196 ill->ill_ipallmulti_cnt--; 1197 } 1198 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1199 return (0); 1200 } 1201 1202 /* 1203 * Delete the allmulti memberships that were added as part of 1204 * ip_join_allmulti(). 1205 */ 1206 void 1207 ip_purge_allmulti(ill_t *ill) 1208 { 1209 ASSERT(IAM_WRITER_ILL(ill)); 1210 1211 for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) { 1212 if (ill->ill_isv6) { 1213 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1214 ill->ill_zoneid, B_TRUE, B_TRUE); 1215 } else { 1216 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1217 B_TRUE); 1218 } 1219 } 1220 } 1221 1222 /* 1223 * Copy mp_orig and pass it in as a local message. 1224 */ 1225 void 1226 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1227 zoneid_t zoneid) 1228 { 1229 mblk_t *mp; 1230 mblk_t *ipsec_mp; 1231 ipha_t *iph; 1232 ip_stack_t *ipst = ill->ill_ipst; 1233 1234 if (DB_TYPE(mp_orig) == M_DATA && 1235 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1236 uint_t hdrsz; 1237 1238 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1239 sizeof (udpha_t); 1240 ASSERT(MBLKL(mp_orig) >= hdrsz); 1241 1242 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1243 (mp_orig = dupmsg(mp_orig)) != NULL) { 1244 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1245 mp->b_wptr += hdrsz; 1246 mp->b_cont = mp_orig; 1247 mp_orig->b_rptr += hdrsz; 1248 if (is_system_labeled() && DB_CRED(mp_orig) != NULL) 1249 mblk_setcred(mp, DB_CRED(mp_orig)); 1250 if (MBLKL(mp_orig) == 0) { 1251 mp->b_cont = mp_orig->b_cont; 1252 mp_orig->b_cont = NULL; 1253 freeb(mp_orig); 1254 } 1255 } else if (mp != NULL) { 1256 freeb(mp); 1257 mp = NULL; 1258 } 1259 } else { 1260 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1261 } 1262 1263 if (mp == NULL) 1264 return; 1265 if (DB_TYPE(mp) == M_CTL) { 1266 ipsec_mp = mp; 1267 mp = mp->b_cont; 1268 } else { 1269 ipsec_mp = mp; 1270 } 1271 1272 iph = (ipha_t *)mp->b_rptr; 1273 1274 /* 1275 * DTrace this as ip:::send. A blocked packet will fire the send 1276 * probe, but not the receive probe. 1277 */ 1278 DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph, 1279 __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1); 1280 1281 DTRACE_PROBE4(ip4__loopback__out__start, 1282 ill_t *, NULL, ill_t *, ill, 1283 ipha_t *, iph, mblk_t *, ipsec_mp); 1284 1285 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1286 ipst->ips_ipv4firewall_loopback_out, 1287 NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst); 1288 1289 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1290 1291 if (ipsec_mp != NULL) 1292 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1293 fanout_flags, zoneid); 1294 } 1295 1296 static area_t ip_aresq_template = { 1297 AR_ENTRY_SQUERY, /* cmd */ 1298 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1299 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1300 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1301 sizeof (area_t), /* proto addr offset */ 1302 IP_ADDR_LEN, /* proto addr_length */ 1303 0, /* proto mask offset */ 1304 /* Rest is initialized when used */ 1305 0, /* flags */ 1306 0, /* hw addr offset */ 1307 0, /* hw addr length */ 1308 }; 1309 1310 static mblk_t * 1311 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1312 uint32_t addroff, mblk_t *mp_tail) 1313 { 1314 mblk_t *mp; 1315 area_t *area; 1316 1317 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1318 (caddr_t)&ipaddr); 1319 if (!mp) { 1320 freemsg(mp_tail); 1321 return (NULL); 1322 } 1323 area = (area_t *)mp->b_rptr; 1324 area->area_hw_addr_length = addrlen; 1325 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1326 /* 1327 * NOTE! 1328 * 1329 * The area_hw_addr_offset, as can be seen, does not hold the 1330 * actual hardware address offset. Rather, it holds the offset 1331 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1332 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1333 * mi_offset_paramc() to find the hardware address in the 1334 * *second* mblk (dl_xxx_req), not this mblk. 1335 * 1336 * Using mi_offset_paramc() is thus the *only* way to access 1337 * the dl_xxx_hw address. 1338 * 1339 * The squery hw address should *not* be accessed. 1340 * 1341 * See ar_entry_squery() in arp.c for an example of how all this works. 1342 */ 1343 1344 mp->b_cont = mp_tail; 1345 return (mp); 1346 } 1347 1348 /* 1349 * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for 1350 * the hardware address. 1351 */ 1352 static mblk_t * 1353 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1354 uint32_t *addr_lenp, uint32_t *addr_offp) 1355 { 1356 mblk_t *mp; 1357 uint32_t hw_addr_length; 1358 char *cp; 1359 uint32_t offset; 1360 uint32_t size; 1361 1362 *addr_lenp = *addr_offp = 0; 1363 1364 hw_addr_length = ill->ill_phys_addr_length; 1365 if (!hw_addr_length) { 1366 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1367 return (NULL); 1368 } 1369 1370 size = length; 1371 switch (dl_primitive) { 1372 case DL_ENABMULTI_REQ: 1373 case DL_DISABMULTI_REQ: 1374 size += hw_addr_length; 1375 break; 1376 case DL_PROMISCON_REQ: 1377 case DL_PROMISCOFF_REQ: 1378 break; 1379 default: 1380 return (NULL); 1381 } 1382 mp = allocb(size, BPRI_HI); 1383 if (!mp) 1384 return (NULL); 1385 mp->b_wptr += size; 1386 mp->b_datap->db_type = M_PROTO; 1387 1388 cp = (char *)mp->b_rptr; 1389 offset = length; 1390 1391 switch (dl_primitive) { 1392 case DL_ENABMULTI_REQ: { 1393 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1394 1395 dl->dl_primitive = dl_primitive; 1396 dl->dl_addr_offset = offset; 1397 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1398 *addr_offp = offset; 1399 break; 1400 } 1401 case DL_DISABMULTI_REQ: { 1402 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1403 1404 dl->dl_primitive = dl_primitive; 1405 dl->dl_addr_offset = offset; 1406 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1407 *addr_offp = offset; 1408 break; 1409 } 1410 case DL_PROMISCON_REQ: 1411 case DL_PROMISCOFF_REQ: { 1412 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1413 1414 dl->dl_primitive = dl_primitive; 1415 dl->dl_level = DL_PROMISC_MULTI; 1416 break; 1417 } 1418 } 1419 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1420 *addr_lenp, *addr_offp)); 1421 return (mp); 1422 } 1423 1424 /* 1425 * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ 1426 * messages that had been delayed until we'd heard back from ARP. One catch: 1427 * we need to ensure that no one else becomes writer on the IPSQ before we've 1428 * received the replies, or they'll incorrectly process our replies as part of 1429 * their unrelated IPSQ operation. To do this, we start a new IPSQ operation, 1430 * which will complete when we process the reply in ip_rput_dlpi_writer(). 1431 */ 1432 /* ARGSUSED */ 1433 static void 1434 ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg) 1435 { 1436 ill_t *ill = q->q_ptr; 1437 t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive; 1438 1439 ASSERT(IAM_WRITER_ILL(ill)); 1440 ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ); 1441 ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim))); 1442 1443 if (prim == DL_ENABMULTI_REQ) { 1444 /* Track the state if this is the first enabmulti */ 1445 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1446 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1447 } 1448 1449 ipsq_current_start(ipsq, ill->ill_ipif, 0); 1450 ill_dlpi_send(ill, mp); 1451 } 1452 1453 void 1454 ip_wput_ctl(queue_t *q, mblk_t *mp) 1455 { 1456 ill_t *ill = q->q_ptr; 1457 mblk_t *dlmp = mp->b_cont; 1458 area_t *area = (area_t *)mp->b_rptr; 1459 t_uscalar_t prim; 1460 1461 /* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */ 1462 if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY || 1463 dlmp == NULL) { 1464 putnext(q, mp); 1465 return; 1466 } 1467 1468 /* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */ 1469 prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive; 1470 if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) { 1471 putnext(q, mp); 1472 return; 1473 } 1474 freeb(mp); 1475 1476 /* See comments above ip_wput_ctl_writer() for details */ 1477 ill_refhold(ill); 1478 qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE); 1479 } 1480 1481 /* 1482 * Rejoin any groups which have been explicitly joined by the application (we 1483 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1484 * bringing the interface down). Note that because groups can be joined and 1485 * left while an interface is down, this may not be the same set of groups 1486 * that we left in ill_leave_multicast(). 1487 */ 1488 void 1489 ill_recover_multicast(ill_t *ill) 1490 { 1491 ilm_t *ilm; 1492 ipif_t *ipif = ill->ill_ipif; 1493 char addrbuf[INET6_ADDRSTRLEN]; 1494 1495 ASSERT(IAM_WRITER_ILL(ill)); 1496 1497 ill->ill_need_recover_multicast = 0; 1498 1499 ill_ilm_walker_hold(ill); 1500 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1501 /* 1502 * Check how many ipif's that have members in this group - 1503 * if more then one we make sure that this entry is first 1504 * in the list. 1505 */ 1506 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1507 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE, 1508 ALL_ZONES) != ilm) { 1509 continue; 1510 } 1511 1512 ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6, 1513 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1514 1515 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1516 (void) ill_join_allmulti(ill); 1517 } else { 1518 if (ill->ill_isv6) 1519 mld_joingroup(ilm); 1520 else 1521 igmp_joingroup(ilm); 1522 1523 (void) ip_ll_addmulti_v6(ipif, &ilm->ilm_v6addr); 1524 } 1525 } 1526 ill_ilm_walker_rele(ill); 1527 1528 } 1529 1530 /* 1531 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1532 * that were explicitly joined. 1533 */ 1534 void 1535 ill_leave_multicast(ill_t *ill) 1536 { 1537 ilm_t *ilm; 1538 ipif_t *ipif = ill->ill_ipif; 1539 char addrbuf[INET6_ADDRSTRLEN]; 1540 1541 ASSERT(IAM_WRITER_ILL(ill)); 1542 1543 ill->ill_need_recover_multicast = 1; 1544 1545 ill_ilm_walker_hold(ill); 1546 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1547 /* 1548 * Check how many ipif's that have members in this group - 1549 * if more then one we make sure that this entry is first 1550 * in the list. 1551 */ 1552 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1553 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE, 1554 ALL_ZONES) != ilm) { 1555 continue; 1556 } 1557 1558 ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6, 1559 &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf)))); 1560 1561 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1562 ill_leave_allmulti(ill); 1563 } else { 1564 if (ill->ill_isv6) 1565 mld_leavegroup(ilm); 1566 else 1567 igmp_leavegroup(ilm); 1568 1569 (void) ip_ll_delmulti_v6(ipif, &ilm->ilm_v6addr); 1570 } 1571 } 1572 ill_ilm_walker_rele(ill); 1573 } 1574 1575 /* Find an ilm for matching the ill */ 1576 ilm_t * 1577 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1578 { 1579 in6_addr_t v6group; 1580 1581 /* 1582 * INADDR_ANY is represented as the IPv6 unspecified addr. 1583 */ 1584 if (group == INADDR_ANY) 1585 v6group = ipv6_all_zeros; 1586 else 1587 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1588 1589 return (ilm_lookup_ill_v6(ill, &v6group, B_TRUE, zoneid)); 1590 } 1591 1592 /* 1593 * Find an ilm for address `v6group' on `ill' and zone `zoneid' (which may be 1594 * ALL_ZONES). In general, if `ill' is in an IPMP group, we will match 1595 * against any ill in the group. However, if `restrict_solicited' is set, 1596 * then specifically for IPv6 solicited-node multicast, the match will be 1597 * restricted to the specified `ill'. 1598 */ 1599 ilm_t * 1600 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, 1601 boolean_t restrict_solicited, zoneid_t zoneid) 1602 { 1603 ilm_t *ilm; 1604 ilm_walker_t ilw; 1605 boolean_t restrict_ill = B_FALSE; 1606 1607 /* 1608 * In general, underlying interfaces cannot have multicast memberships 1609 * and thus lookups always match across the illgrp. However, we must 1610 * allow IPv6 solicited-node multicast memberships on underlying 1611 * interfaces, and thus an IPMP meta-interface and one of its 1612 * underlying ills may have the same solicited-node multicast address. 1613 * In that case, we need to restrict the lookup to the requested ill. 1614 * However, we may receive packets on an underlying interface that 1615 * are for the corresponding IPMP interface's solicited-node multicast 1616 * address, and thus in that case we need to match across the group -- 1617 * hence the unfortunate `restrict_solicited' argument. 1618 */ 1619 if (IN6_IS_ADDR_MC_SOLICITEDNODE(v6group) && restrict_solicited) 1620 restrict_ill = (IS_IPMP(ill) || IS_UNDER_IPMP(ill)); 1621 1622 ilm = ilm_walker_start(&ilw, ill); 1623 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 1624 if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) 1625 continue; 1626 if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid) 1627 continue; 1628 if (!restrict_ill || ill == (ill->ill_isv6 ? 1629 ilm->ilm_ill : ilm->ilm_ipif->ipif_ill)) { 1630 break; 1631 } 1632 } 1633 ilm_walker_finish(&ilw); 1634 return (ilm); 1635 } 1636 1637 /* 1638 * Find an ilm for the ipif. Only needed for IPv4 which does 1639 * ipif specific socket options. 1640 */ 1641 ilm_t * 1642 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1643 { 1644 ilm_t *ilm; 1645 ilm_walker_t ilw; 1646 1647 ilm = ilm_walker_start(&ilw, ipif->ipif_ill); 1648 for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) { 1649 if (ilm->ilm_ipif == ipif && ilm->ilm_addr == group) 1650 break; 1651 } 1652 ilm_walker_finish(&ilw); 1653 return (ilm); 1654 } 1655 1656 /* 1657 * How many members on this ill? 1658 */ 1659 int 1660 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1661 { 1662 ilm_t *ilm; 1663 int i = 0; 1664 1665 mutex_enter(&ill->ill_lock); 1666 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1667 if (ilm->ilm_flags & ILM_DELETED) 1668 continue; 1669 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1670 i++; 1671 } 1672 } 1673 mutex_exit(&ill->ill_lock); 1674 return (i); 1675 } 1676 1677 /* Caller guarantees that the group is not already on the list */ 1678 static ilm_t * 1679 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1680 mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid) 1681 { 1682 ill_t *ill = ipif->ipif_ill; 1683 ilm_t *ilm; 1684 ilm_t *ilm_cur; 1685 ilm_t **ilm_ptpn; 1686 1687 ASSERT(IAM_WRITER_IPIF(ipif)); 1688 1689 ilm = GETSTRUCT(ilm_t, 1); 1690 if (ilm == NULL) 1691 return (NULL); 1692 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1693 ilm->ilm_filter = l_alloc(); 1694 if (ilm->ilm_filter == NULL) { 1695 mi_free(ilm); 1696 return (NULL); 1697 } 1698 } 1699 ilm->ilm_v6addr = *v6group; 1700 ilm->ilm_refcnt = 1; 1701 ilm->ilm_zoneid = zoneid; 1702 ilm->ilm_timer = INFINITY; 1703 ilm->ilm_rtx.rtx_timer = INFINITY; 1704 1705 /* 1706 * IPv4 Multicast groups are joined using ipif. 1707 * IPv6 Multicast groups are joined using ill. 1708 */ 1709 if (ill->ill_isv6) { 1710 ilm->ilm_ill = ill; 1711 ilm->ilm_ipif = NULL; 1712 DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill, 1713 (char *), "ilm", (void *), ilm); 1714 ill->ill_ilm_cnt++; 1715 } else { 1716 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1717 ilm->ilm_ipif = ipif; 1718 ilm->ilm_ill = NULL; 1719 DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif, 1720 (char *), "ilm", (void *), ilm); 1721 ipif->ipif_ilm_cnt++; 1722 } 1723 1724 ASSERT(ill->ill_ipst); 1725 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1726 1727 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1728 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1729 1730 /* 1731 * Grab lock to give consistent view to readers 1732 */ 1733 mutex_enter(&ill->ill_lock); 1734 /* 1735 * All ilms in the same zone are contiguous in the ill_ilm list. 1736 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1737 * sending duplicates up when two applications in the same zone join the 1738 * same group on different logical interfaces. 1739 */ 1740 ilm_cur = ill->ill_ilm; 1741 ilm_ptpn = &ill->ill_ilm; 1742 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1743 ilm_ptpn = &ilm_cur->ilm_next; 1744 ilm_cur = ilm_cur->ilm_next; 1745 } 1746 ilm->ilm_next = ilm_cur; 1747 *ilm_ptpn = ilm; 1748 1749 /* 1750 * If we have an associated ilg, use its filter state; if not, 1751 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 1752 */ 1753 if (ilgstat != ILGSTAT_NONE) { 1754 if (!SLIST_IS_EMPTY(ilg_flist)) 1755 l_copy(ilg_flist, ilm->ilm_filter); 1756 ilm->ilm_fmode = ilg_fmode; 1757 } else { 1758 ilm->ilm_no_ilg_cnt = 1; 1759 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1760 } 1761 1762 mutex_exit(&ill->ill_lock); 1763 return (ilm); 1764 } 1765 1766 void 1767 ilm_inactive(ilm_t *ilm) 1768 { 1769 FREE_SLIST(ilm->ilm_filter); 1770 FREE_SLIST(ilm->ilm_pendsrcs); 1771 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1772 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1773 ilm->ilm_ipst = NULL; 1774 mi_free((char *)ilm); 1775 } 1776 1777 void 1778 ilm_walker_cleanup(ill_t *ill) 1779 { 1780 ilm_t **ilmp; 1781 ilm_t *ilm; 1782 boolean_t need_wakeup = B_FALSE; 1783 1784 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1785 ASSERT(ill->ill_ilm_walker_cnt == 0); 1786 1787 ilmp = &ill->ill_ilm; 1788 while (*ilmp != NULL) { 1789 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1790 ilm = *ilmp; 1791 *ilmp = ilm->ilm_next; 1792 /* 1793 * check if there are any pending FREE or unplumb 1794 * operations that need to be restarted. 1795 */ 1796 if (ilm->ilm_ipif != NULL) { 1797 /* 1798 * IPv4 ilms hold a ref on the ipif. 1799 */ 1800 DTRACE_PROBE3(ipif__decr__cnt, 1801 (ipif_t *), ilm->ilm_ipif, 1802 (char *), "ilm", (void *), ilm); 1803 ilm->ilm_ipif->ipif_ilm_cnt--; 1804 if (IPIF_FREE_OK(ilm->ilm_ipif)) 1805 need_wakeup = B_TRUE; 1806 } else { 1807 /* 1808 * IPv6 ilms hold a ref on the ill. 1809 */ 1810 ASSERT(ilm->ilm_ill == ill); 1811 DTRACE_PROBE3(ill__decr__cnt, 1812 (ill_t *), ill, 1813 (char *), "ilm", (void *), ilm); 1814 ASSERT(ill->ill_ilm_cnt > 0); 1815 ill->ill_ilm_cnt--; 1816 if (ILL_FREE_OK(ill)) 1817 need_wakeup = B_TRUE; 1818 } 1819 ilm_inactive(ilm); /* frees ilm */ 1820 } else { 1821 ilmp = &(*ilmp)->ilm_next; 1822 } 1823 } 1824 ill->ill_ilm_cleanup_reqd = 0; 1825 if (need_wakeup) 1826 ipif_ill_refrele_tail(ill); 1827 else 1828 mutex_exit(&ill->ill_lock); 1829 } 1830 1831 /* 1832 * Unlink ilm and free it. 1833 */ 1834 static void 1835 ilm_delete(ilm_t *ilm) 1836 { 1837 ill_t *ill; 1838 ilm_t **ilmp; 1839 boolean_t need_wakeup; 1840 1841 1842 if (ilm->ilm_ipif != NULL) { 1843 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1844 ASSERT(ilm->ilm_ill == NULL); 1845 ill = ilm->ilm_ipif->ipif_ill; 1846 ASSERT(!ill->ill_isv6); 1847 } else { 1848 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1849 ASSERT(ilm->ilm_ipif == NULL); 1850 ill = ilm->ilm_ill; 1851 ASSERT(ill->ill_isv6); 1852 } 1853 /* 1854 * Delete under lock protection so that readers don't stumble 1855 * on bad ilm_next 1856 */ 1857 mutex_enter(&ill->ill_lock); 1858 if (ill->ill_ilm_walker_cnt != 0) { 1859 ilm->ilm_flags |= ILM_DELETED; 1860 ill->ill_ilm_cleanup_reqd = 1; 1861 mutex_exit(&ill->ill_lock); 1862 return; 1863 } 1864 1865 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1866 ; 1867 *ilmp = ilm->ilm_next; 1868 1869 /* 1870 * if we are the last reference to the ipif (for IPv4 ilms) 1871 * or the ill (for IPv6 ilms), we may need to wakeup any 1872 * pending FREE or unplumb operations. 1873 */ 1874 need_wakeup = B_FALSE; 1875 if (ilm->ilm_ipif != NULL) { 1876 DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif, 1877 (char *), "ilm", (void *), ilm); 1878 ilm->ilm_ipif->ipif_ilm_cnt--; 1879 if (IPIF_FREE_OK(ilm->ilm_ipif)) 1880 need_wakeup = B_TRUE; 1881 } else { 1882 DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill, 1883 (char *), "ilm", (void *), ilm); 1884 ASSERT(ill->ill_ilm_cnt > 0); 1885 ill->ill_ilm_cnt--; 1886 if (ILL_FREE_OK(ill)) 1887 need_wakeup = B_TRUE; 1888 } 1889 1890 ilm_inactive(ilm); /* frees this ilm */ 1891 1892 if (need_wakeup) { 1893 /* drops ill lock */ 1894 ipif_ill_refrele_tail(ill); 1895 } else { 1896 mutex_exit(&ill->ill_lock); 1897 } 1898 } 1899 1900 /* Increment the ILM walker count for `ill' */ 1901 static void 1902 ill_ilm_walker_hold(ill_t *ill) 1903 { 1904 mutex_enter(&ill->ill_lock); 1905 ill->ill_ilm_walker_cnt++; 1906 mutex_exit(&ill->ill_lock); 1907 } 1908 1909 /* Decrement the ILM walker count for `ill' */ 1910 static void 1911 ill_ilm_walker_rele(ill_t *ill) 1912 { 1913 mutex_enter(&ill->ill_lock); 1914 ill->ill_ilm_walker_cnt--; 1915 if (ill->ill_ilm_walker_cnt == 0 && ill->ill_ilm_cleanup_reqd) 1916 ilm_walker_cleanup(ill); /* drops ill_lock */ 1917 else 1918 mutex_exit(&ill->ill_lock); 1919 } 1920 1921 /* 1922 * Start walking the ILMs associated with `ill'; the first ILM in the walk 1923 * (if any) is returned. State associated with the walk is stored in `ilw'. 1924 * Note that walks associated with interfaces under IPMP also walk the ILMs 1925 * on the associated IPMP interface; this is handled transparently to callers 1926 * via ilm_walker_step(). (Usually with IPMP all ILMs will be on the IPMP 1927 * interface; the only exception is to support IPv6 test addresses, which 1928 * require ILMs for their associated solicited-node multicast addresses.) 1929 */ 1930 ilm_t * 1931 ilm_walker_start(ilm_walker_t *ilw, ill_t *ill) 1932 { 1933 ilw->ilw_ill = ill; 1934 if (IS_UNDER_IPMP(ill)) 1935 ilw->ilw_ipmp_ill = ipmp_ill_hold_ipmp_ill(ill); 1936 else 1937 ilw->ilw_ipmp_ill = NULL; 1938 1939 ill_ilm_walker_hold(ill); 1940 if (ilw->ilw_ipmp_ill != NULL) 1941 ill_ilm_walker_hold(ilw->ilw_ipmp_ill); 1942 1943 if (ilw->ilw_ipmp_ill != NULL && ilw->ilw_ipmp_ill->ill_ilm != NULL) 1944 ilw->ilw_walk_ill = ilw->ilw_ipmp_ill; 1945 else 1946 ilw->ilw_walk_ill = ilw->ilw_ill; 1947 1948 return (ilm_walker_step(ilw, NULL)); 1949 } 1950 1951 /* 1952 * Helper function for ilm_walker_step() that returns the next ILM 1953 * associated with `ilw', regardless of whether it's deleted. 1954 */ 1955 static ilm_t * 1956 ilm_walker_step_all(ilm_walker_t *ilw, ilm_t *ilm) 1957 { 1958 if (ilm == NULL) 1959 return (ilw->ilw_walk_ill->ill_ilm); 1960 1961 if (ilm->ilm_next != NULL) 1962 return (ilm->ilm_next); 1963 1964 if (ilw->ilw_ipmp_ill != NULL && IS_IPMP(ilw->ilw_walk_ill)) { 1965 ilw->ilw_walk_ill = ilw->ilw_ill; 1966 /* 1967 * It's possible that ilw_ill left the group during our walk, 1968 * so we can't ASSERT() that it's under IPMP. Callers that 1969 * care will be writer on the IPSQ anyway. 1970 */ 1971 return (ilw->ilw_walk_ill->ill_ilm); 1972 } 1973 return (NULL); 1974 } 1975 1976 /* 1977 * Step to the next ILM associated with `ilw'. 1978 */ 1979 ilm_t * 1980 ilm_walker_step(ilm_walker_t *ilw, ilm_t *ilm) 1981 { 1982 while ((ilm = ilm_walker_step_all(ilw, ilm)) != NULL) { 1983 if (!(ilm->ilm_flags & ILM_DELETED)) 1984 break; 1985 } 1986 return (ilm); 1987 } 1988 1989 /* 1990 * Finish the ILM walk associated with `ilw'. 1991 */ 1992 void 1993 ilm_walker_finish(ilm_walker_t *ilw) 1994 { 1995 ill_ilm_walker_rele(ilw->ilw_ill); 1996 if (ilw->ilw_ipmp_ill != NULL) { 1997 ill_ilm_walker_rele(ilw->ilw_ipmp_ill); 1998 ill_refrele(ilw->ilw_ipmp_ill); 1999 } 2000 bzero(&ilw, sizeof (ilw)); 2001 } 2002 2003 /* 2004 * Looks up the appropriate ipif given a v4 multicast group and interface 2005 * address. On success, returns 0, with *ipifpp pointing to the found 2006 * struct. On failure, returns an errno and *ipifpp is NULL. 2007 */ 2008 int 2009 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 2010 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 2011 { 2012 ipif_t *ipif; 2013 int err = 0; 2014 zoneid_t zoneid; 2015 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2016 2017 if (!CLASSD(group) || CLASSD(src)) { 2018 return (EINVAL); 2019 } 2020 *ipifpp = NULL; 2021 2022 zoneid = IPCL_ZONEID(connp); 2023 2024 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 2025 if (ifaddr != INADDR_ANY) { 2026 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 2027 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 2028 if (err != 0 && err != EINPROGRESS) 2029 err = EADDRNOTAVAIL; 2030 } else if (ifindexp != NULL && *ifindexp != 0) { 2031 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 2032 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 2033 } else { 2034 ipif = ipif_lookup_group(group, zoneid, ipst); 2035 if (ipif == NULL) 2036 return (EADDRNOTAVAIL); 2037 } 2038 if (ipif == NULL) 2039 return (err); 2040 2041 *ipifpp = ipif; 2042 return (0); 2043 } 2044 2045 /* 2046 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 2047 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 2048 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 2049 * an errno and *illpp and *ipifpp are undefined. 2050 */ 2051 int 2052 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 2053 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 2054 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 2055 { 2056 boolean_t src_unspec; 2057 ill_t *ill = NULL; 2058 ipif_t *ipif = NULL; 2059 int err; 2060 zoneid_t zoneid = connp->conn_zoneid; 2061 queue_t *wq = CONNP_TO_WQ(connp); 2062 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2063 2064 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 2065 2066 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 2067 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 2068 return (EINVAL); 2069 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 2070 if (src_unspec) { 2071 *v4src = INADDR_ANY; 2072 } else { 2073 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 2074 } 2075 if (!CLASSD(*v4group) || CLASSD(*v4src)) 2076 return (EINVAL); 2077 *ipifpp = NULL; 2078 *isv6 = B_FALSE; 2079 } else { 2080 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 2081 return (EINVAL); 2082 if (!IN6_IS_ADDR_MULTICAST(v6group) || 2083 IN6_IS_ADDR_MULTICAST(v6src)) { 2084 return (EINVAL); 2085 } 2086 *illpp = NULL; 2087 *isv6 = B_TRUE; 2088 } 2089 2090 if (ifindex == 0) { 2091 if (*isv6) 2092 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 2093 else 2094 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 2095 if (ill == NULL && ipif == NULL) 2096 return (EADDRNOTAVAIL); 2097 } else { 2098 if (*isv6) { 2099 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 2100 wq, first_mp, func, &err, ipst); 2101 if (ill != NULL && 2102 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 2103 ill_refrele(ill); 2104 ill = NULL; 2105 err = EADDRNOTAVAIL; 2106 } 2107 } else { 2108 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 2109 zoneid, wq, first_mp, func, &err, ipst); 2110 } 2111 if (ill == NULL && ipif == NULL) 2112 return (err); 2113 } 2114 2115 *ipifpp = ipif; 2116 *illpp = ill; 2117 return (0); 2118 } 2119 2120 static int 2121 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 2122 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2123 { 2124 ilg_t *ilg; 2125 int i, numsrc, fmode, outsrcs; 2126 struct sockaddr_in *sin; 2127 struct sockaddr_in6 *sin6; 2128 struct in_addr *addrp; 2129 slist_t *fp; 2130 boolean_t is_v4only_api; 2131 2132 mutex_enter(&connp->conn_lock); 2133 2134 ilg = ilg_lookup_ipif(connp, grp, ipif); 2135 if (ilg == NULL) { 2136 mutex_exit(&connp->conn_lock); 2137 return (EADDRNOTAVAIL); 2138 } 2139 2140 if (gf == NULL) { 2141 ASSERT(imsf != NULL); 2142 ASSERT(!isv4mapped); 2143 is_v4only_api = B_TRUE; 2144 outsrcs = imsf->imsf_numsrc; 2145 } else { 2146 ASSERT(imsf == NULL); 2147 is_v4only_api = B_FALSE; 2148 outsrcs = gf->gf_numsrc; 2149 } 2150 2151 /* 2152 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2153 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2154 * So we need to translate here. 2155 */ 2156 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2157 MCAST_INCLUDE : MCAST_EXCLUDE; 2158 if ((fp = ilg->ilg_filter) == NULL) { 2159 numsrc = 0; 2160 } else { 2161 for (i = 0; i < outsrcs; i++) { 2162 if (i == fp->sl_numsrc) 2163 break; 2164 if (isv4mapped) { 2165 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2166 sin6->sin6_family = AF_INET6; 2167 sin6->sin6_addr = fp->sl_addr[i]; 2168 } else { 2169 if (is_v4only_api) { 2170 addrp = &imsf->imsf_slist[i]; 2171 } else { 2172 sin = (struct sockaddr_in *) 2173 &gf->gf_slist[i]; 2174 sin->sin_family = AF_INET; 2175 addrp = &sin->sin_addr; 2176 } 2177 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2178 } 2179 } 2180 numsrc = fp->sl_numsrc; 2181 } 2182 2183 if (is_v4only_api) { 2184 imsf->imsf_numsrc = numsrc; 2185 imsf->imsf_fmode = fmode; 2186 } else { 2187 gf->gf_numsrc = numsrc; 2188 gf->gf_fmode = fmode; 2189 } 2190 2191 mutex_exit(&connp->conn_lock); 2192 2193 return (0); 2194 } 2195 2196 static int 2197 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2198 const struct in6_addr *grp, ill_t *ill) 2199 { 2200 ilg_t *ilg; 2201 int i; 2202 struct sockaddr_storage *sl; 2203 struct sockaddr_in6 *sin6; 2204 slist_t *fp; 2205 2206 mutex_enter(&connp->conn_lock); 2207 2208 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2209 if (ilg == NULL) { 2210 mutex_exit(&connp->conn_lock); 2211 return (EADDRNOTAVAIL); 2212 } 2213 2214 /* 2215 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2216 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2217 * So we need to translate here. 2218 */ 2219 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2220 MCAST_INCLUDE : MCAST_EXCLUDE; 2221 if ((fp = ilg->ilg_filter) == NULL) { 2222 gf->gf_numsrc = 0; 2223 } else { 2224 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2225 if (i == fp->sl_numsrc) 2226 break; 2227 sin6 = (struct sockaddr_in6 *)sl; 2228 sin6->sin6_family = AF_INET6; 2229 sin6->sin6_addr = fp->sl_addr[i]; 2230 } 2231 gf->gf_numsrc = fp->sl_numsrc; 2232 } 2233 2234 mutex_exit(&connp->conn_lock); 2235 2236 return (0); 2237 } 2238 2239 static int 2240 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2241 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2242 { 2243 ilg_t *ilg; 2244 int i, err, infmode, new_fmode; 2245 uint_t insrcs; 2246 struct sockaddr_in *sin; 2247 struct sockaddr_in6 *sin6; 2248 struct in_addr *addrp; 2249 slist_t *orig_filter = NULL; 2250 slist_t *new_filter = NULL; 2251 mcast_record_t orig_fmode; 2252 boolean_t leave_grp, is_v4only_api; 2253 ilg_stat_t ilgstat; 2254 2255 if (gf == NULL) { 2256 ASSERT(imsf != NULL); 2257 ASSERT(!isv4mapped); 2258 is_v4only_api = B_TRUE; 2259 insrcs = imsf->imsf_numsrc; 2260 infmode = imsf->imsf_fmode; 2261 } else { 2262 ASSERT(imsf == NULL); 2263 is_v4only_api = B_FALSE; 2264 insrcs = gf->gf_numsrc; 2265 infmode = gf->gf_fmode; 2266 } 2267 2268 /* Make sure we can handle the source list */ 2269 if (insrcs > MAX_FILTER_SIZE) 2270 return (ENOBUFS); 2271 2272 /* 2273 * setting the filter to (INCLUDE, NULL) is treated 2274 * as a request to leave the group. 2275 */ 2276 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2277 2278 ASSERT(IAM_WRITER_IPIF(ipif)); 2279 2280 mutex_enter(&connp->conn_lock); 2281 2282 ilg = ilg_lookup_ipif(connp, grp, ipif); 2283 if (ilg == NULL) { 2284 /* 2285 * if the request was actually to leave, and we 2286 * didn't find an ilg, there's nothing to do. 2287 */ 2288 if (!leave_grp) 2289 ilg = conn_ilg_alloc(connp, &err); 2290 if (leave_grp || ilg == NULL) { 2291 mutex_exit(&connp->conn_lock); 2292 return (leave_grp ? 0 : err); 2293 } 2294 ilgstat = ILGSTAT_NEW; 2295 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2296 ilg->ilg_ipif = ipif; 2297 ilg->ilg_ill = NULL; 2298 } else if (leave_grp) { 2299 ilg_delete(connp, ilg, NULL); 2300 mutex_exit(&connp->conn_lock); 2301 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2302 return (0); 2303 } else { 2304 ilgstat = ILGSTAT_CHANGE; 2305 /* Preserve existing state in case ip_addmulti() fails */ 2306 orig_fmode = ilg->ilg_fmode; 2307 if (ilg->ilg_filter == NULL) { 2308 orig_filter = NULL; 2309 } else { 2310 orig_filter = l_alloc_copy(ilg->ilg_filter); 2311 if (orig_filter == NULL) { 2312 mutex_exit(&connp->conn_lock); 2313 return (ENOMEM); 2314 } 2315 } 2316 } 2317 2318 /* 2319 * Alloc buffer to copy new state into (see below) before 2320 * we make any changes, so we can bail if it fails. 2321 */ 2322 if ((new_filter = l_alloc()) == NULL) { 2323 mutex_exit(&connp->conn_lock); 2324 err = ENOMEM; 2325 goto free_and_exit; 2326 } 2327 2328 if (insrcs == 0) { 2329 CLEAR_SLIST(ilg->ilg_filter); 2330 } else { 2331 slist_t *fp; 2332 if (ilg->ilg_filter == NULL) { 2333 fp = l_alloc(); 2334 if (fp == NULL) { 2335 if (ilgstat == ILGSTAT_NEW) 2336 ilg_delete(connp, ilg, NULL); 2337 mutex_exit(&connp->conn_lock); 2338 err = ENOMEM; 2339 goto free_and_exit; 2340 } 2341 } else { 2342 fp = ilg->ilg_filter; 2343 } 2344 for (i = 0; i < insrcs; i++) { 2345 if (isv4mapped) { 2346 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2347 fp->sl_addr[i] = sin6->sin6_addr; 2348 } else { 2349 if (is_v4only_api) { 2350 addrp = &imsf->imsf_slist[i]; 2351 } else { 2352 sin = (struct sockaddr_in *) 2353 &gf->gf_slist[i]; 2354 addrp = &sin->sin_addr; 2355 } 2356 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2357 } 2358 } 2359 fp->sl_numsrc = insrcs; 2360 ilg->ilg_filter = fp; 2361 } 2362 /* 2363 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2364 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2365 * So we need to translate here. 2366 */ 2367 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2368 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2369 2370 /* 2371 * Save copy of ilg's filter state to pass to other functions, 2372 * so we can release conn_lock now. 2373 */ 2374 new_fmode = ilg->ilg_fmode; 2375 l_copy(ilg->ilg_filter, new_filter); 2376 2377 mutex_exit(&connp->conn_lock); 2378 2379 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2380 if (err != 0) { 2381 /* 2382 * Restore the original filter state, or delete the 2383 * newly-created ilg. We need to look up the ilg 2384 * again, though, since we've not been holding the 2385 * conn_lock. 2386 */ 2387 mutex_enter(&connp->conn_lock); 2388 ilg = ilg_lookup_ipif(connp, grp, ipif); 2389 ASSERT(ilg != NULL); 2390 if (ilgstat == ILGSTAT_NEW) { 2391 ilg_delete(connp, ilg, NULL); 2392 } else { 2393 ilg->ilg_fmode = orig_fmode; 2394 if (SLIST_IS_EMPTY(orig_filter)) { 2395 CLEAR_SLIST(ilg->ilg_filter); 2396 } else { 2397 /* 2398 * We didn't free the filter, even if we 2399 * were trying to make the source list empty; 2400 * so if orig_filter isn't empty, the ilg 2401 * must still have a filter alloc'd. 2402 */ 2403 l_copy(orig_filter, ilg->ilg_filter); 2404 } 2405 } 2406 mutex_exit(&connp->conn_lock); 2407 } 2408 2409 free_and_exit: 2410 l_free(orig_filter); 2411 l_free(new_filter); 2412 2413 return (err); 2414 } 2415 2416 static int 2417 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2418 const struct in6_addr *grp, ill_t *ill) 2419 { 2420 ilg_t *ilg; 2421 int i, orig_fmode, new_fmode, err; 2422 slist_t *orig_filter = NULL; 2423 slist_t *new_filter = NULL; 2424 struct sockaddr_storage *sl; 2425 struct sockaddr_in6 *sin6; 2426 boolean_t leave_grp; 2427 ilg_stat_t ilgstat; 2428 2429 /* Make sure we can handle the source list */ 2430 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2431 return (ENOBUFS); 2432 2433 /* 2434 * setting the filter to (INCLUDE, NULL) is treated 2435 * as a request to leave the group. 2436 */ 2437 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2438 2439 ASSERT(IAM_WRITER_ILL(ill)); 2440 2441 mutex_enter(&connp->conn_lock); 2442 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2443 if (ilg == NULL) { 2444 /* 2445 * if the request was actually to leave, and we 2446 * didn't find an ilg, there's nothing to do. 2447 */ 2448 if (!leave_grp) 2449 ilg = conn_ilg_alloc(connp, &err); 2450 if (leave_grp || ilg == NULL) { 2451 mutex_exit(&connp->conn_lock); 2452 return (leave_grp ? 0 : err); 2453 } 2454 ilgstat = ILGSTAT_NEW; 2455 ilg->ilg_v6group = *grp; 2456 ilg->ilg_ipif = NULL; 2457 ilg->ilg_ill = ill; 2458 } else if (leave_grp) { 2459 ilg_delete(connp, ilg, NULL); 2460 mutex_exit(&connp->conn_lock); 2461 (void) ip_delmulti_v6(grp, ill, connp->conn_zoneid, B_FALSE, 2462 B_TRUE); 2463 return (0); 2464 } else { 2465 ilgstat = ILGSTAT_CHANGE; 2466 /* preserve existing state in case ip_addmulti() fails */ 2467 orig_fmode = ilg->ilg_fmode; 2468 if (ilg->ilg_filter == NULL) { 2469 orig_filter = NULL; 2470 } else { 2471 orig_filter = l_alloc_copy(ilg->ilg_filter); 2472 if (orig_filter == NULL) { 2473 mutex_exit(&connp->conn_lock); 2474 return (ENOMEM); 2475 } 2476 } 2477 } 2478 2479 /* 2480 * Alloc buffer to copy new state into (see below) before 2481 * we make any changes, so we can bail if it fails. 2482 */ 2483 if ((new_filter = l_alloc()) == NULL) { 2484 mutex_exit(&connp->conn_lock); 2485 err = ENOMEM; 2486 goto free_and_exit; 2487 } 2488 2489 if (gf->gf_numsrc == 0) { 2490 CLEAR_SLIST(ilg->ilg_filter); 2491 } else { 2492 slist_t *fp; 2493 if (ilg->ilg_filter == NULL) { 2494 fp = l_alloc(); 2495 if (fp == NULL) { 2496 if (ilgstat == ILGSTAT_NEW) 2497 ilg_delete(connp, ilg, NULL); 2498 mutex_exit(&connp->conn_lock); 2499 err = ENOMEM; 2500 goto free_and_exit; 2501 } 2502 } else { 2503 fp = ilg->ilg_filter; 2504 } 2505 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2506 sin6 = (struct sockaddr_in6 *)sl; 2507 fp->sl_addr[i] = sin6->sin6_addr; 2508 } 2509 fp->sl_numsrc = gf->gf_numsrc; 2510 ilg->ilg_filter = fp; 2511 } 2512 /* 2513 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2514 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2515 * So we need to translate here. 2516 */ 2517 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2518 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2519 2520 /* 2521 * Save copy of ilg's filter state to pass to other functions, 2522 * so we can release conn_lock now. 2523 */ 2524 new_fmode = ilg->ilg_fmode; 2525 l_copy(ilg->ilg_filter, new_filter); 2526 2527 mutex_exit(&connp->conn_lock); 2528 2529 err = ip_addmulti_v6(grp, ill, connp->conn_zoneid, ilgstat, new_fmode, 2530 new_filter); 2531 if (err != 0) { 2532 /* 2533 * Restore the original filter state, or delete the 2534 * newly-created ilg. We need to look up the ilg 2535 * again, though, since we've not been holding the 2536 * conn_lock. 2537 */ 2538 mutex_enter(&connp->conn_lock); 2539 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2540 ASSERT(ilg != NULL); 2541 if (ilgstat == ILGSTAT_NEW) { 2542 ilg_delete(connp, ilg, NULL); 2543 } else { 2544 ilg->ilg_fmode = orig_fmode; 2545 if (SLIST_IS_EMPTY(orig_filter)) { 2546 CLEAR_SLIST(ilg->ilg_filter); 2547 } else { 2548 /* 2549 * We didn't free the filter, even if we 2550 * were trying to make the source list empty; 2551 * so if orig_filter isn't empty, the ilg 2552 * must still have a filter alloc'd. 2553 */ 2554 l_copy(orig_filter, ilg->ilg_filter); 2555 } 2556 } 2557 mutex_exit(&connp->conn_lock); 2558 } 2559 2560 free_and_exit: 2561 l_free(orig_filter); 2562 l_free(new_filter); 2563 2564 return (err); 2565 } 2566 2567 /* 2568 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2569 */ 2570 /* ARGSUSED */ 2571 int 2572 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2573 ip_ioctl_cmd_t *ipip, void *ifreq) 2574 { 2575 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2576 /* existence verified in ip_wput_nondata() */ 2577 mblk_t *data_mp = mp->b_cont->b_cont; 2578 int datalen, err, cmd, minsize; 2579 uint_t expsize = 0; 2580 conn_t *connp; 2581 boolean_t isv6, is_v4only_api, getcmd; 2582 struct sockaddr_in *gsin; 2583 struct sockaddr_in6 *gsin6; 2584 ipaddr_t v4grp; 2585 in6_addr_t v6grp; 2586 struct group_filter *gf = NULL; 2587 struct ip_msfilter *imsf = NULL; 2588 mblk_t *ndp; 2589 2590 if (data_mp->b_cont != NULL) { 2591 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2592 return (ENOMEM); 2593 freemsg(data_mp); 2594 data_mp = ndp; 2595 mp->b_cont->b_cont = data_mp; 2596 } 2597 2598 cmd = iocp->ioc_cmd; 2599 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2600 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2601 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2602 datalen = MBLKL(data_mp); 2603 2604 if (datalen < minsize) 2605 return (EINVAL); 2606 2607 /* 2608 * now we know we have at least have the initial structure, 2609 * but need to check for the source list array. 2610 */ 2611 if (is_v4only_api) { 2612 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2613 isv6 = B_FALSE; 2614 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2615 } else { 2616 gf = (struct group_filter *)data_mp->b_rptr; 2617 if (gf->gf_group.ss_family == AF_INET6) { 2618 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2619 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2620 } else { 2621 isv6 = B_FALSE; 2622 } 2623 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2624 } 2625 if (datalen < expsize) 2626 return (EINVAL); 2627 2628 connp = Q_TO_CONN(q); 2629 2630 /* operation not supported on the virtual network interface */ 2631 if (IS_VNI(ipif->ipif_ill)) 2632 return (EINVAL); 2633 2634 if (isv6) { 2635 ill_t *ill = ipif->ipif_ill; 2636 ill_refhold(ill); 2637 2638 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2639 v6grp = gsin6->sin6_addr; 2640 if (getcmd) 2641 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2642 else 2643 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2644 2645 ill_refrele(ill); 2646 } else { 2647 boolean_t isv4mapped = B_FALSE; 2648 if (is_v4only_api) { 2649 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2650 } else { 2651 if (gf->gf_group.ss_family == AF_INET) { 2652 gsin = (struct sockaddr_in *)&gf->gf_group; 2653 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2654 } else { 2655 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2656 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2657 v4grp); 2658 isv4mapped = B_TRUE; 2659 } 2660 } 2661 if (getcmd) 2662 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2663 isv4mapped); 2664 else 2665 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2666 isv4mapped); 2667 } 2668 2669 return (err); 2670 } 2671 2672 /* 2673 * Finds the ipif based on information in the ioctl headers. Needed to make 2674 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2675 * ioctls prior to calling the ioctl's handler function). 2676 */ 2677 int 2678 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2679 cmd_info_t *ci, ipsq_func_t func) 2680 { 2681 int cmd = ipip->ipi_cmd; 2682 int err = 0; 2683 conn_t *connp; 2684 ipif_t *ipif; 2685 /* caller has verified this mblk exists */ 2686 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2687 struct ip_msfilter *imsf; 2688 struct group_filter *gf; 2689 ipaddr_t v4addr, v4grp; 2690 in6_addr_t v6grp; 2691 uint32_t index; 2692 zoneid_t zoneid; 2693 ip_stack_t *ipst; 2694 2695 connp = Q_TO_CONN(q); 2696 zoneid = connp->conn_zoneid; 2697 ipst = connp->conn_netstack->netstack_ip; 2698 2699 /* don't allow multicast operations on a tcp conn */ 2700 if (IPCL_IS_TCP(connp)) 2701 return (ENOPROTOOPT); 2702 2703 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2704 /* don't allow v4-specific ioctls on v6 socket */ 2705 if (connp->conn_af_isv6) 2706 return (EAFNOSUPPORT); 2707 2708 imsf = (struct ip_msfilter *)dbuf; 2709 v4addr = imsf->imsf_interface.s_addr; 2710 v4grp = imsf->imsf_multiaddr.s_addr; 2711 if (v4addr == INADDR_ANY) { 2712 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2713 if (ipif == NULL) 2714 err = EADDRNOTAVAIL; 2715 } else { 2716 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2717 func, &err, ipst); 2718 } 2719 } else { 2720 boolean_t isv6 = B_FALSE; 2721 gf = (struct group_filter *)dbuf; 2722 index = gf->gf_interface; 2723 if (gf->gf_group.ss_family == AF_INET6) { 2724 struct sockaddr_in6 *sin6; 2725 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2726 v6grp = sin6->sin6_addr; 2727 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2728 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2729 else 2730 isv6 = B_TRUE; 2731 } else if (gf->gf_group.ss_family == AF_INET) { 2732 struct sockaddr_in *sin; 2733 sin = (struct sockaddr_in *)&gf->gf_group; 2734 v4grp = sin->sin_addr.s_addr; 2735 } else { 2736 return (EAFNOSUPPORT); 2737 } 2738 if (index == 0) { 2739 if (isv6) { 2740 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2741 ipst); 2742 } else { 2743 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2744 } 2745 if (ipif == NULL) 2746 err = EADDRNOTAVAIL; 2747 } else { 2748 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2749 q, mp, func, &err, ipst); 2750 } 2751 } 2752 2753 ci->ci_ipif = ipif; 2754 return (err); 2755 } 2756 2757 /* 2758 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2759 * in in two stages, as the first copyin tells us the size of the attached 2760 * source buffer. This function is called by ip_wput_nondata() after the 2761 * first copyin has completed; it figures out how big the second stage 2762 * needs to be, and kicks it off. 2763 * 2764 * In some cases (numsrc < 2), the second copyin is not needed as the 2765 * first one gets a complete structure containing 1 source addr. 2766 * 2767 * The function returns 0 if a second copyin has been started (i.e. there's 2768 * no more work to be done right now), or 1 if the second copyin is not 2769 * needed and ip_wput_nondata() can continue its processing. 2770 */ 2771 int 2772 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2773 { 2774 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2775 int cmd = iocp->ioc_cmd; 2776 /* validity of this checked in ip_wput_nondata() */ 2777 mblk_t *mp1 = mp->b_cont->b_cont; 2778 int copysize = 0; 2779 int offset; 2780 2781 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2782 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2783 if (gf->gf_numsrc >= 2) { 2784 offset = sizeof (struct group_filter); 2785 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2786 } 2787 } else { 2788 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2789 if (imsf->imsf_numsrc >= 2) { 2790 offset = sizeof (struct ip_msfilter); 2791 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2792 } 2793 } 2794 if (copysize > 0) { 2795 mi_copyin_n(q, mp, offset, copysize); 2796 return (0); 2797 } 2798 return (1); 2799 } 2800 2801 /* 2802 * Handle the following optmgmt: 2803 * IP_ADD_MEMBERSHIP must not have joined already 2804 * MCAST_JOIN_GROUP must not have joined already 2805 * IP_BLOCK_SOURCE must have joined already 2806 * MCAST_BLOCK_SOURCE must have joined already 2807 * IP_JOIN_SOURCE_GROUP may have joined already 2808 * MCAST_JOIN_SOURCE_GROUP may have joined already 2809 * 2810 * fmode and src parameters may be used to determine which option is 2811 * being set, as follows (the IP_* and MCAST_* versions of each option 2812 * are functionally equivalent): 2813 * opt fmode src 2814 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2815 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2816 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2817 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2818 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2819 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2820 * 2821 * Changing the filter mode is not allowed; if a matching ilg already 2822 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2823 * 2824 * Verifies that there is a source address of appropriate scope for 2825 * the group; if not, EADDRNOTAVAIL is returned. 2826 * 2827 * The interface to be used may be identified by an address or by an 2828 * index. A pointer to the index is passed; if it is NULL, use the 2829 * address, otherwise, use the index. 2830 */ 2831 int 2832 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2833 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2834 mblk_t *first_mp) 2835 { 2836 ipif_t *ipif; 2837 ipsq_t *ipsq; 2838 int err = 0; 2839 ill_t *ill; 2840 2841 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2842 ip_restart_optmgmt, &ipif); 2843 if (err != 0) { 2844 if (err != EINPROGRESS) { 2845 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2846 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2847 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2848 } 2849 return (err); 2850 } 2851 ASSERT(ipif != NULL); 2852 2853 ill = ipif->ipif_ill; 2854 /* Operation not supported on a virtual network interface */ 2855 if (IS_VNI(ill)) { 2856 ipif_refrele(ipif); 2857 return (EINVAL); 2858 } 2859 2860 if (checkonly) { 2861 /* 2862 * do not do operation, just pretend to - new T_CHECK 2863 * semantics. The error return case above if encountered 2864 * considered a good enough "check" here. 2865 */ 2866 ipif_refrele(ipif); 2867 return (0); 2868 } 2869 2870 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2871 NEW_OP); 2872 2873 /* unspecified source addr => no source filtering */ 2874 err = ilg_add(connp, group, ipif, fmode, src); 2875 2876 IPSQ_EXIT(ipsq); 2877 2878 ipif_refrele(ipif); 2879 return (err); 2880 } 2881 2882 /* 2883 * Handle the following optmgmt: 2884 * IPV6_JOIN_GROUP must not have joined already 2885 * MCAST_JOIN_GROUP must not have joined already 2886 * MCAST_BLOCK_SOURCE must have joined already 2887 * MCAST_JOIN_SOURCE_GROUP may have joined already 2888 * 2889 * fmode and src parameters may be used to determine which option is 2890 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2891 * are functionally equivalent): 2892 * opt fmode v6src 2893 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2894 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2895 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2896 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2897 * 2898 * Changing the filter mode is not allowed; if a matching ilg already 2899 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2900 * 2901 * Verifies that there is a source address of appropriate scope for 2902 * the group; if not, EADDRNOTAVAIL is returned. 2903 * 2904 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2905 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2906 * v6src is also v4-mapped. 2907 */ 2908 int 2909 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2910 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2911 const in6_addr_t *v6src, mblk_t *first_mp) 2912 { 2913 ill_t *ill; 2914 ipif_t *ipif; 2915 char buf[INET6_ADDRSTRLEN]; 2916 ipaddr_t v4group, v4src; 2917 boolean_t isv6; 2918 ipsq_t *ipsq; 2919 int err; 2920 2921 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2922 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2923 if (err != 0) { 2924 if (err != EINPROGRESS) { 2925 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2926 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2927 sizeof (buf)), ifindex)); 2928 } 2929 return (err); 2930 } 2931 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2932 2933 /* operation is not supported on the virtual network interface */ 2934 if (isv6) { 2935 if (IS_VNI(ill)) { 2936 ill_refrele(ill); 2937 return (EINVAL); 2938 } 2939 } else { 2940 if (IS_VNI(ipif->ipif_ill)) { 2941 ipif_refrele(ipif); 2942 return (EINVAL); 2943 } 2944 } 2945 2946 if (checkonly) { 2947 /* 2948 * do not do operation, just pretend to - new T_CHECK 2949 * semantics. The error return case above if encountered 2950 * considered a good enough "check" here. 2951 */ 2952 if (isv6) 2953 ill_refrele(ill); 2954 else 2955 ipif_refrele(ipif); 2956 return (0); 2957 } 2958 2959 if (!isv6) { 2960 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2961 ipsq, NEW_OP); 2962 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2963 IPSQ_EXIT(ipsq); 2964 ipif_refrele(ipif); 2965 } else { 2966 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2967 ipsq, NEW_OP); 2968 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2969 IPSQ_EXIT(ipsq); 2970 ill_refrele(ill); 2971 } 2972 2973 return (err); 2974 } 2975 2976 static int 2977 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2978 mcast_record_t fmode, ipaddr_t src) 2979 { 2980 ilg_t *ilg; 2981 in6_addr_t v6src; 2982 boolean_t leaving = B_FALSE; 2983 2984 ASSERT(IAM_WRITER_IPIF(ipif)); 2985 2986 /* 2987 * The ilg is valid only while we hold the conn lock. Once we drop 2988 * the lock, another thread can locate another ilg on this connp, 2989 * but on a different ipif, and delete it, and cause the ilg array 2990 * to be reallocated and copied. Hence do the ilg_delete before 2991 * dropping the lock. 2992 */ 2993 mutex_enter(&connp->conn_lock); 2994 ilg = ilg_lookup_ipif(connp, group, ipif); 2995 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2996 mutex_exit(&connp->conn_lock); 2997 return (EADDRNOTAVAIL); 2998 } 2999 3000 /* 3001 * Decide if we're actually deleting the ilg or just removing a 3002 * source filter address; if just removing an addr, make sure we 3003 * aren't trying to change the filter mode, and that the addr is 3004 * actually in our filter list already. If we're removing the 3005 * last src in an include list, just delete the ilg. 3006 */ 3007 if (src == INADDR_ANY) { 3008 v6src = ipv6_all_zeros; 3009 leaving = B_TRUE; 3010 } else { 3011 int err = 0; 3012 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3013 if (fmode != ilg->ilg_fmode) 3014 err = EINVAL; 3015 else if (ilg->ilg_filter == NULL || 3016 !list_has_addr(ilg->ilg_filter, &v6src)) 3017 err = EADDRNOTAVAIL; 3018 if (err != 0) { 3019 mutex_exit(&connp->conn_lock); 3020 return (err); 3021 } 3022 if (fmode == MODE_IS_INCLUDE && 3023 ilg->ilg_filter->sl_numsrc == 1) { 3024 v6src = ipv6_all_zeros; 3025 leaving = B_TRUE; 3026 } 3027 } 3028 3029 ilg_delete(connp, ilg, &v6src); 3030 mutex_exit(&connp->conn_lock); 3031 3032 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 3033 return (0); 3034 } 3035 3036 static int 3037 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 3038 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 3039 { 3040 ilg_t *ilg; 3041 boolean_t leaving = B_TRUE; 3042 3043 ASSERT(IAM_WRITER_ILL(ill)); 3044 3045 mutex_enter(&connp->conn_lock); 3046 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3047 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 3048 mutex_exit(&connp->conn_lock); 3049 return (EADDRNOTAVAIL); 3050 } 3051 3052 /* 3053 * Decide if we're actually deleting the ilg or just removing a 3054 * source filter address; if just removing an addr, make sure we 3055 * aren't trying to change the filter mode, and that the addr is 3056 * actually in our filter list already. If we're removing the 3057 * last src in an include list, just delete the ilg. 3058 */ 3059 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3060 int err = 0; 3061 if (fmode != ilg->ilg_fmode) 3062 err = EINVAL; 3063 else if (ilg->ilg_filter == NULL || 3064 !list_has_addr(ilg->ilg_filter, v6src)) 3065 err = EADDRNOTAVAIL; 3066 if (err != 0) { 3067 mutex_exit(&connp->conn_lock); 3068 return (err); 3069 } 3070 if (fmode == MODE_IS_INCLUDE && 3071 ilg->ilg_filter->sl_numsrc == 1) 3072 v6src = NULL; 3073 else 3074 leaving = B_FALSE; 3075 } 3076 3077 ilg_delete(connp, ilg, v6src); 3078 mutex_exit(&connp->conn_lock); 3079 (void) ip_delmulti_v6(v6group, ill, connp->conn_zoneid, B_FALSE, 3080 leaving); 3081 3082 return (0); 3083 } 3084 3085 /* 3086 * Handle the following optmgmt: 3087 * IP_DROP_MEMBERSHIP will leave 3088 * MCAST_LEAVE_GROUP will leave 3089 * IP_UNBLOCK_SOURCE will not leave 3090 * MCAST_UNBLOCK_SOURCE will not leave 3091 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3092 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3093 * 3094 * fmode and src parameters may be used to determine which option is 3095 * being set, as follows (the IP_* and MCAST_* versions of each option 3096 * are functionally equivalent): 3097 * opt fmode src 3098 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 3099 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 3100 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3101 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3102 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3103 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3104 * 3105 * Changing the filter mode is not allowed; if a matching ilg already 3106 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3107 * 3108 * The interface to be used may be identified by an address or by an 3109 * index. A pointer to the index is passed; if it is NULL, use the 3110 * address, otherwise, use the index. 3111 */ 3112 int 3113 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3114 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3115 mblk_t *first_mp) 3116 { 3117 ipif_t *ipif; 3118 ipsq_t *ipsq; 3119 int err; 3120 ill_t *ill; 3121 3122 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3123 ip_restart_optmgmt, &ipif); 3124 if (err != 0) { 3125 if (err != EINPROGRESS) { 3126 ip1dbg(("ip_opt_delete_group: no ipif for group " 3127 "0x%x, ifaddr 0x%x\n", 3128 (int)ntohl(group), (int)ntohl(ifaddr))); 3129 } 3130 return (err); 3131 } 3132 ASSERT(ipif != NULL); 3133 3134 ill = ipif->ipif_ill; 3135 /* Operation not supported on a virtual network interface */ 3136 if (IS_VNI(ill)) { 3137 ipif_refrele(ipif); 3138 return (EINVAL); 3139 } 3140 3141 if (checkonly) { 3142 /* 3143 * do not do operation, just pretend to - new T_CHECK 3144 * semantics. The error return case above if encountered 3145 * considered a good enough "check" here. 3146 */ 3147 ipif_refrele(ipif); 3148 return (0); 3149 } 3150 3151 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3152 NEW_OP); 3153 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3154 IPSQ_EXIT(ipsq); 3155 3156 ipif_refrele(ipif); 3157 return (err); 3158 } 3159 3160 /* 3161 * Handle the following optmgmt: 3162 * IPV6_LEAVE_GROUP will leave 3163 * MCAST_LEAVE_GROUP will leave 3164 * MCAST_UNBLOCK_SOURCE will not leave 3165 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3166 * 3167 * fmode and src parameters may be used to determine which option is 3168 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3169 * are functionally equivalent): 3170 * opt fmode v6src 3171 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3172 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3173 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3174 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3175 * 3176 * Changing the filter mode is not allowed; if a matching ilg already 3177 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3178 * 3179 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3180 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3181 * v6src is also v4-mapped. 3182 */ 3183 int 3184 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3185 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3186 const in6_addr_t *v6src, mblk_t *first_mp) 3187 { 3188 ill_t *ill; 3189 ipif_t *ipif; 3190 char buf[INET6_ADDRSTRLEN]; 3191 ipaddr_t v4group, v4src; 3192 boolean_t isv6; 3193 ipsq_t *ipsq; 3194 int err; 3195 3196 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3197 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3198 if (err != 0) { 3199 if (err != EINPROGRESS) { 3200 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3201 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3202 sizeof (buf)), ifindex)); 3203 } 3204 return (err); 3205 } 3206 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3207 3208 /* operation is not supported on the virtual network interface */ 3209 if (isv6) { 3210 if (IS_VNI(ill)) { 3211 ill_refrele(ill); 3212 return (EINVAL); 3213 } 3214 } else { 3215 if (IS_VNI(ipif->ipif_ill)) { 3216 ipif_refrele(ipif); 3217 return (EINVAL); 3218 } 3219 } 3220 3221 if (checkonly) { 3222 /* 3223 * do not do operation, just pretend to - new T_CHECK 3224 * semantics. The error return case above if encountered 3225 * considered a good enough "check" here. 3226 */ 3227 if (isv6) 3228 ill_refrele(ill); 3229 else 3230 ipif_refrele(ipif); 3231 return (0); 3232 } 3233 3234 if (!isv6) { 3235 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3236 ipsq, NEW_OP); 3237 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3238 v4src); 3239 IPSQ_EXIT(ipsq); 3240 ipif_refrele(ipif); 3241 } else { 3242 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3243 ipsq, NEW_OP); 3244 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3245 v6src); 3246 IPSQ_EXIT(ipsq); 3247 ill_refrele(ill); 3248 } 3249 3250 return (err); 3251 } 3252 3253 /* 3254 * Group mgmt for upper conn that passes things down 3255 * to the interface multicast list (and DLPI) 3256 * These routines can handle new style options that specify an interface name 3257 * as opposed to an interface address (needed for general handling of 3258 * unnumbered interfaces.) 3259 */ 3260 3261 /* 3262 * Add a group to an upper conn group data structure and pass things down 3263 * to the interface multicast list (and DLPI) 3264 */ 3265 static int 3266 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3267 ipaddr_t src) 3268 { 3269 int error = 0; 3270 ill_t *ill; 3271 ilg_t *ilg; 3272 ilg_stat_t ilgstat; 3273 slist_t *new_filter = NULL; 3274 int new_fmode; 3275 3276 ASSERT(IAM_WRITER_IPIF(ipif)); 3277 3278 ill = ipif->ipif_ill; 3279 3280 if (!(ill->ill_flags & ILLF_MULTICAST)) 3281 return (EADDRNOTAVAIL); 3282 3283 /* 3284 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3285 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3286 * serialize 2 threads doing join (sock, group1, hme0:0) and 3287 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3288 * but both operations happen on the same conn. 3289 */ 3290 mutex_enter(&connp->conn_lock); 3291 ilg = ilg_lookup_ipif(connp, group, ipif); 3292 3293 /* 3294 * Depending on the option we're handling, may or may not be okay 3295 * if group has already been added. Figure out our rules based 3296 * on fmode and src params. Also make sure there's enough room 3297 * in the filter if we're adding a source to an existing filter. 3298 */ 3299 if (src == INADDR_ANY) { 3300 /* we're joining for all sources, must not have joined */ 3301 if (ilg != NULL) 3302 error = EADDRINUSE; 3303 } else { 3304 if (fmode == MODE_IS_EXCLUDE) { 3305 /* (excl {addr}) => block source, must have joined */ 3306 if (ilg == NULL) 3307 error = EADDRNOTAVAIL; 3308 } 3309 /* (incl {addr}) => join source, may have joined */ 3310 3311 if (ilg != NULL && 3312 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3313 error = ENOBUFS; 3314 } 3315 if (error != 0) { 3316 mutex_exit(&connp->conn_lock); 3317 return (error); 3318 } 3319 3320 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3321 3322 /* 3323 * Alloc buffer to copy new state into (see below) before 3324 * we make any changes, so we can bail if it fails. 3325 */ 3326 if ((new_filter = l_alloc()) == NULL) { 3327 mutex_exit(&connp->conn_lock); 3328 return (ENOMEM); 3329 } 3330 3331 if (ilg == NULL) { 3332 ilgstat = ILGSTAT_NEW; 3333 if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) { 3334 mutex_exit(&connp->conn_lock); 3335 l_free(new_filter); 3336 return (error); 3337 } 3338 if (src != INADDR_ANY) { 3339 ilg->ilg_filter = l_alloc(); 3340 if (ilg->ilg_filter == NULL) { 3341 ilg_delete(connp, ilg, NULL); 3342 mutex_exit(&connp->conn_lock); 3343 l_free(new_filter); 3344 return (ENOMEM); 3345 } 3346 ilg->ilg_filter->sl_numsrc = 1; 3347 IN6_IPADDR_TO_V4MAPPED(src, 3348 &ilg->ilg_filter->sl_addr[0]); 3349 } 3350 if (group == INADDR_ANY) { 3351 ilg->ilg_v6group = ipv6_all_zeros; 3352 } else { 3353 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3354 } 3355 ilg->ilg_ipif = ipif; 3356 ilg->ilg_ill = NULL; 3357 ilg->ilg_fmode = fmode; 3358 } else { 3359 int index; 3360 in6_addr_t v6src; 3361 ilgstat = ILGSTAT_CHANGE; 3362 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3363 mutex_exit(&connp->conn_lock); 3364 l_free(new_filter); 3365 return (EINVAL); 3366 } 3367 if (ilg->ilg_filter == NULL) { 3368 ilg->ilg_filter = l_alloc(); 3369 if (ilg->ilg_filter == NULL) { 3370 mutex_exit(&connp->conn_lock); 3371 l_free(new_filter); 3372 return (ENOMEM); 3373 } 3374 } 3375 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3376 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3377 mutex_exit(&connp->conn_lock); 3378 l_free(new_filter); 3379 return (EADDRNOTAVAIL); 3380 } 3381 index = ilg->ilg_filter->sl_numsrc++; 3382 ilg->ilg_filter->sl_addr[index] = v6src; 3383 } 3384 3385 /* 3386 * Save copy of ilg's filter state to pass to other functions, 3387 * so we can release conn_lock now. 3388 */ 3389 new_fmode = ilg->ilg_fmode; 3390 l_copy(ilg->ilg_filter, new_filter); 3391 3392 mutex_exit(&connp->conn_lock); 3393 3394 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3395 if (error != 0) { 3396 /* 3397 * Need to undo what we did before calling ip_addmulti()! 3398 * Must look up the ilg again since we've not been holding 3399 * conn_lock. 3400 */ 3401 in6_addr_t v6src; 3402 if (ilgstat == ILGSTAT_NEW) 3403 v6src = ipv6_all_zeros; 3404 else 3405 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3406 mutex_enter(&connp->conn_lock); 3407 ilg = ilg_lookup_ipif(connp, group, ipif); 3408 ASSERT(ilg != NULL); 3409 ilg_delete(connp, ilg, &v6src); 3410 mutex_exit(&connp->conn_lock); 3411 l_free(new_filter); 3412 return (error); 3413 } 3414 3415 l_free(new_filter); 3416 return (0); 3417 } 3418 3419 static int 3420 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3421 mcast_record_t fmode, const in6_addr_t *v6src) 3422 { 3423 int error = 0; 3424 ilg_t *ilg; 3425 ilg_stat_t ilgstat; 3426 slist_t *new_filter = NULL; 3427 int new_fmode; 3428 3429 ASSERT(IAM_WRITER_ILL(ill)); 3430 3431 if (!(ill->ill_flags & ILLF_MULTICAST)) 3432 return (EADDRNOTAVAIL); 3433 3434 /* 3435 * conn_lock protects the ilg list. Serializes 2 threads doing 3436 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3437 * and hme1 map to different ipsq's, but both operations happen 3438 * on the same conn. 3439 */ 3440 mutex_enter(&connp->conn_lock); 3441 3442 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3443 3444 /* 3445 * Depending on the option we're handling, may or may not be okay 3446 * if group has already been added. Figure out our rules based 3447 * on fmode and src params. Also make sure there's enough room 3448 * in the filter if we're adding a source to an existing filter. 3449 */ 3450 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3451 /* we're joining for all sources, must not have joined */ 3452 if (ilg != NULL) 3453 error = EADDRINUSE; 3454 } else { 3455 if (fmode == MODE_IS_EXCLUDE) { 3456 /* (excl {addr}) => block source, must have joined */ 3457 if (ilg == NULL) 3458 error = EADDRNOTAVAIL; 3459 } 3460 /* (incl {addr}) => join source, may have joined */ 3461 3462 if (ilg != NULL && 3463 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3464 error = ENOBUFS; 3465 } 3466 if (error != 0) { 3467 mutex_exit(&connp->conn_lock); 3468 return (error); 3469 } 3470 3471 /* 3472 * Alloc buffer to copy new state into (see below) before 3473 * we make any changes, so we can bail if it fails. 3474 */ 3475 if ((new_filter = l_alloc()) == NULL) { 3476 mutex_exit(&connp->conn_lock); 3477 return (ENOMEM); 3478 } 3479 3480 if (ilg == NULL) { 3481 if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) { 3482 mutex_exit(&connp->conn_lock); 3483 l_free(new_filter); 3484 return (error); 3485 } 3486 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3487 ilg->ilg_filter = l_alloc(); 3488 if (ilg->ilg_filter == NULL) { 3489 ilg_delete(connp, ilg, NULL); 3490 mutex_exit(&connp->conn_lock); 3491 l_free(new_filter); 3492 return (ENOMEM); 3493 } 3494 ilg->ilg_filter->sl_numsrc = 1; 3495 ilg->ilg_filter->sl_addr[0] = *v6src; 3496 } 3497 ilgstat = ILGSTAT_NEW; 3498 ilg->ilg_v6group = *v6group; 3499 ilg->ilg_fmode = fmode; 3500 ilg->ilg_ipif = NULL; 3501 ilg->ilg_ill = ill; 3502 } else { 3503 int index; 3504 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3505 mutex_exit(&connp->conn_lock); 3506 l_free(new_filter); 3507 return (EINVAL); 3508 } 3509 if (ilg->ilg_filter == NULL) { 3510 ilg->ilg_filter = l_alloc(); 3511 if (ilg->ilg_filter == NULL) { 3512 mutex_exit(&connp->conn_lock); 3513 l_free(new_filter); 3514 return (ENOMEM); 3515 } 3516 } 3517 if (list_has_addr(ilg->ilg_filter, v6src)) { 3518 mutex_exit(&connp->conn_lock); 3519 l_free(new_filter); 3520 return (EADDRNOTAVAIL); 3521 } 3522 ilgstat = ILGSTAT_CHANGE; 3523 index = ilg->ilg_filter->sl_numsrc++; 3524 ilg->ilg_filter->sl_addr[index] = *v6src; 3525 } 3526 3527 /* 3528 * Save copy of ilg's filter state to pass to other functions, 3529 * so we can release conn_lock now. 3530 */ 3531 new_fmode = ilg->ilg_fmode; 3532 l_copy(ilg->ilg_filter, new_filter); 3533 3534 mutex_exit(&connp->conn_lock); 3535 3536 /* 3537 * Now update the ill. We wait to do this until after the ilg 3538 * has been updated because we need to update the src filter 3539 * info for the ill, which involves looking at the status of 3540 * all the ilgs associated with this group/interface pair. 3541 */ 3542 error = ip_addmulti_v6(v6group, ill, connp->conn_zoneid, ilgstat, 3543 new_fmode, new_filter); 3544 if (error != 0) { 3545 /* 3546 * But because we waited, we have to undo the ilg update 3547 * if ip_addmulti_v6() fails. We also must lookup ilg 3548 * again, since we've not been holding conn_lock. 3549 */ 3550 in6_addr_t delsrc = 3551 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3552 mutex_enter(&connp->conn_lock); 3553 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3554 ASSERT(ilg != NULL); 3555 ilg_delete(connp, ilg, &delsrc); 3556 mutex_exit(&connp->conn_lock); 3557 l_free(new_filter); 3558 return (error); 3559 } 3560 3561 l_free(new_filter); 3562 3563 return (0); 3564 } 3565 3566 /* 3567 * Find an IPv4 ilg matching group, ill and source 3568 */ 3569 ilg_t * 3570 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3571 { 3572 in6_addr_t v6group, v6src; 3573 int i; 3574 boolean_t isinlist; 3575 ilg_t *ilg; 3576 ipif_t *ipif; 3577 ill_t *ilg_ill; 3578 3579 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3580 3581 /* 3582 * INADDR_ANY is represented as the IPv6 unspecified addr. 3583 */ 3584 if (group == INADDR_ANY) 3585 v6group = ipv6_all_zeros; 3586 else 3587 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3588 3589 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3590 ilg = &connp->conn_ilg[i]; 3591 if ((ipif = ilg->ilg_ipif) == NULL || 3592 (ilg->ilg_flags & ILG_DELETED) != 0) 3593 continue; 3594 ASSERT(ilg->ilg_ill == NULL); 3595 ilg_ill = ipif->ipif_ill; 3596 ASSERT(!ilg_ill->ill_isv6); 3597 if (IS_ON_SAME_LAN(ilg_ill, ill) && 3598 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3599 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3600 /* no source filter, so this is a match */ 3601 return (ilg); 3602 } 3603 break; 3604 } 3605 } 3606 if (i == connp->conn_ilg_inuse) 3607 return (NULL); 3608 3609 /* 3610 * we have an ilg with matching ill and group; but 3611 * the ilg has a source list that we must check. 3612 */ 3613 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3614 isinlist = B_FALSE; 3615 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3616 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3617 isinlist = B_TRUE; 3618 break; 3619 } 3620 } 3621 3622 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3623 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3624 return (ilg); 3625 3626 return (NULL); 3627 } 3628 3629 /* 3630 * Find an IPv6 ilg matching group, ill, and source 3631 */ 3632 ilg_t * 3633 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3634 const in6_addr_t *v6src, ill_t *ill) 3635 { 3636 int i; 3637 boolean_t isinlist; 3638 ilg_t *ilg; 3639 ill_t *ilg_ill; 3640 3641 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3642 3643 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3644 ilg = &connp->conn_ilg[i]; 3645 if ((ilg_ill = ilg->ilg_ill) == NULL || 3646 (ilg->ilg_flags & ILG_DELETED) != 0) 3647 continue; 3648 ASSERT(ilg->ilg_ipif == NULL); 3649 ASSERT(ilg_ill->ill_isv6); 3650 if (IS_ON_SAME_LAN(ilg_ill, ill) && 3651 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3652 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3653 /* no source filter, so this is a match */ 3654 return (ilg); 3655 } 3656 break; 3657 } 3658 } 3659 if (i == connp->conn_ilg_inuse) 3660 return (NULL); 3661 3662 /* 3663 * we have an ilg with matching ill and group; but 3664 * the ilg has a source list that we must check. 3665 */ 3666 isinlist = B_FALSE; 3667 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3668 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3669 isinlist = B_TRUE; 3670 break; 3671 } 3672 } 3673 3674 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3675 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3676 return (ilg); 3677 3678 return (NULL); 3679 } 3680 3681 /* 3682 * Find an IPv6 ilg matching group and ill 3683 */ 3684 ilg_t * 3685 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3686 { 3687 ilg_t *ilg; 3688 int i; 3689 ill_t *mem_ill; 3690 3691 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3692 3693 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3694 ilg = &connp->conn_ilg[i]; 3695 if ((mem_ill = ilg->ilg_ill) == NULL || 3696 (ilg->ilg_flags & ILG_DELETED) != 0) 3697 continue; 3698 ASSERT(ilg->ilg_ipif == NULL); 3699 ASSERT(mem_ill->ill_isv6); 3700 if (mem_ill == ill && 3701 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3702 return (ilg); 3703 } 3704 return (NULL); 3705 } 3706 3707 /* 3708 * Find an IPv4 ilg matching group and ipif 3709 */ 3710 static ilg_t * 3711 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3712 { 3713 in6_addr_t v6group; 3714 int i; 3715 ilg_t *ilg; 3716 3717 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3718 ASSERT(!ipif->ipif_ill->ill_isv6); 3719 3720 if (group == INADDR_ANY) 3721 v6group = ipv6_all_zeros; 3722 else 3723 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3724 3725 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3726 ilg = &connp->conn_ilg[i]; 3727 if ((ilg->ilg_flags & ILG_DELETED) == 0 && 3728 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) && 3729 ilg->ilg_ipif == ipif) 3730 return (ilg); 3731 } 3732 return (NULL); 3733 } 3734 3735 /* 3736 * If a source address is passed in (src != NULL and src is not 3737 * unspecified), remove the specified src addr from the given ilg's 3738 * filter list, else delete the ilg. 3739 */ 3740 static void 3741 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3742 { 3743 int i; 3744 3745 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3746 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3747 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3748 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3749 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3750 3751 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3752 if (connp->conn_ilg_walker_cnt != 0) { 3753 ilg->ilg_flags |= ILG_DELETED; 3754 return; 3755 } 3756 3757 FREE_SLIST(ilg->ilg_filter); 3758 3759 i = ilg - &connp->conn_ilg[0]; 3760 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3761 3762 /* Move other entries up one step */ 3763 connp->conn_ilg_inuse--; 3764 for (; i < connp->conn_ilg_inuse; i++) 3765 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3766 3767 if (connp->conn_ilg_inuse == 0) { 3768 mi_free((char *)connp->conn_ilg); 3769 connp->conn_ilg = NULL; 3770 cv_broadcast(&connp->conn_refcv); 3771 } 3772 } else { 3773 l_remove(ilg->ilg_filter, src); 3774 } 3775 } 3776 3777 /* 3778 * Called from conn close. No new ilg can be added or removed. 3779 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3780 * will return error if conn has started closing. 3781 */ 3782 void 3783 ilg_delete_all(conn_t *connp) 3784 { 3785 int i; 3786 ipif_t *ipif = NULL; 3787 ill_t *ill = NULL; 3788 ilg_t *ilg; 3789 in6_addr_t v6group; 3790 boolean_t success; 3791 ipsq_t *ipsq; 3792 3793 mutex_enter(&connp->conn_lock); 3794 retry: 3795 ILG_WALKER_HOLD(connp); 3796 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3797 ilg = &connp->conn_ilg[i]; 3798 /* 3799 * Since this walk is not atomic (we drop the 3800 * conn_lock and wait in ipsq_enter) we need 3801 * to check for the ILG_DELETED flag. 3802 */ 3803 if (ilg->ilg_flags & ILG_DELETED) 3804 continue; 3805 3806 if (IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)) { 3807 ipif = ilg->ilg_ipif; 3808 ill = ipif->ipif_ill; 3809 } else { 3810 ipif = NULL; 3811 ill = ilg->ilg_ill; 3812 } 3813 3814 /* 3815 * We may not be able to refhold the ill if the ill/ipif 3816 * is changing. But we need to make sure that the ill will 3817 * not vanish. So we just bump up the ill_waiter count. 3818 * If we are unable to do even that, then the ill is closing, 3819 * in which case the unplumb thread will handle the cleanup, 3820 * and we move on to the next ilg. 3821 */ 3822 if (!ill_waiter_inc(ill)) 3823 continue; 3824 3825 mutex_exit(&connp->conn_lock); 3826 /* 3827 * To prevent deadlock between ill close which waits inside 3828 * the perimeter, and conn close, ipsq_enter returns error, 3829 * the moment ILL_CONDEMNED is set, in which case ill close 3830 * takes responsibility to cleanup the ilgs. Note that we 3831 * have not yet set condemned flag, otherwise the conn can't 3832 * be refheld for cleanup by those routines and it would be 3833 * a mutual deadlock. 3834 */ 3835 success = ipsq_enter(ill, B_FALSE, NEW_OP); 3836 ipsq = ill->ill_phyint->phyint_ipsq; 3837 ill_waiter_dcr(ill); 3838 mutex_enter(&connp->conn_lock); 3839 if (!success) 3840 continue; 3841 3842 /* 3843 * Move on if the ilg was deleted while conn_lock was dropped. 3844 */ 3845 if (ilg->ilg_flags & ILG_DELETED) { 3846 mutex_exit(&connp->conn_lock); 3847 ipsq_exit(ipsq); 3848 mutex_enter(&connp->conn_lock); 3849 continue; 3850 } 3851 v6group = ilg->ilg_v6group; 3852 ilg_delete(connp, ilg, NULL); 3853 mutex_exit(&connp->conn_lock); 3854 3855 if (ipif != NULL) { 3856 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3857 B_FALSE, B_TRUE); 3858 } else { 3859 (void) ip_delmulti_v6(&v6group, ill, 3860 connp->conn_zoneid, B_FALSE, B_TRUE); 3861 } 3862 ipsq_exit(ipsq); 3863 mutex_enter(&connp->conn_lock); 3864 } 3865 ILG_WALKER_RELE(connp); 3866 3867 /* If any ill was skipped above wait and retry */ 3868 if (connp->conn_ilg_inuse != 0) { 3869 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3870 goto retry; 3871 } 3872 mutex_exit(&connp->conn_lock); 3873 } 3874 3875 /* 3876 * Called from ill close by ipcl_walk for clearing conn_ilg and 3877 * conn_multicast_ipif for a given ipif. conn is held by caller. 3878 * Note that ipcl_walk only walks conns that are not yet condemned. 3879 * condemned conns can't be refheld. For this reason, conn must become clean 3880 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3881 * condemned flag. 3882 */ 3883 static void 3884 conn_delete_ipif(conn_t *connp, caddr_t arg) 3885 { 3886 ipif_t *ipif = (ipif_t *)arg; 3887 int i; 3888 char group_buf1[INET6_ADDRSTRLEN]; 3889 char group_buf2[INET6_ADDRSTRLEN]; 3890 ipaddr_t group; 3891 ilg_t *ilg; 3892 3893 /* 3894 * Even though conn_ilg_inuse can change while we are in this loop, 3895 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3896 * be created or deleted for this connp, on this ill, since this ill 3897 * is the perimeter. So we won't miss any ilg in this cleanup. 3898 */ 3899 mutex_enter(&connp->conn_lock); 3900 3901 /* 3902 * Increment the walker count, so that ilg repacking does not 3903 * occur while we are in the loop. 3904 */ 3905 ILG_WALKER_HOLD(connp); 3906 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3907 ilg = &connp->conn_ilg[i]; 3908 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3909 continue; 3910 /* 3911 * ip_close cannot be cleaning this ilg at the same time. 3912 * since it also has to execute in this ill's perimeter which 3913 * we are now holding. Only a clean conn can be condemned. 3914 */ 3915 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3916 3917 /* Blow away the membership */ 3918 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3919 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3920 group_buf1, sizeof (group_buf1)), 3921 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3922 group_buf2, sizeof (group_buf2)), 3923 ipif->ipif_ill->ill_name)); 3924 3925 /* ilg_ipif is NULL for V6, so we won't be here */ 3926 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3927 3928 group = V4_PART_OF_V6(ilg->ilg_v6group); 3929 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3930 mutex_exit(&connp->conn_lock); 3931 3932 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3933 mutex_enter(&connp->conn_lock); 3934 } 3935 3936 /* 3937 * If we are the last walker, need to physically delete the 3938 * ilgs and repack. 3939 */ 3940 ILG_WALKER_RELE(connp); 3941 3942 if (connp->conn_multicast_ipif == ipif) { 3943 /* Revert to late binding */ 3944 connp->conn_multicast_ipif = NULL; 3945 } 3946 mutex_exit(&connp->conn_lock); 3947 3948 conn_delete_ire(connp, (caddr_t)ipif); 3949 } 3950 3951 /* 3952 * Called from ill close by ipcl_walk for clearing conn_ilg and 3953 * conn_multicast_ill for a given ill. conn is held by caller. 3954 * Note that ipcl_walk only walks conns that are not yet condemned. 3955 * condemned conns can't be refheld. For this reason, conn must become clean 3956 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3957 * condemned flag. 3958 */ 3959 static void 3960 conn_delete_ill(conn_t *connp, caddr_t arg) 3961 { 3962 ill_t *ill = (ill_t *)arg; 3963 int i; 3964 char group_buf[INET6_ADDRSTRLEN]; 3965 in6_addr_t v6group; 3966 ilg_t *ilg; 3967 3968 /* 3969 * Even though conn_ilg_inuse can change while we are in this loop, 3970 * no new ilgs can be created/deleted for this connp, on this 3971 * ill, since this ill is the perimeter. So we won't miss any ilg 3972 * in this cleanup. 3973 */ 3974 mutex_enter(&connp->conn_lock); 3975 3976 /* 3977 * Increment the walker count, so that ilg repacking does not 3978 * occur while we are in the loop. 3979 */ 3980 ILG_WALKER_HOLD(connp); 3981 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3982 ilg = &connp->conn_ilg[i]; 3983 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3984 /* 3985 * ip_close cannot be cleaning this ilg at the same 3986 * time, since it also has to execute in this ill's 3987 * perimeter which we are now holding. Only a clean 3988 * conn can be condemned. 3989 */ 3990 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3991 3992 /* Blow away the membership */ 3993 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3994 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3995 group_buf, sizeof (group_buf)), 3996 ill->ill_name)); 3997 3998 v6group = ilg->ilg_v6group; 3999 ilg_delete(connp, ilg, NULL); 4000 mutex_exit(&connp->conn_lock); 4001 4002 (void) ip_delmulti_v6(&v6group, ill, 4003 connp->conn_zoneid, B_FALSE, B_TRUE); 4004 mutex_enter(&connp->conn_lock); 4005 } 4006 } 4007 /* 4008 * If we are the last walker, need to physically delete the 4009 * ilgs and repack. 4010 */ 4011 ILG_WALKER_RELE(connp); 4012 4013 if (connp->conn_multicast_ill == ill) { 4014 /* Revert to late binding */ 4015 connp->conn_multicast_ill = NULL; 4016 } 4017 mutex_exit(&connp->conn_lock); 4018 } 4019 4020 /* 4021 * Called when an ipif is unplumbed to make sure that there are no 4022 * dangling conn references to that ipif. 4023 * Handles ilg_ipif and conn_multicast_ipif 4024 */ 4025 void 4026 reset_conn_ipif(ipif) 4027 ipif_t *ipif; 4028 { 4029 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4030 4031 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4032 } 4033 4034 /* 4035 * Called when an ill is unplumbed to make sure that there are no 4036 * dangling conn references to that ill. 4037 * Handles ilg_ill, conn_multicast_ill. 4038 */ 4039 void 4040 reset_conn_ill(ill_t *ill) 4041 { 4042 ip_stack_t *ipst = ill->ill_ipst; 4043 4044 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 4045 } 4046 4047 #ifdef DEBUG 4048 /* 4049 * Walk functions walk all the interfaces in the system to make 4050 * sure that there is no refernece to the ipif or ill that is 4051 * going away. 4052 */ 4053 int 4054 ilm_walk_ill(ill_t *ill) 4055 { 4056 int cnt = 0; 4057 ill_t *till; 4058 ilm_t *ilm; 4059 ill_walk_context_t ctx; 4060 ip_stack_t *ipst = ill->ill_ipst; 4061 4062 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4063 till = ILL_START_WALK_ALL(&ctx, ipst); 4064 for (; till != NULL; till = ill_next(&ctx, till)) { 4065 mutex_enter(&till->ill_lock); 4066 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4067 if (ilm->ilm_ill == ill) { 4068 cnt++; 4069 } 4070 } 4071 mutex_exit(&till->ill_lock); 4072 } 4073 rw_exit(&ipst->ips_ill_g_lock); 4074 4075 return (cnt); 4076 } 4077 4078 /* 4079 * This function is called before the ipif is freed. 4080 */ 4081 int 4082 ilm_walk_ipif(ipif_t *ipif) 4083 { 4084 int cnt = 0; 4085 ill_t *till; 4086 ilm_t *ilm; 4087 ill_walk_context_t ctx; 4088 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4089 4090 till = ILL_START_WALK_ALL(&ctx, ipst); 4091 for (; till != NULL; till = ill_next(&ctx, till)) { 4092 mutex_enter(&till->ill_lock); 4093 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4094 if (ilm->ilm_ipif == ipif) { 4095 cnt++; 4096 } 4097 } 4098 mutex_exit(&till->ill_lock); 4099 } 4100 return (cnt); 4101 } 4102 #endif 4103