1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/ddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sdt.h> 35 #include <sys/zone.h> 36 37 #include <sys/param.h> 38 #include <sys/socket.h> 39 #include <sys/sockio.h> 40 #include <net/if.h> 41 #include <sys/systm.h> 42 #include <sys/strsubr.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 74 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 76 ipif_t *ipif); 77 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 78 mcast_record_t fmode, ipaddr_t src); 79 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 80 mcast_record_t fmode, const in6_addr_t *v6src); 81 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 82 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 83 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 84 
static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 85 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 86 static void conn_ilg_reap(conn_t *connp); 87 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 88 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 89 static int ip_opt_delete_group_excl_v6(conn_t *connp, 90 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 91 const in6_addr_t *v6src); 92 static void ill_ilm_walker_hold(ill_t *ill); 93 static void ill_ilm_walker_rele(ill_t *ill); 94 95 /* 96 * MT notes: 97 * 98 * Multicast joins operate on both the ilg and ilm structures. Multiple 99 * threads operating on an conn (socket) trying to do multicast joins 100 * need to synchronize when operating on the ilg. Multiple threads 101 * potentially operating on different conn (socket endpoints) trying to 102 * do multicast joins could eventually end up trying to manipulate the 103 * ilm simultaneously and need to synchronize access to the ilm. Currently, 104 * this is done by synchronizing join/leave via per-phyint ipsq_t 105 * serialization. 106 * 107 * An ilm is an IP data structure used to track multicast join/leave. 108 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 109 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 110 * referencing the ilm. ilms are created / destroyed only as writer. ilms 111 * are not passed around, instead they are looked up and used under the 112 * ill_lock or as writer. So we don't need a dynamic refcount of the number 113 * of threads holding reference to an ilm. 114 * 115 * Multicast Join operation: 116 * 117 * The first step is to determine the ipif (v4) or ill (v6) on which 118 * the join operation is to be done. The join is done after becoming 119 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 120 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 
 * Multiple threads can attempt to join simultaneously on different ipif/ill
 * on the same conn. In this case the ipsq serialization does not help in
 * protecting the ilg. It is the conn_lock that is used to protect the ilg.
 * The conn_lock also protects all the ilg_t members.
 *
 * Leave operation.
 *
 * Similar to the join operation, the first step is to determine the ipif
 * or ill (v6) on which the leave operation is to be done. The leave operation
 * is done after becoming exclusive on the ipsq associated with the ipif or ill.
 * As with join ilg modification is done under the protection of the conn lock.
 */

/* Enter the ipsq for `ipif'; on failure release the ipif and return. */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);						\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}

/* As IPSQ_ENTER_IPIF, but keyed by an ill rather than an ipif. */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT(connp != NULL);						\
	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),		\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}

#define	IPSQ_EXIT(ipsq)		\
	if (ipsq != NULL)	\
		ipsq_exit(ipsq);

/*
 * conn_ilg[] walker accounting: while conn_ilg_walker_cnt is non-zero the
 * array must not be compacted or freed; the last RELE triggers the reap.
 */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}

/*
 * Compact conn_ilg[]: slide live entries down over ILG_DELETED slots,
 * freeing each deleted entry's source filter list along the way.  If no
 * live entries remain, free the array itself and broadcast on conn_refcv
 * to wake any waiters.  Caller must hold conn_lock; called from
 * ILG_WALKER_RELE once the last list walker has finished.
 */
static void
conn_ilg_reap(conn_t *connp)
{
	int	to;
	int	from;
	ilg_t	*ilg;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	to = 0;
	from = 0;
	while (from < connp->conn_ilg_inuse) {
		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
			ilg = &connp->conn_ilg[from];
			FREE_SLIST(ilg->ilg_filter);
			ilg->ilg_flags &= ~ILG_DELETED;
			from++;
			continue;
		}
		if (to != from)
			connp->conn_ilg[to] = connp->conn_ilg[from];
		to++;
		from++;
	}

	connp->conn_ilg_inuse = to;

	if (connp->conn_ilg_inuse == 0) {
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = NULL;
		cv_broadcast(&connp->conn_refcv);
	}
}

#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

/* conn_ilg[] grows in chunks of this many entries. */
#define	ILG_ALLOC_CHUNK	16

/*
 * Returns a pointer to the next available ilg in conn_ilg.  Allocs more
 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
 * returned ilg).  Returns NULL on failure, in which case `*errp' will be
 * filled in with the reason.
 *
 * Assumes connp->conn_lock is held.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp, int *errp)
{
	ilg_t *new, *ret;
	int curcnt;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);

	/*
	 * If CONN_CLOSING is set, conn_ilg cleanup has begun and we must not
	 * create any ilgs.
	 */
	if (connp->conn_state_flags & CONN_CLOSING) {
		*errp = EINVAL;
		return (NULL);
	}

	if (connp->conn_ilg == NULL) {
		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
		if (connp->conn_ilg == NULL) {
			*errp = ENOMEM;
			return (NULL);
		}
		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
		connp->conn_ilg_inuse = 0;
	}
	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
		if (connp->conn_ilg_walker_cnt != 0) {
			/*
			 * XXX We cannot grow the array at this point
			 * because a list walker could be in progress, and
			 * we cannot wipe out the existing array until the
			 * walker is done. Just return NULL for now.
			 * ilg_delete_all() will have to be changed when
			 * this logic is changed.
			 */
			*errp = EBUSY;
			return (NULL);
		}
		curcnt = connp->conn_ilg_allocated;
		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
		if (new == NULL) {
			*errp = ENOMEM;
			return (NULL);
		}
		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = new;
		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
	}

	/* entry was zeroed at allocation (mi_zalloc) or by a prior reuse */
	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
	bzero(ret, sizeof (*ret));
	return (ret);
}

/*
 * Scratch state used while building an interface filter: the target ilm,
 * counts of contributing INCLUDE/EXCLUDE ilgs, the accumulated master
 * include/exclude source lists, and an overflow flag for the include list.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;
	int		fbld_in_cnt;
	int		fbld_ex_cnt;
	slist_t		fbld_in;
	slist_t		fbld_ex;
	boolean_t	fbld_in_overflow;
} ilm_fbld_t;

/*
 * ipcl_walk() callback used by ilm_gen_filter(): merge the filter state of
 * any ilg in `conn' that matches the ilm in `arg' (same ill/ipif and same
 * group) into the master include/exclude lists accumulated in the
 * ilm_fbld_t.
 */
static void
ilm_bld_flists(conn_t *conn, void *arg)
{
	int i;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (conn->conn_ilg_inuse == 0)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn.  But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match).  If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 */
	mutex_enter(&conn->conn_lock);
	for (i = 0; i < conn->conn_ilg_inuse; i++) {
		ilg_t *ilg = &conn->conn_ilg[i];
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty.  If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	mutex_exit(&conn->conn_lock);
}

/*
 * Compute the interface-wide filter mode and source list for `ilm' by
 * walking every conn in the stack (via ilm_bld_flists) and combining the
 * per-socket ilg filters.  Results are returned through `fmode'/`flist'.
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in.  A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}

/*
 * Update `ilm' for a join (or for a filter change, when `ilgstat' is
 * ILGSTAT_CHANGE) and send an IGMP/MLD state-change report if the
 * resulting interface filter differs from the current one.
 * Returns 0 on success or ENOMEM.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 *	- we've had previous joins without associated ilgs
	 *	- this join has no associated ilg
	 *	- the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state.  If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not.
	 */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}

/*
 * Recompute `ilm' filter state after an ilg has been updated or removed,
 * and report any resulting change via IGMP/MLD.  Returns 0 on success or
 * ENOMEM (an allocation failure while installing the new filter is
 * downgraded to (EXCLUDE, NULL) and reported as success).
 */
static int
ilm_update_del(ilm_t *ilm, boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}

/*
 * INADDR_ANY means all multicast addresses.
 * INADDR_ANY is stored as IPv6 unspecified addr.
 */
int
ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/* joins are not allowed directly on IPMP underlying interfaces */
	if (IS_UNDER_IPMP(ill))
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	ilm = ilm_lookup_ipif(ipif, group);
	/*
	 * Since we are writer, we know the ilm_flags itself cannot
	 * change at this point, and ilm_lookup_ipif would not have
	 * returned a DELETED ilm.
However, the data path can free
	 * ilm->ilm_next via ilm_walker_cleanup() so we can safely
	 * access anything in ilm except ilm_next (for safe access to
	 * ilm_next we'd have to take the ill_lock).
	 */
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));

	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
	    ipif->ipif_zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (group == INADDR_ANY) {
		/*
		 * Check how many ipif's have members in this group -
		 * if more then one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, &v6group) > 1)
			return (0);
		ret = ill_join_allmulti(ill);
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_joingroup(ilm);

	/* only the first member on the ill requires a driver join */
	if (ilm_numentries_v6(ill, &v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ipif, &v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * The unspecified address means all multicast addresses.
 *
 * ill identifies the interface to join on.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 */
int
ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ilm_t	*ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		return (EINVAL);
	}

	/*
	 * On an IPMP underlying interface only solicited-node multicast
	 * joins (needed for DAD/NDP) are permitted.
	 */
	if (IS_UNDER_IPMP(ill) && !IN6_IS_ADDR_MC_SOLICITEDNODE(v6group))
		return (EINVAL);

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill) where
	 * `group' is the multicast group address, and `ill' is the interface
	 * on which it is currently joined.
	 */
	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));

	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
	    ilg_flist, zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more then one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, v6group) > 1)
			return (0);
		ret = ill_join_allmulti(ill);
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		mld_joingroup(ilm);

	/*
	 * If we have more then one we should not tell the driver
	 * to join this time.
	 */
	if (ilm_numentries_v6(ill, v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * Send a multicast request to the driver for enabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	uint32_t addrlen, addroff;
	char	group_buf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * If we're on the IPMP ill, use the nominated multicast interface to
	 * send and receive DLPI messages, if one exists.  (If none exists,
	 * there are no usable interfaces and thus nothing to do.)
	 */
	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
		return (0);

	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
	    &addrlen, &addroff);
	if (!mp)
		return (ENOMEM);
	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this squery mblk.
		 * The actual hardware address will be accessed only in the
		 * dl_xxx_req, not in the squery. More importantly,
		 * that hardware address can *only* be accessed in this
		 * mblk chain by calling mi_offset_param_c(), which uses
		 * the magic number in the squery hw offset field to go
		 * to the *next* mblk (the dl_xxx_req), subtract the
		 * (b_wptr - b_rptr), and find the actual offset into
		 * the dl_xxx_req.
		 * Any method that depends on using the
		 * offset field in the dl_disabmulti_req or squery
		 * to find either hardware address will similarly fail.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for enabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}

	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_enabmulti_req(ill, v6groupp));
}

/*
 * INADDR_ANY means all multicast addresses.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 */
int
ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/*
	 * Look for a match on the ipif.
	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
	 */
	ilm = ilm_lookup_ipif(ipif, group);
	if (ilm == NULL)
		return (ENOENT);

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* other references remain; just recompute the filter state */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_FALSE));

	if (group == INADDR_ANY) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, &v6group) != 0)
			return (0);

		/* If we never joined, then don't leave. */
		if (ill->ill_join_allmulti)
			ill_leave_allmulti(ill);

		return (0);
	}

	if (!IS_LOOPBACK(ill))
		igmp_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, &v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, &v6group));
}

/*
 * The unspecified address means all multicast addresses.
 */
int
ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, zoneid_t zoneid,
    boolean_t no_ilg, boolean_t leaving)
{
	ipif_t	*ipif;
	ilm_t	*ilm;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
		return (EINVAL);

	/*
	 * Look for a match on the ill.
	 */
	ilm = ilm_lookup_ill_v6(ill, v6group, B_TRUE, zoneid);
	if (ilm == NULL)
		return (ENOENT);

	ASSERT(ilm->ilm_ill == ill);

	ipif = ill->ill_ipif;

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* other references remain; just recompute the filter state */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_TRUE));

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, v6group) != 0)
			return (0);

		/* If we never joined, then don't leave. */
		if (ill->ill_join_allmulti)
			ill_leave_allmulti(ill);

		return (0);
	}

	if (!IS_LOOPBACK(ill))
		mld_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, v6group));
}

/*
 * Send a multicast request to the driver for disabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	char	group_buf[INET6_ADDRSTRLEN];
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * See comment in ip_ll_send_enabmulti_req().
	 */
	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
		return (0);

	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);

	if (!mp)
		return (ENOMEM);

	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this mblk.
		 *
		 * Please see the comment in ip_ll_send_enabmulti_req()
		 * for a complete explanation.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_disabmulti_req(ill, v6group));
}

/*
 * Make the driver pass up all multicast packets.  NOTE: to keep callers
 * IPMP-unaware, if an IPMP ill is passed in, the ill_join_allmulti flag is
 * set on it (rather than the cast ill).
 */
int
ill_join_allmulti(ill_t *ill)
{
	mblk_t		*promiscon_mp, *promiscoff_mp;
	uint32_t	addrlen, addroff;
	ill_t		*join_ill = ill;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	/*
	 * See comment in ip_ll_send_enabmulti_req().
	 */
	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
		return (0);

	ASSERT(!join_ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
	 * provider.  We don't need to do this for certain media types for
	 * which we never need to turn promiscuous mode on.  While we're here,
	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
	 * ill_leave_allmulti() will not fail due to low memory conditions.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
			freemsg(promiscon_mp);
			freemsg(promiscoff_mp);
			return (ENOMEM);
		}
		/* stash the PROMISCOFF message for ill_leave_allmulti() */
		ill->ill_promiscoff_mp = promiscoff_mp;
		ill_dlpi_send(ill, promiscon_mp);
	}

	join_ill->ill_join_allmulti = B_TRUE;
	return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 */
void
ill_leave_allmulti(ill_t *ill)
{
	mblk_t	*promiscoff_mp;
	ill_t	*leave_ill = ill;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return;
	}

	/*
	 * See comment in ip_ll_send_enabmulti_req().
	 */
	if (IS_IPMP(ill) && (ill = ipmp_illgrp_cast_ill(ill->ill_grp)) == NULL)
		return;

	ASSERT(leave_ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCOFF_REQ message and send it directly to
	 * the DLPI provider.  We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		/* use the message pre-allocated by ill_join_allmulti() */
		promiscoff_mp = ill->ill_promiscoff_mp;
		ASSERT(promiscoff_mp != NULL);
		ill->ill_promiscoff_mp = NULL;
		ill_dlpi_send(ill, promiscoff_mp);
	}

	leave_ill->ill_join_allmulti = B_FALSE;
}

/*
 * Look up the ill for `ifindex' and enter its serialization queue as a
 * new operation.  Returns the ill on success, or NULL if the lookup or
 * the ipsq entry fails.  On success the caller is responsible for the
 * matching ipsq_exit().
 */
static ill_t *
ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t		*ill;
	boolean_t	in_ipsq;

	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
	    ipst);
	if (ill != NULL) {
		/* keep the ill from disappearing while we wait on the ipsq */
		if (!ill_waiter_inc(ill)) {
			ill_refrele(ill);
			return (NULL);
		}
		ill_refrele(ill);
		in_ipsq = ipsq_enter(ill, B_FALSE, NEW_OP);
		ill_waiter_dcr(ill);
		if (!in_ipsq)
			ill = NULL;
	}
	return (ill);
}

/*
 * Join allmulti on the interface identified by `ifindex', tracking the
 * number of such joins in ill_ipallmulti_cnt.
 */
int
ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t	*ill;
	int	ret = 0;

	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
		return (ENODEV);

	/*
	 * The ip_addmulti*() functions won't allow IPMP underlying interfaces
	 * to join allmulti since only the nominated underlying interface in
	 * the group should receive multicast.  We silently succeed to avoid
	 * having to teach IPobs (currently the only caller of this routine)
	 * to ignore failures in this case.
1162 */ 1163 if (IS_UNDER_IPMP(ill)) 1164 goto out; 1165 1166 if (isv6) { 1167 ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ill->ill_zoneid, 1168 ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL); 1169 } else { 1170 ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE, 1171 MODE_IS_EXCLUDE, NULL); 1172 } 1173 ill->ill_ipallmulti_cnt++; 1174 out: 1175 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1176 return (ret); 1177 } 1178 1179 1180 int 1181 ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst) 1182 { 1183 ill_t *ill; 1184 1185 if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL) 1186 return (ENODEV); 1187 1188 if (ill->ill_ipallmulti_cnt > 0) { 1189 if (isv6) { 1190 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1191 ill->ill_zoneid, B_TRUE, B_TRUE); 1192 } else { 1193 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1194 B_TRUE); 1195 } 1196 ill->ill_ipallmulti_cnt--; 1197 } 1198 ipsq_exit(ill->ill_phyint->phyint_ipsq); 1199 return (0); 1200 } 1201 1202 /* 1203 * Delete the allmulti memberships that were added as part of 1204 * ip_join_allmulti(). 1205 */ 1206 void 1207 ip_purge_allmulti(ill_t *ill) 1208 { 1209 ASSERT(IAM_WRITER_ILL(ill)); 1210 1211 for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) { 1212 if (ill->ill_isv6) { 1213 (void) ip_delmulti_v6(&ipv6_all_zeros, ill, 1214 ill->ill_zoneid, B_TRUE, B_TRUE); 1215 } else { 1216 (void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, 1217 B_TRUE); 1218 } 1219 } 1220 } 1221 1222 /* 1223 * Copy mp_orig and pass it in as a local message. 
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t	*ipst = ill->ill_ipst;

	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		/*
		 * UDP fast path: copy only the IP + UDP headers into a
		 * fresh mblk and chain a dupmsg() of the payload behind
		 * it, instead of copying the whole datagram.
		 */
		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			cred_t *cr;

			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			mp_orig->b_rptr += hdrsz;
			if (is_system_labeled() &&
			    (cr = msg_getcred(mp_orig, NULL)) != NULL)
				mblk_setcred(mp, cr, NOPID);
			/* Drop a now-empty leading payload block */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			/* dupmsg() failed; release the header block */
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	/* An M_CTL carries ipsec info ahead of the actual packet */
	if (DB_TYPE(mp) == M_CTL) {
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	/*
	 * DTrace this as ip:::send.  A blocked packet will fire the send
	 * probe, but not the receive probe.
	 */
	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	/* Firewall hooks may consume the packet (ipsec_mp set to NULL) */
	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}

/* Template AR_ENTRY_SQUERY message for ill_create_squery() below */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,		/* protocol, from arps perspective */
	sizeof (area_t),			/* proto addr offset */
	IP_ADDR_LEN,			/* proto addr_length */
	0,				/* proto mask offset */
	/* Rest is initialized when used */
	0,				/* flags */
	0,				/* hw addr offset */
	0,				/* hw addr length */
};

/*
 * Build an AR_ENTRY_SQUERY for `ipaddr' and chain `mp_tail' (a dl_xxx_req)
 * behind it.  On allocation failure, mp_tail is freed and NULL returned.
 */
static mblk_t *
ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
    uint32_t addroff, mblk_t *mp_tail)
{
	mblk_t	*mp;
	area_t	*area;

	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
	    (caddr_t)&ipaddr);
	if (!mp) {
		freemsg(mp_tail);
		return (NULL);
	}
	area = (area_t *)mp->b_rptr;
	area->area_hw_addr_length = addrlen;
	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
	/*
	 * NOTE!
	 *
	 * The area_hw_addr_offset, as can be seen, does not hold the
	 * actual hardware address offset.  Rather, it holds the offset
	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
	 * adding (mp->b_wptr - mp->b_rptr).
This allows the function
	 * mi_offset_paramc() to find the hardware address in the
	 * *second* mblk (dl_xxx_req), not this mblk.
	 *
	 * Using mi_offset_paramc() is thus the *only* way to access
	 * the dl_xxx_hw address.
	 *
	 * The squery hw address should *not* be accessed.
	 *
	 * See ar_entry_squery() in arp.c for an example of how all this works.
	 */

	mp->b_cont = mp_tail;
	return (mp);
}

/*
 * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
 * the hardware address.
 *
 * On success, *addr_lenp/*addr_offp describe where the (still unfilled)
 * hardware address lives in the message; both are 0 for the PROMISC
 * primitives.  Returns NULL on allocation failure or an unsupported
 * primitive.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t	hw_addr_length;
	char		*cp;
	uint32_t	offset;
	uint32_t	size;

	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	/* Multicast requests carry a hardware address after the header */
	size = length;
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
	case DL_DISABMULTI_REQ:
		size += hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	offset = length;	/* hw address immediately follows the header */

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}

/*
 * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
 * messages that had been delayed until we'd heard back from ARP.  One catch:
 * we need to ensure that no one else becomes writer on the IPSQ before we've
 * received the replies, or they'll incorrectly process our replies as part of
 * their unrelated IPSQ operation.  To do this, we start a new IPSQ operation,
 * which will complete when we process the reply in ip_rput_dlpi_writer().
 */
/* ARGSUSED */
static void
ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
{
	ill_t *ill = q->q_ptr;
	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));

	if (prim == DL_ENABMULTI_REQ) {
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
	}

	ipsq_current_start(ipsq, ill->ill_ipif, 0);
	ill_dlpi_send(ill, mp);
}

/*
 * Handle a reply coming back up from ARP: peel off the AR_ENTRY_SQUERY
 * and hand the attached DLPI multicast request to the IPSQ writer.
 */
void
ip_wput_ctl(queue_t *q, mblk_t *mp)
{
	ill_t		*ill = q->q_ptr;
	mblk_t		*dlmp = mp->b_cont;
	area_t		*area = (area_t *)mp->b_rptr;
	t_uscalar_t	prim;

	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
	    dlmp == NULL) {
		putnext(q, mp);
		return;
	}

	/* Check
that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
		putnext(q, mp);
		return;
	}
	/* Done with the squery wrapper; only the DLPI request travels on */
	freeb(mp);

	/* See comments above ip_wput_ctl_writer() for details */
	ill_refhold(ill);
	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
}

/*
 * Rejoin any groups which have been explicitly joined by the application (we
 * left all explicitly joined groups as part of ill_leave_multicast() prior to
 * bringing the interface down).  Note that because groups can be joined and
 * left while an interface is down, this may not be the same set of groups
 * that we left in ill_leave_multicast().
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	ipif_t	*ipif = ill->ill_ipif;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	ill->ill_need_recover_multicast = 0;

	ill_ilm_walker_hold(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more then one we make sure that this entry is first
		 * in the list.  (Only the first entry triggers the actual
		 * link-layer join; the rest would be duplicates.)
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
		    ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_recover_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))))

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			(void) ill_join_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_joingroup(ilm);
			else
				igmp_joingroup(ilm);

			(void) ip_ll_addmulti_v6(ipif, &ilm->ilm_v6addr);
		}
	}
	ill_ilm_walker_rele(ill);

}

/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	ipif_t	*ipif = ill->ill_ipif;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	ill->ill_need_recover_multicast = 1;

	ill_ilm_walker_hold(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more then one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, B_TRUE,
		    ALL_ZONES) != ilm) {
			continue;
		}

		ip1dbg(("ill_leave_multicast: %s\n", inet_ntop(AF_INET6,
		    &ilm->ilm_v6addr, addrbuf, sizeof (addrbuf))));

		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			ill_leave_allmulti(ill);
		} else {
			if (ill->ill_isv6)
				mld_leavegroup(ilm);
			else
				igmp_leavegroup(ilm);

			(void) ip_ll_delmulti_v6(ipif, &ilm->ilm_v6addr);
		}
	}
	ill_ilm_walker_rele(ill);
}

/* Find an ilm for matching the ill (IPv4 wrapper over the v6 lookup) */
ilm_t *
ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t	v6group;

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	return (ilm_lookup_ill_v6(ill, &v6group, B_TRUE, zoneid));
}

/*
 * Find an ilm for address `v6group' on `ill' and zone `zoneid' (which may be
 * ALL_ZONES).  In general, if `ill' is in an IPMP group, we will match
 * against any ill in the group.  However, if `restrict_solicited' is set,
 * then specifically for IPv6 solicited-node multicast, the match will be
 * restricted to the specified `ill'.
 */
ilm_t *
ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group,
    boolean_t restrict_solicited, zoneid_t zoneid)
{
	ilm_t	*ilm;
	ilm_walker_t ilw;
	boolean_t restrict_ill = B_FALSE;

	/*
	 * In general, underlying interfaces cannot have multicast memberships
	 * and thus lookups always match across the illgrp.  However, we must
	 * allow IPv6 solicited-node multicast memberships on underlying
	 * interfaces, and thus an IPMP meta-interface and one of its
	 * underlying ills may have the same solicited-node multicast address.
	 * In that case, we need to restrict the lookup to the requested ill.
	 * However, we may receive packets on an underlying interface that
	 * are for the corresponding IPMP interface's solicited-node multicast
	 * address, and thus in that case we need to match across the group --
	 * hence the unfortunate `restrict_solicited' argument.
	 */
	if (IN6_IS_ADDR_MC_SOLICITEDNODE(v6group) && restrict_solicited)
		restrict_ill = (IS_IPMP(ill) || IS_UNDER_IPMP(ill));

	ilm = ilm_walker_start(&ilw, ill);
	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
		if (!IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group))
			continue;
		if (zoneid != ALL_ZONES && zoneid != ilm->ilm_zoneid)
			continue;
		/* IPv6 ilms hang off the ill; IPv4 ilms off the ipif */
		if (!restrict_ill || ill == (ill->ill_isv6 ?
		    ilm->ilm_ill : ilm->ilm_ipif->ipif_ill)) {
			break;
		}
	}
	ilm_walker_finish(&ilw);
	return (ilm);
}

/*
 * Find an ilm for the ipif. Only needed for IPv4 which does
 * ipif specific socket options.
 */
ilm_t *
ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
{
	ilm_t *ilm;
	ilm_walker_t ilw;

	ilm = ilm_walker_start(&ilw, ipif->ipif_ill);
	for (; ilm != NULL; ilm = ilm_walker_step(&ilw, ilm)) {
		if (ilm->ilm_ipif == ipif && ilm->ilm_addr == group)
			break;
	}
	ilm_walker_finish(&ilw);
	return (ilm);
}

/*
 * How many members on this ill?
 */
int
ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;
	int	i = 0;

	/* Walk under ill_lock, skipping entries pending deletion */
	mutex_enter(&ill->ill_lock);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			i++;
		}
	}
	mutex_exit(&ill->ill_lock);
	return (i);
}

/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, zoneid_t zoneid)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Allocate a filter list up front if there is ilg state to copy */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	/*
	 * IPv4 Multicast groups are joined using ipif.
	 * IPv6 Multicast groups are joined using ill.
	 */
	if (ill->ill_isv6) {
		ilm->ilm_ill = ill;
		ilm->ilm_ipif = NULL;
		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_ilm_cnt++;
	} else {
		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
		ilm->ilm_ipif = ipif;
		ilm->ilm_ill = NULL;
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
		    (char *), "ilm", (void *), ilm);
		ipif->ipif_ilm_cnt++;
	}

	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));

	/*
	 * Grab lock to give consistent view to readers
	 */
	mutex_enter(&ill->ill_lock);
	/*
	 * All ilms in the same zone are contiguous in the ill_ilm list.
	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
	 * sending duplicates up when two applications in the same zone join the
	 * same group on different logical interfaces.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	mutex_exit(&ill->ill_lock);
	return (ilm);
}

/*
 * Release all resources held by `ilm' and free the structure itself.
 * FREE_SLIST tolerates NULL lists.
 */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}

/*
 * Reap the ilms marked ILM_DELETED while walkers were active.  Called
 * with ill_lock held and no walkers; drops ill_lock before returning.
 */
void
ilm_walker_cleanup(ill_t *ill)
{
	ilm_t	**ilmp;
	ilm_t	*ilm;
	boolean_t need_wakeup = B_FALSE;

	ASSERT(MUTEX_HELD(&ill->ill_lock));
	ASSERT(ill->ill_ilm_walker_cnt == 0);

	ilmp = &ill->ill_ilm;
	while (*ilmp != NULL) {
		if ((*ilmp)->ilm_flags & ILM_DELETED) {
			ilm = *ilmp;
			*ilmp = ilm->ilm_next;
			/*
			 * check if there are any pending FREE or unplumb
			 * operations that need to be restarted.
			 */
			if (ilm->ilm_ipif != NULL) {
				/*
				 * IPv4 ilms hold a ref on the ipif.
				 */
				DTRACE_PROBE3(ipif__decr__cnt,
				    (ipif_t *), ilm->ilm_ipif,
				    (char *), "ilm", (void *), ilm);
				ilm->ilm_ipif->ipif_ilm_cnt--;
				if (IPIF_FREE_OK(ilm->ilm_ipif))
					need_wakeup = B_TRUE;
			} else {
				/*
				 * IPv6 ilms hold a ref on the ill.
				 */
				ASSERT(ilm->ilm_ill == ill);
				DTRACE_PROBE3(ill__decr__cnt,
				    (ill_t *), ill,
				    (char *), "ilm", (void *), ilm);
				ASSERT(ill->ill_ilm_cnt > 0);
				ill->ill_ilm_cnt--;
				if (ILL_FREE_OK(ill))
					need_wakeup = B_TRUE;
			}
			ilm_inactive(ilm);	/* frees ilm */
		} else {
			ilmp = &(*ilmp)->ilm_next;
		}
	}
	ill->ill_ilm_cleanup_reqd = 0;
	/* ipif_ill_refrele_tail() drops ill_lock for us */
	if (need_wakeup)
		ipif_ill_refrele_tail(ill);
	else
		mutex_exit(&ill->ill_lock);
}

/*
 * Unlink ilm and free it.
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;


	/* Exactly one of ilm_ipif (IPv4) and ilm_ill (IPv6) is set */
	if (ilm->ilm_ipif != NULL) {
		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
		ASSERT(ilm->ilm_ill == NULL);
		ill = ilm->ilm_ipif->ipif_ill;
		ASSERT(!ill->ill_isv6);
	} else {
		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
		ASSERT(ilm->ilm_ipif == NULL);
		ill = ilm->ilm_ill;
		ASSERT(ill->ill_isv6);
	}
	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ilm_walker_cnt != 0) {
		/* Walkers active: defer the unlink to ilm_walker_cleanup() */
		ilm->ilm_flags |= ILM_DELETED;
		ill->ill_ilm_cleanup_reqd = 1;
		mutex_exit(&ill->ill_lock);
		return;
	}

	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;
	*ilmp = ilm->ilm_next;

	/*
	 * if we are the last reference to the ipif (for IPv4 ilms)
	 * or the ill (for IPv6 ilms), we may need to wakeup any
	 * pending FREE or unplumb operations.
	 */
	need_wakeup = B_FALSE;
	if (ilm->ilm_ipif != NULL) {
		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
		    (char *), "ilm", (void *), ilm);
		ilm->ilm_ipif->ipif_ilm_cnt--;
		if (IPIF_FREE_OK(ilm->ilm_ipif))
			need_wakeup = B_TRUE;
	} else {
		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ASSERT(ill->ill_ilm_cnt > 0);
		ill->ill_ilm_cnt--;
		if (ILL_FREE_OK(ill))
			need_wakeup = B_TRUE;
	}

	ilm_inactive(ilm);	/* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}

/* Increment the ILM walker count for `ill' */
static void
ill_ilm_walker_hold(ill_t *ill)
{
	mutex_enter(&ill->ill_lock);
	ill->ill_ilm_walker_cnt++;
	mutex_exit(&ill->ill_lock);
}

/* Decrement the ILM walker count for `ill' */
static void
ill_ilm_walker_rele(ill_t *ill)
{
	mutex_enter(&ill->ill_lock);
	ill->ill_ilm_walker_cnt--;
	/* Last walker out reaps any deletions deferred during the walk */
	if (ill->ill_ilm_walker_cnt == 0 && ill->ill_ilm_cleanup_reqd)
		ilm_walker_cleanup(ill);	/* drops ill_lock */
	else
		mutex_exit(&ill->ill_lock);
}

/*
 * Start walking the ILMs associated with `ill'; the first ILM in the walk
 * (if any) is returned.  State associated with the walk is stored in `ilw'.
 * Note that walks associated with interfaces under IPMP also walk the ILMs
 * on the associated IPMP interface; this is handled transparently to callers
 * via ilm_walker_step().  (Usually with IPMP all ILMs will be on the IPMP
 * interface; the only exception is to support IPv6 test addresses, which
 * require ILMs for their associated solicited-node multicast addresses.)
 */
ilm_t *
ilm_walker_start(ilm_walker_t *ilw, ill_t *ill)
{
	ilw->ilw_ill = ill;
	if (IS_UNDER_IPMP(ill))
		ilw->ilw_ipmp_ill = ipmp_ill_hold_ipmp_ill(ill);
	else
		ilw->ilw_ipmp_ill = NULL;

	ill_ilm_walker_hold(ill);
	if (ilw->ilw_ipmp_ill != NULL)
		ill_ilm_walker_hold(ilw->ilw_ipmp_ill);

	/* Walk the IPMP meta-interface's list first, if there is one */
	if (ilw->ilw_ipmp_ill != NULL && ilw->ilw_ipmp_ill->ill_ilm != NULL)
		ilw->ilw_walk_ill = ilw->ilw_ipmp_ill;
	else
		ilw->ilw_walk_ill = ilw->ilw_ill;

	return (ilm_walker_step(ilw, NULL));
}

/*
 * Helper function for ilm_walker_step() that returns the next ILM
 * associated with `ilw', regardless of whether it's deleted.
 */
static ilm_t *
ilm_walker_step_all(ilm_walker_t *ilw, ilm_t *ilm)
{
	if (ilm == NULL)
		return (ilw->ilw_walk_ill->ill_ilm);

	if (ilm->ilm_next != NULL)
		return (ilm->ilm_next);

	/* End of the IPMP ill's list: switch over to the underlying ill */
	if (ilw->ilw_ipmp_ill != NULL && IS_IPMP(ilw->ilw_walk_ill)) {
		ilw->ilw_walk_ill = ilw->ilw_ill;
		/*
		 * It's possible that ilw_ill left the group during our walk,
		 * so we can't ASSERT() that it's under IPMP.  Callers that
		 * care will be writer on the IPSQ anyway.
		 */
		return (ilw->ilw_walk_ill->ill_ilm);
	}
	return (NULL);
}

/*
 * Step to the next ILM associated with `ilw'.
 */
ilm_t *
ilm_walker_step(ilm_walker_t *ilw, ilm_t *ilm)
{
	/* Skip entries whose deletion was deferred by ilm_delete() */
	while ((ilm = ilm_walker_step_all(ilw, ilm)) != NULL) {
		if (!(ilm->ilm_flags & ILM_DELETED))
			break;
	}
	return (ilm);
}

/*
 * Finish the ILM walk associated with `ilw'.
1994 */ 1995 void 1996 ilm_walker_finish(ilm_walker_t *ilw) 1997 { 1998 ill_ilm_walker_rele(ilw->ilw_ill); 1999 if (ilw->ilw_ipmp_ill != NULL) { 2000 ill_ilm_walker_rele(ilw->ilw_ipmp_ill); 2001 ill_refrele(ilw->ilw_ipmp_ill); 2002 } 2003 bzero(&ilw, sizeof (ilw)); 2004 } 2005 2006 /* 2007 * Looks up the appropriate ipif given a v4 multicast group and interface 2008 * address. On success, returns 0, with *ipifpp pointing to the found 2009 * struct. On failure, returns an errno and *ipifpp is NULL. 2010 */ 2011 int 2012 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 2013 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 2014 { 2015 ipif_t *ipif; 2016 int err = 0; 2017 zoneid_t zoneid; 2018 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2019 2020 if (!CLASSD(group) || CLASSD(src)) { 2021 return (EINVAL); 2022 } 2023 *ipifpp = NULL; 2024 2025 zoneid = IPCL_ZONEID(connp); 2026 2027 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 2028 if (ifaddr != INADDR_ANY) { 2029 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 2030 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 2031 if (err != 0 && err != EINPROGRESS) 2032 err = EADDRNOTAVAIL; 2033 } else if (ifindexp != NULL && *ifindexp != 0) { 2034 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 2035 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 2036 } else { 2037 ipif = ipif_lookup_group(group, zoneid, ipst); 2038 if (ipif == NULL) 2039 return (EADDRNOTAVAIL); 2040 } 2041 if (ipif == NULL) 2042 return (err); 2043 2044 *ipifpp = ipif; 2045 return (0); 2046 } 2047 2048 /* 2049 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 2050 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 2051 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 2052 * an errno and *illpp and *ipifpp are undefined. 
 */
int
ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
    const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
    mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
{
	boolean_t src_unspec;
	ill_t *ill = NULL;
	ipif_t *ipif = NULL;
	int err;
	zoneid_t zoneid = connp->conn_zoneid;
	queue_t *wq = CONNP_TO_WQ(connp);
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);

	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
		/* v4mapped group: group and source must agree on family */
		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
		if (src_unspec) {
			*v4src = INADDR_ANY;
		} else {
			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
		}
		if (!CLASSD(*v4group) || CLASSD(*v4src))
			return (EINVAL);
		*ipifpp = NULL;
		*isv6 = B_FALSE;
	} else {
		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
		    IN6_IS_ADDR_MULTICAST(v6src)) {
			return (EINVAL);
		}
		*illpp = NULL;
		*isv6 = B_TRUE;
	}

	if (ifindex == 0) {
		/* No interface specified: choose by routing to the group */
		if (*isv6)
			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
		else
			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
		if (ill == NULL && ipif == NULL)
			return (EADDRNOTAVAIL);
	} else {
		if (*isv6) {
			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
			    wq, first_mp, func, &err, ipst);
			/* The ill must be usable from the conn's zone */
			if (ill != NULL &&
			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
				ill_refrele(ill);
				ill = NULL;
				err = EADDRNOTAVAIL;
			}
		} else {
			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
			    zoneid, wq, first_mp, func, &err, ipst);
		}
		if (ill == NULL && ipif == NULL)
			return (err);
	}

	*ipifpp = ipif;
	*illpp = ill;
	return (0);
}

/*
 * Copy this conn's source filter state for group `grp' on `ipif' out to
 * the application's buffer: either the ip_msfilter (v4-only API) or the
 * group_filter (protocol-independent API) form.  Returns 0 on success or
 * EADDRNOTAVAIL if the conn has no membership for the group.
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/* Exactly one of gf (sockopt API) and imsf (ioctl API) is given */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy at most outsrcs entries (the caller's buffer size) */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full filter size even if we truncated the copy */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

static int
ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct
in6_addr *grp, ill_t *ill) 2202 { 2203 ilg_t *ilg; 2204 int i; 2205 struct sockaddr_storage *sl; 2206 struct sockaddr_in6 *sin6; 2207 slist_t *fp; 2208 2209 mutex_enter(&connp->conn_lock); 2210 2211 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2212 if (ilg == NULL) { 2213 mutex_exit(&connp->conn_lock); 2214 return (EADDRNOTAVAIL); 2215 } 2216 2217 /* 2218 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2219 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2220 * So we need to translate here. 2221 */ 2222 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2223 MCAST_INCLUDE : MCAST_EXCLUDE; 2224 if ((fp = ilg->ilg_filter) == NULL) { 2225 gf->gf_numsrc = 0; 2226 } else { 2227 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2228 if (i == fp->sl_numsrc) 2229 break; 2230 sin6 = (struct sockaddr_in6 *)sl; 2231 sin6->sin6_family = AF_INET6; 2232 sin6->sin6_addr = fp->sl_addr[i]; 2233 } 2234 gf->gf_numsrc = fp->sl_numsrc; 2235 } 2236 2237 mutex_exit(&connp->conn_lock); 2238 2239 return (0); 2240 } 2241 2242 static int 2243 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2244 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2245 { 2246 ilg_t *ilg; 2247 int i, err, infmode, new_fmode; 2248 uint_t insrcs; 2249 struct sockaddr_in *sin; 2250 struct sockaddr_in6 *sin6; 2251 struct in_addr *addrp; 2252 slist_t *orig_filter = NULL; 2253 slist_t *new_filter = NULL; 2254 mcast_record_t orig_fmode; 2255 boolean_t leave_grp, is_v4only_api; 2256 ilg_stat_t ilgstat; 2257 2258 if (gf == NULL) { 2259 ASSERT(imsf != NULL); 2260 ASSERT(!isv4mapped); 2261 is_v4only_api = B_TRUE; 2262 insrcs = imsf->imsf_numsrc; 2263 infmode = imsf->imsf_fmode; 2264 } else { 2265 ASSERT(imsf == NULL); 2266 is_v4only_api = B_FALSE; 2267 insrcs = gf->gf_numsrc; 2268 infmode = gf->gf_fmode; 2269 } 2270 2271 /* Make sure we can handle the source list */ 2272 if (insrcs > MAX_FILTER_SIZE) 2273 return (ENOBUFS); 2274 2275 /* 
2276 * setting the filter to (INCLUDE, NULL) is treated 2277 * as a request to leave the group. 2278 */ 2279 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2280 2281 ASSERT(IAM_WRITER_IPIF(ipif)); 2282 2283 mutex_enter(&connp->conn_lock); 2284 2285 ilg = ilg_lookup_ipif(connp, grp, ipif); 2286 if (ilg == NULL) { 2287 /* 2288 * if the request was actually to leave, and we 2289 * didn't find an ilg, there's nothing to do. 2290 */ 2291 if (!leave_grp) 2292 ilg = conn_ilg_alloc(connp, &err); 2293 if (leave_grp || ilg == NULL) { 2294 mutex_exit(&connp->conn_lock); 2295 return (leave_grp ? 0 : err); 2296 } 2297 ilgstat = ILGSTAT_NEW; 2298 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2299 ilg->ilg_ipif = ipif; 2300 ilg->ilg_ill = NULL; 2301 } else if (leave_grp) { 2302 ilg_delete(connp, ilg, NULL); 2303 mutex_exit(&connp->conn_lock); 2304 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2305 return (0); 2306 } else { 2307 ilgstat = ILGSTAT_CHANGE; 2308 /* Preserve existing state in case ip_addmulti() fails */ 2309 orig_fmode = ilg->ilg_fmode; 2310 if (ilg->ilg_filter == NULL) { 2311 orig_filter = NULL; 2312 } else { 2313 orig_filter = l_alloc_copy(ilg->ilg_filter); 2314 if (orig_filter == NULL) { 2315 mutex_exit(&connp->conn_lock); 2316 return (ENOMEM); 2317 } 2318 } 2319 } 2320 2321 /* 2322 * Alloc buffer to copy new state into (see below) before 2323 * we make any changes, so we can bail if it fails. 
2324 */ 2325 if ((new_filter = l_alloc()) == NULL) { 2326 mutex_exit(&connp->conn_lock); 2327 err = ENOMEM; 2328 goto free_and_exit; 2329 } 2330 2331 if (insrcs == 0) { 2332 CLEAR_SLIST(ilg->ilg_filter); 2333 } else { 2334 slist_t *fp; 2335 if (ilg->ilg_filter == NULL) { 2336 fp = l_alloc(); 2337 if (fp == NULL) { 2338 if (ilgstat == ILGSTAT_NEW) 2339 ilg_delete(connp, ilg, NULL); 2340 mutex_exit(&connp->conn_lock); 2341 err = ENOMEM; 2342 goto free_and_exit; 2343 } 2344 } else { 2345 fp = ilg->ilg_filter; 2346 } 2347 for (i = 0; i < insrcs; i++) { 2348 if (isv4mapped) { 2349 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2350 fp->sl_addr[i] = sin6->sin6_addr; 2351 } else { 2352 if (is_v4only_api) { 2353 addrp = &imsf->imsf_slist[i]; 2354 } else { 2355 sin = (struct sockaddr_in *) 2356 &gf->gf_slist[i]; 2357 addrp = &sin->sin_addr; 2358 } 2359 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2360 } 2361 } 2362 fp->sl_numsrc = insrcs; 2363 ilg->ilg_filter = fp; 2364 } 2365 /* 2366 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2367 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2368 * So we need to translate here. 2369 */ 2370 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2371 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2372 2373 /* 2374 * Save copy of ilg's filter state to pass to other functions, 2375 * so we can release conn_lock now. 2376 */ 2377 new_fmode = ilg->ilg_fmode; 2378 l_copy(ilg->ilg_filter, new_filter); 2379 2380 mutex_exit(&connp->conn_lock); 2381 2382 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2383 if (err != 0) { 2384 /* 2385 * Restore the original filter state, or delete the 2386 * newly-created ilg. We need to look up the ilg 2387 * again, though, since we've not been holding the 2388 * conn_lock. 
2389 */ 2390 mutex_enter(&connp->conn_lock); 2391 ilg = ilg_lookup_ipif(connp, grp, ipif); 2392 ASSERT(ilg != NULL); 2393 if (ilgstat == ILGSTAT_NEW) { 2394 ilg_delete(connp, ilg, NULL); 2395 } else { 2396 ilg->ilg_fmode = orig_fmode; 2397 if (SLIST_IS_EMPTY(orig_filter)) { 2398 CLEAR_SLIST(ilg->ilg_filter); 2399 } else { 2400 /* 2401 * We didn't free the filter, even if we 2402 * were trying to make the source list empty; 2403 * so if orig_filter isn't empty, the ilg 2404 * must still have a filter alloc'd. 2405 */ 2406 l_copy(orig_filter, ilg->ilg_filter); 2407 } 2408 } 2409 mutex_exit(&connp->conn_lock); 2410 } 2411 2412 free_and_exit: 2413 l_free(orig_filter); 2414 l_free(new_filter); 2415 2416 return (err); 2417 } 2418 2419 static int 2420 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2421 const struct in6_addr *grp, ill_t *ill) 2422 { 2423 ilg_t *ilg; 2424 int i, orig_fmode, new_fmode, err; 2425 slist_t *orig_filter = NULL; 2426 slist_t *new_filter = NULL; 2427 struct sockaddr_storage *sl; 2428 struct sockaddr_in6 *sin6; 2429 boolean_t leave_grp; 2430 ilg_stat_t ilgstat; 2431 2432 /* Make sure we can handle the source list */ 2433 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2434 return (ENOBUFS); 2435 2436 /* 2437 * setting the filter to (INCLUDE, NULL) is treated 2438 * as a request to leave the group. 2439 */ 2440 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2441 2442 ASSERT(IAM_WRITER_ILL(ill)); 2443 2444 mutex_enter(&connp->conn_lock); 2445 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2446 if (ilg == NULL) { 2447 /* 2448 * if the request was actually to leave, and we 2449 * didn't find an ilg, there's nothing to do. 2450 */ 2451 if (!leave_grp) 2452 ilg = conn_ilg_alloc(connp, &err); 2453 if (leave_grp || ilg == NULL) { 2454 mutex_exit(&connp->conn_lock); 2455 return (leave_grp ? 
0 : err); 2456 } 2457 ilgstat = ILGSTAT_NEW; 2458 ilg->ilg_v6group = *grp; 2459 ilg->ilg_ipif = NULL; 2460 ilg->ilg_ill = ill; 2461 } else if (leave_grp) { 2462 ilg_delete(connp, ilg, NULL); 2463 mutex_exit(&connp->conn_lock); 2464 (void) ip_delmulti_v6(grp, ill, connp->conn_zoneid, B_FALSE, 2465 B_TRUE); 2466 return (0); 2467 } else { 2468 ilgstat = ILGSTAT_CHANGE; 2469 /* preserve existing state in case ip_addmulti() fails */ 2470 orig_fmode = ilg->ilg_fmode; 2471 if (ilg->ilg_filter == NULL) { 2472 orig_filter = NULL; 2473 } else { 2474 orig_filter = l_alloc_copy(ilg->ilg_filter); 2475 if (orig_filter == NULL) { 2476 mutex_exit(&connp->conn_lock); 2477 return (ENOMEM); 2478 } 2479 } 2480 } 2481 2482 /* 2483 * Alloc buffer to copy new state into (see below) before 2484 * we make any changes, so we can bail if it fails. 2485 */ 2486 if ((new_filter = l_alloc()) == NULL) { 2487 mutex_exit(&connp->conn_lock); 2488 err = ENOMEM; 2489 goto free_and_exit; 2490 } 2491 2492 if (gf->gf_numsrc == 0) { 2493 CLEAR_SLIST(ilg->ilg_filter); 2494 } else { 2495 slist_t *fp; 2496 if (ilg->ilg_filter == NULL) { 2497 fp = l_alloc(); 2498 if (fp == NULL) { 2499 if (ilgstat == ILGSTAT_NEW) 2500 ilg_delete(connp, ilg, NULL); 2501 mutex_exit(&connp->conn_lock); 2502 err = ENOMEM; 2503 goto free_and_exit; 2504 } 2505 } else { 2506 fp = ilg->ilg_filter; 2507 } 2508 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2509 sin6 = (struct sockaddr_in6 *)sl; 2510 fp->sl_addr[i] = sin6->sin6_addr; 2511 } 2512 fp->sl_numsrc = gf->gf_numsrc; 2513 ilg->ilg_filter = fp; 2514 } 2515 /* 2516 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2517 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2518 * So we need to translate here. 2519 */ 2520 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2521 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2522 2523 /* 2524 * Save copy of ilg's filter state to pass to other functions, 2525 * so we can release conn_lock now. 
2526 */ 2527 new_fmode = ilg->ilg_fmode; 2528 l_copy(ilg->ilg_filter, new_filter); 2529 2530 mutex_exit(&connp->conn_lock); 2531 2532 err = ip_addmulti_v6(grp, ill, connp->conn_zoneid, ilgstat, new_fmode, 2533 new_filter); 2534 if (err != 0) { 2535 /* 2536 * Restore the original filter state, or delete the 2537 * newly-created ilg. We need to look up the ilg 2538 * again, though, since we've not been holding the 2539 * conn_lock. 2540 */ 2541 mutex_enter(&connp->conn_lock); 2542 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2543 ASSERT(ilg != NULL); 2544 if (ilgstat == ILGSTAT_NEW) { 2545 ilg_delete(connp, ilg, NULL); 2546 } else { 2547 ilg->ilg_fmode = orig_fmode; 2548 if (SLIST_IS_EMPTY(orig_filter)) { 2549 CLEAR_SLIST(ilg->ilg_filter); 2550 } else { 2551 /* 2552 * We didn't free the filter, even if we 2553 * were trying to make the source list empty; 2554 * so if orig_filter isn't empty, the ilg 2555 * must still have a filter alloc'd. 2556 */ 2557 l_copy(orig_filter, ilg->ilg_filter); 2558 } 2559 } 2560 mutex_exit(&connp->conn_lock); 2561 } 2562 2563 free_and_exit: 2564 l_free(orig_filter); 2565 l_free(new_filter); 2566 2567 return (err); 2568 } 2569 2570 /* 2571 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 
2572 */ 2573 /* ARGSUSED */ 2574 int 2575 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2576 ip_ioctl_cmd_t *ipip, void *ifreq) 2577 { 2578 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2579 /* existence verified in ip_wput_nondata() */ 2580 mblk_t *data_mp = mp->b_cont->b_cont; 2581 int datalen, err, cmd, minsize; 2582 uint_t expsize = 0; 2583 conn_t *connp; 2584 boolean_t isv6, is_v4only_api, getcmd; 2585 struct sockaddr_in *gsin; 2586 struct sockaddr_in6 *gsin6; 2587 ipaddr_t v4grp; 2588 in6_addr_t v6grp; 2589 struct group_filter *gf = NULL; 2590 struct ip_msfilter *imsf = NULL; 2591 mblk_t *ndp; 2592 2593 if (data_mp->b_cont != NULL) { 2594 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2595 return (ENOMEM); 2596 freemsg(data_mp); 2597 data_mp = ndp; 2598 mp->b_cont->b_cont = data_mp; 2599 } 2600 2601 cmd = iocp->ioc_cmd; 2602 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2603 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2604 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2605 datalen = MBLKL(data_mp); 2606 2607 if (datalen < minsize) 2608 return (EINVAL); 2609 2610 /* 2611 * now we know we have at least have the initial structure, 2612 * but need to check for the source list array. 
2613 */ 2614 if (is_v4only_api) { 2615 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2616 isv6 = B_FALSE; 2617 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2618 } else { 2619 gf = (struct group_filter *)data_mp->b_rptr; 2620 if (gf->gf_group.ss_family == AF_INET6) { 2621 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2622 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2623 } else { 2624 isv6 = B_FALSE; 2625 } 2626 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2627 } 2628 if (datalen < expsize) 2629 return (EINVAL); 2630 2631 connp = Q_TO_CONN(q); 2632 2633 /* operation not supported on the virtual network interface */ 2634 if (IS_VNI(ipif->ipif_ill)) 2635 return (EINVAL); 2636 2637 if (isv6) { 2638 ill_t *ill = ipif->ipif_ill; 2639 ill_refhold(ill); 2640 2641 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2642 v6grp = gsin6->sin6_addr; 2643 if (getcmd) 2644 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2645 else 2646 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2647 2648 ill_refrele(ill); 2649 } else { 2650 boolean_t isv4mapped = B_FALSE; 2651 if (is_v4only_api) { 2652 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2653 } else { 2654 if (gf->gf_group.ss_family == AF_INET) { 2655 gsin = (struct sockaddr_in *)&gf->gf_group; 2656 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2657 } else { 2658 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2659 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2660 v4grp); 2661 isv4mapped = B_TRUE; 2662 } 2663 } 2664 if (getcmd) 2665 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2666 isv4mapped); 2667 else 2668 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2669 isv4mapped); 2670 } 2671 2672 return (err); 2673 } 2674 2675 /* 2676 * Finds the ipif based on information in the ioctl headers. Needed to make 2677 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2678 * ioctls prior to calling the ioctl's handler function). 
2679 */ 2680 int 2681 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2682 cmd_info_t *ci, ipsq_func_t func) 2683 { 2684 int cmd = ipip->ipi_cmd; 2685 int err = 0; 2686 conn_t *connp; 2687 ipif_t *ipif; 2688 /* caller has verified this mblk exists */ 2689 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2690 struct ip_msfilter *imsf; 2691 struct group_filter *gf; 2692 ipaddr_t v4addr, v4grp; 2693 in6_addr_t v6grp; 2694 uint32_t index; 2695 zoneid_t zoneid; 2696 ip_stack_t *ipst; 2697 2698 connp = Q_TO_CONN(q); 2699 zoneid = connp->conn_zoneid; 2700 ipst = connp->conn_netstack->netstack_ip; 2701 2702 /* don't allow multicast operations on a tcp conn */ 2703 if (IPCL_IS_TCP(connp)) 2704 return (ENOPROTOOPT); 2705 2706 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2707 /* don't allow v4-specific ioctls on v6 socket */ 2708 if (connp->conn_af_isv6) 2709 return (EAFNOSUPPORT); 2710 2711 imsf = (struct ip_msfilter *)dbuf; 2712 v4addr = imsf->imsf_interface.s_addr; 2713 v4grp = imsf->imsf_multiaddr.s_addr; 2714 if (v4addr == INADDR_ANY) { 2715 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2716 if (ipif == NULL) 2717 err = EADDRNOTAVAIL; 2718 } else { 2719 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2720 func, &err, ipst); 2721 } 2722 } else { 2723 boolean_t isv6 = B_FALSE; 2724 gf = (struct group_filter *)dbuf; 2725 index = gf->gf_interface; 2726 if (gf->gf_group.ss_family == AF_INET6) { 2727 struct sockaddr_in6 *sin6; 2728 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2729 v6grp = sin6->sin6_addr; 2730 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2731 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2732 else 2733 isv6 = B_TRUE; 2734 } else if (gf->gf_group.ss_family == AF_INET) { 2735 struct sockaddr_in *sin; 2736 sin = (struct sockaddr_in *)&gf->gf_group; 2737 v4grp = sin->sin_addr.s_addr; 2738 } else { 2739 return (EAFNOSUPPORT); 2740 } 2741 if (index == 0) { 2742 if (isv6) { 2743 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2744 ipst); 2745 } 
else { 2746 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2747 } 2748 if (ipif == NULL) 2749 err = EADDRNOTAVAIL; 2750 } else { 2751 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2752 q, mp, func, &err, ipst); 2753 } 2754 } 2755 2756 ci->ci_ipif = ipif; 2757 return (err); 2758 } 2759 2760 /* 2761 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2762 * in in two stages, as the first copyin tells us the size of the attached 2763 * source buffer. This function is called by ip_wput_nondata() after the 2764 * first copyin has completed; it figures out how big the second stage 2765 * needs to be, and kicks it off. 2766 * 2767 * In some cases (numsrc < 2), the second copyin is not needed as the 2768 * first one gets a complete structure containing 1 source addr. 2769 * 2770 * The function returns 0 if a second copyin has been started (i.e. there's 2771 * no more work to be done right now), or 1 if the second copyin is not 2772 * needed and ip_wput_nondata() can continue its processing. 
2773 */ 2774 int 2775 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2776 { 2777 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2778 int cmd = iocp->ioc_cmd; 2779 /* validity of this checked in ip_wput_nondata() */ 2780 mblk_t *mp1 = mp->b_cont->b_cont; 2781 int copysize = 0; 2782 int offset; 2783 2784 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2785 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2786 if (gf->gf_numsrc >= 2) { 2787 offset = sizeof (struct group_filter); 2788 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2789 } 2790 } else { 2791 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2792 if (imsf->imsf_numsrc >= 2) { 2793 offset = sizeof (struct ip_msfilter); 2794 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2795 } 2796 } 2797 if (copysize > 0) { 2798 mi_copyin_n(q, mp, offset, copysize); 2799 return (0); 2800 } 2801 return (1); 2802 } 2803 2804 /* 2805 * Handle the following optmgmt: 2806 * IP_ADD_MEMBERSHIP must not have joined already 2807 * MCAST_JOIN_GROUP must not have joined already 2808 * IP_BLOCK_SOURCE must have joined already 2809 * MCAST_BLOCK_SOURCE must have joined already 2810 * IP_JOIN_SOURCE_GROUP may have joined already 2811 * MCAST_JOIN_SOURCE_GROUP may have joined already 2812 * 2813 * fmode and src parameters may be used to determine which option is 2814 * being set, as follows (the IP_* and MCAST_* versions of each option 2815 * are functionally equivalent): 2816 * opt fmode src 2817 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2818 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2819 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2820 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2821 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2822 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2823 * 2824 * Changing the filter mode is not allowed; if a matching ilg already 2825 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2826 * 2827 * Verifies that there is a source address of appropriate scope for 2828 * the group; if not, EADDRNOTAVAIL is returned. 2829 * 2830 * The interface to be used may be identified by an address or by an 2831 * index. A pointer to the index is passed; if it is NULL, use the 2832 * address, otherwise, use the index. 2833 */ 2834 int 2835 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2836 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2837 mblk_t *first_mp) 2838 { 2839 ipif_t *ipif; 2840 ipsq_t *ipsq; 2841 int err = 0; 2842 ill_t *ill; 2843 2844 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2845 ip_restart_optmgmt, &ipif); 2846 if (err != 0) { 2847 if (err != EINPROGRESS) { 2848 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2849 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2850 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2851 } 2852 return (err); 2853 } 2854 ASSERT(ipif != NULL); 2855 2856 ill = ipif->ipif_ill; 2857 /* Operation not supported on a virtual network interface */ 2858 if (IS_VNI(ill)) { 2859 ipif_refrele(ipif); 2860 return (EINVAL); 2861 } 2862 2863 if (checkonly) { 2864 /* 2865 * do not do operation, just pretend to - new T_CHECK 2866 * semantics. The error return case above if encountered 2867 * considered a good enough "check" here. 
2868 */ 2869 ipif_refrele(ipif); 2870 return (0); 2871 } 2872 2873 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2874 NEW_OP); 2875 2876 /* unspecified source addr => no source filtering */ 2877 err = ilg_add(connp, group, ipif, fmode, src); 2878 2879 IPSQ_EXIT(ipsq); 2880 2881 ipif_refrele(ipif); 2882 return (err); 2883 } 2884 2885 /* 2886 * Handle the following optmgmt: 2887 * IPV6_JOIN_GROUP must not have joined already 2888 * MCAST_JOIN_GROUP must not have joined already 2889 * MCAST_BLOCK_SOURCE must have joined already 2890 * MCAST_JOIN_SOURCE_GROUP may have joined already 2891 * 2892 * fmode and src parameters may be used to determine which option is 2893 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2894 * are functionally equivalent): 2895 * opt fmode v6src 2896 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2897 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2898 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2899 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2900 * 2901 * Changing the filter mode is not allowed; if a matching ilg already 2902 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2903 * 2904 * Verifies that there is a source address of appropriate scope for 2905 * the group; if not, EADDRNOTAVAIL is returned. 2906 * 2907 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2908 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2909 * v6src is also v4-mapped. 
2910 */ 2911 int 2912 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2913 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2914 const in6_addr_t *v6src, mblk_t *first_mp) 2915 { 2916 ill_t *ill; 2917 ipif_t *ipif; 2918 char buf[INET6_ADDRSTRLEN]; 2919 ipaddr_t v4group, v4src; 2920 boolean_t isv6; 2921 ipsq_t *ipsq; 2922 int err; 2923 2924 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2925 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2926 if (err != 0) { 2927 if (err != EINPROGRESS) { 2928 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2929 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2930 sizeof (buf)), ifindex)); 2931 } 2932 return (err); 2933 } 2934 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2935 2936 /* operation is not supported on the virtual network interface */ 2937 if (isv6) { 2938 if (IS_VNI(ill)) { 2939 ill_refrele(ill); 2940 return (EINVAL); 2941 } 2942 } else { 2943 if (IS_VNI(ipif->ipif_ill)) { 2944 ipif_refrele(ipif); 2945 return (EINVAL); 2946 } 2947 } 2948 2949 if (checkonly) { 2950 /* 2951 * do not do operation, just pretend to - new T_CHECK 2952 * semantics. The error return case above if encountered 2953 * considered a good enough "check" here. 
2954 */ 2955 if (isv6) 2956 ill_refrele(ill); 2957 else 2958 ipif_refrele(ipif); 2959 return (0); 2960 } 2961 2962 if (!isv6) { 2963 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2964 ipsq, NEW_OP); 2965 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2966 IPSQ_EXIT(ipsq); 2967 ipif_refrele(ipif); 2968 } else { 2969 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2970 ipsq, NEW_OP); 2971 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2972 IPSQ_EXIT(ipsq); 2973 ill_refrele(ill); 2974 } 2975 2976 return (err); 2977 } 2978 2979 static int 2980 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2981 mcast_record_t fmode, ipaddr_t src) 2982 { 2983 ilg_t *ilg; 2984 in6_addr_t v6src; 2985 boolean_t leaving = B_FALSE; 2986 2987 ASSERT(IAM_WRITER_IPIF(ipif)); 2988 2989 /* 2990 * The ilg is valid only while we hold the conn lock. Once we drop 2991 * the lock, another thread can locate another ilg on this connp, 2992 * but on a different ipif, and delete it, and cause the ilg array 2993 * to be reallocated and copied. Hence do the ilg_delete before 2994 * dropping the lock. 2995 */ 2996 mutex_enter(&connp->conn_lock); 2997 ilg = ilg_lookup_ipif(connp, group, ipif); 2998 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2999 mutex_exit(&connp->conn_lock); 3000 return (EADDRNOTAVAIL); 3001 } 3002 3003 /* 3004 * Decide if we're actually deleting the ilg or just removing a 3005 * source filter address; if just removing an addr, make sure we 3006 * aren't trying to change the filter mode, and that the addr is 3007 * actually in our filter list already. If we're removing the 3008 * last src in an include list, just delete the ilg. 
3009 */ 3010 if (src == INADDR_ANY) { 3011 v6src = ipv6_all_zeros; 3012 leaving = B_TRUE; 3013 } else { 3014 int err = 0; 3015 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3016 if (fmode != ilg->ilg_fmode) 3017 err = EINVAL; 3018 else if (ilg->ilg_filter == NULL || 3019 !list_has_addr(ilg->ilg_filter, &v6src)) 3020 err = EADDRNOTAVAIL; 3021 if (err != 0) { 3022 mutex_exit(&connp->conn_lock); 3023 return (err); 3024 } 3025 if (fmode == MODE_IS_INCLUDE && 3026 ilg->ilg_filter->sl_numsrc == 1) { 3027 v6src = ipv6_all_zeros; 3028 leaving = B_TRUE; 3029 } 3030 } 3031 3032 ilg_delete(connp, ilg, &v6src); 3033 mutex_exit(&connp->conn_lock); 3034 3035 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 3036 return (0); 3037 } 3038 3039 static int 3040 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 3041 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 3042 { 3043 ilg_t *ilg; 3044 boolean_t leaving = B_TRUE; 3045 3046 ASSERT(IAM_WRITER_ILL(ill)); 3047 3048 mutex_enter(&connp->conn_lock); 3049 ilg = ilg_lookup_ill_v6(connp, v6group, ill); 3050 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 3051 mutex_exit(&connp->conn_lock); 3052 return (EADDRNOTAVAIL); 3053 } 3054 3055 /* 3056 * Decide if we're actually deleting the ilg or just removing a 3057 * source filter address; if just removing an addr, make sure we 3058 * aren't trying to change the filter mode, and that the addr is 3059 * actually in our filter list already. If we're removing the 3060 * last src in an include list, just delete the ilg. 
3061 */ 3062 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3063 int err = 0; 3064 if (fmode != ilg->ilg_fmode) 3065 err = EINVAL; 3066 else if (ilg->ilg_filter == NULL || 3067 !list_has_addr(ilg->ilg_filter, v6src)) 3068 err = EADDRNOTAVAIL; 3069 if (err != 0) { 3070 mutex_exit(&connp->conn_lock); 3071 return (err); 3072 } 3073 if (fmode == MODE_IS_INCLUDE && 3074 ilg->ilg_filter->sl_numsrc == 1) 3075 v6src = NULL; 3076 else 3077 leaving = B_FALSE; 3078 } 3079 3080 ilg_delete(connp, ilg, v6src); 3081 mutex_exit(&connp->conn_lock); 3082 (void) ip_delmulti_v6(v6group, ill, connp->conn_zoneid, B_FALSE, 3083 leaving); 3084 3085 return (0); 3086 } 3087 3088 /* 3089 * Handle the following optmgmt: 3090 * IP_DROP_MEMBERSHIP will leave 3091 * MCAST_LEAVE_GROUP will leave 3092 * IP_UNBLOCK_SOURCE will not leave 3093 * MCAST_UNBLOCK_SOURCE will not leave 3094 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3095 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3096 * 3097 * fmode and src parameters may be used to determine which option is 3098 * being set, as follows (the IP_* and MCAST_* versions of each option 3099 * are functionally equivalent): 3100 * opt fmode src 3101 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 3102 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 3103 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3104 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3105 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3106 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3107 * 3108 * Changing the filter mode is not allowed; if a matching ilg already 3109 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3110 * 3111 * The interface to be used may be identified by an address or by an 3112 * index. A pointer to the index is passed; if it is NULL, use the 3113 * address, otherwise, use the index. 
3114 */ 3115 int 3116 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3117 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3118 mblk_t *first_mp) 3119 { 3120 ipif_t *ipif; 3121 ipsq_t *ipsq; 3122 int err; 3123 ill_t *ill; 3124 3125 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3126 ip_restart_optmgmt, &ipif); 3127 if (err != 0) { 3128 if (err != EINPROGRESS) { 3129 ip1dbg(("ip_opt_delete_group: no ipif for group " 3130 "0x%x, ifaddr 0x%x\n", 3131 (int)ntohl(group), (int)ntohl(ifaddr))); 3132 } 3133 return (err); 3134 } 3135 ASSERT(ipif != NULL); 3136 3137 ill = ipif->ipif_ill; 3138 /* Operation not supported on a virtual network interface */ 3139 if (IS_VNI(ill)) { 3140 ipif_refrele(ipif); 3141 return (EINVAL); 3142 } 3143 3144 if (checkonly) { 3145 /* 3146 * do not do operation, just pretend to - new T_CHECK 3147 * semantics. The error return case above if encountered 3148 * considered a good enough "check" here. 3149 */ 3150 ipif_refrele(ipif); 3151 return (0); 3152 } 3153 3154 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3155 NEW_OP); 3156 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3157 IPSQ_EXIT(ipsq); 3158 3159 ipif_refrele(ipif); 3160 return (err); 3161 } 3162 3163 /* 3164 * Handle the following optmgmt: 3165 * IPV6_LEAVE_GROUP will leave 3166 * MCAST_LEAVE_GROUP will leave 3167 * MCAST_UNBLOCK_SOURCE will not leave 3168 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3169 * 3170 * fmode and src parameters may be used to determine which option is 3171 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3172 * are functionally equivalent): 3173 * opt fmode v6src 3174 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3175 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3176 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3177 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3178 * 3179 * Changing the filter mode is not 
 * allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t	*ill;
	ipif_t	*ipif;
	char	buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t	*ipsq;
	int	err;

	/*
	 * ip_opt_check_v6() classifies the group as native v6 (isv6, refheld
	 * ill returned) or v4-mapped (!isv6, refheld ipif returned, with the
	 * group/src converted to v4 in v4group/v4src).
	 */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	/*
	 * Serialize on the appropriate object (ipif for v4-mapped, ill for
	 * native v6) before calling the exclusive delete routine.
	 */
	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
		    v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
		    v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Group mgmt for upper conn that passes things down
 * to the interface multicast list (and DLPI)
 * These routines can handle new style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces.)
 */

/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI).
 * Caller must be writer on the ipif's ipsq (ASSERTed below).
 * Returns 0 or an errno (EADDRNOTAVAIL, EADDRINUSE, ENOBUFS, EINVAL, ENOMEM).
 */
static int
ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
    ipaddr_t src)
{
	int	error = 0;
	ill_t	*ill;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ill = ipif->ipif_ill;

	/* The interface must be multicast-capable. */
	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
	 * serialize 2 threads doing join (sock, group1, hme0:0) and
	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
	 * but both operations happen on the same conn.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added. Figure out our rules based
	 * on fmode and src params. Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (src == INADDR_ANY) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join on this (group, ipif): create a new ilg. */
		ilgstat = ILGSTAT_NEW;
		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (error);
		}
		if (src != INADDR_ANY) {
			/* Single-source join: filter starts with just src. */
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			IN6_IPADDR_TO_V4MAPPED(src,
			    &ilg->ilg_filter->sl_addr[0]);
		}
		/* v4 groups are stored internally as v4-mapped v6. */
		if (group == INADDR_ANY) {
			ilg->ilg_v6group = ipv6_all_zeros;
		} else {
			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
		}
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_fmode = fmode;
	} else {
		/* Adding a source to an existing membership. */
		int	index;
		in6_addr_t v6src;
		ilgstat = ILGSTAT_CHANGE;
		/* Changing the filter mode of an existing ilg is an error. */
		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		/* Duplicate source in the filter is an error. */
		if (list_has_addr(ilg->ilg_filter, &v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * Need to undo what we did before calling ip_addmulti()!
		 * Must look up the ilg again since we've not been holding
		 * conn_lock.
		 */
		in6_addr_t v6src;
		if (ilgstat == ILGSTAT_NEW)
			v6src = ipv6_all_zeros;	/* delete the whole ilg */
		else
			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, group, ipif);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &v6src);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);
	return (0);
}

/*
 * IPv6 counterpart of ilg_add(): add a membership (or a source to an
 * existing membership) for (v6group, ill) on this conn, then push the
 * resulting filter state down to the ill via ip_addmulti_v6().
 * Caller must be writer on the ill's ipsq (ASSERTed below).
 */
static int
ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	int	error = 0;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_lock protects the ilg list.  Serializes 2 threads doing
	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
	 * and hme1 map to different ipsq's, but both operations happen
	 * on the same conn.
	 */
	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ill_v6(connp, v6group, ill);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added. Figure out our rules based
	 * on fmode and src params. Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join on this (group, ill): create a new ilg. */
		if ((ilg = conn_ilg_alloc(connp, &error)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (error);
		}
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			/* Single-source join: filter starts with just v6src. */
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ipif = NULL;
		ilg->ilg_ill = ill;
	} else {
		/* Adding a source to an existing membership. */
		int	index;
		/* Changing the filter mode of an existing ilg is an error. */
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		/* Duplicate source in the filter is an error. */
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	error = ip_addmulti_v6(v6group, ill, connp->conn_zoneid, ilgstat,
	    new_fmode, new_filter);
	if (error != 0) {
		/*
		 * But because we waited, we have to undo the ilg update
		 * if ip_addmulti_v6() fails.  We also must lookup ilg
		 * again, since we've not been holding conn_lock.
		 */
		in6_addr_t delsrc =
		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_v6(connp, v6group, ill);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &delsrc);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);

	return (0);
}

/*
 * Find an IPv4 ilg matching group, ill and source.
 * Caller must hold conn_lock (ASSERTed).  Returns the matching ilg, or
 * NULL if there is no membership or the ilg's source filter excludes src.
 */
ilg_t *
ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ipif_t	*ipif;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/* Scan the conn's ilg array for a (group, ill) match. */
	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((ipif = ilg->ilg_ipif) == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		ASSERT(ilg->ilg_ill == NULL);
		ilg_ill = ipif->ipif_ill;
		ASSERT(!ilg_ill->ill_isv6);
		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* Match if src is included by the filter mode. */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		return (ilg);

	return (NULL);
}

/*
 * Find an IPv6 ilg matching group, ill, and source.
 * Caller must hold conn_lock (ASSERTed).  Returns the matching ilg, or
 * NULL if there is no membership or the ilg's source filter excludes v6src.
 */
ilg_t *
ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/* Scan the conn's ilg array for a (group, ill) match. */
	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((ilg_ill = ilg->ilg_ill) == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(ilg_ill->ill_isv6);
		if (IS_ON_SAME_LAN(ilg_ill, ill) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no
source filter, so this is a match */ 3657 return (ilg); 3658 } 3659 break; 3660 } 3661 } 3662 if (i == connp->conn_ilg_inuse) 3663 return (NULL); 3664 3665 /* 3666 * we have an ilg with matching ill and group; but 3667 * the ilg has a source list that we must check. 3668 */ 3669 isinlist = B_FALSE; 3670 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3671 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3672 isinlist = B_TRUE; 3673 break; 3674 } 3675 } 3676 3677 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3678 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3679 return (ilg); 3680 3681 return (NULL); 3682 } 3683 3684 /* 3685 * Find an IPv6 ilg matching group and ill 3686 */ 3687 ilg_t * 3688 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3689 { 3690 ilg_t *ilg; 3691 int i; 3692 ill_t *mem_ill; 3693 3694 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3695 3696 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3697 ilg = &connp->conn_ilg[i]; 3698 if ((mem_ill = ilg->ilg_ill) == NULL || 3699 (ilg->ilg_flags & ILG_DELETED) != 0) 3700 continue; 3701 ASSERT(ilg->ilg_ipif == NULL); 3702 ASSERT(mem_ill->ill_isv6); 3703 if (mem_ill == ill && 3704 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3705 return (ilg); 3706 } 3707 return (NULL); 3708 } 3709 3710 /* 3711 * Find an IPv4 ilg matching group and ipif 3712 */ 3713 static ilg_t * 3714 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3715 { 3716 in6_addr_t v6group; 3717 int i; 3718 ilg_t *ilg; 3719 3720 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3721 ASSERT(!ipif->ipif_ill->ill_isv6); 3722 3723 if (group == INADDR_ANY) 3724 v6group = ipv6_all_zeros; 3725 else 3726 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3727 3728 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3729 ilg = &connp->conn_ilg[i]; 3730 if ((ilg->ilg_flags & ILG_DELETED) == 0 && 3731 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) && 3732 ilg->ilg_ipif == ipif) 3733 return (ilg); 3734 } 3735 return (NULL); 3736 } 

/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 *
 * Caller must hold conn_lock and be writer on the ilg's ipif or ill
 * (all ASSERTed below).  If walkers are active on conn_ilg[], the entry
 * is only flagged ILG_DELETED; the last walker performs the physical
 * removal and repacking.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	int	i;

	/* Exactly one of ilg_ipif (v4) / ilg_ill (v6) is set. */
	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(!(ilg->ilg_flags & ILG_DELETED));

	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
		/* Whole-ilg delete. */
		if (connp->conn_ilg_walker_cnt != 0) {
			/* Defer: walkers may hold pointers into conn_ilg[]. */
			ilg->ilg_flags |= ILG_DELETED;
			return;
		}

		FREE_SLIST(ilg->ilg_filter);

		i = ilg - &connp->conn_ilg[0];
		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);

		/* Move other entries up one step */
		connp->conn_ilg_inuse--;
		for (; i < connp->conn_ilg_inuse; i++)
			connp->conn_ilg[i] = connp->conn_ilg[i+1];

		if (connp->conn_ilg_inuse == 0) {
			mi_free((char *)connp->conn_ilg);
			connp->conn_ilg = NULL;
			/* Wake threads waiting for the list to drain. */
			cv_broadcast(&connp->conn_refcv);
		}
	} else {
		/* Source-specific delete: just drop src from the filter. */
		l_remove(ilg->ilg_filter, src);
	}
}

/*
 * Called from conn close. No new ilg can be added or removed.
 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
 * will return error if conn has started closing.
 */
void
ilg_delete_all(conn_t *connp)
{
	int	i;
	ipif_t	*ipif = NULL;
	ill_t	*ill = NULL;
	ilg_t	*ilg;
	in6_addr_t v6group;
	boolean_t success;
	ipsq_t	*ipsq;

	mutex_enter(&connp->conn_lock);
retry:
	/* Hold the walker count so conn_ilg[] is not repacked under us. */
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
		ilg = &connp->conn_ilg[i];
		/*
		 * Since this walk is not atomic (we drop the
		 * conn_lock and wait in ipsq_enter) we need
		 * to check for the ILG_DELETED flag.
		 */
		if (ilg->ilg_flags & ILG_DELETED)
			continue;

		/* v4-mapped memberships hang off an ipif; v6 off an ill. */
		if (IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}

		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 * If we are unable to do even that, then the ill is closing,
		 * in which case the unplumb thread will handle the cleanup,
		 * and we move on to the next ilg.
		 */
		if (!ill_waiter_inc(ill))
			continue;

		mutex_exit(&connp->conn_lock);
		/*
		 * To prevent deadlock between ill close which waits inside
		 * the perimeter, and conn close, ipsq_enter returns error,
		 * the moment ILL_CONDEMNED is set, in which case ill close
		 * takes responsibility to cleanup the ilgs. Note that we
		 * have not yet set condemned flag, otherwise the conn can't
		 * be refheld for cleanup by those routines and it would be
		 * a mutual deadlock.
		 */
		success = ipsq_enter(ill, B_FALSE, NEW_OP);
		ipsq = ill->ill_phyint->phyint_ipsq;
		ill_waiter_dcr(ill);
		mutex_enter(&connp->conn_lock);
		if (!success)
			continue;

		/*
		 * Move on if the ilg was deleted while conn_lock was dropped.
		 */
		if (ilg->ilg_flags & ILG_DELETED) {
			mutex_exit(&connp->conn_lock);
			ipsq_exit(ipsq);
			mutex_enter(&connp->conn_lock);
			continue;
		}
		v6group = ilg->ilg_v6group;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);

		/* Leave the group on the interface; we hold the ipsq. */
		if (ipif != NULL) {
			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
			    B_FALSE, B_TRUE);
		} else {
			(void) ip_delmulti_v6(&v6group, ill,
			    connp->conn_zoneid, B_FALSE, B_TRUE);
		}
		ipsq_exit(ipsq);
		mutex_enter(&connp->conn_lock);
	}
	ILG_WALKER_RELE(connp);

	/* If any ill was skipped above wait and retry */
	if (connp->conn_ilg_inuse != 0) {
		cv_wait(&connp->conn_refcv, &connp->conn_lock);
		goto retry;
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Called from ill close by ipcl_walk for clearing conn_ilg and
 * conn_multicast_ipif for a given ipif. conn is held by caller.
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire/ipif and then only set
 * condemned flag.
 */
static void
conn_delete_ipif(conn_t *connp, caddr_t arg)
{
	ipif_t	*ipif = (ipif_t *)arg;
	int	i;
	char	group_buf1[INET6_ADDRSTRLEN];
	char	group_buf2[INET6_ADDRSTRLEN];
	ipaddr_t group;
	ilg_t	*ilg;

	/*
	 * Even though conn_ilg_inuse can change while we are in this loop,
	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
	 * be created or deleted for this connp, on this ill, since this ill
	 * is the perimeter. So we won't miss any ilg in this cleanup.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Increment the walker count, so that ilg repacking does not
	 * occur while we are in the loop.
	 */
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
		ilg = &connp->conn_ilg[i];
		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
			continue;
		/*
		 * ip_close cannot be cleaning this ilg at the same time.
		 * since it also has to execute in this ill's perimeter which
		 * we are now holding. Only a clean conn can be condemned.
		 */
		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));

		/* Blow away the membership */
		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
		    group_buf1, sizeof (group_buf1)),
		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
		    group_buf2, sizeof (group_buf2)),
		    ipif->ipif_ill->ill_name));

		/* ilg_ipif is NULL for V6, so we won't be here */
		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));

		group = V4_PART_OF_V6(ilg->ilg_v6group);
		ilg_delete(connp, &connp->conn_ilg[i], NULL);
		mutex_exit(&connp->conn_lock);

		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * If we are the last walker, need to physically delete the
	 * ilgs and repack.
	 */
	ILG_WALKER_RELE(connp);

	if (connp->conn_multicast_ipif == ipif) {
		/* Revert to late binding */
		connp->conn_multicast_ipif = NULL;
	}
	mutex_exit(&connp->conn_lock);

	conn_delete_ire(connp, (caddr_t)ipif);
}

/*
 * Called from ill close by ipcl_walk for clearing conn_ilg and
 * conn_multicast_ill for a given ill. conn is held by caller.
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire/ipif and then only set
 * condemned flag.
 */
static void
conn_delete_ill(conn_t *connp, caddr_t arg)
{
	ill_t	*ill = (ill_t *)arg;
	int	i;
	char	group_buf[INET6_ADDRSTRLEN];
	in6_addr_t v6group;
	ilg_t	*ilg;

	/*
	 * Even though conn_ilg_inuse can change while we are in this loop,
	 * no new ilgs can be created/deleted for this connp, on this
	 * ill, since this ill is the perimeter. So we won't miss any ilg
	 * in this cleanup.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Increment the walker count, so that ilg repacking does not
	 * occur while we are in the loop.
	 */
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
		ilg = &connp->conn_ilg[i];
		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * ip_close cannot be cleaning this ilg at the same
			 * time, since it also has to execute in this ill's
			 * perimeter which we are now holding. Only a clean
			 * conn can be condemned.
			 */
			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));

			/* Blow away the membership */
			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
			    group_buf, sizeof (group_buf)),
			    ill->ill_name));

			v6group = ilg->ilg_v6group;
			ilg_delete(connp, ilg, NULL);
			mutex_exit(&connp->conn_lock);

			(void) ip_delmulti_v6(&v6group, ill,
			    connp->conn_zoneid, B_FALSE, B_TRUE);
			mutex_enter(&connp->conn_lock);
		}
	}
	/*
	 * If we are the last walker, need to physically delete the
	 * ilgs and repack.
	 */
	ILG_WALKER_RELE(connp);

	if (connp->conn_multicast_ill == ill) {
		/* Revert to late binding */
		connp->conn_multicast_ill = NULL;
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Called when an ipif is unplumbed to make sure that there are no
 * dangling conn references to that ipif.
4026 * Handles ilg_ipif and conn_multicast_ipif 4027 */ 4028 void 4029 reset_conn_ipif(ipif) 4030 ipif_t *ipif; 4031 { 4032 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4033 4034 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4035 } 4036 4037 /* 4038 * Called when an ill is unplumbed to make sure that there are no 4039 * dangling conn references to that ill. 4040 * Handles ilg_ill, conn_multicast_ill. 4041 */ 4042 void 4043 reset_conn_ill(ill_t *ill) 4044 { 4045 ip_stack_t *ipst = ill->ill_ipst; 4046 4047 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 4048 } 4049 4050 #ifdef DEBUG 4051 /* 4052 * Walk functions walk all the interfaces in the system to make 4053 * sure that there is no refernece to the ipif or ill that is 4054 * going away. 4055 */ 4056 int 4057 ilm_walk_ill(ill_t *ill) 4058 { 4059 int cnt = 0; 4060 ill_t *till; 4061 ilm_t *ilm; 4062 ill_walk_context_t ctx; 4063 ip_stack_t *ipst = ill->ill_ipst; 4064 4065 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4066 till = ILL_START_WALK_ALL(&ctx, ipst); 4067 for (; till != NULL; till = ill_next(&ctx, till)) { 4068 mutex_enter(&till->ill_lock); 4069 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4070 if (ilm->ilm_ill == ill) { 4071 cnt++; 4072 } 4073 } 4074 mutex_exit(&till->ill_lock); 4075 } 4076 rw_exit(&ipst->ips_ill_g_lock); 4077 4078 return (cnt); 4079 } 4080 4081 /* 4082 * This function is called before the ipif is freed. 4083 */ 4084 int 4085 ilm_walk_ipif(ipif_t *ipif) 4086 { 4087 int cnt = 0; 4088 ill_t *till; 4089 ilm_t *ilm; 4090 ill_walk_context_t ctx; 4091 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4092 4093 till = ILL_START_WALK_ALL(&ctx, ipst); 4094 for (; till != NULL; till = ill_next(&ctx, till)) { 4095 mutex_enter(&till->ill_lock); 4096 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4097 if (ilm->ilm_ipif == ipif) { 4098 cnt++; 4099 } 4100 } 4101 mutex_exit(&till->ill_lock); 4102 } 4103 return (cnt); 4104 } 4105 #endif 4106