1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #include <sys/types.h> 28 #include <sys/stream.h> 29 #include <sys/dlpi.h> 30 #include <sys/stropts.h> 31 #include <sys/strsun.h> 32 #include <sys/ddi.h> 33 #include <sys/cmn_err.h> 34 #include <sys/sdt.h> 35 #include <sys/zone.h> 36 37 #include <sys/param.h> 38 #include <sys/socket.h> 39 #include <sys/sockio.h> 40 #include <net/if.h> 41 #include <sys/systm.h> 42 #include <sys/strsubr.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 int orig_ifindex, zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 74 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 76 const in6_addr_t *v6group, int index); 77 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 78 ipif_t *ipif); 79 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 80 mcast_record_t fmode, ipaddr_t src); 81 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 82 mcast_record_t fmode, const in6_addr_t *v6src); 83 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 84 static mblk_t 
*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
static void	conn_ilg_reap(conn_t *connp);
static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);

/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conn (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
 *
 * Multicast joins and leaves are single-threaded per phyint/IPMP group
 * using the ipsq serialization mechanism.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm. ilms are created / destroyed only as writer. ilms
 * are not passed around, instead they are looked up and used under the
 * ill_lock or as writer. So we don't need a dynamic refcount of the number
 * of threads holding reference to an ilm.
 *
 * Multicast Join operation:
 *
 * The first step is to determine the ipif (v4) or ill (v6) on which
 * the join operation is to be done. The join is done after becoming
 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
 * and ill->ill_ilm are thus accessed and modified exclusively per ill.
 * Multiple threads can attempt to join simultaneously on different ipif/ill
 * on the same conn. In this case the ipsq serialization does not help in
 * protecting the ilg. It is the conn_lock that is used to protect the ilg.
 * The conn_lock also protects all the ilg_t members.
 *
 * Leave operation.
 *
 * Similar to the join operation, the first step is to determine the ipif
 * or ill (v6) on which the leave operation is to be done. The leave operation
 * is done after becoming exclusive on the ipsq associated with the ipif or ill.
 * As with join ilg modification is done under the protection of the conn lock.
 */

/*
 * Become exclusive on the ipsq for the given ipif; on failure to enter
 * immediately, drop the ipif reference and return EINPROGRESS (the
 * operation is requeued and restarted from the ipsq later).
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);						\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}

/* As above, but for an ill; drops the ill reference on failure. */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);					\
	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);		\
	if ((ipsq) == NULL) {					\
		ill_refrele(ill);				\
		return (EINPROGRESS);				\
	}

#define	IPSQ_EXIT(ipsq)	\
	if (ipsq != NULL)	\
		ipsq_exit(ipsq);

/* Note a walk of connp's conn_ilg array in progress (under conn_lock). */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

/*
 * Drop the walker count; the last walker out compacts the conn_ilg
 * array via conn_ilg_reap(), physically removing ILG_DELETED entries.
 */
#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}

/*
 * Compact connp's conn_ilg array in place: free the source filter list
 * of every entry marked ILG_DELETED, clear that flag, and slide the
 * surviving entries down over the holes. If no entries remain, free
 * the array itself and wake any thread waiting on conn_refcv.
 * Caller must hold conn_lock; only called once no walkers are active
 * (see ILG_WALKER_RELE).
 */
static void
conn_ilg_reap(conn_t *connp)
{
	int	to;
	int	from;
	ilg_t	*ilg;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	to = 0;
	from = 0;
	while (from < connp->conn_ilg_inuse) {
		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
			ilg = &connp->conn_ilg[from];
			FREE_SLIST(ilg->ilg_filter);
			ilg->ilg_flags &= ~ILG_DELETED;
			from++;
			continue;
		}
		if (to != from)
			connp->conn_ilg[to] = connp->conn_ilg[from];
		to++;
		from++;
	}

	connp->conn_ilg_inuse = to;

	if (connp->conn_ilg_inuse == 0) {
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = NULL;
		cv_broadcast(&connp->conn_refcv);
	}
}

/* Zeroed allocation of `number' structures (mi_zalloc returns zero-fill). */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

#define	ILG_ALLOC_CHUNK	16

/*
 * Returns a pointer to the next available ilg in conn_ilg. Allocs more
 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
 * returned ilg). Returns NULL on failure (ENOMEM).
 *
 * Assumes connp->conn_lock is held.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp)
{
	ilg_t *new, *ret;
	int curcnt;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);

	if (connp->conn_ilg == NULL) {
		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
		if (connp->conn_ilg == NULL)
			return (NULL);
		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
		connp->conn_ilg_inuse = 0;
	}
	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
		if (connp->conn_ilg_walker_cnt != 0) {
			/*
			 * XXX We cannot grow the array at this point
			 * because a list walker could be in progress, and
			 * we cannot wipe out the existing array until the
			 * walker is done. Just return NULL for now.
			 * ilg_delete_all() will have to be changed when
			 * this logic is changed.
			 */
			return (NULL);
		}
		curcnt = connp->conn_ilg_allocated;
		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
		if (new == NULL)
			return (NULL);
		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = new;
		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
	}

	/*
	 * Slots at or past conn_ilg_inuse were either freshly mi_zalloc'd
	 * or had ILG_DELETED cleared by conn_ilg_reap(), hence the ASSERT.
	 */
	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
	bzero(ret, sizeof (*ret));
	return (ret);
}

/*
 * Accumulator used while walking every conn (ipcl_walk) to rebuild the
 * interface-level source filter for one ilm's multicast group.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being rebuilt */
	int		fbld_in_cnt;	/* number of INCLUDE-mode ilgs seen */
	int		fbld_ex_cnt;	/* number of EXCLUDE-mode ilgs seen */
	slist_t		fbld_in;	/* union of the include filter lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* include union overflowed slist */
} ilm_fbld_t;

/*
 * ipcl_walk() callback: if this conn has an ilg matching the fbld's ilm
 * (same group and same interface), fold that ilg's filter mode and
 * source list into the master include/exclude lists being accumulated
 * in arg (an ilm_fbld_t).
 */
static void
ilm_bld_flists(conn_t *conn, void *arg)
{
	int i;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (conn->conn_ilg_inuse == 0)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn. But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match). If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 */
	mutex_enter(&conn->conn_lock);
	for (i = 0; i < conn->conn_ilg_inuse; i++) {
		ilg_t *ilg = &conn->conn_ilg[i];
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty. If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	mutex_exit(&conn->conn_lock);
}

/*
 * Compute the interface-level filter mode and source list for ilm's
 * group by merging the filter state of every interested conn (walked
 * via ilm_bld_flists). Results are returned through fmode and flist.
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in. A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}

/*
 * If the given interface has failed, choose a new one to join on so
 * that we continue to receive packets. ilg_orig_ifindex remembers
 * what the application used to join on so that we know the ilg to
 * delete even though we change the ill here. Callers will store the
 * ilg returned from this function in ilg_ill. Thus when we receive
 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
 *
 * This function must be called as writer so we can walk the group
 * list and examine flags without holding a lock.
 */
ill_t *
ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
{
	ill_t	*till;
	ill_group_t *illgrp = ill->ill_group;

	ASSERT(IAM_WRITER_ILL(ill));

	/* Unspecified group, or ill not in a group: nothing to choose. */
	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
		return (ill);

	/* The requested ill is neither failed nor inactive: keep it. */
	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
		return (ill);

	/* Scan the group for the first ill that is not FAILED/INACTIVE. */
	till = illgrp->illgrp_ill;
	while (till != NULL &&
	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
		till = till->ill_group_next;
	}
	if (till != NULL)
		return (till);

	/* No usable alternative found: fall back to the original ill. */
	return (ill);
}

/*
 * Fold a new join (or a filter change on an existing ilg) into an
 * existing ilm: update the reference counts, recompute the ilm's
 * filter state, and send an IGMP/MLD state change report if the state
 * actually changed. ilgstat says whether an ilg is associated with
 * this join (and if so whether it is new or changed); ilg_flist is
 * that ilg's source filter list. Returns 0 on success or ENOMEM.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 * - we've had previous joins without associated ilgs
	 * - this join has no associated ilg
	 * - the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* a CHANGE reuses the existing reference; anything else adds one */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state. If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}

/*
 * Recompute the ilm's filter state after an ilg has been updated or
 * removed while the ilm still has references, and send an IGMP/MLD
 * state change report if the state changed. Returns 0 on success or
 * ENOMEM (filter-state allocation failures degrade to (EXCLUDE, NULL)
 * rather than failing the leave).
 */
static int
ilm_update_del(ilm_t *ilm, boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}

/*
 * INADDR_ANY means all multicast addresses. This is only used
 * by the multicast router.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 */
int
ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ipif(ipif, group);
	mutex_exit(&ill->ill_lock);
	/*
	 * Since we are writer, we know the ilm_flags itself cannot
	 * change at this point, and ilm_lookup_ipif would not have
	 * returned a DELETED ilm. However, the data path can free
	 * ilm->next via ilm_walker_cleanup() so we can safely
	 * access anything in ilm except ilm_next (for safe access to
	 * ilm_next we'd have to take the ill_lock).
	 */
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));

	/*
	 * ilms are associated with ipifs in IPv4. It moves with the
	 * ipif if the ipif moves to a new ill when the interface
	 * fails. Thus we really don't check whether the ipif_ill
	 * has failed like in IPv6. If it has FAILED the ipif
	 * will move (daemon will move it) and hence the ilm, if the
	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
	 * we continue to receive in the same place even if the
	 * interface fails.
612 */ 613 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 614 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 615 if (ilm == NULL) 616 return (ENOMEM); 617 618 if (group == INADDR_ANY) { 619 /* 620 * Check how many ipif's have members in this group - 621 * if more then one we should not tell the driver to join 622 * this time 623 */ 624 if (ilm_numentries_v6(ill, &v6group) > 1) 625 return (0); 626 if (ill->ill_group == NULL) 627 ret = ill_join_allmulti(ill); 628 else 629 ret = ill_nominate_mcast_rcv(ill->ill_group); 630 if (ret != 0) 631 ilm_delete(ilm); 632 return (ret); 633 } 634 635 if (!IS_LOOPBACK(ill)) 636 igmp_joingroup(ilm); 637 638 if (ilm_numentries_v6(ill, &v6group) > 1) 639 return (0); 640 641 ret = ip_ll_addmulti_v6(ipif, &v6group); 642 if (ret != 0) 643 ilm_delete(ilm); 644 return (ret); 645 } 646 647 /* 648 * The unspecified address means all multicast addresses. 649 * This is only used by the multicast router. 650 * 651 * ill identifies the interface to join on; it may not match the 652 * interface requested by the application of a failover has taken 653 * place. orig_ifindex always identifies the interface requested 654 * by the app. 655 * 656 * ilgstat tells us if there's an ilg associated with this join, 657 * and if so, if it's a new ilg or a change to an existing one. 658 * ilg_fmode and ilg_flist give us the current filter state of 659 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 660 */ 661 int 662 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 663 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 664 slist_t *ilg_flist) 665 { 666 ilm_t *ilm; 667 int ret; 668 669 ASSERT(IAM_WRITER_ILL(ill)); 670 671 if (!IN6_IS_ADDR_MULTICAST(v6group) && 672 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 673 return (EINVAL); 674 } 675 676 /* 677 * An ilm is uniquely identified by the tuple of (group, ill, 678 * orig_ill). 
group is the multicast group address, ill is 679 * the interface on which it is currently joined, and orig_ill 680 * is the interface on which the application requested the 681 * join. orig_ill and ill are the same unless orig_ill has 682 * failed over. 683 * 684 * Both orig_ill and ill are required, which means we may have 685 * 2 ilms on an ill for the same group, but with different 686 * orig_ills. These must be kept separate, so that when failback 687 * occurs, the appropriate ilms are moved back to their orig_ill 688 * without disrupting memberships on the ill to which they had 689 * been moved. 690 * 691 * In order to track orig_ill, we store orig_ifindex in the 692 * ilm and ilg. 693 */ 694 mutex_enter(&ill->ill_lock); 695 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 696 mutex_exit(&ill->ill_lock); 697 if (ilm != NULL) 698 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 699 700 /* 701 * We need to remember where the application really wanted 702 * to join. This will be used later if we want to failback 703 * to the original interface. 704 */ 705 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 706 ilg_flist, orig_ifindex, zoneid); 707 if (ilm == NULL) 708 return (ENOMEM); 709 710 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 711 /* 712 * Check how many ipif's that have members in this group - 713 * if more then one we should not tell the driver to join 714 * this time 715 */ 716 if (ilm_numentries_v6(ill, v6group) > 1) 717 return (0); 718 if (ill->ill_group == NULL) 719 ret = ill_join_allmulti(ill); 720 else 721 ret = ill_nominate_mcast_rcv(ill->ill_group); 722 723 if (ret != 0) 724 ilm_delete(ilm); 725 return (ret); 726 } 727 728 if (!IS_LOOPBACK(ill)) 729 mld_joingroup(ilm); 730 731 /* 732 * If we have more then one we should not tell the driver 733 * to join this time. 
734 */ 735 if (ilm_numentries_v6(ill, v6group) > 1) 736 return (0); 737 738 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 739 if (ret != 0) 740 ilm_delete(ilm); 741 return (ret); 742 } 743 744 /* 745 * Send a multicast request to the driver for enabling multicast reception 746 * for v6groupp address. The caller has already checked whether it is 747 * appropriate to send one or not. 748 */ 749 int 750 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 751 { 752 mblk_t *mp; 753 uint32_t addrlen, addroff; 754 char group_buf[INET6_ADDRSTRLEN]; 755 756 ASSERT(IAM_WRITER_ILL(ill)); 757 758 /* 759 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 760 * on. 761 */ 762 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 763 &addrlen, &addroff); 764 if (!mp) 765 return (ENOMEM); 766 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 767 ipaddr_t v4group; 768 769 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 770 /* 771 * NOTE!!! 772 * The "addroff" passed in here was calculated by 773 * ill_create_dl(), and will be used by ill_create_squery() 774 * to perform some twisted coding magic. It is the offset 775 * into the dl_xxx_req of the hw addr. Here, it will be 776 * added to b_wptr - b_rptr to create a magic number that 777 * is not an offset into this squery mblk. 778 * The actual hardware address will be accessed only in the 779 * dl_xxx_req, not in the squery. More importantly, 780 * that hardware address can *only* be accessed in this 781 * mblk chain by calling mi_offset_param_c(), which uses 782 * the magic number in the squery hw offset field to go 783 * to the *next* mblk (the dl_xxx_req), subtract the 784 * (b_wptr - b_rptr), and find the actual offset into 785 * the dl_xxx_req. 786 * Any method that depends on using the 787 * offset field in the dl_disabmulti_req or squery 788 * to find either hardware address will similarly fail. 789 * 790 * Look in ar_entry_squery() in arp.c to see how this offset 791 * is used. 
792 */ 793 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 794 if (!mp) 795 return (ENOMEM); 796 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 797 inet_ntop(AF_INET6, v6groupp, group_buf, 798 sizeof (group_buf)), 799 ill->ill_name)); 800 putnext(ill->ill_rq, mp); 801 } else { 802 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 803 " %s\n", 804 inet_ntop(AF_INET6, v6groupp, group_buf, 805 sizeof (group_buf)), 806 ill->ill_name)); 807 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 808 } 809 return (0); 810 } 811 812 /* 813 * Send a multicast request to the driver for enabling multicast 814 * membership for v6group if appropriate. 815 */ 816 static int 817 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 818 { 819 ill_t *ill = ipif->ipif_ill; 820 821 ASSERT(IAM_WRITER_IPIF(ipif)); 822 823 if (ill->ill_net_type != IRE_IF_RESOLVER || 824 ipif->ipif_flags & IPIF_POINTOPOINT) { 825 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 826 return (0); /* Must be IRE_IF_NORESOLVER */ 827 } 828 829 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 830 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 831 return (0); 832 } 833 if (!ill->ill_dl_up) { 834 /* 835 * Nobody there. All multicast addresses will be re-joined 836 * when we get the DL_BIND_ACK bringing the interface up. 837 */ 838 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 839 return (0); 840 } 841 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 842 } 843 844 /* 845 * INADDR_ANY means all multicast addresses. This is only used 846 * by the multicast router. 847 * INADDR_ANY is stored as the IPv6 unspecifed addr. 
848 */ 849 int 850 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 851 { 852 ill_t *ill = ipif->ipif_ill; 853 ilm_t *ilm; 854 in6_addr_t v6group; 855 856 ASSERT(IAM_WRITER_IPIF(ipif)); 857 858 if (!CLASSD(group) && group != INADDR_ANY) 859 return (EINVAL); 860 861 /* 862 * INADDR_ANY is represented as the IPv6 unspecifed addr. 863 */ 864 if (group == INADDR_ANY) 865 v6group = ipv6_all_zeros; 866 else 867 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 868 869 /* 870 * Look for a match on the ipif. 871 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 872 */ 873 mutex_enter(&ill->ill_lock); 874 ilm = ilm_lookup_ipif(ipif, group); 875 mutex_exit(&ill->ill_lock); 876 if (ilm == NULL) 877 return (ENOENT); 878 879 /* Update counters */ 880 if (no_ilg) 881 ilm->ilm_no_ilg_cnt--; 882 883 if (leaving) 884 ilm->ilm_refcnt--; 885 886 if (ilm->ilm_refcnt > 0) 887 return (ilm_update_del(ilm, B_FALSE)); 888 889 if (group == INADDR_ANY) { 890 ilm_delete(ilm); 891 /* 892 * Check how many ipif's that have members in this group - 893 * if there are still some left then don't tell the driver 894 * to drop it. 895 */ 896 if (ilm_numentries_v6(ill, &v6group) != 0) 897 return (0); 898 899 /* If we never joined, then don't leave. */ 900 if (ill->ill_join_allmulti) { 901 ill_leave_allmulti(ill); 902 if (ill->ill_group != NULL) 903 (void) ill_nominate_mcast_rcv(ill->ill_group); 904 } 905 return (0); 906 } 907 908 if (!IS_LOOPBACK(ill)) 909 igmp_leavegroup(ilm); 910 911 ilm_delete(ilm); 912 /* 913 * Check how many ipif's that have members in this group - 914 * if there are still some left then don't tell the driver 915 * to drop it. 916 */ 917 if (ilm_numentries_v6(ill, &v6group) != 0) 918 return (0); 919 return (ip_ll_delmulti_v6(ipif, &v6group)); 920 } 921 922 /* 923 * The unspecified address means all multicast addresses. 924 * This is only used by the multicast router. 
925 */ 926 int 927 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 928 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 929 { 930 ipif_t *ipif; 931 ilm_t *ilm; 932 933 ASSERT(IAM_WRITER_ILL(ill)); 934 935 if (!IN6_IS_ADDR_MULTICAST(v6group) && 936 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 937 return (EINVAL); 938 939 /* 940 * Look for a match on the ill. 941 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 942 * 943 * Similar to ip_addmulti_v6, we should always look using 944 * the orig_ifindex. 945 * 946 * 1) If orig_ifindex is different from ill's ifindex 947 * we should have an ilm with orig_ifindex created in 948 * ip_addmulti_v6. We should delete that here. 949 * 950 * 2) If orig_ifindex is same as ill's ifindex, we should 951 * not delete the ilm that is temporarily here because of 952 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 953 * different from ill's ifindex. 954 * 955 * Thus, always lookup using orig_ifindex. 956 */ 957 mutex_enter(&ill->ill_lock); 958 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 959 mutex_exit(&ill->ill_lock); 960 if (ilm == NULL) 961 return (ENOENT); 962 963 ASSERT(ilm->ilm_ill == ill); 964 965 ipif = ill->ill_ipif; 966 967 /* Update counters */ 968 if (no_ilg) 969 ilm->ilm_no_ilg_cnt--; 970 971 if (leaving) 972 ilm->ilm_refcnt--; 973 974 if (ilm->ilm_refcnt > 0) 975 return (ilm_update_del(ilm, B_TRUE)); 976 977 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 978 ilm_delete(ilm); 979 /* 980 * Check how many ipif's that have members in this group - 981 * if there are still some left then don't tell the driver 982 * to drop it. 983 */ 984 if (ilm_numentries_v6(ill, v6group) != 0) 985 return (0); 986 987 /* If we never joined, then don't leave. 
*/ 988 if (ill->ill_join_allmulti) { 989 ill_leave_allmulti(ill); 990 if (ill->ill_group != NULL) 991 (void) ill_nominate_mcast_rcv(ill->ill_group); 992 } 993 return (0); 994 } 995 996 if (!IS_LOOPBACK(ill)) 997 mld_leavegroup(ilm); 998 999 ilm_delete(ilm); 1000 /* 1001 * Check how many ipif's that have members in this group - 1002 * if there are still some left then don't tell the driver 1003 * to drop it. 1004 */ 1005 if (ilm_numentries_v6(ill, v6group) != 0) 1006 return (0); 1007 return (ip_ll_delmulti_v6(ipif, v6group)); 1008 } 1009 1010 /* 1011 * Send a multicast request to the driver for disabling multicast reception 1012 * for v6groupp address. The caller has already checked whether it is 1013 * appropriate to send one or not. 1014 */ 1015 int 1016 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1017 { 1018 mblk_t *mp; 1019 char group_buf[INET6_ADDRSTRLEN]; 1020 uint32_t addrlen, addroff; 1021 1022 ASSERT(IAM_WRITER_ILL(ill)); 1023 /* 1024 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1025 * on. 1026 */ 1027 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1028 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1029 1030 if (!mp) 1031 return (ENOMEM); 1032 1033 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1034 ipaddr_t v4group; 1035 1036 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1037 /* 1038 * NOTE!!! 1039 * The "addroff" passed in here was calculated by 1040 * ill_create_dl(), and will be used by ill_create_squery() 1041 * to perform some twisted coding magic. It is the offset 1042 * into the dl_xxx_req of the hw addr. Here, it will be 1043 * added to b_wptr - b_rptr to create a magic number that 1044 * is not an offset into this mblk. 1045 * 1046 * Please see the comment in ip_ll_send)enabmulti_req() 1047 * for a complete explanation. 1048 * 1049 * Look in ar_entry_squery() in arp.c to see how this offset 1050 * is used. 
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv4: route the request through ARP via the read queue. */
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv6: hand the request to NDP instead of ARP. */
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		/* Driver receives multicast via broadcast; nothing to do. */
		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_disabmulti_req(ill, v6group));
}

/*
 * Make the driver pass up all multicast packets
 *
 * With ill groups, the caller makes sure that there is only
 * one ill joining the allmulti group.
 */
int
ill_join_allmulti(ill_t *ill)
{
	mblk_t		*promiscon_mp, *promiscoff_mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(!ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to the DLPI
	 * provider. We don't need to do this for certain media types for
	 * which we never need to turn promiscuous mode on. While we're here,
	 * pre-allocate a DL_PROMISCOFF_REQ message to make sure that
	 * ill_leave_allmulti() will not fail due to low memory conditions.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		promiscon_mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
		promiscoff_mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
		if (promiscon_mp == NULL || promiscoff_mp == NULL) {
			/* freemsg() is a no-op on NULL */
			freemsg(promiscon_mp);
			freemsg(promiscoff_mp);
			return (ENOMEM);
		}
		/* Stash the off-request for ill_leave_allmulti() to use. */
		ill->ill_promiscoff_mp = promiscoff_mp;
		ill_dlpi_send(ill, promiscon_mp);
	}

	ill->ill_join_allmulti = B_TRUE;
	return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 *
 * With ill groups, we need to nominate some other ill as
 * this ipif->ipif_ill is leaving the group.
 */
void
ill_leave_allmulti(ill_t *ill)
{
	/* Pre-allocated by ill_join_allmulti() so this path cannot fail. */
	mblk_t	*promiscoff_mp = ill->ill_promiscoff_mp;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return;
	}

	ASSERT(ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCOFF_REQ message and send it directly to
	 * the DLPI provider. We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		ASSERT(promiscoff_mp != NULL);
		ill->ill_promiscoff_mp = NULL;
		ill_dlpi_send(ill, promiscoff_mp);
	}

	ill->ill_join_allmulti = B_FALSE;
}

/*
 * Look up the ill for ifindex and become exclusive (writer) on its IPSQ.
 * Returns the ill with the IPSQ entered on success, NULL on failure.
 */
static ill_t *
ipsq_enter_byifindex(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t		*ill;
	boolean_t	in_ipsq;

	ill = ill_lookup_on_ifindex(ifindex, isv6, NULL, NULL, NULL, NULL,
	    ipst);
	if (ill != NULL) {
		/*
		 * Take a waiter reference so the ill cannot disappear
		 * while we drop the lookup refhold and enter the IPSQ.
		 */
		if (!ill_waiter_inc(ill)) {
			ill_refrele(ill);
			return (NULL);
		}
		ill_refrele(ill);
		in_ipsq = ipsq_enter(ill, B_FALSE);
		ill_waiter_dcr(ill);
		if (!in_ipsq)
			ill = NULL;
	}
	return (ill);
}

/*
 * Join allmulti on the interface named by ifindex; the unspecified
 * group (ipv6_all_zeros / INADDR_ANY) stands for allmulti membership.
 */
int
ip_join_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t	*ill;
	int	ret;

	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
		return (ENODEV);
	if (isv6) {
		ret = ip_addmulti_v6(&ipv6_all_zeros, ill, ifindex,
		    ill->ill_zoneid, ILGSTAT_NONE, MODE_IS_EXCLUDE, NULL);
	} else {
		ret = ip_addmulti(INADDR_ANY, ill->ill_ipif, ILGSTAT_NONE,
		    MODE_IS_EXCLUDE, NULL);
	}
	ill->ill_ipallmulti_cnt++;
	ipsq_exit(ill->ill_phyint->phyint_ipsq);
	return (ret);
}

/* Undo one ip_join_allmulti() membership on the named interface. */
int
ip_leave_allmulti(uint_t ifindex, boolean_t isv6, ip_stack_t *ipst)
{
	ill_t	*ill;

	if ((ill = ipsq_enter_byifindex(ifindex, isv6, ipst)) == NULL)
		return (ENODEV);
	ASSERT(ill->ill_ipallmulti_cnt != 0);
	if (isv6) {
		(void) ip_delmulti_v6(&ipv6_all_zeros, ill, ifindex,
		    ill->ill_zoneid, B_TRUE, B_TRUE);
	} else {
		(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE, B_TRUE);
	}
	ill->ill_ipallmulti_cnt--;
	ipsq_exit(ill->ill_phyint->phyint_ipsq);
	return (0);
}

/*
 * Delete the allmulti memberships that were added as part of
 * ip_join_allmulti().
 */
void
ip_purge_allmulti(ill_t *ill)
{
	ASSERT(IAM_WRITER_ILL(ill));

	for (; ill->ill_ipallmulti_cnt > 0; ill->ill_ipallmulti_cnt--) {
		if (ill->ill_isv6) {
			(void) ip_delmulti_v6(&ipv6_all_zeros, ill,
			    ill->ill_phyint->phyint_ifindex, ill->ill_zoneid,
			    B_TRUE, B_TRUE);
		} else {
			(void) ip_delmulti(INADDR_ANY, ill->ill_ipif, B_TRUE,
			    B_TRUE);
		}
	}
}

/*
 * Copy mp_orig and pass it in as a local message.
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t *ipst = ill->ill_ipst;

	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		/*
		 * For UDP, copy only the IP + UDP headers into a fresh
		 * mblk and share the payload via dupmsg() instead of
		 * copying the whole datagram.
		 */
		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			mp_orig->b_rptr += hdrsz;
			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
				mblk_setcred(mp, DB_CRED(mp_orig));
			/* Drop the duplicate if the headers consumed it. */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	/* An M_CTL means an ipsec_out block precedes the data. */
	if (DB_TYPE(mp) == M_CTL) {
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	/*
	 * DTrace this as ip:::send. A blocked packet will fire the send
	 * probe, but not the receive probe.
	 */
	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	/* The firewall hook may have consumed the packet. */
	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}

/* Template for the ARP squery message built by ill_create_squery(). */
static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,	/* protocol, from arps perspective */
	sizeof (area_t),	/* proto addr offset */
	IP_ADDR_LEN,		/* proto addr_length */
	0,			/* proto mask offset */
	/* Rest is initialized when used */
	0,			/* flags */
	0,			/* hw addr offset */
	0,			/* hw addr length */
};

/*
 * Build an AR_ENTRY_SQUERY for ipaddr and chain mp_tail (a dl_xxx_req)
 * behind it. Consumes mp_tail on allocation failure.
 */
static mblk_t *
ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
    uint32_t addroff, mblk_t *mp_tail)
{
	mblk_t	*mp;
	area_t	*area;

	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
	    (caddr_t)&ipaddr);
	if (!mp) {
		freemsg(mp_tail);
		return (NULL);
	}
	area = (area_t *)mp->b_rptr;
	area->area_hw_addr_length = addrlen;
	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
	/*
	 * NOTE!
	 *
	 * The area_hw_addr_offset, as can be seen, does not hold the
	 * actual hardware address offset. Rather, it holds the offset
	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
	 * mi_offset_paramc() to find the hardware address in the
	 * *second* mblk (dl_xxx_req), not this mblk.
	 *
	 * Using mi_offset_paramc() is thus the *only* way to access
	 * the dl_xxx_hw address.
	 *
	 * The squery hw address should *not* be accessed.
	 *
	 * See ar_entry_squery() in arp.c for an example of how all this works.
	 */

	mp->b_cont = mp_tail;
	return (mp);
}

/*
 * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
 * the hardware address.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t	hw_addr_length;
	char		*cp;
	uint32_t	offset;
	uint32_t	size;

	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	size = length;
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
	case DL_DISABMULTI_REQ:
		/* Leave room for the hardware address after the request. */
		size += hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	/* The hw addr (when present) immediately follows the request. */
	offset = length;

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}

/*
 * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
 * messages that had been delayed until we'd heard back from ARP. One catch:
 * we need to ensure that no one else becomes writer on the IPSQ before we've
 * received the replies, or they'll incorrectly process our replies as part of
 * their unrelated IPSQ operation. To do this, we start a new IPSQ operation,
 * which will complete when we process the reply in ip_rput_dlpi_writer().
 */
/* ARGSUSED */
static void
ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
{
	ill_t *ill = q->q_ptr;
	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));

	if (prim == DL_ENABMULTI_REQ) {
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
	}

	ipsq_current_start(ipsq, ill->ill_ipif, 0);
	ill_dlpi_send(ill, mp);
}

/*
 * Intercept AR_ENTRY_SQUERY replies from ARP carrying a tacked-on
 * DL_{ENAB,DISAB}MULTI_REQ, and forward the DLPI request to the driver
 * from writer context. Anything else is passed along unchanged.
 */
void
ip_wput_ctl(queue_t *q, mblk_t *mp)
{
	ill_t	*ill = q->q_ptr;
	mblk_t	*dlmp = mp->b_cont;
	area_t	*area = (area_t *)mp->b_rptr;
	t_uscalar_t prim;

	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
	    dlmp == NULL) {
		putnext(q, mp);
		return;
	}

	/* Check that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
		putnext(q, mp);
		return;
	}
	/* The squery wrapper has served its purpose; keep only dlmp. */
	freeb(mp);

	/* See comments above ip_wput_ctl_writer() for details */
	ill_refhold(ill);
	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
}

/*
 * Rejoin any groups which have been explicitly joined by the application (we
 * left all explicitly joined groups as part of ill_leave_multicast() prior to
 * bringing the interface down). Note that because groups can be joined and
 * left while an interface is down, this may not be the same set of groups
 * that we left in ill_leave_multicast().
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	ill->ill_need_recover_multicast = 0;

	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_recover_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			if (ill->ill_group == NULL) {
				(void) ill_join_allmulti(ill);
			} else {
				/*
				 * We don't want to join on this ill,
				 * if somebody else in the group has
				 * already been nominated.
				 */
				(void) ill_nominate_mcast_rcv(ill->ill_group);
			}
		} else {
			(void) ip_ll_addmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined. Note that both these functions could be
 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
 * and DL_ENABMULTI_REQ messages when an interface is down.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	ill->ill_need_recover_multicast = 1;

	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_leave_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			ill_leave_allmulti(ill);
			/*
			 * If we were part of an IPMP group, then
			 * ill_handoff_responsibility() has already
			 * nominated a new member (so we don't).
			 */
			ASSERT(ill->ill_group == NULL);
		} else {
			(void) ip_ll_delmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/* Find an ilm for matching the ill (IPv4 form; group is mapped to v6). */
ilm_t *
ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t	v6group;

	/* Safe only for walkers or with ill_lock held. */
	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
}

/*
 * Find an ilm for matching the ill. All the ilm lookup functions
 * ignore ILM_DELETED ilms. These have been logically deleted, and
 * igmp and linklayer disable multicast have been done. Only mi_free
 * yet to be done. Still there in the list due to ilm_walkers. The
 * last walker will release it.
 */
ilm_t *
ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
{
	ilm_t	*ilm;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));

	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		/* ALL_ZONES matches any zone's membership. */
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
			return (ilm);
	}
	return (NULL);
}

/* As ilm_lookup_ill_v6(), but additionally match ilm_orig_ifindex. */
ilm_t *
ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
    zoneid_t zoneid)
{
	ilm_t *ilm;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));

	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
		    ilm->ilm_orig_ifindex == index) {
			return (ilm);
		}
	}
	return (NULL);
}


/*
 * Find an ilm for the ipif. Only needed for IPv4 which does
 * ipif specific socket options.
 */
ilm_t *
ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t	v6group;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (ilm->ilm_ipif == ipif &&
		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
			return (ilm);
	}
	return (NULL);
}

/*
 * How many members on this ill?
 */
int
ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;
	int i = 0;

	mutex_enter(&ill->ill_lock);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			i++;
		}
	}
	mutex_exit(&ill->ill_lock);
	return (i);
}

/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
    zoneid_t zoneid)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter list if there is ilg state to copy in. */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	/*
	 * IPv4 Multicast groups are joined using ipif.
	 * IPv6 Multicast groups are joined using ill.
	 */
	if (ill->ill_isv6) {
		ilm->ilm_ill = ill;
		ilm->ilm_ipif = NULL;
		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_ilm_cnt++;
	} else {
		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
		ilm->ilm_ipif = ipif;
		ilm->ilm_ill = NULL;
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
		    (char *), "ilm", (void *), ilm);
		ipif->ipif_ilm_cnt++;
	}
	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/*
	 * After this if ilm moves to a new ill, we don't change
	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
	 * it has been moved. Indexes don't match even when the application
	 * wants to join on a FAILED/INACTIVE interface because we choose
	 * a new interface to join in. This is considered as an implicit
	 * move.
	 */
	ilm->ilm_orig_ifindex = orig_ifindex;

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));

	/*
	 * Grab lock to give consistent view to readers
	 */
	mutex_enter(&ill->ill_lock);
	/*
	 * All ilms in the same zone are contiguous in the ill_ilm list.
	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
	 * sending duplicates up when two applications in the same zone join the
	 * same group on different logical interfaces.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	mutex_exit(&ill->ill_lock);
	return (ilm);
}

/* Free an ilm's source-filter lists and then the ilm itself. */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}

/*
 * Reap ILM_DELETED entries once the last walker has gone. Called with
 * ill_lock held; the lock is released on return (possibly via
 * ipif_ill_refrele_tail() when a pending FREE/unplumb must be restarted).
 */
void
ilm_walker_cleanup(ill_t *ill)
{
	ilm_t	**ilmp;
	ilm_t	*ilm;
	boolean_t need_wakeup = B_FALSE;

	ASSERT(MUTEX_HELD(&ill->ill_lock));
	ASSERT(ill->ill_ilm_walker_cnt == 0);

	ilmp = &ill->ill_ilm;
	while (*ilmp != NULL) {
		if ((*ilmp)->ilm_flags & ILM_DELETED) {
			ilm = *ilmp;
			*ilmp = ilm->ilm_next;
			/*
			 * check if there are any pending FREE or unplumb
			 * operations that need to be restarted.
			 */
			if (ilm->ilm_ipif != NULL) {
				/*
				 * IPv4 ilms hold a ref on the ipif.
				 */
				DTRACE_PROBE3(ipif__decr__cnt,
				    (ipif_t *), ilm->ilm_ipif,
				    (char *), "ilm", (void *), ilm);
				ilm->ilm_ipif->ipif_ilm_cnt--;
				if (IPIF_FREE_OK(ilm->ilm_ipif))
					need_wakeup = B_TRUE;
			} else {
				/*
				 * IPv6 ilms hold a ref on the ill.
				 */
				ASSERT(ilm->ilm_ill == ill);
				DTRACE_PROBE3(ill__decr__cnt,
				    (ill_t *), ill,
				    (char *), "ilm", (void *), ilm);
				ASSERT(ill->ill_ilm_cnt > 0);
				ill->ill_ilm_cnt--;
				if (ILL_FREE_OK(ill))
					need_wakeup = B_TRUE;
			}
			ilm_inactive(ilm); /* frees ilm */
		} else {
			ilmp = &(*ilmp)->ilm_next;
		}
	}
	ill->ill_ilm_cleanup_reqd = 0;
	if (need_wakeup)
		ipif_ill_refrele_tail(ill);	/* drops ill_lock */
	else
		mutex_exit(&ill->ill_lock);
}

/*
 * Unlink ilm and free it.
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;


	if (ilm->ilm_ipif != NULL) {
		/* IPv4 ilms are attached to an ipif. */
		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
		ASSERT(ilm->ilm_ill == NULL);
		ill = ilm->ilm_ipif->ipif_ill;
		ASSERT(!ill->ill_isv6);
	} else {
		/* IPv6 ilms are attached to an ill. */
		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
		ASSERT(ilm->ilm_ipif == NULL);
		ill = ilm->ilm_ill;
		ASSERT(ill->ill_isv6);
	}
	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ilm_walker_cnt != 0) {
		/*
		 * Walkers are active: just mark the entry logically
		 * deleted; the last walker reaps it via
		 * ilm_walker_cleanup().
		 */
		ilm->ilm_flags |= ILM_DELETED;
		ill->ill_ilm_cleanup_reqd = 1;
		mutex_exit(&ill->ill_lock);
		return;
	}

	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;
	*ilmp = ilm->ilm_next;

	/*
	 * if we are the last reference to the ipif (for IPv4 ilms)
	 * or the ill (for IPv6 ilms), we may need to wakeup any
	 * pending FREE or unplumb operations.
	 */
	need_wakeup = B_FALSE;
	if (ilm->ilm_ipif != NULL) {
		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
		    (char *), "ilm", (void *), ilm);
		ilm->ilm_ipif->ipif_ilm_cnt--;
		if (IPIF_FREE_OK(ilm->ilm_ipif))
			need_wakeup = B_TRUE;
	} else {
		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ASSERT(ill->ill_ilm_cnt > 0);
		ill->ill_ilm_cnt--;
		if (ILL_FREE_OK(ill))
			need_wakeup = B_TRUE;
	}

	ilm_inactive(ilm); /* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}


/*
 * Looks up the appropriate ipif given a v4 multicast group and interface
 * address. On success, returns 0, with *ipifpp pointing to the found
 * struct. On failure, returns an errno and *ipifpp is NULL.
 */
int
ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
    uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
{
	ipif_t	*ipif;
	int err = 0;
	zoneid_t zoneid;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	/* Group must be multicast; source must not be. */
	if (!CLASSD(group) || CLASSD(src)) {
		return (EINVAL);
	}
	*ipifpp = NULL;

	zoneid = IPCL_ZONEID(connp);

	/* Callers pass an interface address or an index, never both. */
	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
	if (ifaddr != INADDR_ANY) {
		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
		if (err != 0 && err != EINPROGRESS)
			err = EADDRNOTAVAIL;
	} else if (ifindexp != NULL && *ifindexp != 0) {
		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
	} else {
		/* No interface specified; choose one based on the group. */
		ipif = ipif_lookup_group(group, zoneid, ipst);
		if (ipif == NULL)
			return (EADDRNOTAVAIL);
	}
	if (ipif == NULL)
		return (err);

	*ipifpp = ipif;
	return (0);
}

/*
 * Looks up the appropriate ill (or ipif if v4mapped) given an interface
 * index and IPv6 multicast group. On success, returns 0, with *illpp (or
 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns
 * an errno and *illpp and *ipifpp are undefined.
2020 */ 2021 int 2022 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 2023 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 2024 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 2025 { 2026 boolean_t src_unspec; 2027 ill_t *ill = NULL; 2028 ipif_t *ipif = NULL; 2029 int err; 2030 zoneid_t zoneid = connp->conn_zoneid; 2031 queue_t *wq = CONNP_TO_WQ(connp); 2032 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 2033 2034 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 2035 2036 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 2037 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 2038 return (EINVAL); 2039 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 2040 if (src_unspec) { 2041 *v4src = INADDR_ANY; 2042 } else { 2043 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 2044 } 2045 if (!CLASSD(*v4group) || CLASSD(*v4src)) 2046 return (EINVAL); 2047 *ipifpp = NULL; 2048 *isv6 = B_FALSE; 2049 } else { 2050 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 2051 return (EINVAL); 2052 if (!IN6_IS_ADDR_MULTICAST(v6group) || 2053 IN6_IS_ADDR_MULTICAST(v6src)) { 2054 return (EINVAL); 2055 } 2056 *illpp = NULL; 2057 *isv6 = B_TRUE; 2058 } 2059 2060 if (ifindex == 0) { 2061 if (*isv6) 2062 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 2063 else 2064 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 2065 if (ill == NULL && ipif == NULL) 2066 return (EADDRNOTAVAIL); 2067 } else { 2068 if (*isv6) { 2069 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 2070 wq, first_mp, func, &err, ipst); 2071 if (ill != NULL && 2072 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 2073 ill_refrele(ill); 2074 ill = NULL; 2075 err = EADDRNOTAVAIL; 2076 } 2077 } else { 2078 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 2079 zoneid, wq, first_mp, func, &err, ipst); 2080 } 2081 if (ill == NULL && ipif == NULL) 2082 return (err); 2083 } 2084 2085 *ipifpp = ipif; 2086 *illpp = ill; 2087 return (0); 2088 } 2089 2090 static int 2091 ip_get_srcfilter(conn_t *connp, 
    struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/* exactly one of gf (new API) and imsf (old v4-only API) is set */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* copy out at most outsrcs addresses (caller's buffer size) */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* report the full count, even if the copy was truncated */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * IPv6 counterpart of ip_get_srcfilter(): copies this conn's source
 * filter state for (grp, ill) into gf.  Returns EADDRNOTAVAIL if the
 * conn is not a member of the group.
 */
static int
ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct
    in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	slist_t *fp;

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ill_v6(connp, grp, ill);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		gf->gf_numsrc = 0;
	} else {
		/* copy out at most gf_numsrc entries (caller's buffer size) */
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			if (i == fp->sl_numsrc)
				break;
			sin6 = (struct sockaddr_in6 *)sl;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = fp->sl_addr[i];
		}
		/* report the full count, even if the copy was truncated */
		gf->gf_numsrc = fp->sl_numsrc;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Replaces this conn's source filter state for the given IPv4 (or
 * v4-mapped) group on ipif with the state passed in imsf (old v4-only
 * API) or gf (new API); exactly one of the two is non-NULL.  An
 * (INCLUDE, empty) request is treated as a leave.  Called as writer
 * on ipif.
 */
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, err, infmode, new_fmode;
	uint_t insrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_grp, is_v4only_api;
	ilg_stat_t ilgstat;

	/* exactly one of gf (new API) and imsf (old v4-only API) is set */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);

	ASSERT(IAM_WRITER_IPIF(ipif));

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		/* brand-new membership for this conn on this ipif */
		ilgstat = ILGSTAT_NEW;
		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
	} else if (leave_grp) {
		/* existing membership plus a leave request: drop it */
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				/* undo the ilg we just created, if any */
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		/* filter lists are stored internally as v4-mapped v6 addrs */
		for (i = 0; i < insrcs; i++) {
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, grp, ipif);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * IPv6 counterpart of ip_set_srcfilter(): replaces this conn's source
 * filter state for grp on ill with the state passed in gf.  An
 * (INCLUDE, empty) request is treated as a leave.  Called as writer
 * on ill.
 */
static int
ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i, orig_ifindex, orig_fmode, new_fmode, err;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	boolean_t leave_grp;
	ilg_stat_t ilgstat;

	/* Make sure we can handle the source list */
	if (gf->gf_numsrc > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the ifindex to do the lookup.  We can't use the ill
	 * directly because ilg_ill could point to a different ill
	 * if things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;

	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *grp;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on. This might be
		 * different from the ill we've been given if it's
		 * currently down and part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, grp);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the index that we joined on, so that we can
		 * successfully delete them later on and also search for
		 * duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else if (leave_grp) {
		/*
		 * Use the ilg's current ill for the deletion,
		 * we might have failed over.
		 */
		ill = ilg->ilg_ill;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
		    connp->conn_zoneid, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
		/* preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (gf->gf_numsrc == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				/* undo the ilg we just created, if any */
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			sin6 = (struct sockaddr_in6 *)sl;
			fp->sl_addr[i] = sin6->sin6_addr;
		}
		fp->sl_numsrc = gf->gf_numsrc;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
 */
/* ARGSUSED */
int
ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *ifreq)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* existence verified in ip_wput_nondata() */
	mblk_t *data_mp = mp->b_cont->b_cont;
	int datalen, err, cmd, minsize;
	uint_t expsize = 0;
	conn_t *connp;
	boolean_t isv6, is_v4only_api, getcmd;
	struct sockaddr_in *gsin;
	struct sockaddr_in6 *gsin6;
	ipaddr_t v4grp;
	in6_addr_t v6grp;
	struct group_filter *gf = NULL;
	struct ip_msfilter *imsf = NULL;
	mblk_t *ndp;

	/* coalesce a multi-mblk ioctl payload into one contiguous mblk */
	if (data_mp->b_cont != NULL) {
		if ((ndp = msgpullup(data_mp, -1)) == NULL)
			return (ENOMEM);
		freemsg(data_mp);
		data_mp = ndp;
		mp->b_cont->b_cont = data_mp;
	}

	cmd = iocp->ioc_cmd;
	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
	minsize = (is_v4only_api) ?
	    IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
	datalen = MBLKL(data_mp);

	if (datalen < minsize)
		return (EINVAL);

	/*
	 * now we know we at least have the initial structure,
	 * but need to check for the source list array.
	 */
	if (is_v4only_api) {
		imsf = (struct ip_msfilter *)data_mp->b_rptr;
		isv6 = B_FALSE;
		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
	} else {
		gf = (struct group_filter *)data_mp->b_rptr;
		if (gf->gf_group.ss_family == AF_INET6) {
			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
			/* v4-mapped groups take the v4 path below */
			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
		} else {
			isv6 = B_FALSE;
		}
		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
	}
	if (datalen < expsize)
		return (EINVAL);

	connp = Q_TO_CONN(q);

	/* operation not supported on the virtual network interface */
	if (IS_VNI(ipif->ipif_ill))
		return (EINVAL);

	if (isv6) {
		ill_t *ill = ipif->ipif_ill;
		ill_refhold(ill);

		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
		v6grp = gsin6->sin6_addr;
		if (getcmd)
			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
		else
			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);

		ill_refrele(ill);
	} else {
		boolean_t isv4mapped = B_FALSE;
		if (is_v4only_api) {
			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
		} else {
			if (gf->gf_group.ss_family == AF_INET) {
				gsin = (struct sockaddr_in *)&gf->gf_group;
				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
			} else {
				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
				    v4grp);
				isv4mapped = B_TRUE;
			}
		}
		if (getcmd)
			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
		else
			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
	}

	return (err);
}

/*
 * Finds the ipif based on information
 * in the ioctl headers.  Needed to make
 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
 * ioctls prior to calling the ioctl's handler function).
 */
int
ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    cmd_info_t *ci, ipsq_func_t func)
{
	int cmd = ipip->ipi_cmd;
	int err = 0;
	conn_t *connp;
	ipif_t *ipif;
	/* caller has verified this mblk exists */
	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4grp;
	in6_addr_t v6grp;
	uint32_t index;
	zoneid_t zoneid;
	ip_stack_t *ipst;

	connp = Q_TO_CONN(q);
	zoneid = connp->conn_zoneid;
	ipst = connp->conn_netstack->netstack_ip;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_af_isv6)
			return (EAFNOSUPPORT);

		/* old v4-only API: interface named by address */
		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4grp = imsf->imsf_multiaddr.s_addr;
		if (v4addr == INADDR_ANY) {
			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
			    func, &err, ipst);
		}
	} else {
		/* new API: interface named by index, group by sockaddr */
		boolean_t isv6 = B_FALSE;
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6grp = sin6->sin6_addr;
			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
			else
				isv6 = B_TRUE;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;
			sin = (struct sockaddr_in *)&gf->gf_group;
			v4grp =
			    sin->sin_addr.s_addr;
		} else {
			return (EAFNOSUPPORT);
		}
		if (index == 0) {
			/* no index given: pick an interface by group */
			if (isv6) {
				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
				    ipst);
			} else {
				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			}
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
			    q, mp, func, &err, ipst);
		}
	}

	ci->ci_ipif = ipif;
	return (err);
}

/*
 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
 * in in two stages, as the first copyin tells us the size of the attached
 * source buffer.  This function is called by ip_wput_nondata() after the
 * first copyin has completed; it figures out how big the second stage
 * needs to be, and kicks it off.
 *
 * In some cases (numsrc < 2), the second copyin is not needed as the
 * first one gets a complete structure containing 1 source addr.
 *
 * The function returns 0 if a second copyin has been started (i.e. there's
 * no more work to be done right now), or 1 if the second copyin is not
 * needed and ip_wput_nondata() can continue its processing.
 */
int
ip_copyin_msfilter(queue_t *q, mblk_t *mp)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	int cmd = iocp->ioc_cmd;
	/* validity of this checked in ip_wput_nondata() */
	mblk_t *mp1 = mp->b_cont->b_cont;
	int copysize = 0;
	int offset;

	if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) {
		struct group_filter *gf = (struct group_filter *)mp1->b_rptr;
		if (gf->gf_numsrc >= 2) {
			/* copy in the source entries past the base struct */
			offset = sizeof (struct group_filter);
			copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset;
		}
	} else {
		struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr;
		if (imsf->imsf_numsrc >= 2) {
			offset = sizeof (struct ip_msfilter);
			copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset;
		}
	}
	if (copysize > 0) {
		/* second-stage copyin started; caller must wait for it */
		mi_copyin_n(q, mp, offset, copysize);
		return (0);
	}
	return (1);
}

/*
 * Handle the following optmgmt:
 *	IP_ADD_MEMBERSHIP		must not have joined already
 *	MCAST_JOIN_GROUP		must not have joined already
 *	IP_BLOCK_SOURCE			must have joined already
 *	MCAST_BLOCK_SOURCE		must have joined already
 *	IP_JOIN_SOURCE_GROUP		may have joined already
 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (the IP_* and MCAST_* versions of each option
 * are functionally equivalent):
 *	opt			fmode			src
 *	IP_ADD_MEMBERSHIP	MODE_IS_EXCLUDE		INADDR_ANY
 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		INADDR_ANY
 *	IP_BLOCK_SOURCE		MODE_IS_EXCLUDE		v4 addr
 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
 *	IP_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int err = 0;
	ill_t	*ill;

	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* serialize the join with other exclusive ops on this ipif */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);

	/* unspecified source addr => no source filtering */
	err = ilg_add(connp, group, ipif, fmode, src);

	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_JOIN_GROUP			must not have joined already
 *	MCAST_JOIN_GROUP		must not have joined already
 *	MCAST_BLOCK_SOURCE		must have joined already
 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
 * are functionally equivalent):
 *	opt			fmode			v6src
 *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t *ill;
	ipif_t *ipif;
	char buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t *ipsq;
	int err;

	/* resolves to an ill (native v6) or an ipif (v4-mapped) */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add(connp, v4group, ipif, fmode, v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Leave the group entirely (src == INADDR_ANY) or remove a single
 * source address from this conn's filter for the group on ipif.
 * Called as writer on ipif.
 */
static int
ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src)
{
	ilg_t *ilg;
	in6_addr_t v6src;
	boolean_t leaving = B_FALSE;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/*
	 * The ilg is valid only while we hold the conn lock. Once we drop
	 * the lock, another thread can locate another ilg on this connp,
	 * but on a different ipif, and delete it, and cause the ilg array
	 * to be reallocated and copied. Hence do the ilg_delete before
	 * dropping the lock.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (src == INADDR_ANY) {
		v6src = ipv6_all_zeros;
		leaving = B_TRUE;
	} else {
		int err = 0;
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, &v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		/* removing the last include-mode source means leaving */
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			v6src = ipv6_all_zeros;
			leaving = B_TRUE;
		}
	}

	ilg_delete(connp, ilg, &v6src);
	mutex_exit(&connp->conn_lock);

	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
	return (0);
}

/*
 * IPv6 counterpart of ip_opt_delete_group_excl(): leave the group
 * entirely (v6src unspecified) or remove a single source address from
 * this conn's filter for the group.  Called as writer on ill.
 */
static int
ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
    ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	ilg_t *ilg;
	ill_t *ilg_ill;
	uint_t ilg_orig_ifindex;
	boolean_t leaving = B_TRUE;

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the index that we originally used to join. We can't
	 * use the ill directly because ilg_ill could point to
	 * a new ill if things have moved.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, v6group,
	    ill->ill_phyint->phyint_ifindex);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		int err = 0;
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		/* removing the last include-mode source means leaving */
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1)
			v6src = NULL;
		else
			leaving = B_FALSE;
	}

	ilg_ill = ilg->ilg_ill;
	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
	ilg_delete(connp, ilg, v6src);
	mutex_exit(&connp->conn_lock);
	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
	    connp->conn_zoneid, B_FALSE, leaving);

	return (0);
}

/*
 * Handle the following optmgmt:
 *	IP_DROP_MEMBERSHIP		will leave
 *	MCAST_LEAVE_GROUP		will leave
 *	IP_UNBLOCK_SOURCE		will not leave
 *	MCAST_UNBLOCK_SOURCE		will not leave
 *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (the IP_* and MCAST_* versions of each option
 * are functionally equivalent):
 *	opt			fmode			src
 *	IP_DROP_MEMBERSHIP	MODE_IS_INCLUDE		INADDR_ANY
 *	MCAST_LEAVE_GROUP	MODE_IS_INCLUDE		INADDR_ANY
 *	IP_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
 *	MCAST_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
 *	IP_LEAVE_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int err;
	ill_t	*ill;

	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group: no ipif for group "
			    "0x%x, ifaddr 0x%x\n",
			    (int)ntohl(group), (int)ntohl(ifaddr)));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* serialize the leave with other exclusive ops on this ipif */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);
	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_LEAVE_GROUP		will leave
 *	MCAST_LEAVE_GROUP		will leave
 *	MCAST_UNBLOCK_SOURCE		will not leave
 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
 * are functionally equivalent):
 *	opt			fmode			v6src
 *	IPV6_LEAVE_GROUP	MODE_IS_INCLUDE		unspecified
 *	MCAST_LEAVE_GROUP	MODE_IS_INCLUDE		unspecified
 *	MCAST_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
 *
 * Changing the filter mode is not
allowed; if a matching ilg already 3192 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3193 * 3194 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3195 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3196 * v6src is also v4-mapped. 3197 */ 3198 int 3199 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3200 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3201 const in6_addr_t *v6src, mblk_t *first_mp) 3202 { 3203 ill_t *ill; 3204 ipif_t *ipif; 3205 char buf[INET6_ADDRSTRLEN]; 3206 ipaddr_t v4group, v4src; 3207 boolean_t isv6; 3208 ipsq_t *ipsq; 3209 int err; 3210 3211 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3212 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3213 if (err != 0) { 3214 if (err != EINPROGRESS) { 3215 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3216 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3217 sizeof (buf)), ifindex)); 3218 } 3219 return (err); 3220 } 3221 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3222 3223 /* operation is not supported on the virtual network interface */ 3224 if (isv6) { 3225 if (IS_VNI(ill)) { 3226 ill_refrele(ill); 3227 return (EINVAL); 3228 } 3229 } else { 3230 if (IS_VNI(ipif->ipif_ill)) { 3231 ipif_refrele(ipif); 3232 return (EINVAL); 3233 } 3234 } 3235 3236 if (checkonly) { 3237 /* 3238 * do not do operation, just pretend to - new T_CHECK 3239 * semantics. The error return case above if encountered 3240 * considered a good enough "check" here. 
3241 */ 3242 if (isv6) 3243 ill_refrele(ill); 3244 else 3245 ipif_refrele(ipif); 3246 return (0); 3247 } 3248 3249 if (!isv6) { 3250 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3251 ipsq, NEW_OP); 3252 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3253 v4src); 3254 IPSQ_EXIT(ipsq); 3255 ipif_refrele(ipif); 3256 } else { 3257 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3258 ipsq, NEW_OP); 3259 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3260 v6src); 3261 IPSQ_EXIT(ipsq); 3262 ill_refrele(ill); 3263 } 3264 3265 return (err); 3266 } 3267 3268 /* 3269 * Group mgmt for upper conn that passes things down 3270 * to the interface multicast list (and DLPI) 3271 * These routines can handle new style options that specify an interface name 3272 * as opposed to an interface address (needed for general handling of 3273 * unnumbered interfaces.) 3274 */ 3275 3276 /* 3277 * Add a group to an upper conn group data structure and pass things down 3278 * to the interface multicast list (and DLPI) 3279 */ 3280 static int 3281 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3282 ipaddr_t src) 3283 { 3284 int error = 0; 3285 ill_t *ill; 3286 ilg_t *ilg; 3287 ilg_stat_t ilgstat; 3288 slist_t *new_filter = NULL; 3289 int new_fmode; 3290 3291 ASSERT(IAM_WRITER_IPIF(ipif)); 3292 3293 ill = ipif->ipif_ill; 3294 3295 if (!(ill->ill_flags & ILLF_MULTICAST)) 3296 return (EADDRNOTAVAIL); 3297 3298 /* 3299 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3300 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3301 * serialize 2 threads doing join (sock, group1, hme0:0) and 3302 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3303 * but both operations happen on the same conn. 
3304 */ 3305 mutex_enter(&connp->conn_lock); 3306 ilg = ilg_lookup_ipif(connp, group, ipif); 3307 3308 /* 3309 * Depending on the option we're handling, may or may not be okay 3310 * if group has already been added. Figure out our rules based 3311 * on fmode and src params. Also make sure there's enough room 3312 * in the filter if we're adding a source to an existing filter. 3313 */ 3314 if (src == INADDR_ANY) { 3315 /* we're joining for all sources, must not have joined */ 3316 if (ilg != NULL) 3317 error = EADDRINUSE; 3318 } else { 3319 if (fmode == MODE_IS_EXCLUDE) { 3320 /* (excl {addr}) => block source, must have joined */ 3321 if (ilg == NULL) 3322 error = EADDRNOTAVAIL; 3323 } 3324 /* (incl {addr}) => join source, may have joined */ 3325 3326 if (ilg != NULL && 3327 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3328 error = ENOBUFS; 3329 } 3330 if (error != 0) { 3331 mutex_exit(&connp->conn_lock); 3332 return (error); 3333 } 3334 3335 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3336 3337 /* 3338 * Alloc buffer to copy new state into (see below) before 3339 * we make any changes, so we can bail if it fails. 
3340 */ 3341 if ((new_filter = l_alloc()) == NULL) { 3342 mutex_exit(&connp->conn_lock); 3343 return (ENOMEM); 3344 } 3345 3346 if (ilg == NULL) { 3347 ilgstat = ILGSTAT_NEW; 3348 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3349 mutex_exit(&connp->conn_lock); 3350 l_free(new_filter); 3351 return (ENOMEM); 3352 } 3353 if (src != INADDR_ANY) { 3354 ilg->ilg_filter = l_alloc(); 3355 if (ilg->ilg_filter == NULL) { 3356 ilg_delete(connp, ilg, NULL); 3357 mutex_exit(&connp->conn_lock); 3358 l_free(new_filter); 3359 return (ENOMEM); 3360 } 3361 ilg->ilg_filter->sl_numsrc = 1; 3362 IN6_IPADDR_TO_V4MAPPED(src, 3363 &ilg->ilg_filter->sl_addr[0]); 3364 } 3365 if (group == INADDR_ANY) { 3366 ilg->ilg_v6group = ipv6_all_zeros; 3367 } else { 3368 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3369 } 3370 ilg->ilg_ipif = ipif; 3371 ilg->ilg_ill = NULL; 3372 ilg->ilg_orig_ifindex = 0; 3373 ilg->ilg_fmode = fmode; 3374 } else { 3375 int index; 3376 in6_addr_t v6src; 3377 ilgstat = ILGSTAT_CHANGE; 3378 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3379 mutex_exit(&connp->conn_lock); 3380 l_free(new_filter); 3381 return (EINVAL); 3382 } 3383 if (ilg->ilg_filter == NULL) { 3384 ilg->ilg_filter = l_alloc(); 3385 if (ilg->ilg_filter == NULL) { 3386 mutex_exit(&connp->conn_lock); 3387 l_free(new_filter); 3388 return (ENOMEM); 3389 } 3390 } 3391 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3392 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3393 mutex_exit(&connp->conn_lock); 3394 l_free(new_filter); 3395 return (EADDRNOTAVAIL); 3396 } 3397 index = ilg->ilg_filter->sl_numsrc++; 3398 ilg->ilg_filter->sl_addr[index] = v6src; 3399 } 3400 3401 /* 3402 * Save copy of ilg's filter state to pass to other functions, 3403 * so we can release conn_lock now. 
3404 */ 3405 new_fmode = ilg->ilg_fmode; 3406 l_copy(ilg->ilg_filter, new_filter); 3407 3408 mutex_exit(&connp->conn_lock); 3409 3410 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3411 if (error != 0) { 3412 /* 3413 * Need to undo what we did before calling ip_addmulti()! 3414 * Must look up the ilg again since we've not been holding 3415 * conn_lock. 3416 */ 3417 in6_addr_t v6src; 3418 if (ilgstat == ILGSTAT_NEW) 3419 v6src = ipv6_all_zeros; 3420 else 3421 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3422 mutex_enter(&connp->conn_lock); 3423 ilg = ilg_lookup_ipif(connp, group, ipif); 3424 ASSERT(ilg != NULL); 3425 ilg_delete(connp, ilg, &v6src); 3426 mutex_exit(&connp->conn_lock); 3427 l_free(new_filter); 3428 return (error); 3429 } 3430 3431 l_free(new_filter); 3432 return (0); 3433 } 3434 3435 static int 3436 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3437 mcast_record_t fmode, const in6_addr_t *v6src) 3438 { 3439 int error = 0; 3440 int orig_ifindex; 3441 ilg_t *ilg; 3442 ilg_stat_t ilgstat; 3443 slist_t *new_filter = NULL; 3444 int new_fmode; 3445 3446 ASSERT(IAM_WRITER_ILL(ill)); 3447 3448 if (!(ill->ill_flags & ILLF_MULTICAST)) 3449 return (EADDRNOTAVAIL); 3450 3451 /* 3452 * conn_lock protects the ilg list. Serializes 2 threads doing 3453 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3454 * and hme1 map to different ipsq's, but both operations happen 3455 * on the same conn. 3456 */ 3457 mutex_enter(&connp->conn_lock); 3458 3459 /* 3460 * Use the ifindex to do the lookup. We can't use the ill 3461 * directly because ilg_ill could point to a different ill if 3462 * things have moved. 3463 */ 3464 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3465 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3466 3467 /* 3468 * Depending on the option we're handling, may or may not be okay 3469 * if group has already been added. Figure out our rules based 3470 * on fmode and src params. 
Also make sure there's enough room 3471 * in the filter if we're adding a source to an existing filter. 3472 */ 3473 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3474 /* we're joining for all sources, must not have joined */ 3475 if (ilg != NULL) 3476 error = EADDRINUSE; 3477 } else { 3478 if (fmode == MODE_IS_EXCLUDE) { 3479 /* (excl {addr}) => block source, must have joined */ 3480 if (ilg == NULL) 3481 error = EADDRNOTAVAIL; 3482 } 3483 /* (incl {addr}) => join source, may have joined */ 3484 3485 if (ilg != NULL && 3486 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3487 error = ENOBUFS; 3488 } 3489 if (error != 0) { 3490 mutex_exit(&connp->conn_lock); 3491 return (error); 3492 } 3493 3494 /* 3495 * Alloc buffer to copy new state into (see below) before 3496 * we make any changes, so we can bail if it fails. 3497 */ 3498 if ((new_filter = l_alloc()) == NULL) { 3499 mutex_exit(&connp->conn_lock); 3500 return (ENOMEM); 3501 } 3502 3503 if (ilg == NULL) { 3504 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3505 mutex_exit(&connp->conn_lock); 3506 l_free(new_filter); 3507 return (ENOMEM); 3508 } 3509 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3510 ilg->ilg_filter = l_alloc(); 3511 if (ilg->ilg_filter == NULL) { 3512 ilg_delete(connp, ilg, NULL); 3513 mutex_exit(&connp->conn_lock); 3514 l_free(new_filter); 3515 return (ENOMEM); 3516 } 3517 ilg->ilg_filter->sl_numsrc = 1; 3518 ilg->ilg_filter->sl_addr[0] = *v6src; 3519 } 3520 ilgstat = ILGSTAT_NEW; 3521 ilg->ilg_v6group = *v6group; 3522 ilg->ilg_fmode = fmode; 3523 ilg->ilg_ipif = NULL; 3524 /* 3525 * Choose our target ill to join on. This might be different 3526 * from the ill we've been given if it's currently down and 3527 * part of a group. 3528 * 3529 * new ill is not refheld; we are writer. 
3530 */ 3531 ill = ip_choose_multi_ill(ill, v6group); 3532 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3533 ilg->ilg_ill = ill; 3534 /* 3535 * Remember the orig_ifindex that we joined on, so that we 3536 * can successfully delete them later on and also search 3537 * for duplicates if the application wants to join again. 3538 */ 3539 ilg->ilg_orig_ifindex = orig_ifindex; 3540 } else { 3541 int index; 3542 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3543 mutex_exit(&connp->conn_lock); 3544 l_free(new_filter); 3545 return (EINVAL); 3546 } 3547 if (ilg->ilg_filter == NULL) { 3548 ilg->ilg_filter = l_alloc(); 3549 if (ilg->ilg_filter == NULL) { 3550 mutex_exit(&connp->conn_lock); 3551 l_free(new_filter); 3552 return (ENOMEM); 3553 } 3554 } 3555 if (list_has_addr(ilg->ilg_filter, v6src)) { 3556 mutex_exit(&connp->conn_lock); 3557 l_free(new_filter); 3558 return (EADDRNOTAVAIL); 3559 } 3560 ilgstat = ILGSTAT_CHANGE; 3561 index = ilg->ilg_filter->sl_numsrc++; 3562 ilg->ilg_filter->sl_addr[index] = *v6src; 3563 /* 3564 * The current ill might be different from the one we were 3565 * asked to join on (if failover has occurred); we should 3566 * join on the ill stored in the ilg. The original ill 3567 * is noted in ilg_orig_ifindex, which matched our request. 3568 */ 3569 ill = ilg->ilg_ill; 3570 } 3571 3572 /* 3573 * Save copy of ilg's filter state to pass to other functions, 3574 * so we can release conn_lock now. 3575 */ 3576 new_fmode = ilg->ilg_fmode; 3577 l_copy(ilg->ilg_filter, new_filter); 3578 3579 mutex_exit(&connp->conn_lock); 3580 3581 /* 3582 * Now update the ill. We wait to do this until after the ilg 3583 * has been updated because we need to update the src filter 3584 * info for the ill, which involves looking at the status of 3585 * all the ilgs associated with this group/interface pair. 
3586 */ 3587 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3588 ilgstat, new_fmode, new_filter); 3589 if (error != 0) { 3590 /* 3591 * But because we waited, we have to undo the ilg update 3592 * if ip_addmulti_v6() fails. We also must lookup ilg 3593 * again, since we've not been holding conn_lock. 3594 */ 3595 in6_addr_t delsrc = 3596 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3597 mutex_enter(&connp->conn_lock); 3598 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3599 ASSERT(ilg != NULL); 3600 ilg_delete(connp, ilg, &delsrc); 3601 mutex_exit(&connp->conn_lock); 3602 l_free(new_filter); 3603 return (error); 3604 } 3605 3606 l_free(new_filter); 3607 3608 return (0); 3609 } 3610 3611 /* 3612 * Find an IPv4 ilg matching group, ill and source 3613 */ 3614 ilg_t * 3615 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3616 { 3617 in6_addr_t v6group, v6src; 3618 int i; 3619 boolean_t isinlist; 3620 ilg_t *ilg; 3621 ipif_t *ipif; 3622 ill_t *ilg_ill; 3623 3624 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3625 3626 /* 3627 * INADDR_ANY is represented as the IPv6 unspecified addr. 3628 */ 3629 if (group == INADDR_ANY) 3630 v6group = ipv6_all_zeros; 3631 else 3632 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3633 3634 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3635 ilg = &connp->conn_ilg[i]; 3636 if ((ipif = ilg->ilg_ipif) == NULL || 3637 (ilg->ilg_flags & ILG_DELETED) != 0) 3638 continue; 3639 ASSERT(ilg->ilg_ill == NULL); 3640 ilg_ill = ipif->ipif_ill; 3641 ASSERT(!ilg_ill->ill_isv6); 3642 if (ilg_ill == ill && 3643 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3644 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3645 /* no source filter, so this is a match */ 3646 return (ilg); 3647 } 3648 break; 3649 } 3650 } 3651 if (i == connp->conn_ilg_inuse) 3652 return (NULL); 3653 3654 /* 3655 * we have an ilg with matching ill and group; but 3656 * the ilg has a source list that we must check. 
3657 */ 3658 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3659 isinlist = B_FALSE; 3660 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3661 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3662 isinlist = B_TRUE; 3663 break; 3664 } 3665 } 3666 3667 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3668 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3669 return (ilg); 3670 3671 return (NULL); 3672 } 3673 3674 /* 3675 * Find an IPv6 ilg matching group, ill, and source 3676 */ 3677 ilg_t * 3678 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3679 const in6_addr_t *v6src, ill_t *ill) 3680 { 3681 int i; 3682 boolean_t isinlist; 3683 ilg_t *ilg; 3684 ill_t *ilg_ill; 3685 3686 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3687 3688 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3689 ilg = &connp->conn_ilg[i]; 3690 if ((ilg_ill = ilg->ilg_ill) == NULL || 3691 (ilg->ilg_flags & ILG_DELETED) != 0) 3692 continue; 3693 ASSERT(ilg->ilg_ipif == NULL); 3694 ASSERT(ilg_ill->ill_isv6); 3695 if (ilg_ill == ill && 3696 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3697 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3698 /* no source filter, so this is a match */ 3699 return (ilg); 3700 } 3701 break; 3702 } 3703 } 3704 if (i == connp->conn_ilg_inuse) 3705 return (NULL); 3706 3707 /* 3708 * we have an ilg with matching ill and group; but 3709 * the ilg has a source list that we must check. 3710 */ 3711 isinlist = B_FALSE; 3712 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3713 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3714 isinlist = B_TRUE; 3715 break; 3716 } 3717 } 3718 3719 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3720 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3721 return (ilg); 3722 3723 return (NULL); 3724 } 3725 3726 /* 3727 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3728 * This is useful when the interface fails and we have moved 3729 * to a new ill, but still would like to locate using the index 3730 * that we originally used to join. Used only for IPv6 currently. 3731 */ 3732 static ilg_t * 3733 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3734 { 3735 ilg_t *ilg; 3736 int i; 3737 3738 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3739 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3740 ilg = &connp->conn_ilg[i]; 3741 if (ilg->ilg_ill == NULL || 3742 (ilg->ilg_flags & ILG_DELETED) != 0) 3743 continue; 3744 /* ilg_ipif is NULL for V6 */ 3745 ASSERT(ilg->ilg_ipif == NULL); 3746 ASSERT(ilg->ilg_orig_ifindex != 0); 3747 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3748 ilg->ilg_orig_ifindex == ifindex) { 3749 return (ilg); 3750 } 3751 } 3752 return (NULL); 3753 } 3754 3755 /* 3756 * Find an IPv6 ilg matching group and ill 3757 */ 3758 ilg_t * 3759 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3760 { 3761 ilg_t *ilg; 3762 int i; 3763 ill_t *mem_ill; 3764 3765 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3766 3767 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3768 ilg = &connp->conn_ilg[i]; 3769 if ((mem_ill = ilg->ilg_ill) == NULL || 3770 (ilg->ilg_flags & ILG_DELETED) != 0) 3771 continue; 3772 ASSERT(ilg->ilg_ipif == NULL); 3773 ASSERT(mem_ill->ill_isv6); 3774 if (mem_ill == ill && 3775 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3776 return (ilg); 3777 } 3778 return (NULL); 3779 } 3780 3781 /* 3782 * Find an IPv4 ilg matching group and ipif 3783 */ 3784 static ilg_t * 3785 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3786 { 3787 in6_addr_t v6group; 3788 int i; 3789 ilg_t *ilg; 3790 3791 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3792 ASSERT(!ipif->ipif_ill->ill_isv6); 3793 3794 if (group == INADDR_ANY) 3795 v6group = ipv6_all_zeros; 3796 else 3797 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3798 3799 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3800 ilg = 
&connp->conn_ilg[i]; 3801 if ((ilg->ilg_flags & ILG_DELETED) == 0 && 3802 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) && 3803 ilg->ilg_ipif == ipif) 3804 return (ilg); 3805 } 3806 return (NULL); 3807 } 3808 3809 /* 3810 * If a source address is passed in (src != NULL and src is not 3811 * unspecified), remove the specified src addr from the given ilg's 3812 * filter list, else delete the ilg. 3813 */ 3814 static void 3815 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3816 { 3817 int i; 3818 3819 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3820 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3821 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3822 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3823 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3824 3825 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3826 if (connp->conn_ilg_walker_cnt != 0) { 3827 ilg->ilg_flags |= ILG_DELETED; 3828 return; 3829 } 3830 3831 FREE_SLIST(ilg->ilg_filter); 3832 3833 i = ilg - &connp->conn_ilg[0]; 3834 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3835 3836 /* Move other entries up one step */ 3837 connp->conn_ilg_inuse--; 3838 for (; i < connp->conn_ilg_inuse; i++) 3839 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3840 3841 if (connp->conn_ilg_inuse == 0) { 3842 mi_free((char *)connp->conn_ilg); 3843 connp->conn_ilg = NULL; 3844 cv_broadcast(&connp->conn_refcv); 3845 } 3846 } else { 3847 l_remove(ilg->ilg_filter, src); 3848 } 3849 } 3850 3851 /* 3852 * Called from conn close. No new ilg can be added or removed. 3853 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3854 * will return error if conn has started closing. 
3855 */ 3856 void 3857 ilg_delete_all(conn_t *connp) 3858 { 3859 int i; 3860 ipif_t *ipif = NULL; 3861 ill_t *ill = NULL; 3862 ilg_t *ilg; 3863 in6_addr_t v6group; 3864 boolean_t success; 3865 ipsq_t *ipsq; 3866 int orig_ifindex; 3867 3868 mutex_enter(&connp->conn_lock); 3869 retry: 3870 ILG_WALKER_HOLD(connp); 3871 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3872 ilg = &connp->conn_ilg[i]; 3873 /* 3874 * Since this walk is not atomic (we drop the 3875 * conn_lock and wait in ipsq_enter) we need 3876 * to check for the ILG_DELETED flag. 3877 */ 3878 if (ilg->ilg_flags & ILG_DELETED) { 3879 /* Go to the next ilg */ 3880 i--; 3881 continue; 3882 } 3883 v6group = ilg->ilg_v6group; 3884 3885 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3886 ipif = ilg->ilg_ipif; 3887 ill = ipif->ipif_ill; 3888 } else { 3889 ipif = NULL; 3890 ill = ilg->ilg_ill; 3891 } 3892 /* 3893 * We may not be able to refhold the ill if the ill/ipif 3894 * is changing. But we need to make sure that the ill will 3895 * not vanish. So we just bump up the ill_waiter count. 3896 * If we are unable to do even that, then the ill is closing, 3897 * in which case the unplumb thread will handle the cleanup, 3898 * and we move on to the next ilg. 3899 */ 3900 if (!ill_waiter_inc(ill)) { 3901 /* Go to the next ilg */ 3902 i--; 3903 continue; 3904 } 3905 mutex_exit(&connp->conn_lock); 3906 /* 3907 * To prevent deadlock between ill close which waits inside 3908 * the perimeter, and conn close, ipsq_enter returns error, 3909 * the moment ILL_CONDEMNED is set, in which case ill close 3910 * takes responsibility to cleanup the ilgs. Note that we 3911 * have not yet set condemned flag, otherwise the conn can't 3912 * be refheld for cleanup by those routines and it would be 3913 * a mutual deadlock. 
3914 */ 3915 success = ipsq_enter(ill, B_FALSE); 3916 ipsq = ill->ill_phyint->phyint_ipsq; 3917 ill_waiter_dcr(ill); 3918 mutex_enter(&connp->conn_lock); 3919 if (!success) { 3920 /* Go to the next ilg */ 3921 i--; 3922 continue; 3923 } 3924 3925 /* 3926 * Make sure that nothing has changed under. For eg. 3927 * a failover/failback can change ilg_ill while we were 3928 * waiting to become exclusive above 3929 */ 3930 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3931 ipif = ilg->ilg_ipif; 3932 ill = ipif->ipif_ill; 3933 } else { 3934 ipif = NULL; 3935 ill = ilg->ilg_ill; 3936 } 3937 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3938 /* 3939 * The ilg has changed under us probably due 3940 * to a failover or unplumb. Retry on the same ilg. 3941 */ 3942 mutex_exit(&connp->conn_lock); 3943 ipsq_exit(ipsq); 3944 mutex_enter(&connp->conn_lock); 3945 continue; 3946 } 3947 v6group = ilg->ilg_v6group; 3948 orig_ifindex = ilg->ilg_orig_ifindex; 3949 ilg_delete(connp, ilg, NULL); 3950 mutex_exit(&connp->conn_lock); 3951 3952 if (ipif != NULL) 3953 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3954 B_FALSE, B_TRUE); 3955 3956 else 3957 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3958 connp->conn_zoneid, B_FALSE, B_TRUE); 3959 3960 ipsq_exit(ipsq); 3961 mutex_enter(&connp->conn_lock); 3962 /* Go to the next ilg */ 3963 i--; 3964 } 3965 ILG_WALKER_RELE(connp); 3966 3967 /* If any ill was skipped above wait and retry */ 3968 if (connp->conn_ilg_inuse != 0) { 3969 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3970 goto retry; 3971 } 3972 mutex_exit(&connp->conn_lock); 3973 } 3974 3975 /* 3976 * Called from ill close by ipcl_walk for clearing conn_ilg and 3977 * conn_multicast_ipif for a given ipif. conn is held by caller. 3978 * Note that ipcl_walk only walks conns that are not yet condemned. 3979 * condemned conns can't be refheld. For this reason, conn must become clean 3980 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3981 * condemned flag. 3982 */ 3983 static void 3984 conn_delete_ipif(conn_t *connp, caddr_t arg) 3985 { 3986 ipif_t *ipif = (ipif_t *)arg; 3987 int i; 3988 char group_buf1[INET6_ADDRSTRLEN]; 3989 char group_buf2[INET6_ADDRSTRLEN]; 3990 ipaddr_t group; 3991 ilg_t *ilg; 3992 3993 /* 3994 * Even though conn_ilg_inuse can change while we are in this loop, 3995 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3996 * be created or deleted for this connp, on this ill, since this ill 3997 * is the perimeter. So we won't miss any ilg in this cleanup. 3998 */ 3999 mutex_enter(&connp->conn_lock); 4000 4001 /* 4002 * Increment the walker count, so that ilg repacking does not 4003 * occur while we are in the loop. 4004 */ 4005 ILG_WALKER_HOLD(connp); 4006 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 4007 ilg = &connp->conn_ilg[i]; 4008 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 4009 continue; 4010 /* 4011 * ip_close cannot be cleaning this ilg at the same time. 4012 * since it also has to execute in this ill's perimeter which 4013 * we are now holding. Only a clean conn can be condemned. 
4014 */ 4015 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 4016 4017 /* Blow away the membership */ 4018 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 4019 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 4020 group_buf1, sizeof (group_buf1)), 4021 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 4022 group_buf2, sizeof (group_buf2)), 4023 ipif->ipif_ill->ill_name)); 4024 4025 /* ilg_ipif is NULL for V6, so we won't be here */ 4026 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 4027 4028 group = V4_PART_OF_V6(ilg->ilg_v6group); 4029 ilg_delete(connp, &connp->conn_ilg[i], NULL); 4030 mutex_exit(&connp->conn_lock); 4031 4032 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 4033 mutex_enter(&connp->conn_lock); 4034 } 4035 4036 /* 4037 * If we are the last walker, need to physically delete the 4038 * ilgs and repack. 4039 */ 4040 ILG_WALKER_RELE(connp); 4041 4042 if (connp->conn_multicast_ipif == ipif) { 4043 /* Revert to late binding */ 4044 connp->conn_multicast_ipif = NULL; 4045 } 4046 mutex_exit(&connp->conn_lock); 4047 4048 conn_delete_ire(connp, (caddr_t)ipif); 4049 } 4050 4051 /* 4052 * Called from ill close by ipcl_walk for clearing conn_ilg and 4053 * conn_multicast_ill for a given ill. conn is held by caller. 4054 * Note that ipcl_walk only walks conns that are not yet condemned. 4055 * condemned conns can't be refheld. For this reason, conn must become clean 4056 * first, i.e. it must not refer to any ill/ire/ipif and then only set 4057 * condemned flag. 4058 */ 4059 static void 4060 conn_delete_ill(conn_t *connp, caddr_t arg) 4061 { 4062 ill_t *ill = (ill_t *)arg; 4063 int i; 4064 char group_buf[INET6_ADDRSTRLEN]; 4065 in6_addr_t v6group; 4066 int orig_ifindex; 4067 ilg_t *ilg; 4068 4069 /* 4070 * Even though conn_ilg_inuse can change while we are in this loop, 4071 * no new ilgs can be created/deleted for this connp, on this 4072 * ill, since this ill is the perimeter. So we won't miss any ilg 4073 * in this cleanup. 
4074 */ 4075 mutex_enter(&connp->conn_lock); 4076 4077 /* 4078 * Increment the walker count, so that ilg repacking does not 4079 * occur while we are in the loop. 4080 */ 4081 ILG_WALKER_HOLD(connp); 4082 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 4083 ilg = &connp->conn_ilg[i]; 4084 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 4085 /* 4086 * ip_close cannot be cleaning this ilg at the same 4087 * time, since it also has to execute in this ill's 4088 * perimeter which we are now holding. Only a clean 4089 * conn can be condemned. 4090 */ 4091 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 4092 4093 /* Blow away the membership */ 4094 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 4095 inet_ntop(AF_INET6, &ilg->ilg_v6group, 4096 group_buf, sizeof (group_buf)), 4097 ill->ill_name)); 4098 4099 v6group = ilg->ilg_v6group; 4100 orig_ifindex = ilg->ilg_orig_ifindex; 4101 ilg_delete(connp, ilg, NULL); 4102 mutex_exit(&connp->conn_lock); 4103 4104 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 4105 connp->conn_zoneid, B_FALSE, B_TRUE); 4106 mutex_enter(&connp->conn_lock); 4107 } 4108 } 4109 /* 4110 * If we are the last walker, need to physically delete the 4111 * ilgs and repack. 4112 */ 4113 ILG_WALKER_RELE(connp); 4114 4115 if (connp->conn_multicast_ill == ill) { 4116 /* Revert to late binding */ 4117 connp->conn_multicast_ill = NULL; 4118 connp->conn_orig_multicast_ifindex = 0; 4119 } 4120 mutex_exit(&connp->conn_lock); 4121 } 4122 4123 /* 4124 * Called when an ipif is unplumbed to make sure that there are no 4125 * dangling conn references to that ipif. 4126 * Handles ilg_ipif and conn_multicast_ipif 4127 */ 4128 void 4129 reset_conn_ipif(ipif) 4130 ipif_t *ipif; 4131 { 4132 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4133 4134 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4135 } 4136 4137 /* 4138 * Called when an ill is unplumbed to make sure that there are no 4139 * dangling conn references to that ill. 
4140 * Handles ilg_ill, conn_multicast_ill. 4141 */ 4142 void 4143 reset_conn_ill(ill_t *ill) 4144 { 4145 ip_stack_t *ipst = ill->ill_ipst; 4146 4147 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 4148 } 4149 4150 #ifdef DEBUG 4151 /* 4152 * Walk functions walk all the interfaces in the system to make 4153 * sure that there is no refernece to the ipif or ill that is 4154 * going away. 4155 */ 4156 int 4157 ilm_walk_ill(ill_t *ill) 4158 { 4159 int cnt = 0; 4160 ill_t *till; 4161 ilm_t *ilm; 4162 ill_walk_context_t ctx; 4163 ip_stack_t *ipst = ill->ill_ipst; 4164 4165 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4166 till = ILL_START_WALK_ALL(&ctx, ipst); 4167 for (; till != NULL; till = ill_next(&ctx, till)) { 4168 mutex_enter(&till->ill_lock); 4169 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4170 if (ilm->ilm_ill == ill) { 4171 cnt++; 4172 } 4173 } 4174 mutex_exit(&till->ill_lock); 4175 } 4176 rw_exit(&ipst->ips_ill_g_lock); 4177 4178 return (cnt); 4179 } 4180 4181 /* 4182 * This function is called before the ipif is freed. 4183 */ 4184 int 4185 ilm_walk_ipif(ipif_t *ipif) 4186 { 4187 int cnt = 0; 4188 ill_t *till; 4189 ilm_t *ilm; 4190 ill_walk_context_t ctx; 4191 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4192 4193 till = ILL_START_WALK_ALL(&ctx, ipst); 4194 for (; till != NULL; till = ill_next(&ctx, till)) { 4195 mutex_enter(&till->ill_lock); 4196 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4197 if (ilm->ilm_ipif == ipif) { 4198 cnt++; 4199 } 4200 } 4201 mutex_exit(&till->ill_lock); 4202 } 4203 return (cnt); 4204 } 4205 #endif 4206