1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <sys/strsubr.h> 45 #include <net/route.h> 46 #include <netinet/in.h> 47 #include <net/if_dl.h> 48 #include <netinet/ip6.h> 49 #include <netinet/icmp6.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/nd.h> 54 #include <inet/arp.h> 55 #include <inet/ip.h> 56 #include <inet/ip6.h> 57 #include <inet/ip_if.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ip_multi.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ipsec_impl.h> 62 #include <inet/sctp_ip.h> 63 #include <inet/ip_listutils.h> 64 #include <inet/udp_impl.h> 65 66 /* igmpv3/mldv2 source filter manipulation */ 67 static void ilm_bld_flists(conn_t *conn, void *arg); 68 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 69 slist_t *flist); 70 71 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 72 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 73 int orig_ifindex, zoneid_t zoneid); 74 static void ilm_delete(ilm_t *ilm); 75 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 77 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 78 const in6_addr_t *v6group, int index); 79 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 80 ipif_t *ipif); 81 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 82 mcast_record_t fmode, ipaddr_t src); 83 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 84 mcast_record_t fmode, const in6_addr_t *v6src); 85 static void ilg_delete(conn_t *connp, ilg_t *ilg, 
const in6_addr_t *src); 86 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 87 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 88 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 89 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 90 static void conn_ilg_reap(conn_t *connp); 91 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 92 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 93 static int ip_opt_delete_group_excl_v6(conn_t *connp, 94 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 95 const in6_addr_t *v6src); 96 97 /* 98 * MT notes: 99 * 100 * Multicast joins operate on both the ilg and ilm structures. Multiple 101 * threads operating on an conn (socket) trying to do multicast joins 102 * need to synchronize when operating on the ilg. Multiple threads 103 * potentially operating on different conn (socket endpoints) trying to 104 * do multicast joins could eventually end up trying to manipulate the 105 * ilm simulatenously and need to synchronize on the access to the ilm. 106 * Both are amenable to standard Solaris MT techniques, but it would be 107 * complex to handle a failover or failback which needs to manipulate 108 * ilg/ilms if an applications can also simultaenously join/leave 109 * multicast groups. Hence multicast join/leave also go through the ipsq_t 110 * serialization. 111 * 112 * Multicast joins and leaves are single-threaded per phyint/IPMP group 113 * using the ipsq serialization mechanism. 114 * 115 * An ilm is an IP data structure used to track multicast join/leave. 116 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 117 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 118 * referencing the ilm. ilms are created / destroyed only as writer. ilms 119 * are not passed around, instead they are looked up and used under the 120 * ill_lock or as writer. 
So we don't need a dynamic refcount of the number 121 * of threads holding reference to an ilm. 122 * 123 * Multicast Join operation: 124 * 125 * The first step is to determine the ipif (v4) or ill (v6) on which 126 * the join operation is to be done. The join is done after becoming 127 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 128 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 129 * Multiple threads can attempt to join simultaneously on different ipif/ill 130 * on the same conn. In this case the ipsq serialization does not help in 131 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 132 * The conn_lock also protects all the ilg_t members. 133 * 134 * Leave operation. 135 * 136 * Similar to the join operation, the first step is to determine the ipif 137 * or ill (v6) on which the leave operation is to be done. The leave operation 138 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 139 * As with join ilg modification is done under the protection of the conn lock. 
 */

/*
 * Become exclusive on the ipsq associated with `ipif'.  If entry must be
 * deferred, ipsq_try_enter() has queued `first_mp' for redelivery; we drop
 * our ipif reference and bail out of the *caller* with EINPROGRESS.
 * NOTE: this macro contains a return statement.
 */
#define	IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type)	\
	ASSERT(connp != NULL);						\
	(ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp),	\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ipif_refrele(ipif);					\
		return (EINPROGRESS);					\
	}

/* Same as IPSQ_ENTER_IPIF but for an ill; releases the ill on deferral. */
#define	IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type)		\
	ASSERT(connp != NULL);						\
	(ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp),		\
	    (first_mp), (func), (type), B_TRUE);			\
	if ((ipsq) == NULL) {						\
		ill_refrele(ill);					\
		return (EINPROGRESS);					\
	}

#define	IPSQ_EXIT(ipsq)	\
	if (ipsq != NULL)	\
		ipsq_exit(ipsq);

/*
 * Track walkers of the conn_ilg array.  Entries marked ILG_DELETED cannot
 * be reclaimed while a walk is in progress; the last walker out performs
 * the reap.  Callers hold conn_lock (conn_ilg_reap asserts it).
 */
#define	ILG_WALKER_HOLD(connp)	(connp)->conn_ilg_walker_cnt++

#define	ILG_WALKER_RELE(connp)				\
	{						\
		(connp)->conn_ilg_walker_cnt--;		\
		if ((connp)->conn_ilg_walker_cnt == 0)	\
			conn_ilg_reap(connp);		\
	}

/*
 * Compact conn_ilg in place, discarding entries marked ILG_DELETED and
 * freeing their source filter lists.  If the array becomes empty it is
 * freed entirely and waiters on conn_refcv are woken.
 *
 * Caller must hold connp->conn_lock.
 */
static void
conn_ilg_reap(conn_t *connp)
{
	int	to;
	int	from;
	ilg_t	*ilg;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	to = 0;
	from = 0;
	while (from < connp->conn_ilg_inuse) {
		if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) {
			/* Deleted entry: free its filter and skip it. */
			ilg = &connp->conn_ilg[from];
			FREE_SLIST(ilg->ilg_filter);
			ilg->ilg_flags &= ~ILG_DELETED;
			from++;
			continue;
		}
		/* Live entry: slide it down over any reaped slots. */
		if (to != from)
			connp->conn_ilg[to] = connp->conn_ilg[from];
		to++;
		from++;
	}

	connp->conn_ilg_inuse = to;

	if (connp->conn_ilg_inuse == 0) {
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = NULL;
		cv_broadcast(&connp->conn_refcv);
	}
}

/* Zeroed allocation of `number' structures (mi_zalloc returns NULL on fail) */
#define	GETSTRUCT(structure, number)	\
	((structure *)mi_zalloc(sizeof (structure) * (number)))

/* conn_ilg grows in chunks of this many entries */
#define	ILG_ALLOC_CHUNK	16

/*
 * Returns a pointer to the next available ilg in conn_ilg.
Allocs more
 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's
 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the
 * returned ilg).  Returns NULL on failure (ENOMEM).
 *
 * Assumes connp->conn_lock is held.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp)
{
	ilg_t	*new, *ret;
	int	curcnt;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);

	/* First allocation: start with one chunk. */
	if (connp->conn_ilg == NULL) {
		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
		if (connp->conn_ilg == NULL)
			return (NULL);
		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
		connp->conn_ilg_inuse = 0;
	}
	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
		if (connp->conn_ilg_walker_cnt != 0) {
			/*
			 * XXX We cannot grow the array at this point
			 * because a list walker could be in progress, and
			 * we cannot wipe out the existing array until the
			 * walker is done. Just return NULL for now.
			 * ilg_delete_all() will have to be changed when
			 * this logic is changed.
			 */
			return (NULL);
		}
		/* Grow by one chunk, copying the existing entries over. */
		curcnt = connp->conn_ilg_allocated;
		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
		if (new == NULL)
			return (NULL);
		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = new;
		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
	}

	ret = &connp->conn_ilg[connp->conn_ilg_inuse++];
	ASSERT((ret->ilg_flags & ILG_DELETED) == 0);
	bzero(ret, sizeof (*ret));
	return (ret);
}

/*
 * Accumulator carried through an ipcl_walk() while (re)building the
 * master include/exclude source lists for one ilm.
 */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* the ilm whose filter is rebuilt */
	int		fbld_in_cnt;	/* # of INCLUDE-mode ilgs seen */
	int		fbld_ex_cnt;	/* # of EXCLUDE-mode ilgs seen */
	slist_t		fbld_in;	/* union of include lists */
	slist_t		fbld_ex;	/* intersection of exclude lists */
	boolean_t	fbld_in_overflow; /* include union overflowed */
} ilm_fbld_t;

/*
 * ipcl_walk() callback: if `conn' has an ilg matching the fbld's ilm
 * (same group and interface), merge that ilg's filter into the master
 * include/exclude lists being accumulated in `arg' (an ilm_fbld_t).
 */
static void
ilm_bld_flists(conn_t *conn, void *arg)
{
	int i;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (conn->conn_ilg_inuse == 0)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn.  But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match).  If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 */
	mutex_enter(&conn->conn_lock);
	for (i = 0; i < conn->conn_ilg_inuse; i++) {
		ilg_t *ilg = &conn->conn_ilg[i];
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty.  If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	mutex_exit(&conn->conn_lock);
}

/*
 * Compute the interface-level (ilm) filter mode and source list from the
 * ilgs of all conns joined to this ilm's group, per the IGMPv3/MLDv2
 * merge rules: union of include lists, intersection of exclude lists,
 * with EXCLUDE taking precedence when any exclude-mode ilg exists.
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in.  A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}

/*
 * If the given interface has failed, choose a new one to join on so
 * that we continue to receive packets.  ilg_orig_ifindex remembers
 * what the application used to join on so that we know the ilg to
 * delete even though we change the ill here.  Callers will store the
 * ilg returned from this function in ilg_ill.  Thus when we receive
 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
 *
 * This function must be called as writer so we can walk the group
 * list and examine flags without holding a lock.
 */
ill_t *
ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
{
	ill_t	*till;
	ill_group_t *illgrp = ill->ill_group;

	ASSERT(IAM_WRITER_ILL(ill));

	/* No group, or the all-groups join: nothing better to choose. */
	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
		return (ill);

	/* This ill is still usable; keep it. */
	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
		return (ill);

	/* Scan the IPMP group for the first ill that is neither failed
	 * nor inactive; fall back to the original ill if none is found. */
	till = illgrp->illgrp_ill;
	while (till != NULL &&
	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
		till = till->ill_group_next;
	}
	if (till != NULL)
		return (till);

	return (ill);
}

/*
 * Apply a join to an already-existing ilm: bump the reference/no-ilg
 * counters as dictated by `ilgstat', recompute the interface filter
 * state, and (except on loopback) send an IGMP/MLD state change report
 * if the filter actually changed.  Returns 0 or ENOMEM.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 * - we've had previous joins without associated ilgs
	 * - this join has no associated ilg
	 * - the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* An ILGSTAT_CHANGE only alters the filter, not the membership. */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state.  If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}

/*
 * A leave has dropped ilm_refcnt without reaching zero: recompute the
 * interface filter state from the remaining ilgs and report any change
 * via IGMP/MLD (except on loopback).  Returns 0 or ENOMEM.
 */
static int
ilm_update_del(ilm_t *ilm, boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				/*
				 * Degrade gracefully: fall back to
				 * unfiltered EXCLUDE-mode membership
				 * rather than failing the leave.
				 */
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}

/*
 * INADDR_ANY means all multicast addresses. This is only used
 * by the multicast router.
 * INADDR_ANY is stored as IPv6 unspecified addr.
 */
int
ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ipif(ipif, group);
	mutex_exit(&ill->ill_lock);
	/*
	 * Since we are writer, we know the ilm_flags itself cannot
	 * change at this point, and ilm_lookup_ipif would not have
	 * returned a DELETED ilm. However, the data path can free
	 * ilm->next via ilm_walker_cleanup() so we can safely
	 * access anything in ilm except ilm_next (for safe access to
	 * ilm_next we'd have to take the ill_lock).
	 */
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));

	/*
	 * ilms are associated with ipifs in IPv4. It moves with the
	 * ipif if the ipif moves to a new ill when the interface
	 * fails. Thus we really don't check whether the ipif_ill
	 * has failed like in IPv6. If it has FAILED the ipif
	 * will move (daemon will move it) and hence the ilm, if the
	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
	 * we continue to receive in the same place even if the
	 * interface fails.
	 */
	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (group == INADDR_ANY) {
		/*
		 * Check how many ipif's have members in this group -
		 * if more than one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, &v6group) > 1)
			return (0);
		if (ill->ill_group == NULL)
			ret = ip_join_allmulti(ipif);
		else
			ret = ill_nominate_mcast_rcv(ill->ill_group);
		/* Undo the ilm we just added if the driver join failed. */
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_joingroup(ilm);

	/* Another ipif already joined at the link layer; nothing to send. */
	if (ilm_numentries_v6(ill, &v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ipif, &v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 *
 * ill identifies the interface to join on; it may not match the
 * interface requested by the application if a failover has taken
 * place. orig_ifindex always identifies the interface requested
 * by the app.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 */
int
ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
    slist_t *ilg_flist)
{
	ilm_t	*ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		return (EINVAL);
	}

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill,
	 * orig_ill). group is the multicast group address, ill is
	 * the interface on which it is currently joined, and orig_ill
	 * is the interface on which the application requested the
	 * join. orig_ill and ill are the same unless orig_ill has
	 * failed over.
	 *
	 * Both orig_ill and ill are required, which means we may have
	 * 2 ilms on an ill for the same group, but with different
	 * orig_ills. These must be kept separate, so that when failback
	 * occurs, the appropriate ilms are moved back to their orig_ill
	 * without disrupting memberships on the ill to which they had
	 * been moved.
	 *
	 * In order to track orig_ill, we store orig_ifindex in the
	 * ilm and ilg.
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
	mutex_exit(&ill->ill_lock);
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));

	/*
	 * We need to remember where the application really wanted
	 * to join. This will be used later if we want to failback
	 * to the original interface.
	 */
	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
	    ilg_flist, orig_ifindex, zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, v6group) > 1)
			return (0);
		if (ill->ill_group == NULL)
			ret = ip_join_allmulti(ill->ill_ipif);
		else
			ret = ill_nominate_mcast_rcv(ill->ill_group);

		/* Undo the ilm we just added if the driver join failed. */
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		mld_joingroup(ilm);

	/*
	 * If we have more than one we should not tell the driver
	 * to join this time.
	 */
	if (ilm_numentries_v6(ill, v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * Send a multicast request to the driver for enabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	uint32_t addrlen, addroff;
	char	group_buf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
	    &addrlen, &addroff);
	if (!mp)
		return (ENOMEM);
	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this squery mblk.
		 * The actual hardware address will be accessed only in the
		 * dl_xxx_req, not in the squery. More importantly,
		 * that hardware address can *only* be accessed in this
		 * mblk chain by calling mi_offset_param_c(), which uses
		 * the magic number in the squery hw offset field to go
		 * to the *next* mblk (the dl_xxx_req), subtract the
		 * (b_wptr - b_rptr), and find the actual offset into
		 * the dl_xxx_req.
		 * Any method that depends on using the
		 * offset field in the dl_disabmulti_req or squery
		 * to find either hardware address will similarly fail.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv4: hand the squery+enabmulti chain to ARP. */
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv6: NDP fills in the address and sends the request. */
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for enabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}

	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_enabmulti_req(ill, v6groupp));
}

/*
 * INADDR_ANY means all multicast addresses. This is only used
 * by the multicast router.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 */
int
ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/*
	 * Look for a match on the ipif.
	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ipif(ipif, group);
	mutex_exit(&ill->ill_lock);
	if (ilm == NULL)
		return (ENOENT);

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other joins remain: just recompute/report the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_FALSE));

	if (group == INADDR_ANY) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, &v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave. This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		/* Nominate another ill in the group to receive allmulti. */
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, &v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, &v6group));
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 */
int
ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
{
	ipif_t	*ipif;
	ilm_t	*ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
		return (EINVAL);

	/*
	 * Look for a match on the ill.
	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
	 *
	 * Similar to ip_addmulti_v6, we should always look using
	 * the orig_ifindex.
	 *
	 * 1) If orig_ifindex is different from ill's ifindex
	 *    we should have an ilm with orig_ifindex created in
	 *    ip_addmulti_v6. We should delete that here.
	 *
	 * 2) If orig_ifindex is same as ill's ifindex, we should
	 *    not delete the ilm that is temporarily here because of
	 *    a FAILOVER. Those ilms will have a ilm_orig_ifindex
	 *    different from ill's ifindex.
	 *
	 * Thus, always lookup using orig_ifindex.
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
	mutex_exit(&ill->ill_lock);
	if (ilm == NULL)
		return (ENOENT);

	ASSERT(ilm->ilm_ill == ill);

	ipif = ill->ill_ipif;

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other joins remain: just recompute/report the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_TRUE));

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave. This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		/* Nominate another ill in the group to receive allmulti. */
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		mld_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, v6group));
}

/*
 * Send a multicast request to the driver for disabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	char	group_buf[INET6_ADDRSTRLEN];
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_ILL(ill));
	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);

	if (!mp)
		return (ENOMEM);

	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this mblk.
		 *
		 * Please see the comment in ip_ll_send_enabmulti_req()
		 * for a complete explanation.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv4: hand the squery+disabmulti chain to ARP. */
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv6: NDP fills in the address and sends the request. */
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_disabmulti_req(ill, v6group));
}

/*
 * Make the driver pass up all multicast packets
 *
 * With ill groups, the caller makes sure that there is only
 * one ill joining the allmulti group.
 */
int
ip_join_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(!ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to
	 * the DLPI provider. We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
		if (mp == NULL)
			return (ENOMEM);
		ill_dlpi_send(ill, mp);
	}

	ill->ill_join_allmulti = B_TRUE;
	return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 *
 * With ill groups, we need to nominate some other ill as
 * this ipif->ipif_ill is leaving the group.
 */
int
ip_leave_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCOFF_REQ message and send it directly to
	 * the DLPI provider. We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
		if (mp == NULL)
			return (ENOMEM);
		ill_dlpi_send(ill, mp);
	}

	ill->ill_join_allmulti = B_FALSE;
	return (0);
}

/*
 * Copy mp_orig and pass it in as a local message.
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t *ipst = ill->ill_ipst;

	/*
	 * For M_DATA UDP packets, split the header (IP + UDP) into its own
	 * mblk so the copy can share the payload via dupmsg() rather than
	 * copying the whole packet.
	 */
	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			mp_orig->b_rptr += hdrsz;
			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
				mblk_setcred(mp, DB_CRED(mp_orig));
			/* Header consumed the whole first mblk; drop it. */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			/* dupmsg() failed; undo the allocb(). */
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	if (DB_TYPE(mp) == M_CTL) {
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	/*
	 * DTrace this as ip:::send. A blocked packet will fire the send
	 * probe, but not the receive probe.
	 */
	DTRACE_IP7(send, mblk_t *, ipsec_mp, conn_t *, NULL, void_ip_t *, iph,
	    __dtrace_ipsr_ill_t *, ill, ipha_t *, iph, ip6_t *, NULL, int, 1);

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	/* The firewall hook may have consumed the packet. */
	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}

static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,	/* protocol, from arps perspective */
	sizeof (area_t),	/* proto addr offset */
	IP_ADDR_LEN,	/* proto addr_length */
	0,	/* proto mask offset */
	/* Rest is initialized when used */
	0,	/* flags */
	0,	/* hw addr offset */
	0,	/* hw addr length */
};

/*
 * Prepend an AR_ENTRY_SQUERY mblk for ipaddr to mp_tail (a dl_xxx_req);
 * consumes mp_tail on allocation failure. See the NOTE below for the
 * meaning of area_hw_addr_offset.
 */
static mblk_t *
ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
    uint32_t addroff, mblk_t *mp_tail)
{
	mblk_t	*mp;
	area_t	*area;

	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
	    (caddr_t)&ipaddr);
	if (!mp) {
		freemsg(mp_tail);
		return (NULL);
	}
	area = (area_t *)mp->b_rptr;
	area->area_hw_addr_length = addrlen;
	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
	/*
	 * NOTE!
	 *
	 * The area_hw_addr_offset, as can be seen, does not hold the
	 * actual hardware address offset. Rather, it holds the offset
	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
	 * mi_offset_paramc() to find the hardware address in the
	 * *second* mblk (dl_xxx_req), not this mblk.
	 *
	 * Using mi_offset_paramc() is thus the *only* way to access
	 * the dl_xxx_hw address.
	 *
	 * The squery hw address should *not* be accessed.
	 *
	 * See ar_entry_squery() in arp.c for an example of how all this works.
	 */

	mp->b_cont = mp_tail;
	return (mp);
}

/*
 * Create a DLPI message; for DL_{ENAB,DISAB}MULTI_REQ, room is left for
 * the hardware address.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t	hw_addr_length;
	char	*cp;
	uint32_t	offset;
	uint32_t	size;

	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		/* NOTE(review): message prefix says "ip_create_dl". */
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	size = length;
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
	case DL_DISABMULTI_REQ:
		/* Reserve room for the hw group address after the req. */
		size += hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	offset = length;

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}

/*
 * Writer processing for ip_wput_ctl(): send the DL_{ENAB,DISAB}MULTI_REQ
 * messages that had been delayed until we'd heard back from ARP. One catch:
 * we need to ensure that no one else becomes writer on the IPSQ before we've
 * received the replies, or they'll incorrectly process our replies as part of
 * their unrelated IPSQ operation. To do this, we start a new IPSQ operation,
 * which will complete when we process the reply in ip_rput_dlpi_writer().
 */
/* ARGSUSED */
static void
ip_wput_ctl_writer(ipsq_t *ipsq, queue_t *q, mblk_t *mp, void *arg)
{
	ill_t *ill = q->q_ptr;
	t_uscalar_t prim = ((union DL_primitives *)mp->b_rptr)->dl_primitive;

	ASSERT(IAM_WRITER_ILL(ill));
	ASSERT(prim == DL_ENABMULTI_REQ || prim == DL_DISABMULTI_REQ);
	ip1dbg(("ip_wput_ctl_writer: %s\n", dl_primstr(prim)));

	if (prim == DL_ENABMULTI_REQ) {
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
	}

	ipsq_current_start(ipsq, ill->ill_ipif, 0);
	ill_dlpi_send(ill, mp);
}

void
ip_wput_ctl(queue_t *q, mblk_t *mp)
{
	ill_t		*ill = q->q_ptr;
	mblk_t		*dlmp = mp->b_cont;
	area_t		*area = (area_t *)mp->b_rptr;
	t_uscalar_t	prim;

	/* Check that we have an AR_ENTRY_SQUERY with a tacked on mblk */
	if (MBLKL(mp) < sizeof (area_t) || area->area_cmd != AR_ENTRY_SQUERY ||
	    dlmp == NULL) {
		putnext(q, mp);
		return;
	}

	/* Check
	   that the tacked on mblk is a DL_{DISAB,ENAB}MULTI_REQ */
	prim = ((union DL_primitives *)dlmp->b_rptr)->dl_primitive;
	if (prim != DL_DISABMULTI_REQ && prim != DL_ENABMULTI_REQ) {
		putnext(q, mp);
		return;
	}
	freeb(mp);

	/* See comments above ip_wput_ctl_writer() for details */
	ill_refhold(ill);
	qwriter_ip(ill, ill->ill_wq, dlmp, ip_wput_ctl_writer, NEW_OP, B_FALSE);
}

/*
 * Rejoin any groups which have been explicitly joined by the application (we
 * left all explicitly joined groups as part of ill_leave_multicast() prior to
 * bringing the interface down). Note that because groups can be joined and
 * left while an interface is down, this may not be the same set of groups
 * that we left in ill_leave_multicast().
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));
	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_recover_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			if (ill->ill_group == NULL) {
				(void) ip_join_allmulti(ill->ill_ipif);
			} else {
				/*
				 * We don't want to join on this ill,
				 * if somebody else in the group has
				 * already been nominated.
				 */
				(void) ill_nominate_mcast_rcv(ill->ill_group);
			}
		} else {
			(void) ip_ll_addmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined. Note that both these functions could be
 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
 * and DL_ENABMULTI_REQ messages when an interface is down.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));
	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_leave_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			(void) ip_leave_allmulti(ill->ill_ipif);
			/*
			 * If we were part of an IPMP group, then
			 * ill_handoff_responsibility() has already
			 * nominated a new member (so we don't).
			 */
			ASSERT(ill->ill_group == NULL);
		} else {
			(void) ip_ll_delmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/* Find an ilm for matching the ill */
ilm_t *
ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t	v6group;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
}

/*
 * Find an ilm for matching the ill. All the ilm lookup functions
 * ignore ILM_DELETED ilms. These have been logically deleted, and
 * igmp and linklayer disable multicast have been done. Only mi_free
 * yet to be done. Still there in the list due to ilm_walkers. The
 * last walker will release it.
 */
ilm_t *
ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid)
{
	ilm_t	*ilm;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));

	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid))
			return (ilm);
	}
	return (NULL);
}

/*
 * Like ilm_lookup_ill_v6(), but additionally matches on the original
 * interface index (ilm_orig_ifindex) recorded when the group was joined.
 */
ilm_t *
ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index,
    zoneid_t zoneid)
{
	ilm_t *ilm;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));

	for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) &&
		    (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) &&
		    ilm->ilm_orig_ifindex == index) {
			return (ilm);
		}
	}
	return (NULL);
}


/*
 * Find an ilm for the ipif. Only needed for IPv4 which does
 * ipif specific socket options.
 */
ilm_t *
ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t	v6group;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (ilm->ilm_ipif == ipif &&
		    IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group))
			return (ilm);
	}
	return (NULL);
}

/*
 * How many members on this ill?
 */
int
ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;
	int	i = 0;

	mutex_enter(&ill->ill_lock);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			i++;
		}
	}
	mutex_exit(&ill->ill_lock);
	return (i);
}

/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
    zoneid_t zoneid)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter list if the ilg supplies a non-empty one. */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	/*
	 * IPv4 Multicast groups are joined using ipif.
	 * IPv6 Multicast groups are joined using ill.
	 */
	if (ill->ill_isv6) {
		ilm->ilm_ill = ill;
		ilm->ilm_ipif = NULL;
		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_ilm_cnt++;
	} else {
		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
		ilm->ilm_ipif = ipif;
		ilm->ilm_ill = NULL;
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
		    (char *), "ilm", (void *), ilm);
		ipif->ipif_ilm_cnt++;
	}
	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/*
	 * After this if ilm moves to a new ill, we don't change
	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
	 * it has been moved. Indexes don't match even when the application
	 * wants to join on a FAILED/INACTIVE interface because we choose
	 * a new interface to join in. This is considered as an implicit
	 * move.
	 */
	ilm->ilm_orig_ifindex = orig_ifindex;

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));

	/*
	 * Grab lock to give consistent view to readers
	 */
	mutex_enter(&ill->ill_lock);
	/*
	 * All ilms in the same zone are contiguous in the ill_ilm list.
	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
	 * sending duplicates up when two applications in the same zone join the
	 * same group on different logical interfaces.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	mutex_exit(&ill->ill_lock);
	return (ilm);
}

/*
 * Free an ilm's filter and retransmit state, then free the ilm itself.
 */
void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}

/*
 * Reap ILM_DELETED entries once the last walker has dropped off.
 * Entered with ill_lock held; the lock is released on return (either
 * directly or via ipif_ill_refrele_tail()).
 */
void
ilm_walker_cleanup(ill_t *ill)
{
	ilm_t	**ilmp;
	ilm_t	*ilm;
	boolean_t need_wakeup = B_FALSE;

	ASSERT(MUTEX_HELD(&ill->ill_lock));
	ASSERT(ill->ill_ilm_walker_cnt == 0);

	ilmp = &ill->ill_ilm;
	while (*ilmp != NULL) {
		if ((*ilmp)->ilm_flags & ILM_DELETED) {
			ilm = *ilmp;
			*ilmp = ilm->ilm_next;
			/*
			 * check if there are any pending FREE or unplumb
			 * operations that need to be restarted.
			 */
			if (ilm->ilm_ipif != NULL) {
				/*
				 * IPv4 ilms hold a ref on the ipif.
				 */
				DTRACE_PROBE3(ipif__decr__cnt,
				    (ipif_t *), ilm->ilm_ipif,
				    (char *), "ilm", (void *), ilm);
				ilm->ilm_ipif->ipif_ilm_cnt--;
				if (IPIF_FREE_OK(ilm->ilm_ipif))
					need_wakeup = B_TRUE;
			} else {
				/*
				 * IPv6 ilms hold a ref on the ill.
				 */
				ASSERT(ilm->ilm_ill == ill);
				DTRACE_PROBE3(ill__decr__cnt,
				    (ill_t *), ill,
				    (char *), "ilm", (void *), ilm);
				ASSERT(ill->ill_ilm_cnt > 0);
				ill->ill_ilm_cnt--;
				if (ILL_FREE_OK(ill))
					need_wakeup = B_TRUE;
			}
			ilm_inactive(ilm);	/* frees ilm */
		} else {
			ilmp = &(*ilmp)->ilm_next;
		}
	}
	ill->ill_ilm_cleanup_reqd = 0;
	if (need_wakeup)
		ipif_ill_refrele_tail(ill);
	else
		mutex_exit(&ill->ill_lock);
}

/*
 * Unlink ilm and free it.
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t	*ill;
	ilm_t	**ilmp;
	boolean_t need_wakeup;


	if (ilm->ilm_ipif != NULL) {
		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
		ASSERT(ilm->ilm_ill == NULL);
		ill = ilm->ilm_ipif->ipif_ill;
		ASSERT(!ill->ill_isv6);
	} else {
		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
		ASSERT(ilm->ilm_ipif == NULL);
		ill = ilm->ilm_ill;
		ASSERT(ill->ill_isv6);
	}
	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ilm_walker_cnt != 0) {
		/*
		 * Walkers are active; defer the unlink/free to
		 * ilm_walker_cleanup() by marking the entry deleted.
		 */
		ilm->ilm_flags |= ILM_DELETED;
		ill->ill_ilm_cleanup_reqd = 1;
		mutex_exit(&ill->ill_lock);
		return;
	}

	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;
	*ilmp = ilm->ilm_next;

	/*
	 * if we are the last reference to the ipif (for IPv4 ilms)
	 * or the ill (for IPv6 ilms), we may need to wakeup any
	 * pending FREE or unplumb operations.
	 */
	need_wakeup = B_FALSE;
	if (ilm->ilm_ipif != NULL) {
		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
		    (char *), "ilm", (void *), ilm);
		ilm->ilm_ipif->ipif_ilm_cnt--;
		if (IPIF_FREE_OK(ilm->ilm_ipif))
			need_wakeup = B_TRUE;
	} else {
		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ASSERT(ill->ill_ilm_cnt > 0);
		ill->ill_ilm_cnt--;
		if (ILL_FREE_OK(ill))
			need_wakeup = B_TRUE;
	}

	ilm_inactive(ilm);	/* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}


/*
 * Looks up the appropriate ipif given a v4 multicast group and interface
 * address. On success, returns 0, with *ipifpp pointing to the found
 * struct. On failure, returns an errno and *ipifpp is NULL.
 */
int
ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
    uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
{
	ipif_t	*ipif;
	int	 err = 0;
	zoneid_t zoneid;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	if (!CLASSD(group) || CLASSD(src)) {
		return (EINVAL);
	}
	*ipifpp = NULL;

	zoneid = IPCL_ZONEID(connp);

	/* The caller supplies at most one of ifaddr and *ifindexp. */
	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
	if (ifaddr != INADDR_ANY) {
		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
		if (err != 0 && err != EINPROGRESS)
			err = EADDRNOTAVAIL;
	} else if (ifindexp != NULL && *ifindexp != 0) {
		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
	} else {
		/* Neither given: pick an interface for the group. */
		ipif = ipif_lookup_group(group, zoneid, ipst);
		if (ipif == NULL)
			return (EADDRNOTAVAIL);
	}
	if (ipif == NULL)
		return (err);

	*ipifpp = ipif;
	return (0);
}

/*
 * Looks up the appropriate ill (or ipif if v4mapped) given an interface
 * index and IPv6 multicast group. On success, returns 0, with *illpp (or
 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns
 * an errno and *illpp and *ipifpp are undefined.
 */
int
ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
    const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
    mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
{
	boolean_t src_unspec;
	ill_t *ill = NULL;
	ipif_t *ipif = NULL;
	int err;
	zoneid_t zoneid = connp->conn_zoneid;
	queue_t *wq = CONNP_TO_WQ(connp);
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);

	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
		/* v4-mapped group: the source must be v4-mapped too. */
		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
		if (src_unspec) {
			*v4src = INADDR_ANY;
		} else {
			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
		}
		if (!CLASSD(*v4group) || CLASSD(*v4src))
			return (EINVAL);
		*ipifpp = NULL;
		*isv6 = B_FALSE;
	} else {
		/* Native v6 group: a v4-mapped source is inconsistent. */
		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
		    IN6_IS_ADDR_MULTICAST(v6src)) {
			return (EINVAL);
		}
		*illpp = NULL;
		*isv6 = B_TRUE;
	}

	if (ifindex == 0) {
		/* No index given; choose an interface based on the group. */
		if (*isv6)
			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
		else
			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
		if (ill == NULL && ipif == NULL)
			return (EADDRNOTAVAIL);
	} else {
		if (*isv6) {
			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
			    wq, first_mp, func, &err, ipst);
			/* The ill must be usable in the conn's zone. */
			if (ill != NULL &&
			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
				ill_refrele(ill);
				ill = NULL;
				err = EADDRNOTAVAIL;
			}
		} else {
			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
			    zoneid, wq, first_mp, func, &err, ipst);
		}
		if (ill == NULL && ipif == NULL)
			return (err);
	}

	*ipifpp = ipif;
	*illpp = ill;
	return (0);
}

/*
 * Retrieve the (v4) multicast source filter state for the given conn/group/
 * ipif into the caller-supplied ioctl structure: gf for the protocol-
 * independent SIOCGMSFILTER form (possibly with a v4-mapped group), or imsf
 * for the v4-only SIOCGIPMSFILTER form.  Exactly one of gf/imsf is non-NULL.
 * Returns 0, or EADDRNOTAVAIL if the conn has not joined the group.
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;

	/* conn_lock protects the conn's ilg list and filter state. */
	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/* outsrcs is the caller's slist capacity, in entries. */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* Copy out at most outsrcs entries, in the API's format. */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* Report the full count even if we truncated the copy. */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Retrieve the v6 multicast source filter state for the given conn/group/ill
 * into gf (SIOCGMSFILTER form).  Returns 0, or EADDRNOTAVAIL if the conn has
 * not joined the group on this ill.
 */
static int
ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	slist_t *fp;

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ill_v6(connp, grp, ill);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		gf->gf_numsrc = 0;
	} else {
		/* Copy out up to the caller's gf_numsrc capacity. */
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			if (i == fp->sl_numsrc)
				break;
			sin6 = (struct sockaddr_in6 *)sl;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = fp->sl_addr[i];
		}
		gf->gf_numsrc = fp->sl_numsrc;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Install a new (v4) multicast source filter for the given conn/group/ipif,
 * from gf (SIOCSMSFILTER form, possibly v4-mapped) or imsf (v4-only
 * SIOCSIPMSFILTER form); exactly one of gf/imsf is non-NULL.  Setting
 * (MCAST_INCLUDE, empty list) leaves the group.  On ip_addmulti() failure
 * the previous ilg state is restored.  Caller must be the ipsq writer.
 */
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, err, infmode, new_fmode;
	uint_t insrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_grp, is_v4only_api;
	ilg_stat_t ilgstat;

	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);

	ASSERT(IAM_WRITER_IPIF(ipif));

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		ilgstat = ILGSTAT_NEW;
		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
	} else if (leave_grp) {
		/* Existing membership plus a leave request: drop it. */
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		/* Convert caller's list into v4-mapped in6_addr entries. */
		for (i = 0; i < insrcs; i++) {
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, grp, ipif);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Install a new v6 multicast source filter for the given conn/group/ill
 * from gf (SIOCSMSFILTER form).  Setting (MCAST_INCLUDE, empty list) leaves
 * the group.  Lookups are done by the original ifindex since the membership
 * may have failed over to another ill.  On ip_addmulti_v6() failure the
 * previous ilg state is restored.  Caller must be the ipsq writer.
 */
static int
ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i, orig_ifindex, orig_fmode, new_fmode, err;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	boolean_t leave_grp;
	ilg_stat_t ilgstat;

	/* Make sure we can handle the source list */
	if (gf->gf_numsrc > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the ifindex to do the lookup.  We can't use the ill
	 * directly because ilg_ill could point to a different ill
	 * if things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;

	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *grp;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on. This might be
		 * different from the ill we've been given if it's
		 * currently down and part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, grp);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the index that we joined on, so that we can
		 * successfully delete them later on and also search for
		 * duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else if (leave_grp) {
		/*
		 * Use the ilg's current ill for the deletion,
		 * we might have failed over.
		 */
		ill = ilg->ilg_ill;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
		    connp->conn_zoneid, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
		/* preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (gf->gf_numsrc == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			sin6 = (struct sockaddr_in6 *)sl;
			fp->sl_addr[i] = sin6->sin6_addr;
		}
		fp->sl_numsrc = gf->gf_numsrc;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
 */
/* ARGSUSED */
int
ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *ifreq)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* existence verified in ip_wput_nondata() */
	mblk_t *data_mp = mp->b_cont->b_cont;
	int datalen, err, cmd, minsize;
	uint_t expsize = 0;
	conn_t *connp;
	boolean_t isv6, is_v4only_api, getcmd;
	struct sockaddr_in *gsin;
	struct sockaddr_in6 *gsin6;
	ipaddr_t v4grp;
	in6_addr_t v6grp;
	struct group_filter *gf = NULL;
	struct ip_msfilter *imsf = NULL;
	mblk_t *ndp;

	/*
	 * Coalesce a multi-mblk data payload into one contiguous mblk so
	 * the structures below can be accessed directly via b_rptr.
	 */
	if (data_mp->b_cont != NULL) {
		if ((ndp = msgpullup(data_mp, -1)) == NULL)
			return (ENOMEM);
		freemsg(data_mp);
		data_mp = ndp;
		mp->b_cont->b_cont = data_mp;
	}

	cmd = iocp->ioc_cmd;
	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
	datalen = MBLKL(data_mp);

	if (datalen < minsize)
		return (EINVAL);

	/*
	 * now we know we have at least have the initial structure,
	 * but need to check for the source list array.
	 */
	if (is_v4only_api) {
		imsf = (struct ip_msfilter *)data_mp->b_rptr;
		isv6 = B_FALSE;
		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
	} else {
		gf = (struct group_filter *)data_mp->b_rptr;
		if (gf->gf_group.ss_family == AF_INET6) {
			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
		} else {
			isv6 = B_FALSE;
		}
		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
	}
	if (datalen < expsize)
		return (EINVAL);

	connp = Q_TO_CONN(q);

	/* operation not supported on the virtual network interface */
	if (IS_VNI(ipif->ipif_ill))
		return (EINVAL);

	if (isv6) {
		ill_t *ill = ipif->ipif_ill;
		ill_refhold(ill);

		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
		v6grp = gsin6->sin6_addr;
		if (getcmd)
			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
		else
			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);

		ill_refrele(ill);
	} else {
		/* Extract the v4 group from whichever form was used. */
		boolean_t isv4mapped = B_FALSE;
		if (is_v4only_api) {
			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
		} else {
			if (gf->gf_group.ss_family == AF_INET) {
				gsin = (struct sockaddr_in *)&gf->gf_group;
				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
			} else {
				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
				    v4grp);
				isv4mapped = B_TRUE;
			}
		}
		if (getcmd)
			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
		else
			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
	}

	return (err);
}

/*
 * Finds the ipif based on information
in the ioctl headers.  Needed to make
 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
 * ioctls prior to calling the ioctl's handler function).
 */
int
ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    cmd_info_t *ci, ipsq_func_t func)
{
	int cmd = ipip->ipi_cmd;
	int err = 0;
	conn_t *connp;
	ipif_t *ipif;
	/* caller has verified this mblk exists */
	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4grp;
	in6_addr_t v6grp;
	uint32_t index;
	zoneid_t zoneid;
	ip_stack_t *ipst;

	connp = Q_TO_CONN(q);
	zoneid = connp->conn_zoneid;
	ipst = connp->conn_netstack->netstack_ip;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_af_isv6)
			return (EAFNOSUPPORT);

		/* v4-only form: interface chosen by address, or by group. */
		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4grp = imsf->imsf_multiaddr.s_addr;
		if (v4addr == INADDR_ANY) {
			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
			    func, &err, ipst);
		}
	} else {
		/* Protocol-independent form: interface chosen by index. */
		boolean_t isv6 = B_FALSE;
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6grp = sin6->sin6_addr;
			/* A v4-mapped group is treated as a v4 operation. */
			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
			else
				isv6 = B_TRUE;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;
			sin = (struct sockaddr_in *)&gf->gf_group;
			v4grp = sin->sin_addr.s_addr;
		} else {
			return (EAFNOSUPPORT);
		}
		if (index == 0) {
			/* No index given: select based on the group. */
			if (isv6) {
				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
				    ipst);
			} else {
				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			}
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
			    q, mp, func, &err, ipst);
		}
	}

	ci->ci_ipif = ipif;
	return (err);
}

/*
 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
 * in in two stages, as the first copyin tells us the size of the attached
 * source buffer.  This function is called by ip_wput_nondata() after the
 * first copyin has completed; it figures out how big the second stage
 * needs to be, and kicks it off.
 *
 * In some cases (numsrc < 2), the second copyin is not needed as the
 * first one gets a complete structure containing 1 source addr.
 *
 * The function returns 0 if a second copyin has been started (i.e. there's
 * no more work to be done right now), or 1 if the second copyin is not
 * needed and ip_wput_nondata() can continue its processing.
2704 */ 2705 int 2706 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2707 { 2708 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2709 int cmd = iocp->ioc_cmd; 2710 /* validity of this checked in ip_wput_nondata() */ 2711 mblk_t *mp1 = mp->b_cont->b_cont; 2712 int copysize = 0; 2713 int offset; 2714 2715 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2716 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2717 if (gf->gf_numsrc >= 2) { 2718 offset = sizeof (struct group_filter); 2719 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2720 } 2721 } else { 2722 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2723 if (imsf->imsf_numsrc >= 2) { 2724 offset = sizeof (struct ip_msfilter); 2725 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2726 } 2727 } 2728 if (copysize > 0) { 2729 mi_copyin_n(q, mp, offset, copysize); 2730 return (0); 2731 } 2732 return (1); 2733 } 2734 2735 /* 2736 * Handle the following optmgmt: 2737 * IP_ADD_MEMBERSHIP must not have joined already 2738 * MCAST_JOIN_GROUP must not have joined already 2739 * IP_BLOCK_SOURCE must have joined already 2740 * MCAST_BLOCK_SOURCE must have joined already 2741 * IP_JOIN_SOURCE_GROUP may have joined already 2742 * MCAST_JOIN_SOURCE_GROUP may have joined already 2743 * 2744 * fmode and src parameters may be used to determine which option is 2745 * being set, as follows (the IP_* and MCAST_* versions of each option 2746 * are functionally equivalent): 2747 * opt fmode src 2748 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2749 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2750 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2751 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2752 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2753 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2754 * 2755 * Changing the filter mode is not allowed; if a matching ilg already 2756 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int err = 0;
	ill_t	*ill;

	/* Validate the addresses and resolve the target ipif (refheld). */
	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* Serialize the join as a writer on the ipif's ipsq. */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);

	/* unspecified source addr => no source filtering */
	err = ilg_add(connp, group, ipif, fmode, src);

	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_JOIN_GROUP			must not have joined already
 *	MCAST_JOIN_GROUP		must not have joined already
 *	MCAST_BLOCK_SOURCE		must have joined already
 *	MCAST_JOIN_SOURCE_GROUP		may have joined already
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
 * are functionally equivalent):
 *	opt			fmode			v6src
 *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE		unspecified
 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE		unspecified
 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE		v6 addr
 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE		v6 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t *ill;
	ipif_t *ipif;
	char buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t *ipsq;
	int err;

	/*
	 * Validate the addresses and resolve the interface: an ill for a
	 * native v6 group, or an ipif (with v4group/v4src filled in) for a
	 * v4-mapped group.  Whichever is returned comes back refheld.
	 */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	/* Serialize the join as writer on the appropriate ipsq. */
	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add(connp, v4group, ipif, fmode, v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Leave a (v4) group, or remove one source from its filter, for this conn
 * on the given ipif.  src == INADDR_ANY means a full leave; otherwise the
 * named source is removed from the existing filter (the filter mode must
 * match, and removing the last INCLUDE source leaves the group).
 * Caller must be the ipsq writer for the ipif.
 */
static int
ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src)
{
	ilg_t	*ilg;
	in6_addr_t v6src;
	boolean_t leaving = B_FALSE;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/*
	 * The ilg is valid only while we hold the conn lock. Once we drop
	 * the lock, another thread can locate another ilg on this connp,
	 * but on a different ipif, and delete it, and cause the ilg array
	 * to be reallocated and copied. Hence do the ilg_delete before
	 * dropping the lock.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (src == INADDR_ANY) {
		v6src = ipv6_all_zeros;
		leaving = B_TRUE;
	} else {
		int err = 0;
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, &v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			/* Removing the last INCLUDE source: full leave. */
			v6src = ipv6_all_zeros;
			leaving = B_TRUE;
		}
	}

	ilg_delete(connp, ilg, &v6src);
	mutex_exit(&connp->conn_lock);

	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
	return (0);
}

/*
 * Leave a v6 group, or remove one source from its filter, for this conn on
 * the given ill.  An unspecified v6src means a full leave.  The ilg lookup
 * uses the original ifindex since the membership may have failed over to a
 * different ill.  Caller must be the ipsq writer for the ill.
 */
static int
ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
    ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	ilg_t	*ilg;
	ill_t	*ilg_ill;
	uint_t	ilg_orig_ifindex;
	boolean_t leaving = B_TRUE;

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the index that we originally used to join. We can't
	 * use the ill directly because ilg_ill could point to
	 * a new ill if things have moved.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, v6group,
	    ill->ill_phyint->phyint_ifindex);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		int err = 0;
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1)
			/* Last INCLUDE source: treat as a full leave. */
			v6src = NULL;
		else
			leaving = B_FALSE;
	}

	/* Capture ill/index before ilg_delete invalidates the ilg. */
	ilg_ill = ilg->ilg_ill;
	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
	ilg_delete(connp, ilg, v6src);
	mutex_exit(&connp->conn_lock);
	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
	    connp->conn_zoneid, B_FALSE, leaving);

	return (0);
}

/*
 * Handle the following optmgmt:
 *	IP_DROP_MEMBERSHIP		will leave
 *	MCAST_LEAVE_GROUP		will leave
 *	IP_UNBLOCK_SOURCE		will not leave
 *	MCAST_UNBLOCK_SOURCE		will not leave
 *	IP_LEAVE_SOURCE_GROUP		may leave (if leaving last source)
 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (the IP_* and MCAST_* versions of each option
 * are functionally equivalent):
 *	opt			 fmode			src
 *	IP_DROP_MEMBERSHIP	 MODE_IS_INCLUDE	INADDR_ANY
 *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	INADDR_ANY
 *	IP_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
 *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v4 addr
 *	IP_LEAVE_SOURCE_GROUP	 MODE_IS_INCLUDE	v4 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int	err;
	ill_t	*ill;

	/* Validate the addresses and resolve the target ipif (refheld). */
	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group: no ipif for group "
			    "0x%x, ifaddr 0x%x\n",
			    (int)ntohl(group), (int)ntohl(ifaddr)));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* Serialize the leave as a writer on the ipif's ipsq. */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);
	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_LEAVE_GROUP		will leave
 *	MCAST_LEAVE_GROUP		will leave
 *	MCAST_UNBLOCK_SOURCE		will not leave
 *	MCAST_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
 * are functionally equivalent):
 *	opt			 fmode			v6src
 *	IPV6_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
 *	MCAST_LEAVE_GROUP	 MODE_IS_INCLUDE	unspecified
 *	MCAST_UNBLOCK_SOURCE	 MODE_IS_EXCLUDE	v6 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v6 addr
 *
 * Changing the filter mode is not
allowed; if a matching ilg
 * already exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif. Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t	*ill;
	ipif_t	*ipif;
	char	buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t	*ipsq;
	int	err;

	/*
	 * Resolve the group/ifindex to either an ill (true v6 group) or an
	 * ipif (v4-mapped group); *isv6 tells us which one was returned.
	 * The returned ill/ipif is refheld on success.
	 */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	/*
	 * Serialize on the appropriate object (ipif for v4-mapped groups,
	 * ill for native v6 groups) and do the exclusive delete.
	 */
	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
		    v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
		    v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Group mgmt for upper conn that passes things down
 * to the interface multicast list (and DLPI)
 * These routines can handle new style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces.)
 */

/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI).
 *
 * Returns 0 on success; EADDRNOTAVAIL, EADDRINUSE, EINVAL, ENOBUFS or
 * ENOMEM on failure (see the per-check comments below).  Caller must be
 * writer on the ipif's ipsq (asserted).
 */
static int
ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
    ipaddr_t src)
{
	int	error = 0;
	ill_t	*ill;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ill = ipif->ipif_ill;

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
	 * serialize 2 threads doing join (sock, group1, hme0:0) and
	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
	 * but both operations happen on the same conn.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added. Figure out our rules based
	 * on fmode and src params. Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (src == INADDR_ANY) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* Brand-new membership: allocate and initialize the ilg. */
		ilgstat = ILGSTAT_NEW;
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (src != INADDR_ANY) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			IN6_IPADDR_TO_V4MAPPED(src,
			    &ilg->ilg_filter->sl_addr[0]);
		}
		if (group == INADDR_ANY) {
			ilg->ilg_v6group = ipv6_all_zeros;
		} else {
			/* v4 groups are stored internally as v4-mapped v6 */
			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
		}
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
		ilg->ilg_fmode = fmode;
	} else {
		/* Existing membership: add src to its filter list. */
		int	index;
		in6_addr_t v6src;
		ilgstat = ILGSTAT_CHANGE;
		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (list_has_addr(ilg->ilg_filter, &v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * Need to undo what we did before calling ip_addmulti()!
		 * Must look up the ilg again since we've not been holding
		 * conn_lock.
		 */
		in6_addr_t v6src;
		if (ilgstat == ILGSTAT_NEW)
			v6src = ipv6_all_zeros;
		else
			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, group, ipif);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &v6src);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);
	return (0);
}

/*
 * IPv6 counterpart of ilg_add(): add *v6group (optionally with source
 * *v6src) to the conn's ilg list and push the change down via
 * ip_addmulti_v6().  Caller must be writer on ill's ipsq (asserted).
 */
static int
ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	int	error = 0;
	int	orig_ifindex;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_lock protects the ilg list.  Serializes 2 threads doing
	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
	 * and hme1 map to different ipsq's, but both operations happen
	 * on the same conn.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Use the ifindex to do the lookup. We can't use the ill
	 * directly because ilg_ill could point to a different ill if
	 * things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;
	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added. Figure out our rules based
	 * on fmode and src params. Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on. This might be different
		 * from the ill we've been given if it's currently down and
		 * part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, v6group);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the orig_ifindex that we joined on, so that we
		 * can successfully delete them later on and also search
		 * for duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else {
		int	index;
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * But because we waited, we have to undo the ilg update
		 * if ip_addmulti_v6() fails.  We also must lookup ilg
		 * again, since we've not been holding conn_lock.
		 */
		in6_addr_t delsrc =
		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &delsrc);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);

	return (0);
}

/*
 * Find an IPv4 ilg matching group, ill and source.  Source matching
 * honors the ilg's filter mode: an INCLUDE filter matches only listed
 * sources, an EXCLUDE filter matches only unlisted ones.  Returns NULL
 * when there is no match.  Caller must hold conn_lock (asserted).
 */
ilg_t *
ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ipif_t	*ipif;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((ipif = ilg->ilg_ipif) == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		ASSERT(ilg->ilg_ill == NULL);
		ilg_ill = ipif->ipif_ill;
		ASSERT(!ilg_ill->ill_isv6);
		if (ilg_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* match iff INCLUDE-and-listed or EXCLUDE-and-not-listed */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		return (ilg);

	return (NULL);
}

/*
 * Find an IPv6 ilg matching group, ill, and source.  Same filter-mode
 * semantics as ilg_lookup_ill_withsrc() above.  Caller must hold
 * conn_lock (asserted).
 */
ilg_t *
ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((ilg_ill = ilg->ilg_ill) == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(ilg_ill->ill_isv6);
		if (ilg_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		return (ilg);

	return (NULL);
}

/*
 * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
 * This is useful when the interface fails and we have moved
 * to a new ill, but still would like to locate using the index
 * that we originally used to join. Used only for IPv6 currently.
 */
static ilg_t *
ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
{
	ilg_t	*ilg;
	int	i;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		/* skip deleted entries and non-v6 (ilg_ill == NULL) entries */
		if (ilg->ilg_ill == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		/* ilg_ipif is NULL for V6 */
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(ilg->ilg_orig_ifindex != 0);
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
		    ilg->ilg_orig_ifindex == ifindex) {
			return (ilg);
		}
	}
	return (NULL);
}

/*
 * Find an IPv6 ilg matching group and ill.  Returns NULL when there is
 * no match.  Caller must hold conn_lock (asserted).
 */
ilg_t *
ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
{
	ilg_t	*ilg;
	int	i;
	ill_t	*mem_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((mem_ill = ilg->ilg_ill) == NULL ||
		    (ilg->ilg_flags & ILG_DELETED) != 0)
			continue;
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(mem_ill->ill_isv6);
		if (mem_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
			return (ilg);
	}
	return (NULL);
}

/*
 * Find an IPv4 ilg matching group and ipif.  Returns NULL when there is
 * no match.  Caller must hold conn_lock (asserted).
 */
static ilg_t *
ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
{
	in6_addr_t v6group;
	int	i;
	ilg_t	*ilg;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(!ipif->ipif_ill->ill_isv6);

	/* INADDR_ANY maps to the v6 unspecified address */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		if ((ilg->ilg_flags & ILG_DELETED) == 0 &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group) &&
		    ilg->ilg_ipif == ipif)
			return (ilg);
	}
	return (NULL);
}

/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	int	i;

	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(!(ilg->ilg_flags & ILG_DELETED));

	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
		/*
		 * If walkers are active we cannot repack the array;
		 * just mark the entry deleted and let the last walker
		 * (ILG_WALKER_RELE) do the physical removal.
		 */
		if (connp->conn_ilg_walker_cnt != 0) {
			ilg->ilg_flags |= ILG_DELETED;
			return;
		}

		FREE_SLIST(ilg->ilg_filter);

		i = ilg - &connp->conn_ilg[0];
		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);

		/* Move other entries up one step */
		connp->conn_ilg_inuse--;
		for (; i < connp->conn_ilg_inuse; i++)
			connp->conn_ilg[i] = connp->conn_ilg[i+1];

		if (connp->conn_ilg_inuse == 0) {
			mi_free((char *)connp->conn_ilg);
			connp->conn_ilg = NULL;
			/* wake ilg_delete_all(), which waits on conn_refcv */
			cv_broadcast(&connp->conn_refcv);
		}
	} else {
		/* only removing one source from the filter list */
		l_remove(ilg->ilg_filter, src);
	}
}

/*
 * Called from conn close. No new ilg can be added or removed.
 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
 * will return error if conn has started closing.
 */
void
ilg_delete_all(conn_t *connp)
{
	int	i;
	ipif_t	*ipif = NULL;
	ill_t	*ill = NULL;
	ilg_t	*ilg;
	in6_addr_t v6group;
	boolean_t success;
	ipsq_t	*ipsq;
	int	orig_ifindex;

	mutex_enter(&connp->conn_lock);
retry:
	ILG_WALKER_HOLD(connp);
	/* Walk backwards; ilg_delete() marks entries ILG_DELETED in place. */
	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
		ilg = &connp->conn_ilg[i];
		/*
		 * Since this walk is not atomic (we drop the
		 * conn_lock and wait in ipsq_enter) we need
		 * to check for the ILG_DELETED flag.
		 */
		if (ilg->ilg_flags & ILG_DELETED) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		v6group = ilg->ilg_v6group;

		/* v4-mapped groups hang off an ipif; native v6 off an ill */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 * If we are unable to do even that, then the ill is closing,
		 * in which case the unplumb thread will handle the cleanup,
		 * and we move on to the next ilg.
		 */
		if (!ill_waiter_inc(ill)) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		mutex_exit(&connp->conn_lock);
		/*
		 * To prevent deadlock between ill close which waits inside
		 * the perimeter, and conn close, ipsq_enter returns error,
		 * the moment ILL_CONDEMNED is set, in which case ill close
		 * takes responsibility to cleanup the ilgs. Note that we
		 * have not yet set condemned flag, otherwise the conn can't
		 * be refheld for cleanup by those routines and it would be
		 * a mutual deadlock.
		 */
		success = ipsq_enter(ill, B_FALSE);
		ipsq = ill->ill_phyint->phyint_ipsq;
		ill_waiter_dcr(ill);
		mutex_enter(&connp->conn_lock);
		if (!success) {
			/* Go to the next ilg */
			i--;
			continue;
		}

		/*
		 * Make sure that nothing has changed under. For eg.
		 * a failover/failback can change ilg_ill while we were
		 * waiting to become exclusive above
		 */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * The ilg has changed under us probably due
			 * to a failover or unplumb. Retry on the same ilg.
			 */
			mutex_exit(&connp->conn_lock);
			ipsq_exit(ipsq);
			mutex_enter(&connp->conn_lock);
			continue;
		}
		v6group = ilg->ilg_v6group;
		orig_ifindex = ilg->ilg_orig_ifindex;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);

		if (ipif != NULL)
			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
			    B_FALSE, B_TRUE);
		else
			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
			    connp->conn_zoneid, B_FALSE, B_TRUE);

		ipsq_exit(ipsq);
		mutex_enter(&connp->conn_lock);
		/* Go to the next ilg */
		i--;
	}
	ILG_WALKER_RELE(connp);

	/* If any ill was skipped above wait and retry */
	if (connp->conn_ilg_inuse != 0) {
		/* ilg_delete() broadcasts conn_refcv when the list empties */
		cv_wait(&connp->conn_refcv, &connp->conn_lock);
		goto retry;
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Called from ill close by ipcl_walk for clearing conn_ilg and
 * conn_multicast_ipif for a given ipif. conn is held by caller.
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e.
it must not refer to any ill/ire/ipif and then only set 3910 * condemned flag. 3911 */ 3912 static void 3913 conn_delete_ipif(conn_t *connp, caddr_t arg) 3914 { 3915 ipif_t *ipif = (ipif_t *)arg; 3916 int i; 3917 char group_buf1[INET6_ADDRSTRLEN]; 3918 char group_buf2[INET6_ADDRSTRLEN]; 3919 ipaddr_t group; 3920 ilg_t *ilg; 3921 3922 /* 3923 * Even though conn_ilg_inuse can change while we are in this loop, 3924 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3925 * be created or deleted for this connp, on this ill, since this ill 3926 * is the perimeter. So we won't miss any ilg in this cleanup. 3927 */ 3928 mutex_enter(&connp->conn_lock); 3929 3930 /* 3931 * Increment the walker count, so that ilg repacking does not 3932 * occur while we are in the loop. 3933 */ 3934 ILG_WALKER_HOLD(connp); 3935 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3936 ilg = &connp->conn_ilg[i]; 3937 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3938 continue; 3939 /* 3940 * ip_close cannot be cleaning this ilg at the same time. 3941 * since it also has to execute in this ill's perimeter which 3942 * we are now holding. Only a clean conn can be condemned. 
3943 */ 3944 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3945 3946 /* Blow away the membership */ 3947 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3948 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3949 group_buf1, sizeof (group_buf1)), 3950 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3951 group_buf2, sizeof (group_buf2)), 3952 ipif->ipif_ill->ill_name)); 3953 3954 /* ilg_ipif is NULL for V6, so we won't be here */ 3955 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3956 3957 group = V4_PART_OF_V6(ilg->ilg_v6group); 3958 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3959 mutex_exit(&connp->conn_lock); 3960 3961 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3962 mutex_enter(&connp->conn_lock); 3963 } 3964 3965 /* 3966 * If we are the last walker, need to physically delete the 3967 * ilgs and repack. 3968 */ 3969 ILG_WALKER_RELE(connp); 3970 3971 if (connp->conn_multicast_ipif == ipif) { 3972 /* Revert to late binding */ 3973 connp->conn_multicast_ipif = NULL; 3974 } 3975 mutex_exit(&connp->conn_lock); 3976 3977 conn_delete_ire(connp, (caddr_t)ipif); 3978 } 3979 3980 /* 3981 * Called from ill close by ipcl_walk for clearing conn_ilg and 3982 * conn_multicast_ill for a given ill. conn is held by caller. 3983 * Note that ipcl_walk only walks conns that are not yet condemned. 3984 * condemned conns can't be refheld. For this reason, conn must become clean 3985 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3986 * condemned flag. 3987 */ 3988 static void 3989 conn_delete_ill(conn_t *connp, caddr_t arg) 3990 { 3991 ill_t *ill = (ill_t *)arg; 3992 int i; 3993 char group_buf[INET6_ADDRSTRLEN]; 3994 in6_addr_t v6group; 3995 int orig_ifindex; 3996 ilg_t *ilg; 3997 3998 /* 3999 * Even though conn_ilg_inuse can change while we are in this loop, 4000 * no new ilgs can be created/deleted for this connp, on this 4001 * ill, since this ill is the perimeter. So we won't miss any ilg 4002 * in this cleanup. 
4003 */ 4004 mutex_enter(&connp->conn_lock); 4005 4006 /* 4007 * Increment the walker count, so that ilg repacking does not 4008 * occur while we are in the loop. 4009 */ 4010 ILG_WALKER_HOLD(connp); 4011 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 4012 ilg = &connp->conn_ilg[i]; 4013 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 4014 /* 4015 * ip_close cannot be cleaning this ilg at the same 4016 * time, since it also has to execute in this ill's 4017 * perimeter which we are now holding. Only a clean 4018 * conn can be condemned. 4019 */ 4020 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 4021 4022 /* Blow away the membership */ 4023 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 4024 inet_ntop(AF_INET6, &ilg->ilg_v6group, 4025 group_buf, sizeof (group_buf)), 4026 ill->ill_name)); 4027 4028 v6group = ilg->ilg_v6group; 4029 orig_ifindex = ilg->ilg_orig_ifindex; 4030 ilg_delete(connp, ilg, NULL); 4031 mutex_exit(&connp->conn_lock); 4032 4033 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 4034 connp->conn_zoneid, B_FALSE, B_TRUE); 4035 mutex_enter(&connp->conn_lock); 4036 } 4037 } 4038 /* 4039 * If we are the last walker, need to physically delete the 4040 * ilgs and repack. 4041 */ 4042 ILG_WALKER_RELE(connp); 4043 4044 if (connp->conn_multicast_ill == ill) { 4045 /* Revert to late binding */ 4046 connp->conn_multicast_ill = NULL; 4047 connp->conn_orig_multicast_ifindex = 0; 4048 } 4049 mutex_exit(&connp->conn_lock); 4050 } 4051 4052 /* 4053 * Called when an ipif is unplumbed to make sure that there are no 4054 * dangling conn references to that ipif. 4055 * Handles ilg_ipif and conn_multicast_ipif 4056 */ 4057 void 4058 reset_conn_ipif(ipif) 4059 ipif_t *ipif; 4060 { 4061 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4062 4063 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4064 } 4065 4066 /* 4067 * Called when an ill is unplumbed to make sure that there are no 4068 * dangling conn references to that ill. 
 * Handles ilg_ill, conn_multicast_ill.
 */
void
reset_conn_ill(ill_t *ill)
{
	ip_stack_t	*ipst = ill->ill_ipst;

	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
}

#ifdef DEBUG
/*
 * Walk functions walk all the interfaces in the system to make
 * sure that there is no reference to the ipif or ill that is
 * going away.
 */
int
ilm_walk_ill(ill_t *ill)
{
	int	cnt = 0;
	ill_t	*till;
	ilm_t	*ilm;
	ill_walk_context_t ctx;
	ip_stack_t	*ipst = ill->ill_ipst;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	till = ILL_START_WALK_ALL(&ctx, ipst);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		mutex_enter(&till->ill_lock);
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ill == ill) {
				cnt++;
			}
		}
		mutex_exit(&till->ill_lock);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	return (cnt);
}

/*
 * This function is called before the ipif is freed.
 *
 * NOTE(review): unlike ilm_walk_ill() above, this walk does not take
 * ips_ill_g_lock around ILL_START_WALK_ALL/ill_next; presumably the
 * caller's context makes that safe here -- TODO confirm.
 */
int
ilm_walk_ipif(ipif_t *ipif)
{
	int	cnt = 0;
	ill_t	*till;
	ilm_t	*ilm;
	ill_walk_context_t ctx;
	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;

	till = ILL_START_WALK_ALL(&ctx, ipst);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		mutex_enter(&till->ill_lock);
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ipif == ipif) {
				cnt++;
			}
		}
		mutex_exit(&till->ill_lock);
	}
	return (cnt);
}
#endif