1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/arp.h> 54 #include <inet/ip.h> 55 #include <inet/ip6.h> 56 #include <inet/ip_if.h> 57 #include <inet/ip_ndp.h> 58 #include <inet/ip_multi.h> 59 #include <inet/ipclassifier.h> 60 #include <inet/ipsec_impl.h> 61 #include <inet/sctp_ip.h> 62 #include <inet/ip_listutils.h> 63 #include <inet/udp_impl.h> 64 65 /* igmpv3/mldv2 source filter manipulation */ 66 static void ilm_bld_flists(conn_t *conn, void *arg); 67 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 68 slist_t *flist); 69 70 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 71 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 72 int orig_ifindex, zoneid_t zoneid); 73 static void ilm_delete(ilm_t *ilm); 74 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 77 const in6_addr_t *v6group, int index); 78 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 79 ipif_t *ipif); 80 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 81 mcast_record_t fmode, ipaddr_t src); 82 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 83 mcast_record_t fmode, const in6_addr_t *v6src); 84 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 85 
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 86 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 87 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 88 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 89 static void conn_ilg_reap(conn_t *connp); 90 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 91 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 92 static int ip_opt_delete_group_excl_v6(conn_t *connp, 93 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 94 const in6_addr_t *v6src); 95 96 /* 97 * MT notes: 98 * 99 * Multicast joins operate on both the ilg and ilm structures. Multiple 100 * threads operating on a conn (socket) trying to do multicast joins 101 * need to synchronize when operating on the ilg. Multiple threads 102 * potentially operating on different conn (socket endpoints) trying to 103 * do multicast joins could eventually end up trying to manipulate the 104 * ilm simultaneously and need to synchronize on the access to the ilm. 105 * Both are amenable to standard Solaris MT techniques, but it would be 106 * complex to handle a failover or failback which needs to manipulate 107 * ilg/ilms if an application can also simultaneously join/leave 108 * multicast groups. Hence multicast join/leave also go through the ipsq_t 109 * serialization. 110 * 111 * Multicast joins and leaves are single-threaded per phyint/IPMP group 112 * using the ipsq serialization mechanism. 113 * 114 * An ilm is an IP data structure used to track multicast join/leave. 115 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 116 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 117 * referencing the ilm. ilms are created / destroyed only as writer. ilms 118 * are not passed around, instead they are looked up and used under the 119 * ill_lock or as writer. So we don't need a dynamic refcount of the number 120 * of threads holding reference to an ilm. 
121 * 122 * Multicast Join operation: 123 * 124 * The first step is to determine the ipif (v4) or ill (v6) on which 125 * the join operation is to be done. The join is done after becoming 126 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 127 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 128 * Multiple threads can attempt to join simultaneously on different ipif/ill 129 * on the same conn. In this case the ipsq serialization does not help in 130 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 131 * The conn_lock also protects all the ilg_t members. 132 * 133 * Leave operation. 134 * 135 * Similar to the join operation, the first step is to determine the ipif 136 * or ill (v6) on which the leave operation is to be done. The leave operation 137 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 138 * As with join ilg modification is done under the protection of the conn lock. 139 */ 140 141 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 142 ASSERT(connp != NULL); \ 143 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 144 (first_mp), (func), (type), B_TRUE); \ 145 if ((ipsq) == NULL) { \ 146 ipif_refrele(ipif); \ 147 return (EINPROGRESS); \ 148 } 149 150 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 151 ASSERT(connp != NULL); \ 152 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 153 (first_mp), (func), (type), B_TRUE); \ 154 if ((ipsq) == NULL) { \ 155 ill_refrele(ill); \ 156 return (EINPROGRESS); \ 157 } 158 159 #define IPSQ_EXIT(ipsq) \ 160 if (ipsq != NULL) \ 161 ipsq_exit(ipsq, B_TRUE, B_TRUE); 162 163 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 164 165 #define ILG_WALKER_RELE(connp) \ 166 { \ 167 (connp)->conn_ilg_walker_cnt--; \ 168 if ((connp)->conn_ilg_walker_cnt == 0) \ 169 conn_ilg_reap(connp); \ 170 } 171 172 static void 173 conn_ilg_reap(conn_t *connp) 174 { 175 int to; 
176 int from; 177 178 ASSERT(MUTEX_HELD(&connp->conn_lock)); 179 180 to = 0; 181 from = 0; 182 while (from < connp->conn_ilg_inuse) { 183 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 184 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 185 from++; 186 continue; 187 } 188 if (to != from) 189 connp->conn_ilg[to] = connp->conn_ilg[from]; 190 to++; 191 from++; 192 } 193 194 connp->conn_ilg_inuse = to; 195 196 if (connp->conn_ilg_inuse == 0) { 197 mi_free((char *)connp->conn_ilg); 198 connp->conn_ilg = NULL; 199 cv_broadcast(&connp->conn_refcv); 200 } 201 } 202 203 #define GETSTRUCT(structure, number) \ 204 ((structure *)mi_zalloc(sizeof (structure) * (number))) 205 206 #define ILG_ALLOC_CHUNK 16 207 208 /* 209 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 210 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 211 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 212 * returned ilg). Returns NULL on failure (ENOMEM). 213 * 214 * Assumes connp->conn_lock is held. 
215 */ 216 static ilg_t * 217 conn_ilg_alloc(conn_t *connp) 218 { 219 ilg_t *new; 220 int curcnt; 221 222 ASSERT(MUTEX_HELD(&connp->conn_lock)); 223 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 224 225 if (connp->conn_ilg == NULL) { 226 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 227 if (connp->conn_ilg == NULL) 228 return (NULL); 229 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 230 connp->conn_ilg_inuse = 0; 231 } 232 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 233 curcnt = connp->conn_ilg_allocated; 234 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 235 if (new == NULL) 236 return (NULL); 237 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 238 mi_free((char *)connp->conn_ilg); 239 connp->conn_ilg = new; 240 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 241 } 242 243 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 244 } 245 246 typedef struct ilm_fbld_s { 247 ilm_t *fbld_ilm; 248 int fbld_in_cnt; 249 int fbld_ex_cnt; 250 slist_t fbld_in; 251 slist_t fbld_ex; 252 boolean_t fbld_in_overflow; 253 } ilm_fbld_t; 254 255 static void 256 ilm_bld_flists(conn_t *conn, void *arg) 257 { 258 int i; 259 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 260 ilm_t *ilm = fbld->fbld_ilm; 261 in6_addr_t *v6group = &ilm->ilm_v6addr; 262 263 if (conn->conn_ilg_inuse == 0) 264 return; 265 266 /* 267 * Since we can't break out of the ipcl_walk once started, we still 268 * have to look at every conn. But if we've already found one 269 * (EXCLUDE, NULL) list, there's no need to keep checking individual 270 * ilgs--that will be our state. 271 */ 272 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 273 return; 274 275 /* 276 * Check this conn's ilgs to see if any are interested in our 277 * ilm (group, interface match). If so, update the master 278 * include and exclude lists we're building in the fbld struct 279 * with this ilg's filter info. 
280 */ 281 mutex_enter(&conn->conn_lock); 282 for (i = 0; i < conn->conn_ilg_inuse; i++) { 283 ilg_t *ilg = &conn->conn_ilg[i]; 284 if ((ilg->ilg_ill == ilm->ilm_ill) && 285 (ilg->ilg_ipif == ilm->ilm_ipif) && 286 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 287 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 288 fbld->fbld_in_cnt++; 289 if (!fbld->fbld_in_overflow) 290 l_union_in_a(&fbld->fbld_in, 291 ilg->ilg_filter, 292 &fbld->fbld_in_overflow); 293 } else { 294 fbld->fbld_ex_cnt++; 295 /* 296 * On the first exclude list, don't try to do 297 * an intersection, as the master exclude list 298 * is intentionally empty. If the master list 299 * is still empty on later iterations, that 300 * means we have at least one ilg with an empty 301 * exclude list, so that should be reflected 302 * when we take the intersection. 303 */ 304 if (fbld->fbld_ex_cnt == 1) { 305 if (ilg->ilg_filter != NULL) 306 l_copy(ilg->ilg_filter, 307 &fbld->fbld_ex); 308 } else { 309 l_intersection_in_a(&fbld->fbld_ex, 310 ilg->ilg_filter); 311 } 312 } 313 /* there will only be one match, so break now. 
*/ 314 break; 315 } 316 } 317 mutex_exit(&conn->conn_lock); 318 } 319 320 static void 321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 322 { 323 ilm_fbld_t fbld; 324 ip_stack_t *ipst = ilm->ilm_ipst; 325 326 fbld.fbld_ilm = ilm; 327 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 328 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 329 fbld.fbld_in_overflow = B_FALSE; 330 331 /* first, construct our master include and exclude lists */ 332 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 333 334 /* now use those master lists to generate the interface filter */ 335 336 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 337 if (fbld.fbld_in_overflow) { 338 *fmode = MODE_IS_EXCLUDE; 339 flist->sl_numsrc = 0; 340 return; 341 } 342 343 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 344 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 345 *fmode = MODE_IS_INCLUDE; 346 flist->sl_numsrc = 0; 347 return; 348 } 349 350 /* 351 * If there are no exclude lists, then the interface filter 352 * is INCLUDE, with its filter list equal to fbld_in. A single 353 * exclude list makes the interface filter EXCLUDE, with its 354 * filter list equal to (fbld_ex - fbld_in). 355 */ 356 if (fbld.fbld_ex_cnt == 0) { 357 *fmode = MODE_IS_INCLUDE; 358 l_copy(&fbld.fbld_in, flist); 359 } else { 360 *fmode = MODE_IS_EXCLUDE; 361 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 362 } 363 } 364 365 /* 366 * If the given interface has failed, choose a new one to join on so 367 * that we continue to receive packets. ilg_orig_ifindex remembers 368 * what the application used to join on so that we know the ilg to 369 * delete even though we change the ill here. Callers will store the 370 * ilg returned from this function in ilg_ill. Thus when we receive 371 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 372 * 373 * This function must be called as writer so we can walk the group 374 * list and examine flags without holding a lock. 
375 */ 376 ill_t * 377 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 378 { 379 ill_t *till; 380 ill_group_t *illgrp = ill->ill_group; 381 382 ASSERT(IAM_WRITER_ILL(ill)); 383 384 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 385 return (ill); 386 387 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 388 return (ill); 389 390 till = illgrp->illgrp_ill; 391 while (till != NULL && 392 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 393 till = till->ill_group_next; 394 } 395 if (till != NULL) 396 return (till); 397 398 return (ill); 399 } 400 401 static int 402 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 403 boolean_t isv6) 404 { 405 mcast_record_t fmode; 406 slist_t *flist; 407 boolean_t fdefault; 408 char buf[INET6_ADDRSTRLEN]; 409 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 410 411 /* 412 * There are several cases where the ilm's filter state 413 * defaults to (EXCLUDE, NULL): 414 * - we've had previous joins without associated ilgs 415 * - this join has no associated ilg 416 * - the ilg's filter state is (EXCLUDE, NULL) 417 */ 418 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 419 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 420 421 /* attempt mallocs (if needed) before doing anything else */ 422 if ((flist = l_alloc()) == NULL) 423 return (ENOMEM); 424 if (!fdefault && ilm->ilm_filter == NULL) { 425 ilm->ilm_filter = l_alloc(); 426 if (ilm->ilm_filter == NULL) { 427 l_free(flist); 428 return (ENOMEM); 429 } 430 } 431 432 if (ilgstat != ILGSTAT_CHANGE) 433 ilm->ilm_refcnt++; 434 435 if (ilgstat == ILGSTAT_NONE) 436 ilm->ilm_no_ilg_cnt++; 437 438 /* 439 * Determine new filter state. If it's not the default 440 * (EXCLUDE, NULL), we must walk the conn list to find 441 * any ilgs interested in this group, and re-build the 442 * ilm filter. 
443 */ 444 if (fdefault) { 445 fmode = MODE_IS_EXCLUDE; 446 flist->sl_numsrc = 0; 447 } else { 448 ilm_gen_filter(ilm, &fmode, flist); 449 } 450 451 /* make sure state actually changed; nothing to do if not. */ 452 if ((ilm->ilm_fmode == fmode) && 453 !lists_are_different(ilm->ilm_filter, flist)) { 454 l_free(flist); 455 return (0); 456 } 457 458 /* send the state change report */ 459 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 460 if (isv6) 461 mld_statechange(ilm, fmode, flist); 462 else 463 igmp_statechange(ilm, fmode, flist); 464 } 465 466 /* update the ilm state */ 467 ilm->ilm_fmode = fmode; 468 if (flist->sl_numsrc > 0) 469 l_copy(flist, ilm->ilm_filter); 470 else 471 CLEAR_SLIST(ilm->ilm_filter); 472 473 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 474 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 475 476 l_free(flist); 477 return (0); 478 } 479 480 static int 481 ilm_update_del(ilm_t *ilm, boolean_t isv6) 482 { 483 mcast_record_t fmode; 484 slist_t *flist; 485 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 486 487 ip1dbg(("ilm_update_del: still %d left; updating state\n", 488 ilm->ilm_refcnt)); 489 490 if ((flist = l_alloc()) == NULL) 491 return (ENOMEM); 492 493 /* 494 * If present, the ilg in question has already either been 495 * updated or removed from our list; so all we need to do 496 * now is walk the list to update the ilm filter state. 497 * 498 * Skip the list walk if we have any no-ilg joins, which 499 * cause the filter state to revert to (EXCLUDE, NULL). 
500 */ 501 if (ilm->ilm_no_ilg_cnt != 0) { 502 fmode = MODE_IS_EXCLUDE; 503 flist->sl_numsrc = 0; 504 } else { 505 ilm_gen_filter(ilm, &fmode, flist); 506 } 507 508 /* check to see if state needs to be updated */ 509 if ((ilm->ilm_fmode == fmode) && 510 (!lists_are_different(ilm->ilm_filter, flist))) { 511 l_free(flist); 512 return (0); 513 } 514 515 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 516 if (isv6) 517 mld_statechange(ilm, fmode, flist); 518 else 519 igmp_statechange(ilm, fmode, flist); 520 } 521 522 ilm->ilm_fmode = fmode; 523 if (flist->sl_numsrc > 0) { 524 if (ilm->ilm_filter == NULL) { 525 ilm->ilm_filter = l_alloc(); 526 if (ilm->ilm_filter == NULL) { 527 char buf[INET6_ADDRSTRLEN]; 528 ip1dbg(("ilm_update_del: failed to alloc ilm " 529 "filter; no source filtering for %s on %s", 530 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 531 buf, sizeof (buf)), ill->ill_name)); 532 ilm->ilm_fmode = MODE_IS_EXCLUDE; 533 l_free(flist); 534 return (0); 535 } 536 } 537 l_copy(flist, ilm->ilm_filter); 538 } else { 539 CLEAR_SLIST(ilm->ilm_filter); 540 } 541 542 l_free(flist); 543 return (0); 544 } 545 546 /* 547 * INADDR_ANY means all multicast addresses. This is only used 548 * by the multicast router. 549 * INADDR_ANY is stored as IPv6 unspecified addr. 550 */ 551 int 552 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 553 mcast_record_t ilg_fmode, slist_t *ilg_flist) 554 { 555 ill_t *ill = ipif->ipif_ill; 556 ilm_t *ilm; 557 in6_addr_t v6group; 558 int ret; 559 560 ASSERT(IAM_WRITER_IPIF(ipif)); 561 562 if (!CLASSD(group) && group != INADDR_ANY) 563 return (EINVAL); 564 565 /* 566 * INADDR_ANY is represented as the IPv6 unspecifed addr. 567 */ 568 if (group == INADDR_ANY) 569 v6group = ipv6_all_zeros; 570 else 571 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 572 573 ilm = ilm_lookup_ipif(ipif, group); 574 if (ilm != NULL) 575 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 576 577 /* 578 * ilms are associated with ipifs in IPv4. 
It moves with the 579 * ipif if the ipif moves to a new ill when the interface 580 * fails. Thus we really don't check whether the ipif_ill 581 * has failed like in IPv6. If it has FAILED the ipif 582 * will move (daemon will move it) and hence the ilm, if the 583 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 584 * we continue to receive in the same place even if the 585 * interface fails. 586 */ 587 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 588 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 589 if (ilm == NULL) 590 return (ENOMEM); 591 592 if (group == INADDR_ANY) { 593 /* 594 * Check how many ipif's have members in this group - 595 * if more then one we should not tell the driver to join 596 * this time 597 */ 598 if (ilm_numentries_v6(ill, &v6group) > 1) 599 return (0); 600 if (ill->ill_group == NULL) 601 ret = ip_join_allmulti(ipif); 602 else 603 ret = ill_nominate_mcast_rcv(ill->ill_group); 604 if (ret != 0) 605 ilm_delete(ilm); 606 return (ret); 607 } 608 609 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 610 igmp_joingroup(ilm); 611 612 if (ilm_numentries_v6(ill, &v6group) > 1) 613 return (0); 614 615 ret = ip_ll_addmulti_v6(ipif, &v6group); 616 if (ret != 0) 617 ilm_delete(ilm); 618 return (ret); 619 } 620 621 /* 622 * The unspecified address means all multicast addresses. 623 * This is only used by the multicast router. 624 * 625 * ill identifies the interface to join on; it may not match the 626 * interface requested by the application of a failover has taken 627 * place. orig_ifindex always identifies the interface requested 628 * by the app. 629 * 630 * ilgstat tells us if there's an ilg associated with this join, 631 * and if so, if it's a new ilg or a change to an existing one. 632 * ilg_fmode and ilg_flist give us the current filter state of 633 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 
634 */ 635 int 636 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 637 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 638 slist_t *ilg_flist) 639 { 640 ilm_t *ilm; 641 int ret; 642 643 ASSERT(IAM_WRITER_ILL(ill)); 644 645 if (!IN6_IS_ADDR_MULTICAST(v6group) && 646 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 647 return (EINVAL); 648 } 649 650 /* 651 * An ilm is uniquely identified by the tuple of (group, ill, 652 * orig_ill). group is the multicast group address, ill is 653 * the interface on which it is currently joined, and orig_ill 654 * is the interface on which the application requested the 655 * join. orig_ill and ill are the same unless orig_ill has 656 * failed over. 657 * 658 * Both orig_ill and ill are required, which means we may have 659 * 2 ilms on an ill for the same group, but with different 660 * orig_ills. These must be kept separate, so that when failback 661 * occurs, the appropriate ilms are moved back to their orig_ill 662 * without disrupting memberships on the ill to which they had 663 * been moved. 664 * 665 * In order to track orig_ill, we store orig_ifindex in the 666 * ilm and ilg. 667 */ 668 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 669 if (ilm != NULL) 670 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 671 672 /* 673 * We need to remember where the application really wanted 674 * to join. This will be used later if we want to failback 675 * to the original interface. 
676 */ 677 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 678 ilg_flist, orig_ifindex, zoneid); 679 if (ilm == NULL) 680 return (ENOMEM); 681 682 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 683 /* 684 * Check how many ipif's that have members in this group - 685 * if more then one we should not tell the driver to join 686 * this time 687 */ 688 if (ilm_numentries_v6(ill, v6group) > 1) 689 return (0); 690 if (ill->ill_group == NULL) 691 ret = ip_join_allmulti(ill->ill_ipif); 692 else 693 ret = ill_nominate_mcast_rcv(ill->ill_group); 694 695 if (ret != 0) 696 ilm_delete(ilm); 697 return (ret); 698 } 699 700 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 701 mld_joingroup(ilm); 702 703 /* 704 * If we have more then one we should not tell the driver 705 * to join this time. 706 */ 707 if (ilm_numentries_v6(ill, v6group) > 1) 708 return (0); 709 710 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 711 if (ret != 0) 712 ilm_delete(ilm); 713 return (ret); 714 } 715 716 /* 717 * Send a multicast request to the driver for enabling multicast reception 718 * for v6groupp address. The caller has already checked whether it is 719 * appropriate to send one or not. 720 */ 721 int 722 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 723 { 724 mblk_t *mp; 725 uint32_t addrlen, addroff; 726 char group_buf[INET6_ADDRSTRLEN]; 727 728 ASSERT(IAM_WRITER_ILL(ill)); 729 730 /* 731 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 732 * on. 733 */ 734 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 735 &addrlen, &addroff); 736 if (!mp) 737 return (ENOMEM); 738 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 739 ipaddr_t v4group; 740 741 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 742 /* 743 * NOTE!!! 744 * The "addroff" passed in here was calculated by 745 * ill_create_dl(), and will be used by ill_create_squery() 746 * to perform some twisted coding magic. It is the offset 747 * into the dl_xxx_req of the hw addr. 
Here, it will be 748 * added to b_wptr - b_rptr to create a magic number that 749 * is not an offset into this squery mblk. 750 * The actual hardware address will be accessed only in the 751 * dl_xxx_req, not in the squery. More importantly, 752 * that hardware address can *only* be accessed in this 753 * mblk chain by calling mi_offset_param_c(), which uses 754 * the magic number in the squery hw offset field to go 755 * to the *next* mblk (the dl_xxx_req), subtract the 756 * (b_wptr - b_rptr), and find the actual offset into 757 * the dl_xxx_req. 758 * Any method that depends on using the 759 * offset field in the dl_disabmulti_req or squery 760 * to find either hardware address will similarly fail. 761 * 762 * Look in ar_entry_squery() in arp.c to see how this offset 763 * is used. 764 */ 765 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 766 if (!mp) 767 return (ENOMEM); 768 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 769 inet_ntop(AF_INET6, v6groupp, group_buf, 770 sizeof (group_buf)), 771 ill->ill_name)); 772 putnext(ill->ill_rq, mp); 773 } else { 774 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on" 775 " %s\n", 776 inet_ntop(AF_INET6, v6groupp, group_buf, 777 sizeof (group_buf)), 778 ill->ill_name)); 779 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 780 } 781 return (0); 782 } 783 784 /* 785 * Send a multicast request to the driver for enabling multicast 786 * membership for v6group if appropriate. 
787 */ 788 static int 789 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 790 { 791 ill_t *ill = ipif->ipif_ill; 792 793 ASSERT(IAM_WRITER_IPIF(ipif)); 794 795 if (ill->ill_net_type != IRE_IF_RESOLVER || 796 ipif->ipif_flags & IPIF_POINTOPOINT) { 797 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 798 return (0); /* Must be IRE_IF_NORESOLVER */ 799 } 800 801 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 802 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 803 return (0); 804 } 805 if (ill->ill_ipif_up_count == 0) { 806 /* 807 * Nobody there. All multicast addresses will be re-joined 808 * when we get the DL_BIND_ACK bringing the interface up. 809 */ 810 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 811 return (0); 812 } 813 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 814 } 815 816 /* 817 * INADDR_ANY means all multicast addresses. This is only used 818 * by the multicast router. 819 * INADDR_ANY is stored as the IPv6 unspecifed addr. 820 */ 821 int 822 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 823 { 824 ill_t *ill = ipif->ipif_ill; 825 ilm_t *ilm; 826 in6_addr_t v6group; 827 int ret; 828 829 ASSERT(IAM_WRITER_IPIF(ipif)); 830 831 if (!CLASSD(group) && group != INADDR_ANY) 832 return (EINVAL); 833 834 /* 835 * INADDR_ANY is represented as the IPv6 unspecifed addr. 836 */ 837 if (group == INADDR_ANY) 838 v6group = ipv6_all_zeros; 839 else 840 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 841 842 /* 843 * Look for a match on the ipif. 844 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 
845 */ 846 ilm = ilm_lookup_ipif(ipif, group); 847 if (ilm == NULL) 848 return (ENOENT); 849 850 /* Update counters */ 851 if (no_ilg) 852 ilm->ilm_no_ilg_cnt--; 853 854 if (leaving) 855 ilm->ilm_refcnt--; 856 857 if (ilm->ilm_refcnt > 0) 858 return (ilm_update_del(ilm, B_FALSE)); 859 860 if (group == INADDR_ANY) { 861 ilm_delete(ilm); 862 /* 863 * Check how many ipif's that have members in this group - 864 * if there are still some left then don't tell the driver 865 * to drop it. 866 */ 867 if (ilm_numentries_v6(ill, &v6group) != 0) 868 return (0); 869 870 /* 871 * If we never joined, then don't leave. This can happen 872 * if we're in an IPMP group, since only one ill per IPMP 873 * group receives all multicast packets. 874 */ 875 if (!ill->ill_join_allmulti) { 876 ASSERT(ill->ill_group != NULL); 877 return (0); 878 } 879 880 ret = ip_leave_allmulti(ipif); 881 if (ill->ill_group != NULL) 882 (void) ill_nominate_mcast_rcv(ill->ill_group); 883 return (ret); 884 } 885 886 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 887 igmp_leavegroup(ilm); 888 889 ilm_delete(ilm); 890 /* 891 * Check how many ipif's that have members in this group - 892 * if there are still some left then don't tell the driver 893 * to drop it. 894 */ 895 if (ilm_numentries_v6(ill, &v6group) != 0) 896 return (0); 897 return (ip_ll_delmulti_v6(ipif, &v6group)); 898 } 899 900 /* 901 * The unspecified address means all multicast addresses. 902 * This is only used by the multicast router. 903 */ 904 int 905 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 906 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 907 { 908 ipif_t *ipif; 909 ilm_t *ilm; 910 int ret; 911 912 ASSERT(IAM_WRITER_ILL(ill)); 913 914 if (!IN6_IS_ADDR_MULTICAST(v6group) && 915 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 916 return (EINVAL); 917 918 /* 919 * Look for a match on the ill. 920 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 
921 * 922 * Similar to ip_addmulti_v6, we should always look using 923 * the orig_ifindex. 924 * 925 * 1) If orig_ifindex is different from ill's ifindex 926 * we should have an ilm with orig_ifindex created in 927 * ip_addmulti_v6. We should delete that here. 928 * 929 * 2) If orig_ifindex is same as ill's ifindex, we should 930 * not delete the ilm that is temporarily here because of 931 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 932 * different from ill's ifindex. 933 * 934 * Thus, always lookup using orig_ifindex. 935 */ 936 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 937 if (ilm == NULL) 938 return (ENOENT); 939 940 ASSERT(ilm->ilm_ill == ill); 941 942 ipif = ill->ill_ipif; 943 944 /* Update counters */ 945 if (no_ilg) 946 ilm->ilm_no_ilg_cnt--; 947 948 if (leaving) 949 ilm->ilm_refcnt--; 950 951 if (ilm->ilm_refcnt > 0) 952 return (ilm_update_del(ilm, B_TRUE)); 953 954 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 955 ilm_delete(ilm); 956 /* 957 * Check how many ipif's that have members in this group - 958 * if there are still some left then don't tell the driver 959 * to drop it. 960 */ 961 if (ilm_numentries_v6(ill, v6group) != 0) 962 return (0); 963 964 /* 965 * If we never joined, then don't leave. This can happen 966 * if we're in an IPMP group, since only one ill per IPMP 967 * group receives all multicast packets. 968 */ 969 if (!ill->ill_join_allmulti) { 970 ASSERT(ill->ill_group != NULL); 971 return (0); 972 } 973 974 ret = ip_leave_allmulti(ipif); 975 if (ill->ill_group != NULL) 976 (void) ill_nominate_mcast_rcv(ill->ill_group); 977 return (ret); 978 } 979 980 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 981 mld_leavegroup(ilm); 982 983 ilm_delete(ilm); 984 /* 985 * Check how many ipif's that have members in this group - 986 * if there are still some left then don't tell the driver 987 * to drop it. 
988 */ 989 if (ilm_numentries_v6(ill, v6group) != 0) 990 return (0); 991 return (ip_ll_delmulti_v6(ipif, v6group)); 992 } 993 994 /* 995 * Send a multicast request to the driver for disabling multicast reception 996 * for v6groupp address. The caller has already checked whether it is 997 * appropriate to send one or not. 998 */ 999 int 1000 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1001 { 1002 mblk_t *mp; 1003 char group_buf[INET6_ADDRSTRLEN]; 1004 uint32_t addrlen, addroff; 1005 1006 ASSERT(IAM_WRITER_ILL(ill)); 1007 /* 1008 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1009 * on. 1010 */ 1011 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1012 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1013 1014 if (!mp) 1015 return (ENOMEM); 1016 1017 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1018 ipaddr_t v4group; 1019 1020 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1021 /* 1022 * NOTE!!! 1023 * The "addroff" passed in here was calculated by 1024 * ill_create_dl(), and will be used by ill_create_squery() 1025 * to perform some twisted coding magic. It is the offset 1026 * into the dl_xxx_req of the hw addr. Here, it will be 1027 * added to b_wptr - b_rptr to create a magic number that 1028 * is not an offset into this mblk. 1029 * 1030 * Please see the comment in ip_ll_send)enabmulti_req() 1031 * for a complete explanation. 1032 * 1033 * Look in ar_entry_squery() in arp.c to see how this offset 1034 * is used. 
1035 */ 1036 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1037 if (!mp) 1038 return (ENOMEM); 1039 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1040 inet_ntop(AF_INET6, v6groupp, group_buf, 1041 sizeof (group_buf)), 1042 ill->ill_name)); 1043 putnext(ill->ill_rq, mp); 1044 } else { 1045 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on" 1046 " %s\n", 1047 inet_ntop(AF_INET6, v6groupp, group_buf, 1048 sizeof (group_buf)), 1049 ill->ill_name)); 1050 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1051 } 1052 return (0); 1053 } 1054 1055 /* 1056 * Send a multicast request to the driver for disabling multicast 1057 * membership for v6group if appropriate. 1058 */ 1059 static int 1060 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1061 { 1062 ill_t *ill = ipif->ipif_ill; 1063 1064 ASSERT(IAM_WRITER_IPIF(ipif)); 1065 1066 if (ill->ill_net_type != IRE_IF_RESOLVER || 1067 ipif->ipif_flags & IPIF_POINTOPOINT) { 1068 return (0); /* Must be IRE_IF_NORESOLVER */ 1069 } 1070 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1071 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1072 return (0); 1073 } 1074 if (ill->ill_ipif_up_count == 0) { 1075 /* 1076 * Nobody there. All multicast addresses will be re-joined 1077 * when we get the DL_BIND_ACK bringing the interface up. 1078 */ 1079 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1080 return (0); 1081 } 1082 return (ip_ll_send_disabmulti_req(ill, v6group)); 1083 } 1084 1085 /* 1086 * Make the driver pass up all multicast packets 1087 * 1088 * With ill groups, the caller makes sure that there is only 1089 * one ill joining the allmulti group. 1090 */ 1091 int 1092 ip_join_allmulti(ipif_t *ipif) 1093 { 1094 ill_t *ill = ipif->ipif_ill; 1095 mblk_t *mp; 1096 uint32_t addrlen, addroff; 1097 1098 ASSERT(IAM_WRITER_IPIF(ipif)); 1099 1100 if (ill->ill_ipif_up_count == 0) { 1101 /* 1102 * Nobody there. 
All multicast addresses will be re-joined 1103 * when we get the DL_BIND_ACK bringing the interface up. 1104 */ 1105 return (0); 1106 } 1107 1108 ASSERT(!ill->ill_join_allmulti); 1109 1110 /* 1111 * Create a DL_PROMISCON_REQ message and send it directly to 1112 * the DLPI provider. We don't need to do this for certain 1113 * media types for which we never need to turn promiscuous 1114 * mode on. 1115 */ 1116 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1117 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1118 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1119 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1120 if (mp == NULL) 1121 return (ENOMEM); 1122 putnext(ill->ill_wq, mp); 1123 } 1124 1125 mutex_enter(&ill->ill_lock); 1126 ill->ill_join_allmulti = B_TRUE; 1127 mutex_exit(&ill->ill_lock); 1128 return (0); 1129 } 1130 1131 /* 1132 * Make the driver stop passing up all multicast packets 1133 * 1134 * With ill groups, we need to nominate some other ill as 1135 * this ipif->ipif_ill is leaving the group. 1136 */ 1137 int 1138 ip_leave_allmulti(ipif_t *ipif) 1139 { 1140 ill_t *ill = ipif->ipif_ill; 1141 mblk_t *mp; 1142 uint32_t addrlen, addroff; 1143 1144 ASSERT(IAM_WRITER_IPIF(ipif)); 1145 1146 if (ill->ill_ipif_up_count == 0) { 1147 /* 1148 * Nobody there. All multicast addresses will be re-joined 1149 * when we get the DL_BIND_ACK bringing the interface up. 1150 */ 1151 return (0); 1152 } 1153 1154 ASSERT(ill->ill_join_allmulti); 1155 1156 /* 1157 * Create a DL_PROMISCOFF_REQ message and send it directly to 1158 * the DLPI provider. We don't need to do this for certain 1159 * media types for which we never need to turn promiscuous 1160 * mode on. 
1161 */ 1162 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1163 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1164 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1165 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1166 if (mp == NULL) 1167 return (ENOMEM); 1168 putnext(ill->ill_wq, mp); 1169 } 1170 1171 mutex_enter(&ill->ill_lock); 1172 ill->ill_join_allmulti = B_FALSE; 1173 mutex_exit(&ill->ill_lock); 1174 return (0); 1175 } 1176 1177 /* 1178 * Copy mp_orig and pass it in as a local message. 1179 */ 1180 void 1181 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1182 zoneid_t zoneid) 1183 { 1184 mblk_t *mp; 1185 mblk_t *ipsec_mp; 1186 ipha_t *iph; 1187 ip_stack_t *ipst = ill->ill_ipst; 1188 1189 if (DB_TYPE(mp_orig) == M_DATA && 1190 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1191 uint_t hdrsz; 1192 1193 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1194 sizeof (udpha_t); 1195 ASSERT(MBLKL(mp_orig) >= hdrsz); 1196 1197 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1198 (mp_orig = dupmsg(mp_orig)) != NULL) { 1199 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1200 mp->b_wptr += hdrsz; 1201 mp->b_cont = mp_orig; 1202 mp_orig->b_rptr += hdrsz; 1203 if (MBLKL(mp_orig) == 0) { 1204 mp->b_cont = mp_orig->b_cont; 1205 mp_orig->b_cont = NULL; 1206 freeb(mp_orig); 1207 } 1208 } else if (mp != NULL) { 1209 freeb(mp); 1210 mp = NULL; 1211 } 1212 } else { 1213 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1214 } 1215 1216 if (mp == NULL) 1217 return; 1218 if (DB_TYPE(mp) == M_CTL) { 1219 ipsec_mp = mp; 1220 mp = mp->b_cont; 1221 } else { 1222 ipsec_mp = mp; 1223 } 1224 1225 iph = (ipha_t *)mp->b_rptr; 1226 1227 DTRACE_PROBE4(ip4__loopback__out__start, 1228 ill_t *, NULL, ill_t *, ill, 1229 ipha_t *, iph, mblk_t *, ipsec_mp); 1230 1231 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1232 ipst->ips_ipv4firewall_loopback_out, 1233 NULL, ill, iph, ipsec_mp, mp, ipst); 1234 1235 
DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1236 1237 if (ipsec_mp != NULL) 1238 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1239 fanout_flags, zoneid); 1240 } 1241 1242 static area_t ip_aresq_template = { 1243 AR_ENTRY_SQUERY, /* cmd */ 1244 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1245 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1246 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1247 sizeof (area_t), /* proto addr offset */ 1248 IP_ADDR_LEN, /* proto addr_length */ 1249 0, /* proto mask offset */ 1250 /* Rest is initialized when used */ 1251 0, /* flags */ 1252 0, /* hw addr offset */ 1253 0, /* hw addr length */ 1254 }; 1255 1256 static mblk_t * 1257 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1258 uint32_t addroff, mblk_t *mp_tail) 1259 { 1260 mblk_t *mp; 1261 area_t *area; 1262 1263 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1264 (caddr_t)&ipaddr); 1265 if (!mp) { 1266 freemsg(mp_tail); 1267 return (NULL); 1268 } 1269 area = (area_t *)mp->b_rptr; 1270 area->area_hw_addr_length = addrlen; 1271 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1272 /* 1273 * NOTE! 1274 * 1275 * The area_hw_addr_offset, as can be seen, does not hold the 1276 * actual hardware address offset. Rather, it holds the offset 1277 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1278 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1279 * mi_offset_paramc() to find the hardware address in the 1280 * *second* mblk (dl_xxx_req), not this mblk. 1281 * 1282 * Using mi_offset_paramc() is thus the *only* way to access 1283 * the dl_xxx_hw address. 1284 * 1285 * The squery hw address should *not* be accessed. 1286 * 1287 * See ar_entry_squery() in arp.c for an example of how all this works. 1288 */ 1289 1290 mp->b_cont = mp_tail; 1291 return (mp); 1292 } 1293 1294 /* 1295 * Create a dlpi message with room for phys+sap. 
When we come back in 1296 * ip_wput_ctl() we will strip the sap for those primitives which 1297 * only need a physical address. 1298 */ 1299 static mblk_t * 1300 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1301 uint32_t *addr_lenp, uint32_t *addr_offp) 1302 { 1303 mblk_t *mp; 1304 uint32_t hw_addr_length; 1305 char *cp; 1306 uint32_t offset; 1307 uint32_t size; 1308 1309 *addr_lenp = *addr_offp = 0; 1310 1311 hw_addr_length = ill->ill_phys_addr_length; 1312 if (!hw_addr_length) { 1313 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1314 return (NULL); 1315 } 1316 1317 size = length; 1318 switch (dl_primitive) { 1319 case DL_ENABMULTI_REQ: 1320 case DL_DISABMULTI_REQ: 1321 size += hw_addr_length; 1322 break; 1323 case DL_PROMISCON_REQ: 1324 case DL_PROMISCOFF_REQ: 1325 break; 1326 default: 1327 return (NULL); 1328 } 1329 mp = allocb(size, BPRI_HI); 1330 if (!mp) 1331 return (NULL); 1332 mp->b_wptr += size; 1333 mp->b_datap->db_type = M_PROTO; 1334 1335 cp = (char *)mp->b_rptr; 1336 offset = length; 1337 1338 switch (dl_primitive) { 1339 case DL_ENABMULTI_REQ: { 1340 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1341 1342 dl->dl_primitive = dl_primitive; 1343 dl->dl_addr_offset = offset; 1344 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1345 *addr_offp = offset; 1346 break; 1347 } 1348 case DL_DISABMULTI_REQ: { 1349 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1350 1351 dl->dl_primitive = dl_primitive; 1352 dl->dl_addr_offset = offset; 1353 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1354 *addr_offp = offset; 1355 break; 1356 } 1357 case DL_PROMISCON_REQ: 1358 case DL_PROMISCOFF_REQ: { 1359 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1360 1361 dl->dl_primitive = dl_primitive; 1362 dl->dl_level = DL_PROMISC_MULTI; 1363 break; 1364 } 1365 } 1366 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1367 *addr_lenp, *addr_offp)); 1368 return (mp); 1369 } 1370 1371 void 1372 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1373 
{ 1374 ill_t *ill = (ill_t *)q->q_ptr; 1375 mblk_t *mp = mp_orig; 1376 area_t *area; 1377 1378 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1379 if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) || 1380 mp->b_cont == NULL) { 1381 putnext(q, mp); 1382 return; 1383 } 1384 area = (area_t *)mp->b_rptr; 1385 if (area->area_cmd != AR_ENTRY_SQUERY) { 1386 putnext(q, mp); 1387 return; 1388 } 1389 mp = mp->b_cont; 1390 /* 1391 * Update dl_addr_length and dl_addr_offset for primitives that 1392 * have physical addresses as opposed to full saps 1393 */ 1394 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1395 case DL_ENABMULTI_REQ: 1396 /* Track the state if this is the first enabmulti */ 1397 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1398 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1399 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1400 break; 1401 case DL_DISABMULTI_REQ: 1402 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1403 break; 1404 default: 1405 ip1dbg(("ip_wput_ctl: default\n")); 1406 break; 1407 } 1408 freeb(mp_orig); 1409 putnext(q, mp); 1410 } 1411 1412 /* 1413 * Rejoin any groups which have been explicitly joined by the application (we 1414 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1415 * bringing the interface down). Note that because groups can be joined and 1416 * left while an interface is down, this may not be the same set of groups 1417 * that we left in ill_leave_multicast(). 1418 */ 1419 void 1420 ill_recover_multicast(ill_t *ill) 1421 { 1422 ilm_t *ilm; 1423 char addrbuf[INET6_ADDRSTRLEN]; 1424 1425 ASSERT(IAM_WRITER_ILL(ill)); 1426 1427 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1428 /* 1429 * Check how many ipif's that have members in this group - 1430 * if more then one we make sure that this entry is first 1431 * in the list. 
1432 */ 1433 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1434 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1435 continue; 1436 ip1dbg(("ill_recover_multicast: %s\n", 1437 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1438 sizeof (addrbuf)))); 1439 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1440 if (ill->ill_group == NULL) { 1441 (void) ip_join_allmulti(ill->ill_ipif); 1442 } else { 1443 /* 1444 * We don't want to join on this ill, 1445 * if somebody else in the group has 1446 * already been nominated. 1447 */ 1448 (void) ill_nominate_mcast_rcv(ill->ill_group); 1449 } 1450 } else { 1451 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1452 &ilm->ilm_v6addr); 1453 } 1454 } 1455 } 1456 1457 /* 1458 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1459 * that were explicitly joined. Note that both these functions could be 1460 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1461 * and DL_ENABMULTI_REQ messages when an interface is down. 1462 */ 1463 void 1464 ill_leave_multicast(ill_t *ill) 1465 { 1466 ilm_t *ilm; 1467 char addrbuf[INET6_ADDRSTRLEN]; 1468 1469 ASSERT(IAM_WRITER_ILL(ill)); 1470 1471 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1472 /* 1473 * Check how many ipif's that have members in this group - 1474 * if more then one we make sure that this entry is first 1475 * in the list. 1476 */ 1477 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1478 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1479 continue; 1480 ip1dbg(("ill_leave_multicast: %s\n", 1481 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1482 sizeof (addrbuf)))); 1483 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1484 (void) ip_leave_allmulti(ill->ill_ipif); 1485 /* 1486 * If we were part of an IPMP group, then 1487 * ill_handoff_responsibility() has already 1488 * nominated a new member (so we don't). 
1489 */ 1490 ASSERT(ill->ill_group == NULL); 1491 } else { 1492 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1493 } 1494 } 1495 } 1496 1497 /* 1498 * Find an ilm for matching the ill and which has the source in its 1499 * INCLUDE list or does not have it in its EXCLUDE list 1500 */ 1501 ilm_t * 1502 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1503 { 1504 in6_addr_t v6group, v6src; 1505 1506 /* 1507 * INADDR_ANY is represented as the IPv6 unspecified addr. 1508 */ 1509 if (group == INADDR_ANY) 1510 v6group = ipv6_all_zeros; 1511 else 1512 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1513 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1514 1515 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1516 } 1517 1518 ilm_t * 1519 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1520 const in6_addr_t *v6src) 1521 { 1522 ilm_t *ilm; 1523 boolean_t isinlist; 1524 int i, numsrc; 1525 1526 /* 1527 * If the source is in any ilm's INCLUDE list, or if 1528 * it is not in any ilm's EXCLUDE list, we have a hit. 1529 */ 1530 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1531 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1532 1533 isinlist = B_FALSE; 1534 numsrc = (ilm->ilm_filter == NULL) ? 1535 0 : ilm->ilm_filter->sl_numsrc; 1536 for (i = 0; i < numsrc; i++) { 1537 if (IN6_ARE_ADDR_EQUAL(v6src, 1538 &ilm->ilm_filter->sl_addr[i])) { 1539 isinlist = B_TRUE; 1540 break; 1541 } 1542 } 1543 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1544 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1545 return (ilm); 1546 else 1547 return (NULL); 1548 } 1549 } 1550 return (NULL); 1551 } 1552 1553 1554 /* Find an ilm for matching the ill */ 1555 ilm_t * 1556 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1557 { 1558 in6_addr_t v6group; 1559 1560 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1561 IAM_WRITER_ILL(ill)); 1562 /* 1563 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1564 */ 1565 if (group == INADDR_ANY) 1566 v6group = ipv6_all_zeros; 1567 else 1568 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1569 1570 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1571 } 1572 1573 /* 1574 * Find an ilm for matching the ill. All the ilm lookup functions 1575 * ignore ILM_DELETED ilms. These have been logically deleted, and 1576 * igmp and linklayer disable multicast have been done. Only mi_free 1577 * yet to be done. Still there in the list due to ilm_walkers. The 1578 * last walker will release it. 1579 */ 1580 ilm_t * 1581 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1582 { 1583 ilm_t *ilm; 1584 1585 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1586 IAM_WRITER_ILL(ill)); 1587 1588 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1589 if (ilm->ilm_flags & ILM_DELETED) 1590 continue; 1591 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1592 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1593 return (ilm); 1594 } 1595 return (NULL); 1596 } 1597 1598 ilm_t * 1599 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1600 zoneid_t zoneid) 1601 { 1602 ilm_t *ilm; 1603 1604 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1605 IAM_WRITER_ILL(ill)); 1606 1607 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1608 if (ilm->ilm_flags & ILM_DELETED) 1609 continue; 1610 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1611 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1612 ilm->ilm_orig_ifindex == index) { 1613 return (ilm); 1614 } 1615 } 1616 return (NULL); 1617 } 1618 1619 ilm_t * 1620 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1621 { 1622 in6_addr_t v6group; 1623 1624 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1625 IAM_WRITER_ILL(ill)); 1626 /* 1627 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1628 */ 1629 if (group == INADDR_ANY) 1630 v6group = ipv6_all_zeros; 1631 else 1632 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1633 1634 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1635 } 1636 1637 /* 1638 * Found an ilm for the ipif. Only needed for IPv4 which does 1639 * ipif specific socket options. 1640 */ 1641 ilm_t * 1642 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1643 { 1644 ill_t *ill = ipif->ipif_ill; 1645 ilm_t *ilm; 1646 in6_addr_t v6group; 1647 1648 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1649 IAM_WRITER_ILL(ill)); 1650 1651 /* 1652 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1653 */ 1654 if (group == INADDR_ANY) 1655 v6group = ipv6_all_zeros; 1656 else 1657 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1658 1659 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1660 if (ilm->ilm_flags & ILM_DELETED) 1661 continue; 1662 if (ilm->ilm_ipif == ipif && 1663 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1664 return (ilm); 1665 } 1666 return (NULL); 1667 } 1668 1669 /* 1670 * How many members on this ill? 
1671 */ 1672 int 1673 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1674 { 1675 ilm_t *ilm; 1676 int i = 0; 1677 1678 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1679 IAM_WRITER_ILL(ill)); 1680 1681 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1682 if (ilm->ilm_flags & ILM_DELETED) 1683 continue; 1684 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1685 i++; 1686 } 1687 } 1688 return (i); 1689 } 1690 1691 /* Caller guarantees that the group is not already on the list */ 1692 static ilm_t * 1693 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1694 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1695 zoneid_t zoneid) 1696 { 1697 ill_t *ill = ipif->ipif_ill; 1698 ilm_t *ilm; 1699 ilm_t *ilm_cur; 1700 ilm_t **ilm_ptpn; 1701 1702 ASSERT(IAM_WRITER_IPIF(ipif)); 1703 1704 ilm = GETSTRUCT(ilm_t, 1); 1705 if (ilm == NULL) 1706 return (NULL); 1707 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1708 ilm->ilm_filter = l_alloc(); 1709 if (ilm->ilm_filter == NULL) { 1710 mi_free(ilm); 1711 return (NULL); 1712 } 1713 } 1714 ilm->ilm_v6addr = *v6group; 1715 ilm->ilm_refcnt = 1; 1716 ilm->ilm_zoneid = zoneid; 1717 ilm->ilm_timer = INFINITY; 1718 ilm->ilm_rtx.rtx_timer = INFINITY; 1719 1720 /* 1721 * IPv4 Multicast groups are joined using ipif. 1722 * IPv6 Multicast groups are joined using ill. 1723 */ 1724 if (ill->ill_isv6) { 1725 ilm->ilm_ill = ill; 1726 ilm->ilm_ipif = NULL; 1727 } else { 1728 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1729 ilm->ilm_ipif = ipif; 1730 ilm->ilm_ill = NULL; 1731 } 1732 ASSERT(ill->ill_ipst); 1733 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1734 1735 /* 1736 * After this if ilm moves to a new ill, we don't change 1737 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1738 * it has been moved. 
Indexes don't match even when the application 1739 * wants to join on a FAILED/INACTIVE interface because we choose 1740 * a new interface to join in. This is considered as an implicit 1741 * move. 1742 */ 1743 ilm->ilm_orig_ifindex = orig_ifindex; 1744 1745 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1746 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1747 1748 /* 1749 * Grab lock to give consistent view to readers 1750 */ 1751 mutex_enter(&ill->ill_lock); 1752 /* 1753 * All ilms in the same zone are contiguous in the ill_ilm list. 1754 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1755 * sending duplicates up when two applications in the same zone join the 1756 * same group on different logical interfaces. 1757 */ 1758 ilm_cur = ill->ill_ilm; 1759 ilm_ptpn = &ill->ill_ilm; 1760 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1761 ilm_ptpn = &ilm_cur->ilm_next; 1762 ilm_cur = ilm_cur->ilm_next; 1763 } 1764 ilm->ilm_next = ilm_cur; 1765 *ilm_ptpn = ilm; 1766 1767 /* 1768 * If we have an associated ilg, use its filter state; if not, 1769 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
1770 */ 1771 if (ilgstat != ILGSTAT_NONE) { 1772 if (!SLIST_IS_EMPTY(ilg_flist)) 1773 l_copy(ilg_flist, ilm->ilm_filter); 1774 ilm->ilm_fmode = ilg_fmode; 1775 } else { 1776 ilm->ilm_no_ilg_cnt = 1; 1777 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1778 } 1779 1780 mutex_exit(&ill->ill_lock); 1781 return (ilm); 1782 } 1783 1784 void 1785 ilm_walker_cleanup(ill_t *ill) 1786 { 1787 ilm_t **ilmp; 1788 ilm_t *ilm; 1789 1790 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1791 ASSERT(ill->ill_ilm_walker_cnt == 0); 1792 1793 ilmp = &ill->ill_ilm; 1794 while (*ilmp != NULL) { 1795 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1796 ilm = *ilmp; 1797 *ilmp = ilm->ilm_next; 1798 FREE_SLIST(ilm->ilm_filter); 1799 FREE_SLIST(ilm->ilm_pendsrcs); 1800 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1801 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1802 ilm->ilm_ipst = NULL; 1803 mi_free((char *)ilm); 1804 } else { 1805 ilmp = &(*ilmp)->ilm_next; 1806 } 1807 } 1808 ill->ill_ilm_cleanup_reqd = 0; 1809 } 1810 1811 /* 1812 * Unlink ilm and free it. 1813 */ 1814 static void 1815 ilm_delete(ilm_t *ilm) 1816 { 1817 ill_t *ill; 1818 ilm_t **ilmp; 1819 1820 if (ilm->ilm_ipif != NULL) { 1821 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1822 ASSERT(ilm->ilm_ill == NULL); 1823 ill = ilm->ilm_ipif->ipif_ill; 1824 ASSERT(!ill->ill_isv6); 1825 } else { 1826 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1827 ASSERT(ilm->ilm_ipif == NULL); 1828 ill = ilm->ilm_ill; 1829 ASSERT(ill->ill_isv6); 1830 } 1831 /* 1832 * Delete under lock protection so that readers don't stumble 1833 * on bad ilm_next 1834 */ 1835 mutex_enter(&ill->ill_lock); 1836 if (ill->ill_ilm_walker_cnt != 0) { 1837 ilm->ilm_flags |= ILM_DELETED; 1838 ill->ill_ilm_cleanup_reqd = 1; 1839 mutex_exit(&ill->ill_lock); 1840 return; 1841 } 1842 1843 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1844 ; 1845 *ilmp = ilm->ilm_next; 1846 mutex_exit(&ill->ill_lock); 1847 1848 FREE_SLIST(ilm->ilm_filter); 1849 FREE_SLIST(ilm->ilm_pendsrcs); 1850 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 
1851 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1852 ilm->ilm_ipst = NULL; 1853 mi_free((char *)ilm); 1854 } 1855 1856 /* Free all ilms for this ipif */ 1857 void 1858 ilm_free(ipif_t *ipif) 1859 { 1860 ill_t *ill = ipif->ipif_ill; 1861 ilm_t *ilm; 1862 ilm_t *next_ilm; 1863 1864 ASSERT(IAM_WRITER_IPIF(ipif)); 1865 1866 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1867 next_ilm = ilm->ilm_next; 1868 if (ilm->ilm_ipif == ipif) 1869 ilm_delete(ilm); 1870 } 1871 } 1872 1873 /* 1874 * Looks up the appropriate ipif given a v4 multicast group and interface 1875 * address. On success, returns 0, with *ipifpp pointing to the found 1876 * struct. On failure, returns an errno and *ipifpp is NULL. 1877 */ 1878 int 1879 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1880 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1881 { 1882 ipif_t *ipif; 1883 int err = 0; 1884 zoneid_t zoneid; 1885 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1886 1887 if (!CLASSD(group) || CLASSD(src)) { 1888 return (EINVAL); 1889 } 1890 *ipifpp = NULL; 1891 1892 zoneid = IPCL_ZONEID(connp); 1893 1894 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1895 if (ifaddr != INADDR_ANY) { 1896 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1897 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1898 if (err != 0 && err != EINPROGRESS) 1899 err = EADDRNOTAVAIL; 1900 } else if (ifindexp != NULL && *ifindexp != 0) { 1901 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1902 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1903 } else { 1904 ipif = ipif_lookup_group(group, zoneid, ipst); 1905 if (ipif == NULL) 1906 return (EADDRNOTAVAIL); 1907 } 1908 if (ipif == NULL) 1909 return (err); 1910 1911 *ipifpp = ipif; 1912 return (0); 1913 } 1914 1915 /* 1916 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1917 * index and IPv6 multicast group. 
On success, returns 0, with *illpp (or 1918 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1919 * an errno and *illpp and *ipifpp are undefined. 1920 */ 1921 int 1922 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1923 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1924 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1925 { 1926 boolean_t src_unspec; 1927 ill_t *ill = NULL; 1928 ipif_t *ipif = NULL; 1929 int err; 1930 zoneid_t zoneid = connp->conn_zoneid; 1931 queue_t *wq = CONNP_TO_WQ(connp); 1932 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1933 1934 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1935 1936 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1937 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1938 return (EINVAL); 1939 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1940 if (src_unspec) { 1941 *v4src = INADDR_ANY; 1942 } else { 1943 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1944 } 1945 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1946 return (EINVAL); 1947 *ipifpp = NULL; 1948 *isv6 = B_FALSE; 1949 } else { 1950 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1951 return (EINVAL); 1952 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1953 IN6_IS_ADDR_MULTICAST(v6src)) { 1954 return (EINVAL); 1955 } 1956 *illpp = NULL; 1957 *isv6 = B_TRUE; 1958 } 1959 1960 if (ifindex == 0) { 1961 if (*isv6) 1962 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 1963 else 1964 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 1965 if (ill == NULL && ipif == NULL) 1966 return (EADDRNOTAVAIL); 1967 } else { 1968 if (*isv6) { 1969 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1970 wq, first_mp, func, &err, ipst); 1971 if (ill != NULL && 1972 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1973 ill_refrele(ill); 1974 ill = NULL; 1975 err = EADDRNOTAVAIL; 1976 } 1977 } else { 1978 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1979 zoneid, wq, first_mp, func, &err, ipst); 1980 } 1981 if (ill == NULL 
&& ipif == NULL) 1982 return (err); 1983 } 1984 1985 *ipifpp = ipif; 1986 *illpp = ill; 1987 return (0); 1988 } 1989 1990 static int 1991 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1992 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1993 { 1994 ilg_t *ilg; 1995 int i, numsrc, fmode, outsrcs; 1996 struct sockaddr_in *sin; 1997 struct sockaddr_in6 *sin6; 1998 struct in_addr *addrp; 1999 slist_t *fp; 2000 boolean_t is_v4only_api; 2001 2002 mutex_enter(&connp->conn_lock); 2003 2004 ilg = ilg_lookup_ipif(connp, grp, ipif); 2005 if (ilg == NULL) { 2006 mutex_exit(&connp->conn_lock); 2007 return (EADDRNOTAVAIL); 2008 } 2009 2010 if (gf == NULL) { 2011 ASSERT(imsf != NULL); 2012 ASSERT(!isv4mapped); 2013 is_v4only_api = B_TRUE; 2014 outsrcs = imsf->imsf_numsrc; 2015 } else { 2016 ASSERT(imsf == NULL); 2017 is_v4only_api = B_FALSE; 2018 outsrcs = gf->gf_numsrc; 2019 } 2020 2021 /* 2022 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2023 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2024 * So we need to translate here. 2025 */ 2026 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2027 MCAST_INCLUDE : MCAST_EXCLUDE; 2028 if ((fp = ilg->ilg_filter) == NULL) { 2029 numsrc = 0; 2030 } else { 2031 for (i = 0; i < outsrcs; i++) { 2032 if (i == fp->sl_numsrc) 2033 break; 2034 if (isv4mapped) { 2035 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2036 sin6->sin6_family = AF_INET6; 2037 sin6->sin6_addr = fp->sl_addr[i]; 2038 } else { 2039 if (is_v4only_api) { 2040 addrp = &imsf->imsf_slist[i]; 2041 } else { 2042 sin = (struct sockaddr_in *) 2043 &gf->gf_slist[i]; 2044 sin->sin_family = AF_INET; 2045 addrp = &sin->sin_addr; 2046 } 2047 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2048 } 2049 } 2050 numsrc = fp->sl_numsrc; 2051 } 2052 2053 if (is_v4only_api) { 2054 imsf->imsf_numsrc = numsrc; 2055 imsf->imsf_fmode = fmode; 2056 } else { 2057 gf->gf_numsrc = numsrc; 2058 gf->gf_fmode = fmode; 2059 } 2060 2061 mutex_exit(&connp->conn_lock); 2062 2063 return (0); 2064 } 2065 2066 static int 2067 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2068 const struct in6_addr *grp, ill_t *ill) 2069 { 2070 ilg_t *ilg; 2071 int i; 2072 struct sockaddr_storage *sl; 2073 struct sockaddr_in6 *sin6; 2074 slist_t *fp; 2075 2076 mutex_enter(&connp->conn_lock); 2077 2078 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2079 if (ilg == NULL) { 2080 mutex_exit(&connp->conn_lock); 2081 return (EADDRNOTAVAIL); 2082 } 2083 2084 /* 2085 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2086 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2087 * So we need to translate here. 2088 */ 2089 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2090 MCAST_INCLUDE : MCAST_EXCLUDE; 2091 if ((fp = ilg->ilg_filter) == NULL) { 2092 gf->gf_numsrc = 0; 2093 } else { 2094 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2095 if (i == fp->sl_numsrc) 2096 break; 2097 sin6 = (struct sockaddr_in6 *)sl; 2098 sin6->sin6_family = AF_INET6; 2099 sin6->sin6_addr = fp->sl_addr[i]; 2100 } 2101 gf->gf_numsrc = fp->sl_numsrc; 2102 } 2103 2104 mutex_exit(&connp->conn_lock); 2105 2106 return (0); 2107 } 2108 2109 static int 2110 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2111 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2112 { 2113 ilg_t *ilg; 2114 int i, err, insrcs, infmode, new_fmode; 2115 struct sockaddr_in *sin; 2116 struct sockaddr_in6 *sin6; 2117 struct in_addr *addrp; 2118 slist_t *orig_filter = NULL; 2119 slist_t *new_filter = NULL; 2120 mcast_record_t orig_fmode; 2121 boolean_t leave_grp, is_v4only_api; 2122 ilg_stat_t ilgstat; 2123 2124 if (gf == NULL) { 2125 ASSERT(imsf != NULL); 2126 ASSERT(!isv4mapped); 2127 is_v4only_api = B_TRUE; 2128 insrcs = imsf->imsf_numsrc; 2129 infmode = imsf->imsf_fmode; 2130 } else { 2131 ASSERT(imsf == NULL); 2132 is_v4only_api = B_FALSE; 2133 insrcs = gf->gf_numsrc; 2134 infmode = gf->gf_fmode; 2135 } 2136 2137 /* Make sure we can handle the source list */ 2138 if (insrcs > MAX_FILTER_SIZE) 2139 return (ENOBUFS); 2140 2141 /* 2142 * setting the filter to (INCLUDE, NULL) is treated 2143 * as a request to leave the group. 2144 */ 2145 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2146 2147 ASSERT(IAM_WRITER_IPIF(ipif)); 2148 2149 mutex_enter(&connp->conn_lock); 2150 2151 ilg = ilg_lookup_ipif(connp, grp, ipif); 2152 if (ilg == NULL) { 2153 /* 2154 * if the request was actually to leave, and we 2155 * didn't find an ilg, there's nothing to do. 2156 */ 2157 if (!leave_grp) 2158 ilg = conn_ilg_alloc(connp); 2159 if (leave_grp || ilg == NULL) { 2160 mutex_exit(&connp->conn_lock); 2161 return (leave_grp ? 
0 : ENOMEM); 2162 } 2163 ilgstat = ILGSTAT_NEW; 2164 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2165 ilg->ilg_ipif = ipif; 2166 ilg->ilg_ill = NULL; 2167 ilg->ilg_orig_ifindex = 0; 2168 } else if (leave_grp) { 2169 ilg_delete(connp, ilg, NULL); 2170 mutex_exit(&connp->conn_lock); 2171 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2172 return (0); 2173 } else { 2174 ilgstat = ILGSTAT_CHANGE; 2175 /* Preserve existing state in case ip_addmulti() fails */ 2176 orig_fmode = ilg->ilg_fmode; 2177 if (ilg->ilg_filter == NULL) { 2178 orig_filter = NULL; 2179 } else { 2180 orig_filter = l_alloc_copy(ilg->ilg_filter); 2181 if (orig_filter == NULL) { 2182 mutex_exit(&connp->conn_lock); 2183 return (ENOMEM); 2184 } 2185 } 2186 } 2187 2188 /* 2189 * Alloc buffer to copy new state into (see below) before 2190 * we make any changes, so we can bail if it fails. 2191 */ 2192 if ((new_filter = l_alloc()) == NULL) { 2193 mutex_exit(&connp->conn_lock); 2194 err = ENOMEM; 2195 goto free_and_exit; 2196 } 2197 2198 if (insrcs == 0) { 2199 CLEAR_SLIST(ilg->ilg_filter); 2200 } else { 2201 slist_t *fp; 2202 if (ilg->ilg_filter == NULL) { 2203 fp = l_alloc(); 2204 if (fp == NULL) { 2205 if (ilgstat == ILGSTAT_NEW) 2206 ilg_delete(connp, ilg, NULL); 2207 mutex_exit(&connp->conn_lock); 2208 err = ENOMEM; 2209 goto free_and_exit; 2210 } 2211 } else { 2212 fp = ilg->ilg_filter; 2213 } 2214 for (i = 0; i < insrcs; i++) { 2215 if (isv4mapped) { 2216 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2217 fp->sl_addr[i] = sin6->sin6_addr; 2218 } else { 2219 if (is_v4only_api) { 2220 addrp = &imsf->imsf_slist[i]; 2221 } else { 2222 sin = (struct sockaddr_in *) 2223 &gf->gf_slist[i]; 2224 addrp = &sin->sin_addr; 2225 } 2226 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2227 } 2228 } 2229 fp->sl_numsrc = insrcs; 2230 ilg->ilg_filter = fp; 2231 } 2232 /* 2233 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2234 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 
2235 * So we need to translate here. 2236 */ 2237 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2238 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2239 2240 /* 2241 * Save copy of ilg's filter state to pass to other functions, 2242 * so we can release conn_lock now. 2243 */ 2244 new_fmode = ilg->ilg_fmode; 2245 l_copy(ilg->ilg_filter, new_filter); 2246 2247 mutex_exit(&connp->conn_lock); 2248 2249 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2250 if (err != 0) { 2251 /* 2252 * Restore the original filter state, or delete the 2253 * newly-created ilg. We need to look up the ilg 2254 * again, though, since we've not been holding the 2255 * conn_lock. 2256 */ 2257 mutex_enter(&connp->conn_lock); 2258 ilg = ilg_lookup_ipif(connp, grp, ipif); 2259 ASSERT(ilg != NULL); 2260 if (ilgstat == ILGSTAT_NEW) { 2261 ilg_delete(connp, ilg, NULL); 2262 } else { 2263 ilg->ilg_fmode = orig_fmode; 2264 if (SLIST_IS_EMPTY(orig_filter)) { 2265 CLEAR_SLIST(ilg->ilg_filter); 2266 } else { 2267 /* 2268 * We didn't free the filter, even if we 2269 * were trying to make the source list empty; 2270 * so if orig_filter isn't empty, the ilg 2271 * must still have a filter alloc'd. 
2272 */ 2273 l_copy(orig_filter, ilg->ilg_filter); 2274 } 2275 } 2276 mutex_exit(&connp->conn_lock); 2277 } 2278 2279 free_and_exit: 2280 l_free(orig_filter); 2281 l_free(new_filter); 2282 2283 return (err); 2284 } 2285 2286 static int 2287 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2288 const struct in6_addr *grp, ill_t *ill) 2289 { 2290 ilg_t *ilg; 2291 int i, orig_ifindex, orig_fmode, new_fmode, err; 2292 slist_t *orig_filter = NULL; 2293 slist_t *new_filter = NULL; 2294 struct sockaddr_storage *sl; 2295 struct sockaddr_in6 *sin6; 2296 boolean_t leave_grp; 2297 ilg_stat_t ilgstat; 2298 2299 /* Make sure we can handle the source list */ 2300 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2301 return (ENOBUFS); 2302 2303 /* 2304 * setting the filter to (INCLUDE, NULL) is treated 2305 * as a request to leave the group. 2306 */ 2307 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2308 2309 ASSERT(IAM_WRITER_ILL(ill)); 2310 2311 /* 2312 * Use the ifindex to do the lookup. We can't use the ill 2313 * directly because ilg_ill could point to a different ill 2314 * if things have moved. 2315 */ 2316 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2317 2318 mutex_enter(&connp->conn_lock); 2319 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2320 if (ilg == NULL) { 2321 /* 2322 * if the request was actually to leave, and we 2323 * didn't find an ilg, there's nothing to do. 2324 */ 2325 if (!leave_grp) 2326 ilg = conn_ilg_alloc(connp); 2327 if (leave_grp || ilg == NULL) { 2328 mutex_exit(&connp->conn_lock); 2329 return (leave_grp ? 0 : ENOMEM); 2330 } 2331 ilgstat = ILGSTAT_NEW; 2332 ilg->ilg_v6group = *grp; 2333 ilg->ilg_ipif = NULL; 2334 /* 2335 * Choose our target ill to join on. This might be 2336 * different from the ill we've been given if it's 2337 * currently down and part of a group. 2338 * 2339 * new ill is not refheld; we are writer. 
2340 */ 2341 ill = ip_choose_multi_ill(ill, grp); 2342 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2343 ilg->ilg_ill = ill; 2344 /* 2345 * Remember the index that we joined on, so that we can 2346 * successfully delete them later on and also search for 2347 * duplicates if the application wants to join again. 2348 */ 2349 ilg->ilg_orig_ifindex = orig_ifindex; 2350 } else if (leave_grp) { 2351 /* 2352 * Use the ilg's current ill for the deletion, 2353 * we might have failed over. 2354 */ 2355 ill = ilg->ilg_ill; 2356 ilg_delete(connp, ilg, NULL); 2357 mutex_exit(&connp->conn_lock); 2358 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2359 connp->conn_zoneid, B_FALSE, B_TRUE); 2360 return (0); 2361 } else { 2362 ilgstat = ILGSTAT_CHANGE; 2363 /* 2364 * The current ill might be different from the one we were 2365 * asked to join on (if failover has occurred); we should 2366 * join on the ill stored in the ilg. The original ill 2367 * is noted in ilg_orig_ifindex, which matched our request. 2368 */ 2369 ill = ilg->ilg_ill; 2370 /* preserve existing state in case ip_addmulti() fails */ 2371 orig_fmode = ilg->ilg_fmode; 2372 if (ilg->ilg_filter == NULL) { 2373 orig_filter = NULL; 2374 } else { 2375 orig_filter = l_alloc_copy(ilg->ilg_filter); 2376 if (orig_filter == NULL) { 2377 mutex_exit(&connp->conn_lock); 2378 return (ENOMEM); 2379 } 2380 } 2381 } 2382 2383 /* 2384 * Alloc buffer to copy new state into (see below) before 2385 * we make any changes, so we can bail if it fails. 
2386 */ 2387 if ((new_filter = l_alloc()) == NULL) { 2388 mutex_exit(&connp->conn_lock); 2389 err = ENOMEM; 2390 goto free_and_exit; 2391 } 2392 2393 if (gf->gf_numsrc == 0) { 2394 CLEAR_SLIST(ilg->ilg_filter); 2395 } else { 2396 slist_t *fp; 2397 if (ilg->ilg_filter == NULL) { 2398 fp = l_alloc(); 2399 if (fp == NULL) { 2400 if (ilgstat == ILGSTAT_NEW) 2401 ilg_delete(connp, ilg, NULL); 2402 mutex_exit(&connp->conn_lock); 2403 err = ENOMEM; 2404 goto free_and_exit; 2405 } 2406 } else { 2407 fp = ilg->ilg_filter; 2408 } 2409 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2410 sin6 = (struct sockaddr_in6 *)sl; 2411 fp->sl_addr[i] = sin6->sin6_addr; 2412 } 2413 fp->sl_numsrc = gf->gf_numsrc; 2414 ilg->ilg_filter = fp; 2415 } 2416 /* 2417 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2418 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2419 * So we need to translate here. 2420 */ 2421 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2422 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2423 2424 /* 2425 * Save copy of ilg's filter state to pass to other functions, 2426 * so we can release conn_lock now. 2427 */ 2428 new_fmode = ilg->ilg_fmode; 2429 l_copy(ilg->ilg_filter, new_filter); 2430 2431 mutex_exit(&connp->conn_lock); 2432 2433 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2434 ilgstat, new_fmode, new_filter); 2435 if (err != 0) { 2436 /* 2437 * Restore the original filter state, or delete the 2438 * newly-created ilg. We need to look up the ilg 2439 * again, though, since we've not been holding the 2440 * conn_lock. 
2441 */ 2442 mutex_enter(&connp->conn_lock); 2443 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2444 ASSERT(ilg != NULL); 2445 if (ilgstat == ILGSTAT_NEW) { 2446 ilg_delete(connp, ilg, NULL); 2447 } else { 2448 ilg->ilg_fmode = orig_fmode; 2449 if (SLIST_IS_EMPTY(orig_filter)) { 2450 CLEAR_SLIST(ilg->ilg_filter); 2451 } else { 2452 /* 2453 * We didn't free the filter, even if we 2454 * were trying to make the source list empty; 2455 * so if orig_filter isn't empty, the ilg 2456 * must still have a filter alloc'd. 2457 */ 2458 l_copy(orig_filter, ilg->ilg_filter); 2459 } 2460 } 2461 mutex_exit(&connp->conn_lock); 2462 } 2463 2464 free_and_exit: 2465 l_free(orig_filter); 2466 l_free(new_filter); 2467 2468 return (err); 2469 } 2470 2471 /* 2472 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2473 */ 2474 /* ARGSUSED */ 2475 int 2476 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2477 ip_ioctl_cmd_t *ipip, void *ifreq) 2478 { 2479 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2480 /* existence verified in ip_wput_nondata() */ 2481 mblk_t *data_mp = mp->b_cont->b_cont; 2482 int datalen, err, cmd, minsize; 2483 int expsize = 0; 2484 conn_t *connp; 2485 boolean_t isv6, is_v4only_api, getcmd; 2486 struct sockaddr_in *gsin; 2487 struct sockaddr_in6 *gsin6; 2488 ipaddr_t v4grp; 2489 in6_addr_t v6grp; 2490 struct group_filter *gf = NULL; 2491 struct ip_msfilter *imsf = NULL; 2492 mblk_t *ndp; 2493 2494 if (data_mp->b_cont != NULL) { 2495 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2496 return (ENOMEM); 2497 freemsg(data_mp); 2498 data_mp = ndp; 2499 mp->b_cont->b_cont = data_mp; 2500 } 2501 2502 cmd = iocp->ioc_cmd; 2503 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2504 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2505 minsize = (is_v4only_api) ? 
IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2506 datalen = MBLKL(data_mp); 2507 2508 if (datalen < minsize) 2509 return (EINVAL); 2510 2511 /* 2512 * now we know we have at least have the initial structure, 2513 * but need to check for the source list array. 2514 */ 2515 if (is_v4only_api) { 2516 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2517 isv6 = B_FALSE; 2518 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2519 } else { 2520 gf = (struct group_filter *)data_mp->b_rptr; 2521 if (gf->gf_group.ss_family == AF_INET6) { 2522 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2523 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2524 } else { 2525 isv6 = B_FALSE; 2526 } 2527 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2528 } 2529 if (datalen < expsize) 2530 return (EINVAL); 2531 2532 connp = Q_TO_CONN(q); 2533 2534 /* operation not supported on the virtual network interface */ 2535 if (IS_VNI(ipif->ipif_ill)) 2536 return (EINVAL); 2537 2538 if (isv6) { 2539 ill_t *ill = ipif->ipif_ill; 2540 ill_refhold(ill); 2541 2542 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2543 v6grp = gsin6->sin6_addr; 2544 if (getcmd) 2545 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2546 else 2547 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2548 2549 ill_refrele(ill); 2550 } else { 2551 boolean_t isv4mapped = B_FALSE; 2552 if (is_v4only_api) { 2553 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2554 } else { 2555 if (gf->gf_group.ss_family == AF_INET) { 2556 gsin = (struct sockaddr_in *)&gf->gf_group; 2557 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2558 } else { 2559 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2560 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2561 v4grp); 2562 isv4mapped = B_TRUE; 2563 } 2564 } 2565 if (getcmd) 2566 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2567 isv4mapped); 2568 else 2569 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2570 isv4mapped); 2571 } 2572 2573 return (err); 2574 } 2575 2576 /* 2577 * Finds the ipif based on information 
in the ioctl headers. Needed to make 2578 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2579 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2580 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 2581 */ 2582 int 2583 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2584 { 2585 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2586 int cmd = iocp->ioc_cmd, err = 0; 2587 conn_t *connp; 2588 ipif_t *ipif; 2589 /* caller has verified this mblk exists */ 2590 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2591 struct ip_msfilter *imsf; 2592 struct group_filter *gf; 2593 ipaddr_t v4addr, v4grp; 2594 in6_addr_t v6grp; 2595 uint32_t index; 2596 zoneid_t zoneid; 2597 ip_stack_t *ipst; 2598 2599 connp = Q_TO_CONN(q); 2600 zoneid = connp->conn_zoneid; 2601 ipst = connp->conn_netstack->netstack_ip; 2602 2603 /* don't allow multicast operations on a tcp conn */ 2604 if (IPCL_IS_TCP(connp)) 2605 return (ENOPROTOOPT); 2606 2607 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2608 /* don't allow v4-specific ioctls on v6 socket */ 2609 if (connp->conn_af_isv6) 2610 return (EAFNOSUPPORT); 2611 2612 imsf = (struct ip_msfilter *)dbuf; 2613 v4addr = imsf->imsf_interface.s_addr; 2614 v4grp = imsf->imsf_multiaddr.s_addr; 2615 if (v4addr == INADDR_ANY) { 2616 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2617 if (ipif == NULL) 2618 err = EADDRNOTAVAIL; 2619 } else { 2620 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2621 func, &err, ipst); 2622 } 2623 } else { 2624 boolean_t isv6 = B_FALSE; 2625 gf = (struct group_filter *)dbuf; 2626 index = gf->gf_interface; 2627 if (gf->gf_group.ss_family == AF_INET6) { 2628 struct sockaddr_in6 *sin6; 2629 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2630 v6grp = sin6->sin6_addr; 2631 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2632 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2633 else 2634 isv6 = B_TRUE; 2635 } else if (gf->gf_group.ss_family == AF_INET) { 
2636 struct sockaddr_in *sin; 2637 sin = (struct sockaddr_in *)&gf->gf_group; 2638 v4grp = sin->sin_addr.s_addr; 2639 } else { 2640 return (EAFNOSUPPORT); 2641 } 2642 if (index == 0) { 2643 if (isv6) { 2644 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2645 ipst); 2646 } else { 2647 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2648 } 2649 if (ipif == NULL) 2650 err = EADDRNOTAVAIL; 2651 } else { 2652 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2653 q, mp, func, &err, ipst); 2654 } 2655 } 2656 2657 *ipifpp = ipif; 2658 return (err); 2659 } 2660 2661 /* 2662 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2663 * in in two stages, as the first copyin tells us the size of the attached 2664 * source buffer. This function is called by ip_wput_nondata() after the 2665 * first copyin has completed; it figures out how big the second stage 2666 * needs to be, and kicks it off. 2667 * 2668 * In some cases (numsrc < 2), the second copyin is not needed as the 2669 * first one gets a complete structure containing 1 source addr. 2670 * 2671 * The function returns 0 if a second copyin has been started (i.e. there's 2672 * no more work to be done right now), or 1 if the second copyin is not 2673 * needed and ip_wput_nondata() can continue its processing. 
2674 */ 2675 int 2676 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2677 { 2678 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2679 int cmd = iocp->ioc_cmd; 2680 /* validity of this checked in ip_wput_nondata() */ 2681 mblk_t *mp1 = mp->b_cont->b_cont; 2682 int copysize = 0; 2683 int offset; 2684 2685 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2686 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2687 if (gf->gf_numsrc >= 2) { 2688 offset = sizeof (struct group_filter); 2689 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2690 } 2691 } else { 2692 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2693 if (imsf->imsf_numsrc >= 2) { 2694 offset = sizeof (struct ip_msfilter); 2695 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2696 } 2697 } 2698 if (copysize > 0) { 2699 mi_copyin_n(q, mp, offset, copysize); 2700 return (0); 2701 } 2702 return (1); 2703 } 2704 2705 /* 2706 * Handle the following optmgmt: 2707 * IP_ADD_MEMBERSHIP must not have joined already 2708 * MCAST_JOIN_GROUP must not have joined already 2709 * IP_BLOCK_SOURCE must have joined already 2710 * MCAST_BLOCK_SOURCE must have joined already 2711 * IP_JOIN_SOURCE_GROUP may have joined already 2712 * MCAST_JOIN_SOURCE_GROUP may have joined already 2713 * 2714 * fmode and src parameters may be used to determine which option is 2715 * being set, as follows (the IP_* and MCAST_* versions of each option 2716 * are functionally equivalent): 2717 * opt fmode src 2718 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2719 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2720 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2721 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2722 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2723 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2724 * 2725 * Changing the filter mode is not allowed; if a matching ilg already 2726 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2727 * 2728 * Verifies that there is a source address of appropriate scope for 2729 * the group; if not, EADDRNOTAVAIL is returned. 2730 * 2731 * The interface to be used may be identified by an address or by an 2732 * index. A pointer to the index is passed; if it is NULL, use the 2733 * address, otherwise, use the index. 2734 */ 2735 int 2736 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2737 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2738 mblk_t *first_mp) 2739 { 2740 ipif_t *ipif; 2741 ipsq_t *ipsq; 2742 int err = 0; 2743 ill_t *ill; 2744 2745 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2746 ip_restart_optmgmt, &ipif); 2747 if (err != 0) { 2748 if (err != EINPROGRESS) { 2749 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2750 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2751 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2752 } 2753 return (err); 2754 } 2755 ASSERT(ipif != NULL); 2756 2757 ill = ipif->ipif_ill; 2758 /* Operation not supported on a virtual network interface */ 2759 if (IS_VNI(ill)) { 2760 ipif_refrele(ipif); 2761 return (EINVAL); 2762 } 2763 2764 if (checkonly) { 2765 /* 2766 * do not do operation, just pretend to - new T_CHECK 2767 * semantics. The error return case above if encountered 2768 * considered a good enough "check" here. 
2769 */ 2770 ipif_refrele(ipif); 2771 return (0); 2772 } 2773 2774 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2775 NEW_OP); 2776 2777 /* unspecified source addr => no source filtering */ 2778 err = ilg_add(connp, group, ipif, fmode, src); 2779 2780 IPSQ_EXIT(ipsq); 2781 2782 ipif_refrele(ipif); 2783 return (err); 2784 } 2785 2786 /* 2787 * Handle the following optmgmt: 2788 * IPV6_JOIN_GROUP must not have joined already 2789 * MCAST_JOIN_GROUP must not have joined already 2790 * MCAST_BLOCK_SOURCE must have joined already 2791 * MCAST_JOIN_SOURCE_GROUP may have joined already 2792 * 2793 * fmode and src parameters may be used to determine which option is 2794 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2795 * are functionally equivalent): 2796 * opt fmode v6src 2797 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2798 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2799 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2800 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2801 * 2802 * Changing the filter mode is not allowed; if a matching ilg already 2803 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2804 * 2805 * Verifies that there is a source address of appropriate scope for 2806 * the group; if not, EADDRNOTAVAIL is returned. 2807 * 2808 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2809 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2810 * v6src is also v4-mapped. 
2811 */ 2812 int 2813 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2814 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2815 const in6_addr_t *v6src, mblk_t *first_mp) 2816 { 2817 ill_t *ill; 2818 ipif_t *ipif; 2819 char buf[INET6_ADDRSTRLEN]; 2820 ipaddr_t v4group, v4src; 2821 boolean_t isv6; 2822 ipsq_t *ipsq; 2823 int err; 2824 2825 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2826 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2827 if (err != 0) { 2828 if (err != EINPROGRESS) { 2829 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2830 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2831 sizeof (buf)), ifindex)); 2832 } 2833 return (err); 2834 } 2835 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2836 2837 /* operation is not supported on the virtual network interface */ 2838 if (isv6) { 2839 if (IS_VNI(ill)) { 2840 ill_refrele(ill); 2841 return (EINVAL); 2842 } 2843 } else { 2844 if (IS_VNI(ipif->ipif_ill)) { 2845 ipif_refrele(ipif); 2846 return (EINVAL); 2847 } 2848 } 2849 2850 if (checkonly) { 2851 /* 2852 * do not do operation, just pretend to - new T_CHECK 2853 * semantics. The error return case above if encountered 2854 * considered a good enough "check" here. 
2855 */ 2856 if (isv6) 2857 ill_refrele(ill); 2858 else 2859 ipif_refrele(ipif); 2860 return (0); 2861 } 2862 2863 if (!isv6) { 2864 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2865 ipsq, NEW_OP); 2866 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2867 IPSQ_EXIT(ipsq); 2868 ipif_refrele(ipif); 2869 } else { 2870 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2871 ipsq, NEW_OP); 2872 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2873 IPSQ_EXIT(ipsq); 2874 ill_refrele(ill); 2875 } 2876 2877 return (err); 2878 } 2879 2880 static int 2881 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2882 mcast_record_t fmode, ipaddr_t src) 2883 { 2884 ilg_t *ilg; 2885 in6_addr_t v6src; 2886 boolean_t leaving = B_FALSE; 2887 2888 ASSERT(IAM_WRITER_IPIF(ipif)); 2889 2890 /* 2891 * The ilg is valid only while we hold the conn lock. Once we drop 2892 * the lock, another thread can locate another ilg on this connp, 2893 * but on a different ipif, and delete it, and cause the ilg array 2894 * to be reallocated and copied. Hence do the ilg_delete before 2895 * dropping the lock. 2896 */ 2897 mutex_enter(&connp->conn_lock); 2898 ilg = ilg_lookup_ipif(connp, group, ipif); 2899 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2900 mutex_exit(&connp->conn_lock); 2901 return (EADDRNOTAVAIL); 2902 } 2903 2904 /* 2905 * Decide if we're actually deleting the ilg or just removing a 2906 * source filter address; if just removing an addr, make sure we 2907 * aren't trying to change the filter mode, and that the addr is 2908 * actually in our filter list already. If we're removing the 2909 * last src in an include list, just delete the ilg. 
2910 */ 2911 if (src == INADDR_ANY) { 2912 v6src = ipv6_all_zeros; 2913 leaving = B_TRUE; 2914 } else { 2915 int err = 0; 2916 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2917 if (fmode != ilg->ilg_fmode) 2918 err = EINVAL; 2919 else if (ilg->ilg_filter == NULL || 2920 !list_has_addr(ilg->ilg_filter, &v6src)) 2921 err = EADDRNOTAVAIL; 2922 if (err != 0) { 2923 mutex_exit(&connp->conn_lock); 2924 return (err); 2925 } 2926 if (fmode == MODE_IS_INCLUDE && 2927 ilg->ilg_filter->sl_numsrc == 1) { 2928 v6src = ipv6_all_zeros; 2929 leaving = B_TRUE; 2930 } 2931 } 2932 2933 ilg_delete(connp, ilg, &v6src); 2934 mutex_exit(&connp->conn_lock); 2935 2936 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2937 return (0); 2938 } 2939 2940 static int 2941 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2942 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2943 { 2944 ilg_t *ilg; 2945 ill_t *ilg_ill; 2946 uint_t ilg_orig_ifindex; 2947 boolean_t leaving = B_TRUE; 2948 2949 ASSERT(IAM_WRITER_ILL(ill)); 2950 2951 /* 2952 * Use the index that we originally used to join. We can't 2953 * use the ill directly because ilg_ill could point to 2954 * a new ill if things have moved. 2955 */ 2956 mutex_enter(&connp->conn_lock); 2957 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2958 ill->ill_phyint->phyint_ifindex); 2959 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2960 mutex_exit(&connp->conn_lock); 2961 return (EADDRNOTAVAIL); 2962 } 2963 2964 /* 2965 * Decide if we're actually deleting the ilg or just removing a 2966 * source filter address; if just removing an addr, make sure we 2967 * aren't trying to change the filter mode, and that the addr is 2968 * actually in our filter list already. If we're removing the 2969 * last src in an include list, just delete the ilg. 
2970 */ 2971 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2972 int err = 0; 2973 if (fmode != ilg->ilg_fmode) 2974 err = EINVAL; 2975 else if (ilg->ilg_filter == NULL || 2976 !list_has_addr(ilg->ilg_filter, v6src)) 2977 err = EADDRNOTAVAIL; 2978 if (err != 0) { 2979 mutex_exit(&connp->conn_lock); 2980 return (err); 2981 } 2982 if (fmode == MODE_IS_INCLUDE && 2983 ilg->ilg_filter->sl_numsrc == 1) 2984 v6src = NULL; 2985 else 2986 leaving = B_FALSE; 2987 } 2988 2989 ilg_ill = ilg->ilg_ill; 2990 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2991 ilg_delete(connp, ilg, v6src); 2992 mutex_exit(&connp->conn_lock); 2993 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2994 connp->conn_zoneid, B_FALSE, leaving); 2995 2996 return (0); 2997 } 2998 2999 /* 3000 * Handle the following optmgmt: 3001 * IP_DROP_MEMBERSHIP will leave 3002 * MCAST_LEAVE_GROUP will leave 3003 * IP_UNBLOCK_SOURCE will not leave 3004 * MCAST_UNBLOCK_SOURCE will not leave 3005 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3006 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3007 * 3008 * fmode and src parameters may be used to determine which option is 3009 * being set, as follows (the IP_* and MCAST_* versions of each option 3010 * are functionally equivalent): 3011 * opt fmode src 3012 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 3013 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 3014 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3015 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3016 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3017 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3018 * 3019 * Changing the filter mode is not allowed; if a matching ilg already 3020 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3021 * 3022 * The interface to be used may be identified by an address or by an 3023 * index. A pointer to the index is passed; if it is NULL, use the 3024 * address, otherwise, use the index. 
3025 */ 3026 int 3027 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3028 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3029 mblk_t *first_mp) 3030 { 3031 ipif_t *ipif; 3032 ipsq_t *ipsq; 3033 int err; 3034 ill_t *ill; 3035 3036 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3037 ip_restart_optmgmt, &ipif); 3038 if (err != 0) { 3039 if (err != EINPROGRESS) { 3040 ip1dbg(("ip_opt_delete_group: no ipif for group " 3041 "0x%x, ifaddr 0x%x\n", 3042 (int)ntohl(group), (int)ntohl(ifaddr))); 3043 } 3044 return (err); 3045 } 3046 ASSERT(ipif != NULL); 3047 3048 ill = ipif->ipif_ill; 3049 /* Operation not supported on a virtual network interface */ 3050 if (IS_VNI(ill)) { 3051 ipif_refrele(ipif); 3052 return (EINVAL); 3053 } 3054 3055 if (checkonly) { 3056 /* 3057 * do not do operation, just pretend to - new T_CHECK 3058 * semantics. The error return case above if encountered 3059 * considered a good enough "check" here. 3060 */ 3061 ipif_refrele(ipif); 3062 return (0); 3063 } 3064 3065 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3066 NEW_OP); 3067 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3068 IPSQ_EXIT(ipsq); 3069 3070 ipif_refrele(ipif); 3071 return (err); 3072 } 3073 3074 /* 3075 * Handle the following optmgmt: 3076 * IPV6_LEAVE_GROUP will leave 3077 * MCAST_LEAVE_GROUP will leave 3078 * MCAST_UNBLOCK_SOURCE will not leave 3079 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3080 * 3081 * fmode and src parameters may be used to determine which option is 3082 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3083 * are functionally equivalent): 3084 * opt fmode v6src 3085 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3086 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3087 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3088 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3089 * 3090 * Changing the filter mode is not 
allowed; if a matching ilg already 3091 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3092 * 3093 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3094 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3095 * v6src is also v4-mapped. 3096 */ 3097 int 3098 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3099 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3100 const in6_addr_t *v6src, mblk_t *first_mp) 3101 { 3102 ill_t *ill; 3103 ipif_t *ipif; 3104 char buf[INET6_ADDRSTRLEN]; 3105 ipaddr_t v4group, v4src; 3106 boolean_t isv6; 3107 ipsq_t *ipsq; 3108 int err; 3109 3110 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3111 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3112 if (err != 0) { 3113 if (err != EINPROGRESS) { 3114 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3115 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3116 sizeof (buf)), ifindex)); 3117 } 3118 return (err); 3119 } 3120 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3121 3122 /* operation is not supported on the virtual network interface */ 3123 if (isv6) { 3124 if (IS_VNI(ill)) { 3125 ill_refrele(ill); 3126 return (EINVAL); 3127 } 3128 } else { 3129 if (IS_VNI(ipif->ipif_ill)) { 3130 ipif_refrele(ipif); 3131 return (EINVAL); 3132 } 3133 } 3134 3135 if (checkonly) { 3136 /* 3137 * do not do operation, just pretend to - new T_CHECK 3138 * semantics. The error return case above if encountered 3139 * considered a good enough "check" here. 
3140 */ 3141 if (isv6) 3142 ill_refrele(ill); 3143 else 3144 ipif_refrele(ipif); 3145 return (0); 3146 } 3147 3148 if (!isv6) { 3149 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3150 ipsq, NEW_OP); 3151 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3152 v4src); 3153 IPSQ_EXIT(ipsq); 3154 ipif_refrele(ipif); 3155 } else { 3156 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3157 ipsq, NEW_OP); 3158 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3159 v6src); 3160 IPSQ_EXIT(ipsq); 3161 ill_refrele(ill); 3162 } 3163 3164 return (err); 3165 } 3166 3167 /* 3168 * Group mgmt for upper conn that passes things down 3169 * to the interface multicast list (and DLPI) 3170 * These routines can handle new style options that specify an interface name 3171 * as opposed to an interface address (needed for general handling of 3172 * unnumbered interfaces.) 3173 */ 3174 3175 /* 3176 * Add a group to an upper conn group data structure and pass things down 3177 * to the interface multicast list (and DLPI) 3178 */ 3179 static int 3180 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3181 ipaddr_t src) 3182 { 3183 int error = 0; 3184 ill_t *ill; 3185 ilg_t *ilg; 3186 ilg_stat_t ilgstat; 3187 slist_t *new_filter = NULL; 3188 int new_fmode; 3189 3190 ASSERT(IAM_WRITER_IPIF(ipif)); 3191 3192 ill = ipif->ipif_ill; 3193 3194 if (!(ill->ill_flags & ILLF_MULTICAST)) 3195 return (EADDRNOTAVAIL); 3196 3197 /* 3198 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3199 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3200 * serialize 2 threads doing join (sock, group1, hme0:0) and 3201 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3202 * but both operations happen on the same conn. 
3203 */ 3204 mutex_enter(&connp->conn_lock); 3205 ilg = ilg_lookup_ipif(connp, group, ipif); 3206 3207 /* 3208 * Depending on the option we're handling, may or may not be okay 3209 * if group has already been added. Figure out our rules based 3210 * on fmode and src params. Also make sure there's enough room 3211 * in the filter if we're adding a source to an existing filter. 3212 */ 3213 if (src == INADDR_ANY) { 3214 /* we're joining for all sources, must not have joined */ 3215 if (ilg != NULL) 3216 error = EADDRINUSE; 3217 } else { 3218 if (fmode == MODE_IS_EXCLUDE) { 3219 /* (excl {addr}) => block source, must have joined */ 3220 if (ilg == NULL) 3221 error = EADDRNOTAVAIL; 3222 } 3223 /* (incl {addr}) => join source, may have joined */ 3224 3225 if (ilg != NULL && 3226 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3227 error = ENOBUFS; 3228 } 3229 if (error != 0) { 3230 mutex_exit(&connp->conn_lock); 3231 return (error); 3232 } 3233 3234 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3235 3236 /* 3237 * Alloc buffer to copy new state into (see below) before 3238 * we make any changes, so we can bail if it fails. 
3239 */ 3240 if ((new_filter = l_alloc()) == NULL) { 3241 mutex_exit(&connp->conn_lock); 3242 return (ENOMEM); 3243 } 3244 3245 if (ilg == NULL) { 3246 ilgstat = ILGSTAT_NEW; 3247 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3248 mutex_exit(&connp->conn_lock); 3249 l_free(new_filter); 3250 return (ENOMEM); 3251 } 3252 if (src != INADDR_ANY) { 3253 ilg->ilg_filter = l_alloc(); 3254 if (ilg->ilg_filter == NULL) { 3255 ilg_delete(connp, ilg, NULL); 3256 mutex_exit(&connp->conn_lock); 3257 l_free(new_filter); 3258 return (ENOMEM); 3259 } 3260 ilg->ilg_filter->sl_numsrc = 1; 3261 IN6_IPADDR_TO_V4MAPPED(src, 3262 &ilg->ilg_filter->sl_addr[0]); 3263 } 3264 if (group == INADDR_ANY) { 3265 ilg->ilg_v6group = ipv6_all_zeros; 3266 } else { 3267 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3268 } 3269 ilg->ilg_ipif = ipif; 3270 ilg->ilg_ill = NULL; 3271 ilg->ilg_orig_ifindex = 0; 3272 ilg->ilg_fmode = fmode; 3273 } else { 3274 int index; 3275 in6_addr_t v6src; 3276 ilgstat = ILGSTAT_CHANGE; 3277 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3278 mutex_exit(&connp->conn_lock); 3279 l_free(new_filter); 3280 return (EINVAL); 3281 } 3282 if (ilg->ilg_filter == NULL) { 3283 ilg->ilg_filter = l_alloc(); 3284 if (ilg->ilg_filter == NULL) { 3285 mutex_exit(&connp->conn_lock); 3286 l_free(new_filter); 3287 return (ENOMEM); 3288 } 3289 } 3290 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3291 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3292 mutex_exit(&connp->conn_lock); 3293 l_free(new_filter); 3294 return (EADDRNOTAVAIL); 3295 } 3296 index = ilg->ilg_filter->sl_numsrc++; 3297 ilg->ilg_filter->sl_addr[index] = v6src; 3298 } 3299 3300 /* 3301 * Save copy of ilg's filter state to pass to other functions, 3302 * so we can release conn_lock now. 
3303 */ 3304 new_fmode = ilg->ilg_fmode; 3305 l_copy(ilg->ilg_filter, new_filter); 3306 3307 mutex_exit(&connp->conn_lock); 3308 3309 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3310 if (error != 0) { 3311 /* 3312 * Need to undo what we did before calling ip_addmulti()! 3313 * Must look up the ilg again since we've not been holding 3314 * conn_lock. 3315 */ 3316 in6_addr_t v6src; 3317 if (ilgstat == ILGSTAT_NEW) 3318 v6src = ipv6_all_zeros; 3319 else 3320 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3321 mutex_enter(&connp->conn_lock); 3322 ilg = ilg_lookup_ipif(connp, group, ipif); 3323 ASSERT(ilg != NULL); 3324 ilg_delete(connp, ilg, &v6src); 3325 mutex_exit(&connp->conn_lock); 3326 l_free(new_filter); 3327 return (error); 3328 } 3329 3330 l_free(new_filter); 3331 return (0); 3332 } 3333 3334 static int 3335 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3336 mcast_record_t fmode, const in6_addr_t *v6src) 3337 { 3338 int error = 0; 3339 int orig_ifindex; 3340 ilg_t *ilg; 3341 ilg_stat_t ilgstat; 3342 slist_t *new_filter = NULL; 3343 int new_fmode; 3344 3345 ASSERT(IAM_WRITER_ILL(ill)); 3346 3347 if (!(ill->ill_flags & ILLF_MULTICAST)) 3348 return (EADDRNOTAVAIL); 3349 3350 /* 3351 * conn_lock protects the ilg list. Serializes 2 threads doing 3352 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3353 * and hme1 map to different ipsq's, but both operations happen 3354 * on the same conn. 3355 */ 3356 mutex_enter(&connp->conn_lock); 3357 3358 /* 3359 * Use the ifindex to do the lookup. We can't use the ill 3360 * directly because ilg_ill could point to a different ill if 3361 * things have moved. 3362 */ 3363 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3364 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3365 3366 /* 3367 * Depending on the option we're handling, may or may not be okay 3368 * if group has already been added. Figure out our rules based 3369 * on fmode and src params. 
Also make sure there's enough room 3370 * in the filter if we're adding a source to an existing filter. 3371 */ 3372 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3373 /* we're joining for all sources, must not have joined */ 3374 if (ilg != NULL) 3375 error = EADDRINUSE; 3376 } else { 3377 if (fmode == MODE_IS_EXCLUDE) { 3378 /* (excl {addr}) => block source, must have joined */ 3379 if (ilg == NULL) 3380 error = EADDRNOTAVAIL; 3381 } 3382 /* (incl {addr}) => join source, may have joined */ 3383 3384 if (ilg != NULL && 3385 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3386 error = ENOBUFS; 3387 } 3388 if (error != 0) { 3389 mutex_exit(&connp->conn_lock); 3390 return (error); 3391 } 3392 3393 /* 3394 * Alloc buffer to copy new state into (see below) before 3395 * we make any changes, so we can bail if it fails. 3396 */ 3397 if ((new_filter = l_alloc()) == NULL) { 3398 mutex_exit(&connp->conn_lock); 3399 return (ENOMEM); 3400 } 3401 3402 if (ilg == NULL) { 3403 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3404 mutex_exit(&connp->conn_lock); 3405 l_free(new_filter); 3406 return (ENOMEM); 3407 } 3408 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3409 ilg->ilg_filter = l_alloc(); 3410 if (ilg->ilg_filter == NULL) { 3411 ilg_delete(connp, ilg, NULL); 3412 mutex_exit(&connp->conn_lock); 3413 l_free(new_filter); 3414 return (ENOMEM); 3415 } 3416 ilg->ilg_filter->sl_numsrc = 1; 3417 ilg->ilg_filter->sl_addr[0] = *v6src; 3418 } 3419 ilgstat = ILGSTAT_NEW; 3420 ilg->ilg_v6group = *v6group; 3421 ilg->ilg_fmode = fmode; 3422 ilg->ilg_ipif = NULL; 3423 /* 3424 * Choose our target ill to join on. This might be different 3425 * from the ill we've been given if it's currently down and 3426 * part of a group. 3427 * 3428 * new ill is not refheld; we are writer. 
3429 */ 3430 ill = ip_choose_multi_ill(ill, v6group); 3431 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3432 ilg->ilg_ill = ill; 3433 /* 3434 * Remember the orig_ifindex that we joined on, so that we 3435 * can successfully delete them later on and also search 3436 * for duplicates if the application wants to join again. 3437 */ 3438 ilg->ilg_orig_ifindex = orig_ifindex; 3439 } else { 3440 int index; 3441 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3442 mutex_exit(&connp->conn_lock); 3443 l_free(new_filter); 3444 return (EINVAL); 3445 } 3446 if (ilg->ilg_filter == NULL) { 3447 ilg->ilg_filter = l_alloc(); 3448 if (ilg->ilg_filter == NULL) { 3449 mutex_exit(&connp->conn_lock); 3450 l_free(new_filter); 3451 return (ENOMEM); 3452 } 3453 } 3454 if (list_has_addr(ilg->ilg_filter, v6src)) { 3455 mutex_exit(&connp->conn_lock); 3456 l_free(new_filter); 3457 return (EADDRNOTAVAIL); 3458 } 3459 ilgstat = ILGSTAT_CHANGE; 3460 index = ilg->ilg_filter->sl_numsrc++; 3461 ilg->ilg_filter->sl_addr[index] = *v6src; 3462 /* 3463 * The current ill might be different from the one we were 3464 * asked to join on (if failover has occurred); we should 3465 * join on the ill stored in the ilg. The original ill 3466 * is noted in ilg_orig_ifindex, which matched our request. 3467 */ 3468 ill = ilg->ilg_ill; 3469 } 3470 3471 /* 3472 * Save copy of ilg's filter state to pass to other functions, 3473 * so we can release conn_lock now. 3474 */ 3475 new_fmode = ilg->ilg_fmode; 3476 l_copy(ilg->ilg_filter, new_filter); 3477 3478 mutex_exit(&connp->conn_lock); 3479 3480 /* 3481 * Now update the ill. We wait to do this until after the ilg 3482 * has been updated because we need to update the src filter 3483 * info for the ill, which involves looking at the status of 3484 * all the ilgs associated with this group/interface pair. 
3485 */ 3486 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3487 ilgstat, new_fmode, new_filter); 3488 if (error != 0) { 3489 /* 3490 * But because we waited, we have to undo the ilg update 3491 * if ip_addmulti_v6() fails. We also must lookup ilg 3492 * again, since we've not been holding conn_lock. 3493 */ 3494 in6_addr_t delsrc = 3495 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3496 mutex_enter(&connp->conn_lock); 3497 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3498 ASSERT(ilg != NULL); 3499 ilg_delete(connp, ilg, &delsrc); 3500 mutex_exit(&connp->conn_lock); 3501 l_free(new_filter); 3502 return (error); 3503 } 3504 3505 l_free(new_filter); 3506 3507 return (0); 3508 } 3509 3510 /* 3511 * Find an IPv4 ilg matching group, ill and source 3512 */ 3513 ilg_t * 3514 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3515 { 3516 in6_addr_t v6group, v6src; 3517 int i; 3518 boolean_t isinlist; 3519 ilg_t *ilg; 3520 ipif_t *ipif; 3521 ill_t *ilg_ill; 3522 3523 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3524 3525 /* 3526 * INADDR_ANY is represented as the IPv6 unspecified addr. 3527 */ 3528 if (group == INADDR_ANY) 3529 v6group = ipv6_all_zeros; 3530 else 3531 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3532 3533 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3534 /* ilg_ipif is NULL for v6; skip them */ 3535 ilg = &connp->conn_ilg[i]; 3536 if ((ipif = ilg->ilg_ipif) == NULL) 3537 continue; 3538 ASSERT(ilg->ilg_ill == NULL); 3539 ilg_ill = ipif->ipif_ill; 3540 ASSERT(!ilg_ill->ill_isv6); 3541 if (ilg_ill == ill && 3542 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3543 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3544 /* no source filter, so this is a match */ 3545 return (ilg); 3546 } 3547 break; 3548 } 3549 } 3550 if (i == connp->conn_ilg_inuse) 3551 return (NULL); 3552 3553 /* 3554 * we have an ilg with matching ill and group; but 3555 * the ilg has a source list that we must check. 
3556 */ 3557 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3558 isinlist = B_FALSE; 3559 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3560 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3561 isinlist = B_TRUE; 3562 break; 3563 } 3564 } 3565 3566 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3567 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3568 return (ilg); 3569 3570 return (NULL); 3571 } 3572 3573 /* 3574 * Find an IPv6 ilg matching group, ill, and source 3575 */ 3576 ilg_t * 3577 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3578 const in6_addr_t *v6src, ill_t *ill) 3579 { 3580 int i; 3581 boolean_t isinlist; 3582 ilg_t *ilg; 3583 ill_t *ilg_ill; 3584 3585 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3586 3587 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3588 ilg = &connp->conn_ilg[i]; 3589 if ((ilg_ill = ilg->ilg_ill) == NULL) 3590 continue; 3591 ASSERT(ilg->ilg_ipif == NULL); 3592 ASSERT(ilg_ill->ill_isv6); 3593 if (ilg_ill == ill && 3594 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3595 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3596 /* no source filter, so this is a match */ 3597 return (ilg); 3598 } 3599 break; 3600 } 3601 } 3602 if (i == connp->conn_ilg_inuse) 3603 return (NULL); 3604 3605 /* 3606 * we have an ilg with matching ill and group; but 3607 * the ilg has a source list that we must check. 3608 */ 3609 isinlist = B_FALSE; 3610 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3611 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3612 isinlist = B_TRUE; 3613 break; 3614 } 3615 } 3616 3617 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3618 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3619 return (ilg); 3620 3621 return (NULL); 3622 } 3623 3624 /* 3625 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3626 * This is useful when the interface fails and we have moved 3627 * to a new ill, but still would like to locate using the index 3628 * that we originally used to join. Used only for IPv6 currently. 3629 */ 3630 static ilg_t * 3631 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3632 { 3633 ilg_t *ilg; 3634 int i; 3635 3636 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3637 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3638 ilg = &connp->conn_ilg[i]; 3639 /* ilg_ill is NULL for V4. Skip them */ 3640 if (ilg->ilg_ill == NULL) 3641 continue; 3642 /* ilg_ipif is NULL for V6 */ 3643 ASSERT(ilg->ilg_ipif == NULL); 3644 ASSERT(ilg->ilg_orig_ifindex != 0); 3645 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3646 ilg->ilg_orig_ifindex == ifindex) { 3647 return (ilg); 3648 } 3649 } 3650 return (NULL); 3651 } 3652 3653 /* 3654 * Find an IPv6 ilg matching group and ill 3655 */ 3656 ilg_t * 3657 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3658 { 3659 ilg_t *ilg; 3660 int i; 3661 ill_t *mem_ill; 3662 3663 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3664 3665 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3666 ilg = &connp->conn_ilg[i]; 3667 if ((mem_ill = ilg->ilg_ill) == NULL) 3668 continue; 3669 ASSERT(ilg->ilg_ipif == NULL); 3670 ASSERT(mem_ill->ill_isv6); 3671 if (mem_ill == ill && 3672 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3673 return (ilg); 3674 } 3675 return (NULL); 3676 } 3677 3678 /* 3679 * Find an IPv4 ilg matching group and ipif 3680 */ 3681 static ilg_t * 3682 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3683 { 3684 in6_addr_t v6group; 3685 int i; 3686 3687 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3688 ASSERT(!ipif->ipif_ill->ill_isv6); 3689 3690 if (group == INADDR_ANY) 3691 v6group = ipv6_all_zeros; 3692 else 3693 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3694 3695 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3696 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3697 &v6group) 
&& 3698 connp->conn_ilg[i].ilg_ipif == ipif) 3699 return (&connp->conn_ilg[i]); 3700 } 3701 return (NULL); 3702 } 3703 3704 /* 3705 * If a source address is passed in (src != NULL and src is not 3706 * unspecified), remove the specified src addr from the given ilg's 3707 * filter list, else delete the ilg. 3708 */ 3709 static void 3710 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3711 { 3712 int i; 3713 3714 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3715 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3716 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3717 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3718 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3719 3720 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3721 if (connp->conn_ilg_walker_cnt != 0) { 3722 ilg->ilg_flags |= ILG_DELETED; 3723 return; 3724 } 3725 3726 FREE_SLIST(ilg->ilg_filter); 3727 3728 i = ilg - &connp->conn_ilg[0]; 3729 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3730 3731 /* Move other entries up one step */ 3732 connp->conn_ilg_inuse--; 3733 for (; i < connp->conn_ilg_inuse; i++) 3734 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3735 3736 if (connp->conn_ilg_inuse == 0) { 3737 mi_free((char *)connp->conn_ilg); 3738 connp->conn_ilg = NULL; 3739 cv_broadcast(&connp->conn_refcv); 3740 } 3741 } else { 3742 l_remove(ilg->ilg_filter, src); 3743 } 3744 } 3745 3746 /* 3747 * Called from conn close. No new ilg can be added or removed. 3748 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3749 * will return error if conn has started closing. 
3750 */ 3751 void 3752 ilg_delete_all(conn_t *connp) 3753 { 3754 int i; 3755 ipif_t *ipif = NULL; 3756 ill_t *ill = NULL; 3757 ilg_t *ilg; 3758 in6_addr_t v6group; 3759 boolean_t success; 3760 ipsq_t *ipsq; 3761 int orig_ifindex; 3762 3763 mutex_enter(&connp->conn_lock); 3764 retry: 3765 ILG_WALKER_HOLD(connp); 3766 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3767 ilg = &connp->conn_ilg[i]; 3768 /* 3769 * Since this walk is not atomic (we drop the 3770 * conn_lock and wait in ipsq_enter) we need 3771 * to check for the ILG_DELETED flag. 3772 */ 3773 if (ilg->ilg_flags & ILG_DELETED) { 3774 /* Go to the next ilg */ 3775 i--; 3776 continue; 3777 } 3778 v6group = ilg->ilg_v6group; 3779 3780 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3781 ipif = ilg->ilg_ipif; 3782 ill = ipif->ipif_ill; 3783 } else { 3784 ipif = NULL; 3785 ill = ilg->ilg_ill; 3786 } 3787 /* 3788 * We may not be able to refhold the ill if the ill/ipif 3789 * is changing. But we need to make sure that the ill will 3790 * not vanish. So we just bump up the ill_waiter count. 3791 * If we are unable to do even that, then the ill is closing, 3792 * in which case the unplumb thread will handle the cleanup, 3793 * and we move on to the next ilg. 3794 */ 3795 if (!ill_waiter_inc(ill)) { 3796 /* Go to the next ilg */ 3797 i--; 3798 continue; 3799 } 3800 mutex_exit(&connp->conn_lock); 3801 /* 3802 * To prevent deadlock between ill close which waits inside 3803 * the perimeter, and conn close, ipsq_enter returns error, 3804 * the moment ILL_CONDEMNED is set, in which case ill close 3805 * takes responsibility to cleanup the ilgs. Note that we 3806 * have not yet set condemned flag, otherwise the conn can't 3807 * be refheld for cleanup by those routines and it would be 3808 * a mutual deadlock. 
3809 */ 3810 success = ipsq_enter(ill, B_FALSE); 3811 ipsq = ill->ill_phyint->phyint_ipsq; 3812 ill_waiter_dcr(ill); 3813 mutex_enter(&connp->conn_lock); 3814 if (!success) { 3815 /* Go to the next ilg */ 3816 i--; 3817 continue; 3818 } 3819 3820 /* 3821 * Make sure that nothing has changed under. For eg. 3822 * a failover/failback can change ilg_ill while we were 3823 * waiting to become exclusive above 3824 */ 3825 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3826 ipif = ilg->ilg_ipif; 3827 ill = ipif->ipif_ill; 3828 } else { 3829 ipif = NULL; 3830 ill = ilg->ilg_ill; 3831 } 3832 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3833 /* 3834 * The ilg has changed under us probably due 3835 * to a failover or unplumb. Retry on the same ilg. 3836 */ 3837 mutex_exit(&connp->conn_lock); 3838 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3839 mutex_enter(&connp->conn_lock); 3840 continue; 3841 } 3842 v6group = ilg->ilg_v6group; 3843 orig_ifindex = ilg->ilg_orig_ifindex; 3844 ilg_delete(connp, ilg, NULL); 3845 mutex_exit(&connp->conn_lock); 3846 3847 if (ipif != NULL) 3848 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3849 B_FALSE, B_TRUE); 3850 3851 else 3852 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3853 connp->conn_zoneid, B_FALSE, B_TRUE); 3854 3855 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3856 mutex_enter(&connp->conn_lock); 3857 /* Go to the next ilg */ 3858 i--; 3859 } 3860 ILG_WALKER_RELE(connp); 3861 3862 /* If any ill was skipped above wait and retry */ 3863 if (connp->conn_ilg_inuse != 0) { 3864 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3865 goto retry; 3866 } 3867 mutex_exit(&connp->conn_lock); 3868 } 3869 3870 /* 3871 * Called from ill close by ipcl_walk for clearing conn_ilg and 3872 * conn_multicast_ipif for a given ipif. conn is held by caller. 3873 * Note that ipcl_walk only walks conns that are not yet condemned. 3874 * condemned conns can't be refheld. For this reason, conn must become clean 3875 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3876 * condemned flag. 3877 */ 3878 static void 3879 conn_delete_ipif(conn_t *connp, caddr_t arg) 3880 { 3881 ipif_t *ipif = (ipif_t *)arg; 3882 int i; 3883 char group_buf1[INET6_ADDRSTRLEN]; 3884 char group_buf2[INET6_ADDRSTRLEN]; 3885 ipaddr_t group; 3886 ilg_t *ilg; 3887 3888 /* 3889 * Even though conn_ilg_inuse can change while we are in this loop, 3890 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3891 * be created or deleted for this connp, on this ill, since this ill 3892 * is the perimeter. So we won't miss any ilg in this cleanup. 3893 */ 3894 mutex_enter(&connp->conn_lock); 3895 3896 /* 3897 * Increment the walker count, so that ilg repacking does not 3898 * occur while we are in the loop. 3899 */ 3900 ILG_WALKER_HOLD(connp); 3901 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3902 ilg = &connp->conn_ilg[i]; 3903 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3904 continue; 3905 /* 3906 * ip_close cannot be cleaning this ilg at the same time. 3907 * since it also has to execute in this ill's perimeter which 3908 * we are now holding. Only a clean conn can be condemned. 
3909 */ 3910 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3911 3912 /* Blow away the membership */ 3913 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3914 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3915 group_buf1, sizeof (group_buf1)), 3916 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3917 group_buf2, sizeof (group_buf2)), 3918 ipif->ipif_ill->ill_name)); 3919 3920 /* ilg_ipif is NULL for V6, so we won't be here */ 3921 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3922 3923 group = V4_PART_OF_V6(ilg->ilg_v6group); 3924 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3925 mutex_exit(&connp->conn_lock); 3926 3927 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3928 mutex_enter(&connp->conn_lock); 3929 } 3930 3931 /* 3932 * If we are the last walker, need to physically delete the 3933 * ilgs and repack. 3934 */ 3935 ILG_WALKER_RELE(connp); 3936 3937 if (connp->conn_multicast_ipif == ipif) { 3938 /* Revert to late binding */ 3939 connp->conn_multicast_ipif = NULL; 3940 } 3941 mutex_exit(&connp->conn_lock); 3942 3943 conn_delete_ire(connp, (caddr_t)ipif); 3944 } 3945 3946 /* 3947 * Called from ill close by ipcl_walk for clearing conn_ilg and 3948 * conn_multicast_ill for a given ill. conn is held by caller. 3949 * Note that ipcl_walk only walks conns that are not yet condemned. 3950 * condemned conns can't be refheld. For this reason, conn must become clean 3951 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3952 * condemned flag. 3953 */ 3954 static void 3955 conn_delete_ill(conn_t *connp, caddr_t arg) 3956 { 3957 ill_t *ill = (ill_t *)arg; 3958 int i; 3959 char group_buf[INET6_ADDRSTRLEN]; 3960 in6_addr_t v6group; 3961 int orig_ifindex; 3962 ilg_t *ilg; 3963 3964 /* 3965 * Even though conn_ilg_inuse can change while we are in this loop, 3966 * no new ilgs can be created/deleted for this connp, on this 3967 * ill, since this ill is the perimeter. So we won't miss any ilg 3968 * in this cleanup. 
3969 */ 3970 mutex_enter(&connp->conn_lock); 3971 3972 /* 3973 * Increment the walker count, so that ilg repacking does not 3974 * occur while we are in the loop. 3975 */ 3976 ILG_WALKER_HOLD(connp); 3977 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3978 ilg = &connp->conn_ilg[i]; 3979 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3980 /* 3981 * ip_close cannot be cleaning this ilg at the same 3982 * time, since it also has to execute in this ill's 3983 * perimeter which we are now holding. Only a clean 3984 * conn can be condemned. 3985 */ 3986 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3987 3988 /* Blow away the membership */ 3989 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3990 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3991 group_buf, sizeof (group_buf)), 3992 ill->ill_name)); 3993 3994 v6group = ilg->ilg_v6group; 3995 orig_ifindex = ilg->ilg_orig_ifindex; 3996 ilg_delete(connp, ilg, NULL); 3997 mutex_exit(&connp->conn_lock); 3998 3999 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 4000 connp->conn_zoneid, B_FALSE, B_TRUE); 4001 mutex_enter(&connp->conn_lock); 4002 } 4003 } 4004 /* 4005 * If we are the last walker, need to physically delete the 4006 * ilgs and repack. 4007 */ 4008 ILG_WALKER_RELE(connp); 4009 4010 if (connp->conn_multicast_ill == ill) { 4011 /* Revert to late binding */ 4012 connp->conn_multicast_ill = NULL; 4013 connp->conn_orig_multicast_ifindex = 0; 4014 } 4015 mutex_exit(&connp->conn_lock); 4016 } 4017 4018 /* 4019 * Called when an ipif is unplumbed to make sure that there are no 4020 * dangling conn references to that ipif. 4021 * Handles ilg_ipif and conn_multicast_ipif 4022 */ 4023 void 4024 reset_conn_ipif(ipif) 4025 ipif_t *ipif; 4026 { 4027 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4028 4029 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4030 } 4031 4032 /* 4033 * Called when an ill is unplumbed to make sure that there are no 4034 * dangling conn references to that ill. 
4035 * Handles ilg_ill, conn_multicast_ill. 4036 */ 4037 void 4038 reset_conn_ill(ill_t *ill) 4039 { 4040 ip_stack_t *ipst = ill->ill_ipst; 4041 4042 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 4043 } 4044 4045 #ifdef DEBUG 4046 /* 4047 * Walk functions walk all the interfaces in the system to make 4048 * sure that there is no refernece to the ipif or ill that is 4049 * going away. 4050 */ 4051 int 4052 ilm_walk_ill(ill_t *ill) 4053 { 4054 int cnt = 0; 4055 ill_t *till; 4056 ilm_t *ilm; 4057 ill_walk_context_t ctx; 4058 ip_stack_t *ipst = ill->ill_ipst; 4059 4060 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4061 till = ILL_START_WALK_ALL(&ctx, ipst); 4062 for (; till != NULL; till = ill_next(&ctx, till)) { 4063 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4064 if (ilm->ilm_ill == ill) { 4065 cnt++; 4066 } 4067 } 4068 } 4069 rw_exit(&ipst->ips_ill_g_lock); 4070 4071 return (cnt); 4072 } 4073 4074 /* 4075 * This function is called before the ipif is freed. 4076 */ 4077 int 4078 ilm_walk_ipif(ipif_t *ipif) 4079 { 4080 int cnt = 0; 4081 ill_t *till; 4082 ilm_t *ilm; 4083 ill_walk_context_t ctx; 4084 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4085 4086 till = ILL_START_WALK_ALL(&ctx, ipst); 4087 for (; till != NULL; till = ill_next(&ctx, till)) { 4088 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4089 if (ilm->ilm_ipif == ipif) { 4090 cnt++; 4091 } 4092 } 4093 } 4094 return (cnt); 4095 } 4096 #endif 4097