1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/arp.h> 54 #include <inet/ip.h> 55 #include <inet/ip6.h> 56 #include <inet/ip_if.h> 57 #include <inet/ip_ndp.h> 58 #include <inet/ip_multi.h> 59 #include <inet/ipclassifier.h> 60 #include <inet/ipsec_impl.h> 61 #include <inet/sctp_ip.h> 62 #include <inet/ip_listutils.h> 63 #include <inet/udp_impl.h> 64 65 /* igmpv3/mldv2 source filter manipulation */ 66 static void ilm_bld_flists(conn_t *conn, void *arg); 67 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 68 slist_t *flist); 69 70 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 71 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 72 int orig_ifindex, zoneid_t zoneid); 73 static void ilm_delete(ilm_t *ilm); 74 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 77 const in6_addr_t *v6group, int index); 78 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 79 ipif_t *ipif); 80 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 81 mcast_record_t fmode, ipaddr_t src); 82 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 83 mcast_record_t fmode, const in6_addr_t *v6src); 84 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 85 
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 86 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 87 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 88 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 89 static void conn_ilg_reap(conn_t *connp); 90 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 91 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 92 static int ip_opt_delete_group_excl_v6(conn_t *connp, 93 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 94 const in6_addr_t *v6src); 95 96 /* 97 * MT notes: 98 * 99 * Multicast joins operate on both the ilg and ilm structures. Multiple 100 * threads operating on an conn (socket) trying to do multicast joins 101 * need to synchronize when operating on the ilg. Multiple threads 102 * potentially operating on different conn (socket endpoints) trying to 103 * do multicast joins could eventually end up trying to manipulate the 104 * ilm simulatenously and need to synchronize on the access to the ilm. 105 * Both are amenable to standard Solaris MT techniques, but it would be 106 * complex to handle a failover or failback which needs to manipulate 107 * ilg/ilms if an applications can also simultaenously join/leave 108 * multicast groups. Hence multicast join/leave also go through the ipsq_t 109 * serialization. 110 * 111 * Multicast joins and leaves are single-threaded per phyint/IPMP group 112 * using the ipsq serialization mechanism. 113 * 114 * An ilm is an IP data structure used to track multicast join/leave. 115 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 116 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 117 * referencing the ilm. ilms are created / destroyed only as writer. ilms 118 * are not passed around, instead they are looked up and used under the 119 * ill_lock or as writer. So we don't need a dynamic refcount of the number 120 * of threads holding reference to an ilm. 
121 * 122 * Multicast Join operation: 123 * 124 * The first step is to determine the ipif (v4) or ill (v6) on which 125 * the join operation is to be done. The join is done after becoming 126 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 127 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 128 * Multiple threads can attempt to join simultaneously on different ipif/ill 129 * on the same conn. In this case the ipsq serialization does not help in 130 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 131 * The conn_lock also protects all the ilg_t members. 132 * 133 * Leave operation. 134 * 135 * Similar to the join operation, the first step is to determine the ipif 136 * or ill (v6) on which the leave operation is to be done. The leave operation 137 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 138 * As with join ilg modification is done under the protection of the conn lock. 139 */ 140 141 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 142 ASSERT(connp != NULL); \ 143 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 144 (first_mp), (func), (type), B_TRUE); \ 145 if ((ipsq) == NULL) { \ 146 ipif_refrele(ipif); \ 147 return (EINPROGRESS); \ 148 } 149 150 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 151 ASSERT(connp != NULL); \ 152 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 153 (first_mp), (func), (type), B_TRUE); \ 154 if ((ipsq) == NULL) { \ 155 ill_refrele(ill); \ 156 return (EINPROGRESS); \ 157 } 158 159 #define IPSQ_EXIT(ipsq) \ 160 if (ipsq != NULL) \ 161 ipsq_exit(ipsq, B_TRUE, B_TRUE); 162 163 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 164 165 #define ILG_WALKER_RELE(connp) \ 166 { \ 167 (connp)->conn_ilg_walker_cnt--; \ 168 if ((connp)->conn_ilg_walker_cnt == 0) \ 169 conn_ilg_reap(connp); \ 170 } 171 172 static void 173 conn_ilg_reap(conn_t *connp) 174 { 175 int to; 
176 int from; 177 178 ASSERT(MUTEX_HELD(&connp->conn_lock)); 179 180 to = 0; 181 from = 0; 182 while (from < connp->conn_ilg_inuse) { 183 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 184 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 185 from++; 186 continue; 187 } 188 if (to != from) 189 connp->conn_ilg[to] = connp->conn_ilg[from]; 190 to++; 191 from++; 192 } 193 194 connp->conn_ilg_inuse = to; 195 196 if (connp->conn_ilg_inuse == 0) { 197 mi_free((char *)connp->conn_ilg); 198 connp->conn_ilg = NULL; 199 cv_broadcast(&connp->conn_refcv); 200 } 201 } 202 203 #define GETSTRUCT(structure, number) \ 204 ((structure *)mi_zalloc(sizeof (structure) * (number))) 205 206 #define ILG_ALLOC_CHUNK 16 207 208 /* 209 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 210 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 211 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 212 * returned ilg). Returns NULL on failure (ENOMEM). 213 * 214 * Assumes connp->conn_lock is held. 
215 */ 216 static ilg_t * 217 conn_ilg_alloc(conn_t *connp) 218 { 219 ilg_t *new; 220 int curcnt; 221 222 ASSERT(MUTEX_HELD(&connp->conn_lock)); 223 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 224 225 if (connp->conn_ilg == NULL) { 226 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 227 if (connp->conn_ilg == NULL) 228 return (NULL); 229 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 230 connp->conn_ilg_inuse = 0; 231 } 232 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 233 curcnt = connp->conn_ilg_allocated; 234 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 235 if (new == NULL) 236 return (NULL); 237 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 238 mi_free((char *)connp->conn_ilg); 239 connp->conn_ilg = new; 240 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 241 } 242 243 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 244 } 245 246 typedef struct ilm_fbld_s { 247 ilm_t *fbld_ilm; 248 int fbld_in_cnt; 249 int fbld_ex_cnt; 250 slist_t fbld_in; 251 slist_t fbld_ex; 252 boolean_t fbld_in_overflow; 253 } ilm_fbld_t; 254 255 static void 256 ilm_bld_flists(conn_t *conn, void *arg) 257 { 258 int i; 259 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 260 ilm_t *ilm = fbld->fbld_ilm; 261 in6_addr_t *v6group = &ilm->ilm_v6addr; 262 263 if (conn->conn_ilg_inuse == 0) 264 return; 265 266 /* 267 * Since we can't break out of the ipcl_walk once started, we still 268 * have to look at every conn. But if we've already found one 269 * (EXCLUDE, NULL) list, there's no need to keep checking individual 270 * ilgs--that will be our state. 271 */ 272 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 273 return; 274 275 /* 276 * Check this conn's ilgs to see if any are interested in our 277 * ilm (group, interface match). If so, update the master 278 * include and exclude lists we're building in the fbld struct 279 * with this ilg's filter info. 
280 */ 281 mutex_enter(&conn->conn_lock); 282 for (i = 0; i < conn->conn_ilg_inuse; i++) { 283 ilg_t *ilg = &conn->conn_ilg[i]; 284 if ((ilg->ilg_ill == ilm->ilm_ill) && 285 (ilg->ilg_ipif == ilm->ilm_ipif) && 286 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 287 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 288 fbld->fbld_in_cnt++; 289 if (!fbld->fbld_in_overflow) 290 l_union_in_a(&fbld->fbld_in, 291 ilg->ilg_filter, 292 &fbld->fbld_in_overflow); 293 } else { 294 fbld->fbld_ex_cnt++; 295 /* 296 * On the first exclude list, don't try to do 297 * an intersection, as the master exclude list 298 * is intentionally empty. If the master list 299 * is still empty on later iterations, that 300 * means we have at least one ilg with an empty 301 * exclude list, so that should be reflected 302 * when we take the intersection. 303 */ 304 if (fbld->fbld_ex_cnt == 1) { 305 if (ilg->ilg_filter != NULL) 306 l_copy(ilg->ilg_filter, 307 &fbld->fbld_ex); 308 } else { 309 l_intersection_in_a(&fbld->fbld_ex, 310 ilg->ilg_filter); 311 } 312 } 313 /* there will only be one match, so break now. 
*/ 314 break; 315 } 316 } 317 mutex_exit(&conn->conn_lock); 318 } 319 320 static void 321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 322 { 323 ilm_fbld_t fbld; 324 ip_stack_t *ipst = ilm->ilm_ipst; 325 326 fbld.fbld_ilm = ilm; 327 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 328 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 329 fbld.fbld_in_overflow = B_FALSE; 330 331 /* first, construct our master include and exclude lists */ 332 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 333 334 /* now use those master lists to generate the interface filter */ 335 336 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 337 if (fbld.fbld_in_overflow) { 338 *fmode = MODE_IS_EXCLUDE; 339 flist->sl_numsrc = 0; 340 return; 341 } 342 343 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 344 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 345 *fmode = MODE_IS_INCLUDE; 346 flist->sl_numsrc = 0; 347 return; 348 } 349 350 /* 351 * If there are no exclude lists, then the interface filter 352 * is INCLUDE, with its filter list equal to fbld_in. A single 353 * exclude list makes the interface filter EXCLUDE, with its 354 * filter list equal to (fbld_ex - fbld_in). 355 */ 356 if (fbld.fbld_ex_cnt == 0) { 357 *fmode = MODE_IS_INCLUDE; 358 l_copy(&fbld.fbld_in, flist); 359 } else { 360 *fmode = MODE_IS_EXCLUDE; 361 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 362 } 363 } 364 365 /* 366 * If the given interface has failed, choose a new one to join on so 367 * that we continue to receive packets. ilg_orig_ifindex remembers 368 * what the application used to join on so that we know the ilg to 369 * delete even though we change the ill here. Callers will store the 370 * ilg returned from this function in ilg_ill. Thus when we receive 371 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 372 * 373 * This function must be called as writer so we can walk the group 374 * list and examine flags without holding a lock. 
375 */ 376 ill_t * 377 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 378 { 379 ill_t *till; 380 ill_group_t *illgrp = ill->ill_group; 381 382 ASSERT(IAM_WRITER_ILL(ill)); 383 384 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 385 return (ill); 386 387 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 388 return (ill); 389 390 till = illgrp->illgrp_ill; 391 while (till != NULL && 392 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 393 till = till->ill_group_next; 394 } 395 if (till != NULL) 396 return (till); 397 398 return (ill); 399 } 400 401 static int 402 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 403 boolean_t isv6) 404 { 405 mcast_record_t fmode; 406 slist_t *flist; 407 boolean_t fdefault; 408 char buf[INET6_ADDRSTRLEN]; 409 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 410 411 /* 412 * There are several cases where the ilm's filter state 413 * defaults to (EXCLUDE, NULL): 414 * - we've had previous joins without associated ilgs 415 * - this join has no associated ilg 416 * - the ilg's filter state is (EXCLUDE, NULL) 417 */ 418 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 419 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 420 421 /* attempt mallocs (if needed) before doing anything else */ 422 if ((flist = l_alloc()) == NULL) 423 return (ENOMEM); 424 if (!fdefault && ilm->ilm_filter == NULL) { 425 ilm->ilm_filter = l_alloc(); 426 if (ilm->ilm_filter == NULL) { 427 l_free(flist); 428 return (ENOMEM); 429 } 430 } 431 432 if (ilgstat != ILGSTAT_CHANGE) 433 ilm->ilm_refcnt++; 434 435 if (ilgstat == ILGSTAT_NONE) 436 ilm->ilm_no_ilg_cnt++; 437 438 /* 439 * Determine new filter state. If it's not the default 440 * (EXCLUDE, NULL), we must walk the conn list to find 441 * any ilgs interested in this group, and re-build the 442 * ilm filter. 
443 */ 444 if (fdefault) { 445 fmode = MODE_IS_EXCLUDE; 446 flist->sl_numsrc = 0; 447 } else { 448 ilm_gen_filter(ilm, &fmode, flist); 449 } 450 451 /* make sure state actually changed; nothing to do if not. */ 452 if ((ilm->ilm_fmode == fmode) && 453 !lists_are_different(ilm->ilm_filter, flist)) { 454 l_free(flist); 455 return (0); 456 } 457 458 /* send the state change report */ 459 if (!IS_LOOPBACK(ill)) { 460 if (isv6) 461 mld_statechange(ilm, fmode, flist); 462 else 463 igmp_statechange(ilm, fmode, flist); 464 } 465 466 /* update the ilm state */ 467 ilm->ilm_fmode = fmode; 468 if (flist->sl_numsrc > 0) 469 l_copy(flist, ilm->ilm_filter); 470 else 471 CLEAR_SLIST(ilm->ilm_filter); 472 473 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 474 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 475 476 l_free(flist); 477 return (0); 478 } 479 480 static int 481 ilm_update_del(ilm_t *ilm, boolean_t isv6) 482 { 483 mcast_record_t fmode; 484 slist_t *flist; 485 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 486 487 ip1dbg(("ilm_update_del: still %d left; updating state\n", 488 ilm->ilm_refcnt)); 489 490 if ((flist = l_alloc()) == NULL) 491 return (ENOMEM); 492 493 /* 494 * If present, the ilg in question has already either been 495 * updated or removed from our list; so all we need to do 496 * now is walk the list to update the ilm filter state. 497 * 498 * Skip the list walk if we have any no-ilg joins, which 499 * cause the filter state to revert to (EXCLUDE, NULL). 
500 */ 501 if (ilm->ilm_no_ilg_cnt != 0) { 502 fmode = MODE_IS_EXCLUDE; 503 flist->sl_numsrc = 0; 504 } else { 505 ilm_gen_filter(ilm, &fmode, flist); 506 } 507 508 /* check to see if state needs to be updated */ 509 if ((ilm->ilm_fmode == fmode) && 510 (!lists_are_different(ilm->ilm_filter, flist))) { 511 l_free(flist); 512 return (0); 513 } 514 515 if (!IS_LOOPBACK(ill)) { 516 if (isv6) 517 mld_statechange(ilm, fmode, flist); 518 else 519 igmp_statechange(ilm, fmode, flist); 520 } 521 522 ilm->ilm_fmode = fmode; 523 if (flist->sl_numsrc > 0) { 524 if (ilm->ilm_filter == NULL) { 525 ilm->ilm_filter = l_alloc(); 526 if (ilm->ilm_filter == NULL) { 527 char buf[INET6_ADDRSTRLEN]; 528 ip1dbg(("ilm_update_del: failed to alloc ilm " 529 "filter; no source filtering for %s on %s", 530 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 531 buf, sizeof (buf)), ill->ill_name)); 532 ilm->ilm_fmode = MODE_IS_EXCLUDE; 533 l_free(flist); 534 return (0); 535 } 536 } 537 l_copy(flist, ilm->ilm_filter); 538 } else { 539 CLEAR_SLIST(ilm->ilm_filter); 540 } 541 542 l_free(flist); 543 return (0); 544 } 545 546 /* 547 * INADDR_ANY means all multicast addresses. This is only used 548 * by the multicast router. 549 * INADDR_ANY is stored as IPv6 unspecified addr. 550 */ 551 int 552 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 553 mcast_record_t ilg_fmode, slist_t *ilg_flist) 554 { 555 ill_t *ill = ipif->ipif_ill; 556 ilm_t *ilm; 557 in6_addr_t v6group; 558 int ret; 559 560 ASSERT(IAM_WRITER_IPIF(ipif)); 561 562 if (!CLASSD(group) && group != INADDR_ANY) 563 return (EINVAL); 564 565 /* 566 * INADDR_ANY is represented as the IPv6 unspecifed addr. 567 */ 568 if (group == INADDR_ANY) 569 v6group = ipv6_all_zeros; 570 else 571 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 572 573 ilm = ilm_lookup_ipif(ipif, group); 574 if (ilm != NULL) 575 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 576 577 /* 578 * ilms are associated with ipifs in IPv4. 
It moves with the 579 * ipif if the ipif moves to a new ill when the interface 580 * fails. Thus we really don't check whether the ipif_ill 581 * has failed like in IPv6. If it has FAILED the ipif 582 * will move (daemon will move it) and hence the ilm, if the 583 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 584 * we continue to receive in the same place even if the 585 * interface fails. 586 */ 587 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 588 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 589 if (ilm == NULL) 590 return (ENOMEM); 591 592 if (group == INADDR_ANY) { 593 /* 594 * Check how many ipif's have members in this group - 595 * if more then one we should not tell the driver to join 596 * this time 597 */ 598 if (ilm_numentries_v6(ill, &v6group) > 1) 599 return (0); 600 if (ill->ill_group == NULL) 601 ret = ip_join_allmulti(ipif); 602 else 603 ret = ill_nominate_mcast_rcv(ill->ill_group); 604 if (ret != 0) 605 ilm_delete(ilm); 606 return (ret); 607 } 608 609 if (!IS_LOOPBACK(ill)) 610 igmp_joingroup(ilm); 611 612 if (ilm_numentries_v6(ill, &v6group) > 1) 613 return (0); 614 615 ret = ip_ll_addmulti_v6(ipif, &v6group); 616 if (ret != 0) 617 ilm_delete(ilm); 618 return (ret); 619 } 620 621 /* 622 * The unspecified address means all multicast addresses. 623 * This is only used by the multicast router. 624 * 625 * ill identifies the interface to join on; it may not match the 626 * interface requested by the application of a failover has taken 627 * place. orig_ifindex always identifies the interface requested 628 * by the app. 629 * 630 * ilgstat tells us if there's an ilg associated with this join, 631 * and if so, if it's a new ilg or a change to an existing one. 632 * ilg_fmode and ilg_flist give us the current filter state of 633 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 
634 */ 635 int 636 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 637 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 638 slist_t *ilg_flist) 639 { 640 ilm_t *ilm; 641 int ret; 642 643 ASSERT(IAM_WRITER_ILL(ill)); 644 645 if (!IN6_IS_ADDR_MULTICAST(v6group) && 646 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 647 return (EINVAL); 648 } 649 650 /* 651 * An ilm is uniquely identified by the tuple of (group, ill, 652 * orig_ill). group is the multicast group address, ill is 653 * the interface on which it is currently joined, and orig_ill 654 * is the interface on which the application requested the 655 * join. orig_ill and ill are the same unless orig_ill has 656 * failed over. 657 * 658 * Both orig_ill and ill are required, which means we may have 659 * 2 ilms on an ill for the same group, but with different 660 * orig_ills. These must be kept separate, so that when failback 661 * occurs, the appropriate ilms are moved back to their orig_ill 662 * without disrupting memberships on the ill to which they had 663 * been moved. 664 * 665 * In order to track orig_ill, we store orig_ifindex in the 666 * ilm and ilg. 667 */ 668 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 669 if (ilm != NULL) 670 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 671 672 /* 673 * We need to remember where the application really wanted 674 * to join. This will be used later if we want to failback 675 * to the original interface. 
676 */ 677 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 678 ilg_flist, orig_ifindex, zoneid); 679 if (ilm == NULL) 680 return (ENOMEM); 681 682 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 683 /* 684 * Check how many ipif's that have members in this group - 685 * if more then one we should not tell the driver to join 686 * this time 687 */ 688 if (ilm_numentries_v6(ill, v6group) > 1) 689 return (0); 690 if (ill->ill_group == NULL) 691 ret = ip_join_allmulti(ill->ill_ipif); 692 else 693 ret = ill_nominate_mcast_rcv(ill->ill_group); 694 695 if (ret != 0) 696 ilm_delete(ilm); 697 return (ret); 698 } 699 700 if (!IS_LOOPBACK(ill)) 701 mld_joingroup(ilm); 702 703 /* 704 * If we have more then one we should not tell the driver 705 * to join this time. 706 */ 707 if (ilm_numentries_v6(ill, v6group) > 1) 708 return (0); 709 710 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 711 if (ret != 0) 712 ilm_delete(ilm); 713 return (ret); 714 } 715 716 /* 717 * Send a multicast request to the driver for enabling multicast reception 718 * for v6groupp address. The caller has already checked whether it is 719 * appropriate to send one or not. 720 */ 721 int 722 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 723 { 724 mblk_t *mp; 725 uint32_t addrlen, addroff; 726 char group_buf[INET6_ADDRSTRLEN]; 727 728 ASSERT(IAM_WRITER_ILL(ill)); 729 730 /* 731 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 732 * on. 733 */ 734 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 735 &addrlen, &addroff); 736 if (!mp) 737 return (ENOMEM); 738 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 739 ipaddr_t v4group; 740 741 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 742 /* 743 * NOTE!!! 744 * The "addroff" passed in here was calculated by 745 * ill_create_dl(), and will be used by ill_create_squery() 746 * to perform some twisted coding magic. It is the offset 747 * into the dl_xxx_req of the hw addr. 
Here, it will be 748 * added to b_wptr - b_rptr to create a magic number that 749 * is not an offset into this squery mblk. 750 * The actual hardware address will be accessed only in the 751 * dl_xxx_req, not in the squery. More importantly, 752 * that hardware address can *only* be accessed in this 753 * mblk chain by calling mi_offset_param_c(), which uses 754 * the magic number in the squery hw offset field to go 755 * to the *next* mblk (the dl_xxx_req), subtract the 756 * (b_wptr - b_rptr), and find the actual offset into 757 * the dl_xxx_req. 758 * Any method that depends on using the 759 * offset field in the dl_disabmulti_req or squery 760 * to find either hardware address will similarly fail. 761 * 762 * Look in ar_entry_squery() in arp.c to see how this offset 763 * is used. 764 */ 765 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 766 if (!mp) 767 return (ENOMEM); 768 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 769 inet_ntop(AF_INET6, v6groupp, group_buf, 770 sizeof (group_buf)), 771 ill->ill_name)); 772 putnext(ill->ill_rq, mp); 773 } else { 774 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 775 " %s\n", 776 inet_ntop(AF_INET6, v6groupp, group_buf, 777 sizeof (group_buf)), 778 ill->ill_name)); 779 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 780 } 781 return (0); 782 } 783 784 /* 785 * Send a multicast request to the driver for enabling multicast 786 * membership for v6group if appropriate. 
787 */ 788 static int 789 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 790 { 791 ill_t *ill = ipif->ipif_ill; 792 793 ASSERT(IAM_WRITER_IPIF(ipif)); 794 795 if (ill->ill_net_type != IRE_IF_RESOLVER || 796 ipif->ipif_flags & IPIF_POINTOPOINT) { 797 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 798 return (0); /* Must be IRE_IF_NORESOLVER */ 799 } 800 801 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 802 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 803 return (0); 804 } 805 if (!ill->ill_dl_up) { 806 /* 807 * Nobody there. All multicast addresses will be re-joined 808 * when we get the DL_BIND_ACK bringing the interface up. 809 */ 810 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 811 return (0); 812 } 813 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 814 } 815 816 /* 817 * INADDR_ANY means all multicast addresses. This is only used 818 * by the multicast router. 819 * INADDR_ANY is stored as the IPv6 unspecifed addr. 820 */ 821 int 822 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 823 { 824 ill_t *ill = ipif->ipif_ill; 825 ilm_t *ilm; 826 in6_addr_t v6group; 827 int ret; 828 829 ASSERT(IAM_WRITER_IPIF(ipif)); 830 831 if (!CLASSD(group) && group != INADDR_ANY) 832 return (EINVAL); 833 834 /* 835 * INADDR_ANY is represented as the IPv6 unspecifed addr. 836 */ 837 if (group == INADDR_ANY) 838 v6group = ipv6_all_zeros; 839 else 840 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 841 842 /* 843 * Look for a match on the ipif. 844 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 
845 */ 846 ilm = ilm_lookup_ipif(ipif, group); 847 if (ilm == NULL) 848 return (ENOENT); 849 850 /* Update counters */ 851 if (no_ilg) 852 ilm->ilm_no_ilg_cnt--; 853 854 if (leaving) 855 ilm->ilm_refcnt--; 856 857 if (ilm->ilm_refcnt > 0) 858 return (ilm_update_del(ilm, B_FALSE)); 859 860 if (group == INADDR_ANY) { 861 ilm_delete(ilm); 862 /* 863 * Check how many ipif's that have members in this group - 864 * if there are still some left then don't tell the driver 865 * to drop it. 866 */ 867 if (ilm_numentries_v6(ill, &v6group) != 0) 868 return (0); 869 870 /* 871 * If we never joined, then don't leave. This can happen 872 * if we're in an IPMP group, since only one ill per IPMP 873 * group receives all multicast packets. 874 */ 875 if (!ill->ill_join_allmulti) { 876 ASSERT(ill->ill_group != NULL); 877 return (0); 878 } 879 880 ret = ip_leave_allmulti(ipif); 881 if (ill->ill_group != NULL) 882 (void) ill_nominate_mcast_rcv(ill->ill_group); 883 return (ret); 884 } 885 886 if (!IS_LOOPBACK(ill)) 887 igmp_leavegroup(ilm); 888 889 ilm_delete(ilm); 890 /* 891 * Check how many ipif's that have members in this group - 892 * if there are still some left then don't tell the driver 893 * to drop it. 894 */ 895 if (ilm_numentries_v6(ill, &v6group) != 0) 896 return (0); 897 return (ip_ll_delmulti_v6(ipif, &v6group)); 898 } 899 900 /* 901 * The unspecified address means all multicast addresses. 902 * This is only used by the multicast router. 903 */ 904 int 905 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 906 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 907 { 908 ipif_t *ipif; 909 ilm_t *ilm; 910 int ret; 911 912 ASSERT(IAM_WRITER_ILL(ill)); 913 914 if (!IN6_IS_ADDR_MULTICAST(v6group) && 915 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 916 return (EINVAL); 917 918 /* 919 * Look for a match on the ill. 920 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 921 * 922 * Similar to ip_addmulti_v6, we should always look using 923 * the orig_ifindex. 
924 * 925 * 1) If orig_ifindex is different from ill's ifindex 926 * we should have an ilm with orig_ifindex created in 927 * ip_addmulti_v6. We should delete that here. 928 * 929 * 2) If orig_ifindex is same as ill's ifindex, we should 930 * not delete the ilm that is temporarily here because of 931 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 932 * different from ill's ifindex. 933 * 934 * Thus, always lookup using orig_ifindex. 935 */ 936 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 937 if (ilm == NULL) 938 return (ENOENT); 939 940 ASSERT(ilm->ilm_ill == ill); 941 942 ipif = ill->ill_ipif; 943 944 /* Update counters */ 945 if (no_ilg) 946 ilm->ilm_no_ilg_cnt--; 947 948 if (leaving) 949 ilm->ilm_refcnt--; 950 951 if (ilm->ilm_refcnt > 0) 952 return (ilm_update_del(ilm, B_TRUE)); 953 954 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 955 ilm_delete(ilm); 956 /* 957 * Check how many ipif's that have members in this group - 958 * if there are still some left then don't tell the driver 959 * to drop it. 960 */ 961 if (ilm_numentries_v6(ill, v6group) != 0) 962 return (0); 963 964 /* 965 * If we never joined, then don't leave. This can happen 966 * if we're in an IPMP group, since only one ill per IPMP 967 * group receives all multicast packets. 968 */ 969 if (!ill->ill_join_allmulti) { 970 ASSERT(ill->ill_group != NULL); 971 return (0); 972 } 973 974 ret = ip_leave_allmulti(ipif); 975 if (ill->ill_group != NULL) 976 (void) ill_nominate_mcast_rcv(ill->ill_group); 977 return (ret); 978 } 979 980 if (!IS_LOOPBACK(ill)) 981 mld_leavegroup(ilm); 982 983 ilm_delete(ilm); 984 /* 985 * Check how many ipif's that have members in this group - 986 * if there are still some left then don't tell the driver 987 * to drop it. 
988 */ 989 if (ilm_numentries_v6(ill, v6group) != 0) 990 return (0); 991 return (ip_ll_delmulti_v6(ipif, v6group)); 992 } 993 994 /* 995 * Send a multicast request to the driver for disabling multicast reception 996 * for v6groupp address. The caller has already checked whether it is 997 * appropriate to send one or not. 998 */ 999 int 1000 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1001 { 1002 mblk_t *mp; 1003 char group_buf[INET6_ADDRSTRLEN]; 1004 uint32_t addrlen, addroff; 1005 1006 ASSERT(IAM_WRITER_ILL(ill)); 1007 /* 1008 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1009 * on. 1010 */ 1011 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1012 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1013 1014 if (!mp) 1015 return (ENOMEM); 1016 1017 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1018 ipaddr_t v4group; 1019 1020 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1021 /* 1022 * NOTE!!! 1023 * The "addroff" passed in here was calculated by 1024 * ill_create_dl(), and will be used by ill_create_squery() 1025 * to perform some twisted coding magic. It is the offset 1026 * into the dl_xxx_req of the hw addr. Here, it will be 1027 * added to b_wptr - b_rptr to create a magic number that 1028 * is not an offset into this mblk. 1029 * 1030 * Please see the comment in ip_ll_send)enabmulti_req() 1031 * for a complete explanation. 1032 * 1033 * Look in ar_entry_squery() in arp.c to see how this offset 1034 * is used. 
1035 */ 1036 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1037 if (!mp) 1038 return (ENOMEM); 1039 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1040 inet_ntop(AF_INET6, v6groupp, group_buf, 1041 sizeof (group_buf)), 1042 ill->ill_name)); 1043 putnext(ill->ill_rq, mp); 1044 } else { 1045 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" 1046 " %s\n", 1047 inet_ntop(AF_INET6, v6groupp, group_buf, 1048 sizeof (group_buf)), 1049 ill->ill_name)); 1050 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1051 } 1052 return (0); 1053 } 1054 1055 /* 1056 * Send a multicast request to the driver for disabling multicast 1057 * membership for v6group if appropriate. 1058 */ 1059 static int 1060 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1061 { 1062 ill_t *ill = ipif->ipif_ill; 1063 1064 ASSERT(IAM_WRITER_IPIF(ipif)); 1065 1066 if (ill->ill_net_type != IRE_IF_RESOLVER || 1067 ipif->ipif_flags & IPIF_POINTOPOINT) { 1068 return (0); /* Must be IRE_IF_NORESOLVER */ 1069 } 1070 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1071 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1072 return (0); 1073 } 1074 if (!ill->ill_dl_up) { 1075 /* 1076 * Nobody there. All multicast addresses will be re-joined 1077 * when we get the DL_BIND_ACK bringing the interface up. 1078 */ 1079 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1080 return (0); 1081 } 1082 return (ip_ll_send_disabmulti_req(ill, v6group)); 1083 } 1084 1085 /* 1086 * Make the driver pass up all multicast packets 1087 * 1088 * With ill groups, the caller makes sure that there is only 1089 * one ill joining the allmulti group. 1090 */ 1091 int 1092 ip_join_allmulti(ipif_t *ipif) 1093 { 1094 ill_t *ill = ipif->ipif_ill; 1095 mblk_t *mp; 1096 uint32_t addrlen, addroff; 1097 1098 ASSERT(IAM_WRITER_IPIF(ipif)); 1099 1100 if (!ill->ill_dl_up) { 1101 /* 1102 * Nobody there. 
All multicast addresses will be re-joined 1103 * when we get the DL_BIND_ACK bringing the interface up. 1104 */ 1105 return (0); 1106 } 1107 1108 ASSERT(!ill->ill_join_allmulti); 1109 1110 /* 1111 * Create a DL_PROMISCON_REQ message and send it directly to 1112 * the DLPI provider. We don't need to do this for certain 1113 * media types for which we never need to turn promiscuous 1114 * mode on. 1115 */ 1116 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1117 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1118 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1119 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1120 if (mp == NULL) 1121 return (ENOMEM); 1122 ill_dlpi_send(ill, mp); 1123 } 1124 1125 mutex_enter(&ill->ill_lock); 1126 ill->ill_join_allmulti = B_TRUE; 1127 mutex_exit(&ill->ill_lock); 1128 return (0); 1129 } 1130 1131 /* 1132 * Make the driver stop passing up all multicast packets 1133 * 1134 * With ill groups, we need to nominate some other ill as 1135 * this ipif->ipif_ill is leaving the group. 1136 */ 1137 int 1138 ip_leave_allmulti(ipif_t *ipif) 1139 { 1140 ill_t *ill = ipif->ipif_ill; 1141 mblk_t *mp; 1142 uint32_t addrlen, addroff; 1143 1144 ASSERT(IAM_WRITER_IPIF(ipif)); 1145 1146 if (!ill->ill_dl_up) { 1147 /* 1148 * Nobody there. All multicast addresses will be re-joined 1149 * when we get the DL_BIND_ACK bringing the interface up. 1150 */ 1151 return (0); 1152 } 1153 1154 ASSERT(ill->ill_join_allmulti); 1155 1156 /* 1157 * Create a DL_PROMISCOFF_REQ message and send it directly to 1158 * the DLPI provider. We don't need to do this for certain 1159 * media types for which we never need to turn promiscuous 1160 * mode on. 
1161 */ 1162 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1163 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1164 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1165 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1166 if (mp == NULL) 1167 return (ENOMEM); 1168 ill_dlpi_send(ill, mp); 1169 } 1170 1171 mutex_enter(&ill->ill_lock); 1172 ill->ill_join_allmulti = B_FALSE; 1173 mutex_exit(&ill->ill_lock); 1174 return (0); 1175 } 1176 1177 /* 1178 * Copy mp_orig and pass it in as a local message. 1179 */ 1180 void 1181 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1182 zoneid_t zoneid) 1183 { 1184 mblk_t *mp; 1185 mblk_t *ipsec_mp; 1186 ipha_t *iph; 1187 ip_stack_t *ipst = ill->ill_ipst; 1188 1189 if (DB_TYPE(mp_orig) == M_DATA && 1190 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1191 uint_t hdrsz; 1192 1193 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1194 sizeof (udpha_t); 1195 ASSERT(MBLKL(mp_orig) >= hdrsz); 1196 1197 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1198 (mp_orig = dupmsg(mp_orig)) != NULL) { 1199 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1200 mp->b_wptr += hdrsz; 1201 mp->b_cont = mp_orig; 1202 mp_orig->b_rptr += hdrsz; 1203 if (MBLKL(mp_orig) == 0) { 1204 mp->b_cont = mp_orig->b_cont; 1205 mp_orig->b_cont = NULL; 1206 freeb(mp_orig); 1207 } 1208 } else if (mp != NULL) { 1209 freeb(mp); 1210 mp = NULL; 1211 } 1212 } else { 1213 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1214 } 1215 1216 if (mp == NULL) 1217 return; 1218 if (DB_TYPE(mp) == M_CTL) { 1219 ipsec_mp = mp; 1220 mp = mp->b_cont; 1221 } else { 1222 ipsec_mp = mp; 1223 } 1224 1225 iph = (ipha_t *)mp->b_rptr; 1226 1227 DTRACE_PROBE4(ip4__loopback__out__start, 1228 ill_t *, NULL, ill_t *, ill, 1229 ipha_t *, iph, mblk_t *, ipsec_mp); 1230 1231 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1232 ipst->ips_ipv4firewall_loopback_out, 1233 NULL, ill, iph, ipsec_mp, mp, ipst); 1234 1235 
DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1236 1237 if (ipsec_mp != NULL) 1238 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1239 fanout_flags, zoneid); 1240 } 1241 1242 static area_t ip_aresq_template = { 1243 AR_ENTRY_SQUERY, /* cmd */ 1244 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1245 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1246 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1247 sizeof (area_t), /* proto addr offset */ 1248 IP_ADDR_LEN, /* proto addr_length */ 1249 0, /* proto mask offset */ 1250 /* Rest is initialized when used */ 1251 0, /* flags */ 1252 0, /* hw addr offset */ 1253 0, /* hw addr length */ 1254 }; 1255 1256 static mblk_t * 1257 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1258 uint32_t addroff, mblk_t *mp_tail) 1259 { 1260 mblk_t *mp; 1261 area_t *area; 1262 1263 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1264 (caddr_t)&ipaddr); 1265 if (!mp) { 1266 freemsg(mp_tail); 1267 return (NULL); 1268 } 1269 area = (area_t *)mp->b_rptr; 1270 area->area_hw_addr_length = addrlen; 1271 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1272 /* 1273 * NOTE! 1274 * 1275 * The area_hw_addr_offset, as can be seen, does not hold the 1276 * actual hardware address offset. Rather, it holds the offset 1277 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1278 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1279 * mi_offset_paramc() to find the hardware address in the 1280 * *second* mblk (dl_xxx_req), not this mblk. 1281 * 1282 * Using mi_offset_paramc() is thus the *only* way to access 1283 * the dl_xxx_hw address. 1284 * 1285 * The squery hw address should *not* be accessed. 1286 * 1287 * See ar_entry_squery() in arp.c for an example of how all this works. 1288 */ 1289 1290 mp->b_cont = mp_tail; 1291 return (mp); 1292 } 1293 1294 /* 1295 * Create a dlpi message with room for phys+sap. 
When we come back in 1296 * ip_wput_ctl() we will strip the sap for those primitives which 1297 * only need a physical address. 1298 */ 1299 static mblk_t * 1300 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1301 uint32_t *addr_lenp, uint32_t *addr_offp) 1302 { 1303 mblk_t *mp; 1304 uint32_t hw_addr_length; 1305 char *cp; 1306 uint32_t offset; 1307 uint32_t size; 1308 1309 *addr_lenp = *addr_offp = 0; 1310 1311 hw_addr_length = ill->ill_phys_addr_length; 1312 if (!hw_addr_length) { 1313 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1314 return (NULL); 1315 } 1316 1317 size = length; 1318 switch (dl_primitive) { 1319 case DL_ENABMULTI_REQ: 1320 case DL_DISABMULTI_REQ: 1321 size += hw_addr_length; 1322 break; 1323 case DL_PROMISCON_REQ: 1324 case DL_PROMISCOFF_REQ: 1325 break; 1326 default: 1327 return (NULL); 1328 } 1329 mp = allocb(size, BPRI_HI); 1330 if (!mp) 1331 return (NULL); 1332 mp->b_wptr += size; 1333 mp->b_datap->db_type = M_PROTO; 1334 1335 cp = (char *)mp->b_rptr; 1336 offset = length; 1337 1338 switch (dl_primitive) { 1339 case DL_ENABMULTI_REQ: { 1340 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1341 1342 dl->dl_primitive = dl_primitive; 1343 dl->dl_addr_offset = offset; 1344 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1345 *addr_offp = offset; 1346 break; 1347 } 1348 case DL_DISABMULTI_REQ: { 1349 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1350 1351 dl->dl_primitive = dl_primitive; 1352 dl->dl_addr_offset = offset; 1353 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1354 *addr_offp = offset; 1355 break; 1356 } 1357 case DL_PROMISCON_REQ: 1358 case DL_PROMISCOFF_REQ: { 1359 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1360 1361 dl->dl_primitive = dl_primitive; 1362 dl->dl_level = DL_PROMISC_MULTI; 1363 break; 1364 } 1365 } 1366 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1367 *addr_lenp, *addr_offp)); 1368 return (mp); 1369 } 1370 1371 void 1372 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1373 
{ 1374 ill_t *ill = (ill_t *)q->q_ptr; 1375 mblk_t *mp = mp_orig; 1376 area_t *area = (area_t *)mp->b_rptr; 1377 1378 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1379 if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL || 1380 area->area_cmd != AR_ENTRY_SQUERY) { 1381 putnext(q, mp); 1382 return; 1383 } 1384 mp = mp->b_cont; 1385 1386 /* 1387 * Update dl_addr_length and dl_addr_offset for primitives that 1388 * have physical addresses as opposed to full saps 1389 */ 1390 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1391 case DL_ENABMULTI_REQ: 1392 /* Track the state if this is the first enabmulti */ 1393 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1394 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1395 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1396 break; 1397 case DL_DISABMULTI_REQ: 1398 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1399 break; 1400 default: 1401 ip1dbg(("ip_wput_ctl: default\n")); 1402 break; 1403 } 1404 freeb(mp_orig); 1405 ill_dlpi_send(ill, mp); 1406 } 1407 1408 /* 1409 * Rejoin any groups which have been explicitly joined by the application (we 1410 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1411 * bringing the interface down). Note that because groups can be joined and 1412 * left while an interface is down, this may not be the same set of groups 1413 * that we left in ill_leave_multicast(). 1414 */ 1415 void 1416 ill_recover_multicast(ill_t *ill) 1417 { 1418 ilm_t *ilm; 1419 char addrbuf[INET6_ADDRSTRLEN]; 1420 1421 ASSERT(IAM_WRITER_ILL(ill)); 1422 1423 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1424 /* 1425 * Check how many ipif's that have members in this group - 1426 * if more then one we make sure that this entry is first 1427 * in the list. 
1428 */ 1429 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1430 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1431 continue; 1432 ip1dbg(("ill_recover_multicast: %s\n", 1433 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1434 sizeof (addrbuf)))); 1435 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1436 if (ill->ill_group == NULL) { 1437 (void) ip_join_allmulti(ill->ill_ipif); 1438 } else { 1439 /* 1440 * We don't want to join on this ill, 1441 * if somebody else in the group has 1442 * already been nominated. 1443 */ 1444 (void) ill_nominate_mcast_rcv(ill->ill_group); 1445 } 1446 } else { 1447 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1448 &ilm->ilm_v6addr); 1449 } 1450 } 1451 } 1452 1453 /* 1454 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1455 * that were explicitly joined. Note that both these functions could be 1456 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1457 * and DL_ENABMULTI_REQ messages when an interface is down. 1458 */ 1459 void 1460 ill_leave_multicast(ill_t *ill) 1461 { 1462 ilm_t *ilm; 1463 char addrbuf[INET6_ADDRSTRLEN]; 1464 1465 ASSERT(IAM_WRITER_ILL(ill)); 1466 1467 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1468 /* 1469 * Check how many ipif's that have members in this group - 1470 * if more then one we make sure that this entry is first 1471 * in the list. 1472 */ 1473 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1474 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1475 continue; 1476 ip1dbg(("ill_leave_multicast: %s\n", 1477 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1478 sizeof (addrbuf)))); 1479 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1480 (void) ip_leave_allmulti(ill->ill_ipif); 1481 /* 1482 * If we were part of an IPMP group, then 1483 * ill_handoff_responsibility() has already 1484 * nominated a new member (so we don't). 
1485 */ 1486 ASSERT(ill->ill_group == NULL); 1487 } else { 1488 (void) ip_ll_delmulti_v6(ill->ill_ipif, 1489 &ilm->ilm_v6addr); 1490 } 1491 } 1492 } 1493 1494 /* Find an ilm for matching the ill */ 1495 ilm_t * 1496 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1497 { 1498 in6_addr_t v6group; 1499 1500 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1501 IAM_WRITER_ILL(ill)); 1502 /* 1503 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1504 */ 1505 if (group == INADDR_ANY) 1506 v6group = ipv6_all_zeros; 1507 else 1508 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1509 1510 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1511 } 1512 1513 /* 1514 * Find an ilm for matching the ill. All the ilm lookup functions 1515 * ignore ILM_DELETED ilms. These have been logically deleted, and 1516 * igmp and linklayer disable multicast have been done. Only mi_free 1517 * yet to be done. Still there in the list due to ilm_walkers. The 1518 * last walker will release it. 
1519 */ 1520 ilm_t * 1521 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1522 { 1523 ilm_t *ilm; 1524 1525 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1526 IAM_WRITER_ILL(ill)); 1527 1528 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1529 if (ilm->ilm_flags & ILM_DELETED) 1530 continue; 1531 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1532 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1533 return (ilm); 1534 } 1535 return (NULL); 1536 } 1537 1538 ilm_t * 1539 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1540 zoneid_t zoneid) 1541 { 1542 ilm_t *ilm; 1543 1544 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1545 IAM_WRITER_ILL(ill)); 1546 1547 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1548 if (ilm->ilm_flags & ILM_DELETED) 1549 continue; 1550 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1551 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1552 ilm->ilm_orig_ifindex == index) { 1553 return (ilm); 1554 } 1555 } 1556 return (NULL); 1557 } 1558 1559 ilm_t * 1560 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1561 { 1562 in6_addr_t v6group; 1563 1564 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1565 IAM_WRITER_ILL(ill)); 1566 /* 1567 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1568 */ 1569 if (group == INADDR_ANY) 1570 v6group = ipv6_all_zeros; 1571 else 1572 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1573 1574 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1575 } 1576 1577 /* 1578 * Found an ilm for the ipif. Only needed for IPv4 which does 1579 * ipif specific socket options. 
1580 */ 1581 ilm_t * 1582 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1583 { 1584 ill_t *ill = ipif->ipif_ill; 1585 ilm_t *ilm; 1586 in6_addr_t v6group; 1587 1588 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1589 IAM_WRITER_ILL(ill)); 1590 1591 /* 1592 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1593 */ 1594 if (group == INADDR_ANY) 1595 v6group = ipv6_all_zeros; 1596 else 1597 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1598 1599 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1600 if (ilm->ilm_flags & ILM_DELETED) 1601 continue; 1602 if (ilm->ilm_ipif == ipif && 1603 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1604 return (ilm); 1605 } 1606 return (NULL); 1607 } 1608 1609 /* 1610 * How many members on this ill? 1611 */ 1612 int 1613 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1614 { 1615 ilm_t *ilm; 1616 int i = 0; 1617 1618 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1619 IAM_WRITER_ILL(ill)); 1620 1621 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1622 if (ilm->ilm_flags & ILM_DELETED) 1623 continue; 1624 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1625 i++; 1626 } 1627 } 1628 return (i); 1629 } 1630 1631 /* Caller guarantees that the group is not already on the list */ 1632 static ilm_t * 1633 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1634 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1635 zoneid_t zoneid) 1636 { 1637 ill_t *ill = ipif->ipif_ill; 1638 ilm_t *ilm; 1639 ilm_t *ilm_cur; 1640 ilm_t **ilm_ptpn; 1641 1642 ASSERT(IAM_WRITER_IPIF(ipif)); 1643 1644 ilm = GETSTRUCT(ilm_t, 1); 1645 if (ilm == NULL) 1646 return (NULL); 1647 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1648 ilm->ilm_filter = l_alloc(); 1649 if (ilm->ilm_filter == NULL) { 1650 mi_free(ilm); 1651 return (NULL); 1652 } 1653 } 1654 ilm->ilm_v6addr = *v6group; 1655 ilm->ilm_refcnt = 1; 1656 ilm->ilm_zoneid = zoneid; 1657 
ilm->ilm_timer = INFINITY; 1658 ilm->ilm_rtx.rtx_timer = INFINITY; 1659 1660 /* 1661 * IPv4 Multicast groups are joined using ipif. 1662 * IPv6 Multicast groups are joined using ill. 1663 */ 1664 if (ill->ill_isv6) { 1665 ilm->ilm_ill = ill; 1666 ilm->ilm_ipif = NULL; 1667 } else { 1668 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1669 ilm->ilm_ipif = ipif; 1670 ilm->ilm_ill = NULL; 1671 } 1672 ASSERT(ill->ill_ipst); 1673 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1674 1675 /* 1676 * After this if ilm moves to a new ill, we don't change 1677 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1678 * it has been moved. Indexes don't match even when the application 1679 * wants to join on a FAILED/INACTIVE interface because we choose 1680 * a new interface to join in. This is considered as an implicit 1681 * move. 1682 */ 1683 ilm->ilm_orig_ifindex = orig_ifindex; 1684 1685 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1686 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1687 1688 /* 1689 * Grab lock to give consistent view to readers 1690 */ 1691 mutex_enter(&ill->ill_lock); 1692 /* 1693 * All ilms in the same zone are contiguous in the ill_ilm list. 1694 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1695 * sending duplicates up when two applications in the same zone join the 1696 * same group on different logical interfaces. 1697 */ 1698 ilm_cur = ill->ill_ilm; 1699 ilm_ptpn = &ill->ill_ilm; 1700 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1701 ilm_ptpn = &ilm_cur->ilm_next; 1702 ilm_cur = ilm_cur->ilm_next; 1703 } 1704 ilm->ilm_next = ilm_cur; 1705 *ilm_ptpn = ilm; 1706 1707 /* 1708 * If we have an associated ilg, use its filter state; if not, 1709 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
1710 */ 1711 if (ilgstat != ILGSTAT_NONE) { 1712 if (!SLIST_IS_EMPTY(ilg_flist)) 1713 l_copy(ilg_flist, ilm->ilm_filter); 1714 ilm->ilm_fmode = ilg_fmode; 1715 } else { 1716 ilm->ilm_no_ilg_cnt = 1; 1717 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1718 } 1719 1720 mutex_exit(&ill->ill_lock); 1721 return (ilm); 1722 } 1723 1724 void 1725 ilm_walker_cleanup(ill_t *ill) 1726 { 1727 ilm_t **ilmp; 1728 ilm_t *ilm; 1729 1730 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1731 ASSERT(ill->ill_ilm_walker_cnt == 0); 1732 1733 ilmp = &ill->ill_ilm; 1734 while (*ilmp != NULL) { 1735 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1736 ilm = *ilmp; 1737 *ilmp = ilm->ilm_next; 1738 FREE_SLIST(ilm->ilm_filter); 1739 FREE_SLIST(ilm->ilm_pendsrcs); 1740 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1741 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1742 ilm->ilm_ipst = NULL; 1743 mi_free((char *)ilm); 1744 } else { 1745 ilmp = &(*ilmp)->ilm_next; 1746 } 1747 } 1748 ill->ill_ilm_cleanup_reqd = 0; 1749 } 1750 1751 /* 1752 * Unlink ilm and free it. 1753 */ 1754 static void 1755 ilm_delete(ilm_t *ilm) 1756 { 1757 ill_t *ill; 1758 ilm_t **ilmp; 1759 1760 if (ilm->ilm_ipif != NULL) { 1761 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1762 ASSERT(ilm->ilm_ill == NULL); 1763 ill = ilm->ilm_ipif->ipif_ill; 1764 ASSERT(!ill->ill_isv6); 1765 } else { 1766 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1767 ASSERT(ilm->ilm_ipif == NULL); 1768 ill = ilm->ilm_ill; 1769 ASSERT(ill->ill_isv6); 1770 } 1771 /* 1772 * Delete under lock protection so that readers don't stumble 1773 * on bad ilm_next 1774 */ 1775 mutex_enter(&ill->ill_lock); 1776 if (ill->ill_ilm_walker_cnt != 0) { 1777 ilm->ilm_flags |= ILM_DELETED; 1778 ill->ill_ilm_cleanup_reqd = 1; 1779 mutex_exit(&ill->ill_lock); 1780 return; 1781 } 1782 1783 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1784 ; 1785 *ilmp = ilm->ilm_next; 1786 mutex_exit(&ill->ill_lock); 1787 1788 FREE_SLIST(ilm->ilm_filter); 1789 FREE_SLIST(ilm->ilm_pendsrcs); 1790 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 
1791 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1792 ilm->ilm_ipst = NULL; 1793 mi_free((char *)ilm); 1794 } 1795 1796 /* Free all ilms for this ipif */ 1797 void 1798 ilm_free(ipif_t *ipif) 1799 { 1800 ill_t *ill = ipif->ipif_ill; 1801 ilm_t *ilm; 1802 ilm_t *next_ilm; 1803 1804 ASSERT(IAM_WRITER_IPIF(ipif)); 1805 1806 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1807 next_ilm = ilm->ilm_next; 1808 if (ilm->ilm_ipif == ipif) 1809 ilm_delete(ilm); 1810 } 1811 } 1812 1813 /* 1814 * Looks up the appropriate ipif given a v4 multicast group and interface 1815 * address. On success, returns 0, with *ipifpp pointing to the found 1816 * struct. On failure, returns an errno and *ipifpp is NULL. 1817 */ 1818 int 1819 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1820 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1821 { 1822 ipif_t *ipif; 1823 int err = 0; 1824 zoneid_t zoneid; 1825 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1826 1827 if (!CLASSD(group) || CLASSD(src)) { 1828 return (EINVAL); 1829 } 1830 *ipifpp = NULL; 1831 1832 zoneid = IPCL_ZONEID(connp); 1833 1834 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1835 if (ifaddr != INADDR_ANY) { 1836 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1837 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1838 if (err != 0 && err != EINPROGRESS) 1839 err = EADDRNOTAVAIL; 1840 } else if (ifindexp != NULL && *ifindexp != 0) { 1841 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1842 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1843 } else { 1844 ipif = ipif_lookup_group(group, zoneid, ipst); 1845 if (ipif == NULL) 1846 return (EADDRNOTAVAIL); 1847 } 1848 if (ipif == NULL) 1849 return (err); 1850 1851 *ipifpp = ipif; 1852 return (0); 1853 } 1854 1855 /* 1856 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1857 * index and IPv6 multicast group. 
On success, returns 0, with *illpp (or 1858 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1859 * an errno and *illpp and *ipifpp are undefined. 1860 */ 1861 int 1862 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1863 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1864 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1865 { 1866 boolean_t src_unspec; 1867 ill_t *ill = NULL; 1868 ipif_t *ipif = NULL; 1869 int err; 1870 zoneid_t zoneid = connp->conn_zoneid; 1871 queue_t *wq = CONNP_TO_WQ(connp); 1872 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1873 1874 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1875 1876 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1877 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1878 return (EINVAL); 1879 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1880 if (src_unspec) { 1881 *v4src = INADDR_ANY; 1882 } else { 1883 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1884 } 1885 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1886 return (EINVAL); 1887 *ipifpp = NULL; 1888 *isv6 = B_FALSE; 1889 } else { 1890 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1891 return (EINVAL); 1892 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1893 IN6_IS_ADDR_MULTICAST(v6src)) { 1894 return (EINVAL); 1895 } 1896 *illpp = NULL; 1897 *isv6 = B_TRUE; 1898 } 1899 1900 if (ifindex == 0) { 1901 if (*isv6) 1902 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 1903 else 1904 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 1905 if (ill == NULL && ipif == NULL) 1906 return (EADDRNOTAVAIL); 1907 } else { 1908 if (*isv6) { 1909 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1910 wq, first_mp, func, &err, ipst); 1911 if (ill != NULL && 1912 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1913 ill_refrele(ill); 1914 ill = NULL; 1915 err = EADDRNOTAVAIL; 1916 } 1917 } else { 1918 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1919 zoneid, wq, first_mp, func, &err, ipst); 1920 } 1921 if (ill == NULL 
&& ipif == NULL) 1922 return (err); 1923 } 1924 1925 *ipifpp = ipif; 1926 *illpp = ill; 1927 return (0); 1928 } 1929 1930 static int 1931 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1932 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1933 { 1934 ilg_t *ilg; 1935 int i, numsrc, fmode, outsrcs; 1936 struct sockaddr_in *sin; 1937 struct sockaddr_in6 *sin6; 1938 struct in_addr *addrp; 1939 slist_t *fp; 1940 boolean_t is_v4only_api; 1941 1942 mutex_enter(&connp->conn_lock); 1943 1944 ilg = ilg_lookup_ipif(connp, grp, ipif); 1945 if (ilg == NULL) { 1946 mutex_exit(&connp->conn_lock); 1947 return (EADDRNOTAVAIL); 1948 } 1949 1950 if (gf == NULL) { 1951 ASSERT(imsf != NULL); 1952 ASSERT(!isv4mapped); 1953 is_v4only_api = B_TRUE; 1954 outsrcs = imsf->imsf_numsrc; 1955 } else { 1956 ASSERT(imsf == NULL); 1957 is_v4only_api = B_FALSE; 1958 outsrcs = gf->gf_numsrc; 1959 } 1960 1961 /* 1962 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 1963 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 1964 * So we need to translate here. 1965 */ 1966 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
1967 MCAST_INCLUDE : MCAST_EXCLUDE; 1968 if ((fp = ilg->ilg_filter) == NULL) { 1969 numsrc = 0; 1970 } else { 1971 for (i = 0; i < outsrcs; i++) { 1972 if (i == fp->sl_numsrc) 1973 break; 1974 if (isv4mapped) { 1975 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 1976 sin6->sin6_family = AF_INET6; 1977 sin6->sin6_addr = fp->sl_addr[i]; 1978 } else { 1979 if (is_v4only_api) { 1980 addrp = &imsf->imsf_slist[i]; 1981 } else { 1982 sin = (struct sockaddr_in *) 1983 &gf->gf_slist[i]; 1984 sin->sin_family = AF_INET; 1985 addrp = &sin->sin_addr; 1986 } 1987 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 1988 } 1989 } 1990 numsrc = fp->sl_numsrc; 1991 } 1992 1993 if (is_v4only_api) { 1994 imsf->imsf_numsrc = numsrc; 1995 imsf->imsf_fmode = fmode; 1996 } else { 1997 gf->gf_numsrc = numsrc; 1998 gf->gf_fmode = fmode; 1999 } 2000 2001 mutex_exit(&connp->conn_lock); 2002 2003 return (0); 2004 } 2005 2006 static int 2007 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2008 const struct in6_addr *grp, ill_t *ill) 2009 { 2010 ilg_t *ilg; 2011 int i; 2012 struct sockaddr_storage *sl; 2013 struct sockaddr_in6 *sin6; 2014 slist_t *fp; 2015 2016 mutex_enter(&connp->conn_lock); 2017 2018 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2019 if (ilg == NULL) { 2020 mutex_exit(&connp->conn_lock); 2021 return (EADDRNOTAVAIL); 2022 } 2023 2024 /* 2025 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2026 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2027 * So we need to translate here. 2028 */ 2029 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2030 MCAST_INCLUDE : MCAST_EXCLUDE; 2031 if ((fp = ilg->ilg_filter) == NULL) { 2032 gf->gf_numsrc = 0; 2033 } else { 2034 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2035 if (i == fp->sl_numsrc) 2036 break; 2037 sin6 = (struct sockaddr_in6 *)sl; 2038 sin6->sin6_family = AF_INET6; 2039 sin6->sin6_addr = fp->sl_addr[i]; 2040 } 2041 gf->gf_numsrc = fp->sl_numsrc; 2042 } 2043 2044 mutex_exit(&connp->conn_lock); 2045 2046 return (0); 2047 } 2048 2049 static int 2050 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2051 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2052 { 2053 ilg_t *ilg; 2054 int i, err, insrcs, infmode, new_fmode; 2055 struct sockaddr_in *sin; 2056 struct sockaddr_in6 *sin6; 2057 struct in_addr *addrp; 2058 slist_t *orig_filter = NULL; 2059 slist_t *new_filter = NULL; 2060 mcast_record_t orig_fmode; 2061 boolean_t leave_grp, is_v4only_api; 2062 ilg_stat_t ilgstat; 2063 2064 if (gf == NULL) { 2065 ASSERT(imsf != NULL); 2066 ASSERT(!isv4mapped); 2067 is_v4only_api = B_TRUE; 2068 insrcs = imsf->imsf_numsrc; 2069 infmode = imsf->imsf_fmode; 2070 } else { 2071 ASSERT(imsf == NULL); 2072 is_v4only_api = B_FALSE; 2073 insrcs = gf->gf_numsrc; 2074 infmode = gf->gf_fmode; 2075 } 2076 2077 /* Make sure we can handle the source list */ 2078 if (insrcs > MAX_FILTER_SIZE) 2079 return (ENOBUFS); 2080 2081 /* 2082 * setting the filter to (INCLUDE, NULL) is treated 2083 * as a request to leave the group. 2084 */ 2085 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2086 2087 ASSERT(IAM_WRITER_IPIF(ipif)); 2088 2089 mutex_enter(&connp->conn_lock); 2090 2091 ilg = ilg_lookup_ipif(connp, grp, ipif); 2092 if (ilg == NULL) { 2093 /* 2094 * if the request was actually to leave, and we 2095 * didn't find an ilg, there's nothing to do. 2096 */ 2097 if (!leave_grp) 2098 ilg = conn_ilg_alloc(connp); 2099 if (leave_grp || ilg == NULL) { 2100 mutex_exit(&connp->conn_lock); 2101 return (leave_grp ? 
0 : ENOMEM); 2102 } 2103 ilgstat = ILGSTAT_NEW; 2104 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2105 ilg->ilg_ipif = ipif; 2106 ilg->ilg_ill = NULL; 2107 ilg->ilg_orig_ifindex = 0; 2108 } else if (leave_grp) { 2109 ilg_delete(connp, ilg, NULL); 2110 mutex_exit(&connp->conn_lock); 2111 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2112 return (0); 2113 } else { 2114 ilgstat = ILGSTAT_CHANGE; 2115 /* Preserve existing state in case ip_addmulti() fails */ 2116 orig_fmode = ilg->ilg_fmode; 2117 if (ilg->ilg_filter == NULL) { 2118 orig_filter = NULL; 2119 } else { 2120 orig_filter = l_alloc_copy(ilg->ilg_filter); 2121 if (orig_filter == NULL) { 2122 mutex_exit(&connp->conn_lock); 2123 return (ENOMEM); 2124 } 2125 } 2126 } 2127 2128 /* 2129 * Alloc buffer to copy new state into (see below) before 2130 * we make any changes, so we can bail if it fails. 2131 */ 2132 if ((new_filter = l_alloc()) == NULL) { 2133 mutex_exit(&connp->conn_lock); 2134 err = ENOMEM; 2135 goto free_and_exit; 2136 } 2137 2138 if (insrcs == 0) { 2139 CLEAR_SLIST(ilg->ilg_filter); 2140 } else { 2141 slist_t *fp; 2142 if (ilg->ilg_filter == NULL) { 2143 fp = l_alloc(); 2144 if (fp == NULL) { 2145 if (ilgstat == ILGSTAT_NEW) 2146 ilg_delete(connp, ilg, NULL); 2147 mutex_exit(&connp->conn_lock); 2148 err = ENOMEM; 2149 goto free_and_exit; 2150 } 2151 } else { 2152 fp = ilg->ilg_filter; 2153 } 2154 for (i = 0; i < insrcs; i++) { 2155 if (isv4mapped) { 2156 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2157 fp->sl_addr[i] = sin6->sin6_addr; 2158 } else { 2159 if (is_v4only_api) { 2160 addrp = &imsf->imsf_slist[i]; 2161 } else { 2162 sin = (struct sockaddr_in *) 2163 &gf->gf_slist[i]; 2164 addrp = &sin->sin_addr; 2165 } 2166 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2167 } 2168 } 2169 fp->sl_numsrc = insrcs; 2170 ilg->ilg_filter = fp; 2171 } 2172 /* 2173 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2174 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 
2175 * So we need to translate here. 2176 */ 2177 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2178 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2179 2180 /* 2181 * Save copy of ilg's filter state to pass to other functions, 2182 * so we can release conn_lock now. 2183 */ 2184 new_fmode = ilg->ilg_fmode; 2185 l_copy(ilg->ilg_filter, new_filter); 2186 2187 mutex_exit(&connp->conn_lock); 2188 2189 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2190 if (err != 0) { 2191 /* 2192 * Restore the original filter state, or delete the 2193 * newly-created ilg. We need to look up the ilg 2194 * again, though, since we've not been holding the 2195 * conn_lock. 2196 */ 2197 mutex_enter(&connp->conn_lock); 2198 ilg = ilg_lookup_ipif(connp, grp, ipif); 2199 ASSERT(ilg != NULL); 2200 if (ilgstat == ILGSTAT_NEW) { 2201 ilg_delete(connp, ilg, NULL); 2202 } else { 2203 ilg->ilg_fmode = orig_fmode; 2204 if (SLIST_IS_EMPTY(orig_filter)) { 2205 CLEAR_SLIST(ilg->ilg_filter); 2206 } else { 2207 /* 2208 * We didn't free the filter, even if we 2209 * were trying to make the source list empty; 2210 * so if orig_filter isn't empty, the ilg 2211 * must still have a filter alloc'd. 
2212 */ 2213 l_copy(orig_filter, ilg->ilg_filter); 2214 } 2215 } 2216 mutex_exit(&connp->conn_lock); 2217 } 2218 2219 free_and_exit: 2220 l_free(orig_filter); 2221 l_free(new_filter); 2222 2223 return (err); 2224 } 2225 2226 static int 2227 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2228 const struct in6_addr *grp, ill_t *ill) 2229 { 2230 ilg_t *ilg; 2231 int i, orig_ifindex, orig_fmode, new_fmode, err; 2232 slist_t *orig_filter = NULL; 2233 slist_t *new_filter = NULL; 2234 struct sockaddr_storage *sl; 2235 struct sockaddr_in6 *sin6; 2236 boolean_t leave_grp; 2237 ilg_stat_t ilgstat; 2238 2239 /* Make sure we can handle the source list */ 2240 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2241 return (ENOBUFS); 2242 2243 /* 2244 * setting the filter to (INCLUDE, NULL) is treated 2245 * as a request to leave the group. 2246 */ 2247 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2248 2249 ASSERT(IAM_WRITER_ILL(ill)); 2250 2251 /* 2252 * Use the ifindex to do the lookup. We can't use the ill 2253 * directly because ilg_ill could point to a different ill 2254 * if things have moved. 2255 */ 2256 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2257 2258 mutex_enter(&connp->conn_lock); 2259 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2260 if (ilg == NULL) { 2261 /* 2262 * if the request was actually to leave, and we 2263 * didn't find an ilg, there's nothing to do. 2264 */ 2265 if (!leave_grp) 2266 ilg = conn_ilg_alloc(connp); 2267 if (leave_grp || ilg == NULL) { 2268 mutex_exit(&connp->conn_lock); 2269 return (leave_grp ? 0 : ENOMEM); 2270 } 2271 ilgstat = ILGSTAT_NEW; 2272 ilg->ilg_v6group = *grp; 2273 ilg->ilg_ipif = NULL; 2274 /* 2275 * Choose our target ill to join on. This might be 2276 * different from the ill we've been given if it's 2277 * currently down and part of a group. 2278 * 2279 * new ill is not refheld; we are writer. 
2280 */ 2281 ill = ip_choose_multi_ill(ill, grp); 2282 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2283 ilg->ilg_ill = ill; 2284 /* 2285 * Remember the index that we joined on, so that we can 2286 * successfully delete them later on and also search for 2287 * duplicates if the application wants to join again. 2288 */ 2289 ilg->ilg_orig_ifindex = orig_ifindex; 2290 } else if (leave_grp) { 2291 /* 2292 * Use the ilg's current ill for the deletion, 2293 * we might have failed over. 2294 */ 2295 ill = ilg->ilg_ill; 2296 ilg_delete(connp, ilg, NULL); 2297 mutex_exit(&connp->conn_lock); 2298 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2299 connp->conn_zoneid, B_FALSE, B_TRUE); 2300 return (0); 2301 } else { 2302 ilgstat = ILGSTAT_CHANGE; 2303 /* 2304 * The current ill might be different from the one we were 2305 * asked to join on (if failover has occurred); we should 2306 * join on the ill stored in the ilg. The original ill 2307 * is noted in ilg_orig_ifindex, which matched our request. 2308 */ 2309 ill = ilg->ilg_ill; 2310 /* preserve existing state in case ip_addmulti() fails */ 2311 orig_fmode = ilg->ilg_fmode; 2312 if (ilg->ilg_filter == NULL) { 2313 orig_filter = NULL; 2314 } else { 2315 orig_filter = l_alloc_copy(ilg->ilg_filter); 2316 if (orig_filter == NULL) { 2317 mutex_exit(&connp->conn_lock); 2318 return (ENOMEM); 2319 } 2320 } 2321 } 2322 2323 /* 2324 * Alloc buffer to copy new state into (see below) before 2325 * we make any changes, so we can bail if it fails. 
2326 */ 2327 if ((new_filter = l_alloc()) == NULL) { 2328 mutex_exit(&connp->conn_lock); 2329 err = ENOMEM; 2330 goto free_and_exit; 2331 } 2332 2333 if (gf->gf_numsrc == 0) { 2334 CLEAR_SLIST(ilg->ilg_filter); 2335 } else { 2336 slist_t *fp; 2337 if (ilg->ilg_filter == NULL) { 2338 fp = l_alloc(); 2339 if (fp == NULL) { 2340 if (ilgstat == ILGSTAT_NEW) 2341 ilg_delete(connp, ilg, NULL); 2342 mutex_exit(&connp->conn_lock); 2343 err = ENOMEM; 2344 goto free_and_exit; 2345 } 2346 } else { 2347 fp = ilg->ilg_filter; 2348 } 2349 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2350 sin6 = (struct sockaddr_in6 *)sl; 2351 fp->sl_addr[i] = sin6->sin6_addr; 2352 } 2353 fp->sl_numsrc = gf->gf_numsrc; 2354 ilg->ilg_filter = fp; 2355 } 2356 /* 2357 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2358 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2359 * So we need to translate here. 2360 */ 2361 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2362 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2363 2364 /* 2365 * Save copy of ilg's filter state to pass to other functions, 2366 * so we can release conn_lock now. 2367 */ 2368 new_fmode = ilg->ilg_fmode; 2369 l_copy(ilg->ilg_filter, new_filter); 2370 2371 mutex_exit(&connp->conn_lock); 2372 2373 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2374 ilgstat, new_fmode, new_filter); 2375 if (err != 0) { 2376 /* 2377 * Restore the original filter state, or delete the 2378 * newly-created ilg. We need to look up the ilg 2379 * again, though, since we've not been holding the 2380 * conn_lock. 
2381 */ 2382 mutex_enter(&connp->conn_lock); 2383 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2384 ASSERT(ilg != NULL); 2385 if (ilgstat == ILGSTAT_NEW) { 2386 ilg_delete(connp, ilg, NULL); 2387 } else { 2388 ilg->ilg_fmode = orig_fmode; 2389 if (SLIST_IS_EMPTY(orig_filter)) { 2390 CLEAR_SLIST(ilg->ilg_filter); 2391 } else { 2392 /* 2393 * We didn't free the filter, even if we 2394 * were trying to make the source list empty; 2395 * so if orig_filter isn't empty, the ilg 2396 * must still have a filter alloc'd. 2397 */ 2398 l_copy(orig_filter, ilg->ilg_filter); 2399 } 2400 } 2401 mutex_exit(&connp->conn_lock); 2402 } 2403 2404 free_and_exit: 2405 l_free(orig_filter); 2406 l_free(new_filter); 2407 2408 return (err); 2409 } 2410 2411 /* 2412 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2413 */ 2414 /* ARGSUSED */ 2415 int 2416 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2417 ip_ioctl_cmd_t *ipip, void *ifreq) 2418 { 2419 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2420 /* existence verified in ip_wput_nondata() */ 2421 mblk_t *data_mp = mp->b_cont->b_cont; 2422 int datalen, err, cmd, minsize; 2423 int expsize = 0; 2424 conn_t *connp; 2425 boolean_t isv6, is_v4only_api, getcmd; 2426 struct sockaddr_in *gsin; 2427 struct sockaddr_in6 *gsin6; 2428 ipaddr_t v4grp; 2429 in6_addr_t v6grp; 2430 struct group_filter *gf = NULL; 2431 struct ip_msfilter *imsf = NULL; 2432 mblk_t *ndp; 2433 2434 if (data_mp->b_cont != NULL) { 2435 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2436 return (ENOMEM); 2437 freemsg(data_mp); 2438 data_mp = ndp; 2439 mp->b_cont->b_cont = data_mp; 2440 } 2441 2442 cmd = iocp->ioc_cmd; 2443 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2444 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2445 minsize = (is_v4only_api) ? 
IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2446 datalen = MBLKL(data_mp); 2447 2448 if (datalen < minsize) 2449 return (EINVAL); 2450 2451 /* 2452 * now we know we have at least have the initial structure, 2453 * but need to check for the source list array. 2454 */ 2455 if (is_v4only_api) { 2456 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2457 isv6 = B_FALSE; 2458 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2459 } else { 2460 gf = (struct group_filter *)data_mp->b_rptr; 2461 if (gf->gf_group.ss_family == AF_INET6) { 2462 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2463 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2464 } else { 2465 isv6 = B_FALSE; 2466 } 2467 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2468 } 2469 if (datalen < expsize) 2470 return (EINVAL); 2471 2472 connp = Q_TO_CONN(q); 2473 2474 /* operation not supported on the virtual network interface */ 2475 if (IS_VNI(ipif->ipif_ill)) 2476 return (EINVAL); 2477 2478 if (isv6) { 2479 ill_t *ill = ipif->ipif_ill; 2480 ill_refhold(ill); 2481 2482 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2483 v6grp = gsin6->sin6_addr; 2484 if (getcmd) 2485 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2486 else 2487 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2488 2489 ill_refrele(ill); 2490 } else { 2491 boolean_t isv4mapped = B_FALSE; 2492 if (is_v4only_api) { 2493 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2494 } else { 2495 if (gf->gf_group.ss_family == AF_INET) { 2496 gsin = (struct sockaddr_in *)&gf->gf_group; 2497 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2498 } else { 2499 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2500 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2501 v4grp); 2502 isv4mapped = B_TRUE; 2503 } 2504 } 2505 if (getcmd) 2506 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2507 isv4mapped); 2508 else 2509 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2510 isv4mapped); 2511 } 2512 2513 return (err); 2514 } 2515 2516 /* 2517 * Finds the ipif based on information 
in the ioctl headers. Needed to make 2518 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2519 * ioctls prior to calling the ioctl's handler function). 2520 */ 2521 int 2522 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2523 cmd_info_t *ci, ipsq_func_t func) 2524 { 2525 int cmd = ipip->ipi_cmd; 2526 int err = 0; 2527 conn_t *connp; 2528 ipif_t *ipif; 2529 /* caller has verified this mblk exists */ 2530 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2531 struct ip_msfilter *imsf; 2532 struct group_filter *gf; 2533 ipaddr_t v4addr, v4grp; 2534 in6_addr_t v6grp; 2535 uint32_t index; 2536 zoneid_t zoneid; 2537 ip_stack_t *ipst; 2538 2539 connp = Q_TO_CONN(q); 2540 zoneid = connp->conn_zoneid; 2541 ipst = connp->conn_netstack->netstack_ip; 2542 2543 /* don't allow multicast operations on a tcp conn */ 2544 if (IPCL_IS_TCP(connp)) 2545 return (ENOPROTOOPT); 2546 2547 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2548 /* don't allow v4-specific ioctls on v6 socket */ 2549 if (connp->conn_af_isv6) 2550 return (EAFNOSUPPORT); 2551 2552 imsf = (struct ip_msfilter *)dbuf; 2553 v4addr = imsf->imsf_interface.s_addr; 2554 v4grp = imsf->imsf_multiaddr.s_addr; 2555 if (v4addr == INADDR_ANY) { 2556 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2557 if (ipif == NULL) 2558 err = EADDRNOTAVAIL; 2559 } else { 2560 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2561 func, &err, ipst); 2562 } 2563 } else { 2564 boolean_t isv6 = B_FALSE; 2565 gf = (struct group_filter *)dbuf; 2566 index = gf->gf_interface; 2567 if (gf->gf_group.ss_family == AF_INET6) { 2568 struct sockaddr_in6 *sin6; 2569 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2570 v6grp = sin6->sin6_addr; 2571 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2572 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2573 else 2574 isv6 = B_TRUE; 2575 } else if (gf->gf_group.ss_family == AF_INET) { 2576 struct sockaddr_in *sin; 2577 sin = (struct sockaddr_in *)&gf->gf_group; 2578 v4grp = 
sin->sin_addr.s_addr; 2579 } else { 2580 return (EAFNOSUPPORT); 2581 } 2582 if (index == 0) { 2583 if (isv6) { 2584 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2585 ipst); 2586 } else { 2587 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2588 } 2589 if (ipif == NULL) 2590 err = EADDRNOTAVAIL; 2591 } else { 2592 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2593 q, mp, func, &err, ipst); 2594 } 2595 } 2596 2597 ci->ci_ipif = ipif; 2598 return (err); 2599 } 2600 2601 /* 2602 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2603 * in in two stages, as the first copyin tells us the size of the attached 2604 * source buffer. This function is called by ip_wput_nondata() after the 2605 * first copyin has completed; it figures out how big the second stage 2606 * needs to be, and kicks it off. 2607 * 2608 * In some cases (numsrc < 2), the second copyin is not needed as the 2609 * first one gets a complete structure containing 1 source addr. 2610 * 2611 * The function returns 0 if a second copyin has been started (i.e. there's 2612 * no more work to be done right now), or 1 if the second copyin is not 2613 * needed and ip_wput_nondata() can continue its processing. 
2614 */ 2615 int 2616 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2617 { 2618 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2619 int cmd = iocp->ioc_cmd; 2620 /* validity of this checked in ip_wput_nondata() */ 2621 mblk_t *mp1 = mp->b_cont->b_cont; 2622 int copysize = 0; 2623 int offset; 2624 2625 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2626 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2627 if (gf->gf_numsrc >= 2) { 2628 offset = sizeof (struct group_filter); 2629 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2630 } 2631 } else { 2632 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2633 if (imsf->imsf_numsrc >= 2) { 2634 offset = sizeof (struct ip_msfilter); 2635 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2636 } 2637 } 2638 if (copysize > 0) { 2639 mi_copyin_n(q, mp, offset, copysize); 2640 return (0); 2641 } 2642 return (1); 2643 } 2644 2645 /* 2646 * Handle the following optmgmt: 2647 * IP_ADD_MEMBERSHIP must not have joined already 2648 * MCAST_JOIN_GROUP must not have joined already 2649 * IP_BLOCK_SOURCE must have joined already 2650 * MCAST_BLOCK_SOURCE must have joined already 2651 * IP_JOIN_SOURCE_GROUP may have joined already 2652 * MCAST_JOIN_SOURCE_GROUP may have joined already 2653 * 2654 * fmode and src parameters may be used to determine which option is 2655 * being set, as follows (the IP_* and MCAST_* versions of each option 2656 * are functionally equivalent): 2657 * opt fmode src 2658 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2659 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2660 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2661 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2662 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2663 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2664 * 2665 * Changing the filter mode is not allowed; if a matching ilg already 2666 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2667 * 2668 * Verifies that there is a source address of appropriate scope for 2669 * the group; if not, EADDRNOTAVAIL is returned. 2670 * 2671 * The interface to be used may be identified by an address or by an 2672 * index. A pointer to the index is passed; if it is NULL, use the 2673 * address, otherwise, use the index. 2674 */ 2675 int 2676 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2677 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2678 mblk_t *first_mp) 2679 { 2680 ipif_t *ipif; 2681 ipsq_t *ipsq; 2682 int err = 0; 2683 ill_t *ill; 2684 2685 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2686 ip_restart_optmgmt, &ipif); 2687 if (err != 0) { 2688 if (err != EINPROGRESS) { 2689 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2690 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2691 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2692 } 2693 return (err); 2694 } 2695 ASSERT(ipif != NULL); 2696 2697 ill = ipif->ipif_ill; 2698 /* Operation not supported on a virtual network interface */ 2699 if (IS_VNI(ill)) { 2700 ipif_refrele(ipif); 2701 return (EINVAL); 2702 } 2703 2704 if (checkonly) { 2705 /* 2706 * do not do operation, just pretend to - new T_CHECK 2707 * semantics. The error return case above if encountered 2708 * considered a good enough "check" here. 
2709 */ 2710 ipif_refrele(ipif); 2711 return (0); 2712 } 2713 2714 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2715 NEW_OP); 2716 2717 /* unspecified source addr => no source filtering */ 2718 err = ilg_add(connp, group, ipif, fmode, src); 2719 2720 IPSQ_EXIT(ipsq); 2721 2722 ipif_refrele(ipif); 2723 return (err); 2724 } 2725 2726 /* 2727 * Handle the following optmgmt: 2728 * IPV6_JOIN_GROUP must not have joined already 2729 * MCAST_JOIN_GROUP must not have joined already 2730 * MCAST_BLOCK_SOURCE must have joined already 2731 * MCAST_JOIN_SOURCE_GROUP may have joined already 2732 * 2733 * fmode and src parameters may be used to determine which option is 2734 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2735 * are functionally equivalent): 2736 * opt fmode v6src 2737 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2738 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2739 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2740 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2741 * 2742 * Changing the filter mode is not allowed; if a matching ilg already 2743 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2744 * 2745 * Verifies that there is a source address of appropriate scope for 2746 * the group; if not, EADDRNOTAVAIL is returned. 2747 * 2748 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2749 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2750 * v6src is also v4-mapped. 
2751 */ 2752 int 2753 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2754 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2755 const in6_addr_t *v6src, mblk_t *first_mp) 2756 { 2757 ill_t *ill; 2758 ipif_t *ipif; 2759 char buf[INET6_ADDRSTRLEN]; 2760 ipaddr_t v4group, v4src; 2761 boolean_t isv6; 2762 ipsq_t *ipsq; 2763 int err; 2764 2765 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2766 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2767 if (err != 0) { 2768 if (err != EINPROGRESS) { 2769 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2770 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2771 sizeof (buf)), ifindex)); 2772 } 2773 return (err); 2774 } 2775 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2776 2777 /* operation is not supported on the virtual network interface */ 2778 if (isv6) { 2779 if (IS_VNI(ill)) { 2780 ill_refrele(ill); 2781 return (EINVAL); 2782 } 2783 } else { 2784 if (IS_VNI(ipif->ipif_ill)) { 2785 ipif_refrele(ipif); 2786 return (EINVAL); 2787 } 2788 } 2789 2790 if (checkonly) { 2791 /* 2792 * do not do operation, just pretend to - new T_CHECK 2793 * semantics. The error return case above if encountered 2794 * considered a good enough "check" here. 
2795 */ 2796 if (isv6) 2797 ill_refrele(ill); 2798 else 2799 ipif_refrele(ipif); 2800 return (0); 2801 } 2802 2803 if (!isv6) { 2804 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2805 ipsq, NEW_OP); 2806 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2807 IPSQ_EXIT(ipsq); 2808 ipif_refrele(ipif); 2809 } else { 2810 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2811 ipsq, NEW_OP); 2812 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2813 IPSQ_EXIT(ipsq); 2814 ill_refrele(ill); 2815 } 2816 2817 return (err); 2818 } 2819 2820 static int 2821 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2822 mcast_record_t fmode, ipaddr_t src) 2823 { 2824 ilg_t *ilg; 2825 in6_addr_t v6src; 2826 boolean_t leaving = B_FALSE; 2827 2828 ASSERT(IAM_WRITER_IPIF(ipif)); 2829 2830 /* 2831 * The ilg is valid only while we hold the conn lock. Once we drop 2832 * the lock, another thread can locate another ilg on this connp, 2833 * but on a different ipif, and delete it, and cause the ilg array 2834 * to be reallocated and copied. Hence do the ilg_delete before 2835 * dropping the lock. 2836 */ 2837 mutex_enter(&connp->conn_lock); 2838 ilg = ilg_lookup_ipif(connp, group, ipif); 2839 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2840 mutex_exit(&connp->conn_lock); 2841 return (EADDRNOTAVAIL); 2842 } 2843 2844 /* 2845 * Decide if we're actually deleting the ilg or just removing a 2846 * source filter address; if just removing an addr, make sure we 2847 * aren't trying to change the filter mode, and that the addr is 2848 * actually in our filter list already. If we're removing the 2849 * last src in an include list, just delete the ilg. 
2850 */ 2851 if (src == INADDR_ANY) { 2852 v6src = ipv6_all_zeros; 2853 leaving = B_TRUE; 2854 } else { 2855 int err = 0; 2856 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2857 if (fmode != ilg->ilg_fmode) 2858 err = EINVAL; 2859 else if (ilg->ilg_filter == NULL || 2860 !list_has_addr(ilg->ilg_filter, &v6src)) 2861 err = EADDRNOTAVAIL; 2862 if (err != 0) { 2863 mutex_exit(&connp->conn_lock); 2864 return (err); 2865 } 2866 if (fmode == MODE_IS_INCLUDE && 2867 ilg->ilg_filter->sl_numsrc == 1) { 2868 v6src = ipv6_all_zeros; 2869 leaving = B_TRUE; 2870 } 2871 } 2872 2873 ilg_delete(connp, ilg, &v6src); 2874 mutex_exit(&connp->conn_lock); 2875 2876 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2877 return (0); 2878 } 2879 2880 static int 2881 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2882 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2883 { 2884 ilg_t *ilg; 2885 ill_t *ilg_ill; 2886 uint_t ilg_orig_ifindex; 2887 boolean_t leaving = B_TRUE; 2888 2889 ASSERT(IAM_WRITER_ILL(ill)); 2890 2891 /* 2892 * Use the index that we originally used to join. We can't 2893 * use the ill directly because ilg_ill could point to 2894 * a new ill if things have moved. 2895 */ 2896 mutex_enter(&connp->conn_lock); 2897 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2898 ill->ill_phyint->phyint_ifindex); 2899 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2900 mutex_exit(&connp->conn_lock); 2901 return (EADDRNOTAVAIL); 2902 } 2903 2904 /* 2905 * Decide if we're actually deleting the ilg or just removing a 2906 * source filter address; if just removing an addr, make sure we 2907 * aren't trying to change the filter mode, and that the addr is 2908 * actually in our filter list already. If we're removing the 2909 * last src in an include list, just delete the ilg. 
2910 */ 2911 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2912 int err = 0; 2913 if (fmode != ilg->ilg_fmode) 2914 err = EINVAL; 2915 else if (ilg->ilg_filter == NULL || 2916 !list_has_addr(ilg->ilg_filter, v6src)) 2917 err = EADDRNOTAVAIL; 2918 if (err != 0) { 2919 mutex_exit(&connp->conn_lock); 2920 return (err); 2921 } 2922 if (fmode == MODE_IS_INCLUDE && 2923 ilg->ilg_filter->sl_numsrc == 1) 2924 v6src = NULL; 2925 else 2926 leaving = B_FALSE; 2927 } 2928 2929 ilg_ill = ilg->ilg_ill; 2930 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2931 ilg_delete(connp, ilg, v6src); 2932 mutex_exit(&connp->conn_lock); 2933 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2934 connp->conn_zoneid, B_FALSE, leaving); 2935 2936 return (0); 2937 } 2938 2939 /* 2940 * Handle the following optmgmt: 2941 * IP_DROP_MEMBERSHIP will leave 2942 * MCAST_LEAVE_GROUP will leave 2943 * IP_UNBLOCK_SOURCE will not leave 2944 * MCAST_UNBLOCK_SOURCE will not leave 2945 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2946 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2947 * 2948 * fmode and src parameters may be used to determine which option is 2949 * being set, as follows (the IP_* and MCAST_* versions of each option 2950 * are functionally equivalent): 2951 * opt fmode src 2952 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2953 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2954 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2955 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2956 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2957 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2958 * 2959 * Changing the filter mode is not allowed; if a matching ilg already 2960 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2961 * 2962 * The interface to be used may be identified by an address or by an 2963 * index. A pointer to the index is passed; if it is NULL, use the 2964 * address, otherwise, use the index. 
2965 */ 2966 int 2967 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2968 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2969 mblk_t *first_mp) 2970 { 2971 ipif_t *ipif; 2972 ipsq_t *ipsq; 2973 int err; 2974 ill_t *ill; 2975 2976 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2977 ip_restart_optmgmt, &ipif); 2978 if (err != 0) { 2979 if (err != EINPROGRESS) { 2980 ip1dbg(("ip_opt_delete_group: no ipif for group " 2981 "0x%x, ifaddr 0x%x\n", 2982 (int)ntohl(group), (int)ntohl(ifaddr))); 2983 } 2984 return (err); 2985 } 2986 ASSERT(ipif != NULL); 2987 2988 ill = ipif->ipif_ill; 2989 /* Operation not supported on a virtual network interface */ 2990 if (IS_VNI(ill)) { 2991 ipif_refrele(ipif); 2992 return (EINVAL); 2993 } 2994 2995 if (checkonly) { 2996 /* 2997 * do not do operation, just pretend to - new T_CHECK 2998 * semantics. The error return case above if encountered 2999 * considered a good enough "check" here. 3000 */ 3001 ipif_refrele(ipif); 3002 return (0); 3003 } 3004 3005 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3006 NEW_OP); 3007 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3008 IPSQ_EXIT(ipsq); 3009 3010 ipif_refrele(ipif); 3011 return (err); 3012 } 3013 3014 /* 3015 * Handle the following optmgmt: 3016 * IPV6_LEAVE_GROUP will leave 3017 * MCAST_LEAVE_GROUP will leave 3018 * MCAST_UNBLOCK_SOURCE will not leave 3019 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3020 * 3021 * fmode and src parameters may be used to determine which option is 3022 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3023 * are functionally equivalent): 3024 * opt fmode v6src 3025 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3026 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3027 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3028 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3029 * 3030 * Changing the filter mode is not 
* allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 *
 * ip_opt_check_v6() reports through isv6 which of ill (native v6) or
 * ipif (v4-mapped) it filled in; that object is released on every path
 * that returns from this function body.
 */
int
ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t *ill;
	ipif_t	*ipif;
	char	buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t	*ipsq;
	int	err;

	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		/* EINPROGRESS is not logged as a lookup failure */
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	/* v4-mapped groups take the IPv4 path, keyed on the ipif */
	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
		    v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
		    v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Group mgmt for upper conn that passes things down
 * to the interface multicast list (and DLPI)
 * These routines can handle new style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces.)
 */

/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI)
 *
 * connp's ilg for (group, ipif) is created, or has src appended to its
 * filter, under conn_lock; the lock is then dropped and the resulting
 * mode/filter snapshot is handed to ip_addmulti().  If that fails, the
 * ilg change is undone again.  Caller must be writer on ipif.
 *
 * Returns 0 on success, otherwise:
 *	EADDRNOTAVAIL	ill is not multicast capable; blocking a source
 *			on a group that was not joined; or src is already
 *			in the filter
 *	EADDRINUSE	joining for all sources when already joined
 *	ENOBUFS		the existing filter is already full
 *	EINVAL		fmode differs from the existing ilg's mode
 *	ENOMEM		an allocation failed
 *	or whatever ip_addmulti() returned.
 */
static int
ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
    ipaddr_t src)
{
	int	error = 0;
	ill_t	*ill;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ill = ipif->ipif_ill;

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_ilg[] is protected by conn_lock.  Need to hold the conn_lock
	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
	 * serialize 2 threads doing join (sock, group1, hme0:0) and
	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
	 * but both operations happen on the same conn.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.  Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (src == INADDR_ANY) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join of this (group, ipif) on this conn. */
		ilgstat = ILGSTAT_NEW;
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (src != INADDR_ANY) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				/* don't leave the new ilg behind */
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			IN6_IPADDR_TO_V4MAPPED(src,
			    &ilg->ilg_filter->sl_addr[0]);
		}
		/* the group is stored as a v6 address, v4-mapped if set */
		if (group == INADDR_ANY) {
			ilg->ilg_v6group = ipv6_all_zeros;
		} else {
			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
		}
		/* IPv4 ilgs are keyed on the ipif; ilg_ill stays NULL */
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
		ilg->ilg_fmode = fmode;
	} else {
		/* Already joined: append src to the existing filter. */
		int index;
		in6_addr_t v6src;
		ilgstat = ILGSTAT_CHANGE;
		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (list_has_addr(ilg->ilg_filter, &v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		/* room for one more entry was verified above (ENOBUFS) */
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * Need to undo what we did before calling ip_addmulti()!
		 * Must look up the ilg again since we've not been holding
		 * conn_lock.
		 */
		in6_addr_t v6src;
		/* a new ilg is removed whole; otherwise just drop src */
		if (ilgstat == ILGSTAT_NEW)
			v6src = ipv6_all_zeros;
		else
			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, group, ipif);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &v6src);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);
	return (0);
}

/*
 * IPv6 version of ilg_add(): the ilg is looked up by the ifindex of ill,
 * and a new ilg records both the ill chosen by ip_choose_multi_ill() and
 * the ifindex the join was requested on.  The change is passed down with
 * ip_addmulti_v6() and undone if that fails.  Caller must be writer on
 * ill.  Error returns follow the same rules as ilg_add().
 */
static int
ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	int	error = 0;
	int	orig_ifindex;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_lock protects the ilg list.  Serializes 2 threads doing
	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
	 * and hme1 map to different ipsq's, but both operations happen
	 * on the same conn.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Use the ifindex to do the lookup.  We can't use the ill
	 * directly because ilg_ill could point to a different ill if
	 * things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;
	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.  Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* First join of this group/ifindex on this conn. */
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				/* don't leave the new ilg behind */
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on. This might be different
		 * from the ill we've been given if it's currently down and
		 * part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, v6group);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the orig_ifindex that we joined on, so that we
		 * can successfully delete them later on and also search
		 * for duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else {
		/* Already joined: append v6src to the existing filter. */
		int index;
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		/* room for one more entry was verified above (ENOBUFS) */
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * But because we waited, we have to undo the ilg update
		 * if ip_addmulti_v6() fails.  We also must lookup ilg
		 * again, since we've not been holding conn_lock.
		 */
		/* a new ilg is removed whole; otherwise just drop v6src */
		in6_addr_t delsrc =
		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &delsrc);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);

	return (0);
}

/*
 * Find an IPv4 ilg matching group, ill and source
 */
ilg_t *
ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int i;
	boolean_t isinlist;
	ilg_t *ilg;
	ipif_t *ipif;
	ill_t *ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		/* ilg_ipif is NULL for v6; skip them */
		ilg = &connp->conn_ilg[i];
		if ((ipif = ilg->ilg_ipif) == NULL)
			continue;
		ASSERT(ilg->ilg_ill == NULL);
		ilg_ill = ipif->ipif_ill;
		ASSERT(!ilg_ill->ill_isv6);
		if (ilg_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
3496 */ 3497 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3498 isinlist = B_FALSE; 3499 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3500 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3501 isinlist = B_TRUE; 3502 break; 3503 } 3504 } 3505 3506 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3507 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3508 return (ilg); 3509 3510 return (NULL); 3511 } 3512 3513 /* 3514 * Find an IPv6 ilg matching group, ill, and source 3515 */ 3516 ilg_t * 3517 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3518 const in6_addr_t *v6src, ill_t *ill) 3519 { 3520 int i; 3521 boolean_t isinlist; 3522 ilg_t *ilg; 3523 ill_t *ilg_ill; 3524 3525 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3526 3527 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3528 ilg = &connp->conn_ilg[i]; 3529 if ((ilg_ill = ilg->ilg_ill) == NULL) 3530 continue; 3531 ASSERT(ilg->ilg_ipif == NULL); 3532 ASSERT(ilg_ill->ill_isv6); 3533 if (ilg_ill == ill && 3534 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3535 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3536 /* no source filter, so this is a match */ 3537 return (ilg); 3538 } 3539 break; 3540 } 3541 } 3542 if (i == connp->conn_ilg_inuse) 3543 return (NULL); 3544 3545 /* 3546 * we have an ilg with matching ill and group; but 3547 * the ilg has a source list that we must check. 3548 */ 3549 isinlist = B_FALSE; 3550 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3551 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3552 isinlist = B_TRUE; 3553 break; 3554 } 3555 } 3556 3557 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3558 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3559 return (ilg); 3560 3561 return (NULL); 3562 } 3563 3564 /* 3565 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3566 * This is useful when the interface fails and we have moved 3567 * to a new ill, but still would like to locate using the index 3568 * that we originally used to join. Used only for IPv6 currently. 3569 */ 3570 static ilg_t * 3571 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3572 { 3573 ilg_t *ilg; 3574 int i; 3575 3576 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3577 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3578 ilg = &connp->conn_ilg[i]; 3579 /* ilg_ill is NULL for V4. Skip them */ 3580 if (ilg->ilg_ill == NULL) 3581 continue; 3582 /* ilg_ipif is NULL for V6 */ 3583 ASSERT(ilg->ilg_ipif == NULL); 3584 ASSERT(ilg->ilg_orig_ifindex != 0); 3585 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3586 ilg->ilg_orig_ifindex == ifindex) { 3587 return (ilg); 3588 } 3589 } 3590 return (NULL); 3591 } 3592 3593 /* 3594 * Find an IPv6 ilg matching group and ill 3595 */ 3596 ilg_t * 3597 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3598 { 3599 ilg_t *ilg; 3600 int i; 3601 ill_t *mem_ill; 3602 3603 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3604 3605 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3606 ilg = &connp->conn_ilg[i]; 3607 if ((mem_ill = ilg->ilg_ill) == NULL) 3608 continue; 3609 ASSERT(ilg->ilg_ipif == NULL); 3610 ASSERT(mem_ill->ill_isv6); 3611 if (mem_ill == ill && 3612 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3613 return (ilg); 3614 } 3615 return (NULL); 3616 } 3617 3618 /* 3619 * Find an IPv4 ilg matching group and ipif 3620 */ 3621 static ilg_t * 3622 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3623 { 3624 in6_addr_t v6group; 3625 int i; 3626 3627 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3628 ASSERT(!ipif->ipif_ill->ill_isv6); 3629 3630 if (group == INADDR_ANY) 3631 v6group = ipv6_all_zeros; 3632 else 3633 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3634 3635 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3636 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3637 &v6group) 
&& 3638 connp->conn_ilg[i].ilg_ipif == ipif) 3639 return (&connp->conn_ilg[i]); 3640 } 3641 return (NULL); 3642 } 3643 3644 /* 3645 * If a source address is passed in (src != NULL and src is not 3646 * unspecified), remove the specified src addr from the given ilg's 3647 * filter list, else delete the ilg. 3648 */ 3649 static void 3650 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3651 { 3652 int i; 3653 3654 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3655 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3656 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3657 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3658 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3659 3660 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3661 if (connp->conn_ilg_walker_cnt != 0) { 3662 ilg->ilg_flags |= ILG_DELETED; 3663 return; 3664 } 3665 3666 FREE_SLIST(ilg->ilg_filter); 3667 3668 i = ilg - &connp->conn_ilg[0]; 3669 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3670 3671 /* Move other entries up one step */ 3672 connp->conn_ilg_inuse--; 3673 for (; i < connp->conn_ilg_inuse; i++) 3674 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3675 3676 if (connp->conn_ilg_inuse == 0) { 3677 mi_free((char *)connp->conn_ilg); 3678 connp->conn_ilg = NULL; 3679 cv_broadcast(&connp->conn_refcv); 3680 } 3681 } else { 3682 l_remove(ilg->ilg_filter, src); 3683 } 3684 } 3685 3686 /* 3687 * Called from conn close. No new ilg can be added or removed. 3688 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3689 * will return error if conn has started closing. 
3690 */ 3691 void 3692 ilg_delete_all(conn_t *connp) 3693 { 3694 int i; 3695 ipif_t *ipif = NULL; 3696 ill_t *ill = NULL; 3697 ilg_t *ilg; 3698 in6_addr_t v6group; 3699 boolean_t success; 3700 ipsq_t *ipsq; 3701 int orig_ifindex; 3702 3703 mutex_enter(&connp->conn_lock); 3704 retry: 3705 ILG_WALKER_HOLD(connp); 3706 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3707 ilg = &connp->conn_ilg[i]; 3708 /* 3709 * Since this walk is not atomic (we drop the 3710 * conn_lock and wait in ipsq_enter) we need 3711 * to check for the ILG_DELETED flag. 3712 */ 3713 if (ilg->ilg_flags & ILG_DELETED) { 3714 /* Go to the next ilg */ 3715 i--; 3716 continue; 3717 } 3718 v6group = ilg->ilg_v6group; 3719 3720 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3721 ipif = ilg->ilg_ipif; 3722 ill = ipif->ipif_ill; 3723 } else { 3724 ipif = NULL; 3725 ill = ilg->ilg_ill; 3726 } 3727 /* 3728 * We may not be able to refhold the ill if the ill/ipif 3729 * is changing. But we need to make sure that the ill will 3730 * not vanish. So we just bump up the ill_waiter count. 3731 * If we are unable to do even that, then the ill is closing, 3732 * in which case the unplumb thread will handle the cleanup, 3733 * and we move on to the next ilg. 3734 */ 3735 if (!ill_waiter_inc(ill)) { 3736 /* Go to the next ilg */ 3737 i--; 3738 continue; 3739 } 3740 mutex_exit(&connp->conn_lock); 3741 /* 3742 * To prevent deadlock between ill close which waits inside 3743 * the perimeter, and conn close, ipsq_enter returns error, 3744 * the moment ILL_CONDEMNED is set, in which case ill close 3745 * takes responsibility to cleanup the ilgs. Note that we 3746 * have not yet set condemned flag, otherwise the conn can't 3747 * be refheld for cleanup by those routines and it would be 3748 * a mutual deadlock. 
3749 */ 3750 success = ipsq_enter(ill, B_FALSE); 3751 ipsq = ill->ill_phyint->phyint_ipsq; 3752 ill_waiter_dcr(ill); 3753 mutex_enter(&connp->conn_lock); 3754 if (!success) { 3755 /* Go to the next ilg */ 3756 i--; 3757 continue; 3758 } 3759 3760 /* 3761 * Make sure that nothing has changed under. For eg. 3762 * a failover/failback can change ilg_ill while we were 3763 * waiting to become exclusive above 3764 */ 3765 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3766 ipif = ilg->ilg_ipif; 3767 ill = ipif->ipif_ill; 3768 } else { 3769 ipif = NULL; 3770 ill = ilg->ilg_ill; 3771 } 3772 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3773 /* 3774 * The ilg has changed under us probably due 3775 * to a failover or unplumb. Retry on the same ilg. 3776 */ 3777 mutex_exit(&connp->conn_lock); 3778 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3779 mutex_enter(&connp->conn_lock); 3780 continue; 3781 } 3782 v6group = ilg->ilg_v6group; 3783 orig_ifindex = ilg->ilg_orig_ifindex; 3784 ilg_delete(connp, ilg, NULL); 3785 mutex_exit(&connp->conn_lock); 3786 3787 if (ipif != NULL) 3788 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3789 B_FALSE, B_TRUE); 3790 3791 else 3792 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3793 connp->conn_zoneid, B_FALSE, B_TRUE); 3794 3795 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3796 mutex_enter(&connp->conn_lock); 3797 /* Go to the next ilg */ 3798 i--; 3799 } 3800 ILG_WALKER_RELE(connp); 3801 3802 /* If any ill was skipped above wait and retry */ 3803 if (connp->conn_ilg_inuse != 0) { 3804 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3805 goto retry; 3806 } 3807 mutex_exit(&connp->conn_lock); 3808 } 3809 3810 /* 3811 * Called from ill close by ipcl_walk for clearing conn_ilg and 3812 * conn_multicast_ipif for a given ipif. conn is held by caller. 3813 * Note that ipcl_walk only walks conns that are not yet condemned. 3814 * condemned conns can't be refheld. For this reason, conn must become clean 3815 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3816 * condemned flag. 3817 */ 3818 static void 3819 conn_delete_ipif(conn_t *connp, caddr_t arg) 3820 { 3821 ipif_t *ipif = (ipif_t *)arg; 3822 int i; 3823 char group_buf1[INET6_ADDRSTRLEN]; 3824 char group_buf2[INET6_ADDRSTRLEN]; 3825 ipaddr_t group; 3826 ilg_t *ilg; 3827 3828 /* 3829 * Even though conn_ilg_inuse can change while we are in this loop, 3830 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3831 * be created or deleted for this connp, on this ill, since this ill 3832 * is the perimeter. So we won't miss any ilg in this cleanup. 3833 */ 3834 mutex_enter(&connp->conn_lock); 3835 3836 /* 3837 * Increment the walker count, so that ilg repacking does not 3838 * occur while we are in the loop. 3839 */ 3840 ILG_WALKER_HOLD(connp); 3841 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3842 ilg = &connp->conn_ilg[i]; 3843 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3844 continue; 3845 /* 3846 * ip_close cannot be cleaning this ilg at the same time. 3847 * since it also has to execute in this ill's perimeter which 3848 * we are now holding. Only a clean conn can be condemned. 
3849 */ 3850 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3851 3852 /* Blow away the membership */ 3853 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3854 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3855 group_buf1, sizeof (group_buf1)), 3856 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3857 group_buf2, sizeof (group_buf2)), 3858 ipif->ipif_ill->ill_name)); 3859 3860 /* ilg_ipif is NULL for V6, so we won't be here */ 3861 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3862 3863 group = V4_PART_OF_V6(ilg->ilg_v6group); 3864 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3865 mutex_exit(&connp->conn_lock); 3866 3867 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3868 mutex_enter(&connp->conn_lock); 3869 } 3870 3871 /* 3872 * If we are the last walker, need to physically delete the 3873 * ilgs and repack. 3874 */ 3875 ILG_WALKER_RELE(connp); 3876 3877 if (connp->conn_multicast_ipif == ipif) { 3878 /* Revert to late binding */ 3879 connp->conn_multicast_ipif = NULL; 3880 } 3881 mutex_exit(&connp->conn_lock); 3882 3883 conn_delete_ire(connp, (caddr_t)ipif); 3884 } 3885 3886 /* 3887 * Called from ill close by ipcl_walk for clearing conn_ilg and 3888 * conn_multicast_ill for a given ill. conn is held by caller. 3889 * Note that ipcl_walk only walks conns that are not yet condemned. 3890 * condemned conns can't be refheld. For this reason, conn must become clean 3891 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3892 * condemned flag. 3893 */ 3894 static void 3895 conn_delete_ill(conn_t *connp, caddr_t arg) 3896 { 3897 ill_t *ill = (ill_t *)arg; 3898 int i; 3899 char group_buf[INET6_ADDRSTRLEN]; 3900 in6_addr_t v6group; 3901 int orig_ifindex; 3902 ilg_t *ilg; 3903 3904 /* 3905 * Even though conn_ilg_inuse can change while we are in this loop, 3906 * no new ilgs can be created/deleted for this connp, on this 3907 * ill, since this ill is the perimeter. So we won't miss any ilg 3908 * in this cleanup. 
3909 */ 3910 mutex_enter(&connp->conn_lock); 3911 3912 /* 3913 * Increment the walker count, so that ilg repacking does not 3914 * occur while we are in the loop. 3915 */ 3916 ILG_WALKER_HOLD(connp); 3917 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3918 ilg = &connp->conn_ilg[i]; 3919 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3920 /* 3921 * ip_close cannot be cleaning this ilg at the same 3922 * time, since it also has to execute in this ill's 3923 * perimeter which we are now holding. Only a clean 3924 * conn can be condemned. 3925 */ 3926 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3927 3928 /* Blow away the membership */ 3929 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3930 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3931 group_buf, sizeof (group_buf)), 3932 ill->ill_name)); 3933 3934 v6group = ilg->ilg_v6group; 3935 orig_ifindex = ilg->ilg_orig_ifindex; 3936 ilg_delete(connp, ilg, NULL); 3937 mutex_exit(&connp->conn_lock); 3938 3939 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3940 connp->conn_zoneid, B_FALSE, B_TRUE); 3941 mutex_enter(&connp->conn_lock); 3942 } 3943 } 3944 /* 3945 * If we are the last walker, need to physically delete the 3946 * ilgs and repack. 3947 */ 3948 ILG_WALKER_RELE(connp); 3949 3950 if (connp->conn_multicast_ill == ill) { 3951 /* Revert to late binding */ 3952 connp->conn_multicast_ill = NULL; 3953 connp->conn_orig_multicast_ifindex = 0; 3954 } 3955 mutex_exit(&connp->conn_lock); 3956 } 3957 3958 /* 3959 * Called when an ipif is unplumbed to make sure that there are no 3960 * dangling conn references to that ipif. 3961 * Handles ilg_ipif and conn_multicast_ipif 3962 */ 3963 void 3964 reset_conn_ipif(ipif) 3965 ipif_t *ipif; 3966 { 3967 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 3968 3969 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 3970 } 3971 3972 /* 3973 * Called when an ill is unplumbed to make sure that there are no 3974 * dangling conn references to that ill. 
3975 * Handles ilg_ill, conn_multicast_ill. 3976 */ 3977 void 3978 reset_conn_ill(ill_t *ill) 3979 { 3980 ip_stack_t *ipst = ill->ill_ipst; 3981 3982 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 3983 } 3984 3985 #ifdef DEBUG 3986 /* 3987 * Walk functions walk all the interfaces in the system to make 3988 * sure that there is no refernece to the ipif or ill that is 3989 * going away. 3990 */ 3991 int 3992 ilm_walk_ill(ill_t *ill) 3993 { 3994 int cnt = 0; 3995 ill_t *till; 3996 ilm_t *ilm; 3997 ill_walk_context_t ctx; 3998 ip_stack_t *ipst = ill->ill_ipst; 3999 4000 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4001 till = ILL_START_WALK_ALL(&ctx, ipst); 4002 for (; till != NULL; till = ill_next(&ctx, till)) { 4003 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4004 if (ilm->ilm_ill == ill) { 4005 cnt++; 4006 } 4007 } 4008 } 4009 rw_exit(&ipst->ips_ill_g_lock); 4010 4011 return (cnt); 4012 } 4013 4014 /* 4015 * This function is called before the ipif is freed. 4016 */ 4017 int 4018 ilm_walk_ipif(ipif_t *ipif) 4019 { 4020 int cnt = 0; 4021 ill_t *till; 4022 ilm_t *ilm; 4023 ill_walk_context_t ctx; 4024 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4025 4026 till = ILL_START_WALK_ALL(&ctx, ipst); 4027 for (; till != NULL; till = ill_next(&ctx, till)) { 4028 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4029 if (ilm->ilm_ipif == ipif) { 4030 cnt++; 4031 } 4032 } 4033 } 4034 return (cnt); 4035 } 4036 #endif 4037