1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/zone.h> 37 38 #include <sys/param.h> 39 #include <sys/socket.h> 40 #include <sys/sockio.h> 41 #include <net/if.h> 42 #include <sys/systm.h> 43 #include <net/route.h> 44 #include <netinet/in.h> 45 #include <net/if_dl.h> 46 #include <netinet/ip6.h> 47 #include <netinet/icmp6.h> 48 49 #include <inet/common.h> 50 #include <inet/mi.h> 51 #include <inet/nd.h> 52 #include <inet/arp.h> 53 #include <inet/ip.h> 54 #include <inet/ip6.h> 55 #include <inet/ip_if.h> 56 #include <inet/ip_ndp.h> 57 #include <inet/ip_multi.h> 58 #include <inet/ipclassifier.h> 59 #include <inet/ipsec_impl.h> 60 #include <inet/sctp_ip.h> 61 #include <inet/ip_listutils.h> 62 #include <inet/udp_impl.h> 63 64 /* igmpv3/mldv2 source filter manipulation */ 65 static void ilm_bld_flists(conn_t *conn, void *arg); 66 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 67 slist_t *flist); 68 69 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 70 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 71 int orig_ifindex, zoneid_t zoneid); 72 static void ilm_delete(ilm_t *ilm); 73 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 74 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 76 const in6_addr_t *v6group, int index); 77 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 78 ipif_t *ipif); 79 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 80 mcast_record_t fmode, ipaddr_t src); 81 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 82 mcast_record_t fmode, const in6_addr_t *v6src); 83 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 84 static mblk_t 
*ill_create_dl(ill_t *ill, uint32_t dl_primitive, 85 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 86 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 87 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 88 static void conn_ilg_reap(conn_t *connp); 89 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 90 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 91 static int ip_opt_delete_group_excl_v6(conn_t *connp, 92 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 93 const in6_addr_t *v6src); 94 95 /* 96 * MT notes: 97 * 98 * Multicast joins operate on both the ilg and ilm structures. Multiple 99 * threads operating on a conn (socket) trying to do multicast joins 100 * need to synchronize when operating on the ilg. Multiple threads 101 * potentially operating on different conn (socket endpoints) trying to 102 * do multicast joins could eventually end up trying to manipulate the 103 * ilm simultaneously and need to synchronize on the access to the ilm. 104 * Both are amenable to standard Solaris MT techniques, but it would be 105 * complex to handle a failover or failback which needs to manipulate 106 * ilg/ilms if an application can also simultaneously join/leave 107 * multicast groups. Hence multicast join/leave also go through the ipsq_t 108 * serialization. 109 * 110 * Multicast joins and leaves are single-threaded per phyint/IPMP group 111 * using the ipsq serialization mechanism. 112 * 113 * An ilm is an IP data structure used to track multicast join/leave. 114 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 115 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 116 * referencing the ilm. ilms are created / destroyed only as writer. ilms 117 * are not passed around, instead they are looked up and used under the 118 * ill_lock or as writer. So we don't need a dynamic refcount of the number 119 * of threads holding reference to an ilm. 
120 * 121 * Multicast Join operation: 122 * 123 * The first step is to determine the ipif (v4) or ill (v6) on which 124 * the join operation is to be done. The join is done after becoming 125 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 126 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 127 * Multiple threads can attempt to join simultaneously on different ipif/ill 128 * on the same conn. In this case the ipsq serialization does not help in 129 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 130 * The conn_lock also protects all the ilg_t members. 131 * 132 * Leave operation. 133 * 134 * Similar to the join operation, the first step is to determine the ipif 135 * or ill (v6) on which the leave operation is to be done. The leave operation 136 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 137 * As with join ilg modification is done under the protection of the conn lock. 138 */ 139 140 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 141 ASSERT(connp != NULL); \ 142 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 143 (first_mp), (func), (type), B_TRUE); \ 144 if ((ipsq) == NULL) { \ 145 ipif_refrele(ipif); \ 146 return (EINPROGRESS); \ 147 } 148 149 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 150 ASSERT(connp != NULL); \ 151 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 152 (first_mp), (func), (type), B_TRUE); \ 153 if ((ipsq) == NULL) { \ 154 ill_refrele(ill); \ 155 return (EINPROGRESS); \ 156 } 157 158 #define IPSQ_EXIT(ipsq) \ 159 if (ipsq != NULL) \ 160 ipsq_exit(ipsq, B_TRUE, B_TRUE); 161 162 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 163 164 #define ILG_WALKER_RELE(connp) \ 165 { \ 166 (connp)->conn_ilg_walker_cnt--; \ 167 if ((connp)->conn_ilg_walker_cnt == 0) \ 168 conn_ilg_reap(connp); \ 169 } 170 171 static void 172 conn_ilg_reap(conn_t *connp) 173 { 174 int to; 
175 int from; 176 177 ASSERT(MUTEX_HELD(&connp->conn_lock)); 178 179 to = 0; 180 from = 0; 181 while (from < connp->conn_ilg_inuse) { 182 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 183 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 184 from++; 185 continue; 186 } 187 if (to != from) 188 connp->conn_ilg[to] = connp->conn_ilg[from]; 189 to++; 190 from++; 191 } 192 193 connp->conn_ilg_inuse = to; 194 195 if (connp->conn_ilg_inuse == 0) { 196 mi_free((char *)connp->conn_ilg); 197 connp->conn_ilg = NULL; 198 cv_broadcast(&connp->conn_refcv); 199 } 200 } 201 202 #define GETSTRUCT(structure, number) \ 203 ((structure *)mi_zalloc(sizeof (structure) * (number))) 204 205 #define ILG_ALLOC_CHUNK 16 206 207 /* 208 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 209 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 210 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 211 * returned ilg). Returns NULL on failure (ENOMEM). 212 * 213 * Assumes connp->conn_lock is held. 
214 */ 215 static ilg_t * 216 conn_ilg_alloc(conn_t *connp) 217 { 218 ilg_t *new; 219 int curcnt; 220 221 ASSERT(MUTEX_HELD(&connp->conn_lock)); 222 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 223 224 if (connp->conn_ilg == NULL) { 225 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 226 if (connp->conn_ilg == NULL) 227 return (NULL); 228 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 229 connp->conn_ilg_inuse = 0; 230 } 231 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 232 curcnt = connp->conn_ilg_allocated; 233 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 234 if (new == NULL) 235 return (NULL); 236 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 237 mi_free((char *)connp->conn_ilg); 238 connp->conn_ilg = new; 239 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 240 } 241 242 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 243 } 244 245 typedef struct ilm_fbld_s { 246 ilm_t *fbld_ilm; 247 int fbld_in_cnt; 248 int fbld_ex_cnt; 249 slist_t fbld_in; 250 slist_t fbld_ex; 251 boolean_t fbld_in_overflow; 252 } ilm_fbld_t; 253 254 static void 255 ilm_bld_flists(conn_t *conn, void *arg) 256 { 257 int i; 258 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 259 ilm_t *ilm = fbld->fbld_ilm; 260 in6_addr_t *v6group = &ilm->ilm_v6addr; 261 262 if (conn->conn_ilg_inuse == 0) 263 return; 264 265 /* 266 * Since we can't break out of the ipcl_walk once started, we still 267 * have to look at every conn. But if we've already found one 268 * (EXCLUDE, NULL) list, there's no need to keep checking individual 269 * ilgs--that will be our state. 270 */ 271 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 272 return; 273 274 /* 275 * Check this conn's ilgs to see if any are interested in our 276 * ilm (group, interface match). If so, update the master 277 * include and exclude lists we're building in the fbld struct 278 * with this ilg's filter info. 
279 */ 280 mutex_enter(&conn->conn_lock); 281 for (i = 0; i < conn->conn_ilg_inuse; i++) { 282 ilg_t *ilg = &conn->conn_ilg[i]; 283 if ((ilg->ilg_ill == ilm->ilm_ill) && 284 (ilg->ilg_ipif == ilm->ilm_ipif) && 285 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 286 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 287 fbld->fbld_in_cnt++; 288 if (!fbld->fbld_in_overflow) 289 l_union_in_a(&fbld->fbld_in, 290 ilg->ilg_filter, 291 &fbld->fbld_in_overflow); 292 } else { 293 fbld->fbld_ex_cnt++; 294 /* 295 * On the first exclude list, don't try to do 296 * an intersection, as the master exclude list 297 * is intentionally empty. If the master list 298 * is still empty on later iterations, that 299 * means we have at least one ilg with an empty 300 * exclude list, so that should be reflected 301 * when we take the intersection. 302 */ 303 if (fbld->fbld_ex_cnt == 1) { 304 if (ilg->ilg_filter != NULL) 305 l_copy(ilg->ilg_filter, 306 &fbld->fbld_ex); 307 } else { 308 l_intersection_in_a(&fbld->fbld_ex, 309 ilg->ilg_filter); 310 } 311 } 312 /* there will only be one match, so break now. 
*/ 313 break; 314 } 315 } 316 mutex_exit(&conn->conn_lock); 317 } 318 319 static void 320 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 321 { 322 ilm_fbld_t fbld; 323 324 fbld.fbld_ilm = ilm; 325 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 326 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 327 fbld.fbld_in_overflow = B_FALSE; 328 329 /* first, construct our master include and exclude lists */ 330 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld); 331 332 /* now use those master lists to generate the interface filter */ 333 334 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 335 if (fbld.fbld_in_overflow) { 336 *fmode = MODE_IS_EXCLUDE; 337 flist->sl_numsrc = 0; 338 return; 339 } 340 341 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 342 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 343 *fmode = MODE_IS_INCLUDE; 344 flist->sl_numsrc = 0; 345 return; 346 } 347 348 /* 349 * If there are no exclude lists, then the interface filter 350 * is INCLUDE, with its filter list equal to fbld_in. A single 351 * exclude list makes the interface filter EXCLUDE, with its 352 * filter list equal to (fbld_ex - fbld_in). 353 */ 354 if (fbld.fbld_ex_cnt == 0) { 355 *fmode = MODE_IS_INCLUDE; 356 l_copy(&fbld.fbld_in, flist); 357 } else { 358 *fmode = MODE_IS_EXCLUDE; 359 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 360 } 361 } 362 363 /* 364 * If the given interface has failed, choose a new one to join on so 365 * that we continue to receive packets. ilg_orig_ifindex remembers 366 * what the application used to join on so that we know the ilg to 367 * delete even though we change the ill here. Callers will store the 368 * ilg returned from this function in ilg_ill. Thus when we receive 369 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 370 * 371 * This function must be called as writer so we can walk the group 372 * list and examine flags without holding a lock. 
373 */ 374 ill_t * 375 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 376 { 377 ill_t *till; 378 ill_group_t *illgrp = ill->ill_group; 379 380 ASSERT(IAM_WRITER_ILL(ill)); 381 382 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 383 return (ill); 384 385 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 386 return (ill); 387 388 till = illgrp->illgrp_ill; 389 while (till != NULL && 390 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 391 till = till->ill_group_next; 392 } 393 if (till != NULL) 394 return (till); 395 396 return (ill); 397 } 398 399 static int 400 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 401 boolean_t isv6) 402 { 403 mcast_record_t fmode; 404 slist_t *flist; 405 boolean_t fdefault; 406 char buf[INET6_ADDRSTRLEN]; 407 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 408 409 /* 410 * There are several cases where the ilm's filter state 411 * defaults to (EXCLUDE, NULL): 412 * - we've had previous joins without associated ilgs 413 * - this join has no associated ilg 414 * - the ilg's filter state is (EXCLUDE, NULL) 415 */ 416 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 417 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 418 419 /* attempt mallocs (if needed) before doing anything else */ 420 if ((flist = l_alloc()) == NULL) 421 return (ENOMEM); 422 if (!fdefault && ilm->ilm_filter == NULL) { 423 ilm->ilm_filter = l_alloc(); 424 if (ilm->ilm_filter == NULL) { 425 l_free(flist); 426 return (ENOMEM); 427 } 428 } 429 430 if (ilgstat != ILGSTAT_CHANGE) 431 ilm->ilm_refcnt++; 432 433 if (ilgstat == ILGSTAT_NONE) 434 ilm->ilm_no_ilg_cnt++; 435 436 /* 437 * Determine new filter state. If it's not the default 438 * (EXCLUDE, NULL), we must walk the conn list to find 439 * any ilgs interested in this group, and re-build the 440 * ilm filter. 
441 */ 442 if (fdefault) { 443 fmode = MODE_IS_EXCLUDE; 444 flist->sl_numsrc = 0; 445 } else { 446 ilm_gen_filter(ilm, &fmode, flist); 447 } 448 449 /* make sure state actually changed; nothing to do if not. */ 450 if ((ilm->ilm_fmode == fmode) && 451 !lists_are_different(ilm->ilm_filter, flist)) { 452 l_free(flist); 453 return (0); 454 } 455 456 /* send the state change report */ 457 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 458 if (isv6) 459 mld_statechange(ilm, fmode, flist); 460 else 461 igmp_statechange(ilm, fmode, flist); 462 } 463 464 /* update the ilm state */ 465 ilm->ilm_fmode = fmode; 466 if (flist->sl_numsrc > 0) 467 l_copy(flist, ilm->ilm_filter); 468 else 469 CLEAR_SLIST(ilm->ilm_filter); 470 471 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 472 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 473 474 l_free(flist); 475 return (0); 476 } 477 478 static int 479 ilm_update_del(ilm_t *ilm, boolean_t isv6) 480 { 481 mcast_record_t fmode; 482 slist_t *flist; 483 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 484 485 ip1dbg(("ilm_update_del: still %d left; updating state\n", 486 ilm->ilm_refcnt)); 487 488 if ((flist = l_alloc()) == NULL) 489 return (ENOMEM); 490 491 /* 492 * If present, the ilg in question has already either been 493 * updated or removed from our list; so all we need to do 494 * now is walk the list to update the ilm filter state. 495 * 496 * Skip the list walk if we have any no-ilg joins, which 497 * cause the filter state to revert to (EXCLUDE, NULL). 
498 */ 499 if (ilm->ilm_no_ilg_cnt != 0) { 500 fmode = MODE_IS_EXCLUDE; 501 flist->sl_numsrc = 0; 502 } else { 503 ilm_gen_filter(ilm, &fmode, flist); 504 } 505 506 /* check to see if state needs to be updated */ 507 if ((ilm->ilm_fmode == fmode) && 508 (!lists_are_different(ilm->ilm_filter, flist))) { 509 l_free(flist); 510 return (0); 511 } 512 513 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 514 if (isv6) 515 mld_statechange(ilm, fmode, flist); 516 else 517 igmp_statechange(ilm, fmode, flist); 518 } 519 520 ilm->ilm_fmode = fmode; 521 if (flist->sl_numsrc > 0) { 522 if (ilm->ilm_filter == NULL) { 523 ilm->ilm_filter = l_alloc(); 524 if (ilm->ilm_filter == NULL) { 525 char buf[INET6_ADDRSTRLEN]; 526 ip1dbg(("ilm_update_del: failed to alloc ilm " 527 "filter; no source filtering for %s on %s", 528 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 529 buf, sizeof (buf)), ill->ill_name)); 530 ilm->ilm_fmode = MODE_IS_EXCLUDE; 531 l_free(flist); 532 return (0); 533 } 534 } 535 l_copy(flist, ilm->ilm_filter); 536 } else { 537 CLEAR_SLIST(ilm->ilm_filter); 538 } 539 540 l_free(flist); 541 return (0); 542 } 543 544 /* 545 * INADDR_ANY means all multicast addresses. This is only used 546 * by the multicast router. 547 * INADDR_ANY is stored as IPv6 unspecified addr. 548 */ 549 int 550 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 551 mcast_record_t ilg_fmode, slist_t *ilg_flist) 552 { 553 ill_t *ill = ipif->ipif_ill; 554 ilm_t *ilm; 555 in6_addr_t v6group; 556 int ret; 557 558 ASSERT(IAM_WRITER_IPIF(ipif)); 559 560 if (!CLASSD(group) && group != INADDR_ANY) 561 return (EINVAL); 562 563 /* 564 * INADDR_ANY is represented as the IPv6 unspecifed addr. 565 */ 566 if (group == INADDR_ANY) 567 v6group = ipv6_all_zeros; 568 else 569 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 570 571 ilm = ilm_lookup_ipif(ipif, group); 572 if (ilm != NULL) 573 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 574 575 /* 576 * ilms are associated with ipifs in IPv4. 
It moves with the 577 * ipif if the ipif moves to a new ill when the interface 578 * fails. Thus we really don't check whether the ipif_ill 579 * has failed like in IPv6. If it has FAILED the ipif 580 * will move (daemon will move it) and hence the ilm, if the 581 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 582 * we continue to receive in the same place even if the 583 * interface fails. 584 */ 585 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 586 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 587 if (ilm == NULL) 588 return (ENOMEM); 589 590 if (group == INADDR_ANY) { 591 /* 592 * Check how many ipif's have members in this group - 593 * if more then one we should not tell the driver to join 594 * this time 595 */ 596 if (ilm_numentries_v6(ill, &v6group) > 1) 597 return (0); 598 if (ill->ill_group == NULL) 599 ret = ip_join_allmulti(ipif); 600 else 601 ret = ill_nominate_mcast_rcv(ill->ill_group); 602 if (ret != 0) 603 ilm_delete(ilm); 604 return (ret); 605 } 606 607 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 608 igmp_joingroup(ilm); 609 610 if (ilm_numentries_v6(ill, &v6group) > 1) 611 return (0); 612 613 ret = ip_ll_addmulti_v6(ipif, &v6group); 614 if (ret != 0) 615 ilm_delete(ilm); 616 return (ret); 617 } 618 619 /* 620 * The unspecified address means all multicast addresses. 621 * This is only used by the multicast router. 622 * 623 * ill identifies the interface to join on; it may not match the 624 * interface requested by the application of a failover has taken 625 * place. orig_ifindex always identifies the interface requested 626 * by the app. 627 * 628 * ilgstat tells us if there's an ilg associated with this join, 629 * and if so, if it's a new ilg or a change to an existing one. 630 * ilg_fmode and ilg_flist give us the current filter state of 631 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 
632 */ 633 int 634 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 635 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 636 slist_t *ilg_flist) 637 { 638 ilm_t *ilm; 639 int ret; 640 641 ASSERT(IAM_WRITER_ILL(ill)); 642 643 if (!IN6_IS_ADDR_MULTICAST(v6group) && 644 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 645 return (EINVAL); 646 } 647 648 /* 649 * An ilm is uniquely identified by the tuple of (group, ill, 650 * orig_ill). group is the multicast group address, ill is 651 * the interface on which it is currently joined, and orig_ill 652 * is the interface on which the application requested the 653 * join. orig_ill and ill are the same unless orig_ill has 654 * failed over. 655 * 656 * Both orig_ill and ill are required, which means we may have 657 * 2 ilms on an ill for the same group, but with different 658 * orig_ills. These must be kept separate, so that when failback 659 * occurs, the appropriate ilms are moved back to their orig_ill 660 * without disrupting memberships on the ill to which they had 661 * been moved. 662 * 663 * In order to track orig_ill, we store orig_ifindex in the 664 * ilm and ilg. 665 */ 666 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 667 if (ilm != NULL) 668 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 669 670 /* 671 * We need to remember where the application really wanted 672 * to join. This will be used later if we want to failback 673 * to the original interface. 
674 */ 675 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 676 ilg_flist, orig_ifindex, zoneid); 677 if (ilm == NULL) 678 return (ENOMEM); 679 680 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 681 /* 682 * Check how many ipif's that have members in this group - 683 * if more then one we should not tell the driver to join 684 * this time 685 */ 686 if (ilm_numentries_v6(ill, v6group) > 1) 687 return (0); 688 if (ill->ill_group == NULL) 689 ret = ip_join_allmulti(ill->ill_ipif); 690 else 691 ret = ill_nominate_mcast_rcv(ill->ill_group); 692 693 if (ret != 0) 694 ilm_delete(ilm); 695 return (ret); 696 } 697 698 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 699 mld_joingroup(ilm); 700 701 /* 702 * If we have more then one we should not tell the driver 703 * to join this time. 704 */ 705 if (ilm_numentries_v6(ill, v6group) > 1) 706 return (0); 707 708 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 709 if (ret != 0) 710 ilm_delete(ilm); 711 return (ret); 712 } 713 714 /* 715 * Send a multicast request to the driver for enabling multicast reception 716 * for v6groupp address. The caller has already checked whether it is 717 * appropriate to send one or not. 718 */ 719 int 720 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 721 { 722 mblk_t *mp; 723 uint32_t addrlen, addroff; 724 char group_buf[INET6_ADDRSTRLEN]; 725 726 ASSERT(IAM_WRITER_ILL(ill)); 727 728 /* 729 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 730 * on. 731 */ 732 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 733 &addrlen, &addroff); 734 if (!mp) 735 return (ENOMEM); 736 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 737 ipaddr_t v4group; 738 739 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 740 /* 741 * NOTE!!! 742 * The "addroff" passed in here was calculated by 743 * ill_create_dl(), and will be used by ill_create_squery() 744 * to perform some twisted coding magic. It is the offset 745 * into the dl_xxx_req of the hw addr. 
Here, it will be 746 * added to b_wptr - b_rptr to create a magic number that 747 * is not an offset into this squery mblk. 748 * The actual hardware address will be accessed only in the 749 * dl_xxx_req, not in the squery. More importantly, 750 * that hardware address can *only* be accessed in this 751 * mblk chain by calling mi_offset_param_c(), which uses 752 * the magic number in the squery hw offset field to go 753 * to the *next* mblk (the dl_xxx_req), subtract the 754 * (b_wptr - b_rptr), and find the actual offset into 755 * the dl_xxx_req. 756 * Any method that depends on using the 757 * offset field in the dl_disabmulti_req or squery 758 * to find either hardware address will similarly fail. 759 * 760 * Look in ar_entry_squery() in arp.c to see how this offset 761 * is used. 762 */ 763 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 764 if (!mp) 765 return (ENOMEM); 766 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 767 inet_ntop(AF_INET6, v6groupp, group_buf, 768 sizeof (group_buf)), 769 ill->ill_name)); 770 putnext(ill->ill_rq, mp); 771 } else { 772 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on" 773 " %s\n", 774 inet_ntop(AF_INET6, v6groupp, group_buf, 775 sizeof (group_buf)), 776 ill->ill_name)); 777 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 778 } 779 return (0); 780 } 781 782 /* 783 * Send a multicast request to the driver for enabling multicast 784 * membership for v6group if appropriate. 
785 */ 786 static int 787 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 788 { 789 ill_t *ill = ipif->ipif_ill; 790 791 ASSERT(IAM_WRITER_IPIF(ipif)); 792 793 if (ill->ill_net_type != IRE_IF_RESOLVER || 794 ipif->ipif_flags & IPIF_POINTOPOINT) { 795 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 796 return (0); /* Must be IRE_IF_NORESOLVER */ 797 } 798 799 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 800 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 801 return (0); 802 } 803 if (ill->ill_ipif_up_count == 0) { 804 /* 805 * Nobody there. All multicast addresses will be re-joined 806 * when we get the DL_BIND_ACK bringing the interface up. 807 */ 808 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 809 return (0); 810 } 811 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 812 } 813 814 /* 815 * INADDR_ANY means all multicast addresses. This is only used 816 * by the multicast router. 817 * INADDR_ANY is stored as the IPv6 unspecifed addr. 818 */ 819 int 820 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 821 { 822 ill_t *ill = ipif->ipif_ill; 823 ilm_t *ilm; 824 in6_addr_t v6group; 825 int ret; 826 827 ASSERT(IAM_WRITER_IPIF(ipif)); 828 829 if (!CLASSD(group) && group != INADDR_ANY) 830 return (EINVAL); 831 832 /* 833 * INADDR_ANY is represented as the IPv6 unspecifed addr. 834 */ 835 if (group == INADDR_ANY) 836 v6group = ipv6_all_zeros; 837 else 838 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 839 840 /* 841 * Look for a match on the ipif. 842 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 
843 */ 844 ilm = ilm_lookup_ipif(ipif, group); 845 if (ilm == NULL) 846 return (ENOENT); 847 848 /* Update counters */ 849 if (no_ilg) 850 ilm->ilm_no_ilg_cnt--; 851 852 if (leaving) 853 ilm->ilm_refcnt--; 854 855 if (ilm->ilm_refcnt > 0) 856 return (ilm_update_del(ilm, B_FALSE)); 857 858 if (group == INADDR_ANY) { 859 ilm_delete(ilm); 860 /* 861 * Check how many ipif's that have members in this group - 862 * if there are still some left then don't tell the driver 863 * to drop it. 864 */ 865 if (ilm_numentries_v6(ill, &v6group) != 0) 866 return (0); 867 868 /* 869 * If we never joined, then don't leave. This can happen 870 * if we're in an IPMP group, since only one ill per IPMP 871 * group receives all multicast packets. 872 */ 873 if (!ill->ill_join_allmulti) { 874 ASSERT(ill->ill_group != NULL); 875 return (0); 876 } 877 878 ret = ip_leave_allmulti(ipif); 879 if (ill->ill_group != NULL) 880 (void) ill_nominate_mcast_rcv(ill->ill_group); 881 return (ret); 882 } 883 884 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 885 igmp_leavegroup(ilm); 886 887 ilm_delete(ilm); 888 /* 889 * Check how many ipif's that have members in this group - 890 * if there are still some left then don't tell the driver 891 * to drop it. 892 */ 893 if (ilm_numentries_v6(ill, &v6group) != 0) 894 return (0); 895 return (ip_ll_delmulti_v6(ipif, &v6group)); 896 } 897 898 /* 899 * The unspecified address means all multicast addresses. 900 * This is only used by the multicast router. 901 */ 902 int 903 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 904 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 905 { 906 ipif_t *ipif; 907 ilm_t *ilm; 908 int ret; 909 910 ASSERT(IAM_WRITER_ILL(ill)); 911 912 if (!IN6_IS_ADDR_MULTICAST(v6group) && 913 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 914 return (EINVAL); 915 916 /* 917 * Look for a match on the ill. 918 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 
919 * 920 * Similar to ip_addmulti_v6, we should always look using 921 * the orig_ifindex. 922 * 923 * 1) If orig_ifindex is different from ill's ifindex 924 * we should have an ilm with orig_ifindex created in 925 * ip_addmulti_v6. We should delete that here. 926 * 927 * 2) If orig_ifindex is same as ill's ifindex, we should 928 * not delete the ilm that is temporarily here because of 929 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 930 * different from ill's ifindex. 931 * 932 * Thus, always lookup using orig_ifindex. 933 */ 934 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 935 if (ilm == NULL) 936 return (ENOENT); 937 938 ASSERT(ilm->ilm_ill == ill); 939 940 ipif = ill->ill_ipif; 941 942 /* Update counters */ 943 if (no_ilg) 944 ilm->ilm_no_ilg_cnt--; 945 946 if (leaving) 947 ilm->ilm_refcnt--; 948 949 if (ilm->ilm_refcnt > 0) 950 return (ilm_update_del(ilm, B_TRUE)); 951 952 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 953 ilm_delete(ilm); 954 /* 955 * Check how many ipif's that have members in this group - 956 * if there are still some left then don't tell the driver 957 * to drop it. 958 */ 959 if (ilm_numentries_v6(ill, v6group) != 0) 960 return (0); 961 962 /* 963 * If we never joined, then don't leave. This can happen 964 * if we're in an IPMP group, since only one ill per IPMP 965 * group receives all multicast packets. 966 */ 967 if (!ill->ill_join_allmulti) { 968 ASSERT(ill->ill_group != NULL); 969 return (0); 970 } 971 972 ret = ip_leave_allmulti(ipif); 973 if (ill->ill_group != NULL) 974 (void) ill_nominate_mcast_rcv(ill->ill_group); 975 return (ret); 976 } 977 978 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 979 mld_leavegroup(ilm); 980 981 ilm_delete(ilm); 982 /* 983 * Check how many ipif's that have members in this group - 984 * if there are still some left then don't tell the driver 985 * to drop it. 
986 */ 987 if (ilm_numentries_v6(ill, v6group) != 0) 988 return (0); 989 return (ip_ll_delmulti_v6(ipif, v6group)); 990 } 991 992 /* 993 * Send a multicast request to the driver for disabling multicast reception 994 * for v6groupp address. The caller has already checked whether it is 995 * appropriate to send one or not. 996 */ 997 int 998 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 999 { 1000 mblk_t *mp; 1001 char group_buf[INET6_ADDRSTRLEN]; 1002 uint32_t addrlen, addroff; 1003 1004 ASSERT(IAM_WRITER_ILL(ill)); 1005 /* 1006 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1007 * on. 1008 */ 1009 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1010 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1011 1012 if (!mp) 1013 return (ENOMEM); 1014 1015 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1016 ipaddr_t v4group; 1017 1018 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1019 /* 1020 * NOTE!!! 1021 * The "addroff" passed in here was calculated by 1022 * ill_create_dl(), and will be used by ill_create_squery() 1023 * to perform some twisted coding magic. It is the offset 1024 * into the dl_xxx_req of the hw addr. Here, it will be 1025 * added to b_wptr - b_rptr to create a magic number that 1026 * is not an offset into this mblk. 1027 * 1028 * Please see the comment in ip_ll_send)enabmulti_req() 1029 * for a complete explanation. 1030 * 1031 * Look in ar_entry_squery() in arp.c to see how this offset 1032 * is used. 
1033 */ 1034 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1035 if (!mp) 1036 return (ENOMEM); 1037 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1038 inet_ntop(AF_INET6, v6groupp, group_buf, 1039 sizeof (group_buf)), 1040 ill->ill_name)); 1041 putnext(ill->ill_rq, mp); 1042 } else { 1043 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on" 1044 " %s\n", 1045 inet_ntop(AF_INET6, v6groupp, group_buf, 1046 sizeof (group_buf)), 1047 ill->ill_name)); 1048 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1049 } 1050 return (0); 1051 } 1052 1053 /* 1054 * Send a multicast request to the driver for disabling multicast 1055 * membership for v6group if appropriate. 1056 */ 1057 static int 1058 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1059 { 1060 ill_t *ill = ipif->ipif_ill; 1061 1062 ASSERT(IAM_WRITER_IPIF(ipif)); 1063 1064 if (ill->ill_net_type != IRE_IF_RESOLVER || 1065 ipif->ipif_flags & IPIF_POINTOPOINT) { 1066 return (0); /* Must be IRE_IF_NORESOLVER */ 1067 } 1068 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1069 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1070 return (0); 1071 } 1072 if (ill->ill_ipif_up_count == 0) { 1073 /* 1074 * Nobody there. All multicast addresses will be re-joined 1075 * when we get the DL_BIND_ACK bringing the interface up. 1076 */ 1077 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1078 return (0); 1079 } 1080 return (ip_ll_send_disabmulti_req(ill, v6group)); 1081 } 1082 1083 /* 1084 * Make the driver pass up all multicast packets 1085 * 1086 * With ill groups, the caller makes sure that there is only 1087 * one ill joining the allmulti group. 1088 */ 1089 int 1090 ip_join_allmulti(ipif_t *ipif) 1091 { 1092 ill_t *ill = ipif->ipif_ill; 1093 mblk_t *mp; 1094 uint32_t addrlen, addroff; 1095 1096 ASSERT(IAM_WRITER_IPIF(ipif)); 1097 1098 if (ill->ill_ipif_up_count == 0) { 1099 /* 1100 * Nobody there. 
All multicast addresses will be re-joined 1101 * when we get the DL_BIND_ACK bringing the interface up. 1102 */ 1103 return (0); 1104 } 1105 1106 ASSERT(!ill->ill_join_allmulti); 1107 1108 /* 1109 * Create a DL_PROMISCON_REQ message and send it directly to 1110 * the DLPI provider. We don't need to do this for certain 1111 * media types for which we never need to turn promiscuous 1112 * mode on. 1113 */ 1114 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1115 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1116 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1117 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1118 if (mp == NULL) 1119 return (ENOMEM); 1120 putnext(ill->ill_wq, mp); 1121 } 1122 1123 mutex_enter(&ill->ill_lock); 1124 ill->ill_join_allmulti = B_TRUE; 1125 mutex_exit(&ill->ill_lock); 1126 return (0); 1127 } 1128 1129 /* 1130 * Make the driver stop passing up all multicast packets 1131 * 1132 * With ill groups, we need to nominate some other ill as 1133 * this ipif->ipif_ill is leaving the group. 1134 */ 1135 int 1136 ip_leave_allmulti(ipif_t *ipif) 1137 { 1138 ill_t *ill = ipif->ipif_ill; 1139 mblk_t *mp; 1140 uint32_t addrlen, addroff; 1141 1142 ASSERT(IAM_WRITER_IPIF(ipif)); 1143 1144 if (ill->ill_ipif_up_count == 0) { 1145 /* 1146 * Nobody there. All multicast addresses will be re-joined 1147 * when we get the DL_BIND_ACK bringing the interface up. 1148 */ 1149 return (0); 1150 } 1151 1152 ASSERT(ill->ill_join_allmulti); 1153 1154 /* 1155 * Create a DL_PROMISCOFF_REQ message and send it directly to 1156 * the DLPI provider. We don't need to do this for certain 1157 * media types for which we never need to turn promiscuous 1158 * mode on. 
1159 */ 1160 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1161 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1162 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1163 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1164 if (mp == NULL) 1165 return (ENOMEM); 1166 putnext(ill->ill_wq, mp); 1167 } 1168 1169 mutex_enter(&ill->ill_lock); 1170 ill->ill_join_allmulti = B_FALSE; 1171 mutex_exit(&ill->ill_lock); 1172 return (0); 1173 } 1174 1175 /* 1176 * Copy mp_orig and pass it in as a local message. 1177 */ 1178 void 1179 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1180 zoneid_t zoneid) 1181 { 1182 mblk_t *mp; 1183 mblk_t *ipsec_mp; 1184 1185 if (DB_TYPE(mp_orig) == M_DATA && 1186 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1187 uint_t hdrsz; 1188 1189 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1190 sizeof (udpha_t); 1191 ASSERT(MBLKL(mp_orig) >= hdrsz); 1192 1193 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1194 (mp_orig = dupmsg(mp_orig)) != NULL) { 1195 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1196 mp->b_wptr += hdrsz; 1197 mp->b_cont = mp_orig; 1198 mp_orig->b_rptr += hdrsz; 1199 if (MBLKL(mp_orig) == 0) { 1200 mp->b_cont = mp_orig->b_cont; 1201 mp_orig->b_cont = NULL; 1202 freeb(mp_orig); 1203 } 1204 } else if (mp != NULL) { 1205 freeb(mp); 1206 mp = NULL; 1207 } 1208 } else { 1209 mp = ip_copymsg(mp_orig); 1210 } 1211 1212 if (mp == NULL) 1213 return; 1214 if (DB_TYPE(mp) == M_CTL) { 1215 ipsec_mp = mp; 1216 mp = mp->b_cont; 1217 } else { 1218 ipsec_mp = mp; 1219 } 1220 ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL, 1221 fanout_flags, zoneid); 1222 } 1223 1224 static area_t ip_aresq_template = { 1225 AR_ENTRY_SQUERY, /* cmd */ 1226 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1227 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1228 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1229 sizeof (area_t), /* proto addr offset */ 1230 IP_ADDR_LEN, /* proto 
addr_length */ 1231 0, /* proto mask offset */ 1232 /* Rest is initialized when used */ 1233 0, /* flags */ 1234 0, /* hw addr offset */ 1235 0, /* hw addr length */ 1236 }; 1237 1238 static mblk_t * 1239 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1240 uint32_t addroff, mblk_t *mp_tail) 1241 { 1242 mblk_t *mp; 1243 area_t *area; 1244 1245 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1246 (caddr_t)&ipaddr); 1247 if (!mp) { 1248 freemsg(mp_tail); 1249 return (NULL); 1250 } 1251 area = (area_t *)mp->b_rptr; 1252 area->area_hw_addr_length = addrlen; 1253 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1254 /* 1255 * NOTE! 1256 * 1257 * The area_hw_addr_offset, as can be seen, does not hold the 1258 * actual hardware address offset. Rather, it holds the offset 1259 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1260 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1261 * mi_offset_paramc() to find the hardware address in the 1262 * *second* mblk (dl_xxx_req), not this mblk. 1263 * 1264 * Using mi_offset_paramc() is thus the *only* way to access 1265 * the dl_xxx_hw address. 1266 * 1267 * The squery hw address should *not* be accessed. 1268 * 1269 * See ar_entry_squery() in arp.c for an example of how all this works. 1270 */ 1271 1272 mp->b_cont = mp_tail; 1273 return (mp); 1274 } 1275 1276 /* 1277 * Create a dlpi message with room for phys+sap. When we come back in 1278 * ip_wput_ctl() we will strip the sap for those primitives which 1279 * only need a physical address. 
1280 */ 1281 static mblk_t * 1282 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1283 uint32_t *addr_lenp, uint32_t *addr_offp) 1284 { 1285 mblk_t *mp; 1286 uint32_t hw_addr_length; 1287 char *cp; 1288 uint32_t offset; 1289 uint32_t size; 1290 1291 *addr_lenp = *addr_offp = 0; 1292 1293 hw_addr_length = ill->ill_phys_addr_length; 1294 if (!hw_addr_length) { 1295 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1296 return (NULL); 1297 } 1298 1299 size = length; 1300 switch (dl_primitive) { 1301 case DL_ENABMULTI_REQ: 1302 case DL_DISABMULTI_REQ: 1303 size += hw_addr_length; 1304 break; 1305 case DL_PROMISCON_REQ: 1306 case DL_PROMISCOFF_REQ: 1307 break; 1308 default: 1309 return (NULL); 1310 } 1311 mp = allocb(size, BPRI_HI); 1312 if (!mp) 1313 return (NULL); 1314 mp->b_wptr += size; 1315 mp->b_datap->db_type = M_PROTO; 1316 1317 cp = (char *)mp->b_rptr; 1318 offset = length; 1319 1320 switch (dl_primitive) { 1321 case DL_ENABMULTI_REQ: { 1322 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1323 1324 dl->dl_primitive = dl_primitive; 1325 dl->dl_addr_offset = offset; 1326 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1327 *addr_offp = offset; 1328 break; 1329 } 1330 case DL_DISABMULTI_REQ: { 1331 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1332 1333 dl->dl_primitive = dl_primitive; 1334 dl->dl_addr_offset = offset; 1335 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1336 *addr_offp = offset; 1337 break; 1338 } 1339 case DL_PROMISCON_REQ: 1340 case DL_PROMISCOFF_REQ: { 1341 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1342 1343 dl->dl_primitive = dl_primitive; 1344 dl->dl_level = DL_PROMISC_MULTI; 1345 break; 1346 } 1347 } 1348 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1349 *addr_lenp, *addr_offp)); 1350 return (mp); 1351 } 1352 1353 void 1354 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1355 { 1356 ill_t *ill = (ill_t *)q->q_ptr; 1357 mblk_t *mp = mp_orig; 1358 area_t *area; 1359 1360 /* Check that we have a 
AR_ENTRY_SQUERY with a tacked on mblk */ 1361 if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) || 1362 mp->b_cont == NULL) { 1363 putnext(q, mp); 1364 return; 1365 } 1366 area = (area_t *)mp->b_rptr; 1367 if (area->area_cmd != AR_ENTRY_SQUERY) { 1368 putnext(q, mp); 1369 return; 1370 } 1371 mp = mp->b_cont; 1372 /* 1373 * Update dl_addr_length and dl_addr_offset for primitives that 1374 * have physical addresses as opposed to full saps 1375 */ 1376 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1377 case DL_ENABMULTI_REQ: 1378 /* Track the state if this is the first enabmulti */ 1379 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1380 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1381 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1382 break; 1383 case DL_DISABMULTI_REQ: 1384 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1385 break; 1386 default: 1387 ip1dbg(("ip_wput_ctl: default\n")); 1388 break; 1389 } 1390 freeb(mp_orig); 1391 putnext(q, mp); 1392 } 1393 1394 /* 1395 * Rejoin any groups which have been explicitly joined by the application (we 1396 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1397 * bringing the interface down). Note that because groups can be joined and 1398 * left while an interface is down, this may not be the same set of groups 1399 * that we left in ill_leave_multicast(). 1400 */ 1401 void 1402 ill_recover_multicast(ill_t *ill) 1403 { 1404 ilm_t *ilm; 1405 char addrbuf[INET6_ADDRSTRLEN]; 1406 1407 ASSERT(IAM_WRITER_ILL(ill)); 1408 1409 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1410 /* 1411 * Check how many ipif's that have members in this group - 1412 * if more then one we make sure that this entry is first 1413 * in the list. 
1414 */ 1415 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1416 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1417 continue; 1418 ip1dbg(("ill_recover_multicast: %s\n", 1419 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1420 sizeof (addrbuf)))); 1421 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1422 if (ill->ill_group == NULL) { 1423 (void) ip_join_allmulti(ill->ill_ipif); 1424 } else { 1425 /* 1426 * We don't want to join on this ill, 1427 * if somebody else in the group has 1428 * already been nominated. 1429 */ 1430 (void) ill_nominate_mcast_rcv(ill->ill_group); 1431 } 1432 } else { 1433 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1434 &ilm->ilm_v6addr); 1435 } 1436 } 1437 } 1438 1439 /* 1440 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1441 * that were explicitly joined. Note that both these functions could be 1442 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1443 * and DL_ENABMULTI_REQ messages when an interface is down. 1444 */ 1445 void 1446 ill_leave_multicast(ill_t *ill) 1447 { 1448 ilm_t *ilm; 1449 char addrbuf[INET6_ADDRSTRLEN]; 1450 1451 ASSERT(IAM_WRITER_ILL(ill)); 1452 1453 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1454 /* 1455 * Check how many ipif's that have members in this group - 1456 * if more then one we make sure that this entry is first 1457 * in the list. 1458 */ 1459 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1460 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1461 continue; 1462 ip1dbg(("ill_leave_multicast: %s\n", 1463 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1464 sizeof (addrbuf)))); 1465 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1466 (void) ip_leave_allmulti(ill->ill_ipif); 1467 /* 1468 * If we were part of an IPMP group, then 1469 * ill_handoff_responsibility() has already 1470 * nominated a new member (so we don't). 
1471 */ 1472 ASSERT(ill->ill_group == NULL); 1473 } else { 1474 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1475 } 1476 } 1477 } 1478 1479 /* 1480 * Find an ilm for matching the ill and which has the source in its 1481 * INCLUDE list or does not have it in its EXCLUDE list 1482 */ 1483 ilm_t * 1484 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1485 { 1486 in6_addr_t v6group, v6src; 1487 1488 /* 1489 * INADDR_ANY is represented as the IPv6 unspecified addr. 1490 */ 1491 if (group == INADDR_ANY) 1492 v6group = ipv6_all_zeros; 1493 else 1494 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1495 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1496 1497 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1498 } 1499 1500 ilm_t * 1501 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1502 const in6_addr_t *v6src) 1503 { 1504 ilm_t *ilm; 1505 boolean_t isinlist; 1506 int i, numsrc; 1507 1508 /* 1509 * If the source is in any ilm's INCLUDE list, or if 1510 * it is not in any ilm's EXCLUDE list, we have a hit. 1511 */ 1512 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1513 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1514 1515 isinlist = B_FALSE; 1516 numsrc = (ilm->ilm_filter == NULL) ? 1517 0 : ilm->ilm_filter->sl_numsrc; 1518 for (i = 0; i < numsrc; i++) { 1519 if (IN6_ARE_ADDR_EQUAL(v6src, 1520 &ilm->ilm_filter->sl_addr[i])) { 1521 isinlist = B_TRUE; 1522 break; 1523 } 1524 } 1525 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1526 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1527 return (ilm); 1528 else 1529 return (NULL); 1530 } 1531 } 1532 return (NULL); 1533 } 1534 1535 1536 /* Find an ilm for matching the ill */ 1537 ilm_t * 1538 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1539 { 1540 in6_addr_t v6group; 1541 1542 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1543 IAM_WRITER_ILL(ill)); 1544 /* 1545 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1546 */ 1547 if (group == INADDR_ANY) 1548 v6group = ipv6_all_zeros; 1549 else 1550 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1551 1552 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1553 } 1554 1555 /* 1556 * Find an ilm for matching the ill. All the ilm lookup functions 1557 * ignore ILM_DELETED ilms. These have been logically deleted, and 1558 * igmp and linklayer disable multicast have been done. Only mi_free 1559 * yet to be done. Still there in the list due to ilm_walkers. The 1560 * last walker will release it. 1561 */ 1562 ilm_t * 1563 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1564 { 1565 ilm_t *ilm; 1566 1567 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1568 IAM_WRITER_ILL(ill)); 1569 1570 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1571 if (ilm->ilm_flags & ILM_DELETED) 1572 continue; 1573 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1574 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1575 return (ilm); 1576 } 1577 return (NULL); 1578 } 1579 1580 ilm_t * 1581 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1582 zoneid_t zoneid) 1583 { 1584 ilm_t *ilm; 1585 1586 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1587 IAM_WRITER_ILL(ill)); 1588 1589 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1590 if (ilm->ilm_flags & ILM_DELETED) 1591 continue; 1592 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1593 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1594 ilm->ilm_orig_ifindex == index) { 1595 return (ilm); 1596 } 1597 } 1598 return (NULL); 1599 } 1600 1601 ilm_t * 1602 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1603 { 1604 in6_addr_t v6group; 1605 1606 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1607 IAM_WRITER_ILL(ill)); 1608 /* 1609 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1610 */ 1611 if (group == INADDR_ANY) 1612 v6group = ipv6_all_zeros; 1613 else 1614 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1615 1616 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1617 } 1618 1619 /* 1620 * Found an ilm for the ipif. Only needed for IPv4 which does 1621 * ipif specific socket options. 1622 */ 1623 ilm_t * 1624 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1625 { 1626 ill_t *ill = ipif->ipif_ill; 1627 ilm_t *ilm; 1628 in6_addr_t v6group; 1629 1630 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1631 IAM_WRITER_ILL(ill)); 1632 1633 /* 1634 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1635 */ 1636 if (group == INADDR_ANY) 1637 v6group = ipv6_all_zeros; 1638 else 1639 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1640 1641 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1642 if (ilm->ilm_flags & ILM_DELETED) 1643 continue; 1644 if (ilm->ilm_ipif == ipif && 1645 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1646 return (ilm); 1647 } 1648 return (NULL); 1649 } 1650 1651 /* 1652 * How many members on this ill? 
1653 */ 1654 int 1655 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1656 { 1657 ilm_t *ilm; 1658 int i = 0; 1659 1660 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1661 IAM_WRITER_ILL(ill)); 1662 1663 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1664 if (ilm->ilm_flags & ILM_DELETED) 1665 continue; 1666 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1667 i++; 1668 } 1669 } 1670 return (i); 1671 } 1672 1673 /* Caller guarantees that the group is not already on the list */ 1674 static ilm_t * 1675 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1676 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1677 zoneid_t zoneid) 1678 { 1679 ill_t *ill = ipif->ipif_ill; 1680 ilm_t *ilm; 1681 ilm_t *ilm_cur; 1682 ilm_t **ilm_ptpn; 1683 1684 ASSERT(IAM_WRITER_IPIF(ipif)); 1685 1686 ilm = GETSTRUCT(ilm_t, 1); 1687 if (ilm == NULL) 1688 return (NULL); 1689 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1690 ilm->ilm_filter = l_alloc(); 1691 if (ilm->ilm_filter == NULL) { 1692 mi_free(ilm); 1693 return (NULL); 1694 } 1695 } 1696 ilm->ilm_v6addr = *v6group; 1697 ilm->ilm_refcnt = 1; 1698 ilm->ilm_zoneid = zoneid; 1699 ilm->ilm_timer = INFINITY; 1700 ilm->ilm_rtx.rtx_timer = INFINITY; 1701 1702 /* 1703 * IPv4 Multicast groups are joined using ipif. 1704 * IPv6 Multicast groups are joined using ill. 1705 */ 1706 if (ill->ill_isv6) { 1707 ilm->ilm_ill = ill; 1708 ilm->ilm_ipif = NULL; 1709 } else { 1710 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1711 ilm->ilm_ipif = ipif; 1712 ilm->ilm_ill = NULL; 1713 } 1714 /* 1715 * After this if ilm moves to a new ill, we don't change 1716 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1717 * it has been moved. Indexes don't match even when the application 1718 * wants to join on a FAILED/INACTIVE interface because we choose 1719 * a new interface to join in. This is considered as an implicit 1720 * move. 
1721 */ 1722 ilm->ilm_orig_ifindex = orig_ifindex; 1723 1724 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1725 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1726 1727 /* 1728 * Grab lock to give consistent view to readers 1729 */ 1730 mutex_enter(&ill->ill_lock); 1731 /* 1732 * All ilms in the same zone are contiguous in the ill_ilm list. 1733 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1734 * sending duplicates up when two applications in the same zone join the 1735 * same group on different logical interfaces. 1736 */ 1737 ilm_cur = ill->ill_ilm; 1738 ilm_ptpn = &ill->ill_ilm; 1739 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1740 ilm_ptpn = &ilm_cur->ilm_next; 1741 ilm_cur = ilm_cur->ilm_next; 1742 } 1743 ilm->ilm_next = ilm_cur; 1744 *ilm_ptpn = ilm; 1745 1746 /* 1747 * If we have an associated ilg, use its filter state; if not, 1748 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 1749 */ 1750 if (ilgstat != ILGSTAT_NONE) { 1751 if (!SLIST_IS_EMPTY(ilg_flist)) 1752 l_copy(ilg_flist, ilm->ilm_filter); 1753 ilm->ilm_fmode = ilg_fmode; 1754 } else { 1755 ilm->ilm_no_ilg_cnt = 1; 1756 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1757 } 1758 1759 mutex_exit(&ill->ill_lock); 1760 return (ilm); 1761 } 1762 1763 void 1764 ilm_walker_cleanup(ill_t *ill) 1765 { 1766 ilm_t **ilmp; 1767 ilm_t *ilm; 1768 1769 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1770 ASSERT(ill->ill_ilm_walker_cnt == 0); 1771 1772 ilmp = &ill->ill_ilm; 1773 while (*ilmp != NULL) { 1774 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1775 ilm = *ilmp; 1776 *ilmp = ilm->ilm_next; 1777 FREE_SLIST(ilm->ilm_filter); 1778 FREE_SLIST(ilm->ilm_pendsrcs); 1779 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1780 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1781 mi_free((char *)ilm); 1782 } else { 1783 ilmp = &(*ilmp)->ilm_next; 1784 } 1785 } 1786 ill->ill_ilm_cleanup_reqd = 0; 1787 } 1788 1789 /* 1790 * Unlink ilm and free it. 
1791 */ 1792 static void 1793 ilm_delete(ilm_t *ilm) 1794 { 1795 ill_t *ill; 1796 ilm_t **ilmp; 1797 1798 if (ilm->ilm_ipif != NULL) { 1799 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1800 ASSERT(ilm->ilm_ill == NULL); 1801 ill = ilm->ilm_ipif->ipif_ill; 1802 ASSERT(!ill->ill_isv6); 1803 } else { 1804 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1805 ASSERT(ilm->ilm_ipif == NULL); 1806 ill = ilm->ilm_ill; 1807 ASSERT(ill->ill_isv6); 1808 } 1809 /* 1810 * Delete under lock protection so that readers don't stumble 1811 * on bad ilm_next 1812 */ 1813 mutex_enter(&ill->ill_lock); 1814 if (ill->ill_ilm_walker_cnt != 0) { 1815 ilm->ilm_flags |= ILM_DELETED; 1816 ill->ill_ilm_cleanup_reqd = 1; 1817 mutex_exit(&ill->ill_lock); 1818 return; 1819 } 1820 1821 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1822 ; 1823 *ilmp = ilm->ilm_next; 1824 mutex_exit(&ill->ill_lock); 1825 1826 FREE_SLIST(ilm->ilm_filter); 1827 FREE_SLIST(ilm->ilm_pendsrcs); 1828 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1829 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1830 mi_free((char *)ilm); 1831 } 1832 1833 /* Free all ilms for this ipif */ 1834 void 1835 ilm_free(ipif_t *ipif) 1836 { 1837 ill_t *ill = ipif->ipif_ill; 1838 ilm_t *ilm; 1839 ilm_t *next_ilm; 1840 1841 ASSERT(IAM_WRITER_IPIF(ipif)); 1842 1843 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1844 next_ilm = ilm->ilm_next; 1845 if (ilm->ilm_ipif == ipif) 1846 ilm_delete(ilm); 1847 } 1848 } 1849 1850 /* 1851 * Looks up the appropriate ipif given a v4 multicast group and interface 1852 * address. On success, returns 0, with *ipifpp pointing to the found 1853 * struct. On failure, returns an errno and *ipifpp is NULL. 
1854 */ 1855 int 1856 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1857 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1858 { 1859 ipif_t *ipif; 1860 int err = 0; 1861 zoneid_t zoneid = connp->conn_zoneid; 1862 1863 if (!CLASSD(group) || CLASSD(src)) { 1864 return (EINVAL); 1865 } 1866 *ipifpp = NULL; 1867 1868 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1869 if (ifaddr != INADDR_ANY) { 1870 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1871 CONNP_TO_WQ(connp), first_mp, func, &err); 1872 if (err != 0 && err != EINPROGRESS) 1873 err = EADDRNOTAVAIL; 1874 } else if (ifindexp != NULL && *ifindexp != 0) { 1875 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1876 CONNP_TO_WQ(connp), first_mp, func, &err); 1877 } else { 1878 ipif = ipif_lookup_group(group, zoneid); 1879 if (ipif == NULL) 1880 return (EADDRNOTAVAIL); 1881 } 1882 if (ipif == NULL) 1883 return (err); 1884 1885 *ipifpp = ipif; 1886 return (0); 1887 } 1888 1889 /* 1890 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1891 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 1892 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1893 * an errno and *illpp and *ipifpp are undefined. 
1894 */ 1895 int 1896 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1897 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1898 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1899 { 1900 boolean_t src_unspec; 1901 ill_t *ill = NULL; 1902 ipif_t *ipif = NULL; 1903 int err; 1904 zoneid_t zoneid = connp->conn_zoneid; 1905 queue_t *wq = CONNP_TO_WQ(connp); 1906 1907 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1908 1909 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1910 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1911 return (EINVAL); 1912 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1913 if (src_unspec) { 1914 *v4src = INADDR_ANY; 1915 } else { 1916 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1917 } 1918 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1919 return (EINVAL); 1920 *ipifpp = NULL; 1921 *isv6 = B_FALSE; 1922 } else { 1923 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1924 return (EINVAL); 1925 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1926 IN6_IS_ADDR_MULTICAST(v6src)) { 1927 return (EINVAL); 1928 } 1929 *illpp = NULL; 1930 *isv6 = B_TRUE; 1931 } 1932 1933 if (ifindex == 0) { 1934 if (*isv6) 1935 ill = ill_lookup_group_v6(v6group, zoneid); 1936 else 1937 ipif = ipif_lookup_group(*v4group, zoneid); 1938 if (ill == NULL && ipif == NULL) 1939 return (EADDRNOTAVAIL); 1940 } else { 1941 if (*isv6) { 1942 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1943 wq, first_mp, func, &err); 1944 if (ill != NULL && 1945 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1946 ill_refrele(ill); 1947 ill = NULL; 1948 err = EADDRNOTAVAIL; 1949 } 1950 } else { 1951 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1952 zoneid, wq, first_mp, func, &err); 1953 } 1954 if (ill == NULL && ipif == NULL) 1955 return (err); 1956 } 1957 1958 *ipifpp = ipif; 1959 *illpp = ill; 1960 return (0); 1961 } 1962 1963 static int 1964 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1965 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, 
boolean_t isv4mapped) 1966 { 1967 ilg_t *ilg; 1968 int i, numsrc, fmode, outsrcs; 1969 struct sockaddr_in *sin; 1970 struct sockaddr_in6 *sin6; 1971 struct in_addr *addrp; 1972 slist_t *fp; 1973 boolean_t is_v4only_api; 1974 1975 mutex_enter(&connp->conn_lock); 1976 1977 ilg = ilg_lookup_ipif(connp, grp, ipif); 1978 if (ilg == NULL) { 1979 mutex_exit(&connp->conn_lock); 1980 return (EADDRNOTAVAIL); 1981 } 1982 1983 if (gf == NULL) { 1984 ASSERT(imsf != NULL); 1985 ASSERT(!isv4mapped); 1986 is_v4only_api = B_TRUE; 1987 outsrcs = imsf->imsf_numsrc; 1988 } else { 1989 ASSERT(imsf == NULL); 1990 is_v4only_api = B_FALSE; 1991 outsrcs = gf->gf_numsrc; 1992 } 1993 1994 /* 1995 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 1996 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 1997 * So we need to translate here. 1998 */ 1999 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2000 MCAST_INCLUDE : MCAST_EXCLUDE; 2001 if ((fp = ilg->ilg_filter) == NULL) { 2002 numsrc = 0; 2003 } else { 2004 for (i = 0; i < outsrcs; i++) { 2005 if (i == fp->sl_numsrc) 2006 break; 2007 if (isv4mapped) { 2008 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2009 sin6->sin6_family = AF_INET6; 2010 sin6->sin6_addr = fp->sl_addr[i]; 2011 } else { 2012 if (is_v4only_api) { 2013 addrp = &imsf->imsf_slist[i]; 2014 } else { 2015 sin = (struct sockaddr_in *) 2016 &gf->gf_slist[i]; 2017 sin->sin_family = AF_INET; 2018 addrp = &sin->sin_addr; 2019 } 2020 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2021 } 2022 } 2023 numsrc = fp->sl_numsrc; 2024 } 2025 2026 if (is_v4only_api) { 2027 imsf->imsf_numsrc = numsrc; 2028 imsf->imsf_fmode = fmode; 2029 } else { 2030 gf->gf_numsrc = numsrc; 2031 gf->gf_fmode = fmode; 2032 } 2033 2034 mutex_exit(&connp->conn_lock); 2035 2036 return (0); 2037 } 2038 2039 static int 2040 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2041 const struct in6_addr *grp, ill_t *ill) 2042 { 2043 ilg_t *ilg; 2044 int i; 2045 struct 
sockaddr_storage *sl; 2046 struct sockaddr_in6 *sin6; 2047 slist_t *fp; 2048 2049 mutex_enter(&connp->conn_lock); 2050 2051 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2052 if (ilg == NULL) { 2053 mutex_exit(&connp->conn_lock); 2054 return (EADDRNOTAVAIL); 2055 } 2056 2057 /* 2058 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2059 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2060 * So we need to translate here. 2061 */ 2062 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2063 MCAST_INCLUDE : MCAST_EXCLUDE; 2064 if ((fp = ilg->ilg_filter) == NULL) { 2065 gf->gf_numsrc = 0; 2066 } else { 2067 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2068 if (i == fp->sl_numsrc) 2069 break; 2070 sin6 = (struct sockaddr_in6 *)sl; 2071 sin6->sin6_family = AF_INET6; 2072 sin6->sin6_addr = fp->sl_addr[i]; 2073 } 2074 gf->gf_numsrc = fp->sl_numsrc; 2075 } 2076 2077 mutex_exit(&connp->conn_lock); 2078 2079 return (0); 2080 } 2081 2082 static int 2083 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2084 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2085 { 2086 ilg_t *ilg; 2087 int i, err, insrcs, infmode, new_fmode; 2088 struct sockaddr_in *sin; 2089 struct sockaddr_in6 *sin6; 2090 struct in_addr *addrp; 2091 slist_t *orig_filter = NULL; 2092 slist_t *new_filter = NULL; 2093 mcast_record_t orig_fmode; 2094 boolean_t leave_grp, is_v4only_api; 2095 ilg_stat_t ilgstat; 2096 2097 if (gf == NULL) { 2098 ASSERT(imsf != NULL); 2099 ASSERT(!isv4mapped); 2100 is_v4only_api = B_TRUE; 2101 insrcs = imsf->imsf_numsrc; 2102 infmode = imsf->imsf_fmode; 2103 } else { 2104 ASSERT(imsf == NULL); 2105 is_v4only_api = B_FALSE; 2106 insrcs = gf->gf_numsrc; 2107 infmode = gf->gf_fmode; 2108 } 2109 2110 /* Make sure we can handle the source list */ 2111 if (insrcs > MAX_FILTER_SIZE) 2112 return (ENOBUFS); 2113 2114 /* 2115 * setting the filter to (INCLUDE, NULL) is treated 2116 * as a request to leave the 
group. 2117 */ 2118 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2119 2120 ASSERT(IAM_WRITER_IPIF(ipif)); 2121 2122 mutex_enter(&connp->conn_lock); 2123 2124 ilg = ilg_lookup_ipif(connp, grp, ipif); 2125 if (ilg == NULL) { 2126 /* 2127 * if the request was actually to leave, and we 2128 * didn't find an ilg, there's nothing to do. 2129 */ 2130 if (!leave_grp) 2131 ilg = conn_ilg_alloc(connp); 2132 if (leave_grp || ilg == NULL) { 2133 mutex_exit(&connp->conn_lock); 2134 return (leave_grp ? 0 : ENOMEM); 2135 } 2136 ilgstat = ILGSTAT_NEW; 2137 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2138 ilg->ilg_ipif = ipif; 2139 ilg->ilg_ill = NULL; 2140 ilg->ilg_orig_ifindex = 0; 2141 } else if (leave_grp) { 2142 ilg_delete(connp, ilg, NULL); 2143 mutex_exit(&connp->conn_lock); 2144 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2145 return (0); 2146 } else { 2147 ilgstat = ILGSTAT_CHANGE; 2148 /* Preserve existing state in case ip_addmulti() fails */ 2149 orig_fmode = ilg->ilg_fmode; 2150 if (ilg->ilg_filter == NULL) { 2151 orig_filter = NULL; 2152 } else { 2153 orig_filter = l_alloc_copy(ilg->ilg_filter); 2154 if (orig_filter == NULL) { 2155 mutex_exit(&connp->conn_lock); 2156 return (ENOMEM); 2157 } 2158 } 2159 } 2160 2161 /* 2162 * Alloc buffer to copy new state into (see below) before 2163 * we make any changes, so we can bail if it fails. 
2164 */ 2165 if ((new_filter = l_alloc()) == NULL) { 2166 mutex_exit(&connp->conn_lock); 2167 err = ENOMEM; 2168 goto free_and_exit; 2169 } 2170 2171 if (insrcs == 0) { 2172 CLEAR_SLIST(ilg->ilg_filter); 2173 } else { 2174 slist_t *fp; 2175 if (ilg->ilg_filter == NULL) { 2176 fp = l_alloc(); 2177 if (fp == NULL) { 2178 if (ilgstat == ILGSTAT_NEW) 2179 ilg_delete(connp, ilg, NULL); 2180 mutex_exit(&connp->conn_lock); 2181 err = ENOMEM; 2182 goto free_and_exit; 2183 } 2184 } else { 2185 fp = ilg->ilg_filter; 2186 } 2187 for (i = 0; i < insrcs; i++) { 2188 if (isv4mapped) { 2189 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2190 fp->sl_addr[i] = sin6->sin6_addr; 2191 } else { 2192 if (is_v4only_api) { 2193 addrp = &imsf->imsf_slist[i]; 2194 } else { 2195 sin = (struct sockaddr_in *) 2196 &gf->gf_slist[i]; 2197 addrp = &sin->sin_addr; 2198 } 2199 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2200 } 2201 } 2202 fp->sl_numsrc = insrcs; 2203 ilg->ilg_filter = fp; 2204 } 2205 /* 2206 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2207 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2208 * So we need to translate here. 2209 */ 2210 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2211 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2212 2213 /* 2214 * Save copy of ilg's filter state to pass to other functions, 2215 * so we can release conn_lock now. 2216 */ 2217 new_fmode = ilg->ilg_fmode; 2218 l_copy(ilg->ilg_filter, new_filter); 2219 2220 mutex_exit(&connp->conn_lock); 2221 2222 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2223 if (err != 0) { 2224 /* 2225 * Restore the original filter state, or delete the 2226 * newly-created ilg. We need to look up the ilg 2227 * again, though, since we've not been holding the 2228 * conn_lock. 
2229 */ 2230 mutex_enter(&connp->conn_lock); 2231 ilg = ilg_lookup_ipif(connp, grp, ipif); 2232 ASSERT(ilg != NULL); 2233 if (ilgstat == ILGSTAT_NEW) { 2234 ilg_delete(connp, ilg, NULL); 2235 } else { 2236 ilg->ilg_fmode = orig_fmode; 2237 if (SLIST_IS_EMPTY(orig_filter)) { 2238 CLEAR_SLIST(ilg->ilg_filter); 2239 } else { 2240 /* 2241 * We didn't free the filter, even if we 2242 * were trying to make the source list empty; 2243 * so if orig_filter isn't empty, the ilg 2244 * must still have a filter alloc'd. 2245 */ 2246 l_copy(orig_filter, ilg->ilg_filter); 2247 } 2248 } 2249 mutex_exit(&connp->conn_lock); 2250 } 2251 2252 free_and_exit: 2253 l_free(orig_filter); 2254 l_free(new_filter); 2255 2256 return (err); 2257 } 2258 2259 static int 2260 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2261 const struct in6_addr *grp, ill_t *ill) 2262 { 2263 ilg_t *ilg; 2264 int i, orig_ifindex, orig_fmode, new_fmode, err; 2265 slist_t *orig_filter = NULL; 2266 slist_t *new_filter = NULL; 2267 struct sockaddr_storage *sl; 2268 struct sockaddr_in6 *sin6; 2269 boolean_t leave_grp; 2270 ilg_stat_t ilgstat; 2271 2272 /* Make sure we can handle the source list */ 2273 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2274 return (ENOBUFS); 2275 2276 /* 2277 * setting the filter to (INCLUDE, NULL) is treated 2278 * as a request to leave the group. 2279 */ 2280 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2281 2282 ASSERT(IAM_WRITER_ILL(ill)); 2283 2284 /* 2285 * Use the ifindex to do the lookup. We can't use the ill 2286 * directly because ilg_ill could point to a different ill 2287 * if things have moved. 2288 */ 2289 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2290 2291 mutex_enter(&connp->conn_lock); 2292 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2293 if (ilg == NULL) { 2294 /* 2295 * if the request was actually to leave, and we 2296 * didn't find an ilg, there's nothing to do. 
2297 */ 2298 if (!leave_grp) 2299 ilg = conn_ilg_alloc(connp); 2300 if (leave_grp || ilg == NULL) { 2301 mutex_exit(&connp->conn_lock); 2302 return (leave_grp ? 0 : ENOMEM); 2303 } 2304 ilgstat = ILGSTAT_NEW; 2305 ilg->ilg_v6group = *grp; 2306 ilg->ilg_ipif = NULL; 2307 /* 2308 * Choose our target ill to join on. This might be 2309 * different from the ill we've been given if it's 2310 * currently down and part of a group. 2311 * 2312 * new ill is not refheld; we are writer. 2313 */ 2314 ill = ip_choose_multi_ill(ill, grp); 2315 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2316 ilg->ilg_ill = ill; 2317 /* 2318 * Remember the index that we joined on, so that we can 2319 * successfully delete them later on and also search for 2320 * duplicates if the application wants to join again. 2321 */ 2322 ilg->ilg_orig_ifindex = orig_ifindex; 2323 } else if (leave_grp) { 2324 /* 2325 * Use the ilg's current ill for the deletion, 2326 * we might have failed over. 2327 */ 2328 ill = ilg->ilg_ill; 2329 ilg_delete(connp, ilg, NULL); 2330 mutex_exit(&connp->conn_lock); 2331 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2332 connp->conn_zoneid, B_FALSE, B_TRUE); 2333 return (0); 2334 } else { 2335 ilgstat = ILGSTAT_CHANGE; 2336 /* 2337 * The current ill might be different from the one we were 2338 * asked to join on (if failover has occurred); we should 2339 * join on the ill stored in the ilg. The original ill 2340 * is noted in ilg_orig_ifindex, which matched our request. 2341 */ 2342 ill = ilg->ilg_ill; 2343 /* preserve existing state in case ip_addmulti() fails */ 2344 orig_fmode = ilg->ilg_fmode; 2345 if (ilg->ilg_filter == NULL) { 2346 orig_filter = NULL; 2347 } else { 2348 orig_filter = l_alloc_copy(ilg->ilg_filter); 2349 if (orig_filter == NULL) { 2350 mutex_exit(&connp->conn_lock); 2351 return (ENOMEM); 2352 } 2353 } 2354 } 2355 2356 /* 2357 * Alloc buffer to copy new state into (see below) before 2358 * we make any changes, so we can bail if it fails. 
2359 */ 2360 if ((new_filter = l_alloc()) == NULL) { 2361 mutex_exit(&connp->conn_lock); 2362 err = ENOMEM; 2363 goto free_and_exit; 2364 } 2365 2366 if (gf->gf_numsrc == 0) { 2367 CLEAR_SLIST(ilg->ilg_filter); 2368 } else { 2369 slist_t *fp; 2370 if (ilg->ilg_filter == NULL) { 2371 fp = l_alloc(); 2372 if (fp == NULL) { 2373 if (ilgstat == ILGSTAT_NEW) 2374 ilg_delete(connp, ilg, NULL); 2375 mutex_exit(&connp->conn_lock); 2376 err = ENOMEM; 2377 goto free_and_exit; 2378 } 2379 } else { 2380 fp = ilg->ilg_filter; 2381 } 2382 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2383 sin6 = (struct sockaddr_in6 *)sl; 2384 fp->sl_addr[i] = sin6->sin6_addr; 2385 } 2386 fp->sl_numsrc = gf->gf_numsrc; 2387 ilg->ilg_filter = fp; 2388 } 2389 /* 2390 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2391 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2392 * So we need to translate here. 2393 */ 2394 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2395 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2396 2397 /* 2398 * Save copy of ilg's filter state to pass to other functions, 2399 * so we can release conn_lock now. 2400 */ 2401 new_fmode = ilg->ilg_fmode; 2402 l_copy(ilg->ilg_filter, new_filter); 2403 2404 mutex_exit(&connp->conn_lock); 2405 2406 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2407 ilgstat, new_fmode, new_filter); 2408 if (err != 0) { 2409 /* 2410 * Restore the original filter state, or delete the 2411 * newly-created ilg. We need to look up the ilg 2412 * again, though, since we've not been holding the 2413 * conn_lock. 
2414 */ 2415 mutex_enter(&connp->conn_lock); 2416 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2417 ASSERT(ilg != NULL); 2418 if (ilgstat == ILGSTAT_NEW) { 2419 ilg_delete(connp, ilg, NULL); 2420 } else { 2421 ilg->ilg_fmode = orig_fmode; 2422 if (SLIST_IS_EMPTY(orig_filter)) { 2423 CLEAR_SLIST(ilg->ilg_filter); 2424 } else { 2425 /* 2426 * We didn't free the filter, even if we 2427 * were trying to make the source list empty; 2428 * so if orig_filter isn't empty, the ilg 2429 * must still have a filter alloc'd. 2430 */ 2431 l_copy(orig_filter, ilg->ilg_filter); 2432 } 2433 } 2434 mutex_exit(&connp->conn_lock); 2435 } 2436 2437 free_and_exit: 2438 l_free(orig_filter); 2439 l_free(new_filter); 2440 2441 return (err); 2442 } 2443 2444 /* 2445 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2446 */ 2447 /* ARGSUSED */ 2448 int 2449 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2450 ip_ioctl_cmd_t *ipip, void *ifreq) 2451 { 2452 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2453 /* existence verified in ip_wput_nondata() */ 2454 mblk_t *data_mp = mp->b_cont->b_cont; 2455 int datalen, err, cmd, minsize; 2456 int expsize = 0; 2457 conn_t *connp; 2458 boolean_t isv6, is_v4only_api, getcmd; 2459 struct sockaddr_in *gsin; 2460 struct sockaddr_in6 *gsin6; 2461 ipaddr_t v4grp; 2462 in6_addr_t v6grp; 2463 struct group_filter *gf = NULL; 2464 struct ip_msfilter *imsf = NULL; 2465 mblk_t *ndp; 2466 2467 if (data_mp->b_cont != NULL) { 2468 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2469 return (ENOMEM); 2470 freemsg(data_mp); 2471 data_mp = ndp; 2472 mp->b_cont->b_cont = data_mp; 2473 } 2474 2475 cmd = iocp->ioc_cmd; 2476 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2477 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2478 minsize = (is_v4only_api) ? 
IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2479 datalen = MBLKL(data_mp); 2480 2481 if (datalen < minsize) 2482 return (EINVAL); 2483 2484 /* 2485 * now we know we have at least have the initial structure, 2486 * but need to check for the source list array. 2487 */ 2488 if (is_v4only_api) { 2489 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2490 isv6 = B_FALSE; 2491 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2492 } else { 2493 gf = (struct group_filter *)data_mp->b_rptr; 2494 if (gf->gf_group.ss_family == AF_INET6) { 2495 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2496 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2497 } else { 2498 isv6 = B_FALSE; 2499 } 2500 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2501 } 2502 if (datalen < expsize) 2503 return (EINVAL); 2504 2505 connp = Q_TO_CONN(q); 2506 2507 /* operation not supported on the virtual network interface */ 2508 if (IS_VNI(ipif->ipif_ill)) 2509 return (EINVAL); 2510 2511 if (isv6) { 2512 ill_t *ill = ipif->ipif_ill; 2513 ill_refhold(ill); 2514 2515 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2516 v6grp = gsin6->sin6_addr; 2517 if (getcmd) 2518 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2519 else 2520 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2521 2522 ill_refrele(ill); 2523 } else { 2524 boolean_t isv4mapped = B_FALSE; 2525 if (is_v4only_api) { 2526 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2527 } else { 2528 if (gf->gf_group.ss_family == AF_INET) { 2529 gsin = (struct sockaddr_in *)&gf->gf_group; 2530 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2531 } else { 2532 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2533 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2534 v4grp); 2535 isv4mapped = B_TRUE; 2536 } 2537 } 2538 if (getcmd) 2539 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2540 isv4mapped); 2541 else 2542 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2543 isv4mapped); 2544 } 2545 2546 return (err); 2547 } 2548 2549 /* 2550 * Finds the ipif based on information 
in the ioctl headers. Needed to make 2551 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2552 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2553 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 2554 */ 2555 int 2556 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2557 { 2558 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2559 int cmd = iocp->ioc_cmd, err = 0; 2560 conn_t *connp; 2561 ipif_t *ipif; 2562 /* caller has verified this mblk exists */ 2563 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2564 struct ip_msfilter *imsf; 2565 struct group_filter *gf; 2566 ipaddr_t v4addr, v4grp; 2567 in6_addr_t v6grp; 2568 uint32_t index; 2569 zoneid_t zoneid; 2570 2571 connp = Q_TO_CONN(q); 2572 zoneid = connp->conn_zoneid; 2573 2574 /* don't allow multicast operations on a tcp conn */ 2575 if (IPCL_IS_TCP(connp)) 2576 return (ENOPROTOOPT); 2577 2578 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2579 /* don't allow v4-specific ioctls on v6 socket */ 2580 if (connp->conn_af_isv6) 2581 return (EAFNOSUPPORT); 2582 2583 imsf = (struct ip_msfilter *)dbuf; 2584 v4addr = imsf->imsf_interface.s_addr; 2585 v4grp = imsf->imsf_multiaddr.s_addr; 2586 if (v4addr == INADDR_ANY) { 2587 ipif = ipif_lookup_group(v4grp, zoneid); 2588 if (ipif == NULL) 2589 err = EADDRNOTAVAIL; 2590 } else { 2591 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2592 func, &err); 2593 } 2594 } else { 2595 boolean_t isv6 = B_FALSE; 2596 gf = (struct group_filter *)dbuf; 2597 index = gf->gf_interface; 2598 if (gf->gf_group.ss_family == AF_INET6) { 2599 struct sockaddr_in6 *sin6; 2600 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2601 v6grp = sin6->sin6_addr; 2602 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2603 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2604 else 2605 isv6 = B_TRUE; 2606 } else if (gf->gf_group.ss_family == AF_INET) { 2607 struct sockaddr_in *sin; 2608 sin = (struct sockaddr_in *)&gf->gf_group; 2609 
v4grp = sin->sin_addr.s_addr; 2610 } else { 2611 return (EAFNOSUPPORT); 2612 } 2613 if (index == 0) { 2614 if (isv6) 2615 ipif = ipif_lookup_group_v6(&v6grp, zoneid); 2616 else 2617 ipif = ipif_lookup_group(v4grp, zoneid); 2618 if (ipif == NULL) 2619 err = EADDRNOTAVAIL; 2620 } else { 2621 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2622 q, mp, func, &err); 2623 } 2624 } 2625 2626 *ipifpp = ipif; 2627 return (err); 2628 } 2629 2630 /* 2631 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2632 * in in two stages, as the first copyin tells us the size of the attached 2633 * source buffer. This function is called by ip_wput_nondata() after the 2634 * first copyin has completed; it figures out how big the second stage 2635 * needs to be, and kicks it off. 2636 * 2637 * In some cases (numsrc < 2), the second copyin is not needed as the 2638 * first one gets a complete structure containing 1 source addr. 2639 * 2640 * The function returns 0 if a second copyin has been started (i.e. there's 2641 * no more work to be done right now), or 1 if the second copyin is not 2642 * needed and ip_wput_nondata() can continue its processing. 
2643 */ 2644 int 2645 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2646 { 2647 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2648 int cmd = iocp->ioc_cmd; 2649 /* validity of this checked in ip_wput_nondata() */ 2650 mblk_t *mp1 = mp->b_cont->b_cont; 2651 int copysize = 0; 2652 int offset; 2653 2654 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2655 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2656 if (gf->gf_numsrc >= 2) { 2657 offset = sizeof (struct group_filter); 2658 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2659 } 2660 } else { 2661 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2662 if (imsf->imsf_numsrc >= 2) { 2663 offset = sizeof (struct ip_msfilter); 2664 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2665 } 2666 } 2667 if (copysize > 0) { 2668 mi_copyin_n(q, mp, offset, copysize); 2669 return (0); 2670 } 2671 return (1); 2672 } 2673 2674 /* 2675 * Handle the following optmgmt: 2676 * IP_ADD_MEMBERSHIP must not have joined already 2677 * MCAST_JOIN_GROUP must not have joined already 2678 * IP_BLOCK_SOURCE must have joined already 2679 * MCAST_BLOCK_SOURCE must have joined already 2680 * IP_JOIN_SOURCE_GROUP may have joined already 2681 * MCAST_JOIN_SOURCE_GROUP may have joined already 2682 * 2683 * fmode and src parameters may be used to determine which option is 2684 * being set, as follows (the IP_* and MCAST_* versions of each option 2685 * are functionally equivalent): 2686 * opt fmode src 2687 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2688 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2689 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2690 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2691 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2692 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2693 * 2694 * Changing the filter mode is not allowed; if a matching ilg already 2695 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2696 * 2697 * Verifies that there is a source address of appropriate scope for 2698 * the group; if not, EADDRNOTAVAIL is returned. 2699 * 2700 * The interface to be used may be identified by an address or by an 2701 * index. A pointer to the index is passed; if it is NULL, use the 2702 * address, otherwise, use the index. 2703 */ 2704 int 2705 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2706 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2707 mblk_t *first_mp) 2708 { 2709 ipif_t *ipif; 2710 ipsq_t *ipsq; 2711 int err = 0; 2712 ill_t *ill; 2713 2714 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2715 ip_restart_optmgmt, &ipif); 2716 if (err != 0) { 2717 if (err != EINPROGRESS) { 2718 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2719 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2720 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2721 } 2722 return (err); 2723 } 2724 ASSERT(ipif != NULL); 2725 2726 ill = ipif->ipif_ill; 2727 /* Operation not supported on a virtual network interface */ 2728 if (IS_VNI(ill)) { 2729 ipif_refrele(ipif); 2730 return (EINVAL); 2731 } 2732 2733 if (checkonly) { 2734 /* 2735 * do not do operation, just pretend to - new T_CHECK 2736 * semantics. The error return case above if encountered 2737 * considered a good enough "check" here. 
2738 */ 2739 ipif_refrele(ipif); 2740 return (0); 2741 } 2742 2743 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2744 NEW_OP); 2745 2746 /* unspecified source addr => no source filtering */ 2747 err = ilg_add(connp, group, ipif, fmode, src); 2748 2749 IPSQ_EXIT(ipsq); 2750 2751 ipif_refrele(ipif); 2752 return (err); 2753 } 2754 2755 /* 2756 * Handle the following optmgmt: 2757 * IPV6_JOIN_GROUP must not have joined already 2758 * MCAST_JOIN_GROUP must not have joined already 2759 * MCAST_BLOCK_SOURCE must have joined already 2760 * MCAST_JOIN_SOURCE_GROUP may have joined already 2761 * 2762 * fmode and src parameters may be used to determine which option is 2763 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2764 * are functionally equivalent): 2765 * opt fmode v6src 2766 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2767 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2768 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2769 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2770 * 2771 * Changing the filter mode is not allowed; if a matching ilg already 2772 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2773 * 2774 * Verifies that there is a source address of appropriate scope for 2775 * the group; if not, EADDRNOTAVAIL is returned. 2776 * 2777 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2778 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2779 * v6src is also v4-mapped. 
2780 */ 2781 int 2782 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2783 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2784 const in6_addr_t *v6src, mblk_t *first_mp) 2785 { 2786 ill_t *ill; 2787 ipif_t *ipif; 2788 char buf[INET6_ADDRSTRLEN]; 2789 ipaddr_t v4group, v4src; 2790 boolean_t isv6; 2791 ipsq_t *ipsq; 2792 int err; 2793 2794 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2795 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2796 if (err != 0) { 2797 if (err != EINPROGRESS) { 2798 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2799 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2800 sizeof (buf)), ifindex)); 2801 } 2802 return (err); 2803 } 2804 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2805 2806 /* operation is not supported on the virtual network interface */ 2807 if (isv6) { 2808 if (IS_VNI(ill)) { 2809 ill_refrele(ill); 2810 return (EINVAL); 2811 } 2812 } else { 2813 if (IS_VNI(ipif->ipif_ill)) { 2814 ipif_refrele(ipif); 2815 return (EINVAL); 2816 } 2817 } 2818 2819 if (checkonly) { 2820 /* 2821 * do not do operation, just pretend to - new T_CHECK 2822 * semantics. The error return case above if encountered 2823 * considered a good enough "check" here. 
2824 */ 2825 if (isv6) 2826 ill_refrele(ill); 2827 else 2828 ipif_refrele(ipif); 2829 return (0); 2830 } 2831 2832 if (!isv6) { 2833 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2834 ipsq, NEW_OP); 2835 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2836 IPSQ_EXIT(ipsq); 2837 ipif_refrele(ipif); 2838 } else { 2839 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2840 ipsq, NEW_OP); 2841 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2842 IPSQ_EXIT(ipsq); 2843 ill_refrele(ill); 2844 } 2845 2846 return (err); 2847 } 2848 2849 static int 2850 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2851 mcast_record_t fmode, ipaddr_t src) 2852 { 2853 ilg_t *ilg; 2854 in6_addr_t v6src; 2855 boolean_t leaving = B_FALSE; 2856 2857 ASSERT(IAM_WRITER_IPIF(ipif)); 2858 2859 /* 2860 * The ilg is valid only while we hold the conn lock. Once we drop 2861 * the lock, another thread can locate another ilg on this connp, 2862 * but on a different ipif, and delete it, and cause the ilg array 2863 * to be reallocated and copied. Hence do the ilg_delete before 2864 * dropping the lock. 2865 */ 2866 mutex_enter(&connp->conn_lock); 2867 ilg = ilg_lookup_ipif(connp, group, ipif); 2868 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2869 mutex_exit(&connp->conn_lock); 2870 return (EADDRNOTAVAIL); 2871 } 2872 2873 /* 2874 * Decide if we're actually deleting the ilg or just removing a 2875 * source filter address; if just removing an addr, make sure we 2876 * aren't trying to change the filter mode, and that the addr is 2877 * actually in our filter list already. If we're removing the 2878 * last src in an include list, just delete the ilg. 
2879 */ 2880 if (src == INADDR_ANY) { 2881 v6src = ipv6_all_zeros; 2882 leaving = B_TRUE; 2883 } else { 2884 int err = 0; 2885 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2886 if (fmode != ilg->ilg_fmode) 2887 err = EINVAL; 2888 else if (ilg->ilg_filter == NULL || 2889 !list_has_addr(ilg->ilg_filter, &v6src)) 2890 err = EADDRNOTAVAIL; 2891 if (err != 0) { 2892 mutex_exit(&connp->conn_lock); 2893 return (err); 2894 } 2895 if (fmode == MODE_IS_INCLUDE && 2896 ilg->ilg_filter->sl_numsrc == 1) { 2897 v6src = ipv6_all_zeros; 2898 leaving = B_TRUE; 2899 } 2900 } 2901 2902 ilg_delete(connp, ilg, &v6src); 2903 mutex_exit(&connp->conn_lock); 2904 2905 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2906 return (0); 2907 } 2908 2909 static int 2910 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2911 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2912 { 2913 ilg_t *ilg; 2914 ill_t *ilg_ill; 2915 uint_t ilg_orig_ifindex; 2916 boolean_t leaving = B_TRUE; 2917 2918 ASSERT(IAM_WRITER_ILL(ill)); 2919 2920 /* 2921 * Use the index that we originally used to join. We can't 2922 * use the ill directly because ilg_ill could point to 2923 * a new ill if things have moved. 2924 */ 2925 mutex_enter(&connp->conn_lock); 2926 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2927 ill->ill_phyint->phyint_ifindex); 2928 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2929 mutex_exit(&connp->conn_lock); 2930 return (EADDRNOTAVAIL); 2931 } 2932 2933 /* 2934 * Decide if we're actually deleting the ilg or just removing a 2935 * source filter address; if just removing an addr, make sure we 2936 * aren't trying to change the filter mode, and that the addr is 2937 * actually in our filter list already. If we're removing the 2938 * last src in an include list, just delete the ilg. 
2939 */ 2940 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2941 int err = 0; 2942 if (fmode != ilg->ilg_fmode) 2943 err = EINVAL; 2944 else if (ilg->ilg_filter == NULL || 2945 !list_has_addr(ilg->ilg_filter, v6src)) 2946 err = EADDRNOTAVAIL; 2947 if (err != 0) { 2948 mutex_exit(&connp->conn_lock); 2949 return (err); 2950 } 2951 if (fmode == MODE_IS_INCLUDE && 2952 ilg->ilg_filter->sl_numsrc == 1) 2953 v6src = NULL; 2954 else 2955 leaving = B_FALSE; 2956 } 2957 2958 ilg_ill = ilg->ilg_ill; 2959 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2960 ilg_delete(connp, ilg, v6src); 2961 mutex_exit(&connp->conn_lock); 2962 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2963 connp->conn_zoneid, B_FALSE, leaving); 2964 2965 return (0); 2966 } 2967 2968 /* 2969 * Handle the following optmgmt: 2970 * IP_DROP_MEMBERSHIP will leave 2971 * MCAST_LEAVE_GROUP will leave 2972 * IP_UNBLOCK_SOURCE will not leave 2973 * MCAST_UNBLOCK_SOURCE will not leave 2974 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2975 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2976 * 2977 * fmode and src parameters may be used to determine which option is 2978 * being set, as follows (the IP_* and MCAST_* versions of each option 2979 * are functionally equivalent): 2980 * opt fmode src 2981 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2982 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2983 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2984 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2985 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2986 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2987 * 2988 * Changing the filter mode is not allowed; if a matching ilg already 2989 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2990 * 2991 * The interface to be used may be identified by an address or by an 2992 * index. A pointer to the index is passed; if it is NULL, use the 2993 * address, otherwise, use the index. 
2994 */ 2995 int 2996 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2997 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2998 mblk_t *first_mp) 2999 { 3000 ipif_t *ipif; 3001 ipsq_t *ipsq; 3002 int err; 3003 ill_t *ill; 3004 3005 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3006 ip_restart_optmgmt, &ipif); 3007 if (err != 0) { 3008 if (err != EINPROGRESS) { 3009 ip1dbg(("ip_opt_delete_group: no ipif for group " 3010 "0x%x, ifaddr 0x%x\n", 3011 (int)ntohl(group), (int)ntohl(ifaddr))); 3012 } 3013 return (err); 3014 } 3015 ASSERT(ipif != NULL); 3016 3017 ill = ipif->ipif_ill; 3018 /* Operation not supported on a virtual network interface */ 3019 if (IS_VNI(ill)) { 3020 ipif_refrele(ipif); 3021 return (EINVAL); 3022 } 3023 3024 if (checkonly) { 3025 /* 3026 * do not do operation, just pretend to - new T_CHECK 3027 * semantics. The error return case above if encountered 3028 * considered a good enough "check" here. 3029 */ 3030 ipif_refrele(ipif); 3031 return (0); 3032 } 3033 3034 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3035 NEW_OP); 3036 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3037 IPSQ_EXIT(ipsq); 3038 3039 ipif_refrele(ipif); 3040 return (err); 3041 } 3042 3043 /* 3044 * Handle the following optmgmt: 3045 * IPV6_LEAVE_GROUP will leave 3046 * MCAST_LEAVE_GROUP will leave 3047 * MCAST_UNBLOCK_SOURCE will not leave 3048 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3049 * 3050 * fmode and src parameters may be used to determine which option is 3051 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3052 * are functionally equivalent): 3053 * opt fmode v6src 3054 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3055 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3056 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3057 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3058 * 3059 * Changing the filter mode is not 
allowed; if a matching ilg already 3060 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3061 * 3062 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3063 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3064 * v6src is also v4-mapped. 3065 */ 3066 int 3067 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3068 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3069 const in6_addr_t *v6src, mblk_t *first_mp) 3070 { 3071 ill_t *ill; 3072 ipif_t *ipif; 3073 char buf[INET6_ADDRSTRLEN]; 3074 ipaddr_t v4group, v4src; 3075 boolean_t isv6; 3076 ipsq_t *ipsq; 3077 int err; 3078 3079 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3080 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3081 if (err != 0) { 3082 if (err != EINPROGRESS) { 3083 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3084 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3085 sizeof (buf)), ifindex)); 3086 } 3087 return (err); 3088 } 3089 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3090 3091 /* operation is not supported on the virtual network interface */ 3092 if (isv6) { 3093 if (IS_VNI(ill)) { 3094 ill_refrele(ill); 3095 return (EINVAL); 3096 } 3097 } else { 3098 if (IS_VNI(ipif->ipif_ill)) { 3099 ipif_refrele(ipif); 3100 return (EINVAL); 3101 } 3102 } 3103 3104 if (checkonly) { 3105 /* 3106 * do not do operation, just pretend to - new T_CHECK 3107 * semantics. The error return case above if encountered 3108 * considered a good enough "check" here. 
3109 */ 3110 if (isv6) 3111 ill_refrele(ill); 3112 else 3113 ipif_refrele(ipif); 3114 return (0); 3115 } 3116 3117 if (!isv6) { 3118 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3119 ipsq, NEW_OP); 3120 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3121 v4src); 3122 IPSQ_EXIT(ipsq); 3123 ipif_refrele(ipif); 3124 } else { 3125 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3126 ipsq, NEW_OP); 3127 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3128 v6src); 3129 IPSQ_EXIT(ipsq); 3130 ill_refrele(ill); 3131 } 3132 3133 return (err); 3134 } 3135 3136 /* 3137 * Group mgmt for upper conn that passes things down 3138 * to the interface multicast list (and DLPI) 3139 * These routines can handle new style options that specify an interface name 3140 * as opposed to an interface address (needed for general handling of 3141 * unnumbered interfaces.) 3142 */ 3143 3144 /* 3145 * Add a group to an upper conn group data structure and pass things down 3146 * to the interface multicast list (and DLPI) 3147 */ 3148 static int 3149 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3150 ipaddr_t src) 3151 { 3152 int error = 0; 3153 ill_t *ill; 3154 ilg_t *ilg; 3155 ilg_stat_t ilgstat; 3156 slist_t *new_filter = NULL; 3157 int new_fmode; 3158 3159 ASSERT(IAM_WRITER_IPIF(ipif)); 3160 3161 ill = ipif->ipif_ill; 3162 3163 if (!(ill->ill_flags & ILLF_MULTICAST)) 3164 return (EADDRNOTAVAIL); 3165 3166 /* 3167 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3168 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3169 * serialize 2 threads doing join (sock, group1, hme0:0) and 3170 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3171 * but both operations happen on the same conn. 
3172 */ 3173 mutex_enter(&connp->conn_lock); 3174 ilg = ilg_lookup_ipif(connp, group, ipif); 3175 3176 /* 3177 * Depending on the option we're handling, may or may not be okay 3178 * if group has already been added. Figure out our rules based 3179 * on fmode and src params. Also make sure there's enough room 3180 * in the filter if we're adding a source to an existing filter. 3181 */ 3182 if (src == INADDR_ANY) { 3183 /* we're joining for all sources, must not have joined */ 3184 if (ilg != NULL) 3185 error = EADDRINUSE; 3186 } else { 3187 if (fmode == MODE_IS_EXCLUDE) { 3188 /* (excl {addr}) => block source, must have joined */ 3189 if (ilg == NULL) 3190 error = EADDRNOTAVAIL; 3191 } 3192 /* (incl {addr}) => join source, may have joined */ 3193 3194 if (ilg != NULL && 3195 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3196 error = ENOBUFS; 3197 } 3198 if (error != 0) { 3199 mutex_exit(&connp->conn_lock); 3200 return (error); 3201 } 3202 3203 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3204 3205 /* 3206 * Alloc buffer to copy new state into (see below) before 3207 * we make any changes, so we can bail if it fails. 
3208 */ 3209 if ((new_filter = l_alloc()) == NULL) { 3210 mutex_exit(&connp->conn_lock); 3211 return (ENOMEM); 3212 } 3213 3214 if (ilg == NULL) { 3215 ilgstat = ILGSTAT_NEW; 3216 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3217 mutex_exit(&connp->conn_lock); 3218 l_free(new_filter); 3219 return (ENOMEM); 3220 } 3221 if (src != INADDR_ANY) { 3222 ilg->ilg_filter = l_alloc(); 3223 if (ilg->ilg_filter == NULL) { 3224 ilg_delete(connp, ilg, NULL); 3225 mutex_exit(&connp->conn_lock); 3226 l_free(new_filter); 3227 return (ENOMEM); 3228 } 3229 ilg->ilg_filter->sl_numsrc = 1; 3230 IN6_IPADDR_TO_V4MAPPED(src, 3231 &ilg->ilg_filter->sl_addr[0]); 3232 } 3233 if (group == INADDR_ANY) { 3234 ilg->ilg_v6group = ipv6_all_zeros; 3235 } else { 3236 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3237 } 3238 ilg->ilg_ipif = ipif; 3239 ilg->ilg_ill = NULL; 3240 ilg->ilg_orig_ifindex = 0; 3241 ilg->ilg_fmode = fmode; 3242 } else { 3243 int index; 3244 in6_addr_t v6src; 3245 ilgstat = ILGSTAT_CHANGE; 3246 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3247 mutex_exit(&connp->conn_lock); 3248 l_free(new_filter); 3249 return (EINVAL); 3250 } 3251 if (ilg->ilg_filter == NULL) { 3252 ilg->ilg_filter = l_alloc(); 3253 if (ilg->ilg_filter == NULL) { 3254 mutex_exit(&connp->conn_lock); 3255 l_free(new_filter); 3256 return (ENOMEM); 3257 } 3258 } 3259 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3260 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3261 mutex_exit(&connp->conn_lock); 3262 l_free(new_filter); 3263 return (EADDRNOTAVAIL); 3264 } 3265 index = ilg->ilg_filter->sl_numsrc++; 3266 ilg->ilg_filter->sl_addr[index] = v6src; 3267 } 3268 3269 /* 3270 * Save copy of ilg's filter state to pass to other functions, 3271 * so we can release conn_lock now. 
3272 */ 3273 new_fmode = ilg->ilg_fmode; 3274 l_copy(ilg->ilg_filter, new_filter); 3275 3276 mutex_exit(&connp->conn_lock); 3277 3278 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3279 if (error != 0) { 3280 /* 3281 * Need to undo what we did before calling ip_addmulti()! 3282 * Must look up the ilg again since we've not been holding 3283 * conn_lock. 3284 */ 3285 in6_addr_t v6src; 3286 if (ilgstat == ILGSTAT_NEW) 3287 v6src = ipv6_all_zeros; 3288 else 3289 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3290 mutex_enter(&connp->conn_lock); 3291 ilg = ilg_lookup_ipif(connp, group, ipif); 3292 ASSERT(ilg != NULL); 3293 ilg_delete(connp, ilg, &v6src); 3294 mutex_exit(&connp->conn_lock); 3295 l_free(new_filter); 3296 return (error); 3297 } 3298 3299 l_free(new_filter); 3300 return (0); 3301 } 3302 3303 static int 3304 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3305 mcast_record_t fmode, const in6_addr_t *v6src) 3306 { 3307 int error = 0; 3308 int orig_ifindex; 3309 ilg_t *ilg; 3310 ilg_stat_t ilgstat; 3311 slist_t *new_filter = NULL; 3312 int new_fmode; 3313 3314 ASSERT(IAM_WRITER_ILL(ill)); 3315 3316 if (!(ill->ill_flags & ILLF_MULTICAST)) 3317 return (EADDRNOTAVAIL); 3318 3319 /* 3320 * conn_lock protects the ilg list. Serializes 2 threads doing 3321 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3322 * and hme1 map to different ipsq's, but both operations happen 3323 * on the same conn. 3324 */ 3325 mutex_enter(&connp->conn_lock); 3326 3327 /* 3328 * Use the ifindex to do the lookup. We can't use the ill 3329 * directly because ilg_ill could point to a different ill if 3330 * things have moved. 3331 */ 3332 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3333 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3334 3335 /* 3336 * Depending on the option we're handling, may or may not be okay 3337 * if group has already been added. Figure out our rules based 3338 * on fmode and src params. 
Also make sure there's enough room 3339 * in the filter if we're adding a source to an existing filter. 3340 */ 3341 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3342 /* we're joining for all sources, must not have joined */ 3343 if (ilg != NULL) 3344 error = EADDRINUSE; 3345 } else { 3346 if (fmode == MODE_IS_EXCLUDE) { 3347 /* (excl {addr}) => block source, must have joined */ 3348 if (ilg == NULL) 3349 error = EADDRNOTAVAIL; 3350 } 3351 /* (incl {addr}) => join source, may have joined */ 3352 3353 if (ilg != NULL && 3354 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3355 error = ENOBUFS; 3356 } 3357 if (error != 0) { 3358 mutex_exit(&connp->conn_lock); 3359 return (error); 3360 } 3361 3362 /* 3363 * Alloc buffer to copy new state into (see below) before 3364 * we make any changes, so we can bail if it fails. 3365 */ 3366 if ((new_filter = l_alloc()) == NULL) { 3367 mutex_exit(&connp->conn_lock); 3368 return (ENOMEM); 3369 } 3370 3371 if (ilg == NULL) { 3372 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3373 mutex_exit(&connp->conn_lock); 3374 l_free(new_filter); 3375 return (ENOMEM); 3376 } 3377 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3378 ilg->ilg_filter = l_alloc(); 3379 if (ilg->ilg_filter == NULL) { 3380 ilg_delete(connp, ilg, NULL); 3381 mutex_exit(&connp->conn_lock); 3382 l_free(new_filter); 3383 return (ENOMEM); 3384 } 3385 ilg->ilg_filter->sl_numsrc = 1; 3386 ilg->ilg_filter->sl_addr[0] = *v6src; 3387 } 3388 ilgstat = ILGSTAT_NEW; 3389 ilg->ilg_v6group = *v6group; 3390 ilg->ilg_fmode = fmode; 3391 ilg->ilg_ipif = NULL; 3392 /* 3393 * Choose our target ill to join on. This might be different 3394 * from the ill we've been given if it's currently down and 3395 * part of a group. 3396 * 3397 * new ill is not refheld; we are writer. 
3398 */ 3399 ill = ip_choose_multi_ill(ill, v6group); 3400 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3401 ilg->ilg_ill = ill; 3402 /* 3403 * Remember the orig_ifindex that we joined on, so that we 3404 * can successfully delete them later on and also search 3405 * for duplicates if the application wants to join again. 3406 */ 3407 ilg->ilg_orig_ifindex = orig_ifindex; 3408 } else { 3409 int index; 3410 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3411 mutex_exit(&connp->conn_lock); 3412 l_free(new_filter); 3413 return (EINVAL); 3414 } 3415 if (ilg->ilg_filter == NULL) { 3416 ilg->ilg_filter = l_alloc(); 3417 if (ilg->ilg_filter == NULL) { 3418 mutex_exit(&connp->conn_lock); 3419 l_free(new_filter); 3420 return (ENOMEM); 3421 } 3422 } 3423 if (list_has_addr(ilg->ilg_filter, v6src)) { 3424 mutex_exit(&connp->conn_lock); 3425 l_free(new_filter); 3426 return (EADDRNOTAVAIL); 3427 } 3428 ilgstat = ILGSTAT_CHANGE; 3429 index = ilg->ilg_filter->sl_numsrc++; 3430 ilg->ilg_filter->sl_addr[index] = *v6src; 3431 /* 3432 * The current ill might be different from the one we were 3433 * asked to join on (if failover has occurred); we should 3434 * join on the ill stored in the ilg. The original ill 3435 * is noted in ilg_orig_ifindex, which matched our request. 3436 */ 3437 ill = ilg->ilg_ill; 3438 } 3439 3440 /* 3441 * Save copy of ilg's filter state to pass to other functions, 3442 * so we can release conn_lock now. 3443 */ 3444 new_fmode = ilg->ilg_fmode; 3445 l_copy(ilg->ilg_filter, new_filter); 3446 3447 mutex_exit(&connp->conn_lock); 3448 3449 /* 3450 * Now update the ill. We wait to do this until after the ilg 3451 * has been updated because we need to update the src filter 3452 * info for the ill, which involves looking at the status of 3453 * all the ilgs associated with this group/interface pair. 
3454 */ 3455 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3456 ilgstat, new_fmode, new_filter); 3457 if (error != 0) { 3458 /* 3459 * But because we waited, we have to undo the ilg update 3460 * if ip_addmulti_v6() fails. We also must lookup ilg 3461 * again, since we've not been holding conn_lock. 3462 */ 3463 in6_addr_t delsrc = 3464 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3465 mutex_enter(&connp->conn_lock); 3466 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3467 ASSERT(ilg != NULL); 3468 ilg_delete(connp, ilg, &delsrc); 3469 mutex_exit(&connp->conn_lock); 3470 l_free(new_filter); 3471 return (error); 3472 } 3473 3474 l_free(new_filter); 3475 3476 return (0); 3477 } 3478 3479 /* 3480 * Find an IPv4 ilg matching group, ill and source 3481 */ 3482 ilg_t * 3483 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3484 { 3485 in6_addr_t v6group, v6src; 3486 int i; 3487 boolean_t isinlist; 3488 ilg_t *ilg; 3489 ipif_t *ipif; 3490 ill_t *ilg_ill; 3491 3492 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3493 3494 /* 3495 * INADDR_ANY is represented as the IPv6 unspecified addr. 3496 */ 3497 if (group == INADDR_ANY) 3498 v6group = ipv6_all_zeros; 3499 else 3500 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3501 3502 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3503 /* ilg_ipif is NULL for v6; skip them */ 3504 ilg = &connp->conn_ilg[i]; 3505 if ((ipif = ilg->ilg_ipif) == NULL) 3506 continue; 3507 ASSERT(ilg->ilg_ill == NULL); 3508 ilg_ill = ipif->ipif_ill; 3509 ASSERT(!ilg_ill->ill_isv6); 3510 if (ilg_ill == ill && 3511 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3512 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3513 /* no source filter, so this is a match */ 3514 return (ilg); 3515 } 3516 break; 3517 } 3518 } 3519 if (i == connp->conn_ilg_inuse) 3520 return (NULL); 3521 3522 /* 3523 * we have an ilg with matching ill and group; but 3524 * the ilg has a source list that we must check. 
3525 */ 3526 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3527 isinlist = B_FALSE; 3528 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3529 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3530 isinlist = B_TRUE; 3531 break; 3532 } 3533 } 3534 3535 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3536 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3537 return (ilg); 3538 3539 return (NULL); 3540 } 3541 3542 /* 3543 * Find an IPv6 ilg matching group, ill, and source 3544 */ 3545 ilg_t * 3546 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3547 const in6_addr_t *v6src, ill_t *ill) 3548 { 3549 int i; 3550 boolean_t isinlist; 3551 ilg_t *ilg; 3552 ill_t *ilg_ill; 3553 3554 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3555 3556 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3557 ilg = &connp->conn_ilg[i]; 3558 if ((ilg_ill = ilg->ilg_ill) == NULL) 3559 continue; 3560 ASSERT(ilg->ilg_ipif == NULL); 3561 ASSERT(ilg_ill->ill_isv6); 3562 if (ilg_ill == ill && 3563 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3564 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3565 /* no source filter, so this is a match */ 3566 return (ilg); 3567 } 3568 break; 3569 } 3570 } 3571 if (i == connp->conn_ilg_inuse) 3572 return (NULL); 3573 3574 /* 3575 * we have an ilg with matching ill and group; but 3576 * the ilg has a source list that we must check. 3577 */ 3578 isinlist = B_FALSE; 3579 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3580 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3581 isinlist = B_TRUE; 3582 break; 3583 } 3584 } 3585 3586 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3587 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3588 return (ilg); 3589 3590 return (NULL); 3591 } 3592 3593 /* 3594 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3595 * This is useful when the interface fails and we have moved 3596 * to a new ill, but still would like to locate using the index 3597 * that we originally used to join. Used only for IPv6 currently. 3598 */ 3599 static ilg_t * 3600 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3601 { 3602 ilg_t *ilg; 3603 int i; 3604 3605 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3606 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3607 ilg = &connp->conn_ilg[i]; 3608 /* ilg_ill is NULL for V4. Skip them */ 3609 if (ilg->ilg_ill == NULL) 3610 continue; 3611 /* ilg_ipif is NULL for V6 */ 3612 ASSERT(ilg->ilg_ipif == NULL); 3613 ASSERT(ilg->ilg_orig_ifindex != 0); 3614 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3615 ilg->ilg_orig_ifindex == ifindex) { 3616 return (ilg); 3617 } 3618 } 3619 return (NULL); 3620 } 3621 3622 /* 3623 * Find an IPv6 ilg matching group and ill 3624 */ 3625 ilg_t * 3626 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3627 { 3628 ilg_t *ilg; 3629 int i; 3630 ill_t *mem_ill; 3631 3632 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3633 3634 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3635 ilg = &connp->conn_ilg[i]; 3636 if ((mem_ill = ilg->ilg_ill) == NULL) 3637 continue; 3638 ASSERT(ilg->ilg_ipif == NULL); 3639 ASSERT(mem_ill->ill_isv6); 3640 if (mem_ill == ill && 3641 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3642 return (ilg); 3643 } 3644 return (NULL); 3645 } 3646 3647 /* 3648 * Find an IPv4 ilg matching group and ipif 3649 */ 3650 static ilg_t * 3651 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3652 { 3653 in6_addr_t v6group; 3654 int i; 3655 3656 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3657 ASSERT(!ipif->ipif_ill->ill_isv6); 3658 3659 if (group == INADDR_ANY) 3660 v6group = ipv6_all_zeros; 3661 else 3662 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3663 3664 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3665 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3666 &v6group) 
&& 3667 connp->conn_ilg[i].ilg_ipif == ipif) 3668 return (&connp->conn_ilg[i]); 3669 } 3670 return (NULL); 3671 } 3672 3673 /* 3674 * If a source address is passed in (src != NULL and src is not 3675 * unspecified), remove the specified src addr from the given ilg's 3676 * filter list, else delete the ilg. 3677 */ 3678 static void 3679 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3680 { 3681 int i; 3682 3683 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3684 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3685 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3686 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3687 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3688 3689 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3690 if (connp->conn_ilg_walker_cnt != 0) { 3691 ilg->ilg_flags |= ILG_DELETED; 3692 return; 3693 } 3694 3695 FREE_SLIST(ilg->ilg_filter); 3696 3697 i = ilg - &connp->conn_ilg[0]; 3698 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3699 3700 /* Move other entries up one step */ 3701 connp->conn_ilg_inuse--; 3702 for (; i < connp->conn_ilg_inuse; i++) 3703 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3704 3705 if (connp->conn_ilg_inuse == 0) { 3706 mi_free((char *)connp->conn_ilg); 3707 connp->conn_ilg = NULL; 3708 cv_broadcast(&connp->conn_refcv); 3709 } 3710 } else { 3711 l_remove(ilg->ilg_filter, src); 3712 } 3713 } 3714 3715 /* 3716 * Called from conn close. No new ilg can be added or removed. 3717 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3718 * will return error if conn has started closing. 
3719 */ 3720 void 3721 ilg_delete_all(conn_t *connp) 3722 { 3723 int i; 3724 ipif_t *ipif = NULL; 3725 ill_t *ill = NULL; 3726 ilg_t *ilg; 3727 in6_addr_t v6group; 3728 boolean_t success; 3729 ipsq_t *ipsq; 3730 int orig_ifindex; 3731 3732 mutex_enter(&connp->conn_lock); 3733 retry: 3734 ILG_WALKER_HOLD(connp); 3735 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3736 ilg = &connp->conn_ilg[i]; 3737 /* 3738 * Since this walk is not atomic (we drop the 3739 * conn_lock and wait in ipsq_enter) we need 3740 * to check for the ILG_DELETED flag. 3741 */ 3742 if (ilg->ilg_flags & ILG_DELETED) { 3743 /* Go to the next ilg */ 3744 i--; 3745 continue; 3746 } 3747 v6group = ilg->ilg_v6group; 3748 3749 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3750 ipif = ilg->ilg_ipif; 3751 ill = ipif->ipif_ill; 3752 } else { 3753 ipif = NULL; 3754 ill = ilg->ilg_ill; 3755 } 3756 /* 3757 * We may not be able to refhold the ill if the ill/ipif 3758 * is changing. But we need to make sure that the ill will 3759 * not vanish. So we just bump up the ill_waiter count. 3760 * If we are unable to do even that, then the ill is closing, 3761 * in which case the unplumb thread will handle the cleanup, 3762 * and we move on to the next ilg. 3763 */ 3764 if (!ill_waiter_inc(ill)) { 3765 /* Go to the next ilg */ 3766 i--; 3767 continue; 3768 } 3769 mutex_exit(&connp->conn_lock); 3770 /* 3771 * To prevent deadlock between ill close which waits inside 3772 * the perimeter, and conn close, ipsq_enter returns error, 3773 * the moment ILL_CONDEMNED is set, in which case ill close 3774 * takes responsibility to cleanup the ilgs. Note that we 3775 * have not yet set condemned flag, otherwise the conn can't 3776 * be refheld for cleanup by those routines and it would be 3777 * a mutual deadlock. 
3778 */ 3779 success = ipsq_enter(ill, B_FALSE); 3780 ipsq = ill->ill_phyint->phyint_ipsq; 3781 ill_waiter_dcr(ill); 3782 mutex_enter(&connp->conn_lock); 3783 if (!success) { 3784 /* Go to the next ilg */ 3785 i--; 3786 continue; 3787 } 3788 3789 /* 3790 * Make sure that nothing has changed under. For eg. 3791 * a failover/failback can change ilg_ill while we were 3792 * waiting to become exclusive above 3793 */ 3794 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3795 ipif = ilg->ilg_ipif; 3796 ill = ipif->ipif_ill; 3797 } else { 3798 ipif = NULL; 3799 ill = ilg->ilg_ill; 3800 } 3801 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3802 /* 3803 * The ilg has changed under us probably due 3804 * to a failover or unplumb. Retry on the same ilg. 3805 */ 3806 mutex_exit(&connp->conn_lock); 3807 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3808 mutex_enter(&connp->conn_lock); 3809 continue; 3810 } 3811 v6group = ilg->ilg_v6group; 3812 orig_ifindex = ilg->ilg_orig_ifindex; 3813 ilg_delete(connp, ilg, NULL); 3814 mutex_exit(&connp->conn_lock); 3815 3816 if (ipif != NULL) 3817 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3818 B_FALSE, B_TRUE); 3819 3820 else 3821 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3822 connp->conn_zoneid, B_FALSE, B_TRUE); 3823 3824 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3825 mutex_enter(&connp->conn_lock); 3826 /* Go to the next ilg */ 3827 i--; 3828 } 3829 ILG_WALKER_RELE(connp); 3830 3831 /* If any ill was skipped above wait and retry */ 3832 if (connp->conn_ilg_inuse != 0) { 3833 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3834 goto retry; 3835 } 3836 mutex_exit(&connp->conn_lock); 3837 } 3838 3839 /* 3840 * Called from ill close by ipcl_walk for clearing conn_ilg and 3841 * conn_multicast_ipif for a given ipif. conn is held by caller. 3842 * Note that ipcl_walk only walks conns that are not yet condemned. 3843 * condemned conns can't be refheld. For this reason, conn must become clean 3844 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3845 * condemned flag. 3846 */ 3847 static void 3848 conn_delete_ipif(conn_t *connp, caddr_t arg) 3849 { 3850 ipif_t *ipif = (ipif_t *)arg; 3851 int i; 3852 char group_buf1[INET6_ADDRSTRLEN]; 3853 char group_buf2[INET6_ADDRSTRLEN]; 3854 ipaddr_t group; 3855 ilg_t *ilg; 3856 3857 /* 3858 * Even though conn_ilg_inuse can change while we are in this loop, 3859 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3860 * be created or deleted for this connp, on this ill, since this ill 3861 * is the perimeter. So we won't miss any ilg in this cleanup. 3862 */ 3863 mutex_enter(&connp->conn_lock); 3864 3865 /* 3866 * Increment the walker count, so that ilg repacking does not 3867 * occur while we are in the loop. 3868 */ 3869 ILG_WALKER_HOLD(connp); 3870 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3871 ilg = &connp->conn_ilg[i]; 3872 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3873 continue; 3874 /* 3875 * ip_close cannot be cleaning this ilg at the same time. 3876 * since it also has to execute in this ill's perimeter which 3877 * we are now holding. Only a clean conn can be condemned. 
3878 */ 3879 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3880 3881 /* Blow away the membership */ 3882 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3883 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3884 group_buf1, sizeof (group_buf1)), 3885 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3886 group_buf2, sizeof (group_buf2)), 3887 ipif->ipif_ill->ill_name)); 3888 3889 /* ilg_ipif is NULL for V6, so we won't be here */ 3890 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3891 3892 group = V4_PART_OF_V6(ilg->ilg_v6group); 3893 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3894 mutex_exit(&connp->conn_lock); 3895 3896 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3897 mutex_enter(&connp->conn_lock); 3898 } 3899 3900 /* 3901 * If we are the last walker, need to physically delete the 3902 * ilgs and repack. 3903 */ 3904 ILG_WALKER_RELE(connp); 3905 3906 if (connp->conn_multicast_ipif == ipif) { 3907 /* Revert to late binding */ 3908 connp->conn_multicast_ipif = NULL; 3909 } 3910 mutex_exit(&connp->conn_lock); 3911 3912 conn_delete_ire(connp, (caddr_t)ipif); 3913 } 3914 3915 /* 3916 * Called from ill close by ipcl_walk for clearing conn_ilg and 3917 * conn_multicast_ill for a given ill. conn is held by caller. 3918 * Note that ipcl_walk only walks conns that are not yet condemned. 3919 * condemned conns can't be refheld. For this reason, conn must become clean 3920 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3921 * condemned flag. 3922 */ 3923 static void 3924 conn_delete_ill(conn_t *connp, caddr_t arg) 3925 { 3926 ill_t *ill = (ill_t *)arg; 3927 int i; 3928 char group_buf[INET6_ADDRSTRLEN]; 3929 in6_addr_t v6group; 3930 int orig_ifindex; 3931 ilg_t *ilg; 3932 3933 /* 3934 * Even though conn_ilg_inuse can change while we are in this loop, 3935 * no new ilgs can be created/deleted for this connp, on this 3936 * ill, since this ill is the perimeter. So we won't miss any ilg 3937 * in this cleanup. 
3938 */ 3939 mutex_enter(&connp->conn_lock); 3940 3941 /* 3942 * Increment the walker count, so that ilg repacking does not 3943 * occur while we are in the loop. 3944 */ 3945 ILG_WALKER_HOLD(connp); 3946 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3947 ilg = &connp->conn_ilg[i]; 3948 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3949 /* 3950 * ip_close cannot be cleaning this ilg at the same 3951 * time, since it also has to execute in this ill's 3952 * perimeter which we are now holding. Only a clean 3953 * conn can be condemned. 3954 */ 3955 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3956 3957 /* Blow away the membership */ 3958 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3959 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3960 group_buf, sizeof (group_buf)), 3961 ill->ill_name)); 3962 3963 v6group = ilg->ilg_v6group; 3964 orig_ifindex = ilg->ilg_orig_ifindex; 3965 ilg_delete(connp, ilg, NULL); 3966 mutex_exit(&connp->conn_lock); 3967 3968 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3969 connp->conn_zoneid, B_FALSE, B_TRUE); 3970 mutex_enter(&connp->conn_lock); 3971 } 3972 } 3973 /* 3974 * If we are the last walker, need to physically delete the 3975 * ilgs and repack. 3976 */ 3977 ILG_WALKER_RELE(connp); 3978 3979 if (connp->conn_multicast_ill == ill) { 3980 /* Revert to late binding */ 3981 connp->conn_multicast_ill = NULL; 3982 connp->conn_orig_multicast_ifindex = 0; 3983 } 3984 mutex_exit(&connp->conn_lock); 3985 } 3986 3987 /* 3988 * Called when an ipif is unplumbed to make sure that there are no 3989 * dangling conn references to that ipif. 3990 * Handles ilg_ipif and conn_multicast_ipif 3991 */ 3992 void 3993 reset_conn_ipif(ipif) 3994 ipif_t *ipif; 3995 { 3996 ipcl_walk(conn_delete_ipif, (caddr_t)ipif); 3997 /* flush the SCTP ire cache for this ipif */ 3998 sctp_ire_cache_flush(ipif); 3999 } 4000 4001 /* 4002 * Called when an ill is unplumbed to make sure that there are no 4003 * dangling conn references to that ill. 
4004 * Handles ilg_ill, conn_multicast_ill. 4005 */ 4006 void 4007 reset_conn_ill(ill_t *ill) 4008 { 4009 ipcl_walk(conn_delete_ill, (caddr_t)ill); 4010 } 4011 4012 #ifdef DEBUG 4013 /* 4014 * Walk functions walk all the interfaces in the system to make 4015 * sure that there is no refernece to the ipif or ill that is 4016 * going away. 4017 */ 4018 int 4019 ilm_walk_ill(ill_t *ill) 4020 { 4021 int cnt = 0; 4022 ill_t *till; 4023 ilm_t *ilm; 4024 ill_walk_context_t ctx; 4025 4026 rw_enter(&ill_g_lock, RW_READER); 4027 till = ILL_START_WALK_ALL(&ctx); 4028 for (; till != NULL; till = ill_next(&ctx, till)) { 4029 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4030 if (ilm->ilm_ill == ill) { 4031 cnt++; 4032 } 4033 } 4034 } 4035 rw_exit(&ill_g_lock); 4036 4037 return (cnt); 4038 } 4039 4040 /* 4041 * This function is called before the ipif is freed. 4042 */ 4043 int 4044 ilm_walk_ipif(ipif_t *ipif) 4045 { 4046 int cnt = 0; 4047 ill_t *till; 4048 ilm_t *ilm; 4049 ill_walk_context_t ctx; 4050 4051 till = ILL_START_WALK_ALL(&ctx); 4052 for (; till != NULL; till = ill_next(&ctx, till)) { 4053 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4054 if (ilm->ilm_ipif == ipif) { 4055 cnt++; 4056 } 4057 } 4058 } 4059 return (cnt); 4060 } 4061 #endif 4062