1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/arp.h> 54 #include <inet/ip.h> 55 #include <inet/ip6.h> 56 #include <inet/ip_if.h> 57 #include <inet/ip_ndp.h> 58 #include <inet/ip_multi.h> 59 #include <inet/ipclassifier.h> 60 #include <inet/ipsec_impl.h> 61 #include <inet/sctp_ip.h> 62 #include <inet/ip_listutils.h> 63 #include <inet/udp_impl.h> 64 65 /* igmpv3/mldv2 source filter manipulation */ 66 static void ilm_bld_flists(conn_t *conn, void *arg); 67 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 68 slist_t *flist); 69 70 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 71 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 72 int orig_ifindex, zoneid_t zoneid); 73 static void ilm_delete(ilm_t *ilm); 74 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 77 const in6_addr_t *v6group, int index); 78 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 79 ipif_t *ipif); 80 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 81 mcast_record_t fmode, ipaddr_t src); 82 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 83 mcast_record_t fmode, const in6_addr_t *v6src); 84 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 85 
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 86 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 87 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 88 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 89 static void conn_ilg_reap(conn_t *connp); 90 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 91 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 92 static int ip_opt_delete_group_excl_v6(conn_t *connp, 93 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 94 const in6_addr_t *v6src); 95 96 /* 97 * MT notes: 98 * 99 * Multicast joins operate on both the ilg and ilm structures. Multiple 100 * threads operating on a conn (socket) trying to do multicast joins 101 * need to synchronize when operating on the ilg. Multiple threads 102 * potentially operating on different conn (socket endpoints) trying to 103 * do multicast joins could eventually end up trying to manipulate the 104 * ilm simultaneously and need to synchronize on the access to the ilm. 105 * Both are amenable to standard Solaris MT techniques, but it would be 106 * complex to handle a failover or failback which needs to manipulate 107 * ilg/ilms if an application can also simultaneously join/leave 108 * multicast groups. Hence multicast join/leave also go through the ipsq_t 109 * serialization. 110 * 111 * Multicast joins and leaves are single-threaded per phyint/IPMP group 112 * using the ipsq serialization mechanism. 113 * 114 * An ilm is an IP data structure used to track multicast join/leave. 115 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 116 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 117 * referencing the ilm. ilms are created / destroyed only as writer. ilms 118 * are not passed around, instead they are looked up and used under the 119 * ill_lock or as writer. So we don't need a dynamic refcount of the number 120 * of threads holding reference to an ilm. 
121 * 122 * Multicast Join operation: 123 * 124 * The first step is to determine the ipif (v4) or ill (v6) on which 125 * the join operation is to be done. The join is done after becoming 126 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 127 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 128 * Multiple threads can attempt to join simultaneously on different ipif/ill 129 * on the same conn. In this case the ipsq serialization does not help in 130 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 131 * The conn_lock also protects all the ilg_t members. 132 * 133 * Leave operation. 134 * 135 * Similar to the join operation, the first step is to determine the ipif 136 * or ill (v6) on which the leave operation is to be done. The leave operation 137 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 138 * As with join ilg modification is done under the protection of the conn lock. 139 */ 140 141 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 142 ASSERT(connp != NULL); \ 143 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 144 (first_mp), (func), (type), B_TRUE); \ 145 if ((ipsq) == NULL) { \ 146 ipif_refrele(ipif); \ 147 return (EINPROGRESS); \ 148 } 149 150 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 151 ASSERT(connp != NULL); \ 152 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 153 (first_mp), (func), (type), B_TRUE); \ 154 if ((ipsq) == NULL) { \ 155 ill_refrele(ill); \ 156 return (EINPROGRESS); \ 157 } 158 159 #define IPSQ_EXIT(ipsq) \ 160 if (ipsq != NULL) \ 161 ipsq_exit(ipsq, B_TRUE, B_TRUE); 162 163 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 164 165 #define ILG_WALKER_RELE(connp) \ 166 { \ 167 (connp)->conn_ilg_walker_cnt--; \ 168 if ((connp)->conn_ilg_walker_cnt == 0) \ 169 conn_ilg_reap(connp); \ 170 } 171 172 static void 173 conn_ilg_reap(conn_t *connp) 174 { 175 int to; 
176 int from; 177 178 ASSERT(MUTEX_HELD(&connp->conn_lock)); 179 180 to = 0; 181 from = 0; 182 while (from < connp->conn_ilg_inuse) { 183 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 184 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 185 from++; 186 continue; 187 } 188 if (to != from) 189 connp->conn_ilg[to] = connp->conn_ilg[from]; 190 to++; 191 from++; 192 } 193 194 connp->conn_ilg_inuse = to; 195 196 if (connp->conn_ilg_inuse == 0) { 197 mi_free((char *)connp->conn_ilg); 198 connp->conn_ilg = NULL; 199 cv_broadcast(&connp->conn_refcv); 200 } 201 } 202 203 #define GETSTRUCT(structure, number) \ 204 ((structure *)mi_zalloc(sizeof (structure) * (number))) 205 206 #define ILG_ALLOC_CHUNK 16 207 208 /* 209 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 210 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 211 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 212 * returned ilg). Returns NULL on failure (ENOMEM). 213 * 214 * Assumes connp->conn_lock is held. 
215 */ 216 static ilg_t * 217 conn_ilg_alloc(conn_t *connp) 218 { 219 ilg_t *new; 220 int curcnt; 221 222 ASSERT(MUTEX_HELD(&connp->conn_lock)); 223 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 224 225 if (connp->conn_ilg == NULL) { 226 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 227 if (connp->conn_ilg == NULL) 228 return (NULL); 229 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 230 connp->conn_ilg_inuse = 0; 231 } 232 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 233 curcnt = connp->conn_ilg_allocated; 234 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 235 if (new == NULL) 236 return (NULL); 237 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 238 mi_free((char *)connp->conn_ilg); 239 connp->conn_ilg = new; 240 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 241 } 242 243 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 244 } 245 246 typedef struct ilm_fbld_s { 247 ilm_t *fbld_ilm; 248 int fbld_in_cnt; 249 int fbld_ex_cnt; 250 slist_t fbld_in; 251 slist_t fbld_ex; 252 boolean_t fbld_in_overflow; 253 } ilm_fbld_t; 254 255 static void 256 ilm_bld_flists(conn_t *conn, void *arg) 257 { 258 int i; 259 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 260 ilm_t *ilm = fbld->fbld_ilm; 261 in6_addr_t *v6group = &ilm->ilm_v6addr; 262 263 if (conn->conn_ilg_inuse == 0) 264 return; 265 266 /* 267 * Since we can't break out of the ipcl_walk once started, we still 268 * have to look at every conn. But if we've already found one 269 * (EXCLUDE, NULL) list, there's no need to keep checking individual 270 * ilgs--that will be our state. 271 */ 272 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 273 return; 274 275 /* 276 * Check this conn's ilgs to see if any are interested in our 277 * ilm (group, interface match). If so, update the master 278 * include and exclude lists we're building in the fbld struct 279 * with this ilg's filter info. 
280 */ 281 mutex_enter(&conn->conn_lock); 282 for (i = 0; i < conn->conn_ilg_inuse; i++) { 283 ilg_t *ilg = &conn->conn_ilg[i]; 284 if ((ilg->ilg_ill == ilm->ilm_ill) && 285 (ilg->ilg_ipif == ilm->ilm_ipif) && 286 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 287 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 288 fbld->fbld_in_cnt++; 289 if (!fbld->fbld_in_overflow) 290 l_union_in_a(&fbld->fbld_in, 291 ilg->ilg_filter, 292 &fbld->fbld_in_overflow); 293 } else { 294 fbld->fbld_ex_cnt++; 295 /* 296 * On the first exclude list, don't try to do 297 * an intersection, as the master exclude list 298 * is intentionally empty. If the master list 299 * is still empty on later iterations, that 300 * means we have at least one ilg with an empty 301 * exclude list, so that should be reflected 302 * when we take the intersection. 303 */ 304 if (fbld->fbld_ex_cnt == 1) { 305 if (ilg->ilg_filter != NULL) 306 l_copy(ilg->ilg_filter, 307 &fbld->fbld_ex); 308 } else { 309 l_intersection_in_a(&fbld->fbld_ex, 310 ilg->ilg_filter); 311 } 312 } 313 /* there will only be one match, so break now. 
*/ 314 break; 315 } 316 } 317 mutex_exit(&conn->conn_lock); 318 } 319 320 static void 321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 322 { 323 ilm_fbld_t fbld; 324 ip_stack_t *ipst = ilm->ilm_ipst; 325 326 fbld.fbld_ilm = ilm; 327 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 328 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 329 fbld.fbld_in_overflow = B_FALSE; 330 331 /* first, construct our master include and exclude lists */ 332 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 333 334 /* now use those master lists to generate the interface filter */ 335 336 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 337 if (fbld.fbld_in_overflow) { 338 *fmode = MODE_IS_EXCLUDE; 339 flist->sl_numsrc = 0; 340 return; 341 } 342 343 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 344 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 345 *fmode = MODE_IS_INCLUDE; 346 flist->sl_numsrc = 0; 347 return; 348 } 349 350 /* 351 * If there are no exclude lists, then the interface filter 352 * is INCLUDE, with its filter list equal to fbld_in. A single 353 * exclude list makes the interface filter EXCLUDE, with its 354 * filter list equal to (fbld_ex - fbld_in). 355 */ 356 if (fbld.fbld_ex_cnt == 0) { 357 *fmode = MODE_IS_INCLUDE; 358 l_copy(&fbld.fbld_in, flist); 359 } else { 360 *fmode = MODE_IS_EXCLUDE; 361 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 362 } 363 } 364 365 /* 366 * If the given interface has failed, choose a new one to join on so 367 * that we continue to receive packets. ilg_orig_ifindex remembers 368 * what the application used to join on so that we know the ilg to 369 * delete even though we change the ill here. Callers will store the 370 * ilg returned from this function in ilg_ill. Thus when we receive 371 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 372 * 373 * This function must be called as writer so we can walk the group 374 * list and examine flags without holding a lock. 
375 */ 376 ill_t * 377 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 378 { 379 ill_t *till; 380 ill_group_t *illgrp = ill->ill_group; 381 382 ASSERT(IAM_WRITER_ILL(ill)); 383 384 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 385 return (ill); 386 387 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 388 return (ill); 389 390 till = illgrp->illgrp_ill; 391 while (till != NULL && 392 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 393 till = till->ill_group_next; 394 } 395 if (till != NULL) 396 return (till); 397 398 return (ill); 399 } 400 401 static int 402 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 403 boolean_t isv6) 404 { 405 mcast_record_t fmode; 406 slist_t *flist; 407 boolean_t fdefault; 408 char buf[INET6_ADDRSTRLEN]; 409 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 410 411 /* 412 * There are several cases where the ilm's filter state 413 * defaults to (EXCLUDE, NULL): 414 * - we've had previous joins without associated ilgs 415 * - this join has no associated ilg 416 * - the ilg's filter state is (EXCLUDE, NULL) 417 */ 418 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 419 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 420 421 /* attempt mallocs (if needed) before doing anything else */ 422 if ((flist = l_alloc()) == NULL) 423 return (ENOMEM); 424 if (!fdefault && ilm->ilm_filter == NULL) { 425 ilm->ilm_filter = l_alloc(); 426 if (ilm->ilm_filter == NULL) { 427 l_free(flist); 428 return (ENOMEM); 429 } 430 } 431 432 if (ilgstat != ILGSTAT_CHANGE) 433 ilm->ilm_refcnt++; 434 435 if (ilgstat == ILGSTAT_NONE) 436 ilm->ilm_no_ilg_cnt++; 437 438 /* 439 * Determine new filter state. If it's not the default 440 * (EXCLUDE, NULL), we must walk the conn list to find 441 * any ilgs interested in this group, and re-build the 442 * ilm filter. 
443 */ 444 if (fdefault) { 445 fmode = MODE_IS_EXCLUDE; 446 flist->sl_numsrc = 0; 447 } else { 448 ilm_gen_filter(ilm, &fmode, flist); 449 } 450 451 /* make sure state actually changed; nothing to do if not. */ 452 if ((ilm->ilm_fmode == fmode) && 453 !lists_are_different(ilm->ilm_filter, flist)) { 454 l_free(flist); 455 return (0); 456 } 457 458 /* send the state change report */ 459 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 460 if (isv6) 461 mld_statechange(ilm, fmode, flist); 462 else 463 igmp_statechange(ilm, fmode, flist); 464 } 465 466 /* update the ilm state */ 467 ilm->ilm_fmode = fmode; 468 if (flist->sl_numsrc > 0) 469 l_copy(flist, ilm->ilm_filter); 470 else 471 CLEAR_SLIST(ilm->ilm_filter); 472 473 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 474 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 475 476 l_free(flist); 477 return (0); 478 } 479 480 static int 481 ilm_update_del(ilm_t *ilm, boolean_t isv6) 482 { 483 mcast_record_t fmode; 484 slist_t *flist; 485 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 486 487 ip1dbg(("ilm_update_del: still %d left; updating state\n", 488 ilm->ilm_refcnt)); 489 490 if ((flist = l_alloc()) == NULL) 491 return (ENOMEM); 492 493 /* 494 * If present, the ilg in question has already either been 495 * updated or removed from our list; so all we need to do 496 * now is walk the list to update the ilm filter state. 497 * 498 * Skip the list walk if we have any no-ilg joins, which 499 * cause the filter state to revert to (EXCLUDE, NULL). 
500 */ 501 if (ilm->ilm_no_ilg_cnt != 0) { 502 fmode = MODE_IS_EXCLUDE; 503 flist->sl_numsrc = 0; 504 } else { 505 ilm_gen_filter(ilm, &fmode, flist); 506 } 507 508 /* check to see if state needs to be updated */ 509 if ((ilm->ilm_fmode == fmode) && 510 (!lists_are_different(ilm->ilm_filter, flist))) { 511 l_free(flist); 512 return (0); 513 } 514 515 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 516 if (isv6) 517 mld_statechange(ilm, fmode, flist); 518 else 519 igmp_statechange(ilm, fmode, flist); 520 } 521 522 ilm->ilm_fmode = fmode; 523 if (flist->sl_numsrc > 0) { 524 if (ilm->ilm_filter == NULL) { 525 ilm->ilm_filter = l_alloc(); 526 if (ilm->ilm_filter == NULL) { 527 char buf[INET6_ADDRSTRLEN]; 528 ip1dbg(("ilm_update_del: failed to alloc ilm " 529 "filter; no source filtering for %s on %s", 530 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 531 buf, sizeof (buf)), ill->ill_name)); 532 ilm->ilm_fmode = MODE_IS_EXCLUDE; 533 l_free(flist); 534 return (0); 535 } 536 } 537 l_copy(flist, ilm->ilm_filter); 538 } else { 539 CLEAR_SLIST(ilm->ilm_filter); 540 } 541 542 l_free(flist); 543 return (0); 544 } 545 546 /* 547 * INADDR_ANY means all multicast addresses. This is only used 548 * by the multicast router. 549 * INADDR_ANY is stored as IPv6 unspecified addr. 550 */ 551 int 552 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 553 mcast_record_t ilg_fmode, slist_t *ilg_flist) 554 { 555 ill_t *ill = ipif->ipif_ill; 556 ilm_t *ilm; 557 in6_addr_t v6group; 558 int ret; 559 560 ASSERT(IAM_WRITER_IPIF(ipif)); 561 562 if (!CLASSD(group) && group != INADDR_ANY) 563 return (EINVAL); 564 565 /* 566 * INADDR_ANY is represented as the IPv6 unspecifed addr. 567 */ 568 if (group == INADDR_ANY) 569 v6group = ipv6_all_zeros; 570 else 571 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 572 573 ilm = ilm_lookup_ipif(ipif, group); 574 if (ilm != NULL) 575 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 576 577 /* 578 * ilms are associated with ipifs in IPv4. 
It moves with the 579 * ipif if the ipif moves to a new ill when the interface 580 * fails. Thus we really don't check whether the ipif_ill 581 * has failed like in IPv6. If it has FAILED the ipif 582 * will move (daemon will move it) and hence the ilm, if the 583 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 584 * we continue to receive in the same place even if the 585 * interface fails. 586 */ 587 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 588 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 589 if (ilm == NULL) 590 return (ENOMEM); 591 592 if (group == INADDR_ANY) { 593 /* 594 * Check how many ipif's have members in this group - 595 * if more then one we should not tell the driver to join 596 * this time 597 */ 598 if (ilm_numentries_v6(ill, &v6group) > 1) 599 return (0); 600 if (ill->ill_group == NULL) 601 ret = ip_join_allmulti(ipif); 602 else 603 ret = ill_nominate_mcast_rcv(ill->ill_group); 604 if (ret != 0) 605 ilm_delete(ilm); 606 return (ret); 607 } 608 609 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 610 igmp_joingroup(ilm); 611 612 if (ilm_numentries_v6(ill, &v6group) > 1) 613 return (0); 614 615 ret = ip_ll_addmulti_v6(ipif, &v6group); 616 if (ret != 0) 617 ilm_delete(ilm); 618 return (ret); 619 } 620 621 /* 622 * The unspecified address means all multicast addresses. 623 * This is only used by the multicast router. 624 * 625 * ill identifies the interface to join on; it may not match the 626 * interface requested by the application of a failover has taken 627 * place. orig_ifindex always identifies the interface requested 628 * by the app. 629 * 630 * ilgstat tells us if there's an ilg associated with this join, 631 * and if so, if it's a new ilg or a change to an existing one. 632 * ilg_fmode and ilg_flist give us the current filter state of 633 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 
634 */ 635 int 636 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 637 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 638 slist_t *ilg_flist) 639 { 640 ilm_t *ilm; 641 int ret; 642 643 ASSERT(IAM_WRITER_ILL(ill)); 644 645 if (!IN6_IS_ADDR_MULTICAST(v6group) && 646 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 647 return (EINVAL); 648 } 649 650 /* 651 * An ilm is uniquely identified by the tuple of (group, ill, 652 * orig_ill). group is the multicast group address, ill is 653 * the interface on which it is currently joined, and orig_ill 654 * is the interface on which the application requested the 655 * join. orig_ill and ill are the same unless orig_ill has 656 * failed over. 657 * 658 * Both orig_ill and ill are required, which means we may have 659 * 2 ilms on an ill for the same group, but with different 660 * orig_ills. These must be kept separate, so that when failback 661 * occurs, the appropriate ilms are moved back to their orig_ill 662 * without disrupting memberships on the ill to which they had 663 * been moved. 664 * 665 * In order to track orig_ill, we store orig_ifindex in the 666 * ilm and ilg. 667 */ 668 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 669 if (ilm != NULL) 670 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 671 672 /* 673 * We need to remember where the application really wanted 674 * to join. This will be used later if we want to failback 675 * to the original interface. 
676 */ 677 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 678 ilg_flist, orig_ifindex, zoneid); 679 if (ilm == NULL) 680 return (ENOMEM); 681 682 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 683 /* 684 * Check how many ipif's that have members in this group - 685 * if more then one we should not tell the driver to join 686 * this time 687 */ 688 if (ilm_numentries_v6(ill, v6group) > 1) 689 return (0); 690 if (ill->ill_group == NULL) 691 ret = ip_join_allmulti(ill->ill_ipif); 692 else 693 ret = ill_nominate_mcast_rcv(ill->ill_group); 694 695 if (ret != 0) 696 ilm_delete(ilm); 697 return (ret); 698 } 699 700 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 701 mld_joingroup(ilm); 702 703 /* 704 * If we have more then one we should not tell the driver 705 * to join this time. 706 */ 707 if (ilm_numentries_v6(ill, v6group) > 1) 708 return (0); 709 710 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 711 if (ret != 0) 712 ilm_delete(ilm); 713 return (ret); 714 } 715 716 /* 717 * Send a multicast request to the driver for enabling multicast reception 718 * for v6groupp address. The caller has already checked whether it is 719 * appropriate to send one or not. 720 */ 721 int 722 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 723 { 724 mblk_t *mp; 725 uint32_t addrlen, addroff; 726 char group_buf[INET6_ADDRSTRLEN]; 727 728 ASSERT(IAM_WRITER_ILL(ill)); 729 730 /* 731 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 732 * on. 733 */ 734 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 735 &addrlen, &addroff); 736 if (!mp) 737 return (ENOMEM); 738 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 739 ipaddr_t v4group; 740 741 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 742 /* 743 * NOTE!!! 744 * The "addroff" passed in here was calculated by 745 * ill_create_dl(), and will be used by ill_create_squery() 746 * to perform some twisted coding magic. It is the offset 747 * into the dl_xxx_req of the hw addr. 
Here, it will be 748 * added to b_wptr - b_rptr to create a magic number that 749 * is not an offset into this squery mblk. 750 * The actual hardware address will be accessed only in the 751 * dl_xxx_req, not in the squery. More importantly, 752 * that hardware address can *only* be accessed in this 753 * mblk chain by calling mi_offset_param_c(), which uses 754 * the magic number in the squery hw offset field to go 755 * to the *next* mblk (the dl_xxx_req), subtract the 756 * (b_wptr - b_rptr), and find the actual offset into 757 * the dl_xxx_req. 758 * Any method that depends on using the 759 * offset field in the dl_disabmulti_req or squery 760 * to find either hardware address will similarly fail. 761 * 762 * Look in ar_entry_squery() in arp.c to see how this offset 763 * is used. 764 */ 765 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 766 if (!mp) 767 return (ENOMEM); 768 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 769 inet_ntop(AF_INET6, v6groupp, group_buf, 770 sizeof (group_buf)), 771 ill->ill_name)); 772 putnext(ill->ill_rq, mp); 773 } else { 774 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 775 " %s\n", 776 inet_ntop(AF_INET6, v6groupp, group_buf, 777 sizeof (group_buf)), 778 ill->ill_name)); 779 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 780 } 781 return (0); 782 } 783 784 /* 785 * Send a multicast request to the driver for enabling multicast 786 * membership for v6group if appropriate. 
787 */ 788 static int 789 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 790 { 791 ill_t *ill = ipif->ipif_ill; 792 793 ASSERT(IAM_WRITER_IPIF(ipif)); 794 795 if (ill->ill_net_type != IRE_IF_RESOLVER || 796 ipif->ipif_flags & IPIF_POINTOPOINT) { 797 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 798 return (0); /* Must be IRE_IF_NORESOLVER */ 799 } 800 801 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 802 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 803 return (0); 804 } 805 if (ill->ill_ipif_up_count == 0) { 806 /* 807 * Nobody there. All multicast addresses will be re-joined 808 * when we get the DL_BIND_ACK bringing the interface up. 809 */ 810 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 811 return (0); 812 } 813 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 814 } 815 816 /* 817 * INADDR_ANY means all multicast addresses. This is only used 818 * by the multicast router. 819 * INADDR_ANY is stored as the IPv6 unspecifed addr. 820 */ 821 int 822 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 823 { 824 ill_t *ill = ipif->ipif_ill; 825 ilm_t *ilm; 826 in6_addr_t v6group; 827 int ret; 828 829 ASSERT(IAM_WRITER_IPIF(ipif)); 830 831 if (!CLASSD(group) && group != INADDR_ANY) 832 return (EINVAL); 833 834 /* 835 * INADDR_ANY is represented as the IPv6 unspecifed addr. 836 */ 837 if (group == INADDR_ANY) 838 v6group = ipv6_all_zeros; 839 else 840 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 841 842 /* 843 * Look for a match on the ipif. 844 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 
845 */ 846 ilm = ilm_lookup_ipif(ipif, group); 847 if (ilm == NULL) 848 return (ENOENT); 849 850 /* Update counters */ 851 if (no_ilg) 852 ilm->ilm_no_ilg_cnt--; 853 854 if (leaving) 855 ilm->ilm_refcnt--; 856 857 if (ilm->ilm_refcnt > 0) 858 return (ilm_update_del(ilm, B_FALSE)); 859 860 if (group == INADDR_ANY) { 861 ilm_delete(ilm); 862 /* 863 * Check how many ipif's that have members in this group - 864 * if there are still some left then don't tell the driver 865 * to drop it. 866 */ 867 if (ilm_numentries_v6(ill, &v6group) != 0) 868 return (0); 869 870 /* 871 * If we never joined, then don't leave. This can happen 872 * if we're in an IPMP group, since only one ill per IPMP 873 * group receives all multicast packets. 874 */ 875 if (!ill->ill_join_allmulti) { 876 ASSERT(ill->ill_group != NULL); 877 return (0); 878 } 879 880 ret = ip_leave_allmulti(ipif); 881 if (ill->ill_group != NULL) 882 (void) ill_nominate_mcast_rcv(ill->ill_group); 883 return (ret); 884 } 885 886 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 887 igmp_leavegroup(ilm); 888 889 ilm_delete(ilm); 890 /* 891 * Check how many ipif's that have members in this group - 892 * if there are still some left then don't tell the driver 893 * to drop it. 894 */ 895 if (ilm_numentries_v6(ill, &v6group) != 0) 896 return (0); 897 return (ip_ll_delmulti_v6(ipif, &v6group)); 898 } 899 900 /* 901 * The unspecified address means all multicast addresses. 902 * This is only used by the multicast router. 903 */ 904 int 905 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 906 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 907 { 908 ipif_t *ipif; 909 ilm_t *ilm; 910 int ret; 911 912 ASSERT(IAM_WRITER_ILL(ill)); 913 914 if (!IN6_IS_ADDR_MULTICAST(v6group) && 915 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 916 return (EINVAL); 917 918 /* 919 * Look for a match on the ill. 920 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 
921 * 922 * Similar to ip_addmulti_v6, we should always look using 923 * the orig_ifindex. 924 * 925 * 1) If orig_ifindex is different from ill's ifindex 926 * we should have an ilm with orig_ifindex created in 927 * ip_addmulti_v6. We should delete that here. 928 * 929 * 2) If orig_ifindex is same as ill's ifindex, we should 930 * not delete the ilm that is temporarily here because of 931 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 932 * different from ill's ifindex. 933 * 934 * Thus, always lookup using orig_ifindex. 935 */ 936 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 937 if (ilm == NULL) 938 return (ENOENT); 939 940 ASSERT(ilm->ilm_ill == ill); 941 942 ipif = ill->ill_ipif; 943 944 /* Update counters */ 945 if (no_ilg) 946 ilm->ilm_no_ilg_cnt--; 947 948 if (leaving) 949 ilm->ilm_refcnt--; 950 951 if (ilm->ilm_refcnt > 0) 952 return (ilm_update_del(ilm, B_TRUE)); 953 954 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 955 ilm_delete(ilm); 956 /* 957 * Check how many ipif's that have members in this group - 958 * if there are still some left then don't tell the driver 959 * to drop it. 960 */ 961 if (ilm_numentries_v6(ill, v6group) != 0) 962 return (0); 963 964 /* 965 * If we never joined, then don't leave. This can happen 966 * if we're in an IPMP group, since only one ill per IPMP 967 * group receives all multicast packets. 968 */ 969 if (!ill->ill_join_allmulti) { 970 ASSERT(ill->ill_group != NULL); 971 return (0); 972 } 973 974 ret = ip_leave_allmulti(ipif); 975 if (ill->ill_group != NULL) 976 (void) ill_nominate_mcast_rcv(ill->ill_group); 977 return (ret); 978 } 979 980 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 981 mld_leavegroup(ilm); 982 983 ilm_delete(ilm); 984 /* 985 * Check how many ipif's that have members in this group - 986 * if there are still some left then don't tell the driver 987 * to drop it. 
988 */ 989 if (ilm_numentries_v6(ill, v6group) != 0) 990 return (0); 991 return (ip_ll_delmulti_v6(ipif, v6group)); 992 } 993 994 /* 995 * Send a multicast request to the driver for disabling multicast reception 996 * for v6groupp address. The caller has already checked whether it is 997 * appropriate to send one or not. 998 */ 999 int 1000 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1001 { 1002 mblk_t *mp; 1003 char group_buf[INET6_ADDRSTRLEN]; 1004 uint32_t addrlen, addroff; 1005 1006 ASSERT(IAM_WRITER_ILL(ill)); 1007 /* 1008 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1009 * on. 1010 */ 1011 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1012 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1013 1014 if (!mp) 1015 return (ENOMEM); 1016 1017 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1018 ipaddr_t v4group; 1019 1020 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1021 /* 1022 * NOTE!!! 1023 * The "addroff" passed in here was calculated by 1024 * ill_create_dl(), and will be used by ill_create_squery() 1025 * to perform some twisted coding magic. It is the offset 1026 * into the dl_xxx_req of the hw addr. Here, it will be 1027 * added to b_wptr - b_rptr to create a magic number that 1028 * is not an offset into this mblk. 1029 * 1030 * Please see the comment in ip_ll_send)enabmulti_req() 1031 * for a complete explanation. 1032 * 1033 * Look in ar_entry_squery() in arp.c to see how this offset 1034 * is used. 
1035 */ 1036 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1037 if (!mp) 1038 return (ENOMEM); 1039 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1040 inet_ntop(AF_INET6, v6groupp, group_buf, 1041 sizeof (group_buf)), 1042 ill->ill_name)); 1043 putnext(ill->ill_rq, mp); 1044 } else { 1045 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on" 1046 " %s\n", 1047 inet_ntop(AF_INET6, v6groupp, group_buf, 1048 sizeof (group_buf)), 1049 ill->ill_name)); 1050 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1051 } 1052 return (0); 1053 } 1054 1055 /* 1056 * Send a multicast request to the driver for disabling multicast 1057 * membership for v6group if appropriate. 1058 */ 1059 static int 1060 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1061 { 1062 ill_t *ill = ipif->ipif_ill; 1063 1064 ASSERT(IAM_WRITER_IPIF(ipif)); 1065 1066 if (ill->ill_net_type != IRE_IF_RESOLVER || 1067 ipif->ipif_flags & IPIF_POINTOPOINT) { 1068 return (0); /* Must be IRE_IF_NORESOLVER */ 1069 } 1070 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1071 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1072 return (0); 1073 } 1074 if (ill->ill_ipif_up_count == 0) { 1075 /* 1076 * Nobody there. All multicast addresses will be re-joined 1077 * when we get the DL_BIND_ACK bringing the interface up. 1078 */ 1079 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1080 return (0); 1081 } 1082 return (ip_ll_send_disabmulti_req(ill, v6group)); 1083 } 1084 1085 /* 1086 * Make the driver pass up all multicast packets 1087 * 1088 * With ill groups, the caller makes sure that there is only 1089 * one ill joining the allmulti group. 1090 */ 1091 int 1092 ip_join_allmulti(ipif_t *ipif) 1093 { 1094 ill_t *ill = ipif->ipif_ill; 1095 mblk_t *mp; 1096 uint32_t addrlen, addroff; 1097 1098 ASSERT(IAM_WRITER_IPIF(ipif)); 1099 1100 if (ill->ill_ipif_up_count == 0) { 1101 /* 1102 * Nobody there. 
All multicast addresses will be re-joined 1103 * when we get the DL_BIND_ACK bringing the interface up. 1104 */ 1105 return (0); 1106 } 1107 1108 ASSERT(!ill->ill_join_allmulti); 1109 1110 /* 1111 * Create a DL_PROMISCON_REQ message and send it directly to 1112 * the DLPI provider. We don't need to do this for certain 1113 * media types for which we never need to turn promiscuous 1114 * mode on. 1115 */ 1116 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1117 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1118 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1119 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1120 if (mp == NULL) 1121 return (ENOMEM); 1122 ill_dlpi_send(ill, mp); 1123 } 1124 1125 mutex_enter(&ill->ill_lock); 1126 ill->ill_join_allmulti = B_TRUE; 1127 mutex_exit(&ill->ill_lock); 1128 return (0); 1129 } 1130 1131 /* 1132 * Make the driver stop passing up all multicast packets 1133 * 1134 * With ill groups, we need to nominate some other ill as 1135 * this ipif->ipif_ill is leaving the group. 1136 */ 1137 int 1138 ip_leave_allmulti(ipif_t *ipif) 1139 { 1140 ill_t *ill = ipif->ipif_ill; 1141 mblk_t *mp; 1142 uint32_t addrlen, addroff; 1143 1144 ASSERT(IAM_WRITER_IPIF(ipif)); 1145 1146 if (ill->ill_ipif_up_count == 0) { 1147 /* 1148 * Nobody there. All multicast addresses will be re-joined 1149 * when we get the DL_BIND_ACK bringing the interface up. 1150 */ 1151 return (0); 1152 } 1153 1154 ASSERT(ill->ill_join_allmulti); 1155 1156 /* 1157 * Create a DL_PROMISCOFF_REQ message and send it directly to 1158 * the DLPI provider. We don't need to do this for certain 1159 * media types for which we never need to turn promiscuous 1160 * mode on. 
1161 */ 1162 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1163 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1164 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1165 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1166 if (mp == NULL) 1167 return (ENOMEM); 1168 ill_dlpi_send(ill, mp); 1169 } 1170 1171 mutex_enter(&ill->ill_lock); 1172 ill->ill_join_allmulti = B_FALSE; 1173 mutex_exit(&ill->ill_lock); 1174 return (0); 1175 } 1176 1177 /* 1178 * Copy mp_orig and pass it in as a local message. 1179 */ 1180 void 1181 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1182 zoneid_t zoneid) 1183 { 1184 mblk_t *mp; 1185 mblk_t *ipsec_mp; 1186 ipha_t *iph; 1187 ip_stack_t *ipst = ill->ill_ipst; 1188 1189 if (DB_TYPE(mp_orig) == M_DATA && 1190 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1191 uint_t hdrsz; 1192 1193 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1194 sizeof (udpha_t); 1195 ASSERT(MBLKL(mp_orig) >= hdrsz); 1196 1197 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1198 (mp_orig = dupmsg(mp_orig)) != NULL) { 1199 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1200 mp->b_wptr += hdrsz; 1201 mp->b_cont = mp_orig; 1202 mp_orig->b_rptr += hdrsz; 1203 if (MBLKL(mp_orig) == 0) { 1204 mp->b_cont = mp_orig->b_cont; 1205 mp_orig->b_cont = NULL; 1206 freeb(mp_orig); 1207 } 1208 } else if (mp != NULL) { 1209 freeb(mp); 1210 mp = NULL; 1211 } 1212 } else { 1213 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1214 } 1215 1216 if (mp == NULL) 1217 return; 1218 if (DB_TYPE(mp) == M_CTL) { 1219 ipsec_mp = mp; 1220 mp = mp->b_cont; 1221 } else { 1222 ipsec_mp = mp; 1223 } 1224 1225 iph = (ipha_t *)mp->b_rptr; 1226 1227 DTRACE_PROBE4(ip4__loopback__out__start, 1228 ill_t *, NULL, ill_t *, ill, 1229 ipha_t *, iph, mblk_t *, ipsec_mp); 1230 1231 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1232 ipst->ips_ipv4firewall_loopback_out, 1233 NULL, ill, iph, ipsec_mp, mp, ipst); 1234 1235 
DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1236 1237 if (ipsec_mp != NULL) 1238 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1239 fanout_flags, zoneid); 1240 } 1241 1242 static area_t ip_aresq_template = { 1243 AR_ENTRY_SQUERY, /* cmd */ 1244 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1245 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1246 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1247 sizeof (area_t), /* proto addr offset */ 1248 IP_ADDR_LEN, /* proto addr_length */ 1249 0, /* proto mask offset */ 1250 /* Rest is initialized when used */ 1251 0, /* flags */ 1252 0, /* hw addr offset */ 1253 0, /* hw addr length */ 1254 }; 1255 1256 static mblk_t * 1257 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1258 uint32_t addroff, mblk_t *mp_tail) 1259 { 1260 mblk_t *mp; 1261 area_t *area; 1262 1263 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1264 (caddr_t)&ipaddr); 1265 if (!mp) { 1266 freemsg(mp_tail); 1267 return (NULL); 1268 } 1269 area = (area_t *)mp->b_rptr; 1270 area->area_hw_addr_length = addrlen; 1271 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1272 /* 1273 * NOTE! 1274 * 1275 * The area_hw_addr_offset, as can be seen, does not hold the 1276 * actual hardware address offset. Rather, it holds the offset 1277 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1278 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1279 * mi_offset_paramc() to find the hardware address in the 1280 * *second* mblk (dl_xxx_req), not this mblk. 1281 * 1282 * Using mi_offset_paramc() is thus the *only* way to access 1283 * the dl_xxx_hw address. 1284 * 1285 * The squery hw address should *not* be accessed. 1286 * 1287 * See ar_entry_squery() in arp.c for an example of how all this works. 1288 */ 1289 1290 mp->b_cont = mp_tail; 1291 return (mp); 1292 } 1293 1294 /* 1295 * Create a dlpi message with room for phys+sap. 
When we come back in 1296 * ip_wput_ctl() we will strip the sap for those primitives which 1297 * only need a physical address. 1298 */ 1299 static mblk_t * 1300 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1301 uint32_t *addr_lenp, uint32_t *addr_offp) 1302 { 1303 mblk_t *mp; 1304 uint32_t hw_addr_length; 1305 char *cp; 1306 uint32_t offset; 1307 uint32_t size; 1308 1309 *addr_lenp = *addr_offp = 0; 1310 1311 hw_addr_length = ill->ill_phys_addr_length; 1312 if (!hw_addr_length) { 1313 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1314 return (NULL); 1315 } 1316 1317 size = length; 1318 switch (dl_primitive) { 1319 case DL_ENABMULTI_REQ: 1320 case DL_DISABMULTI_REQ: 1321 size += hw_addr_length; 1322 break; 1323 case DL_PROMISCON_REQ: 1324 case DL_PROMISCOFF_REQ: 1325 break; 1326 default: 1327 return (NULL); 1328 } 1329 mp = allocb(size, BPRI_HI); 1330 if (!mp) 1331 return (NULL); 1332 mp->b_wptr += size; 1333 mp->b_datap->db_type = M_PROTO; 1334 1335 cp = (char *)mp->b_rptr; 1336 offset = length; 1337 1338 switch (dl_primitive) { 1339 case DL_ENABMULTI_REQ: { 1340 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1341 1342 dl->dl_primitive = dl_primitive; 1343 dl->dl_addr_offset = offset; 1344 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1345 *addr_offp = offset; 1346 break; 1347 } 1348 case DL_DISABMULTI_REQ: { 1349 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1350 1351 dl->dl_primitive = dl_primitive; 1352 dl->dl_addr_offset = offset; 1353 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1354 *addr_offp = offset; 1355 break; 1356 } 1357 case DL_PROMISCON_REQ: 1358 case DL_PROMISCOFF_REQ: { 1359 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1360 1361 dl->dl_primitive = dl_primitive; 1362 dl->dl_level = DL_PROMISC_MULTI; 1363 break; 1364 } 1365 } 1366 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1367 *addr_lenp, *addr_offp)); 1368 return (mp); 1369 } 1370 1371 void 1372 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1373 
{ 1374 ill_t *ill = (ill_t *)q->q_ptr; 1375 mblk_t *mp = mp_orig; 1376 area_t *area = (area_t *)mp->b_rptr; 1377 1378 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1379 if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL || 1380 area->area_cmd != AR_ENTRY_SQUERY) { 1381 putnext(q, mp); 1382 return; 1383 } 1384 mp = mp->b_cont; 1385 1386 /* 1387 * Update dl_addr_length and dl_addr_offset for primitives that 1388 * have physical addresses as opposed to full saps 1389 */ 1390 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1391 case DL_ENABMULTI_REQ: 1392 /* Track the state if this is the first enabmulti */ 1393 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1394 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1395 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1396 break; 1397 case DL_DISABMULTI_REQ: 1398 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1399 break; 1400 default: 1401 ip1dbg(("ip_wput_ctl: default\n")); 1402 break; 1403 } 1404 freeb(mp_orig); 1405 ill_dlpi_send(ill, mp); 1406 } 1407 1408 /* 1409 * Rejoin any groups which have been explicitly joined by the application (we 1410 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1411 * bringing the interface down). Note that because groups can be joined and 1412 * left while an interface is down, this may not be the same set of groups 1413 * that we left in ill_leave_multicast(). 1414 */ 1415 void 1416 ill_recover_multicast(ill_t *ill) 1417 { 1418 ilm_t *ilm; 1419 char addrbuf[INET6_ADDRSTRLEN]; 1420 1421 ASSERT(IAM_WRITER_ILL(ill)); 1422 1423 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1424 /* 1425 * Check how many ipif's that have members in this group - 1426 * if more then one we make sure that this entry is first 1427 * in the list. 
1428 */ 1429 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1430 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1431 continue; 1432 ip1dbg(("ill_recover_multicast: %s\n", 1433 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1434 sizeof (addrbuf)))); 1435 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1436 if (ill->ill_group == NULL) { 1437 (void) ip_join_allmulti(ill->ill_ipif); 1438 } else { 1439 /* 1440 * We don't want to join on this ill, 1441 * if somebody else in the group has 1442 * already been nominated. 1443 */ 1444 (void) ill_nominate_mcast_rcv(ill->ill_group); 1445 } 1446 } else { 1447 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1448 &ilm->ilm_v6addr); 1449 } 1450 } 1451 } 1452 1453 /* 1454 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1455 * that were explicitly joined. Note that both these functions could be 1456 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1457 * and DL_ENABMULTI_REQ messages when an interface is down. 1458 */ 1459 void 1460 ill_leave_multicast(ill_t *ill) 1461 { 1462 ilm_t *ilm; 1463 char addrbuf[INET6_ADDRSTRLEN]; 1464 1465 ASSERT(IAM_WRITER_ILL(ill)); 1466 1467 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1468 /* 1469 * Check how many ipif's that have members in this group - 1470 * if more then one we make sure that this entry is first 1471 * in the list. 1472 */ 1473 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1474 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1475 continue; 1476 ip1dbg(("ill_leave_multicast: %s\n", 1477 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1478 sizeof (addrbuf)))); 1479 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1480 (void) ip_leave_allmulti(ill->ill_ipif); 1481 /* 1482 * If we were part of an IPMP group, then 1483 * ill_handoff_responsibility() has already 1484 * nominated a new member (so we don't). 
1485 */ 1486 ASSERT(ill->ill_group == NULL); 1487 } else { 1488 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1489 } 1490 } 1491 } 1492 1493 /* 1494 * Find an ilm for matching the ill and which has the source in its 1495 * INCLUDE list or does not have it in its EXCLUDE list 1496 */ 1497 ilm_t * 1498 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1499 { 1500 in6_addr_t v6group, v6src; 1501 1502 /* 1503 * INADDR_ANY is represented as the IPv6 unspecified addr. 1504 */ 1505 if (group == INADDR_ANY) 1506 v6group = ipv6_all_zeros; 1507 else 1508 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1509 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1510 1511 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1512 } 1513 1514 ilm_t * 1515 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1516 const in6_addr_t *v6src) 1517 { 1518 ilm_t *ilm; 1519 boolean_t isinlist; 1520 int i, numsrc; 1521 1522 /* 1523 * If the source is in any ilm's INCLUDE list, or if 1524 * it is not in any ilm's EXCLUDE list, we have a hit. 1525 */ 1526 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1527 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1528 1529 isinlist = B_FALSE; 1530 numsrc = (ilm->ilm_filter == NULL) ? 1531 0 : ilm->ilm_filter->sl_numsrc; 1532 for (i = 0; i < numsrc; i++) { 1533 if (IN6_ARE_ADDR_EQUAL(v6src, 1534 &ilm->ilm_filter->sl_addr[i])) { 1535 isinlist = B_TRUE; 1536 break; 1537 } 1538 } 1539 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1540 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1541 return (ilm); 1542 else 1543 return (NULL); 1544 } 1545 } 1546 return (NULL); 1547 } 1548 1549 1550 /* Find an ilm for matching the ill */ 1551 ilm_t * 1552 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1553 { 1554 in6_addr_t v6group; 1555 1556 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1557 IAM_WRITER_ILL(ill)); 1558 /* 1559 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1560 */ 1561 if (group == INADDR_ANY) 1562 v6group = ipv6_all_zeros; 1563 else 1564 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1565 1566 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1567 } 1568 1569 /* 1570 * Find an ilm for matching the ill. All the ilm lookup functions 1571 * ignore ILM_DELETED ilms. These have been logically deleted, and 1572 * igmp and linklayer disable multicast have been done. Only mi_free 1573 * yet to be done. Still there in the list due to ilm_walkers. The 1574 * last walker will release it. 1575 */ 1576 ilm_t * 1577 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1578 { 1579 ilm_t *ilm; 1580 1581 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1582 IAM_WRITER_ILL(ill)); 1583 1584 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1585 if (ilm->ilm_flags & ILM_DELETED) 1586 continue; 1587 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1588 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1589 return (ilm); 1590 } 1591 return (NULL); 1592 } 1593 1594 ilm_t * 1595 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1596 zoneid_t zoneid) 1597 { 1598 ilm_t *ilm; 1599 1600 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1601 IAM_WRITER_ILL(ill)); 1602 1603 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1604 if (ilm->ilm_flags & ILM_DELETED) 1605 continue; 1606 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1607 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1608 ilm->ilm_orig_ifindex == index) { 1609 return (ilm); 1610 } 1611 } 1612 return (NULL); 1613 } 1614 1615 ilm_t * 1616 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1617 { 1618 in6_addr_t v6group; 1619 1620 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1621 IAM_WRITER_ILL(ill)); 1622 /* 1623 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1624 */ 1625 if (group == INADDR_ANY) 1626 v6group = ipv6_all_zeros; 1627 else 1628 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1629 1630 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1631 } 1632 1633 /* 1634 * Found an ilm for the ipif. Only needed for IPv4 which does 1635 * ipif specific socket options. 1636 */ 1637 ilm_t * 1638 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1639 { 1640 ill_t *ill = ipif->ipif_ill; 1641 ilm_t *ilm; 1642 in6_addr_t v6group; 1643 1644 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1645 IAM_WRITER_ILL(ill)); 1646 1647 /* 1648 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1649 */ 1650 if (group == INADDR_ANY) 1651 v6group = ipv6_all_zeros; 1652 else 1653 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1654 1655 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1656 if (ilm->ilm_flags & ILM_DELETED) 1657 continue; 1658 if (ilm->ilm_ipif == ipif && 1659 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1660 return (ilm); 1661 } 1662 return (NULL); 1663 } 1664 1665 /* 1666 * How many members on this ill? 
1667 */ 1668 int 1669 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1670 { 1671 ilm_t *ilm; 1672 int i = 0; 1673 1674 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1675 IAM_WRITER_ILL(ill)); 1676 1677 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1678 if (ilm->ilm_flags & ILM_DELETED) 1679 continue; 1680 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1681 i++; 1682 } 1683 } 1684 return (i); 1685 } 1686 1687 /* Caller guarantees that the group is not already on the list */ 1688 static ilm_t * 1689 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1690 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1691 zoneid_t zoneid) 1692 { 1693 ill_t *ill = ipif->ipif_ill; 1694 ilm_t *ilm; 1695 ilm_t *ilm_cur; 1696 ilm_t **ilm_ptpn; 1697 1698 ASSERT(IAM_WRITER_IPIF(ipif)); 1699 1700 ilm = GETSTRUCT(ilm_t, 1); 1701 if (ilm == NULL) 1702 return (NULL); 1703 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1704 ilm->ilm_filter = l_alloc(); 1705 if (ilm->ilm_filter == NULL) { 1706 mi_free(ilm); 1707 return (NULL); 1708 } 1709 } 1710 ilm->ilm_v6addr = *v6group; 1711 ilm->ilm_refcnt = 1; 1712 ilm->ilm_zoneid = zoneid; 1713 ilm->ilm_timer = INFINITY; 1714 ilm->ilm_rtx.rtx_timer = INFINITY; 1715 1716 /* 1717 * IPv4 Multicast groups are joined using ipif. 1718 * IPv6 Multicast groups are joined using ill. 1719 */ 1720 if (ill->ill_isv6) { 1721 ilm->ilm_ill = ill; 1722 ilm->ilm_ipif = NULL; 1723 } else { 1724 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1725 ilm->ilm_ipif = ipif; 1726 ilm->ilm_ill = NULL; 1727 } 1728 ASSERT(ill->ill_ipst); 1729 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1730 1731 /* 1732 * After this if ilm moves to a new ill, we don't change 1733 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1734 * it has been moved. 
Indexes don't match even when the application 1735 * wants to join on a FAILED/INACTIVE interface because we choose 1736 * a new interface to join in. This is considered as an implicit 1737 * move. 1738 */ 1739 ilm->ilm_orig_ifindex = orig_ifindex; 1740 1741 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1742 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1743 1744 /* 1745 * Grab lock to give consistent view to readers 1746 */ 1747 mutex_enter(&ill->ill_lock); 1748 /* 1749 * All ilms in the same zone are contiguous in the ill_ilm list. 1750 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1751 * sending duplicates up when two applications in the same zone join the 1752 * same group on different logical interfaces. 1753 */ 1754 ilm_cur = ill->ill_ilm; 1755 ilm_ptpn = &ill->ill_ilm; 1756 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1757 ilm_ptpn = &ilm_cur->ilm_next; 1758 ilm_cur = ilm_cur->ilm_next; 1759 } 1760 ilm->ilm_next = ilm_cur; 1761 *ilm_ptpn = ilm; 1762 1763 /* 1764 * If we have an associated ilg, use its filter state; if not, 1765 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
1766 */ 1767 if (ilgstat != ILGSTAT_NONE) { 1768 if (!SLIST_IS_EMPTY(ilg_flist)) 1769 l_copy(ilg_flist, ilm->ilm_filter); 1770 ilm->ilm_fmode = ilg_fmode; 1771 } else { 1772 ilm->ilm_no_ilg_cnt = 1; 1773 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1774 } 1775 1776 mutex_exit(&ill->ill_lock); 1777 return (ilm); 1778 } 1779 1780 void 1781 ilm_walker_cleanup(ill_t *ill) 1782 { 1783 ilm_t **ilmp; 1784 ilm_t *ilm; 1785 1786 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1787 ASSERT(ill->ill_ilm_walker_cnt == 0); 1788 1789 ilmp = &ill->ill_ilm; 1790 while (*ilmp != NULL) { 1791 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1792 ilm = *ilmp; 1793 *ilmp = ilm->ilm_next; 1794 FREE_SLIST(ilm->ilm_filter); 1795 FREE_SLIST(ilm->ilm_pendsrcs); 1796 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1797 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1798 ilm->ilm_ipst = NULL; 1799 mi_free((char *)ilm); 1800 } else { 1801 ilmp = &(*ilmp)->ilm_next; 1802 } 1803 } 1804 ill->ill_ilm_cleanup_reqd = 0; 1805 } 1806 1807 /* 1808 * Unlink ilm and free it. 1809 */ 1810 static void 1811 ilm_delete(ilm_t *ilm) 1812 { 1813 ill_t *ill; 1814 ilm_t **ilmp; 1815 1816 if (ilm->ilm_ipif != NULL) { 1817 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1818 ASSERT(ilm->ilm_ill == NULL); 1819 ill = ilm->ilm_ipif->ipif_ill; 1820 ASSERT(!ill->ill_isv6); 1821 } else { 1822 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1823 ASSERT(ilm->ilm_ipif == NULL); 1824 ill = ilm->ilm_ill; 1825 ASSERT(ill->ill_isv6); 1826 } 1827 /* 1828 * Delete under lock protection so that readers don't stumble 1829 * on bad ilm_next 1830 */ 1831 mutex_enter(&ill->ill_lock); 1832 if (ill->ill_ilm_walker_cnt != 0) { 1833 ilm->ilm_flags |= ILM_DELETED; 1834 ill->ill_ilm_cleanup_reqd = 1; 1835 mutex_exit(&ill->ill_lock); 1836 return; 1837 } 1838 1839 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1840 ; 1841 *ilmp = ilm->ilm_next; 1842 mutex_exit(&ill->ill_lock); 1843 1844 FREE_SLIST(ilm->ilm_filter); 1845 FREE_SLIST(ilm->ilm_pendsrcs); 1846 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 
1847 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1848 ilm->ilm_ipst = NULL; 1849 mi_free((char *)ilm); 1850 } 1851 1852 /* Free all ilms for this ipif */ 1853 void 1854 ilm_free(ipif_t *ipif) 1855 { 1856 ill_t *ill = ipif->ipif_ill; 1857 ilm_t *ilm; 1858 ilm_t *next_ilm; 1859 1860 ASSERT(IAM_WRITER_IPIF(ipif)); 1861 1862 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1863 next_ilm = ilm->ilm_next; 1864 if (ilm->ilm_ipif == ipif) 1865 ilm_delete(ilm); 1866 } 1867 } 1868 1869 /* 1870 * Looks up the appropriate ipif given a v4 multicast group and interface 1871 * address. On success, returns 0, with *ipifpp pointing to the found 1872 * struct. On failure, returns an errno and *ipifpp is NULL. 1873 */ 1874 int 1875 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1876 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1877 { 1878 ipif_t *ipif; 1879 int err = 0; 1880 zoneid_t zoneid; 1881 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1882 1883 if (!CLASSD(group) || CLASSD(src)) { 1884 return (EINVAL); 1885 } 1886 *ipifpp = NULL; 1887 1888 zoneid = IPCL_ZONEID(connp); 1889 1890 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1891 if (ifaddr != INADDR_ANY) { 1892 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1893 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1894 if (err != 0 && err != EINPROGRESS) 1895 err = EADDRNOTAVAIL; 1896 } else if (ifindexp != NULL && *ifindexp != 0) { 1897 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1898 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1899 } else { 1900 ipif = ipif_lookup_group(group, zoneid, ipst); 1901 if (ipif == NULL) 1902 return (EADDRNOTAVAIL); 1903 } 1904 if (ipif == NULL) 1905 return (err); 1906 1907 *ipifpp = ipif; 1908 return (0); 1909 } 1910 1911 /* 1912 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1913 * index and IPv6 multicast group. 
On success, returns 0, with *illpp (or 1914 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1915 * an errno and *illpp and *ipifpp are undefined. 1916 */ 1917 int 1918 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1919 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1920 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1921 { 1922 boolean_t src_unspec; 1923 ill_t *ill = NULL; 1924 ipif_t *ipif = NULL; 1925 int err; 1926 zoneid_t zoneid = connp->conn_zoneid; 1927 queue_t *wq = CONNP_TO_WQ(connp); 1928 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1929 1930 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1931 1932 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1933 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1934 return (EINVAL); 1935 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1936 if (src_unspec) { 1937 *v4src = INADDR_ANY; 1938 } else { 1939 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1940 } 1941 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1942 return (EINVAL); 1943 *ipifpp = NULL; 1944 *isv6 = B_FALSE; 1945 } else { 1946 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1947 return (EINVAL); 1948 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1949 IN6_IS_ADDR_MULTICAST(v6src)) { 1950 return (EINVAL); 1951 } 1952 *illpp = NULL; 1953 *isv6 = B_TRUE; 1954 } 1955 1956 if (ifindex == 0) { 1957 if (*isv6) 1958 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 1959 else 1960 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 1961 if (ill == NULL && ipif == NULL) 1962 return (EADDRNOTAVAIL); 1963 } else { 1964 if (*isv6) { 1965 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1966 wq, first_mp, func, &err, ipst); 1967 if (ill != NULL && 1968 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1969 ill_refrele(ill); 1970 ill = NULL; 1971 err = EADDRNOTAVAIL; 1972 } 1973 } else { 1974 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1975 zoneid, wq, first_mp, func, &err, ipst); 1976 } 1977 if (ill == NULL 
&& ipif == NULL) 1978 return (err); 1979 } 1980 1981 *ipifpp = ipif; 1982 *illpp = ill; 1983 return (0); 1984 } 1985 1986 static int 1987 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1988 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1989 { 1990 ilg_t *ilg; 1991 int i, numsrc, fmode, outsrcs; 1992 struct sockaddr_in *sin; 1993 struct sockaddr_in6 *sin6; 1994 struct in_addr *addrp; 1995 slist_t *fp; 1996 boolean_t is_v4only_api; 1997 1998 mutex_enter(&connp->conn_lock); 1999 2000 ilg = ilg_lookup_ipif(connp, grp, ipif); 2001 if (ilg == NULL) { 2002 mutex_exit(&connp->conn_lock); 2003 return (EADDRNOTAVAIL); 2004 } 2005 2006 if (gf == NULL) { 2007 ASSERT(imsf != NULL); 2008 ASSERT(!isv4mapped); 2009 is_v4only_api = B_TRUE; 2010 outsrcs = imsf->imsf_numsrc; 2011 } else { 2012 ASSERT(imsf == NULL); 2013 is_v4only_api = B_FALSE; 2014 outsrcs = gf->gf_numsrc; 2015 } 2016 2017 /* 2018 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2019 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2020 * So we need to translate here. 2021 */ 2022 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2023 MCAST_INCLUDE : MCAST_EXCLUDE; 2024 if ((fp = ilg->ilg_filter) == NULL) { 2025 numsrc = 0; 2026 } else { 2027 for (i = 0; i < outsrcs; i++) { 2028 if (i == fp->sl_numsrc) 2029 break; 2030 if (isv4mapped) { 2031 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2032 sin6->sin6_family = AF_INET6; 2033 sin6->sin6_addr = fp->sl_addr[i]; 2034 } else { 2035 if (is_v4only_api) { 2036 addrp = &imsf->imsf_slist[i]; 2037 } else { 2038 sin = (struct sockaddr_in *) 2039 &gf->gf_slist[i]; 2040 sin->sin_family = AF_INET; 2041 addrp = &sin->sin_addr; 2042 } 2043 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2044 } 2045 } 2046 numsrc = fp->sl_numsrc; 2047 } 2048 2049 if (is_v4only_api) { 2050 imsf->imsf_numsrc = numsrc; 2051 imsf->imsf_fmode = fmode; 2052 } else { 2053 gf->gf_numsrc = numsrc; 2054 gf->gf_fmode = fmode; 2055 } 2056 2057 mutex_exit(&connp->conn_lock); 2058 2059 return (0); 2060 } 2061 2062 static int 2063 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2064 const struct in6_addr *grp, ill_t *ill) 2065 { 2066 ilg_t *ilg; 2067 int i; 2068 struct sockaddr_storage *sl; 2069 struct sockaddr_in6 *sin6; 2070 slist_t *fp; 2071 2072 mutex_enter(&connp->conn_lock); 2073 2074 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2075 if (ilg == NULL) { 2076 mutex_exit(&connp->conn_lock); 2077 return (EADDRNOTAVAIL); 2078 } 2079 2080 /* 2081 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2082 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2083 * So we need to translate here. 2084 */ 2085 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2086 MCAST_INCLUDE : MCAST_EXCLUDE; 2087 if ((fp = ilg->ilg_filter) == NULL) { 2088 gf->gf_numsrc = 0; 2089 } else { 2090 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2091 if (i == fp->sl_numsrc) 2092 break; 2093 sin6 = (struct sockaddr_in6 *)sl; 2094 sin6->sin6_family = AF_INET6; 2095 sin6->sin6_addr = fp->sl_addr[i]; 2096 } 2097 gf->gf_numsrc = fp->sl_numsrc; 2098 } 2099 2100 mutex_exit(&connp->conn_lock); 2101 2102 return (0); 2103 } 2104 2105 static int 2106 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2107 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2108 { 2109 ilg_t *ilg; 2110 int i, err, insrcs, infmode, new_fmode; 2111 struct sockaddr_in *sin; 2112 struct sockaddr_in6 *sin6; 2113 struct in_addr *addrp; 2114 slist_t *orig_filter = NULL; 2115 slist_t *new_filter = NULL; 2116 mcast_record_t orig_fmode; 2117 boolean_t leave_grp, is_v4only_api; 2118 ilg_stat_t ilgstat; 2119 2120 if (gf == NULL) { 2121 ASSERT(imsf != NULL); 2122 ASSERT(!isv4mapped); 2123 is_v4only_api = B_TRUE; 2124 insrcs = imsf->imsf_numsrc; 2125 infmode = imsf->imsf_fmode; 2126 } else { 2127 ASSERT(imsf == NULL); 2128 is_v4only_api = B_FALSE; 2129 insrcs = gf->gf_numsrc; 2130 infmode = gf->gf_fmode; 2131 } 2132 2133 /* Make sure we can handle the source list */ 2134 if (insrcs > MAX_FILTER_SIZE) 2135 return (ENOBUFS); 2136 2137 /* 2138 * setting the filter to (INCLUDE, NULL) is treated 2139 * as a request to leave the group. 2140 */ 2141 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2142 2143 ASSERT(IAM_WRITER_IPIF(ipif)); 2144 2145 mutex_enter(&connp->conn_lock); 2146 2147 ilg = ilg_lookup_ipif(connp, grp, ipif); 2148 if (ilg == NULL) { 2149 /* 2150 * if the request was actually to leave, and we 2151 * didn't find an ilg, there's nothing to do. 2152 */ 2153 if (!leave_grp) 2154 ilg = conn_ilg_alloc(connp); 2155 if (leave_grp || ilg == NULL) { 2156 mutex_exit(&connp->conn_lock); 2157 return (leave_grp ? 
0 : ENOMEM); 2158 } 2159 ilgstat = ILGSTAT_NEW; 2160 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2161 ilg->ilg_ipif = ipif; 2162 ilg->ilg_ill = NULL; 2163 ilg->ilg_orig_ifindex = 0; 2164 } else if (leave_grp) { 2165 ilg_delete(connp, ilg, NULL); 2166 mutex_exit(&connp->conn_lock); 2167 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2168 return (0); 2169 } else { 2170 ilgstat = ILGSTAT_CHANGE; 2171 /* Preserve existing state in case ip_addmulti() fails */ 2172 orig_fmode = ilg->ilg_fmode; 2173 if (ilg->ilg_filter == NULL) { 2174 orig_filter = NULL; 2175 } else { 2176 orig_filter = l_alloc_copy(ilg->ilg_filter); 2177 if (orig_filter == NULL) { 2178 mutex_exit(&connp->conn_lock); 2179 return (ENOMEM); 2180 } 2181 } 2182 } 2183 2184 /* 2185 * Alloc buffer to copy new state into (see below) before 2186 * we make any changes, so we can bail if it fails. 2187 */ 2188 if ((new_filter = l_alloc()) == NULL) { 2189 mutex_exit(&connp->conn_lock); 2190 err = ENOMEM; 2191 goto free_and_exit; 2192 } 2193 2194 if (insrcs == 0) { 2195 CLEAR_SLIST(ilg->ilg_filter); 2196 } else { 2197 slist_t *fp; 2198 if (ilg->ilg_filter == NULL) { 2199 fp = l_alloc(); 2200 if (fp == NULL) { 2201 if (ilgstat == ILGSTAT_NEW) 2202 ilg_delete(connp, ilg, NULL); 2203 mutex_exit(&connp->conn_lock); 2204 err = ENOMEM; 2205 goto free_and_exit; 2206 } 2207 } else { 2208 fp = ilg->ilg_filter; 2209 } 2210 for (i = 0; i < insrcs; i++) { 2211 if (isv4mapped) { 2212 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2213 fp->sl_addr[i] = sin6->sin6_addr; 2214 } else { 2215 if (is_v4only_api) { 2216 addrp = &imsf->imsf_slist[i]; 2217 } else { 2218 sin = (struct sockaddr_in *) 2219 &gf->gf_slist[i]; 2220 addrp = &sin->sin_addr; 2221 } 2222 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2223 } 2224 } 2225 fp->sl_numsrc = insrcs; 2226 ilg->ilg_filter = fp; 2227 } 2228 /* 2229 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2230 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 
2231 * So we need to translate here. 2232 */ 2233 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2234 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2235 2236 /* 2237 * Save copy of ilg's filter state to pass to other functions, 2238 * so we can release conn_lock now. 2239 */ 2240 new_fmode = ilg->ilg_fmode; 2241 l_copy(ilg->ilg_filter, new_filter); 2242 2243 mutex_exit(&connp->conn_lock); 2244 2245 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2246 if (err != 0) { 2247 /* 2248 * Restore the original filter state, or delete the 2249 * newly-created ilg. We need to look up the ilg 2250 * again, though, since we've not been holding the 2251 * conn_lock. 2252 */ 2253 mutex_enter(&connp->conn_lock); 2254 ilg = ilg_lookup_ipif(connp, grp, ipif); 2255 ASSERT(ilg != NULL); 2256 if (ilgstat == ILGSTAT_NEW) { 2257 ilg_delete(connp, ilg, NULL); 2258 } else { 2259 ilg->ilg_fmode = orig_fmode; 2260 if (SLIST_IS_EMPTY(orig_filter)) { 2261 CLEAR_SLIST(ilg->ilg_filter); 2262 } else { 2263 /* 2264 * We didn't free the filter, even if we 2265 * were trying to make the source list empty; 2266 * so if orig_filter isn't empty, the ilg 2267 * must still have a filter alloc'd. 
2268 */ 2269 l_copy(orig_filter, ilg->ilg_filter); 2270 } 2271 } 2272 mutex_exit(&connp->conn_lock); 2273 } 2274 2275 free_and_exit: 2276 l_free(orig_filter); 2277 l_free(new_filter); 2278 2279 return (err); 2280 } 2281 2282 static int 2283 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2284 const struct in6_addr *grp, ill_t *ill) 2285 { 2286 ilg_t *ilg; 2287 int i, orig_ifindex, orig_fmode, new_fmode, err; 2288 slist_t *orig_filter = NULL; 2289 slist_t *new_filter = NULL; 2290 struct sockaddr_storage *sl; 2291 struct sockaddr_in6 *sin6; 2292 boolean_t leave_grp; 2293 ilg_stat_t ilgstat; 2294 2295 /* Make sure we can handle the source list */ 2296 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2297 return (ENOBUFS); 2298 2299 /* 2300 * setting the filter to (INCLUDE, NULL) is treated 2301 * as a request to leave the group. 2302 */ 2303 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2304 2305 ASSERT(IAM_WRITER_ILL(ill)); 2306 2307 /* 2308 * Use the ifindex to do the lookup. We can't use the ill 2309 * directly because ilg_ill could point to a different ill 2310 * if things have moved. 2311 */ 2312 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2313 2314 mutex_enter(&connp->conn_lock); 2315 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2316 if (ilg == NULL) { 2317 /* 2318 * if the request was actually to leave, and we 2319 * didn't find an ilg, there's nothing to do. 2320 */ 2321 if (!leave_grp) 2322 ilg = conn_ilg_alloc(connp); 2323 if (leave_grp || ilg == NULL) { 2324 mutex_exit(&connp->conn_lock); 2325 return (leave_grp ? 0 : ENOMEM); 2326 } 2327 ilgstat = ILGSTAT_NEW; 2328 ilg->ilg_v6group = *grp; 2329 ilg->ilg_ipif = NULL; 2330 /* 2331 * Choose our target ill to join on. This might be 2332 * different from the ill we've been given if it's 2333 * currently down and part of a group. 2334 * 2335 * new ill is not refheld; we are writer. 
2336 */ 2337 ill = ip_choose_multi_ill(ill, grp); 2338 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2339 ilg->ilg_ill = ill; 2340 /* 2341 * Remember the index that we joined on, so that we can 2342 * successfully delete them later on and also search for 2343 * duplicates if the application wants to join again. 2344 */ 2345 ilg->ilg_orig_ifindex = orig_ifindex; 2346 } else if (leave_grp) { 2347 /* 2348 * Use the ilg's current ill for the deletion, 2349 * we might have failed over. 2350 */ 2351 ill = ilg->ilg_ill; 2352 ilg_delete(connp, ilg, NULL); 2353 mutex_exit(&connp->conn_lock); 2354 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2355 connp->conn_zoneid, B_FALSE, B_TRUE); 2356 return (0); 2357 } else { 2358 ilgstat = ILGSTAT_CHANGE; 2359 /* 2360 * The current ill might be different from the one we were 2361 * asked to join on (if failover has occurred); we should 2362 * join on the ill stored in the ilg. The original ill 2363 * is noted in ilg_orig_ifindex, which matched our request. 2364 */ 2365 ill = ilg->ilg_ill; 2366 /* preserve existing state in case ip_addmulti() fails */ 2367 orig_fmode = ilg->ilg_fmode; 2368 if (ilg->ilg_filter == NULL) { 2369 orig_filter = NULL; 2370 } else { 2371 orig_filter = l_alloc_copy(ilg->ilg_filter); 2372 if (orig_filter == NULL) { 2373 mutex_exit(&connp->conn_lock); 2374 return (ENOMEM); 2375 } 2376 } 2377 } 2378 2379 /* 2380 * Alloc buffer to copy new state into (see below) before 2381 * we make any changes, so we can bail if it fails. 
2382 */ 2383 if ((new_filter = l_alloc()) == NULL) { 2384 mutex_exit(&connp->conn_lock); 2385 err = ENOMEM; 2386 goto free_and_exit; 2387 } 2388 2389 if (gf->gf_numsrc == 0) { 2390 CLEAR_SLIST(ilg->ilg_filter); 2391 } else { 2392 slist_t *fp; 2393 if (ilg->ilg_filter == NULL) { 2394 fp = l_alloc(); 2395 if (fp == NULL) { 2396 if (ilgstat == ILGSTAT_NEW) 2397 ilg_delete(connp, ilg, NULL); 2398 mutex_exit(&connp->conn_lock); 2399 err = ENOMEM; 2400 goto free_and_exit; 2401 } 2402 } else { 2403 fp = ilg->ilg_filter; 2404 } 2405 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2406 sin6 = (struct sockaddr_in6 *)sl; 2407 fp->sl_addr[i] = sin6->sin6_addr; 2408 } 2409 fp->sl_numsrc = gf->gf_numsrc; 2410 ilg->ilg_filter = fp; 2411 } 2412 /* 2413 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2414 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2415 * So we need to translate here. 2416 */ 2417 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2418 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2419 2420 /* 2421 * Save copy of ilg's filter state to pass to other functions, 2422 * so we can release conn_lock now. 2423 */ 2424 new_fmode = ilg->ilg_fmode; 2425 l_copy(ilg->ilg_filter, new_filter); 2426 2427 mutex_exit(&connp->conn_lock); 2428 2429 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2430 ilgstat, new_fmode, new_filter); 2431 if (err != 0) { 2432 /* 2433 * Restore the original filter state, or delete the 2434 * newly-created ilg. We need to look up the ilg 2435 * again, though, since we've not been holding the 2436 * conn_lock. 
2437 */ 2438 mutex_enter(&connp->conn_lock); 2439 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2440 ASSERT(ilg != NULL); 2441 if (ilgstat == ILGSTAT_NEW) { 2442 ilg_delete(connp, ilg, NULL); 2443 } else { 2444 ilg->ilg_fmode = orig_fmode; 2445 if (SLIST_IS_EMPTY(orig_filter)) { 2446 CLEAR_SLIST(ilg->ilg_filter); 2447 } else { 2448 /* 2449 * We didn't free the filter, even if we 2450 * were trying to make the source list empty; 2451 * so if orig_filter isn't empty, the ilg 2452 * must still have a filter alloc'd. 2453 */ 2454 l_copy(orig_filter, ilg->ilg_filter); 2455 } 2456 } 2457 mutex_exit(&connp->conn_lock); 2458 } 2459 2460 free_and_exit: 2461 l_free(orig_filter); 2462 l_free(new_filter); 2463 2464 return (err); 2465 } 2466 2467 /* 2468 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2469 */ 2470 /* ARGSUSED */ 2471 int 2472 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2473 ip_ioctl_cmd_t *ipip, void *ifreq) 2474 { 2475 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2476 /* existence verified in ip_wput_nondata() */ 2477 mblk_t *data_mp = mp->b_cont->b_cont; 2478 int datalen, err, cmd, minsize; 2479 int expsize = 0; 2480 conn_t *connp; 2481 boolean_t isv6, is_v4only_api, getcmd; 2482 struct sockaddr_in *gsin; 2483 struct sockaddr_in6 *gsin6; 2484 ipaddr_t v4grp; 2485 in6_addr_t v6grp; 2486 struct group_filter *gf = NULL; 2487 struct ip_msfilter *imsf = NULL; 2488 mblk_t *ndp; 2489 2490 if (data_mp->b_cont != NULL) { 2491 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2492 return (ENOMEM); 2493 freemsg(data_mp); 2494 data_mp = ndp; 2495 mp->b_cont->b_cont = data_mp; 2496 } 2497 2498 cmd = iocp->ioc_cmd; 2499 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2500 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2501 minsize = (is_v4only_api) ? 
IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2502 datalen = MBLKL(data_mp); 2503 2504 if (datalen < minsize) 2505 return (EINVAL); 2506 2507 /* 2508 * now we know we have at least have the initial structure, 2509 * but need to check for the source list array. 2510 */ 2511 if (is_v4only_api) { 2512 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2513 isv6 = B_FALSE; 2514 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2515 } else { 2516 gf = (struct group_filter *)data_mp->b_rptr; 2517 if (gf->gf_group.ss_family == AF_INET6) { 2518 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2519 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2520 } else { 2521 isv6 = B_FALSE; 2522 } 2523 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2524 } 2525 if (datalen < expsize) 2526 return (EINVAL); 2527 2528 connp = Q_TO_CONN(q); 2529 2530 /* operation not supported on the virtual network interface */ 2531 if (IS_VNI(ipif->ipif_ill)) 2532 return (EINVAL); 2533 2534 if (isv6) { 2535 ill_t *ill = ipif->ipif_ill; 2536 ill_refhold(ill); 2537 2538 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2539 v6grp = gsin6->sin6_addr; 2540 if (getcmd) 2541 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2542 else 2543 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2544 2545 ill_refrele(ill); 2546 } else { 2547 boolean_t isv4mapped = B_FALSE; 2548 if (is_v4only_api) { 2549 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2550 } else { 2551 if (gf->gf_group.ss_family == AF_INET) { 2552 gsin = (struct sockaddr_in *)&gf->gf_group; 2553 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2554 } else { 2555 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2556 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2557 v4grp); 2558 isv4mapped = B_TRUE; 2559 } 2560 } 2561 if (getcmd) 2562 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2563 isv4mapped); 2564 else 2565 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2566 isv4mapped); 2567 } 2568 2569 return (err); 2570 } 2571 2572 /* 2573 * Finds the ipif based on information 
in the ioctl headers. Needed to make 2574 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2575 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2576 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 2577 */ 2578 int 2579 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2580 { 2581 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2582 int cmd = iocp->ioc_cmd, err = 0; 2583 conn_t *connp; 2584 ipif_t *ipif; 2585 /* caller has verified this mblk exists */ 2586 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2587 struct ip_msfilter *imsf; 2588 struct group_filter *gf; 2589 ipaddr_t v4addr, v4grp; 2590 in6_addr_t v6grp; 2591 uint32_t index; 2592 zoneid_t zoneid; 2593 ip_stack_t *ipst; 2594 2595 connp = Q_TO_CONN(q); 2596 zoneid = connp->conn_zoneid; 2597 ipst = connp->conn_netstack->netstack_ip; 2598 2599 /* don't allow multicast operations on a tcp conn */ 2600 if (IPCL_IS_TCP(connp)) 2601 return (ENOPROTOOPT); 2602 2603 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2604 /* don't allow v4-specific ioctls on v6 socket */ 2605 if (connp->conn_af_isv6) 2606 return (EAFNOSUPPORT); 2607 2608 imsf = (struct ip_msfilter *)dbuf; 2609 v4addr = imsf->imsf_interface.s_addr; 2610 v4grp = imsf->imsf_multiaddr.s_addr; 2611 if (v4addr == INADDR_ANY) { 2612 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2613 if (ipif == NULL) 2614 err = EADDRNOTAVAIL; 2615 } else { 2616 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2617 func, &err, ipst); 2618 } 2619 } else { 2620 boolean_t isv6 = B_FALSE; 2621 gf = (struct group_filter *)dbuf; 2622 index = gf->gf_interface; 2623 if (gf->gf_group.ss_family == AF_INET6) { 2624 struct sockaddr_in6 *sin6; 2625 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2626 v6grp = sin6->sin6_addr; 2627 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2628 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2629 else 2630 isv6 = B_TRUE; 2631 } else if (gf->gf_group.ss_family == AF_INET) { 
2632 struct sockaddr_in *sin; 2633 sin = (struct sockaddr_in *)&gf->gf_group; 2634 v4grp = sin->sin_addr.s_addr; 2635 } else { 2636 return (EAFNOSUPPORT); 2637 } 2638 if (index == 0) { 2639 if (isv6) { 2640 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2641 ipst); 2642 } else { 2643 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2644 } 2645 if (ipif == NULL) 2646 err = EADDRNOTAVAIL; 2647 } else { 2648 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2649 q, mp, func, &err, ipst); 2650 } 2651 } 2652 2653 *ipifpp = ipif; 2654 return (err); 2655 } 2656 2657 /* 2658 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2659 * in in two stages, as the first copyin tells us the size of the attached 2660 * source buffer. This function is called by ip_wput_nondata() after the 2661 * first copyin has completed; it figures out how big the second stage 2662 * needs to be, and kicks it off. 2663 * 2664 * In some cases (numsrc < 2), the second copyin is not needed as the 2665 * first one gets a complete structure containing 1 source addr. 2666 * 2667 * The function returns 0 if a second copyin has been started (i.e. there's 2668 * no more work to be done right now), or 1 if the second copyin is not 2669 * needed and ip_wput_nondata() can continue its processing. 
2670 */ 2671 int 2672 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2673 { 2674 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2675 int cmd = iocp->ioc_cmd; 2676 /* validity of this checked in ip_wput_nondata() */ 2677 mblk_t *mp1 = mp->b_cont->b_cont; 2678 int copysize = 0; 2679 int offset; 2680 2681 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2682 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2683 if (gf->gf_numsrc >= 2) { 2684 offset = sizeof (struct group_filter); 2685 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2686 } 2687 } else { 2688 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2689 if (imsf->imsf_numsrc >= 2) { 2690 offset = sizeof (struct ip_msfilter); 2691 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2692 } 2693 } 2694 if (copysize > 0) { 2695 mi_copyin_n(q, mp, offset, copysize); 2696 return (0); 2697 } 2698 return (1); 2699 } 2700 2701 /* 2702 * Handle the following optmgmt: 2703 * IP_ADD_MEMBERSHIP must not have joined already 2704 * MCAST_JOIN_GROUP must not have joined already 2705 * IP_BLOCK_SOURCE must have joined already 2706 * MCAST_BLOCK_SOURCE must have joined already 2707 * IP_JOIN_SOURCE_GROUP may have joined already 2708 * MCAST_JOIN_SOURCE_GROUP may have joined already 2709 * 2710 * fmode and src parameters may be used to determine which option is 2711 * being set, as follows (the IP_* and MCAST_* versions of each option 2712 * are functionally equivalent): 2713 * opt fmode src 2714 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2715 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2716 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2717 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2718 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2719 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2720 * 2721 * Changing the filter mode is not allowed; if a matching ilg already 2722 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2723 * 2724 * Verifies that there is a source address of appropriate scope for 2725 * the group; if not, EADDRNOTAVAIL is returned. 2726 * 2727 * The interface to be used may be identified by an address or by an 2728 * index. A pointer to the index is passed; if it is NULL, use the 2729 * address, otherwise, use the index. 2730 */ 2731 int 2732 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2733 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2734 mblk_t *first_mp) 2735 { 2736 ipif_t *ipif; 2737 ipsq_t *ipsq; 2738 int err = 0; 2739 ill_t *ill; 2740 2741 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2742 ip_restart_optmgmt, &ipif); 2743 if (err != 0) { 2744 if (err != EINPROGRESS) { 2745 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2746 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2747 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2748 } 2749 return (err); 2750 } 2751 ASSERT(ipif != NULL); 2752 2753 ill = ipif->ipif_ill; 2754 /* Operation not supported on a virtual network interface */ 2755 if (IS_VNI(ill)) { 2756 ipif_refrele(ipif); 2757 return (EINVAL); 2758 } 2759 2760 if (checkonly) { 2761 /* 2762 * do not do operation, just pretend to - new T_CHECK 2763 * semantics. The error return case above if encountered 2764 * considered a good enough "check" here. 
2765 */ 2766 ipif_refrele(ipif); 2767 return (0); 2768 } 2769 2770 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2771 NEW_OP); 2772 2773 /* unspecified source addr => no source filtering */ 2774 err = ilg_add(connp, group, ipif, fmode, src); 2775 2776 IPSQ_EXIT(ipsq); 2777 2778 ipif_refrele(ipif); 2779 return (err); 2780 } 2781 2782 /* 2783 * Handle the following optmgmt: 2784 * IPV6_JOIN_GROUP must not have joined already 2785 * MCAST_JOIN_GROUP must not have joined already 2786 * MCAST_BLOCK_SOURCE must have joined already 2787 * MCAST_JOIN_SOURCE_GROUP may have joined already 2788 * 2789 * fmode and src parameters may be used to determine which option is 2790 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2791 * are functionally equivalent): 2792 * opt fmode v6src 2793 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2794 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2795 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2796 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2797 * 2798 * Changing the filter mode is not allowed; if a matching ilg already 2799 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2800 * 2801 * Verifies that there is a source address of appropriate scope for 2802 * the group; if not, EADDRNOTAVAIL is returned. 2803 * 2804 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2805 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2806 * v6src is also v4-mapped. 
2807 */ 2808 int 2809 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2810 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2811 const in6_addr_t *v6src, mblk_t *first_mp) 2812 { 2813 ill_t *ill; 2814 ipif_t *ipif; 2815 char buf[INET6_ADDRSTRLEN]; 2816 ipaddr_t v4group, v4src; 2817 boolean_t isv6; 2818 ipsq_t *ipsq; 2819 int err; 2820 2821 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2822 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2823 if (err != 0) { 2824 if (err != EINPROGRESS) { 2825 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2826 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2827 sizeof (buf)), ifindex)); 2828 } 2829 return (err); 2830 } 2831 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2832 2833 /* operation is not supported on the virtual network interface */ 2834 if (isv6) { 2835 if (IS_VNI(ill)) { 2836 ill_refrele(ill); 2837 return (EINVAL); 2838 } 2839 } else { 2840 if (IS_VNI(ipif->ipif_ill)) { 2841 ipif_refrele(ipif); 2842 return (EINVAL); 2843 } 2844 } 2845 2846 if (checkonly) { 2847 /* 2848 * do not do operation, just pretend to - new T_CHECK 2849 * semantics. The error return case above if encountered 2850 * considered a good enough "check" here. 
2851 */ 2852 if (isv6) 2853 ill_refrele(ill); 2854 else 2855 ipif_refrele(ipif); 2856 return (0); 2857 } 2858 2859 if (!isv6) { 2860 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2861 ipsq, NEW_OP); 2862 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2863 IPSQ_EXIT(ipsq); 2864 ipif_refrele(ipif); 2865 } else { 2866 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2867 ipsq, NEW_OP); 2868 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2869 IPSQ_EXIT(ipsq); 2870 ill_refrele(ill); 2871 } 2872 2873 return (err); 2874 } 2875 2876 static int 2877 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2878 mcast_record_t fmode, ipaddr_t src) 2879 { 2880 ilg_t *ilg; 2881 in6_addr_t v6src; 2882 boolean_t leaving = B_FALSE; 2883 2884 ASSERT(IAM_WRITER_IPIF(ipif)); 2885 2886 /* 2887 * The ilg is valid only while we hold the conn lock. Once we drop 2888 * the lock, another thread can locate another ilg on this connp, 2889 * but on a different ipif, and delete it, and cause the ilg array 2890 * to be reallocated and copied. Hence do the ilg_delete before 2891 * dropping the lock. 2892 */ 2893 mutex_enter(&connp->conn_lock); 2894 ilg = ilg_lookup_ipif(connp, group, ipif); 2895 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2896 mutex_exit(&connp->conn_lock); 2897 return (EADDRNOTAVAIL); 2898 } 2899 2900 /* 2901 * Decide if we're actually deleting the ilg or just removing a 2902 * source filter address; if just removing an addr, make sure we 2903 * aren't trying to change the filter mode, and that the addr is 2904 * actually in our filter list already. If we're removing the 2905 * last src in an include list, just delete the ilg. 
2906 */ 2907 if (src == INADDR_ANY) { 2908 v6src = ipv6_all_zeros; 2909 leaving = B_TRUE; 2910 } else { 2911 int err = 0; 2912 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2913 if (fmode != ilg->ilg_fmode) 2914 err = EINVAL; 2915 else if (ilg->ilg_filter == NULL || 2916 !list_has_addr(ilg->ilg_filter, &v6src)) 2917 err = EADDRNOTAVAIL; 2918 if (err != 0) { 2919 mutex_exit(&connp->conn_lock); 2920 return (err); 2921 } 2922 if (fmode == MODE_IS_INCLUDE && 2923 ilg->ilg_filter->sl_numsrc == 1) { 2924 v6src = ipv6_all_zeros; 2925 leaving = B_TRUE; 2926 } 2927 } 2928 2929 ilg_delete(connp, ilg, &v6src); 2930 mutex_exit(&connp->conn_lock); 2931 2932 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2933 return (0); 2934 } 2935 2936 static int 2937 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2938 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2939 { 2940 ilg_t *ilg; 2941 ill_t *ilg_ill; 2942 uint_t ilg_orig_ifindex; 2943 boolean_t leaving = B_TRUE; 2944 2945 ASSERT(IAM_WRITER_ILL(ill)); 2946 2947 /* 2948 * Use the index that we originally used to join. We can't 2949 * use the ill directly because ilg_ill could point to 2950 * a new ill if things have moved. 2951 */ 2952 mutex_enter(&connp->conn_lock); 2953 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2954 ill->ill_phyint->phyint_ifindex); 2955 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2956 mutex_exit(&connp->conn_lock); 2957 return (EADDRNOTAVAIL); 2958 } 2959 2960 /* 2961 * Decide if we're actually deleting the ilg or just removing a 2962 * source filter address; if just removing an addr, make sure we 2963 * aren't trying to change the filter mode, and that the addr is 2964 * actually in our filter list already. If we're removing the 2965 * last src in an include list, just delete the ilg. 
2966 */ 2967 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2968 int err = 0; 2969 if (fmode != ilg->ilg_fmode) 2970 err = EINVAL; 2971 else if (ilg->ilg_filter == NULL || 2972 !list_has_addr(ilg->ilg_filter, v6src)) 2973 err = EADDRNOTAVAIL; 2974 if (err != 0) { 2975 mutex_exit(&connp->conn_lock); 2976 return (err); 2977 } 2978 if (fmode == MODE_IS_INCLUDE && 2979 ilg->ilg_filter->sl_numsrc == 1) 2980 v6src = NULL; 2981 else 2982 leaving = B_FALSE; 2983 } 2984 2985 ilg_ill = ilg->ilg_ill; 2986 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2987 ilg_delete(connp, ilg, v6src); 2988 mutex_exit(&connp->conn_lock); 2989 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2990 connp->conn_zoneid, B_FALSE, leaving); 2991 2992 return (0); 2993 } 2994 2995 /* 2996 * Handle the following optmgmt: 2997 * IP_DROP_MEMBERSHIP will leave 2998 * MCAST_LEAVE_GROUP will leave 2999 * IP_UNBLOCK_SOURCE will not leave 3000 * MCAST_UNBLOCK_SOURCE will not leave 3001 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3002 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3003 * 3004 * fmode and src parameters may be used to determine which option is 3005 * being set, as follows (the IP_* and MCAST_* versions of each option 3006 * are functionally equivalent): 3007 * opt fmode src 3008 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 3009 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 3010 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3011 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3012 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3013 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3014 * 3015 * Changing the filter mode is not allowed; if a matching ilg already 3016 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3017 * 3018 * The interface to be used may be identified by an address or by an 3019 * index. A pointer to the index is passed; if it is NULL, use the 3020 * address, otherwise, use the index. 
3021 */ 3022 int 3023 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3024 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3025 mblk_t *first_mp) 3026 { 3027 ipif_t *ipif; 3028 ipsq_t *ipsq; 3029 int err; 3030 ill_t *ill; 3031 3032 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3033 ip_restart_optmgmt, &ipif); 3034 if (err != 0) { 3035 if (err != EINPROGRESS) { 3036 ip1dbg(("ip_opt_delete_group: no ipif for group " 3037 "0x%x, ifaddr 0x%x\n", 3038 (int)ntohl(group), (int)ntohl(ifaddr))); 3039 } 3040 return (err); 3041 } 3042 ASSERT(ipif != NULL); 3043 3044 ill = ipif->ipif_ill; 3045 /* Operation not supported on a virtual network interface */ 3046 if (IS_VNI(ill)) { 3047 ipif_refrele(ipif); 3048 return (EINVAL); 3049 } 3050 3051 if (checkonly) { 3052 /* 3053 * do not do operation, just pretend to - new T_CHECK 3054 * semantics. The error return case above if encountered 3055 * considered a good enough "check" here. 3056 */ 3057 ipif_refrele(ipif); 3058 return (0); 3059 } 3060 3061 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3062 NEW_OP); 3063 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3064 IPSQ_EXIT(ipsq); 3065 3066 ipif_refrele(ipif); 3067 return (err); 3068 } 3069 3070 /* 3071 * Handle the following optmgmt: 3072 * IPV6_LEAVE_GROUP will leave 3073 * MCAST_LEAVE_GROUP will leave 3074 * MCAST_UNBLOCK_SOURCE will not leave 3075 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3076 * 3077 * fmode and src parameters may be used to determine which option is 3078 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3079 * are functionally equivalent): 3080 * opt fmode v6src 3081 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3082 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3083 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3084 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3085 * 3086 * Changing the filter mode is not 
allowed; if a matching ilg already 3087 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3088 * 3089 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3090 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3091 * v6src is also v4-mapped. 3092 */ 3093 int 3094 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3095 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3096 const in6_addr_t *v6src, mblk_t *first_mp) 3097 { 3098 ill_t *ill; 3099 ipif_t *ipif; 3100 char buf[INET6_ADDRSTRLEN]; 3101 ipaddr_t v4group, v4src; 3102 boolean_t isv6; 3103 ipsq_t *ipsq; 3104 int err; 3105 3106 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3107 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3108 if (err != 0) { 3109 if (err != EINPROGRESS) { 3110 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3111 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3112 sizeof (buf)), ifindex)); 3113 } 3114 return (err); 3115 } 3116 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3117 3118 /* operation is not supported on the virtual network interface */ 3119 if (isv6) { 3120 if (IS_VNI(ill)) { 3121 ill_refrele(ill); 3122 return (EINVAL); 3123 } 3124 } else { 3125 if (IS_VNI(ipif->ipif_ill)) { 3126 ipif_refrele(ipif); 3127 return (EINVAL); 3128 } 3129 } 3130 3131 if (checkonly) { 3132 /* 3133 * do not do operation, just pretend to - new T_CHECK 3134 * semantics. The error return case above if encountered 3135 * considered a good enough "check" here. 
3136 */ 3137 if (isv6) 3138 ill_refrele(ill); 3139 else 3140 ipif_refrele(ipif); 3141 return (0); 3142 } 3143 3144 if (!isv6) { 3145 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3146 ipsq, NEW_OP); 3147 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3148 v4src); 3149 IPSQ_EXIT(ipsq); 3150 ipif_refrele(ipif); 3151 } else { 3152 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3153 ipsq, NEW_OP); 3154 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3155 v6src); 3156 IPSQ_EXIT(ipsq); 3157 ill_refrele(ill); 3158 } 3159 3160 return (err); 3161 } 3162 3163 /* 3164 * Group mgmt for upper conn that passes things down 3165 * to the interface multicast list (and DLPI) 3166 * These routines can handle new style options that specify an interface name 3167 * as opposed to an interface address (needed for general handling of 3168 * unnumbered interfaces.) 3169 */ 3170 3171 /* 3172 * Add a group to an upper conn group data structure and pass things down 3173 * to the interface multicast list (and DLPI) 3174 */ 3175 static int 3176 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3177 ipaddr_t src) 3178 { 3179 int error = 0; 3180 ill_t *ill; 3181 ilg_t *ilg; 3182 ilg_stat_t ilgstat; 3183 slist_t *new_filter = NULL; 3184 int new_fmode; 3185 3186 ASSERT(IAM_WRITER_IPIF(ipif)); 3187 3188 ill = ipif->ipif_ill; 3189 3190 if (!(ill->ill_flags & ILLF_MULTICAST)) 3191 return (EADDRNOTAVAIL); 3192 3193 /* 3194 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3195 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3196 * serialize 2 threads doing join (sock, group1, hme0:0) and 3197 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3198 * but both operations happen on the same conn. 
3199 */ 3200 mutex_enter(&connp->conn_lock); 3201 ilg = ilg_lookup_ipif(connp, group, ipif); 3202 3203 /* 3204 * Depending on the option we're handling, may or may not be okay 3205 * if group has already been added. Figure out our rules based 3206 * on fmode and src params. Also make sure there's enough room 3207 * in the filter if we're adding a source to an existing filter. 3208 */ 3209 if (src == INADDR_ANY) { 3210 /* we're joining for all sources, must not have joined */ 3211 if (ilg != NULL) 3212 error = EADDRINUSE; 3213 } else { 3214 if (fmode == MODE_IS_EXCLUDE) { 3215 /* (excl {addr}) => block source, must have joined */ 3216 if (ilg == NULL) 3217 error = EADDRNOTAVAIL; 3218 } 3219 /* (incl {addr}) => join source, may have joined */ 3220 3221 if (ilg != NULL && 3222 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3223 error = ENOBUFS; 3224 } 3225 if (error != 0) { 3226 mutex_exit(&connp->conn_lock); 3227 return (error); 3228 } 3229 3230 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3231 3232 /* 3233 * Alloc buffer to copy new state into (see below) before 3234 * we make any changes, so we can bail if it fails. 
3235 */ 3236 if ((new_filter = l_alloc()) == NULL) { 3237 mutex_exit(&connp->conn_lock); 3238 return (ENOMEM); 3239 } 3240 3241 if (ilg == NULL) { 3242 ilgstat = ILGSTAT_NEW; 3243 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3244 mutex_exit(&connp->conn_lock); 3245 l_free(new_filter); 3246 return (ENOMEM); 3247 } 3248 if (src != INADDR_ANY) { 3249 ilg->ilg_filter = l_alloc(); 3250 if (ilg->ilg_filter == NULL) { 3251 ilg_delete(connp, ilg, NULL); 3252 mutex_exit(&connp->conn_lock); 3253 l_free(new_filter); 3254 return (ENOMEM); 3255 } 3256 ilg->ilg_filter->sl_numsrc = 1; 3257 IN6_IPADDR_TO_V4MAPPED(src, 3258 &ilg->ilg_filter->sl_addr[0]); 3259 } 3260 if (group == INADDR_ANY) { 3261 ilg->ilg_v6group = ipv6_all_zeros; 3262 } else { 3263 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3264 } 3265 ilg->ilg_ipif = ipif; 3266 ilg->ilg_ill = NULL; 3267 ilg->ilg_orig_ifindex = 0; 3268 ilg->ilg_fmode = fmode; 3269 } else { 3270 int index; 3271 in6_addr_t v6src; 3272 ilgstat = ILGSTAT_CHANGE; 3273 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3274 mutex_exit(&connp->conn_lock); 3275 l_free(new_filter); 3276 return (EINVAL); 3277 } 3278 if (ilg->ilg_filter == NULL) { 3279 ilg->ilg_filter = l_alloc(); 3280 if (ilg->ilg_filter == NULL) { 3281 mutex_exit(&connp->conn_lock); 3282 l_free(new_filter); 3283 return (ENOMEM); 3284 } 3285 } 3286 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3287 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3288 mutex_exit(&connp->conn_lock); 3289 l_free(new_filter); 3290 return (EADDRNOTAVAIL); 3291 } 3292 index = ilg->ilg_filter->sl_numsrc++; 3293 ilg->ilg_filter->sl_addr[index] = v6src; 3294 } 3295 3296 /* 3297 * Save copy of ilg's filter state to pass to other functions, 3298 * so we can release conn_lock now. 
3299 */ 3300 new_fmode = ilg->ilg_fmode; 3301 l_copy(ilg->ilg_filter, new_filter); 3302 3303 mutex_exit(&connp->conn_lock); 3304 3305 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3306 if (error != 0) { 3307 /* 3308 * Need to undo what we did before calling ip_addmulti()! 3309 * Must look up the ilg again since we've not been holding 3310 * conn_lock. 3311 */ 3312 in6_addr_t v6src; 3313 if (ilgstat == ILGSTAT_NEW) 3314 v6src = ipv6_all_zeros; 3315 else 3316 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3317 mutex_enter(&connp->conn_lock); 3318 ilg = ilg_lookup_ipif(connp, group, ipif); 3319 ASSERT(ilg != NULL); 3320 ilg_delete(connp, ilg, &v6src); 3321 mutex_exit(&connp->conn_lock); 3322 l_free(new_filter); 3323 return (error); 3324 } 3325 3326 l_free(new_filter); 3327 return (0); 3328 } 3329 3330 static int 3331 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3332 mcast_record_t fmode, const in6_addr_t *v6src) 3333 { 3334 int error = 0; 3335 int orig_ifindex; 3336 ilg_t *ilg; 3337 ilg_stat_t ilgstat; 3338 slist_t *new_filter = NULL; 3339 int new_fmode; 3340 3341 ASSERT(IAM_WRITER_ILL(ill)); 3342 3343 if (!(ill->ill_flags & ILLF_MULTICAST)) 3344 return (EADDRNOTAVAIL); 3345 3346 /* 3347 * conn_lock protects the ilg list. Serializes 2 threads doing 3348 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3349 * and hme1 map to different ipsq's, but both operations happen 3350 * on the same conn. 3351 */ 3352 mutex_enter(&connp->conn_lock); 3353 3354 /* 3355 * Use the ifindex to do the lookup. We can't use the ill 3356 * directly because ilg_ill could point to a different ill if 3357 * things have moved. 3358 */ 3359 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3360 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3361 3362 /* 3363 * Depending on the option we're handling, may or may not be okay 3364 * if group has already been added. Figure out our rules based 3365 * on fmode and src params. 
Also make sure there's enough room 3366 * in the filter if we're adding a source to an existing filter. 3367 */ 3368 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3369 /* we're joining for all sources, must not have joined */ 3370 if (ilg != NULL) 3371 error = EADDRINUSE; 3372 } else { 3373 if (fmode == MODE_IS_EXCLUDE) { 3374 /* (excl {addr}) => block source, must have joined */ 3375 if (ilg == NULL) 3376 error = EADDRNOTAVAIL; 3377 } 3378 /* (incl {addr}) => join source, may have joined */ 3379 3380 if (ilg != NULL && 3381 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3382 error = ENOBUFS; 3383 } 3384 if (error != 0) { 3385 mutex_exit(&connp->conn_lock); 3386 return (error); 3387 } 3388 3389 /* 3390 * Alloc buffer to copy new state into (see below) before 3391 * we make any changes, so we can bail if it fails. 3392 */ 3393 if ((new_filter = l_alloc()) == NULL) { 3394 mutex_exit(&connp->conn_lock); 3395 return (ENOMEM); 3396 } 3397 3398 if (ilg == NULL) { 3399 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3400 mutex_exit(&connp->conn_lock); 3401 l_free(new_filter); 3402 return (ENOMEM); 3403 } 3404 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3405 ilg->ilg_filter = l_alloc(); 3406 if (ilg->ilg_filter == NULL) { 3407 ilg_delete(connp, ilg, NULL); 3408 mutex_exit(&connp->conn_lock); 3409 l_free(new_filter); 3410 return (ENOMEM); 3411 } 3412 ilg->ilg_filter->sl_numsrc = 1; 3413 ilg->ilg_filter->sl_addr[0] = *v6src; 3414 } 3415 ilgstat = ILGSTAT_NEW; 3416 ilg->ilg_v6group = *v6group; 3417 ilg->ilg_fmode = fmode; 3418 ilg->ilg_ipif = NULL; 3419 /* 3420 * Choose our target ill to join on. This might be different 3421 * from the ill we've been given if it's currently down and 3422 * part of a group. 3423 * 3424 * new ill is not refheld; we are writer. 
3425 */ 3426 ill = ip_choose_multi_ill(ill, v6group); 3427 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3428 ilg->ilg_ill = ill; 3429 /* 3430 * Remember the orig_ifindex that we joined on, so that we 3431 * can successfully delete them later on and also search 3432 * for duplicates if the application wants to join again. 3433 */ 3434 ilg->ilg_orig_ifindex = orig_ifindex; 3435 } else { 3436 int index; 3437 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3438 mutex_exit(&connp->conn_lock); 3439 l_free(new_filter); 3440 return (EINVAL); 3441 } 3442 if (ilg->ilg_filter == NULL) { 3443 ilg->ilg_filter = l_alloc(); 3444 if (ilg->ilg_filter == NULL) { 3445 mutex_exit(&connp->conn_lock); 3446 l_free(new_filter); 3447 return (ENOMEM); 3448 } 3449 } 3450 if (list_has_addr(ilg->ilg_filter, v6src)) { 3451 mutex_exit(&connp->conn_lock); 3452 l_free(new_filter); 3453 return (EADDRNOTAVAIL); 3454 } 3455 ilgstat = ILGSTAT_CHANGE; 3456 index = ilg->ilg_filter->sl_numsrc++; 3457 ilg->ilg_filter->sl_addr[index] = *v6src; 3458 /* 3459 * The current ill might be different from the one we were 3460 * asked to join on (if failover has occurred); we should 3461 * join on the ill stored in the ilg. The original ill 3462 * is noted in ilg_orig_ifindex, which matched our request. 3463 */ 3464 ill = ilg->ilg_ill; 3465 } 3466 3467 /* 3468 * Save copy of ilg's filter state to pass to other functions, 3469 * so we can release conn_lock now. 3470 */ 3471 new_fmode = ilg->ilg_fmode; 3472 l_copy(ilg->ilg_filter, new_filter); 3473 3474 mutex_exit(&connp->conn_lock); 3475 3476 /* 3477 * Now update the ill. We wait to do this until after the ilg 3478 * has been updated because we need to update the src filter 3479 * info for the ill, which involves looking at the status of 3480 * all the ilgs associated with this group/interface pair. 
3481 */ 3482 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3483 ilgstat, new_fmode, new_filter); 3484 if (error != 0) { 3485 /* 3486 * But because we waited, we have to undo the ilg update 3487 * if ip_addmulti_v6() fails. We also must lookup ilg 3488 * again, since we've not been holding conn_lock. 3489 */ 3490 in6_addr_t delsrc = 3491 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3492 mutex_enter(&connp->conn_lock); 3493 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3494 ASSERT(ilg != NULL); 3495 ilg_delete(connp, ilg, &delsrc); 3496 mutex_exit(&connp->conn_lock); 3497 l_free(new_filter); 3498 return (error); 3499 } 3500 3501 l_free(new_filter); 3502 3503 return (0); 3504 } 3505 3506 /* 3507 * Find an IPv4 ilg matching group, ill and source 3508 */ 3509 ilg_t * 3510 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3511 { 3512 in6_addr_t v6group, v6src; 3513 int i; 3514 boolean_t isinlist; 3515 ilg_t *ilg; 3516 ipif_t *ipif; 3517 ill_t *ilg_ill; 3518 3519 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3520 3521 /* 3522 * INADDR_ANY is represented as the IPv6 unspecified addr. 3523 */ 3524 if (group == INADDR_ANY) 3525 v6group = ipv6_all_zeros; 3526 else 3527 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3528 3529 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3530 /* ilg_ipif is NULL for v6; skip them */ 3531 ilg = &connp->conn_ilg[i]; 3532 if ((ipif = ilg->ilg_ipif) == NULL) 3533 continue; 3534 ASSERT(ilg->ilg_ill == NULL); 3535 ilg_ill = ipif->ipif_ill; 3536 ASSERT(!ilg_ill->ill_isv6); 3537 if (ilg_ill == ill && 3538 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3539 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3540 /* no source filter, so this is a match */ 3541 return (ilg); 3542 } 3543 break; 3544 } 3545 } 3546 if (i == connp->conn_ilg_inuse) 3547 return (NULL); 3548 3549 /* 3550 * we have an ilg with matching ill and group; but 3551 * the ilg has a source list that we must check. 
3552 */ 3553 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3554 isinlist = B_FALSE; 3555 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3556 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3557 isinlist = B_TRUE; 3558 break; 3559 } 3560 } 3561 3562 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3563 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3564 return (ilg); 3565 3566 return (NULL); 3567 } 3568 3569 /* 3570 * Find an IPv6 ilg matching group, ill, and source 3571 */ 3572 ilg_t * 3573 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3574 const in6_addr_t *v6src, ill_t *ill) 3575 { 3576 int i; 3577 boolean_t isinlist; 3578 ilg_t *ilg; 3579 ill_t *ilg_ill; 3580 3581 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3582 3583 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3584 ilg = &connp->conn_ilg[i]; 3585 if ((ilg_ill = ilg->ilg_ill) == NULL) 3586 continue; 3587 ASSERT(ilg->ilg_ipif == NULL); 3588 ASSERT(ilg_ill->ill_isv6); 3589 if (ilg_ill == ill && 3590 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3591 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3592 /* no source filter, so this is a match */ 3593 return (ilg); 3594 } 3595 break; 3596 } 3597 } 3598 if (i == connp->conn_ilg_inuse) 3599 return (NULL); 3600 3601 /* 3602 * we have an ilg with matching ill and group; but 3603 * the ilg has a source list that we must check. 3604 */ 3605 isinlist = B_FALSE; 3606 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3607 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3608 isinlist = B_TRUE; 3609 break; 3610 } 3611 } 3612 3613 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3614 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3615 return (ilg); 3616 3617 return (NULL); 3618 } 3619 3620 /* 3621 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3622 * This is useful when the interface fails and we have moved 3623 * to a new ill, but still would like to locate using the index 3624 * that we originally used to join. Used only for IPv6 currently. 3625 */ 3626 static ilg_t * 3627 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3628 { 3629 ilg_t *ilg; 3630 int i; 3631 3632 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3633 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3634 ilg = &connp->conn_ilg[i]; 3635 /* ilg_ill is NULL for V4. Skip them */ 3636 if (ilg->ilg_ill == NULL) 3637 continue; 3638 /* ilg_ipif is NULL for V6 */ 3639 ASSERT(ilg->ilg_ipif == NULL); 3640 ASSERT(ilg->ilg_orig_ifindex != 0); 3641 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3642 ilg->ilg_orig_ifindex == ifindex) { 3643 return (ilg); 3644 } 3645 } 3646 return (NULL); 3647 } 3648 3649 /* 3650 * Find an IPv6 ilg matching group and ill 3651 */ 3652 ilg_t * 3653 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3654 { 3655 ilg_t *ilg; 3656 int i; 3657 ill_t *mem_ill; 3658 3659 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3660 3661 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3662 ilg = &connp->conn_ilg[i]; 3663 if ((mem_ill = ilg->ilg_ill) == NULL) 3664 continue; 3665 ASSERT(ilg->ilg_ipif == NULL); 3666 ASSERT(mem_ill->ill_isv6); 3667 if (mem_ill == ill && 3668 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3669 return (ilg); 3670 } 3671 return (NULL); 3672 } 3673 3674 /* 3675 * Find an IPv4 ilg matching group and ipif 3676 */ 3677 static ilg_t * 3678 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3679 { 3680 in6_addr_t v6group; 3681 int i; 3682 3683 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3684 ASSERT(!ipif->ipif_ill->ill_isv6); 3685 3686 if (group == INADDR_ANY) 3687 v6group = ipv6_all_zeros; 3688 else 3689 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3690 3691 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3692 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3693 &v6group) 
&& 3694 connp->conn_ilg[i].ilg_ipif == ipif) 3695 return (&connp->conn_ilg[i]); 3696 } 3697 return (NULL); 3698 } 3699 3700 /* 3701 * If a source address is passed in (src != NULL and src is not 3702 * unspecified), remove the specified src addr from the given ilg's 3703 * filter list, else delete the ilg. 3704 */ 3705 static void 3706 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3707 { 3708 int i; 3709 3710 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3711 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3712 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3713 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3714 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3715 3716 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3717 if (connp->conn_ilg_walker_cnt != 0) { 3718 ilg->ilg_flags |= ILG_DELETED; 3719 return; 3720 } 3721 3722 FREE_SLIST(ilg->ilg_filter); 3723 3724 i = ilg - &connp->conn_ilg[0]; 3725 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3726 3727 /* Move other entries up one step */ 3728 connp->conn_ilg_inuse--; 3729 for (; i < connp->conn_ilg_inuse; i++) 3730 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3731 3732 if (connp->conn_ilg_inuse == 0) { 3733 mi_free((char *)connp->conn_ilg); 3734 connp->conn_ilg = NULL; 3735 cv_broadcast(&connp->conn_refcv); 3736 } 3737 } else { 3738 l_remove(ilg->ilg_filter, src); 3739 } 3740 } 3741 3742 /* 3743 * Called from conn close. No new ilg can be added or removed. 3744 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3745 * will return error if conn has started closing. 
3746 */ 3747 void 3748 ilg_delete_all(conn_t *connp) 3749 { 3750 int i; 3751 ipif_t *ipif = NULL; 3752 ill_t *ill = NULL; 3753 ilg_t *ilg; 3754 in6_addr_t v6group; 3755 boolean_t success; 3756 ipsq_t *ipsq; 3757 int orig_ifindex; 3758 3759 mutex_enter(&connp->conn_lock); 3760 retry: 3761 ILG_WALKER_HOLD(connp); 3762 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3763 ilg = &connp->conn_ilg[i]; 3764 /* 3765 * Since this walk is not atomic (we drop the 3766 * conn_lock and wait in ipsq_enter) we need 3767 * to check for the ILG_DELETED flag. 3768 */ 3769 if (ilg->ilg_flags & ILG_DELETED) { 3770 /* Go to the next ilg */ 3771 i--; 3772 continue; 3773 } 3774 v6group = ilg->ilg_v6group; 3775 3776 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3777 ipif = ilg->ilg_ipif; 3778 ill = ipif->ipif_ill; 3779 } else { 3780 ipif = NULL; 3781 ill = ilg->ilg_ill; 3782 } 3783 /* 3784 * We may not be able to refhold the ill if the ill/ipif 3785 * is changing. But we need to make sure that the ill will 3786 * not vanish. So we just bump up the ill_waiter count. 3787 * If we are unable to do even that, then the ill is closing, 3788 * in which case the unplumb thread will handle the cleanup, 3789 * and we move on to the next ilg. 3790 */ 3791 if (!ill_waiter_inc(ill)) { 3792 /* Go to the next ilg */ 3793 i--; 3794 continue; 3795 } 3796 mutex_exit(&connp->conn_lock); 3797 /* 3798 * To prevent deadlock between ill close which waits inside 3799 * the perimeter, and conn close, ipsq_enter returns error, 3800 * the moment ILL_CONDEMNED is set, in which case ill close 3801 * takes responsibility to cleanup the ilgs. Note that we 3802 * have not yet set condemned flag, otherwise the conn can't 3803 * be refheld for cleanup by those routines and it would be 3804 * a mutual deadlock. 
3805 */ 3806 success = ipsq_enter(ill, B_FALSE); 3807 ipsq = ill->ill_phyint->phyint_ipsq; 3808 ill_waiter_dcr(ill); 3809 mutex_enter(&connp->conn_lock); 3810 if (!success) { 3811 /* Go to the next ilg */ 3812 i--; 3813 continue; 3814 } 3815 3816 /* 3817 * Make sure that nothing has changed under. For eg. 3818 * a failover/failback can change ilg_ill while we were 3819 * waiting to become exclusive above 3820 */ 3821 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3822 ipif = ilg->ilg_ipif; 3823 ill = ipif->ipif_ill; 3824 } else { 3825 ipif = NULL; 3826 ill = ilg->ilg_ill; 3827 } 3828 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3829 /* 3830 * The ilg has changed under us probably due 3831 * to a failover or unplumb. Retry on the same ilg. 3832 */ 3833 mutex_exit(&connp->conn_lock); 3834 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3835 mutex_enter(&connp->conn_lock); 3836 continue; 3837 } 3838 v6group = ilg->ilg_v6group; 3839 orig_ifindex = ilg->ilg_orig_ifindex; 3840 ilg_delete(connp, ilg, NULL); 3841 mutex_exit(&connp->conn_lock); 3842 3843 if (ipif != NULL) 3844 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3845 B_FALSE, B_TRUE); 3846 3847 else 3848 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3849 connp->conn_zoneid, B_FALSE, B_TRUE); 3850 3851 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3852 mutex_enter(&connp->conn_lock); 3853 /* Go to the next ilg */ 3854 i--; 3855 } 3856 ILG_WALKER_RELE(connp); 3857 3858 /* If any ill was skipped above wait and retry */ 3859 if (connp->conn_ilg_inuse != 0) { 3860 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3861 goto retry; 3862 } 3863 mutex_exit(&connp->conn_lock); 3864 } 3865 3866 /* 3867 * Called from ill close by ipcl_walk for clearing conn_ilg and 3868 * conn_multicast_ipif for a given ipif. conn is held by caller. 3869 * Note that ipcl_walk only walks conns that are not yet condemned. 3870 * condemned conns can't be refheld. For this reason, conn must become clean 3871 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3872 * condemned flag. 3873 */ 3874 static void 3875 conn_delete_ipif(conn_t *connp, caddr_t arg) 3876 { 3877 ipif_t *ipif = (ipif_t *)arg; 3878 int i; 3879 char group_buf1[INET6_ADDRSTRLEN]; 3880 char group_buf2[INET6_ADDRSTRLEN]; 3881 ipaddr_t group; 3882 ilg_t *ilg; 3883 3884 /* 3885 * Even though conn_ilg_inuse can change while we are in this loop, 3886 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3887 * be created or deleted for this connp, on this ill, since this ill 3888 * is the perimeter. So we won't miss any ilg in this cleanup. 3889 */ 3890 mutex_enter(&connp->conn_lock); 3891 3892 /* 3893 * Increment the walker count, so that ilg repacking does not 3894 * occur while we are in the loop. 3895 */ 3896 ILG_WALKER_HOLD(connp); 3897 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3898 ilg = &connp->conn_ilg[i]; 3899 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3900 continue; 3901 /* 3902 * ip_close cannot be cleaning this ilg at the same time. 3903 * since it also has to execute in this ill's perimeter which 3904 * we are now holding. Only a clean conn can be condemned. 
3905 */ 3906 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3907 3908 /* Blow away the membership */ 3909 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3910 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3911 group_buf1, sizeof (group_buf1)), 3912 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3913 group_buf2, sizeof (group_buf2)), 3914 ipif->ipif_ill->ill_name)); 3915 3916 /* ilg_ipif is NULL for V6, so we won't be here */ 3917 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3918 3919 group = V4_PART_OF_V6(ilg->ilg_v6group); 3920 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3921 mutex_exit(&connp->conn_lock); 3922 3923 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3924 mutex_enter(&connp->conn_lock); 3925 } 3926 3927 /* 3928 * If we are the last walker, need to physically delete the 3929 * ilgs and repack. 3930 */ 3931 ILG_WALKER_RELE(connp); 3932 3933 if (connp->conn_multicast_ipif == ipif) { 3934 /* Revert to late binding */ 3935 connp->conn_multicast_ipif = NULL; 3936 } 3937 mutex_exit(&connp->conn_lock); 3938 3939 conn_delete_ire(connp, (caddr_t)ipif); 3940 } 3941 3942 /* 3943 * Called from ill close by ipcl_walk for clearing conn_ilg and 3944 * conn_multicast_ill for a given ill. conn is held by caller. 3945 * Note that ipcl_walk only walks conns that are not yet condemned. 3946 * condemned conns can't be refheld. For this reason, conn must become clean 3947 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3948 * condemned flag. 3949 */ 3950 static void 3951 conn_delete_ill(conn_t *connp, caddr_t arg) 3952 { 3953 ill_t *ill = (ill_t *)arg; 3954 int i; 3955 char group_buf[INET6_ADDRSTRLEN]; 3956 in6_addr_t v6group; 3957 int orig_ifindex; 3958 ilg_t *ilg; 3959 3960 /* 3961 * Even though conn_ilg_inuse can change while we are in this loop, 3962 * no new ilgs can be created/deleted for this connp, on this 3963 * ill, since this ill is the perimeter. So we won't miss any ilg 3964 * in this cleanup. 
3965 */ 3966 mutex_enter(&connp->conn_lock); 3967 3968 /* 3969 * Increment the walker count, so that ilg repacking does not 3970 * occur while we are in the loop. 3971 */ 3972 ILG_WALKER_HOLD(connp); 3973 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3974 ilg = &connp->conn_ilg[i]; 3975 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3976 /* 3977 * ip_close cannot be cleaning this ilg at the same 3978 * time, since it also has to execute in this ill's 3979 * perimeter which we are now holding. Only a clean 3980 * conn can be condemned. 3981 */ 3982 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3983 3984 /* Blow away the membership */ 3985 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3986 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3987 group_buf, sizeof (group_buf)), 3988 ill->ill_name)); 3989 3990 v6group = ilg->ilg_v6group; 3991 orig_ifindex = ilg->ilg_orig_ifindex; 3992 ilg_delete(connp, ilg, NULL); 3993 mutex_exit(&connp->conn_lock); 3994 3995 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3996 connp->conn_zoneid, B_FALSE, B_TRUE); 3997 mutex_enter(&connp->conn_lock); 3998 } 3999 } 4000 /* 4001 * If we are the last walker, need to physically delete the 4002 * ilgs and repack. 4003 */ 4004 ILG_WALKER_RELE(connp); 4005 4006 if (connp->conn_multicast_ill == ill) { 4007 /* Revert to late binding */ 4008 connp->conn_multicast_ill = NULL; 4009 connp->conn_orig_multicast_ifindex = 0; 4010 } 4011 mutex_exit(&connp->conn_lock); 4012 } 4013 4014 /* 4015 * Called when an ipif is unplumbed to make sure that there are no 4016 * dangling conn references to that ipif. 4017 * Handles ilg_ipif and conn_multicast_ipif 4018 */ 4019 void 4020 reset_conn_ipif(ipif) 4021 ipif_t *ipif; 4022 { 4023 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4024 4025 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4026 } 4027 4028 /* 4029 * Called when an ill is unplumbed to make sure that there are no 4030 * dangling conn references to that ill. 
4031 * Handles ilg_ill, conn_multicast_ill. 4032 */ 4033 void 4034 reset_conn_ill(ill_t *ill) 4035 { 4036 ip_stack_t *ipst = ill->ill_ipst; 4037 4038 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 4039 } 4040 4041 #ifdef DEBUG 4042 /* 4043 * Walk functions walk all the interfaces in the system to make 4044 * sure that there is no refernece to the ipif or ill that is 4045 * going away. 4046 */ 4047 int 4048 ilm_walk_ill(ill_t *ill) 4049 { 4050 int cnt = 0; 4051 ill_t *till; 4052 ilm_t *ilm; 4053 ill_walk_context_t ctx; 4054 ip_stack_t *ipst = ill->ill_ipst; 4055 4056 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4057 till = ILL_START_WALK_ALL(&ctx, ipst); 4058 for (; till != NULL; till = ill_next(&ctx, till)) { 4059 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4060 if (ilm->ilm_ill == ill) { 4061 cnt++; 4062 } 4063 } 4064 } 4065 rw_exit(&ipst->ips_ill_g_lock); 4066 4067 return (cnt); 4068 } 4069 4070 /* 4071 * This function is called before the ipif is freed. 4072 */ 4073 int 4074 ilm_walk_ipif(ipif_t *ipif) 4075 { 4076 int cnt = 0; 4077 ill_t *till; 4078 ilm_t *ilm; 4079 ill_walk_context_t ctx; 4080 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4081 4082 till = ILL_START_WALK_ALL(&ctx, ipst); 4083 for (; till != NULL; till = ill_next(&ctx, till)) { 4084 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4085 if (ilm->ilm_ipif == ipif) { 4086 cnt++; 4087 } 4088 } 4089 } 4090 return (cnt); 4091 } 4092 #endif 4093