1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <net/route.h> 45 #include <netinet/in.h> 46 #include <net/if_dl.h> 47 #include <netinet/ip6.h> 48 #include <netinet/icmp6.h> 49 50 #include <inet/common.h> 51 #include <inet/mi.h> 52 #include <inet/nd.h> 53 #include <inet/arp.h> 54 #include <inet/ip.h> 55 #include <inet/ip6.h> 56 #include <inet/ip_if.h> 57 #include <inet/ip_ndp.h> 58 #include <inet/ip_multi.h> 59 #include <inet/ipclassifier.h> 60 #include <inet/ipsec_impl.h> 61 #include <inet/sctp_ip.h> 62 #include <inet/ip_listutils.h> 63 #include <inet/udp_impl.h> 64 65 /* igmpv3/mldv2 source filter manipulation */ 66 static void ilm_bld_flists(conn_t *conn, void *arg); 67 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 68 slist_t *flist); 69 70 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 71 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 72 int orig_ifindex, zoneid_t zoneid); 73 static void ilm_delete(ilm_t *ilm); 74 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 75 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 77 const in6_addr_t *v6group, int index); 78 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 79 ipif_t *ipif); 80 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 81 mcast_record_t fmode, ipaddr_t src); 82 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 83 mcast_record_t fmode, const in6_addr_t *v6src); 84 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 85 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 86 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 87 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 88 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 89 static void conn_ilg_reap(conn_t *connp); 90 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 91 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 92 static int ip_opt_delete_group_excl_v6(conn_t *connp, 93 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 94 const 
in6_addr_t *v6src);

/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
 *
 * Multicast joins and leaves are single-threaded per phyint/IPMP group
 * using the ipsq serialization mechanism.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm. ilms are created/destroyed only as writer. ilms
 * are not passed around; instead they are looked up and used under the
 * ill_lock or as writer. So we don't need a dynamic refcount of the number
 * of threads holding a reference to an ilm.
 *
 * Multicast Join operation:
 *
 * The first step is to determine the ipif (v4) or ill (v6) on which
 * the join operation is to be done. The join is done after becoming
 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
 * and ill->ill_ilm are thus accessed and modified exclusively per ill.
 * Multiple threads can attempt to join simultaneously on different ipif/ill
 * on the same conn. In this case the ipsq serialization does not help in
 * protecting the ilg. It is the conn_lock that is used to protect the ilg.
 * The conn_lock also protects all the ilg_t members.
 *
 * Multicast Leave operation:
 *
 * Similar to the join operation, the first step is to determine the ipif
 * or ill (v6) on which the leave operation is to be done. The leave
 * operation is done after becoming exclusive on the ipsq associated with
 * the ipif or ill. As with the join operation, ilg modification is done
 * under the protection of the conn lock.
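 *
 * As a rough illustration only (not a literal call graph), an IPv4
 * IP_ADD_MEMBERSHIP join proceeds approximately as follows; all of the
 * callees named below appear in this file:
 *
 *	IPSQ_ENTER_IPIF(ipif, connp, ...)	become exclusive on the ipsq
 *	ilg_add(connp, group, ipif, ...)	conn_lock protects conn_ilg
 *	    ip_addmulti(group, ipif, ...)	create or update the ilm
 *		igmp_joingroup(ilm)		membership report if needed
 *		ip_ll_addmulti_v6(ipif, ...)	DL_ENABMULTI_REQ to the driver
 *	IPSQ_EXIT(ipsq)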
139 */ 140 141 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 142 ASSERT(connp != NULL); \ 143 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 144 (first_mp), (func), (type), B_TRUE); \ 145 if ((ipsq) == NULL) { \ 146 ipif_refrele(ipif); \ 147 return (EINPROGRESS); \ 148 } 149 150 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 151 ASSERT(connp != NULL); \ 152 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 153 (first_mp), (func), (type), B_TRUE); \ 154 if ((ipsq) == NULL) { \ 155 ill_refrele(ill); \ 156 return (EINPROGRESS); \ 157 } 158 159 #define IPSQ_EXIT(ipsq) \ 160 if (ipsq != NULL) \ 161 ipsq_exit(ipsq, B_TRUE, B_TRUE); 162 163 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 164 165 #define ILG_WALKER_RELE(connp) \ 166 { \ 167 (connp)->conn_ilg_walker_cnt--; \ 168 if ((connp)->conn_ilg_walker_cnt == 0) \ 169 conn_ilg_reap(connp); \ 170 } 171 172 static void 173 conn_ilg_reap(conn_t *connp) 174 { 175 int to; 176 int from; 177 178 ASSERT(MUTEX_HELD(&connp->conn_lock)); 179 180 to = 0; 181 from = 0; 182 while (from < connp->conn_ilg_inuse) { 183 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 184 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 185 from++; 186 continue; 187 } 188 if (to != from) 189 connp->conn_ilg[to] = connp->conn_ilg[from]; 190 to++; 191 from++; 192 } 193 194 connp->conn_ilg_inuse = to; 195 196 if (connp->conn_ilg_inuse == 0) { 197 mi_free((char *)connp->conn_ilg); 198 connp->conn_ilg = NULL; 199 cv_broadcast(&connp->conn_refcv); 200 } 201 } 202 203 #define GETSTRUCT(structure, number) \ 204 ((structure *)mi_zalloc(sizeof (structure) * (number))) 205 206 #define ILG_ALLOC_CHUNK 16 207 208 /* 209 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 210 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 211 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 212 * returned ilg). Returns NULL on failure (ENOMEM). 213 * 214 * Assumes connp->conn_lock is held. 215 */ 216 static ilg_t * 217 conn_ilg_alloc(conn_t *connp) 218 { 219 ilg_t *new; 220 int curcnt; 221 222 ASSERT(MUTEX_HELD(&connp->conn_lock)); 223 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 224 225 if (connp->conn_ilg == NULL) { 226 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 227 if (connp->conn_ilg == NULL) 228 return (NULL); 229 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 230 connp->conn_ilg_inuse = 0; 231 } 232 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 233 curcnt = connp->conn_ilg_allocated; 234 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 235 if (new == NULL) 236 return (NULL); 237 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 238 mi_free((char *)connp->conn_ilg); 239 connp->conn_ilg = new; 240 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 241 } 242 243 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 244 } 245 246 typedef struct ilm_fbld_s { 247 ilm_t *fbld_ilm; 248 int fbld_in_cnt; 249 int fbld_ex_cnt; 250 slist_t fbld_in; 251 slist_t fbld_ex; 252 boolean_t fbld_in_overflow; 253 } ilm_fbld_t; 254 255 static void 256 ilm_bld_flists(conn_t *conn, void *arg) 257 { 258 int i; 259 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 260 ilm_t *ilm = fbld->fbld_ilm; 261 in6_addr_t *v6group = &ilm->ilm_v6addr; 262 263 if (conn->conn_ilg_inuse == 0) 264 return; 265 266 /* 267 * Since we can't break out of the ipcl_walk once started, we still 268 * have to look at every conn. 
But if we've already found one 269 * (EXCLUDE, NULL) list, there's no need to keep checking individual 270 * ilgs--that will be our state. 271 */ 272 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 273 return; 274 275 /* 276 * Check this conn's ilgs to see if any are interested in our 277 * ilm (group, interface match). If so, update the master 278 * include and exclude lists we're building in the fbld struct 279 * with this ilg's filter info. 280 */ 281 mutex_enter(&conn->conn_lock); 282 for (i = 0; i < conn->conn_ilg_inuse; i++) { 283 ilg_t *ilg = &conn->conn_ilg[i]; 284 if ((ilg->ilg_ill == ilm->ilm_ill) && 285 (ilg->ilg_ipif == ilm->ilm_ipif) && 286 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 287 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 288 fbld->fbld_in_cnt++; 289 if (!fbld->fbld_in_overflow) 290 l_union_in_a(&fbld->fbld_in, 291 ilg->ilg_filter, 292 &fbld->fbld_in_overflow); 293 } else { 294 fbld->fbld_ex_cnt++; 295 /* 296 * On the first exclude list, don't try to do 297 * an intersection, as the master exclude list 298 * is intentionally empty. If the master list 299 * is still empty on later iterations, that 300 * means we have at least one ilg with an empty 301 * exclude list, so that should be reflected 302 * when we take the intersection. 303 */ 304 if (fbld->fbld_ex_cnt == 1) { 305 if (ilg->ilg_filter != NULL) 306 l_copy(ilg->ilg_filter, 307 &fbld->fbld_ex); 308 } else { 309 l_intersection_in_a(&fbld->fbld_ex, 310 ilg->ilg_filter); 311 } 312 } 313 /* there will only be one match, so break now. */ 314 break; 315 } 316 } 317 mutex_exit(&conn->conn_lock); 318 } 319 320 static void 321 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 322 { 323 ilm_fbld_t fbld; 324 325 fbld.fbld_ilm = ilm; 326 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 327 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 328 fbld.fbld_in_overflow = B_FALSE; 329 330 /* first, construct our master include and exclude lists */ 331 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld); 332 333 /* now use those master lists to generate the interface filter */ 334 335 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 336 if (fbld.fbld_in_overflow) { 337 *fmode = MODE_IS_EXCLUDE; 338 flist->sl_numsrc = 0; 339 return; 340 } 341 342 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 343 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 344 *fmode = MODE_IS_INCLUDE; 345 flist->sl_numsrc = 0; 346 return; 347 } 348 349 /* 350 * If there are no exclude lists, then the interface filter 351 * is INCLUDE, with its filter list equal to fbld_in. A single 352 * exclude list makes the interface filter EXCLUDE, with its 353 * filter list equal to (fbld_ex - fbld_in). 354 */ 355 if (fbld.fbld_ex_cnt == 0) { 356 *fmode = MODE_IS_INCLUDE; 357 l_copy(&fbld.fbld_in, flist); 358 } else { 359 *fmode = MODE_IS_EXCLUDE; 360 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 361 } 362 } 363 364 /* 365 * If the given interface has failed, choose a new one to join on so 366 * that we continue to receive packets. ilg_orig_ifindex remembers 367 * what the application used to join on so that we know the ilg to 368 * delete even though we change the ill here. Callers will store the 369 * ilg returned from this function in ilg_ill. Thus when we receive 370 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 371 * 372 * This function must be called as writer so we can walk the group 373 * list and examine flags without holding a lock. 
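 *
 * A caller typically uses the result along these lines (sketch only; the
 * exact assignments are made in the ilg/ilm add paths):
 *
 *	ill = ip_choose_multi_ill(orig_ill, &v6group);
 *	ilg->ilg_orig_ifindex = orig_ill->ill_phyint->phyint_ifindex;
 *	ilg->ilg_ill = ill;
 *
 * so that packets are received on a working ill in the group while the
 * original request is remembered for failback.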
374 */ 375 ill_t * 376 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 377 { 378 ill_t *till; 379 ill_group_t *illgrp = ill->ill_group; 380 381 ASSERT(IAM_WRITER_ILL(ill)); 382 383 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 384 return (ill); 385 386 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 387 return (ill); 388 389 till = illgrp->illgrp_ill; 390 while (till != NULL && 391 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 392 till = till->ill_group_next; 393 } 394 if (till != NULL) 395 return (till); 396 397 return (ill); 398 } 399 400 static int 401 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 402 boolean_t isv6) 403 { 404 mcast_record_t fmode; 405 slist_t *flist; 406 boolean_t fdefault; 407 char buf[INET6_ADDRSTRLEN]; 408 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 409 410 /* 411 * There are several cases where the ilm's filter state 412 * defaults to (EXCLUDE, NULL): 413 * - we've had previous joins without associated ilgs 414 * - this join has no associated ilg 415 * - the ilg's filter state is (EXCLUDE, NULL) 416 */ 417 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 418 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 419 420 /* attempt mallocs (if needed) before doing anything else */ 421 if ((flist = l_alloc()) == NULL) 422 return (ENOMEM); 423 if (!fdefault && ilm->ilm_filter == NULL) { 424 ilm->ilm_filter = l_alloc(); 425 if (ilm->ilm_filter == NULL) { 426 l_free(flist); 427 return (ENOMEM); 428 } 429 } 430 431 if (ilgstat != ILGSTAT_CHANGE) 432 ilm->ilm_refcnt++; 433 434 if (ilgstat == ILGSTAT_NONE) 435 ilm->ilm_no_ilg_cnt++; 436 437 /* 438 * Determine new filter state. If it's not the default 439 * (EXCLUDE, NULL), we must walk the conn list to find 440 * any ilgs interested in this group, and re-build the 441 * ilm filter. 442 */ 443 if (fdefault) { 444 fmode = MODE_IS_EXCLUDE; 445 flist->sl_numsrc = 0; 446 } else { 447 ilm_gen_filter(ilm, &fmode, flist); 448 } 449 450 /* make sure state actually changed; nothing to do if not. */ 451 if ((ilm->ilm_fmode == fmode) && 452 !lists_are_different(ilm->ilm_filter, flist)) { 453 l_free(flist); 454 return (0); 455 } 456 457 /* send the state change report */ 458 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 459 if (isv6) 460 mld_statechange(ilm, fmode, flist); 461 else 462 igmp_statechange(ilm, fmode, flist); 463 } 464 465 /* update the ilm state */ 466 ilm->ilm_fmode = fmode; 467 if (flist->sl_numsrc > 0) 468 l_copy(flist, ilm->ilm_filter); 469 else 470 CLEAR_SLIST(ilm->ilm_filter); 471 472 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 473 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 474 475 l_free(flist); 476 return (0); 477 } 478 479 static int 480 ilm_update_del(ilm_t *ilm, boolean_t isv6) 481 { 482 mcast_record_t fmode; 483 slist_t *flist; 484 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 485 486 ip1dbg(("ilm_update_del: still %d left; updating state\n", 487 ilm->ilm_refcnt)); 488 489 if ((flist = l_alloc()) == NULL) 490 return (ENOMEM); 491 492 /* 493 * If present, the ilg in question has already either been 494 * updated or removed from our list; so all we need to do 495 * now is walk the list to update the ilm filter state. 496 * 497 * Skip the list walk if we have any no-ilg joins, which 498 * cause the filter state to revert to (EXCLUDE, NULL). 
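	 *
	 * (Illustration: with two sockets whose ilgs are INCLUDE {A} and
	 * INCLUDE {B}, the interface filter stays (EXCLUDE, NULL) for as
	 * long as ilm_no_ilg_cnt != 0; once it drops to zero,
	 * ilm_gen_filter() recomputes it as INCLUDE {A, B}.)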
499 */ 500 if (ilm->ilm_no_ilg_cnt != 0) { 501 fmode = MODE_IS_EXCLUDE; 502 flist->sl_numsrc = 0; 503 } else { 504 ilm_gen_filter(ilm, &fmode, flist); 505 } 506 507 /* check to see if state needs to be updated */ 508 if ((ilm->ilm_fmode == fmode) && 509 (!lists_are_different(ilm->ilm_filter, flist))) { 510 l_free(flist); 511 return (0); 512 } 513 514 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 515 if (isv6) 516 mld_statechange(ilm, fmode, flist); 517 else 518 igmp_statechange(ilm, fmode, flist); 519 } 520 521 ilm->ilm_fmode = fmode; 522 if (flist->sl_numsrc > 0) { 523 if (ilm->ilm_filter == NULL) { 524 ilm->ilm_filter = l_alloc(); 525 if (ilm->ilm_filter == NULL) { 526 char buf[INET6_ADDRSTRLEN]; 527 ip1dbg(("ilm_update_del: failed to alloc ilm " 528 "filter; no source filtering for %s on %s", 529 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 530 buf, sizeof (buf)), ill->ill_name)); 531 ilm->ilm_fmode = MODE_IS_EXCLUDE; 532 l_free(flist); 533 return (0); 534 } 535 } 536 l_copy(flist, ilm->ilm_filter); 537 } else { 538 CLEAR_SLIST(ilm->ilm_filter); 539 } 540 541 l_free(flist); 542 return (0); 543 } 544 545 /* 546 * INADDR_ANY means all multicast addresses. This is only used 547 * by the multicast router. 548 * INADDR_ANY is stored as IPv6 unspecified addr. 549 */ 550 int 551 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 552 mcast_record_t ilg_fmode, slist_t *ilg_flist) 553 { 554 ill_t *ill = ipif->ipif_ill; 555 ilm_t *ilm; 556 in6_addr_t v6group; 557 int ret; 558 559 ASSERT(IAM_WRITER_IPIF(ipif)); 560 561 if (!CLASSD(group) && group != INADDR_ANY) 562 return (EINVAL); 563 564 /* 565 * INADDR_ANY is represented as the IPv6 unspecifed addr. 566 */ 567 if (group == INADDR_ANY) 568 v6group = ipv6_all_zeros; 569 else 570 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 571 572 ilm = ilm_lookup_ipif(ipif, group); 573 if (ilm != NULL) 574 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 575 576 /* 577 * ilms are associated with ipifs in IPv4. It moves with the 578 * ipif if the ipif moves to a new ill when the interface 579 * fails. Thus we really don't check whether the ipif_ill 580 * has failed like in IPv6. If it has FAILED the ipif 581 * will move (daemon will move it) and hence the ilm, if the 582 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 583 * we continue to receive in the same place even if the 584 * interface fails. 585 */ 586 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 587 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 588 if (ilm == NULL) 589 return (ENOMEM); 590 591 if (group == INADDR_ANY) { 592 /* 593 * Check how many ipif's have members in this group - 594 * if more then one we should not tell the driver to join 595 * this time 596 */ 597 if (ilm_numentries_v6(ill, &v6group) > 1) 598 return (0); 599 if (ill->ill_group == NULL) 600 ret = ip_join_allmulti(ipif); 601 else 602 ret = ill_nominate_mcast_rcv(ill->ill_group); 603 if (ret != 0) 604 ilm_delete(ilm); 605 return (ret); 606 } 607 608 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 609 igmp_joingroup(ilm); 610 611 if (ilm_numentries_v6(ill, &v6group) > 1) 612 return (0); 613 614 ret = ip_ll_addmulti_v6(ipif, &v6group); 615 if (ret != 0) 616 ilm_delete(ilm); 617 return (ret); 618 } 619 620 /* 621 * The unspecified address means all multicast addresses. 622 * This is only used by the multicast router. 
 *
 * ill identifies the interface to join on; it may not match the
 * interface requested by the application if a failover has taken
 * place. orig_ifindex always identifies the interface requested
 * by the app.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 */
int
ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
    slist_t *ilg_flist)
{
        ilm_t   *ilm;
        int     ret;

        ASSERT(IAM_WRITER_ILL(ill));

        if (!IN6_IS_ADDR_MULTICAST(v6group) &&
            !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
                return (EINVAL);
        }

        /*
         * An ilm is uniquely identified by the tuple of (group, ill,
         * orig_ill). group is the multicast group address, ill is
         * the interface on which it is currently joined, and orig_ill
         * is the interface on which the application requested the
         * join. orig_ill and ill are the same unless orig_ill has
         * failed over.
         *
         * Both orig_ill and ill are required, which means we may have
         * 2 ilms on an ill for the same group, but with different
         * orig_ills. These must be kept separate, so that when failback
         * occurs, the appropriate ilms are moved back to their orig_ill
         * without disrupting memberships on the ill to which they had
         * been moved.
         *
         * In order to track orig_ill, we store orig_ifindex in the
         * ilm and ilg.
         */
        ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
        if (ilm != NULL)
                return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));

        /*
         * We need to remember where the application really wanted
         * to join. This will be used later if we want to failback
         * to the original interface.
         */
        ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
            ilg_flist, orig_ifindex, zoneid);
        if (ilm == NULL)
                return (ENOMEM);

        if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
                /*
                 * Check how many ipifs have members in this group;
                 * if more than one we should not tell the driver to join
                 * this time.
                 */
                if (ilm_numentries_v6(ill, v6group) > 1)
                        return (0);
                if (ill->ill_group == NULL)
                        ret = ip_join_allmulti(ill->ill_ipif);
                else
                        ret = ill_nominate_mcast_rcv(ill->ill_group);

                if (ret != 0)
                        ilm_delete(ilm);
                return (ret);
        }

        if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
                mld_joingroup(ilm);

        /*
         * If we have more than one membership we should not tell the
         * driver to join this time.
         */
        if (ilm_numentries_v6(ill, v6group) > 1)
                return (0);

        ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
        if (ret != 0)
                ilm_delete(ilm);
        return (ret);
}

/*
 * Send a multicast request to the driver for enabling multicast reception
 * for the v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
        mblk_t *mp;
        uint32_t addrlen, addroff;
        char group_buf[INET6_ADDRSTRLEN];

        ASSERT(IAM_WRITER_ILL(ill));

        /*
         * Create an AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
         * on.
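         *
         * The message handed downstream for a v4mapped group is, roughly,
         *
         *	[ area_t (AR_ENTRY_SQUERY) ] ->b_cont-> [ dl_enabmulti_req_t,
         *	    followed by space for the hw multicast address ]
         *
         * i.e. the squery leads and the DLPI request built by
         * ill_create_dl() rides behind it. For IPv6 groups the DLPI
         * request is instead handed to ndp_mcastreq().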
732 */ 733 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 734 &addrlen, &addroff); 735 if (!mp) 736 return (ENOMEM); 737 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 738 ipaddr_t v4group; 739 740 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 741 /* 742 * NOTE!!! 743 * The "addroff" passed in here was calculated by 744 * ill_create_dl(), and will be used by ill_create_squery() 745 * to perform some twisted coding magic. It is the offset 746 * into the dl_xxx_req of the hw addr. Here, it will be 747 * added to b_wptr - b_rptr to create a magic number that 748 * is not an offset into this squery mblk. 749 * The actual hardware address will be accessed only in the 750 * dl_xxx_req, not in the squery. More importantly, 751 * that hardware address can *only* be accessed in this 752 * mblk chain by calling mi_offset_param_c(), which uses 753 * the magic number in the squery hw offset field to go 754 * to the *next* mblk (the dl_xxx_req), subtract the 755 * (b_wptr - b_rptr), and find the actual offset into 756 * the dl_xxx_req. 757 * Any method that depends on using the 758 * offset field in the dl_disabmulti_req or squery 759 * to find either hardware address will similarly fail. 760 * 761 * Look in ar_entry_squery() in arp.c to see how this offset 762 * is used. 763 */ 764 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 765 if (!mp) 766 return (ENOMEM); 767 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 768 inet_ntop(AF_INET6, v6groupp, group_buf, 769 sizeof (group_buf)), 770 ill->ill_name)); 771 putnext(ill->ill_rq, mp); 772 } else { 773 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on" 774 " %s\n", 775 inet_ntop(AF_INET6, v6groupp, group_buf, 776 sizeof (group_buf)), 777 ill->ill_name)); 778 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 779 } 780 return (0); 781 } 782 783 /* 784 * Send a multicast request to the driver for enabling multicast 785 * membership for v6group if appropriate. 786 */ 787 static int 788 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 789 { 790 ill_t *ill = ipif->ipif_ill; 791 792 ASSERT(IAM_WRITER_IPIF(ipif)); 793 794 if (ill->ill_net_type != IRE_IF_RESOLVER || 795 ipif->ipif_flags & IPIF_POINTOPOINT) { 796 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 797 return (0); /* Must be IRE_IF_NORESOLVER */ 798 } 799 800 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 801 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 802 return (0); 803 } 804 if (ill->ill_ipif_up_count == 0) { 805 /* 806 * Nobody there. All multicast addresses will be re-joined 807 * when we get the DL_BIND_ACK bringing the interface up. 808 */ 809 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 810 return (0); 811 } 812 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 813 } 814 815 /* 816 * INADDR_ANY means all multicast addresses. This is only used 817 * by the multicast router. 818 * INADDR_ANY is stored as the IPv6 unspecifed addr. 819 */ 820 int 821 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 822 { 823 ill_t *ill = ipif->ipif_ill; 824 ilm_t *ilm; 825 in6_addr_t v6group; 826 int ret; 827 828 ASSERT(IAM_WRITER_IPIF(ipif)); 829 830 if (!CLASSD(group) && group != INADDR_ANY) 831 return (EINVAL); 832 833 /* 834 * INADDR_ANY is represented as the IPv6 unspecifed addr. 835 */ 836 if (group == INADDR_ANY) 837 v6group = ipv6_all_zeros; 838 else 839 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 840 841 /* 842 * Look for a match on the ipif. 
843 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 844 */ 845 ilm = ilm_lookup_ipif(ipif, group); 846 if (ilm == NULL) 847 return (ENOENT); 848 849 /* Update counters */ 850 if (no_ilg) 851 ilm->ilm_no_ilg_cnt--; 852 853 if (leaving) 854 ilm->ilm_refcnt--; 855 856 if (ilm->ilm_refcnt > 0) 857 return (ilm_update_del(ilm, B_FALSE)); 858 859 if (group == INADDR_ANY) { 860 ilm_delete(ilm); 861 /* 862 * Check how many ipif's that have members in this group - 863 * if there are still some left then don't tell the driver 864 * to drop it. 865 */ 866 if (ilm_numentries_v6(ill, &v6group) != 0) 867 return (0); 868 869 /* 870 * If we never joined, then don't leave. This can happen 871 * if we're in an IPMP group, since only one ill per IPMP 872 * group receives all multicast packets. 873 */ 874 if (!ill->ill_join_allmulti) { 875 ASSERT(ill->ill_group != NULL); 876 return (0); 877 } 878 879 ret = ip_leave_allmulti(ipif); 880 if (ill->ill_group != NULL) 881 (void) ill_nominate_mcast_rcv(ill->ill_group); 882 return (ret); 883 } 884 885 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 886 igmp_leavegroup(ilm); 887 888 ilm_delete(ilm); 889 /* 890 * Check how many ipif's that have members in this group - 891 * if there are still some left then don't tell the driver 892 * to drop it. 893 */ 894 if (ilm_numentries_v6(ill, &v6group) != 0) 895 return (0); 896 return (ip_ll_delmulti_v6(ipif, &v6group)); 897 } 898 899 /* 900 * The unspecified address means all multicast addresses. 901 * This is only used by the multicast router. 902 */ 903 int 904 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 905 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 906 { 907 ipif_t *ipif; 908 ilm_t *ilm; 909 int ret; 910 911 ASSERT(IAM_WRITER_ILL(ill)); 912 913 if (!IN6_IS_ADDR_MULTICAST(v6group) && 914 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 915 return (EINVAL); 916 917 /* 918 * Look for a match on the ill. 919 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 920 * 921 * Similar to ip_addmulti_v6, we should always look using 922 * the orig_ifindex. 923 * 924 * 1) If orig_ifindex is different from ill's ifindex 925 * we should have an ilm with orig_ifindex created in 926 * ip_addmulti_v6. We should delete that here. 927 * 928 * 2) If orig_ifindex is same as ill's ifindex, we should 929 * not delete the ilm that is temporarily here because of 930 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 931 * different from ill's ifindex. 932 * 933 * Thus, always lookup using orig_ifindex. 934 */ 935 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 936 if (ilm == NULL) 937 return (ENOENT); 938 939 ASSERT(ilm->ilm_ill == ill); 940 941 ipif = ill->ill_ipif; 942 943 /* Update counters */ 944 if (no_ilg) 945 ilm->ilm_no_ilg_cnt--; 946 947 if (leaving) 948 ilm->ilm_refcnt--; 949 950 if (ilm->ilm_refcnt > 0) 951 return (ilm_update_del(ilm, B_TRUE)); 952 953 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 954 ilm_delete(ilm); 955 /* 956 * Check how many ipif's that have members in this group - 957 * if there are still some left then don't tell the driver 958 * to drop it. 959 */ 960 if (ilm_numentries_v6(ill, v6group) != 0) 961 return (0); 962 963 /* 964 * If we never joined, then don't leave. This can happen 965 * if we're in an IPMP group, since only one ill per IPMP 966 * group receives all multicast packets. 
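	 *
	 * (For example, in an IPMP group { hme0, hme1 } where hme1 was
	 * nominated as the multicast receiver, a leave processed on hme0
	 * finds ill_join_allmulti == B_FALSE and returns without touching
	 * the driver; hme1 is handled via ill_nominate_mcast_rcv().)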
967 */ 968 if (!ill->ill_join_allmulti) { 969 ASSERT(ill->ill_group != NULL); 970 return (0); 971 } 972 973 ret = ip_leave_allmulti(ipif); 974 if (ill->ill_group != NULL) 975 (void) ill_nominate_mcast_rcv(ill->ill_group); 976 return (ret); 977 } 978 979 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 980 mld_leavegroup(ilm); 981 982 ilm_delete(ilm); 983 /* 984 * Check how many ipif's that have members in this group - 985 * if there are still some left then don't tell the driver 986 * to drop it. 987 */ 988 if (ilm_numentries_v6(ill, v6group) != 0) 989 return (0); 990 return (ip_ll_delmulti_v6(ipif, v6group)); 991 } 992 993 /* 994 * Send a multicast request to the driver for disabling multicast reception 995 * for v6groupp address. The caller has already checked whether it is 996 * appropriate to send one or not. 997 */ 998 int 999 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1000 { 1001 mblk_t *mp; 1002 char group_buf[INET6_ADDRSTRLEN]; 1003 uint32_t addrlen, addroff; 1004 1005 ASSERT(IAM_WRITER_ILL(ill)); 1006 /* 1007 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1008 * on. 1009 */ 1010 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1011 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1012 1013 if (!mp) 1014 return (ENOMEM); 1015 1016 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1017 ipaddr_t v4group; 1018 1019 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1020 /* 1021 * NOTE!!! 1022 * The "addroff" passed in here was calculated by 1023 * ill_create_dl(), and will be used by ill_create_squery() 1024 * to perform some twisted coding magic. It is the offset 1025 * into the dl_xxx_req of the hw addr. Here, it will be 1026 * added to b_wptr - b_rptr to create a magic number that 1027 * is not an offset into this mblk. 1028 * 1029 * Please see the comment in ip_ll_send)enabmulti_req() 1030 * for a complete explanation. 1031 * 1032 * Look in ar_entry_squery() in arp.c to see how this offset 1033 * is used. 1034 */ 1035 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1036 if (!mp) 1037 return (ENOMEM); 1038 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1039 inet_ntop(AF_INET6, v6groupp, group_buf, 1040 sizeof (group_buf)), 1041 ill->ill_name)); 1042 putnext(ill->ill_rq, mp); 1043 } else { 1044 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on" 1045 " %s\n", 1046 inet_ntop(AF_INET6, v6groupp, group_buf, 1047 sizeof (group_buf)), 1048 ill->ill_name)); 1049 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1050 } 1051 return (0); 1052 } 1053 1054 /* 1055 * Send a multicast request to the driver for disabling multicast 1056 * membership for v6group if appropriate. 1057 */ 1058 static int 1059 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1060 { 1061 ill_t *ill = ipif->ipif_ill; 1062 1063 ASSERT(IAM_WRITER_IPIF(ipif)); 1064 1065 if (ill->ill_net_type != IRE_IF_RESOLVER || 1066 ipif->ipif_flags & IPIF_POINTOPOINT) { 1067 return (0); /* Must be IRE_IF_NORESOLVER */ 1068 } 1069 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1070 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1071 return (0); 1072 } 1073 if (ill->ill_ipif_up_count == 0) { 1074 /* 1075 * Nobody there. All multicast addresses will be re-joined 1076 * when we get the DL_BIND_ACK bringing the interface up. 
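	 *
	 * (The eventual re-join is handled by ill_recover_multicast(),
	 * further below, which walks ill_ilm and re-issues the appropriate
	 * enabmulti/allmulti requests once the interface is up again.)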
1077 */ 1078 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1079 return (0); 1080 } 1081 return (ip_ll_send_disabmulti_req(ill, v6group)); 1082 } 1083 1084 /* 1085 * Make the driver pass up all multicast packets 1086 * 1087 * With ill groups, the caller makes sure that there is only 1088 * one ill joining the allmulti group. 1089 */ 1090 int 1091 ip_join_allmulti(ipif_t *ipif) 1092 { 1093 ill_t *ill = ipif->ipif_ill; 1094 mblk_t *mp; 1095 uint32_t addrlen, addroff; 1096 1097 ASSERT(IAM_WRITER_IPIF(ipif)); 1098 1099 if (ill->ill_ipif_up_count == 0) { 1100 /* 1101 * Nobody there. All multicast addresses will be re-joined 1102 * when we get the DL_BIND_ACK bringing the interface up. 1103 */ 1104 return (0); 1105 } 1106 1107 ASSERT(!ill->ill_join_allmulti); 1108 1109 /* 1110 * Create a DL_PROMISCON_REQ message and send it directly to 1111 * the DLPI provider. We don't need to do this for certain 1112 * media types for which we never need to turn promiscuous 1113 * mode on. 1114 */ 1115 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1116 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1117 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1118 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1119 if (mp == NULL) 1120 return (ENOMEM); 1121 putnext(ill->ill_wq, mp); 1122 } 1123 1124 mutex_enter(&ill->ill_lock); 1125 ill->ill_join_allmulti = B_TRUE; 1126 mutex_exit(&ill->ill_lock); 1127 return (0); 1128 } 1129 1130 /* 1131 * Make the driver stop passing up all multicast packets 1132 * 1133 * With ill groups, we need to nominate some other ill as 1134 * this ipif->ipif_ill is leaving the group. 1135 */ 1136 int 1137 ip_leave_allmulti(ipif_t *ipif) 1138 { 1139 ill_t *ill = ipif->ipif_ill; 1140 mblk_t *mp; 1141 uint32_t addrlen, addroff; 1142 1143 ASSERT(IAM_WRITER_IPIF(ipif)); 1144 1145 if (ill->ill_ipif_up_count == 0) { 1146 /* 1147 * Nobody there. All multicast addresses will be re-joined 1148 * when we get the DL_BIND_ACK bringing the interface up. 1149 */ 1150 return (0); 1151 } 1152 1153 ASSERT(ill->ill_join_allmulti); 1154 1155 /* 1156 * Create a DL_PROMISCOFF_REQ message and send it directly to 1157 * the DLPI provider. We don't need to do this for certain 1158 * media types for which we never need to turn promiscuous 1159 * mode on. 1160 */ 1161 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1162 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1163 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1164 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1165 if (mp == NULL) 1166 return (ENOMEM); 1167 putnext(ill->ill_wq, mp); 1168 } 1169 1170 mutex_enter(&ill->ill_lock); 1171 ill->ill_join_allmulti = B_FALSE; 1172 mutex_exit(&ill->ill_lock); 1173 return (0); 1174 } 1175 1176 /* 1177 * Copy mp_orig and pass it in as a local message. 
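 *
 * For M_DATA UDP packets the payload is not copied: a small header mblk is
 * allocated, the IP and UDP headers are bcopy'd into it, and the (dup'd)
 * original message is chained behind it, roughly
 *
 *	[ new hdr mblk ] -> b_cont -> [ dupmsg()'d original, b_rptr advanced
 *	    past the headers ]
 *
 * Everything else goes through ip_copymsg().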
1178 */ 1179 void 1180 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1181 zoneid_t zoneid) 1182 { 1183 mblk_t *mp; 1184 mblk_t *ipsec_mp; 1185 ipha_t *iph; 1186 1187 if (DB_TYPE(mp_orig) == M_DATA && 1188 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1189 uint_t hdrsz; 1190 1191 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1192 sizeof (udpha_t); 1193 ASSERT(MBLKL(mp_orig) >= hdrsz); 1194 1195 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1196 (mp_orig = dupmsg(mp_orig)) != NULL) { 1197 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1198 mp->b_wptr += hdrsz; 1199 mp->b_cont = mp_orig; 1200 mp_orig->b_rptr += hdrsz; 1201 if (MBLKL(mp_orig) == 0) { 1202 mp->b_cont = mp_orig->b_cont; 1203 mp_orig->b_cont = NULL; 1204 freeb(mp_orig); 1205 } 1206 } else if (mp != NULL) { 1207 freeb(mp); 1208 mp = NULL; 1209 } 1210 } else { 1211 mp = ip_copymsg(mp_orig); 1212 } 1213 1214 if (mp == NULL) 1215 return; 1216 if (DB_TYPE(mp) == M_CTL) { 1217 ipsec_mp = mp; 1218 mp = mp->b_cont; 1219 } else { 1220 ipsec_mp = mp; 1221 } 1222 1223 iph = (ipha_t *)mp->b_rptr; 1224 1225 DTRACE_PROBE4(ip4__loopback__out__start, 1226 ill_t *, NULL, ill_t *, ill, 1227 ipha_t *, iph, mblk_t *, ipsec_mp); 1228 1229 FW_HOOKS(ip4_loopback_out_event, ipv4firewall_loopback_out, NULL, ill, 1230 iph, ipsec_mp, mp); 1231 1232 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1233 1234 if (ipsec_mp != NULL) 1235 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1236 fanout_flags, zoneid); 1237 } 1238 1239 static area_t ip_aresq_template = { 1240 AR_ENTRY_SQUERY, /* cmd */ 1241 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1242 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1243 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1244 sizeof (area_t), /* proto addr offset */ 1245 IP_ADDR_LEN, /* proto addr_length */ 1246 0, /* proto mask offset */ 1247 /* Rest is initialized when used */ 1248 0, /* flags */ 1249 0, /* hw addr offset */ 1250 0, /* hw addr length */ 1251 }; 1252 1253 static mblk_t * 1254 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1255 uint32_t addroff, mblk_t *mp_tail) 1256 { 1257 mblk_t *mp; 1258 area_t *area; 1259 1260 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1261 (caddr_t)&ipaddr); 1262 if (!mp) { 1263 freemsg(mp_tail); 1264 return (NULL); 1265 } 1266 area = (area_t *)mp->b_rptr; 1267 area->area_hw_addr_length = addrlen; 1268 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1269 /* 1270 * NOTE! 1271 * 1272 * The area_hw_addr_offset, as can be seen, does not hold the 1273 * actual hardware address offset. Rather, it holds the offset 1274 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1275 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1276 * mi_offset_paramc() to find the hardware address in the 1277 * *second* mblk (dl_xxx_req), not this mblk. 1278 * 1279 * Using mi_offset_paramc() is thus the *only* way to access 1280 * the dl_xxx_hw address. 1281 * 1282 * The squery hw address should *not* be accessed. 1283 * 1284 * See ar_entry_squery() in arp.c for an example of how all this works. 1285 */ 1286 1287 mp->b_cont = mp_tail; 1288 return (mp); 1289 } 1290 1291 /* 1292 * Create a dlpi message with room for phys+sap. When we come back in 1293 * ip_wput_ctl() we will strip the sap for those primitives which 1294 * only need a physical address. 
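 *
 * For DL_ENABMULTI_REQ and DL_DISABMULTI_REQ the message built here is laid
 * out roughly as:
 *
 *	b_rptr -> +-----------------------------------------+
 *	          | dl_enabmulti_req_t                      |
 *	          |   dl_addr_length = ill_phys_addr_length |
 *	          |   dl_addr_offset = sizeof (request)     |
 *	          +-----------------------------------------+
 *	          | space for the hardware multicast addr   |
 *	b_wptr -> +-----------------------------------------+
 *
 * DL_PROMISCON_REQ and DL_PROMISCOFF_REQ carry no address and use
 * DL_PROMISC_MULTI as the level.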
1295 */ 1296 static mblk_t * 1297 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1298 uint32_t *addr_lenp, uint32_t *addr_offp) 1299 { 1300 mblk_t *mp; 1301 uint32_t hw_addr_length; 1302 char *cp; 1303 uint32_t offset; 1304 uint32_t size; 1305 1306 *addr_lenp = *addr_offp = 0; 1307 1308 hw_addr_length = ill->ill_phys_addr_length; 1309 if (!hw_addr_length) { 1310 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1311 return (NULL); 1312 } 1313 1314 size = length; 1315 switch (dl_primitive) { 1316 case DL_ENABMULTI_REQ: 1317 case DL_DISABMULTI_REQ: 1318 size += hw_addr_length; 1319 break; 1320 case DL_PROMISCON_REQ: 1321 case DL_PROMISCOFF_REQ: 1322 break; 1323 default: 1324 return (NULL); 1325 } 1326 mp = allocb(size, BPRI_HI); 1327 if (!mp) 1328 return (NULL); 1329 mp->b_wptr += size; 1330 mp->b_datap->db_type = M_PROTO; 1331 1332 cp = (char *)mp->b_rptr; 1333 offset = length; 1334 1335 switch (dl_primitive) { 1336 case DL_ENABMULTI_REQ: { 1337 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1338 1339 dl->dl_primitive = dl_primitive; 1340 dl->dl_addr_offset = offset; 1341 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1342 *addr_offp = offset; 1343 break; 1344 } 1345 case DL_DISABMULTI_REQ: { 1346 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1347 1348 dl->dl_primitive = dl_primitive; 1349 dl->dl_addr_offset = offset; 1350 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1351 *addr_offp = offset; 1352 break; 1353 } 1354 case DL_PROMISCON_REQ: 1355 case DL_PROMISCOFF_REQ: { 1356 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1357 1358 dl->dl_primitive = dl_primitive; 1359 dl->dl_level = DL_PROMISC_MULTI; 1360 break; 1361 } 1362 } 1363 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1364 *addr_lenp, *addr_offp)); 1365 return (mp); 1366 } 1367 1368 void 1369 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1370 { 1371 ill_t *ill = (ill_t *)q->q_ptr; 1372 mblk_t *mp = mp_orig; 1373 area_t *area; 1374 1375 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1376 if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) || 1377 mp->b_cont == NULL) { 1378 putnext(q, mp); 1379 return; 1380 } 1381 area = (area_t *)mp->b_rptr; 1382 if (area->area_cmd != AR_ENTRY_SQUERY) { 1383 putnext(q, mp); 1384 return; 1385 } 1386 mp = mp->b_cont; 1387 /* 1388 * Update dl_addr_length and dl_addr_offset for primitives that 1389 * have physical addresses as opposed to full saps 1390 */ 1391 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1392 case DL_ENABMULTI_REQ: 1393 /* Track the state if this is the first enabmulti */ 1394 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1395 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1396 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1397 break; 1398 case DL_DISABMULTI_REQ: 1399 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1400 break; 1401 default: 1402 ip1dbg(("ip_wput_ctl: default\n")); 1403 break; 1404 } 1405 freeb(mp_orig); 1406 putnext(q, mp); 1407 } 1408 1409 /* 1410 * Rejoin any groups which have been explicitly joined by the application (we 1411 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1412 * bringing the interface down). Note that because groups can be joined and 1413 * left while an interface is down, this may not be the same set of groups 1414 * that we left in ill_leave_multicast(). 
1415 */ 1416 void 1417 ill_recover_multicast(ill_t *ill) 1418 { 1419 ilm_t *ilm; 1420 char addrbuf[INET6_ADDRSTRLEN]; 1421 1422 ASSERT(IAM_WRITER_ILL(ill)); 1423 1424 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1425 /* 1426 * Check how many ipif's that have members in this group - 1427 * if more then one we make sure that this entry is first 1428 * in the list. 1429 */ 1430 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1431 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1432 continue; 1433 ip1dbg(("ill_recover_multicast: %s\n", 1434 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1435 sizeof (addrbuf)))); 1436 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1437 if (ill->ill_group == NULL) { 1438 (void) ip_join_allmulti(ill->ill_ipif); 1439 } else { 1440 /* 1441 * We don't want to join on this ill, 1442 * if somebody else in the group has 1443 * already been nominated. 1444 */ 1445 (void) ill_nominate_mcast_rcv(ill->ill_group); 1446 } 1447 } else { 1448 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1449 &ilm->ilm_v6addr); 1450 } 1451 } 1452 } 1453 1454 /* 1455 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1456 * that were explicitly joined. Note that both these functions could be 1457 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1458 * and DL_ENABMULTI_REQ messages when an interface is down. 1459 */ 1460 void 1461 ill_leave_multicast(ill_t *ill) 1462 { 1463 ilm_t *ilm; 1464 char addrbuf[INET6_ADDRSTRLEN]; 1465 1466 ASSERT(IAM_WRITER_ILL(ill)); 1467 1468 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1469 /* 1470 * Check how many ipif's that have members in this group - 1471 * if more then one we make sure that this entry is first 1472 * in the list. 1473 */ 1474 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1475 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1476 continue; 1477 ip1dbg(("ill_leave_multicast: %s\n", 1478 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1479 sizeof (addrbuf)))); 1480 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1481 (void) ip_leave_allmulti(ill->ill_ipif); 1482 /* 1483 * If we were part of an IPMP group, then 1484 * ill_handoff_responsibility() has already 1485 * nominated a new member (so we don't). 1486 */ 1487 ASSERT(ill->ill_group == NULL); 1488 } else { 1489 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1490 } 1491 } 1492 } 1493 1494 /* 1495 * Find an ilm for matching the ill and which has the source in its 1496 * INCLUDE list or does not have it in its EXCLUDE list 1497 */ 1498 ilm_t * 1499 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1500 { 1501 in6_addr_t v6group, v6src; 1502 1503 /* 1504 * INADDR_ANY is represented as the IPv6 unspecified addr. 1505 */ 1506 if (group == INADDR_ANY) 1507 v6group = ipv6_all_zeros; 1508 else 1509 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1510 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1511 1512 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1513 } 1514 1515 ilm_t * 1516 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1517 const in6_addr_t *v6src) 1518 { 1519 ilm_t *ilm; 1520 boolean_t isinlist; 1521 int i, numsrc; 1522 1523 /* 1524 * If the source is in any ilm's INCLUDE list, or if 1525 * it is not in any ilm's EXCLUDE list, we have a hit. 1526 */ 1527 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1528 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1529 1530 isinlist = B_FALSE; 1531 numsrc = (ilm->ilm_filter == NULL) ? 
1532 0 : ilm->ilm_filter->sl_numsrc; 1533 for (i = 0; i < numsrc; i++) { 1534 if (IN6_ARE_ADDR_EQUAL(v6src, 1535 &ilm->ilm_filter->sl_addr[i])) { 1536 isinlist = B_TRUE; 1537 break; 1538 } 1539 } 1540 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1541 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1542 return (ilm); 1543 else 1544 return (NULL); 1545 } 1546 } 1547 return (NULL); 1548 } 1549 1550 1551 /* Find an ilm for matching the ill */ 1552 ilm_t * 1553 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1554 { 1555 in6_addr_t v6group; 1556 1557 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1558 IAM_WRITER_ILL(ill)); 1559 /* 1560 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1561 */ 1562 if (group == INADDR_ANY) 1563 v6group = ipv6_all_zeros; 1564 else 1565 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1566 1567 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1568 } 1569 1570 /* 1571 * Find an ilm for matching the ill. All the ilm lookup functions 1572 * ignore ILM_DELETED ilms. These have been logically deleted, and 1573 * igmp and linklayer disable multicast have been done. Only mi_free 1574 * yet to be done. Still there in the list due to ilm_walkers. The 1575 * last walker will release it. 1576 */ 1577 ilm_t * 1578 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1579 { 1580 ilm_t *ilm; 1581 1582 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1583 IAM_WRITER_ILL(ill)); 1584 1585 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1586 if (ilm->ilm_flags & ILM_DELETED) 1587 continue; 1588 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1589 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1590 return (ilm); 1591 } 1592 return (NULL); 1593 } 1594 1595 ilm_t * 1596 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1597 zoneid_t zoneid) 1598 { 1599 ilm_t *ilm; 1600 1601 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1602 IAM_WRITER_ILL(ill)); 1603 1604 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1605 if (ilm->ilm_flags & ILM_DELETED) 1606 continue; 1607 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1608 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1609 ilm->ilm_orig_ifindex == index) { 1610 return (ilm); 1611 } 1612 } 1613 return (NULL); 1614 } 1615 1616 ilm_t * 1617 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1618 { 1619 in6_addr_t v6group; 1620 1621 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1622 IAM_WRITER_ILL(ill)); 1623 /* 1624 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1625 */ 1626 if (group == INADDR_ANY) 1627 v6group = ipv6_all_zeros; 1628 else 1629 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1630 1631 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1632 } 1633 1634 /* 1635 * Found an ilm for the ipif. Only needed for IPv4 which does 1636 * ipif specific socket options. 1637 */ 1638 ilm_t * 1639 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1640 { 1641 ill_t *ill = ipif->ipif_ill; 1642 ilm_t *ilm; 1643 in6_addr_t v6group; 1644 1645 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1646 IAM_WRITER_ILL(ill)); 1647 1648 /* 1649 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
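	 * For example, group 224.1.2.3 is held in ilm_v6addr as the
	 * v4mapped address ::ffff:224.1.2.3, and INADDR_ANY as ::.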
1650 */ 1651 if (group == INADDR_ANY) 1652 v6group = ipv6_all_zeros; 1653 else 1654 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1655 1656 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1657 if (ilm->ilm_flags & ILM_DELETED) 1658 continue; 1659 if (ilm->ilm_ipif == ipif && 1660 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1661 return (ilm); 1662 } 1663 return (NULL); 1664 } 1665 1666 /* 1667 * How many members on this ill? 1668 */ 1669 int 1670 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1671 { 1672 ilm_t *ilm; 1673 int i = 0; 1674 1675 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1676 IAM_WRITER_ILL(ill)); 1677 1678 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1679 if (ilm->ilm_flags & ILM_DELETED) 1680 continue; 1681 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1682 i++; 1683 } 1684 } 1685 return (i); 1686 } 1687 1688 /* Caller guarantees that the group is not already on the list */ 1689 static ilm_t * 1690 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1691 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1692 zoneid_t zoneid) 1693 { 1694 ill_t *ill = ipif->ipif_ill; 1695 ilm_t *ilm; 1696 ilm_t *ilm_cur; 1697 ilm_t **ilm_ptpn; 1698 1699 ASSERT(IAM_WRITER_IPIF(ipif)); 1700 1701 ilm = GETSTRUCT(ilm_t, 1); 1702 if (ilm == NULL) 1703 return (NULL); 1704 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1705 ilm->ilm_filter = l_alloc(); 1706 if (ilm->ilm_filter == NULL) { 1707 mi_free(ilm); 1708 return (NULL); 1709 } 1710 } 1711 ilm->ilm_v6addr = *v6group; 1712 ilm->ilm_refcnt = 1; 1713 ilm->ilm_zoneid = zoneid; 1714 ilm->ilm_timer = INFINITY; 1715 ilm->ilm_rtx.rtx_timer = INFINITY; 1716 1717 /* 1718 * IPv4 Multicast groups are joined using ipif. 1719 * IPv6 Multicast groups are joined using ill. 1720 */ 1721 if (ill->ill_isv6) { 1722 ilm->ilm_ill = ill; 1723 ilm->ilm_ipif = NULL; 1724 } else { 1725 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1726 ilm->ilm_ipif = ipif; 1727 ilm->ilm_ill = NULL; 1728 } 1729 /* 1730 * After this if ilm moves to a new ill, we don't change 1731 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1732 * it has been moved. Indexes don't match even when the application 1733 * wants to join on a FAILED/INACTIVE interface because we choose 1734 * a new interface to join in. This is considered as an implicit 1735 * move. 1736 */ 1737 ilm->ilm_orig_ifindex = orig_ifindex; 1738 1739 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1740 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1741 1742 /* 1743 * Grab lock to give consistent view to readers 1744 */ 1745 mutex_enter(&ill->ill_lock); 1746 /* 1747 * All ilms in the same zone are contiguous in the ill_ilm list. 1748 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1749 * sending duplicates up when two applications in the same zone join the 1750 * same group on different logical interfaces. 1751 */ 1752 ilm_cur = ill->ill_ilm; 1753 ilm_ptpn = &ill->ill_ilm; 1754 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1755 ilm_ptpn = &ilm_cur->ilm_next; 1756 ilm_cur = ilm_cur->ilm_next; 1757 } 1758 ilm->ilm_next = ilm_cur; 1759 *ilm_ptpn = ilm; 1760 1761 /* 1762 * If we have an associated ilg, use its filter state; if not, 1763 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
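	 * (ILGSTAT_NONE means the join has no ilg at all, i.e. no socket
	 * behind it; ILGSTAT_NEW and ILGSTAT_CHANGE both arrive with the
	 * ilg's current fmode and source list in ilg_fmode/ilg_flist.)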
1764 */ 1765 if (ilgstat != ILGSTAT_NONE) { 1766 if (!SLIST_IS_EMPTY(ilg_flist)) 1767 l_copy(ilg_flist, ilm->ilm_filter); 1768 ilm->ilm_fmode = ilg_fmode; 1769 } else { 1770 ilm->ilm_no_ilg_cnt = 1; 1771 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1772 } 1773 1774 mutex_exit(&ill->ill_lock); 1775 return (ilm); 1776 } 1777 1778 void 1779 ilm_walker_cleanup(ill_t *ill) 1780 { 1781 ilm_t **ilmp; 1782 ilm_t *ilm; 1783 1784 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1785 ASSERT(ill->ill_ilm_walker_cnt == 0); 1786 1787 ilmp = &ill->ill_ilm; 1788 while (*ilmp != NULL) { 1789 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1790 ilm = *ilmp; 1791 *ilmp = ilm->ilm_next; 1792 FREE_SLIST(ilm->ilm_filter); 1793 FREE_SLIST(ilm->ilm_pendsrcs); 1794 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1795 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1796 mi_free((char *)ilm); 1797 } else { 1798 ilmp = &(*ilmp)->ilm_next; 1799 } 1800 } 1801 ill->ill_ilm_cleanup_reqd = 0; 1802 } 1803 1804 /* 1805 * Unlink ilm and free it. 1806 */ 1807 static void 1808 ilm_delete(ilm_t *ilm) 1809 { 1810 ill_t *ill; 1811 ilm_t **ilmp; 1812 1813 if (ilm->ilm_ipif != NULL) { 1814 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1815 ASSERT(ilm->ilm_ill == NULL); 1816 ill = ilm->ilm_ipif->ipif_ill; 1817 ASSERT(!ill->ill_isv6); 1818 } else { 1819 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1820 ASSERT(ilm->ilm_ipif == NULL); 1821 ill = ilm->ilm_ill; 1822 ASSERT(ill->ill_isv6); 1823 } 1824 /* 1825 * Delete under lock protection so that readers don't stumble 1826 * on bad ilm_next 1827 */ 1828 mutex_enter(&ill->ill_lock); 1829 if (ill->ill_ilm_walker_cnt != 0) { 1830 ilm->ilm_flags |= ILM_DELETED; 1831 ill->ill_ilm_cleanup_reqd = 1; 1832 mutex_exit(&ill->ill_lock); 1833 return; 1834 } 1835 1836 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1837 ; 1838 *ilmp = ilm->ilm_next; 1839 mutex_exit(&ill->ill_lock); 1840 1841 FREE_SLIST(ilm->ilm_filter); 1842 FREE_SLIST(ilm->ilm_pendsrcs); 1843 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1844 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1845 mi_free((char *)ilm); 1846 } 1847 1848 /* Free all ilms for this ipif */ 1849 void 1850 ilm_free(ipif_t *ipif) 1851 { 1852 ill_t *ill = ipif->ipif_ill; 1853 ilm_t *ilm; 1854 ilm_t *next_ilm; 1855 1856 ASSERT(IAM_WRITER_IPIF(ipif)); 1857 1858 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1859 next_ilm = ilm->ilm_next; 1860 if (ilm->ilm_ipif == ipif) 1861 ilm_delete(ilm); 1862 } 1863 } 1864 1865 /* 1866 * Looks up the appropriate ipif given a v4 multicast group and interface 1867 * address. On success, returns 0, with *ipifpp pointing to the found 1868 * struct. On failure, returns an errno and *ipifpp is NULL. 
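 *
 * A typical caller does something like this (sketch; error handling
 * abbreviated):
 *
 *	err = ip_opt_check(connp, group, src, ifaddr, &ifindex, first_mp,
 *	    func, &ipif);
 *	if (err != 0)
 *		return (err);	EINPROGRESS means the operation was
 *				deferred and func will be re-invoked
 *	...
 *	ipif_refrele(ipif);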
1869 */ 1870 int 1871 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1872 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1873 { 1874 ipif_t *ipif; 1875 int err = 0; 1876 zoneid_t zoneid; 1877 1878 if (!CLASSD(group) || CLASSD(src)) { 1879 return (EINVAL); 1880 } 1881 *ipifpp = NULL; 1882 1883 zoneid = IPCL_ZONEID(connp); 1884 1885 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1886 if (ifaddr != INADDR_ANY) { 1887 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1888 CONNP_TO_WQ(connp), first_mp, func, &err); 1889 if (err != 0 && err != EINPROGRESS) 1890 err = EADDRNOTAVAIL; 1891 } else if (ifindexp != NULL && *ifindexp != 0) { 1892 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1893 CONNP_TO_WQ(connp), first_mp, func, &err); 1894 } else { 1895 ipif = ipif_lookup_group(group, zoneid); 1896 if (ipif == NULL) 1897 return (EADDRNOTAVAIL); 1898 } 1899 if (ipif == NULL) 1900 return (err); 1901 1902 *ipifpp = ipif; 1903 return (0); 1904 } 1905 1906 /* 1907 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1908 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 1909 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1910 * an errno and *illpp and *ipifpp are undefined. 1911 */ 1912 int 1913 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1914 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1915 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1916 { 1917 boolean_t src_unspec; 1918 ill_t *ill = NULL; 1919 ipif_t *ipif = NULL; 1920 int err; 1921 zoneid_t zoneid = connp->conn_zoneid; 1922 queue_t *wq = CONNP_TO_WQ(connp); 1923 1924 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1925 1926 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1927 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1928 return (EINVAL); 1929 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1930 if (src_unspec) { 1931 *v4src = INADDR_ANY; 1932 } else { 1933 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1934 } 1935 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1936 return (EINVAL); 1937 *ipifpp = NULL; 1938 *isv6 = B_FALSE; 1939 } else { 1940 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1941 return (EINVAL); 1942 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1943 IN6_IS_ADDR_MULTICAST(v6src)) { 1944 return (EINVAL); 1945 } 1946 *illpp = NULL; 1947 *isv6 = B_TRUE; 1948 } 1949 1950 if (ifindex == 0) { 1951 if (*isv6) 1952 ill = ill_lookup_group_v6(v6group, zoneid); 1953 else 1954 ipif = ipif_lookup_group(*v4group, zoneid); 1955 if (ill == NULL && ipif == NULL) 1956 return (EADDRNOTAVAIL); 1957 } else { 1958 if (*isv6) { 1959 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1960 wq, first_mp, func, &err); 1961 if (ill != NULL && 1962 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1963 ill_refrele(ill); 1964 ill = NULL; 1965 err = EADDRNOTAVAIL; 1966 } 1967 } else { 1968 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1969 zoneid, wq, first_mp, func, &err); 1970 } 1971 if (ill == NULL && ipif == NULL) 1972 return (err); 1973 } 1974 1975 *ipifpp = ipif; 1976 *illpp = ill; 1977 return (0); 1978 } 1979 1980 static int 1981 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1982 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1983 { 1984 ilg_t *ilg; 1985 int i, numsrc, fmode, outsrcs; 1986 struct sockaddr_in *sin; 1987 struct sockaddr_in6 *sin6; 1988 struct in_addr *addrp; 1989 slist_t *fp; 1990 boolean_t is_v4only_api; 1991 
1992 mutex_enter(&connp->conn_lock); 1993 1994 ilg = ilg_lookup_ipif(connp, grp, ipif); 1995 if (ilg == NULL) { 1996 mutex_exit(&connp->conn_lock); 1997 return (EADDRNOTAVAIL); 1998 } 1999 2000 if (gf == NULL) { 2001 ASSERT(imsf != NULL); 2002 ASSERT(!isv4mapped); 2003 is_v4only_api = B_TRUE; 2004 outsrcs = imsf->imsf_numsrc; 2005 } else { 2006 ASSERT(imsf == NULL); 2007 is_v4only_api = B_FALSE; 2008 outsrcs = gf->gf_numsrc; 2009 } 2010 2011 /* 2012 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2013 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2014 * So we need to translate here. 2015 */ 2016 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2017 MCAST_INCLUDE : MCAST_EXCLUDE; 2018 if ((fp = ilg->ilg_filter) == NULL) { 2019 numsrc = 0; 2020 } else { 2021 for (i = 0; i < outsrcs; i++) { 2022 if (i == fp->sl_numsrc) 2023 break; 2024 if (isv4mapped) { 2025 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2026 sin6->sin6_family = AF_INET6; 2027 sin6->sin6_addr = fp->sl_addr[i]; 2028 } else { 2029 if (is_v4only_api) { 2030 addrp = &imsf->imsf_slist[i]; 2031 } else { 2032 sin = (struct sockaddr_in *) 2033 &gf->gf_slist[i]; 2034 sin->sin_family = AF_INET; 2035 addrp = &sin->sin_addr; 2036 } 2037 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2038 } 2039 } 2040 numsrc = fp->sl_numsrc; 2041 } 2042 2043 if (is_v4only_api) { 2044 imsf->imsf_numsrc = numsrc; 2045 imsf->imsf_fmode = fmode; 2046 } else { 2047 gf->gf_numsrc = numsrc; 2048 gf->gf_fmode = fmode; 2049 } 2050 2051 mutex_exit(&connp->conn_lock); 2052 2053 return (0); 2054 } 2055 2056 static int 2057 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2058 const struct in6_addr *grp, ill_t *ill) 2059 { 2060 ilg_t *ilg; 2061 int i; 2062 struct sockaddr_storage *sl; 2063 struct sockaddr_in6 *sin6; 2064 slist_t *fp; 2065 2066 mutex_enter(&connp->conn_lock); 2067 2068 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2069 if (ilg == NULL) { 2070 mutex_exit(&connp->conn_lock); 2071 return (EADDRNOTAVAIL); 2072 } 2073 2074 /* 2075 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2076 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2077 * So we need to translate here. 2078 */ 2079 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2080 MCAST_INCLUDE : MCAST_EXCLUDE; 2081 if ((fp = ilg->ilg_filter) == NULL) { 2082 gf->gf_numsrc = 0; 2083 } else { 2084 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2085 if (i == fp->sl_numsrc) 2086 break; 2087 sin6 = (struct sockaddr_in6 *)sl; 2088 sin6->sin6_family = AF_INET6; 2089 sin6->sin6_addr = fp->sl_addr[i]; 2090 } 2091 gf->gf_numsrc = fp->sl_numsrc; 2092 } 2093 2094 mutex_exit(&connp->conn_lock); 2095 2096 return (0); 2097 } 2098 2099 static int 2100 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2101 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2102 { 2103 ilg_t *ilg; 2104 int i, err, insrcs, infmode, new_fmode; 2105 struct sockaddr_in *sin; 2106 struct sockaddr_in6 *sin6; 2107 struct in_addr *addrp; 2108 slist_t *orig_filter = NULL; 2109 slist_t *new_filter = NULL; 2110 mcast_record_t orig_fmode; 2111 boolean_t leave_grp, is_v4only_api; 2112 ilg_stat_t ilgstat; 2113 2114 if (gf == NULL) { 2115 ASSERT(imsf != NULL); 2116 ASSERT(!isv4mapped); 2117 is_v4only_api = B_TRUE; 2118 insrcs = imsf->imsf_numsrc; 2119 infmode = imsf->imsf_fmode; 2120 } else { 2121 ASSERT(imsf == NULL); 2122 is_v4only_api = B_FALSE; 2123 insrcs = gf->gf_numsrc; 2124 infmode = gf->gf_fmode; 2125 } 2126 2127 /* Make sure we can handle the source list */ 2128 if (insrcs > MAX_FILTER_SIZE) 2129 return (ENOBUFS); 2130 2131 /* 2132 * setting the filter to (INCLUDE, NULL) is treated 2133 * as a request to leave the group. 2134 */ 2135 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2136 2137 ASSERT(IAM_WRITER_IPIF(ipif)); 2138 2139 mutex_enter(&connp->conn_lock); 2140 2141 ilg = ilg_lookup_ipif(connp, grp, ipif); 2142 if (ilg == NULL) { 2143 /* 2144 * if the request was actually to leave, and we 2145 * didn't find an ilg, there's nothing to do. 2146 */ 2147 if (!leave_grp) 2148 ilg = conn_ilg_alloc(connp); 2149 if (leave_grp || ilg == NULL) { 2150 mutex_exit(&connp->conn_lock); 2151 return (leave_grp ? 0 : ENOMEM); 2152 } 2153 ilgstat = ILGSTAT_NEW; 2154 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2155 ilg->ilg_ipif = ipif; 2156 ilg->ilg_ill = NULL; 2157 ilg->ilg_orig_ifindex = 0; 2158 } else if (leave_grp) { 2159 ilg_delete(connp, ilg, NULL); 2160 mutex_exit(&connp->conn_lock); 2161 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2162 return (0); 2163 } else { 2164 ilgstat = ILGSTAT_CHANGE; 2165 /* Preserve existing state in case ip_addmulti() fails */ 2166 orig_fmode = ilg->ilg_fmode; 2167 if (ilg->ilg_filter == NULL) { 2168 orig_filter = NULL; 2169 } else { 2170 orig_filter = l_alloc_copy(ilg->ilg_filter); 2171 if (orig_filter == NULL) { 2172 mutex_exit(&connp->conn_lock); 2173 return (ENOMEM); 2174 } 2175 } 2176 } 2177 2178 /* 2179 * Alloc buffer to copy new state into (see below) before 2180 * we make any changes, so we can bail if it fails. 
2181 */ 2182 if ((new_filter = l_alloc()) == NULL) { 2183 mutex_exit(&connp->conn_lock); 2184 err = ENOMEM; 2185 goto free_and_exit; 2186 } 2187 2188 if (insrcs == 0) { 2189 CLEAR_SLIST(ilg->ilg_filter); 2190 } else { 2191 slist_t *fp; 2192 if (ilg->ilg_filter == NULL) { 2193 fp = l_alloc(); 2194 if (fp == NULL) { 2195 if (ilgstat == ILGSTAT_NEW) 2196 ilg_delete(connp, ilg, NULL); 2197 mutex_exit(&connp->conn_lock); 2198 err = ENOMEM; 2199 goto free_and_exit; 2200 } 2201 } else { 2202 fp = ilg->ilg_filter; 2203 } 2204 for (i = 0; i < insrcs; i++) { 2205 if (isv4mapped) { 2206 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2207 fp->sl_addr[i] = sin6->sin6_addr; 2208 } else { 2209 if (is_v4only_api) { 2210 addrp = &imsf->imsf_slist[i]; 2211 } else { 2212 sin = (struct sockaddr_in *) 2213 &gf->gf_slist[i]; 2214 addrp = &sin->sin_addr; 2215 } 2216 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2217 } 2218 } 2219 fp->sl_numsrc = insrcs; 2220 ilg->ilg_filter = fp; 2221 } 2222 /* 2223 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2224 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2225 * So we need to translate here. 2226 */ 2227 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2228 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2229 2230 /* 2231 * Save copy of ilg's filter state to pass to other functions, 2232 * so we can release conn_lock now. 2233 */ 2234 new_fmode = ilg->ilg_fmode; 2235 l_copy(ilg->ilg_filter, new_filter); 2236 2237 mutex_exit(&connp->conn_lock); 2238 2239 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2240 if (err != 0) { 2241 /* 2242 * Restore the original filter state, or delete the 2243 * newly-created ilg. We need to look up the ilg 2244 * again, though, since we've not been holding the 2245 * conn_lock. 2246 */ 2247 mutex_enter(&connp->conn_lock); 2248 ilg = ilg_lookup_ipif(connp, grp, ipif); 2249 ASSERT(ilg != NULL); 2250 if (ilgstat == ILGSTAT_NEW) { 2251 ilg_delete(connp, ilg, NULL); 2252 } else { 2253 ilg->ilg_fmode = orig_fmode; 2254 if (SLIST_IS_EMPTY(orig_filter)) { 2255 CLEAR_SLIST(ilg->ilg_filter); 2256 } else { 2257 /* 2258 * We didn't free the filter, even if we 2259 * were trying to make the source list empty; 2260 * so if orig_filter isn't empty, the ilg 2261 * must still have a filter alloc'd. 2262 */ 2263 l_copy(orig_filter, ilg->ilg_filter); 2264 } 2265 } 2266 mutex_exit(&connp->conn_lock); 2267 } 2268 2269 free_and_exit: 2270 l_free(orig_filter); 2271 l_free(new_filter); 2272 2273 return (err); 2274 } 2275 2276 static int 2277 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2278 const struct in6_addr *grp, ill_t *ill) 2279 { 2280 ilg_t *ilg; 2281 int i, orig_ifindex, orig_fmode, new_fmode, err; 2282 slist_t *orig_filter = NULL; 2283 slist_t *new_filter = NULL; 2284 struct sockaddr_storage *sl; 2285 struct sockaddr_in6 *sin6; 2286 boolean_t leave_grp; 2287 ilg_stat_t ilgstat; 2288 2289 /* Make sure we can handle the source list */ 2290 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2291 return (ENOBUFS); 2292 2293 /* 2294 * setting the filter to (INCLUDE, NULL) is treated 2295 * as a request to leave the group. 2296 */ 2297 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2298 2299 ASSERT(IAM_WRITER_ILL(ill)); 2300 2301 /* 2302 * Use the ifindex to do the lookup. We can't use the ill 2303 * directly because ilg_ill could point to a different ill 2304 * if things have moved. 
2305 */ 2306 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2307 2308 mutex_enter(&connp->conn_lock); 2309 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2310 if (ilg == NULL) { 2311 /* 2312 * if the request was actually to leave, and we 2313 * didn't find an ilg, there's nothing to do. 2314 */ 2315 if (!leave_grp) 2316 ilg = conn_ilg_alloc(connp); 2317 if (leave_grp || ilg == NULL) { 2318 mutex_exit(&connp->conn_lock); 2319 return (leave_grp ? 0 : ENOMEM); 2320 } 2321 ilgstat = ILGSTAT_NEW; 2322 ilg->ilg_v6group = *grp; 2323 ilg->ilg_ipif = NULL; 2324 /* 2325 * Choose our target ill to join on. This might be 2326 * different from the ill we've been given if it's 2327 * currently down and part of a group. 2328 * 2329 * new ill is not refheld; we are writer. 2330 */ 2331 ill = ip_choose_multi_ill(ill, grp); 2332 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2333 ilg->ilg_ill = ill; 2334 /* 2335 * Remember the index that we joined on, so that we can 2336 * successfully delete them later on and also search for 2337 * duplicates if the application wants to join again. 2338 */ 2339 ilg->ilg_orig_ifindex = orig_ifindex; 2340 } else if (leave_grp) { 2341 /* 2342 * Use the ilg's current ill for the deletion, 2343 * we might have failed over. 2344 */ 2345 ill = ilg->ilg_ill; 2346 ilg_delete(connp, ilg, NULL); 2347 mutex_exit(&connp->conn_lock); 2348 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2349 connp->conn_zoneid, B_FALSE, B_TRUE); 2350 return (0); 2351 } else { 2352 ilgstat = ILGSTAT_CHANGE; 2353 /* 2354 * The current ill might be different from the one we were 2355 * asked to join on (if failover has occurred); we should 2356 * join on the ill stored in the ilg. The original ill 2357 * is noted in ilg_orig_ifindex, which matched our request. 2358 */ 2359 ill = ilg->ilg_ill; 2360 /* preserve existing state in case ip_addmulti() fails */ 2361 orig_fmode = ilg->ilg_fmode; 2362 if (ilg->ilg_filter == NULL) { 2363 orig_filter = NULL; 2364 } else { 2365 orig_filter = l_alloc_copy(ilg->ilg_filter); 2366 if (orig_filter == NULL) { 2367 mutex_exit(&connp->conn_lock); 2368 return (ENOMEM); 2369 } 2370 } 2371 } 2372 2373 /* 2374 * Alloc buffer to copy new state into (see below) before 2375 * we make any changes, so we can bail if it fails. 2376 */ 2377 if ((new_filter = l_alloc()) == NULL) { 2378 mutex_exit(&connp->conn_lock); 2379 err = ENOMEM; 2380 goto free_and_exit; 2381 } 2382 2383 if (gf->gf_numsrc == 0) { 2384 CLEAR_SLIST(ilg->ilg_filter); 2385 } else { 2386 slist_t *fp; 2387 if (ilg->ilg_filter == NULL) { 2388 fp = l_alloc(); 2389 if (fp == NULL) { 2390 if (ilgstat == ILGSTAT_NEW) 2391 ilg_delete(connp, ilg, NULL); 2392 mutex_exit(&connp->conn_lock); 2393 err = ENOMEM; 2394 goto free_and_exit; 2395 } 2396 } else { 2397 fp = ilg->ilg_filter; 2398 } 2399 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2400 sin6 = (struct sockaddr_in6 *)sl; 2401 fp->sl_addr[i] = sin6->sin6_addr; 2402 } 2403 fp->sl_numsrc = gf->gf_numsrc; 2404 ilg->ilg_filter = fp; 2405 } 2406 /* 2407 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2408 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2409 * So we need to translate here. 2410 */ 2411 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2412 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2413 2414 /* 2415 * Save copy of ilg's filter state to pass to other functions, 2416 * so we can release conn_lock now. 
2417 */ 2418 new_fmode = ilg->ilg_fmode; 2419 l_copy(ilg->ilg_filter, new_filter); 2420 2421 mutex_exit(&connp->conn_lock); 2422 2423 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2424 ilgstat, new_fmode, new_filter); 2425 if (err != 0) { 2426 /* 2427 * Restore the original filter state, or delete the 2428 * newly-created ilg. We need to look up the ilg 2429 * again, though, since we've not been holding the 2430 * conn_lock. 2431 */ 2432 mutex_enter(&connp->conn_lock); 2433 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2434 ASSERT(ilg != NULL); 2435 if (ilgstat == ILGSTAT_NEW) { 2436 ilg_delete(connp, ilg, NULL); 2437 } else { 2438 ilg->ilg_fmode = orig_fmode; 2439 if (SLIST_IS_EMPTY(orig_filter)) { 2440 CLEAR_SLIST(ilg->ilg_filter); 2441 } else { 2442 /* 2443 * We didn't free the filter, even if we 2444 * were trying to make the source list empty; 2445 * so if orig_filter isn't empty, the ilg 2446 * must still have a filter alloc'd. 2447 */ 2448 l_copy(orig_filter, ilg->ilg_filter); 2449 } 2450 } 2451 mutex_exit(&connp->conn_lock); 2452 } 2453 2454 free_and_exit: 2455 l_free(orig_filter); 2456 l_free(new_filter); 2457 2458 return (err); 2459 } 2460 2461 /* 2462 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2463 */ 2464 /* ARGSUSED */ 2465 int 2466 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2467 ip_ioctl_cmd_t *ipip, void *ifreq) 2468 { 2469 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2470 /* existence verified in ip_wput_nondata() */ 2471 mblk_t *data_mp = mp->b_cont->b_cont; 2472 int datalen, err, cmd, minsize; 2473 int expsize = 0; 2474 conn_t *connp; 2475 boolean_t isv6, is_v4only_api, getcmd; 2476 struct sockaddr_in *gsin; 2477 struct sockaddr_in6 *gsin6; 2478 ipaddr_t v4grp; 2479 in6_addr_t v6grp; 2480 struct group_filter *gf = NULL; 2481 struct ip_msfilter *imsf = NULL; 2482 mblk_t *ndp; 2483 2484 if (data_mp->b_cont != NULL) { 2485 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2486 return (ENOMEM); 2487 freemsg(data_mp); 2488 data_mp = ndp; 2489 mp->b_cont->b_cont = data_mp; 2490 } 2491 2492 cmd = iocp->ioc_cmd; 2493 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2494 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2495 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2496 datalen = MBLKL(data_mp); 2497 2498 if (datalen < minsize) 2499 return (EINVAL); 2500 2501 /* 2502 * now we know we have at least have the initial structure, 2503 * but need to check for the source list array. 
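 * For example, an SIOCSMSFILTER request claiming gf_numsrc sources must
 * be at least GROUP_FILTER_SIZE(gf_numsrc) bytes long, or the ioctl
 * fails with EINVAL below.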
2504 */ 2505 if (is_v4only_api) { 2506 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2507 isv6 = B_FALSE; 2508 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2509 } else { 2510 gf = (struct group_filter *)data_mp->b_rptr; 2511 if (gf->gf_group.ss_family == AF_INET6) { 2512 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2513 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2514 } else { 2515 isv6 = B_FALSE; 2516 } 2517 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2518 } 2519 if (datalen < expsize) 2520 return (EINVAL); 2521 2522 connp = Q_TO_CONN(q); 2523 2524 /* operation not supported on the virtual network interface */ 2525 if (IS_VNI(ipif->ipif_ill)) 2526 return (EINVAL); 2527 2528 if (isv6) { 2529 ill_t *ill = ipif->ipif_ill; 2530 ill_refhold(ill); 2531 2532 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2533 v6grp = gsin6->sin6_addr; 2534 if (getcmd) 2535 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2536 else 2537 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2538 2539 ill_refrele(ill); 2540 } else { 2541 boolean_t isv4mapped = B_FALSE; 2542 if (is_v4only_api) { 2543 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2544 } else { 2545 if (gf->gf_group.ss_family == AF_INET) { 2546 gsin = (struct sockaddr_in *)&gf->gf_group; 2547 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2548 } else { 2549 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2550 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2551 v4grp); 2552 isv4mapped = B_TRUE; 2553 } 2554 } 2555 if (getcmd) 2556 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2557 isv4mapped); 2558 else 2559 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2560 isv4mapped); 2561 } 2562 2563 return (err); 2564 } 2565 2566 /* 2567 * Finds the ipif based on information in the ioctl headers. Needed to make 2568 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2569 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2570 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 
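 * The interface may be named by the imsf_interface address (v4-only
 * SIOC[GS]IPMSFILTER) or by the gf_interface index (SIOC[GS]MSFILTER);
 * if neither is supplied, an interface is chosen based on the group
 * address.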
2571 */ 2572 int 2573 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2574 { 2575 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2576 int cmd = iocp->ioc_cmd, err = 0; 2577 conn_t *connp; 2578 ipif_t *ipif; 2579 /* caller has verified this mblk exists */ 2580 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2581 struct ip_msfilter *imsf; 2582 struct group_filter *gf; 2583 ipaddr_t v4addr, v4grp; 2584 in6_addr_t v6grp; 2585 uint32_t index; 2586 zoneid_t zoneid; 2587 2588 connp = Q_TO_CONN(q); 2589 zoneid = connp->conn_zoneid; 2590 2591 /* don't allow multicast operations on a tcp conn */ 2592 if (IPCL_IS_TCP(connp)) 2593 return (ENOPROTOOPT); 2594 2595 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2596 /* don't allow v4-specific ioctls on v6 socket */ 2597 if (connp->conn_af_isv6) 2598 return (EAFNOSUPPORT); 2599 2600 imsf = (struct ip_msfilter *)dbuf; 2601 v4addr = imsf->imsf_interface.s_addr; 2602 v4grp = imsf->imsf_multiaddr.s_addr; 2603 if (v4addr == INADDR_ANY) { 2604 ipif = ipif_lookup_group(v4grp, zoneid); 2605 if (ipif == NULL) 2606 err = EADDRNOTAVAIL; 2607 } else { 2608 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2609 func, &err); 2610 } 2611 } else { 2612 boolean_t isv6 = B_FALSE; 2613 gf = (struct group_filter *)dbuf; 2614 index = gf->gf_interface; 2615 if (gf->gf_group.ss_family == AF_INET6) { 2616 struct sockaddr_in6 *sin6; 2617 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2618 v6grp = sin6->sin6_addr; 2619 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2620 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2621 else 2622 isv6 = B_TRUE; 2623 } else if (gf->gf_group.ss_family == AF_INET) { 2624 struct sockaddr_in *sin; 2625 sin = (struct sockaddr_in *)&gf->gf_group; 2626 v4grp = sin->sin_addr.s_addr; 2627 } else { 2628 return (EAFNOSUPPORT); 2629 } 2630 if (index == 0) { 2631 if (isv6) 2632 ipif = ipif_lookup_group_v6(&v6grp, zoneid); 2633 else 2634 ipif = ipif_lookup_group(v4grp, zoneid); 2635 if (ipif == NULL) 2636 err = EADDRNOTAVAIL; 2637 } else { 2638 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2639 q, mp, func, &err); 2640 } 2641 } 2642 2643 *ipifpp = ipif; 2644 return (err); 2645 } 2646 2647 /* 2648 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2649 * in in two stages, as the first copyin tells us the size of the attached 2650 * source buffer. This function is called by ip_wput_nondata() after the 2651 * first copyin has completed; it figures out how big the second stage 2652 * needs to be, and kicks it off. 2653 * 2654 * In some cases (numsrc < 2), the second copyin is not needed as the 2655 * first one gets a complete structure containing 1 source addr. 2656 * 2657 * The function returns 0 if a second copyin has been started (i.e. there's 2658 * no more work to be done right now), or 1 if the second copyin is not 2659 * needed and ip_wput_nondata() can continue its processing. 
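 *
 * As a concrete illustration: an SIOCSMSFILTER request with
 * gf_numsrc == 4 first copies in sizeof (struct group_filter) bytes
 * (which already carries one source slot); the second copyin then
 * starts at that offset and fetches the remaining
 * GROUP_FILTER_SIZE(4) - sizeof (struct group_filter) bytes of
 * source addresses.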
2660 */ 2661 int 2662 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2663 { 2664 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2665 int cmd = iocp->ioc_cmd; 2666 /* validity of this checked in ip_wput_nondata() */ 2667 mblk_t *mp1 = mp->b_cont->b_cont; 2668 int copysize = 0; 2669 int offset; 2670 2671 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2672 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2673 if (gf->gf_numsrc >= 2) { 2674 offset = sizeof (struct group_filter); 2675 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2676 } 2677 } else { 2678 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2679 if (imsf->imsf_numsrc >= 2) { 2680 offset = sizeof (struct ip_msfilter); 2681 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2682 } 2683 } 2684 if (copysize > 0) { 2685 mi_copyin_n(q, mp, offset, copysize); 2686 return (0); 2687 } 2688 return (1); 2689 } 2690 2691 /* 2692 * Handle the following optmgmt: 2693 * IP_ADD_MEMBERSHIP must not have joined already 2694 * MCAST_JOIN_GROUP must not have joined already 2695 * IP_BLOCK_SOURCE must have joined already 2696 * MCAST_BLOCK_SOURCE must have joined already 2697 * IP_JOIN_SOURCE_GROUP may have joined already 2698 * MCAST_JOIN_SOURCE_GROUP may have joined already 2699 * 2700 * fmode and src parameters may be used to determine which option is 2701 * being set, as follows (the IP_* and MCAST_* versions of each option 2702 * are functionally equivalent): 2703 * opt fmode src 2704 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2705 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2706 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2707 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2708 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2709 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2710 * 2711 * Changing the filter mode is not allowed; if a matching ilg already 2712 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2713 * 2714 * Verifies that there is a source address of appropriate scope for 2715 * the group; if not, EADDRNOTAVAIL is returned. 2716 * 2717 * The interface to be used may be identified by an address or by an 2718 * index. A pointer to the index is passed; if it is NULL, use the 2719 * address, otherwise, use the index. 2720 */ 2721 int 2722 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2723 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2724 mblk_t *first_mp) 2725 { 2726 ipif_t *ipif; 2727 ipsq_t *ipsq; 2728 int err = 0; 2729 ill_t *ill; 2730 2731 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2732 ip_restart_optmgmt, &ipif); 2733 if (err != 0) { 2734 if (err != EINPROGRESS) { 2735 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2736 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2737 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2738 } 2739 return (err); 2740 } 2741 ASSERT(ipif != NULL); 2742 2743 ill = ipif->ipif_ill; 2744 /* Operation not supported on a virtual network interface */ 2745 if (IS_VNI(ill)) { 2746 ipif_refrele(ipif); 2747 return (EINVAL); 2748 } 2749 2750 if (checkonly) { 2751 /* 2752 * do not do operation, just pretend to - new T_CHECK 2753 * semantics. The error return case above if encountered 2754 * considered a good enough "check" here. 
2755 */ 2756 ipif_refrele(ipif); 2757 return (0); 2758 } 2759 2760 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2761 NEW_OP); 2762 2763 /* unspecified source addr => no source filtering */ 2764 err = ilg_add(connp, group, ipif, fmode, src); 2765 2766 IPSQ_EXIT(ipsq); 2767 2768 ipif_refrele(ipif); 2769 return (err); 2770 } 2771 2772 /* 2773 * Handle the following optmgmt: 2774 * IPV6_JOIN_GROUP must not have joined already 2775 * MCAST_JOIN_GROUP must not have joined already 2776 * MCAST_BLOCK_SOURCE must have joined already 2777 * MCAST_JOIN_SOURCE_GROUP may have joined already 2778 * 2779 * fmode and src parameters may be used to determine which option is 2780 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2781 * are functionally equivalent): 2782 * opt fmode v6src 2783 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2784 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2785 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2786 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2787 * 2788 * Changing the filter mode is not allowed; if a matching ilg already 2789 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2790 * 2791 * Verifies that there is a source address of appropriate scope for 2792 * the group; if not, EADDRNOTAVAIL is returned. 2793 * 2794 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2795 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2796 * v6src is also v4-mapped. 2797 */ 2798 int 2799 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2800 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2801 const in6_addr_t *v6src, mblk_t *first_mp) 2802 { 2803 ill_t *ill; 2804 ipif_t *ipif; 2805 char buf[INET6_ADDRSTRLEN]; 2806 ipaddr_t v4group, v4src; 2807 boolean_t isv6; 2808 ipsq_t *ipsq; 2809 int err; 2810 2811 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2812 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2813 if (err != 0) { 2814 if (err != EINPROGRESS) { 2815 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2816 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2817 sizeof (buf)), ifindex)); 2818 } 2819 return (err); 2820 } 2821 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2822 2823 /* operation is not supported on the virtual network interface */ 2824 if (isv6) { 2825 if (IS_VNI(ill)) { 2826 ill_refrele(ill); 2827 return (EINVAL); 2828 } 2829 } else { 2830 if (IS_VNI(ipif->ipif_ill)) { 2831 ipif_refrele(ipif); 2832 return (EINVAL); 2833 } 2834 } 2835 2836 if (checkonly) { 2837 /* 2838 * do not do operation, just pretend to - new T_CHECK 2839 * semantics. The error return case above if encountered 2840 * considered a good enough "check" here. 
2841 */ 2842 if (isv6) 2843 ill_refrele(ill); 2844 else 2845 ipif_refrele(ipif); 2846 return (0); 2847 } 2848 2849 if (!isv6) { 2850 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2851 ipsq, NEW_OP); 2852 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2853 IPSQ_EXIT(ipsq); 2854 ipif_refrele(ipif); 2855 } else { 2856 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2857 ipsq, NEW_OP); 2858 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2859 IPSQ_EXIT(ipsq); 2860 ill_refrele(ill); 2861 } 2862 2863 return (err); 2864 } 2865 2866 static int 2867 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2868 mcast_record_t fmode, ipaddr_t src) 2869 { 2870 ilg_t *ilg; 2871 in6_addr_t v6src; 2872 boolean_t leaving = B_FALSE; 2873 2874 ASSERT(IAM_WRITER_IPIF(ipif)); 2875 2876 /* 2877 * The ilg is valid only while we hold the conn lock. Once we drop 2878 * the lock, another thread can locate another ilg on this connp, 2879 * but on a different ipif, and delete it, and cause the ilg array 2880 * to be reallocated and copied. Hence do the ilg_delete before 2881 * dropping the lock. 2882 */ 2883 mutex_enter(&connp->conn_lock); 2884 ilg = ilg_lookup_ipif(connp, group, ipif); 2885 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2886 mutex_exit(&connp->conn_lock); 2887 return (EADDRNOTAVAIL); 2888 } 2889 2890 /* 2891 * Decide if we're actually deleting the ilg or just removing a 2892 * source filter address; if just removing an addr, make sure we 2893 * aren't trying to change the filter mode, and that the addr is 2894 * actually in our filter list already. If we're removing the 2895 * last src in an include list, just delete the ilg. 2896 */ 2897 if (src == INADDR_ANY) { 2898 v6src = ipv6_all_zeros; 2899 leaving = B_TRUE; 2900 } else { 2901 int err = 0; 2902 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2903 if (fmode != ilg->ilg_fmode) 2904 err = EINVAL; 2905 else if (ilg->ilg_filter == NULL || 2906 !list_has_addr(ilg->ilg_filter, &v6src)) 2907 err = EADDRNOTAVAIL; 2908 if (err != 0) { 2909 mutex_exit(&connp->conn_lock); 2910 return (err); 2911 } 2912 if (fmode == MODE_IS_INCLUDE && 2913 ilg->ilg_filter->sl_numsrc == 1) { 2914 v6src = ipv6_all_zeros; 2915 leaving = B_TRUE; 2916 } 2917 } 2918 2919 ilg_delete(connp, ilg, &v6src); 2920 mutex_exit(&connp->conn_lock); 2921 2922 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2923 return (0); 2924 } 2925 2926 static int 2927 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2928 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2929 { 2930 ilg_t *ilg; 2931 ill_t *ilg_ill; 2932 uint_t ilg_orig_ifindex; 2933 boolean_t leaving = B_TRUE; 2934 2935 ASSERT(IAM_WRITER_ILL(ill)); 2936 2937 /* 2938 * Use the index that we originally used to join. We can't 2939 * use the ill directly because ilg_ill could point to 2940 * a new ill if things have moved. 2941 */ 2942 mutex_enter(&connp->conn_lock); 2943 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2944 ill->ill_phyint->phyint_ifindex); 2945 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2946 mutex_exit(&connp->conn_lock); 2947 return (EADDRNOTAVAIL); 2948 } 2949 2950 /* 2951 * Decide if we're actually deleting the ilg or just removing a 2952 * source filter address; if just removing an addr, make sure we 2953 * aren't trying to change the filter mode, and that the addr is 2954 * actually in our filter list already. If we're removing the 2955 * last src in an include list, just delete the ilg. 
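 * For example, an include-mode filter whose only source is the one
 * being removed results in leaving the group altogether.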
2956 */ 2957 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2958 int err = 0; 2959 if (fmode != ilg->ilg_fmode) 2960 err = EINVAL; 2961 else if (ilg->ilg_filter == NULL || 2962 !list_has_addr(ilg->ilg_filter, v6src)) 2963 err = EADDRNOTAVAIL; 2964 if (err != 0) { 2965 mutex_exit(&connp->conn_lock); 2966 return (err); 2967 } 2968 if (fmode == MODE_IS_INCLUDE && 2969 ilg->ilg_filter->sl_numsrc == 1) 2970 v6src = NULL; 2971 else 2972 leaving = B_FALSE; 2973 } 2974 2975 ilg_ill = ilg->ilg_ill; 2976 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2977 ilg_delete(connp, ilg, v6src); 2978 mutex_exit(&connp->conn_lock); 2979 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2980 connp->conn_zoneid, B_FALSE, leaving); 2981 2982 return (0); 2983 } 2984 2985 /* 2986 * Handle the following optmgmt: 2987 * IP_DROP_MEMBERSHIP will leave 2988 * MCAST_LEAVE_GROUP will leave 2989 * IP_UNBLOCK_SOURCE will not leave 2990 * MCAST_UNBLOCK_SOURCE will not leave 2991 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2992 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2993 * 2994 * fmode and src parameters may be used to determine which option is 2995 * being set, as follows (the IP_* and MCAST_* versions of each option 2996 * are functionally equivalent): 2997 * opt fmode src 2998 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2999 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 3000 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3001 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 3002 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3003 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 3004 * 3005 * Changing the filter mode is not allowed; if a matching ilg already 3006 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3007 * 3008 * The interface to be used may be identified by an address or by an 3009 * index. A pointer to the index is passed; if it is NULL, use the 3010 * address, otherwise, use the index. 3011 */ 3012 int 3013 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3014 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3015 mblk_t *first_mp) 3016 { 3017 ipif_t *ipif; 3018 ipsq_t *ipsq; 3019 int err; 3020 ill_t *ill; 3021 3022 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3023 ip_restart_optmgmt, &ipif); 3024 if (err != 0) { 3025 if (err != EINPROGRESS) { 3026 ip1dbg(("ip_opt_delete_group: no ipif for group " 3027 "0x%x, ifaddr 0x%x\n", 3028 (int)ntohl(group), (int)ntohl(ifaddr))); 3029 } 3030 return (err); 3031 } 3032 ASSERT(ipif != NULL); 3033 3034 ill = ipif->ipif_ill; 3035 /* Operation not supported on a virtual network interface */ 3036 if (IS_VNI(ill)) { 3037 ipif_refrele(ipif); 3038 return (EINVAL); 3039 } 3040 3041 if (checkonly) { 3042 /* 3043 * do not do operation, just pretend to - new T_CHECK 3044 * semantics. The error return case above if encountered 3045 * considered a good enough "check" here. 
3046 */ 3047 ipif_refrele(ipif); 3048 return (0); 3049 } 3050 3051 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3052 NEW_OP); 3053 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3054 IPSQ_EXIT(ipsq); 3055 3056 ipif_refrele(ipif); 3057 return (err); 3058 } 3059 3060 /* 3061 * Handle the following optmgmt: 3062 * IPV6_LEAVE_GROUP will leave 3063 * MCAST_LEAVE_GROUP will leave 3064 * MCAST_UNBLOCK_SOURCE will not leave 3065 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3066 * 3067 * fmode and src parameters may be used to determine which option is 3068 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3069 * are functionally equivalent): 3070 * opt fmode v6src 3071 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3072 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3073 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3074 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3075 * 3076 * Changing the filter mode is not allowed; if a matching ilg already 3077 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3078 * 3079 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3080 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3081 * v6src is also v4-mapped. 3082 */ 3083 int 3084 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3085 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3086 const in6_addr_t *v6src, mblk_t *first_mp) 3087 { 3088 ill_t *ill; 3089 ipif_t *ipif; 3090 char buf[INET6_ADDRSTRLEN]; 3091 ipaddr_t v4group, v4src; 3092 boolean_t isv6; 3093 ipsq_t *ipsq; 3094 int err; 3095 3096 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3097 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3098 if (err != 0) { 3099 if (err != EINPROGRESS) { 3100 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3101 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3102 sizeof (buf)), ifindex)); 3103 } 3104 return (err); 3105 } 3106 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3107 3108 /* operation is not supported on the virtual network interface */ 3109 if (isv6) { 3110 if (IS_VNI(ill)) { 3111 ill_refrele(ill); 3112 return (EINVAL); 3113 } 3114 } else { 3115 if (IS_VNI(ipif->ipif_ill)) { 3116 ipif_refrele(ipif); 3117 return (EINVAL); 3118 } 3119 } 3120 3121 if (checkonly) { 3122 /* 3123 * do not do operation, just pretend to - new T_CHECK 3124 * semantics. The error return case above if encountered 3125 * considered a good enough "check" here. 3126 */ 3127 if (isv6) 3128 ill_refrele(ill); 3129 else 3130 ipif_refrele(ipif); 3131 return (0); 3132 } 3133 3134 if (!isv6) { 3135 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3136 ipsq, NEW_OP); 3137 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3138 v4src); 3139 IPSQ_EXIT(ipsq); 3140 ipif_refrele(ipif); 3141 } else { 3142 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3143 ipsq, NEW_OP); 3144 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3145 v6src); 3146 IPSQ_EXIT(ipsq); 3147 ill_refrele(ill); 3148 } 3149 3150 return (err); 3151 } 3152 3153 /* 3154 * Group mgmt for upper conn that passes things down 3155 * to the interface multicast list (and DLPI) 3156 * These routines can handle new style options that specify an interface name 3157 * as opposed to an interface address (needed for general handling of 3158 * unnumbered interfaces.) 
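 *
 * For instance, the protocol-independent options let an application
 * select the interface by index (e.g. MCAST_JOIN_GROUP with a
 * struct group_req whose gr_interface holds the ifindex), instead of
 * by an IPv4 interface address as with IP_ADD_MEMBERSHIP.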
3159 */ 3160 3161 /* 3162 * Add a group to an upper conn group data structure and pass things down 3163 * to the interface multicast list (and DLPI) 3164 */ 3165 static int 3166 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3167 ipaddr_t src) 3168 { 3169 int error = 0; 3170 ill_t *ill; 3171 ilg_t *ilg; 3172 ilg_stat_t ilgstat; 3173 slist_t *new_filter = NULL; 3174 int new_fmode; 3175 3176 ASSERT(IAM_WRITER_IPIF(ipif)); 3177 3178 ill = ipif->ipif_ill; 3179 3180 if (!(ill->ill_flags & ILLF_MULTICAST)) 3181 return (EADDRNOTAVAIL); 3182 3183 /* 3184 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3185 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3186 * serialize 2 threads doing join (sock, group1, hme0:0) and 3187 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3188 * but both operations happen on the same conn. 3189 */ 3190 mutex_enter(&connp->conn_lock); 3191 ilg = ilg_lookup_ipif(connp, group, ipif); 3192 3193 /* 3194 * Depending on the option we're handling, may or may not be okay 3195 * if group has already been added. Figure out our rules based 3196 * on fmode and src params. Also make sure there's enough room 3197 * in the filter if we're adding a source to an existing filter. 3198 */ 3199 if (src == INADDR_ANY) { 3200 /* we're joining for all sources, must not have joined */ 3201 if (ilg != NULL) 3202 error = EADDRINUSE; 3203 } else { 3204 if (fmode == MODE_IS_EXCLUDE) { 3205 /* (excl {addr}) => block source, must have joined */ 3206 if (ilg == NULL) 3207 error = EADDRNOTAVAIL; 3208 } 3209 /* (incl {addr}) => join source, may have joined */ 3210 3211 if (ilg != NULL && 3212 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3213 error = ENOBUFS; 3214 } 3215 if (error != 0) { 3216 mutex_exit(&connp->conn_lock); 3217 return (error); 3218 } 3219 3220 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3221 3222 /* 3223 * Alloc buffer to copy new state into (see below) before 3224 * we make any changes, so we can bail if it fails. 
3225 */ 3226 if ((new_filter = l_alloc()) == NULL) { 3227 mutex_exit(&connp->conn_lock); 3228 return (ENOMEM); 3229 } 3230 3231 if (ilg == NULL) { 3232 ilgstat = ILGSTAT_NEW; 3233 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3234 mutex_exit(&connp->conn_lock); 3235 l_free(new_filter); 3236 return (ENOMEM); 3237 } 3238 if (src != INADDR_ANY) { 3239 ilg->ilg_filter = l_alloc(); 3240 if (ilg->ilg_filter == NULL) { 3241 ilg_delete(connp, ilg, NULL); 3242 mutex_exit(&connp->conn_lock); 3243 l_free(new_filter); 3244 return (ENOMEM); 3245 } 3246 ilg->ilg_filter->sl_numsrc = 1; 3247 IN6_IPADDR_TO_V4MAPPED(src, 3248 &ilg->ilg_filter->sl_addr[0]); 3249 } 3250 if (group == INADDR_ANY) { 3251 ilg->ilg_v6group = ipv6_all_zeros; 3252 } else { 3253 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3254 } 3255 ilg->ilg_ipif = ipif; 3256 ilg->ilg_ill = NULL; 3257 ilg->ilg_orig_ifindex = 0; 3258 ilg->ilg_fmode = fmode; 3259 } else { 3260 int index; 3261 in6_addr_t v6src; 3262 ilgstat = ILGSTAT_CHANGE; 3263 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3264 mutex_exit(&connp->conn_lock); 3265 l_free(new_filter); 3266 return (EINVAL); 3267 } 3268 if (ilg->ilg_filter == NULL) { 3269 ilg->ilg_filter = l_alloc(); 3270 if (ilg->ilg_filter == NULL) { 3271 mutex_exit(&connp->conn_lock); 3272 l_free(new_filter); 3273 return (ENOMEM); 3274 } 3275 } 3276 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3277 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3278 mutex_exit(&connp->conn_lock); 3279 l_free(new_filter); 3280 return (EADDRNOTAVAIL); 3281 } 3282 index = ilg->ilg_filter->sl_numsrc++; 3283 ilg->ilg_filter->sl_addr[index] = v6src; 3284 } 3285 3286 /* 3287 * Save copy of ilg's filter state to pass to other functions, 3288 * so we can release conn_lock now. 3289 */ 3290 new_fmode = ilg->ilg_fmode; 3291 l_copy(ilg->ilg_filter, new_filter); 3292 3293 mutex_exit(&connp->conn_lock); 3294 3295 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3296 if (error != 0) { 3297 /* 3298 * Need to undo what we did before calling ip_addmulti()! 3299 * Must look up the ilg again since we've not been holding 3300 * conn_lock. 3301 */ 3302 in6_addr_t v6src; 3303 if (ilgstat == ILGSTAT_NEW) 3304 v6src = ipv6_all_zeros; 3305 else 3306 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3307 mutex_enter(&connp->conn_lock); 3308 ilg = ilg_lookup_ipif(connp, group, ipif); 3309 ASSERT(ilg != NULL); 3310 ilg_delete(connp, ilg, &v6src); 3311 mutex_exit(&connp->conn_lock); 3312 l_free(new_filter); 3313 return (error); 3314 } 3315 3316 l_free(new_filter); 3317 return (0); 3318 } 3319 3320 static int 3321 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3322 mcast_record_t fmode, const in6_addr_t *v6src) 3323 { 3324 int error = 0; 3325 int orig_ifindex; 3326 ilg_t *ilg; 3327 ilg_stat_t ilgstat; 3328 slist_t *new_filter = NULL; 3329 int new_fmode; 3330 3331 ASSERT(IAM_WRITER_ILL(ill)); 3332 3333 if (!(ill->ill_flags & ILLF_MULTICAST)) 3334 return (EADDRNOTAVAIL); 3335 3336 /* 3337 * conn_lock protects the ilg list. Serializes 2 threads doing 3338 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3339 * and hme1 map to different ipsq's, but both operations happen 3340 * on the same conn. 3341 */ 3342 mutex_enter(&connp->conn_lock); 3343 3344 /* 3345 * Use the ifindex to do the lookup. We can't use the ill 3346 * directly because ilg_ill could point to a different ill if 3347 * things have moved. 
3348 */ 3349 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3350 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3351 3352 /* 3353 * Depending on the option we're handling, may or may not be okay 3354 * if group has already been added. Figure out our rules based 3355 * on fmode and src params. Also make sure there's enough room 3356 * in the filter if we're adding a source to an existing filter. 3357 */ 3358 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3359 /* we're joining for all sources, must not have joined */ 3360 if (ilg != NULL) 3361 error = EADDRINUSE; 3362 } else { 3363 if (fmode == MODE_IS_EXCLUDE) { 3364 /* (excl {addr}) => block source, must have joined */ 3365 if (ilg == NULL) 3366 error = EADDRNOTAVAIL; 3367 } 3368 /* (incl {addr}) => join source, may have joined */ 3369 3370 if (ilg != NULL && 3371 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3372 error = ENOBUFS; 3373 } 3374 if (error != 0) { 3375 mutex_exit(&connp->conn_lock); 3376 return (error); 3377 } 3378 3379 /* 3380 * Alloc buffer to copy new state into (see below) before 3381 * we make any changes, so we can bail if it fails. 3382 */ 3383 if ((new_filter = l_alloc()) == NULL) { 3384 mutex_exit(&connp->conn_lock); 3385 return (ENOMEM); 3386 } 3387 3388 if (ilg == NULL) { 3389 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3390 mutex_exit(&connp->conn_lock); 3391 l_free(new_filter); 3392 return (ENOMEM); 3393 } 3394 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3395 ilg->ilg_filter = l_alloc(); 3396 if (ilg->ilg_filter == NULL) { 3397 ilg_delete(connp, ilg, NULL); 3398 mutex_exit(&connp->conn_lock); 3399 l_free(new_filter); 3400 return (ENOMEM); 3401 } 3402 ilg->ilg_filter->sl_numsrc = 1; 3403 ilg->ilg_filter->sl_addr[0] = *v6src; 3404 } 3405 ilgstat = ILGSTAT_NEW; 3406 ilg->ilg_v6group = *v6group; 3407 ilg->ilg_fmode = fmode; 3408 ilg->ilg_ipif = NULL; 3409 /* 3410 * Choose our target ill to join on. This might be different 3411 * from the ill we've been given if it's currently down and 3412 * part of a group. 3413 * 3414 * new ill is not refheld; we are writer. 3415 */ 3416 ill = ip_choose_multi_ill(ill, v6group); 3417 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3418 ilg->ilg_ill = ill; 3419 /* 3420 * Remember the orig_ifindex that we joined on, so that we 3421 * can successfully delete them later on and also search 3422 * for duplicates if the application wants to join again. 3423 */ 3424 ilg->ilg_orig_ifindex = orig_ifindex; 3425 } else { 3426 int index; 3427 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3428 mutex_exit(&connp->conn_lock); 3429 l_free(new_filter); 3430 return (EINVAL); 3431 } 3432 if (ilg->ilg_filter == NULL) { 3433 ilg->ilg_filter = l_alloc(); 3434 if (ilg->ilg_filter == NULL) { 3435 mutex_exit(&connp->conn_lock); 3436 l_free(new_filter); 3437 return (ENOMEM); 3438 } 3439 } 3440 if (list_has_addr(ilg->ilg_filter, v6src)) { 3441 mutex_exit(&connp->conn_lock); 3442 l_free(new_filter); 3443 return (EADDRNOTAVAIL); 3444 } 3445 ilgstat = ILGSTAT_CHANGE; 3446 index = ilg->ilg_filter->sl_numsrc++; 3447 ilg->ilg_filter->sl_addr[index] = *v6src; 3448 /* 3449 * The current ill might be different from the one we were 3450 * asked to join on (if failover has occurred); we should 3451 * join on the ill stored in the ilg. The original ill 3452 * is noted in ilg_orig_ifindex, which matched our request. 3453 */ 3454 ill = ilg->ilg_ill; 3455 } 3456 3457 /* 3458 * Save copy of ilg's filter state to pass to other functions, 3459 * so we can release conn_lock now. 
3460 */ 3461 new_fmode = ilg->ilg_fmode; 3462 l_copy(ilg->ilg_filter, new_filter); 3463 3464 mutex_exit(&connp->conn_lock); 3465 3466 /* 3467 * Now update the ill. We wait to do this until after the ilg 3468 * has been updated because we need to update the src filter 3469 * info for the ill, which involves looking at the status of 3470 * all the ilgs associated with this group/interface pair. 3471 */ 3472 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3473 ilgstat, new_fmode, new_filter); 3474 if (error != 0) { 3475 /* 3476 * But because we waited, we have to undo the ilg update 3477 * if ip_addmulti_v6() fails. We also must lookup ilg 3478 * again, since we've not been holding conn_lock. 3479 */ 3480 in6_addr_t delsrc = 3481 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3482 mutex_enter(&connp->conn_lock); 3483 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3484 ASSERT(ilg != NULL); 3485 ilg_delete(connp, ilg, &delsrc); 3486 mutex_exit(&connp->conn_lock); 3487 l_free(new_filter); 3488 return (error); 3489 } 3490 3491 l_free(new_filter); 3492 3493 return (0); 3494 } 3495 3496 /* 3497 * Find an IPv4 ilg matching group, ill and source 3498 */ 3499 ilg_t * 3500 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3501 { 3502 in6_addr_t v6group, v6src; 3503 int i; 3504 boolean_t isinlist; 3505 ilg_t *ilg; 3506 ipif_t *ipif; 3507 ill_t *ilg_ill; 3508 3509 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3510 3511 /* 3512 * INADDR_ANY is represented as the IPv6 unspecified addr. 3513 */ 3514 if (group == INADDR_ANY) 3515 v6group = ipv6_all_zeros; 3516 else 3517 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3518 3519 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3520 /* ilg_ipif is NULL for v6; skip them */ 3521 ilg = &connp->conn_ilg[i]; 3522 if ((ipif = ilg->ilg_ipif) == NULL) 3523 continue; 3524 ASSERT(ilg->ilg_ill == NULL); 3525 ilg_ill = ipif->ipif_ill; 3526 ASSERT(!ilg_ill->ill_isv6); 3527 if (ilg_ill == ill && 3528 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3529 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3530 /* no source filter, so this is a match */ 3531 return (ilg); 3532 } 3533 break; 3534 } 3535 } 3536 if (i == connp->conn_ilg_inuse) 3537 return (NULL); 3538 3539 /* 3540 * we have an ilg with matching ill and group; but 3541 * the ilg has a source list that we must check. 
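 * The source matches if it appears in an INCLUDE filter, or if it is
 * absent from an EXCLUDE filter.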
3542 */ 3543 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3544 isinlist = B_FALSE; 3545 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3546 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3547 isinlist = B_TRUE; 3548 break; 3549 } 3550 } 3551 3552 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3553 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3554 return (ilg); 3555 3556 return (NULL); 3557 } 3558 3559 /* 3560 * Find an IPv6 ilg matching group, ill, and source 3561 */ 3562 ilg_t * 3563 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3564 const in6_addr_t *v6src, ill_t *ill) 3565 { 3566 int i; 3567 boolean_t isinlist; 3568 ilg_t *ilg; 3569 ill_t *ilg_ill; 3570 3571 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3572 3573 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3574 ilg = &connp->conn_ilg[i]; 3575 if ((ilg_ill = ilg->ilg_ill) == NULL) 3576 continue; 3577 ASSERT(ilg->ilg_ipif == NULL); 3578 ASSERT(ilg_ill->ill_isv6); 3579 if (ilg_ill == ill && 3580 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3581 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3582 /* no source filter, so this is a match */ 3583 return (ilg); 3584 } 3585 break; 3586 } 3587 } 3588 if (i == connp->conn_ilg_inuse) 3589 return (NULL); 3590 3591 /* 3592 * we have an ilg with matching ill and group; but 3593 * the ilg has a source list that we must check. 3594 */ 3595 isinlist = B_FALSE; 3596 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3597 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3598 isinlist = B_TRUE; 3599 break; 3600 } 3601 } 3602 3603 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3604 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3605 return (ilg); 3606 3607 return (NULL); 3608 } 3609 3610 /* 3611 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 3612 * This is useful when the interface fails and we have moved 3613 * to a new ill, but still would like to locate using the index 3614 * that we originally used to join. Used only for IPv6 currently. 3615 */ 3616 static ilg_t * 3617 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3618 { 3619 ilg_t *ilg; 3620 int i; 3621 3622 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3623 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3624 ilg = &connp->conn_ilg[i]; 3625 /* ilg_ill is NULL for V4. 
Skip them */ 3626 if (ilg->ilg_ill == NULL) 3627 continue; 3628 /* ilg_ipif is NULL for V6 */ 3629 ASSERT(ilg->ilg_ipif == NULL); 3630 ASSERT(ilg->ilg_orig_ifindex != 0); 3631 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3632 ilg->ilg_orig_ifindex == ifindex) { 3633 return (ilg); 3634 } 3635 } 3636 return (NULL); 3637 } 3638 3639 /* 3640 * Find an IPv6 ilg matching group and ill 3641 */ 3642 ilg_t * 3643 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3644 { 3645 ilg_t *ilg; 3646 int i; 3647 ill_t *mem_ill; 3648 3649 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3650 3651 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3652 ilg = &connp->conn_ilg[i]; 3653 if ((mem_ill = ilg->ilg_ill) == NULL) 3654 continue; 3655 ASSERT(ilg->ilg_ipif == NULL); 3656 ASSERT(mem_ill->ill_isv6); 3657 if (mem_ill == ill && 3658 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3659 return (ilg); 3660 } 3661 return (NULL); 3662 } 3663 3664 /* 3665 * Find an IPv4 ilg matching group and ipif 3666 */ 3667 static ilg_t * 3668 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3669 { 3670 in6_addr_t v6group; 3671 int i; 3672 3673 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3674 ASSERT(!ipif->ipif_ill->ill_isv6); 3675 3676 if (group == INADDR_ANY) 3677 v6group = ipv6_all_zeros; 3678 else 3679 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3680 3681 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3682 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3683 &v6group) && 3684 connp->conn_ilg[i].ilg_ipif == ipif) 3685 return (&connp->conn_ilg[i]); 3686 } 3687 return (NULL); 3688 } 3689 3690 /* 3691 * If a source address is passed in (src != NULL and src is not 3692 * unspecified), remove the specified src addr from the given ilg's 3693 * filter list, else delete the ilg. 3694 */ 3695 static void 3696 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3697 { 3698 int i; 3699 3700 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3701 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3702 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3703 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3704 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3705 3706 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3707 if (connp->conn_ilg_walker_cnt != 0) { 3708 ilg->ilg_flags |= ILG_DELETED; 3709 return; 3710 } 3711 3712 FREE_SLIST(ilg->ilg_filter); 3713 3714 i = ilg - &connp->conn_ilg[0]; 3715 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3716 3717 /* Move other entries up one step */ 3718 connp->conn_ilg_inuse--; 3719 for (; i < connp->conn_ilg_inuse; i++) 3720 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3721 3722 if (connp->conn_ilg_inuse == 0) { 3723 mi_free((char *)connp->conn_ilg); 3724 connp->conn_ilg = NULL; 3725 cv_broadcast(&connp->conn_refcv); 3726 } 3727 } else { 3728 l_remove(ilg->ilg_filter, src); 3729 } 3730 } 3731 3732 /* 3733 * Called from conn close. No new ilg can be added or removed. 3734 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3735 * will return error if conn has started closing. 
3736 */ 3737 void 3738 ilg_delete_all(conn_t *connp) 3739 { 3740 int i; 3741 ipif_t *ipif = NULL; 3742 ill_t *ill = NULL; 3743 ilg_t *ilg; 3744 in6_addr_t v6group; 3745 boolean_t success; 3746 ipsq_t *ipsq; 3747 int orig_ifindex; 3748 3749 mutex_enter(&connp->conn_lock); 3750 retry: 3751 ILG_WALKER_HOLD(connp); 3752 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3753 ilg = &connp->conn_ilg[i]; 3754 /* 3755 * Since this walk is not atomic (we drop the 3756 * conn_lock and wait in ipsq_enter) we need 3757 * to check for the ILG_DELETED flag. 3758 */ 3759 if (ilg->ilg_flags & ILG_DELETED) { 3760 /* Go to the next ilg */ 3761 i--; 3762 continue; 3763 } 3764 v6group = ilg->ilg_v6group; 3765 3766 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3767 ipif = ilg->ilg_ipif; 3768 ill = ipif->ipif_ill; 3769 } else { 3770 ipif = NULL; 3771 ill = ilg->ilg_ill; 3772 } 3773 /* 3774 * We may not be able to refhold the ill if the ill/ipif 3775 * is changing. But we need to make sure that the ill will 3776 * not vanish. So we just bump up the ill_waiter count. 3777 * If we are unable to do even that, then the ill is closing, 3778 * in which case the unplumb thread will handle the cleanup, 3779 * and we move on to the next ilg. 3780 */ 3781 if (!ill_waiter_inc(ill)) { 3782 /* Go to the next ilg */ 3783 i--; 3784 continue; 3785 } 3786 mutex_exit(&connp->conn_lock); 3787 /* 3788 * To prevent deadlock between ill close which waits inside 3789 * the perimeter, and conn close, ipsq_enter returns error, 3790 * the moment ILL_CONDEMNED is set, in which case ill close 3791 * takes responsibility to cleanup the ilgs. Note that we 3792 * have not yet set condemned flag, otherwise the conn can't 3793 * be refheld for cleanup by those routines and it would be 3794 * a mutual deadlock. 3795 */ 3796 success = ipsq_enter(ill, B_FALSE); 3797 ipsq = ill->ill_phyint->phyint_ipsq; 3798 ill_waiter_dcr(ill); 3799 mutex_enter(&connp->conn_lock); 3800 if (!success) { 3801 /* Go to the next ilg */ 3802 i--; 3803 continue; 3804 } 3805 3806 /* 3807 * Make sure that nothing has changed under. For eg. 3808 * a failover/failback can change ilg_ill while we were 3809 * waiting to become exclusive above 3810 */ 3811 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3812 ipif = ilg->ilg_ipif; 3813 ill = ipif->ipif_ill; 3814 } else { 3815 ipif = NULL; 3816 ill = ilg->ilg_ill; 3817 } 3818 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3819 /* 3820 * The ilg has changed under us probably due 3821 * to a failover or unplumb. Retry on the same ilg. 3822 */ 3823 mutex_exit(&connp->conn_lock); 3824 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3825 mutex_enter(&connp->conn_lock); 3826 continue; 3827 } 3828 v6group = ilg->ilg_v6group; 3829 orig_ifindex = ilg->ilg_orig_ifindex; 3830 ilg_delete(connp, ilg, NULL); 3831 mutex_exit(&connp->conn_lock); 3832 3833 if (ipif != NULL) 3834 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3835 B_FALSE, B_TRUE); 3836 3837 else 3838 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3839 connp->conn_zoneid, B_FALSE, B_TRUE); 3840 3841 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3842 mutex_enter(&connp->conn_lock); 3843 /* Go to the next ilg */ 3844 i--; 3845 } 3846 ILG_WALKER_RELE(connp); 3847 3848 /* If any ill was skipped above wait and retry */ 3849 if (connp->conn_ilg_inuse != 0) { 3850 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3851 goto retry; 3852 } 3853 mutex_exit(&connp->conn_lock); 3854 } 3855 3856 /* 3857 * Called from ill close by ipcl_walk for clearing conn_ilg and 3858 * conn_multicast_ipif for a given ipif. 
conn is held by caller. 3859 * Note that ipcl_walk only walks conns that are not yet condemned. 3860 * condemned conns can't be refheld. For this reason, conn must become clean 3861 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3862 * condemned flag. 3863 */ 3864 static void 3865 conn_delete_ipif(conn_t *connp, caddr_t arg) 3866 { 3867 ipif_t *ipif = (ipif_t *)arg; 3868 int i; 3869 char group_buf1[INET6_ADDRSTRLEN]; 3870 char group_buf2[INET6_ADDRSTRLEN]; 3871 ipaddr_t group; 3872 ilg_t *ilg; 3873 3874 /* 3875 * Even though conn_ilg_inuse can change while we are in this loop, 3876 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3877 * be created or deleted for this connp, on this ill, since this ill 3878 * is the perimeter. So we won't miss any ilg in this cleanup. 3879 */ 3880 mutex_enter(&connp->conn_lock); 3881 3882 /* 3883 * Increment the walker count, so that ilg repacking does not 3884 * occur while we are in the loop. 3885 */ 3886 ILG_WALKER_HOLD(connp); 3887 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3888 ilg = &connp->conn_ilg[i]; 3889 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3890 continue; 3891 /* 3892 * ip_close cannot be cleaning this ilg at the same time. 3893 * since it also has to execute in this ill's perimeter which 3894 * we are now holding. Only a clean conn can be condemned. 3895 */ 3896 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3897 3898 /* Blow away the membership */ 3899 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3900 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3901 group_buf1, sizeof (group_buf1)), 3902 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3903 group_buf2, sizeof (group_buf2)), 3904 ipif->ipif_ill->ill_name)); 3905 3906 /* ilg_ipif is NULL for V6, so we won't be here */ 3907 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3908 3909 group = V4_PART_OF_V6(ilg->ilg_v6group); 3910 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3911 mutex_exit(&connp->conn_lock); 3912 3913 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3914 mutex_enter(&connp->conn_lock); 3915 } 3916 3917 /* 3918 * If we are the last walker, need to physically delete the 3919 * ilgs and repack. 3920 */ 3921 ILG_WALKER_RELE(connp); 3922 3923 if (connp->conn_multicast_ipif == ipif) { 3924 /* Revert to late binding */ 3925 connp->conn_multicast_ipif = NULL; 3926 } 3927 mutex_exit(&connp->conn_lock); 3928 3929 conn_delete_ire(connp, (caddr_t)ipif); 3930 } 3931 3932 /* 3933 * Called from ill close by ipcl_walk for clearing conn_ilg and 3934 * conn_multicast_ill for a given ill. conn is held by caller. 3935 * Note that ipcl_walk only walks conns that are not yet condemned. 3936 * condemned conns can't be refheld. For this reason, conn must become clean 3937 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3938 * condemned flag. 3939 */ 3940 static void 3941 conn_delete_ill(conn_t *connp, caddr_t arg) 3942 { 3943 ill_t *ill = (ill_t *)arg; 3944 int i; 3945 char group_buf[INET6_ADDRSTRLEN]; 3946 in6_addr_t v6group; 3947 int orig_ifindex; 3948 ilg_t *ilg; 3949 3950 /* 3951 * Even though conn_ilg_inuse can change while we are in this loop, 3952 * no new ilgs can be created/deleted for this connp, on this 3953 * ill, since this ill is the perimeter. So we won't miss any ilg 3954 * in this cleanup. 3955 */ 3956 mutex_enter(&connp->conn_lock); 3957 3958 /* 3959 * Increment the walker count, so that ilg repacking does not 3960 * occur while we are in the loop. 
3961 */ 3962 ILG_WALKER_HOLD(connp); 3963 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3964 ilg = &connp->conn_ilg[i]; 3965 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3966 /* 3967 * ip_close cannot be cleaning this ilg at the same 3968 * time, since it also has to execute in this ill's 3969 * perimeter which we are now holding. Only a clean 3970 * conn can be condemned. 3971 */ 3972 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3973 3974 /* Blow away the membership */ 3975 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3976 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3977 group_buf, sizeof (group_buf)), 3978 ill->ill_name)); 3979 3980 v6group = ilg->ilg_v6group; 3981 orig_ifindex = ilg->ilg_orig_ifindex; 3982 ilg_delete(connp, ilg, NULL); 3983 mutex_exit(&connp->conn_lock); 3984 3985 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3986 connp->conn_zoneid, B_FALSE, B_TRUE); 3987 mutex_enter(&connp->conn_lock); 3988 } 3989 } 3990 /* 3991 * If we are the last walker, need to physically delete the 3992 * ilgs and repack. 3993 */ 3994 ILG_WALKER_RELE(connp); 3995 3996 if (connp->conn_multicast_ill == ill) { 3997 /* Revert to late binding */ 3998 connp->conn_multicast_ill = NULL; 3999 connp->conn_orig_multicast_ifindex = 0; 4000 } 4001 mutex_exit(&connp->conn_lock); 4002 } 4003 4004 /* 4005 * Called when an ipif is unplumbed to make sure that there are no 4006 * dangling conn references to that ipif. 4007 * Handles ilg_ipif and conn_multicast_ipif 4008 */ 4009 void 4010 reset_conn_ipif(ipif) 4011 ipif_t *ipif; 4012 { 4013 ipcl_walk(conn_delete_ipif, (caddr_t)ipif); 4014 } 4015 4016 /* 4017 * Called when an ill is unplumbed to make sure that there are no 4018 * dangling conn references to that ill. 4019 * Handles ilg_ill, conn_multicast_ill. 4020 */ 4021 void 4022 reset_conn_ill(ill_t *ill) 4023 { 4024 ipcl_walk(conn_delete_ill, (caddr_t)ill); 4025 } 4026 4027 #ifdef DEBUG 4028 /* 4029 * Walk functions walk all the interfaces in the system to make 4030 * sure that there is no reference to the ipif or ill that is 4031 * going away. 4032 */ 4033 int 4034 ilm_walk_ill(ill_t *ill) 4035 { 4036 int cnt = 0; 4037 ill_t *till; 4038 ilm_t *ilm; 4039 ill_walk_context_t ctx; 4040 4041 rw_enter(&ill_g_lock, RW_READER); 4042 till = ILL_START_WALK_ALL(&ctx); 4043 for (; till != NULL; till = ill_next(&ctx, till)) { 4044 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4045 if (ilm->ilm_ill == ill) { 4046 cnt++; 4047 } 4048 } 4049 } 4050 rw_exit(&ill_g_lock); 4051 4052 return (cnt); 4053 } 4054 4055 /* 4056 * This function is called before the ipif is freed. 4057 */ 4058 int 4059 ilm_walk_ipif(ipif_t *ipif) 4060 { 4061 int cnt = 0; 4062 ill_t *till; 4063 ilm_t *ilm; 4064 ill_walk_context_t ctx; 4065 4066 till = ILL_START_WALK_ALL(&ctx); 4067 for (; till != NULL; till = ill_next(&ctx, till)) { 4068 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4069 if (ilm->ilm_ipif == ipif) { 4070 cnt++; 4071 } 4072 } 4073 } 4074 return (cnt); 4075 } 4076 #endif 4077
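/*
 * Illustrative userland sketch (not part of this module): the
 * source-specific join handled by ip_opt_add_group()/ilg_add() above
 * (MCAST_JOIN_SOURCE_GROUP with MODE_IS_INCLUDE) is typically driven
 * by application code along these lines; the group, source, and
 * interface index are arbitrary example values.
 *
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <arpa/inet.h>
 *	#include <string.h>
 *
 *	struct group_source_req gsr;
 *	struct sockaddr_in *g = (struct sockaddr_in *)&gsr.gsr_group;
 *	struct sockaddr_in *s = (struct sockaddr_in *)&gsr.gsr_source;
 *	int fd = socket(AF_INET, SOCK_DGRAM, 0);
 *
 *	(void) memset(&gsr, 0, sizeof (gsr));
 *	gsr.gsr_interface = 2;
 *	g->sin_family = AF_INET;
 *	(void) inet_pton(AF_INET, "232.1.1.1", &g->sin_addr);
 *	s->sin_family = AF_INET;
 *	(void) inet_pton(AF_INET, "192.0.2.10", &s->sin_addr);
 *
 *	(void) setsockopt(fd, IPPROTO_IP, MCAST_JOIN_SOURCE_GROUP,
 *	    &gsr, sizeof (gsr));
 */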