1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <sys/strsubr.h> 45 #include <net/route.h> 46 #include <netinet/in.h> 47 #include <net/if_dl.h> 48 #include <netinet/ip6.h> 49 #include <netinet/icmp6.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/nd.h> 54 #include <inet/arp.h> 55 #include <inet/ip.h> 56 #include <inet/ip6.h> 57 #include <inet/ip_if.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ip_multi.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ipsec_impl.h> 62 #include <inet/sctp_ip.h> 63 #include <inet/ip_listutils.h> 64 #include <inet/udp_impl.h> 65 66 /* igmpv3/mldv2 source filter manipulation */ 67 static void ilm_bld_flists(conn_t *conn, void *arg); 68 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 69 slist_t *flist); 70 71 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 72 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 73 int orig_ifindex, zoneid_t zoneid); 74 static void ilm_delete(ilm_t *ilm); 75 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 77 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 78 const in6_addr_t *v6group, int index); 79 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 80 ipif_t *ipif); 81 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 82 mcast_record_t fmode, ipaddr_t src); 83 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 84 mcast_record_t fmode, const in6_addr_t *v6src); 85 static void ilg_delete(conn_t *connp, ilg_t *ilg, 
    const in6_addr_t *src);
static mblk_t	*ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t	*ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
static void	conn_ilg_reap(conn_t *connp);
static int	ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);

/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conn (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
 *
 * Multicast joins and leaves are single-threaded per phyint/IPMP group
 * using the ipsq serialization mechanism.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm. ilms are created / destroyed only as writer. ilms
 * are not passed around, instead they are looked up and used under the
 * ill_lock or as writer.
So we don't need a dynamic refcount of the number 121 * of threads holding reference to an ilm. 122 * 123 * Multicast Join operation: 124 * 125 * The first step is to determine the ipif (v4) or ill (v6) on which 126 * the join operation is to be done. The join is done after becoming 127 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 128 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 129 * Multiple threads can attempt to join simultaneously on different ipif/ill 130 * on the same conn. In this case the ipsq serialization does not help in 131 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 132 * The conn_lock also protects all the ilg_t members. 133 * 134 * Leave operation. 135 * 136 * Similar to the join operation, the first step is to determine the ipif 137 * or ill (v6) on which the leave operation is to be done. The leave operation 138 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 139 * As with join ilg modification is done under the protection of the conn lock. 
140 */ 141 142 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 143 ASSERT(connp != NULL); \ 144 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 145 (first_mp), (func), (type), B_TRUE); \ 146 if ((ipsq) == NULL) { \ 147 ipif_refrele(ipif); \ 148 return (EINPROGRESS); \ 149 } 150 151 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 152 ASSERT(connp != NULL); \ 153 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 154 (first_mp), (func), (type), B_TRUE); \ 155 if ((ipsq) == NULL) { \ 156 ill_refrele(ill); \ 157 return (EINPROGRESS); \ 158 } 159 160 #define IPSQ_EXIT(ipsq) \ 161 if (ipsq != NULL) \ 162 ipsq_exit(ipsq, B_TRUE, B_TRUE); 163 164 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 165 166 #define ILG_WALKER_RELE(connp) \ 167 { \ 168 (connp)->conn_ilg_walker_cnt--; \ 169 if ((connp)->conn_ilg_walker_cnt == 0) \ 170 conn_ilg_reap(connp); \ 171 } 172 173 static void 174 conn_ilg_reap(conn_t *connp) 175 { 176 int to; 177 int from; 178 179 ASSERT(MUTEX_HELD(&connp->conn_lock)); 180 181 to = 0; 182 from = 0; 183 while (from < connp->conn_ilg_inuse) { 184 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 185 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 186 from++; 187 continue; 188 } 189 if (to != from) 190 connp->conn_ilg[to] = connp->conn_ilg[from]; 191 to++; 192 from++; 193 } 194 195 connp->conn_ilg_inuse = to; 196 197 if (connp->conn_ilg_inuse == 0) { 198 mi_free((char *)connp->conn_ilg); 199 connp->conn_ilg = NULL; 200 cv_broadcast(&connp->conn_refcv); 201 } 202 } 203 204 #define GETSTRUCT(structure, number) \ 205 ((structure *)mi_zalloc(sizeof (structure) * (number))) 206 207 #define ILG_ALLOC_CHUNK 16 208 209 /* 210 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 211 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 212 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 213 * returned ilg). 
 * Returns NULL on failure (ENOMEM).
 *
 * Assumes connp->conn_lock is held.
 */
static ilg_t *
conn_ilg_alloc(conn_t *connp)
{
	ilg_t	*new;
	int	curcnt;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated);

	/* First use: allocate the initial chunk of ilgs. */
	if (connp->conn_ilg == NULL) {
		connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK);
		if (connp->conn_ilg == NULL)
			return (NULL);
		connp->conn_ilg_allocated = ILG_ALLOC_CHUNK;
		connp->conn_ilg_inuse = 0;
	}
	/* Array full: grow by one chunk, copying the existing entries. */
	if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) {
		curcnt = connp->conn_ilg_allocated;
		new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK);
		if (new == NULL)
			return (NULL);
		bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt);
		mi_free((char *)connp->conn_ilg);
		connp->conn_ilg = new;
		connp->conn_ilg_allocated += ILG_ALLOC_CHUNK;
	}

	return (&connp->conn_ilg[connp->conn_ilg_inuse++]);
}

/* State carried by ilm_bld_flists() across the ipcl_walk of all conns. */
typedef struct ilm_fbld_s {
	ilm_t		*fbld_ilm;	/* ilm whose filter is being rebuilt */
	int		fbld_in_cnt;	/* # of INCLUDE-mode ilgs found */
	int		fbld_ex_cnt;	/* # of EXCLUDE-mode ilgs found */
	slist_t		fbld_in;	/* union of the include lists */
	slist_t		fbld_ex;	/* intersection of the exclude lists */
	boolean_t	fbld_in_overflow; /* include list overflowed */
} ilm_fbld_t;

/*
 * ipcl_walk callback: fold this conn's matching ilg filter state into
 * the master include/exclude lists being built in the ilm_fbld_t that
 * `arg' points at.
 */
static void
ilm_bld_flists(conn_t *conn, void *arg)
{
	int i;
	ilm_fbld_t *fbld = (ilm_fbld_t *)(arg);
	ilm_t *ilm = fbld->fbld_ilm;
	in6_addr_t *v6group = &ilm->ilm_v6addr;

	if (conn->conn_ilg_inuse == 0)
		return;

	/*
	 * Since we can't break out of the ipcl_walk once started, we still
	 * have to look at every conn. But if we've already found one
	 * (EXCLUDE, NULL) list, there's no need to keep checking individual
	 * ilgs--that will be our state.
	 */
	if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0)
		return;

	/*
	 * Check this conn's ilgs to see if any are interested in our
	 * ilm (group, interface match). If so, update the master
	 * include and exclude lists we're building in the fbld struct
	 * with this ilg's filter info.
	 */
	mutex_enter(&conn->conn_lock);
	for (i = 0; i < conn->conn_ilg_inuse; i++) {
		ilg_t *ilg = &conn->conn_ilg[i];
		if ((ilg->ilg_ill == ilm->ilm_ill) &&
		    (ilg->ilg_ipif == ilm->ilm_ipif) &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (ilg->ilg_fmode == MODE_IS_INCLUDE) {
				fbld->fbld_in_cnt++;
				if (!fbld->fbld_in_overflow)
					l_union_in_a(&fbld->fbld_in,
					    ilg->ilg_filter,
					    &fbld->fbld_in_overflow);
			} else {
				fbld->fbld_ex_cnt++;
				/*
				 * On the first exclude list, don't try to do
				 * an intersection, as the master exclude list
				 * is intentionally empty. If the master list
				 * is still empty on later iterations, that
				 * means we have at least one ilg with an empty
				 * exclude list, so that should be reflected
				 * when we take the intersection.
				 */
				if (fbld->fbld_ex_cnt == 1) {
					if (ilg->ilg_filter != NULL)
						l_copy(ilg->ilg_filter,
						    &fbld->fbld_ex);
				} else {
					l_intersection_in_a(&fbld->fbld_ex,
					    ilg->ilg_filter);
				}
			}
			/* there will only be one match, so break now. */
			break;
		}
	}
	mutex_exit(&conn->conn_lock);
}

/*
 * Compute the interface-level (IGMPv3/MLDv2) filter mode and source
 * list for `ilm' by merging the filter state of every interested ilg
 * across all conns in the stack.  Results are returned through
 * *fmode and *flist (flist must be preallocated by the caller).
 */
static void
ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist)
{
	ilm_fbld_t fbld;
	ip_stack_t *ipst = ilm->ilm_ipst;

	fbld.fbld_ilm = ilm;
	fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0;
	fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0;
	fbld.fbld_in_overflow = B_FALSE;

	/* first, construct our master include and exclude lists */
	ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst);

	/* now use those master lists to generate the interface filter */

	/* if include list overflowed, filter is (EXCLUDE, NULL) */
	if (fbld.fbld_in_overflow) {
		*fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/* if nobody interested, interface filter is (INCLUDE, NULL) */
	if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		flist->sl_numsrc = 0;
		return;
	}

	/*
	 * If there are no exclude lists, then the interface filter
	 * is INCLUDE, with its filter list equal to fbld_in. A single
	 * exclude list makes the interface filter EXCLUDE, with its
	 * filter list equal to (fbld_ex - fbld_in).
	 */
	if (fbld.fbld_ex_cnt == 0) {
		*fmode = MODE_IS_INCLUDE;
		l_copy(&fbld.fbld_in, flist);
	} else {
		*fmode = MODE_IS_EXCLUDE;
		l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist);
	}
}

/*
 * If the given interface has failed, choose a new one to join on so
 * that we continue to receive packets. ilg_orig_ifindex remembers
 * what the application used to join on so that we know the ilg to
 * delete even though we change the ill here. Callers will store the
 * ilg returned from this function in ilg_ill. Thus when we receive
 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets.
 *
 * This function must be called as writer so we can walk the group
 * list and examine flags without holding a lock.
 */
ill_t *
ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
{
	ill_t	*till;
	ill_group_t *illgrp = ill->ill_group;

	ASSERT(IAM_WRITER_ILL(ill));

	/* Not a real group, or no IPMP group: nothing to choose from. */
	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
		return (ill);

	/* This ill is still usable; keep it. */
	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
		return (ill);

	/* Find the first usable ill in the group. */
	till = illgrp->illgrp_ill;
	while (till != NULL &&
	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
		till = till->ill_group_next;
	}
	if (till != NULL)
		return (till);

	/* Everything in the group has failed; fall back to the original. */
	return (ill);
}

/*
 * Update the filter state of an existing ilm for an additional join
 * (or for an ilg filter change when ilgstat == ILGSTAT_CHANGE), and
 * send the resulting IGMP/MLD state-change report if the interface
 * filter actually changed.  Returns 0 or ENOMEM.
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 * - we've had previous joins without associated ilgs
	 * - this join has no associated ilg
	 * - the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* A filter change on an existing ilg is not a new reference. */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state. If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	l_free(flist);
	return (0);
}

/*
 * Recompute an ilm's filter state after a member has left (but other
 * references remain), and report the change via IGMP/MLD if needed.
 * Returns 0 or ENOMEM.
 */
static int
ilm_update_del(ilm_t *ilm, boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				/*
				 * Allocation failure: degrade gracefully to
				 * (EXCLUDE, NULL), i.e. no source filtering,
				 * rather than failing the leave.
				 */
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}

/*
 * INADDR_ANY means all multicast addresses. This is only used
 * by the multicast router.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 */
int
ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ipif(ipif, group);
	mutex_exit(&ill->ill_lock);
	/*
	 * Since we are writer, we know the ilm_flags itself cannot
	 * change at this point, and ilm_lookup_ipif would not have
	 * returned a DELETED ilm. However, the data path can free
	 * ilm->next via ilm_walker_cleanup() so we can safely
	 * access anything in ilm except ilm_next (for safe access to
	 * ilm_next we'd have to take the ill_lock).
	 */
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));

	/*
	 * ilms are associated with ipifs in IPv4. It moves with the
	 * ipif if the ipif moves to a new ill when the interface
	 * fails. Thus we really don't check whether the ipif_ill
	 * has failed like in IPv6. If it has FAILED the ipif
	 * will move (daemon will move it) and hence the ilm, if the
	 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs,
	 * we continue to receive in the same place even if the
	 * interface fails.
	 */
	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (group == INADDR_ANY) {
		/*
		 * Check how many ipif's have members in this group -
		 * if more than one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, &v6group) > 1)
			return (0);
		if (ill->ill_group == NULL)
			ret = ip_join_allmulti(ipif);
		else
			ret = ill_nominate_mcast_rcv(ill->ill_group);
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_joingroup(ilm);

	/* Another ipif already joined this group on the ill. */
	if (ilm_numentries_v6(ill, &v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ipif, &v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 *
 * ill identifies the interface to join on; it may not match the
 * interface requested by the application if a failover has taken
 * place. orig_ifindex always identifies the interface requested
 * by the app.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 */
int
ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
    slist_t *ilg_flist)
{
	ilm_t	*ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		return (EINVAL);
	}

	/*
	 * An ilm is uniquely identified by the tuple of (group, ill,
	 * orig_ill). group is the multicast group address, ill is
	 * the interface on which it is currently joined, and orig_ill
	 * is the interface on which the application requested the
	 * join. orig_ill and ill are the same unless orig_ill has
	 * failed over.
	 *
	 * Both orig_ill and ill are required, which means we may have
	 * 2 ilms on an ill for the same group, but with different
	 * orig_ills. These must be kept separate, so that when failback
	 * occurs, the appropriate ilms are moved back to their orig_ill
	 * without disrupting memberships on the ill to which they had
	 * been moved.
	 *
	 * In order to track orig_ill, we store orig_ifindex in the
	 * ilm and ilg.
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
	mutex_exit(&ill->ill_lock);
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));

	/*
	 * We need to remember where the application really wanted
	 * to join. This will be used later if we want to failback
	 * to the original interface.
	 */
	ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
	    ilg_flist, orig_ifindex, zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more than one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, v6group) > 1)
			return (0);
		if (ill->ill_group == NULL)
			ret = ip_join_allmulti(ill->ill_ipif);
		else
			ret = ill_nominate_mcast_rcv(ill->ill_group);

		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		mld_joingroup(ilm);

	/*
	 * If we have more than one we should not tell the driver
	 * to join this time.
	 */
	if (ilm_numentries_v6(ill, v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * Send a multicast request to the driver for enabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	uint32_t addrlen, addroff;
	char	group_buf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t),
	    &addrlen, &addroff);
	if (!mp)
		return (ENOMEM);
	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this squery mblk.
		 * The actual hardware address will be accessed only in the
		 * dl_xxx_req, not in the squery. More importantly,
		 * that hardware address can *only* be accessed in this
		 * mblk chain by calling mi_offset_param_c(), which uses
		 * the magic number in the squery hw offset field to go
		 * to the *next* mblk (the dl_xxx_req), subtract the
		 * (b_wptr - b_rptr), and find the actual offset into
		 * the dl_xxx_req.
		 * Any method that depends on using the
		 * offset field in the dl_disabmulti_req or squery
		 * to find either hardware address will similarly fail.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* Hand the squery to ARP via the ill's read queue. */
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv6: NDP owns the driver multicast requests. */
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for enabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}

	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		/* Driver delivers multicast as broadcast; nothing to do. */
		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_enabmulti_req(ill, v6groupp));
}

/*
 * INADDR_ANY means all multicast addresses. This is only used
 * by the multicast router.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 */
int
ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/*
	 * Look for a match on the ipif.
	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ipif(ipif, group);
	mutex_exit(&ill->ill_lock);
	if (ilm == NULL)
		return (ENOENT);

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other members remain: just recompute/report the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_FALSE));

	if (group == INADDR_ANY) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, &v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave. This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, &v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, &v6group));
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 */
int
ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
{
	ipif_t	*ipif;
	ilm_t	*ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
		return (EINVAL);

	/*
	 * Look for a match on the ill.
	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
	 *
	 * Similar to ip_addmulti_v6, we should always look using
	 * the orig_ifindex.
	 *
	 * 1) If orig_ifindex is different from ill's ifindex
	 * we should have an ilm with orig_ifindex created in
	 * ip_addmulti_v6. We should delete that here.
	 *
	 * 2) If orig_ifindex is same as ill's ifindex, we should
	 * not delete the ilm that is temporarily here because of
	 * a FAILOVER. Those ilms will have a ilm_orig_ifindex
	 * different from ill's ifindex.
	 *
	 * Thus, always lookup using orig_ifindex.
	 */
	mutex_enter(&ill->ill_lock);
	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
	mutex_exit(&ill->ill_lock);
	if (ilm == NULL)
		return (ENOENT);

	ASSERT(ilm->ilm_ill == ill);

	ipif = ill->ill_ipif;

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* Other members remain: just recompute/report the filter state. */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_TRUE));

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave. This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		mld_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, v6group));
}

/*
 * Send a multicast request to the driver for disabling multicast reception
 * for v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	char	group_buf[INET6_ADDRSTRLEN];
	uint32_t addrlen, addroff;

	ASSERT(IAM_WRITER_ILL(ill));
	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);

	if (!mp)
		return (ENOMEM);

	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic. It is the offset
		 * into the dl_xxx_req of the hw addr. Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this mblk.
		 *
		 * Please see the comment in ip_ll_send_enabmulti_req()
		 * for a complete explanation.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* Hand the squery to ARP via the ill's read queue. */
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		/* IPv6: NDP owns the driver multicast requests. */
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		/* Driver delivers multicast as broadcast; nothing to do. */
		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_disabmulti_req(ill, v6group));
}

/*
 * Make the driver pass up all multicast packets
 *
 * With ill groups, the caller makes sure that there is only
 * one ill joining the allmulti group.
 */
int
ip_join_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(!ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to
	 * the DLPI provider.  We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
		if (mp == NULL)
			return (ENOMEM);
		ill_dlpi_send(ill, mp);
	}

	ill->ill_join_allmulti = B_TRUE;
	return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 *
 * With ill groups, we need to nominate some other ill as
 * this ipif->ipif_ill is leaving the group.
 */
int
ip_leave_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCOFF_REQ message and send it directly to
	 * the DLPI provider.  We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		mp = ill_create_dl(ill, DL_PROMISCOFF_REQ,
		    sizeof (dl_promiscoff_req_t), &addrlen, &addroff);
		if (mp == NULL)
			return (ENOMEM);
		ill_dlpi_send(ill, mp);
	}

	ill->ill_join_allmulti = B_FALSE;
	return (0);
}

/*
 * Copy mp_orig and pass it in as a local message.
 *
 * For M_DATA UDP packets a shallow split is done instead of a full copy:
 * the IP+UDP header is copied into a fresh mblk and the (dup'd) original
 * is chained on as the payload, avoiding a copy of the packet body.
 */
void
ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags,
    zoneid_t zoneid)
{
	mblk_t	*mp;
	mblk_t	*ipsec_mp;
	ipha_t	*iph;
	ip_stack_t *ipst = ill->ill_ipst;

	if (DB_TYPE(mp_orig) == M_DATA &&
	    ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) {
		uint_t hdrsz;

		hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) +
		    sizeof (udpha_t);
		ASSERT(MBLKL(mp_orig) >= hdrsz);

		if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) &&
		    (mp_orig = dupmsg(mp_orig)) != NULL) {
			bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz);
			mp->b_wptr += hdrsz;
			mp->b_cont = mp_orig;
			mp_orig->b_rptr += hdrsz;
			if (is_system_labeled() && DB_CRED(mp_orig) != NULL)
				mblk_setcred(mp, DB_CRED(mp_orig));
			/* Header consumed the whole first mblk: drop it */
			if (MBLKL(mp_orig) == 0) {
				mp->b_cont = mp_orig->b_cont;
				mp_orig->b_cont = NULL;
				freeb(mp_orig);
			}
		} else if (mp != NULL) {
			/* dupmsg() failed; release the header mblk */
			freeb(mp);
			mp = NULL;
		}
	} else {
		mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */
	}

	if (mp == NULL)
		return;
	if (DB_TYPE(mp) == M_CTL) {
		/* IPsec-attached message: data follows the M_CTL */
		ipsec_mp = mp;
		mp = mp->b_cont;
	} else {
		ipsec_mp = mp;
	}

	iph = (ipha_t *)mp->b_rptr;

	DTRACE_PROBE4(ip4__loopback__out__start,
	    ill_t *, NULL, ill_t *, ill,
	    ipha_t *, iph, mblk_t *, ipsec_mp);

	/* Firewall hooks may consume the message (ipsec_mp set to NULL) */
	FW_HOOKS(ipst->ips_ip4_loopback_out_event,
	    ipst->ips_ipv4firewall_loopback_out,
	    NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst);

	DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp);

	if (ipsec_mp != NULL)
		ip_wput_local(q, ill, iph, ipsec_mp, NULL,
		    fanout_flags, zoneid);
}

static area_t	ip_aresq_template = {
	AR_ENTRY_SQUERY,		/* cmd */
	sizeof (area_t)+IP_ADDR_LEN,	/* name offset */
	sizeof (area_t),	/* name len (filled by ill_arp_alloc) */
	IP_ARP_PROTO_TYPE,	/* protocol, from arps perspective */
	sizeof (area_t),	/* proto addr offset */
	IP_ADDR_LEN,	/* proto addr_length */
	0,	/* proto mask offset */
	/* Rest is initialized when used */
	0,	/* flags */
	0,	/* hw addr offset */
	0,	/* hw addr length */
};

/*
 * Wrap mp_tail (a DLPI multicast request) in an AR_ENTRY_SQUERY so ARP
 * can fill in the hardware address.  Consumes mp_tail on failure.
 */
static mblk_t *
ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen,
    uint32_t addroff, mblk_t *mp_tail)
{
	mblk_t	*mp;
	area_t	*area;

	mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template,
	    (caddr_t)&ipaddr);
	if (!mp) {
		freemsg(mp_tail);
		return (NULL);
	}
	area = (area_t *)mp->b_rptr;
	area->area_hw_addr_length = addrlen;
	area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff;
	/*
	 * NOTE!
	 *
	 * The area_hw_addr_offset, as can be seen, does not hold the
	 * actual hardware address offset. Rather, it holds the offset
	 * to the hw addr in the dl_xxx_req in mp_tail, modified by
	 * adding (mp->b_wptr - mp->b_rptr). This allows the function
	 * mi_offset_paramc() to find the hardware address in the
	 * *second* mblk (dl_xxx_req), not this mblk.
	 *
	 * Using mi_offset_paramc() is thus the *only* way to access
	 * the dl_xxx_hw address.
	 *
	 * The squery hw address should *not* be accessed.
	 *
	 * See ar_entry_squery() in arp.c for an example of how all this works.
	 */

	mp->b_cont = mp_tail;
	return (mp);
}

/*
 * Create a dlpi message with room for phys+sap.
 * When we come back in
 * ip_wput_ctl() we will strip the sap for those primitives which
 * only need a physical address.
 *
 * On success *addr_lenp/*addr_offp describe where the hardware address
 * goes inside the returned request; both are 0 for the promiscuous
 * primitives, which carry no address.
 */
static mblk_t *
ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length,
    uint32_t *addr_lenp, uint32_t *addr_offp)
{
	mblk_t	*mp;
	uint32_t	hw_addr_length;
	char		*cp;
	uint32_t	offset;
	uint32_t	size;

	*addr_lenp = *addr_offp = 0;

	hw_addr_length = ill->ill_phys_addr_length;
	if (!hw_addr_length) {
		ip0dbg(("ip_create_dl: hw addr length = 0\n"));
		return (NULL);
	}

	size = length;
	switch (dl_primitive) {
	case DL_ENABMULTI_REQ:
	case DL_DISABMULTI_REQ:
		/* Room for the hardware address after the request */
		size += hw_addr_length;
		break;
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ:
		break;
	default:
		return (NULL);
	}
	mp = allocb(size, BPRI_HI);
	if (!mp)
		return (NULL);
	mp->b_wptr += size;
	mp->b_datap->db_type = M_PROTO;

	cp = (char *)mp->b_rptr;
	offset = length;

	switch (dl_primitive) {
	case DL_ENABMULTI_REQ: {
		dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_DISABMULTI_REQ: {
		dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_addr_offset = offset;
		*addr_lenp = dl->dl_addr_length = hw_addr_length;
		*addr_offp = offset;
		break;
	}
	case DL_PROMISCON_REQ:
	case DL_PROMISCOFF_REQ: {
		dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp;

		dl->dl_primitive = dl_primitive;
		dl->dl_level = DL_PROMISC_MULTI;
		break;
	}
	}
	ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n",
	    *addr_lenp, *addr_offp));
	return (mp);
}

/*
 * Write-side control handler: an AR_ENTRY_SQUERY coming back from ARP
 * with a DLPI multicast request chained on (see ill_create_squery()).
 * Strip the squery and send the DLPI request to the driver; anything
 * else is passed downstream untouched.
 */
void
ip_wput_ctl(queue_t *q, mblk_t *mp_orig)
{
	ill_t	*ill = (ill_t *)q->q_ptr;
	mblk_t	*mp = mp_orig;
	area_t	*area = (area_t *)mp->b_rptr;

	/* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */
	if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL ||
	    area->area_cmd != AR_ENTRY_SQUERY) {
		putnext(q, mp);
		return;
	}
	mp = mp->b_cont;

	/*
	 * Update dl_addr_length and dl_addr_offset for primitives that
	 * have physical addresses as opposed to full saps
	 */
	switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) {
	case DL_ENABMULTI_REQ:
		/* Track the state if this is the first enabmulti */
		if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN)
			ill->ill_dlpi_multicast_state = IDS_INPROGRESS;
		ip1dbg(("ip_wput_ctl: ENABMULTI\n"));
		break;
	case DL_DISABMULTI_REQ:
		ip1dbg(("ip_wput_ctl: DISABMULTI\n"));
		break;
	default:
		ip1dbg(("ip_wput_ctl: default\n"));
		break;
	}
	freeb(mp_orig);
	ill_dlpi_send(ill, mp);
}

/*
 * Rejoin any groups which have been explicitly joined by the application (we
 * left all explicitly joined groups as part of ill_leave_multicast() prior to
 * bringing the interface down).  Note that because groups can be joined and
 * left while an interface is down, this may not be the same set of groups
 * that we left in ill_leave_multicast().
 */
void
ill_recover_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));
	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more then one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_recover_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			if (ill->ill_group == NULL) {
				(void) ip_join_allmulti(ill->ill_ipif);
			} else {
				/*
				 * We don't want to join on this ill,
				 * if somebody else in the group has
				 * already been nominated.
				 */
				(void) ill_nominate_mcast_rcv(ill->ill_group);
			}
		} else {
			(void) ip_ll_addmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/*
 * The opposite of ill_recover_multicast() -- leaves all multicast groups
 * that were explicitly joined.  Note that both these functions could be
 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ
 * and DL_ENABMULTI_REQ messages when an interface is down.
 */
void
ill_leave_multicast(ill_t *ill)
{
	ilm_t	*ilm;
	char    addrbuf[INET6_ADDRSTRLEN];

	ASSERT(IAM_WRITER_ILL(ill));
	ILM_WALKER_HOLD(ill);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		/*
		 * Check how many ipif's that have members in this group -
		 * if more then one we make sure that this entry is first
		 * in the list.
		 */
		if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 &&
		    ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm)
			continue;
		ip1dbg(("ill_leave_multicast: %s\n",
		    inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf,
		    sizeof (addrbuf))));
		if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) {
			(void) ip_leave_allmulti(ill->ill_ipif);
			/*
			 * If we were part of an IPMP group, then
			 * ill_handoff_responsibility() has already
			 * nominated a new member (so we don't).
			 */
			ASSERT(ill->ill_group == NULL);
		} else {
			(void) ip_ll_delmulti_v6(ill->ill_ipif,
			    &ilm->ilm_v6addr);
		}
	}
	ILM_WALKER_RELE(ill);
}

/* Find an ilm for matching the ill */
ilm_t *
ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid)
{
	in6_addr_t	v6group;

	ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock));
	/*
	 * INADDR_ANY is represented as the IPv6 unspecifed addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	return (ilm_lookup_ill_v6(ill, &v6group, zoneid));
}

/*
 * Find an ilm for matching the ill.  All the ilm lookup functions
 * ignore ILM_DELETED ilms.  These have been logically deleted, and
 * igmp and linklayer disable multicast have been done.  Only mi_free
 * yet to be done.  Still there in the list due to ilm_walkers.  The
 * last walker will release it.
1535 */ 1536 ilm_t * 1537 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1538 { 1539 ilm_t *ilm; 1540 1541 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock)); 1542 1543 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1544 if (ilm->ilm_flags & ILM_DELETED) 1545 continue; 1546 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1547 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1548 return (ilm); 1549 } 1550 return (NULL); 1551 } 1552 1553 ilm_t * 1554 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1555 zoneid_t zoneid) 1556 { 1557 ilm_t *ilm; 1558 1559 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock)); 1560 1561 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1562 if (ilm->ilm_flags & ILM_DELETED) 1563 continue; 1564 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1565 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1566 ilm->ilm_orig_ifindex == index) { 1567 return (ilm); 1568 } 1569 } 1570 return (NULL); 1571 } 1572 1573 1574 /* 1575 * Found an ilm for the ipif. Only needed for IPv4 which does 1576 * ipif specific socket options. 1577 */ 1578 ilm_t * 1579 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1580 { 1581 ill_t *ill = ipif->ipif_ill; 1582 ilm_t *ilm; 1583 in6_addr_t v6group; 1584 1585 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock)); 1586 /* 1587 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1588 */ 1589 if (group == INADDR_ANY) 1590 v6group = ipv6_all_zeros; 1591 else 1592 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1593 1594 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1595 if (ilm->ilm_flags & ILM_DELETED) 1596 continue; 1597 if (ilm->ilm_ipif == ipif && 1598 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1599 return (ilm); 1600 } 1601 return (NULL); 1602 } 1603 1604 /* 1605 * How many members on this ill? 
 */
int
ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group)
{
	ilm_t	*ilm;
	int	i = 0;

	mutex_enter(&ill->ill_lock);
	for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) {
		if (ilm->ilm_flags & ILM_DELETED)
			continue;
		if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) {
			i++;
		}
	}
	mutex_exit(&ill->ill_lock);
	return (i);
}

/* Caller guarantees that the group is not already on the list */
static ilm_t *
ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex,
    zoneid_t zoneid)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	ilm_t	*ilm_cur;
	ilm_t	**ilm_ptpn;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ilm = GETSTRUCT(ilm_t, 1);
	if (ilm == NULL)
		return (NULL);
	/* Only allocate a filter list if ilg state supplies sources */
	if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			mi_free(ilm);
			return (NULL);
		}
	}
	ilm->ilm_v6addr = *v6group;
	ilm->ilm_refcnt = 1;
	ilm->ilm_zoneid = zoneid;
	ilm->ilm_timer = INFINITY;
	ilm->ilm_rtx.rtx_timer = INFINITY;

	/*
	 * IPv4 Multicast groups are joined using ipif.
	 * IPv6 Multicast groups are joined using ill.
	 */
	if (ill->ill_isv6) {
		ilm->ilm_ill = ill;
		ilm->ilm_ipif = NULL;
		DTRACE_PROBE3(ill__incr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_cnt_ilm++;
	} else {
		ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid);
		ilm->ilm_ipif = ipif;
		ilm->ilm_ill = NULL;
		DTRACE_PROBE3(ipif__incr__cnt, (ipif_t *), ipif,
		    (char *), "ilm", (void *), ilm);
		ipif->ipif_cnt_ilm++;
	}
	ASSERT(ill->ill_ipst);
	ilm->ilm_ipst = ill->ill_ipst;	/* No netstack_hold */

	/*
	 * After this if ilm moves to a new ill, we don't change
	 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex,
	 * it has been moved. Indexes don't match even when the application
	 * wants to join on a FAILED/INACTIVE interface because we choose
	 * a new interface to join in. This is considered as an implicit
	 * move.
	 */
	ilm->ilm_orig_ifindex = orig_ifindex;

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));
	ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));

	/*
	 * Grab lock to give consistent view to readers
	 */
	mutex_enter(&ill->ill_lock);
	/*
	 * All ilms in the same zone are contiguous in the ill_ilm list.
	 * The loops in ip_proto_input() and ip_wput_local() use this to avoid
	 * sending duplicates up when two applications in the same zone join the
	 * same group on different logical interfaces.
	 */
	ilm_cur = ill->ill_ilm;
	ilm_ptpn = &ill->ill_ilm;
	while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) {
		ilm_ptpn = &ilm_cur->ilm_next;
		ilm_cur = ilm_cur->ilm_next;
	}
	ilm->ilm_next = ilm_cur;
	*ilm_ptpn = ilm;

	/*
	 * If we have an associated ilg, use its filter state; if not,
	 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this.
	 */
	if (ilgstat != ILGSTAT_NONE) {
		if (!SLIST_IS_EMPTY(ilg_flist))
			l_copy(ilg_flist, ilm->ilm_filter);
		ilm->ilm_fmode = ilg_fmode;
	} else {
		ilm->ilm_no_ilg_cnt = 1;
		ilm->ilm_fmode = MODE_IS_EXCLUDE;
	}

	mutex_exit(&ill->ill_lock);
	return (ilm);
}

/*
 * Release all storage attached to an ilm (source filter lists and the
 * retransmit state) and free the ilm itself.
 */
static void
ilm_inactive(ilm_t *ilm)
{
	FREE_SLIST(ilm->ilm_filter);
	FREE_SLIST(ilm->ilm_pendsrcs);
	FREE_SLIST(ilm->ilm_rtx.rtx_allow);
	FREE_SLIST(ilm->ilm_rtx.rtx_block);
	ilm->ilm_ipst = NULL;
	mi_free((char *)ilm);
}

/*
 * Reclaim all ILM_DELETED entries on the ill now that the last walker
 * has drained (ilm deletion is deferred while walkers are active; see
 * ilm_delete()).  Called with ill_lock held and walker count zero; the
 * lock is released before returning, either directly or via
 * ipif_ill_refrele_tail() when a pending FREE/unplumb must be woken.
 */
void
ilm_walker_cleanup(ill_t *ill)
{
	ilm_t	**ilmp;
	ilm_t	*ilm;
	boolean_t need_wakeup = B_FALSE;

	ASSERT(MUTEX_HELD(&ill->ill_lock));
	ASSERT(ill->ill_ilm_walker_cnt == 0);

	ilmp = &ill->ill_ilm;
	while (*ilmp != NULL) {
		if ((*ilmp)->ilm_flags & ILM_DELETED) {
			ilm = *ilmp;
			*ilmp = ilm->ilm_next;
			/*
			 * check if there are any pending FREE or unplumb
			 * operations that need to be restarted.
			 */
			if (ilm->ilm_ipif != NULL) {
				/*
				 * IPv4 ilms hold a ref on the ipif.
				 */
				DTRACE_PROBE3(ipif__decr__cnt,
				    (ipif_t *), ilm->ilm_ipif,
				    (char *), "ilm", (void *), ilm);
				ilm->ilm_ipif->ipif_cnt_ilm--;
				if (IPIF_FREE_OK(ilm->ilm_ipif))
					need_wakeup = B_TRUE;
			} else {
				/*
				 * IPv6 ilms hold a ref on the ill.
				 */
				ASSERT(ilm->ilm_ill == ill);
				DTRACE_PROBE3(ill__decr__cnt,
				    (ill_t *), ill,
				    (char *), "ilm", (void *), ilm);
				ill->ill_cnt_ilm--;
				if (ILL_FREE_OK(ill))
					need_wakeup = B_TRUE;
			}
			ilm_inactive(ilm); /* frees ilm */
		} else {
			ilmp = &(*ilmp)->ilm_next;
		}
	}
	ill->ill_ilm_cleanup_reqd = 0;
	if (need_wakeup)
		ipif_ill_refrele_tail(ill);
	else
		mutex_exit(&ill->ill_lock);
}

/*
 * Unlink ilm and free it.
 * If walkers are active on the ill the ilm is only marked ILM_DELETED
 * and left on the list; the last walker frees it via ilm_walker_cleanup().
 */
static void
ilm_delete(ilm_t *ilm)
{
	ill_t		*ill;
	ilm_t		**ilmp;
	boolean_t	need_wakeup;


	/* An ilm references exactly one of: an ipif (IPv4) or an ill (IPv6) */
	if (ilm->ilm_ipif != NULL) {
		ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif));
		ASSERT(ilm->ilm_ill == NULL);
		ill = ilm->ilm_ipif->ipif_ill;
		ASSERT(!ill->ill_isv6);
	} else {
		ASSERT(IAM_WRITER_ILL(ilm->ilm_ill));
		ASSERT(ilm->ilm_ipif == NULL);
		ill = ilm->ilm_ill;
		ASSERT(ill->ill_isv6);
	}
	/*
	 * Delete under lock protection so that readers don't stumble
	 * on bad ilm_next
	 */
	mutex_enter(&ill->ill_lock);
	if (ill->ill_ilm_walker_cnt != 0) {
		/* Defer the actual unlink/free to ilm_walker_cleanup() */
		ilm->ilm_flags |= ILM_DELETED;
		ill->ill_ilm_cleanup_reqd = 1;
		mutex_exit(&ill->ill_lock);
		return;
	}

	for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next)
		;
	*ilmp = ilm->ilm_next;

	/*
	 * if we are the last reference to the ipif (for IPv4 ilms)
	 * or the ill (for IPv6 ilms), we may need to wakeup any
	 * pending FREE or unplumb operations.
	 */
	need_wakeup = B_FALSE;
	if (ilm->ilm_ipif != NULL) {
		DTRACE_PROBE3(ipif__decr__cnt, (ipif_t *), ilm->ilm_ipif,
		    (char *), "ilm", (void *), ilm);
		ilm->ilm_ipif->ipif_cnt_ilm--;
		if (IPIF_FREE_OK(ilm->ilm_ipif))
			need_wakeup = B_TRUE;
	} else {
		DTRACE_PROBE3(ill__decr__cnt, (ill_t *), ill,
		    (char *), "ilm", (void *), ilm);
		ill->ill_cnt_ilm--;
		if (ILL_FREE_OK(ill))
			need_wakeup = B_TRUE;
	}

	ilm_inactive(ilm); /* frees this ilm */

	if (need_wakeup) {
		/* drops ill lock */
		ipif_ill_refrele_tail(ill);
	} else {
		mutex_exit(&ill->ill_lock);
	}
}


/*
 * Looks up the appropriate ipif given a v4 multicast group and interface
 * address.  On success, returns 0, with *ipifpp pointing to the found
 * struct.  On failure, returns an errno and *ipifpp is NULL.
 *
 * May return EINPROGRESS when the underlying ipif lookup needs to wait;
 * in that case first_mp/func are used to restart the operation
 * (standard ipsq deferral protocol).
 */
int
ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr,
    uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp)
{
	ipif_t	*ipif;
	int err = 0;
	zoneid_t zoneid;
	ip_stack_t	*ipst = connp->conn_netstack->netstack_ip;

	/* Group must be multicast; source must not be */
	if (!CLASSD(group) || CLASSD(src)) {
		return (EINVAL);
	}
	*ipifpp = NULL;

	zoneid = IPCL_ZONEID(connp);

	/* Caller supplies at most one of ifaddr / *ifindexp */
	ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0));
	if (ifaddr != INADDR_ANY) {
		ipif = ipif_lookup_addr(ifaddr, NULL, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
		if (err != 0 && err != EINPROGRESS)
			err = EADDRNOTAVAIL;
	} else if (ifindexp != NULL && *ifindexp != 0) {
		ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid,
		    CONNP_TO_WQ(connp), first_mp, func, &err, ipst);
	} else {
		/* No interface specified: pick one by multicast routing */
		ipif = ipif_lookup_group(group, zoneid, ipst);
		if (ipif == NULL)
			return (EADDRNOTAVAIL);
	}
	if (ipif == NULL)
		return (err);

	*ipifpp = ipif;
	return (0);
}

/*
 * Looks up the appropriate ill (or ipif if v4mapped) given an interface
 * index and IPv6 multicast group.  On success, returns 0, with *illpp (or
 * *ipifpp if v4mapped) pointing to the found struct.  On failure, returns
 * an errno and *illpp and *ipifpp are undefined.
 */
int
ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group,
    const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex,
    mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp)
{
	boolean_t src_unspec;
	ill_t *ill = NULL;
	ipif_t *ipif = NULL;
	int err;
	zoneid_t zoneid = connp->conn_zoneid;
	queue_t *wq = CONNP_TO_WQ(connp);
	ip_stack_t *ipst = connp->conn_netstack->netstack_ip;

	src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src);

	if (IN6_IS_ADDR_V4MAPPED(v6group)) {
		/* v4mapped group requires a v4mapped (or unspecified) src */
		if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		IN6_V4MAPPED_TO_IPADDR(v6group, *v4group);
		if (src_unspec) {
			*v4src = INADDR_ANY;
		} else {
			IN6_V4MAPPED_TO_IPADDR(v6src, *v4src);
		}
		if (!CLASSD(*v4group) || CLASSD(*v4src))
			return (EINVAL);
		*ipifpp = NULL;
		*isv6 = B_FALSE;
	} else {
		/* native v6 group must not be paired with a v4mapped src */
		if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec)
			return (EINVAL);
		if (!IN6_IS_ADDR_MULTICAST(v6group) ||
		    IN6_IS_ADDR_MULTICAST(v6src)) {
			return (EINVAL);
		}
		*illpp = NULL;
		*isv6 = B_TRUE;
	}

	if (ifindex == 0) {
		/* no index given: choose an interface for the group */
		if (*isv6)
			ill = ill_lookup_group_v6(v6group, zoneid, ipst);
		else
			ipif = ipif_lookup_group(*v4group, zoneid, ipst);
		if (ill == NULL && ipif == NULL)
			return (EADDRNOTAVAIL);
	} else {
		if (*isv6) {
			ill = ill_lookup_on_ifindex(ifindex, B_TRUE,
			    wq, first_mp, func, &err, ipst);
			/* the ill must be usable from the conn's zone */
			if (ill != NULL &&
			    !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) {
				ill_refrele(ill);
				ill = NULL;
				err = EADDRNOTAVAIL;
			}
		} else {
			ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE,
			    zoneid, wq, first_mp, func, &err, ipst);
		}
		if (ill == NULL && ipif == NULL)
			return (err);
	}

	*ipifpp = ipif;
	*illpp = ill;
	return (0);
}

/*
 * Copy the current (fmode, source list) filter state for v4 group grp on
 * ipif out to the application structure: either the v4-only ip_msfilter
 * (imsf) or the protocol-independent group_filter (gf).  Exactly one of
 * gf/imsf is non-NULL.  Returns EADDRNOTAVAIL if the conn has no ilg for
 * the group on this ipif.
 */
static int
ip_get_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, numsrc, fmode, outsrcs;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *fp;
	boolean_t is_v4only_api;

	/* conn_lock protects the conn's ilg list and filter state */
	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/* outsrcs is the caller-supplied capacity of the output list */
	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		outsrcs = imsf->imsf_numsrc;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		outsrcs = gf->gf_numsrc;
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		numsrc = 0;
	} else {
		/* copy out at most outsrcs addresses, in the caller's format */
		for (i = 0; i < outsrcs; i++) {
			if (i == fp->sl_numsrc)
				break;
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				sin6->sin6_family = AF_INET6;
				sin6->sin6_addr = fp->sl_addr[i];
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					sin->sin_family = AF_INET;
					addrp = &sin->sin_addr;
				}
				IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp);
			}
		}
		/* report the full filter size, even if we copied fewer */
		numsrc = fp->sl_numsrc;
	}

	if (is_v4only_api) {
		imsf->imsf_numsrc = numsrc;
		imsf->imsf_fmode = fmode;
	} else {
		gf->gf_numsrc = numsrc;
		gf->gf_fmode = fmode;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Copy the current (fmode, source list) filter state for v6 group grp on
 * ill out to the application's group_filter structure.  Returns
 * EADDRNOTAVAIL if the conn has no ilg for the group on this ill.
 */
static int
ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	slist_t *fp;

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ill_v6(connp, grp, ill);
	if (ilg == NULL) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ?
	    MCAST_INCLUDE : MCAST_EXCLUDE;
	if ((fp = ilg->ilg_filter) == NULL) {
		gf->gf_numsrc = 0;
	} else {
		/* copy out at most gf_numsrc (caller capacity) addresses */
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			if (i == fp->sl_numsrc)
				break;
			sin6 = (struct sockaddr_in6 *)sl;
			sin6->sin6_family = AF_INET6;
			sin6->sin6_addr = fp->sl_addr[i];
		}
		gf->gf_numsrc = fp->sl_numsrc;
	}

	mutex_exit(&connp->conn_lock);

	return (0);
}

/*
 * Replace the conn's filter state for v4 group grp on ipif with the state
 * given by the application (via gf or imsf; exactly one is non-NULL), then
 * push the change down via ip_addmulti().  (MCAST_INCLUDE, empty list) is
 * treated as a leave.  If ip_addmulti() fails, the previous filter state
 * is restored (or a newly created ilg is deleted).
 */
static int
ip_set_srcfilter(conn_t *connp, struct group_filter *gf,
    struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped)
{
	ilg_t *ilg;
	int i, err, insrcs, infmode, new_fmode;
	struct sockaddr_in *sin;
	struct sockaddr_in6 *sin6;
	struct in_addr *addrp;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	mcast_record_t orig_fmode;
	boolean_t leave_grp, is_v4only_api;
	ilg_stat_t ilgstat;

	if (gf == NULL) {
		ASSERT(imsf != NULL);
		ASSERT(!isv4mapped);
		is_v4only_api = B_TRUE;
		insrcs = imsf->imsf_numsrc;
		infmode = imsf->imsf_fmode;
	} else {
		ASSERT(imsf == NULL);
		is_v4only_api = B_FALSE;
		insrcs = gf->gf_numsrc;
		infmode = gf->gf_fmode;
	}

	/* Make sure we can handle the source list */
	if (insrcs > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0);

	ASSERT(IAM_WRITER_IPIF(ipif));

	mutex_enter(&connp->conn_lock);

	ilg = ilg_lookup_ipif(connp, grp, ipif);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		ilgstat = ILGSTAT_NEW;
		IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group);
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
	} else if (leave_grp) {
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/* Preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (insrcs == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		/* copy the application's sources into the kernel slist */
		for (i = 0; i < insrcs; i++) {
			if (isv4mapped) {
				sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i];
				fp->sl_addr[i] = sin6->sin6_addr;
			} else {
				if (is_v4only_api) {
					addrp = &imsf->imsf_slist[i];
				} else {
					sin = (struct sockaddr_in *)
					    &gf->gf_slist[i];
					addrp = &sin->sin_addr;
				}
				IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]);
			}
		}
		fp->sl_numsrc = insrcs;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, grp, ipif);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Replace the conn's filter state for v6 group grp with the state in gf
 * and push the change down via ip_addmulti_v6().  (MCAST_INCLUDE, empty
 * list) is treated as a leave.  On ip_addmulti_v6() failure the previous
 * state is restored (or a newly created ilg deleted).  The ilg is looked
 * up by original ifindex since ilg_ill may have moved (failover).
 */
static int
ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf,
    const struct in6_addr *grp, ill_t *ill)
{
	ilg_t *ilg;
	int i, orig_ifindex, orig_fmode, new_fmode, err;
	slist_t *orig_filter = NULL;
	slist_t *new_filter = NULL;
	struct sockaddr_storage *sl;
	struct sockaddr_in6 *sin6;
	boolean_t leave_grp;
	ilg_stat_t ilgstat;

	/* Make sure we can handle the source list */
	if (gf->gf_numsrc > MAX_FILTER_SIZE)
		return (ENOBUFS);

	/*
	 * setting the filter to (INCLUDE, NULL) is treated
	 * as a request to leave the group.
	 */
	leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0);

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the ifindex to do the lookup.  We can't use the ill
	 * directly because ilg_ill could point to a different ill
	 * if things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;

	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
	if (ilg == NULL) {
		/*
		 * if the request was actually to leave, and we
		 * didn't find an ilg, there's nothing to do.
		 */
		if (!leave_grp)
			ilg = conn_ilg_alloc(connp);
		if (leave_grp || ilg == NULL) {
			mutex_exit(&connp->conn_lock);
			return (leave_grp ? 0 : ENOMEM);
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *grp;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on.  This might be
		 * different from the ill we've been given if it's
		 * currently down and part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, grp);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the index that we joined on, so that we can
		 * successfully delete them later on and also search for
		 * duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else if (leave_grp) {
		/*
		 * Use the ilg's current ill for the deletion,
		 * we might have failed over.
		 */
		ill = ilg->ilg_ill;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);
		(void) ip_delmulti_v6(grp, ill, orig_ifindex,
		    connp->conn_zoneid, B_FALSE, B_TRUE);
		return (0);
	} else {
		ilgstat = ILGSTAT_CHANGE;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
		/* preserve existing state in case ip_addmulti() fails */
		orig_fmode = ilg->ilg_fmode;
		if (ilg->ilg_filter == NULL) {
			orig_filter = NULL;
		} else {
			orig_filter = l_alloc_copy(ilg->ilg_filter);
			if (orig_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				return (ENOMEM);
			}
		}
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		err = ENOMEM;
		goto free_and_exit;
	}

	if (gf->gf_numsrc == 0) {
		CLEAR_SLIST(ilg->ilg_filter);
	} else {
		slist_t *fp;
		if (ilg->ilg_filter == NULL) {
			fp = l_alloc();
			if (fp == NULL) {
				if (ilgstat == ILGSTAT_NEW)
					ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				err = ENOMEM;
				goto free_and_exit;
			}
		} else {
			fp = ilg->ilg_filter;
		}
		for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) {
			sin6 = (struct sockaddr_in6 *)sl;
			fp->sl_addr[i] = sin6->sin6_addr;
		}
		fp->sl_numsrc = gf->gf_numsrc;
		ilg->ilg_filter = fp;
	}
	/*
	 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE
	 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE.
	 * So we need to translate here.
	 */
	ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ?
	    MODE_IS_INCLUDE : MODE_IS_EXCLUDE;

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (err != 0) {
		/*
		 * Restore the original filter state, or delete the
		 * newly-created ilg.  We need to look up the ilg
		 * again, though, since we've not been holding the
		 * conn_lock.
		 */
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex);
		ASSERT(ilg != NULL);
		if (ilgstat == ILGSTAT_NEW) {
			ilg_delete(connp, ilg, NULL);
		} else {
			ilg->ilg_fmode = orig_fmode;
			if (SLIST_IS_EMPTY(orig_filter)) {
				CLEAR_SLIST(ilg->ilg_filter);
			} else {
				/*
				 * We didn't free the filter, even if we
				 * were trying to make the source list empty;
				 * so if orig_filter isn't empty, the ilg
				 * must still have a filter alloc'd.
				 */
				l_copy(orig_filter, ilg->ilg_filter);
			}
		}
		mutex_exit(&connp->conn_lock);
	}

free_and_exit:
	l_free(orig_filter);
	l_free(new_filter);

	return (err);
}

/*
 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls.
 */
/* ARGSUSED */
int
ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp,
    ip_ioctl_cmd_t *ipip, void *ifreq)
{
	struct iocblk *iocp = (struct iocblk *)mp->b_rptr;
	/* existence verified in ip_wput_nondata() */
	mblk_t *data_mp = mp->b_cont->b_cont;
	int datalen, err, cmd, minsize;
	int expsize = 0;
	conn_t *connp;
	boolean_t isv6, is_v4only_api, getcmd;
	struct sockaddr_in *gsin;
	struct sockaddr_in6 *gsin6;
	ipaddr_t v4grp;
	in6_addr_t v6grp;
	struct group_filter *gf = NULL;
	struct ip_msfilter *imsf = NULL;
	mblk_t *ndp;

	/*
	 * Consolidate a multi-mblk data chain into one contiguous mblk so
	 * the structure casts below are safe.
	 */
	if (data_mp->b_cont != NULL) {
		if ((ndp = msgpullup(data_mp, -1)) == NULL)
			return (ENOMEM);
		freemsg(data_mp);
		data_mp = ndp;
		mp->b_cont->b_cont = data_mp;
	}

	cmd = iocp->ioc_cmd;
	getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER);
	is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER);
	minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0);
	datalen = MBLKL(data_mp);

	if (datalen < minsize)
		return (EINVAL);

	/*
	 * now we know we have at least have the initial structure,
	 * but need to check for the source list array.
	 */
	if (is_v4only_api) {
		imsf = (struct ip_msfilter *)data_mp->b_rptr;
		isv6 = B_FALSE;
		expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc);
	} else {
		gf = (struct group_filter *)data_mp->b_rptr;
		if (gf->gf_group.ss_family == AF_INET6) {
			gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
			isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr));
		} else {
			isv6 = B_FALSE;
		}
		expsize = GROUP_FILTER_SIZE(gf->gf_numsrc);
	}
	if (datalen < expsize)
		return (EINVAL);

	connp = Q_TO_CONN(q);

	/* operation not supported on the virtual network interface */
	if (IS_VNI(ipif->ipif_ill))
		return (EINVAL);

	if (isv6) {
		ill_t *ill = ipif->ipif_ill;
		ill_refhold(ill);

		gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
		v6grp = gsin6->sin6_addr;
		if (getcmd)
			err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill);
		else
			err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill);

		ill_refrele(ill);
	} else {
		boolean_t isv4mapped = B_FALSE;
		if (is_v4only_api) {
			v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr;
		} else {
			if (gf->gf_group.ss_family == AF_INET) {
				gsin = (struct sockaddr_in *)&gf->gf_group;
				v4grp = (ipaddr_t)gsin->sin_addr.s_addr;
			} else {
				gsin6 = (struct sockaddr_in6 *)&gf->gf_group;
				IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr,
				    v4grp);
				isv4mapped = B_TRUE;
			}
		}
		if (getcmd)
			err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
		else
			err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif,
			    isv4mapped);
	}

	return (err);
}

/*
 * Finds the ipif based on information
in the ioctl headers.  Needed to make
 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged
 * ioctls prior to calling the ioctl's handler function).
 */
int
ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip,
    cmd_info_t *ci, ipsq_func_t func)
{
	int cmd = ipip->ipi_cmd;
	int err = 0;
	conn_t *connp;
	ipif_t *ipif;
	/* caller has verified this mblk exists */
	char *dbuf = (char *)mp->b_cont->b_cont->b_rptr;
	struct ip_msfilter *imsf;
	struct group_filter *gf;
	ipaddr_t v4addr, v4grp;
	in6_addr_t v6grp;
	uint32_t index;
	zoneid_t zoneid;
	ip_stack_t *ipst;

	connp = Q_TO_CONN(q);
	zoneid = connp->conn_zoneid;
	ipst = connp->conn_netstack->netstack_ip;

	/* don't allow multicast operations on a tcp conn */
	if (IPCL_IS_TCP(connp))
		return (ENOPROTOOPT);

	if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) {
		/* don't allow v4-specific ioctls on v6 socket */
		if (connp->conn_af_isv6)
			return (EAFNOSUPPORT);

		/* v4-only API: interface identified by its v4 address */
		imsf = (struct ip_msfilter *)dbuf;
		v4addr = imsf->imsf_interface.s_addr;
		v4grp = imsf->imsf_multiaddr.s_addr;
		if (v4addr == INADDR_ANY) {
			ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp,
			    func, &err, ipst);
		}
	} else {
		/* group_filter API: interface identified by ifindex */
		boolean_t isv6 = B_FALSE;
		gf = (struct group_filter *)dbuf;
		index = gf->gf_interface;
		if (gf->gf_group.ss_family == AF_INET6) {
			struct sockaddr_in6 *sin6;
			sin6 = (struct sockaddr_in6 *)&gf->gf_group;
			v6grp = sin6->sin6_addr;
			/* v4mapped groups are handled as v4 below */
			if (IN6_IS_ADDR_V4MAPPED(&v6grp))
				IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp);
			else
				isv6 = B_TRUE;
		} else if (gf->gf_group.ss_family == AF_INET) {
			struct sockaddr_in *sin;
			sin = (struct sockaddr_in *)&gf->gf_group;
			v4grp = sin->sin_addr.s_addr;
		} else {
			return (EAFNOSUPPORT);
		}
		if (index == 0) {
			if (isv6) {
				ipif = ipif_lookup_group_v6(&v6grp, zoneid,
				    ipst);
			} else {
				ipif = ipif_lookup_group(v4grp, zoneid, ipst);
			}
			if (ipif == NULL)
				err = EADDRNOTAVAIL;
		} else {
			ipif = ipif_lookup_on_ifindex(index, isv6, zoneid,
			    q, mp, func, &err, ipst);
		}
	}

	ci->ci_ipif = ipif;
	return (err);
}

/*
 * The structures used for the SIOC*MSFILTER ioctls usually must be copied
 * in in two stages, as the first copyin tells us the size of the attached
 * source buffer.  This function is called by ip_wput_nondata() after the
 * first copyin has completed; it figures out how big the second stage
 * needs to be, and kicks it off.
 *
 * In some cases (numsrc < 2), the second copyin is not needed as the
 * first one gets a complete structure containing 1 source addr.
 *
 * The function returns 0 if a second copyin has been started (i.e. there's
 * no more work to be done right now), or 1 if the second copyin is not
 * needed and ip_wput_nondata() can continue its processing.
2658 */ 2659 int 2660 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2661 { 2662 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2663 int cmd = iocp->ioc_cmd; 2664 /* validity of this checked in ip_wput_nondata() */ 2665 mblk_t *mp1 = mp->b_cont->b_cont; 2666 int copysize = 0; 2667 int offset; 2668 2669 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2670 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2671 if (gf->gf_numsrc >= 2) { 2672 offset = sizeof (struct group_filter); 2673 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2674 } 2675 } else { 2676 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2677 if (imsf->imsf_numsrc >= 2) { 2678 offset = sizeof (struct ip_msfilter); 2679 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2680 } 2681 } 2682 if (copysize > 0) { 2683 mi_copyin_n(q, mp, offset, copysize); 2684 return (0); 2685 } 2686 return (1); 2687 } 2688 2689 /* 2690 * Handle the following optmgmt: 2691 * IP_ADD_MEMBERSHIP must not have joined already 2692 * MCAST_JOIN_GROUP must not have joined already 2693 * IP_BLOCK_SOURCE must have joined already 2694 * MCAST_BLOCK_SOURCE must have joined already 2695 * IP_JOIN_SOURCE_GROUP may have joined already 2696 * MCAST_JOIN_SOURCE_GROUP may have joined already 2697 * 2698 * fmode and src parameters may be used to determine which option is 2699 * being set, as follows (the IP_* and MCAST_* versions of each option 2700 * are functionally equivalent): 2701 * opt fmode src 2702 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2703 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2704 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2705 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2706 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2707 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2708 * 2709 * Changing the filter mode is not allowed; if a matching ilg already 2710 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int err = 0;
	ill_t *ill;

	/* locate (and hold) the ipif; may queue/restart via first_mp */
	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, "
			    "ifaddr 0x%x, ifindex %d\n", ntohl(group),
			    ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* serialize the join on the ipif's ipsq */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);

	/* unspecified source addr => no source filtering */
	err = ilg_add(connp, group, ipif, fmode, src);

	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_JOIN_GROUP		must not have joined already
 *	MCAST_JOIN_GROUP	must not have joined already
 *	MCAST_BLOCK_SOURCE	must have joined already
 *	MCAST_JOIN_SOURCE_GROUP	may have joined already
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options
 * are functionally equivalent):
 *	opt			fmode		v6src
 *	IPV6_JOIN_GROUP		MODE_IS_EXCLUDE	unspecified
 *	MCAST_JOIN_GROUP	MODE_IS_EXCLUDE	unspecified
 *	MCAST_BLOCK_SOURCE	MODE_IS_EXCLUDE	v6 addr
 *	MCAST_JOIN_SOURCE_GROUP	MODE_IS_INCLUDE	v6 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Verifies that there is a source address of appropriate scope for
 * the group; if not, EADDRNOTAVAIL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t *ill;
	ipif_t	*ipif;
	char buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t	*ipsq;
	int	err;

	/* resolves v4mapped groups to an ipif, native v6 to an ill */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_add_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add(connp, v4group, ipif, fmode, v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ilg_add_v6(connp, v6group, ill, fmode, v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Delete (or edit the source list of) the conn's v4 membership for group
 * on ipif.  src == INADDR_ANY means a full leave; otherwise just remove
 * that source from the filter (deleting the ilg if it was the last
 * source of an include-mode filter).  Must be called as ipsq writer.
 */
static int
ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif,
    mcast_record_t fmode, ipaddr_t src)
{
	ilg_t	*ilg;
	in6_addr_t v6src;
	boolean_t leaving = B_FALSE;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/*
	 * The ilg is valid only while we hold the conn lock. Once we drop
	 * the lock, another thread can locate another ilg on this connp,
	 * but on a different ipif, and delete it, and cause the ilg array
	 * to be reallocated and copied. Hence do the ilg_delete before
	 * dropping the lock.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (src == INADDR_ANY) {
		v6src = ipv6_all_zeros;
		leaving = B_TRUE;
	} else {
		int err = 0;
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, &v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1) {
			/* removing last include source == full leave */
			v6src = ipv6_all_zeros;
			leaving = B_TRUE;
		}
	}

	ilg_delete(connp, ilg, &v6src);
	mutex_exit(&connp->conn_lock);

	(void) ip_delmulti(group, ipif, B_FALSE, leaving);
	return (0);
}

/*
 * v6 counterpart of ip_opt_delete_group_excl(): delete (or edit the
 * source list of) the conn's membership for v6group.  The ilg is found
 * by the originally-joined ifindex since ilg_ill may have moved.
 * Must be called as ipsq writer.
 */
static int
ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group,
    ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src)
{
	ilg_t	*ilg;
	ill_t	*ilg_ill;
	uint_t	ilg_orig_ifindex;
	boolean_t leaving = B_TRUE;

	ASSERT(IAM_WRITER_ILL(ill));

	/*
	 * Use the index that we originally used to join. We can't
	 * use the ill directly because ilg_ill could point to
	 * a new ill if things have moved.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ill_index_v6(connp, v6group,
	    ill->ill_phyint->phyint_ifindex);
	if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) {
		mutex_exit(&connp->conn_lock);
		return (EADDRNOTAVAIL);
	}

	/*
	 * Decide if we're actually deleting the ilg or just removing a
	 * source filter address; if just removing an addr, make sure we
	 * aren't trying to change the filter mode, and that the addr is
	 * actually in our filter list already.  If we're removing the
	 * last src in an include list, just delete the ilg.
	 */
	if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		int err = 0;
		if (fmode != ilg->ilg_fmode)
			err = EINVAL;
		else if (ilg->ilg_filter == NULL ||
		    !list_has_addr(ilg->ilg_filter, v6src))
			err = EADDRNOTAVAIL;
		if (err != 0) {
			mutex_exit(&connp->conn_lock);
			return (err);
		}
		if (fmode == MODE_IS_INCLUDE &&
		    ilg->ilg_filter->sl_numsrc == 1)
			v6src = NULL;
		else
			leaving = B_FALSE;
	}

	/* capture ill/index before ilg_delete invalidates the ilg */
	ilg_ill = ilg->ilg_ill;
	ilg_orig_ifindex = ilg->ilg_orig_ifindex;
	ilg_delete(connp, ilg, v6src);
	mutex_exit(&connp->conn_lock);
	(void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex,
	    connp->conn_zoneid, B_FALSE, leaving);

	return (0);
}

/*
 * Handle the following optmgmt:
 *	IP_DROP_MEMBERSHIP	will leave
 *	MCAST_LEAVE_GROUP	will leave
 *	IP_UNBLOCK_SOURCE	will not leave
 *	MCAST_UNBLOCK_SOURCE	will not leave
 *	IP_LEAVE_SOURCE_GROUP	may leave (if leaving last source)
 *	MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (the IP_* and MCAST_* versions of each option
 * are functionally equivalent):
 *	opt			fmode			src
 *	IP_DROP_MEMBERSHIP	MODE_IS_INCLUDE		INADDR_ANY
 *	MCAST_LEAVE_GROUP	MODE_IS_INCLUDE		INADDR_ANY
 *	IP_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
 *	MCAST_UNBLOCK_SOURCE	MODE_IS_EXCLUDE		v4 addr
 *	IP_LEAVE_SOURCE_GROUP	MODE_IS_INCLUDE		v4 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE	v4 addr
 *
 * Changing the filter mode is not allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * The interface to be used may be identified by an address or by an
 * index.  A pointer to the index is passed; if it is NULL, use the
 * address, otherwise, use the index.
 */
int
ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group,
    ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src,
    mblk_t *first_mp)
{
	ipif_t	*ipif;
	ipsq_t	*ipsq;
	int	err;
	ill_t	*ill;

	/* locate (and hold) the ipif; may queue/restart via first_mp */
	err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp,
	    ip_restart_optmgmt, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group: no ipif for group "
			    "0x%x, ifaddr 0x%x\n",
			    (int)ntohl(group), (int)ntohl(ifaddr)));
		}
		return (err);
	}
	ASSERT(ipif != NULL);

	ill = ipif->ipif_ill;
	/* Operation not supported on a virtual network interface */
	if (IS_VNI(ill)) {
		ipif_refrele(ipif);
		return (EINVAL);
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		ipif_refrele(ipif);
		return (0);
	}

	/* serialize the delete on the ipif's ipsq (writer required) */
	IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq,
	    NEW_OP);
	err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src);
	IPSQ_EXIT(ipsq);

	ipif_refrele(ipif);
	return (err);
}

/*
 * Handle the following optmgmt:
 *	IPV6_LEAVE_GROUP	will leave
 *	MCAST_LEAVE_GROUP	will leave
 *	MCAST_UNBLOCK_SOURCE	will not leave
 *	MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source)
 *
 * fmode and src parameters may be used to determine which option is
 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options
 * are functionally equivalent):
 *	opt			fmode		v6src
 *	IPV6_LEAVE_GROUP	MODE_IS_INCLUDE	unspecified
 *	MCAST_LEAVE_GROUP	MODE_IS_INCLUDE	unspecified
 *	MCAST_UNBLOCK_SOURCE	MODE_IS_EXCLUDE	v6 addr
 *	MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr
 *
 * Changing the filter mode is not
 * allowed; if a matching ilg already
 * exists and fmode != ilg->ilg_fmode, EINVAL is returned.
 *
 * Handles IPv4-mapped IPv6 multicast addresses by associating them
 * with the link-local ipif.  Assumes that if v6group is v4-mapped,
 * v6src is also v4-mapped.
 */
int
ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly,
    const in6_addr_t *v6group, int ifindex, mcast_record_t fmode,
    const in6_addr_t *v6src, mblk_t *first_mp)
{
	ill_t	*ill;
	ipif_t	*ipif;
	char	buf[INET6_ADDRSTRLEN];
	ipaddr_t v4group, v4src;
	boolean_t isv6;
	ipsq_t	*ipsq;
	int	err;

	/*
	 * Validate the request: a v4-mapped group is converted to v4 and
	 * resolved to an ipif (!isv6), a native v6 group resolves to an ill.
	 */
	err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6,
	    ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif);
	if (err != 0) {
		if (err != EINPROGRESS) {
			ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/"
			    "index %d\n", inet_ntop(AF_INET6, v6group, buf,
			    sizeof (buf)), ifindex));
		}
		return (err);
	}
	ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL));

	/* operation is not supported on the virtual network interface */
	if (isv6) {
		if (IS_VNI(ill)) {
			ill_refrele(ill);
			return (EINVAL);
		}
	} else {
		if (IS_VNI(ipif->ipif_ill)) {
			ipif_refrele(ipif);
			return (EINVAL);
		}
	}

	if (checkonly) {
		/*
		 * do not do operation, just pretend to - new T_CHECK
		 * semantics. The error return case above if encountered
		 * considered a good enough "check" here.
		 */
		if (isv6)
			ill_refrele(ill);
		else
			ipif_refrele(ipif);
		return (0);
	}

	/* Serialize on the ipif's or ill's ipsq, then do the real work */
	if (!isv6) {
		IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode,
		    v4src);
		IPSQ_EXIT(ipsq);
		ipif_refrele(ipif);
	} else {
		IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt,
		    ipsq, NEW_OP);
		err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode,
		    v6src);
		IPSQ_EXIT(ipsq);
		ill_refrele(ill);
	}

	return (err);
}

/*
 * Group mgmt for upper conn that passes things down
 * to the interface multicast list (and DLPI)
 * These routines can handle new style options that specify an interface name
 * as opposed to an interface address (needed for general handling of
 * unnumbered interfaces.)
 */

/*
 * Add a group to an upper conn group data structure and pass things down
 * to the interface multicast list (and DLPI)
 */
static int
ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode,
    ipaddr_t src)
{
	int	error = 0;
	ill_t	*ill;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_IPIF(ipif));

	ill = ipif->ipif_ill;

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock
	 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to
	 * serialize 2 threads doing join (sock, group1, hme0:0) and
	 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs,
	 * but both operations happen on the same conn.
	 */
	mutex_enter(&connp->conn_lock);
	ilg = ilg_lookup_ipif(connp, group, ipif);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.  Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (src == INADDR_ANY) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED));

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* Brand-new membership for this conn/group/ipif */
		ilgstat = ILGSTAT_NEW;
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (src != INADDR_ANY) {
			/* Start with a single-entry source filter */
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			IN6_IPADDR_TO_V4MAPPED(src,
			    &ilg->ilg_filter->sl_addr[0]);
		}
		if (group == INADDR_ANY) {
			ilg->ilg_v6group = ipv6_all_zeros;
		} else {
			/* v4 groups are stored as v4-mapped v6 addresses */
			IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group);
		}
		ilg->ilg_ipif = ipif;
		ilg->ilg_ill = NULL;
		ilg->ilg_orig_ifindex = 0;
		ilg->ilg_fmode = fmode;
	} else {
		/* Adding a source to an existing membership */
		int	index;
		in6_addr_t v6src;
		ilgstat = ILGSTAT_CHANGE;
		if (ilg->ilg_fmode != fmode || src == INADDR_ANY) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		if (list_has_addr(ilg->ilg_filter, &v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = v6src;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * Need to undo what we did before calling ip_addmulti()!
		 * Must look up the ilg again since we've not been holding
		 * conn_lock.
		 */
		in6_addr_t v6src;
		if (ilgstat == ILGSTAT_NEW)
			v6src = ipv6_all_zeros;
		else
			IN6_IPADDR_TO_V4MAPPED(src, &v6src);
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ipif(connp, group, ipif);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &v6src);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);
	return (0);
}

/*
 * IPv6 counterpart of ilg_add(): add a group (and optionally a source)
 * to the conn's ilg list and join it on the appropriate ill.
 */
static int
ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill,
    mcast_record_t fmode, const in6_addr_t *v6src)
{
	int	error = 0;
	int	orig_ifindex;
	ilg_t	*ilg;
	ilg_stat_t ilgstat;
	slist_t	*new_filter = NULL;
	int	new_fmode;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!(ill->ill_flags & ILLF_MULTICAST))
		return (EADDRNOTAVAIL);

	/*
	 * conn_lock protects the ilg list.  Serializes 2 threads doing
	 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0
	 * and hme1 map to different ipsq's, but both operations happen
	 * on the same conn.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Use the ifindex to do the lookup. We can't use the ill
	 * directly because ilg_ill could point to a different ill if
	 * things have moved.
	 */
	orig_ifindex = ill->ill_phyint->phyint_ifindex;
	ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);

	/*
	 * Depending on the option we're handling, may or may not be okay
	 * if group has already been added.  Figure out our rules based
	 * on fmode and src params.
	 * Also make sure there's enough room
	 * in the filter if we're adding a source to an existing filter.
	 */
	if (IN6_IS_ADDR_UNSPECIFIED(v6src)) {
		/* we're joining for all sources, must not have joined */
		if (ilg != NULL)
			error = EADDRINUSE;
	} else {
		if (fmode == MODE_IS_EXCLUDE) {
			/* (excl {addr}) => block source, must have joined */
			if (ilg == NULL)
				error = EADDRNOTAVAIL;
		}
		/* (incl {addr}) => join source, may have joined */

		if (ilg != NULL &&
		    SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE)
			error = ENOBUFS;
	}
	if (error != 0) {
		mutex_exit(&connp->conn_lock);
		return (error);
	}

	/*
	 * Alloc buffer to copy new state into (see below) before
	 * we make any changes, so we can bail if it fails.
	 */
	if ((new_filter = l_alloc()) == NULL) {
		mutex_exit(&connp->conn_lock);
		return (ENOMEM);
	}

	if (ilg == NULL) {
		/* Brand-new membership for this conn/group/ifindex */
		if ((ilg = conn_ilg_alloc(connp)) == NULL) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (ENOMEM);
		}
		if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			/* Start with a single-entry source filter */
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				ilg_delete(connp, ilg, NULL);
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
			ilg->ilg_filter->sl_numsrc = 1;
			ilg->ilg_filter->sl_addr[0] = *v6src;
		}
		ilgstat = ILGSTAT_NEW;
		ilg->ilg_v6group = *v6group;
		ilg->ilg_fmode = fmode;
		ilg->ilg_ipif = NULL;
		/*
		 * Choose our target ill to join on. This might be different
		 * from the ill we've been given if it's currently down and
		 * part of a group.
		 *
		 * new ill is not refheld; we are writer.
		 */
		ill = ip_choose_multi_ill(ill, v6group);
		ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED));
		ilg->ilg_ill = ill;
		/*
		 * Remember the orig_ifindex that we joined on, so that we
		 * can successfully delete them later on and also search
		 * for duplicates if the application wants to join again.
		 */
		ilg->ilg_orig_ifindex = orig_ifindex;
	} else {
		/* Adding a source to an existing membership */
		int	index;
		if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EINVAL);
		}
		if (ilg->ilg_filter == NULL) {
			ilg->ilg_filter = l_alloc();
			if (ilg->ilg_filter == NULL) {
				mutex_exit(&connp->conn_lock);
				l_free(new_filter);
				return (ENOMEM);
			}
		}
		if (list_has_addr(ilg->ilg_filter, v6src)) {
			mutex_exit(&connp->conn_lock);
			l_free(new_filter);
			return (EADDRNOTAVAIL);
		}
		ilgstat = ILGSTAT_CHANGE;
		index = ilg->ilg_filter->sl_numsrc++;
		ilg->ilg_filter->sl_addr[index] = *v6src;
		/*
		 * The current ill might be different from the one we were
		 * asked to join on (if failover has occurred); we should
		 * join on the ill stored in the ilg.  The original ill
		 * is noted in ilg_orig_ifindex, which matched our request.
		 */
		ill = ilg->ilg_ill;
	}

	/*
	 * Save copy of ilg's filter state to pass to other functions,
	 * so we can release conn_lock now.
	 */
	new_fmode = ilg->ilg_fmode;
	l_copy(ilg->ilg_filter, new_filter);

	mutex_exit(&connp->conn_lock);

	/*
	 * Now update the ill. We wait to do this until after the ilg
	 * has been updated because we need to update the src filter
	 * info for the ill, which involves looking at the status of
	 * all the ilgs associated with this group/interface pair.
	 */
	error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid,
	    ilgstat, new_fmode, new_filter);
	if (error != 0) {
		/*
		 * But because we waited, we have to undo the ilg update
		 * if ip_addmulti_v6() fails.  We also must lookup ilg
		 * again, since we've not been holding conn_lock.
		 */
		in6_addr_t delsrc =
		    (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src;
		mutex_enter(&connp->conn_lock);
		ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex);
		ASSERT(ilg != NULL);
		ilg_delete(connp, ilg, &delsrc);
		mutex_exit(&connp->conn_lock);
		l_free(new_filter);
		return (error);
	}

	l_free(new_filter);

	return (0);
}

/*
 * Find an IPv4 ilg matching group, ill and source.
 * Returns the matching ilg, or NULL if the (group, ill) pair has no
 * membership or its source filter excludes src.
 */
ilg_t *
ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill)
{
	in6_addr_t v6group, v6src;
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ipif_t	*ipif;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		/* ilg_ipif is NULL for v6; skip them */
		ilg = &connp->conn_ilg[i];
		if ((ipif = ilg->ilg_ipif) == NULL)
			continue;
		ASSERT(ilg->ilg_ill == NULL);
		ilg_ill = ipif->ipif_ill;
		ASSERT(!ilg_ill->ill_isv6);
		if (ilg_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			/* found the entry; must still check its filter */
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	IN6_IPADDR_TO_V4MAPPED(src, &v6src);
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* src passes an INCLUDE filter only if listed, EXCLUDE only if not */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		return (ilg);

	return (NULL);
}

/*
 * Find an IPv6 ilg matching group, ill, and source.
 * Returns the matching ilg, or NULL if the (group, ill) pair has no
 * membership or its source filter excludes v6src.
 */
ilg_t *
ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group,
    const in6_addr_t *v6src, ill_t *ill)
{
	int	i;
	boolean_t isinlist;
	ilg_t	*ilg;
	ill_t	*ilg_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		/* ilg_ill is NULL for v4 entries; skip them */
		ilg = &connp->conn_ilg[i];
		if ((ilg_ill = ilg->ilg_ill) == NULL)
			continue;
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(ilg_ill->ill_isv6);
		if (ilg_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) {
			if (SLIST_IS_EMPTY(ilg->ilg_filter)) {
				/* no source filter, so this is a match */
				return (ilg);
			}
			/* found the entry; must still check its filter */
			break;
		}
	}
	if (i == connp->conn_ilg_inuse)
		return (NULL);

	/*
	 * we have an ilg with matching ill and group; but
	 * the ilg has a source list that we must check.
	 */
	isinlist = B_FALSE;
	for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) {
		if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) {
			isinlist = B_TRUE;
			break;
		}
	}

	/* src passes an INCLUDE filter only if listed, EXCLUDE only if not */
	if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) ||
	    (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE))
		return (ilg);

	return (NULL);
}

/*
 * Get the ilg whose ilg_orig_ifindex is associated with ifindex.
 * This is useful when the interface fails and we have moved
 * to a new ill, but still would like to locate using the index
 * that we originally used to join.  Used only for IPv6 currently.
 */
static ilg_t *
ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex)
{
	ilg_t	*ilg;
	int	i;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		ilg = &connp->conn_ilg[i];
		/* ilg_ill is NULL for V4. Skip them */
		if (ilg->ilg_ill == NULL)
			continue;
		/* ilg_ipif is NULL for V6 */
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(ilg->ilg_orig_ifindex != 0);
		if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) &&
		    ilg->ilg_orig_ifindex == ifindex) {
			return (ilg);
		}
	}
	return (NULL);
}

/*
 * Find an IPv6 ilg matching group and ill
 */
ilg_t *
ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill)
{
	ilg_t	*ilg;
	int	i;
	ill_t	*mem_ill;

	ASSERT(MUTEX_HELD(&connp->conn_lock));

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		/* ilg_ill is NULL for v4 entries; skip them */
		ilg = &connp->conn_ilg[i];
		if ((mem_ill = ilg->ilg_ill) == NULL)
			continue;
		ASSERT(ilg->ilg_ipif == NULL);
		ASSERT(mem_ill->ill_isv6);
		if (mem_ill == ill &&
		    IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group))
			return (ilg);
	}
	return (NULL);
}

/*
 * Find an IPv4 ilg matching group and ipif
 */
static ilg_t *
ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif)
{
	in6_addr_t v6group;
	int	i;

	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(!ipif->ipif_ill->ill_isv6);

	/* INADDR_ANY is stored as the IPv6 unspecified address */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	for (i = 0; i < connp->conn_ilg_inuse; i++) {
		if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group,
		    &v6group)
		    && connp->conn_ilg[i].ilg_ipif == ipif)
			return (&connp->conn_ilg[i]);
	}
	return (NULL);
}

/*
 * If a source address is passed in (src != NULL and src is not
 * unspecified), remove the specified src addr from the given ilg's
 * filter list, else delete the ilg.
 */
static void
ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src)
{
	int	i;

	/* Exactly one of ilg_ipif (v4) and ilg_ill (v6) must be set */
	ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL));
	ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif));
	ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill));
	ASSERT(MUTEX_HELD(&connp->conn_lock));
	ASSERT(!(ilg->ilg_flags & ILG_DELETED));

	if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) {
		/*
		 * If walkers are traversing conn_ilg[], defer the physical
		 * removal: just mark the entry deleted so the walker can
		 * repack later.
		 */
		if (connp->conn_ilg_walker_cnt != 0) {
			ilg->ilg_flags |= ILG_DELETED;
			return;
		}

		FREE_SLIST(ilg->ilg_filter);

		i = ilg - &connp->conn_ilg[0];
		ASSERT(i >= 0 && i < connp->conn_ilg_inuse);

		/* Move other entries up one step */
		connp->conn_ilg_inuse--;
		for (; i < connp->conn_ilg_inuse; i++)
			connp->conn_ilg[i] = connp->conn_ilg[i+1];

		if (connp->conn_ilg_inuse == 0) {
			mi_free((char *)connp->conn_ilg);
			connp->conn_ilg = NULL;
			/* Wake anyone waiting for the ilg list to drain */
			cv_broadcast(&connp->conn_refcv);
		}
	} else {
		/* Only remove the single source from the filter */
		l_remove(ilg->ilg_filter, src);
	}
}

/*
 * Called from conn close. No new ilg can be added or removed.
 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete
 * will return error if conn has started closing.
 */
void
ilg_delete_all(conn_t *connp)
{
	int	i;
	ipif_t	*ipif = NULL;
	ill_t	*ill = NULL;
	ilg_t	*ilg;
	in6_addr_t v6group;
	boolean_t success;
	ipsq_t	*ipsq;
	int	orig_ifindex;

	mutex_enter(&connp->conn_lock);
retry:
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; ) {
		ilg = &connp->conn_ilg[i];
		/*
		 * Since this walk is not atomic (we drop the
		 * conn_lock and wait in ipsq_enter) we need
		 * to check for the ILG_DELETED flag.
		 */
		if (ilg->ilg_flags & ILG_DELETED) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		v6group = ilg->ilg_v6group;

		/* v4-mapped groups live on an ipif, native v6 on an ill */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		/*
		 * We may not be able to refhold the ill if the ill/ipif
		 * is changing. But we need to make sure that the ill will
		 * not vanish. So we just bump up the ill_waiter count.
		 * If we are unable to do even that, then the ill is closing,
		 * in which case the unplumb thread will handle the cleanup,
		 * and we move on to the next ilg.
		 */
		if (!ill_waiter_inc(ill)) {
			/* Go to the next ilg */
			i--;
			continue;
		}
		mutex_exit(&connp->conn_lock);
		/*
		 * To prevent deadlock between ill close which waits inside
		 * the perimeter, and conn close, ipsq_enter returns error,
		 * the moment ILL_CONDEMNED is set, in which case ill close
		 * takes responsibility to cleanup the ilgs. Note that we
		 * have not yet set condemned flag, otherwise the conn can't
		 * be refheld for cleanup by those routines and it would be
		 * a mutual deadlock.
		 */
		success = ipsq_enter(ill, B_FALSE);
		ipsq = ill->ill_phyint->phyint_ipsq;
		ill_waiter_dcr(ill);
		mutex_enter(&connp->conn_lock);
		if (!success) {
			/* Go to the next ilg */
			i--;
			continue;
		}

		/*
		 * Make sure that nothing has changed under. For eg.
		 * a failover/failback can change ilg_ill while we were
		 * waiting to become exclusive above
		 */
		if (IN6_IS_ADDR_V4MAPPED(&v6group)) {
			ipif = ilg->ilg_ipif;
			ill = ipif->ipif_ill;
		} else {
			ipif = NULL;
			ill = ilg->ilg_ill;
		}
		if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * The ilg has changed under us probably due
			 * to a failover or unplumb. Retry on the same ilg.
			 */
			mutex_exit(&connp->conn_lock);
			ipsq_exit(ipsq, B_TRUE, B_TRUE);
			mutex_enter(&connp->conn_lock);
			continue;
		}
		v6group = ilg->ilg_v6group;
		orig_ifindex = ilg->ilg_orig_ifindex;
		ilg_delete(connp, ilg, NULL);
		mutex_exit(&connp->conn_lock);

		if (ipif != NULL)
			(void) ip_delmulti(V4_PART_OF_V6(v6group), ipif,
			    B_FALSE, B_TRUE);

		else
			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
			    connp->conn_zoneid, B_FALSE, B_TRUE);

		ipsq_exit(ipsq, B_TRUE, B_TRUE);
		mutex_enter(&connp->conn_lock);
		/* Go to the next ilg */
		i--;
	}
	ILG_WALKER_RELE(connp);

	/* If any ill was skipped above wait and retry */
	if (connp->conn_ilg_inuse != 0) {
		cv_wait(&connp->conn_refcv, &connp->conn_lock);
		goto retry;
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Called from ill close by ipcl_walk for clearing conn_ilg and
 * conn_multicast_ipif for a given ipif. conn is held by caller.
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e.
 * it must not refer to any ill/ire/ipif and then only set
 * condemned flag.
 */
static void
conn_delete_ipif(conn_t *connp, caddr_t arg)
{
	ipif_t	*ipif = (ipif_t *)arg;
	int	i;
	char	group_buf1[INET6_ADDRSTRLEN];
	char	group_buf2[INET6_ADDRSTRLEN];
	ipaddr_t group;
	ilg_t	*ilg;

	/*
	 * Even though conn_ilg_inuse can change while we are in this loop,
	 * i.e.ilgs can be created or deleted on this connp, no new ilgs can
	 * be created or deleted for this connp, on this ill, since this ill
	 * is the perimeter. So we won't miss any ilg in this cleanup.
	 */
	mutex_enter(&connp->conn_lock);

	/*
	 * Increment the walker count, so that ilg repacking does not
	 * occur while we are in the loop.
	 */
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
		ilg = &connp->conn_ilg[i];
		if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED))
			continue;
		/*
		 * ip_close cannot be cleaning this ilg at the same time.
		 * since it also has to execute in this ill's perimeter which
		 * we are now holding. Only a clean conn can be condemned.
		 */
		ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));

		/* Blow away the membership */
		ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n",
		    inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group,
		    group_buf1, sizeof (group_buf1)),
		    inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr,
		    group_buf2, sizeof (group_buf2)),
		    ipif->ipif_ill->ill_name));

		/* ilg_ipif is NULL for V6, so we won't be here */
		ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group));

		group = V4_PART_OF_V6(ilg->ilg_v6group);
		ilg_delete(connp, &connp->conn_ilg[i], NULL);
		mutex_exit(&connp->conn_lock);

		(void) ip_delmulti(group, ipif, B_FALSE, B_TRUE);
		mutex_enter(&connp->conn_lock);
	}

	/*
	 * If we are the last walker, need to physically delete the
	 * ilgs and repack.
	 */
	ILG_WALKER_RELE(connp);

	if (connp->conn_multicast_ipif == ipif) {
		/* Revert to late binding */
		connp->conn_multicast_ipif = NULL;
	}
	mutex_exit(&connp->conn_lock);

	conn_delete_ire(connp, (caddr_t)ipif);
}

/*
 * Called from ill close by ipcl_walk for clearing conn_ilg and
 * conn_multicast_ill for a given ill. conn is held by caller.
 * Note that ipcl_walk only walks conns that are not yet condemned.
 * condemned conns can't be refheld. For this reason, conn must become clean
 * first, i.e. it must not refer to any ill/ire/ipif and then only set
 * condemned flag.
 */
static void
conn_delete_ill(conn_t *connp, caddr_t arg)
{
	ill_t	*ill = (ill_t *)arg;
	int	i;
	char	group_buf[INET6_ADDRSTRLEN];
	in6_addr_t v6group;
	int	orig_ifindex;
	ilg_t	*ilg;

	/*
	 * Even though conn_ilg_inuse can change while we are in this loop,
	 * no new ilgs can be created/deleted for this connp, on this
	 * ill, since this ill is the perimeter. So we won't miss any ilg
	 * in this cleanup.
3953 */ 3954 mutex_enter(&connp->conn_lock); 3955 3956 /* 3957 * Increment the walker count, so that ilg repacking does not 3958 * occur while we are in the loop. 3959 */ 3960 ILG_WALKER_HOLD(connp); 3961 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3962 ilg = &connp->conn_ilg[i]; 3963 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3964 /* 3965 * ip_close cannot be cleaning this ilg at the same 3966 * time, since it also has to execute in this ill's 3967 * perimeter which we are now holding. Only a clean 3968 * conn can be condemned. 3969 */ 3970 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3971 3972 /* Blow away the membership */ 3973 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3974 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3975 group_buf, sizeof (group_buf)), 3976 ill->ill_name)); 3977 3978 v6group = ilg->ilg_v6group; 3979 orig_ifindex = ilg->ilg_orig_ifindex; 3980 ilg_delete(connp, ilg, NULL); 3981 mutex_exit(&connp->conn_lock); 3982 3983 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3984 connp->conn_zoneid, B_FALSE, B_TRUE); 3985 mutex_enter(&connp->conn_lock); 3986 } 3987 } 3988 /* 3989 * If we are the last walker, need to physically delete the 3990 * ilgs and repack. 3991 */ 3992 ILG_WALKER_RELE(connp); 3993 3994 if (connp->conn_multicast_ill == ill) { 3995 /* Revert to late binding */ 3996 connp->conn_multicast_ill = NULL; 3997 connp->conn_orig_multicast_ifindex = 0; 3998 } 3999 mutex_exit(&connp->conn_lock); 4000 } 4001 4002 /* 4003 * Called when an ipif is unplumbed to make sure that there are no 4004 * dangling conn references to that ipif. 4005 * Handles ilg_ipif and conn_multicast_ipif 4006 */ 4007 void 4008 reset_conn_ipif(ipif) 4009 ipif_t *ipif; 4010 { 4011 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4012 4013 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 4014 } 4015 4016 /* 4017 * Called when an ill is unplumbed to make sure that there are no 4018 * dangling conn references to that ill. 
 * Handles ilg_ill, conn_multicast_ill.
 */
void
reset_conn_ill(ill_t *ill)
{
	ip_stack_t	*ipst = ill->ill_ipst;

	ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst);
}

#ifdef DEBUG
/*
 * Walk functions walk all the interfaces in the system to make
 * sure that there is no reference to the ipif or ill that is
 * going away.
 */
int
ilm_walk_ill(ill_t *ill)
{
	int	cnt = 0;
	ill_t	*till;
	ilm_t	*ilm;
	ill_walk_context_t ctx;
	ip_stack_t	*ipst = ill->ill_ipst;

	rw_enter(&ipst->ips_ill_g_lock, RW_READER);
	till = ILL_START_WALK_ALL(&ctx, ipst);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		mutex_enter(&till->ill_lock);
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ill == ill) {
				cnt++;
			}
		}
		mutex_exit(&till->ill_lock);
	}
	rw_exit(&ipst->ips_ill_g_lock);

	return (cnt);
}

/*
 * This function is called before the ipif is freed.
 *
 * NOTE(review): unlike ilm_walk_ill() above, this walk does not take
 * ips_ill_g_lock — confirm that callers hold it (or that the walk is
 * otherwise safe) before relying on this in non-DEBUG paths.
 */
int
ilm_walk_ipif(ipif_t *ipif)
{
	int	cnt = 0;
	ill_t	*till;
	ilm_t	*ilm;
	ill_walk_context_t ctx;
	ip_stack_t	*ipst = ipif->ipif_ill->ill_ipst;

	till = ILL_START_WALK_ALL(&ctx, ipst);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		mutex_enter(&till->ill_lock);
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ipif == ipif) {
				cnt++;
			}
		}
		mutex_exit(&till->ill_lock);
	}
	return (cnt);
}
#endif