1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 */ 25 /* Copyright (c) 1990 Mentat Inc. 
*/ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <sys/types.h> 30 #include <sys/stream.h> 31 #include <sys/dlpi.h> 32 #include <sys/stropts.h> 33 #include <sys/strsun.h> 34 #include <sys/ddi.h> 35 #include <sys/cmn_err.h> 36 #include <sys/sdt.h> 37 #include <sys/zone.h> 38 39 #include <sys/param.h> 40 #include <sys/socket.h> 41 #include <sys/sockio.h> 42 #include <net/if.h> 43 #include <sys/systm.h> 44 #include <sys/strsubr.h> 45 #include <net/route.h> 46 #include <netinet/in.h> 47 #include <net/if_dl.h> 48 #include <netinet/ip6.h> 49 #include <netinet/icmp6.h> 50 51 #include <inet/common.h> 52 #include <inet/mi.h> 53 #include <inet/nd.h> 54 #include <inet/arp.h> 55 #include <inet/ip.h> 56 #include <inet/ip6.h> 57 #include <inet/ip_if.h> 58 #include <inet/ip_ndp.h> 59 #include <inet/ip_multi.h> 60 #include <inet/ipclassifier.h> 61 #include <inet/ipsec_impl.h> 62 #include <inet/sctp_ip.h> 63 #include <inet/ip_listutils.h> 64 #include <inet/udp_impl.h> 65 66 /* igmpv3/mldv2 source filter manipulation */ 67 static void ilm_bld_flists(conn_t *conn, void *arg); 68 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 69 slist_t *flist); 70 71 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 72 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 73 int orig_ifindex, zoneid_t zoneid); 74 static void ilm_delete(ilm_t *ilm); 75 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 76 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 77 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 78 const in6_addr_t *v6group, int index); 79 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 80 ipif_t *ipif); 81 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 82 mcast_record_t fmode, ipaddr_t src); 83 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 84 mcast_record_t fmode, const in6_addr_t *v6src); 85 static void ilg_delete(conn_t *connp, ilg_t *ilg, 
const in6_addr_t *src);
static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive,
    uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp);
static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr,
    uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail);
static void conn_ilg_reap(conn_t *connp);
static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group,
    ipif_t *ipif, mcast_record_t fmode, ipaddr_t src);
static int ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);

/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conn (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize on the access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if an application can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
 *
 * Multicast joins and leaves are single-threaded per phyint/IPMP group
 * using the ipsq serialization mechanism.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm. ilms are created / destroyed only as writer. ilms
 * are not passed around, instead they are looked up and used under the
 * ill_lock or as writer.
So we don't need a dynamic refcount of the number 121 * of threads holding reference to an ilm. 122 * 123 * Multicast Join operation: 124 * 125 * The first step is to determine the ipif (v4) or ill (v6) on which 126 * the join operation is to be done. The join is done after becoming 127 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 128 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 129 * Multiple threads can attempt to join simultaneously on different ipif/ill 130 * on the same conn. In this case the ipsq serialization does not help in 131 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 132 * The conn_lock also protects all the ilg_t members. 133 * 134 * Leave operation. 135 * 136 * Similar to the join operation, the first step is to determine the ipif 137 * or ill (v6) on which the leave operation is to be done. The leave operation 138 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 139 * As with join ilg modification is done under the protection of the conn lock. 
140 */ 141 142 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 143 ASSERT(connp != NULL); \ 144 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 145 (first_mp), (func), (type), B_TRUE); \ 146 if ((ipsq) == NULL) { \ 147 ipif_refrele(ipif); \ 148 return (EINPROGRESS); \ 149 } 150 151 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 152 ASSERT(connp != NULL); \ 153 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 154 (first_mp), (func), (type), B_TRUE); \ 155 if ((ipsq) == NULL) { \ 156 ill_refrele(ill); \ 157 return (EINPROGRESS); \ 158 } 159 160 #define IPSQ_EXIT(ipsq) \ 161 if (ipsq != NULL) \ 162 ipsq_exit(ipsq, B_TRUE, B_TRUE); 163 164 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 165 166 #define ILG_WALKER_RELE(connp) \ 167 { \ 168 (connp)->conn_ilg_walker_cnt--; \ 169 if ((connp)->conn_ilg_walker_cnt == 0) \ 170 conn_ilg_reap(connp); \ 171 } 172 173 static void 174 conn_ilg_reap(conn_t *connp) 175 { 176 int to; 177 int from; 178 179 ASSERT(MUTEX_HELD(&connp->conn_lock)); 180 181 to = 0; 182 from = 0; 183 while (from < connp->conn_ilg_inuse) { 184 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 185 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 186 from++; 187 continue; 188 } 189 if (to != from) 190 connp->conn_ilg[to] = connp->conn_ilg[from]; 191 to++; 192 from++; 193 } 194 195 connp->conn_ilg_inuse = to; 196 197 if (connp->conn_ilg_inuse == 0) { 198 mi_free((char *)connp->conn_ilg); 199 connp->conn_ilg = NULL; 200 cv_broadcast(&connp->conn_refcv); 201 } 202 } 203 204 #define GETSTRUCT(structure, number) \ 205 ((structure *)mi_zalloc(sizeof (structure) * (number))) 206 207 #define ILG_ALLOC_CHUNK 16 208 209 /* 210 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 211 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 212 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 213 * returned ilg). 
Returns NULL on failure (ENOMEM). 214 * 215 * Assumes connp->conn_lock is held. 216 */ 217 static ilg_t * 218 conn_ilg_alloc(conn_t *connp) 219 { 220 ilg_t *new; 221 int curcnt; 222 223 ASSERT(MUTEX_HELD(&connp->conn_lock)); 224 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 225 226 if (connp->conn_ilg == NULL) { 227 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 228 if (connp->conn_ilg == NULL) 229 return (NULL); 230 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 231 connp->conn_ilg_inuse = 0; 232 } 233 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 234 curcnt = connp->conn_ilg_allocated; 235 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 236 if (new == NULL) 237 return (NULL); 238 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 239 mi_free((char *)connp->conn_ilg); 240 connp->conn_ilg = new; 241 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 242 } 243 244 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 245 } 246 247 typedef struct ilm_fbld_s { 248 ilm_t *fbld_ilm; 249 int fbld_in_cnt; 250 int fbld_ex_cnt; 251 slist_t fbld_in; 252 slist_t fbld_ex; 253 boolean_t fbld_in_overflow; 254 } ilm_fbld_t; 255 256 static void 257 ilm_bld_flists(conn_t *conn, void *arg) 258 { 259 int i; 260 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 261 ilm_t *ilm = fbld->fbld_ilm; 262 in6_addr_t *v6group = &ilm->ilm_v6addr; 263 264 if (conn->conn_ilg_inuse == 0) 265 return; 266 267 /* 268 * Since we can't break out of the ipcl_walk once started, we still 269 * have to look at every conn. But if we've already found one 270 * (EXCLUDE, NULL) list, there's no need to keep checking individual 271 * ilgs--that will be our state. 272 */ 273 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 274 return; 275 276 /* 277 * Check this conn's ilgs to see if any are interested in our 278 * ilm (group, interface match). If so, update the master 279 * include and exclude lists we're building in the fbld struct 280 * with this ilg's filter info. 
281 */ 282 mutex_enter(&conn->conn_lock); 283 for (i = 0; i < conn->conn_ilg_inuse; i++) { 284 ilg_t *ilg = &conn->conn_ilg[i]; 285 if ((ilg->ilg_ill == ilm->ilm_ill) && 286 (ilg->ilg_ipif == ilm->ilm_ipif) && 287 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 288 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 289 fbld->fbld_in_cnt++; 290 if (!fbld->fbld_in_overflow) 291 l_union_in_a(&fbld->fbld_in, 292 ilg->ilg_filter, 293 &fbld->fbld_in_overflow); 294 } else { 295 fbld->fbld_ex_cnt++; 296 /* 297 * On the first exclude list, don't try to do 298 * an intersection, as the master exclude list 299 * is intentionally empty. If the master list 300 * is still empty on later iterations, that 301 * means we have at least one ilg with an empty 302 * exclude list, so that should be reflected 303 * when we take the intersection. 304 */ 305 if (fbld->fbld_ex_cnt == 1) { 306 if (ilg->ilg_filter != NULL) 307 l_copy(ilg->ilg_filter, 308 &fbld->fbld_ex); 309 } else { 310 l_intersection_in_a(&fbld->fbld_ex, 311 ilg->ilg_filter); 312 } 313 } 314 /* there will only be one match, so break now. 
*/ 315 break; 316 } 317 } 318 mutex_exit(&conn->conn_lock); 319 } 320 321 static void 322 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 323 { 324 ilm_fbld_t fbld; 325 ip_stack_t *ipst = ilm->ilm_ipst; 326 327 fbld.fbld_ilm = ilm; 328 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 329 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 330 fbld.fbld_in_overflow = B_FALSE; 331 332 /* first, construct our master include and exclude lists */ 333 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld, ipst); 334 335 /* now use those master lists to generate the interface filter */ 336 337 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 338 if (fbld.fbld_in_overflow) { 339 *fmode = MODE_IS_EXCLUDE; 340 flist->sl_numsrc = 0; 341 return; 342 } 343 344 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 345 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 346 *fmode = MODE_IS_INCLUDE; 347 flist->sl_numsrc = 0; 348 return; 349 } 350 351 /* 352 * If there are no exclude lists, then the interface filter 353 * is INCLUDE, with its filter list equal to fbld_in. A single 354 * exclude list makes the interface filter EXCLUDE, with its 355 * filter list equal to (fbld_ex - fbld_in). 356 */ 357 if (fbld.fbld_ex_cnt == 0) { 358 *fmode = MODE_IS_INCLUDE; 359 l_copy(&fbld.fbld_in, flist); 360 } else { 361 *fmode = MODE_IS_EXCLUDE; 362 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 363 } 364 } 365 366 /* 367 * If the given interface has failed, choose a new one to join on so 368 * that we continue to receive packets. ilg_orig_ifindex remembers 369 * what the application used to join on so that we know the ilg to 370 * delete even though we change the ill here. Callers will store the 371 * ilg returned from this function in ilg_ill. Thus when we receive 372 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 373 * 374 * This function must be called as writer so we can walk the group 375 * list and examine flags without holding a lock. 
*/
ill_t *
ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp)
{
	ill_t	*till;
	ill_group_t *illgrp = ill->ill_group;

	ASSERT(IAM_WRITER_ILL(ill));

	/* the unspecified group, or an ill outside any group: no choice */
	if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL)
		return (ill);

	/* the requested ill is neither FAILED nor INACTIVE: use it */
	if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0)
		return (ill);

	/* take the first ill in the group that is not FAILED/INACTIVE */
	till = illgrp->illgrp_ill;
	while (till != NULL &&
	    (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) {
		till = till->ill_group_next;
	}
	if (till != NULL)
		return (till);

	/* every ill in the group is unusable; fall back to the original */
	return (ill);
}

/*
 * Account for an additional join (or an ilg filter change) on an ilm that
 * already exists, recompute the ilm's filter state and, if it changed,
 * send an IGMP/MLD state-change report (skipped on loopback).
 *
 * ilgstat:	ILGSTAT_NONE   - join has no ilg; counted in ilm_no_ilg_cnt
 *		ILGSTAT_CHANGE - existing ilg changed; ilm_refcnt untouched
 *		anything else  - ilm_refcnt is incremented
 * ilg_flist:	the ilg's source list; empty/NULL forces (EXCLUDE, NULL)
 * isv6:	selects ilm_ill vs. ilm_ipif->ipif_ill and MLD vs. IGMP
 *
 * Returns 0 on success or ENOMEM if a filter list cannot be allocated
 * (in which case no counters have been modified).
 */
static int
ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist,
    boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	boolean_t fdefault;
	char buf[INET6_ADDRSTRLEN];
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	/*
	 * There are several cases where the ilm's filter state
	 * defaults to (EXCLUDE, NULL):
	 *	- we've had previous joins without associated ilgs
	 *	- this join has no associated ilg
	 *	- the ilg's filter state is (EXCLUDE, NULL)
	 */
	fdefault = (ilm->ilm_no_ilg_cnt > 0) ||
	    (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist);

	/* attempt mallocs (if needed) before doing anything else */
	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);
	if (!fdefault && ilm->ilm_filter == NULL) {
		ilm->ilm_filter = l_alloc();
		if (ilm->ilm_filter == NULL) {
			l_free(flist);
			return (ENOMEM);
		}
	}

	/* from here on nothing can fail, so it is safe to bump counters */
	if (ilgstat != ILGSTAT_CHANGE)
		ilm->ilm_refcnt++;

	if (ilgstat == ILGSTAT_NONE)
		ilm->ilm_no_ilg_cnt++;

	/*
	 * Determine new filter state.  If it's not the default
	 * (EXCLUDE, NULL), we must walk the conn list to find
	 * any ilgs interested in this group, and re-build the
	 * ilm filter.
	 */
	if (fdefault) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* make sure state actually changed; nothing to do if not. */
	if ((ilm->ilm_fmode == fmode) &&
	    !lists_are_different(ilm->ilm_filter, flist)) {
		l_free(flist);
		return (0);
	}

	/* send the state change report */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	/* update the ilm state */
	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0)
		l_copy(flist, ilm->ilm_filter);
	else
		CLEAR_SLIST(ilm->ilm_filter);

	ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode,
	    inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf))));

	/* flist was only a scratch copy */
	l_free(flist);
	return (0);
}

/*
 * Recompute the filter state of an ilm that still has references after a
 * leave (or ilg filter change) and, if the state changed, send an IGMP/MLD
 * state-change report (skipped on loopback).  The callers in this file
 * adjust ilm_refcnt / ilm_no_ilg_cnt before calling.
 *
 * Returns ENOMEM if the scratch list cannot be allocated, otherwise 0.
 * If the ilm's own filter list cannot be allocated the ilm is put in
 * EXCLUDE mode with no source list and 0 is still returned.
 */
static int
ilm_update_del(ilm_t *ilm, boolean_t isv6)
{
	mcast_record_t fmode;
	slist_t *flist;
	ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill;

	ip1dbg(("ilm_update_del: still %d left; updating state\n",
	    ilm->ilm_refcnt));

	if ((flist = l_alloc()) == NULL)
		return (ENOMEM);

	/*
	 * If present, the ilg in question has already either been
	 * updated or removed from our list; so all we need to do
	 * now is walk the list to update the ilm filter state.
	 *
	 * Skip the list walk if we have any no-ilg joins, which
	 * cause the filter state to revert to (EXCLUDE, NULL).
	 */
	if (ilm->ilm_no_ilg_cnt != 0) {
		fmode = MODE_IS_EXCLUDE;
		flist->sl_numsrc = 0;
	} else {
		ilm_gen_filter(ilm, &fmode, flist);
	}

	/* check to see if state needs to be updated */
	if ((ilm->ilm_fmode == fmode) &&
	    (!lists_are_different(ilm->ilm_filter, flist))) {
		l_free(flist);
		return (0);
	}

	/* report the change before committing it to the ilm */
	if (!IS_LOOPBACK(ill)) {
		if (isv6)
			mld_statechange(ilm, fmode, flist);
		else
			igmp_statechange(ilm, fmode, flist);
	}

	ilm->ilm_fmode = fmode;
	if (flist->sl_numsrc > 0) {
		if (ilm->ilm_filter == NULL) {
			ilm->ilm_filter = l_alloc();
			if (ilm->ilm_filter == NULL) {
				char buf[INET6_ADDRSTRLEN];
				ip1dbg(("ilm_update_del: failed to alloc ilm "
				    "filter; no source filtering for %s on %s",
				    inet_ntop(AF_INET6, &ilm->ilm_v6addr,
				    buf, sizeof (buf)), ill->ill_name));
				/* degrade to "receive from all sources" */
				ilm->ilm_fmode = MODE_IS_EXCLUDE;
				l_free(flist);
				return (0);
			}
		}
		l_copy(flist, ilm->ilm_filter);
	} else {
		CLEAR_SLIST(ilm->ilm_filter);
	}

	l_free(flist);
	return (0);
}

/*
 * INADDR_ANY means all multicast addresses.  This is only used
 * by the multicast router.
 * INADDR_ANY is stored as IPv6 unspecified addr.
 *
 * Joins `group' on `ipif'.  Must be called as writer on the ipif.
 * Returns EINVAL if group is neither class D nor INADDR_ANY, ENOMEM if
 * the ilm cannot be created, otherwise the result of updating an existing
 * ilm or of asking the driver to receive the group.  A newly created ilm
 * is deleted again if the driver-level join fails.
 */
int
ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat,
    mcast_record_t ilg_fmode, slist_t *ilg_flist)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t	*ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/* already a member on this ipif: just update counts and filter */
	ilm = ilm_lookup_ipif(ipif, group);
	if (ilm != NULL)
		return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE));

	/*
	 * ilms are associated with ipifs in IPv4.  It moves with the
	 * ipif if the ipif moves to a new ill when the interface
	 * fails.  Thus we really don't check whether the ipif_ill
	 * has failed like in IPv6.  If it has FAILED the ipif
	 * will move (daemon will move it) and hence the ilm, if the
	 * ipif is not IPIF_NOFAILOVER.  For the IPIF_NOFAILOVER ipifs,
	 * we continue to receive in the same place even if the
	 * interface fails.
	 */
	ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist,
	    ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid);
	if (ilm == NULL)
		return (ENOMEM);

	if (group == INADDR_ANY) {
		/*
		 * Check how many ipif's have members in this group -
		 * if more than one we should not tell the driver to join
		 * this time
		 */
		if (ilm_numentries_v6(ill, &v6group) > 1)
			return (0);
		if (ill->ill_group == NULL)
			ret = ip_join_allmulti(ipif);
		else
			ret = ill_nominate_mcast_rcv(ill->ill_group);
		if (ret != 0)
			ilm_delete(ilm);
		return (ret);
	}

	if (!IS_LOOPBACK(ill))
		igmp_joingroup(ilm);

	/* another ilm on this ill already made the driver join the group */
	if (ilm_numentries_v6(ill, &v6group) > 1)
		return (0);

	ret = ip_ll_addmulti_v6(ipif, &v6group);
	if (ret != 0)
		ilm_delete(ilm);
	return (ret);
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 *
 * ill identifies the interface to join on; it may not match the
 * interface requested by the application if a failover has taken
 * place.  orig_ifindex always identifies the interface requested
 * by the app.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
635 */ 636 int 637 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 638 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 639 slist_t *ilg_flist) 640 { 641 ilm_t *ilm; 642 int ret; 643 644 ASSERT(IAM_WRITER_ILL(ill)); 645 646 if (!IN6_IS_ADDR_MULTICAST(v6group) && 647 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 648 return (EINVAL); 649 } 650 651 /* 652 * An ilm is uniquely identified by the tuple of (group, ill, 653 * orig_ill). group is the multicast group address, ill is 654 * the interface on which it is currently joined, and orig_ill 655 * is the interface on which the application requested the 656 * join. orig_ill and ill are the same unless orig_ill has 657 * failed over. 658 * 659 * Both orig_ill and ill are required, which means we may have 660 * 2 ilms on an ill for the same group, but with different 661 * orig_ills. These must be kept separate, so that when failback 662 * occurs, the appropriate ilms are moved back to their orig_ill 663 * without disrupting memberships on the ill to which they had 664 * been moved. 665 * 666 * In order to track orig_ill, we store orig_ifindex in the 667 * ilm and ilg. 668 */ 669 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 670 if (ilm != NULL) 671 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 672 673 /* 674 * We need to remember where the application really wanted 675 * to join. This will be used later if we want to failback 676 * to the original interface. 
677 */ 678 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 679 ilg_flist, orig_ifindex, zoneid); 680 if (ilm == NULL) 681 return (ENOMEM); 682 683 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 684 /* 685 * Check how many ipif's that have members in this group - 686 * if more then one we should not tell the driver to join 687 * this time 688 */ 689 if (ilm_numentries_v6(ill, v6group) > 1) 690 return (0); 691 if (ill->ill_group == NULL) 692 ret = ip_join_allmulti(ill->ill_ipif); 693 else 694 ret = ill_nominate_mcast_rcv(ill->ill_group); 695 696 if (ret != 0) 697 ilm_delete(ilm); 698 return (ret); 699 } 700 701 if (!IS_LOOPBACK(ill)) 702 mld_joingroup(ilm); 703 704 /* 705 * If we have more then one we should not tell the driver 706 * to join this time. 707 */ 708 if (ilm_numentries_v6(ill, v6group) > 1) 709 return (0); 710 711 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 712 if (ret != 0) 713 ilm_delete(ilm); 714 return (ret); 715 } 716 717 /* 718 * Send a multicast request to the driver for enabling multicast reception 719 * for v6groupp address. The caller has already checked whether it is 720 * appropriate to send one or not. 721 */ 722 int 723 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 724 { 725 mblk_t *mp; 726 uint32_t addrlen, addroff; 727 char group_buf[INET6_ADDRSTRLEN]; 728 729 ASSERT(IAM_WRITER_ILL(ill)); 730 731 /* 732 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 733 * on. 734 */ 735 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 736 &addrlen, &addroff); 737 if (!mp) 738 return (ENOMEM); 739 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 740 ipaddr_t v4group; 741 742 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 743 /* 744 * NOTE!!! 745 * The "addroff" passed in here was calculated by 746 * ill_create_dl(), and will be used by ill_create_squery() 747 * to perform some twisted coding magic. It is the offset 748 * into the dl_xxx_req of the hw addr. 
Here, it will be 749 * added to b_wptr - b_rptr to create a magic number that 750 * is not an offset into this squery mblk. 751 * The actual hardware address will be accessed only in the 752 * dl_xxx_req, not in the squery. More importantly, 753 * that hardware address can *only* be accessed in this 754 * mblk chain by calling mi_offset_param_c(), which uses 755 * the magic number in the squery hw offset field to go 756 * to the *next* mblk (the dl_xxx_req), subtract the 757 * (b_wptr - b_rptr), and find the actual offset into 758 * the dl_xxx_req. 759 * Any method that depends on using the 760 * offset field in the dl_disabmulti_req or squery 761 * to find either hardware address will similarly fail. 762 * 763 * Look in ar_entry_squery() in arp.c to see how this offset 764 * is used. 765 */ 766 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 767 if (!mp) 768 return (ENOMEM); 769 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 770 inet_ntop(AF_INET6, v6groupp, group_buf, 771 sizeof (group_buf)), 772 ill->ill_name)); 773 putnext(ill->ill_rq, mp); 774 } else { 775 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_mcastreq %s on" 776 " %s\n", 777 inet_ntop(AF_INET6, v6groupp, group_buf, 778 sizeof (group_buf)), 779 ill->ill_name)); 780 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 781 } 782 return (0); 783 } 784 785 /* 786 * Send a multicast request to the driver for enabling multicast 787 * membership for v6group if appropriate. 
*/
static int
ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	/* nothing to tell the driver on non-resolver or point-to-point */
	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		ip1dbg(("ip_ll_addmulti_v6: not resolver\n"));
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}

	/* PHYI_MULTI_BCAST interfaces need no per-group enable request */
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_addmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_enabmulti_req(ill, v6groupp));
}

/*
 * INADDR_ANY means all multicast addresses.  This is only used
 * by the multicast router.
 * INADDR_ANY is stored as the IPv6 unspecified addr.
 *
 * Leaves `group' on `ipif'.  Must be called as writer on the ipif.
 *	no_ilg:  the membership being dropped had no ilg; ilm_no_ilg_cnt
 *		 is decremented.
 *	leaving: a reference is really going away; ilm_refcnt is
 *		 decremented.
 * Returns EINVAL for an address that is neither class D nor INADDR_ANY,
 * ENOENT if there is no ilm for (ipif, group), otherwise the result of
 * the filter update or of the driver-level leave.
 */
int
ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving)
{
	ill_t	*ill = ipif->ipif_ill;
	ilm_t *ilm;
	in6_addr_t v6group;
	int	ret;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!CLASSD(group) && group != INADDR_ANY)
		return (EINVAL);

	/*
	 * INADDR_ANY is represented as the IPv6 unspecified addr.
	 */
	if (group == INADDR_ANY)
		v6group = ipv6_all_zeros;
	else
		IN6_IPADDR_TO_V4MAPPED(group, &v6group);

	/*
	 * Look for a match on the ipif.
	 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address).
	 */
	ilm = ilm_lookup_ipif(ipif, group);
	if (ilm == NULL)
		return (ENOENT);

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* still referenced: only the filter state may need refreshing */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_FALSE));

	if (group == INADDR_ANY) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, &v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave.  This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		/* let the group pick a new all-multicast receiver */
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	/* the leave is sent while the ilm still exists */
	if (!IS_LOOPBACK(ill))
		igmp_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, &v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, &v6group));
}

/*
 * The unspecified address means all multicast addresses.
 * This is only used by the multicast router.
 *
 * Leaves `v6group' on `ill' for the membership originally requested on
 * orig_ifindex in zone `zoneid'.  Must be called as writer on the ill.
 * no_ilg and leaving decrement ilm_no_ilg_cnt and ilm_refcnt
 * respectively.  Returns EINVAL for an address that is neither multicast
 * nor unspecified, ENOENT if no matching ilm exists, otherwise the result
 * of the filter update or of the driver-level leave.
 */
int
ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving)
{
	ipif_t	*ipif;
	ilm_t *ilm;
	int	ret;

	ASSERT(IAM_WRITER_ILL(ill));

	if (!IN6_IS_ADDR_MULTICAST(v6group) &&
	    !IN6_IS_ADDR_UNSPECIFIED(v6group))
		return (EINVAL);

	/*
	 * Look for a match on the ill.
	 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex).
	 *
	 * Similar to ip_addmulti_v6, we should always look using
	 * the orig_ifindex.
	 *
	 * 1) If orig_ifindex is different from ill's ifindex
	 *    we should have an ilm with orig_ifindex created in
	 *    ip_addmulti_v6.  We should delete that here.
	 *
	 * 2) If orig_ifindex is same as ill's ifindex, we should
	 *    not delete the ilm that is temporarily here because of
	 *    a FAILOVER.  Those ilms will have a ilm_orig_ifindex
	 *    different from ill's ifindex.
	 *
	 * Thus, always lookup using orig_ifindex.
	 */
	ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
	if (ilm == NULL)
		return (ENOENT);

	ASSERT(ilm->ilm_ill == ill);

	ipif = ill->ill_ipif;

	/* Update counters */
	if (no_ilg)
		ilm->ilm_no_ilg_cnt--;

	if (leaving)
		ilm->ilm_refcnt--;

	/* still referenced: only the filter state may need refreshing */
	if (ilm->ilm_refcnt > 0)
		return (ilm_update_del(ilm, B_TRUE));

	if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
		ilm_delete(ilm);
		/*
		 * Check how many ipif's that have members in this group -
		 * if there are still some left then don't tell the driver
		 * to drop it.
		 */
		if (ilm_numentries_v6(ill, v6group) != 0)
			return (0);

		/*
		 * If we never joined, then don't leave.  This can happen
		 * if we're in an IPMP group, since only one ill per IPMP
		 * group receives all multicast packets.
		 */
		if (!ill->ill_join_allmulti) {
			ASSERT(ill->ill_group != NULL);
			return (0);
		}

		ret = ip_leave_allmulti(ipif);
		/* let the group pick a new all-multicast receiver */
		if (ill->ill_group != NULL)
			(void) ill_nominate_mcast_rcv(ill->ill_group);
		return (ret);
	}

	/* the leave is sent while the ilm still exists */
	if (!IS_LOOPBACK(ill))
		mld_leavegroup(ilm);

	ilm_delete(ilm);
	/*
	 * Check how many ipif's that have members in this group -
	 * if there are still some left then don't tell the driver
	 * to drop it.
	 */
	if (ilm_numentries_v6(ill, v6group) != 0)
		return (0);
	return (ip_ll_delmulti_v6(ipif, v6group));
}

/*
 * Send a multicast request to the driver for disabling multicast reception
 * for v6groupp address.  The caller has already checked whether it is
 * appropriate to send one or not.
 *
 * Returns ENOMEM if a message cannot be built, the result of
 * ndp_mcastreq() for a non-v4-mapped group, and 0 otherwise.
 */
int
ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
	mblk_t	*mp;
	char	group_buf[INET6_ADDRSTRLEN];
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_ILL(ill));
	/*
	 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
	 * on.
	 */
	mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
	    sizeof (dl_disabmulti_req_t), &addrlen, &addroff);

	if (!mp)
		return (ENOMEM);

	if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
		ipaddr_t v4group;

		IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
		/*
		 * NOTE!!!
		 * The "addroff" passed in here was calculated by
		 * ill_create_dl(), and will be used by ill_create_squery()
		 * to perform some twisted coding magic.  It is the offset
		 * into the dl_xxx_req of the hw addr.  Here, it will be
		 * added to b_wptr - b_rptr to create a magic number that
		 * is not an offset into this mblk.
		 *
		 * Please see the comment in ip_ll_send_enabmulti_req()
		 * for a complete explanation.
		 *
		 * Look in ar_entry_squery() in arp.c to see how this offset
		 * is used.
		 */
		mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
		if (!mp)
			return (ENOMEM);
		ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		putnext(ill->ill_rq, mp);
	} else {
		ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_mcastreq %s on"
		    " %s\n",
		    inet_ntop(AF_INET6, v6groupp, group_buf,
		    sizeof (group_buf)),
		    ill->ill_name));
		return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
	}
	return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 *
 * Applies the same three "nothing to do" checks as ip_ll_addmulti_v6()
 * (non-resolver or point-to-point, PHYI_MULTI_BCAST, driver not up) and
 * returns 0 for each of them.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
	ill_t	*ill = ipif->ipif_ill;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (ill->ill_net_type != IRE_IF_RESOLVER ||
	    ipif->ipif_flags & IPIF_POINTOPOINT) {
		return (0);	/* Must be IRE_IF_NORESOLVER */
	}
	if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
		ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
		return (0);
	}
	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		ip1dbg(("ip_ll_delmulti_v6: nobody up\n"));
		return (0);
	}
	return (ip_ll_send_disabmulti_req(ill, v6group));
}

/*
 * Make the driver pass up all multicast packets
 *
 * With ill groups, the caller makes sure that there is only
 * one ill joining the allmulti group.
 *
 * Returns 0 on success (including when the driver is not up yet, in
 * which case ill_join_allmulti is left unset) or ENOMEM if the
 * DL_PROMISCON_REQ cannot be allocated.
 */
int
ip_join_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(!ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCON_REQ message and send it directly to
	 * the DLPI provider.  We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
	 */
	if ((ill->ill_net_type == IRE_IF_RESOLVER) &&
	    !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) {
		mp = ill_create_dl(ill, DL_PROMISCON_REQ,
		    sizeof (dl_promiscon_req_t), &addrlen, &addroff);
		if (mp == NULL)
			return (ENOMEM);
		ill_dlpi_send(ill, mp);
	}

	/* the flag is written under ill_lock */
	mutex_enter(&ill->ill_lock);
	ill->ill_join_allmulti = B_TRUE;
	mutex_exit(&ill->ill_lock);
	return (0);
}

/*
 * Make the driver stop passing up all multicast packets
 *
 * With ill groups, we need to nominate some other ill as
 * this ipif->ipif_ill is leaving the group.
 */
int
ip_leave_allmulti(ipif_t *ipif)
{
	ill_t	*ill = ipif->ipif_ill;
	mblk_t	*mp;
	uint32_t	addrlen, addroff;

	ASSERT(IAM_WRITER_IPIF(ipif));

	if (!ill->ill_dl_up) {
		/*
		 * Nobody there. All multicast addresses will be re-joined
		 * when we get the DL_BIND_ACK bringing the interface up.
		 */
		return (0);
	}

	ASSERT(ill->ill_join_allmulti);

	/*
	 * Create a DL_PROMISCOFF_REQ message and send it directly to
	 * the DLPI provider.  We don't need to do this for certain
	 * media types for which we never need to turn promiscuous
	 * mode on.
1162 */ 1163 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1164 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1165 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1166 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1167 if (mp == NULL) 1168 return (ENOMEM); 1169 ill_dlpi_send(ill, mp); 1170 } 1171 1172 mutex_enter(&ill->ill_lock); 1173 ill->ill_join_allmulti = B_FALSE; 1174 mutex_exit(&ill->ill_lock); 1175 return (0); 1176 } 1177 1178 /* 1179 * Copy mp_orig and pass it in as a local message. 1180 */ 1181 void 1182 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1183 zoneid_t zoneid) 1184 { 1185 mblk_t *mp; 1186 mblk_t *ipsec_mp; 1187 ipha_t *iph; 1188 ip_stack_t *ipst = ill->ill_ipst; 1189 1190 if (DB_TYPE(mp_orig) == M_DATA && 1191 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1192 uint_t hdrsz; 1193 1194 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1195 sizeof (udpha_t); 1196 ASSERT(MBLKL(mp_orig) >= hdrsz); 1197 1198 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1199 (mp_orig = dupmsg(mp_orig)) != NULL) { 1200 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1201 mp->b_wptr += hdrsz; 1202 mp->b_cont = mp_orig; 1203 mp_orig->b_rptr += hdrsz; 1204 if (is_system_labeled() && DB_CRED(mp_orig) != NULL) 1205 mblk_setcred(mp, DB_CRED(mp_orig)); 1206 if (MBLKL(mp_orig) == 0) { 1207 mp->b_cont = mp_orig->b_cont; 1208 mp_orig->b_cont = NULL; 1209 freeb(mp_orig); 1210 } 1211 } else if (mp != NULL) { 1212 freeb(mp); 1213 mp = NULL; 1214 } 1215 } else { 1216 mp = ip_copymsg(mp_orig); /* No refcnt on ipsec_out netstack */ 1217 } 1218 1219 if (mp == NULL) 1220 return; 1221 if (DB_TYPE(mp) == M_CTL) { 1222 ipsec_mp = mp; 1223 mp = mp->b_cont; 1224 } else { 1225 ipsec_mp = mp; 1226 } 1227 1228 iph = (ipha_t *)mp->b_rptr; 1229 1230 DTRACE_PROBE4(ip4__loopback__out__start, 1231 ill_t *, NULL, ill_t *, ill, 1232 ipha_t *, iph, mblk_t *, ipsec_mp); 1233 1234 FW_HOOKS(ipst->ips_ip4_loopback_out_event, 1235 
ipst->ips_ipv4firewall_loopback_out, 1236 NULL, ill, iph, ipsec_mp, mp, HPE_MULTICAST, ipst); 1237 1238 DTRACE_PROBE1(ip4__loopback__out__end, mblk_t *, ipsec_mp); 1239 1240 if (ipsec_mp != NULL) 1241 ip_wput_local(q, ill, iph, ipsec_mp, NULL, 1242 fanout_flags, zoneid); 1243 } 1244 1245 static area_t ip_aresq_template = { 1246 AR_ENTRY_SQUERY, /* cmd */ 1247 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1248 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1249 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1250 sizeof (area_t), /* proto addr offset */ 1251 IP_ADDR_LEN, /* proto addr_length */ 1252 0, /* proto mask offset */ 1253 /* Rest is initialized when used */ 1254 0, /* flags */ 1255 0, /* hw addr offset */ 1256 0, /* hw addr length */ 1257 }; 1258 1259 static mblk_t * 1260 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1261 uint32_t addroff, mblk_t *mp_tail) 1262 { 1263 mblk_t *mp; 1264 area_t *area; 1265 1266 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1267 (caddr_t)&ipaddr); 1268 if (!mp) { 1269 freemsg(mp_tail); 1270 return (NULL); 1271 } 1272 area = (area_t *)mp->b_rptr; 1273 area->area_hw_addr_length = addrlen; 1274 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1275 /* 1276 * NOTE! 1277 * 1278 * The area_hw_addr_offset, as can be seen, does not hold the 1279 * actual hardware address offset. Rather, it holds the offset 1280 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1281 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1282 * mi_offset_paramc() to find the hardware address in the 1283 * *second* mblk (dl_xxx_req), not this mblk. 1284 * 1285 * Using mi_offset_paramc() is thus the *only* way to access 1286 * the dl_xxx_hw address. 1287 * 1288 * The squery hw address should *not* be accessed. 1289 * 1290 * See ar_entry_squery() in arp.c for an example of how all this works. 
1291 */ 1292 1293 mp->b_cont = mp_tail; 1294 return (mp); 1295 } 1296 1297 /* 1298 * Create a dlpi message with room for phys+sap. When we come back in 1299 * ip_wput_ctl() we will strip the sap for those primitives which 1300 * only need a physical address. 1301 */ 1302 static mblk_t * 1303 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1304 uint32_t *addr_lenp, uint32_t *addr_offp) 1305 { 1306 mblk_t *mp; 1307 uint32_t hw_addr_length; 1308 char *cp; 1309 uint32_t offset; 1310 uint32_t size; 1311 1312 *addr_lenp = *addr_offp = 0; 1313 1314 hw_addr_length = ill->ill_phys_addr_length; 1315 if (!hw_addr_length) { 1316 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1317 return (NULL); 1318 } 1319 1320 size = length; 1321 switch (dl_primitive) { 1322 case DL_ENABMULTI_REQ: 1323 case DL_DISABMULTI_REQ: 1324 size += hw_addr_length; 1325 break; 1326 case DL_PROMISCON_REQ: 1327 case DL_PROMISCOFF_REQ: 1328 break; 1329 default: 1330 return (NULL); 1331 } 1332 mp = allocb(size, BPRI_HI); 1333 if (!mp) 1334 return (NULL); 1335 mp->b_wptr += size; 1336 mp->b_datap->db_type = M_PROTO; 1337 1338 cp = (char *)mp->b_rptr; 1339 offset = length; 1340 1341 switch (dl_primitive) { 1342 case DL_ENABMULTI_REQ: { 1343 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1344 1345 dl->dl_primitive = dl_primitive; 1346 dl->dl_addr_offset = offset; 1347 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1348 *addr_offp = offset; 1349 break; 1350 } 1351 case DL_DISABMULTI_REQ: { 1352 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1353 1354 dl->dl_primitive = dl_primitive; 1355 dl->dl_addr_offset = offset; 1356 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1357 *addr_offp = offset; 1358 break; 1359 } 1360 case DL_PROMISCON_REQ: 1361 case DL_PROMISCOFF_REQ: { 1362 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1363 1364 dl->dl_primitive = dl_primitive; 1365 dl->dl_level = DL_PROMISC_MULTI; 1366 break; 1367 } 1368 } 1369 ip1dbg(("ill_create_dl: addr_len %d, 
addr_off %d\n", 1370 *addr_lenp, *addr_offp)); 1371 return (mp); 1372 } 1373 1374 void 1375 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1376 { 1377 ill_t *ill = (ill_t *)q->q_ptr; 1378 mblk_t *mp = mp_orig; 1379 area_t *area = (area_t *)mp->b_rptr; 1380 1381 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1382 if (MBLKL(mp) < sizeof (area_t) || mp->b_cont == NULL || 1383 area->area_cmd != AR_ENTRY_SQUERY) { 1384 putnext(q, mp); 1385 return; 1386 } 1387 mp = mp->b_cont; 1388 1389 /* 1390 * Update dl_addr_length and dl_addr_offset for primitives that 1391 * have physical addresses as opposed to full saps 1392 */ 1393 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1394 case DL_ENABMULTI_REQ: 1395 /* Track the state if this is the first enabmulti */ 1396 if (ill->ill_dlpi_multicast_state == IDS_UNKNOWN) 1397 ill->ill_dlpi_multicast_state = IDS_INPROGRESS; 1398 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1399 break; 1400 case DL_DISABMULTI_REQ: 1401 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1402 break; 1403 default: 1404 ip1dbg(("ip_wput_ctl: default\n")); 1405 break; 1406 } 1407 freeb(mp_orig); 1408 ill_dlpi_send(ill, mp); 1409 } 1410 1411 /* 1412 * Rejoin any groups which have been explicitly joined by the application (we 1413 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1414 * bringing the interface down). Note that because groups can be joined and 1415 * left while an interface is down, this may not be the same set of groups 1416 * that we left in ill_leave_multicast(). 1417 */ 1418 void 1419 ill_recover_multicast(ill_t *ill) 1420 { 1421 ilm_t *ilm; 1422 char addrbuf[INET6_ADDRSTRLEN]; 1423 1424 ASSERT(IAM_WRITER_ILL(ill)); 1425 1426 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1427 /* 1428 * Check how many ipif's that have members in this group - 1429 * if more then one we make sure that this entry is first 1430 * in the list. 
1431 */ 1432 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1433 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1434 continue; 1435 ip1dbg(("ill_recover_multicast: %s\n", 1436 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1437 sizeof (addrbuf)))); 1438 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1439 if (ill->ill_group == NULL) { 1440 (void) ip_join_allmulti(ill->ill_ipif); 1441 } else { 1442 /* 1443 * We don't want to join on this ill, 1444 * if somebody else in the group has 1445 * already been nominated. 1446 */ 1447 (void) ill_nominate_mcast_rcv(ill->ill_group); 1448 } 1449 } else { 1450 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1451 &ilm->ilm_v6addr); 1452 } 1453 } 1454 } 1455 1456 /* 1457 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1458 * that were explicitly joined. Note that both these functions could be 1459 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1460 * and DL_ENABMULTI_REQ messages when an interface is down. 1461 */ 1462 void 1463 ill_leave_multicast(ill_t *ill) 1464 { 1465 ilm_t *ilm; 1466 char addrbuf[INET6_ADDRSTRLEN]; 1467 1468 ASSERT(IAM_WRITER_ILL(ill)); 1469 1470 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1471 /* 1472 * Check how many ipif's that have members in this group - 1473 * if more then one we make sure that this entry is first 1474 * in the list. 1475 */ 1476 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1477 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1478 continue; 1479 ip1dbg(("ill_leave_multicast: %s\n", 1480 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1481 sizeof (addrbuf)))); 1482 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1483 (void) ip_leave_allmulti(ill->ill_ipif); 1484 /* 1485 * If we were part of an IPMP group, then 1486 * ill_handoff_responsibility() has already 1487 * nominated a new member (so we don't). 
1488 */ 1489 ASSERT(ill->ill_group == NULL); 1490 } else { 1491 (void) ip_ll_delmulti_v6(ill->ill_ipif, 1492 &ilm->ilm_v6addr); 1493 } 1494 } 1495 } 1496 1497 /* Find an ilm for matching the ill */ 1498 ilm_t * 1499 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1500 { 1501 in6_addr_t v6group; 1502 1503 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1504 IAM_WRITER_ILL(ill)); 1505 /* 1506 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1507 */ 1508 if (group == INADDR_ANY) 1509 v6group = ipv6_all_zeros; 1510 else 1511 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1512 1513 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1514 } 1515 1516 /* 1517 * Find an ilm for matching the ill. All the ilm lookup functions 1518 * ignore ILM_DELETED ilms. These have been logically deleted, and 1519 * igmp and linklayer disable multicast have been done. Only mi_free 1520 * yet to be done. Still there in the list due to ilm_walkers. The 1521 * last walker will release it. 
1522 */ 1523 ilm_t * 1524 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1525 { 1526 ilm_t *ilm; 1527 1528 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1529 IAM_WRITER_ILL(ill)); 1530 1531 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1532 if (ilm->ilm_flags & ILM_DELETED) 1533 continue; 1534 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1535 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1536 return (ilm); 1537 } 1538 return (NULL); 1539 } 1540 1541 ilm_t * 1542 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1543 zoneid_t zoneid) 1544 { 1545 ilm_t *ilm; 1546 1547 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1548 IAM_WRITER_ILL(ill)); 1549 1550 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1551 if (ilm->ilm_flags & ILM_DELETED) 1552 continue; 1553 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1554 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1555 ilm->ilm_orig_ifindex == index) { 1556 return (ilm); 1557 } 1558 } 1559 return (NULL); 1560 } 1561 1562 ilm_t * 1563 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1564 { 1565 in6_addr_t v6group; 1566 1567 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1568 IAM_WRITER_ILL(ill)); 1569 /* 1570 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1571 */ 1572 if (group == INADDR_ANY) 1573 v6group = ipv6_all_zeros; 1574 else 1575 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1576 1577 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1578 } 1579 1580 /* 1581 * Found an ilm for the ipif. Only needed for IPv4 which does 1582 * ipif specific socket options. 
1583 */ 1584 ilm_t * 1585 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1586 { 1587 ill_t *ill = ipif->ipif_ill; 1588 ilm_t *ilm; 1589 in6_addr_t v6group; 1590 1591 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1592 IAM_WRITER_ILL(ill)); 1593 1594 /* 1595 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1596 */ 1597 if (group == INADDR_ANY) 1598 v6group = ipv6_all_zeros; 1599 else 1600 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1601 1602 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1603 if (ilm->ilm_flags & ILM_DELETED) 1604 continue; 1605 if (ilm->ilm_ipif == ipif && 1606 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1607 return (ilm); 1608 } 1609 return (NULL); 1610 } 1611 1612 /* 1613 * How many members on this ill? 1614 */ 1615 int 1616 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1617 { 1618 ilm_t *ilm; 1619 int i = 0; 1620 1621 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1622 IAM_WRITER_ILL(ill)); 1623 1624 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1625 if (ilm->ilm_flags & ILM_DELETED) 1626 continue; 1627 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1628 i++; 1629 } 1630 } 1631 return (i); 1632 } 1633 1634 /* Caller guarantees that the group is not already on the list */ 1635 static ilm_t * 1636 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1637 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1638 zoneid_t zoneid) 1639 { 1640 ill_t *ill = ipif->ipif_ill; 1641 ilm_t *ilm; 1642 ilm_t *ilm_cur; 1643 ilm_t **ilm_ptpn; 1644 1645 ASSERT(IAM_WRITER_IPIF(ipif)); 1646 1647 ilm = GETSTRUCT(ilm_t, 1); 1648 if (ilm == NULL) 1649 return (NULL); 1650 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1651 ilm->ilm_filter = l_alloc(); 1652 if (ilm->ilm_filter == NULL) { 1653 mi_free(ilm); 1654 return (NULL); 1655 } 1656 } 1657 ilm->ilm_v6addr = *v6group; 1658 ilm->ilm_refcnt = 1; 1659 ilm->ilm_zoneid = zoneid; 1660 
ilm->ilm_timer = INFINITY; 1661 ilm->ilm_rtx.rtx_timer = INFINITY; 1662 1663 /* 1664 * IPv4 Multicast groups are joined using ipif. 1665 * IPv6 Multicast groups are joined using ill. 1666 */ 1667 if (ill->ill_isv6) { 1668 ilm->ilm_ill = ill; 1669 ilm->ilm_ipif = NULL; 1670 } else { 1671 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1672 ilm->ilm_ipif = ipif; 1673 ilm->ilm_ill = NULL; 1674 } 1675 ASSERT(ill->ill_ipst); 1676 ilm->ilm_ipst = ill->ill_ipst; /* No netstack_hold */ 1677 1678 /* 1679 * After this if ilm moves to a new ill, we don't change 1680 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1681 * it has been moved. Indexes don't match even when the application 1682 * wants to join on a FAILED/INACTIVE interface because we choose 1683 * a new interface to join in. This is considered as an implicit 1684 * move. 1685 */ 1686 ilm->ilm_orig_ifindex = orig_ifindex; 1687 1688 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1689 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1690 1691 /* 1692 * Grab lock to give consistent view to readers 1693 */ 1694 mutex_enter(&ill->ill_lock); 1695 /* 1696 * All ilms in the same zone are contiguous in the ill_ilm list. 1697 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1698 * sending duplicates up when two applications in the same zone join the 1699 * same group on different logical interfaces. 1700 */ 1701 ilm_cur = ill->ill_ilm; 1702 ilm_ptpn = &ill->ill_ilm; 1703 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1704 ilm_ptpn = &ilm_cur->ilm_next; 1705 ilm_cur = ilm_cur->ilm_next; 1706 } 1707 ilm->ilm_next = ilm_cur; 1708 *ilm_ptpn = ilm; 1709 1710 /* 1711 * If we have an associated ilg, use its filter state; if not, 1712 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
1713 */ 1714 if (ilgstat != ILGSTAT_NONE) { 1715 if (!SLIST_IS_EMPTY(ilg_flist)) 1716 l_copy(ilg_flist, ilm->ilm_filter); 1717 ilm->ilm_fmode = ilg_fmode; 1718 } else { 1719 ilm->ilm_no_ilg_cnt = 1; 1720 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1721 } 1722 1723 mutex_exit(&ill->ill_lock); 1724 return (ilm); 1725 } 1726 1727 void 1728 ilm_walker_cleanup(ill_t *ill) 1729 { 1730 ilm_t **ilmp; 1731 ilm_t *ilm; 1732 1733 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1734 ASSERT(ill->ill_ilm_walker_cnt == 0); 1735 1736 ilmp = &ill->ill_ilm; 1737 while (*ilmp != NULL) { 1738 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1739 ilm = *ilmp; 1740 *ilmp = ilm->ilm_next; 1741 FREE_SLIST(ilm->ilm_filter); 1742 FREE_SLIST(ilm->ilm_pendsrcs); 1743 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1744 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1745 ilm->ilm_ipst = NULL; 1746 mi_free((char *)ilm); 1747 } else { 1748 ilmp = &(*ilmp)->ilm_next; 1749 } 1750 } 1751 ill->ill_ilm_cleanup_reqd = 0; 1752 } 1753 1754 /* 1755 * Unlink ilm and free it. 1756 */ 1757 static void 1758 ilm_delete(ilm_t *ilm) 1759 { 1760 ill_t *ill; 1761 ilm_t **ilmp; 1762 1763 if (ilm->ilm_ipif != NULL) { 1764 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1765 ASSERT(ilm->ilm_ill == NULL); 1766 ill = ilm->ilm_ipif->ipif_ill; 1767 ASSERT(!ill->ill_isv6); 1768 } else { 1769 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1770 ASSERT(ilm->ilm_ipif == NULL); 1771 ill = ilm->ilm_ill; 1772 ASSERT(ill->ill_isv6); 1773 } 1774 /* 1775 * Delete under lock protection so that readers don't stumble 1776 * on bad ilm_next 1777 */ 1778 mutex_enter(&ill->ill_lock); 1779 if (ill->ill_ilm_walker_cnt != 0) { 1780 ilm->ilm_flags |= ILM_DELETED; 1781 ill->ill_ilm_cleanup_reqd = 1; 1782 mutex_exit(&ill->ill_lock); 1783 return; 1784 } 1785 1786 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1787 ; 1788 *ilmp = ilm->ilm_next; 1789 mutex_exit(&ill->ill_lock); 1790 1791 FREE_SLIST(ilm->ilm_filter); 1792 FREE_SLIST(ilm->ilm_pendsrcs); 1793 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 
1794 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1795 ilm->ilm_ipst = NULL; 1796 mi_free((char *)ilm); 1797 } 1798 1799 /* Free all ilms for this ipif */ 1800 void 1801 ilm_free(ipif_t *ipif) 1802 { 1803 ill_t *ill = ipif->ipif_ill; 1804 ilm_t *ilm; 1805 ilm_t *next_ilm; 1806 1807 ASSERT(IAM_WRITER_IPIF(ipif)); 1808 1809 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1810 next_ilm = ilm->ilm_next; 1811 if (ilm->ilm_ipif == ipif) 1812 ilm_delete(ilm); 1813 } 1814 } 1815 1816 /* 1817 * Looks up the appropriate ipif given a v4 multicast group and interface 1818 * address. On success, returns 0, with *ipifpp pointing to the found 1819 * struct. On failure, returns an errno and *ipifpp is NULL. 1820 */ 1821 int 1822 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1823 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1824 { 1825 ipif_t *ipif; 1826 int err = 0; 1827 zoneid_t zoneid; 1828 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1829 1830 if (!CLASSD(group) || CLASSD(src)) { 1831 return (EINVAL); 1832 } 1833 *ipifpp = NULL; 1834 1835 zoneid = IPCL_ZONEID(connp); 1836 1837 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1838 if (ifaddr != INADDR_ANY) { 1839 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1840 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1841 if (err != 0 && err != EINPROGRESS) 1842 err = EADDRNOTAVAIL; 1843 } else if (ifindexp != NULL && *ifindexp != 0) { 1844 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1845 CONNP_TO_WQ(connp), first_mp, func, &err, ipst); 1846 } else { 1847 ipif = ipif_lookup_group(group, zoneid, ipst); 1848 if (ipif == NULL) 1849 return (EADDRNOTAVAIL); 1850 } 1851 if (ipif == NULL) 1852 return (err); 1853 1854 *ipifpp = ipif; 1855 return (0); 1856 } 1857 1858 /* 1859 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1860 * index and IPv6 multicast group. 
On success, returns 0, with *illpp (or 1861 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1862 * an errno and *illpp and *ipifpp are undefined. 1863 */ 1864 int 1865 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1866 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1867 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1868 { 1869 boolean_t src_unspec; 1870 ill_t *ill = NULL; 1871 ipif_t *ipif = NULL; 1872 int err; 1873 zoneid_t zoneid = connp->conn_zoneid; 1874 queue_t *wq = CONNP_TO_WQ(connp); 1875 ip_stack_t *ipst = connp->conn_netstack->netstack_ip; 1876 1877 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1878 1879 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1880 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1881 return (EINVAL); 1882 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1883 if (src_unspec) { 1884 *v4src = INADDR_ANY; 1885 } else { 1886 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1887 } 1888 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1889 return (EINVAL); 1890 *ipifpp = NULL; 1891 *isv6 = B_FALSE; 1892 } else { 1893 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1894 return (EINVAL); 1895 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1896 IN6_IS_ADDR_MULTICAST(v6src)) { 1897 return (EINVAL); 1898 } 1899 *illpp = NULL; 1900 *isv6 = B_TRUE; 1901 } 1902 1903 if (ifindex == 0) { 1904 if (*isv6) 1905 ill = ill_lookup_group_v6(v6group, zoneid, ipst); 1906 else 1907 ipif = ipif_lookup_group(*v4group, zoneid, ipst); 1908 if (ill == NULL && ipif == NULL) 1909 return (EADDRNOTAVAIL); 1910 } else { 1911 if (*isv6) { 1912 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1913 wq, first_mp, func, &err, ipst); 1914 if (ill != NULL && 1915 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1916 ill_refrele(ill); 1917 ill = NULL; 1918 err = EADDRNOTAVAIL; 1919 } 1920 } else { 1921 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1922 zoneid, wq, first_mp, func, &err, ipst); 1923 } 1924 if (ill == NULL 
&& ipif == NULL) 1925 return (err); 1926 } 1927 1928 *ipifpp = ipif; 1929 *illpp = ill; 1930 return (0); 1931 } 1932 1933 static int 1934 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1935 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1936 { 1937 ilg_t *ilg; 1938 int i, numsrc, fmode, outsrcs; 1939 struct sockaddr_in *sin; 1940 struct sockaddr_in6 *sin6; 1941 struct in_addr *addrp; 1942 slist_t *fp; 1943 boolean_t is_v4only_api; 1944 1945 mutex_enter(&connp->conn_lock); 1946 1947 ilg = ilg_lookup_ipif(connp, grp, ipif); 1948 if (ilg == NULL) { 1949 mutex_exit(&connp->conn_lock); 1950 return (EADDRNOTAVAIL); 1951 } 1952 1953 if (gf == NULL) { 1954 ASSERT(imsf != NULL); 1955 ASSERT(!isv4mapped); 1956 is_v4only_api = B_TRUE; 1957 outsrcs = imsf->imsf_numsrc; 1958 } else { 1959 ASSERT(imsf == NULL); 1960 is_v4only_api = B_FALSE; 1961 outsrcs = gf->gf_numsrc; 1962 } 1963 1964 /* 1965 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 1966 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 1967 * So we need to translate here. 1968 */ 1969 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
1970 MCAST_INCLUDE : MCAST_EXCLUDE; 1971 if ((fp = ilg->ilg_filter) == NULL) { 1972 numsrc = 0; 1973 } else { 1974 for (i = 0; i < outsrcs; i++) { 1975 if (i == fp->sl_numsrc) 1976 break; 1977 if (isv4mapped) { 1978 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 1979 sin6->sin6_family = AF_INET6; 1980 sin6->sin6_addr = fp->sl_addr[i]; 1981 } else { 1982 if (is_v4only_api) { 1983 addrp = &imsf->imsf_slist[i]; 1984 } else { 1985 sin = (struct sockaddr_in *) 1986 &gf->gf_slist[i]; 1987 sin->sin_family = AF_INET; 1988 addrp = &sin->sin_addr; 1989 } 1990 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 1991 } 1992 } 1993 numsrc = fp->sl_numsrc; 1994 } 1995 1996 if (is_v4only_api) { 1997 imsf->imsf_numsrc = numsrc; 1998 imsf->imsf_fmode = fmode; 1999 } else { 2000 gf->gf_numsrc = numsrc; 2001 gf->gf_fmode = fmode; 2002 } 2003 2004 mutex_exit(&connp->conn_lock); 2005 2006 return (0); 2007 } 2008 2009 static int 2010 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2011 const struct in6_addr *grp, ill_t *ill) 2012 { 2013 ilg_t *ilg; 2014 int i; 2015 struct sockaddr_storage *sl; 2016 struct sockaddr_in6 *sin6; 2017 slist_t *fp; 2018 2019 mutex_enter(&connp->conn_lock); 2020 2021 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2022 if (ilg == NULL) { 2023 mutex_exit(&connp->conn_lock); 2024 return (EADDRNOTAVAIL); 2025 } 2026 2027 /* 2028 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2029 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2030 * So we need to translate here. 2031 */ 2032 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2033 MCAST_INCLUDE : MCAST_EXCLUDE; 2034 if ((fp = ilg->ilg_filter) == NULL) { 2035 gf->gf_numsrc = 0; 2036 } else { 2037 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2038 if (i == fp->sl_numsrc) 2039 break; 2040 sin6 = (struct sockaddr_in6 *)sl; 2041 sin6->sin6_family = AF_INET6; 2042 sin6->sin6_addr = fp->sl_addr[i]; 2043 } 2044 gf->gf_numsrc = fp->sl_numsrc; 2045 } 2046 2047 mutex_exit(&connp->conn_lock); 2048 2049 return (0); 2050 } 2051 2052 static int 2053 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2054 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2055 { 2056 ilg_t *ilg; 2057 int i, err, insrcs, infmode, new_fmode; 2058 struct sockaddr_in *sin; 2059 struct sockaddr_in6 *sin6; 2060 struct in_addr *addrp; 2061 slist_t *orig_filter = NULL; 2062 slist_t *new_filter = NULL; 2063 mcast_record_t orig_fmode; 2064 boolean_t leave_grp, is_v4only_api; 2065 ilg_stat_t ilgstat; 2066 2067 if (gf == NULL) { 2068 ASSERT(imsf != NULL); 2069 ASSERT(!isv4mapped); 2070 is_v4only_api = B_TRUE; 2071 insrcs = imsf->imsf_numsrc; 2072 infmode = imsf->imsf_fmode; 2073 } else { 2074 ASSERT(imsf == NULL); 2075 is_v4only_api = B_FALSE; 2076 insrcs = gf->gf_numsrc; 2077 infmode = gf->gf_fmode; 2078 } 2079 2080 /* Make sure we can handle the source list */ 2081 if (insrcs > MAX_FILTER_SIZE) 2082 return (ENOBUFS); 2083 2084 /* 2085 * setting the filter to (INCLUDE, NULL) is treated 2086 * as a request to leave the group. 2087 */ 2088 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2089 2090 ASSERT(IAM_WRITER_IPIF(ipif)); 2091 2092 mutex_enter(&connp->conn_lock); 2093 2094 ilg = ilg_lookup_ipif(connp, grp, ipif); 2095 if (ilg == NULL) { 2096 /* 2097 * if the request was actually to leave, and we 2098 * didn't find an ilg, there's nothing to do. 2099 */ 2100 if (!leave_grp) 2101 ilg = conn_ilg_alloc(connp); 2102 if (leave_grp || ilg == NULL) { 2103 mutex_exit(&connp->conn_lock); 2104 return (leave_grp ? 
0 : ENOMEM); 2105 } 2106 ilgstat = ILGSTAT_NEW; 2107 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2108 ilg->ilg_ipif = ipif; 2109 ilg->ilg_ill = NULL; 2110 ilg->ilg_orig_ifindex = 0; 2111 } else if (leave_grp) { 2112 ilg_delete(connp, ilg, NULL); 2113 mutex_exit(&connp->conn_lock); 2114 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2115 return (0); 2116 } else { 2117 ilgstat = ILGSTAT_CHANGE; 2118 /* Preserve existing state in case ip_addmulti() fails */ 2119 orig_fmode = ilg->ilg_fmode; 2120 if (ilg->ilg_filter == NULL) { 2121 orig_filter = NULL; 2122 } else { 2123 orig_filter = l_alloc_copy(ilg->ilg_filter); 2124 if (orig_filter == NULL) { 2125 mutex_exit(&connp->conn_lock); 2126 return (ENOMEM); 2127 } 2128 } 2129 } 2130 2131 /* 2132 * Alloc buffer to copy new state into (see below) before 2133 * we make any changes, so we can bail if it fails. 2134 */ 2135 if ((new_filter = l_alloc()) == NULL) { 2136 mutex_exit(&connp->conn_lock); 2137 err = ENOMEM; 2138 goto free_and_exit; 2139 } 2140 2141 if (insrcs == 0) { 2142 CLEAR_SLIST(ilg->ilg_filter); 2143 } else { 2144 slist_t *fp; 2145 if (ilg->ilg_filter == NULL) { 2146 fp = l_alloc(); 2147 if (fp == NULL) { 2148 if (ilgstat == ILGSTAT_NEW) 2149 ilg_delete(connp, ilg, NULL); 2150 mutex_exit(&connp->conn_lock); 2151 err = ENOMEM; 2152 goto free_and_exit; 2153 } 2154 } else { 2155 fp = ilg->ilg_filter; 2156 } 2157 for (i = 0; i < insrcs; i++) { 2158 if (isv4mapped) { 2159 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2160 fp->sl_addr[i] = sin6->sin6_addr; 2161 } else { 2162 if (is_v4only_api) { 2163 addrp = &imsf->imsf_slist[i]; 2164 } else { 2165 sin = (struct sockaddr_in *) 2166 &gf->gf_slist[i]; 2167 addrp = &sin->sin_addr; 2168 } 2169 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2170 } 2171 } 2172 fp->sl_numsrc = insrcs; 2173 ilg->ilg_filter = fp; 2174 } 2175 /* 2176 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2177 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 
2178 * So we need to translate here. 2179 */ 2180 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2181 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2182 2183 /* 2184 * Save copy of ilg's filter state to pass to other functions, 2185 * so we can release conn_lock now. 2186 */ 2187 new_fmode = ilg->ilg_fmode; 2188 l_copy(ilg->ilg_filter, new_filter); 2189 2190 mutex_exit(&connp->conn_lock); 2191 2192 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2193 if (err != 0) { 2194 /* 2195 * Restore the original filter state, or delete the 2196 * newly-created ilg. We need to look up the ilg 2197 * again, though, since we've not been holding the 2198 * conn_lock. 2199 */ 2200 mutex_enter(&connp->conn_lock); 2201 ilg = ilg_lookup_ipif(connp, grp, ipif); 2202 ASSERT(ilg != NULL); 2203 if (ilgstat == ILGSTAT_NEW) { 2204 ilg_delete(connp, ilg, NULL); 2205 } else { 2206 ilg->ilg_fmode = orig_fmode; 2207 if (SLIST_IS_EMPTY(orig_filter)) { 2208 CLEAR_SLIST(ilg->ilg_filter); 2209 } else { 2210 /* 2211 * We didn't free the filter, even if we 2212 * were trying to make the source list empty; 2213 * so if orig_filter isn't empty, the ilg 2214 * must still have a filter alloc'd. 
2215 */ 2216 l_copy(orig_filter, ilg->ilg_filter); 2217 } 2218 } 2219 mutex_exit(&connp->conn_lock); 2220 } 2221 2222 free_and_exit: 2223 l_free(orig_filter); 2224 l_free(new_filter); 2225 2226 return (err); 2227 } 2228 2229 static int 2230 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2231 const struct in6_addr *grp, ill_t *ill) 2232 { 2233 ilg_t *ilg; 2234 int i, orig_ifindex, orig_fmode, new_fmode, err; 2235 slist_t *orig_filter = NULL; 2236 slist_t *new_filter = NULL; 2237 struct sockaddr_storage *sl; 2238 struct sockaddr_in6 *sin6; 2239 boolean_t leave_grp; 2240 ilg_stat_t ilgstat; 2241 2242 /* Make sure we can handle the source list */ 2243 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2244 return (ENOBUFS); 2245 2246 /* 2247 * setting the filter to (INCLUDE, NULL) is treated 2248 * as a request to leave the group. 2249 */ 2250 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2251 2252 ASSERT(IAM_WRITER_ILL(ill)); 2253 2254 /* 2255 * Use the ifindex to do the lookup. We can't use the ill 2256 * directly because ilg_ill could point to a different ill 2257 * if things have moved. 2258 */ 2259 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2260 2261 mutex_enter(&connp->conn_lock); 2262 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2263 if (ilg == NULL) { 2264 /* 2265 * if the request was actually to leave, and we 2266 * didn't find an ilg, there's nothing to do. 2267 */ 2268 if (!leave_grp) 2269 ilg = conn_ilg_alloc(connp); 2270 if (leave_grp || ilg == NULL) { 2271 mutex_exit(&connp->conn_lock); 2272 return (leave_grp ? 0 : ENOMEM); 2273 } 2274 ilgstat = ILGSTAT_NEW; 2275 ilg->ilg_v6group = *grp; 2276 ilg->ilg_ipif = NULL; 2277 /* 2278 * Choose our target ill to join on. This might be 2279 * different from the ill we've been given if it's 2280 * currently down and part of a group. 2281 * 2282 * new ill is not refheld; we are writer. 
2283 */ 2284 ill = ip_choose_multi_ill(ill, grp); 2285 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2286 ilg->ilg_ill = ill; 2287 /* 2288 * Remember the index that we joined on, so that we can 2289 * successfully delete them later on and also search for 2290 * duplicates if the application wants to join again. 2291 */ 2292 ilg->ilg_orig_ifindex = orig_ifindex; 2293 } else if (leave_grp) { 2294 /* 2295 * Use the ilg's current ill for the deletion, 2296 * we might have failed over. 2297 */ 2298 ill = ilg->ilg_ill; 2299 ilg_delete(connp, ilg, NULL); 2300 mutex_exit(&connp->conn_lock); 2301 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2302 connp->conn_zoneid, B_FALSE, B_TRUE); 2303 return (0); 2304 } else { 2305 ilgstat = ILGSTAT_CHANGE; 2306 /* 2307 * The current ill might be different from the one we were 2308 * asked to join on (if failover has occurred); we should 2309 * join on the ill stored in the ilg. The original ill 2310 * is noted in ilg_orig_ifindex, which matched our request. 2311 */ 2312 ill = ilg->ilg_ill; 2313 /* preserve existing state in case ip_addmulti() fails */ 2314 orig_fmode = ilg->ilg_fmode; 2315 if (ilg->ilg_filter == NULL) { 2316 orig_filter = NULL; 2317 } else { 2318 orig_filter = l_alloc_copy(ilg->ilg_filter); 2319 if (orig_filter == NULL) { 2320 mutex_exit(&connp->conn_lock); 2321 return (ENOMEM); 2322 } 2323 } 2324 } 2325 2326 /* 2327 * Alloc buffer to copy new state into (see below) before 2328 * we make any changes, so we can bail if it fails. 
2329 */ 2330 if ((new_filter = l_alloc()) == NULL) { 2331 mutex_exit(&connp->conn_lock); 2332 err = ENOMEM; 2333 goto free_and_exit; 2334 } 2335 2336 if (gf->gf_numsrc == 0) { 2337 CLEAR_SLIST(ilg->ilg_filter); 2338 } else { 2339 slist_t *fp; 2340 if (ilg->ilg_filter == NULL) { 2341 fp = l_alloc(); 2342 if (fp == NULL) { 2343 if (ilgstat == ILGSTAT_NEW) 2344 ilg_delete(connp, ilg, NULL); 2345 mutex_exit(&connp->conn_lock); 2346 err = ENOMEM; 2347 goto free_and_exit; 2348 } 2349 } else { 2350 fp = ilg->ilg_filter; 2351 } 2352 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2353 sin6 = (struct sockaddr_in6 *)sl; 2354 fp->sl_addr[i] = sin6->sin6_addr; 2355 } 2356 fp->sl_numsrc = gf->gf_numsrc; 2357 ilg->ilg_filter = fp; 2358 } 2359 /* 2360 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2361 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2362 * So we need to translate here. 2363 */ 2364 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2365 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2366 2367 /* 2368 * Save copy of ilg's filter state to pass to other functions, 2369 * so we can release conn_lock now. 2370 */ 2371 new_fmode = ilg->ilg_fmode; 2372 l_copy(ilg->ilg_filter, new_filter); 2373 2374 mutex_exit(&connp->conn_lock); 2375 2376 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2377 ilgstat, new_fmode, new_filter); 2378 if (err != 0) { 2379 /* 2380 * Restore the original filter state, or delete the 2381 * newly-created ilg. We need to look up the ilg 2382 * again, though, since we've not been holding the 2383 * conn_lock. 
2384 */ 2385 mutex_enter(&connp->conn_lock); 2386 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2387 ASSERT(ilg != NULL); 2388 if (ilgstat == ILGSTAT_NEW) { 2389 ilg_delete(connp, ilg, NULL); 2390 } else { 2391 ilg->ilg_fmode = orig_fmode; 2392 if (SLIST_IS_EMPTY(orig_filter)) { 2393 CLEAR_SLIST(ilg->ilg_filter); 2394 } else { 2395 /* 2396 * We didn't free the filter, even if we 2397 * were trying to make the source list empty; 2398 * so if orig_filter isn't empty, the ilg 2399 * must still have a filter alloc'd. 2400 */ 2401 l_copy(orig_filter, ilg->ilg_filter); 2402 } 2403 } 2404 mutex_exit(&connp->conn_lock); 2405 } 2406 2407 free_and_exit: 2408 l_free(orig_filter); 2409 l_free(new_filter); 2410 2411 return (err); 2412 } 2413 2414 /* 2415 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2416 */ 2417 /* ARGSUSED */ 2418 int 2419 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2420 ip_ioctl_cmd_t *ipip, void *ifreq) 2421 { 2422 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2423 /* existence verified in ip_wput_nondata() */ 2424 mblk_t *data_mp = mp->b_cont->b_cont; 2425 int datalen, err, cmd, minsize; 2426 int expsize = 0; 2427 conn_t *connp; 2428 boolean_t isv6, is_v4only_api, getcmd; 2429 struct sockaddr_in *gsin; 2430 struct sockaddr_in6 *gsin6; 2431 ipaddr_t v4grp; 2432 in6_addr_t v6grp; 2433 struct group_filter *gf = NULL; 2434 struct ip_msfilter *imsf = NULL; 2435 mblk_t *ndp; 2436 2437 if (data_mp->b_cont != NULL) { 2438 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2439 return (ENOMEM); 2440 freemsg(data_mp); 2441 data_mp = ndp; 2442 mp->b_cont->b_cont = data_mp; 2443 } 2444 2445 cmd = iocp->ioc_cmd; 2446 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2447 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2448 minsize = (is_v4only_api) ? 
IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2449 datalen = MBLKL(data_mp); 2450 2451 if (datalen < minsize) 2452 return (EINVAL); 2453 2454 /* 2455 * now we know we have at least have the initial structure, 2456 * but need to check for the source list array. 2457 */ 2458 if (is_v4only_api) { 2459 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2460 isv6 = B_FALSE; 2461 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2462 } else { 2463 gf = (struct group_filter *)data_mp->b_rptr; 2464 if (gf->gf_group.ss_family == AF_INET6) { 2465 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2466 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2467 } else { 2468 isv6 = B_FALSE; 2469 } 2470 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2471 } 2472 if (datalen < expsize) 2473 return (EINVAL); 2474 2475 connp = Q_TO_CONN(q); 2476 2477 /* operation not supported on the virtual network interface */ 2478 if (IS_VNI(ipif->ipif_ill)) 2479 return (EINVAL); 2480 2481 if (isv6) { 2482 ill_t *ill = ipif->ipif_ill; 2483 ill_refhold(ill); 2484 2485 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2486 v6grp = gsin6->sin6_addr; 2487 if (getcmd) 2488 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2489 else 2490 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2491 2492 ill_refrele(ill); 2493 } else { 2494 boolean_t isv4mapped = B_FALSE; 2495 if (is_v4only_api) { 2496 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2497 } else { 2498 if (gf->gf_group.ss_family == AF_INET) { 2499 gsin = (struct sockaddr_in *)&gf->gf_group; 2500 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2501 } else { 2502 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2503 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2504 v4grp); 2505 isv4mapped = B_TRUE; 2506 } 2507 } 2508 if (getcmd) 2509 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2510 isv4mapped); 2511 else 2512 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2513 isv4mapped); 2514 } 2515 2516 return (err); 2517 } 2518 2519 /* 2520 * Finds the ipif based on information 
in the ioctl headers. Needed to make 2521 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2522 * ioctls prior to calling the ioctl's handler function). 2523 */ 2524 int 2525 ip_extract_msfilter(queue_t *q, mblk_t *mp, const ip_ioctl_cmd_t *ipip, 2526 cmd_info_t *ci, ipsq_func_t func) 2527 { 2528 int cmd = ipip->ipi_cmd; 2529 int err = 0; 2530 conn_t *connp; 2531 ipif_t *ipif; 2532 /* caller has verified this mblk exists */ 2533 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2534 struct ip_msfilter *imsf; 2535 struct group_filter *gf; 2536 ipaddr_t v4addr, v4grp; 2537 in6_addr_t v6grp; 2538 uint32_t index; 2539 zoneid_t zoneid; 2540 ip_stack_t *ipst; 2541 2542 connp = Q_TO_CONN(q); 2543 zoneid = connp->conn_zoneid; 2544 ipst = connp->conn_netstack->netstack_ip; 2545 2546 /* don't allow multicast operations on a tcp conn */ 2547 if (IPCL_IS_TCP(connp)) 2548 return (ENOPROTOOPT); 2549 2550 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2551 /* don't allow v4-specific ioctls on v6 socket */ 2552 if (connp->conn_af_isv6) 2553 return (EAFNOSUPPORT); 2554 2555 imsf = (struct ip_msfilter *)dbuf; 2556 v4addr = imsf->imsf_interface.s_addr; 2557 v4grp = imsf->imsf_multiaddr.s_addr; 2558 if (v4addr == INADDR_ANY) { 2559 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2560 if (ipif == NULL) 2561 err = EADDRNOTAVAIL; 2562 } else { 2563 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2564 func, &err, ipst); 2565 } 2566 } else { 2567 boolean_t isv6 = B_FALSE; 2568 gf = (struct group_filter *)dbuf; 2569 index = gf->gf_interface; 2570 if (gf->gf_group.ss_family == AF_INET6) { 2571 struct sockaddr_in6 *sin6; 2572 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2573 v6grp = sin6->sin6_addr; 2574 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2575 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2576 else 2577 isv6 = B_TRUE; 2578 } else if (gf->gf_group.ss_family == AF_INET) { 2579 struct sockaddr_in *sin; 2580 sin = (struct sockaddr_in *)&gf->gf_group; 2581 v4grp = 
sin->sin_addr.s_addr; 2582 } else { 2583 return (EAFNOSUPPORT); 2584 } 2585 if (index == 0) { 2586 if (isv6) { 2587 ipif = ipif_lookup_group_v6(&v6grp, zoneid, 2588 ipst); 2589 } else { 2590 ipif = ipif_lookup_group(v4grp, zoneid, ipst); 2591 } 2592 if (ipif == NULL) 2593 err = EADDRNOTAVAIL; 2594 } else { 2595 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2596 q, mp, func, &err, ipst); 2597 } 2598 } 2599 2600 ci->ci_ipif = ipif; 2601 return (err); 2602 } 2603 2604 /* 2605 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2606 * in in two stages, as the first copyin tells us the size of the attached 2607 * source buffer. This function is called by ip_wput_nondata() after the 2608 * first copyin has completed; it figures out how big the second stage 2609 * needs to be, and kicks it off. 2610 * 2611 * In some cases (numsrc < 2), the second copyin is not needed as the 2612 * first one gets a complete structure containing 1 source addr. 2613 * 2614 * The function returns 0 if a second copyin has been started (i.e. there's 2615 * no more work to be done right now), or 1 if the second copyin is not 2616 * needed and ip_wput_nondata() can continue its processing. 
2617 */ 2618 int 2619 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2620 { 2621 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2622 int cmd = iocp->ioc_cmd; 2623 /* validity of this checked in ip_wput_nondata() */ 2624 mblk_t *mp1 = mp->b_cont->b_cont; 2625 int copysize = 0; 2626 int offset; 2627 2628 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2629 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2630 if (gf->gf_numsrc >= 2) { 2631 offset = sizeof (struct group_filter); 2632 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2633 } 2634 } else { 2635 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2636 if (imsf->imsf_numsrc >= 2) { 2637 offset = sizeof (struct ip_msfilter); 2638 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2639 } 2640 } 2641 if (copysize > 0) { 2642 mi_copyin_n(q, mp, offset, copysize); 2643 return (0); 2644 } 2645 return (1); 2646 } 2647 2648 /* 2649 * Handle the following optmgmt: 2650 * IP_ADD_MEMBERSHIP must not have joined already 2651 * MCAST_JOIN_GROUP must not have joined already 2652 * IP_BLOCK_SOURCE must have joined already 2653 * MCAST_BLOCK_SOURCE must have joined already 2654 * IP_JOIN_SOURCE_GROUP may have joined already 2655 * MCAST_JOIN_SOURCE_GROUP may have joined already 2656 * 2657 * fmode and src parameters may be used to determine which option is 2658 * being set, as follows (the IP_* and MCAST_* versions of each option 2659 * are functionally equivalent): 2660 * opt fmode src 2661 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2662 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2663 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2664 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2665 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2666 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2667 * 2668 * Changing the filter mode is not allowed; if a matching ilg already 2669 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 
2670 * 2671 * Verifies that there is a source address of appropriate scope for 2672 * the group; if not, EADDRNOTAVAIL is returned. 2673 * 2674 * The interface to be used may be identified by an address or by an 2675 * index. A pointer to the index is passed; if it is NULL, use the 2676 * address, otherwise, use the index. 2677 */ 2678 int 2679 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2680 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2681 mblk_t *first_mp) 2682 { 2683 ipif_t *ipif; 2684 ipsq_t *ipsq; 2685 int err = 0; 2686 ill_t *ill; 2687 2688 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2689 ip_restart_optmgmt, &ipif); 2690 if (err != 0) { 2691 if (err != EINPROGRESS) { 2692 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2693 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2694 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2695 } 2696 return (err); 2697 } 2698 ASSERT(ipif != NULL); 2699 2700 ill = ipif->ipif_ill; 2701 /* Operation not supported on a virtual network interface */ 2702 if (IS_VNI(ill)) { 2703 ipif_refrele(ipif); 2704 return (EINVAL); 2705 } 2706 2707 if (checkonly) { 2708 /* 2709 * do not do operation, just pretend to - new T_CHECK 2710 * semantics. The error return case above if encountered 2711 * considered a good enough "check" here. 
2712 */ 2713 ipif_refrele(ipif); 2714 return (0); 2715 } 2716 2717 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2718 NEW_OP); 2719 2720 /* unspecified source addr => no source filtering */ 2721 err = ilg_add(connp, group, ipif, fmode, src); 2722 2723 IPSQ_EXIT(ipsq); 2724 2725 ipif_refrele(ipif); 2726 return (err); 2727 } 2728 2729 /* 2730 * Handle the following optmgmt: 2731 * IPV6_JOIN_GROUP must not have joined already 2732 * MCAST_JOIN_GROUP must not have joined already 2733 * MCAST_BLOCK_SOURCE must have joined already 2734 * MCAST_JOIN_SOURCE_GROUP may have joined already 2735 * 2736 * fmode and src parameters may be used to determine which option is 2737 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2738 * are functionally equivalent): 2739 * opt fmode v6src 2740 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2741 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2742 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2743 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2744 * 2745 * Changing the filter mode is not allowed; if a matching ilg already 2746 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2747 * 2748 * Verifies that there is a source address of appropriate scope for 2749 * the group; if not, EADDRNOTAVAIL is returned. 2750 * 2751 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2752 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2753 * v6src is also v4-mapped. 
2754 */ 2755 int 2756 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2757 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2758 const in6_addr_t *v6src, mblk_t *first_mp) 2759 { 2760 ill_t *ill; 2761 ipif_t *ipif; 2762 char buf[INET6_ADDRSTRLEN]; 2763 ipaddr_t v4group, v4src; 2764 boolean_t isv6; 2765 ipsq_t *ipsq; 2766 int err; 2767 2768 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2769 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2770 if (err != 0) { 2771 if (err != EINPROGRESS) { 2772 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2773 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2774 sizeof (buf)), ifindex)); 2775 } 2776 return (err); 2777 } 2778 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2779 2780 /* operation is not supported on the virtual network interface */ 2781 if (isv6) { 2782 if (IS_VNI(ill)) { 2783 ill_refrele(ill); 2784 return (EINVAL); 2785 } 2786 } else { 2787 if (IS_VNI(ipif->ipif_ill)) { 2788 ipif_refrele(ipif); 2789 return (EINVAL); 2790 } 2791 } 2792 2793 if (checkonly) { 2794 /* 2795 * do not do operation, just pretend to - new T_CHECK 2796 * semantics. The error return case above if encountered 2797 * considered a good enough "check" here. 
2798 */ 2799 if (isv6) 2800 ill_refrele(ill); 2801 else 2802 ipif_refrele(ipif); 2803 return (0); 2804 } 2805 2806 if (!isv6) { 2807 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2808 ipsq, NEW_OP); 2809 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2810 IPSQ_EXIT(ipsq); 2811 ipif_refrele(ipif); 2812 } else { 2813 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2814 ipsq, NEW_OP); 2815 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2816 IPSQ_EXIT(ipsq); 2817 ill_refrele(ill); 2818 } 2819 2820 return (err); 2821 } 2822 2823 static int 2824 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2825 mcast_record_t fmode, ipaddr_t src) 2826 { 2827 ilg_t *ilg; 2828 in6_addr_t v6src; 2829 boolean_t leaving = B_FALSE; 2830 2831 ASSERT(IAM_WRITER_IPIF(ipif)); 2832 2833 /* 2834 * The ilg is valid only while we hold the conn lock. Once we drop 2835 * the lock, another thread can locate another ilg on this connp, 2836 * but on a different ipif, and delete it, and cause the ilg array 2837 * to be reallocated and copied. Hence do the ilg_delete before 2838 * dropping the lock. 2839 */ 2840 mutex_enter(&connp->conn_lock); 2841 ilg = ilg_lookup_ipif(connp, group, ipif); 2842 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2843 mutex_exit(&connp->conn_lock); 2844 return (EADDRNOTAVAIL); 2845 } 2846 2847 /* 2848 * Decide if we're actually deleting the ilg or just removing a 2849 * source filter address; if just removing an addr, make sure we 2850 * aren't trying to change the filter mode, and that the addr is 2851 * actually in our filter list already. If we're removing the 2852 * last src in an include list, just delete the ilg. 
2853 */ 2854 if (src == INADDR_ANY) { 2855 v6src = ipv6_all_zeros; 2856 leaving = B_TRUE; 2857 } else { 2858 int err = 0; 2859 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2860 if (fmode != ilg->ilg_fmode) 2861 err = EINVAL; 2862 else if (ilg->ilg_filter == NULL || 2863 !list_has_addr(ilg->ilg_filter, &v6src)) 2864 err = EADDRNOTAVAIL; 2865 if (err != 0) { 2866 mutex_exit(&connp->conn_lock); 2867 return (err); 2868 } 2869 if (fmode == MODE_IS_INCLUDE && 2870 ilg->ilg_filter->sl_numsrc == 1) { 2871 v6src = ipv6_all_zeros; 2872 leaving = B_TRUE; 2873 } 2874 } 2875 2876 ilg_delete(connp, ilg, &v6src); 2877 mutex_exit(&connp->conn_lock); 2878 2879 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2880 return (0); 2881 } 2882 2883 static int 2884 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2885 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2886 { 2887 ilg_t *ilg; 2888 ill_t *ilg_ill; 2889 uint_t ilg_orig_ifindex; 2890 boolean_t leaving = B_TRUE; 2891 2892 ASSERT(IAM_WRITER_ILL(ill)); 2893 2894 /* 2895 * Use the index that we originally used to join. We can't 2896 * use the ill directly because ilg_ill could point to 2897 * a new ill if things have moved. 2898 */ 2899 mutex_enter(&connp->conn_lock); 2900 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2901 ill->ill_phyint->phyint_ifindex); 2902 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2903 mutex_exit(&connp->conn_lock); 2904 return (EADDRNOTAVAIL); 2905 } 2906 2907 /* 2908 * Decide if we're actually deleting the ilg or just removing a 2909 * source filter address; if just removing an addr, make sure we 2910 * aren't trying to change the filter mode, and that the addr is 2911 * actually in our filter list already. If we're removing the 2912 * last src in an include list, just delete the ilg. 
2913 */ 2914 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2915 int err = 0; 2916 if (fmode != ilg->ilg_fmode) 2917 err = EINVAL; 2918 else if (ilg->ilg_filter == NULL || 2919 !list_has_addr(ilg->ilg_filter, v6src)) 2920 err = EADDRNOTAVAIL; 2921 if (err != 0) { 2922 mutex_exit(&connp->conn_lock); 2923 return (err); 2924 } 2925 if (fmode == MODE_IS_INCLUDE && 2926 ilg->ilg_filter->sl_numsrc == 1) 2927 v6src = NULL; 2928 else 2929 leaving = B_FALSE; 2930 } 2931 2932 ilg_ill = ilg->ilg_ill; 2933 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2934 ilg_delete(connp, ilg, v6src); 2935 mutex_exit(&connp->conn_lock); 2936 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2937 connp->conn_zoneid, B_FALSE, leaving); 2938 2939 return (0); 2940 } 2941 2942 /* 2943 * Handle the following optmgmt: 2944 * IP_DROP_MEMBERSHIP will leave 2945 * MCAST_LEAVE_GROUP will leave 2946 * IP_UNBLOCK_SOURCE will not leave 2947 * MCAST_UNBLOCK_SOURCE will not leave 2948 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2949 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2950 * 2951 * fmode and src parameters may be used to determine which option is 2952 * being set, as follows (the IP_* and MCAST_* versions of each option 2953 * are functionally equivalent): 2954 * opt fmode src 2955 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2956 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2957 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2958 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2959 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2960 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2961 * 2962 * Changing the filter mode is not allowed; if a matching ilg already 2963 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2964 * 2965 * The interface to be used may be identified by an address or by an 2966 * index. A pointer to the index is passed; if it is NULL, use the 2967 * address, otherwise, use the index. 
2968 */ 2969 int 2970 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2971 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2972 mblk_t *first_mp) 2973 { 2974 ipif_t *ipif; 2975 ipsq_t *ipsq; 2976 int err; 2977 ill_t *ill; 2978 2979 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2980 ip_restart_optmgmt, &ipif); 2981 if (err != 0) { 2982 if (err != EINPROGRESS) { 2983 ip1dbg(("ip_opt_delete_group: no ipif for group " 2984 "0x%x, ifaddr 0x%x\n", 2985 (int)ntohl(group), (int)ntohl(ifaddr))); 2986 } 2987 return (err); 2988 } 2989 ASSERT(ipif != NULL); 2990 2991 ill = ipif->ipif_ill; 2992 /* Operation not supported on a virtual network interface */ 2993 if (IS_VNI(ill)) { 2994 ipif_refrele(ipif); 2995 return (EINVAL); 2996 } 2997 2998 if (checkonly) { 2999 /* 3000 * do not do operation, just pretend to - new T_CHECK 3001 * semantics. The error return case above if encountered 3002 * considered a good enough "check" here. 3003 */ 3004 ipif_refrele(ipif); 3005 return (0); 3006 } 3007 3008 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3009 NEW_OP); 3010 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3011 IPSQ_EXIT(ipsq); 3012 3013 ipif_refrele(ipif); 3014 return (err); 3015 } 3016 3017 /* 3018 * Handle the following optmgmt: 3019 * IPV6_LEAVE_GROUP will leave 3020 * MCAST_LEAVE_GROUP will leave 3021 * MCAST_UNBLOCK_SOURCE will not leave 3022 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3023 * 3024 * fmode and src parameters may be used to determine which option is 3025 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3026 * are functionally equivalent): 3027 * opt fmode v6src 3028 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3029 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3030 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3031 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3032 * 3033 * Changing the filter mode is not 
allowed; if a matching ilg already 3034 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3035 * 3036 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3037 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3038 * v6src is also v4-mapped. 3039 */ 3040 int 3041 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3042 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3043 const in6_addr_t *v6src, mblk_t *first_mp) 3044 { 3045 ill_t *ill; 3046 ipif_t *ipif; 3047 char buf[INET6_ADDRSTRLEN]; 3048 ipaddr_t v4group, v4src; 3049 boolean_t isv6; 3050 ipsq_t *ipsq; 3051 int err; 3052 3053 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3054 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3055 if (err != 0) { 3056 if (err != EINPROGRESS) { 3057 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3058 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3059 sizeof (buf)), ifindex)); 3060 } 3061 return (err); 3062 } 3063 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3064 3065 /* operation is not supported on the virtual network interface */ 3066 if (isv6) { 3067 if (IS_VNI(ill)) { 3068 ill_refrele(ill); 3069 return (EINVAL); 3070 } 3071 } else { 3072 if (IS_VNI(ipif->ipif_ill)) { 3073 ipif_refrele(ipif); 3074 return (EINVAL); 3075 } 3076 } 3077 3078 if (checkonly) { 3079 /* 3080 * do not do operation, just pretend to - new T_CHECK 3081 * semantics. The error return case above if encountered 3082 * considered a good enough "check" here. 
3083 */ 3084 if (isv6) 3085 ill_refrele(ill); 3086 else 3087 ipif_refrele(ipif); 3088 return (0); 3089 } 3090 3091 if (!isv6) { 3092 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3093 ipsq, NEW_OP); 3094 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3095 v4src); 3096 IPSQ_EXIT(ipsq); 3097 ipif_refrele(ipif); 3098 } else { 3099 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3100 ipsq, NEW_OP); 3101 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3102 v6src); 3103 IPSQ_EXIT(ipsq); 3104 ill_refrele(ill); 3105 } 3106 3107 return (err); 3108 } 3109 3110 /* 3111 * Group mgmt for upper conn that passes things down 3112 * to the interface multicast list (and DLPI) 3113 * These routines can handle new style options that specify an interface name 3114 * as opposed to an interface address (needed for general handling of 3115 * unnumbered interfaces.) 3116 */ 3117 3118 /* 3119 * Add a group to an upper conn group data structure and pass things down 3120 * to the interface multicast list (and DLPI) 3121 */ 3122 static int 3123 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3124 ipaddr_t src) 3125 { 3126 int error = 0; 3127 ill_t *ill; 3128 ilg_t *ilg; 3129 ilg_stat_t ilgstat; 3130 slist_t *new_filter = NULL; 3131 int new_fmode; 3132 3133 ASSERT(IAM_WRITER_IPIF(ipif)); 3134 3135 ill = ipif->ipif_ill; 3136 3137 if (!(ill->ill_flags & ILLF_MULTICAST)) 3138 return (EADDRNOTAVAIL); 3139 3140 /* 3141 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3142 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3143 * serialize 2 threads doing join (sock, group1, hme0:0) and 3144 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3145 * but both operations happen on the same conn. 
3146 */ 3147 mutex_enter(&connp->conn_lock); 3148 ilg = ilg_lookup_ipif(connp, group, ipif); 3149 3150 /* 3151 * Depending on the option we're handling, may or may not be okay 3152 * if group has already been added. Figure out our rules based 3153 * on fmode and src params. Also make sure there's enough room 3154 * in the filter if we're adding a source to an existing filter. 3155 */ 3156 if (src == INADDR_ANY) { 3157 /* we're joining for all sources, must not have joined */ 3158 if (ilg != NULL) 3159 error = EADDRINUSE; 3160 } else { 3161 if (fmode == MODE_IS_EXCLUDE) { 3162 /* (excl {addr}) => block source, must have joined */ 3163 if (ilg == NULL) 3164 error = EADDRNOTAVAIL; 3165 } 3166 /* (incl {addr}) => join source, may have joined */ 3167 3168 if (ilg != NULL && 3169 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3170 error = ENOBUFS; 3171 } 3172 if (error != 0) { 3173 mutex_exit(&connp->conn_lock); 3174 return (error); 3175 } 3176 3177 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3178 3179 /* 3180 * Alloc buffer to copy new state into (see below) before 3181 * we make any changes, so we can bail if it fails. 
3182 */ 3183 if ((new_filter = l_alloc()) == NULL) { 3184 mutex_exit(&connp->conn_lock); 3185 return (ENOMEM); 3186 } 3187 3188 if (ilg == NULL) { 3189 ilgstat = ILGSTAT_NEW; 3190 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3191 mutex_exit(&connp->conn_lock); 3192 l_free(new_filter); 3193 return (ENOMEM); 3194 } 3195 if (src != INADDR_ANY) { 3196 ilg->ilg_filter = l_alloc(); 3197 if (ilg->ilg_filter == NULL) { 3198 ilg_delete(connp, ilg, NULL); 3199 mutex_exit(&connp->conn_lock); 3200 l_free(new_filter); 3201 return (ENOMEM); 3202 } 3203 ilg->ilg_filter->sl_numsrc = 1; 3204 IN6_IPADDR_TO_V4MAPPED(src, 3205 &ilg->ilg_filter->sl_addr[0]); 3206 } 3207 if (group == INADDR_ANY) { 3208 ilg->ilg_v6group = ipv6_all_zeros; 3209 } else { 3210 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3211 } 3212 ilg->ilg_ipif = ipif; 3213 ilg->ilg_ill = NULL; 3214 ilg->ilg_orig_ifindex = 0; 3215 ilg->ilg_fmode = fmode; 3216 } else { 3217 int index; 3218 in6_addr_t v6src; 3219 ilgstat = ILGSTAT_CHANGE; 3220 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3221 mutex_exit(&connp->conn_lock); 3222 l_free(new_filter); 3223 return (EINVAL); 3224 } 3225 if (ilg->ilg_filter == NULL) { 3226 ilg->ilg_filter = l_alloc(); 3227 if (ilg->ilg_filter == NULL) { 3228 mutex_exit(&connp->conn_lock); 3229 l_free(new_filter); 3230 return (ENOMEM); 3231 } 3232 } 3233 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3234 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3235 mutex_exit(&connp->conn_lock); 3236 l_free(new_filter); 3237 return (EADDRNOTAVAIL); 3238 } 3239 index = ilg->ilg_filter->sl_numsrc++; 3240 ilg->ilg_filter->sl_addr[index] = v6src; 3241 } 3242 3243 /* 3244 * Save copy of ilg's filter state to pass to other functions, 3245 * so we can release conn_lock now. 
3246 */ 3247 new_fmode = ilg->ilg_fmode; 3248 l_copy(ilg->ilg_filter, new_filter); 3249 3250 mutex_exit(&connp->conn_lock); 3251 3252 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3253 if (error != 0) { 3254 /* 3255 * Need to undo what we did before calling ip_addmulti()! 3256 * Must look up the ilg again since we've not been holding 3257 * conn_lock. 3258 */ 3259 in6_addr_t v6src; 3260 if (ilgstat == ILGSTAT_NEW) 3261 v6src = ipv6_all_zeros; 3262 else 3263 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3264 mutex_enter(&connp->conn_lock); 3265 ilg = ilg_lookup_ipif(connp, group, ipif); 3266 ASSERT(ilg != NULL); 3267 ilg_delete(connp, ilg, &v6src); 3268 mutex_exit(&connp->conn_lock); 3269 l_free(new_filter); 3270 return (error); 3271 } 3272 3273 l_free(new_filter); 3274 return (0); 3275 } 3276 3277 static int 3278 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3279 mcast_record_t fmode, const in6_addr_t *v6src) 3280 { 3281 int error = 0; 3282 int orig_ifindex; 3283 ilg_t *ilg; 3284 ilg_stat_t ilgstat; 3285 slist_t *new_filter = NULL; 3286 int new_fmode; 3287 3288 ASSERT(IAM_WRITER_ILL(ill)); 3289 3290 if (!(ill->ill_flags & ILLF_MULTICAST)) 3291 return (EADDRNOTAVAIL); 3292 3293 /* 3294 * conn_lock protects the ilg list. Serializes 2 threads doing 3295 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3296 * and hme1 map to different ipsq's, but both operations happen 3297 * on the same conn. 3298 */ 3299 mutex_enter(&connp->conn_lock); 3300 3301 /* 3302 * Use the ifindex to do the lookup. We can't use the ill 3303 * directly because ilg_ill could point to a different ill if 3304 * things have moved. 3305 */ 3306 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3307 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3308 3309 /* 3310 * Depending on the option we're handling, may or may not be okay 3311 * if group has already been added. Figure out our rules based 3312 * on fmode and src params. 
Also make sure there's enough room 3313 * in the filter if we're adding a source to an existing filter. 3314 */ 3315 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3316 /* we're joining for all sources, must not have joined */ 3317 if (ilg != NULL) 3318 error = EADDRINUSE; 3319 } else { 3320 if (fmode == MODE_IS_EXCLUDE) { 3321 /* (excl {addr}) => block source, must have joined */ 3322 if (ilg == NULL) 3323 error = EADDRNOTAVAIL; 3324 } 3325 /* (incl {addr}) => join source, may have joined */ 3326 3327 if (ilg != NULL && 3328 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3329 error = ENOBUFS; 3330 } 3331 if (error != 0) { 3332 mutex_exit(&connp->conn_lock); 3333 return (error); 3334 } 3335 3336 /* 3337 * Alloc buffer to copy new state into (see below) before 3338 * we make any changes, so we can bail if it fails. 3339 */ 3340 if ((new_filter = l_alloc()) == NULL) { 3341 mutex_exit(&connp->conn_lock); 3342 return (ENOMEM); 3343 } 3344 3345 if (ilg == NULL) { 3346 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3347 mutex_exit(&connp->conn_lock); 3348 l_free(new_filter); 3349 return (ENOMEM); 3350 } 3351 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3352 ilg->ilg_filter = l_alloc(); 3353 if (ilg->ilg_filter == NULL) { 3354 ilg_delete(connp, ilg, NULL); 3355 mutex_exit(&connp->conn_lock); 3356 l_free(new_filter); 3357 return (ENOMEM); 3358 } 3359 ilg->ilg_filter->sl_numsrc = 1; 3360 ilg->ilg_filter->sl_addr[0] = *v6src; 3361 } 3362 ilgstat = ILGSTAT_NEW; 3363 ilg->ilg_v6group = *v6group; 3364 ilg->ilg_fmode = fmode; 3365 ilg->ilg_ipif = NULL; 3366 /* 3367 * Choose our target ill to join on. This might be different 3368 * from the ill we've been given if it's currently down and 3369 * part of a group. 3370 * 3371 * new ill is not refheld; we are writer. 
3372 */ 3373 ill = ip_choose_multi_ill(ill, v6group); 3374 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3375 ilg->ilg_ill = ill; 3376 /* 3377 * Remember the orig_ifindex that we joined on, so that we 3378 * can successfully delete them later on and also search 3379 * for duplicates if the application wants to join again. 3380 */ 3381 ilg->ilg_orig_ifindex = orig_ifindex; 3382 } else { 3383 int index; 3384 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3385 mutex_exit(&connp->conn_lock); 3386 l_free(new_filter); 3387 return (EINVAL); 3388 } 3389 if (ilg->ilg_filter == NULL) { 3390 ilg->ilg_filter = l_alloc(); 3391 if (ilg->ilg_filter == NULL) { 3392 mutex_exit(&connp->conn_lock); 3393 l_free(new_filter); 3394 return (ENOMEM); 3395 } 3396 } 3397 if (list_has_addr(ilg->ilg_filter, v6src)) { 3398 mutex_exit(&connp->conn_lock); 3399 l_free(new_filter); 3400 return (EADDRNOTAVAIL); 3401 } 3402 ilgstat = ILGSTAT_CHANGE; 3403 index = ilg->ilg_filter->sl_numsrc++; 3404 ilg->ilg_filter->sl_addr[index] = *v6src; 3405 /* 3406 * The current ill might be different from the one we were 3407 * asked to join on (if failover has occurred); we should 3408 * join on the ill stored in the ilg. The original ill 3409 * is noted in ilg_orig_ifindex, which matched our request. 3410 */ 3411 ill = ilg->ilg_ill; 3412 } 3413 3414 /* 3415 * Save copy of ilg's filter state to pass to other functions, 3416 * so we can release conn_lock now. 3417 */ 3418 new_fmode = ilg->ilg_fmode; 3419 l_copy(ilg->ilg_filter, new_filter); 3420 3421 mutex_exit(&connp->conn_lock); 3422 3423 /* 3424 * Now update the ill. We wait to do this until after the ilg 3425 * has been updated because we need to update the src filter 3426 * info for the ill, which involves looking at the status of 3427 * all the ilgs associated with this group/interface pair. 
3428 */ 3429 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3430 ilgstat, new_fmode, new_filter); 3431 if (error != 0) { 3432 /* 3433 * But because we waited, we have to undo the ilg update 3434 * if ip_addmulti_v6() fails. We also must lookup ilg 3435 * again, since we've not been holding conn_lock. 3436 */ 3437 in6_addr_t delsrc = 3438 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3439 mutex_enter(&connp->conn_lock); 3440 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3441 ASSERT(ilg != NULL); 3442 ilg_delete(connp, ilg, &delsrc); 3443 mutex_exit(&connp->conn_lock); 3444 l_free(new_filter); 3445 return (error); 3446 } 3447 3448 l_free(new_filter); 3449 3450 return (0); 3451 } 3452 3453 /* 3454 * Find an IPv4 ilg matching group, ill and source 3455 */ 3456 ilg_t * 3457 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3458 { 3459 in6_addr_t v6group, v6src; 3460 int i; 3461 boolean_t isinlist; 3462 ilg_t *ilg; 3463 ipif_t *ipif; 3464 ill_t *ilg_ill; 3465 3466 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3467 3468 /* 3469 * INADDR_ANY is represented as the IPv6 unspecified addr. 3470 */ 3471 if (group == INADDR_ANY) 3472 v6group = ipv6_all_zeros; 3473 else 3474 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3475 3476 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3477 /* ilg_ipif is NULL for v6; skip them */ 3478 ilg = &connp->conn_ilg[i]; 3479 if ((ipif = ilg->ilg_ipif) == NULL) 3480 continue; 3481 ASSERT(ilg->ilg_ill == NULL); 3482 ilg_ill = ipif->ipif_ill; 3483 ASSERT(!ilg_ill->ill_isv6); 3484 if (ilg_ill == ill && 3485 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3486 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3487 /* no source filter, so this is a match */ 3488 return (ilg); 3489 } 3490 break; 3491 } 3492 } 3493 if (i == connp->conn_ilg_inuse) 3494 return (NULL); 3495 3496 /* 3497 * we have an ilg with matching ill and group; but 3498 * the ilg has a source list that we must check. 
3499 */ 3500 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3501 isinlist = B_FALSE; 3502 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3503 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3504 isinlist = B_TRUE; 3505 break; 3506 } 3507 } 3508 3509 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3510 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3511 return (ilg); 3512 3513 return (NULL); 3514 } 3515 3516 /* 3517 * Find an IPv6 ilg matching group, ill, and source 3518 */ 3519 ilg_t * 3520 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3521 const in6_addr_t *v6src, ill_t *ill) 3522 { 3523 int i; 3524 boolean_t isinlist; 3525 ilg_t *ilg; 3526 ill_t *ilg_ill; 3527 3528 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3529 3530 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3531 ilg = &connp->conn_ilg[i]; 3532 if ((ilg_ill = ilg->ilg_ill) == NULL) 3533 continue; 3534 ASSERT(ilg->ilg_ipif == NULL); 3535 ASSERT(ilg_ill->ill_isv6); 3536 if (ilg_ill == ill && 3537 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3538 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3539 /* no source filter, so this is a match */ 3540 return (ilg); 3541 } 3542 break; 3543 } 3544 } 3545 if (i == connp->conn_ilg_inuse) 3546 return (NULL); 3547 3548 /* 3549 * we have an ilg with matching ill and group; but 3550 * the ilg has a source list that we must check. 3551 */ 3552 isinlist = B_FALSE; 3553 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3554 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3555 isinlist = B_TRUE; 3556 break; 3557 } 3558 } 3559 3560 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3561 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3562 return (ilg); 3563 3564 return (NULL); 3565 } 3566 3567 /* 3568 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 
3569 * This is useful when the interface fails and we have moved 3570 * to a new ill, but still would like to locate using the index 3571 * that we originally used to join. Used only for IPv6 currently. 3572 */ 3573 static ilg_t * 3574 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3575 { 3576 ilg_t *ilg; 3577 int i; 3578 3579 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3580 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3581 ilg = &connp->conn_ilg[i]; 3582 /* ilg_ill is NULL for V4. Skip them */ 3583 if (ilg->ilg_ill == NULL) 3584 continue; 3585 /* ilg_ipif is NULL for V6 */ 3586 ASSERT(ilg->ilg_ipif == NULL); 3587 ASSERT(ilg->ilg_orig_ifindex != 0); 3588 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3589 ilg->ilg_orig_ifindex == ifindex) { 3590 return (ilg); 3591 } 3592 } 3593 return (NULL); 3594 } 3595 3596 /* 3597 * Find an IPv6 ilg matching group and ill 3598 */ 3599 ilg_t * 3600 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3601 { 3602 ilg_t *ilg; 3603 int i; 3604 ill_t *mem_ill; 3605 3606 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3607 3608 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3609 ilg = &connp->conn_ilg[i]; 3610 if ((mem_ill = ilg->ilg_ill) == NULL) 3611 continue; 3612 ASSERT(ilg->ilg_ipif == NULL); 3613 ASSERT(mem_ill->ill_isv6); 3614 if (mem_ill == ill && 3615 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3616 return (ilg); 3617 } 3618 return (NULL); 3619 } 3620 3621 /* 3622 * Find an IPv4 ilg matching group and ipif 3623 */ 3624 static ilg_t * 3625 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3626 { 3627 in6_addr_t v6group; 3628 int i; 3629 3630 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3631 ASSERT(!ipif->ipif_ill->ill_isv6); 3632 3633 if (group == INADDR_ANY) 3634 v6group = ipv6_all_zeros; 3635 else 3636 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3637 3638 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3639 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3640 &v6group) 
&& 3641 connp->conn_ilg[i].ilg_ipif == ipif) 3642 return (&connp->conn_ilg[i]); 3643 } 3644 return (NULL); 3645 } 3646 3647 /* 3648 * If a source address is passed in (src != NULL and src is not 3649 * unspecified), remove the specified src addr from the given ilg's 3650 * filter list, else delete the ilg. 3651 */ 3652 static void 3653 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3654 { 3655 int i; 3656 3657 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3658 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3659 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3660 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3661 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3662 3663 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3664 if (connp->conn_ilg_walker_cnt != 0) { 3665 ilg->ilg_flags |= ILG_DELETED; 3666 return; 3667 } 3668 3669 FREE_SLIST(ilg->ilg_filter); 3670 3671 i = ilg - &connp->conn_ilg[0]; 3672 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3673 3674 /* Move other entries up one step */ 3675 connp->conn_ilg_inuse--; 3676 for (; i < connp->conn_ilg_inuse; i++) 3677 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3678 3679 if (connp->conn_ilg_inuse == 0) { 3680 mi_free((char *)connp->conn_ilg); 3681 connp->conn_ilg = NULL; 3682 cv_broadcast(&connp->conn_refcv); 3683 } 3684 } else { 3685 l_remove(ilg->ilg_filter, src); 3686 } 3687 } 3688 3689 /* 3690 * Called from conn close. No new ilg can be added or removed. 3691 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3692 * will return error if conn has started closing. 
3693 */ 3694 void 3695 ilg_delete_all(conn_t *connp) 3696 { 3697 int i; 3698 ipif_t *ipif = NULL; 3699 ill_t *ill = NULL; 3700 ilg_t *ilg; 3701 in6_addr_t v6group; 3702 boolean_t success; 3703 ipsq_t *ipsq; 3704 int orig_ifindex; 3705 3706 mutex_enter(&connp->conn_lock); 3707 retry: 3708 ILG_WALKER_HOLD(connp); 3709 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3710 ilg = &connp->conn_ilg[i]; 3711 /* 3712 * Since this walk is not atomic (we drop the 3713 * conn_lock and wait in ipsq_enter) we need 3714 * to check for the ILG_DELETED flag. 3715 */ 3716 if (ilg->ilg_flags & ILG_DELETED) { 3717 /* Go to the next ilg */ 3718 i--; 3719 continue; 3720 } 3721 v6group = ilg->ilg_v6group; 3722 3723 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3724 ipif = ilg->ilg_ipif; 3725 ill = ipif->ipif_ill; 3726 } else { 3727 ipif = NULL; 3728 ill = ilg->ilg_ill; 3729 } 3730 /* 3731 * We may not be able to refhold the ill if the ill/ipif 3732 * is changing. But we need to make sure that the ill will 3733 * not vanish. So we just bump up the ill_waiter count. 3734 * If we are unable to do even that, then the ill is closing, 3735 * in which case the unplumb thread will handle the cleanup, 3736 * and we move on to the next ilg. 3737 */ 3738 if (!ill_waiter_inc(ill)) { 3739 /* Go to the next ilg */ 3740 i--; 3741 continue; 3742 } 3743 mutex_exit(&connp->conn_lock); 3744 /* 3745 * To prevent deadlock between ill close which waits inside 3746 * the perimeter, and conn close, ipsq_enter returns error, 3747 * the moment ILL_CONDEMNED is set, in which case ill close 3748 * takes responsibility to cleanup the ilgs. Note that we 3749 * have not yet set condemned flag, otherwise the conn can't 3750 * be refheld for cleanup by those routines and it would be 3751 * a mutual deadlock. 
3752 */ 3753 success = ipsq_enter(ill, B_FALSE); 3754 ipsq = ill->ill_phyint->phyint_ipsq; 3755 ill_waiter_dcr(ill); 3756 mutex_enter(&connp->conn_lock); 3757 if (!success) { 3758 /* Go to the next ilg */ 3759 i--; 3760 continue; 3761 } 3762 3763 /* 3764 * Make sure that nothing has changed under. For eg. 3765 * a failover/failback can change ilg_ill while we were 3766 * waiting to become exclusive above 3767 */ 3768 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3769 ipif = ilg->ilg_ipif; 3770 ill = ipif->ipif_ill; 3771 } else { 3772 ipif = NULL; 3773 ill = ilg->ilg_ill; 3774 } 3775 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3776 /* 3777 * The ilg has changed under us probably due 3778 * to a failover or unplumb. Retry on the same ilg. 3779 */ 3780 mutex_exit(&connp->conn_lock); 3781 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3782 mutex_enter(&connp->conn_lock); 3783 continue; 3784 } 3785 v6group = ilg->ilg_v6group; 3786 orig_ifindex = ilg->ilg_orig_ifindex; 3787 ilg_delete(connp, ilg, NULL); 3788 mutex_exit(&connp->conn_lock); 3789 3790 if (ipif != NULL) 3791 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3792 B_FALSE, B_TRUE); 3793 3794 else 3795 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3796 connp->conn_zoneid, B_FALSE, B_TRUE); 3797 3798 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3799 mutex_enter(&connp->conn_lock); 3800 /* Go to the next ilg */ 3801 i--; 3802 } 3803 ILG_WALKER_RELE(connp); 3804 3805 /* If any ill was skipped above wait and retry */ 3806 if (connp->conn_ilg_inuse != 0) { 3807 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3808 goto retry; 3809 } 3810 mutex_exit(&connp->conn_lock); 3811 } 3812 3813 /* 3814 * Called from ill close by ipcl_walk for clearing conn_ilg and 3815 * conn_multicast_ipif for a given ipif. conn is held by caller. 3816 * Note that ipcl_walk only walks conns that are not yet condemned. 3817 * condemned conns can't be refheld. For this reason, conn must become clean 3818 * first, i.e. 
it must not refer to any ill/ire/ipif and then only set 3819 * condemned flag. 3820 */ 3821 static void 3822 conn_delete_ipif(conn_t *connp, caddr_t arg) 3823 { 3824 ipif_t *ipif = (ipif_t *)arg; 3825 int i; 3826 char group_buf1[INET6_ADDRSTRLEN]; 3827 char group_buf2[INET6_ADDRSTRLEN]; 3828 ipaddr_t group; 3829 ilg_t *ilg; 3830 3831 /* 3832 * Even though conn_ilg_inuse can change while we are in this loop, 3833 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3834 * be created or deleted for this connp, on this ill, since this ill 3835 * is the perimeter. So we won't miss any ilg in this cleanup. 3836 */ 3837 mutex_enter(&connp->conn_lock); 3838 3839 /* 3840 * Increment the walker count, so that ilg repacking does not 3841 * occur while we are in the loop. 3842 */ 3843 ILG_WALKER_HOLD(connp); 3844 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3845 ilg = &connp->conn_ilg[i]; 3846 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3847 continue; 3848 /* 3849 * ip_close cannot be cleaning this ilg at the same time. 3850 * since it also has to execute in this ill's perimeter which 3851 * we are now holding. Only a clean conn can be condemned. 
3852 */ 3853 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3854 3855 /* Blow away the membership */ 3856 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3857 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3858 group_buf1, sizeof (group_buf1)), 3859 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3860 group_buf2, sizeof (group_buf2)), 3861 ipif->ipif_ill->ill_name)); 3862 3863 /* ilg_ipif is NULL for V6, so we won't be here */ 3864 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3865 3866 group = V4_PART_OF_V6(ilg->ilg_v6group); 3867 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3868 mutex_exit(&connp->conn_lock); 3869 3870 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3871 mutex_enter(&connp->conn_lock); 3872 } 3873 3874 /* 3875 * If we are the last walker, need to physically delete the 3876 * ilgs and repack. 3877 */ 3878 ILG_WALKER_RELE(connp); 3879 3880 if (connp->conn_multicast_ipif == ipif) { 3881 /* Revert to late binding */ 3882 connp->conn_multicast_ipif = NULL; 3883 } 3884 mutex_exit(&connp->conn_lock); 3885 3886 conn_delete_ire(connp, (caddr_t)ipif); 3887 } 3888 3889 /* 3890 * Called from ill close by ipcl_walk for clearing conn_ilg and 3891 * conn_multicast_ill for a given ill. conn is held by caller. 3892 * Note that ipcl_walk only walks conns that are not yet condemned. 3893 * condemned conns can't be refheld. For this reason, conn must become clean 3894 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3895 * condemned flag. 3896 */ 3897 static void 3898 conn_delete_ill(conn_t *connp, caddr_t arg) 3899 { 3900 ill_t *ill = (ill_t *)arg; 3901 int i; 3902 char group_buf[INET6_ADDRSTRLEN]; 3903 in6_addr_t v6group; 3904 int orig_ifindex; 3905 ilg_t *ilg; 3906 3907 /* 3908 * Even though conn_ilg_inuse can change while we are in this loop, 3909 * no new ilgs can be created/deleted for this connp, on this 3910 * ill, since this ill is the perimeter. So we won't miss any ilg 3911 * in this cleanup. 
3912 */ 3913 mutex_enter(&connp->conn_lock); 3914 3915 /* 3916 * Increment the walker count, so that ilg repacking does not 3917 * occur while we are in the loop. 3918 */ 3919 ILG_WALKER_HOLD(connp); 3920 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3921 ilg = &connp->conn_ilg[i]; 3922 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3923 /* 3924 * ip_close cannot be cleaning this ilg at the same 3925 * time, since it also has to execute in this ill's 3926 * perimeter which we are now holding. Only a clean 3927 * conn can be condemned. 3928 */ 3929 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3930 3931 /* Blow away the membership */ 3932 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3933 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3934 group_buf, sizeof (group_buf)), 3935 ill->ill_name)); 3936 3937 v6group = ilg->ilg_v6group; 3938 orig_ifindex = ilg->ilg_orig_ifindex; 3939 ilg_delete(connp, ilg, NULL); 3940 mutex_exit(&connp->conn_lock); 3941 3942 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3943 connp->conn_zoneid, B_FALSE, B_TRUE); 3944 mutex_enter(&connp->conn_lock); 3945 } 3946 } 3947 /* 3948 * If we are the last walker, need to physically delete the 3949 * ilgs and repack. 3950 */ 3951 ILG_WALKER_RELE(connp); 3952 3953 if (connp->conn_multicast_ill == ill) { 3954 /* Revert to late binding */ 3955 connp->conn_multicast_ill = NULL; 3956 connp->conn_orig_multicast_ifindex = 0; 3957 } 3958 mutex_exit(&connp->conn_lock); 3959 } 3960 3961 /* 3962 * Called when an ipif is unplumbed to make sure that there are no 3963 * dangling conn references to that ipif. 3964 * Handles ilg_ipif and conn_multicast_ipif 3965 */ 3966 void 3967 reset_conn_ipif(ipif) 3968 ipif_t *ipif; 3969 { 3970 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 3971 3972 ipcl_walk(conn_delete_ipif, (caddr_t)ipif, ipst); 3973 } 3974 3975 /* 3976 * Called when an ill is unplumbed to make sure that there are no 3977 * dangling conn references to that ill. 
3978 * Handles ilg_ill, conn_multicast_ill. 3979 */ 3980 void 3981 reset_conn_ill(ill_t *ill) 3982 { 3983 ip_stack_t *ipst = ill->ill_ipst; 3984 3985 ipcl_walk(conn_delete_ill, (caddr_t)ill, ipst); 3986 } 3987 3988 #ifdef DEBUG 3989 /* 3990 * Walk functions walk all the interfaces in the system to make 3991 * sure that there is no refernece to the ipif or ill that is 3992 * going away. 3993 */ 3994 int 3995 ilm_walk_ill(ill_t *ill) 3996 { 3997 int cnt = 0; 3998 ill_t *till; 3999 ilm_t *ilm; 4000 ill_walk_context_t ctx; 4001 ip_stack_t *ipst = ill->ill_ipst; 4002 4003 rw_enter(&ipst->ips_ill_g_lock, RW_READER); 4004 till = ILL_START_WALK_ALL(&ctx, ipst); 4005 for (; till != NULL; till = ill_next(&ctx, till)) { 4006 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4007 if (ilm->ilm_ill == ill) { 4008 cnt++; 4009 } 4010 } 4011 } 4012 rw_exit(&ipst->ips_ill_g_lock); 4013 4014 return (cnt); 4015 } 4016 4017 /* 4018 * This function is called before the ipif is freed. 4019 */ 4020 int 4021 ilm_walk_ipif(ipif_t *ipif) 4022 { 4023 int cnt = 0; 4024 ill_t *till; 4025 ilm_t *ilm; 4026 ill_walk_context_t ctx; 4027 ip_stack_t *ipst = ipif->ipif_ill->ill_ipst; 4028 4029 till = ILL_START_WALK_ALL(&ctx, ipst); 4030 for (; till != NULL; till = ill_next(&ctx, till)) { 4031 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4032 if (ilm->ilm_ipif == ipif) { 4033 cnt++; 4034 } 4035 } 4036 } 4037 return (cnt); 4038 } 4039 #endif 4040