1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/dlpi.h> 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/ddi.h> 37 #include <sys/cmn_err.h> 38 #include <sys/zone.h> 39 40 #include <sys/param.h> 41 #include <sys/socket.h> 42 #define _SUN_TPI_VERSION 2 43 #include <sys/tihdr.h> 44 #include <net/if.h> 45 #include <net/if_arp.h> 46 #include <sys/sockio.h> 47 #include <sys/systm.h> 48 #include <net/route.h> 49 #include <netinet/in.h> 50 #include <net/if_dl.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/nd.h> 57 #include <inet/arp.h> 58 #include <inet/ip.h> 59 #include <inet/ip6.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_ndp.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ipclassifier.h> 65 #include <inet/ipsec_impl.h> 66 #include <inet/sctp_ip.h> 67 #include <inet/ip_listutils.h> 68 #include <inet/udp_impl.h> 69 70 #include <netinet/igmp.h> 71 72 /* igmpv3/mldv2 source filter manipulation */ 73 static void ilm_bld_flists(conn_t *conn, void *arg); 74 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 75 slist_t *flist); 76 77 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 78 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 79 int orig_ifindex, zoneid_t zoneid); 80 static void ilm_delete(ilm_t *ilm); 81 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 82 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 83 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 84 const in6_addr_t *v6group, int index); 85 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 86 ipif_t *ipif); 87 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 88 mcast_record_t fmode, ipaddr_t src); 89 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 90 mcast_record_t fmode, const in6_addr_t *v6src); 91 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 92 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 93 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 94 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 95 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 96 static void conn_ilg_reap(conn_t *connp); 97 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 98 ipif_t *ipif, mcast_record_t fmode, ipaddr_t src); 99 static int ip_opt_delete_group_excl_v6(conn_t *connp, 100 const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode, 101 const in6_addr_t *v6src); 102 103 /* 104 * MT notes: 105 * 106 * Multicast joins operate on both the ilg and ilm structures. Multiple 107 * threads operating on an conn (socket) trying to do multicast joins 108 * need to synchronize when operating on the ilg. Multiple threads 109 * potentially operating on different conn (socket endpoints) trying to 110 * do multicast joins could eventually end up trying to manipulate the 111 * ilm simulatenously and need to synchronize on the access to the ilm. 112 * Both are amenable to standard Solaris MT techniques, but it would be 113 * complex to handle a failover or failback which needs to manipulate 114 * ilg/ilms if an applications can also simultaenously join/leave 115 * multicast groups. Hence multicast join/leave also go through the ipsq_t 116 * serialization. 117 * 118 * Multicast joins and leaves are single-threaded per phyint/IPMP group 119 * using the ipsq serialization mechanism. 120 * 121 * An ilm is an IP data structure used to track multicast join/leave. 122 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and 123 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's 124 * referencing the ilm. ilms are created / destroyed only as writer. ilms 125 * are not passed around, instead they are looked up and used under the 126 * ill_lock or as writer. So we don't need a dynamic refcount of the number 127 * of threads holding reference to an ilm. 128 * 129 * Multicast Join operation: 130 * 131 * The first step is to determine the ipif (v4) or ill (v6) on which 132 * the join operation is to be done. The join is done after becoming 133 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg 134 * and ill->ill_ilm are thus accessed and modified exclusively per ill. 135 * Multiple threads can attempt to join simultaneously on different ipif/ill 136 * on the same conn. In this case the ipsq serialization does not help in 137 * protecting the ilg. It is the conn_lock that is used to protect the ilg. 138 * The conn_lock also protects all the ilg_t members. 139 * 140 * Leave operation. 141 * 142 * Similar to the join operation, the first step is to determine the ipif 143 * or ill (v6) on which the leave operation is to be done. The leave operation 144 * is done after becoming exclusive on the ipsq associated with the ipif or ill. 145 * As with join ilg modification is done under the protection of the conn lock. 146 */ 147 148 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 149 ASSERT(connp != NULL); \ 150 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 151 (first_mp), (func), (type), B_TRUE); \ 152 if ((ipsq) == NULL) { \ 153 ipif_refrele(ipif); \ 154 return (EINPROGRESS); \ 155 } 156 157 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 158 ASSERT(connp != NULL); \ 159 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 160 (first_mp), (func), (type), B_TRUE); \ 161 if ((ipsq) == NULL) { \ 162 ill_refrele(ill); \ 163 return (EINPROGRESS); \ 164 } 165 166 #define IPSQ_EXIT(ipsq) \ 167 if (ipsq != NULL) \ 168 ipsq_exit(ipsq, B_TRUE, B_TRUE); 169 170 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 171 172 #define ILG_WALKER_RELE(connp) \ 173 { \ 174 (connp)->conn_ilg_walker_cnt--; \ 175 if ((connp)->conn_ilg_walker_cnt == 0) \ 176 conn_ilg_reap(connp); \ 177 } 178 179 static void 180 conn_ilg_reap(conn_t *connp) 181 { 182 int to; 183 int from; 184 185 ASSERT(MUTEX_HELD(&connp->conn_lock)); 186 187 to = 0; 188 from = 0; 189 while (from < connp->conn_ilg_inuse) { 190 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 191 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 192 from++; 193 continue; 194 } 195 if (to != from) 196 connp->conn_ilg[to] = connp->conn_ilg[from]; 197 to++; 198 from++; 199 } 200 201 connp->conn_ilg_inuse = to; 202 203 if (connp->conn_ilg_inuse == 0) { 204 mi_free((char *)connp->conn_ilg); 205 connp->conn_ilg = NULL; 206 cv_broadcast(&connp->conn_refcv); 207 } 208 } 209 210 #define GETSTRUCT(structure, number) \ 211 ((structure *)mi_zalloc(sizeof (structure) * (number))) 212 213 #define ILG_ALLOC_CHUNK 16 214 215 /* 216 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 217 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 218 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 219 * returned ilg). Returns NULL on failure (ENOMEM). 220 * 221 * Assumes connp->conn_lock is held. 222 */ 223 static ilg_t * 224 conn_ilg_alloc(conn_t *connp) 225 { 226 ilg_t *new; 227 int curcnt; 228 229 ASSERT(MUTEX_HELD(&connp->conn_lock)); 230 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 231 232 if (connp->conn_ilg == NULL) { 233 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 234 if (connp->conn_ilg == NULL) 235 return (NULL); 236 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 237 connp->conn_ilg_inuse = 0; 238 } 239 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 240 curcnt = connp->conn_ilg_allocated; 241 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 242 if (new == NULL) 243 return (NULL); 244 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 245 mi_free((char *)connp->conn_ilg); 246 connp->conn_ilg = new; 247 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 248 } 249 250 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 251 } 252 253 typedef struct ilm_fbld_s { 254 ilm_t *fbld_ilm; 255 int fbld_in_cnt; 256 int fbld_ex_cnt; 257 slist_t fbld_in; 258 slist_t fbld_ex; 259 boolean_t fbld_in_overflow; 260 } ilm_fbld_t; 261 262 static void 263 ilm_bld_flists(conn_t *conn, void *arg) 264 { 265 int i; 266 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 267 ilm_t *ilm = fbld->fbld_ilm; 268 in6_addr_t *v6group = &ilm->ilm_v6addr; 269 270 if (conn->conn_ilg_inuse == 0) 271 return; 272 273 /* 274 * Since we can't break out of the ipcl_walk once started, we still 275 * have to look at every conn. But if we've already found one 276 * (EXCLUDE, NULL) list, there's no need to keep checking individual 277 * ilgs--that will be our state. 278 */ 279 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 280 return; 281 282 /* 283 * Check this conn's ilgs to see if any are interested in our 284 * ilm (group, interface match). If so, update the master 285 * include and exclude lists we're building in the fbld struct 286 * with this ilg's filter info. 287 */ 288 mutex_enter(&conn->conn_lock); 289 for (i = 0; i < conn->conn_ilg_inuse; i++) { 290 ilg_t *ilg = &conn->conn_ilg[i]; 291 if ((ilg->ilg_ill == ilm->ilm_ill) && 292 (ilg->ilg_ipif == ilm->ilm_ipif) && 293 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 294 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 295 fbld->fbld_in_cnt++; 296 if (!fbld->fbld_in_overflow) 297 l_union_in_a(&fbld->fbld_in, 298 ilg->ilg_filter, 299 &fbld->fbld_in_overflow); 300 } else { 301 fbld->fbld_ex_cnt++; 302 /* 303 * On the first exclude list, don't try to do 304 * an intersection, as the master exclude list 305 * is intentionally empty. If the master list 306 * is still empty on later iterations, that 307 * means we have at least one ilg with an empty 308 * exclude list, so that should be reflected 309 * when we take the intersection. 310 */ 311 if (fbld->fbld_ex_cnt == 1) { 312 if (ilg->ilg_filter != NULL) 313 l_copy(ilg->ilg_filter, 314 &fbld->fbld_ex); 315 } else { 316 l_intersection_in_a(&fbld->fbld_ex, 317 ilg->ilg_filter); 318 } 319 } 320 /* there will only be one match, so break now. */ 321 break; 322 } 323 } 324 mutex_exit(&conn->conn_lock); 325 } 326 327 static void 328 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 329 { 330 ilm_fbld_t fbld; 331 332 fbld.fbld_ilm = ilm; 333 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 334 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 335 fbld.fbld_in_overflow = B_FALSE; 336 337 /* first, construct our master include and exclude lists */ 338 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld); 339 340 /* now use those master lists to generate the interface filter */ 341 342 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 343 if (fbld.fbld_in_overflow) { 344 *fmode = MODE_IS_EXCLUDE; 345 flist->sl_numsrc = 0; 346 return; 347 } 348 349 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 350 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 351 *fmode = MODE_IS_INCLUDE; 352 flist->sl_numsrc = 0; 353 return; 354 } 355 356 /* 357 * If there are no exclude lists, then the interface filter 358 * is INCLUDE, with its filter list equal to fbld_in. A single 359 * exclude list makes the interface filter EXCLUDE, with its 360 * filter list equal to (fbld_ex - fbld_in). 361 */ 362 if (fbld.fbld_ex_cnt == 0) { 363 *fmode = MODE_IS_INCLUDE; 364 l_copy(&fbld.fbld_in, flist); 365 } else { 366 *fmode = MODE_IS_EXCLUDE; 367 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 368 } 369 } 370 371 /* 372 * If the given interface has failed, choose a new one to join on so 373 * that we continue to receive packets. ilg_orig_ifindex remembers 374 * what the application used to join on so that we know the ilg to 375 * delete even though we change the ill here. Callers will store the 376 * ilg returned from this function in ilg_ill. Thus when we receive 377 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 378 * 379 * This function must be called as writer so we can walk the group 380 * list and examine flags without holding a lock. 381 */ 382 ill_t * 383 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 384 { 385 ill_t *till; 386 ill_group_t *illgrp = ill->ill_group; 387 388 ASSERT(IAM_WRITER_ILL(ill)); 389 390 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 391 return (ill); 392 393 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 394 return (ill); 395 396 till = illgrp->illgrp_ill; 397 while (till != NULL && 398 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 399 till = till->ill_group_next; 400 } 401 if (till != NULL) 402 return (till); 403 404 return (ill); 405 } 406 407 static int 408 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 409 boolean_t isv6) 410 { 411 mcast_record_t fmode; 412 slist_t *flist; 413 boolean_t fdefault; 414 char buf[INET6_ADDRSTRLEN]; 415 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 416 417 /* 418 * There are several cases where the ilm's filter state 419 * defaults to (EXCLUDE, NULL): 420 * - we've had previous joins without associated ilgs 421 * - this join has no associated ilg 422 * - the ilg's filter state is (EXCLUDE, NULL) 423 */ 424 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 425 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 426 427 /* attempt mallocs (if needed) before doing anything else */ 428 if ((flist = l_alloc()) == NULL) 429 return (ENOMEM); 430 if (!fdefault && ilm->ilm_filter == NULL) { 431 ilm->ilm_filter = l_alloc(); 432 if (ilm->ilm_filter == NULL) { 433 l_free(flist); 434 return (ENOMEM); 435 } 436 } 437 438 if (ilgstat != ILGSTAT_CHANGE) 439 ilm->ilm_refcnt++; 440 441 if (ilgstat == ILGSTAT_NONE) 442 ilm->ilm_no_ilg_cnt++; 443 444 /* 445 * Determine new filter state. If it's not the default 446 * (EXCLUDE, NULL), we must walk the conn list to find 447 * any ilgs interested in this group, and re-build the 448 * ilm filter. 449 */ 450 if (fdefault) { 451 fmode = MODE_IS_EXCLUDE; 452 flist->sl_numsrc = 0; 453 } else { 454 ilm_gen_filter(ilm, &fmode, flist); 455 } 456 457 /* make sure state actually changed; nothing to do if not. */ 458 if ((ilm->ilm_fmode == fmode) && 459 !lists_are_different(ilm->ilm_filter, flist)) { 460 l_free(flist); 461 return (0); 462 } 463 464 /* send the state change report */ 465 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 466 if (isv6) 467 mld_statechange(ilm, fmode, flist); 468 else 469 igmp_statechange(ilm, fmode, flist); 470 } 471 472 /* update the ilm state */ 473 ilm->ilm_fmode = fmode; 474 if (flist->sl_numsrc > 0) 475 l_copy(flist, ilm->ilm_filter); 476 else 477 CLEAR_SLIST(ilm->ilm_filter); 478 479 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 480 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 481 482 l_free(flist); 483 return (0); 484 } 485 486 static int 487 ilm_update_del(ilm_t *ilm, boolean_t isv6) 488 { 489 mcast_record_t fmode; 490 slist_t *flist; 491 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 492 493 ip1dbg(("ilm_update_del: still %d left; updating state\n", 494 ilm->ilm_refcnt)); 495 496 if ((flist = l_alloc()) == NULL) 497 return (ENOMEM); 498 499 /* 500 * If present, the ilg in question has already either been 501 * updated or removed from our list; so all we need to do 502 * now is walk the list to update the ilm filter state. 503 * 504 * Skip the list walk if we have any no-ilg joins, which 505 * cause the filter state to revert to (EXCLUDE, NULL). 506 */ 507 if (ilm->ilm_no_ilg_cnt != 0) { 508 fmode = MODE_IS_EXCLUDE; 509 flist->sl_numsrc = 0; 510 } else { 511 ilm_gen_filter(ilm, &fmode, flist); 512 } 513 514 /* check to see if state needs to be updated */ 515 if ((ilm->ilm_fmode == fmode) && 516 (!lists_are_different(ilm->ilm_filter, flist))) { 517 l_free(flist); 518 return (0); 519 } 520 521 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 522 if (isv6) 523 mld_statechange(ilm, fmode, flist); 524 else 525 igmp_statechange(ilm, fmode, flist); 526 } 527 528 ilm->ilm_fmode = fmode; 529 if (flist->sl_numsrc > 0) { 530 if (ilm->ilm_filter == NULL) { 531 ilm->ilm_filter = l_alloc(); 532 if (ilm->ilm_filter == NULL) { 533 char buf[INET6_ADDRSTRLEN]; 534 ip1dbg(("ilm_update_del: failed to alloc ilm " 535 "filter; no source filtering for %s on %s", 536 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 537 buf, sizeof (buf)), ill->ill_name)); 538 ilm->ilm_fmode = MODE_IS_EXCLUDE; 539 l_free(flist); 540 return (0); 541 } 542 } 543 l_copy(flist, ilm->ilm_filter); 544 } else { 545 CLEAR_SLIST(ilm->ilm_filter); 546 } 547 548 l_free(flist); 549 return (0); 550 } 551 552 /* 553 * INADDR_ANY means all multicast addresses. This is only used 554 * by the multicast router. 555 * INADDR_ANY is stored as IPv6 unspecified addr. 556 */ 557 int 558 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 559 mcast_record_t ilg_fmode, slist_t *ilg_flist) 560 { 561 ill_t *ill = ipif->ipif_ill; 562 ilm_t *ilm; 563 in6_addr_t v6group; 564 int ret; 565 566 ASSERT(IAM_WRITER_IPIF(ipif)); 567 568 if (!CLASSD(group) && group != INADDR_ANY) 569 return (EINVAL); 570 571 /* 572 * INADDR_ANY is represented as the IPv6 unspecifed addr. 573 */ 574 if (group == INADDR_ANY) 575 v6group = ipv6_all_zeros; 576 else 577 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 578 579 ilm = ilm_lookup_ipif(ipif, group); 580 if (ilm != NULL) 581 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 582 583 /* 584 * ilms are associated with ipifs in IPv4. It moves with the 585 * ipif if the ipif moves to a new ill when the interface 586 * fails. Thus we really don't check whether the ipif_ill 587 * has failed like in IPv6. If it has FAILED the ipif 588 * will move (daemon will move it) and hence the ilm, if the 589 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 590 * we continue to receive in the same place even if the 591 * interface fails. 592 */ 593 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 594 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 595 if (ilm == NULL) 596 return (ENOMEM); 597 598 if (group == INADDR_ANY) { 599 /* 600 * Check how many ipif's have members in this group - 601 * if more then one we should not tell the driver to join 602 * this time 603 */ 604 if (ilm_numentries_v6(ill, &v6group) > 1) 605 return (0); 606 if (ill->ill_group == NULL) 607 ret = ip_join_allmulti(ipif); 608 else 609 ret = ill_nominate_mcast_rcv(ill->ill_group); 610 if (ret != 0) 611 ilm_delete(ilm); 612 return (ret); 613 } 614 615 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 616 igmp_joingroup(ilm); 617 618 if (ilm_numentries_v6(ill, &v6group) > 1) 619 return (0); 620 621 ret = ip_ll_addmulti_v6(ipif, &v6group); 622 if (ret != 0) 623 ilm_delete(ilm); 624 return (ret); 625 } 626 627 /* 628 * The unspecified address means all multicast addresses. 629 * This is only used by the multicast router. 630 * 631 * ill identifies the interface to join on; it may not match the 632 * interface requested by the application of a failover has taken 633 * place. orig_ifindex always identifies the interface requested 634 * by the app. 635 * 636 * ilgstat tells us if there's an ilg associated with this join, 637 * and if so, if it's a new ilg or a change to an existing one. 638 * ilg_fmode and ilg_flist give us the current filter state of 639 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg). 640 */ 641 int 642 ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 643 zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode, 644 slist_t *ilg_flist) 645 { 646 ilm_t *ilm; 647 int ret; 648 649 ASSERT(IAM_WRITER_ILL(ill)); 650 651 if (!IN6_IS_ADDR_MULTICAST(v6group) && 652 !IN6_IS_ADDR_UNSPECIFIED(v6group)) { 653 return (EINVAL); 654 } 655 656 /* 657 * An ilm is uniquely identified by the tuple of (group, ill, 658 * orig_ill). group is the multicast group address, ill is 659 * the interface on which it is currently joined, and orig_ill 660 * is the interface on which the application requested the 661 * join. orig_ill and ill are the same unless orig_ill has 662 * failed over. 663 * 664 * Both orig_ill and ill are required, which means we may have 665 * 2 ilms on an ill for the same group, but with different 666 * orig_ills. These must be kept separate, so that when failback 667 * occurs, the appropriate ilms are moved back to their orig_ill 668 * without disrupting memberships on the ill to which they had 669 * been moved. 670 * 671 * In order to track orig_ill, we store orig_ifindex in the 672 * ilm and ilg. 673 */ 674 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 675 if (ilm != NULL) 676 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE)); 677 678 /* 679 * We need to remember where the application really wanted 680 * to join. This will be used later if we want to failback 681 * to the original interface. 682 */ 683 ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode, 684 ilg_flist, orig_ifindex, zoneid); 685 if (ilm == NULL) 686 return (ENOMEM); 687 688 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 689 /* 690 * Check how many ipif's that have members in this group - 691 * if more then one we should not tell the driver to join 692 * this time 693 */ 694 if (ilm_numentries_v6(ill, v6group) > 1) 695 return (0); 696 if (ill->ill_group == NULL) 697 ret = ip_join_allmulti(ill->ill_ipif); 698 else 699 ret = ill_nominate_mcast_rcv(ill->ill_group); 700 701 if (ret != 0) 702 ilm_delete(ilm); 703 return (ret); 704 } 705 706 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 707 mld_joingroup(ilm); 708 709 /* 710 * If we have more then one we should not tell the driver 711 * to join this time. 712 */ 713 if (ilm_numentries_v6(ill, v6group) > 1) 714 return (0); 715 716 ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group); 717 if (ret != 0) 718 ilm_delete(ilm); 719 return (ret); 720 } 721 722 /* 723 * Send a multicast request to the driver for enabling multicast reception 724 * for v6groupp address. The caller has already checked whether it is 725 * appropriate to send one or not. 726 */ 727 int 728 ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 729 { 730 mblk_t *mp; 731 uint32_t addrlen, addroff; 732 char group_buf[INET6_ADDRSTRLEN]; 733 734 ASSERT(IAM_WRITER_ILL(ill)); 735 736 /* 737 * Create a AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked 738 * on. 739 */ 740 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 741 &addrlen, &addroff); 742 if (!mp) 743 return (ENOMEM); 744 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 745 ipaddr_t v4group; 746 747 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 748 /* 749 * NOTE!!! 750 * The "addroff" passed in here was calculated by 751 * ill_create_dl(), and will be used by ill_create_squery() 752 * to perform some twisted coding magic. It is the offset 753 * into the dl_xxx_req of the hw addr. Here, it will be 754 * added to b_wptr - b_rptr to create a magic number that 755 * is not an offset into this squery mblk. 756 * The actual hardware address will be accessed only in the 757 * dl_xxx_req, not in the squery. More importantly, 758 * that hardware address can *only* be accessed in this 759 * mblk chain by calling mi_offset_param_c(), which uses 760 * the magic number in the squery hw offset field to go 761 * to the *next* mblk (the dl_xxx_req), subtract the 762 * (b_wptr - b_rptr), and find the actual offset into 763 * the dl_xxx_req. 764 * Any method that depends on using the 765 * offset field in the dl_disabmulti_req or squery 766 * to find either hardware address will similarly fail. 767 * 768 * Look in ar_entry_squery() in arp.c to see how this offset 769 * is used. 770 */ 771 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 772 if (!mp) 773 return (ENOMEM); 774 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 775 inet_ntop(AF_INET6, v6groupp, group_buf, 776 sizeof (group_buf)), 777 ill->ill_name)); 778 putnext(ill->ill_rq, mp); 779 } else { 780 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on" 781 " %s\n", 782 inet_ntop(AF_INET6, v6groupp, group_buf, 783 sizeof (group_buf)), 784 ill->ill_name)); 785 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 786 } 787 return (0); 788 } 789 790 /* 791 * Send a multicast request to the driver for enabling multicast 792 * membership for v6group if appropriate. 793 */ 794 static int 795 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 796 { 797 ill_t *ill = ipif->ipif_ill; 798 799 ASSERT(IAM_WRITER_IPIF(ipif)); 800 801 if (ill->ill_net_type != IRE_IF_RESOLVER || 802 ipif->ipif_flags & IPIF_POINTOPOINT) { 803 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 804 return (0); /* Must be IRE_IF_NORESOLVER */ 805 } 806 807 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 808 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 809 return (0); 810 } 811 if (ill->ill_ipif_up_count == 0) { 812 /* 813 * Nobody there. All multicast addresses will be re-joined 814 * when we get the DL_BIND_ACK bringing the interface up. 815 */ 816 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 817 return (0); 818 } 819 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 820 } 821 822 /* 823 * INADDR_ANY means all multicast addresses. This is only used 824 * by the multicast router. 825 * INADDR_ANY is stored as the IPv6 unspecifed addr. 826 */ 827 int 828 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 829 { 830 ill_t *ill = ipif->ipif_ill; 831 ilm_t *ilm; 832 in6_addr_t v6group; 833 int ret; 834 835 ASSERT(IAM_WRITER_IPIF(ipif)); 836 837 if (!CLASSD(group) && group != INADDR_ANY) 838 return (EINVAL); 839 840 /* 841 * INADDR_ANY is represented as the IPv6 unspecifed addr. 842 */ 843 if (group == INADDR_ANY) 844 v6group = ipv6_all_zeros; 845 else 846 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 847 848 /* 849 * Look for a match on the ipif. 850 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 851 */ 852 ilm = ilm_lookup_ipif(ipif, group); 853 if (ilm == NULL) 854 return (ENOENT); 855 856 /* Update counters */ 857 if (no_ilg) 858 ilm->ilm_no_ilg_cnt--; 859 860 if (leaving) 861 ilm->ilm_refcnt--; 862 863 if (ilm->ilm_refcnt > 0) 864 return (ilm_update_del(ilm, B_FALSE)); 865 866 if (group == INADDR_ANY) { 867 ilm_delete(ilm); 868 /* 869 * Check how many ipif's that have members in this group - 870 * if there are still some left then don't tell the driver 871 * to drop it. 872 */ 873 if (ilm_numentries_v6(ill, &v6group) != 0) 874 return (0); 875 876 /* 877 * If we never joined, then don't leave. This can happen 878 * if we're in an IPMP group, since only one ill per IPMP 879 * group receives all multicast packets. 880 */ 881 if (!ill->ill_join_allmulti) { 882 ASSERT(ill->ill_group != NULL); 883 return (0); 884 } 885 886 ret = ip_leave_allmulti(ipif); 887 if (ill->ill_group != NULL) 888 (void) ill_nominate_mcast_rcv(ill->ill_group); 889 return (ret); 890 } 891 892 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 893 igmp_leavegroup(ilm); 894 895 ilm_delete(ilm); 896 /* 897 * Check how many ipif's that have members in this group - 898 * if there are still some left then don't tell the driver 899 * to drop it. 900 */ 901 if (ilm_numentries_v6(ill, &v6group) != 0) 902 return (0); 903 return (ip_ll_delmulti_v6(ipif, &v6group)); 904 } 905 906 /* 907 * The unspecified address means all multicast addresses. 908 * This is only used by the multicast router. 909 */ 910 int 911 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 912 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 913 { 914 ipif_t *ipif; 915 ilm_t *ilm; 916 int ret; 917 918 ASSERT(IAM_WRITER_ILL(ill)); 919 920 if (!IN6_IS_ADDR_MULTICAST(v6group) && 921 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 922 return (EINVAL); 923 924 /* 925 * Look for a match on the ill. 926 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 927 * 928 * Similar to ip_addmulti_v6, we should always look using 929 * the orig_ifindex. 930 * 931 * 1) If orig_ifindex is different from ill's ifindex 932 * we should have an ilm with orig_ifindex created in 933 * ip_addmulti_v6. We should delete that here. 934 * 935 * 2) If orig_ifindex is same as ill's ifindex, we should 936 * not delete the ilm that is temporarily here because of 937 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 938 * different from ill's ifindex. 939 * 940 * Thus, always lookup using orig_ifindex. 941 */ 942 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 943 if (ilm == NULL) 944 return (ENOENT); 945 946 ASSERT(ilm->ilm_ill == ill); 947 948 ipif = ill->ill_ipif; 949 950 /* Update counters */ 951 if (no_ilg) 952 ilm->ilm_no_ilg_cnt--; 953 954 if (leaving) 955 ilm->ilm_refcnt--; 956 957 if (ilm->ilm_refcnt > 0) 958 return (ilm_update_del(ilm, B_TRUE)); 959 960 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 961 ilm_delete(ilm); 962 /* 963 * Check how many ipif's that have members in this group - 964 * if there are still some left then don't tell the driver 965 * to drop it. 966 */ 967 if (ilm_numentries_v6(ill, v6group) != 0) 968 return (0); 969 970 /* 971 * If we never joined, then don't leave. This can happen 972 * if we're in an IPMP group, since only one ill per IPMP 973 * group receives all multicast packets. 974 */ 975 if (!ill->ill_join_allmulti) { 976 ASSERT(ill->ill_group != NULL); 977 return (0); 978 } 979 980 ret = ip_leave_allmulti(ipif); 981 if (ill->ill_group != NULL) 982 (void) ill_nominate_mcast_rcv(ill->ill_group); 983 return (ret); 984 } 985 986 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 987 mld_leavegroup(ilm); 988 989 ilm_delete(ilm); 990 /* 991 * Check how many ipif's that have members in this group - 992 * if there are still some left then don't tell the driver 993 * to drop it. 994 */ 995 if (ilm_numentries_v6(ill, v6group) != 0) 996 return (0); 997 return (ip_ll_delmulti_v6(ipif, v6group)); 998 } 999 1000 /* 1001 * Send a multicast request to the driver for disabling multicast reception 1002 * for v6groupp address. The caller has already checked whether it is 1003 * appropriate to send one or not. 1004 */ 1005 int 1006 ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp) 1007 { 1008 mblk_t *mp; 1009 char group_buf[INET6_ADDRSTRLEN]; 1010 uint32_t addrlen, addroff; 1011 1012 ASSERT(IAM_WRITER_ILL(ill)); 1013 /* 1014 * Create a AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked 1015 * on. 1016 */ 1017 mp = ill_create_dl(ill, DL_DISABMULTI_REQ, 1018 sizeof (dl_disabmulti_req_t), &addrlen, &addroff); 1019 1020 if (!mp) 1021 return (ENOMEM); 1022 1023 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 1024 ipaddr_t v4group; 1025 1026 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 1027 /* 1028 * NOTE!!! 1029 * The "addroff" passed in here was calculated by 1030 * ill_create_dl(), and will be used by ill_create_squery() 1031 * to perform some twisted coding magic. It is the offset 1032 * into the dl_xxx_req of the hw addr. Here, it will be 1033 * added to b_wptr - b_rptr to create a magic number that 1034 * is not an offset into this mblk. 1035 * 1036 * Please see the comment in ip_ll_send)enabmulti_req() 1037 * for a complete explanation. 1038 * 1039 * Look in ar_entry_squery() in arp.c to see how this offset 1040 * is used. 1041 */ 1042 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 1043 if (!mp) 1044 return (ENOMEM); 1045 ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n", 1046 inet_ntop(AF_INET6, v6groupp, group_buf, 1047 sizeof (group_buf)), 1048 ill->ill_name)); 1049 putnext(ill->ill_rq, mp); 1050 } else { 1051 ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on" 1052 " %s\n", 1053 inet_ntop(AF_INET6, v6groupp, group_buf, 1054 sizeof (group_buf)), 1055 ill->ill_name)); 1056 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 1057 } 1058 return (0); 1059 } 1060 1061 /* 1062 * Send a multicast request to the driver for disabling multicast 1063 * membership for v6group if appropriate. 1064 */ 1065 static int 1066 ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group) 1067 { 1068 ill_t *ill = ipif->ipif_ill; 1069 1070 ASSERT(IAM_WRITER_IPIF(ipif)); 1071 1072 if (ill->ill_net_type != IRE_IF_RESOLVER || 1073 ipif->ipif_flags & IPIF_POINTOPOINT) { 1074 return (0); /* Must be IRE_IF_NORESOLVER */ 1075 } 1076 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 1077 ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n")); 1078 return (0); 1079 } 1080 if (ill->ill_ipif_up_count == 0) { 1081 /* 1082 * Nobody there. All multicast addresses will be re-joined 1083 * when we get the DL_BIND_ACK bringing the interface up. 1084 */ 1085 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1086 return (0); 1087 } 1088 return (ip_ll_send_disabmulti_req(ill, v6group)); 1089 } 1090 1091 /* 1092 * Make the driver pass up all multicast packets 1093 * 1094 * With ill groups, the caller makes sure that there is only 1095 * one ill joining the allmulti group. 1096 */ 1097 int 1098 ip_join_allmulti(ipif_t *ipif) 1099 { 1100 ill_t *ill = ipif->ipif_ill; 1101 mblk_t *mp; 1102 uint32_t addrlen, addroff; 1103 1104 ASSERT(IAM_WRITER_IPIF(ipif)); 1105 1106 if (ill->ill_ipif_up_count == 0) { 1107 /* 1108 * Nobody there. All multicast addresses will be re-joined 1109 * when we get the DL_BIND_ACK bringing the interface up. 1110 */ 1111 return (0); 1112 } 1113 1114 ASSERT(!ill->ill_join_allmulti); 1115 1116 /* 1117 * Create a DL_PROMISCON_REQ message and send it directly to 1118 * the DLPI provider. We don't need to do this for certain 1119 * media types for which we never need to turn promiscuous 1120 * mode on. 1121 */ 1122 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1123 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1124 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1125 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1126 if (mp == NULL) 1127 return (ENOMEM); 1128 putnext(ill->ill_wq, mp); 1129 } 1130 1131 mutex_enter(&ill->ill_lock); 1132 ill->ill_join_allmulti = B_TRUE; 1133 mutex_exit(&ill->ill_lock); 1134 return (0); 1135 } 1136 1137 /* 1138 * Make the driver stop passing up all multicast packets 1139 * 1140 * With ill groups, we need to nominate some other ill as 1141 * this ipif->ipif_ill is leaving the group. 1142 */ 1143 int 1144 ip_leave_allmulti(ipif_t *ipif) 1145 { 1146 ill_t *ill = ipif->ipif_ill; 1147 mblk_t *mp; 1148 uint32_t addrlen, addroff; 1149 1150 ASSERT(IAM_WRITER_IPIF(ipif)); 1151 1152 if (ill->ill_ipif_up_count == 0) { 1153 /* 1154 * Nobody there. All multicast addresses will be re-joined 1155 * when we get the DL_BIND_ACK bringing the interface up. 1156 */ 1157 return (0); 1158 } 1159 1160 ASSERT(ill->ill_join_allmulti); 1161 1162 /* 1163 * Create a DL_PROMISCOFF_REQ message and send it directly to 1164 * the DLPI provider. We don't need to do this for certain 1165 * media types for which we never need to turn promiscuous 1166 * mode on. 1167 */ 1168 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1169 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1170 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1171 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1172 if (mp == NULL) 1173 return (ENOMEM); 1174 putnext(ill->ill_wq, mp); 1175 } 1176 1177 mutex_enter(&ill->ill_lock); 1178 ill->ill_join_allmulti = B_FALSE; 1179 mutex_exit(&ill->ill_lock); 1180 return (0); 1181 } 1182 1183 /* 1184 * Copy mp_orig and pass it in as a local message. 1185 */ 1186 void 1187 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1188 zoneid_t zoneid) 1189 { 1190 mblk_t *mp; 1191 mblk_t *ipsec_mp; 1192 1193 if (DB_TYPE(mp_orig) == M_DATA && 1194 ((ipha_t *)mp_orig->b_rptr)->ipha_protocol == IPPROTO_UDP) { 1195 uint_t hdrsz; 1196 1197 hdrsz = IPH_HDR_LENGTH((ipha_t *)mp_orig->b_rptr) + 1198 sizeof (udpha_t); 1199 ASSERT(MBLKL(mp_orig) >= hdrsz); 1200 1201 if (((mp = allocb(hdrsz, BPRI_MED)) != NULL) && 1202 (mp_orig = dupmsg(mp_orig)) != NULL) { 1203 bcopy(mp_orig->b_rptr, mp->b_rptr, hdrsz); 1204 mp->b_wptr += hdrsz; 1205 mp->b_cont = mp_orig; 1206 mp_orig->b_rptr += hdrsz; 1207 if (MBLKL(mp_orig) == 0) { 1208 mp->b_cont = mp_orig->b_cont; 1209 mp_orig->b_cont = NULL; 1210 freeb(mp_orig); 1211 } 1212 } else if (mp != NULL) { 1213 freeb(mp); 1214 mp = NULL; 1215 } 1216 } else { 1217 mp = ip_copymsg(mp_orig); 1218 } 1219 1220 if (mp == NULL) 1221 return; 1222 if (DB_TYPE(mp) == M_CTL) { 1223 ipsec_mp = mp; 1224 mp = mp->b_cont; 1225 } else { 1226 ipsec_mp = mp; 1227 } 1228 ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL, 1229 fanout_flags, zoneid); 1230 } 1231 1232 static area_t ip_aresq_template = { 1233 AR_ENTRY_SQUERY, /* cmd */ 1234 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1235 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1236 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1237 sizeof (area_t), /* proto addr offset */ 1238 IP_ADDR_LEN, /* proto addr_length */ 1239 0, /* proto mask offset */ 1240 /* Rest is initialized when used */ 1241 0, /* flags */ 1242 0, /* hw addr offset */ 1243 0, /* hw addr length */ 1244 }; 1245 1246 static mblk_t * 1247 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1248 uint32_t addroff, mblk_t *mp_tail) 1249 { 1250 mblk_t *mp; 1251 area_t *area; 1252 1253 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1254 (caddr_t)&ipaddr); 1255 if (!mp) { 1256 freemsg(mp_tail); 1257 return (NULL); 1258 } 1259 area = (area_t *)mp->b_rptr; 1260 area->area_hw_addr_length = addrlen; 1261 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1262 /* 1263 * NOTE! 1264 * 1265 * The area_hw_addr_offset, as can be seen, does not hold the 1266 * actual hardware address offset. Rather, it holds the offset 1267 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1268 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1269 * mi_offset_paramc() to find the hardware address in the 1270 * *second* mblk (dl_xxx_req), not this mblk. 1271 * 1272 * Using mi_offset_paramc() is thus the *only* way to access 1273 * the dl_xxx_hw address. 1274 * 1275 * The squery hw address should *not* be accessed. 1276 * 1277 * See ar_entry_squery() in arp.c for an example of how all this works. 1278 */ 1279 1280 mp->b_cont = mp_tail; 1281 return (mp); 1282 } 1283 1284 /* 1285 * Create a dlpi message with room for phys+sap. When we come back in 1286 * ip_wput_ctl() we will strip the sap for those primitives which 1287 * only need a physical address. 1288 */ 1289 static mblk_t * 1290 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1291 uint32_t *addr_lenp, uint32_t *addr_offp) 1292 { 1293 mblk_t *mp; 1294 uint32_t hw_addr_length; 1295 char *cp; 1296 uint32_t offset; 1297 uint32_t size; 1298 1299 *addr_lenp = *addr_offp = 0; 1300 1301 hw_addr_length = ill->ill_phys_addr_length; 1302 if (!hw_addr_length) { 1303 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1304 return (NULL); 1305 } 1306 1307 size = length; 1308 switch (dl_primitive) { 1309 case DL_ENABMULTI_REQ: 1310 case DL_DISABMULTI_REQ: 1311 size += hw_addr_length; 1312 break; 1313 case DL_PROMISCON_REQ: 1314 case DL_PROMISCOFF_REQ: 1315 break; 1316 default: 1317 return (NULL); 1318 } 1319 mp = allocb(size, BPRI_HI); 1320 if (!mp) 1321 return (NULL); 1322 mp->b_wptr += size; 1323 mp->b_datap->db_type = M_PROTO; 1324 1325 cp = (char *)mp->b_rptr; 1326 offset = length; 1327 1328 switch (dl_primitive) { 1329 case DL_ENABMULTI_REQ: { 1330 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1331 1332 dl->dl_primitive = dl_primitive; 1333 dl->dl_addr_offset = offset; 1334 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1335 *addr_offp = offset; 1336 break; 1337 } 1338 case DL_DISABMULTI_REQ: { 1339 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1340 1341 dl->dl_primitive = dl_primitive; 1342 dl->dl_addr_offset = offset; 1343 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1344 *addr_offp = offset; 1345 break; 1346 } 1347 case DL_PROMISCON_REQ: 1348 case DL_PROMISCOFF_REQ: { 1349 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1350 1351 dl->dl_primitive = dl_primitive; 1352 dl->dl_level = DL_PROMISC_MULTI; 1353 break; 1354 } 1355 } 1356 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1357 *addr_lenp, *addr_offp)); 1358 return (mp); 1359 } 1360 1361 void 1362 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1363 { 1364 ill_t *ill = (ill_t *)q->q_ptr; 1365 mblk_t *mp = mp_orig; 1366 area_t *area; 1367 1368 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1369 if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) || 1370 mp->b_cont == NULL) { 1371 putnext(q, mp); 1372 return; 1373 } 1374 area = (area_t *)mp->b_rptr; 1375 if (area->area_cmd != AR_ENTRY_SQUERY) { 1376 putnext(q, mp); 1377 return; 1378 } 1379 mp = mp->b_cont; 1380 /* 1381 * Update dl_addr_length and dl_addr_offset for primitives that 1382 * have physical addresses as opposed to full saps 1383 */ 1384 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1385 case DL_ENABMULTI_REQ: 1386 /* Track the state if this is the first enabmulti */ 1387 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1388 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1389 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1390 break; 1391 case DL_DISABMULTI_REQ: 1392 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1393 break; 1394 default: 1395 ip1dbg(("ip_wput_ctl: default\n")); 1396 break; 1397 } 1398 freeb(mp_orig); 1399 putnext(q, mp); 1400 } 1401 1402 /* 1403 * Rejoin any groups which have been explicitly joined by the application (we 1404 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1405 * bringing the interface down). Note that because groups can be joined and 1406 * left while an interface is down, this may not be the same set of groups 1407 * that we left in ill_leave_multicast(). 1408 */ 1409 void 1410 ill_recover_multicast(ill_t *ill) 1411 { 1412 ilm_t *ilm; 1413 char addrbuf[INET6_ADDRSTRLEN]; 1414 1415 ASSERT(IAM_WRITER_ILL(ill)); 1416 1417 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1418 /* 1419 * Check how many ipif's that have members in this group - 1420 * if more then one we make sure that this entry is first 1421 * in the list. 1422 */ 1423 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1424 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1425 continue; 1426 ip1dbg(("ill_recover_multicast: %s\n", 1427 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1428 sizeof (addrbuf)))); 1429 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1430 if (ill->ill_group == NULL) { 1431 (void) ip_join_allmulti(ill->ill_ipif); 1432 } else { 1433 /* 1434 * We don't want to join on this ill, 1435 * if somebody else in the group has 1436 * already been nominated. 1437 */ 1438 (void) ill_nominate_mcast_rcv(ill->ill_group); 1439 } 1440 } else { 1441 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1442 &ilm->ilm_v6addr); 1443 } 1444 } 1445 } 1446 1447 /* 1448 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1449 * that were explicitly joined. Note that both these functions could be 1450 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1451 * and DL_ENABMULTI_REQ messages when an interface is down. 1452 */ 1453 void 1454 ill_leave_multicast(ill_t *ill) 1455 { 1456 ilm_t *ilm; 1457 char addrbuf[INET6_ADDRSTRLEN]; 1458 1459 ASSERT(IAM_WRITER_ILL(ill)); 1460 1461 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1462 /* 1463 * Check how many ipif's that have members in this group - 1464 * if more then one we make sure that this entry is first 1465 * in the list. 1466 */ 1467 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1468 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1469 continue; 1470 ip1dbg(("ill_leave_multicast: %s\n", 1471 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1472 sizeof (addrbuf)))); 1473 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1474 (void) ip_leave_allmulti(ill->ill_ipif); 1475 /* 1476 * If we were part of an IPMP group, then 1477 * ill_handoff_responsibility() has already 1478 * nominated a new member (so we don't). 1479 */ 1480 ASSERT(ill->ill_group == NULL); 1481 } else { 1482 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1483 } 1484 } 1485 } 1486 1487 /* 1488 * Find an ilm for matching the ill and which has the source in its 1489 * INCLUDE list or does not have it in its EXCLUDE list 1490 */ 1491 ilm_t * 1492 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1493 { 1494 in6_addr_t v6group, v6src; 1495 1496 /* 1497 * INADDR_ANY is represented as the IPv6 unspecified addr. 1498 */ 1499 if (group == INADDR_ANY) 1500 v6group = ipv6_all_zeros; 1501 else 1502 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1503 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1504 1505 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1506 } 1507 1508 ilm_t * 1509 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1510 const in6_addr_t *v6src) 1511 { 1512 ilm_t *ilm; 1513 boolean_t isinlist; 1514 int i, numsrc; 1515 1516 /* 1517 * If the source is in any ilm's INCLUDE list, or if 1518 * it is not in any ilm's EXCLUDE list, we have a hit. 1519 */ 1520 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1521 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1522 1523 isinlist = B_FALSE; 1524 numsrc = (ilm->ilm_filter == NULL) ? 1525 0 : ilm->ilm_filter->sl_numsrc; 1526 for (i = 0; i < numsrc; i++) { 1527 if (IN6_ARE_ADDR_EQUAL(v6src, 1528 &ilm->ilm_filter->sl_addr[i])) { 1529 isinlist = B_TRUE; 1530 break; 1531 } 1532 } 1533 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1534 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1535 return (ilm); 1536 else 1537 return (NULL); 1538 } 1539 } 1540 return (NULL); 1541 } 1542 1543 1544 /* Find an ilm for matching the ill */ 1545 ilm_t * 1546 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1547 { 1548 in6_addr_t v6group; 1549 1550 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1551 IAM_WRITER_ILL(ill)); 1552 /* 1553 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1554 */ 1555 if (group == INADDR_ANY) 1556 v6group = ipv6_all_zeros; 1557 else 1558 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1559 1560 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1561 } 1562 1563 /* 1564 * Find an ilm for matching the ill. All the ilm lookup functions 1565 * ignore ILM_DELETED ilms. These have been logically deleted, and 1566 * igmp and linklayer disable multicast have been done. Only mi_free 1567 * yet to be done. Still there in the list due to ilm_walkers. The 1568 * last walker will release it. 1569 */ 1570 ilm_t * 1571 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1572 { 1573 ilm_t *ilm; 1574 1575 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1576 IAM_WRITER_ILL(ill)); 1577 1578 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1579 if (ilm->ilm_flags & ILM_DELETED) 1580 continue; 1581 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1582 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1583 return (ilm); 1584 } 1585 return (NULL); 1586 } 1587 1588 ilm_t * 1589 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1590 zoneid_t zoneid) 1591 { 1592 ilm_t *ilm; 1593 1594 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1595 IAM_WRITER_ILL(ill)); 1596 1597 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1598 if (ilm->ilm_flags & ILM_DELETED) 1599 continue; 1600 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1601 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1602 ilm->ilm_orig_ifindex == index) { 1603 return (ilm); 1604 } 1605 } 1606 return (NULL); 1607 } 1608 1609 ilm_t * 1610 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1611 { 1612 in6_addr_t v6group; 1613 1614 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1615 IAM_WRITER_ILL(ill)); 1616 /* 1617 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1618 */ 1619 if (group == INADDR_ANY) 1620 v6group = ipv6_all_zeros; 1621 else 1622 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1623 1624 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1625 } 1626 1627 /* 1628 * Found an ilm for the ipif. Only needed for IPv4 which does 1629 * ipif specific socket options. 1630 */ 1631 ilm_t * 1632 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1633 { 1634 ill_t *ill = ipif->ipif_ill; 1635 ilm_t *ilm; 1636 in6_addr_t v6group; 1637 1638 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1639 IAM_WRITER_ILL(ill)); 1640 1641 /* 1642 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1643 */ 1644 if (group == INADDR_ANY) 1645 v6group = ipv6_all_zeros; 1646 else 1647 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1648 1649 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1650 if (ilm->ilm_flags & ILM_DELETED) 1651 continue; 1652 if (ilm->ilm_ipif == ipif && 1653 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1654 return (ilm); 1655 } 1656 return (NULL); 1657 } 1658 1659 /* 1660 * How many members on this ill? 1661 */ 1662 int 1663 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1664 { 1665 ilm_t *ilm; 1666 int i = 0; 1667 1668 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1669 IAM_WRITER_ILL(ill)); 1670 1671 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1672 if (ilm->ilm_flags & ILM_DELETED) 1673 continue; 1674 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1675 i++; 1676 } 1677 } 1678 return (i); 1679 } 1680 1681 /* Caller guarantees that the group is not already on the list */ 1682 static ilm_t * 1683 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1684 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1685 zoneid_t zoneid) 1686 { 1687 ill_t *ill = ipif->ipif_ill; 1688 ilm_t *ilm; 1689 ilm_t *ilm_cur; 1690 ilm_t **ilm_ptpn; 1691 1692 ASSERT(IAM_WRITER_IPIF(ipif)); 1693 1694 ilm = GETSTRUCT(ilm_t, 1); 1695 if (ilm == NULL) 1696 return (NULL); 1697 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1698 ilm->ilm_filter = l_alloc(); 1699 if (ilm->ilm_filter == NULL) { 1700 mi_free(ilm); 1701 return (NULL); 1702 } 1703 } 1704 ilm->ilm_v6addr = *v6group; 1705 ilm->ilm_refcnt = 1; 1706 ilm->ilm_zoneid = zoneid; 1707 ilm->ilm_timer = INFINITY; 1708 ilm->ilm_rtx.rtx_timer = INFINITY; 1709 /* 1710 * IPv4 Multicast groups are joined using ipif. 1711 * IPv6 Multicast groups are joined using ill. 1712 */ 1713 if (ill->ill_isv6) { 1714 ilm->ilm_ill = ill; 1715 ilm->ilm_ipif = NULL; 1716 } else { 1717 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1718 ilm->ilm_ipif = ipif; 1719 ilm->ilm_ill = NULL; 1720 } 1721 /* 1722 * After this if ilm moves to a new ill, we don't change 1723 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1724 * it has been moved. Indexes don't match even when the application 1725 * wants to join on a FAILED/INACTIVE interface because we choose 1726 * a new interface to join in. This is considered as an implicit 1727 * move. 1728 */ 1729 ilm->ilm_orig_ifindex = orig_ifindex; 1730 1731 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1732 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1733 1734 /* 1735 * Grab lock to give consistent view to readers 1736 */ 1737 mutex_enter(&ill->ill_lock); 1738 /* 1739 * All ilms in the same zone are contiguous in the ill_ilm list. 1740 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1741 * sending duplicates up when two applications in the same zone join the 1742 * same group on different logical interfaces. 1743 */ 1744 ilm_cur = ill->ill_ilm; 1745 ilm_ptpn = &ill->ill_ilm; 1746 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1747 ilm_ptpn = &ilm_cur->ilm_next; 1748 ilm_cur = ilm_cur->ilm_next; 1749 } 1750 ilm->ilm_next = ilm_cur; 1751 *ilm_ptpn = ilm; 1752 1753 /* 1754 * If we have an associated ilg, use its filter state; if not, 1755 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 1756 */ 1757 if (ilgstat != ILGSTAT_NONE) { 1758 if (!SLIST_IS_EMPTY(ilg_flist)) 1759 l_copy(ilg_flist, ilm->ilm_filter); 1760 ilm->ilm_fmode = ilg_fmode; 1761 } else { 1762 ilm->ilm_no_ilg_cnt = 1; 1763 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1764 } 1765 1766 mutex_exit(&ill->ill_lock); 1767 return (ilm); 1768 } 1769 1770 void 1771 ilm_walker_cleanup(ill_t *ill) 1772 { 1773 ilm_t **ilmp; 1774 ilm_t *ilm; 1775 1776 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1777 ASSERT(ill->ill_ilm_walker_cnt == 0); 1778 1779 ilmp = &ill->ill_ilm; 1780 while (*ilmp != NULL) { 1781 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1782 ilm = *ilmp; 1783 *ilmp = ilm->ilm_next; 1784 FREE_SLIST(ilm->ilm_filter); 1785 FREE_SLIST(ilm->ilm_pendsrcs); 1786 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1787 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1788 mi_free((char *)ilm); 1789 } else { 1790 ilmp = &(*ilmp)->ilm_next; 1791 } 1792 } 1793 ill->ill_ilm_cleanup_reqd = 0; 1794 } 1795 1796 /* 1797 * Unlink ilm and free it. 1798 */ 1799 static void 1800 ilm_delete(ilm_t *ilm) 1801 { 1802 ill_t *ill; 1803 ilm_t **ilmp; 1804 1805 if (ilm->ilm_ipif != NULL) { 1806 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1807 ASSERT(ilm->ilm_ill == NULL); 1808 ill = ilm->ilm_ipif->ipif_ill; 1809 ASSERT(!ill->ill_isv6); 1810 } else { 1811 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1812 ASSERT(ilm->ilm_ipif == NULL); 1813 ill = ilm->ilm_ill; 1814 ASSERT(ill->ill_isv6); 1815 } 1816 /* 1817 * Delete under lock protection so that readers don't stumble 1818 * on bad ilm_next 1819 */ 1820 mutex_enter(&ill->ill_lock); 1821 if (ill->ill_ilm_walker_cnt != 0) { 1822 ilm->ilm_flags |= ILM_DELETED; 1823 ill->ill_ilm_cleanup_reqd = 1; 1824 mutex_exit(&ill->ill_lock); 1825 return; 1826 } 1827 1828 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1829 ; 1830 *ilmp = ilm->ilm_next; 1831 mutex_exit(&ill->ill_lock); 1832 1833 FREE_SLIST(ilm->ilm_filter); 1834 FREE_SLIST(ilm->ilm_pendsrcs); 1835 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1836 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1837 mi_free((char *)ilm); 1838 } 1839 1840 /* Free all ilms for this ipif */ 1841 void 1842 ilm_free(ipif_t *ipif) 1843 { 1844 ill_t *ill = ipif->ipif_ill; 1845 ilm_t *ilm; 1846 ilm_t *next_ilm; 1847 1848 ASSERT(IAM_WRITER_IPIF(ipif)); 1849 1850 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1851 next_ilm = ilm->ilm_next; 1852 if (ilm->ilm_ipif == ipif) 1853 ilm_delete(ilm); 1854 } 1855 } 1856 1857 /* 1858 * Looks up the appropriate ipif given a v4 multicast group and interface 1859 * address. On success, returns 0, with *ipifpp pointing to the found 1860 * struct. On failure, returns an errno and *ipifpp is NULL. 1861 */ 1862 int 1863 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1864 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1865 { 1866 ipif_t *ipif; 1867 int err = 0; 1868 zoneid_t zoneid = connp->conn_zoneid; 1869 1870 if (!CLASSD(group) || CLASSD(src)) { 1871 return (EINVAL); 1872 } 1873 *ipifpp = NULL; 1874 1875 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1876 if (ifaddr != INADDR_ANY) { 1877 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1878 CONNP_TO_WQ(connp), first_mp, func, &err); 1879 if (err != 0 && err != EINPROGRESS) 1880 err = EADDRNOTAVAIL; 1881 } else if (ifindexp != NULL && *ifindexp != 0) { 1882 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1883 CONNP_TO_WQ(connp), first_mp, func, &err); 1884 } else { 1885 ipif = ipif_lookup_group(group, zoneid); 1886 if (ipif == NULL) 1887 return (EADDRNOTAVAIL); 1888 } 1889 if (ipif == NULL) 1890 return (err); 1891 1892 *ipifpp = ipif; 1893 return (0); 1894 } 1895 1896 /* 1897 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1898 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 1899 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1900 * an errno and *illpp and *ipifpp are undefined. 1901 */ 1902 int 1903 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1904 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1905 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1906 { 1907 boolean_t src_unspec; 1908 ill_t *ill = NULL; 1909 ipif_t *ipif = NULL; 1910 int err; 1911 zoneid_t zoneid = connp->conn_zoneid; 1912 queue_t *wq = CONNP_TO_WQ(connp); 1913 1914 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1915 1916 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1917 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1918 return (EINVAL); 1919 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1920 if (src_unspec) { 1921 *v4src = INADDR_ANY; 1922 } else { 1923 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1924 } 1925 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1926 return (EINVAL); 1927 *ipifpp = NULL; 1928 *isv6 = B_FALSE; 1929 } else { 1930 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1931 return (EINVAL); 1932 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1933 IN6_IS_ADDR_MULTICAST(v6src)) { 1934 return (EINVAL); 1935 } 1936 *illpp = NULL; 1937 *isv6 = B_TRUE; 1938 } 1939 1940 if (ifindex == 0) { 1941 if (*isv6) 1942 ill = ill_lookup_group_v6(v6group, zoneid); 1943 else 1944 ipif = ipif_lookup_group(*v4group, zoneid); 1945 if (ill == NULL && ipif == NULL) 1946 return (EADDRNOTAVAIL); 1947 } else { 1948 if (*isv6) { 1949 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1950 wq, first_mp, func, &err); 1951 if (ill != NULL && 1952 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1953 ill_refrele(ill); 1954 ill = NULL; 1955 err = EADDRNOTAVAIL; 1956 } 1957 } else { 1958 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1959 zoneid, wq, first_mp, func, &err); 1960 } 1961 if (ill == NULL && ipif == NULL) 1962 return (err); 1963 } 1964 1965 *ipifpp = ipif; 1966 *illpp = ill; 1967 return (0); 1968 } 1969 1970 static int 1971 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1972 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1973 { 1974 ilg_t *ilg; 1975 int i, numsrc, fmode, outsrcs; 1976 struct sockaddr_in *sin; 1977 struct sockaddr_in6 *sin6; 1978 struct in_addr *addrp; 1979 slist_t *fp; 1980 boolean_t is_v4only_api; 1981 1982 mutex_enter(&connp->conn_lock); 1983 1984 ilg = ilg_lookup_ipif(connp, grp, ipif); 1985 if (ilg == NULL) { 1986 mutex_exit(&connp->conn_lock); 1987 return (EADDRNOTAVAIL); 1988 } 1989 1990 if (gf == NULL) { 1991 ASSERT(imsf != NULL); 1992 ASSERT(!isv4mapped); 1993 is_v4only_api = B_TRUE; 1994 outsrcs = imsf->imsf_numsrc; 1995 } else { 1996 ASSERT(imsf == NULL); 1997 is_v4only_api = B_FALSE; 1998 outsrcs = gf->gf_numsrc; 1999 } 2000 2001 /* 2002 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2003 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2004 * So we need to translate here. 2005 */ 2006 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2007 MCAST_INCLUDE : MCAST_EXCLUDE; 2008 if ((fp = ilg->ilg_filter) == NULL) { 2009 numsrc = 0; 2010 } else { 2011 for (i = 0; i < outsrcs; i++) { 2012 if (i == fp->sl_numsrc) 2013 break; 2014 if (isv4mapped) { 2015 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2016 sin6->sin6_family = AF_INET6; 2017 sin6->sin6_addr = fp->sl_addr[i]; 2018 } else { 2019 if (is_v4only_api) { 2020 addrp = &imsf->imsf_slist[i]; 2021 } else { 2022 sin = (struct sockaddr_in *) 2023 &gf->gf_slist[i]; 2024 sin->sin_family = AF_INET; 2025 addrp = &sin->sin_addr; 2026 } 2027 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2028 } 2029 } 2030 numsrc = fp->sl_numsrc; 2031 } 2032 2033 if (is_v4only_api) { 2034 imsf->imsf_numsrc = numsrc; 2035 imsf->imsf_fmode = fmode; 2036 } else { 2037 gf->gf_numsrc = numsrc; 2038 gf->gf_fmode = fmode; 2039 } 2040 2041 mutex_exit(&connp->conn_lock); 2042 2043 return (0); 2044 } 2045 2046 static int 2047 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2048 const struct in6_addr *grp, ill_t *ill) 2049 { 2050 ilg_t *ilg; 2051 int i; 2052 struct sockaddr_storage *sl; 2053 struct sockaddr_in6 *sin6; 2054 slist_t *fp; 2055 2056 mutex_enter(&connp->conn_lock); 2057 2058 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2059 if (ilg == NULL) { 2060 mutex_exit(&connp->conn_lock); 2061 return (EADDRNOTAVAIL); 2062 } 2063 2064 /* 2065 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2066 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2067 * So we need to translate here. 2068 */ 2069 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 2070 MCAST_INCLUDE : MCAST_EXCLUDE; 2071 if ((fp = ilg->ilg_filter) == NULL) { 2072 gf->gf_numsrc = 0; 2073 } else { 2074 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2075 if (i == fp->sl_numsrc) 2076 break; 2077 sin6 = (struct sockaddr_in6 *)sl; 2078 sin6->sin6_family = AF_INET6; 2079 sin6->sin6_addr = fp->sl_addr[i]; 2080 } 2081 gf->gf_numsrc = fp->sl_numsrc; 2082 } 2083 2084 mutex_exit(&connp->conn_lock); 2085 2086 return (0); 2087 } 2088 2089 static int 2090 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2091 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2092 { 2093 ilg_t *ilg; 2094 int i, err, insrcs, infmode, new_fmode; 2095 struct sockaddr_in *sin; 2096 struct sockaddr_in6 *sin6; 2097 struct in_addr *addrp; 2098 slist_t *orig_filter = NULL; 2099 slist_t *new_filter = NULL; 2100 mcast_record_t orig_fmode; 2101 boolean_t leave_grp, is_v4only_api; 2102 ilg_stat_t ilgstat; 2103 2104 if (gf == NULL) { 2105 ASSERT(imsf != NULL); 2106 ASSERT(!isv4mapped); 2107 is_v4only_api = B_TRUE; 2108 insrcs = imsf->imsf_numsrc; 2109 infmode = imsf->imsf_fmode; 2110 } else { 2111 ASSERT(imsf == NULL); 2112 is_v4only_api = B_FALSE; 2113 insrcs = gf->gf_numsrc; 2114 infmode = gf->gf_fmode; 2115 } 2116 2117 /* Make sure we can handle the source list */ 2118 if (insrcs > MAX_FILTER_SIZE) 2119 return (ENOBUFS); 2120 2121 /* 2122 * setting the filter to (INCLUDE, NULL) is treated 2123 * as a request to leave the group. 2124 */ 2125 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2126 2127 ASSERT(IAM_WRITER_IPIF(ipif)); 2128 2129 mutex_enter(&connp->conn_lock); 2130 2131 ilg = ilg_lookup_ipif(connp, grp, ipif); 2132 if (ilg == NULL) { 2133 /* 2134 * if the request was actually to leave, and we 2135 * didn't find an ilg, there's nothing to do. 2136 */ 2137 if (!leave_grp) 2138 ilg = conn_ilg_alloc(connp); 2139 if (leave_grp || ilg == NULL) { 2140 mutex_exit(&connp->conn_lock); 2141 return (leave_grp ? 0 : ENOMEM); 2142 } 2143 ilgstat = ILGSTAT_NEW; 2144 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2145 ilg->ilg_ipif = ipif; 2146 ilg->ilg_ill = NULL; 2147 ilg->ilg_orig_ifindex = 0; 2148 } else if (leave_grp) { 2149 ilg_delete(connp, ilg, NULL); 2150 mutex_exit(&connp->conn_lock); 2151 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2152 return (0); 2153 } else { 2154 ilgstat = ILGSTAT_CHANGE; 2155 /* Preserve existing state in case ip_addmulti() fails */ 2156 orig_fmode = ilg->ilg_fmode; 2157 if (ilg->ilg_filter == NULL) { 2158 orig_filter = NULL; 2159 } else { 2160 orig_filter = l_alloc_copy(ilg->ilg_filter); 2161 if (orig_filter == NULL) { 2162 mutex_exit(&connp->conn_lock); 2163 return (ENOMEM); 2164 } 2165 } 2166 } 2167 2168 /* 2169 * Alloc buffer to copy new state into (see below) before 2170 * we make any changes, so we can bail if it fails. 2171 */ 2172 if ((new_filter = l_alloc()) == NULL) { 2173 mutex_exit(&connp->conn_lock); 2174 err = ENOMEM; 2175 goto free_and_exit; 2176 } 2177 2178 if (insrcs == 0) { 2179 CLEAR_SLIST(ilg->ilg_filter); 2180 } else { 2181 slist_t *fp; 2182 if (ilg->ilg_filter == NULL) { 2183 fp = l_alloc(); 2184 if (fp == NULL) { 2185 if (ilgstat == ILGSTAT_NEW) 2186 ilg_delete(connp, ilg, NULL); 2187 mutex_exit(&connp->conn_lock); 2188 err = ENOMEM; 2189 goto free_and_exit; 2190 } 2191 } else { 2192 fp = ilg->ilg_filter; 2193 } 2194 for (i = 0; i < insrcs; i++) { 2195 if (isv4mapped) { 2196 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2197 fp->sl_addr[i] = sin6->sin6_addr; 2198 } else { 2199 if (is_v4only_api) { 2200 addrp = &imsf->imsf_slist[i]; 2201 } else { 2202 sin = (struct sockaddr_in *) 2203 &gf->gf_slist[i]; 2204 addrp = &sin->sin_addr; 2205 } 2206 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2207 } 2208 } 2209 fp->sl_numsrc = insrcs; 2210 ilg->ilg_filter = fp; 2211 } 2212 /* 2213 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2214 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2215 * So we need to translate here. 2216 */ 2217 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2218 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2219 2220 /* 2221 * Save copy of ilg's filter state to pass to other functions, 2222 * so we can release conn_lock now. 2223 */ 2224 new_fmode = ilg->ilg_fmode; 2225 l_copy(ilg->ilg_filter, new_filter); 2226 2227 mutex_exit(&connp->conn_lock); 2228 2229 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2230 if (err != 0) { 2231 /* 2232 * Restore the original filter state, or delete the 2233 * newly-created ilg. We need to look up the ilg 2234 * again, though, since we've not been holding the 2235 * conn_lock. 2236 */ 2237 mutex_enter(&connp->conn_lock); 2238 ilg = ilg_lookup_ipif(connp, grp, ipif); 2239 ASSERT(ilg != NULL); 2240 if (ilgstat == ILGSTAT_NEW) { 2241 ilg_delete(connp, ilg, NULL); 2242 } else { 2243 ilg->ilg_fmode = orig_fmode; 2244 if (SLIST_IS_EMPTY(orig_filter)) { 2245 CLEAR_SLIST(ilg->ilg_filter); 2246 } else { 2247 /* 2248 * We didn't free the filter, even if we 2249 * were trying to make the source list empty; 2250 * so if orig_filter isn't empty, the ilg 2251 * must still have a filter alloc'd. 2252 */ 2253 l_copy(orig_filter, ilg->ilg_filter); 2254 } 2255 } 2256 mutex_exit(&connp->conn_lock); 2257 } 2258 2259 free_and_exit: 2260 l_free(orig_filter); 2261 l_free(new_filter); 2262 2263 return (err); 2264 } 2265 2266 static int 2267 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2268 const struct in6_addr *grp, ill_t *ill) 2269 { 2270 ilg_t *ilg; 2271 int i, orig_ifindex, orig_fmode, new_fmode, err; 2272 slist_t *orig_filter = NULL; 2273 slist_t *new_filter = NULL; 2274 struct sockaddr_storage *sl; 2275 struct sockaddr_in6 *sin6; 2276 boolean_t leave_grp; 2277 ilg_stat_t ilgstat; 2278 2279 /* Make sure we can handle the source list */ 2280 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2281 return (ENOBUFS); 2282 2283 /* 2284 * setting the filter to (INCLUDE, NULL) is treated 2285 * as a request to leave the group. 2286 */ 2287 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2288 2289 ASSERT(IAM_WRITER_ILL(ill)); 2290 2291 /* 2292 * Use the ifindex to do the lookup. We can't use the ill 2293 * directly because ilg_ill could point to a different ill 2294 * if things have moved. 2295 */ 2296 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2297 2298 mutex_enter(&connp->conn_lock); 2299 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2300 if (ilg == NULL) { 2301 /* 2302 * if the request was actually to leave, and we 2303 * didn't find an ilg, there's nothing to do. 2304 */ 2305 if (!leave_grp) 2306 ilg = conn_ilg_alloc(connp); 2307 if (leave_grp || ilg == NULL) { 2308 mutex_exit(&connp->conn_lock); 2309 return (leave_grp ? 0 : ENOMEM); 2310 } 2311 ilgstat = ILGSTAT_NEW; 2312 ilg->ilg_v6group = *grp; 2313 ilg->ilg_ipif = NULL; 2314 /* 2315 * Choose our target ill to join on. This might be 2316 * different from the ill we've been given if it's 2317 * currently down and part of a group. 2318 * 2319 * new ill is not refheld; we are writer. 2320 */ 2321 ill = ip_choose_multi_ill(ill, grp); 2322 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2323 ilg->ilg_ill = ill; 2324 /* 2325 * Remember the index that we joined on, so that we can 2326 * successfully delete them later on and also search for 2327 * duplicates if the application wants to join again. 2328 */ 2329 ilg->ilg_orig_ifindex = orig_ifindex; 2330 } else if (leave_grp) { 2331 /* 2332 * Use the ilg's current ill for the deletion, 2333 * we might have failed over. 2334 */ 2335 ill = ilg->ilg_ill; 2336 ilg_delete(connp, ilg, NULL); 2337 mutex_exit(&connp->conn_lock); 2338 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2339 connp->conn_zoneid, B_FALSE, B_TRUE); 2340 return (0); 2341 } else { 2342 ilgstat = ILGSTAT_CHANGE; 2343 /* 2344 * The current ill might be different from the one we were 2345 * asked to join on (if failover has occurred); we should 2346 * join on the ill stored in the ilg. The original ill 2347 * is noted in ilg_orig_ifindex, which matched our request. 2348 */ 2349 ill = ilg->ilg_ill; 2350 /* preserve existing state in case ip_addmulti() fails */ 2351 orig_fmode = ilg->ilg_fmode; 2352 if (ilg->ilg_filter == NULL) { 2353 orig_filter = NULL; 2354 } else { 2355 orig_filter = l_alloc_copy(ilg->ilg_filter); 2356 if (orig_filter == NULL) { 2357 mutex_exit(&connp->conn_lock); 2358 return (ENOMEM); 2359 } 2360 } 2361 } 2362 2363 /* 2364 * Alloc buffer to copy new state into (see below) before 2365 * we make any changes, so we can bail if it fails. 2366 */ 2367 if ((new_filter = l_alloc()) == NULL) { 2368 mutex_exit(&connp->conn_lock); 2369 err = ENOMEM; 2370 goto free_and_exit; 2371 } 2372 2373 if (gf->gf_numsrc == 0) { 2374 CLEAR_SLIST(ilg->ilg_filter); 2375 } else { 2376 slist_t *fp; 2377 if (ilg->ilg_filter == NULL) { 2378 fp = l_alloc(); 2379 if (fp == NULL) { 2380 if (ilgstat == ILGSTAT_NEW) 2381 ilg_delete(connp, ilg, NULL); 2382 mutex_exit(&connp->conn_lock); 2383 err = ENOMEM; 2384 goto free_and_exit; 2385 } 2386 } else { 2387 fp = ilg->ilg_filter; 2388 } 2389 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2390 sin6 = (struct sockaddr_in6 *)sl; 2391 fp->sl_addr[i] = sin6->sin6_addr; 2392 } 2393 fp->sl_numsrc = gf->gf_numsrc; 2394 ilg->ilg_filter = fp; 2395 } 2396 /* 2397 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2398 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2399 * So we need to translate here. 2400 */ 2401 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2402 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2403 2404 /* 2405 * Save copy of ilg's filter state to pass to other functions, 2406 * so we can release conn_lock now. 2407 */ 2408 new_fmode = ilg->ilg_fmode; 2409 l_copy(ilg->ilg_filter, new_filter); 2410 2411 mutex_exit(&connp->conn_lock); 2412 2413 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2414 ilgstat, new_fmode, new_filter); 2415 if (err != 0) { 2416 /* 2417 * Restore the original filter state, or delete the 2418 * newly-created ilg. We need to look up the ilg 2419 * again, though, since we've not been holding the 2420 * conn_lock. 2421 */ 2422 mutex_enter(&connp->conn_lock); 2423 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2424 ASSERT(ilg != NULL); 2425 if (ilgstat == ILGSTAT_NEW) { 2426 ilg_delete(connp, ilg, NULL); 2427 } else { 2428 ilg->ilg_fmode = orig_fmode; 2429 if (SLIST_IS_EMPTY(orig_filter)) { 2430 CLEAR_SLIST(ilg->ilg_filter); 2431 } else { 2432 /* 2433 * We didn't free the filter, even if we 2434 * were trying to make the source list empty; 2435 * so if orig_filter isn't empty, the ilg 2436 * must still have a filter alloc'd. 2437 */ 2438 l_copy(orig_filter, ilg->ilg_filter); 2439 } 2440 } 2441 mutex_exit(&connp->conn_lock); 2442 } 2443 2444 free_and_exit: 2445 l_free(orig_filter); 2446 l_free(new_filter); 2447 2448 return (err); 2449 } 2450 2451 /* 2452 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2453 */ 2454 /* ARGSUSED */ 2455 int 2456 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2457 ip_ioctl_cmd_t *ipip, void *ifreq) 2458 { 2459 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2460 /* existence verified in ip_wput_nondata() */ 2461 mblk_t *data_mp = mp->b_cont->b_cont; 2462 int datalen, err, cmd, minsize; 2463 int expsize = 0; 2464 conn_t *connp; 2465 boolean_t isv6, is_v4only_api, getcmd; 2466 struct sockaddr_in *gsin; 2467 struct sockaddr_in6 *gsin6; 2468 ipaddr_t v4grp; 2469 in6_addr_t v6grp; 2470 struct group_filter *gf = NULL; 2471 struct ip_msfilter *imsf = NULL; 2472 mblk_t *ndp; 2473 2474 if (data_mp->b_cont != NULL) { 2475 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2476 return (ENOMEM); 2477 freemsg(data_mp); 2478 data_mp = ndp; 2479 mp->b_cont->b_cont = data_mp; 2480 } 2481 2482 cmd = iocp->ioc_cmd; 2483 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2484 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2485 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2486 datalen = MBLKL(data_mp); 2487 2488 if (datalen < minsize) 2489 return (EINVAL); 2490 2491 /* 2492 * now we know we have at least have the initial structure, 2493 * but need to check for the source list array. 2494 */ 2495 if (is_v4only_api) { 2496 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2497 isv6 = B_FALSE; 2498 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2499 } else { 2500 gf = (struct group_filter *)data_mp->b_rptr; 2501 if (gf->gf_group.ss_family == AF_INET6) { 2502 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2503 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2504 } else { 2505 isv6 = B_FALSE; 2506 } 2507 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2508 } 2509 if (datalen < expsize) 2510 return (EINVAL); 2511 2512 connp = Q_TO_CONN(q); 2513 2514 /* operation not supported on the virtual network interface */ 2515 if (IS_VNI(ipif->ipif_ill)) 2516 return (EINVAL); 2517 2518 if (isv6) { 2519 ill_t *ill = ipif->ipif_ill; 2520 ill_refhold(ill); 2521 2522 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2523 v6grp = gsin6->sin6_addr; 2524 if (getcmd) 2525 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2526 else 2527 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2528 2529 ill_refrele(ill); 2530 } else { 2531 boolean_t isv4mapped = B_FALSE; 2532 if (is_v4only_api) { 2533 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2534 } else { 2535 if (gf->gf_group.ss_family == AF_INET) { 2536 gsin = (struct sockaddr_in *)&gf->gf_group; 2537 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2538 } else { 2539 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2540 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2541 v4grp); 2542 isv4mapped = B_TRUE; 2543 } 2544 } 2545 if (getcmd) 2546 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2547 isv4mapped); 2548 else 2549 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2550 isv4mapped); 2551 } 2552 2553 return (err); 2554 } 2555 2556 /* 2557 * Finds the ipif based on information in the ioctl headers. Needed to make 2558 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2559 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2560 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 2561 */ 2562 int 2563 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2564 { 2565 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2566 int cmd = iocp->ioc_cmd, err = 0; 2567 conn_t *connp; 2568 ipif_t *ipif; 2569 /* caller has verified this mblk exists */ 2570 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2571 struct ip_msfilter *imsf; 2572 struct group_filter *gf; 2573 ipaddr_t v4addr, v4grp; 2574 in6_addr_t v6grp; 2575 uint32_t index; 2576 zoneid_t zoneid; 2577 2578 connp = Q_TO_CONN(q); 2579 zoneid = connp->conn_zoneid; 2580 2581 /* don't allow multicast operations on a tcp conn */ 2582 if (IPCL_IS_TCP(connp)) 2583 return (ENOPROTOOPT); 2584 2585 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2586 /* don't allow v4-specific ioctls on v6 socket */ 2587 if (connp->conn_af_isv6) 2588 return (EAFNOSUPPORT); 2589 2590 imsf = (struct ip_msfilter *)dbuf; 2591 v4addr = imsf->imsf_interface.s_addr; 2592 v4grp = imsf->imsf_multiaddr.s_addr; 2593 if (v4addr == INADDR_ANY) { 2594 ipif = ipif_lookup_group(v4grp, zoneid); 2595 if (ipif == NULL) 2596 err = EADDRNOTAVAIL; 2597 } else { 2598 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2599 func, &err); 2600 } 2601 } else { 2602 boolean_t isv6 = B_FALSE; 2603 gf = (struct group_filter *)dbuf; 2604 index = gf->gf_interface; 2605 if (gf->gf_group.ss_family == AF_INET6) { 2606 struct sockaddr_in6 *sin6; 2607 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2608 v6grp = sin6->sin6_addr; 2609 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2610 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2611 else 2612 isv6 = B_TRUE; 2613 } else if (gf->gf_group.ss_family == AF_INET) { 2614 struct sockaddr_in *sin; 2615 sin = (struct sockaddr_in *)&gf->gf_group; 2616 v4grp = sin->sin_addr.s_addr; 2617 } else { 2618 return (EAFNOSUPPORT); 2619 } 2620 if (index == 0) { 2621 if (isv6) 2622 ipif = ipif_lookup_group_v6(&v6grp, zoneid); 2623 else 2624 ipif = ipif_lookup_group(v4grp, zoneid); 2625 if (ipif == NULL) 2626 err = EADDRNOTAVAIL; 2627 } else { 2628 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2629 q, mp, func, &err); 2630 } 2631 } 2632 2633 *ipifpp = ipif; 2634 return (err); 2635 } 2636 2637 /* 2638 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2639 * in in two stages, as the first copyin tells us the size of the attached 2640 * source buffer. This function is called by ip_wput_nondata() after the 2641 * first copyin has completed; it figures out how big the second stage 2642 * needs to be, and kicks it off. 2643 * 2644 * In some cases (numsrc < 2), the second copyin is not needed as the 2645 * first one gets a complete structure containing 1 source addr. 2646 * 2647 * The function returns 0 if a second copyin has been started (i.e. there's 2648 * no more work to be done right now), or 1 if the second copyin is not 2649 * needed and ip_wput_nondata() can continue its processing. 2650 */ 2651 int 2652 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2653 { 2654 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2655 int cmd = iocp->ioc_cmd; 2656 /* validity of this checked in ip_wput_nondata() */ 2657 mblk_t *mp1 = mp->b_cont->b_cont; 2658 int copysize = 0; 2659 int offset; 2660 2661 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2662 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2663 if (gf->gf_numsrc >= 2) { 2664 offset = sizeof (struct group_filter); 2665 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2666 } 2667 } else { 2668 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2669 if (imsf->imsf_numsrc >= 2) { 2670 offset = sizeof (struct ip_msfilter); 2671 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2672 } 2673 } 2674 if (copysize > 0) { 2675 mi_copyin_n(q, mp, offset, copysize); 2676 return (0); 2677 } 2678 return (1); 2679 } 2680 2681 /* 2682 * Handle the following optmgmt: 2683 * IP_ADD_MEMBERSHIP must not have joined already 2684 * MCAST_JOIN_GROUP must not have joined already 2685 * IP_BLOCK_SOURCE must have joined already 2686 * MCAST_BLOCK_SOURCE must have joined already 2687 * IP_JOIN_SOURCE_GROUP may have joined already 2688 * MCAST_JOIN_SOURCE_GROUP may have joined already 2689 * 2690 * fmode and src parameters may be used to determine which option is 2691 * being set, as follows (the IP_* and MCAST_* versions of each option 2692 * are functionally equivalent): 2693 * opt fmode src 2694 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2695 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2696 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2697 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2698 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2699 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2700 * 2701 * Changing the filter mode is not allowed; if a matching ilg already 2702 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2703 * 2704 * Verifies that there is a source address of appropriate scope for 2705 * the group; if not, EADDRNOTAVAIL is returned. 2706 * 2707 * The interface to be used may be identified by an address or by an 2708 * index. A pointer to the index is passed; if it is NULL, use the 2709 * address, otherwise, use the index. 2710 */ 2711 int 2712 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2713 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2714 mblk_t *first_mp) 2715 { 2716 ipif_t *ipif; 2717 ipsq_t *ipsq; 2718 int err = 0; 2719 ill_t *ill; 2720 2721 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2722 ip_restart_optmgmt, &ipif); 2723 if (err != 0) { 2724 if (err != EINPROGRESS) { 2725 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2726 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2727 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2728 } 2729 return (err); 2730 } 2731 ASSERT(ipif != NULL); 2732 2733 ill = ipif->ipif_ill; 2734 /* Operation not supported on a virtual network interface */ 2735 if (IS_VNI(ill)) { 2736 ipif_refrele(ipif); 2737 return (EINVAL); 2738 } 2739 2740 if (checkonly) { 2741 /* 2742 * do not do operation, just pretend to - new T_CHECK 2743 * semantics. The error return case above if encountered 2744 * considered a good enough "check" here. 2745 */ 2746 ipif_refrele(ipif); 2747 return (0); 2748 } 2749 2750 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2751 NEW_OP); 2752 2753 /* unspecified source addr => no source filtering */ 2754 err = ilg_add(connp, group, ipif, fmode, src); 2755 2756 IPSQ_EXIT(ipsq); 2757 2758 ipif_refrele(ipif); 2759 return (err); 2760 } 2761 2762 /* 2763 * Handle the following optmgmt: 2764 * IPV6_JOIN_GROUP must not have joined already 2765 * MCAST_JOIN_GROUP must not have joined already 2766 * MCAST_BLOCK_SOURCE must have joined already 2767 * MCAST_JOIN_SOURCE_GROUP may have joined already 2768 * 2769 * fmode and src parameters may be used to determine which option is 2770 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2771 * are functionally equivalent): 2772 * opt fmode v6src 2773 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2774 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2775 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2776 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2777 * 2778 * Changing the filter mode is not allowed; if a matching ilg already 2779 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2780 * 2781 * Verifies that there is a source address of appropriate scope for 2782 * the group; if not, EADDRNOTAVAIL is returned. 2783 * 2784 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2785 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2786 * v6src is also v4-mapped. 2787 */ 2788 int 2789 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2790 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2791 const in6_addr_t *v6src, mblk_t *first_mp) 2792 { 2793 ill_t *ill; 2794 ipif_t *ipif; 2795 char buf[INET6_ADDRSTRLEN]; 2796 ipaddr_t v4group, v4src; 2797 boolean_t isv6; 2798 ipsq_t *ipsq; 2799 int err; 2800 2801 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2802 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2803 if (err != 0) { 2804 if (err != EINPROGRESS) { 2805 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2806 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2807 sizeof (buf)), ifindex)); 2808 } 2809 return (err); 2810 } 2811 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2812 2813 /* operation is not supported on the virtual network interface */ 2814 if (isv6) { 2815 if (IS_VNI(ill)) { 2816 ill_refrele(ill); 2817 return (EINVAL); 2818 } 2819 } else { 2820 if (IS_VNI(ipif->ipif_ill)) { 2821 ipif_refrele(ipif); 2822 return (EINVAL); 2823 } 2824 } 2825 2826 if (checkonly) { 2827 /* 2828 * do not do operation, just pretend to - new T_CHECK 2829 * semantics. The error return case above if encountered 2830 * considered a good enough "check" here. 2831 */ 2832 if (isv6) 2833 ill_refrele(ill); 2834 else 2835 ipif_refrele(ipif); 2836 return (0); 2837 } 2838 2839 if (!isv6) { 2840 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2841 ipsq, NEW_OP); 2842 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2843 IPSQ_EXIT(ipsq); 2844 ipif_refrele(ipif); 2845 } else { 2846 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2847 ipsq, NEW_OP); 2848 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2849 IPSQ_EXIT(ipsq); 2850 ill_refrele(ill); 2851 } 2852 2853 return (err); 2854 } 2855 2856 static int 2857 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2858 mcast_record_t fmode, ipaddr_t src) 2859 { 2860 ilg_t *ilg; 2861 in6_addr_t v6src; 2862 boolean_t leaving = B_FALSE; 2863 2864 ASSERT(IAM_WRITER_IPIF(ipif)); 2865 2866 /* 2867 * The ilg is valid only while we hold the conn lock. Once we drop 2868 * the lock, another thread can locate another ilg on this connp, 2869 * but on a different ipif, and delete it, and cause the ilg array 2870 * to be reallocated and copied. Hence do the ilg_delete before 2871 * dropping the lock. 2872 */ 2873 mutex_enter(&connp->conn_lock); 2874 ilg = ilg_lookup_ipif(connp, group, ipif); 2875 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2876 mutex_exit(&connp->conn_lock); 2877 return (EADDRNOTAVAIL); 2878 } 2879 2880 /* 2881 * Decide if we're actually deleting the ilg or just removing a 2882 * source filter address; if just removing an addr, make sure we 2883 * aren't trying to change the filter mode, and that the addr is 2884 * actually in our filter list already. If we're removing the 2885 * last src in an include list, just delete the ilg. 2886 */ 2887 if (src == INADDR_ANY) { 2888 v6src = ipv6_all_zeros; 2889 leaving = B_TRUE; 2890 } else { 2891 int err = 0; 2892 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2893 if (fmode != ilg->ilg_fmode) 2894 err = EINVAL; 2895 else if (ilg->ilg_filter == NULL || 2896 !list_has_addr(ilg->ilg_filter, &v6src)) 2897 err = EADDRNOTAVAIL; 2898 if (err != 0) { 2899 mutex_exit(&connp->conn_lock); 2900 return (err); 2901 } 2902 if (fmode == MODE_IS_INCLUDE && 2903 ilg->ilg_filter->sl_numsrc == 1) { 2904 v6src = ipv6_all_zeros; 2905 leaving = B_TRUE; 2906 } 2907 } 2908 2909 ilg_delete(connp, ilg, &v6src); 2910 mutex_exit(&connp->conn_lock); 2911 2912 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2913 return (0); 2914 } 2915 2916 static int 2917 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2918 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2919 { 2920 ilg_t *ilg; 2921 ill_t *ilg_ill; 2922 uint_t ilg_orig_ifindex; 2923 boolean_t leaving = B_TRUE; 2924 2925 ASSERT(IAM_WRITER_ILL(ill)); 2926 2927 /* 2928 * Use the index that we originally used to join. We can't 2929 * use the ill directly because ilg_ill could point to 2930 * a new ill if things have moved. 2931 */ 2932 mutex_enter(&connp->conn_lock); 2933 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2934 ill->ill_phyint->phyint_ifindex); 2935 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2936 mutex_exit(&connp->conn_lock); 2937 return (EADDRNOTAVAIL); 2938 } 2939 2940 /* 2941 * Decide if we're actually deleting the ilg or just removing a 2942 * source filter address; if just removing an addr, make sure we 2943 * aren't trying to change the filter mode, and that the addr is 2944 * actually in our filter list already. If we're removing the 2945 * last src in an include list, just delete the ilg. 2946 */ 2947 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2948 int err = 0; 2949 if (fmode != ilg->ilg_fmode) 2950 err = EINVAL; 2951 else if (ilg->ilg_filter == NULL || 2952 !list_has_addr(ilg->ilg_filter, v6src)) 2953 err = EADDRNOTAVAIL; 2954 if (err != 0) { 2955 mutex_exit(&connp->conn_lock); 2956 return (err); 2957 } 2958 if (fmode == MODE_IS_INCLUDE && 2959 ilg->ilg_filter->sl_numsrc == 1) 2960 v6src = NULL; 2961 else 2962 leaving = B_FALSE; 2963 } 2964 2965 ilg_ill = ilg->ilg_ill; 2966 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2967 ilg_delete(connp, ilg, v6src); 2968 mutex_exit(&connp->conn_lock); 2969 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2970 connp->conn_zoneid, B_FALSE, leaving); 2971 2972 return (0); 2973 } 2974 2975 /* 2976 * Handle the following optmgmt: 2977 * IP_DROP_MEMBERSHIP will leave 2978 * MCAST_LEAVE_GROUP will leave 2979 * IP_UNBLOCK_SOURCE will not leave 2980 * MCAST_UNBLOCK_SOURCE will not leave 2981 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2982 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2983 * 2984 * fmode and src parameters may be used to determine which option is 2985 * being set, as follows (the IP_* and MCAST_* versions of each option 2986 * are functionally equivalent): 2987 * opt fmode src 2988 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2989 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2990 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2991 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2992 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2993 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2994 * 2995 * Changing the filter mode is not allowed; if a matching ilg already 2996 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2997 * 2998 * The interface to be used may be identified by an address or by an 2999 * index. A pointer to the index is passed; if it is NULL, use the 3000 * address, otherwise, use the index. 3001 */ 3002 int 3003 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 3004 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 3005 mblk_t *first_mp) 3006 { 3007 ipif_t *ipif; 3008 ipsq_t *ipsq; 3009 int err; 3010 ill_t *ill; 3011 3012 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 3013 ip_restart_optmgmt, &ipif); 3014 if (err != 0) { 3015 if (err != EINPROGRESS) { 3016 ip1dbg(("ip_opt_delete_group: no ipif for group " 3017 "0x%x, ifaddr 0x%x\n", 3018 (int)ntohl(group), (int)ntohl(ifaddr))); 3019 } 3020 return (err); 3021 } 3022 ASSERT(ipif != NULL); 3023 3024 ill = ipif->ipif_ill; 3025 /* Operation not supported on a virtual network interface */ 3026 if (IS_VNI(ill)) { 3027 ipif_refrele(ipif); 3028 return (EINVAL); 3029 } 3030 3031 if (checkonly) { 3032 /* 3033 * do not do operation, just pretend to - new T_CHECK 3034 * semantics. The error return case above if encountered 3035 * considered a good enough "check" here. 3036 */ 3037 ipif_refrele(ipif); 3038 return (0); 3039 } 3040 3041 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3042 NEW_OP); 3043 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3044 IPSQ_EXIT(ipsq); 3045 3046 ipif_refrele(ipif); 3047 return (err); 3048 } 3049 3050 /* 3051 * Handle the following optmgmt: 3052 * IPV6_LEAVE_GROUP will leave 3053 * MCAST_LEAVE_GROUP will leave 3054 * MCAST_UNBLOCK_SOURCE will not leave 3055 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3056 * 3057 * fmode and src parameters may be used to determine which option is 3058 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3059 * are functionally equivalent): 3060 * opt fmode v6src 3061 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3062 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3063 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3064 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3065 * 3066 * Changing the filter mode is not allowed; if a matching ilg already 3067 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3068 * 3069 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3070 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3071 * v6src is also v4-mapped. 3072 */ 3073 int 3074 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3075 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3076 const in6_addr_t *v6src, mblk_t *first_mp) 3077 { 3078 ill_t *ill; 3079 ipif_t *ipif; 3080 char buf[INET6_ADDRSTRLEN]; 3081 ipaddr_t v4group, v4src; 3082 boolean_t isv6; 3083 ipsq_t *ipsq; 3084 int err; 3085 3086 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3087 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3088 if (err != 0) { 3089 if (err != EINPROGRESS) { 3090 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3091 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3092 sizeof (buf)), ifindex)); 3093 } 3094 return (err); 3095 } 3096 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3097 3098 /* operation is not supported on the virtual network interface */ 3099 if (isv6) { 3100 if (IS_VNI(ill)) { 3101 ill_refrele(ill); 3102 return (EINVAL); 3103 } 3104 } else { 3105 if (IS_VNI(ipif->ipif_ill)) { 3106 ipif_refrele(ipif); 3107 return (EINVAL); 3108 } 3109 } 3110 3111 if (checkonly) { 3112 /* 3113 * do not do operation, just pretend to - new T_CHECK 3114 * semantics. The error return case above if encountered 3115 * considered a good enough "check" here. 3116 */ 3117 if (isv6) 3118 ill_refrele(ill); 3119 else 3120 ipif_refrele(ipif); 3121 return (0); 3122 } 3123 3124 if (!isv6) { 3125 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3126 ipsq, NEW_OP); 3127 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3128 v4src); 3129 IPSQ_EXIT(ipsq); 3130 ipif_refrele(ipif); 3131 } else { 3132 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3133 ipsq, NEW_OP); 3134 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3135 v6src); 3136 IPSQ_EXIT(ipsq); 3137 ill_refrele(ill); 3138 } 3139 3140 return (err); 3141 } 3142 3143 /* 3144 * Group mgmt for upper conn that passes things down 3145 * to the interface multicast list (and DLPI) 3146 * These routines can handle new style options that specify an interface name 3147 * as opposed to an interface address (needed for general handling of 3148 * unnumbered interfaces.) 3149 */ 3150 3151 /* 3152 * Add a group to an upper conn group data structure and pass things down 3153 * to the interface multicast list (and DLPI) 3154 */ 3155 static int 3156 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3157 ipaddr_t src) 3158 { 3159 int error = 0; 3160 ill_t *ill; 3161 ilg_t *ilg; 3162 ilg_stat_t ilgstat; 3163 slist_t *new_filter = NULL; 3164 int new_fmode; 3165 3166 ASSERT(IAM_WRITER_IPIF(ipif)); 3167 3168 ill = ipif->ipif_ill; 3169 3170 if (!(ill->ill_flags & ILLF_MULTICAST)) 3171 return (EADDRNOTAVAIL); 3172 3173 /* 3174 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3175 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3176 * serialize 2 threads doing join (sock, group1, hme0:0) and 3177 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3178 * but both operations happen on the same conn. 3179 */ 3180 mutex_enter(&connp->conn_lock); 3181 ilg = ilg_lookup_ipif(connp, group, ipif); 3182 3183 /* 3184 * Depending on the option we're handling, may or may not be okay 3185 * if group has already been added. Figure out our rules based 3186 * on fmode and src params. Also make sure there's enough room 3187 * in the filter if we're adding a source to an existing filter. 3188 */ 3189 if (src == INADDR_ANY) { 3190 /* we're joining for all sources, must not have joined */ 3191 if (ilg != NULL) 3192 error = EADDRINUSE; 3193 } else { 3194 if (fmode == MODE_IS_EXCLUDE) { 3195 /* (excl {addr}) => block source, must have joined */ 3196 if (ilg == NULL) 3197 error = EADDRNOTAVAIL; 3198 } 3199 /* (incl {addr}) => join source, may have joined */ 3200 3201 if (ilg != NULL && 3202 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3203 error = ENOBUFS; 3204 } 3205 if (error != 0) { 3206 mutex_exit(&connp->conn_lock); 3207 return (error); 3208 } 3209 3210 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3211 3212 /* 3213 * Alloc buffer to copy new state into (see below) before 3214 * we make any changes, so we can bail if it fails. 3215 */ 3216 if ((new_filter = l_alloc()) == NULL) { 3217 mutex_exit(&connp->conn_lock); 3218 return (ENOMEM); 3219 } 3220 3221 if (ilg == NULL) { 3222 ilgstat = ILGSTAT_NEW; 3223 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3224 mutex_exit(&connp->conn_lock); 3225 l_free(new_filter); 3226 return (ENOMEM); 3227 } 3228 if (src != INADDR_ANY) { 3229 ilg->ilg_filter = l_alloc(); 3230 if (ilg->ilg_filter == NULL) { 3231 ilg_delete(connp, ilg, NULL); 3232 mutex_exit(&connp->conn_lock); 3233 l_free(new_filter); 3234 return (ENOMEM); 3235 } 3236 ilg->ilg_filter->sl_numsrc = 1; 3237 IN6_IPADDR_TO_V4MAPPED(src, 3238 &ilg->ilg_filter->sl_addr[0]); 3239 } 3240 if (group == INADDR_ANY) { 3241 ilg->ilg_v6group = ipv6_all_zeros; 3242 } else { 3243 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3244 } 3245 ilg->ilg_ipif = ipif; 3246 ilg->ilg_ill = NULL; 3247 ilg->ilg_orig_ifindex = 0; 3248 ilg->ilg_fmode = fmode; 3249 } else { 3250 int index; 3251 in6_addr_t v6src; 3252 ilgstat = ILGSTAT_CHANGE; 3253 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3254 mutex_exit(&connp->conn_lock); 3255 l_free(new_filter); 3256 return (EINVAL); 3257 } 3258 if (ilg->ilg_filter == NULL) { 3259 ilg->ilg_filter = l_alloc(); 3260 if (ilg->ilg_filter == NULL) { 3261 mutex_exit(&connp->conn_lock); 3262 l_free(new_filter); 3263 return (ENOMEM); 3264 } 3265 } 3266 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3267 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3268 mutex_exit(&connp->conn_lock); 3269 l_free(new_filter); 3270 return (EADDRNOTAVAIL); 3271 } 3272 index = ilg->ilg_filter->sl_numsrc++; 3273 ilg->ilg_filter->sl_addr[index] = v6src; 3274 } 3275 3276 /* 3277 * Save copy of ilg's filter state to pass to other functions, 3278 * so we can release conn_lock now. 3279 */ 3280 new_fmode = ilg->ilg_fmode; 3281 l_copy(ilg->ilg_filter, new_filter); 3282 3283 mutex_exit(&connp->conn_lock); 3284 3285 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3286 if (error != 0) { 3287 /* 3288 * Need to undo what we did before calling ip_addmulti()! 3289 * Must look up the ilg again since we've not been holding 3290 * conn_lock. 3291 */ 3292 in6_addr_t v6src; 3293 if (ilgstat == ILGSTAT_NEW) 3294 v6src = ipv6_all_zeros; 3295 else 3296 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3297 mutex_enter(&connp->conn_lock); 3298 ilg = ilg_lookup_ipif(connp, group, ipif); 3299 ASSERT(ilg != NULL); 3300 ilg_delete(connp, ilg, &v6src); 3301 mutex_exit(&connp->conn_lock); 3302 l_free(new_filter); 3303 return (error); 3304 } 3305 3306 l_free(new_filter); 3307 return (0); 3308 } 3309 3310 static int 3311 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3312 mcast_record_t fmode, const in6_addr_t *v6src) 3313 { 3314 int error = 0; 3315 int orig_ifindex; 3316 ilg_t *ilg; 3317 ilg_stat_t ilgstat; 3318 slist_t *new_filter = NULL; 3319 int new_fmode; 3320 3321 ASSERT(IAM_WRITER_ILL(ill)); 3322 3323 if (!(ill->ill_flags & ILLF_MULTICAST)) 3324 return (EADDRNOTAVAIL); 3325 3326 /* 3327 * conn_lock protects the ilg list. Serializes 2 threads doing 3328 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3329 * and hme1 map to different ipsq's, but both operations happen 3330 * on the same conn. 3331 */ 3332 mutex_enter(&connp->conn_lock); 3333 3334 /* 3335 * Use the ifindex to do the lookup. We can't use the ill 3336 * directly because ilg_ill could point to a different ill if 3337 * things have moved. 3338 */ 3339 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3340 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3341 3342 /* 3343 * Depending on the option we're handling, may or may not be okay 3344 * if group has already been added. Figure out our rules based 3345 * on fmode and src params. Also make sure there's enough room 3346 * in the filter if we're adding a source to an existing filter. 3347 */ 3348 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3349 /* we're joining for all sources, must not have joined */ 3350 if (ilg != NULL) 3351 error = EADDRINUSE; 3352 } else { 3353 if (fmode == MODE_IS_EXCLUDE) { 3354 /* (excl {addr}) => block source, must have joined */ 3355 if (ilg == NULL) 3356 error = EADDRNOTAVAIL; 3357 } 3358 /* (incl {addr}) => join source, may have joined */ 3359 3360 if (ilg != NULL && 3361 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3362 error = ENOBUFS; 3363 } 3364 if (error != 0) { 3365 mutex_exit(&connp->conn_lock); 3366 return (error); 3367 } 3368 3369 /* 3370 * Alloc buffer to copy new state into (see below) before 3371 * we make any changes, so we can bail if it fails. 3372 */ 3373 if ((new_filter = l_alloc()) == NULL) { 3374 mutex_exit(&connp->conn_lock); 3375 return (ENOMEM); 3376 } 3377 3378 if (ilg == NULL) { 3379 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3380 mutex_exit(&connp->conn_lock); 3381 l_free(new_filter); 3382 return (ENOMEM); 3383 } 3384 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3385 ilg->ilg_filter = l_alloc(); 3386 if (ilg->ilg_filter == NULL) { 3387 ilg_delete(connp, ilg, NULL); 3388 mutex_exit(&connp->conn_lock); 3389 l_free(new_filter); 3390 return (ENOMEM); 3391 } 3392 ilg->ilg_filter->sl_numsrc = 1; 3393 ilg->ilg_filter->sl_addr[0] = *v6src; 3394 } 3395 ilgstat = ILGSTAT_NEW; 3396 ilg->ilg_v6group = *v6group; 3397 ilg->ilg_fmode = fmode; 3398 ilg->ilg_ipif = NULL; 3399 /* 3400 * Choose our target ill to join on. This might be different 3401 * from the ill we've been given if it's currently down and 3402 * part of a group. 3403 * 3404 * new ill is not refheld; we are writer. 3405 */ 3406 ill = ip_choose_multi_ill(ill, v6group); 3407 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3408 ilg->ilg_ill = ill; 3409 /* 3410 * Remember the orig_ifindex that we joined on, so that we 3411 * can successfully delete them later on and also search 3412 * for duplicates if the application wants to join again. 3413 */ 3414 ilg->ilg_orig_ifindex = orig_ifindex; 3415 } else { 3416 int index; 3417 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3418 mutex_exit(&connp->conn_lock); 3419 l_free(new_filter); 3420 return (EINVAL); 3421 } 3422 if (ilg->ilg_filter == NULL) { 3423 ilg->ilg_filter = l_alloc(); 3424 if (ilg->ilg_filter == NULL) { 3425 mutex_exit(&connp->conn_lock); 3426 l_free(new_filter); 3427 return (ENOMEM); 3428 } 3429 } 3430 if (list_has_addr(ilg->ilg_filter, v6src)) { 3431 mutex_exit(&connp->conn_lock); 3432 l_free(new_filter); 3433 return (EADDRNOTAVAIL); 3434 } 3435 ilgstat = ILGSTAT_CHANGE; 3436 index = ilg->ilg_filter->sl_numsrc++; 3437 ilg->ilg_filter->sl_addr[index] = *v6src; 3438 /* 3439 * The current ill might be different from the one we were 3440 * asked to join on (if failover has occurred); we should 3441 * join on the ill stored in the ilg. The original ill 3442 * is noted in ilg_orig_ifindex, which matched our request. 3443 */ 3444 ill = ilg->ilg_ill; 3445 } 3446 3447 /* 3448 * Save copy of ilg's filter state to pass to other functions, 3449 * so we can release conn_lock now. 3450 */ 3451 new_fmode = ilg->ilg_fmode; 3452 l_copy(ilg->ilg_filter, new_filter); 3453 3454 mutex_exit(&connp->conn_lock); 3455 3456 /* 3457 * Now update the ill. We wait to do this until after the ilg 3458 * has been updated because we need to update the src filter 3459 * info for the ill, which involves looking at the status of 3460 * all the ilgs associated with this group/interface pair. 3461 */ 3462 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3463 ilgstat, new_fmode, new_filter); 3464 if (error != 0) { 3465 /* 3466 * But because we waited, we have to undo the ilg update 3467 * if ip_addmulti_v6() fails. We also must lookup ilg 3468 * again, since we've not been holding conn_lock. 3469 */ 3470 in6_addr_t delsrc = 3471 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3472 mutex_enter(&connp->conn_lock); 3473 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3474 ASSERT(ilg != NULL); 3475 ilg_delete(connp, ilg, &delsrc); 3476 mutex_exit(&connp->conn_lock); 3477 l_free(new_filter); 3478 return (error); 3479 } 3480 3481 l_free(new_filter); 3482 3483 return (0); 3484 } 3485 3486 /* 3487 * Find an IPv4 ilg matching group, ill and source 3488 */ 3489 ilg_t * 3490 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3491 { 3492 in6_addr_t v6group, v6src; 3493 int i; 3494 boolean_t isinlist; 3495 ilg_t *ilg; 3496 ipif_t *ipif; 3497 ill_t *ilg_ill; 3498 3499 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3500 3501 /* 3502 * INADDR_ANY is represented as the IPv6 unspecified addr. 3503 */ 3504 if (group == INADDR_ANY) 3505 v6group = ipv6_all_zeros; 3506 else 3507 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3508 3509 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3510 /* ilg_ipif is NULL for v6; skip them */ 3511 ilg = &connp->conn_ilg[i]; 3512 if ((ipif = ilg->ilg_ipif) == NULL) 3513 continue; 3514 ASSERT(ilg->ilg_ill == NULL); 3515 ilg_ill = ipif->ipif_ill; 3516 ASSERT(!ilg_ill->ill_isv6); 3517 if (ilg_ill == ill && 3518 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3519 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3520 /* no source filter, so this is a match */ 3521 return (ilg); 3522 } 3523 break; 3524 } 3525 } 3526 if (i == connp->conn_ilg_inuse) 3527 return (NULL); 3528 3529 /* 3530 * we have an ilg with matching ill and group; but 3531 * the ilg has a source list that we must check. 3532 */ 3533 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3534 isinlist = B_FALSE; 3535 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3536 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3537 isinlist = B_TRUE; 3538 break; 3539 } 3540 } 3541 3542 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3543 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3544 return (ilg); 3545 3546 return (NULL); 3547 } 3548 3549 /* 3550 * Find an IPv6 ilg matching group, ill, and source 3551 */ 3552 ilg_t * 3553 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3554 const in6_addr_t *v6src, ill_t *ill) 3555 { 3556 int i; 3557 boolean_t isinlist; 3558 ilg_t *ilg; 3559 ill_t *ilg_ill; 3560 3561 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3562 3563 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3564 ilg = &connp->conn_ilg[i]; 3565 if ((ilg_ill = ilg->ilg_ill) == NULL) 3566 continue; 3567 ASSERT(ilg->ilg_ipif == NULL); 3568 ASSERT(ilg_ill->ill_isv6); 3569 if (ilg_ill == ill && 3570 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3571 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3572 /* no source filter, so this is a match */ 3573 return (ilg); 3574 } 3575 break; 3576 } 3577 } 3578 if (i == connp->conn_ilg_inuse) 3579 return (NULL); 3580 3581 /* 3582 * we have an ilg with matching ill and group; but 3583 * the ilg has a source list that we must check. 3584 */ 3585 isinlist = B_FALSE; 3586 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3587 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3588 isinlist = B_TRUE; 3589 break; 3590 } 3591 } 3592 3593 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3594 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3595 return (ilg); 3596 3597 return (NULL); 3598 } 3599 3600 /* 3601 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 3602 * This is useful when the interface fails and we have moved 3603 * to a new ill, but still would like to locate using the index 3604 * that we originally used to join. Used only for IPv6 currently. 3605 */ 3606 static ilg_t * 3607 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3608 { 3609 ilg_t *ilg; 3610 int i; 3611 3612 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3613 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3614 ilg = &connp->conn_ilg[i]; 3615 /* ilg_ill is NULL for V4. Skip them */ 3616 if (ilg->ilg_ill == NULL) 3617 continue; 3618 /* ilg_ipif is NULL for V6 */ 3619 ASSERT(ilg->ilg_ipif == NULL); 3620 ASSERT(ilg->ilg_orig_ifindex != 0); 3621 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3622 ilg->ilg_orig_ifindex == ifindex) { 3623 return (ilg); 3624 } 3625 } 3626 return (NULL); 3627 } 3628 3629 /* 3630 * Find an IPv6 ilg matching group and ill 3631 */ 3632 ilg_t * 3633 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3634 { 3635 ilg_t *ilg; 3636 int i; 3637 ill_t *mem_ill; 3638 3639 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3640 3641 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3642 ilg = &connp->conn_ilg[i]; 3643 if ((mem_ill = ilg->ilg_ill) == NULL) 3644 continue; 3645 ASSERT(ilg->ilg_ipif == NULL); 3646 ASSERT(mem_ill->ill_isv6); 3647 if (mem_ill == ill && 3648 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3649 return (ilg); 3650 } 3651 return (NULL); 3652 } 3653 3654 /* 3655 * Find an IPv4 ilg matching group and ipif 3656 */ 3657 static ilg_t * 3658 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3659 { 3660 in6_addr_t v6group; 3661 int i; 3662 3663 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3664 ASSERT(!ipif->ipif_ill->ill_isv6); 3665 3666 if (group == INADDR_ANY) 3667 v6group = ipv6_all_zeros; 3668 else 3669 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3670 3671 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3672 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3673 &v6group) && 3674 connp->conn_ilg[i].ilg_ipif == ipif) 3675 return (&connp->conn_ilg[i]); 3676 } 3677 return (NULL); 3678 } 3679 3680 /* 3681 * If a source address is passed in (src != NULL and src is not 3682 * unspecified), remove the specified src addr from the given ilg's 3683 * filter list, else delete the ilg. 3684 */ 3685 static void 3686 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3687 { 3688 int i; 3689 3690 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3691 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3692 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3693 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3694 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3695 3696 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3697 if (connp->conn_ilg_walker_cnt != 0) { 3698 ilg->ilg_flags |= ILG_DELETED; 3699 return; 3700 } 3701 3702 FREE_SLIST(ilg->ilg_filter); 3703 3704 i = ilg - &connp->conn_ilg[0]; 3705 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3706 3707 /* Move other entries up one step */ 3708 connp->conn_ilg_inuse--; 3709 for (; i < connp->conn_ilg_inuse; i++) 3710 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3711 3712 if (connp->conn_ilg_inuse == 0) { 3713 mi_free((char *)connp->conn_ilg); 3714 connp->conn_ilg = NULL; 3715 cv_broadcast(&connp->conn_refcv); 3716 } 3717 } else { 3718 l_remove(ilg->ilg_filter, src); 3719 } 3720 } 3721 3722 /* 3723 * Called from conn close. No new ilg can be added or removed. 3724 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3725 * will return error if conn has started closing. 3726 */ 3727 void 3728 ilg_delete_all(conn_t *connp) 3729 { 3730 int i; 3731 ipif_t *ipif = NULL; 3732 ill_t *ill = NULL; 3733 ilg_t *ilg; 3734 in6_addr_t v6group; 3735 boolean_t success; 3736 ipsq_t *ipsq; 3737 int orig_ifindex; 3738 3739 mutex_enter(&connp->conn_lock); 3740 retry: 3741 ILG_WALKER_HOLD(connp); 3742 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3743 ilg = &connp->conn_ilg[i]; 3744 /* 3745 * Since this walk is not atomic (we drop the 3746 * conn_lock and wait in ipsq_enter) we need 3747 * to check for the ILG_DELETED flag. 3748 */ 3749 if (ilg->ilg_flags & ILG_DELETED) { 3750 /* Go to the next ilg */ 3751 i--; 3752 continue; 3753 } 3754 v6group = ilg->ilg_v6group; 3755 3756 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3757 ipif = ilg->ilg_ipif; 3758 ill = ipif->ipif_ill; 3759 } else { 3760 ipif = NULL; 3761 ill = ilg->ilg_ill; 3762 } 3763 /* 3764 * We may not be able to refhold the ill if the ill/ipif 3765 * is changing. But we need to make sure that the ill will 3766 * not vanish. So we just bump up the ill_waiter count. 3767 * If we are unable to do even that, then the ill is closing, 3768 * in which case the unplumb thread will handle the cleanup, 3769 * and we move on to the next ilg. 3770 */ 3771 if (!ill_waiter_inc(ill)) { 3772 /* Go to the next ilg */ 3773 i--; 3774 continue; 3775 } 3776 mutex_exit(&connp->conn_lock); 3777 /* 3778 * To prevent deadlock between ill close which waits inside 3779 * the perimeter, and conn close, ipsq_enter returns error, 3780 * the moment ILL_CONDEMNED is set, in which case ill close 3781 * takes responsibility to cleanup the ilgs. Note that we 3782 * have not yet set condemned flag, otherwise the conn can't 3783 * be refheld for cleanup by those routines and it would be 3784 * a mutual deadlock. 3785 */ 3786 success = ipsq_enter(ill, B_FALSE); 3787 ipsq = ill->ill_phyint->phyint_ipsq; 3788 ill_waiter_dcr(ill); 3789 mutex_enter(&connp->conn_lock); 3790 if (!success) { 3791 /* Go to the next ilg */ 3792 i--; 3793 continue; 3794 } 3795 3796 /* 3797 * Make sure that nothing has changed under. For eg. 3798 * a failover/failback can change ilg_ill while we were 3799 * waiting to become exclusive above 3800 */ 3801 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3802 ipif = ilg->ilg_ipif; 3803 ill = ipif->ipif_ill; 3804 } else { 3805 ipif = NULL; 3806 ill = ilg->ilg_ill; 3807 } 3808 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3809 /* 3810 * The ilg has changed under us probably due 3811 * to a failover or unplumb. Retry on the same ilg. 3812 */ 3813 mutex_exit(&connp->conn_lock); 3814 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3815 mutex_enter(&connp->conn_lock); 3816 continue; 3817 } 3818 v6group = ilg->ilg_v6group; 3819 orig_ifindex = ilg->ilg_orig_ifindex; 3820 ilg_delete(connp, ilg, NULL); 3821 mutex_exit(&connp->conn_lock); 3822 3823 if (ipif != NULL) 3824 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3825 B_FALSE, B_TRUE); 3826 3827 else 3828 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3829 connp->conn_zoneid, B_FALSE, B_TRUE); 3830 3831 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3832 mutex_enter(&connp->conn_lock); 3833 /* Go to the next ilg */ 3834 i--; 3835 } 3836 ILG_WALKER_RELE(connp); 3837 3838 /* If any ill was skipped above wait and retry */ 3839 if (connp->conn_ilg_inuse != 0) { 3840 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3841 goto retry; 3842 } 3843 mutex_exit(&connp->conn_lock); 3844 } 3845 3846 /* 3847 * Called from ill close by ipcl_walk for clearing conn_ilg and 3848 * conn_multicast_ipif for a given ipif. conn is held by caller. 3849 * Note that ipcl_walk only walks conns that are not yet condemned. 3850 * condemned conns can't be refheld. For this reason, conn must become clean 3851 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3852 * condemned flag. 3853 */ 3854 static void 3855 conn_delete_ipif(conn_t *connp, caddr_t arg) 3856 { 3857 ipif_t *ipif = (ipif_t *)arg; 3858 int i; 3859 char group_buf1[INET6_ADDRSTRLEN]; 3860 char group_buf2[INET6_ADDRSTRLEN]; 3861 ipaddr_t group; 3862 ilg_t *ilg; 3863 3864 /* 3865 * Even though conn_ilg_inuse can change while we are in this loop, 3866 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3867 * be created or deleted for this connp, on this ill, since this ill 3868 * is the perimeter. So we won't miss any ilg in this cleanup. 3869 */ 3870 mutex_enter(&connp->conn_lock); 3871 3872 /* 3873 * Increment the walker count, so that ilg repacking does not 3874 * occur while we are in the loop. 3875 */ 3876 ILG_WALKER_HOLD(connp); 3877 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3878 ilg = &connp->conn_ilg[i]; 3879 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3880 continue; 3881 /* 3882 * ip_close cannot be cleaning this ilg at the same time. 3883 * since it also has to execute in this ill's perimeter which 3884 * we are now holding. Only a clean conn can be condemned. 3885 */ 3886 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3887 3888 /* Blow away the membership */ 3889 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3890 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3891 group_buf1, sizeof (group_buf1)), 3892 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3893 group_buf2, sizeof (group_buf2)), 3894 ipif->ipif_ill->ill_name)); 3895 3896 /* ilg_ipif is NULL for V6, so we won't be here */ 3897 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3898 3899 group = V4_PART_OF_V6(ilg->ilg_v6group); 3900 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3901 mutex_exit(&connp->conn_lock); 3902 3903 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3904 mutex_enter(&connp->conn_lock); 3905 } 3906 3907 /* 3908 * If we are the last walker, need to physically delete the 3909 * ilgs and repack. 3910 */ 3911 ILG_WALKER_RELE(connp); 3912 3913 if (connp->conn_multicast_ipif == ipif) { 3914 /* Revert to late binding */ 3915 connp->conn_multicast_ipif = NULL; 3916 } 3917 mutex_exit(&connp->conn_lock); 3918 3919 conn_delete_ire(connp, (caddr_t)ipif); 3920 } 3921 3922 /* 3923 * Called from ill close by ipcl_walk for clearing conn_ilg and 3924 * conn_multicast_ill for a given ill. conn is held by caller. 3925 * Note that ipcl_walk only walks conns that are not yet condemned. 3926 * condemned conns can't be refheld. For this reason, conn must become clean 3927 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3928 * condemned flag. 3929 */ 3930 static void 3931 conn_delete_ill(conn_t *connp, caddr_t arg) 3932 { 3933 ill_t *ill = (ill_t *)arg; 3934 int i; 3935 char group_buf[INET6_ADDRSTRLEN]; 3936 in6_addr_t v6group; 3937 int orig_ifindex; 3938 ilg_t *ilg; 3939 3940 /* 3941 * Even though conn_ilg_inuse can change while we are in this loop, 3942 * no new ilgs can be created/deleted for this connp, on this 3943 * ill, since this ill is the perimeter. So we won't miss any ilg 3944 * in this cleanup. 3945 */ 3946 mutex_enter(&connp->conn_lock); 3947 3948 /* 3949 * Increment the walker count, so that ilg repacking does not 3950 * occur while we are in the loop. 3951 */ 3952 ILG_WALKER_HOLD(connp); 3953 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3954 ilg = &connp->conn_ilg[i]; 3955 if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) { 3956 /* 3957 * ip_close cannot be cleaning this ilg at the same 3958 * time, since it also has to execute in this ill's 3959 * perimeter which we are now holding. Only a clean 3960 * conn can be condemned. 3961 */ 3962 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3963 3964 /* Blow away the membership */ 3965 ip1dbg(("conn_delete_ilg_ill: %s on %s\n", 3966 inet_ntop(AF_INET6, &ilg->ilg_v6group, 3967 group_buf, sizeof (group_buf)), 3968 ill->ill_name)); 3969 3970 v6group = ilg->ilg_v6group; 3971 orig_ifindex = ilg->ilg_orig_ifindex; 3972 ilg_delete(connp, ilg, NULL); 3973 mutex_exit(&connp->conn_lock); 3974 3975 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3976 connp->conn_zoneid, B_FALSE, B_TRUE); 3977 mutex_enter(&connp->conn_lock); 3978 } 3979 } 3980 /* 3981 * If we are the last walker, need to physically delete the 3982 * ilgs and repack. 3983 */ 3984 ILG_WALKER_RELE(connp); 3985 3986 if (connp->conn_multicast_ill == ill) { 3987 /* Revert to late binding */ 3988 connp->conn_multicast_ill = NULL; 3989 connp->conn_orig_multicast_ifindex = 0; 3990 } 3991 mutex_exit(&connp->conn_lock); 3992 } 3993 3994 /* 3995 * Called when an ipif is unplumbed to make sure that there are no 3996 * dangling conn references to that ipif. 3997 * Handles ilg_ipif and conn_multicast_ipif 3998 */ 3999 void 4000 reset_conn_ipif(ipif) 4001 ipif_t *ipif; 4002 { 4003 ipcl_walk(conn_delete_ipif, (caddr_t)ipif); 4004 /* flush the SCTP ire cache for this ipif */ 4005 sctp_ire_cache_flush(ipif); 4006 } 4007 4008 /* 4009 * Called when an ill is unplumbed to make sure that there are no 4010 * dangling conn references to that ill. 4011 * Handles ilg_ill, conn_multicast_ill. 4012 */ 4013 void 4014 reset_conn_ill(ill_t *ill) 4015 { 4016 ipcl_walk(conn_delete_ill, (caddr_t)ill); 4017 } 4018 4019 #ifdef DEBUG 4020 /* 4021 * Walk functions walk all the interfaces in the system to make 4022 * sure that there is no refernece to the ipif or ill that is 4023 * going away. 4024 */ 4025 int 4026 ilm_walk_ill(ill_t *ill) 4027 { 4028 int cnt = 0; 4029 ill_t *till; 4030 ilm_t *ilm; 4031 ill_walk_context_t ctx; 4032 4033 rw_enter(&ill_g_lock, RW_READER); 4034 till = ILL_START_WALK_ALL(&ctx); 4035 for (; till != NULL; till = ill_next(&ctx, till)) { 4036 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4037 if (ilm->ilm_ill == ill) { 4038 cnt++; 4039 } 4040 } 4041 } 4042 rw_exit(&ill_g_lock); 4043 4044 return (cnt); 4045 } 4046 4047 /* 4048 * This function is called before the ipif is freed. 4049 */ 4050 int 4051 ilm_walk_ipif(ipif_t *ipif) 4052 { 4053 int cnt = 0; 4054 ill_t *till; 4055 ilm_t *ilm; 4056 ill_walk_context_t ctx; 4057 4058 till = ILL_START_WALK_ALL(&ctx); 4059 for (; till != NULL; till = ill_next(&ctx, till)) { 4060 for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 4061 if (ilm->ilm_ipif == ipif) { 4062 cnt++; 4063 } 4064 } 4065 } 4066 return (cnt); 4067 } 4068 #endif 4069