1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 /* Copyright (c) 1990 Mentat Inc. */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include <sys/stream.h> 32 #include <sys/dlpi.h> 33 #include <sys/stropts.h> 34 #include <sys/strsun.h> 35 #include <sys/strlog.h> 36 #include <sys/ddi.h> 37 #include <sys/cmn_err.h> 38 #include <sys/zone.h> 39 40 #include <sys/param.h> 41 #include <sys/socket.h> 42 #define _SUN_TPI_VERSION 2 43 #include <sys/tihdr.h> 44 #include <net/if.h> 45 #include <net/if_arp.h> 46 #include <sys/sockio.h> 47 #include <sys/systm.h> 48 #include <net/route.h> 49 #include <netinet/in.h> 50 #include <net/if_dl.h> 51 #include <netinet/ip6.h> 52 #include <netinet/icmp6.h> 53 54 #include <inet/common.h> 55 #include <inet/mi.h> 56 #include <inet/nd.h> 57 #include <inet/arp.h> 58 #include <inet/ip.h> 59 #include <inet/ip6.h> 60 #include <inet/ip_if.h> 61 #include <inet/ip_ire.h> 62 #include <inet/ip_ndp.h> 63 #include <inet/ip_multi.h> 64 #include <inet/ipclassifier.h> 65 #include <inet/ipsec_impl.h> 66 #include <inet/sctp_ip.h> 67 #include <inet/ip_listutils.h> 68 69 #include <netinet/igmp.h> 70 71 /* igmpv3/mldv2 source filter manipulation */ 72 static void ilm_bld_flists(conn_t *conn, void *arg); 73 static void ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, 74 slist_t *flist); 75 76 static ilm_t *ilm_add_v6(ipif_t *ipif, const in6_addr_t *group, 77 ilg_stat_t ilgstat, mcast_record_t ilg_fmode, slist_t *ilg_flist, 78 int orig_ifindex, zoneid_t zoneid); 79 static void ilm_delete(ilm_t *ilm); 80 static int ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *group); 81 static int ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *group); 82 static ilg_t *ilg_lookup_ill_index_v6(conn_t *connp, 83 const in6_addr_t *v6group, int index); 84 static ilg_t *ilg_lookup_ipif(conn_t *connp, ipaddr_t group, 85 ipif_t *ipif); 86 static int ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, 87 mcast_record_t fmode, ipaddr_t src); 88 static int ilg_add_v6(conn_t *connp, const in6_addr_t *group, ill_t *ill, 89 mcast_record_t fmode, const in6_addr_t *v6src); 90 static void ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src); 91 static mblk_t *ill_create_dl(ill_t *ill, uint32_t dl_primitive, 92 uint32_t length, uint32_t *addr_lenp, uint32_t *addr_offp); 93 static mblk_t *ill_create_squery(ill_t *ill, ipaddr_t ipaddr, 94 uint32_t addrlen, uint32_t addroff, mblk_t *mp_tail); 95 static void conn_ilg_reap(conn_t *connp); 96 static int ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, 97 ipif_t *ipif, mcast_record_t fmode, ipaddr_t 
    src);
static int	ip_opt_delete_group_excl_v6(conn_t *connp,
    const in6_addr_t *v6group, ill_t *ill, mcast_record_t fmode,
    const in6_addr_t *v6src);

/*
 * MT notes:
 *
 * Multicast joins operate on both the ilg and ilm structures. Multiple
 * threads operating on a conn (socket) trying to do multicast joins
 * need to synchronize when operating on the ilg. Multiple threads
 * potentially operating on different conns (socket endpoints) trying to
 * do multicast joins could eventually end up trying to manipulate the
 * ilm simultaneously and need to synchronize access to the ilm.
 * Both are amenable to standard Solaris MT techniques, but it would be
 * complex to handle a failover or failback which needs to manipulate
 * ilg/ilms if applications can also simultaneously join/leave
 * multicast groups. Hence multicast join/leave also go through the ipsq_t
 * serialization.
 *
 * Multicast joins and leaves are single-threaded per phyint/IPMP group
 * using the ipsq serialization mechanism.
 *
 * An ilm is an IP data structure used to track multicast join/leave.
 * An ilm is associated with a <multicast group, ipif> tuple in IPv4 and
 * with just <multicast group> in IPv6. ilm_refcnt is the number of ilg's
 * referencing the ilm. ilms are created/destroyed only as writer. ilms
 * are not passed around; instead they are looked up and used under the
 * ill_lock or as writer. So we don't need a dynamic refcount of the number
 * of threads holding a reference to an ilm.
 *
 * Multicast Join operation:
 *
 * The first step is to determine the ipif (v4) or ill (v6) on which
 * the join operation is to be done. The join is done after becoming
 * exclusive on the ipsq associated with the ipif or ill. The conn->conn_ilg
 * and ill->ill_ilm are thus accessed and modified exclusively per ill.
 * Multiple threads can attempt to join simultaneously on different ipif/ill
 * on the same conn. In this case the ipsq serialization does not help in
 * protecting the ilg. It is the conn_lock that is used to protect the ilg.
 * The conn_lock also protects all the ilg_t members.
 *
 * Leave operation:
 *
 * Similar to the join operation, the first step is to determine the ipif
 * or ill (v6) on which the leave operation is to be done. The leave operation
 * is done after becoming exclusive on the ipsq associated with the ipif or ill.
 * As with join, ilg modification is done under the protection of the conn_lock.
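 *
 * As a rough sketch of the IPv4 path (approximate; the socket option
 * handlers that drive it live outside this excerpt), a join flows:
 *
 *      IPSQ_ENTER_IPIF()          become exclusive on the ipif's ipsq
 *        ilg_add()                conn_lock protects the conn_ilg update
 *          ip_addmulti()          create or update the ilm as writer
 *            igmp_joingroup()     unless the interface is loopback
 *            ip_ll_addmulti_v6()  DL_ENABMULTI_REQ towards the driver
 *
 * The IPv6 path is analogous, using ilg_add_v6()/ip_addmulti_v6() on an
 * ill and mld_joingroup() instead of igmp_joingroup().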
145 */ 146 147 #define IPSQ_ENTER_IPIF(ipif, connp, first_mp, func, ipsq, type) \ 148 ASSERT(connp != NULL); \ 149 (ipsq) = ipsq_try_enter((ipif), NULL, CONNP_TO_WQ(connp), \ 150 (first_mp), (func), (type), B_TRUE); \ 151 if ((ipsq) == NULL) { \ 152 ipif_refrele(ipif); \ 153 return (EINPROGRESS); \ 154 } 155 156 #define IPSQ_ENTER_ILL(ill, connp, first_mp, func, ipsq, type) \ 157 ASSERT(connp != NULL); \ 158 (ipsq) = ipsq_try_enter(NULL, ill, CONNP_TO_WQ(connp), \ 159 (first_mp), (func), (type), B_TRUE); \ 160 if ((ipsq) == NULL) { \ 161 ill_refrele(ill); \ 162 return (EINPROGRESS); \ 163 } 164 165 #define IPSQ_EXIT(ipsq) \ 166 if (ipsq != NULL) \ 167 ipsq_exit(ipsq, B_TRUE, B_TRUE); 168 169 #define ILG_WALKER_HOLD(connp) (connp)->conn_ilg_walker_cnt++ 170 171 #define ILG_WALKER_RELE(connp) \ 172 { \ 173 (connp)->conn_ilg_walker_cnt--; \ 174 if ((connp)->conn_ilg_walker_cnt == 0) \ 175 conn_ilg_reap(connp); \ 176 } 177 178 static void 179 conn_ilg_reap(conn_t *connp) 180 { 181 int to; 182 int from; 183 184 ASSERT(MUTEX_HELD(&connp->conn_lock)); 185 186 to = 0; 187 from = 0; 188 while (from < connp->conn_ilg_inuse) { 189 if (connp->conn_ilg[from].ilg_flags & ILG_DELETED) { 190 FREE_SLIST(connp->conn_ilg[from].ilg_filter); 191 from++; 192 continue; 193 } 194 if (to != from) 195 connp->conn_ilg[to] = connp->conn_ilg[from]; 196 to++; 197 from++; 198 } 199 200 connp->conn_ilg_inuse = to; 201 202 if (connp->conn_ilg_inuse == 0) { 203 mi_free((char *)connp->conn_ilg); 204 connp->conn_ilg = NULL; 205 cv_broadcast(&connp->conn_refcv); 206 } 207 } 208 209 #define GETSTRUCT(structure, number) \ 210 ((structure *)mi_zalloc(sizeof (structure) * (number))) 211 212 #define ILG_ALLOC_CHUNK 16 213 214 /* 215 * Returns a pointer to the next available ilg in conn_ilg. Allocs more 216 * buffers in size of ILG_ALLOC_CHUNK ilgs when needed, and updates conn's 217 * ilg tracking fields appropriately (conn_ilg_inuse reflects usage of the 218 * returned ilg). Returns NULL on failure (ENOMEM). 219 * 220 * Assumes connp->conn_lock is held. 221 */ 222 static ilg_t * 223 conn_ilg_alloc(conn_t *connp) 224 { 225 ilg_t *new; 226 int curcnt; 227 228 ASSERT(MUTEX_HELD(&connp->conn_lock)); 229 ASSERT(connp->conn_ilg_inuse <= connp->conn_ilg_allocated); 230 231 if (connp->conn_ilg == NULL) { 232 connp->conn_ilg = GETSTRUCT(ilg_t, ILG_ALLOC_CHUNK); 233 if (connp->conn_ilg == NULL) 234 return (NULL); 235 connp->conn_ilg_allocated = ILG_ALLOC_CHUNK; 236 connp->conn_ilg_inuse = 0; 237 } 238 if (connp->conn_ilg_inuse == connp->conn_ilg_allocated) { 239 curcnt = connp->conn_ilg_allocated; 240 new = GETSTRUCT(ilg_t, curcnt + ILG_ALLOC_CHUNK); 241 if (new == NULL) 242 return (NULL); 243 bcopy(connp->conn_ilg, new, sizeof (ilg_t) * curcnt); 244 mi_free((char *)connp->conn_ilg); 245 connp->conn_ilg = new; 246 connp->conn_ilg_allocated += ILG_ALLOC_CHUNK; 247 } 248 249 return (&connp->conn_ilg[connp->conn_ilg_inuse++]); 250 } 251 252 typedef struct ilm_fbld_s { 253 ilm_t *fbld_ilm; 254 int fbld_in_cnt; 255 int fbld_ex_cnt; 256 slist_t fbld_in; 257 slist_t fbld_ex; 258 boolean_t fbld_in_overflow; 259 } ilm_fbld_t; 260 261 static void 262 ilm_bld_flists(conn_t *conn, void *arg) 263 { 264 int i; 265 ilm_fbld_t *fbld = (ilm_fbld_t *)(arg); 266 ilm_t *ilm = fbld->fbld_ilm; 267 in6_addr_t *v6group = &ilm->ilm_v6addr; 268 269 if (conn->conn_ilg_inuse == 0) 270 return; 271 272 /* 273 * Since we can't break out of the ipcl_walk once started, we still 274 * have to look at every conn. 
But if we've already found one 275 * (EXCLUDE, NULL) list, there's no need to keep checking individual 276 * ilgs--that will be our state. 277 */ 278 if (fbld->fbld_ex_cnt > 0 && fbld->fbld_ex.sl_numsrc == 0) 279 return; 280 281 /* 282 * Check this conn's ilgs to see if any are interested in our 283 * ilm (group, interface match). If so, update the master 284 * include and exclude lists we're building in the fbld struct 285 * with this ilg's filter info. 286 */ 287 mutex_enter(&conn->conn_lock); 288 for (i = 0; i < conn->conn_ilg_inuse; i++) { 289 ilg_t *ilg = &conn->conn_ilg[i]; 290 if ((ilg->ilg_ill == ilm->ilm_ill) && 291 (ilg->ilg_ipif == ilm->ilm_ipif) && 292 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 293 if (ilg->ilg_fmode == MODE_IS_INCLUDE) { 294 fbld->fbld_in_cnt++; 295 if (!fbld->fbld_in_overflow) 296 l_union_in_a(&fbld->fbld_in, 297 ilg->ilg_filter, 298 &fbld->fbld_in_overflow); 299 } else { 300 fbld->fbld_ex_cnt++; 301 /* 302 * On the first exclude list, don't try to do 303 * an intersection, as the master exclude list 304 * is intentionally empty. If the master list 305 * is still empty on later iterations, that 306 * means we have at least one ilg with an empty 307 * exclude list, so that should be reflected 308 * when we take the intersection. 309 */ 310 if (fbld->fbld_ex_cnt == 1) { 311 if (ilg->ilg_filter != NULL) 312 l_copy(ilg->ilg_filter, 313 &fbld->fbld_ex); 314 } else { 315 l_intersection_in_a(&fbld->fbld_ex, 316 ilg->ilg_filter); 317 } 318 } 319 /* there will only be one match, so break now. */ 320 break; 321 } 322 } 323 mutex_exit(&conn->conn_lock); 324 } 325 326 static void 327 ilm_gen_filter(ilm_t *ilm, mcast_record_t *fmode, slist_t *flist) 328 { 329 ilm_fbld_t fbld; 330 331 fbld.fbld_ilm = ilm; 332 fbld.fbld_in_cnt = fbld.fbld_ex_cnt = 0; 333 fbld.fbld_in.sl_numsrc = fbld.fbld_ex.sl_numsrc = 0; 334 fbld.fbld_in_overflow = B_FALSE; 335 336 /* first, construct our master include and exclude lists */ 337 ipcl_walk(ilm_bld_flists, (caddr_t)&fbld); 338 339 /* now use those master lists to generate the interface filter */ 340 341 /* if include list overflowed, filter is (EXCLUDE, NULL) */ 342 if (fbld.fbld_in_overflow) { 343 *fmode = MODE_IS_EXCLUDE; 344 flist->sl_numsrc = 0; 345 return; 346 } 347 348 /* if nobody interested, interface filter is (INCLUDE, NULL) */ 349 if (fbld.fbld_in_cnt == 0 && fbld.fbld_ex_cnt == 0) { 350 *fmode = MODE_IS_INCLUDE; 351 flist->sl_numsrc = 0; 352 return; 353 } 354 355 /* 356 * If there are no exclude lists, then the interface filter 357 * is INCLUDE, with its filter list equal to fbld_in. A single 358 * exclude list makes the interface filter EXCLUDE, with its 359 * filter list equal to (fbld_ex - fbld_in). 360 */ 361 if (fbld.fbld_ex_cnt == 0) { 362 *fmode = MODE_IS_INCLUDE; 363 l_copy(&fbld.fbld_in, flist); 364 } else { 365 *fmode = MODE_IS_EXCLUDE; 366 l_difference(&fbld.fbld_ex, &fbld.fbld_in, flist); 367 } 368 } 369 370 /* 371 * If the given interface has failed, choose a new one to join on so 372 * that we continue to receive packets. ilg_orig_ifindex remembers 373 * what the application used to join on so that we know the ilg to 374 * delete even though we change the ill here. Callers will store the 375 * ilg returned from this function in ilg_ill. Thus when we receive 376 * a packet on ilg_ill, conn_wantpacket_v6 will deliver the packets. 377 * 378 * This function must be called as writer so we can walk the group 379 * list and examine flags without holding a lock. 
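 *
 * For example (hypothetical interface names): if hme0 has failed and is
 * grouped with hme1, a join the application issued on hme0 is performed
 * on hme1 instead. ilg_orig_ifindex still records hme0's ifindex, so the
 * matching ilg can be located when the group is left or failback occurs.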
380 */ 381 ill_t * 382 ip_choose_multi_ill(ill_t *ill, const in6_addr_t *grp) 383 { 384 ill_t *till; 385 ill_group_t *illgrp = ill->ill_group; 386 387 ASSERT(IAM_WRITER_ILL(ill)); 388 389 if (IN6_IS_ADDR_UNSPECIFIED(grp) || illgrp == NULL) 390 return (ill); 391 392 if ((ill->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE)) == 0) 393 return (ill); 394 395 till = illgrp->illgrp_ill; 396 while (till != NULL && 397 (till->ill_phyint->phyint_flags & (PHYI_FAILED|PHYI_INACTIVE))) { 398 till = till->ill_group_next; 399 } 400 if (till != NULL) 401 return (till); 402 403 return (ill); 404 } 405 406 static int 407 ilm_update_add(ilm_t *ilm, ilg_stat_t ilgstat, slist_t *ilg_flist, 408 boolean_t isv6) 409 { 410 mcast_record_t fmode; 411 slist_t *flist; 412 boolean_t fdefault; 413 char buf[INET6_ADDRSTRLEN]; 414 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 415 416 /* 417 * There are several cases where the ilm's filter state 418 * defaults to (EXCLUDE, NULL): 419 * - we've had previous joins without associated ilgs 420 * - this join has no associated ilg 421 * - the ilg's filter state is (EXCLUDE, NULL) 422 */ 423 fdefault = (ilm->ilm_no_ilg_cnt > 0) || 424 (ilgstat == ILGSTAT_NONE) || SLIST_IS_EMPTY(ilg_flist); 425 426 /* attempt mallocs (if needed) before doing anything else */ 427 if ((flist = l_alloc()) == NULL) 428 return (ENOMEM); 429 if (!fdefault && ilm->ilm_filter == NULL) { 430 ilm->ilm_filter = l_alloc(); 431 if (ilm->ilm_filter == NULL) { 432 l_free(flist); 433 return (ENOMEM); 434 } 435 } 436 437 if (ilgstat != ILGSTAT_CHANGE) 438 ilm->ilm_refcnt++; 439 440 if (ilgstat == ILGSTAT_NONE) 441 ilm->ilm_no_ilg_cnt++; 442 443 /* 444 * Determine new filter state. If it's not the default 445 * (EXCLUDE, NULL), we must walk the conn list to find 446 * any ilgs interested in this group, and re-build the 447 * ilm filter. 448 */ 449 if (fdefault) { 450 fmode = MODE_IS_EXCLUDE; 451 flist->sl_numsrc = 0; 452 } else { 453 ilm_gen_filter(ilm, &fmode, flist); 454 } 455 456 /* make sure state actually changed; nothing to do if not. */ 457 if ((ilm->ilm_fmode == fmode) && 458 !lists_are_different(ilm->ilm_filter, flist)) { 459 l_free(flist); 460 return (0); 461 } 462 463 /* send the state change report */ 464 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 465 if (isv6) 466 mld_statechange(ilm, fmode, flist); 467 else 468 igmp_statechange(ilm, fmode, flist); 469 } 470 471 /* update the ilm state */ 472 ilm->ilm_fmode = fmode; 473 if (flist->sl_numsrc > 0) 474 l_copy(flist, ilm->ilm_filter); 475 else 476 CLEAR_SLIST(ilm->ilm_filter); 477 478 ip1dbg(("ilm_update: new if filter mode %d, group %s\n", ilm->ilm_fmode, 479 inet_ntop(AF_INET6, &ilm->ilm_v6addr, buf, sizeof (buf)))); 480 481 l_free(flist); 482 return (0); 483 } 484 485 static int 486 ilm_update_del(ilm_t *ilm, boolean_t isv6) 487 { 488 mcast_record_t fmode; 489 slist_t *flist; 490 ill_t *ill = isv6 ? ilm->ilm_ill : ilm->ilm_ipif->ipif_ill; 491 492 ip1dbg(("ilm_update_del: still %d left; updating state\n", 493 ilm->ilm_refcnt)); 494 495 if ((flist = l_alloc()) == NULL) 496 return (ENOMEM); 497 498 /* 499 * If present, the ilg in question has already either been 500 * updated or removed from our list; so all we need to do 501 * now is walk the list to update the ilm filter state. 502 * 503 * Skip the list walk if we have any no-ilg joins, which 504 * cause the filter state to revert to (EXCLUDE, NULL). 
505 */ 506 if (ilm->ilm_no_ilg_cnt != 0) { 507 fmode = MODE_IS_EXCLUDE; 508 flist->sl_numsrc = 0; 509 } else { 510 ilm_gen_filter(ilm, &fmode, flist); 511 } 512 513 /* check to see if state needs to be updated */ 514 if ((ilm->ilm_fmode == fmode) && 515 (!lists_are_different(ilm->ilm_filter, flist))) { 516 l_free(flist); 517 return (0); 518 } 519 520 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) { 521 if (isv6) 522 mld_statechange(ilm, fmode, flist); 523 else 524 igmp_statechange(ilm, fmode, flist); 525 } 526 527 ilm->ilm_fmode = fmode; 528 if (flist->sl_numsrc > 0) { 529 if (ilm->ilm_filter == NULL) { 530 ilm->ilm_filter = l_alloc(); 531 if (ilm->ilm_filter == NULL) { 532 char buf[INET6_ADDRSTRLEN]; 533 ip1dbg(("ilm_update_del: failed to alloc ilm " 534 "filter; no source filtering for %s on %s", 535 inet_ntop(AF_INET6, &ilm->ilm_v6addr, 536 buf, sizeof (buf)), ill->ill_name)); 537 ilm->ilm_fmode = MODE_IS_EXCLUDE; 538 l_free(flist); 539 return (0); 540 } 541 } 542 l_copy(flist, ilm->ilm_filter); 543 } else { 544 CLEAR_SLIST(ilm->ilm_filter); 545 } 546 547 l_free(flist); 548 return (0); 549 } 550 551 /* 552 * INADDR_ANY means all multicast addresses. This is only used 553 * by the multicast router. 554 * INADDR_ANY is stored as IPv6 unspecified addr. 555 */ 556 int 557 ip_addmulti(ipaddr_t group, ipif_t *ipif, ilg_stat_t ilgstat, 558 mcast_record_t ilg_fmode, slist_t *ilg_flist) 559 { 560 ill_t *ill = ipif->ipif_ill; 561 ilm_t *ilm; 562 in6_addr_t v6group; 563 int ret; 564 565 ASSERT(IAM_WRITER_IPIF(ipif)); 566 567 if (!CLASSD(group) && group != INADDR_ANY) 568 return (EINVAL); 569 570 /* 571 * INADDR_ANY is represented as the IPv6 unspecifed addr. 572 */ 573 if (group == INADDR_ANY) 574 v6group = ipv6_all_zeros; 575 else 576 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 577 578 ilm = ilm_lookup_ipif(ipif, group); 579 if (ilm != NULL) 580 return (ilm_update_add(ilm, ilgstat, ilg_flist, B_FALSE)); 581 582 /* 583 * ilms are associated with ipifs in IPv4. It moves with the 584 * ipif if the ipif moves to a new ill when the interface 585 * fails. Thus we really don't check whether the ipif_ill 586 * has failed like in IPv6. If it has FAILED the ipif 587 * will move (daemon will move it) and hence the ilm, if the 588 * ipif is not IPIF_NOFAILOVER. For the IPIF_NOFAILOVER ipifs, 589 * we continue to receive in the same place even if the 590 * interface fails. 591 */ 592 ilm = ilm_add_v6(ipif, &v6group, ilgstat, ilg_fmode, ilg_flist, 593 ill->ill_phyint->phyint_ifindex, ipif->ipif_zoneid); 594 if (ilm == NULL) 595 return (ENOMEM); 596 597 if (group == INADDR_ANY) { 598 /* 599 * Check how many ipif's have members in this group - 600 * if more then one we should not tell the driver to join 601 * this time 602 */ 603 if (ilm_numentries_v6(ill, &v6group) > 1) 604 return (0); 605 if (ill->ill_group == NULL) 606 ret = ip_join_allmulti(ipif); 607 else 608 ret = ill_nominate_mcast_rcv(ill->ill_group); 609 if (ret != 0) 610 ilm_delete(ilm); 611 return (ret); 612 } 613 614 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 615 igmp_joingroup(ilm); 616 617 if (ilm_numentries_v6(ill, &v6group) > 1) 618 return (0); 619 620 ret = ip_ll_addmulti_v6(ipif, &v6group); 621 if (ret != 0) 622 ilm_delete(ilm); 623 return (ret); 624 } 625 626 /* 627 * The unspecified address means all multicast addresses. 628 * This is only used by the multicast router. 
 *
 * ill identifies the interface to join on; it may not match the
 * interface requested by the application if a failover has taken
 * place. orig_ifindex always identifies the interface requested
 * by the app.
 *
 * ilgstat tells us if there's an ilg associated with this join,
 * and if so, if it's a new ilg or a change to an existing one.
 * ilg_fmode and ilg_flist give us the current filter state of
 * the ilg (and will be EXCLUDE {NULL} in the case of no ilg).
 */
int
ip_addmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex,
    zoneid_t zoneid, ilg_stat_t ilgstat, mcast_record_t ilg_fmode,
    slist_t *ilg_flist)
{
        ilm_t *ilm;
        int ret;

        ASSERT(IAM_WRITER_ILL(ill));

        if (!IN6_IS_ADDR_MULTICAST(v6group) &&
            !IN6_IS_ADDR_UNSPECIFIED(v6group)) {
                return (EINVAL);
        }

        /*
         * An ilm is uniquely identified by the tuple of (group, ill,
         * orig_ill). group is the multicast group address, ill is
         * the interface on which it is currently joined, and orig_ill
         * is the interface on which the application requested the
         * join. orig_ill and ill are the same unless orig_ill has
         * failed over.
         *
         * Both orig_ill and ill are required, which means we may have
         * 2 ilms on an ill for the same group, but with different
         * orig_ills. These must be kept separate, so that when failback
         * occurs, the appropriate ilms are moved back to their orig_ill
         * without disrupting memberships on the ill to which they had
         * been moved.
         *
         * In order to track orig_ill, we store orig_ifindex in the
         * ilm and ilg.
         */
        ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid);
        if (ilm != NULL)
                return (ilm_update_add(ilm, ilgstat, ilg_flist, B_TRUE));

        /*
         * We need to remember where the application really wanted
         * to join. This will be used later if we want to failback
         * to the original interface.
         */
        ilm = ilm_add_v6(ill->ill_ipif, v6group, ilgstat, ilg_fmode,
            ilg_flist, orig_ifindex, zoneid);
        if (ilm == NULL)
                return (ENOMEM);

        if (IN6_IS_ADDR_UNSPECIFIED(v6group)) {
                /*
                 * Check how many ipifs have members in this group;
                 * if more than one, we should not tell the driver to
                 * join this time.
                 */
                if (ilm_numentries_v6(ill, v6group) > 1)
                        return (0);
                if (ill->ill_group == NULL)
                        ret = ip_join_allmulti(ill->ill_ipif);
                else
                        ret = ill_nominate_mcast_rcv(ill->ill_group);

                if (ret != 0)
                        ilm_delete(ilm);
                return (ret);
        }

        if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
                mld_joingroup(ilm);

        /*
         * If we have more than one we should not tell the driver
         * to join this time.
         */
        if (ilm_numentries_v6(ill, v6group) > 1)
                return (0);

        ret = ip_ll_addmulti_v6(ill->ill_ipif, v6group);
        if (ret != 0)
                ilm_delete(ilm);
        return (ret);
}

/*
 * Send a multicast request to the driver for enabling multicast reception
 * for the v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_enabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
        mblk_t *mp;
        uint32_t addrlen, addroff;
        char group_buf[INET6_ADDRSTRLEN];

        ASSERT(IAM_WRITER_ILL(ill));

        /*
         * Create an AR_ENTRY_SQUERY message with a dl_enabmulti_req tacked
         * on.
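         *
         * The resulting chain is roughly:
         *
         *   [ area_t (AR_ENTRY_SQUERY) ] --b_cont--> [ dl_enabmulti_req_t
         *                                              + hw addr ]
         *
         * where area_hw_addr_offset in the first mblk is biased by its
         * (b_wptr - b_rptr) so that mi_offset_param_c() resolves the
         * hardware address inside the second mblk; see the NOTE below
         * and ill_create_squery().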
738 */ 739 mp = ill_create_dl(ill, DL_ENABMULTI_REQ, sizeof (dl_enabmulti_req_t), 740 &addrlen, &addroff); 741 if (!mp) 742 return (ENOMEM); 743 if (IN6_IS_ADDR_V4MAPPED(v6groupp)) { 744 ipaddr_t v4group; 745 746 IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group); 747 /* 748 * NOTE!!! 749 * The "addroff" passed in here was calculated by 750 * ill_create_dl(), and will be used by ill_create_squery() 751 * to perform some twisted coding magic. It is the offset 752 * into the dl_xxx_req of the hw addr. Here, it will be 753 * added to b_wptr - b_rptr to create a magic number that 754 * is not an offset into this squery mblk. 755 * The actual hardware address will be accessed only in the 756 * dl_xxx_req, not in the squery. More importantly, 757 * that hardware address can *only* be accessed in this 758 * mblk chain by calling mi_offset_param_c(), which uses 759 * the magic number in the squery hw offset field to go 760 * to the *next* mblk (the dl_xxx_req), subtract the 761 * (b_wptr - b_rptr), and find the actual offset into 762 * the dl_xxx_req. 763 * Any method that depends on using the 764 * offset field in the dl_disabmulti_req or squery 765 * to find either hardware address will similarly fail. 766 * 767 * Look in ar_entry_squery() in arp.c to see how this offset 768 * is used. 769 */ 770 mp = ill_create_squery(ill, v4group, addrlen, addroff, mp); 771 if (!mp) 772 return (ENOMEM); 773 ip1dbg(("ip_ll_send_enabmulti_req: IPv4 putnext %s on %s\n", 774 inet_ntop(AF_INET6, v6groupp, group_buf, 775 sizeof (group_buf)), 776 ill->ill_name)); 777 putnext(ill->ill_rq, mp); 778 } else { 779 ip1dbg(("ip_ll_send_enabmulti_req: IPv6 ndp_squery_mp %s on" 780 " %s\n", 781 inet_ntop(AF_INET6, v6groupp, group_buf, 782 sizeof (group_buf)), 783 ill->ill_name)); 784 return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp)); 785 } 786 return (0); 787 } 788 789 /* 790 * Send a multicast request to the driver for enabling multicast 791 * membership for v6group if appropriate. 792 */ 793 static int 794 ip_ll_addmulti_v6(ipif_t *ipif, const in6_addr_t *v6groupp) 795 { 796 ill_t *ill = ipif->ipif_ill; 797 798 ASSERT(IAM_WRITER_IPIF(ipif)); 799 800 if (ill->ill_net_type != IRE_IF_RESOLVER || 801 ipif->ipif_flags & IPIF_POINTOPOINT) { 802 ip1dbg(("ip_ll_addmulti_v6: not resolver\n")); 803 return (0); /* Must be IRE_IF_NORESOLVER */ 804 } 805 806 if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) { 807 ip1dbg(("ip_ll_addmulti_v6: MULTI_BCAST\n")); 808 return (0); 809 } 810 if (ill->ill_ipif_up_count == 0) { 811 /* 812 * Nobody there. All multicast addresses will be re-joined 813 * when we get the DL_BIND_ACK bringing the interface up. 814 */ 815 ip1dbg(("ip_ll_addmulti_v6: nobody up\n")); 816 return (0); 817 } 818 return (ip_ll_send_enabmulti_req(ill, v6groupp)); 819 } 820 821 /* 822 * INADDR_ANY means all multicast addresses. This is only used 823 * by the multicast router. 824 * INADDR_ANY is stored as the IPv6 unspecifed addr. 825 */ 826 int 827 ip_delmulti(ipaddr_t group, ipif_t *ipif, boolean_t no_ilg, boolean_t leaving) 828 { 829 ill_t *ill = ipif->ipif_ill; 830 ilm_t *ilm; 831 in6_addr_t v6group; 832 int ret; 833 834 ASSERT(IAM_WRITER_IPIF(ipif)); 835 836 if (!CLASSD(group) && group != INADDR_ANY) 837 return (EINVAL); 838 839 /* 840 * INADDR_ANY is represented as the IPv6 unspecifed addr. 841 */ 842 if (group == INADDR_ANY) 843 v6group = ipv6_all_zeros; 844 else 845 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 846 847 /* 848 * Look for a match on the ipif. 
849 * (IP_DROP_MEMBERSHIP specifies an ipif using an IP address). 850 */ 851 ilm = ilm_lookup_ipif(ipif, group); 852 if (ilm == NULL) 853 return (ENOENT); 854 855 /* Update counters */ 856 if (no_ilg) 857 ilm->ilm_no_ilg_cnt--; 858 859 if (leaving) 860 ilm->ilm_refcnt--; 861 862 if (ilm->ilm_refcnt > 0) 863 return (ilm_update_del(ilm, B_FALSE)); 864 865 if (group == INADDR_ANY) { 866 ilm_delete(ilm); 867 /* 868 * Check how many ipif's that have members in this group - 869 * if there are still some left then don't tell the driver 870 * to drop it. 871 */ 872 if (ilm_numentries_v6(ill, &v6group) != 0) 873 return (0); 874 875 /* 876 * If we never joined, then don't leave. This can happen 877 * if we're in an IPMP group, since only one ill per IPMP 878 * group receives all multicast packets. 879 */ 880 if (!ill->ill_join_allmulti) { 881 ASSERT(ill->ill_group != NULL); 882 return (0); 883 } 884 885 ret = ip_leave_allmulti(ipif); 886 if (ill->ill_group != NULL) 887 (void) ill_nominate_mcast_rcv(ill->ill_group); 888 return (ret); 889 } 890 891 if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0) 892 igmp_leavegroup(ilm); 893 894 ilm_delete(ilm); 895 /* 896 * Check how many ipif's that have members in this group - 897 * if there are still some left then don't tell the driver 898 * to drop it. 899 */ 900 if (ilm_numentries_v6(ill, &v6group) != 0) 901 return (0); 902 return (ip_ll_delmulti_v6(ipif, &v6group)); 903 } 904 905 /* 906 * The unspecified address means all multicast addresses. 907 * This is only used by the multicast router. 908 */ 909 int 910 ip_delmulti_v6(const in6_addr_t *v6group, ill_t *ill, int orig_ifindex, 911 zoneid_t zoneid, boolean_t no_ilg, boolean_t leaving) 912 { 913 ipif_t *ipif; 914 ilm_t *ilm; 915 int ret; 916 917 ASSERT(IAM_WRITER_ILL(ill)); 918 919 if (!IN6_IS_ADDR_MULTICAST(v6group) && 920 !IN6_IS_ADDR_UNSPECIFIED(v6group)) 921 return (EINVAL); 922 923 /* 924 * Look for a match on the ill. 925 * (IPV6_LEAVE_GROUP specifies an ill using an ifindex). 926 * 927 * Similar to ip_addmulti_v6, we should always look using 928 * the orig_ifindex. 929 * 930 * 1) If orig_ifindex is different from ill's ifindex 931 * we should have an ilm with orig_ifindex created in 932 * ip_addmulti_v6. We should delete that here. 933 * 934 * 2) If orig_ifindex is same as ill's ifindex, we should 935 * not delete the ilm that is temporarily here because of 936 * a FAILOVER. Those ilms will have a ilm_orig_ifindex 937 * different from ill's ifindex. 938 * 939 * Thus, always lookup using orig_ifindex. 940 */ 941 ilm = ilm_lookup_ill_index_v6(ill, v6group, orig_ifindex, zoneid); 942 if (ilm == NULL) 943 return (ENOENT); 944 945 ASSERT(ilm->ilm_ill == ill); 946 947 ipif = ill->ill_ipif; 948 949 /* Update counters */ 950 if (no_ilg) 951 ilm->ilm_no_ilg_cnt--; 952 953 if (leaving) 954 ilm->ilm_refcnt--; 955 956 if (ilm->ilm_refcnt > 0) 957 return (ilm_update_del(ilm, B_TRUE)); 958 959 if (IN6_IS_ADDR_UNSPECIFIED(v6group)) { 960 ilm_delete(ilm); 961 /* 962 * Check how many ipif's that have members in this group - 963 * if there are still some left then don't tell the driver 964 * to drop it. 965 */ 966 if (ilm_numentries_v6(ill, v6group) != 0) 967 return (0); 968 969 /* 970 * If we never joined, then don't leave. This can happen 971 * if we're in an IPMP group, since only one ill per IPMP 972 * group receives all multicast packets. 
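                 * (Whether this ill actually joined is tracked by
                 * ill_join_allmulti, which is set and cleared in
                 * ip_join_allmulti()/ip_leave_allmulti().)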
                 */
                if (!ill->ill_join_allmulti) {
                        ASSERT(ill->ill_group != NULL);
                        return (0);
                }

                ret = ip_leave_allmulti(ipif);
                if (ill->ill_group != NULL)
                        (void) ill_nominate_mcast_rcv(ill->ill_group);
                return (ret);
        }

        if ((ill->ill_phyint->phyint_flags & PHYI_LOOPBACK) == 0)
                mld_leavegroup(ilm);

        ilm_delete(ilm);
        /*
         * Check how many ipifs still have members in this group;
         * if some are left, don't tell the driver to drop it.
         */
        if (ilm_numentries_v6(ill, v6group) != 0)
                return (0);
        return (ip_ll_delmulti_v6(ipif, v6group));
}

/*
 * Send a multicast request to the driver for disabling multicast reception
 * for the v6groupp address. The caller has already checked whether it is
 * appropriate to send one or not.
 */
int
ip_ll_send_disabmulti_req(ill_t *ill, const in6_addr_t *v6groupp)
{
        mblk_t *mp;
        char group_buf[INET6_ADDRSTRLEN];
        uint32_t addrlen, addroff;

        ASSERT(IAM_WRITER_ILL(ill));
        /*
         * Create an AR_ENTRY_SQUERY message with a dl_disabmulti_req tacked
         * on.
         */
        mp = ill_create_dl(ill, DL_DISABMULTI_REQ,
            sizeof (dl_disabmulti_req_t), &addrlen, &addroff);

        if (!mp)
                return (ENOMEM);

        if (IN6_IS_ADDR_V4MAPPED(v6groupp)) {
                ipaddr_t v4group;

                IN6_V4MAPPED_TO_IPADDR(v6groupp, v4group);
                /*
                 * NOTE!!!
                 * The "addroff" passed in here was calculated by
                 * ill_create_dl(), and will be used by ill_create_squery()
                 * to perform some twisted coding magic. It is the offset
                 * into the dl_xxx_req of the hw addr. Here, it will be
                 * added to b_wptr - b_rptr to create a magic number that
                 * is not an offset into this mblk.
                 *
                 * Please see the comment in ip_ll_send_enabmulti_req()
                 * for a complete explanation.
                 *
                 * Look in ar_entry_squery() in arp.c to see how this offset
                 * is used.
                 */
                mp = ill_create_squery(ill, v4group, addrlen, addroff, mp);
                if (!mp)
                        return (ENOMEM);
                ip1dbg(("ip_ll_send_disabmulti_req: IPv4 putnext %s on %s\n",
                    inet_ntop(AF_INET6, v6groupp, group_buf,
                    sizeof (group_buf)),
                    ill->ill_name));
                putnext(ill->ill_rq, mp);
        } else {
                ip1dbg(("ip_ll_send_disabmulti_req: IPv6 ndp_squery_mp %s on"
                    " %s\n",
                    inet_ntop(AF_INET6, v6groupp, group_buf,
                    sizeof (group_buf)),
                    ill->ill_name));
                return (ndp_mcastreq(ill, v6groupp, addrlen, addroff, mp));
        }
        return (0);
}

/*
 * Send a multicast request to the driver for disabling multicast
 * membership for v6group if appropriate.
 */
static int
ip_ll_delmulti_v6(ipif_t *ipif, const in6_addr_t *v6group)
{
        ill_t *ill = ipif->ipif_ill;

        ASSERT(IAM_WRITER_IPIF(ipif));

        if (ill->ill_net_type != IRE_IF_RESOLVER ||
            ipif->ipif_flags & IPIF_POINTOPOINT) {
                return (0);     /* Must be IRE_IF_NORESOLVER */
        }
        if (ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST) {
                ip1dbg(("ip_ll_delmulti_v6: MULTI_BCAST\n"));
                return (0);
        }
        if (ill->ill_ipif_up_count == 0) {
                /*
                 * Nobody there. All multicast addresses will be re-joined
                 * when we get the DL_BIND_ACK bringing the interface up.
1083 */ 1084 ip1dbg(("ip_ll_delmulti_v6: nobody up\n")); 1085 return (0); 1086 } 1087 return (ip_ll_send_disabmulti_req(ill, v6group)); 1088 } 1089 1090 /* 1091 * Make the driver pass up all multicast packets 1092 * 1093 * With ill groups, the caller makes sure that there is only 1094 * one ill joining the allmulti group. 1095 */ 1096 int 1097 ip_join_allmulti(ipif_t *ipif) 1098 { 1099 ill_t *ill = ipif->ipif_ill; 1100 mblk_t *mp; 1101 uint32_t addrlen, addroff; 1102 1103 ASSERT(IAM_WRITER_IPIF(ipif)); 1104 1105 if (ill->ill_ipif_up_count == 0) { 1106 /* 1107 * Nobody there. All multicast addresses will be re-joined 1108 * when we get the DL_BIND_ACK bringing the interface up. 1109 */ 1110 return (0); 1111 } 1112 1113 ASSERT(!ill->ill_join_allmulti); 1114 1115 /* 1116 * Create a DL_PROMISCON_REQ message and send it directly to 1117 * the DLPI provider. We don't need to do this for certain 1118 * media types for which we never need to turn promiscuous 1119 * mode on. 1120 */ 1121 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1122 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1123 mp = ill_create_dl(ill, DL_PROMISCON_REQ, 1124 sizeof (dl_promiscon_req_t), &addrlen, &addroff); 1125 if (mp == NULL) 1126 return (ENOMEM); 1127 putnext(ill->ill_wq, mp); 1128 } 1129 1130 mutex_enter(&ill->ill_lock); 1131 ill->ill_join_allmulti = B_TRUE; 1132 mutex_exit(&ill->ill_lock); 1133 return (0); 1134 } 1135 1136 /* 1137 * Make the driver stop passing up all multicast packets 1138 * 1139 * With ill groups, we need to nominate some other ill as 1140 * this ipif->ipif_ill is leaving the group. 1141 */ 1142 int 1143 ip_leave_allmulti(ipif_t *ipif) 1144 { 1145 ill_t *ill = ipif->ipif_ill; 1146 mblk_t *mp; 1147 uint32_t addrlen, addroff; 1148 1149 ASSERT(IAM_WRITER_IPIF(ipif)); 1150 1151 if (ill->ill_ipif_up_count == 0) { 1152 /* 1153 * Nobody there. All multicast addresses will be re-joined 1154 * when we get the DL_BIND_ACK bringing the interface up. 1155 */ 1156 return (0); 1157 } 1158 1159 ASSERT(ill->ill_join_allmulti); 1160 1161 /* 1162 * Create a DL_PROMISCOFF_REQ message and send it directly to 1163 * the DLPI provider. We don't need to do this for certain 1164 * media types for which we never need to turn promiscuous 1165 * mode on. 1166 */ 1167 if ((ill->ill_net_type == IRE_IF_RESOLVER) && 1168 !(ill->ill_phyint->phyint_flags & PHYI_MULTI_BCAST)) { 1169 mp = ill_create_dl(ill, DL_PROMISCOFF_REQ, 1170 sizeof (dl_promiscoff_req_t), &addrlen, &addroff); 1171 if (mp == NULL) 1172 return (ENOMEM); 1173 putnext(ill->ill_wq, mp); 1174 } 1175 1176 mutex_enter(&ill->ill_lock); 1177 ill->ill_join_allmulti = B_FALSE; 1178 mutex_exit(&ill->ill_lock); 1179 return (0); 1180 } 1181 1182 /* 1183 * Copy mp_orig and pass it in as a local message. 1184 */ 1185 void 1186 ip_multicast_loopback(queue_t *q, ill_t *ill, mblk_t *mp_orig, int fanout_flags, 1187 zoneid_t zoneid) 1188 { 1189 mblk_t *mp; 1190 mblk_t *ipsec_mp; 1191 1192 /* TODO this could use dup'ed messages except for the IP header. 
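         * (The copy below goes through ip_copymsg() so that a leading M_CTL
         * mblk carrying IPsec state, if any, is preserved; it is then handed
         * to ip_wput_local() as ipsec_mp with mp pointing at the IP header
         * in b_cont.)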
*/ 1193 mp = ip_copymsg(mp_orig); 1194 if (mp == NULL) 1195 return; 1196 if (mp->b_datap->db_type == M_CTL) { 1197 ipsec_mp = mp; 1198 mp = mp->b_cont; 1199 } else { 1200 ipsec_mp = mp; 1201 } 1202 ip_wput_local(q, ill, (ipha_t *)mp->b_rptr, ipsec_mp, NULL, 1203 fanout_flags, zoneid); 1204 } 1205 1206 static area_t ip_aresq_template = { 1207 AR_ENTRY_SQUERY, /* cmd */ 1208 sizeof (area_t)+IP_ADDR_LEN, /* name offset */ 1209 sizeof (area_t), /* name len (filled by ill_arp_alloc) */ 1210 IP_ARP_PROTO_TYPE, /* protocol, from arps perspective */ 1211 sizeof (area_t), /* proto addr offset */ 1212 IP_ADDR_LEN, /* proto addr_length */ 1213 0, /* proto mask offset */ 1214 /* Rest is initialized when used */ 1215 0, /* flags */ 1216 0, /* hw addr offset */ 1217 0, /* hw addr length */ 1218 }; 1219 1220 static mblk_t * 1221 ill_create_squery(ill_t *ill, ipaddr_t ipaddr, uint32_t addrlen, 1222 uint32_t addroff, mblk_t *mp_tail) 1223 { 1224 mblk_t *mp; 1225 area_t *area; 1226 1227 mp = ill_arp_alloc(ill, (uchar_t *)&ip_aresq_template, 1228 (caddr_t)&ipaddr); 1229 if (!mp) { 1230 freemsg(mp_tail); 1231 return (NULL); 1232 } 1233 area = (area_t *)mp->b_rptr; 1234 area->area_hw_addr_length = addrlen; 1235 area->area_hw_addr_offset = mp->b_wptr - mp->b_rptr + addroff; 1236 /* 1237 * NOTE! 1238 * 1239 * The area_hw_addr_offset, as can be seen, does not hold the 1240 * actual hardware address offset. Rather, it holds the offset 1241 * to the hw addr in the dl_xxx_req in mp_tail, modified by 1242 * adding (mp->b_wptr - mp->b_rptr). This allows the function 1243 * mi_offset_paramc() to find the hardware address in the 1244 * *second* mblk (dl_xxx_req), not this mblk. 1245 * 1246 * Using mi_offset_paramc() is thus the *only* way to access 1247 * the dl_xxx_hw address. 1248 * 1249 * The squery hw address should *not* be accessed. 1250 * 1251 * See ar_entry_squery() in arp.c for an example of how all this works. 1252 */ 1253 1254 mp->b_cont = mp_tail; 1255 return (mp); 1256 } 1257 1258 /* 1259 * Create a dlpi message with room for phys+sap. When we come back in 1260 * ip_wput_ctl() we will strip the sap for those primitives which 1261 * only need a physical address. 
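 *
 * For DL_ENABMULTI_REQ and DL_DISABMULTI_REQ the mblk is sized to hold
 * the request structure plus the link-layer address, e.g. (6 byte
 * hardware address, sketch only):
 *
 *      +----------------------+-----------------+
 *      | dl_enabmulti_req_t   | 6 byte hw addr  |
 *      +----------------------+-----------------+
 *        dl_addr_offset = sizeof (dl_enabmulti_req_t)
 *
 * DL_PROMISCON_REQ/DL_PROMISCOFF_REQ carry no address; they are sent
 * with dl_level set to DL_PROMISC_MULTI.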
1262 */ 1263 static mblk_t * 1264 ill_create_dl(ill_t *ill, uint32_t dl_primitive, uint32_t length, 1265 uint32_t *addr_lenp, uint32_t *addr_offp) 1266 { 1267 mblk_t *mp; 1268 uint32_t hw_addr_length; 1269 char *cp; 1270 uint32_t offset; 1271 uint32_t size; 1272 1273 *addr_lenp = *addr_offp = 0; 1274 1275 hw_addr_length = ill->ill_phys_addr_length; 1276 if (!hw_addr_length) { 1277 ip0dbg(("ip_create_dl: hw addr length = 0\n")); 1278 return (NULL); 1279 } 1280 1281 size = length; 1282 switch (dl_primitive) { 1283 case DL_ENABMULTI_REQ: 1284 case DL_DISABMULTI_REQ: 1285 size += hw_addr_length; 1286 break; 1287 case DL_PROMISCON_REQ: 1288 case DL_PROMISCOFF_REQ: 1289 break; 1290 default: 1291 return (NULL); 1292 } 1293 mp = allocb(size, BPRI_HI); 1294 if (!mp) 1295 return (NULL); 1296 mp->b_wptr += size; 1297 mp->b_datap->db_type = M_PROTO; 1298 1299 cp = (char *)mp->b_rptr; 1300 offset = length; 1301 1302 switch (dl_primitive) { 1303 case DL_ENABMULTI_REQ: { 1304 dl_enabmulti_req_t *dl = (dl_enabmulti_req_t *)cp; 1305 1306 dl->dl_primitive = dl_primitive; 1307 dl->dl_addr_offset = offset; 1308 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1309 *addr_offp = offset; 1310 break; 1311 } 1312 case DL_DISABMULTI_REQ: { 1313 dl_disabmulti_req_t *dl = (dl_disabmulti_req_t *)cp; 1314 1315 dl->dl_primitive = dl_primitive; 1316 dl->dl_addr_offset = offset; 1317 *addr_lenp = dl->dl_addr_length = hw_addr_length; 1318 *addr_offp = offset; 1319 break; 1320 } 1321 case DL_PROMISCON_REQ: 1322 case DL_PROMISCOFF_REQ: { 1323 dl_promiscon_req_t *dl = (dl_promiscon_req_t *)cp; 1324 1325 dl->dl_primitive = dl_primitive; 1326 dl->dl_level = DL_PROMISC_MULTI; 1327 break; 1328 } 1329 } 1330 ip1dbg(("ill_create_dl: addr_len %d, addr_off %d\n", 1331 *addr_lenp, *addr_offp)); 1332 return (mp); 1333 } 1334 1335 void 1336 ip_wput_ctl(queue_t *q, mblk_t *mp_orig) 1337 { 1338 ill_t *ill = (ill_t *)q->q_ptr; 1339 mblk_t *mp = mp_orig; 1340 area_t *area; 1341 1342 /* Check that we have a AR_ENTRY_SQUERY with a tacked on mblk */ 1343 if ((mp->b_wptr - mp->b_rptr) < sizeof (area_t) || 1344 mp->b_cont == NULL) { 1345 putnext(q, mp); 1346 return; 1347 } 1348 area = (area_t *)mp->b_rptr; 1349 if (area->area_cmd != AR_ENTRY_SQUERY) { 1350 putnext(q, mp); 1351 return; 1352 } 1353 mp = mp->b_cont; 1354 /* 1355 * Update dl_addr_length and dl_addr_offset for primitives that 1356 * have physical addresses as opposed to full saps 1357 */ 1358 switch (((union DL_primitives *)mp->b_rptr)->dl_primitive) { 1359 case DL_ENABMULTI_REQ: 1360 /* Track the state if this is the first enabmulti */ 1361 if (ill->ill_dlpi_multicast_state == IDMS_UNKNOWN) 1362 ill->ill_dlpi_multicast_state = IDMS_INPROGRESS; 1363 ip1dbg(("ip_wput_ctl: ENABMULTI\n")); 1364 break; 1365 case DL_DISABMULTI_REQ: 1366 ip1dbg(("ip_wput_ctl: DISABMULTI\n")); 1367 break; 1368 default: 1369 ip1dbg(("ip_wput_ctl: default\n")); 1370 break; 1371 } 1372 freeb(mp_orig); 1373 putnext(q, mp); 1374 } 1375 1376 /* 1377 * Rejoin any groups which have been explicitly joined by the application (we 1378 * left all explicitly joined groups as part of ill_leave_multicast() prior to 1379 * bringing the interface down). Note that because groups can be joined and 1380 * left while an interface is down, this may not be the same set of groups 1381 * that we left in ill_leave_multicast(). 
1382 */ 1383 void 1384 ill_recover_multicast(ill_t *ill) 1385 { 1386 ilm_t *ilm; 1387 char addrbuf[INET6_ADDRSTRLEN]; 1388 1389 ASSERT(IAM_WRITER_ILL(ill)); 1390 1391 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1392 /* 1393 * Check how many ipif's that have members in this group - 1394 * if more then one we make sure that this entry is first 1395 * in the list. 1396 */ 1397 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1398 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1399 continue; 1400 ip1dbg(("ill_recover_multicast: %s\n", 1401 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1402 sizeof (addrbuf)))); 1403 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1404 if (ill->ill_group == NULL) { 1405 (void) ip_join_allmulti(ill->ill_ipif); 1406 } else { 1407 /* 1408 * We don't want to join on this ill, 1409 * if somebody else in the group has 1410 * already been nominated. 1411 */ 1412 (void) ill_nominate_mcast_rcv(ill->ill_group); 1413 } 1414 } else { 1415 (void) ip_ll_addmulti_v6(ill->ill_ipif, 1416 &ilm->ilm_v6addr); 1417 } 1418 } 1419 } 1420 1421 /* 1422 * The opposite of ill_recover_multicast() -- leaves all multicast groups 1423 * that were explicitly joined. Note that both these functions could be 1424 * disposed of if we enhanced ARP to allow us to handle DL_DISABMULTI_REQ 1425 * and DL_ENABMULTI_REQ messages when an interface is down. 1426 */ 1427 void 1428 ill_leave_multicast(ill_t *ill) 1429 { 1430 ilm_t *ilm; 1431 char addrbuf[INET6_ADDRSTRLEN]; 1432 1433 ASSERT(IAM_WRITER_ILL(ill)); 1434 1435 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1436 /* 1437 * Check how many ipif's that have members in this group - 1438 * if more then one we make sure that this entry is first 1439 * in the list. 1440 */ 1441 if (ilm_numentries_v6(ill, &ilm->ilm_v6addr) > 1 && 1442 ilm_lookup_ill_v6(ill, &ilm->ilm_v6addr, ALL_ZONES) != ilm) 1443 continue; 1444 ip1dbg(("ill_leave_multicast: %s\n", 1445 inet_ntop(AF_INET6, &ilm->ilm_v6addr, addrbuf, 1446 sizeof (addrbuf)))); 1447 if (IN6_IS_ADDR_UNSPECIFIED(&ilm->ilm_v6addr)) { 1448 (void) ip_leave_allmulti(ill->ill_ipif); 1449 /* 1450 * If we were part of an IPMP group, then 1451 * ill_handoff_responsibility() has already 1452 * nominated a new member (so we don't). 1453 */ 1454 ASSERT(ill->ill_group == NULL); 1455 } else { 1456 (void) ip_ll_send_disabmulti_req(ill, &ilm->ilm_v6addr); 1457 } 1458 } 1459 } 1460 1461 /* 1462 * Find an ilm for matching the ill and which has the source in its 1463 * INCLUDE list or does not have it in its EXCLUDE list 1464 */ 1465 ilm_t * 1466 ilm_lookup_ill_withsrc(ill_t *ill, ipaddr_t group, ipaddr_t src) 1467 { 1468 in6_addr_t v6group, v6src; 1469 1470 /* 1471 * INADDR_ANY is represented as the IPv6 unspecified addr. 1472 */ 1473 if (group == INADDR_ANY) 1474 v6group = ipv6_all_zeros; 1475 else 1476 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1477 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 1478 1479 return (ilm_lookup_ill_withsrc_v6(ill, &v6group, &v6src)); 1480 } 1481 1482 ilm_t * 1483 ilm_lookup_ill_withsrc_v6(ill_t *ill, const in6_addr_t *v6group, 1484 const in6_addr_t *v6src) 1485 { 1486 ilm_t *ilm; 1487 boolean_t isinlist; 1488 int i, numsrc; 1489 1490 /* 1491 * If the source is in any ilm's INCLUDE list, or if 1492 * it is not in any ilm's EXCLUDE list, we have a hit. 1493 */ 1494 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1495 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1496 1497 isinlist = B_FALSE; 1498 numsrc = (ilm->ilm_filter == NULL) ? 
1499 0 : ilm->ilm_filter->sl_numsrc; 1500 for (i = 0; i < numsrc; i++) { 1501 if (IN6_ARE_ADDR_EQUAL(v6src, 1502 &ilm->ilm_filter->sl_addr[i])) { 1503 isinlist = B_TRUE; 1504 break; 1505 } 1506 } 1507 if ((isinlist && ilm->ilm_fmode == MODE_IS_INCLUDE) || 1508 (!isinlist && ilm->ilm_fmode == MODE_IS_EXCLUDE)) 1509 return (ilm); 1510 else 1511 return (NULL); 1512 } 1513 } 1514 return (NULL); 1515 } 1516 1517 1518 /* Find an ilm for matching the ill */ 1519 ilm_t * 1520 ilm_lookup_ill(ill_t *ill, ipaddr_t group, zoneid_t zoneid) 1521 { 1522 in6_addr_t v6group; 1523 1524 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1525 IAM_WRITER_ILL(ill)); 1526 /* 1527 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1528 */ 1529 if (group == INADDR_ANY) 1530 v6group = ipv6_all_zeros; 1531 else 1532 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1533 1534 return (ilm_lookup_ill_v6(ill, &v6group, zoneid)); 1535 } 1536 1537 /* 1538 * Find an ilm for matching the ill. All the ilm lookup functions 1539 * ignore ILM_DELETED ilms. These have been logically deleted, and 1540 * igmp and linklayer disable multicast have been done. Only mi_free 1541 * yet to be done. Still there in the list due to ilm_walkers. The 1542 * last walker will release it. 1543 */ 1544 ilm_t * 1545 ilm_lookup_ill_v6(ill_t *ill, const in6_addr_t *v6group, zoneid_t zoneid) 1546 { 1547 ilm_t *ilm; 1548 1549 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1550 IAM_WRITER_ILL(ill)); 1551 1552 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1553 if (ilm->ilm_flags & ILM_DELETED) 1554 continue; 1555 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1556 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid)) 1557 return (ilm); 1558 } 1559 return (NULL); 1560 } 1561 1562 ilm_t * 1563 ilm_lookup_ill_index_v6(ill_t *ill, const in6_addr_t *v6group, int index, 1564 zoneid_t zoneid) 1565 { 1566 ilm_t *ilm; 1567 1568 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1569 IAM_WRITER_ILL(ill)); 1570 1571 for (ilm = ill->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) { 1572 if (ilm->ilm_flags & ILM_DELETED) 1573 continue; 1574 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group) && 1575 (zoneid == ALL_ZONES || zoneid == ilm->ilm_zoneid) && 1576 ilm->ilm_orig_ifindex == index) { 1577 return (ilm); 1578 } 1579 } 1580 return (NULL); 1581 } 1582 1583 ilm_t * 1584 ilm_lookup_ill_index_v4(ill_t *ill, ipaddr_t group, int index, zoneid_t zoneid) 1585 { 1586 in6_addr_t v6group; 1587 1588 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1589 IAM_WRITER_ILL(ill)); 1590 /* 1591 * INADDR_ANY is represented as the IPv6 unspecifed addr. 1592 */ 1593 if (group == INADDR_ANY) 1594 v6group = ipv6_all_zeros; 1595 else 1596 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1597 1598 return (ilm_lookup_ill_index_v6(ill, &v6group, index, zoneid)); 1599 } 1600 1601 /* 1602 * Found an ilm for the ipif. Only needed for IPv4 which does 1603 * ipif specific socket options. 1604 */ 1605 ilm_t * 1606 ilm_lookup_ipif(ipif_t *ipif, ipaddr_t group) 1607 { 1608 ill_t *ill = ipif->ipif_ill; 1609 ilm_t *ilm; 1610 in6_addr_t v6group; 1611 1612 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1613 IAM_WRITER_ILL(ill)); 1614 1615 /* 1616 * INADDR_ANY is represented as the IPv6 unspecifed addr. 
1617 */ 1618 if (group == INADDR_ANY) 1619 v6group = ipv6_all_zeros; 1620 else 1621 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 1622 1623 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1624 if (ilm->ilm_flags & ILM_DELETED) 1625 continue; 1626 if (ilm->ilm_ipif == ipif && 1627 IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, &v6group)) 1628 return (ilm); 1629 } 1630 return (NULL); 1631 } 1632 1633 /* 1634 * How many members on this ill? 1635 */ 1636 int 1637 ilm_numentries_v6(ill_t *ill, const in6_addr_t *v6group) 1638 { 1639 ilm_t *ilm; 1640 int i = 0; 1641 1642 ASSERT(ill->ill_ilm_walker_cnt != 0 || MUTEX_HELD(&ill->ill_lock) || 1643 IAM_WRITER_ILL(ill)); 1644 1645 for (ilm = ill->ill_ilm; ilm; ilm = ilm->ilm_next) { 1646 if (ilm->ilm_flags & ILM_DELETED) 1647 continue; 1648 if (IN6_ARE_ADDR_EQUAL(&ilm->ilm_v6addr, v6group)) { 1649 i++; 1650 } 1651 } 1652 return (i); 1653 } 1654 1655 /* Caller guarantees that the group is not already on the list */ 1656 static ilm_t * 1657 ilm_add_v6(ipif_t *ipif, const in6_addr_t *v6group, ilg_stat_t ilgstat, 1658 mcast_record_t ilg_fmode, slist_t *ilg_flist, int orig_ifindex, 1659 zoneid_t zoneid) 1660 { 1661 ill_t *ill = ipif->ipif_ill; 1662 ilm_t *ilm; 1663 ilm_t *ilm_cur; 1664 ilm_t **ilm_ptpn; 1665 1666 ASSERT(IAM_WRITER_IPIF(ipif)); 1667 1668 ilm = GETSTRUCT(ilm_t, 1); 1669 if (ilm == NULL) 1670 return (NULL); 1671 if (ilgstat != ILGSTAT_NONE && !SLIST_IS_EMPTY(ilg_flist)) { 1672 ilm->ilm_filter = l_alloc(); 1673 if (ilm->ilm_filter == NULL) { 1674 mi_free(ilm); 1675 return (NULL); 1676 } 1677 } 1678 ilm->ilm_v6addr = *v6group; 1679 ilm->ilm_refcnt = 1; 1680 ilm->ilm_zoneid = zoneid; 1681 ilm->ilm_timer = INFINITY; 1682 ilm->ilm_rtx.rtx_timer = INFINITY; 1683 /* 1684 * IPv4 Multicast groups are joined using ipif. 1685 * IPv6 Multicast groups are joined using ill. 1686 */ 1687 if (ill->ill_isv6) { 1688 ilm->ilm_ill = ill; 1689 ilm->ilm_ipif = NULL; 1690 } else { 1691 ASSERT(ilm->ilm_zoneid == ipif->ipif_zoneid); 1692 ilm->ilm_ipif = ipif; 1693 ilm->ilm_ill = NULL; 1694 } 1695 /* 1696 * After this if ilm moves to a new ill, we don't change 1697 * the ilm_orig_ifindex. Thus, if ill_index != ilm_orig_ifindex, 1698 * it has been moved. Indexes don't match even when the application 1699 * wants to join on a FAILED/INACTIVE interface because we choose 1700 * a new interface to join in. This is considered as an implicit 1701 * move. 1702 */ 1703 ilm->ilm_orig_ifindex = orig_ifindex; 1704 1705 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 1706 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 1707 1708 /* 1709 * Grab lock to give consistent view to readers 1710 */ 1711 mutex_enter(&ill->ill_lock); 1712 /* 1713 * All ilms in the same zone are contiguous in the ill_ilm list. 1714 * The loops in ip_proto_input() and ip_wput_local() use this to avoid 1715 * sending duplicates up when two applications in the same zone join the 1716 * same group on different logical interfaces. 1717 */ 1718 ilm_cur = ill->ill_ilm; 1719 ilm_ptpn = &ill->ill_ilm; 1720 while (ilm_cur != NULL && ilm_cur->ilm_zoneid != ilm->ilm_zoneid) { 1721 ilm_ptpn = &ilm_cur->ilm_next; 1722 ilm_cur = ilm_cur->ilm_next; 1723 } 1724 ilm->ilm_next = ilm_cur; 1725 *ilm_ptpn = ilm; 1726 1727 /* 1728 * If we have an associated ilg, use its filter state; if not, 1729 * default to (EXCLUDE, NULL) and set no_ilg_cnt to track this. 
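         * ((EXCLUDE, NULL) excludes no sources, i.e. it is the traditional
         * any-source multicast behaviour.)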
1730 */ 1731 if (ilgstat != ILGSTAT_NONE) { 1732 if (!SLIST_IS_EMPTY(ilg_flist)) 1733 l_copy(ilg_flist, ilm->ilm_filter); 1734 ilm->ilm_fmode = ilg_fmode; 1735 } else { 1736 ilm->ilm_no_ilg_cnt = 1; 1737 ilm->ilm_fmode = MODE_IS_EXCLUDE; 1738 } 1739 1740 mutex_exit(&ill->ill_lock); 1741 return (ilm); 1742 } 1743 1744 void 1745 ilm_walker_cleanup(ill_t *ill) 1746 { 1747 ilm_t **ilmp; 1748 ilm_t *ilm; 1749 1750 ASSERT(MUTEX_HELD(&ill->ill_lock)); 1751 ASSERT(ill->ill_ilm_walker_cnt == 0); 1752 1753 ilmp = &ill->ill_ilm; 1754 while (*ilmp != NULL) { 1755 if ((*ilmp)->ilm_flags & ILM_DELETED) { 1756 ilm = *ilmp; 1757 *ilmp = ilm->ilm_next; 1758 FREE_SLIST(ilm->ilm_filter); 1759 FREE_SLIST(ilm->ilm_pendsrcs); 1760 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1761 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1762 mi_free((char *)ilm); 1763 } else { 1764 ilmp = &(*ilmp)->ilm_next; 1765 } 1766 } 1767 ill->ill_ilm_cleanup_reqd = 0; 1768 } 1769 1770 /* 1771 * Unlink ilm and free it. 1772 */ 1773 static void 1774 ilm_delete(ilm_t *ilm) 1775 { 1776 ill_t *ill; 1777 ilm_t **ilmp; 1778 1779 if (ilm->ilm_ipif != NULL) { 1780 ASSERT(IAM_WRITER_IPIF(ilm->ilm_ipif)); 1781 ASSERT(ilm->ilm_ill == NULL); 1782 ill = ilm->ilm_ipif->ipif_ill; 1783 ASSERT(!ill->ill_isv6); 1784 } else { 1785 ASSERT(IAM_WRITER_ILL(ilm->ilm_ill)); 1786 ASSERT(ilm->ilm_ipif == NULL); 1787 ill = ilm->ilm_ill; 1788 ASSERT(ill->ill_isv6); 1789 } 1790 /* 1791 * Delete under lock protection so that readers don't stumble 1792 * on bad ilm_next 1793 */ 1794 mutex_enter(&ill->ill_lock); 1795 if (ill->ill_ilm_walker_cnt != 0) { 1796 ilm->ilm_flags |= ILM_DELETED; 1797 ill->ill_ilm_cleanup_reqd = 1; 1798 mutex_exit(&ill->ill_lock); 1799 return; 1800 } 1801 1802 for (ilmp = &ill->ill_ilm; *ilmp != ilm; ilmp = &(*ilmp)->ilm_next) 1803 ; 1804 *ilmp = ilm->ilm_next; 1805 mutex_exit(&ill->ill_lock); 1806 1807 FREE_SLIST(ilm->ilm_filter); 1808 FREE_SLIST(ilm->ilm_pendsrcs); 1809 FREE_SLIST(ilm->ilm_rtx.rtx_allow); 1810 FREE_SLIST(ilm->ilm_rtx.rtx_block); 1811 mi_free((char *)ilm); 1812 } 1813 1814 /* Free all ilms for this ipif */ 1815 void 1816 ilm_free(ipif_t *ipif) 1817 { 1818 ill_t *ill = ipif->ipif_ill; 1819 ilm_t *ilm; 1820 ilm_t *next_ilm; 1821 1822 ASSERT(IAM_WRITER_IPIF(ipif)); 1823 1824 for (ilm = ill->ill_ilm; ilm; ilm = next_ilm) { 1825 next_ilm = ilm->ilm_next; 1826 if (ilm->ilm_ipif == ipif) 1827 ilm_delete(ilm); 1828 } 1829 } 1830 1831 /* 1832 * Looks up the appropriate ipif given a v4 multicast group and interface 1833 * address. On success, returns 0, with *ipifpp pointing to the found 1834 * struct. On failure, returns an errno and *ipifpp is NULL. 
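 *
 * The interface may be specified either by address (ifaddr) or by
 * ifindex, but not both (see the ASSERT below). If neither is given,
 * ipif_lookup_group() picks an interface for the group.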
1835 */ 1836 int 1837 ip_opt_check(conn_t *connp, ipaddr_t group, ipaddr_t src, ipaddr_t ifaddr, 1838 uint_t *ifindexp, mblk_t *first_mp, ipsq_func_t func, ipif_t **ipifpp) 1839 { 1840 ipif_t *ipif; 1841 int err = 0; 1842 zoneid_t zoneid = connp->conn_zoneid; 1843 1844 if (!CLASSD(group) || CLASSD(src)) { 1845 return (EINVAL); 1846 } 1847 *ipifpp = NULL; 1848 1849 ASSERT(!(ifaddr != INADDR_ANY && ifindexp != NULL && *ifindexp != 0)); 1850 if (ifaddr != INADDR_ANY) { 1851 ipif = ipif_lookup_addr(ifaddr, NULL, zoneid, 1852 CONNP_TO_WQ(connp), first_mp, func, &err); 1853 if (err != 0 && err != EINPROGRESS) 1854 err = EADDRNOTAVAIL; 1855 } else if (ifindexp != NULL && *ifindexp != 0) { 1856 ipif = ipif_lookup_on_ifindex(*ifindexp, B_FALSE, zoneid, 1857 CONNP_TO_WQ(connp), first_mp, func, &err); 1858 } else { 1859 ipif = ipif_lookup_group(group, zoneid); 1860 if (ipif == NULL) 1861 return (EADDRNOTAVAIL); 1862 } 1863 if (ipif == NULL) 1864 return (err); 1865 1866 *ipifpp = ipif; 1867 return (0); 1868 } 1869 1870 /* 1871 * Looks up the appropriate ill (or ipif if v4mapped) given an interface 1872 * index and IPv6 multicast group. On success, returns 0, with *illpp (or 1873 * *ipifpp if v4mapped) pointing to the found struct. On failure, returns 1874 * an errno and *illpp and *ipifpp are undefined. 1875 */ 1876 int 1877 ip_opt_check_v6(conn_t *connp, const in6_addr_t *v6group, ipaddr_t *v4group, 1878 const in6_addr_t *v6src, ipaddr_t *v4src, boolean_t *isv6, int ifindex, 1879 mblk_t *first_mp, ipsq_func_t func, ill_t **illpp, ipif_t **ipifpp) 1880 { 1881 boolean_t src_unspec; 1882 ill_t *ill = NULL; 1883 ipif_t *ipif = NULL; 1884 int err; 1885 zoneid_t zoneid = connp->conn_zoneid; 1886 queue_t *wq = CONNP_TO_WQ(connp); 1887 1888 src_unspec = IN6_IS_ADDR_UNSPECIFIED(v6src); 1889 1890 if (IN6_IS_ADDR_V4MAPPED(v6group)) { 1891 if (!IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1892 return (EINVAL); 1893 IN6_V4MAPPED_TO_IPADDR(v6group, *v4group); 1894 if (src_unspec) { 1895 *v4src = INADDR_ANY; 1896 } else { 1897 IN6_V4MAPPED_TO_IPADDR(v6src, *v4src); 1898 } 1899 if (!CLASSD(*v4group) || CLASSD(*v4src)) 1900 return (EINVAL); 1901 *ipifpp = NULL; 1902 *isv6 = B_FALSE; 1903 } else { 1904 if (IN6_IS_ADDR_V4MAPPED(v6src) && !src_unspec) 1905 return (EINVAL); 1906 if (!IN6_IS_ADDR_MULTICAST(v6group) || 1907 IN6_IS_ADDR_MULTICAST(v6src)) { 1908 return (EINVAL); 1909 } 1910 *illpp = NULL; 1911 *isv6 = B_TRUE; 1912 } 1913 1914 if (ifindex == 0) { 1915 if (*isv6) 1916 ill = ill_lookup_group_v6(v6group, zoneid); 1917 else 1918 ipif = ipif_lookup_group(*v4group, zoneid); 1919 if (ill == NULL && ipif == NULL) 1920 return (EADDRNOTAVAIL); 1921 } else { 1922 if (*isv6) { 1923 ill = ill_lookup_on_ifindex(ifindex, B_TRUE, 1924 wq, first_mp, func, &err); 1925 if (ill != NULL && 1926 !ipif_lookup_zoneid(ill, zoneid, 0, NULL)) { 1927 ill_refrele(ill); 1928 ill = NULL; 1929 err = EADDRNOTAVAIL; 1930 } 1931 } else { 1932 ipif = ipif_lookup_on_ifindex(ifindex, B_FALSE, 1933 zoneid, wq, first_mp, func, &err); 1934 } 1935 if (ill == NULL && ipif == NULL) 1936 return (err); 1937 } 1938 1939 *ipifpp = ipif; 1940 *illpp = ill; 1941 return (0); 1942 } 1943 1944 static int 1945 ip_get_srcfilter(conn_t *connp, struct group_filter *gf, 1946 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 1947 { 1948 ilg_t *ilg; 1949 int i, numsrc, fmode, outsrcs; 1950 struct sockaddr_in *sin; 1951 struct sockaddr_in6 *sin6; 1952 struct in_addr *addrp; 1953 slist_t *fp; 1954 boolean_t is_v4only_api; 1955 1956 
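        /*
         * conn_lock is held across the ilg lookup and the copy-out below
         * so that ilg_fmode/ilg_filter cannot change while being read.
         */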
mutex_enter(&connp->conn_lock); 1957 1958 ilg = ilg_lookup_ipif(connp, grp, ipif); 1959 if (ilg == NULL) { 1960 mutex_exit(&connp->conn_lock); 1961 return (EADDRNOTAVAIL); 1962 } 1963 1964 if (gf == NULL) { 1965 ASSERT(imsf != NULL); 1966 ASSERT(!isv4mapped); 1967 is_v4only_api = B_TRUE; 1968 outsrcs = imsf->imsf_numsrc; 1969 } else { 1970 ASSERT(imsf == NULL); 1971 is_v4only_api = B_FALSE; 1972 outsrcs = gf->gf_numsrc; 1973 } 1974 1975 /* 1976 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 1977 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 1978 * So we need to translate here. 1979 */ 1980 fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 1981 MCAST_INCLUDE : MCAST_EXCLUDE; 1982 if ((fp = ilg->ilg_filter) == NULL) { 1983 numsrc = 0; 1984 } else { 1985 for (i = 0; i < outsrcs; i++) { 1986 if (i == fp->sl_numsrc) 1987 break; 1988 if (isv4mapped) { 1989 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 1990 sin6->sin6_family = AF_INET6; 1991 sin6->sin6_addr = fp->sl_addr[i]; 1992 } else { 1993 if (is_v4only_api) { 1994 addrp = &imsf->imsf_slist[i]; 1995 } else { 1996 sin = (struct sockaddr_in *) 1997 &gf->gf_slist[i]; 1998 sin->sin_family = AF_INET; 1999 addrp = &sin->sin_addr; 2000 } 2001 IN6_V4MAPPED_TO_INADDR(&fp->sl_addr[i], addrp); 2002 } 2003 } 2004 numsrc = fp->sl_numsrc; 2005 } 2006 2007 if (is_v4only_api) { 2008 imsf->imsf_numsrc = numsrc; 2009 imsf->imsf_fmode = fmode; 2010 } else { 2011 gf->gf_numsrc = numsrc; 2012 gf->gf_fmode = fmode; 2013 } 2014 2015 mutex_exit(&connp->conn_lock); 2016 2017 return (0); 2018 } 2019 2020 static int 2021 ip_get_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2022 const struct in6_addr *grp, ill_t *ill) 2023 { 2024 ilg_t *ilg; 2025 int i; 2026 struct sockaddr_storage *sl; 2027 struct sockaddr_in6 *sin6; 2028 slist_t *fp; 2029 2030 mutex_enter(&connp->conn_lock); 2031 2032 ilg = ilg_lookup_ill_v6(connp, grp, ill); 2033 if (ilg == NULL) { 2034 mutex_exit(&connp->conn_lock); 2035 return (EADDRNOTAVAIL); 2036 } 2037 2038 /* 2039 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2040 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2041 * So we need to translate here. 2042 */ 2043 gf->gf_fmode = (ilg->ilg_fmode == MODE_IS_INCLUDE) ? 
2044 MCAST_INCLUDE : MCAST_EXCLUDE; 2045 if ((fp = ilg->ilg_filter) == NULL) { 2046 gf->gf_numsrc = 0; 2047 } else { 2048 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2049 if (i == fp->sl_numsrc) 2050 break; 2051 sin6 = (struct sockaddr_in6 *)sl; 2052 sin6->sin6_family = AF_INET6; 2053 sin6->sin6_addr = fp->sl_addr[i]; 2054 } 2055 gf->gf_numsrc = fp->sl_numsrc; 2056 } 2057 2058 mutex_exit(&connp->conn_lock); 2059 2060 return (0); 2061 } 2062 2063 static int 2064 ip_set_srcfilter(conn_t *connp, struct group_filter *gf, 2065 struct ip_msfilter *imsf, ipaddr_t grp, ipif_t *ipif, boolean_t isv4mapped) 2066 { 2067 ilg_t *ilg; 2068 int i, err, insrcs, infmode, new_fmode; 2069 struct sockaddr_in *sin; 2070 struct sockaddr_in6 *sin6; 2071 struct in_addr *addrp; 2072 slist_t *orig_filter = NULL; 2073 slist_t *new_filter = NULL; 2074 mcast_record_t orig_fmode; 2075 boolean_t leave_grp, is_v4only_api; 2076 ilg_stat_t ilgstat; 2077 2078 if (gf == NULL) { 2079 ASSERT(imsf != NULL); 2080 ASSERT(!isv4mapped); 2081 is_v4only_api = B_TRUE; 2082 insrcs = imsf->imsf_numsrc; 2083 infmode = imsf->imsf_fmode; 2084 } else { 2085 ASSERT(imsf == NULL); 2086 is_v4only_api = B_FALSE; 2087 insrcs = gf->gf_numsrc; 2088 infmode = gf->gf_fmode; 2089 } 2090 2091 /* Make sure we can handle the source list */ 2092 if (insrcs > MAX_FILTER_SIZE) 2093 return (ENOBUFS); 2094 2095 /* 2096 * setting the filter to (INCLUDE, NULL) is treated 2097 * as a request to leave the group. 2098 */ 2099 leave_grp = (infmode == MCAST_INCLUDE && insrcs == 0); 2100 2101 ASSERT(IAM_WRITER_IPIF(ipif)); 2102 2103 mutex_enter(&connp->conn_lock); 2104 2105 ilg = ilg_lookup_ipif(connp, grp, ipif); 2106 if (ilg == NULL) { 2107 /* 2108 * if the request was actually to leave, and we 2109 * didn't find an ilg, there's nothing to do. 2110 */ 2111 if (!leave_grp) 2112 ilg = conn_ilg_alloc(connp); 2113 if (leave_grp || ilg == NULL) { 2114 mutex_exit(&connp->conn_lock); 2115 return (leave_grp ? 0 : ENOMEM); 2116 } 2117 ilgstat = ILGSTAT_NEW; 2118 IN6_IPADDR_TO_V4MAPPED(grp, &ilg->ilg_v6group); 2119 ilg->ilg_ipif = ipif; 2120 ilg->ilg_ill = NULL; 2121 ilg->ilg_orig_ifindex = 0; 2122 } else if (leave_grp) { 2123 ilg_delete(connp, ilg, NULL); 2124 mutex_exit(&connp->conn_lock); 2125 (void) ip_delmulti(grp, ipif, B_FALSE, B_TRUE); 2126 return (0); 2127 } else { 2128 ilgstat = ILGSTAT_CHANGE; 2129 /* Preserve existing state in case ip_addmulti() fails */ 2130 orig_fmode = ilg->ilg_fmode; 2131 if (ilg->ilg_filter == NULL) { 2132 orig_filter = NULL; 2133 } else { 2134 orig_filter = l_alloc_copy(ilg->ilg_filter); 2135 if (orig_filter == NULL) { 2136 mutex_exit(&connp->conn_lock); 2137 return (ENOMEM); 2138 } 2139 } 2140 } 2141 2142 /* 2143 * Alloc buffer to copy new state into (see below) before 2144 * we make any changes, so we can bail if it fails. 
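	 * The new_filter snapshot is what gets handed to ip_addmulti()
	 * once conn_lock has been dropped; orig_filter, saved above, is
	 * only used to roll the ilg back if that call fails.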
2145 */ 2146 if ((new_filter = l_alloc()) == NULL) { 2147 mutex_exit(&connp->conn_lock); 2148 err = ENOMEM; 2149 goto free_and_exit; 2150 } 2151 2152 if (insrcs == 0) { 2153 CLEAR_SLIST(ilg->ilg_filter); 2154 } else { 2155 slist_t *fp; 2156 if (ilg->ilg_filter == NULL) { 2157 fp = l_alloc(); 2158 if (fp == NULL) { 2159 if (ilgstat == ILGSTAT_NEW) 2160 ilg_delete(connp, ilg, NULL); 2161 mutex_exit(&connp->conn_lock); 2162 err = ENOMEM; 2163 goto free_and_exit; 2164 } 2165 } else { 2166 fp = ilg->ilg_filter; 2167 } 2168 for (i = 0; i < insrcs; i++) { 2169 if (isv4mapped) { 2170 sin6 = (struct sockaddr_in6 *)&gf->gf_slist[i]; 2171 fp->sl_addr[i] = sin6->sin6_addr; 2172 } else { 2173 if (is_v4only_api) { 2174 addrp = &imsf->imsf_slist[i]; 2175 } else { 2176 sin = (struct sockaddr_in *) 2177 &gf->gf_slist[i]; 2178 addrp = &sin->sin_addr; 2179 } 2180 IN6_INADDR_TO_V4MAPPED(addrp, &fp->sl_addr[i]); 2181 } 2182 } 2183 fp->sl_numsrc = insrcs; 2184 ilg->ilg_filter = fp; 2185 } 2186 /* 2187 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2188 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2189 * So we need to translate here. 2190 */ 2191 ilg->ilg_fmode = (infmode == MCAST_INCLUDE) ? 2192 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2193 2194 /* 2195 * Save copy of ilg's filter state to pass to other functions, 2196 * so we can release conn_lock now. 2197 */ 2198 new_fmode = ilg->ilg_fmode; 2199 l_copy(ilg->ilg_filter, new_filter); 2200 2201 mutex_exit(&connp->conn_lock); 2202 2203 err = ip_addmulti(grp, ipif, ilgstat, new_fmode, new_filter); 2204 if (err != 0) { 2205 /* 2206 * Restore the original filter state, or delete the 2207 * newly-created ilg. We need to look up the ilg 2208 * again, though, since we've not been holding the 2209 * conn_lock. 2210 */ 2211 mutex_enter(&connp->conn_lock); 2212 ilg = ilg_lookup_ipif(connp, grp, ipif); 2213 ASSERT(ilg != NULL); 2214 if (ilgstat == ILGSTAT_NEW) { 2215 ilg_delete(connp, ilg, NULL); 2216 } else { 2217 ilg->ilg_fmode = orig_fmode; 2218 if (SLIST_IS_EMPTY(orig_filter)) { 2219 CLEAR_SLIST(ilg->ilg_filter); 2220 } else { 2221 /* 2222 * We didn't free the filter, even if we 2223 * were trying to make the source list empty; 2224 * so if orig_filter isn't empty, the ilg 2225 * must still have a filter alloc'd. 2226 */ 2227 l_copy(orig_filter, ilg->ilg_filter); 2228 } 2229 } 2230 mutex_exit(&connp->conn_lock); 2231 } 2232 2233 free_and_exit: 2234 l_free(orig_filter); 2235 l_free(new_filter); 2236 2237 return (err); 2238 } 2239 2240 static int 2241 ip_set_srcfilter_v6(conn_t *connp, struct group_filter *gf, 2242 const struct in6_addr *grp, ill_t *ill) 2243 { 2244 ilg_t *ilg; 2245 int i, orig_ifindex, orig_fmode, new_fmode, err; 2246 slist_t *orig_filter = NULL; 2247 slist_t *new_filter = NULL; 2248 struct sockaddr_storage *sl; 2249 struct sockaddr_in6 *sin6; 2250 boolean_t leave_grp; 2251 ilg_stat_t ilgstat; 2252 2253 /* Make sure we can handle the source list */ 2254 if (gf->gf_numsrc > MAX_FILTER_SIZE) 2255 return (ENOBUFS); 2256 2257 /* 2258 * setting the filter to (INCLUDE, NULL) is treated 2259 * as a request to leave the group. 2260 */ 2261 leave_grp = (gf->gf_fmode == MCAST_INCLUDE && gf->gf_numsrc == 0); 2262 2263 ASSERT(IAM_WRITER_ILL(ill)); 2264 2265 /* 2266 * Use the ifindex to do the lookup. We can't use the ill 2267 * directly because ilg_ill could point to a different ill 2268 * if things have moved. 
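	 * For example, after a failover the ilg may now point at a
	 * different ill in the same IPMP group, while ilg_orig_ifindex
	 * still records the index the application originally joined on,
	 * which is what the lookup below matches against.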
2269 */ 2270 orig_ifindex = ill->ill_phyint->phyint_ifindex; 2271 2272 mutex_enter(&connp->conn_lock); 2273 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2274 if (ilg == NULL) { 2275 /* 2276 * if the request was actually to leave, and we 2277 * didn't find an ilg, there's nothing to do. 2278 */ 2279 if (!leave_grp) 2280 ilg = conn_ilg_alloc(connp); 2281 if (leave_grp || ilg == NULL) { 2282 mutex_exit(&connp->conn_lock); 2283 return (leave_grp ? 0 : ENOMEM); 2284 } 2285 ilgstat = ILGSTAT_NEW; 2286 ilg->ilg_v6group = *grp; 2287 ilg->ilg_ipif = NULL; 2288 /* 2289 * Choose our target ill to join on. This might be 2290 * different from the ill we've been given if it's 2291 * currently down and part of a group. 2292 * 2293 * new ill is not refheld; we are writer. 2294 */ 2295 ill = ip_choose_multi_ill(ill, grp); 2296 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 2297 ilg->ilg_ill = ill; 2298 /* 2299 * Remember the index that we joined on, so that we can 2300 * successfully delete them later on and also search for 2301 * duplicates if the application wants to join again. 2302 */ 2303 ilg->ilg_orig_ifindex = orig_ifindex; 2304 } else if (leave_grp) { 2305 /* 2306 * Use the ilg's current ill for the deletion, 2307 * we might have failed over. 2308 */ 2309 ill = ilg->ilg_ill; 2310 ilg_delete(connp, ilg, NULL); 2311 mutex_exit(&connp->conn_lock); 2312 (void) ip_delmulti_v6(grp, ill, orig_ifindex, 2313 connp->conn_zoneid, B_FALSE, B_TRUE); 2314 return (0); 2315 } else { 2316 ilgstat = ILGSTAT_CHANGE; 2317 /* 2318 * The current ill might be different from the one we were 2319 * asked to join on (if failover has occurred); we should 2320 * join on the ill stored in the ilg. The original ill 2321 * is noted in ilg_orig_ifindex, which matched our request. 2322 */ 2323 ill = ilg->ilg_ill; 2324 /* preserve existing state in case ip_addmulti() fails */ 2325 orig_fmode = ilg->ilg_fmode; 2326 if (ilg->ilg_filter == NULL) { 2327 orig_filter = NULL; 2328 } else { 2329 orig_filter = l_alloc_copy(ilg->ilg_filter); 2330 if (orig_filter == NULL) { 2331 mutex_exit(&connp->conn_lock); 2332 return (ENOMEM); 2333 } 2334 } 2335 } 2336 2337 /* 2338 * Alloc buffer to copy new state into (see below) before 2339 * we make any changes, so we can bail if it fails. 2340 */ 2341 if ((new_filter = l_alloc()) == NULL) { 2342 mutex_exit(&connp->conn_lock); 2343 err = ENOMEM; 2344 goto free_and_exit; 2345 } 2346 2347 if (gf->gf_numsrc == 0) { 2348 CLEAR_SLIST(ilg->ilg_filter); 2349 } else { 2350 slist_t *fp; 2351 if (ilg->ilg_filter == NULL) { 2352 fp = l_alloc(); 2353 if (fp == NULL) { 2354 if (ilgstat == ILGSTAT_NEW) 2355 ilg_delete(connp, ilg, NULL); 2356 mutex_exit(&connp->conn_lock); 2357 err = ENOMEM; 2358 goto free_and_exit; 2359 } 2360 } else { 2361 fp = ilg->ilg_filter; 2362 } 2363 for (i = 0, sl = gf->gf_slist; i < gf->gf_numsrc; i++, sl++) { 2364 sin6 = (struct sockaddr_in6 *)sl; 2365 fp->sl_addr[i] = sin6->sin6_addr; 2366 } 2367 fp->sl_numsrc = gf->gf_numsrc; 2368 ilg->ilg_filter = fp; 2369 } 2370 /* 2371 * In the kernel, we use the state definitions MODE_IS_[IN|EX]CLUDE 2372 * to identify the filter mode; but the API uses MCAST_[IN|EX]CLUDE. 2373 * So we need to translate here. 2374 */ 2375 ilg->ilg_fmode = (gf->gf_fmode == MCAST_INCLUDE) ? 2376 MODE_IS_INCLUDE : MODE_IS_EXCLUDE; 2377 2378 /* 2379 * Save copy of ilg's filter state to pass to other functions, 2380 * so we can release conn_lock now. 
2381 */ 2382 new_fmode = ilg->ilg_fmode; 2383 l_copy(ilg->ilg_filter, new_filter); 2384 2385 mutex_exit(&connp->conn_lock); 2386 2387 err = ip_addmulti_v6(grp, ill, orig_ifindex, connp->conn_zoneid, 2388 ilgstat, new_fmode, new_filter); 2389 if (err != 0) { 2390 /* 2391 * Restore the original filter state, or delete the 2392 * newly-created ilg. We need to look up the ilg 2393 * again, though, since we've not been holding the 2394 * conn_lock. 2395 */ 2396 mutex_enter(&connp->conn_lock); 2397 ilg = ilg_lookup_ill_index_v6(connp, grp, orig_ifindex); 2398 ASSERT(ilg != NULL); 2399 if (ilgstat == ILGSTAT_NEW) { 2400 ilg_delete(connp, ilg, NULL); 2401 } else { 2402 ilg->ilg_fmode = orig_fmode; 2403 if (SLIST_IS_EMPTY(orig_filter)) { 2404 CLEAR_SLIST(ilg->ilg_filter); 2405 } else { 2406 /* 2407 * We didn't free the filter, even if we 2408 * were trying to make the source list empty; 2409 * so if orig_filter isn't empty, the ilg 2410 * must still have a filter alloc'd. 2411 */ 2412 l_copy(orig_filter, ilg->ilg_filter); 2413 } 2414 } 2415 mutex_exit(&connp->conn_lock); 2416 } 2417 2418 free_and_exit: 2419 l_free(orig_filter); 2420 l_free(new_filter); 2421 2422 return (err); 2423 } 2424 2425 /* 2426 * Process the SIOC[GS]MSFILTER and SIOC[GS]IPMSFILTER ioctls. 2427 */ 2428 /* ARGSUSED */ 2429 int 2430 ip_sioctl_msfilter(ipif_t *ipif, sin_t *dummy_sin, queue_t *q, mblk_t *mp, 2431 ip_ioctl_cmd_t *ipip, void *ifreq) 2432 { 2433 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2434 /* existence verified in ip_wput_nondata() */ 2435 mblk_t *data_mp = mp->b_cont->b_cont; 2436 int datalen, err, cmd, minsize; 2437 int expsize = 0; 2438 conn_t *connp; 2439 boolean_t isv6, is_v4only_api, getcmd; 2440 struct sockaddr_in *gsin; 2441 struct sockaddr_in6 *gsin6; 2442 ipaddr_t v4grp; 2443 in6_addr_t v6grp; 2444 struct group_filter *gf = NULL; 2445 struct ip_msfilter *imsf = NULL; 2446 mblk_t *ndp; 2447 2448 if (data_mp->b_cont != NULL) { 2449 if ((ndp = msgpullup(data_mp, -1)) == NULL) 2450 return (ENOMEM); 2451 freemsg(data_mp); 2452 data_mp = ndp; 2453 mp->b_cont->b_cont = data_mp; 2454 } 2455 2456 cmd = iocp->ioc_cmd; 2457 getcmd = (cmd == SIOCGIPMSFILTER || cmd == SIOCGMSFILTER); 2458 is_v4only_api = (cmd == SIOCGIPMSFILTER || cmd == SIOCSIPMSFILTER); 2459 minsize = (is_v4only_api) ? IP_MSFILTER_SIZE(0) : GROUP_FILTER_SIZE(0); 2460 datalen = MBLKL(data_mp); 2461 2462 if (datalen < minsize) 2463 return (EINVAL); 2464 2465 /* 2466 * now we know we have at least have the initial structure, 2467 * but need to check for the source list array. 
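	 * For example, a SIOCSMSFILTER request whose group_filter claims
	 * gf_numsrc == 3 must supply at least GROUP_FILTER_SIZE(3) bytes;
	 * anything shorter is rejected with EINVAL just below.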
2468 */ 2469 if (is_v4only_api) { 2470 imsf = (struct ip_msfilter *)data_mp->b_rptr; 2471 isv6 = B_FALSE; 2472 expsize = IP_MSFILTER_SIZE(imsf->imsf_numsrc); 2473 } else { 2474 gf = (struct group_filter *)data_mp->b_rptr; 2475 if (gf->gf_group.ss_family == AF_INET6) { 2476 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2477 isv6 = !(IN6_IS_ADDR_V4MAPPED(&gsin6->sin6_addr)); 2478 } else { 2479 isv6 = B_FALSE; 2480 } 2481 expsize = GROUP_FILTER_SIZE(gf->gf_numsrc); 2482 } 2483 if (datalen < expsize) 2484 return (EINVAL); 2485 2486 connp = Q_TO_CONN(q); 2487 2488 /* operation not supported on the virtual network interface */ 2489 if (IS_VNI(ipif->ipif_ill)) 2490 return (EINVAL); 2491 2492 if (isv6) { 2493 ill_t *ill = ipif->ipif_ill; 2494 ill_refhold(ill); 2495 2496 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2497 v6grp = gsin6->sin6_addr; 2498 if (getcmd) 2499 err = ip_get_srcfilter_v6(connp, gf, &v6grp, ill); 2500 else 2501 err = ip_set_srcfilter_v6(connp, gf, &v6grp, ill); 2502 2503 ill_refrele(ill); 2504 } else { 2505 boolean_t isv4mapped = B_FALSE; 2506 if (is_v4only_api) { 2507 v4grp = (ipaddr_t)imsf->imsf_multiaddr.s_addr; 2508 } else { 2509 if (gf->gf_group.ss_family == AF_INET) { 2510 gsin = (struct sockaddr_in *)&gf->gf_group; 2511 v4grp = (ipaddr_t)gsin->sin_addr.s_addr; 2512 } else { 2513 gsin6 = (struct sockaddr_in6 *)&gf->gf_group; 2514 IN6_V4MAPPED_TO_IPADDR(&gsin6->sin6_addr, 2515 v4grp); 2516 isv4mapped = B_TRUE; 2517 } 2518 } 2519 if (getcmd) 2520 err = ip_get_srcfilter(connp, gf, imsf, v4grp, ipif, 2521 isv4mapped); 2522 else 2523 err = ip_set_srcfilter(connp, gf, imsf, v4grp, ipif, 2524 isv4mapped); 2525 } 2526 2527 return (err); 2528 } 2529 2530 /* 2531 * Finds the ipif based on information in the ioctl headers. Needed to make 2532 * ip_process_ioctl() happy (it needs to know the ipif for IPI_WR-flagged 2533 * ioctls prior to calling the ioctl's handler function). Somewhat analogous 2534 * to ip_extract_lifreq_cmn() and ip_extract_tunreq(). 
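 * For example, a v4-only SIOC[GS]IPMSFILTER request with imsf_interface
 * set to INADDR_ANY resolves the ipif from the multicast group itself
 * (ipif_lookup_group), while a SIOC[GS]MSFILTER request with a non-zero
 * gf_interface index resolves it with ipif_lookup_on_ifindex().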
2535 */ 2536 int 2537 ip_extract_msfilter(queue_t *q, mblk_t *mp, ipif_t **ipifpp, ipsq_func_t func) 2538 { 2539 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2540 int cmd = iocp->ioc_cmd, err = 0; 2541 conn_t *connp; 2542 ipif_t *ipif; 2543 /* caller has verified this mblk exists */ 2544 char *dbuf = (char *)mp->b_cont->b_cont->b_rptr; 2545 struct ip_msfilter *imsf; 2546 struct group_filter *gf; 2547 ipaddr_t v4addr, v4grp; 2548 in6_addr_t v6grp; 2549 uint32_t index; 2550 zoneid_t zoneid; 2551 2552 connp = Q_TO_CONN(q); 2553 zoneid = connp->conn_zoneid; 2554 2555 /* don't allow multicast operations on a tcp conn */ 2556 if (IS_TCP_CONN(connp)) 2557 return (ENOPROTOOPT); 2558 2559 if (cmd == SIOCSIPMSFILTER || cmd == SIOCGIPMSFILTER) { 2560 /* don't allow v4-specific ioctls on v6 socket */ 2561 if (connp->conn_af_isv6) 2562 return (EAFNOSUPPORT); 2563 2564 imsf = (struct ip_msfilter *)dbuf; 2565 v4addr = imsf->imsf_interface.s_addr; 2566 v4grp = imsf->imsf_multiaddr.s_addr; 2567 if (v4addr == INADDR_ANY) { 2568 ipif = ipif_lookup_group(v4grp, zoneid); 2569 if (ipif == NULL) 2570 err = EADDRNOTAVAIL; 2571 } else { 2572 ipif = ipif_lookup_addr(v4addr, NULL, zoneid, q, mp, 2573 func, &err); 2574 } 2575 } else { 2576 boolean_t isv6 = B_FALSE; 2577 gf = (struct group_filter *)dbuf; 2578 index = gf->gf_interface; 2579 if (gf->gf_group.ss_family == AF_INET6) { 2580 struct sockaddr_in6 *sin6; 2581 sin6 = (struct sockaddr_in6 *)&gf->gf_group; 2582 v6grp = sin6->sin6_addr; 2583 if (IN6_IS_ADDR_V4MAPPED(&v6grp)) 2584 IN6_V4MAPPED_TO_IPADDR(&v6grp, v4grp); 2585 else 2586 isv6 = B_TRUE; 2587 } else if (gf->gf_group.ss_family == AF_INET) { 2588 struct sockaddr_in *sin; 2589 sin = (struct sockaddr_in *)&gf->gf_group; 2590 v4grp = sin->sin_addr.s_addr; 2591 } else { 2592 return (EAFNOSUPPORT); 2593 } 2594 if (index == 0) { 2595 if (isv6) 2596 ipif = ipif_lookup_group_v6(&v6grp, zoneid); 2597 else 2598 ipif = ipif_lookup_group(v4grp, zoneid); 2599 if (ipif == NULL) 2600 err = EADDRNOTAVAIL; 2601 } else { 2602 ipif = ipif_lookup_on_ifindex(index, isv6, zoneid, 2603 q, mp, func, &err); 2604 } 2605 } 2606 2607 *ipifpp = ipif; 2608 return (err); 2609 } 2610 2611 /* 2612 * The structures used for the SIOC*MSFILTER ioctls usually must be copied 2613 * in in two stages, as the first copyin tells us the size of the attached 2614 * source buffer. This function is called by ip_wput_nondata() after the 2615 * first copyin has completed; it figures out how big the second stage 2616 * needs to be, and kicks it off. 2617 * 2618 * In some cases (numsrc < 2), the second copyin is not needed as the 2619 * first one gets a complete structure containing 1 source addr. 2620 * 2621 * The function returns 0 if a second copyin has been started (i.e. there's 2622 * no more work to be done right now), or 1 if the second copyin is not 2623 * needed and ip_wput_nondata() can continue its processing. 
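 * For example, a SIOCSMSFILTER request with gf_numsrc == 4 first copies
 * in sizeof (struct group_filter) bytes (which already has room for one
 * source address); the second copyin then fetches the remaining
 * GROUP_FILTER_SIZE(4) - sizeof (struct group_filter) bytes of the
 * source array.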
2624 */ 2625 int 2626 ip_copyin_msfilter(queue_t *q, mblk_t *mp) 2627 { 2628 struct iocblk *iocp = (struct iocblk *)mp->b_rptr; 2629 int cmd = iocp->ioc_cmd; 2630 /* validity of this checked in ip_wput_nondata() */ 2631 mblk_t *mp1 = mp->b_cont->b_cont; 2632 int copysize = 0; 2633 int offset; 2634 2635 if (cmd == SIOCSMSFILTER || cmd == SIOCGMSFILTER) { 2636 struct group_filter *gf = (struct group_filter *)mp1->b_rptr; 2637 if (gf->gf_numsrc >= 2) { 2638 offset = sizeof (struct group_filter); 2639 copysize = GROUP_FILTER_SIZE(gf->gf_numsrc) - offset; 2640 } 2641 } else { 2642 struct ip_msfilter *imsf = (struct ip_msfilter *)mp1->b_rptr; 2643 if (imsf->imsf_numsrc >= 2) { 2644 offset = sizeof (struct ip_msfilter); 2645 copysize = IP_MSFILTER_SIZE(imsf->imsf_numsrc) - offset; 2646 } 2647 } 2648 if (copysize > 0) { 2649 mi_copyin_n(q, mp, offset, copysize); 2650 return (0); 2651 } 2652 return (1); 2653 } 2654 2655 /* 2656 * Handle the following optmgmt: 2657 * IP_ADD_MEMBERSHIP must not have joined already 2658 * MCAST_JOIN_GROUP must not have joined already 2659 * IP_BLOCK_SOURCE must have joined already 2660 * MCAST_BLOCK_SOURCE must have joined already 2661 * IP_JOIN_SOURCE_GROUP may have joined already 2662 * MCAST_JOIN_SOURCE_GROUP may have joined already 2663 * 2664 * fmode and src parameters may be used to determine which option is 2665 * being set, as follows (the IP_* and MCAST_* versions of each option 2666 * are functionally equivalent): 2667 * opt fmode src 2668 * IP_ADD_MEMBERSHIP MODE_IS_EXCLUDE INADDR_ANY 2669 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE INADDR_ANY 2670 * IP_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2671 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2672 * IP_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2673 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2674 * 2675 * Changing the filter mode is not allowed; if a matching ilg already 2676 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2677 * 2678 * Verifies that there is a source address of appropriate scope for 2679 * the group; if not, EADDRNOTAVAIL is returned. 2680 * 2681 * The interface to be used may be identified by an address or by an 2682 * index. A pointer to the index is passed; if it is NULL, use the 2683 * address, otherwise, use the index. 2684 */ 2685 int 2686 ip_opt_add_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2687 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2688 mblk_t *first_mp) 2689 { 2690 ipif_t *ipif; 2691 ipsq_t *ipsq; 2692 int err = 0; 2693 ill_t *ill; 2694 2695 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2696 ip_restart_optmgmt, &ipif); 2697 if (err != 0) { 2698 if (err != EINPROGRESS) { 2699 ip1dbg(("ip_opt_add_group: no ipif for group 0x%x, " 2700 "ifaddr 0x%x, ifindex %d\n", ntohl(group), 2701 ntohl(ifaddr), (ifindexp == NULL) ? 0 : *ifindexp)); 2702 } 2703 return (err); 2704 } 2705 ASSERT(ipif != NULL); 2706 2707 ill = ipif->ipif_ill; 2708 /* Operation not supported on a virtual network interface */ 2709 if (IS_VNI(ill)) { 2710 ipif_refrele(ipif); 2711 return (EINVAL); 2712 } 2713 2714 if (checkonly) { 2715 /* 2716 * do not do operation, just pretend to - new T_CHECK 2717 * semantics. The error return case above if encountered 2718 * considered a good enough "check" here. 
2719 */ 2720 ipif_refrele(ipif); 2721 return (0); 2722 } 2723 2724 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 2725 NEW_OP); 2726 2727 /* unspecified source addr => no source filtering */ 2728 err = ilg_add(connp, group, ipif, fmode, src); 2729 2730 IPSQ_EXIT(ipsq); 2731 2732 ipif_refrele(ipif); 2733 return (err); 2734 } 2735 2736 /* 2737 * Handle the following optmgmt: 2738 * IPV6_JOIN_GROUP must not have joined already 2739 * MCAST_JOIN_GROUP must not have joined already 2740 * MCAST_BLOCK_SOURCE must have joined already 2741 * MCAST_JOIN_SOURCE_GROUP may have joined already 2742 * 2743 * fmode and src parameters may be used to determine which option is 2744 * being set, as follows (IPV6_JOIN_GROUP and MCAST_JOIN_GROUP options 2745 * are functionally equivalent): 2746 * opt fmode v6src 2747 * IPV6_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2748 * MCAST_JOIN_GROUP MODE_IS_EXCLUDE unspecified 2749 * MCAST_BLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 2750 * MCAST_JOIN_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 2751 * 2752 * Changing the filter mode is not allowed; if a matching ilg already 2753 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2754 * 2755 * Verifies that there is a source address of appropriate scope for 2756 * the group; if not, EADDRNOTAVAIL is returned. 2757 * 2758 * Handles IPv4-mapped IPv6 multicast addresses by associating them 2759 * with the link-local ipif. Assumes that if v6group is v4-mapped, 2760 * v6src is also v4-mapped. 2761 */ 2762 int 2763 ip_opt_add_group_v6(conn_t *connp, boolean_t checkonly, 2764 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 2765 const in6_addr_t *v6src, mblk_t *first_mp) 2766 { 2767 ill_t *ill; 2768 ipif_t *ipif; 2769 char buf[INET6_ADDRSTRLEN]; 2770 ipaddr_t v4group, v4src; 2771 boolean_t isv6; 2772 ipsq_t *ipsq; 2773 int err; 2774 2775 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 2776 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 2777 if (err != 0) { 2778 if (err != EINPROGRESS) { 2779 ip1dbg(("ip_opt_add_group_v6: no ill for group %s/" 2780 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 2781 sizeof (buf)), ifindex)); 2782 } 2783 return (err); 2784 } 2785 ASSERT((!isv6 && ipif != NULL) || (isv6 && ill != NULL)); 2786 2787 /* operation is not supported on the virtual network interface */ 2788 if (isv6) { 2789 if (IS_VNI(ill)) { 2790 ill_refrele(ill); 2791 return (EINVAL); 2792 } 2793 } else { 2794 if (IS_VNI(ipif->ipif_ill)) { 2795 ipif_refrele(ipif); 2796 return (EINVAL); 2797 } 2798 } 2799 2800 if (checkonly) { 2801 /* 2802 * do not do operation, just pretend to - new T_CHECK 2803 * semantics. The error return case above if encountered 2804 * considered a good enough "check" here. 
2805 */ 2806 if (isv6) 2807 ill_refrele(ill); 2808 else 2809 ipif_refrele(ipif); 2810 return (0); 2811 } 2812 2813 if (!isv6) { 2814 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 2815 ipsq, NEW_OP); 2816 err = ilg_add(connp, v4group, ipif, fmode, v4src); 2817 IPSQ_EXIT(ipsq); 2818 ipif_refrele(ipif); 2819 } else { 2820 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 2821 ipsq, NEW_OP); 2822 err = ilg_add_v6(connp, v6group, ill, fmode, v6src); 2823 IPSQ_EXIT(ipsq); 2824 ill_refrele(ill); 2825 } 2826 2827 return (err); 2828 } 2829 2830 static int 2831 ip_opt_delete_group_excl(conn_t *connp, ipaddr_t group, ipif_t *ipif, 2832 mcast_record_t fmode, ipaddr_t src) 2833 { 2834 ilg_t *ilg; 2835 in6_addr_t v6src; 2836 boolean_t leaving = B_FALSE; 2837 2838 ASSERT(IAM_WRITER_IPIF(ipif)); 2839 2840 /* 2841 * The ilg is valid only while we hold the conn lock. Once we drop 2842 * the lock, another thread can locate another ilg on this connp, 2843 * but on a different ipif, and delete it, and cause the ilg array 2844 * to be reallocated and copied. Hence do the ilg_delete before 2845 * dropping the lock. 2846 */ 2847 mutex_enter(&connp->conn_lock); 2848 ilg = ilg_lookup_ipif(connp, group, ipif); 2849 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2850 mutex_exit(&connp->conn_lock); 2851 return (EADDRNOTAVAIL); 2852 } 2853 2854 /* 2855 * Decide if we're actually deleting the ilg or just removing a 2856 * source filter address; if just removing an addr, make sure we 2857 * aren't trying to change the filter mode, and that the addr is 2858 * actually in our filter list already. If we're removing the 2859 * last src in an include list, just delete the ilg. 2860 */ 2861 if (src == INADDR_ANY) { 2862 v6src = ipv6_all_zeros; 2863 leaving = B_TRUE; 2864 } else { 2865 int err = 0; 2866 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 2867 if (fmode != ilg->ilg_fmode) 2868 err = EINVAL; 2869 else if (ilg->ilg_filter == NULL || 2870 !list_has_addr(ilg->ilg_filter, &v6src)) 2871 err = EADDRNOTAVAIL; 2872 if (err != 0) { 2873 mutex_exit(&connp->conn_lock); 2874 return (err); 2875 } 2876 if (fmode == MODE_IS_INCLUDE && 2877 ilg->ilg_filter->sl_numsrc == 1) { 2878 v6src = ipv6_all_zeros; 2879 leaving = B_TRUE; 2880 } 2881 } 2882 2883 ilg_delete(connp, ilg, &v6src); 2884 mutex_exit(&connp->conn_lock); 2885 2886 (void) ip_delmulti(group, ipif, B_FALSE, leaving); 2887 return (0); 2888 } 2889 2890 static int 2891 ip_opt_delete_group_excl_v6(conn_t *connp, const in6_addr_t *v6group, 2892 ill_t *ill, mcast_record_t fmode, const in6_addr_t *v6src) 2893 { 2894 ilg_t *ilg; 2895 ill_t *ilg_ill; 2896 uint_t ilg_orig_ifindex; 2897 boolean_t leaving = B_TRUE; 2898 2899 ASSERT(IAM_WRITER_ILL(ill)); 2900 2901 /* 2902 * Use the index that we originally used to join. We can't 2903 * use the ill directly because ilg_ill could point to 2904 * a new ill if things have moved. 2905 */ 2906 mutex_enter(&connp->conn_lock); 2907 ilg = ilg_lookup_ill_index_v6(connp, v6group, 2908 ill->ill_phyint->phyint_ifindex); 2909 if ((ilg == NULL) || (ilg->ilg_flags & ILG_DELETED)) { 2910 mutex_exit(&connp->conn_lock); 2911 return (EADDRNOTAVAIL); 2912 } 2913 2914 /* 2915 * Decide if we're actually deleting the ilg or just removing a 2916 * source filter address; if just removing an addr, make sure we 2917 * aren't trying to change the filter mode, and that the addr is 2918 * actually in our filter list already. If we're removing the 2919 * last src in an include list, just delete the ilg. 
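	 * For example, a socket that joined with MCAST_JOIN_SOURCE_GROUP
	 * for a single source and now issues MCAST_LEAVE_SOURCE_GROUP for
	 * that source takes the "leaving" path below: the include list
	 * would become empty, so the whole membership is dropped.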
2920 */ 2921 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 2922 int err = 0; 2923 if (fmode != ilg->ilg_fmode) 2924 err = EINVAL; 2925 else if (ilg->ilg_filter == NULL || 2926 !list_has_addr(ilg->ilg_filter, v6src)) 2927 err = EADDRNOTAVAIL; 2928 if (err != 0) { 2929 mutex_exit(&connp->conn_lock); 2930 return (err); 2931 } 2932 if (fmode == MODE_IS_INCLUDE && 2933 ilg->ilg_filter->sl_numsrc == 1) 2934 v6src = NULL; 2935 else 2936 leaving = B_FALSE; 2937 } 2938 2939 ilg_ill = ilg->ilg_ill; 2940 ilg_orig_ifindex = ilg->ilg_orig_ifindex; 2941 ilg_delete(connp, ilg, v6src); 2942 mutex_exit(&connp->conn_lock); 2943 (void) ip_delmulti_v6(v6group, ilg_ill, ilg_orig_ifindex, 2944 connp->conn_zoneid, B_FALSE, leaving); 2945 2946 return (0); 2947 } 2948 2949 /* 2950 * Handle the following optmgmt: 2951 * IP_DROP_MEMBERSHIP will leave 2952 * MCAST_LEAVE_GROUP will leave 2953 * IP_UNBLOCK_SOURCE will not leave 2954 * MCAST_UNBLOCK_SOURCE will not leave 2955 * IP_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2956 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 2957 * 2958 * fmode and src parameters may be used to determine which option is 2959 * being set, as follows (the IP_* and MCAST_* versions of each option 2960 * are functionally equivalent): 2961 * opt fmode src 2962 * IP_DROP_MEMBERSHIP MODE_IS_INCLUDE INADDR_ANY 2963 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE INADDR_ANY 2964 * IP_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2965 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v4 addr 2966 * IP_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2967 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v4 addr 2968 * 2969 * Changing the filter mode is not allowed; if a matching ilg already 2970 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 2971 * 2972 * The interface to be used may be identified by an address or by an 2973 * index. A pointer to the index is passed; if it is NULL, use the 2974 * address, otherwise, use the index. 2975 */ 2976 int 2977 ip_opt_delete_group(conn_t *connp, boolean_t checkonly, ipaddr_t group, 2978 ipaddr_t ifaddr, uint_t *ifindexp, mcast_record_t fmode, ipaddr_t src, 2979 mblk_t *first_mp) 2980 { 2981 ipif_t *ipif; 2982 ipsq_t *ipsq; 2983 int err; 2984 ill_t *ill; 2985 2986 err = ip_opt_check(connp, group, src, ifaddr, ifindexp, first_mp, 2987 ip_restart_optmgmt, &ipif); 2988 if (err != 0) { 2989 if (err != EINPROGRESS) { 2990 ip1dbg(("ip_opt_delete_group: no ipif for group " 2991 "0x%x, ifaddr 0x%x\n", 2992 (int)ntohl(group), (int)ntohl(ifaddr))); 2993 } 2994 return (err); 2995 } 2996 ASSERT(ipif != NULL); 2997 2998 ill = ipif->ipif_ill; 2999 /* Operation not supported on a virtual network interface */ 3000 if (IS_VNI(ill)) { 3001 ipif_refrele(ipif); 3002 return (EINVAL); 3003 } 3004 3005 if (checkonly) { 3006 /* 3007 * do not do operation, just pretend to - new T_CHECK 3008 * semantics. The error return case above if encountered 3009 * considered a good enough "check" here. 
3010 */ 3011 ipif_refrele(ipif); 3012 return (0); 3013 } 3014 3015 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, ipsq, 3016 NEW_OP); 3017 err = ip_opt_delete_group_excl(connp, group, ipif, fmode, src); 3018 IPSQ_EXIT(ipsq); 3019 3020 ipif_refrele(ipif); 3021 return (err); 3022 } 3023 3024 /* 3025 * Handle the following optmgmt: 3026 * IPV6_LEAVE_GROUP will leave 3027 * MCAST_LEAVE_GROUP will leave 3028 * MCAST_UNBLOCK_SOURCE will not leave 3029 * MCAST_LEAVE_SOURCE_GROUP may leave (if leaving last source) 3030 * 3031 * fmode and src parameters may be used to determine which option is 3032 * being set, as follows (IPV6_LEAVE_GROUP and MCAST_LEAVE_GROUP options 3033 * are functionally equivalent): 3034 * opt fmode v6src 3035 * IPV6_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3036 * MCAST_LEAVE_GROUP MODE_IS_INCLUDE unspecified 3037 * MCAST_UNBLOCK_SOURCE MODE_IS_EXCLUDE v6 addr 3038 * MCAST_LEAVE_SOURCE_GROUP MODE_IS_INCLUDE v6 addr 3039 * 3040 * Changing the filter mode is not allowed; if a matching ilg already 3041 * exists and fmode != ilg->ilg_fmode, EINVAL is returned. 3042 * 3043 * Handles IPv4-mapped IPv6 multicast addresses by associating them 3044 * with the link-local ipif. Assumes that if v6group is v4-mapped, 3045 * v6src is also v4-mapped. 3046 */ 3047 int 3048 ip_opt_delete_group_v6(conn_t *connp, boolean_t checkonly, 3049 const in6_addr_t *v6group, int ifindex, mcast_record_t fmode, 3050 const in6_addr_t *v6src, mblk_t *first_mp) 3051 { 3052 ill_t *ill; 3053 ipif_t *ipif; 3054 char buf[INET6_ADDRSTRLEN]; 3055 ipaddr_t v4group, v4src; 3056 boolean_t isv6; 3057 ipsq_t *ipsq; 3058 int err; 3059 3060 err = ip_opt_check_v6(connp, v6group, &v4group, v6src, &v4src, &isv6, 3061 ifindex, first_mp, ip_restart_optmgmt, &ill, &ipif); 3062 if (err != 0) { 3063 if (err != EINPROGRESS) { 3064 ip1dbg(("ip_opt_delete_group_v6: no ill for group %s/" 3065 "index %d\n", inet_ntop(AF_INET6, v6group, buf, 3066 sizeof (buf)), ifindex)); 3067 } 3068 return (err); 3069 } 3070 ASSERT((isv6 && ill != NULL) || (!isv6 && ipif != NULL)); 3071 3072 /* operation is not supported on the virtual network interface */ 3073 if (isv6) { 3074 if (IS_VNI(ill)) { 3075 ill_refrele(ill); 3076 return (EINVAL); 3077 } 3078 } else { 3079 if (IS_VNI(ipif->ipif_ill)) { 3080 ipif_refrele(ipif); 3081 return (EINVAL); 3082 } 3083 } 3084 3085 if (checkonly) { 3086 /* 3087 * do not do operation, just pretend to - new T_CHECK 3088 * semantics. The error return case above if encountered 3089 * considered a good enough "check" here. 3090 */ 3091 if (isv6) 3092 ill_refrele(ill); 3093 else 3094 ipif_refrele(ipif); 3095 return (0); 3096 } 3097 3098 if (!isv6) { 3099 IPSQ_ENTER_IPIF(ipif, connp, first_mp, ip_restart_optmgmt, 3100 ipsq, NEW_OP); 3101 err = ip_opt_delete_group_excl(connp, v4group, ipif, fmode, 3102 v4src); 3103 IPSQ_EXIT(ipsq); 3104 ipif_refrele(ipif); 3105 } else { 3106 IPSQ_ENTER_ILL(ill, connp, first_mp, ip_restart_optmgmt, 3107 ipsq, NEW_OP); 3108 err = ip_opt_delete_group_excl_v6(connp, v6group, ill, fmode, 3109 v6src); 3110 IPSQ_EXIT(ipsq); 3111 ill_refrele(ill); 3112 } 3113 3114 return (err); 3115 } 3116 3117 /* 3118 * Group mgmt for upper conn that passes things down 3119 * to the interface multicast list (and DLPI) 3120 * These routines can handle new style options that specify an interface name 3121 * as opposed to an interface address (needed for general handling of 3122 * unnumbered interfaces.) 
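 *
 * For example, an application using the newer index-based options would
 * join a group roughly as follows (an illustrative sketch only; fd and
 * the interface name are assumptions made for the example):
 *
 *	struct group_req gr;
 *	struct sockaddr_in *sin = (struct sockaddr_in *)&gr.gr_group;
 *
 *	bzero(&gr, sizeof (gr));
 *	gr.gr_interface = if_nametoindex("hme0");
 *	sin->sin_family = AF_INET;
 *	sin->sin_addr.s_addr = inet_addr("224.1.2.3");
 *	error = setsockopt(fd, IPPROTO_IP, MCAST_JOIN_GROUP,
 *	    &gr, sizeof (gr));
 *
 * which arrives at ilg_add() below as fmode MODE_IS_EXCLUDE with an
 * unspecified source, exactly as described for MCAST_JOIN_GROUP in the
 * tables above.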
3123 */ 3124 3125 /* 3126 * Add a group to an upper conn group data structure and pass things down 3127 * to the interface multicast list (and DLPI) 3128 */ 3129 static int 3130 ilg_add(conn_t *connp, ipaddr_t group, ipif_t *ipif, mcast_record_t fmode, 3131 ipaddr_t src) 3132 { 3133 int error = 0; 3134 ill_t *ill; 3135 ilg_t *ilg; 3136 ilg_stat_t ilgstat; 3137 slist_t *new_filter = NULL; 3138 int new_fmode; 3139 3140 ASSERT(IAM_WRITER_IPIF(ipif)); 3141 3142 ill = ipif->ipif_ill; 3143 3144 if (!(ill->ill_flags & ILLF_MULTICAST)) 3145 return (EADDRNOTAVAIL); 3146 3147 /* 3148 * conn_ilg[] is protected by conn_lock. Need to hold the conn_lock 3149 * to walk the conn_ilg[] list in ilg_lookup_ipif(); also needed to 3150 * serialize 2 threads doing join (sock, group1, hme0:0) and 3151 * (sock, group2, hme1:0) where hme0 and hme1 map to different ipsqs, 3152 * but both operations happen on the same conn. 3153 */ 3154 mutex_enter(&connp->conn_lock); 3155 ilg = ilg_lookup_ipif(connp, group, ipif); 3156 3157 /* 3158 * Depending on the option we're handling, may or may not be okay 3159 * if group has already been added. Figure out our rules based 3160 * on fmode and src params. Also make sure there's enough room 3161 * in the filter if we're adding a source to an existing filter. 3162 */ 3163 if (src == INADDR_ANY) { 3164 /* we're joining for all sources, must not have joined */ 3165 if (ilg != NULL) 3166 error = EADDRINUSE; 3167 } else { 3168 if (fmode == MODE_IS_EXCLUDE) { 3169 /* (excl {addr}) => block source, must have joined */ 3170 if (ilg == NULL) 3171 error = EADDRNOTAVAIL; 3172 } 3173 /* (incl {addr}) => join source, may have joined */ 3174 3175 if (ilg != NULL && 3176 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3177 error = ENOBUFS; 3178 } 3179 if (error != 0) { 3180 mutex_exit(&connp->conn_lock); 3181 return (error); 3182 } 3183 3184 ASSERT(!(ipif->ipif_state_flags & IPIF_CONDEMNED)); 3185 3186 /* 3187 * Alloc buffer to copy new state into (see below) before 3188 * we make any changes, so we can bail if it fails. 
3189 */ 3190 if ((new_filter = l_alloc()) == NULL) { 3191 mutex_exit(&connp->conn_lock); 3192 return (ENOMEM); 3193 } 3194 3195 if (ilg == NULL) { 3196 ilgstat = ILGSTAT_NEW; 3197 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3198 mutex_exit(&connp->conn_lock); 3199 l_free(new_filter); 3200 return (ENOMEM); 3201 } 3202 if (src != INADDR_ANY) { 3203 ilg->ilg_filter = l_alloc(); 3204 if (ilg->ilg_filter == NULL) { 3205 ilg_delete(connp, ilg, NULL); 3206 mutex_exit(&connp->conn_lock); 3207 l_free(new_filter); 3208 return (ENOMEM); 3209 } 3210 ilg->ilg_filter->sl_numsrc = 1; 3211 IN6_IPADDR_TO_V4MAPPED(src, 3212 &ilg->ilg_filter->sl_addr[0]); 3213 } 3214 if (group == INADDR_ANY) { 3215 ilg->ilg_v6group = ipv6_all_zeros; 3216 } else { 3217 IN6_IPADDR_TO_V4MAPPED(group, &ilg->ilg_v6group); 3218 } 3219 ilg->ilg_ipif = ipif; 3220 ilg->ilg_ill = NULL; 3221 ilg->ilg_orig_ifindex = 0; 3222 ilg->ilg_fmode = fmode; 3223 } else { 3224 int index; 3225 in6_addr_t v6src; 3226 ilgstat = ILGSTAT_CHANGE; 3227 if (ilg->ilg_fmode != fmode || src == INADDR_ANY) { 3228 mutex_exit(&connp->conn_lock); 3229 l_free(new_filter); 3230 return (EINVAL); 3231 } 3232 if (ilg->ilg_filter == NULL) { 3233 ilg->ilg_filter = l_alloc(); 3234 if (ilg->ilg_filter == NULL) { 3235 mutex_exit(&connp->conn_lock); 3236 l_free(new_filter); 3237 return (ENOMEM); 3238 } 3239 } 3240 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3241 if (list_has_addr(ilg->ilg_filter, &v6src)) { 3242 mutex_exit(&connp->conn_lock); 3243 l_free(new_filter); 3244 return (EADDRNOTAVAIL); 3245 } 3246 index = ilg->ilg_filter->sl_numsrc++; 3247 ilg->ilg_filter->sl_addr[index] = v6src; 3248 } 3249 3250 /* 3251 * Save copy of ilg's filter state to pass to other functions, 3252 * so we can release conn_lock now. 3253 */ 3254 new_fmode = ilg->ilg_fmode; 3255 l_copy(ilg->ilg_filter, new_filter); 3256 3257 mutex_exit(&connp->conn_lock); 3258 3259 error = ip_addmulti(group, ipif, ilgstat, new_fmode, new_filter); 3260 if (error != 0) { 3261 /* 3262 * Need to undo what we did before calling ip_addmulti()! 3263 * Must look up the ilg again since we've not been holding 3264 * conn_lock. 3265 */ 3266 in6_addr_t v6src; 3267 if (ilgstat == ILGSTAT_NEW) 3268 v6src = ipv6_all_zeros; 3269 else 3270 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3271 mutex_enter(&connp->conn_lock); 3272 ilg = ilg_lookup_ipif(connp, group, ipif); 3273 ASSERT(ilg != NULL); 3274 ilg_delete(connp, ilg, &v6src); 3275 mutex_exit(&connp->conn_lock); 3276 l_free(new_filter); 3277 return (error); 3278 } 3279 3280 l_free(new_filter); 3281 return (0); 3282 } 3283 3284 static int 3285 ilg_add_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill, 3286 mcast_record_t fmode, const in6_addr_t *v6src) 3287 { 3288 int error = 0; 3289 int orig_ifindex; 3290 ilg_t *ilg; 3291 ilg_stat_t ilgstat; 3292 slist_t *new_filter = NULL; 3293 int new_fmode; 3294 3295 ASSERT(IAM_WRITER_ILL(ill)); 3296 3297 if (!(ill->ill_flags & ILLF_MULTICAST)) 3298 return (EADDRNOTAVAIL); 3299 3300 /* 3301 * conn_lock protects the ilg list. Serializes 2 threads doing 3302 * join (sock, group1, hme0) and (sock, group2, hme1) where hme0 3303 * and hme1 map to different ipsq's, but both operations happen 3304 * on the same conn. 3305 */ 3306 mutex_enter(&connp->conn_lock); 3307 3308 /* 3309 * Use the ifindex to do the lookup. We can't use the ill 3310 * directly because ilg_ill could point to a different ill if 3311 * things have moved. 
3312 */ 3313 orig_ifindex = ill->ill_phyint->phyint_ifindex; 3314 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3315 3316 /* 3317 * Depending on the option we're handling, may or may not be okay 3318 * if group has already been added. Figure out our rules based 3319 * on fmode and src params. Also make sure there's enough room 3320 * in the filter if we're adding a source to an existing filter. 3321 */ 3322 if (IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3323 /* we're joining for all sources, must not have joined */ 3324 if (ilg != NULL) 3325 error = EADDRINUSE; 3326 } else { 3327 if (fmode == MODE_IS_EXCLUDE) { 3328 /* (excl {addr}) => block source, must have joined */ 3329 if (ilg == NULL) 3330 error = EADDRNOTAVAIL; 3331 } 3332 /* (incl {addr}) => join source, may have joined */ 3333 3334 if (ilg != NULL && 3335 SLIST_CNT(ilg->ilg_filter) == MAX_FILTER_SIZE) 3336 error = ENOBUFS; 3337 } 3338 if (error != 0) { 3339 mutex_exit(&connp->conn_lock); 3340 return (error); 3341 } 3342 3343 /* 3344 * Alloc buffer to copy new state into (see below) before 3345 * we make any changes, so we can bail if it fails. 3346 */ 3347 if ((new_filter = l_alloc()) == NULL) { 3348 mutex_exit(&connp->conn_lock); 3349 return (ENOMEM); 3350 } 3351 3352 if (ilg == NULL) { 3353 if ((ilg = conn_ilg_alloc(connp)) == NULL) { 3354 mutex_exit(&connp->conn_lock); 3355 l_free(new_filter); 3356 return (ENOMEM); 3357 } 3358 if (!IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3359 ilg->ilg_filter = l_alloc(); 3360 if (ilg->ilg_filter == NULL) { 3361 ilg_delete(connp, ilg, NULL); 3362 mutex_exit(&connp->conn_lock); 3363 l_free(new_filter); 3364 return (ENOMEM); 3365 } 3366 ilg->ilg_filter->sl_numsrc = 1; 3367 ilg->ilg_filter->sl_addr[0] = *v6src; 3368 } 3369 ilgstat = ILGSTAT_NEW; 3370 ilg->ilg_v6group = *v6group; 3371 ilg->ilg_fmode = fmode; 3372 ilg->ilg_ipif = NULL; 3373 /* 3374 * Choose our target ill to join on. This might be different 3375 * from the ill we've been given if it's currently down and 3376 * part of a group. 3377 * 3378 * new ill is not refheld; we are writer. 3379 */ 3380 ill = ip_choose_multi_ill(ill, v6group); 3381 ASSERT(!(ill->ill_state_flags & ILL_CONDEMNED)); 3382 ilg->ilg_ill = ill; 3383 /* 3384 * Remember the orig_ifindex that we joined on, so that we 3385 * can successfully delete them later on and also search 3386 * for duplicates if the application wants to join again. 3387 */ 3388 ilg->ilg_orig_ifindex = orig_ifindex; 3389 } else { 3390 int index; 3391 if (ilg->ilg_fmode != fmode || IN6_IS_ADDR_UNSPECIFIED(v6src)) { 3392 mutex_exit(&connp->conn_lock); 3393 l_free(new_filter); 3394 return (EINVAL); 3395 } 3396 if (ilg->ilg_filter == NULL) { 3397 ilg->ilg_filter = l_alloc(); 3398 if (ilg->ilg_filter == NULL) { 3399 mutex_exit(&connp->conn_lock); 3400 l_free(new_filter); 3401 return (ENOMEM); 3402 } 3403 } 3404 if (list_has_addr(ilg->ilg_filter, v6src)) { 3405 mutex_exit(&connp->conn_lock); 3406 l_free(new_filter); 3407 return (EADDRNOTAVAIL); 3408 } 3409 ilgstat = ILGSTAT_CHANGE; 3410 index = ilg->ilg_filter->sl_numsrc++; 3411 ilg->ilg_filter->sl_addr[index] = *v6src; 3412 /* 3413 * The current ill might be different from the one we were 3414 * asked to join on (if failover has occurred); we should 3415 * join on the ill stored in the ilg. The original ill 3416 * is noted in ilg_orig_ifindex, which matched our request. 3417 */ 3418 ill = ilg->ilg_ill; 3419 } 3420 3421 /* 3422 * Save copy of ilg's filter state to pass to other functions, 3423 * so we can release conn_lock now. 
3424 */ 3425 new_fmode = ilg->ilg_fmode; 3426 l_copy(ilg->ilg_filter, new_filter); 3427 3428 mutex_exit(&connp->conn_lock); 3429 3430 /* 3431 * Now update the ill. We wait to do this until after the ilg 3432 * has been updated because we need to update the src filter 3433 * info for the ill, which involves looking at the status of 3434 * all the ilgs associated with this group/interface pair. 3435 */ 3436 error = ip_addmulti_v6(v6group, ill, orig_ifindex, connp->conn_zoneid, 3437 ilgstat, new_fmode, new_filter); 3438 if (error != 0) { 3439 /* 3440 * But because we waited, we have to undo the ilg update 3441 * if ip_addmulti_v6() fails. We also must lookup ilg 3442 * again, since we've not been holding conn_lock. 3443 */ 3444 in6_addr_t delsrc = 3445 (ilgstat == ILGSTAT_NEW) ? ipv6_all_zeros : *v6src; 3446 mutex_enter(&connp->conn_lock); 3447 ilg = ilg_lookup_ill_index_v6(connp, v6group, orig_ifindex); 3448 ASSERT(ilg != NULL); 3449 ilg_delete(connp, ilg, &delsrc); 3450 mutex_exit(&connp->conn_lock); 3451 l_free(new_filter); 3452 return (error); 3453 } 3454 3455 l_free(new_filter); 3456 3457 return (0); 3458 } 3459 3460 /* 3461 * Find an IPv4 ilg matching group, ill and source 3462 */ 3463 ilg_t * 3464 ilg_lookup_ill_withsrc(conn_t *connp, ipaddr_t group, ipaddr_t src, ill_t *ill) 3465 { 3466 in6_addr_t v6group, v6src; 3467 int i; 3468 boolean_t isinlist; 3469 ilg_t *ilg; 3470 ipif_t *ipif; 3471 ill_t *ilg_ill; 3472 3473 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3474 3475 /* 3476 * INADDR_ANY is represented as the IPv6 unspecified addr. 3477 */ 3478 if (group == INADDR_ANY) 3479 v6group = ipv6_all_zeros; 3480 else 3481 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3482 3483 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3484 /* ilg_ipif is NULL for v6; skip them */ 3485 ilg = &connp->conn_ilg[i]; 3486 if ((ipif = ilg->ilg_ipif) == NULL) 3487 continue; 3488 ASSERT(ilg->ilg_ill == NULL); 3489 ilg_ill = ipif->ipif_ill; 3490 ASSERT(!ilg_ill->ill_isv6); 3491 if (ilg_ill == ill && 3492 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, &v6group)) { 3493 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3494 /* no source filter, so this is a match */ 3495 return (ilg); 3496 } 3497 break; 3498 } 3499 } 3500 if (i == connp->conn_ilg_inuse) 3501 return (NULL); 3502 3503 /* 3504 * we have an ilg with matching ill and group; but 3505 * the ilg has a source list that we must check. 
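	 * The source is a match if it is either present in an INCLUDE
	 * filter or absent from an EXCLUDE filter.  For example, with
	 * fmode MODE_IS_EXCLUDE and a filter holding one blocked source,
	 * a lookup for any other source returns this ilg, while a lookup
	 * for the blocked source returns NULL.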
3506 */ 3507 IN6_IPADDR_TO_V4MAPPED(src, &v6src); 3508 isinlist = B_FALSE; 3509 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3510 if (IN6_ARE_ADDR_EQUAL(&v6src, &ilg->ilg_filter->sl_addr[i])) { 3511 isinlist = B_TRUE; 3512 break; 3513 } 3514 } 3515 3516 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3517 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3518 return (ilg); 3519 3520 return (NULL); 3521 } 3522 3523 /* 3524 * Find an IPv6 ilg matching group, ill, and source 3525 */ 3526 ilg_t * 3527 ilg_lookup_ill_withsrc_v6(conn_t *connp, const in6_addr_t *v6group, 3528 const in6_addr_t *v6src, ill_t *ill) 3529 { 3530 int i; 3531 boolean_t isinlist; 3532 ilg_t *ilg; 3533 ill_t *ilg_ill; 3534 3535 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3536 3537 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3538 ilg = &connp->conn_ilg[i]; 3539 if ((ilg_ill = ilg->ilg_ill) == NULL) 3540 continue; 3541 ASSERT(ilg->ilg_ipif == NULL); 3542 ASSERT(ilg_ill->ill_isv6); 3543 if (ilg_ill == ill && 3544 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) { 3545 if (SLIST_IS_EMPTY(ilg->ilg_filter)) { 3546 /* no source filter, so this is a match */ 3547 return (ilg); 3548 } 3549 break; 3550 } 3551 } 3552 if (i == connp->conn_ilg_inuse) 3553 return (NULL); 3554 3555 /* 3556 * we have an ilg with matching ill and group; but 3557 * the ilg has a source list that we must check. 3558 */ 3559 isinlist = B_FALSE; 3560 for (i = 0; i < ilg->ilg_filter->sl_numsrc; i++) { 3561 if (IN6_ARE_ADDR_EQUAL(v6src, &ilg->ilg_filter->sl_addr[i])) { 3562 isinlist = B_TRUE; 3563 break; 3564 } 3565 } 3566 3567 if ((isinlist && ilg->ilg_fmode == MODE_IS_INCLUDE) || 3568 (!isinlist && ilg->ilg_fmode == MODE_IS_EXCLUDE)) 3569 return (ilg); 3570 3571 return (NULL); 3572 } 3573 3574 /* 3575 * Get the ilg whose ilg_orig_ifindex is associated with ifindex. 3576 * This is useful when the interface fails and we have moved 3577 * to a new ill, but still would like to locate using the index 3578 * that we originally used to join. Used only for IPv6 currently. 3579 */ 3580 static ilg_t * 3581 ilg_lookup_ill_index_v6(conn_t *connp, const in6_addr_t *v6group, int ifindex) 3582 { 3583 ilg_t *ilg; 3584 int i; 3585 3586 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3587 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3588 ilg = &connp->conn_ilg[i]; 3589 /* ilg_ill is NULL for V4. 
Skip them */ 3590 if (ilg->ilg_ill == NULL) 3591 continue; 3592 /* ilg_ipif is NULL for V6 */ 3593 ASSERT(ilg->ilg_ipif == NULL); 3594 ASSERT(ilg->ilg_orig_ifindex != 0); 3595 if (IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group) && 3596 ilg->ilg_orig_ifindex == ifindex) { 3597 return (ilg); 3598 } 3599 } 3600 return (NULL); 3601 } 3602 3603 /* 3604 * Find an IPv6 ilg matching group and ill 3605 */ 3606 ilg_t * 3607 ilg_lookup_ill_v6(conn_t *connp, const in6_addr_t *v6group, ill_t *ill) 3608 { 3609 ilg_t *ilg; 3610 int i; 3611 ill_t *mem_ill; 3612 3613 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3614 3615 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3616 ilg = &connp->conn_ilg[i]; 3617 if ((mem_ill = ilg->ilg_ill) == NULL) 3618 continue; 3619 ASSERT(ilg->ilg_ipif == NULL); 3620 ASSERT(mem_ill->ill_isv6); 3621 if (mem_ill == ill && 3622 IN6_ARE_ADDR_EQUAL(&ilg->ilg_v6group, v6group)) 3623 return (ilg); 3624 } 3625 return (NULL); 3626 } 3627 3628 /* 3629 * Find an IPv4 ilg matching group and ipif 3630 */ 3631 static ilg_t * 3632 ilg_lookup_ipif(conn_t *connp, ipaddr_t group, ipif_t *ipif) 3633 { 3634 in6_addr_t v6group; 3635 int i; 3636 3637 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3638 ASSERT(!ipif->ipif_ill->ill_isv6); 3639 3640 if (group == INADDR_ANY) 3641 v6group = ipv6_all_zeros; 3642 else 3643 IN6_IPADDR_TO_V4MAPPED(group, &v6group); 3644 3645 for (i = 0; i < connp->conn_ilg_inuse; i++) { 3646 if (IN6_ARE_ADDR_EQUAL(&connp->conn_ilg[i].ilg_v6group, 3647 &v6group) && 3648 connp->conn_ilg[i].ilg_ipif == ipif) 3649 return (&connp->conn_ilg[i]); 3650 } 3651 return (NULL); 3652 } 3653 3654 /* 3655 * If a source address is passed in (src != NULL and src is not 3656 * unspecified), remove the specified src addr from the given ilg's 3657 * filter list, else delete the ilg. 3658 */ 3659 static void 3660 ilg_delete(conn_t *connp, ilg_t *ilg, const in6_addr_t *src) 3661 { 3662 int i; 3663 3664 ASSERT((ilg->ilg_ipif != NULL) ^ (ilg->ilg_ill != NULL)); 3665 ASSERT(ilg->ilg_ipif == NULL || IAM_WRITER_IPIF(ilg->ilg_ipif)); 3666 ASSERT(ilg->ilg_ill == NULL || IAM_WRITER_ILL(ilg->ilg_ill)); 3667 ASSERT(MUTEX_HELD(&connp->conn_lock)); 3668 ASSERT(!(ilg->ilg_flags & ILG_DELETED)); 3669 3670 if (src == NULL || IN6_IS_ADDR_UNSPECIFIED(src)) { 3671 if (connp->conn_ilg_walker_cnt != 0) { 3672 ilg->ilg_flags |= ILG_DELETED; 3673 return; 3674 } 3675 3676 FREE_SLIST(ilg->ilg_filter); 3677 3678 i = ilg - &connp->conn_ilg[0]; 3679 ASSERT(i >= 0 && i < connp->conn_ilg_inuse); 3680 3681 /* Move other entries up one step */ 3682 connp->conn_ilg_inuse--; 3683 for (; i < connp->conn_ilg_inuse; i++) 3684 connp->conn_ilg[i] = connp->conn_ilg[i+1]; 3685 3686 if (connp->conn_ilg_inuse == 0) { 3687 mi_free((char *)connp->conn_ilg); 3688 connp->conn_ilg = NULL; 3689 cv_broadcast(&connp->conn_refcv); 3690 } 3691 } else { 3692 l_remove(ilg->ilg_filter, src); 3693 } 3694 } 3695 3696 /* 3697 * Called from conn close. No new ilg can be added or removed. 3698 * because CONN_CLOSING has been set by ip_close. ilg_add / ilg_delete 3699 * will return error if conn has started closing. 
3700 */ 3701 void 3702 ilg_delete_all(conn_t *connp) 3703 { 3704 int i; 3705 ipif_t *ipif = NULL; 3706 ill_t *ill = NULL; 3707 ilg_t *ilg; 3708 in6_addr_t v6group; 3709 boolean_t success; 3710 ipsq_t *ipsq; 3711 int orig_ifindex; 3712 3713 mutex_enter(&connp->conn_lock); 3714 retry: 3715 ILG_WALKER_HOLD(connp); 3716 for (i = connp->conn_ilg_inuse - 1; i >= 0; ) { 3717 ilg = &connp->conn_ilg[i]; 3718 /* 3719 * Since this walk is not atomic (we drop the 3720 * conn_lock and wait in ipsq_enter) we need 3721 * to check for the ILG_DELETED flag. 3722 */ 3723 if (ilg->ilg_flags & ILG_DELETED) { 3724 /* Go to the next ilg */ 3725 i--; 3726 continue; 3727 } 3728 v6group = ilg->ilg_v6group; 3729 3730 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3731 ipif = ilg->ilg_ipif; 3732 ill = ipif->ipif_ill; 3733 } else { 3734 ipif = NULL; 3735 ill = ilg->ilg_ill; 3736 } 3737 /* 3738 * We may not be able to refhold the ill if the ill/ipif 3739 * is changing. But we need to make sure that the ill will 3740 * not vanish. So we just bump up the ill_waiter count. 3741 * If we are unable to do even that, then the ill is closing, 3742 * in which case the unplumb thread will handle the cleanup, 3743 * and we move on to the next ilg. 3744 */ 3745 if (!ill_waiter_inc(ill)) { 3746 /* Go to the next ilg */ 3747 i--; 3748 continue; 3749 } 3750 mutex_exit(&connp->conn_lock); 3751 /* 3752 * To prevent deadlock between ill close which waits inside 3753 * the perimeter, and conn close, ipsq_enter returns error, 3754 * the moment ILL_CONDEMNED is set, in which case ill close 3755 * takes responsibility to cleanup the ilgs. Note that we 3756 * have not yet set condemned flag, otherwise the conn can't 3757 * be refheld for cleanup by those routines and it would be 3758 * a mutual deadlock. 3759 */ 3760 success = ipsq_enter(ill, B_FALSE); 3761 ipsq = ill->ill_phyint->phyint_ipsq; 3762 ill_waiter_dcr(ill); 3763 mutex_enter(&connp->conn_lock); 3764 if (!success) { 3765 /* Go to the next ilg */ 3766 i--; 3767 continue; 3768 } 3769 3770 /* 3771 * Make sure that nothing has changed under. For eg. 3772 * a failover/failback can change ilg_ill while we were 3773 * waiting to become exclusive above 3774 */ 3775 if (IN6_IS_ADDR_V4MAPPED(&v6group)) { 3776 ipif = ilg->ilg_ipif; 3777 ill = ipif->ipif_ill; 3778 } else { 3779 ipif = NULL; 3780 ill = ilg->ilg_ill; 3781 } 3782 if (!IAM_WRITER_ILL(ill) || (ilg->ilg_flags & ILG_DELETED)) { 3783 /* 3784 * The ilg has changed under us probably due 3785 * to a failover or unplumb. Retry on the same ilg. 3786 */ 3787 mutex_exit(&connp->conn_lock); 3788 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3789 mutex_enter(&connp->conn_lock); 3790 continue; 3791 } 3792 v6group = ilg->ilg_v6group; 3793 orig_ifindex = ilg->ilg_orig_ifindex; 3794 ilg_delete(connp, ilg, NULL); 3795 mutex_exit(&connp->conn_lock); 3796 3797 if (ipif != NULL) 3798 (void) ip_delmulti(V4_PART_OF_V6(v6group), ipif, 3799 B_FALSE, B_TRUE); 3800 3801 else 3802 (void) ip_delmulti_v6(&v6group, ill, orig_ifindex, 3803 connp->conn_zoneid, B_FALSE, B_TRUE); 3804 3805 ipsq_exit(ipsq, B_TRUE, B_TRUE); 3806 mutex_enter(&connp->conn_lock); 3807 /* Go to the next ilg */ 3808 i--; 3809 } 3810 ILG_WALKER_RELE(connp); 3811 3812 /* If any ill was skipped above wait and retry */ 3813 if (connp->conn_ilg_inuse != 0) { 3814 cv_wait(&connp->conn_refcv, &connp->conn_lock); 3815 goto retry; 3816 } 3817 mutex_exit(&connp->conn_lock); 3818 } 3819 3820 /* 3821 * Called from ill close by ipcl_walk for clearing conn_ilg and 3822 * conn_multicast_ipif for a given ipif. 
conn is held by caller. 3823 * Note that ipcl_walk only walks conns that are not yet condemned. 3824 * condemned conns can't be refheld. For this reason, conn must become clean 3825 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3826 * condemned flag. 3827 */ 3828 static void 3829 conn_delete_ipif(conn_t *connp, caddr_t arg) 3830 { 3831 ipif_t *ipif = (ipif_t *)arg; 3832 int i; 3833 char group_buf1[INET6_ADDRSTRLEN]; 3834 char group_buf2[INET6_ADDRSTRLEN]; 3835 ipaddr_t group; 3836 ilg_t *ilg; 3837 3838 /* 3839 * Even though conn_ilg_inuse can change while we are in this loop, 3840 * i.e.ilgs can be created or deleted on this connp, no new ilgs can 3841 * be created or deleted for this connp, on this ill, since this ill 3842 * is the perimeter. So we won't miss any ilg in this cleanup. 3843 */ 3844 mutex_enter(&connp->conn_lock); 3845 3846 /* 3847 * Increment the walker count, so that ilg repacking does not 3848 * occur while we are in the loop. 3849 */ 3850 ILG_WALKER_HOLD(connp); 3851 for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) { 3852 ilg = &connp->conn_ilg[i]; 3853 if (ilg->ilg_ipif != ipif || (ilg->ilg_flags & ILG_DELETED)) 3854 continue; 3855 /* 3856 * ip_close cannot be cleaning this ilg at the same time. 3857 * since it also has to execute in this ill's perimeter which 3858 * we are now holding. Only a clean conn can be condemned. 3859 */ 3860 ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED)); 3861 3862 /* Blow away the membership */ 3863 ip1dbg(("conn_delete_ilg_ipif: %s on %s (%s)\n", 3864 inet_ntop(AF_INET6, &connp->conn_ilg[i].ilg_v6group, 3865 group_buf1, sizeof (group_buf1)), 3866 inet_ntop(AF_INET6, &ipif->ipif_v6lcl_addr, 3867 group_buf2, sizeof (group_buf2)), 3868 ipif->ipif_ill->ill_name)); 3869 3870 /* ilg_ipif is NULL for V6, so we won't be here */ 3871 ASSERT(IN6_IS_ADDR_V4MAPPED(&ilg->ilg_v6group)); 3872 3873 group = V4_PART_OF_V6(ilg->ilg_v6group); 3874 ilg_delete(connp, &connp->conn_ilg[i], NULL); 3875 mutex_exit(&connp->conn_lock); 3876 3877 (void) ip_delmulti(group, ipif, B_FALSE, B_TRUE); 3878 mutex_enter(&connp->conn_lock); 3879 } 3880 3881 /* 3882 * If we are the last walker, need to physically delete the 3883 * ilgs and repack. 3884 */ 3885 ILG_WALKER_RELE(connp); 3886 3887 if (connp->conn_multicast_ipif == ipif) { 3888 /* Revert to late binding */ 3889 connp->conn_multicast_ipif = NULL; 3890 } 3891 mutex_exit(&connp->conn_lock); 3892 3893 conn_delete_ire(connp, (caddr_t)ipif); 3894 } 3895 3896 /* 3897 * Called from ill close by ipcl_walk for clearing conn_ilg and 3898 * conn_multicast_ill for a given ill. conn is held by caller. 3899 * Note that ipcl_walk only walks conns that are not yet condemned. 3900 * condemned conns can't be refheld. For this reason, conn must become clean 3901 * first, i.e. it must not refer to any ill/ire/ipif and then only set 3902 * condemned flag. 3903 */ 3904 static void 3905 conn_delete_ill(conn_t *connp, caddr_t arg) 3906 { 3907 ill_t *ill = (ill_t *)arg; 3908 int i; 3909 char group_buf[INET6_ADDRSTRLEN]; 3910 in6_addr_t v6group; 3911 int orig_ifindex; 3912 ilg_t *ilg; 3913 3914 /* 3915 * Even though conn_ilg_inuse can change while we are in this loop, 3916 * no new ilgs can be created/deleted for this connp, on this 3917 * ill, since this ill is the perimeter. So we won't miss any ilg 3918 * in this cleanup. 3919 */ 3920 mutex_enter(&connp->conn_lock); 3921 3922 /* 3923 * Increment the walker count, so that ilg repacking does not 3924 * occur while we are in the loop. 
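	 * While walkers are active, ilg_delete() only marks entries
	 * ILG_DELETED; the ILG_WALKER_RELE() at the end of the loop is
	 * what actually frees and repacks the array (when we are the
	 * last walker).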
	 */
	ILG_WALKER_HOLD(connp);
	for (i = connp->conn_ilg_inuse - 1; i >= 0; i--) {
		ilg = &connp->conn_ilg[i];
		if ((ilg->ilg_ill == ill) && !(ilg->ilg_flags & ILG_DELETED)) {
			/*
			 * ip_close cannot be cleaning this ilg at the same
			 * time, since it also has to execute in this ill's
			 * perimeter which we are now holding. Only a clean
			 * conn can be condemned.
			 */
			ASSERT(!(connp->conn_state_flags & CONN_CONDEMNED));

			/* Blow away the membership */
			ip1dbg(("conn_delete_ilg_ill: %s on %s\n",
			    inet_ntop(AF_INET6, &ilg->ilg_v6group,
			    group_buf, sizeof (group_buf)),
			    ill->ill_name));

			v6group = ilg->ilg_v6group;
			orig_ifindex = ilg->ilg_orig_ifindex;
			ilg_delete(connp, ilg, NULL);
			mutex_exit(&connp->conn_lock);

			(void) ip_delmulti_v6(&v6group, ill, orig_ifindex,
			    connp->conn_zoneid, B_FALSE, B_TRUE);
			mutex_enter(&connp->conn_lock);
		}
	}
	/*
	 * If we are the last walker, we need to physically delete the
	 * ilgs and repack.
	 */
	ILG_WALKER_RELE(connp);

	if (connp->conn_multicast_ill == ill) {
		/* Revert to late binding */
		connp->conn_multicast_ill = NULL;
		connp->conn_orig_multicast_ifindex = 0;
	}
	mutex_exit(&connp->conn_lock);
}

/*
 * Called when an ipif is unplumbed, to make sure that there are no
 * dangling conn references to that ipif.
 * Handles ilg_ipif and conn_multicast_ipif.
 */
void
reset_conn_ipif(ipif_t *ipif)
{
	ipcl_walk(conn_delete_ipif, (caddr_t)ipif);
	/* flush the SCTP ire cache for this ipif */
	sctp_ire_cache_flush(ipif);
}

/*
 * Called when an ill is unplumbed, to make sure that there are no
 * dangling conn references to that ill.
 * Handles ilg_ill and conn_multicast_ill.
 */
void
reset_conn_ill(ill_t *ill)
{
	ipcl_walk(conn_delete_ill, (caddr_t)ill);
}

#ifdef DEBUG
/*
 * These walk functions walk all the interfaces in the system to make
 * sure that there is no reference to the ipif or ill that is
 * going away.
 */
int
ilm_walk_ill(ill_t *ill)
{
	int cnt = 0;
	ill_t *till;
	ilm_t *ilm;
	ill_walk_context_t ctx;

	rw_enter(&ill_g_lock, RW_READER);
	till = ILL_START_WALK_ALL(&ctx);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ill == ill) {
				cnt++;
			}
		}
	}
	rw_exit(&ill_g_lock);

	return (cnt);
}

/*
 * This function is called before the ipif is freed.
 */
int
ilm_walk_ipif(ipif_t *ipif)
{
	int cnt = 0;
	ill_t *till;
	ilm_t *ilm;
	ill_walk_context_t ctx;

	till = ILL_START_WALK_ALL(&ctx);
	for (; till != NULL; till = ill_next(&ctx, till)) {
		for (ilm = till->ill_ilm; ilm != NULL; ilm = ilm->ilm_next) {
			if (ilm->ilm_ipif == ipif) {
				cnt++;
			}
		}
	}
	return (cnt);
}
#endif