1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/tree.h> 55 56 #include <net/if.h> 57 #include <net/if_var.h> 58 #include <net/if_dl.h> 59 #include <net/route.h> 60 #include <net/vnet.h> 61 62 #include <netinet/in.h> 63 #include <netinet/in_systm.h> 64 #include <netinet/in_fib.h> 65 #include <netinet/in_pcb.h> 66 #include <netinet/in_var.h> 67 #include <netinet/ip_var.h> 68 #include <netinet/igmp_var.h> 69 70 #ifndef KTR_IGMPV3 71 #define KTR_IGMPV3 KTR_INET 72 #endif 73 74 #ifndef __SOCKUNION_DECLARED 75 union sockunion { 76 struct sockaddr_storage ss; 77 struct sockaddr sa; 78 struct sockaddr_dl sdl; 79 struct sockaddr_in sin; 80 }; 81 typedef union sockunion sockunion_t; 82 #define __SOCKUNION_DECLARED 83 #endif /* __SOCKUNION_DECLARED */ 84 85 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 86 "IPv4 multicast PCB-layer source filter"); 87 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 88 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 89 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 90 "IPv4 multicast IGMP-layer source filter"); 91 92 /* 93 * Locking: 94 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 95 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 96 * it can be taken by code in net/if.c also. 97 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 98 * 99 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly 100 * any need for in_multi itself to be virtualized -- it is bound to an ifp 101 * anyway no matter what happens. 
102 */ 103 struct mtx in_multi_mtx; 104 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF); 105 106 /* 107 * Functions with non-static linkage defined in this file should be 108 * declared in in_var.h: 109 * imo_multi_filter() 110 * in_addmulti() 111 * in_delmulti() 112 * in_joingroup() 113 * in_joingroup_locked() 114 * in_leavegroup() 115 * in_leavegroup_locked() 116 * and ip_var.h: 117 * inp_freemoptions() 118 * inp_getmoptions() 119 * inp_setmoptions() 120 * 121 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 122 * and in_delmulti(). 123 */ 124 static void imf_commit(struct in_mfilter *); 125 static int imf_get_source(struct in_mfilter *imf, 126 const struct sockaddr_in *psin, 127 struct in_msource **); 128 static struct in_msource * 129 imf_graft(struct in_mfilter *, const uint8_t, 130 const struct sockaddr_in *); 131 static void imf_leave(struct in_mfilter *); 132 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 133 static void imf_purge(struct in_mfilter *); 134 static void imf_rollback(struct in_mfilter *); 135 static void imf_reap(struct in_mfilter *); 136 static int imo_grow(struct ip_moptions *); 137 static size_t imo_match_group(const struct ip_moptions *, 138 const struct ifnet *, const struct sockaddr *); 139 static struct in_msource * 140 imo_match_source(const struct ip_moptions *, const size_t, 141 const struct sockaddr *); 142 static void ims_merge(struct ip_msource *ims, 143 const struct in_msource *lims, const int rollback); 144 static int in_getmulti(struct ifnet *, const struct in_addr *, 145 struct in_multi **); 146 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 147 const int noalloc, struct ip_msource **pims); 148 #ifdef KTR 149 static int inm_is_ifp_detached(const struct in_multi *); 150 #endif 151 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 152 static void inm_purge(struct in_multi *); 153 static void inm_reap(struct in_multi *); 
154 static struct ip_moptions * 155 inp_findmoptions(struct inpcb *); 156 static void inp_freemoptions_internal(struct ip_moptions *); 157 static void inp_gcmoptions(void *, int); 158 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 159 static int inp_join_group(struct inpcb *, struct sockopt *); 160 static int inp_leave_group(struct inpcb *, struct sockopt *); 161 static struct ifnet * 162 inp_lookup_mcast_ifp(const struct inpcb *, 163 const struct sockaddr_in *, const struct in_addr); 164 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 165 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 166 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 167 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 168 169 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 170 "IPv4 multicast"); 171 172 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 173 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 174 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 175 "Max source filters per group"); 176 177 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 178 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 179 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 180 "Max source filters per socket"); 181 182 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 183 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 184 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 185 186 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 187 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 188 "Per-interface stack-wide source filters"); 189 190 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 191 STAILQ_HEAD_INITIALIZER(imo_gc_list); 192 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 193 194 #ifdef KTR 195 /* 196 * Inline function which wraps assertions for a valid ifp. 
197 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 198 * is detached. 199 */ 200 static int __inline 201 inm_is_ifp_detached(const struct in_multi *inm) 202 { 203 struct ifnet *ifp; 204 205 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 206 ifp = inm->inm_ifma->ifma_ifp; 207 if (ifp != NULL) { 208 /* 209 * Sanity check that netinet's notion of ifp is the 210 * same as net's. 211 */ 212 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 213 } 214 215 return (ifp == NULL); 216 } 217 #endif 218 219 /* 220 * Initialize an in_mfilter structure to a known state at t0, t1 221 * with an empty source filter list. 222 */ 223 static __inline void 224 imf_init(struct in_mfilter *imf, const int st0, const int st1) 225 { 226 memset(imf, 0, sizeof(struct in_mfilter)); 227 RB_INIT(&imf->imf_sources); 228 imf->imf_st[0] = st0; 229 imf->imf_st[1] = st1; 230 } 231 232 /* 233 * Function for looking up an in_multi record for an IPv4 multicast address 234 * on a given interface. ifp must be valid. If no record found, return NULL. 235 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. 236 */ 237 struct in_multi * 238 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 239 { 240 struct ifmultiaddr *ifma; 241 struct in_multi *inm; 242 243 IN_MULTI_LOCK_ASSERT(); 244 IF_ADDR_LOCK_ASSERT(ifp); 245 246 inm = NULL; 247 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 248 if (ifma->ifma_addr->sa_family == AF_INET) { 249 inm = (struct in_multi *)ifma->ifma_protospec; 250 if (inm->inm_addr.s_addr == ina.s_addr) 251 break; 252 inm = NULL; 253 } 254 } 255 return (inm); 256 } 257 258 /* 259 * Wrapper for inm_lookup_locked(). 260 * The IF_ADDR_LOCK will be taken on ifp and released on return. 
261 */ 262 struct in_multi * 263 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 264 { 265 struct in_multi *inm; 266 267 IN_MULTI_LOCK_ASSERT(); 268 IF_ADDR_RLOCK(ifp); 269 inm = inm_lookup_locked(ifp, ina); 270 IF_ADDR_RUNLOCK(ifp); 271 272 return (inm); 273 } 274 275 /* 276 * Resize the ip_moptions vector to the next power-of-two minus 1. 277 * May be called with locks held; do not sleep. 278 */ 279 static int 280 imo_grow(struct ip_moptions *imo) 281 { 282 struct in_multi **nmships; 283 struct in_multi **omships; 284 struct in_mfilter *nmfilters; 285 struct in_mfilter *omfilters; 286 size_t idx; 287 size_t newmax; 288 size_t oldmax; 289 290 nmships = NULL; 291 nmfilters = NULL; 292 omships = imo->imo_membership; 293 omfilters = imo->imo_mfilters; 294 oldmax = imo->imo_max_memberships; 295 newmax = ((oldmax + 1) * 2) - 1; 296 297 if (newmax <= IP_MAX_MEMBERSHIPS) { 298 nmships = (struct in_multi **)realloc(omships, 299 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 300 nmfilters = (struct in_mfilter *)realloc(omfilters, 301 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 302 if (nmships != NULL && nmfilters != NULL) { 303 /* Initialize newly allocated source filter heads. */ 304 for (idx = oldmax; idx < newmax; idx++) { 305 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 306 MCAST_EXCLUDE); 307 } 308 imo->imo_max_memberships = newmax; 309 imo->imo_membership = nmships; 310 imo->imo_mfilters = nmfilters; 311 } 312 } 313 314 if (nmships == NULL || nmfilters == NULL) { 315 if (nmships != NULL) 316 free(nmships, M_IPMOPTS); 317 if (nmfilters != NULL) 318 free(nmfilters, M_INMFILTER); 319 return (ETOOMANYREFS); 320 } 321 322 return (0); 323 } 324 325 /* 326 * Find an IPv4 multicast group entry for this ip_moptions instance 327 * which matches the specified group, and optionally an interface. 328 * Return its index into the array, or -1 if not found. 
 */
static size_t
imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group)
{
	const struct sockaddr_in *gsin;
	struct in_multi **pinm;
	int idx;
	int nmships;

	gsin = (const struct sockaddr_in *)group;

	/* The imo_membership array may be lazy allocated. */
	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
		return (-1);

	nmships = imo->imo_num_memberships;
	pinm = &imo->imo_membership[0];
	for (idx = 0; idx < nmships; idx++, pinm++) {
		/* Slots may be empty; skip them. */
		if (*pinm == NULL)
			continue;
		/* Match on group address; match on ifp only if one given. */
		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
			break;
		}
	}
	if (idx >= nmships)
		idx = -1;

	/*
	 * NOTE: -1 converts to (size_t)-1 on return; callers (e.g.
	 * imo_multi_filter) test the result against -1 to detect
	 * "not found".
	 */
	return (idx);
}

/*
 * Find an IPv4 multicast source entry for this imo which matches
 * the given group index for this socket, and source address.
 *
 * NOTE: This does not check if the entry is in-mode, merely if
 * it exists, which may not be the desired behaviour.
 */
static struct in_msource *
imo_match_source(const struct ip_moptions *imo, const size_t gidx,
    const struct sockaddr *src)
{
	struct ip_msource find;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	const sockunion_t *psa;

	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
	    ("%s: invalid index %d\n", __func__, (int)gidx));

	/* The imo_mfilters array may be lazy allocated. */
	if (imo->imo_mfilters == NULL)
		return (NULL);
	imf = &imo->imo_mfilters[gidx];

	/* Source trees are keyed in host byte order. */
	psa = (const sockunion_t *)src;
	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);

	return ((struct in_msource *)ims);
}

/*
 * Perform filtering for multicast datagrams on a socket by group and source.
 *
 * Returns 0 (MCAST_PASS) if a datagram should be allowed through, or
 * various error codes if the socket was not a member of the group, or the
 * source was muted, etc.
 */
int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group, const struct sockaddr *src)
{
	size_t gidx;
	struct in_msource *ims;
	int mode;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));

	gidx = imo_match_group(imo, ifp, group);
	if (gidx == -1)
		return (MCAST_NOTGMEMBER);

	/*
	 * Check if the source was included in an (S,G) join.
	 * Allow reception on exclusive memberships by default,
	 * reject reception on inclusive memberships by default.
	 * Exclude source only if an in-mode exclude filter exists.
	 * Include source only if an in-mode include filter exists.
	 * NOTE: We are comparing group state here at IGMP t1 (now)
	 * with socket-layer t0 (since last downcall).
	 */
	mode = imo->imo_mfilters[gidx].imf_st[1];
	ims = imo_match_source(imo, gidx, src);

	if ((ims == NULL && mode == MCAST_INCLUDE) ||
	    (ims != NULL && ims->imsl_st[0] != mode))
		return (MCAST_NOTSMEMBER);

	return (MCAST_PASS);
}

/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer multicast
 * filters on ifp to listen for group.
 * Assumes the IN_MULTI lock is held across the call.
 * Return 0 if successful, otherwise return an appropriate error code.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in gsin;
	struct ifmultiaddr *ifma;
	struct in_ifinfo *ii;
	struct in_multi *inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];

	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		++inm->inm_refcount;
		*pinm = inm;
		return (0);
	}

	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IF_ADDR_WLOCK(ifp);

	/*
	 * If something other than netinet is occupying the link-layer
	 * group, print a meaningful error message and back out of
	 * the allocation.
	 * Otherwise, bump the refcount on the existing network-layer
	 * group association and return it.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group)) {
			char addrbuf[INET_ADDRSTRLEN];

			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
		}
#endif
		++inm->inm_refcount;
		*pinm = inm;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/* Back out the link-layer membership taken above. */
		IF_ADDR_WUNLOCK(ifp);
		if_delmulti_ifma(ifma);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;
	inm->inm_state = IGMP_NOT_MEMBER;
	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Publish the new record while the IF_ADDR write lock is held. */
	ifma->ifma_protospec = inm;

	*pinm = inm;

	IF_ADDR_WUNLOCK(ifp);
	return (0);
}

/*
 * Drop a reference to an in_multi record.
 *
 * If the refcount drops to 0, free the in_multi record and
 * delete the underlying link-layer membership.
 */
void
inm_release_locked(struct in_multi *inm)
{
	struct ifmultiaddr *ifma;

	IN_MULTI_LOCK_ASSERT();

	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);

	if (--inm->inm_refcount > 0) {
		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
		    inm->inm_refcount);
		return;
	}

	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);

	ifma = inm->inm_ifma;

	/* XXX this access is not covered by IF_ADDR_LOCK */
	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
	KASSERT(ifma->ifma_protospec == inm,
	    ("%s: ifma_protospec != inm", __func__));
	/* Unhook from the ifma before freeing anything. */
	ifma->ifma_protospec = NULL;

	/* Free all source nodes, then the record, then the ifma. */
	inm_purge(inm);

	free(inm, M_IPMADDR);

	if_delmulti_ifma(ifma);
}

/*
 * Clear recorded source entries for a group.
 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
 * FIXME: Should reap.
 */
void
inm_clear_recorded(struct in_multi *inm)
{
	struct ip_msource *ims;

	IN_MULTI_LOCK_ASSERT();

	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		if (ims->ims_stp) {
			ims->ims_stp = 0;
			--inm->inm_st[1].iss_rec;
		}
	}
	/* Every recorded source must have been accounted for above. */
	KASSERT(inm->inm_st[1].iss_rec == 0,
	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
}

/*
 * Record a source as pending for a Source-Group IGMPv3 query.
 * This lives here as it modifies the shared tree.
 *
 * inm is the group descriptor.
 * naddr is the address of the source to record in network-byte order.
 *
 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
 * lazy-allocate a source node in response to an SG query.
 * Otherwise, no allocation is performed. This saves some memory
 * with the trade-off that the source will not be reported to the
 * router if joined in the window between the query response and
 * the group actually being joined on the local host.
 *
 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
 * This turns off the allocation of a recorded source entry if
 * the group has not been joined.
 *
 * Return 0 if the source didn't exist or was already marked as recorded.
 * Return 1 if the source was marked as recorded by this function.
 * Return <0 if any error occurred (negated errno code).
 */
int
inm_record_source(struct in_multi *inm, const in_addr_t naddr)
{
	struct ip_msource find;
	struct ip_msource *ims, *nims;

	IN_MULTI_LOCK_ASSERT();

	/* Tree is keyed in host byte order. */
	find.ims_haddr = ntohl(naddr);
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims && ims->ims_stp)
		return (0);
	if (ims == NULL) {
		/* Enforce the per-group source limit before allocating. */
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (-ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (-ENOMEM);
		nims->ims_haddr = find.ims_haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
	}

	/*
	 * Mark the source as recorded and update the recorded
	 * source count.
	 */
	++ims->ims_stp;
	++inm->inm_st[1].iss_rec;

	return (1);
}

/*
 * Return a pointer to an in_msource owned by an in_mfilter,
 * given its source address.
 * Lazy-allocate if needed. If this is a new entry its filter state is
 * undefined at t0.
 *
 * imf is the filter set being modified.
 * psin points to the source address (network byte order); the tree key
 * is stored in host byte order.
 *
 * SMPng: May be called with locks held; malloc must not block.
 */
static int
imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
    struct in_msource **plims)
{
	struct ip_msource find;
	struct ip_msource *ims, *nims;
	struct in_msource *lims;
	int error;

	error = 0;
	ims = NULL;
	lims = NULL;

	/* key is host byte order */
	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
	/*
	 * NOTE(review): casts between ip_msource and in_msource are used
	 * throughout this file; presumably in_msource embeds ip_msource as
	 * its first member -- confirm against in_var.h.
	 */
	lims = (struct in_msource *)ims;
	if (lims == NULL) {
		/* Enforce the per-socket source filter limit. */
		if (imf->imf_nsrc == in_mcast_maxsocksrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		lims = (struct in_msource *)nims;
		lims->ims_haddr = find.ims_haddr;
		lims->imsl_st[0] = MCAST_UNDEFINED;
		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
		++imf->imf_nsrc;
	}

	*plims = lims;

	return (error);
}

/*
 * Graft a source entry into an existing socket-layer filter set,
 * maintaining any required invariants and checking allocations.
 *
 * The source is marked as being in the new filter mode at t1.
 *
 * Return the pointer to the new node, otherwise return NULL.
 */
static struct in_msource *
imf_graft(struct in_mfilter *imf, const uint8_t st1,
    const struct sockaddr_in *psin)
{
	struct ip_msource *nims;
	struct in_msource *lims;

	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
	    M_NOWAIT | M_ZERO);
	if (nims == NULL)
		return (NULL);
	lims = (struct in_msource *)nims;
	/* Key in host byte order; undefined at t0, st1 at t1. */
	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
	lims->imsl_st[0] = MCAST_UNDEFINED;
	lims->imsl_st[1] = st1;
	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
	++imf->imf_nsrc;

	return (lims);
}

/*
 * Prune a source entry from an existing socket-layer filter set,
 * maintaining any required invariants and checking allocations.
743 * 744 * The source is marked as being left at t1, it is not freed. 745 * 746 * Return 0 if no error occurred, otherwise return an errno value. 747 */ 748 static int 749 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 750 { 751 struct ip_msource find; 752 struct ip_msource *ims; 753 struct in_msource *lims; 754 755 /* key is host byte order */ 756 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 757 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 758 if (ims == NULL) 759 return (ENOENT); 760 lims = (struct in_msource *)ims; 761 lims->imsl_st[1] = MCAST_UNDEFINED; 762 return (0); 763 } 764 765 /* 766 * Revert socket-layer filter set deltas at t1 to t0 state. 767 */ 768 static void 769 imf_rollback(struct in_mfilter *imf) 770 { 771 struct ip_msource *ims, *tims; 772 struct in_msource *lims; 773 774 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 775 lims = (struct in_msource *)ims; 776 if (lims->imsl_st[0] == lims->imsl_st[1]) { 777 /* no change at t1 */ 778 continue; 779 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 780 /* revert change to existing source at t1 */ 781 lims->imsl_st[1] = lims->imsl_st[0]; 782 } else { 783 /* revert source added t1 */ 784 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 785 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 786 free(ims, M_INMFILTER); 787 imf->imf_nsrc--; 788 } 789 } 790 imf->imf_st[1] = imf->imf_st[0]; 791 } 792 793 /* 794 * Mark socket-layer filter set as INCLUDE {} at t1. 795 */ 796 static void 797 imf_leave(struct in_mfilter *imf) 798 { 799 struct ip_msource *ims; 800 struct in_msource *lims; 801 802 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 803 lims = (struct in_msource *)ims; 804 lims->imsl_st[1] = MCAST_UNDEFINED; 805 } 806 imf->imf_st[1] = MCAST_INCLUDE; 807 } 808 809 /* 810 * Mark socket-layer filter set deltas as committed. 
811 */ 812 static void 813 imf_commit(struct in_mfilter *imf) 814 { 815 struct ip_msource *ims; 816 struct in_msource *lims; 817 818 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 819 lims = (struct in_msource *)ims; 820 lims->imsl_st[0] = lims->imsl_st[1]; 821 } 822 imf->imf_st[0] = imf->imf_st[1]; 823 } 824 825 /* 826 * Reap unreferenced sources from socket-layer filter set. 827 */ 828 static void 829 imf_reap(struct in_mfilter *imf) 830 { 831 struct ip_msource *ims, *tims; 832 struct in_msource *lims; 833 834 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 835 lims = (struct in_msource *)ims; 836 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 837 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 838 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 839 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 840 free(ims, M_INMFILTER); 841 imf->imf_nsrc--; 842 } 843 } 844 } 845 846 /* 847 * Purge socket-layer filter set. 848 */ 849 static void 850 imf_purge(struct in_mfilter *imf) 851 { 852 struct ip_msource *ims, *tims; 853 854 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 855 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 856 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 857 free(ims, M_INMFILTER); 858 imf->imf_nsrc--; 859 } 860 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 861 KASSERT(RB_EMPTY(&imf->imf_sources), 862 ("%s: imf_sources not empty", __func__)); 863 } 864 865 /* 866 * Look up a source filter entry for a multicast group. 867 * 868 * inm is the group descriptor to work with. 869 * haddr is the host-byte-order IPv4 address to look up. 870 * noalloc may be non-zero to suppress allocation of sources. 871 * *pims will be set to the address of the retrieved or allocated source. 872 * 873 * SMPng: NOTE: may be called with locks held. 874 * Return 0 if successful, otherwise return a non-zero error code. 
 */
static int
inm_get_source(struct in_multi *inm, const in_addr_t haddr,
    const int noalloc, struct ip_msource **pims)
{
	struct ip_msource find;
	struct ip_msource *ims, *nims;

	find.ims_haddr = haddr;
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims == NULL && !noalloc) {
		/* Enforce the per-group source limit before allocating. */
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		nims->ims_haddr = haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
#ifdef KTR
		CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
		    haddr, ims);
#endif
	}

	/*
	 * NOTE: when noalloc is set and no entry exists, *pims is set to
	 * NULL and 0 is still returned; callers (e.g. inm_merge's back-out
	 * loop) depend on this.
	 */
	*pims = ims;
	return (0);
}

/*
 * Merge socket-layer source into IGMP-layer source.
 * If rollback is non-zero, perform the inverse of the merge.
 */
static void
ims_merge(struct ip_msource *ims, const struct in_msource *lims,
    const int rollback)
{
	/* Apply deltas with +1 on merge, or undo them with -1 on rollback. */
	int n = rollback ? -1 : 1;

	/* Remove this listener's t0 contribution from the t1 counts... */
	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].ex -= n;
	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].in -= n;
	}

	/* ...then add its t1 contribution. */
	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].ex += n;
	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].in += n;
	}
}

/*
 * Atomically update the global in_multi state, when a membership's
 * filter list is being updated in any way.
 *
 * imf is the per-inpcb-membership group filter pointer.
 * A fake imf may be passed for in-kernel consumers.
 *
 * XXX This is a candidate for a set-symmetric-difference style loop
 * which would eliminate the repeated lookup from root of ims nodes,
 * as they share the same key space.
 *
 * If any error occurred this function will back out of refcounts
 * and return a non-zero value.
 */
static int
inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct ip_msource *ims, *nims;
	struct in_msource *lims;
	int schanged, error;
	int nsrc0, nsrc1;

	schanged = 0;
	error = 0;
	nsrc1 = nsrc0 = 0;

	/*
	 * Update the source filters first, as this may fail.
	 * Maintain count of in-mode filters at t0, t1. These are
	 * used to work out if we transition into ASM mode or not.
	 * Maintain a count of source filters whose state was
	 * actually modified by this operation.
	 */
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
		++schanged;
		if (error)
			break;
		ims_merge(nims, lims, 0);
	}
	if (error) {
		/*
		 * Back out: walk the already-merged prefix in reverse,
		 * starting at the node that failed, and apply the inverse
		 * merge. noalloc is set so the lookup cannot allocate;
		 * a missing node is simply skipped.
		 */
		struct ip_msource *bims;

		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
			lims = (struct in_msource *)ims;
			if (lims->imsl_st[0] == lims->imsl_st[1])
				continue;
			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
			if (bims == NULL)
				continue;
			ims_merge(bims, lims, 1);
		}
		goto out_reap;
	}

	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
	    __func__, nsrc0, nsrc1);

	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
	if (imf->imf_st[0] == imf->imf_st[1] &&
	    imf->imf_st[1] == MCAST_INCLUDE) {
		if (nsrc1 == 0) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}
	}

	/* Handle filter mode transition on socket. */
	if (imf->imf_st[0] != imf->imf_st[1]) {
		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
		    __func__, imf->imf_st[0], imf->imf_st[1]);

		if (imf->imf_st[0] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
			--inm->inm_st[1].iss_ex;
		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}

		if (imf->imf_st[1] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
			inm->inm_st[1].iss_ex++;
		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
			inm->inm_st[1].iss_in++;
		}
	}

	/*
	 * Track inm filter state in terms of listener counts.
	 * If there are any exclusive listeners, stack-wide
	 * membership is exclusive.
	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
	 * If no listeners remain, state is undefined at t1,
	 * and the IGMP lifecycle for this group should finish.
	 */
	if (inm->inm_st[1].iss_ex > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
	} else if (inm->inm_st[1].iss_in > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
	} else {
		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	}

	/* Decrement ASM listener count on transition out of ASM mode. */
	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
		/*
		 * NOTE(review): the "imf->imf_st[1] == MCAST_EXCLUDE" test
		 * in the second disjunct is redundant; it can only be
		 * reached when the first disjunct is false. Kept as-is.
		 */
		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
			--inm->inm_st[1].iss_asm;
		}
	}

	/* Increment ASM listener count on transition to ASM mode. */
	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
		inm->inm_st[1].iss_asm++;
	}

	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
	inm_print(inm);

out_reap:
	if (schanged > 0) {
		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
		inm_reap(inm);
	}
	return (error);
}

/*
 * Mark an in_multi's filter set deltas as committed.
 * Called by IGMP after a state change has been enqueued.
 */
void
inm_commit(struct in_multi *inm)
{
	struct ip_msource *ims;

	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
	inm_print(inm);

	/* Fold t1 into t0 for every source, then for the group itself. */
	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		ims->ims_st[0] = ims->ims_st[1];
	}
	inm->inm_st[0] = inm->inm_st[1];
}

/*
 * Reap unreferenced nodes from an in_multi's filter set.
 */
static void
inm_reap(struct in_multi *inm)
{
	struct ip_msource *ims, *tims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		/* Keep any node still referenced at t0/t1 or recorded. */
		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
		    ims->ims_stp != 0)
			continue;
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Purge all source nodes from an in_multi's filter set.
 */
static void
inm_purge(struct in_multi *inm)
{
	struct ip_msource *ims, *tims;

	/* Unconditionally free every node, unlike inm_reap(). */
	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Join a multicast group; unlocked entry point.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int error;

	IN_MULTI_LOCK();
	error = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Join a multicast group; real entry point.
 *
 * Only preserves atomicity at inm level.
 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
 *
 * If the IGMP downcall fails, the group is not joined, and an error
 * code is returned.
 */
int
in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	struct in_mfilter timf;
	struct in_multi *inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
	    ntohl(gina->s_addr), ifp, ifp->if_xname);

	error = 0;
	inm = NULL;

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
		imf = &timf;
	}

	/* Acquires a reference on the group's in_multi. */
	error = in_getmulti(ifp, gina, &inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
		return (error);
	}

	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		goto out_inm_release;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
		goto out_inm_release;
	}

out_inm_release:
	/* On failure, drop the reference taken by in_getmulti(). */
	if (error) {
		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
		inm_release_locked(inm);
	} else {
		*pinm = inm;
	}

	return (error);
}

/*
 * Leave a multicast group; unlocked entry point.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int error;

	IN_MULTI_LOCK();
	error = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Leave a multicast group; real entry point.
 * All source filters will be expunged.
 *
 * Only preserves atomicity at inm level.
 *
 * Holding the write lock for the INP which contains imf
 * is highly advisable. We can't assert for it as imf does not
 * contain a back-pointer to the owning inp.
 *
 * Note: This is not the same as inm_release(*) as this function also
 * makes a state change downcall into IGMP.
 */
int
in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct in_mfilter timf;
	int error;

	error = 0;

	IN_MULTI_LOCK_ASSERT();

	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
	    inm, ntohl(inm->inm_addr.s_addr),
	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
	    imf);

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	/* Set the vnet context from the ifp; caller may be vnet-less. */
	CURVNET_SET(inm->inm_ifp->if_vnet);
	error = igmp_change_state(inm);
	CURVNET_RESTORE();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
	inm_release_locked(inm);

	return (error);
}

/*#ifndef BURN_BRIDGES*/
/*
 * Join an IPv4 multicast group in (*,G) exclusive mode.
 * The group must be a 224.0.0.0/24 link-scope group.
 * This KPI is for legacy kernel consumers only.
 *
 * Returns the joined in_multi, or NULL on failure.
 */
struct in_multi *
in_addmulti(struct in_addr *ap, struct ifnet *ifp)
{
	struct in_multi *pinm;
	int error;
#ifdef INVARIANTS
	char addrbuf[INET_ADDRSTRLEN];
#endif

	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
	    ("%s: %s not in 224.0.0.0/24", __func__,
	    inet_ntoa_r(*ap, addrbuf)));

	/* NULL imf: in_joingroup() fakes up an ASM (exclusive) join. */
	error = in_joingroup(ifp, ap, NULL, &pinm);
	if (error != 0)
		pinm = NULL;

	return (pinm);
}

/*
 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
 * This KPI is for legacy kernel consumers only.
 */
void
in_delmulti(struct in_multi *inm)
{

	(void)in_leavegroup(inm, NULL);
}
/*#endif*/

/*
 * Block or unblock an ASM multicast source on an inpcb.
 * This implements the delta-based API described in RFC 3678.
 *
 * The delta-based API applies only to exclusive-mode memberships.
 * An IGMP downcall will be performed.
 *
 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
 *
 * Return 0 if successful, otherwise return an appropriate error code.
1339 */ 1340 static int 1341 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1342 { 1343 struct group_source_req gsr; 1344 sockunion_t *gsa, *ssa; 1345 struct ifnet *ifp; 1346 struct in_mfilter *imf; 1347 struct ip_moptions *imo; 1348 struct in_msource *ims; 1349 struct in_multi *inm; 1350 size_t idx; 1351 uint16_t fmode; 1352 int error, doblock; 1353 1354 ifp = NULL; 1355 error = 0; 1356 doblock = 0; 1357 1358 memset(&gsr, 0, sizeof(struct group_source_req)); 1359 gsa = (sockunion_t *)&gsr.gsr_group; 1360 ssa = (sockunion_t *)&gsr.gsr_source; 1361 1362 switch (sopt->sopt_name) { 1363 case IP_BLOCK_SOURCE: 1364 case IP_UNBLOCK_SOURCE: { 1365 struct ip_mreq_source mreqs; 1366 1367 error = sooptcopyin(sopt, &mreqs, 1368 sizeof(struct ip_mreq_source), 1369 sizeof(struct ip_mreq_source)); 1370 if (error) 1371 return (error); 1372 1373 gsa->sin.sin_family = AF_INET; 1374 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1375 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1376 1377 ssa->sin.sin_family = AF_INET; 1378 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1379 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1380 1381 if (!in_nullhost(mreqs.imr_interface)) 1382 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1383 1384 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1385 doblock = 1; 1386 1387 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1388 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1389 break; 1390 } 1391 1392 case MCAST_BLOCK_SOURCE: 1393 case MCAST_UNBLOCK_SOURCE: 1394 error = sooptcopyin(sopt, &gsr, 1395 sizeof(struct group_source_req), 1396 sizeof(struct group_source_req)); 1397 if (error) 1398 return (error); 1399 1400 if (gsa->sin.sin_family != AF_INET || 1401 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1402 return (EINVAL); 1403 1404 if (ssa->sin.sin_family != AF_INET || 1405 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1406 return (EINVAL); 1407 1408 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1409 return (EADDRNOTAVAIL); 
1410 1411 ifp = ifnet_byindex(gsr.gsr_interface); 1412 1413 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1414 doblock = 1; 1415 break; 1416 1417 default: 1418 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1419 __func__, sopt->sopt_name); 1420 return (EOPNOTSUPP); 1421 break; 1422 } 1423 1424 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1425 return (EINVAL); 1426 1427 /* 1428 * Check if we are actually a member of this group. 1429 */ 1430 imo = inp_findmoptions(inp); 1431 idx = imo_match_group(imo, ifp, &gsa->sa); 1432 if (idx == -1 || imo->imo_mfilters == NULL) { 1433 error = EADDRNOTAVAIL; 1434 goto out_inp_locked; 1435 } 1436 1437 KASSERT(imo->imo_mfilters != NULL, 1438 ("%s: imo_mfilters not allocated", __func__)); 1439 imf = &imo->imo_mfilters[idx]; 1440 inm = imo->imo_membership[idx]; 1441 1442 /* 1443 * Attempting to use the delta-based API on an 1444 * non exclusive-mode membership is an error. 1445 */ 1446 fmode = imf->imf_st[0]; 1447 if (fmode != MCAST_EXCLUDE) { 1448 error = EINVAL; 1449 goto out_inp_locked; 1450 } 1451 1452 /* 1453 * Deal with error cases up-front: 1454 * Asked to block, but already blocked; or 1455 * Asked to unblock, but nothing to unblock. 1456 * If adding a new block entry, allocate it. 1457 */ 1458 ims = imo_match_source(imo, idx, &ssa->sa); 1459 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1460 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1461 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1462 error = EADDRNOTAVAIL; 1463 goto out_inp_locked; 1464 } 1465 1466 INP_WLOCK_ASSERT(inp); 1467 1468 /* 1469 * Begin state merge transaction at socket layer. 
1470 */ 1471 if (doblock) { 1472 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1473 ims = imf_graft(imf, fmode, &ssa->sin); 1474 if (ims == NULL) 1475 error = ENOMEM; 1476 } else { 1477 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1478 error = imf_prune(imf, &ssa->sin); 1479 } 1480 1481 if (error) { 1482 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1483 goto out_imf_rollback; 1484 } 1485 1486 /* 1487 * Begin state merge transaction at IGMP layer. 1488 */ 1489 IN_MULTI_LOCK(); 1490 1491 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1492 error = inm_merge(inm, imf); 1493 if (error) { 1494 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1495 goto out_in_multi_locked; 1496 } 1497 1498 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1499 error = igmp_change_state(inm); 1500 if (error) 1501 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1502 1503 out_in_multi_locked: 1504 1505 IN_MULTI_UNLOCK(); 1506 1507 out_imf_rollback: 1508 if (error) 1509 imf_rollback(imf); 1510 else 1511 imf_commit(imf); 1512 1513 imf_reap(imf); 1514 1515 out_inp_locked: 1516 INP_WUNLOCK(inp); 1517 return (error); 1518 } 1519 1520 /* 1521 * Given an inpcb, return its multicast options structure pointer. Accepts 1522 * an unlocked inpcb pointer, but will return it locked. May sleep. 1523 * 1524 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1525 * SMPng: NOTE: Returns with the INP write lock held. 
 */
static struct ip_moptions *
inp_findmoptions(struct inpcb *inp)
{
	struct ip_moptions *imo;
	struct in_multi **immp;
	struct in_mfilter *imfp;
	size_t idx;

	INP_WLOCK(inp);
	if (inp->inp_moptions != NULL)
		return (inp->inp_moptions);

	/* Drop the lock to allocate: malloc(M_WAITOK) may sleep. */
	INP_WUNLOCK(inp);

	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
	    M_INMFILTER, M_WAITOK);

	imo->imo_multicast_ifp = NULL;
	imo->imo_multicast_addr.s_addr = INADDR_ANY;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	imo->imo_multicast_loop = in_mcast_loop;
	imo->imo_num_memberships = 0;
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_membership = immp;

	/* Initialize per-group source filters. */
	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
	imo->imo_mfilters = imfp;

	INP_WLOCK(inp);
	/* Re-check: another thread may have installed imo while we slept. */
	if (inp->inp_moptions != NULL) {
		free(imfp, M_INMFILTER);
		free(immp, M_IPMOPTS);
		free(imo, M_IPMOPTS);
		return (inp->inp_moptions);
	}
	inp->inp_moptions = imo;
	return (imo);
}

/*
 * Discard the IP multicast options (and source filters). To minimize
 * the amount of work done while holding locks such as the INP's
 * pcbinfo lock (which is used in the receive path), the free
 * operation is performed asynchronously in a separate task.
 *
 * SMPng: NOTE: assumes INP write lock is held.
 */
void
inp_freemoptions(struct ip_moptions *imo)
{

	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
	/* Queue for deferred destruction; the GC task does the work. */
	IN_MULTI_LOCK();
	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
	IN_MULTI_UNLOCK();
	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
}

/*
 * Leave all memberships, release filter state, and free the
 * ip_moptions.  Runs from the GC task, not under the INP lock.
 */
static void
inp_freemoptions_internal(struct ip_moptions *imo)
{
	struct in_mfilter *imf;
	size_t idx, nmships;

	nmships = imo->imo_num_memberships;
	for (idx = 0; idx < nmships; ++idx) {
		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
		if (imf)
			imf_leave(imf);
		(void)in_leavegroup(imo->imo_membership[idx], imf);
		if (imf)
			imf_purge(imf);
	}

	if (imo->imo_mfilters)
		free(imo->imo_mfilters, M_INMFILTER);
	free(imo->imo_membership, M_IPMOPTS);
	free(imo, M_IPMOPTS);
}

/*
 * GC task handler: drain the deferred-free list.
 */
static void
inp_gcmoptions(void *context, int pending)
{
	struct ip_moptions *imo;

	IN_MULTI_LOCK();
	while (!STAILQ_EMPTY(&imo_gc_list)) {
		imo = STAILQ_FIRST(&imo_gc_list);
		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
		/*
		 * Drop the lock around the free;
		 * in_leavegroup() acquires it itself.
		 */
		IN_MULTI_UNLOCK();
		inp_freemoptions_internal(imo);
		IN_MULTI_LOCK();
	}
	IN_MULTI_UNLOCK();
}

/*
 * Atomically get source filters on a socket for an IPv4 multicast group.
 * Called with INP lock held; returns with lock released.
 */
static int
inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct ip_moptions *imo;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	struct in_msource *lims;
	struct sockaddr_in *psin;
	struct sockaddr_storage *ptss;
	struct sockaddr_storage *tss;
	int error;
	size_t idx, nsrcs, ncsrcs;

	INP_WLOCK_ASSERT(inp);

	imo = inp->inp_moptions;
	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));

	/* Drop the lock for the copyin, which may fault. */
	INP_WUNLOCK(inp);

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EINVAL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EINVAL);

	INP_WLOCK(inp);

	/*
	 * Lookup group on the socket.
	 */
	gsa = (sockunion_t *)&msfr.msfr_group;
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		INP_WUNLOCK(inp);
		return (EADDRNOTAVAIL);
	}
	imf = &imo->imo_mfilters[idx];

	/*
	 * Ignore memberships which are in limbo.
	 */
	if (imf->imf_st[1] == MCAST_UNDEFINED) {
		INP_WUNLOCK(inp);
		return (EAGAIN);
	}
	msfr.msfr_fmode = imf->imf_st[1];

	/*
	 * If the user specified a buffer, copy out the source filter
	 * entries to userland gracefully.
	 * We only copy out the number of entries which userland
	 * has asked for, but we always tell userland how big the
	 * buffer really needs to be.
	 */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
	tss = NULL;
	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
		/* M_ZERO: unfilled tail slots must not leak kernel memory. */
		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (tss == NULL) {
			INP_WUNLOCK(inp);
			return (ENOBUFS);
		}
	}

	/*
	 * Count number of sources in-mode at t0.
	 * If buffer space exists and remains, copy out source entries.
	 */
	nsrcs = msfr.msfr_nsrcs;
	ncsrcs = 0;
	ptss = tss;
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
		    lims->imsl_st[0] != imf->imf_st[0])
			continue;
		++ncsrcs;
		if (tss != NULL && nsrcs > 0) {
			psin = (struct sockaddr_in *)ptss;
			psin->sin_family = AF_INET;
			psin->sin_len = sizeof(struct sockaddr_in);
			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
			psin->sin_port = 0;
			++ptss;
			--nsrcs;
		}
	}

	INP_WUNLOCK(inp);

	if (tss != NULL) {
		error = copyout(tss, msfr.msfr_srcs,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		free(tss, M_TEMP);
		if (error)
			return (error);
	}

	/* Report the true in-mode source count, not the buffer size. */
	msfr.msfr_nsrcs = ncsrcs;
	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
 */
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct rm_priotracker in_ifa_tracker;
	struct ip_mreqn mreqn;
	struct ip_moptions *imo;
	struct ifnet *ifp;
	struct in_ifaddr *ia;
	int error, optval;
	u_char coptval;

	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	error = 0;
	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF:
		if (imo != NULL)
			optval = imo->imo_multicast_vif;
		else
			optval = -1;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_IF:
		memset(&mreqn, 0, sizeof(struct ip_mreqn));
		if (imo != NULL) {
			ifp = imo->imo_multicast_ifp;
			if (!in_nullhost(imo->imo_multicast_addr)) {
				mreqn.imr_address = imo->imo_multicast_addr;
			} else if (ifp != NULL) {
				mreqn.imr_ifindex = ifp->if_index;
				/* IFP_TO_IA takes a reference on ia. */
				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
				if (ia != NULL) {
					mreqn.imr_address =
					    IA_SIN(ia)->sin_addr;
					ifa_free(&ia->ia_ifa);
				}
			}
		}
		INP_WUNLOCK(inp);
		/* Old callers may pass only a struct in_addr. */
		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
			error = sooptcopyout(sopt, &mreqn,
			    sizeof(struct ip_mreqn));
		} else {
			error = sooptcopyout(sopt, &mreqn.imr_address,
			    sizeof(struct in_addr));
		}
		break;

	case IP_MULTICAST_TTL:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
		else
			optval = coptval = imo->imo_multicast_ttl;
		INP_WUNLOCK(inp);
		/* Accept both u_char and int sized requests. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_LOOP:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
		else
			optval = coptval = imo->imo_multicast_loop;
		INP_WUNLOCK(inp);
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MSFILTER:
		if (imo == NULL) {
			error = EADDRNOTAVAIL;
			INP_WUNLOCK(inp);
		} else {
			/* inp_get_source_filters() releases the INP lock. */
			error = inp_get_source_filters(inp, sopt);
		}
		break;

	default:
		INP_WUNLOCK(inp);
		error = ENOPROTOOPT;
		break;
	}

	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Look up the ifnet to use for a multicast group membership,
 * given the IPv4 address of an interface, and the IPv4 group address.
 *
 * This routine exists to support legacy multicast applications
 * which do not understand that multicast memberships are scoped to
 * specific physical links in the networking stack, or which need
 * to join link-scope groups before IPv4 addresses are configured.
 *
 * If inp is non-NULL, use this socket's current FIB number for any
 * required FIB lookup.
 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
 * and use its ifp; usually, this points to the default next-hop.
 *
 * If the FIB lookup fails, attempt to use the first non-loopback
 * interface with multicast capability in the system as a
 * last resort. The legacy IPv4 ASM API requires that we do
 * this in order to allow groups to be joined when the routing
 * table has not yet been populated during boot.
 *
 * Returns NULL if no ifp could be found.
 *
 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
 * FUTURE: Implement IPv4 source-address selection.
 */
static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb *inp,
    const struct sockaddr_in *gsin, const struct in_addr ina)
{
	struct rm_priotracker in_ifa_tracker;
	struct ifnet *ifp;
	struct nhop4_basic nh4;
	uint32_t fibnum;

	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
	    ("%s: not multicast", __func__));

	ifp = NULL;
	if (!in_nullhost(ina)) {
		/* Caller supplied an interface address; map it to an ifp. */
		INADDR_TO_IFP(ina, ifp);
	} else {
		/* Otherwise, try the unicast FIB for the group address. */
		fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
		if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
			ifp = nh4.nh_ifp;
		else {
			struct in_ifaddr *ia;
			struct ifnet *mifp;

			/*
			 * No route: fall back to the first non-loopback,
			 * multicast-capable interface in the system.
			 */
			mifp = NULL;
			IN_IFADDR_RLOCK(&in_ifa_tracker);
			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
				mifp = ia->ia_ifp;
				if (!(mifp->if_flags & IFF_LOOPBACK) &&
				    (mifp->if_flags & IFF_MULTICAST)) {
					ifp = mifp;
					break;
				}
			}
			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
		}
	}

	return (ifp);
}

/*
 * Join an IPv4 multicast group, possibly with a source.
 */
static int
inp_join_group(struct inpcb *inp, struct sockopt *sopt)
{
	struct group_source_req gsr;
	sockunion_t *gsa, *ssa;
	struct ifnet *ifp;
	struct in_mfilter *imf;
	struct ip_moptions *imo;
	struct in_multi *inm;
	struct in_msource *lims;
	size_t idx;
	int error, is_new;

	ifp = NULL;
	imf = NULL;
	lims = NULL;
	error = 0;
	is_new = 0;

	memset(&gsr, 0, sizeof(struct group_source_req));
	gsa = (sockunion_t *)&gsr.gsr_group;
	gsa->ss.ss_family = AF_UNSPEC;
	ssa = (sockunion_t *)&gsr.gsr_source;
	ssa->ss.ss_family = AF_UNSPEC;

	/*
	 * Decode the request into gsa (group), ssa (optional source)
	 * and ifp, accepting both the legacy and RFC 3678 forms.
	 */
	switch (sopt->sopt_name) {
	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP: {
		struct ip_mreq_source mreqs;

		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
			error = sooptcopyin(sopt, &mreqs,
			    sizeof(struct ip_mreq),
			    sizeof(struct ip_mreq));
			/*
			 * Do argument switcharoo from ip_mreq into
			 * ip_mreq_source to avoid using two instances.
			 */
			mreqs.imr_interface = mreqs.imr_sourceaddr;
			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
			error = sooptcopyin(sopt, &mreqs,
			    sizeof(struct ip_mreq_source),
			    sizeof(struct ip_mreq_source));
		}
		if (error)
			return (error);

		gsa->sin.sin_family = AF_INET;
		gsa->sin.sin_len = sizeof(struct sockaddr_in);
		gsa->sin.sin_addr = mreqs.imr_multiaddr;

		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
			ssa->sin.sin_family = AF_INET;
			ssa->sin.sin_len = sizeof(struct sockaddr_in);
			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
		}

		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
			return (EINVAL);

		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
		    mreqs.imr_interface);
		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
		break;
	}

	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
			error = sooptcopyin(sopt, &gsr,
			    sizeof(struct group_req),
			    sizeof(struct group_req));
		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
			error = sooptcopyin(sopt, &gsr,
			    sizeof(struct group_source_req),
			    sizeof(struct group_source_req));
		}
		if (error)
			return (error);

		if (gsa->sin.sin_family != AF_INET ||
		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
			return (EINVAL);

		/*
		 * Overwrite the port field if present, as the sockaddr
		 * being copied in may be matched with a binary comparison.
		 */
		gsa->sin.sin_port = 0;
		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
			if (ssa->sin.sin_family != AF_INET ||
			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
				return (EINVAL);
			ssa->sin.sin_port = 0;
		}

		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
			return (EINVAL);

		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
			return (EADDRNOTAVAIL);
		ifp = ifnet_byindex(gsr.gsr_interface);
		break;

	default:
		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
		    __func__, sopt->sopt_name);
		return (EOPNOTSUPP);
		break;
	}

	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
		return (EADDRNOTAVAIL);

	/* Returns with the INP write-locked. */
	imo = inp_findmoptions(inp);
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1) {
		is_new = 1;
	} else {
		inm = imo->imo_membership[idx];
		imf = &imo->imo_mfilters[idx];
		if (ssa->ss.ss_family != AF_UNSPEC) {
			/*
			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
			 * is an error. On an existing inclusive membership,
			 * it just adds the source to the filter list.
			 */
			if (imf->imf_st[1] != MCAST_INCLUDE) {
				error = EINVAL;
				goto out_inp_locked;
			}
			/*
			 * Throw out duplicates.
			 *
			 * XXX FIXME: This makes a naive assumption that
			 * even if entries exist for *ssa in this imf,
			 * they will be rejected as dupes, even if they
			 * are not valid in the current mode (in-mode).
			 *
			 * in_msource is transactioned just as for anything
			 * else in SSM -- but note naive use of inm_graft()
			 * below for allocating new filter entries.
			 *
			 * This is only an issue if someone mixes the
			 * full-state SSM API with the delta-based API,
			 * which is discouraged in the relevant RFCs.
			 */
			lims = imo_match_source(imo, idx, &ssa->sa);
			if (lims != NULL /*&&
			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
				error = EADDRNOTAVAIL;
				goto out_inp_locked;
			}
		} else {
			/*
			 * MCAST_JOIN_GROUP on an existing exclusive
			 * membership is an error; return EADDRINUSE
			 * to preserve 4.4BSD API idempotence, and
			 * avoid tedious detour to code below.
			 * NOTE: This is bending RFC 3678 a bit.
			 *
			 * On an existing inclusive membership, this is also
			 * an error; if you want to change filter mode,
			 * you must use the userland API setsourcefilter().
			 * XXX We don't reject this for imf in UNDEFINED
			 * state at t1, because allocation of a filter
			 * is atomic with allocation of a membership.
			 */
			error = EINVAL;
			if (imf->imf_st[1] == MCAST_EXCLUDE)
				error = EADDRINUSE;
			goto out_inp_locked;
		}
	}

	/*
	 * Begin state merge transaction at socket layer.
	 */
	INP_WLOCK_ASSERT(inp);

	if (is_new) {
		if (imo->imo_num_memberships == imo->imo_max_memberships) {
			error = imo_grow(imo);
			if (error)
				goto out_inp_locked;
		}
		/*
		 * Allocate the new slot upfront so we can deal with
		 * grafting the new source filter in same code path
		 * as for join-source on existing membership.
		 */
		idx = imo->imo_num_memberships;
		imo->imo_membership[idx] = NULL;
		imo->imo_num_memberships++;
		KASSERT(imo->imo_mfilters != NULL,
		    ("%s: imf_mfilters vector was not allocated", __func__));
		imf = &imo->imo_mfilters[idx];
		KASSERT(RB_EMPTY(&imf->imf_sources),
		    ("%s: imf_sources not empty", __func__));
	}

	/*
	 * Graft new source into filter list for this inpcb's
	 * membership of the group. The in_multi may not have
	 * been allocated yet if this is a new membership, however,
	 * the in_mfilter slot will be allocated and must be initialized.
	 *
	 * Note: Grafting of exclusive mode filters doesn't happen
	 * in this path.
	 * XXX: Should check for non-NULL lims (node exists but may
	 * not be in-mode) for interop with full-state API.
	 */
	if (ssa->ss.ss_family != AF_UNSPEC) {
		/* Membership starts in IN mode */
		if (is_new) {
			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
		} else {
			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
		}
		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
		if (lims == NULL) {
			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
			    __func__);
			error = ENOMEM;
			goto out_imo_free;
		}
	} else {
		/* No address specified; Membership starts in EX mode */
		if (is_new) {
			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
			imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
		}
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 */
	IN_MULTI_LOCK();

	if (is_new) {
		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
		    &inm);
		if (error) {
			CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
			    __func__);
			IN_MULTI_UNLOCK();
			goto out_imo_free;
		}
		imo->imo_membership[idx] = inm;
	} else {
		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
		error = inm_merge(inm, imf);
		if (error) {
			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
			    __func__);
			goto out_in_multi_locked;
		}
		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
		error = igmp_change_state(inm);
		if (error) {
			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
			    __func__);
			goto out_in_multi_locked;
		}
	}

out_in_multi_locked:

	IN_MULTI_UNLOCK();

	INP_WLOCK_ASSERT(inp);
	/* Commit or roll back the socket-layer filter delta. */
	if (error) {
		imf_rollback(imf);
		if (is_new)
			imf_purge(imf);
		else
			imf_reap(imf);
	} else {
imf_commit(imf); 2208 } 2209 2210 out_imo_free: 2211 if (error && is_new) { 2212 imo->imo_membership[idx] = NULL; 2213 --imo->imo_num_memberships; 2214 } 2215 2216 out_inp_locked: 2217 INP_WUNLOCK(inp); 2218 return (error); 2219 } 2220 2221 /* 2222 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2223 */ 2224 static int 2225 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2226 { 2227 struct group_source_req gsr; 2228 struct ip_mreq_source mreqs; 2229 sockunion_t *gsa, *ssa; 2230 struct ifnet *ifp; 2231 struct in_mfilter *imf; 2232 struct ip_moptions *imo; 2233 struct in_msource *ims; 2234 struct in_multi *inm; 2235 size_t idx; 2236 int error, is_final; 2237 2238 ifp = NULL; 2239 error = 0; 2240 is_final = 1; 2241 2242 memset(&gsr, 0, sizeof(struct group_source_req)); 2243 gsa = (sockunion_t *)&gsr.gsr_group; 2244 gsa->ss.ss_family = AF_UNSPEC; 2245 ssa = (sockunion_t *)&gsr.gsr_source; 2246 ssa->ss.ss_family = AF_UNSPEC; 2247 2248 switch (sopt->sopt_name) { 2249 case IP_DROP_MEMBERSHIP: 2250 case IP_DROP_SOURCE_MEMBERSHIP: 2251 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2252 error = sooptcopyin(sopt, &mreqs, 2253 sizeof(struct ip_mreq), 2254 sizeof(struct ip_mreq)); 2255 /* 2256 * Swap interface and sourceaddr arguments, 2257 * as ip_mreq and ip_mreq_source are laid 2258 * out differently. 
2259 */ 2260 mreqs.imr_interface = mreqs.imr_sourceaddr; 2261 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2262 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2263 error = sooptcopyin(sopt, &mreqs, 2264 sizeof(struct ip_mreq_source), 2265 sizeof(struct ip_mreq_source)); 2266 } 2267 if (error) 2268 return (error); 2269 2270 gsa->sin.sin_family = AF_INET; 2271 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2272 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2273 2274 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2275 ssa->sin.sin_family = AF_INET; 2276 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2277 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2278 } 2279 2280 /* 2281 * Attempt to look up hinted ifp from interface address. 2282 * Fallthrough with null ifp iff lookup fails, to 2283 * preserve 4.4BSD mcast API idempotence. 2284 * XXX NOTE WELL: The RFC 3678 API is preferred because 2285 * using an IPv4 address as a key is racy. 2286 */ 2287 if (!in_nullhost(mreqs.imr_interface)) 2288 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2289 2290 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2291 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2292 2293 break; 2294 2295 case MCAST_LEAVE_GROUP: 2296 case MCAST_LEAVE_SOURCE_GROUP: 2297 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2298 error = sooptcopyin(sopt, &gsr, 2299 sizeof(struct group_req), 2300 sizeof(struct group_req)); 2301 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2302 error = sooptcopyin(sopt, &gsr, 2303 sizeof(struct group_source_req), 2304 sizeof(struct group_source_req)); 2305 } 2306 if (error) 2307 return (error); 2308 2309 if (gsa->sin.sin_family != AF_INET || 2310 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2311 return (EINVAL); 2312 2313 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2314 if (ssa->sin.sin_family != AF_INET || 2315 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2316 return (EINVAL); 2317 } 2318 2319 if (gsr.gsr_interface == 0 || V_if_index < 
gsr.gsr_interface) 2320 return (EADDRNOTAVAIL); 2321 2322 ifp = ifnet_byindex(gsr.gsr_interface); 2323 2324 if (ifp == NULL) 2325 return (EADDRNOTAVAIL); 2326 break; 2327 2328 default: 2329 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2330 __func__, sopt->sopt_name); 2331 return (EOPNOTSUPP); 2332 break; 2333 } 2334 2335 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2336 return (EINVAL); 2337 2338 /* 2339 * Find the membership in the membership array. 2340 */ 2341 imo = inp_findmoptions(inp); 2342 idx = imo_match_group(imo, ifp, &gsa->sa); 2343 if (idx == -1) { 2344 error = EADDRNOTAVAIL; 2345 goto out_inp_locked; 2346 } 2347 inm = imo->imo_membership[idx]; 2348 imf = &imo->imo_mfilters[idx]; 2349 2350 if (ssa->ss.ss_family != AF_UNSPEC) 2351 is_final = 0; 2352 2353 /* 2354 * Begin state merge transaction at socket layer. 2355 */ 2356 INP_WLOCK_ASSERT(inp); 2357 2358 /* 2359 * If we were instructed only to leave a given source, do so. 2360 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2361 */ 2362 if (is_final) { 2363 imf_leave(imf); 2364 } else { 2365 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2366 error = EADDRNOTAVAIL; 2367 goto out_inp_locked; 2368 } 2369 ims = imo_match_source(imo, idx, &ssa->sa); 2370 if (ims == NULL) { 2371 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2372 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2373 error = EADDRNOTAVAIL; 2374 goto out_inp_locked; 2375 } 2376 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2377 error = imf_prune(imf, &ssa->sin); 2378 if (error) { 2379 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2380 __func__); 2381 goto out_inp_locked; 2382 } 2383 } 2384 2385 /* 2386 * Begin state merge transaction at IGMP layer. 2387 */ 2388 IN_MULTI_LOCK(); 2389 2390 if (is_final) { 2391 /* 2392 * Give up the multicast address record to which 2393 * the membership points. 
2394 */ 2395 (void)in_leavegroup_locked(inm, imf); 2396 } else { 2397 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2398 error = inm_merge(inm, imf); 2399 if (error) { 2400 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2401 __func__); 2402 goto out_in_multi_locked; 2403 } 2404 2405 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2406 error = igmp_change_state(inm); 2407 if (error) { 2408 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2409 __func__); 2410 } 2411 } 2412 2413 out_in_multi_locked: 2414 2415 IN_MULTI_UNLOCK(); 2416 2417 if (error) 2418 imf_rollback(imf); 2419 else 2420 imf_commit(imf); 2421 2422 imf_reap(imf); 2423 2424 if (is_final) { 2425 /* Remove the gap in the membership and filter array. */ 2426 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2427 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2428 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2429 } 2430 imo->imo_num_memberships--; 2431 } 2432 2433 out_inp_locked: 2434 INP_WUNLOCK(inp); 2435 return (error); 2436 } 2437 2438 /* 2439 * Select the interface for transmitting IPv4 multicast datagrams. 2440 * 2441 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2442 * may be passed to this socket option. An address of INADDR_ANY or an 2443 * interface index of 0 is used to remove a previous selection. 2444 * When no interface is selected, one is chosen for every send. 2445 */ 2446 static int 2447 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2448 { 2449 struct in_addr addr; 2450 struct ip_mreqn mreqn; 2451 struct ifnet *ifp; 2452 struct ip_moptions *imo; 2453 int error; 2454 2455 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2456 /* 2457 * An interface index was specified using the 2458 * Linux-derived ip_mreqn structure. 
2459 */ 2460 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2461 sizeof(struct ip_mreqn)); 2462 if (error) 2463 return (error); 2464 2465 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2466 return (EINVAL); 2467 2468 if (mreqn.imr_ifindex == 0) { 2469 ifp = NULL; 2470 } else { 2471 ifp = ifnet_byindex(mreqn.imr_ifindex); 2472 if (ifp == NULL) 2473 return (EADDRNOTAVAIL); 2474 } 2475 } else { 2476 /* 2477 * An interface was specified by IPv4 address. 2478 * This is the traditional BSD usage. 2479 */ 2480 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2481 sizeof(struct in_addr)); 2482 if (error) 2483 return (error); 2484 if (in_nullhost(addr)) { 2485 ifp = NULL; 2486 } else { 2487 INADDR_TO_IFP(addr, ifp); 2488 if (ifp == NULL) 2489 return (EADDRNOTAVAIL); 2490 } 2491 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2492 ntohl(addr.s_addr)); 2493 } 2494 2495 /* Reject interfaces which do not support multicast. */ 2496 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2497 return (EOPNOTSUPP); 2498 2499 imo = inp_findmoptions(inp); 2500 imo->imo_multicast_ifp = ifp; 2501 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2502 INP_WUNLOCK(inp); 2503 2504 return (0); 2505 } 2506 2507 /* 2508 * Atomically set source filters on a socket for an IPv4 multicast group. 2509 * 2510 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
 */
static int
inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct in_mfilter *imf;
	struct ip_moptions *imo;
	struct in_multi *inm;
	size_t idx;
	int error;

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	/* Cap the per-socket source list size before allocating anything. */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		return (ENOBUFS);

	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
	     msfr.msfr_fmode != MCAST_INCLUDE))
		return (EINVAL);

	if (msfr.msfr_group.ss_family != AF_INET ||
	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
		return (EINVAL);

	gsa = (sockunion_t *)&msfr.msfr_group;
	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
		return (EINVAL);

	gsa->sin.sin_port = 0;	/* ignore port */

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EADDRNOTAVAIL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Take the INP write lock.
	 * Check if this socket is a member of this group.
	 * imo_match_group() returns (size_t)-1 when not found.
	 */
	imo = inp_findmoptions(inp);
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		error = EADDRNOTAVAIL;
		goto out_inp_locked;
	}
	inm = imo->imo_membership[idx];
	imf = &imo->imo_mfilters[idx];

	/*
	 * Begin state merge transaction at socket layer.
	 */
	INP_WLOCK_ASSERT(inp);

	/* Record the requested filter mode at t1 (the pending state). */
	imf->imf_st[1] = msfr.msfr_fmode;

	/*
	 * Apply any new source filters, if present.
	 * Make a copy of the user-space source vector so
	 * that we may copy them with a single copyin. This
	 * allows us to deal with page faults up-front.
	 */
	if (msfr.msfr_nsrcs > 0) {
		struct in_msource *lims;
		struct sockaddr_in *psin;
		struct sockaddr_storage *kss, *pkss;
		int i;

		/*
		 * The INP lock must be dropped across the sleeping
		 * malloc(M_WAITOK) and the faulting copyin().
		 * NOTE(review): imf and inm are re-used after the
		 * re-lock below without re-validation; this assumes the
		 * membership cannot change while the lock is dropped —
		 * TODO confirm against concurrent leave/close paths.
		 */
		INP_WUNLOCK(inp);

		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
		    __func__, (unsigned long)msfr.msfr_nsrcs);
		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_WAITOK);
		error = copyin(msfr.msfr_srcs, kss,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		if (error) {
			free(kss, M_TEMP);
			return (error);
		}

		INP_WLOCK(inp);

		/*
		 * Mark all source filters as UNDEFINED at t1.
		 * Restore new group filter mode, as imf_leave()
		 * will set it to INCLUDE.
		 */
		imf_leave(imf);
		imf->imf_st[1] = msfr.msfr_fmode;

		/*
		 * Update socket layer filters at t1, lazy-allocating
		 * new entries. This saves a bunch of memory at the
		 * cost of one RB_FIND() per source entry; duplicate
		 * entries in the msfr_nsrcs vector are ignored.
		 * If we encounter an error, rollback transaction.
		 *
		 * XXX This too could be replaced with a set-symmetric
		 * difference like loop to avoid walking from root
		 * every time, as the key space is common.
		 * NOTE(review): i is int while msfr_nsrcs is unsigned;
		 * harmless here since msfr_nsrcs <= in_mcast_maxsocksrc
		 * was checked above, but the comparison is sign-mixed.
		 */
		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
			psin = (struct sockaddr_in *)pkss;
			if (psin->sin_family != AF_INET) {
				error = EAFNOSUPPORT;
				break;
			}
			if (psin->sin_len != sizeof(struct sockaddr_in)) {
				error = EINVAL;
				break;
			}
			error = imf_get_source(imf, psin, &lims);
			if (error)
				break;
			lims->imsl_st[1] = imf->imf_st[1];
		}
		free(kss, M_TEMP);
	}

	if (error)
		goto out_imf_rollback;

	INP_WLOCK_ASSERT(inp);
	IN_MULTI_LOCK();

	/*
	 * Begin state merge transaction at IGMP layer.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		goto out_in_multi_locked;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

out_in_multi_locked:

	IN_MULTI_UNLOCK();

out_imf_rollback:
	/* Commit or roll back the pending (t1) filter state. */
	if (error)
		imf_rollback(imf);
	else
		imf_commit(imf);

	imf_reap(imf);

out_inp_locked:
	INP_WUNLOCK(inp);
	return (error);
}

/*
 * Set the IP multicast options in response to user setsockopt().
 *
 * Many of the socket options handled in this function duplicate the
 * functionality of socket options in the regular unicast API. However,
 * it is not possible to merge the duplicate code, because the idempotence
 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
 * the effects of these options must be treated as separate and distinct.
 *
 * SMPng: XXX: Unlocked read of inp_socket believed OK.
 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
 * is refactored to no longer use vifs.
 */
int
inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct ip_moptions *imo;
	int error;

	error = 0;

	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
		return (EOPNOTSUPP);

	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF: {
		int vifi;
		/*
		 * Select a multicast VIF for transmission.
		 * Only useful if multicast forwarding is active.
		 * legal_vif_num is a function pointer only set when the
		 * multicast routing code is loaded.
		 */
		if (legal_vif_num == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
		if (error)
			break;
		/* -1 clears the VIF selection. */
		if (!legal_vif_num(vifi) && (vifi != -1)) {
			error = EINVAL;
			break;
		}
		imo = inp_findmoptions(inp);
		imo->imo_multicast_vif = vifi;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_IF:
		error = inp_set_multicast_if(inp, sopt);
		break;

	case IP_MULTICAST_TTL: {
		u_char ttl;

		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 * The original multicast API required a char argument,
		 * which is inconsistent with the rest of the socket API.
		 * We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int ittl;

			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			if (ittl > 255) {
				error = EINVAL;
				break;
			}
			ttl = (u_char)ittl;
		}
		imo = inp_findmoptions(inp);
		imo->imo_multicast_ttl = ttl;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_LOOP: {
		u_char loop;

		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one. The original multicast API required a
		 * char argument, which is inconsistent with the rest
		 * of the socket API. We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &loop, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int iloop;

			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			loop = (u_char)iloop;
		}
		imo = inp_findmoptions(inp);
		/* Normalize any non-zero value to 1. */
		imo->imo_multicast_loop = !!loop;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
		error = inp_join_group(inp, sopt);
		break;

	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		error = inp_leave_group(inp, sopt);
		break;

	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = inp_block_unblock_source(inp, sopt);
		break;

	case IP_MSFILTER:
		error = inp_set_source_filters(inp, sopt);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/* Every path above must have released the INP lock. */
	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Expose IGMP's multicast filter mode and source list(s) to userland,
 * keyed by (ifindex, group).
 * The filter mode is written out as a uint32_t, followed by
 * 0..n of struct in_addr.
 * For use by ifmcstat(8).
 * SMPng: NOTE: unlocked read of ifindex space.
 */
static int
sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
{
	struct in_addr src, group;
	struct ifnet *ifp;
	struct ifmultiaddr *ifma;
	struct in_multi *inm;
	struct ip_msource *ims;
	int *name;
	int retval;
	u_int namelen;
	uint32_t fmode, ifindex;

	/* OID name components: name[0] = ifindex, name[1] = group address. */
	name = (int *)arg1;
	namelen = arg2;

	/* This node is read-only; reject any attempt to write. */
	if (req->newptr != NULL)
		return (EPERM);

	if (namelen != 2)
		return (EINVAL);

	ifindex = name[0];
	if (ifindex <= 0 || ifindex > V_if_index) {
		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
		    __func__, ifindex);
		return (ENOENT);
	}

	group.s_addr = name[1];
	if (!IN_MULTICAST(ntohl(group.s_addr))) {
		CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
		    __func__, ntohl(group.s_addr));
		return (EINVAL);
	}

	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL) {
		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
		    __func__, ifindex);
		return (ENOENT);
	}

	/*
	 * Wire the output buffer up-front so SYSCTL_OUT cannot fault
	 * while the locks below are held.
	 */
	retval = sysctl_wire_old_buffer(req,
	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
	if (retval)
		return (retval);

	IN_MULTI_LOCK();

	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;
		inm = (struct in_multi *)ifma->ifma_protospec;
		if (!in_hosteq(inm->inm_addr, group))
			continue;
		/* Emit the committed (t1) filter mode first... */
		fmode = inm->inm_st[1].iss_fmode;
		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
		if (retval != 0)
			break;
		/* ...then each source address, in tree order. */
		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
			    ims->ims_haddr);
			/*
			 * Only copy-out sources which are in-mode.
			 */
			if (fmode != ims_get_mode(inm, ims, 1)) {
				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
				    __func__);
				continue;
			}
			/* ims_haddr is kept in host order; convert here. */
			src.s_addr = htonl(ims->ims_haddr);
			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
			if (retval != 0)
				break;
		}
	}
	IF_ADDR_RUNLOCK(ifp);

	IN_MULTI_UNLOCK();

	return (retval);
}

#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)

/* Printable names for the MCAST_UNDEFINED/INCLUDE/EXCLUDE filter modes. */
static const char *inm_modestrs[] = { "un", "in", "ex" };

/*
 * Map a filter mode constant to a short human-readable string
 * for KTR debug output.
 */
static const char *
inm_mode_str(const int mode)
{

	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
		return (inm_modestrs[mode]);
	return ("??");
}

/* Printable names for the IGMP per-group state machine states. */
static const char *inm_statestrs[] = {
	"not-member",
	"silent",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
	"leaving"
};

/*
 * Map an IGMP group state constant to a short human-readable string
 * for KTR debug output.
 */
static const char *
inm_state_str(const int state)
{

	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
		return (inm_statestrs[state]);
	return ("??");
}

/*
 * Dump an in_multi structure to the console.
 */
void
inm_print(const struct in_multi *inm)
{
	int t;
	char addrbuf[INET_ADDRSTRLEN];

	/* Stay silent unless IGMPv3 tracing is enabled at runtime. */
	if ((ktr_mask & KTR_IGMPV3) == 0)
		return;

	printf("%s: --- begin inm %p ---\n", __func__, inm);
	printf("addr %s ifp %p(%s) ifma %p\n",
	    inet_ntoa_r(inm->inm_addr, addrbuf),
	    inm->inm_ifp,
	    inm->inm_ifp->if_xname,
	    inm->inm_ifma);
	printf("timer %u state %s refcount %u scq.len %u\n",
	    inm->inm_timer,
	    inm_state_str(inm->inm_state),
	    inm->inm_refcount,
	    inm->inm_scq.mq_len);
	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
	    inm->inm_igi,
	    inm->inm_nsrc,
	    inm->inm_sctimer,
	    inm->inm_scrv);
	/* Dump both state snapshots: t0 (committed) and t1 (pending). */
	for (t = 0; t < 2; t++) {
		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
		    inm_mode_str(inm->inm_st[t].iss_fmode),
		    inm->inm_st[t].iss_asm,
		    inm->inm_st[t].iss_ex,
		    inm->inm_st[t].iss_in,
		    inm->inm_st[t].iss_rec);
	}
	printf("%s: --- end inm %p ---\n", __func__, inm);
}

#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */

/* Stub so callers link when IGMPv3 KTR tracing is compiled out. */
void
inm_print(const struct in_multi *inm)
{

}

#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */

/* Instantiate the red-black tree operations for per-group source nodes. */
RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);