1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 2005 Robert N. M. Watson. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * IPv4 multicast socket, group, and socket option processing module. 
33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/rmlock.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/protosw.h> 49 #include <sys/sysctl.h> 50 #include <sys/ktr.h> 51 #include <sys/taskqueue.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/if_dl.h> 57 #include <net/route.h> 58 #include <net/vnet.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/in_pcb.h> 63 #include <netinet/in_var.h> 64 #include <netinet/ip_var.h> 65 #include <netinet/igmp_var.h> 66 67 #ifndef KTR_IGMPV3 68 #define KTR_IGMPV3 KTR_INET 69 #endif 70 71 #ifndef __SOCKUNION_DECLARED 72 union sockunion { 73 struct sockaddr_storage ss; 74 struct sockaddr sa; 75 struct sockaddr_dl sdl; 76 struct sockaddr_in sin; 77 }; 78 typedef union sockunion sockunion_t; 79 #define __SOCKUNION_DECLARED 80 #endif /* __SOCKUNION_DECLARED */ 81 82 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 83 "IPv4 multicast PCB-layer source filter"); 84 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 85 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 86 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 87 "IPv4 multicast IGMP-layer source filter"); 88 89 /* 90 * Locking: 91 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 92 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 93 * it can be taken by code in net/if.c also. 94 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 95 * 96 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly 97 * any need for in_multi itself to be virtualized -- it is bound to an ifp 98 * anyway no matter what happens. 
99 */ 100 struct mtx in_multi_mtx; 101 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF); 102 103 /* 104 * Functions with non-static linkage defined in this file should be 105 * declared in in_var.h: 106 * imo_multi_filter() 107 * in_addmulti() 108 * in_delmulti() 109 * in_joingroup() 110 * in_joingroup_locked() 111 * in_leavegroup() 112 * in_leavegroup_locked() 113 * and ip_var.h: 114 * inp_freemoptions() 115 * inp_getmoptions() 116 * inp_setmoptions() 117 * 118 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 119 * and in_delmulti(). 120 */ 121 static void imf_commit(struct in_mfilter *); 122 static int imf_get_source(struct in_mfilter *imf, 123 const struct sockaddr_in *psin, 124 struct in_msource **); 125 static struct in_msource * 126 imf_graft(struct in_mfilter *, const uint8_t, 127 const struct sockaddr_in *); 128 static void imf_leave(struct in_mfilter *); 129 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 130 static void imf_purge(struct in_mfilter *); 131 static void imf_rollback(struct in_mfilter *); 132 static void imf_reap(struct in_mfilter *); 133 static int imo_grow(struct ip_moptions *); 134 static size_t imo_match_group(const struct ip_moptions *, 135 const struct ifnet *, const struct sockaddr *); 136 static struct in_msource * 137 imo_match_source(const struct ip_moptions *, const size_t, 138 const struct sockaddr *); 139 static void ims_merge(struct ip_msource *ims, 140 const struct in_msource *lims, const int rollback); 141 static int in_getmulti(struct ifnet *, const struct in_addr *, 142 struct in_multi **); 143 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 144 const int noalloc, struct ip_msource **pims); 145 #ifdef KTR 146 static int inm_is_ifp_detached(const struct in_multi *); 147 #endif 148 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 149 static void inm_purge(struct in_multi *); 150 static void inm_reap(struct in_multi *); 
151 static struct ip_moptions * 152 inp_findmoptions(struct inpcb *); 153 static void inp_freemoptions_internal(struct ip_moptions *); 154 static void inp_gcmoptions(void *, int); 155 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 156 static int inp_join_group(struct inpcb *, struct sockopt *); 157 static int inp_leave_group(struct inpcb *, struct sockopt *); 158 static struct ifnet * 159 inp_lookup_mcast_ifp(const struct inpcb *, 160 const struct sockaddr_in *, const struct in_addr); 161 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 162 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 163 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 164 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 165 166 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 167 "IPv4 multicast"); 168 169 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 170 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 171 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 172 "Max source filters per group"); 173 174 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 175 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 176 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 177 "Max source filters per socket"); 178 179 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 180 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 181 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 182 183 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 184 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 185 "Per-interface stack-wide source filters"); 186 187 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 188 STAILQ_HEAD_INITIALIZER(imo_gc_list); 189 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 190 191 #ifdef KTR 192 /* 193 * Inline function which wraps assertions for a valid ifp. 
194 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 195 * is detached. 196 */ 197 static int __inline 198 inm_is_ifp_detached(const struct in_multi *inm) 199 { 200 struct ifnet *ifp; 201 202 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 203 ifp = inm->inm_ifma->ifma_ifp; 204 if (ifp != NULL) { 205 /* 206 * Sanity check that netinet's notion of ifp is the 207 * same as net's. 208 */ 209 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 210 } 211 212 return (ifp == NULL); 213 } 214 #endif 215 216 /* 217 * Initialize an in_mfilter structure to a known state at t0, t1 218 * with an empty source filter list. 219 */ 220 static __inline void 221 imf_init(struct in_mfilter *imf, const int st0, const int st1) 222 { 223 memset(imf, 0, sizeof(struct in_mfilter)); 224 RB_INIT(&imf->imf_sources); 225 imf->imf_st[0] = st0; 226 imf->imf_st[1] = st1; 227 } 228 229 /* 230 * Function for looking up an in_multi record for an IPv4 multicast address 231 * on a given interface. ifp must be valid. If no record found, return NULL. 232 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. 233 */ 234 struct in_multi * 235 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 236 { 237 struct ifmultiaddr *ifma; 238 struct in_multi *inm; 239 240 IN_MULTI_LOCK_ASSERT(); 241 IF_ADDR_LOCK_ASSERT(ifp); 242 243 inm = NULL; 244 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 245 if (ifma->ifma_addr->sa_family == AF_INET) { 246 inm = (struct in_multi *)ifma->ifma_protospec; 247 if (inm->inm_addr.s_addr == ina.s_addr) 248 break; 249 inm = NULL; 250 } 251 } 252 return (inm); 253 } 254 255 /* 256 * Wrapper for inm_lookup_locked(). 257 * The IF_ADDR_LOCK will be taken on ifp and released on return. 
258 */ 259 struct in_multi * 260 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 261 { 262 struct in_multi *inm; 263 264 IN_MULTI_LOCK_ASSERT(); 265 IF_ADDR_RLOCK(ifp); 266 inm = inm_lookup_locked(ifp, ina); 267 IF_ADDR_RUNLOCK(ifp); 268 269 return (inm); 270 } 271 272 /* 273 * Resize the ip_moptions vector to the next power-of-two minus 1. 274 * May be called with locks held; do not sleep. 275 */ 276 static int 277 imo_grow(struct ip_moptions *imo) 278 { 279 struct in_multi **nmships; 280 struct in_multi **omships; 281 struct in_mfilter *nmfilters; 282 struct in_mfilter *omfilters; 283 size_t idx; 284 size_t newmax; 285 size_t oldmax; 286 287 nmships = NULL; 288 nmfilters = NULL; 289 omships = imo->imo_membership; 290 omfilters = imo->imo_mfilters; 291 oldmax = imo->imo_max_memberships; 292 newmax = ((oldmax + 1) * 2) - 1; 293 294 if (newmax <= IP_MAX_MEMBERSHIPS) { 295 nmships = (struct in_multi **)realloc(omships, 296 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 297 nmfilters = (struct in_mfilter *)realloc(omfilters, 298 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 299 if (nmships != NULL && nmfilters != NULL) { 300 /* Initialize newly allocated source filter heads. */ 301 for (idx = oldmax; idx < newmax; idx++) { 302 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 303 MCAST_EXCLUDE); 304 } 305 imo->imo_max_memberships = newmax; 306 imo->imo_membership = nmships; 307 imo->imo_mfilters = nmfilters; 308 } 309 } 310 311 if (nmships == NULL || nmfilters == NULL) { 312 if (nmships != NULL) 313 free(nmships, M_IPMOPTS); 314 if (nmfilters != NULL) 315 free(nmfilters, M_INMFILTER); 316 return (ETOOMANYREFS); 317 } 318 319 return (0); 320 } 321 322 /* 323 * Find an IPv4 multicast group entry for this ip_moptions instance 324 * which matches the specified group, and optionally an interface. 325 * Return its index into the array, or -1 if not found. 
326 */ 327 static size_t 328 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 329 const struct sockaddr *group) 330 { 331 const struct sockaddr_in *gsin; 332 struct in_multi **pinm; 333 int idx; 334 int nmships; 335 336 gsin = (const struct sockaddr_in *)group; 337 338 /* The imo_membership array may be lazy allocated. */ 339 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 340 return (-1); 341 342 nmships = imo->imo_num_memberships; 343 pinm = &imo->imo_membership[0]; 344 for (idx = 0; idx < nmships; idx++, pinm++) { 345 if (*pinm == NULL) 346 continue; 347 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 348 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 349 break; 350 } 351 } 352 if (idx >= nmships) 353 idx = -1; 354 355 return (idx); 356 } 357 358 /* 359 * Find an IPv4 multicast source entry for this imo which matches 360 * the given group index for this socket, and source address. 361 * 362 * NOTE: This does not check if the entry is in-mode, merely if 363 * it exists, which may not be the desired behaviour. 364 */ 365 static struct in_msource * 366 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 367 const struct sockaddr *src) 368 { 369 struct ip_msource find; 370 struct in_mfilter *imf; 371 struct ip_msource *ims; 372 const sockunion_t *psa; 373 374 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 375 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 376 ("%s: invalid index %d\n", __func__, (int)gidx)); 377 378 /* The imo_mfilters array may be lazy allocated. */ 379 if (imo->imo_mfilters == NULL) 380 return (NULL); 381 imf = &imo->imo_mfilters[gidx]; 382 383 /* Source trees are keyed in host byte order. 
*/ 384 psa = (const sockunion_t *)src; 385 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 386 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 387 388 return ((struct in_msource *)ims); 389 } 390 391 /* 392 * Perform filtering for multicast datagrams on a socket by group and source. 393 * 394 * Returns 0 if a datagram should be allowed through, or various error codes 395 * if the socket was not a member of the group, or the source was muted, etc. 396 */ 397 int 398 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 399 const struct sockaddr *group, const struct sockaddr *src) 400 { 401 size_t gidx; 402 struct in_msource *ims; 403 int mode; 404 405 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 406 407 gidx = imo_match_group(imo, ifp, group); 408 if (gidx == -1) 409 return (MCAST_NOTGMEMBER); 410 411 /* 412 * Check if the source was included in an (S,G) join. 413 * Allow reception on exclusive memberships by default, 414 * reject reception on inclusive memberships by default. 415 * Exclude source only if an in-mode exclude filter exists. 416 * Include source only if an in-mode include filter exists. 417 * NOTE: We are comparing group state here at IGMP t1 (now) 418 * with socket-layer t0 (since last downcall). 419 */ 420 mode = imo->imo_mfilters[gidx].imf_st[1]; 421 ims = imo_match_source(imo, gidx, src); 422 423 if ((ims == NULL && mode == MCAST_INCLUDE) || 424 (ims != NULL && ims->imsl_st[0] != mode)) 425 return (MCAST_NOTSMEMBER); 426 427 return (MCAST_PASS); 428 } 429 430 /* 431 * Find and return a reference to an in_multi record for (ifp, group), 432 * and bump its reference count. 433 * If one does not exist, try to allocate it, and update link-layer multicast 434 * filters on ifp to listen for group. 435 * Assumes the IN_MULTI lock is held across the call. 436 * Return 0 if successful, otherwise return an appropriate error code. 
437 */ 438 static int 439 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 440 struct in_multi **pinm) 441 { 442 struct sockaddr_in gsin; 443 struct ifmultiaddr *ifma; 444 struct in_ifinfo *ii; 445 struct in_multi *inm; 446 int error; 447 448 IN_MULTI_LOCK_ASSERT(); 449 450 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 451 452 inm = inm_lookup(ifp, *group); 453 if (inm != NULL) { 454 /* 455 * If we already joined this group, just bump the 456 * refcount and return it. 457 */ 458 KASSERT(inm->inm_refcount >= 1, 459 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 460 ++inm->inm_refcount; 461 *pinm = inm; 462 return (0); 463 } 464 465 memset(&gsin, 0, sizeof(gsin)); 466 gsin.sin_family = AF_INET; 467 gsin.sin_len = sizeof(struct sockaddr_in); 468 gsin.sin_addr = *group; 469 470 /* 471 * Check if a link-layer group is already associated 472 * with this network-layer group on the given ifnet. 473 */ 474 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 475 if (error != 0) 476 return (error); 477 478 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 479 IF_ADDR_WLOCK(ifp); 480 481 /* 482 * If something other than netinet is occupying the link-layer 483 * group, print a meaningful error message and back out of 484 * the allocation. 485 * Otherwise, bump the refcount on the existing network-layer 486 * group association and return it. 
487 */ 488 if (ifma->ifma_protospec != NULL) { 489 inm = (struct in_multi *)ifma->ifma_protospec; 490 #ifdef INVARIANTS 491 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 492 __func__)); 493 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 494 ("%s: ifma not AF_INET", __func__)); 495 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 496 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 497 !in_hosteq(inm->inm_addr, *group)) 498 panic("%s: ifma %p is inconsistent with %p (%s)", 499 __func__, ifma, inm, inet_ntoa(*group)); 500 #endif 501 ++inm->inm_refcount; 502 *pinm = inm; 503 IF_ADDR_WUNLOCK(ifp); 504 return (0); 505 } 506 507 IF_ADDR_WLOCK_ASSERT(ifp); 508 509 /* 510 * A new in_multi record is needed; allocate and initialize it. 511 * We DO NOT perform an IGMP join as the in_ layer may need to 512 * push an initial source list down to IGMP to support SSM. 513 * 514 * The initial source filter state is INCLUDE, {} as per the RFC. 515 */ 516 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 517 if (inm == NULL) { 518 if_delmulti_ifma(ifma); 519 IF_ADDR_WUNLOCK(ifp); 520 return (ENOMEM); 521 } 522 inm->inm_addr = *group; 523 inm->inm_ifp = ifp; 524 inm->inm_igi = ii->ii_igmp; 525 inm->inm_ifma = ifma; 526 inm->inm_refcount = 1; 527 inm->inm_state = IGMP_NOT_MEMBER; 528 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 529 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 530 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 531 RB_INIT(&inm->inm_srcs); 532 533 ifma->ifma_protospec = inm; 534 535 *pinm = inm; 536 537 IF_ADDR_WUNLOCK(ifp); 538 return (0); 539 } 540 541 /* 542 * Drop a reference to an in_multi record. 543 * 544 * If the refcount drops to 0, free the in_multi record and 545 * delete the underlying link-layer membership. 
546 */ 547 void 548 inm_release_locked(struct in_multi *inm) 549 { 550 struct ifmultiaddr *ifma; 551 552 IN_MULTI_LOCK_ASSERT(); 553 554 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 555 556 if (--inm->inm_refcount > 0) { 557 CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__, 558 inm->inm_refcount); 559 return; 560 } 561 562 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 563 564 ifma = inm->inm_ifma; 565 566 /* XXX this access is not covered by IF_ADDR_LOCK */ 567 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 568 KASSERT(ifma->ifma_protospec == inm, 569 ("%s: ifma_protospec != inm", __func__)); 570 ifma->ifma_protospec = NULL; 571 572 inm_purge(inm); 573 574 free(inm, M_IPMADDR); 575 576 if_delmulti_ifma(ifma); 577 } 578 579 /* 580 * Clear recorded source entries for a group. 581 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 582 * FIXME: Should reap. 583 */ 584 void 585 inm_clear_recorded(struct in_multi *inm) 586 { 587 struct ip_msource *ims; 588 589 IN_MULTI_LOCK_ASSERT(); 590 591 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 592 if (ims->ims_stp) { 593 ims->ims_stp = 0; 594 --inm->inm_st[1].iss_rec; 595 } 596 } 597 KASSERT(inm->inm_st[1].iss_rec == 0, 598 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 599 } 600 601 /* 602 * Record a source as pending for a Source-Group IGMPv3 query. 603 * This lives here as it modifies the shared tree. 604 * 605 * inm is the group descriptor. 606 * naddr is the address of the source to record in network-byte order. 607 * 608 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 609 * lazy-allocate a source node in response to an SG query. 610 * Otherwise, no allocation is performed. This saves some memory 611 * with the trade-off that the source will not be reported to the 612 * router if joined in the window between the query response and 613 * the group actually being joined on the local host. 
614 * 615 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 616 * This turns off the allocation of a recorded source entry if 617 * the group has not been joined. 618 * 619 * Return 0 if the source didn't exist or was already marked as recorded. 620 * Return 1 if the source was marked as recorded by this function. 621 * Return <0 if any error occured (negated errno code). 622 */ 623 int 624 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 625 { 626 struct ip_msource find; 627 struct ip_msource *ims, *nims; 628 629 IN_MULTI_LOCK_ASSERT(); 630 631 find.ims_haddr = ntohl(naddr); 632 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 633 if (ims && ims->ims_stp) 634 return (0); 635 if (ims == NULL) { 636 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 637 return (-ENOSPC); 638 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 639 M_NOWAIT | M_ZERO); 640 if (nims == NULL) 641 return (-ENOMEM); 642 nims->ims_haddr = find.ims_haddr; 643 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 644 ++inm->inm_nsrc; 645 ims = nims; 646 } 647 648 /* 649 * Mark the source as recorded and update the recorded 650 * source count. 651 */ 652 ++ims->ims_stp; 653 ++inm->inm_st[1].iss_rec; 654 655 return (1); 656 } 657 658 /* 659 * Return a pointer to an in_msource owned by an in_mfilter, 660 * given its source address. 661 * Lazy-allocate if needed. If this is a new entry its filter state is 662 * undefined at t0. 663 * 664 * imf is the filter set being modified. 665 * haddr is the source address in *host* byte-order. 666 * 667 * SMPng: May be called with locks held; malloc must not block. 
668 */ 669 static int 670 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 671 struct in_msource **plims) 672 { 673 struct ip_msource find; 674 struct ip_msource *ims, *nims; 675 struct in_msource *lims; 676 int error; 677 678 error = 0; 679 ims = NULL; 680 lims = NULL; 681 682 /* key is host byte order */ 683 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 684 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 685 lims = (struct in_msource *)ims; 686 if (lims == NULL) { 687 if (imf->imf_nsrc == in_mcast_maxsocksrc) 688 return (ENOSPC); 689 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 690 M_NOWAIT | M_ZERO); 691 if (nims == NULL) 692 return (ENOMEM); 693 lims = (struct in_msource *)nims; 694 lims->ims_haddr = find.ims_haddr; 695 lims->imsl_st[0] = MCAST_UNDEFINED; 696 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 697 ++imf->imf_nsrc; 698 } 699 700 *plims = lims; 701 702 return (error); 703 } 704 705 /* 706 * Graft a source entry into an existing socket-layer filter set, 707 * maintaining any required invariants and checking allocations. 708 * 709 * The source is marked as being in the new filter mode at t1. 710 * 711 * Return the pointer to the new node, otherwise return NULL. 712 */ 713 static struct in_msource * 714 imf_graft(struct in_mfilter *imf, const uint8_t st1, 715 const struct sockaddr_in *psin) 716 { 717 struct ip_msource *nims; 718 struct in_msource *lims; 719 720 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 721 M_NOWAIT | M_ZERO); 722 if (nims == NULL) 723 return (NULL); 724 lims = (struct in_msource *)nims; 725 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 726 lims->imsl_st[0] = MCAST_UNDEFINED; 727 lims->imsl_st[1] = st1; 728 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 729 ++imf->imf_nsrc; 730 731 return (lims); 732 } 733 734 /* 735 * Prune a source entry from an existing socket-layer filter set, 736 * maintaining any required invariants and checking allocations. 
737 * 738 * The source is marked as being left at t1, it is not freed. 739 * 740 * Return 0 if no error occurred, otherwise return an errno value. 741 */ 742 static int 743 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 744 { 745 struct ip_msource find; 746 struct ip_msource *ims; 747 struct in_msource *lims; 748 749 /* key is host byte order */ 750 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 751 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 752 if (ims == NULL) 753 return (ENOENT); 754 lims = (struct in_msource *)ims; 755 lims->imsl_st[1] = MCAST_UNDEFINED; 756 return (0); 757 } 758 759 /* 760 * Revert socket-layer filter set deltas at t1 to t0 state. 761 */ 762 static void 763 imf_rollback(struct in_mfilter *imf) 764 { 765 struct ip_msource *ims, *tims; 766 struct in_msource *lims; 767 768 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 769 lims = (struct in_msource *)ims; 770 if (lims->imsl_st[0] == lims->imsl_st[1]) { 771 /* no change at t1 */ 772 continue; 773 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 774 /* revert change to existing source at t1 */ 775 lims->imsl_st[1] = lims->imsl_st[0]; 776 } else { 777 /* revert source added t1 */ 778 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 779 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 780 free(ims, M_INMFILTER); 781 imf->imf_nsrc--; 782 } 783 } 784 imf->imf_st[1] = imf->imf_st[0]; 785 } 786 787 /* 788 * Mark socket-layer filter set as INCLUDE {} at t1. 789 */ 790 static void 791 imf_leave(struct in_mfilter *imf) 792 { 793 struct ip_msource *ims; 794 struct in_msource *lims; 795 796 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 797 lims = (struct in_msource *)ims; 798 lims->imsl_st[1] = MCAST_UNDEFINED; 799 } 800 imf->imf_st[1] = MCAST_INCLUDE; 801 } 802 803 /* 804 * Mark socket-layer filter set deltas as committed. 
805 */ 806 static void 807 imf_commit(struct in_mfilter *imf) 808 { 809 struct ip_msource *ims; 810 struct in_msource *lims; 811 812 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 813 lims = (struct in_msource *)ims; 814 lims->imsl_st[0] = lims->imsl_st[1]; 815 } 816 imf->imf_st[0] = imf->imf_st[1]; 817 } 818 819 /* 820 * Reap unreferenced sources from socket-layer filter set. 821 */ 822 static void 823 imf_reap(struct in_mfilter *imf) 824 { 825 struct ip_msource *ims, *tims; 826 struct in_msource *lims; 827 828 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 829 lims = (struct in_msource *)ims; 830 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 831 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 832 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 833 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 834 free(ims, M_INMFILTER); 835 imf->imf_nsrc--; 836 } 837 } 838 } 839 840 /* 841 * Purge socket-layer filter set. 842 */ 843 static void 844 imf_purge(struct in_mfilter *imf) 845 { 846 struct ip_msource *ims, *tims; 847 848 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 849 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 850 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 851 free(ims, M_INMFILTER); 852 imf->imf_nsrc--; 853 } 854 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 855 KASSERT(RB_EMPTY(&imf->imf_sources), 856 ("%s: imf_sources not empty", __func__)); 857 } 858 859 /* 860 * Look up a source filter entry for a multicast group. 861 * 862 * inm is the group descriptor to work with. 863 * haddr is the host-byte-order IPv4 address to look up. 864 * noalloc may be non-zero to suppress allocation of sources. 865 * *pims will be set to the address of the retrieved or allocated source. 866 * 867 * SMPng: NOTE: may be called with locks held. 868 * Return 0 if successful, otherwise return a non-zero error code. 
 */
static int
inm_get_source(struct in_multi *inm, const in_addr_t haddr,
    const int noalloc, struct ip_msource **pims)
{
	struct ip_msource	 find;
	struct ip_msource	*ims, *nims;
#ifdef KTR
	struct in_addr ia;
#endif

	find.ims_haddr = haddr;
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims == NULL && !noalloc) {
		/* Enforce the per-group source limit before allocating. */
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		nims->ims_haddr = haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
#ifdef KTR
		ia.s_addr = htonl(haddr);
		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
		    inet_ntoa(ia), ims);
#endif
	}

	/*
	 * Note that *pims is only written on the success path; it is
	 * NULL here when noalloc is set and no entry was found.
	 */
	*pims = ims;
	return (0);
}

/*
 * Merge socket-layer source into IGMP-layer source.
 * If rollback is non-zero, perform the inverse of the merge.
 *
 * ims is the IGMP-layer source whose t1 counters are adjusted.
 * lims is the socket-layer source supplying the t0 and t1 modes.
 *
 * The counter matching the t0 mode is reduced by n and the counter
 * matching the t1 mode is raised by n, where n is 1 for a merge and
 * -1 for a rollback.  An undefined mode touches neither counter.
 */
static void
ims_merge(struct ip_msource *ims, const struct in_msource *lims,
    const int rollback)
{
	int n = rollback ? -1 : 1;
#ifdef KTR
	struct in_addr ia;

	ia.s_addr = htonl(ims->ims_haddr);
#endif

	/* Retract the contribution of the mode held at t0. */
	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].ex -= n;
	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].in -= n;
	}

	/* Apply the contribution of the mode held at t1. */
	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].ex += n;
	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].in += n;
	}
}

/*
 * Atomically update the global in_multi state, when a membership's
 * filter list is being updated in any way.
 *
 * imf is the per-inpcb-membership group filter pointer.
 * A fake imf may be passed for in-kernel consumers.
 *
 * XXX This is a candidate for a set-symmetric-difference style loop
 * which would eliminate the repeated lookup from root of ims nodes,
 * as they share the same key space.
 *
 * If any error occurred this function will back out of refcounts
 * and return a non-zero value.
 *
 * Only the t1 side of inm is modified here; inm_commit() copies t1 to t0.
 */
static int
inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct ip_msource	*ims, *nims;
	struct in_msource	*lims;
	int			 schanged, error;
	int			 nsrc0, nsrc1;

	schanged = 0;
	error = 0;
	nsrc1 = nsrc0 = 0;

	/*
	 * Update the source filters first, as this may fail.
	 * Maintain count of in-mode filters at t0, t1. These are
	 * used to work out if we transition into ASM mode or not.
	 * Maintain a count of source filters whose state was
	 * actually modified by this operation.
	 */
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
		/* Sources whose mode did not change need no merge. */
		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
		/* Counted even on failure so that the reap below still runs. */
		++schanged;
		if (error)
			break;
		ims_merge(nims, lims, 0);
	}
	if (error) {
		struct ip_msource *bims;

		/*
		 * Back out the merges already applied.
		 *
		 * NOTE(review): the walk is seeded from nims, not from the
		 * imf node at which the loop above stopped.  nims is
		 * whatever the last successful inm_get_source() stored (a
		 * node of inm's tree), and is never assigned if the very
		 * first lookup failed, since inm_get_source() does not
		 * write *pims on error.  Confirm this is the intended
		 * starting point.
		 */
		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
			lims = (struct in_msource *)ims;
			if (lims->imsl_st[0] == lims->imsl_st[1])
				continue;
			/* noalloc lookup: only existing nodes are rolled back. */
			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
			if (bims == NULL)
				continue;
			ims_merge(bims, lims, 1);
		}
		goto out_reap;
	}

	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
	    __func__, nsrc0, nsrc1);

	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
	if (imf->imf_st[0] == imf->imf_st[1] &&
	    imf->imf_st[1] == MCAST_INCLUDE) {
		if (nsrc1 == 0) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}
	}

	/* Handle filter mode transition on socket. */
	if (imf->imf_st[0] != imf->imf_st[1]) {
		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
		    __func__, imf->imf_st[0], imf->imf_st[1]);

		/* Remove this socket from the listener count of its old mode. */
		if (imf->imf_st[0] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
			--inm->inm_st[1].iss_ex;
		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}

		/*
		 * Add it to the listener count of its new mode; an
		 * INCLUDE filter with no in-mode sources is not counted.
		 */
		if (imf->imf_st[1] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
			inm->inm_st[1].iss_ex++;
		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
			inm->inm_st[1].iss_in++;
		}
	}

	/*
	 * Track inm filter state in terms of listener counts.
	 * If there are any exclusive listeners, stack-wide
	 * membership is exclusive.
	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
	 * If no listeners remain, state is undefined at t1,
	 * and the IGMP lifecycle for this group should finish.
	 */
	if (inm->inm_st[1].iss_ex > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
	} else if (inm->inm_st[1].iss_in > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
	} else {
		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	}

	/* Decrement ASM listener count on transition out of ASM mode. */
	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
		/*
		 * NOTE(review): the inner if has no braces, so it governs
		 * only the CTR1() trace statement.  The decrement below
		 * runs whenever the outer condition holds, including when
		 * the filter stays EXCLUDE {} (in which case the increment
		 * further down re-adds it).  Confirm before adding braces,
		 * as that would change the counting.
		 */
		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
		--inm->inm_st[1].iss_asm;
	}

	/* Increment ASM listener count on transition to ASM mode. */
	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
		inm->inm_st[1].iss_asm++;
	}

	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
	inm_print(inm);

out_reap:
	/* Reached on both success and failure. */
	if (schanged > 0) {
		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
		inm_reap(inm);
	}
	return (error);
}

/*
 * Mark an in_multi's filter set deltas as committed.
 * Called by IGMP after a state change has been enqueued.
 *
 * The t1 state of every source, and of the group itself, is copied to t0.
 */
void
inm_commit(struct in_multi *inm)
{
	struct ip_msource	*ims;

	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
	inm_print(inm);

	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		ims->ims_st[0] = ims->ims_st[1];
	}
	inm->inm_st[0] = inm->inm_st[1];
}

/*
 * Reap unreferenced nodes from an in_multi's filter set.
 *
 * A node is kept if any of its include/exclude counters is positive at
 * t0 or t1, or if it is marked as recorded (ims_stp); otherwise it is
 * removed from the tree and freed.
 */
static void
inm_reap(struct in_multi *inm)
{
	struct ip_msource	*ims, *tims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
		    ims->ims_stp != 0)
			continue;
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Purge all source nodes from an in_multi's filter set.
1118 */ 1119 static void 1120 inm_purge(struct in_multi *inm) 1121 { 1122 struct ip_msource *ims, *tims; 1123 1124 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1125 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1126 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1127 free(ims, M_IPMSOURCE); 1128 inm->inm_nsrc--; 1129 } 1130 } 1131 1132 /* 1133 * Join a multicast group; unlocked entry point. 1134 * 1135 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1136 * is not held. Fortunately, ifp is unlikely to have been detached 1137 * at this point, so we assume it's OK to recurse. 1138 */ 1139 int 1140 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1141 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1142 { 1143 int error; 1144 1145 IN_MULTI_LOCK(); 1146 error = in_joingroup_locked(ifp, gina, imf, pinm); 1147 IN_MULTI_UNLOCK(); 1148 1149 return (error); 1150 } 1151 1152 /* 1153 * Join a multicast group; real entry point. 1154 * 1155 * Only preserves atomicity at inm level. 1156 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1157 * 1158 * If the IGMP downcall fails, the group is not joined, and an error 1159 * code is returned. 1160 */ 1161 int 1162 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1163 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1164 { 1165 struct in_mfilter timf; 1166 struct in_multi *inm; 1167 int error; 1168 1169 IN_MULTI_LOCK_ASSERT(); 1170 1171 CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__, 1172 inet_ntoa(*gina), ifp, ifp->if_xname); 1173 1174 error = 0; 1175 inm = NULL; 1176 1177 /* 1178 * If no imf was specified (i.e. kernel consumer), 1179 * fake one up and assume it is an ASM join. 
1180 */ 1181 if (imf == NULL) { 1182 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1183 imf = &timf; 1184 } 1185 1186 error = in_getmulti(ifp, gina, &inm); 1187 if (error) { 1188 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1189 return (error); 1190 } 1191 1192 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1193 error = inm_merge(inm, imf); 1194 if (error) { 1195 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1196 goto out_inm_release; 1197 } 1198 1199 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1200 error = igmp_change_state(inm); 1201 if (error) { 1202 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1203 goto out_inm_release; 1204 } 1205 1206 out_inm_release: 1207 if (error) { 1208 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1209 inm_release_locked(inm); 1210 } else { 1211 *pinm = inm; 1212 } 1213 1214 return (error); 1215 } 1216 1217 /* 1218 * Leave a multicast group; unlocked entry point. 1219 */ 1220 int 1221 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1222 { 1223 int error; 1224 1225 IN_MULTI_LOCK(); 1226 error = in_leavegroup_locked(inm, imf); 1227 IN_MULTI_UNLOCK(); 1228 1229 return (error); 1230 } 1231 1232 /* 1233 * Leave a multicast group; real entry point. 1234 * All source filters will be expunged. 1235 * 1236 * Only preserves atomicity at inm level. 1237 * 1238 * Holding the write lock for the INP which contains imf 1239 * is highly advisable. We can't assert for it as imf does not 1240 * contain a back-pointer to the owning inp. 1241 * 1242 * Note: This is not the same as inm_release(*) as this function also 1243 * makes a state change downcall into IGMP. 
1244 */ 1245 int 1246 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1247 { 1248 struct in_mfilter timf; 1249 int error; 1250 1251 error = 0; 1252 1253 IN_MULTI_LOCK_ASSERT(); 1254 1255 CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__, 1256 inm, inet_ntoa(inm->inm_addr), 1257 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1258 imf); 1259 1260 /* 1261 * If no imf was specified (i.e. kernel consumer), 1262 * fake one up and assume it is an ASM join. 1263 */ 1264 if (imf == NULL) { 1265 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1266 imf = &timf; 1267 } 1268 1269 /* 1270 * Begin state merge transaction at IGMP layer. 1271 * 1272 * As this particular invocation should not cause any memory 1273 * to be allocated, and there is no opportunity to roll back 1274 * the transaction, it MUST NOT fail. 1275 */ 1276 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1277 error = inm_merge(inm, imf); 1278 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1279 1280 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1281 CURVNET_SET(inm->inm_ifp->if_vnet); 1282 error = igmp_change_state(inm); 1283 CURVNET_RESTORE(); 1284 if (error) 1285 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1286 1287 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1288 inm_release_locked(inm); 1289 1290 return (error); 1291 } 1292 1293 /*#ifndef BURN_BRIDGES*/ 1294 /* 1295 * Join an IPv4 multicast group in (*,G) exclusive mode. 1296 * The group must be a 224.0.0.0/24 link-scope group. 1297 * This KPI is for legacy kernel consumers only. 
1298 */ 1299 struct in_multi * 1300 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1301 { 1302 struct in_multi *pinm; 1303 int error; 1304 1305 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1306 ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap))); 1307 1308 error = in_joingroup(ifp, ap, NULL, &pinm); 1309 if (error != 0) 1310 pinm = NULL; 1311 1312 return (pinm); 1313 } 1314 1315 /* 1316 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode. 1317 * This KPI is for legacy kernel consumers only. 1318 */ 1319 void 1320 in_delmulti(struct in_multi *inm) 1321 { 1322 1323 (void)in_leavegroup(inm, NULL); 1324 } 1325 /*#endif*/ 1326 1327 /* 1328 * Block or unblock an ASM multicast source on an inpcb. 1329 * This implements the delta-based API described in RFC 3678. 1330 * 1331 * The delta-based API applies only to exclusive-mode memberships. 1332 * An IGMP downcall will be performed. 1333 * 1334 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1335 * 1336 * Return 0 if successful, otherwise return an appropriate error code. 
1337 */ 1338 static int 1339 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1340 { 1341 struct group_source_req gsr; 1342 sockunion_t *gsa, *ssa; 1343 struct ifnet *ifp; 1344 struct in_mfilter *imf; 1345 struct ip_moptions *imo; 1346 struct in_msource *ims; 1347 struct in_multi *inm; 1348 size_t idx; 1349 uint16_t fmode; 1350 int error, doblock; 1351 1352 ifp = NULL; 1353 error = 0; 1354 doblock = 0; 1355 1356 memset(&gsr, 0, sizeof(struct group_source_req)); 1357 gsa = (sockunion_t *)&gsr.gsr_group; 1358 ssa = (sockunion_t *)&gsr.gsr_source; 1359 1360 switch (sopt->sopt_name) { 1361 case IP_BLOCK_SOURCE: 1362 case IP_UNBLOCK_SOURCE: { 1363 struct ip_mreq_source mreqs; 1364 1365 error = sooptcopyin(sopt, &mreqs, 1366 sizeof(struct ip_mreq_source), 1367 sizeof(struct ip_mreq_source)); 1368 if (error) 1369 return (error); 1370 1371 gsa->sin.sin_family = AF_INET; 1372 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1373 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1374 1375 ssa->sin.sin_family = AF_INET; 1376 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1377 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1378 1379 if (!in_nullhost(mreqs.imr_interface)) 1380 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1381 1382 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1383 doblock = 1; 1384 1385 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1386 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1387 break; 1388 } 1389 1390 case MCAST_BLOCK_SOURCE: 1391 case MCAST_UNBLOCK_SOURCE: 1392 error = sooptcopyin(sopt, &gsr, 1393 sizeof(struct group_source_req), 1394 sizeof(struct group_source_req)); 1395 if (error) 1396 return (error); 1397 1398 if (gsa->sin.sin_family != AF_INET || 1399 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1400 return (EINVAL); 1401 1402 if (ssa->sin.sin_family != AF_INET || 1403 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1404 return (EINVAL); 1405 1406 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1407 return (EADDRNOTAVAIL); 1408 
1409 ifp = ifnet_byindex(gsr.gsr_interface); 1410 1411 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1412 doblock = 1; 1413 break; 1414 1415 default: 1416 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1417 __func__, sopt->sopt_name); 1418 return (EOPNOTSUPP); 1419 break; 1420 } 1421 1422 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1423 return (EINVAL); 1424 1425 /* 1426 * Check if we are actually a member of this group. 1427 */ 1428 imo = inp_findmoptions(inp); 1429 idx = imo_match_group(imo, ifp, &gsa->sa); 1430 if (idx == -1 || imo->imo_mfilters == NULL) { 1431 error = EADDRNOTAVAIL; 1432 goto out_inp_locked; 1433 } 1434 1435 KASSERT(imo->imo_mfilters != NULL, 1436 ("%s: imo_mfilters not allocated", __func__)); 1437 imf = &imo->imo_mfilters[idx]; 1438 inm = imo->imo_membership[idx]; 1439 1440 /* 1441 * Attempting to use the delta-based API on an 1442 * non exclusive-mode membership is an error. 1443 */ 1444 fmode = imf->imf_st[0]; 1445 if (fmode != MCAST_EXCLUDE) { 1446 error = EINVAL; 1447 goto out_inp_locked; 1448 } 1449 1450 /* 1451 * Deal with error cases up-front: 1452 * Asked to block, but already blocked; or 1453 * Asked to unblock, but nothing to unblock. 1454 * If adding a new block entry, allocate it. 1455 */ 1456 ims = imo_match_source(imo, idx, &ssa->sa); 1457 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1458 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 1459 inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "); 1460 error = EADDRNOTAVAIL; 1461 goto out_inp_locked; 1462 } 1463 1464 INP_WLOCK_ASSERT(inp); 1465 1466 /* 1467 * Begin state merge transaction at socket layer. 
1468 */ 1469 if (doblock) { 1470 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1471 ims = imf_graft(imf, fmode, &ssa->sin); 1472 if (ims == NULL) 1473 error = ENOMEM; 1474 } else { 1475 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1476 error = imf_prune(imf, &ssa->sin); 1477 } 1478 1479 if (error) { 1480 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1481 goto out_imf_rollback; 1482 } 1483 1484 /* 1485 * Begin state merge transaction at IGMP layer. 1486 */ 1487 IN_MULTI_LOCK(); 1488 1489 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1490 error = inm_merge(inm, imf); 1491 if (error) { 1492 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1493 goto out_in_multi_locked; 1494 } 1495 1496 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1497 error = igmp_change_state(inm); 1498 if (error) 1499 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1500 1501 out_in_multi_locked: 1502 1503 IN_MULTI_UNLOCK(); 1504 1505 out_imf_rollback: 1506 if (error) 1507 imf_rollback(imf); 1508 else 1509 imf_commit(imf); 1510 1511 imf_reap(imf); 1512 1513 out_inp_locked: 1514 INP_WUNLOCK(inp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Given an inpcb, return its multicast options structure pointer. Accepts 1520 * an unlocked inpcb pointer, but will return it locked. May sleep. 1521 * 1522 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1523 * SMPng: NOTE: Returns with the INP write lock held. 
1524 */ 1525 static struct ip_moptions * 1526 inp_findmoptions(struct inpcb *inp) 1527 { 1528 struct ip_moptions *imo; 1529 struct in_multi **immp; 1530 struct in_mfilter *imfp; 1531 size_t idx; 1532 1533 INP_WLOCK(inp); 1534 if (inp->inp_moptions != NULL) 1535 return (inp->inp_moptions); 1536 1537 INP_WUNLOCK(inp); 1538 1539 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1540 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1541 M_WAITOK | M_ZERO); 1542 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1543 M_INMFILTER, M_WAITOK); 1544 1545 imo->imo_multicast_ifp = NULL; 1546 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1547 imo->imo_multicast_vif = -1; 1548 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1549 imo->imo_multicast_loop = in_mcast_loop; 1550 imo->imo_num_memberships = 0; 1551 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1552 imo->imo_membership = immp; 1553 1554 /* Initialize per-group source filters. */ 1555 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1556 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1557 imo->imo_mfilters = imfp; 1558 1559 INP_WLOCK(inp); 1560 if (inp->inp_moptions != NULL) { 1561 free(imfp, M_INMFILTER); 1562 free(immp, M_IPMOPTS); 1563 free(imo, M_IPMOPTS); 1564 return (inp->inp_moptions); 1565 } 1566 inp->inp_moptions = imo; 1567 return (imo); 1568 } 1569 1570 /* 1571 * Discard the IP multicast options (and source filters). To minimize 1572 * the amount of work done while holding locks such as the INP's 1573 * pcbinfo lock (which is used in the receive path), the free 1574 * operation is performed asynchronously in a separate task. 1575 * 1576 * SMPng: NOTE: assumes INP write lock is held. 
1577 */ 1578 void 1579 inp_freemoptions(struct ip_moptions *imo) 1580 { 1581 1582 KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); 1583 IN_MULTI_LOCK(); 1584 STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link); 1585 IN_MULTI_UNLOCK(); 1586 taskqueue_enqueue(taskqueue_thread, &imo_gc_task); 1587 } 1588 1589 static void 1590 inp_freemoptions_internal(struct ip_moptions *imo) 1591 { 1592 struct in_mfilter *imf; 1593 size_t idx, nmships; 1594 1595 nmships = imo->imo_num_memberships; 1596 for (idx = 0; idx < nmships; ++idx) { 1597 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1598 if (imf) 1599 imf_leave(imf); 1600 (void)in_leavegroup(imo->imo_membership[idx], imf); 1601 if (imf) 1602 imf_purge(imf); 1603 } 1604 1605 if (imo->imo_mfilters) 1606 free(imo->imo_mfilters, M_INMFILTER); 1607 free(imo->imo_membership, M_IPMOPTS); 1608 free(imo, M_IPMOPTS); 1609 } 1610 1611 static void 1612 inp_gcmoptions(void *context, int pending) 1613 { 1614 struct ip_moptions *imo; 1615 1616 IN_MULTI_LOCK(); 1617 while (!STAILQ_EMPTY(&imo_gc_list)) { 1618 imo = STAILQ_FIRST(&imo_gc_list); 1619 STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link); 1620 IN_MULTI_UNLOCK(); 1621 inp_freemoptions_internal(imo); 1622 IN_MULTI_LOCK(); 1623 } 1624 IN_MULTI_UNLOCK(); 1625 } 1626 1627 /* 1628 * Atomically get source filters on a socket for an IPv4 multicast group. 1629 * Called with INP lock held; returns with lock released. 
1630 */ 1631 static int 1632 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1633 { 1634 struct __msfilterreq msfr; 1635 sockunion_t *gsa; 1636 struct ifnet *ifp; 1637 struct ip_moptions *imo; 1638 struct in_mfilter *imf; 1639 struct ip_msource *ims; 1640 struct in_msource *lims; 1641 struct sockaddr_in *psin; 1642 struct sockaddr_storage *ptss; 1643 struct sockaddr_storage *tss; 1644 int error; 1645 size_t idx, nsrcs, ncsrcs; 1646 1647 INP_WLOCK_ASSERT(inp); 1648 1649 imo = inp->inp_moptions; 1650 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1651 1652 INP_WUNLOCK(inp); 1653 1654 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1655 sizeof(struct __msfilterreq)); 1656 if (error) 1657 return (error); 1658 1659 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1660 return (EINVAL); 1661 1662 ifp = ifnet_byindex(msfr.msfr_ifindex); 1663 if (ifp == NULL) 1664 return (EINVAL); 1665 1666 INP_WLOCK(inp); 1667 1668 /* 1669 * Lookup group on the socket. 1670 */ 1671 gsa = (sockunion_t *)&msfr.msfr_group; 1672 idx = imo_match_group(imo, ifp, &gsa->sa); 1673 if (idx == -1 || imo->imo_mfilters == NULL) { 1674 INP_WUNLOCK(inp); 1675 return (EADDRNOTAVAIL); 1676 } 1677 imf = &imo->imo_mfilters[idx]; 1678 1679 /* 1680 * Ignore memberships which are in limbo. 1681 */ 1682 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1683 INP_WUNLOCK(inp); 1684 return (EAGAIN); 1685 } 1686 msfr.msfr_fmode = imf->imf_st[1]; 1687 1688 /* 1689 * If the user specified a buffer, copy out the source filter 1690 * entries to userland gracefully. 1691 * We only copy out the number of entries which userland 1692 * has asked for, but we always tell userland how big the 1693 * buffer really needs to be. 
1694 */ 1695 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1696 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1697 tss = NULL; 1698 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1699 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1700 M_TEMP, M_NOWAIT | M_ZERO); 1701 if (tss == NULL) { 1702 INP_WUNLOCK(inp); 1703 return (ENOBUFS); 1704 } 1705 } 1706 1707 /* 1708 * Count number of sources in-mode at t0. 1709 * If buffer space exists and remains, copy out source entries. 1710 */ 1711 nsrcs = msfr.msfr_nsrcs; 1712 ncsrcs = 0; 1713 ptss = tss; 1714 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1715 lims = (struct in_msource *)ims; 1716 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1717 lims->imsl_st[0] != imf->imf_st[0]) 1718 continue; 1719 ++ncsrcs; 1720 if (tss != NULL && nsrcs > 0) { 1721 psin = (struct sockaddr_in *)ptss; 1722 psin->sin_family = AF_INET; 1723 psin->sin_len = sizeof(struct sockaddr_in); 1724 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1725 psin->sin_port = 0; 1726 ++ptss; 1727 --nsrcs; 1728 } 1729 } 1730 1731 INP_WUNLOCK(inp); 1732 1733 if (tss != NULL) { 1734 error = copyout(tss, msfr.msfr_srcs, 1735 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1736 free(tss, M_TEMP); 1737 if (error) 1738 return (error); 1739 } 1740 1741 msfr.msfr_nsrcs = ncsrcs; 1742 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1743 1744 return (error); 1745 } 1746 1747 /* 1748 * Return the IP multicast options in response to user getsockopt(). 1749 */ 1750 int 1751 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1752 { 1753 struct rm_priotracker in_ifa_tracker; 1754 struct ip_mreqn mreqn; 1755 struct ip_moptions *imo; 1756 struct ifnet *ifp; 1757 struct in_ifaddr *ia; 1758 int error, optval; 1759 u_char coptval; 1760 1761 INP_WLOCK(inp); 1762 imo = inp->inp_moptions; 1763 /* 1764 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1765 * or is a divert socket, reject it. 
1766 */ 1767 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1768 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1769 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1770 INP_WUNLOCK(inp); 1771 return (EOPNOTSUPP); 1772 } 1773 1774 error = 0; 1775 switch (sopt->sopt_name) { 1776 case IP_MULTICAST_VIF: 1777 if (imo != NULL) 1778 optval = imo->imo_multicast_vif; 1779 else 1780 optval = -1; 1781 INP_WUNLOCK(inp); 1782 error = sooptcopyout(sopt, &optval, sizeof(int)); 1783 break; 1784 1785 case IP_MULTICAST_IF: 1786 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1787 if (imo != NULL) { 1788 ifp = imo->imo_multicast_ifp; 1789 if (!in_nullhost(imo->imo_multicast_addr)) { 1790 mreqn.imr_address = imo->imo_multicast_addr; 1791 } else if (ifp != NULL) { 1792 mreqn.imr_ifindex = ifp->if_index; 1793 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1794 if (ia != NULL) { 1795 mreqn.imr_address = 1796 IA_SIN(ia)->sin_addr; 1797 ifa_free(&ia->ia_ifa); 1798 } 1799 } 1800 } 1801 INP_WUNLOCK(inp); 1802 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1803 error = sooptcopyout(sopt, &mreqn, 1804 sizeof(struct ip_mreqn)); 1805 } else { 1806 error = sooptcopyout(sopt, &mreqn.imr_address, 1807 sizeof(struct in_addr)); 1808 } 1809 break; 1810 1811 case IP_MULTICAST_TTL: 1812 if (imo == 0) 1813 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1814 else 1815 optval = coptval = imo->imo_multicast_ttl; 1816 INP_WUNLOCK(inp); 1817 if (sopt->sopt_valsize == sizeof(u_char)) 1818 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1819 else 1820 error = sooptcopyout(sopt, &optval, sizeof(int)); 1821 break; 1822 1823 case IP_MULTICAST_LOOP: 1824 if (imo == 0) 1825 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1826 else 1827 optval = coptval = imo->imo_multicast_loop; 1828 INP_WUNLOCK(inp); 1829 if (sopt->sopt_valsize == sizeof(u_char)) 1830 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1831 else 1832 error = sooptcopyout(sopt, &optval, sizeof(int)); 1833 break; 1834 1835 case 
IP_MSFILTER: 1836 if (imo == NULL) { 1837 error = EADDRNOTAVAIL; 1838 INP_WUNLOCK(inp); 1839 } else { 1840 error = inp_get_source_filters(inp, sopt); 1841 } 1842 break; 1843 1844 default: 1845 INP_WUNLOCK(inp); 1846 error = ENOPROTOOPT; 1847 break; 1848 } 1849 1850 INP_UNLOCK_ASSERT(inp); 1851 1852 return (error); 1853 } 1854 1855 /* 1856 * Look up the ifnet to use for a multicast group membership, 1857 * given the IPv4 address of an interface, and the IPv4 group address. 1858 * 1859 * This routine exists to support legacy multicast applications 1860 * which do not understand that multicast memberships are scoped to 1861 * specific physical links in the networking stack, or which need 1862 * to join link-scope groups before IPv4 addresses are configured. 1863 * 1864 * If inp is non-NULL, use this socket's current FIB number for any 1865 * required FIB lookup. 1866 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1867 * and use its ifp; usually, this points to the default next-hop. 1868 * 1869 * If the FIB lookup fails, attempt to use the first non-loopback 1870 * interface with multicast capability in the system as a 1871 * last resort. The legacy IPv4 ASM API requires that we do 1872 * this in order to allow groups to be joined when the routing 1873 * table has not yet been populated during boot. 1874 * 1875 * Returns NULL if no ifp could be found. 1876 * 1877 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1878 * FUTURE: Implement IPv4 source-address selection. 
1879 */ 1880 static struct ifnet * 1881 inp_lookup_mcast_ifp(const struct inpcb *inp, 1882 const struct sockaddr_in *gsin, const struct in_addr ina) 1883 { 1884 struct rm_priotracker in_ifa_tracker; 1885 struct ifnet *ifp; 1886 1887 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1888 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1889 ("%s: not multicast", __func__)); 1890 1891 ifp = NULL; 1892 if (!in_nullhost(ina)) { 1893 INADDR_TO_IFP(ina, ifp); 1894 } else { 1895 struct route ro; 1896 1897 ro.ro_rt = NULL; 1898 memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in)); 1899 in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0); 1900 if (ro.ro_rt != NULL) { 1901 ifp = ro.ro_rt->rt_ifp; 1902 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 1903 RTFREE(ro.ro_rt); 1904 } else { 1905 struct in_ifaddr *ia; 1906 struct ifnet *mifp; 1907 1908 mifp = NULL; 1909 IN_IFADDR_RLOCK(&in_ifa_tracker); 1910 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1911 mifp = ia->ia_ifp; 1912 if (!(mifp->if_flags & IFF_LOOPBACK) && 1913 (mifp->if_flags & IFF_MULTICAST)) { 1914 ifp = mifp; 1915 break; 1916 } 1917 } 1918 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1919 } 1920 } 1921 1922 return (ifp); 1923 } 1924 1925 /* 1926 * Join an IPv4 multicast group, possibly with a source. 
1927 */ 1928 static int 1929 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1930 { 1931 struct group_source_req gsr; 1932 sockunion_t *gsa, *ssa; 1933 struct ifnet *ifp; 1934 struct in_mfilter *imf; 1935 struct ip_moptions *imo; 1936 struct in_multi *inm; 1937 struct in_msource *lims; 1938 size_t idx; 1939 int error, is_new; 1940 1941 ifp = NULL; 1942 imf = NULL; 1943 lims = NULL; 1944 error = 0; 1945 is_new = 0; 1946 1947 memset(&gsr, 0, sizeof(struct group_source_req)); 1948 gsa = (sockunion_t *)&gsr.gsr_group; 1949 gsa->ss.ss_family = AF_UNSPEC; 1950 ssa = (sockunion_t *)&gsr.gsr_source; 1951 ssa->ss.ss_family = AF_UNSPEC; 1952 1953 switch (sopt->sopt_name) { 1954 case IP_ADD_MEMBERSHIP: 1955 case IP_ADD_SOURCE_MEMBERSHIP: { 1956 struct ip_mreq_source mreqs; 1957 1958 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 1959 error = sooptcopyin(sopt, &mreqs, 1960 sizeof(struct ip_mreq), 1961 sizeof(struct ip_mreq)); 1962 /* 1963 * Do argument switcharoo from ip_mreq into 1964 * ip_mreq_source to avoid using two instances. 
1965 */ 1966 mreqs.imr_interface = mreqs.imr_sourceaddr; 1967 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 1968 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1969 error = sooptcopyin(sopt, &mreqs, 1970 sizeof(struct ip_mreq_source), 1971 sizeof(struct ip_mreq_source)); 1972 } 1973 if (error) 1974 return (error); 1975 1976 gsa->sin.sin_family = AF_INET; 1977 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1978 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1979 1980 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1981 ssa->sin.sin_family = AF_INET; 1982 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1983 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1984 } 1985 1986 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1987 return (EINVAL); 1988 1989 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1990 mreqs.imr_interface); 1991 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1992 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1993 break; 1994 } 1995 1996 case MCAST_JOIN_GROUP: 1997 case MCAST_JOIN_SOURCE_GROUP: 1998 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 1999 error = sooptcopyin(sopt, &gsr, 2000 sizeof(struct group_req), 2001 sizeof(struct group_req)); 2002 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2003 error = sooptcopyin(sopt, &gsr, 2004 sizeof(struct group_source_req), 2005 sizeof(struct group_source_req)); 2006 } 2007 if (error) 2008 return (error); 2009 2010 if (gsa->sin.sin_family != AF_INET || 2011 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2012 return (EINVAL); 2013 2014 /* 2015 * Overwrite the port field if present, as the sockaddr 2016 * being copied in may be matched with a binary comparison. 
2017 */ 2018 gsa->sin.sin_port = 0; 2019 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2020 if (ssa->sin.sin_family != AF_INET || 2021 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2022 return (EINVAL); 2023 ssa->sin.sin_port = 0; 2024 } 2025 2026 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2027 return (EINVAL); 2028 2029 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2030 return (EADDRNOTAVAIL); 2031 ifp = ifnet_byindex(gsr.gsr_interface); 2032 break; 2033 2034 default: 2035 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2036 __func__, sopt->sopt_name); 2037 return (EOPNOTSUPP); 2038 break; 2039 } 2040 2041 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2042 return (EADDRNOTAVAIL); 2043 2044 imo = inp_findmoptions(inp); 2045 idx = imo_match_group(imo, ifp, &gsa->sa); 2046 if (idx == -1) { 2047 is_new = 1; 2048 } else { 2049 inm = imo->imo_membership[idx]; 2050 imf = &imo->imo_mfilters[idx]; 2051 if (ssa->ss.ss_family != AF_UNSPEC) { 2052 /* 2053 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2054 * is an error. On an existing inclusive membership, 2055 * it just adds the source to the filter list. 2056 */ 2057 if (imf->imf_st[1] != MCAST_INCLUDE) { 2058 error = EINVAL; 2059 goto out_inp_locked; 2060 } 2061 /* 2062 * Throw out duplicates. 2063 * 2064 * XXX FIXME: This makes a naive assumption that 2065 * even if entries exist for *ssa in this imf, 2066 * they will be rejected as dupes, even if they 2067 * are not valid in the current mode (in-mode). 2068 * 2069 * in_msource is transactioned just as for anything 2070 * else in SSM -- but note naive use of inm_graft() 2071 * below for allocating new filter entries. 2072 * 2073 * This is only an issue if someone mixes the 2074 * full-state SSM API with the delta-based API, 2075 * which is discouraged in the relevant RFCs. 
2076 */ 2077 lims = imo_match_source(imo, idx, &ssa->sa); 2078 if (lims != NULL /*&& 2079 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2080 error = EADDRNOTAVAIL; 2081 goto out_inp_locked; 2082 } 2083 } else { 2084 /* 2085 * MCAST_JOIN_GROUP on an existing exclusive 2086 * membership is an error; return EADDRINUSE 2087 * to preserve 4.4BSD API idempotence, and 2088 * avoid tedious detour to code below. 2089 * NOTE: This is bending RFC 3678 a bit. 2090 * 2091 * On an existing inclusive membership, this is also 2092 * an error; if you want to change filter mode, 2093 * you must use the userland API setsourcefilter(). 2094 * XXX We don't reject this for imf in UNDEFINED 2095 * state at t1, because allocation of a filter 2096 * is atomic with allocation of a membership. 2097 */ 2098 error = EINVAL; 2099 if (imf->imf_st[1] == MCAST_EXCLUDE) 2100 error = EADDRINUSE; 2101 goto out_inp_locked; 2102 } 2103 } 2104 2105 /* 2106 * Begin state merge transaction at socket layer. 2107 */ 2108 INP_WLOCK_ASSERT(inp); 2109 2110 if (is_new) { 2111 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2112 error = imo_grow(imo); 2113 if (error) 2114 goto out_inp_locked; 2115 } 2116 /* 2117 * Allocate the new slot upfront so we can deal with 2118 * grafting the new source filter in same code path 2119 * as for join-source on existing membership. 2120 */ 2121 idx = imo->imo_num_memberships; 2122 imo->imo_membership[idx] = NULL; 2123 imo->imo_num_memberships++; 2124 KASSERT(imo->imo_mfilters != NULL, 2125 ("%s: imf_mfilters vector was not allocated", __func__)); 2126 imf = &imo->imo_mfilters[idx]; 2127 KASSERT(RB_EMPTY(&imf->imf_sources), 2128 ("%s: imf_sources not empty", __func__)); 2129 } 2130 2131 /* 2132 * Graft new source into filter list for this inpcb's 2133 * membership of the group. The in_multi may not have 2134 * been allocated yet if this is a new membership, however, 2135 * the in_mfilter slot will be allocated and must be initialized. 
2136 * 2137 * Note: Grafting of exclusive mode filters doesn't happen 2138 * in this path. 2139 * XXX: Should check for non-NULL lims (node exists but may 2140 * not be in-mode) for interop with full-state API. 2141 */ 2142 if (ssa->ss.ss_family != AF_UNSPEC) { 2143 /* Membership starts in IN mode */ 2144 if (is_new) { 2145 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2146 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2147 } else { 2148 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2149 } 2150 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2151 if (lims == NULL) { 2152 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2153 __func__); 2154 error = ENOMEM; 2155 goto out_imo_free; 2156 } 2157 } else { 2158 /* No address specified; Membership starts in EX mode */ 2159 if (is_new) { 2160 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2161 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2162 } 2163 } 2164 2165 /* 2166 * Begin state merge transaction at IGMP layer. 2167 */ 2168 IN_MULTI_LOCK(); 2169 2170 if (is_new) { 2171 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2172 &inm); 2173 if (error) { 2174 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2175 __func__); 2176 IN_MULTI_UNLOCK(); 2177 goto out_imo_free; 2178 } 2179 imo->imo_membership[idx] = inm; 2180 } else { 2181 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2182 error = inm_merge(inm, imf); 2183 if (error) { 2184 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2185 __func__); 2186 goto out_in_multi_locked; 2187 } 2188 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2189 error = igmp_change_state(inm); 2190 if (error) { 2191 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2192 __func__); 2193 goto out_in_multi_locked; 2194 } 2195 } 2196 2197 out_in_multi_locked: 2198 2199 IN_MULTI_UNLOCK(); 2200 2201 INP_WLOCK_ASSERT(inp); 2202 if (error) { 2203 imf_rollback(imf); 2204 if (is_new) 2205 imf_purge(imf); 2206 else 2207 imf_reap(imf); 2208 } else { 2209 
imf_commit(imf); 2210 } 2211 2212 out_imo_free: 2213 if (error && is_new) { 2214 imo->imo_membership[idx] = NULL; 2215 --imo->imo_num_memberships; 2216 } 2217 2218 out_inp_locked: 2219 INP_WUNLOCK(inp); 2220 return (error); 2221 } 2222 2223 /* 2224 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2225 */ 2226 static int 2227 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2228 { 2229 struct group_source_req gsr; 2230 struct ip_mreq_source mreqs; 2231 sockunion_t *gsa, *ssa; 2232 struct ifnet *ifp; 2233 struct in_mfilter *imf; 2234 struct ip_moptions *imo; 2235 struct in_msource *ims; 2236 struct in_multi *inm; 2237 size_t idx; 2238 int error, is_final; 2239 2240 ifp = NULL; 2241 error = 0; 2242 is_final = 1; 2243 2244 memset(&gsr, 0, sizeof(struct group_source_req)); 2245 gsa = (sockunion_t *)&gsr.gsr_group; 2246 gsa->ss.ss_family = AF_UNSPEC; 2247 ssa = (sockunion_t *)&gsr.gsr_source; 2248 ssa->ss.ss_family = AF_UNSPEC; 2249 2250 switch (sopt->sopt_name) { 2251 case IP_DROP_MEMBERSHIP: 2252 case IP_DROP_SOURCE_MEMBERSHIP: 2253 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2254 error = sooptcopyin(sopt, &mreqs, 2255 sizeof(struct ip_mreq), 2256 sizeof(struct ip_mreq)); 2257 /* 2258 * Swap interface and sourceaddr arguments, 2259 * as ip_mreq and ip_mreq_source are laid 2260 * out differently. 
2261 */ 2262 mreqs.imr_interface = mreqs.imr_sourceaddr; 2263 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2264 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2265 error = sooptcopyin(sopt, &mreqs, 2266 sizeof(struct ip_mreq_source), 2267 sizeof(struct ip_mreq_source)); 2268 } 2269 if (error) 2270 return (error); 2271 2272 gsa->sin.sin_family = AF_INET; 2273 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2274 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2275 2276 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2277 ssa->sin.sin_family = AF_INET; 2278 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2279 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2280 } 2281 2282 /* 2283 * Attempt to look up hinted ifp from interface address. 2284 * Fallthrough with null ifp iff lookup fails, to 2285 * preserve 4.4BSD mcast API idempotence. 2286 * XXX NOTE WELL: The RFC 3678 API is preferred because 2287 * using an IPv4 address as a key is racy. 2288 */ 2289 if (!in_nullhost(mreqs.imr_interface)) 2290 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2291 2292 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 2293 __func__, inet_ntoa(mreqs.imr_interface), ifp); 2294 2295 break; 2296 2297 case MCAST_LEAVE_GROUP: 2298 case MCAST_LEAVE_SOURCE_GROUP: 2299 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2300 error = sooptcopyin(sopt, &gsr, 2301 sizeof(struct group_req), 2302 sizeof(struct group_req)); 2303 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2304 error = sooptcopyin(sopt, &gsr, 2305 sizeof(struct group_source_req), 2306 sizeof(struct group_source_req)); 2307 } 2308 if (error) 2309 return (error); 2310 2311 if (gsa->sin.sin_family != AF_INET || 2312 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2313 return (EINVAL); 2314 2315 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2316 if (ssa->sin.sin_family != AF_INET || 2317 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2318 return (EINVAL); 2319 } 2320 2321 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 
2322 return (EADDRNOTAVAIL); 2323 2324 ifp = ifnet_byindex(gsr.gsr_interface); 2325 2326 if (ifp == NULL) 2327 return (EADDRNOTAVAIL); 2328 break; 2329 2330 default: 2331 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2332 __func__, sopt->sopt_name); 2333 return (EOPNOTSUPP); 2334 break; 2335 } 2336 2337 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2338 return (EINVAL); 2339 2340 /* 2341 * Find the membership in the membership array. 2342 */ 2343 imo = inp_findmoptions(inp); 2344 idx = imo_match_group(imo, ifp, &gsa->sa); 2345 if (idx == -1) { 2346 error = EADDRNOTAVAIL; 2347 goto out_inp_locked; 2348 } 2349 inm = imo->imo_membership[idx]; 2350 imf = &imo->imo_mfilters[idx]; 2351 2352 if (ssa->ss.ss_family != AF_UNSPEC) 2353 is_final = 0; 2354 2355 /* 2356 * Begin state merge transaction at socket layer. 2357 */ 2358 INP_WLOCK_ASSERT(inp); 2359 2360 /* 2361 * If we were instructed only to leave a given source, do so. 2362 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2363 */ 2364 if (is_final) { 2365 imf_leave(imf); 2366 } else { 2367 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2368 error = EADDRNOTAVAIL; 2369 goto out_inp_locked; 2370 } 2371 ims = imo_match_source(imo, idx, &ssa->sa); 2372 if (ims == NULL) { 2373 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 2374 inet_ntoa(ssa->sin.sin_addr), "not "); 2375 error = EADDRNOTAVAIL; 2376 goto out_inp_locked; 2377 } 2378 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2379 error = imf_prune(imf, &ssa->sin); 2380 if (error) { 2381 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2382 __func__); 2383 goto out_inp_locked; 2384 } 2385 } 2386 2387 /* 2388 * Begin state merge transaction at IGMP layer. 2389 */ 2390 IN_MULTI_LOCK(); 2391 2392 if (is_final) { 2393 /* 2394 * Give up the multicast address record to which 2395 * the membership points. 
2396 */ 2397 (void)in_leavegroup_locked(inm, imf); 2398 } else { 2399 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2400 error = inm_merge(inm, imf); 2401 if (error) { 2402 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2403 __func__); 2404 goto out_in_multi_locked; 2405 } 2406 2407 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2408 error = igmp_change_state(inm); 2409 if (error) { 2410 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2411 __func__); 2412 } 2413 } 2414 2415 out_in_multi_locked: 2416 2417 IN_MULTI_UNLOCK(); 2418 2419 if (error) 2420 imf_rollback(imf); 2421 else 2422 imf_commit(imf); 2423 2424 imf_reap(imf); 2425 2426 if (is_final) { 2427 /* Remove the gap in the membership and filter array. */ 2428 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2429 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2430 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2431 } 2432 imo->imo_num_memberships--; 2433 } 2434 2435 out_inp_locked: 2436 INP_WUNLOCK(inp); 2437 return (error); 2438 } 2439 2440 /* 2441 * Select the interface for transmitting IPv4 multicast datagrams. 2442 * 2443 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2444 * may be passed to this socket option. An address of INADDR_ANY or an 2445 * interface index of 0 is used to remove a previous selection. 2446 * When no interface is selected, one is chosen for every send. 2447 */ 2448 static int 2449 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2450 { 2451 struct in_addr addr; 2452 struct ip_mreqn mreqn; 2453 struct ifnet *ifp; 2454 struct ip_moptions *imo; 2455 int error; 2456 2457 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2458 /* 2459 * An interface index was specified using the 2460 * Linux-derived ip_mreqn structure. 
2461 */ 2462 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2463 sizeof(struct ip_mreqn)); 2464 if (error) 2465 return (error); 2466 2467 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2468 return (EINVAL); 2469 2470 if (mreqn.imr_ifindex == 0) { 2471 ifp = NULL; 2472 } else { 2473 ifp = ifnet_byindex(mreqn.imr_ifindex); 2474 if (ifp == NULL) 2475 return (EADDRNOTAVAIL); 2476 } 2477 } else { 2478 /* 2479 * An interface was specified by IPv4 address. 2480 * This is the traditional BSD usage. 2481 */ 2482 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2483 sizeof(struct in_addr)); 2484 if (error) 2485 return (error); 2486 if (in_nullhost(addr)) { 2487 ifp = NULL; 2488 } else { 2489 INADDR_TO_IFP(addr, ifp); 2490 if (ifp == NULL) 2491 return (EADDRNOTAVAIL); 2492 } 2493 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp, 2494 inet_ntoa(addr)); 2495 } 2496 2497 /* Reject interfaces which do not support multicast. */ 2498 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2499 return (EOPNOTSUPP); 2500 2501 imo = inp_findmoptions(inp); 2502 imo->imo_multicast_ifp = ifp; 2503 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2504 INP_WUNLOCK(inp); 2505 2506 return (0); 2507 } 2508 2509 /* 2510 * Atomically set source filters on a socket for an IPv4 multicast group. 2511 * 2512 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
2513 */ 2514 static int 2515 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2516 { 2517 struct __msfilterreq msfr; 2518 sockunion_t *gsa; 2519 struct ifnet *ifp; 2520 struct in_mfilter *imf; 2521 struct ip_moptions *imo; 2522 struct in_multi *inm; 2523 size_t idx; 2524 int error; 2525 2526 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2527 sizeof(struct __msfilterreq)); 2528 if (error) 2529 return (error); 2530 2531 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2532 return (ENOBUFS); 2533 2534 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2535 msfr.msfr_fmode != MCAST_INCLUDE)) 2536 return (EINVAL); 2537 2538 if (msfr.msfr_group.ss_family != AF_INET || 2539 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2540 return (EINVAL); 2541 2542 gsa = (sockunion_t *)&msfr.msfr_group; 2543 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2544 return (EINVAL); 2545 2546 gsa->sin.sin_port = 0; /* ignore port */ 2547 2548 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2549 return (EADDRNOTAVAIL); 2550 2551 ifp = ifnet_byindex(msfr.msfr_ifindex); 2552 if (ifp == NULL) 2553 return (EADDRNOTAVAIL); 2554 2555 /* 2556 * Take the INP write lock. 2557 * Check if this socket is a member of this group. 2558 */ 2559 imo = inp_findmoptions(inp); 2560 idx = imo_match_group(imo, ifp, &gsa->sa); 2561 if (idx == -1 || imo->imo_mfilters == NULL) { 2562 error = EADDRNOTAVAIL; 2563 goto out_inp_locked; 2564 } 2565 inm = imo->imo_membership[idx]; 2566 imf = &imo->imo_mfilters[idx]; 2567 2568 /* 2569 * Begin state merge transaction at socket layer. 2570 */ 2571 INP_WLOCK_ASSERT(inp); 2572 2573 imf->imf_st[1] = msfr.msfr_fmode; 2574 2575 /* 2576 * Apply any new source filters, if present. 2577 * Make a copy of the user-space source vector so 2578 * that we may copy them with a single copyin. This 2579 * allows us to deal with page faults up-front. 
2580 */ 2581 if (msfr.msfr_nsrcs > 0) { 2582 struct in_msource *lims; 2583 struct sockaddr_in *psin; 2584 struct sockaddr_storage *kss, *pkss; 2585 int i; 2586 2587 INP_WUNLOCK(inp); 2588 2589 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2590 __func__, (unsigned long)msfr.msfr_nsrcs); 2591 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2592 M_TEMP, M_WAITOK); 2593 error = copyin(msfr.msfr_srcs, kss, 2594 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2595 if (error) { 2596 free(kss, M_TEMP); 2597 return (error); 2598 } 2599 2600 INP_WLOCK(inp); 2601 2602 /* 2603 * Mark all source filters as UNDEFINED at t1. 2604 * Restore new group filter mode, as imf_leave() 2605 * will set it to INCLUDE. 2606 */ 2607 imf_leave(imf); 2608 imf->imf_st[1] = msfr.msfr_fmode; 2609 2610 /* 2611 * Update socket layer filters at t1, lazy-allocating 2612 * new entries. This saves a bunch of memory at the 2613 * cost of one RB_FIND() per source entry; duplicate 2614 * entries in the msfr_nsrcs vector are ignored. 2615 * If we encounter an error, rollback transaction. 2616 * 2617 * XXX This too could be replaced with a set-symmetric 2618 * difference like loop to avoid walking from root 2619 * every time, as the key space is common. 2620 */ 2621 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2622 psin = (struct sockaddr_in *)pkss; 2623 if (psin->sin_family != AF_INET) { 2624 error = EAFNOSUPPORT; 2625 break; 2626 } 2627 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2628 error = EINVAL; 2629 break; 2630 } 2631 error = imf_get_source(imf, psin, &lims); 2632 if (error) 2633 break; 2634 lims->imsl_st[1] = imf->imf_st[1]; 2635 } 2636 free(kss, M_TEMP); 2637 } 2638 2639 if (error) 2640 goto out_imf_rollback; 2641 2642 INP_WLOCK_ASSERT(inp); 2643 IN_MULTI_LOCK(); 2644 2645 /* 2646 * Begin state merge transaction at IGMP layer. 
2647 */ 2648 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2649 error = inm_merge(inm, imf); 2650 if (error) { 2651 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2652 goto out_in_multi_locked; 2653 } 2654 2655 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2656 error = igmp_change_state(inm); 2657 if (error) 2658 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2659 2660 out_in_multi_locked: 2661 2662 IN_MULTI_UNLOCK(); 2663 2664 out_imf_rollback: 2665 if (error) 2666 imf_rollback(imf); 2667 else 2668 imf_commit(imf); 2669 2670 imf_reap(imf); 2671 2672 out_inp_locked: 2673 INP_WUNLOCK(inp); 2674 return (error); 2675 } 2676 2677 /* 2678 * Set the IP multicast options in response to user setsockopt(). 2679 * 2680 * Many of the socket options handled in this function duplicate the 2681 * functionality of socket options in the regular unicast API. However, 2682 * it is not possible to merge the duplicate code, because the idempotence 2683 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2684 * the effects of these options must be treated as separate and distinct. 2685 * 2686 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2687 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2688 * is refactored to no longer use vifs. 2689 */ 2690 int 2691 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2692 { 2693 struct ip_moptions *imo; 2694 int error; 2695 2696 error = 0; 2697 2698 /* 2699 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2700 * or is a divert socket, reject it. 2701 */ 2702 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2703 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2704 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2705 return (EOPNOTSUPP); 2706 2707 switch (sopt->sopt_name) { 2708 case IP_MULTICAST_VIF: { 2709 int vifi; 2710 /* 2711 * Select a multicast VIF for transmission. 2712 * Only useful if multicast forwarding is active. 
2713 */ 2714 if (legal_vif_num == NULL) { 2715 error = EOPNOTSUPP; 2716 break; 2717 } 2718 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2719 if (error) 2720 break; 2721 if (!legal_vif_num(vifi) && (vifi != -1)) { 2722 error = EINVAL; 2723 break; 2724 } 2725 imo = inp_findmoptions(inp); 2726 imo->imo_multicast_vif = vifi; 2727 INP_WUNLOCK(inp); 2728 break; 2729 } 2730 2731 case IP_MULTICAST_IF: 2732 error = inp_set_multicast_if(inp, sopt); 2733 break; 2734 2735 case IP_MULTICAST_TTL: { 2736 u_char ttl; 2737 2738 /* 2739 * Set the IP time-to-live for outgoing multicast packets. 2740 * The original multicast API required a char argument, 2741 * which is inconsistent with the rest of the socket API. 2742 * We allow either a char or an int. 2743 */ 2744 if (sopt->sopt_valsize == sizeof(u_char)) { 2745 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2746 sizeof(u_char)); 2747 if (error) 2748 break; 2749 } else { 2750 u_int ittl; 2751 2752 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2753 sizeof(u_int)); 2754 if (error) 2755 break; 2756 if (ittl > 255) { 2757 error = EINVAL; 2758 break; 2759 } 2760 ttl = (u_char)ittl; 2761 } 2762 imo = inp_findmoptions(inp); 2763 imo->imo_multicast_ttl = ttl; 2764 INP_WUNLOCK(inp); 2765 break; 2766 } 2767 2768 case IP_MULTICAST_LOOP: { 2769 u_char loop; 2770 2771 /* 2772 * Set the loopback flag for outgoing multicast packets. 2773 * Must be zero or one. The original multicast API required a 2774 * char argument, which is inconsistent with the rest 2775 * of the socket API. We allow either a char or an int. 
2776 */ 2777 if (sopt->sopt_valsize == sizeof(u_char)) { 2778 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2779 sizeof(u_char)); 2780 if (error) 2781 break; 2782 } else { 2783 u_int iloop; 2784 2785 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2786 sizeof(u_int)); 2787 if (error) 2788 break; 2789 loop = (u_char)iloop; 2790 } 2791 imo = inp_findmoptions(inp); 2792 imo->imo_multicast_loop = !!loop; 2793 INP_WUNLOCK(inp); 2794 break; 2795 } 2796 2797 case IP_ADD_MEMBERSHIP: 2798 case IP_ADD_SOURCE_MEMBERSHIP: 2799 case MCAST_JOIN_GROUP: 2800 case MCAST_JOIN_SOURCE_GROUP: 2801 error = inp_join_group(inp, sopt); 2802 break; 2803 2804 case IP_DROP_MEMBERSHIP: 2805 case IP_DROP_SOURCE_MEMBERSHIP: 2806 case MCAST_LEAVE_GROUP: 2807 case MCAST_LEAVE_SOURCE_GROUP: 2808 error = inp_leave_group(inp, sopt); 2809 break; 2810 2811 case IP_BLOCK_SOURCE: 2812 case IP_UNBLOCK_SOURCE: 2813 case MCAST_BLOCK_SOURCE: 2814 case MCAST_UNBLOCK_SOURCE: 2815 error = inp_block_unblock_source(inp, sopt); 2816 break; 2817 2818 case IP_MSFILTER: 2819 error = inp_set_source_filters(inp, sopt); 2820 break; 2821 2822 default: 2823 error = EOPNOTSUPP; 2824 break; 2825 } 2826 2827 INP_UNLOCK_ASSERT(inp); 2828 2829 return (error); 2830 } 2831 2832 /* 2833 * Expose IGMP's multicast filter mode and source list(s) to userland, 2834 * keyed by (ifindex, group). 2835 * The filter mode is written out as a uint32_t, followed by 2836 * 0..n of struct in_addr. 2837 * For use by ifmcstat(8). 2838 * SMPng: NOTE: unlocked read of ifindex space. 
2839 */ 2840 static int 2841 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2842 { 2843 struct in_addr src, group; 2844 struct ifnet *ifp; 2845 struct ifmultiaddr *ifma; 2846 struct in_multi *inm; 2847 struct ip_msource *ims; 2848 int *name; 2849 int retval; 2850 u_int namelen; 2851 uint32_t fmode, ifindex; 2852 2853 name = (int *)arg1; 2854 namelen = arg2; 2855 2856 if (req->newptr != NULL) 2857 return (EPERM); 2858 2859 if (namelen != 2) 2860 return (EINVAL); 2861 2862 ifindex = name[0]; 2863 if (ifindex <= 0 || ifindex > V_if_index) { 2864 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2865 __func__, ifindex); 2866 return (ENOENT); 2867 } 2868 2869 group.s_addr = name[1]; 2870 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2871 CTR2(KTR_IGMPV3, "%s: group %s is not multicast", 2872 __func__, inet_ntoa(group)); 2873 return (EINVAL); 2874 } 2875 2876 ifp = ifnet_byindex(ifindex); 2877 if (ifp == NULL) { 2878 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2879 __func__, ifindex); 2880 return (ENOENT); 2881 } 2882 2883 retval = sysctl_wire_old_buffer(req, 2884 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2885 if (retval) 2886 return (retval); 2887 2888 IN_MULTI_LOCK(); 2889 2890 IF_ADDR_RLOCK(ifp); 2891 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2892 if (ifma->ifma_addr->sa_family != AF_INET || 2893 ifma->ifma_protospec == NULL) 2894 continue; 2895 inm = (struct in_multi *)ifma->ifma_protospec; 2896 if (!in_hosteq(inm->inm_addr, group)) 2897 continue; 2898 fmode = inm->inm_st[1].iss_fmode; 2899 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2900 if (retval != 0) 2901 break; 2902 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2903 #ifdef KTR 2904 struct in_addr ina; 2905 ina.s_addr = htonl(ims->ims_haddr); 2906 CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, 2907 inet_ntoa(ina)); 2908 #endif 2909 /* 2910 * Only copy-out sources which are in-mode. 
2911 */ 2912 if (fmode != ims_get_mode(inm, ims, 1)) { 2913 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2914 __func__); 2915 continue; 2916 } 2917 src.s_addr = htonl(ims->ims_haddr); 2918 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2919 if (retval != 0) 2920 break; 2921 } 2922 } 2923 IF_ADDR_RUNLOCK(ifp); 2924 2925 IN_MULTI_UNLOCK(); 2926 2927 return (retval); 2928 } 2929 2930 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2931 2932 static const char *inm_modestrs[] = { "un", "in", "ex" }; 2933 2934 static const char * 2935 inm_mode_str(const int mode) 2936 { 2937 2938 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2939 return (inm_modestrs[mode]); 2940 return ("??"); 2941 } 2942 2943 static const char *inm_statestrs[] = { 2944 "not-member", 2945 "silent", 2946 "idle", 2947 "lazy", 2948 "sleeping", 2949 "awakening", 2950 "query-pending", 2951 "sg-query-pending", 2952 "leaving" 2953 }; 2954 2955 static const char * 2956 inm_state_str(const int state) 2957 { 2958 2959 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 2960 return (inm_statestrs[state]); 2961 return ("??"); 2962 } 2963 2964 /* 2965 * Dump an in_multi structure to the console. 
2966 */ 2967 void 2968 inm_print(const struct in_multi *inm) 2969 { 2970 int t; 2971 2972 if ((ktr_mask & KTR_IGMPV3) == 0) 2973 return; 2974 2975 printf("%s: --- begin inm %p ---\n", __func__, inm); 2976 printf("addr %s ifp %p(%s) ifma %p\n", 2977 inet_ntoa(inm->inm_addr), 2978 inm->inm_ifp, 2979 inm->inm_ifp->if_xname, 2980 inm->inm_ifma); 2981 printf("timer %u state %s refcount %u scq.len %u\n", 2982 inm->inm_timer, 2983 inm_state_str(inm->inm_state), 2984 inm->inm_refcount, 2985 inm->inm_scq.mq_len); 2986 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 2987 inm->inm_igi, 2988 inm->inm_nsrc, 2989 inm->inm_sctimer, 2990 inm->inm_scrv); 2991 for (t = 0; t < 2; t++) { 2992 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 2993 inm_mode_str(inm->inm_st[t].iss_fmode), 2994 inm->inm_st[t].iss_asm, 2995 inm->inm_st[t].iss_ex, 2996 inm->inm_st[t].iss_in, 2997 inm->inm_st[t].iss_rec); 2998 } 2999 printf("%s: --- end inm %p ---\n", __func__, inm); 3000 } 3001 3002 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3003 3004 void 3005 inm_print(const struct in_multi *inm) 3006 { 3007 3008 } 3009 3010 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3011 3012 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3013