1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 2005 Robert N. M. Watson. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * IPv4 multicast socket, group, and socket option processing module. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/rmlock.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/protosw.h> 49 #include <sys/sysctl.h> 50 #include <sys/ktr.h> 51 #include <sys/taskqueue.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/if_dl.h> 57 #include <net/route.h> 58 #include <net/vnet.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/in_fib.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/in_var.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/igmp_var.h> 67 68 #ifndef KTR_IGMPV3 69 #define KTR_IGMPV3 KTR_INET 70 #endif 71 72 #ifndef __SOCKUNION_DECLARED 73 union sockunion { 74 struct sockaddr_storage ss; 75 struct sockaddr sa; 76 struct sockaddr_dl sdl; 77 struct sockaddr_in sin; 78 }; 79 typedef union sockunion sockunion_t; 80 #define __SOCKUNION_DECLARED 81 #endif /* __SOCKUNION_DECLARED */ 82 83 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 84 "IPv4 multicast PCB-layer source filter"); 85 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 86 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 87 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 88 "IPv4 multicast IGMP-layer source filter"); 89 90 /* 91 * Locking: 92 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 93 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 94 * it can be taken by code in net/if.c also. 95 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 96 * 97 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly 98 * any need for in_multi itself to be virtualized -- it is bound to an ifp 99 * anyway no matter what happens. 100 */ 101 struct mtx in_multi_mtx; 102 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF); 103 104 /* 105 * Functions with non-static linkage defined in this file should be 106 * declared in in_var.h: 107 * imo_multi_filter() 108 * in_addmulti() 109 * in_delmulti() 110 * in_joingroup() 111 * in_joingroup_locked() 112 * in_leavegroup() 113 * in_leavegroup_locked() 114 * and ip_var.h: 115 * inp_freemoptions() 116 * inp_getmoptions() 117 * inp_setmoptions() 118 * 119 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 120 * and in_delmulti(). 121 */ 122 static void imf_commit(struct in_mfilter *); 123 static int imf_get_source(struct in_mfilter *imf, 124 const struct sockaddr_in *psin, 125 struct in_msource **); 126 static struct in_msource * 127 imf_graft(struct in_mfilter *, const uint8_t, 128 const struct sockaddr_in *); 129 static void imf_leave(struct in_mfilter *); 130 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 131 static void imf_purge(struct in_mfilter *); 132 static void imf_rollback(struct in_mfilter *); 133 static void imf_reap(struct in_mfilter *); 134 static int imo_grow(struct ip_moptions *); 135 static size_t imo_match_group(const struct ip_moptions *, 136 const struct ifnet *, const struct sockaddr *); 137 static struct in_msource * 138 imo_match_source(const struct ip_moptions *, const size_t, 139 const struct sockaddr *); 140 static void ims_merge(struct ip_msource *ims, 141 const struct in_msource *lims, const int rollback); 142 static int in_getmulti(struct ifnet *, const struct in_addr *, 143 struct in_multi **); 144 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 145 const int noalloc, struct ip_msource **pims); 146 #ifdef KTR 147 static int inm_is_ifp_detached(const struct in_multi *); 148 #endif 149 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 150 static void inm_purge(struct in_multi *); 151 static void inm_reap(struct in_multi *); 152 static struct ip_moptions * 153 inp_findmoptions(struct inpcb *); 154 static void inp_freemoptions_internal(struct ip_moptions *); 155 static void inp_gcmoptions(void *, int); 156 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 157 static int inp_join_group(struct inpcb *, struct sockopt *); 158 static int inp_leave_group(struct inpcb *, struct sockopt *); 159 static struct ifnet * 160 inp_lookup_mcast_ifp(const struct inpcb *, 161 const struct sockaddr_in *, const struct in_addr); 162 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 163 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 164 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 165 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 166 167 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 168 "IPv4 multicast"); 169 170 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 171 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 172 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 173 "Max source filters per group"); 174 175 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 176 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 177 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 178 "Max source filters per socket"); 179 180 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 181 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 182 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 183 184 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 185 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 186 "Per-interface stack-wide source filters"); 187 188 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 189 STAILQ_HEAD_INITIALIZER(imo_gc_list); 190 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 191 192 #ifdef KTR 193 /* 194 * Inline function which wraps assertions for a valid ifp. 195 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 196 * is detached. 197 */ 198 static int __inline 199 inm_is_ifp_detached(const struct in_multi *inm) 200 { 201 struct ifnet *ifp; 202 203 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 204 ifp = inm->inm_ifma->ifma_ifp; 205 if (ifp != NULL) { 206 /* 207 * Sanity check that netinet's notion of ifp is the 208 * same as net's. 209 */ 210 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 211 } 212 213 return (ifp == NULL); 214 } 215 #endif 216 217 /* 218 * Initialize an in_mfilter structure to a known state at t0, t1 219 * with an empty source filter list. 220 */ 221 static __inline void 222 imf_init(struct in_mfilter *imf, const int st0, const int st1) 223 { 224 memset(imf, 0, sizeof(struct in_mfilter)); 225 RB_INIT(&imf->imf_sources); 226 imf->imf_st[0] = st0; 227 imf->imf_st[1] = st1; 228 } 229 230 /* 231 * Function for looking up an in_multi record for an IPv4 multicast address 232 * on a given interface. ifp must be valid. If no record found, return NULL. 233 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. 234 */ 235 struct in_multi * 236 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 237 { 238 struct ifmultiaddr *ifma; 239 struct in_multi *inm; 240 241 IN_MULTI_LOCK_ASSERT(); 242 IF_ADDR_LOCK_ASSERT(ifp); 243 244 inm = NULL; 245 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 246 if (ifma->ifma_addr->sa_family == AF_INET) { 247 inm = (struct in_multi *)ifma->ifma_protospec; 248 if (inm->inm_addr.s_addr == ina.s_addr) 249 break; 250 inm = NULL; 251 } 252 } 253 return (inm); 254 } 255 256 /* 257 * Wrapper for inm_lookup_locked(). 258 * The IF_ADDR_LOCK will be taken on ifp and released on return. 259 */ 260 struct in_multi * 261 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 262 { 263 struct in_multi *inm; 264 265 IN_MULTI_LOCK_ASSERT(); 266 IF_ADDR_RLOCK(ifp); 267 inm = inm_lookup_locked(ifp, ina); 268 IF_ADDR_RUNLOCK(ifp); 269 270 return (inm); 271 } 272 273 /* 274 * Resize the ip_moptions vector to the next power-of-two minus 1. 275 * May be called with locks held; do not sleep. 276 */ 277 static int 278 imo_grow(struct ip_moptions *imo) 279 { 280 struct in_multi **nmships; 281 struct in_multi **omships; 282 struct in_mfilter *nmfilters; 283 struct in_mfilter *omfilters; 284 size_t idx; 285 size_t newmax; 286 size_t oldmax; 287 288 nmships = NULL; 289 nmfilters = NULL; 290 omships = imo->imo_membership; 291 omfilters = imo->imo_mfilters; 292 oldmax = imo->imo_max_memberships; 293 newmax = ((oldmax + 1) * 2) - 1; 294 295 if (newmax <= IP_MAX_MEMBERSHIPS) { 296 nmships = (struct in_multi **)realloc(omships, 297 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 298 nmfilters = (struct in_mfilter *)realloc(omfilters, 299 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 300 if (nmships != NULL && nmfilters != NULL) { 301 /* Initialize newly allocated source filter heads. */ 302 for (idx = oldmax; idx < newmax; idx++) { 303 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 304 MCAST_EXCLUDE); 305 } 306 imo->imo_max_memberships = newmax; 307 imo->imo_membership = nmships; 308 imo->imo_mfilters = nmfilters; 309 } 310 } 311 312 if (nmships == NULL || nmfilters == NULL) { 313 if (nmships != NULL) 314 free(nmships, M_IPMOPTS); 315 if (nmfilters != NULL) 316 free(nmfilters, M_INMFILTER); 317 return (ETOOMANYREFS); 318 } 319 320 return (0); 321 } 322 323 /* 324 * Find an IPv4 multicast group entry for this ip_moptions instance 325 * which matches the specified group, and optionally an interface. 326 * Return its index into the array, or -1 if not found. 327 */ 328 static size_t 329 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 330 const struct sockaddr *group) 331 { 332 const struct sockaddr_in *gsin; 333 struct in_multi **pinm; 334 int idx; 335 int nmships; 336 337 gsin = (const struct sockaddr_in *)group; 338 339 /* The imo_membership array may be lazy allocated. */ 340 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 341 return (-1); 342 343 nmships = imo->imo_num_memberships; 344 pinm = &imo->imo_membership[0]; 345 for (idx = 0; idx < nmships; idx++, pinm++) { 346 if (*pinm == NULL) 347 continue; 348 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 349 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 350 break; 351 } 352 } 353 if (idx >= nmships) 354 idx = -1; 355 356 return (idx); 357 } 358 359 /* 360 * Find an IPv4 multicast source entry for this imo which matches 361 * the given group index for this socket, and source address. 362 * 363 * NOTE: This does not check if the entry is in-mode, merely if 364 * it exists, which may not be the desired behaviour. 365 */ 366 static struct in_msource * 367 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 368 const struct sockaddr *src) 369 { 370 struct ip_msource find; 371 struct in_mfilter *imf; 372 struct ip_msource *ims; 373 const sockunion_t *psa; 374 375 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 376 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 377 ("%s: invalid index %d\n", __func__, (int)gidx)); 378 379 /* The imo_mfilters array may be lazy allocated. */ 380 if (imo->imo_mfilters == NULL) 381 return (NULL); 382 imf = &imo->imo_mfilters[gidx]; 383 384 /* Source trees are keyed in host byte order. */ 385 psa = (const sockunion_t *)src; 386 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 387 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 388 389 return ((struct in_msource *)ims); 390 } 391 392 /* 393 * Perform filtering for multicast datagrams on a socket by group and source. 394 * 395 * Returns 0 if a datagram should be allowed through, or various error codes 396 * if the socket was not a member of the group, or the source was muted, etc. 397 */ 398 int 399 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 400 const struct sockaddr *group, const struct sockaddr *src) 401 { 402 size_t gidx; 403 struct in_msource *ims; 404 int mode; 405 406 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 407 408 gidx = imo_match_group(imo, ifp, group); 409 if (gidx == -1) 410 return (MCAST_NOTGMEMBER); 411 412 /* 413 * Check if the source was included in an (S,G) join. 414 * Allow reception on exclusive memberships by default, 415 * reject reception on inclusive memberships by default. 416 * Exclude source only if an in-mode exclude filter exists. 417 * Include source only if an in-mode include filter exists. 418 * NOTE: We are comparing group state here at IGMP t1 (now) 419 * with socket-layer t0 (since last downcall). 420 */ 421 mode = imo->imo_mfilters[gidx].imf_st[1]; 422 ims = imo_match_source(imo, gidx, src); 423 424 if ((ims == NULL && mode == MCAST_INCLUDE) || 425 (ims != NULL && ims->imsl_st[0] != mode)) 426 return (MCAST_NOTSMEMBER); 427 428 return (MCAST_PASS); 429 } 430 431 /* 432 * Find and return a reference to an in_multi record for (ifp, group), 433 * and bump its reference count. 434 * If one does not exist, try to allocate it, and update link-layer multicast 435 * filters on ifp to listen for group. 436 * Assumes the IN_MULTI lock is held across the call. 437 * Return 0 if successful, otherwise return an appropriate error code. 438 */ 439 static int 440 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 441 struct in_multi **pinm) 442 { 443 struct sockaddr_in gsin; 444 struct ifmultiaddr *ifma; 445 struct in_ifinfo *ii; 446 struct in_multi *inm; 447 int error; 448 449 IN_MULTI_LOCK_ASSERT(); 450 451 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 452 453 inm = inm_lookup(ifp, *group); 454 if (inm != NULL) { 455 /* 456 * If we already joined this group, just bump the 457 * refcount and return it. 458 */ 459 KASSERT(inm->inm_refcount >= 1, 460 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 461 ++inm->inm_refcount; 462 *pinm = inm; 463 return (0); 464 } 465 466 memset(&gsin, 0, sizeof(gsin)); 467 gsin.sin_family = AF_INET; 468 gsin.sin_len = sizeof(struct sockaddr_in); 469 gsin.sin_addr = *group; 470 471 /* 472 * Check if a link-layer group is already associated 473 * with this network-layer group on the given ifnet. 474 */ 475 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 476 if (error != 0) 477 return (error); 478 479 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 480 IF_ADDR_WLOCK(ifp); 481 482 /* 483 * If something other than netinet is occupying the link-layer 484 * group, print a meaningful error message and back out of 485 * the allocation. 486 * Otherwise, bump the refcount on the existing network-layer 487 * group association and return it. 488 */ 489 if (ifma->ifma_protospec != NULL) { 490 inm = (struct in_multi *)ifma->ifma_protospec; 491 #ifdef INVARIANTS 492 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 493 __func__)); 494 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 495 ("%s: ifma not AF_INET", __func__)); 496 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 497 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 498 !in_hosteq(inm->inm_addr, *group)) 499 panic("%s: ifma %p is inconsistent with %p (%s)", 500 __func__, ifma, inm, inet_ntoa(*group)); 501 #endif 502 ++inm->inm_refcount; 503 *pinm = inm; 504 IF_ADDR_WUNLOCK(ifp); 505 return (0); 506 } 507 508 IF_ADDR_WLOCK_ASSERT(ifp); 509 510 /* 511 * A new in_multi record is needed; allocate and initialize it. 512 * We DO NOT perform an IGMP join as the in_ layer may need to 513 * push an initial source list down to IGMP to support SSM. 514 * 515 * The initial source filter state is INCLUDE, {} as per the RFC. 516 */ 517 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 518 if (inm == NULL) { 519 IF_ADDR_WUNLOCK(ifp); 520 if_delmulti_ifma(ifma); 521 return (ENOMEM); 522 } 523 inm->inm_addr = *group; 524 inm->inm_ifp = ifp; 525 inm->inm_igi = ii->ii_igmp; 526 inm->inm_ifma = ifma; 527 inm->inm_refcount = 1; 528 inm->inm_state = IGMP_NOT_MEMBER; 529 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 530 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 531 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 532 RB_INIT(&inm->inm_srcs); 533 534 ifma->ifma_protospec = inm; 535 536 *pinm = inm; 537 538 IF_ADDR_WUNLOCK(ifp); 539 return (0); 540 } 541 542 /* 543 * Drop a reference to an in_multi record. 544 * 545 * If the refcount drops to 0, free the in_multi record and 546 * delete the underlying link-layer membership. 547 */ 548 void 549 inm_release_locked(struct in_multi *inm) 550 { 551 struct ifmultiaddr *ifma; 552 553 IN_MULTI_LOCK_ASSERT(); 554 555 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 556 557 if (--inm->inm_refcount > 0) { 558 CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__, 559 inm->inm_refcount); 560 return; 561 } 562 563 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 564 565 ifma = inm->inm_ifma; 566 567 /* XXX this access is not covered by IF_ADDR_LOCK */ 568 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 569 KASSERT(ifma->ifma_protospec == inm, 570 ("%s: ifma_protospec != inm", __func__)); 571 ifma->ifma_protospec = NULL; 572 573 inm_purge(inm); 574 575 free(inm, M_IPMADDR); 576 577 if_delmulti_ifma(ifma); 578 } 579 580 /* 581 * Clear recorded source entries for a group. 582 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 583 * FIXME: Should reap. 584 */ 585 void 586 inm_clear_recorded(struct in_multi *inm) 587 { 588 struct ip_msource *ims; 589 590 IN_MULTI_LOCK_ASSERT(); 591 592 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 593 if (ims->ims_stp) { 594 ims->ims_stp = 0; 595 --inm->inm_st[1].iss_rec; 596 } 597 } 598 KASSERT(inm->inm_st[1].iss_rec == 0, 599 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 600 } 601 602 /* 603 * Record a source as pending for a Source-Group IGMPv3 query. 604 * This lives here as it modifies the shared tree. 605 * 606 * inm is the group descriptor. 607 * naddr is the address of the source to record in network-byte order. 608 * 609 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 610 * lazy-allocate a source node in response to an SG query. 611 * Otherwise, no allocation is performed. This saves some memory 612 * with the trade-off that the source will not be reported to the 613 * router if joined in the window between the query response and 614 * the group actually being joined on the local host. 615 * 616 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 617 * This turns off the allocation of a recorded source entry if 618 * the group has not been joined. 619 * 620 * Return 0 if the source didn't exist or was already marked as recorded. 621 * Return 1 if the source was marked as recorded by this function. 622 * Return <0 if any error occurred (negated errno code). 623 */ 624 int 625 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 626 { 627 struct ip_msource find; 628 struct ip_msource *ims, *nims; 629 630 IN_MULTI_LOCK_ASSERT(); 631 632 find.ims_haddr = ntohl(naddr); 633 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 634 if (ims && ims->ims_stp) 635 return (0); 636 if (ims == NULL) { 637 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 638 return (-ENOSPC); 639 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 640 M_NOWAIT | M_ZERO); 641 if (nims == NULL) 642 return (-ENOMEM); 643 nims->ims_haddr = find.ims_haddr; 644 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 645 ++inm->inm_nsrc; 646 ims = nims; 647 } 648 649 /* 650 * Mark the source as recorded and update the recorded 651 * source count. 652 */ 653 ++ims->ims_stp; 654 ++inm->inm_st[1].iss_rec; 655 656 return (1); 657 } 658 659 /* 660 * Return a pointer to an in_msource owned by an in_mfilter, 661 * given its source address. 662 * Lazy-allocate if needed. If this is a new entry its filter state is 663 * undefined at t0. 664 * 665 * imf is the filter set being modified. 666 * haddr is the source address in *host* byte-order. 667 * 668 * SMPng: May be called with locks held; malloc must not block. 669 */ 670 static int 671 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 672 struct in_msource **plims) 673 { 674 struct ip_msource find; 675 struct ip_msource *ims, *nims; 676 struct in_msource *lims; 677 int error; 678 679 error = 0; 680 ims = NULL; 681 lims = NULL; 682 683 /* key is host byte order */ 684 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 685 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 686 lims = (struct in_msource *)ims; 687 if (lims == NULL) { 688 if (imf->imf_nsrc == in_mcast_maxsocksrc) 689 return (ENOSPC); 690 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 691 M_NOWAIT | M_ZERO); 692 if (nims == NULL) 693 return (ENOMEM); 694 lims = (struct in_msource *)nims; 695 lims->ims_haddr = find.ims_haddr; 696 lims->imsl_st[0] = MCAST_UNDEFINED; 697 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 698 ++imf->imf_nsrc; 699 } 700 701 *plims = lims; 702 703 return (error); 704 } 705 706 /* 707 * Graft a source entry into an existing socket-layer filter set, 708 * maintaining any required invariants and checking allocations. 709 * 710 * The source is marked as being in the new filter mode at t1. 711 * 712 * Return the pointer to the new node, otherwise return NULL. 713 */ 714 static struct in_msource * 715 imf_graft(struct in_mfilter *imf, const uint8_t st1, 716 const struct sockaddr_in *psin) 717 { 718 struct ip_msource *nims; 719 struct in_msource *lims; 720 721 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 722 M_NOWAIT | M_ZERO); 723 if (nims == NULL) 724 return (NULL); 725 lims = (struct in_msource *)nims; 726 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 727 lims->imsl_st[0] = MCAST_UNDEFINED; 728 lims->imsl_st[1] = st1; 729 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 730 ++imf->imf_nsrc; 731 732 return (lims); 733 } 734 735 /* 736 * Prune a source entry from an existing socket-layer filter set, 737 * maintaining any required invariants and checking allocations. 738 * 739 * The source is marked as being left at t1, it is not freed. 740 * 741 * Return 0 if no error occurred, otherwise return an errno value. 742 */ 743 static int 744 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 745 { 746 struct ip_msource find; 747 struct ip_msource *ims; 748 struct in_msource *lims; 749 750 /* key is host byte order */ 751 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 752 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 753 if (ims == NULL) 754 return (ENOENT); 755 lims = (struct in_msource *)ims; 756 lims->imsl_st[1] = MCAST_UNDEFINED; 757 return (0); 758 } 759 760 /* 761 * Revert socket-layer filter set deltas at t1 to t0 state. 762 */ 763 static void 764 imf_rollback(struct in_mfilter *imf) 765 { 766 struct ip_msource *ims, *tims; 767 struct in_msource *lims; 768 769 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 770 lims = (struct in_msource *)ims; 771 if (lims->imsl_st[0] == lims->imsl_st[1]) { 772 /* no change at t1 */ 773 continue; 774 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 775 /* revert change to existing source at t1 */ 776 lims->imsl_st[1] = lims->imsl_st[0]; 777 } else { 778 /* revert source added t1 */ 779 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 780 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 781 free(ims, M_INMFILTER); 782 imf->imf_nsrc--; 783 } 784 } 785 imf->imf_st[1] = imf->imf_st[0]; 786 } 787 788 /* 789 * Mark socket-layer filter set as INCLUDE {} at t1. 790 */ 791 static void 792 imf_leave(struct in_mfilter *imf) 793 { 794 struct ip_msource *ims; 795 struct in_msource *lims; 796 797 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 798 lims = (struct in_msource *)ims; 799 lims->imsl_st[1] = MCAST_UNDEFINED; 800 } 801 imf->imf_st[1] = MCAST_INCLUDE; 802 } 803 804 /* 805 * Mark socket-layer filter set deltas as committed. 806 */ 807 static void 808 imf_commit(struct in_mfilter *imf) 809 { 810 struct ip_msource *ims; 811 struct in_msource *lims; 812 813 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 814 lims = (struct in_msource *)ims; 815 lims->imsl_st[0] = lims->imsl_st[1]; 816 } 817 imf->imf_st[0] = imf->imf_st[1]; 818 } 819 820 /* 821 * Reap unreferenced sources from socket-layer filter set. 822 */ 823 static void 824 imf_reap(struct in_mfilter *imf) 825 { 826 struct ip_msource *ims, *tims; 827 struct in_msource *lims; 828 829 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 830 lims = (struct in_msource *)ims; 831 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 832 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 833 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 834 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 835 free(ims, M_INMFILTER); 836 imf->imf_nsrc--; 837 } 838 } 839 } 840 841 /* 842 * Purge socket-layer filter set. 843 */ 844 static void 845 imf_purge(struct in_mfilter *imf) 846 { 847 struct ip_msource *ims, *tims; 848 849 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 850 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 851 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 852 free(ims, M_INMFILTER); 853 imf->imf_nsrc--; 854 } 855 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 856 KASSERT(RB_EMPTY(&imf->imf_sources), 857 ("%s: imf_sources not empty", __func__)); 858 } 859 860 /* 861 * Look up a source filter entry for a multicast group. 862 * 863 * inm is the group descriptor to work with. 864 * haddr is the host-byte-order IPv4 address to look up. 865 * noalloc may be non-zero to suppress allocation of sources. 866 * *pims will be set to the address of the retrieved or allocated source. 867 * 868 * SMPng: NOTE: may be called with locks held. 869 * Return 0 if successful, otherwise return a non-zero error code. 870 */ 871 static int 872 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 873 const int noalloc, struct ip_msource **pims) 874 { 875 struct ip_msource find; 876 struct ip_msource *ims, *nims; 877 #ifdef KTR 878 struct in_addr ia; 879 #endif 880 881 find.ims_haddr = haddr; 882 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 883 if (ims == NULL && !noalloc) { 884 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 885 return (ENOSPC); 886 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 887 M_NOWAIT | M_ZERO); 888 if (nims == NULL) 889 return (ENOMEM); 890 nims->ims_haddr = haddr; 891 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 892 ++inm->inm_nsrc; 893 ims = nims; 894 #ifdef KTR 895 ia.s_addr = htonl(haddr); 896 CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__, 897 inet_ntoa(ia), ims); 898 #endif 899 } 900 901 *pims = ims; 902 return (0); 903 } 904 905 /* 906 * Merge socket-layer source into IGMP-layer source. 907 * If rollback is non-zero, perform the inverse of the merge. 908 */ 909 static void 910 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 911 const int rollback) 912 { 913 int n = rollback ? -1 : 1; 914 #ifdef KTR 915 struct in_addr ia; 916 917 ia.s_addr = htonl(ims->ims_haddr); 918 #endif 919 920 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 921 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s", 922 __func__, n, inet_ntoa(ia)); 923 ims->ims_st[1].ex -= n; 924 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 925 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s", 926 __func__, n, inet_ntoa(ia)); 927 ims->ims_st[1].in -= n; 928 } 929 930 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 931 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s", 932 __func__, n, inet_ntoa(ia)); 933 ims->ims_st[1].ex += n; 934 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 935 CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s", 936 __func__, n, inet_ntoa(ia)); 937 ims->ims_st[1].in += n; 938 } 939 } 940 941 /* 942 * Atomically update the global in_multi state, when a membership's 943 * filter list is being updated in any way. 944 * 945 * imf is the per-inpcb-membership group filter pointer. 946 * A fake imf may be passed for in-kernel consumers. 947 * 948 * XXX This is a candidate for a set-symmetric-difference style loop 949 * which would eliminate the repeated lookup from root of ims nodes, 950 * as they share the same key space. 951 * 952 * If any error occurred this function will back out of refcounts 953 * and return a non-zero value. 954 */ 955 static int 956 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 957 { 958 struct ip_msource *ims, *nims; 959 struct in_msource *lims; 960 int schanged, error; 961 int nsrc0, nsrc1; 962 963 schanged = 0; 964 error = 0; 965 nsrc1 = nsrc0 = 0; 966 967 /* 968 * Update the source filters first, as this may fail. 969 * Maintain count of in-mode filters at t0, t1. These are 970 * used to work out if we transition into ASM mode or not. 971 * Maintain a count of source filters whose state was 972 * actually modified by this operation. 973 */ 974 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 975 lims = (struct in_msource *)ims; 976 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 977 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 978 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 979 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 980 ++schanged; 981 if (error) 982 break; 983 ims_merge(nims, lims, 0); 984 } 985 if (error) { 986 struct ip_msource *bims; 987 988 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 989 lims = (struct in_msource *)ims; 990 if (lims->imsl_st[0] == lims->imsl_st[1]) 991 continue; 992 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 993 if (bims == NULL) 994 continue; 995 ims_merge(bims, lims, 1); 996 } 997 goto out_reap; 998 } 999 1000 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1001 __func__, nsrc0, nsrc1); 1002 1003 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1004 if (imf->imf_st[0] == imf->imf_st[1] && 1005 imf->imf_st[1] == MCAST_INCLUDE) { 1006 if (nsrc1 == 0) { 1007 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1008 --inm->inm_st[1].iss_in; 1009 } 1010 } 1011 1012 /* Handle filter mode transition on socket. */ 1013 if (imf->imf_st[0] != imf->imf_st[1]) { 1014 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1015 __func__, imf->imf_st[0], imf->imf_st[1]); 1016 1017 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1018 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1019 --inm->inm_st[1].iss_ex; 1020 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1021 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1022 --inm->inm_st[1].iss_in; 1023 } 1024 1025 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1026 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1027 inm->inm_st[1].iss_ex++; 1028 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1029 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1030 inm->inm_st[1].iss_in++; 1031 } 1032 } 1033 1034 /* 1035 * Track inm filter state in terms of listener counts. 1036 * If there are any exclusive listeners, stack-wide 1037 * membership is exclusive. 1038 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1039 * If no listeners remain, state is undefined at t1, 1040 * and the IGMP lifecycle for this group should finish. 1041 */ 1042 if (inm->inm_st[1].iss_ex > 0) { 1043 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1044 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1045 } else if (inm->inm_st[1].iss_in > 0) { 1046 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1047 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1048 } else { 1049 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1050 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1051 } 1052 1053 /* Decrement ASM listener count on transition out of ASM mode. */ 1054 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1055 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1056 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) 1057 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1058 --inm->inm_st[1].iss_asm; 1059 } 1060 1061 /* Increment ASM listener count on transition to ASM mode. */ 1062 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1063 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1064 inm->inm_st[1].iss_asm++; 1065 } 1066 1067 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1068 inm_print(inm); 1069 1070 out_reap: 1071 if (schanged > 0) { 1072 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1073 inm_reap(inm); 1074 } 1075 return (error); 1076 } 1077 1078 /* 1079 * Mark an in_multi's filter set deltas as committed. 1080 * Called by IGMP after a state change has been enqueued. 1081 */ 1082 void 1083 inm_commit(struct in_multi *inm) 1084 { 1085 struct ip_msource *ims; 1086 1087 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1088 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1089 inm_print(inm); 1090 1091 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1092 ims->ims_st[0] = ims->ims_st[1]; 1093 } 1094 inm->inm_st[0] = inm->inm_st[1]; 1095 } 1096 1097 /* 1098 * Reap unreferenced nodes from an in_multi's filter set. 1099 */ 1100 static void 1101 inm_reap(struct in_multi *inm) 1102 { 1103 struct ip_msource *ims, *tims; 1104 1105 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1106 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1107 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1108 ims->ims_stp != 0) 1109 continue; 1110 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1111 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1112 free(ims, M_IPMSOURCE); 1113 inm->inm_nsrc--; 1114 } 1115 } 1116 1117 /* 1118 * Purge all source nodes from an in_multi's filter set. 1119 */ 1120 static void 1121 inm_purge(struct in_multi *inm) 1122 { 1123 struct ip_msource *ims, *tims; 1124 1125 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1126 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1127 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1128 free(ims, M_IPMSOURCE); 1129 inm->inm_nsrc--; 1130 } 1131 } 1132 1133 /* 1134 * Join a multicast group; unlocked entry point. 1135 * 1136 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1137 * is not held. Fortunately, ifp is unlikely to have been detached 1138 * at this point, so we assume it's OK to recurse. 1139 */ 1140 int 1141 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1142 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1143 { 1144 int error; 1145 1146 IN_MULTI_LOCK(); 1147 error = in_joingroup_locked(ifp, gina, imf, pinm); 1148 IN_MULTI_UNLOCK(); 1149 1150 return (error); 1151 } 1152 1153 /* 1154 * Join a multicast group; real entry point. 1155 * 1156 * Only preserves atomicity at inm level. 1157 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1158 * 1159 * If the IGMP downcall fails, the group is not joined, and an error 1160 * code is returned. 1161 */ 1162 int 1163 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1164 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1165 { 1166 struct in_mfilter timf; 1167 struct in_multi *inm; 1168 int error; 1169 1170 IN_MULTI_LOCK_ASSERT(); 1171 1172 CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__, 1173 inet_ntoa(*gina), ifp, ifp->if_xname); 1174 1175 error = 0; 1176 inm = NULL; 1177 1178 /* 1179 * If no imf was specified (i.e. kernel consumer), 1180 * fake one up and assume it is an ASM join. 1181 */ 1182 if (imf == NULL) { 1183 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1184 imf = &timf; 1185 } 1186 1187 error = in_getmulti(ifp, gina, &inm); 1188 if (error) { 1189 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1190 return (error); 1191 } 1192 1193 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1194 error = inm_merge(inm, imf); 1195 if (error) { 1196 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1197 goto out_inm_release; 1198 } 1199 1200 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1201 error = igmp_change_state(inm); 1202 if (error) { 1203 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1204 goto out_inm_release; 1205 } 1206 1207 out_inm_release: 1208 if (error) { 1209 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1210 inm_release_locked(inm); 1211 } else { 1212 *pinm = inm; 1213 } 1214 1215 return (error); 1216 } 1217 1218 /* 1219 * Leave a multicast group; unlocked entry point. 1220 */ 1221 int 1222 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1223 { 1224 int error; 1225 1226 IN_MULTI_LOCK(); 1227 error = in_leavegroup_locked(inm, imf); 1228 IN_MULTI_UNLOCK(); 1229 1230 return (error); 1231 } 1232 1233 /* 1234 * Leave a multicast group; real entry point. 1235 * All source filters will be expunged. 1236 * 1237 * Only preserves atomicity at inm level. 1238 * 1239 * Holding the write lock for the INP which contains imf 1240 * is highly advisable. We can't assert for it as imf does not 1241 * contain a back-pointer to the owning inp. 1242 * 1243 * Note: This is not the same as inm_release(*) as this function also 1244 * makes a state change downcall into IGMP. 1245 */ 1246 int 1247 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1248 { 1249 struct in_mfilter timf; 1250 int error; 1251 1252 error = 0; 1253 1254 IN_MULTI_LOCK_ASSERT(); 1255 1256 CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__, 1257 inm, inet_ntoa(inm->inm_addr), 1258 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1259 imf); 1260 1261 /* 1262 * If no imf was specified (i.e. kernel consumer), 1263 * fake one up and assume it is an ASM join. 1264 */ 1265 if (imf == NULL) { 1266 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1267 imf = &timf; 1268 } 1269 1270 /* 1271 * Begin state merge transaction at IGMP layer. 1272 * 1273 * As this particular invocation should not cause any memory 1274 * to be allocated, and there is no opportunity to roll back 1275 * the transaction, it MUST NOT fail. 1276 */ 1277 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1278 error = inm_merge(inm, imf); 1279 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1280 1281 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1282 CURVNET_SET(inm->inm_ifp->if_vnet); 1283 error = igmp_change_state(inm); 1284 CURVNET_RESTORE(); 1285 if (error) 1286 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1287 1288 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1289 inm_release_locked(inm); 1290 1291 return (error); 1292 } 1293 1294 /*#ifndef BURN_BRIDGES*/ 1295 /* 1296 * Join an IPv4 multicast group in (*,G) exclusive mode. 1297 * The group must be a 224.0.0.0/24 link-scope group. 1298 * This KPI is for legacy kernel consumers only. 1299 */ 1300 struct in_multi * 1301 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1302 { 1303 struct in_multi *pinm; 1304 int error; 1305 1306 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1307 ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap))); 1308 1309 error = in_joingroup(ifp, ap, NULL, &pinm); 1310 if (error != 0) 1311 pinm = NULL; 1312 1313 return (pinm); 1314 } 1315 1316 /* 1317 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode. 1318 * This KPI is for legacy kernel consumers only. 1319 */ 1320 void 1321 in_delmulti(struct in_multi *inm) 1322 { 1323 1324 (void)in_leavegroup(inm, NULL); 1325 } 1326 /*#endif*/ 1327 1328 /* 1329 * Block or unblock an ASM multicast source on an inpcb. 1330 * This implements the delta-based API described in RFC 3678. 1331 * 1332 * The delta-based API applies only to exclusive-mode memberships. 1333 * An IGMP downcall will be performed. 1334 * 1335 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1336 * 1337 * Return 0 if successful, otherwise return an appropriate error code. 1338 */ 1339 static int 1340 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1341 { 1342 struct group_source_req gsr; 1343 sockunion_t *gsa, *ssa; 1344 struct ifnet *ifp; 1345 struct in_mfilter *imf; 1346 struct ip_moptions *imo; 1347 struct in_msource *ims; 1348 struct in_multi *inm; 1349 size_t idx; 1350 uint16_t fmode; 1351 int error, doblock; 1352 1353 ifp = NULL; 1354 error = 0; 1355 doblock = 0; 1356 1357 memset(&gsr, 0, sizeof(struct group_source_req)); 1358 gsa = (sockunion_t *)&gsr.gsr_group; 1359 ssa = (sockunion_t *)&gsr.gsr_source; 1360 1361 switch (sopt->sopt_name) { 1362 case IP_BLOCK_SOURCE: 1363 case IP_UNBLOCK_SOURCE: { 1364 struct ip_mreq_source mreqs; 1365 1366 error = sooptcopyin(sopt, &mreqs, 1367 sizeof(struct ip_mreq_source), 1368 sizeof(struct ip_mreq_source)); 1369 if (error) 1370 return (error); 1371 1372 gsa->sin.sin_family = AF_INET; 1373 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1374 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1375 1376 ssa->sin.sin_family = AF_INET; 1377 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1378 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1379 1380 if (!in_nullhost(mreqs.imr_interface)) 1381 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1382 1383 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1384 doblock = 1; 1385 1386 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1387 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1388 break; 1389 } 1390 1391 case MCAST_BLOCK_SOURCE: 1392 case MCAST_UNBLOCK_SOURCE: 1393 error = sooptcopyin(sopt, &gsr, 1394 sizeof(struct group_source_req), 1395 sizeof(struct group_source_req)); 1396 if (error) 1397 return (error); 1398 1399 if (gsa->sin.sin_family != AF_INET || 1400 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1401 return (EINVAL); 1402 1403 if (ssa->sin.sin_family != AF_INET || 1404 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1405 return (EINVAL); 1406 1407 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1408 return (EADDRNOTAVAIL); 1409 1410 ifp = ifnet_byindex(gsr.gsr_interface); 1411 1412 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1413 doblock = 1; 1414 break; 1415 1416 default: 1417 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1418 __func__, sopt->sopt_name); 1419 return (EOPNOTSUPP); 1420 break; 1421 } 1422 1423 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1424 return (EINVAL); 1425 1426 /* 1427 * Check if we are actually a member of this group. 1428 */ 1429 imo = inp_findmoptions(inp); 1430 idx = imo_match_group(imo, ifp, &gsa->sa); 1431 if (idx == -1 || imo->imo_mfilters == NULL) { 1432 error = EADDRNOTAVAIL; 1433 goto out_inp_locked; 1434 } 1435 1436 KASSERT(imo->imo_mfilters != NULL, 1437 ("%s: imo_mfilters not allocated", __func__)); 1438 imf = &imo->imo_mfilters[idx]; 1439 inm = imo->imo_membership[idx]; 1440 1441 /* 1442 * Attempting to use the delta-based API on an 1443 * non exclusive-mode membership is an error. 1444 */ 1445 fmode = imf->imf_st[0]; 1446 if (fmode != MCAST_EXCLUDE) { 1447 error = EINVAL; 1448 goto out_inp_locked; 1449 } 1450 1451 /* 1452 * Deal with error cases up-front: 1453 * Asked to block, but already blocked; or 1454 * Asked to unblock, but nothing to unblock. 1455 * If adding a new block entry, allocate it. 1456 */ 1457 ims = imo_match_source(imo, idx, &ssa->sa); 1458 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1459 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 1460 inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "); 1461 error = EADDRNOTAVAIL; 1462 goto out_inp_locked; 1463 } 1464 1465 INP_WLOCK_ASSERT(inp); 1466 1467 /* 1468 * Begin state merge transaction at socket layer. 1469 */ 1470 if (doblock) { 1471 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1472 ims = imf_graft(imf, fmode, &ssa->sin); 1473 if (ims == NULL) 1474 error = ENOMEM; 1475 } else { 1476 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1477 error = imf_prune(imf, &ssa->sin); 1478 } 1479 1480 if (error) { 1481 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1482 goto out_imf_rollback; 1483 } 1484 1485 /* 1486 * Begin state merge transaction at IGMP layer. 1487 */ 1488 IN_MULTI_LOCK(); 1489 1490 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1491 error = inm_merge(inm, imf); 1492 if (error) { 1493 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1494 goto out_in_multi_locked; 1495 } 1496 1497 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1498 error = igmp_change_state(inm); 1499 if (error) 1500 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1501 1502 out_in_multi_locked: 1503 1504 IN_MULTI_UNLOCK(); 1505 1506 out_imf_rollback: 1507 if (error) 1508 imf_rollback(imf); 1509 else 1510 imf_commit(imf); 1511 1512 imf_reap(imf); 1513 1514 out_inp_locked: 1515 INP_WUNLOCK(inp); 1516 return (error); 1517 } 1518 1519 /* 1520 * Given an inpcb, return its multicast options structure pointer. Accepts 1521 * an unlocked inpcb pointer, but will return it locked. May sleep. 1522 * 1523 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1524 * SMPng: NOTE: Returns with the INP write lock held. 1525 */ 1526 static struct ip_moptions * 1527 inp_findmoptions(struct inpcb *inp) 1528 { 1529 struct ip_moptions *imo; 1530 struct in_multi **immp; 1531 struct in_mfilter *imfp; 1532 size_t idx; 1533 1534 INP_WLOCK(inp); 1535 if (inp->inp_moptions != NULL) 1536 return (inp->inp_moptions); 1537 1538 INP_WUNLOCK(inp); 1539 1540 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1541 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1542 M_WAITOK | M_ZERO); 1543 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1544 M_INMFILTER, M_WAITOK); 1545 1546 imo->imo_multicast_ifp = NULL; 1547 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1548 imo->imo_multicast_vif = -1; 1549 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1550 imo->imo_multicast_loop = in_mcast_loop; 1551 imo->imo_num_memberships = 0; 1552 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1553 imo->imo_membership = immp; 1554 1555 /* Initialize per-group source filters. */ 1556 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1557 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1558 imo->imo_mfilters = imfp; 1559 1560 INP_WLOCK(inp); 1561 if (inp->inp_moptions != NULL) { 1562 free(imfp, M_INMFILTER); 1563 free(immp, M_IPMOPTS); 1564 free(imo, M_IPMOPTS); 1565 return (inp->inp_moptions); 1566 } 1567 inp->inp_moptions = imo; 1568 return (imo); 1569 } 1570 1571 /* 1572 * Discard the IP multicast options (and source filters). To minimize 1573 * the amount of work done while holding locks such as the INP's 1574 * pcbinfo lock (which is used in the receive path), the free 1575 * operation is performed asynchronously in a separate task. 1576 * 1577 * SMPng: NOTE: assumes INP write lock is held. 1578 */ 1579 void 1580 inp_freemoptions(struct ip_moptions *imo) 1581 { 1582 1583 KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); 1584 IN_MULTI_LOCK(); 1585 STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link); 1586 IN_MULTI_UNLOCK(); 1587 taskqueue_enqueue(taskqueue_thread, &imo_gc_task); 1588 } 1589 1590 static void 1591 inp_freemoptions_internal(struct ip_moptions *imo) 1592 { 1593 struct in_mfilter *imf; 1594 size_t idx, nmships; 1595 1596 nmships = imo->imo_num_memberships; 1597 for (idx = 0; idx < nmships; ++idx) { 1598 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1599 if (imf) 1600 imf_leave(imf); 1601 (void)in_leavegroup(imo->imo_membership[idx], imf); 1602 if (imf) 1603 imf_purge(imf); 1604 } 1605 1606 if (imo->imo_mfilters) 1607 free(imo->imo_mfilters, M_INMFILTER); 1608 free(imo->imo_membership, M_IPMOPTS); 1609 free(imo, M_IPMOPTS); 1610 } 1611 1612 static void 1613 inp_gcmoptions(void *context, int pending) 1614 { 1615 struct ip_moptions *imo; 1616 1617 IN_MULTI_LOCK(); 1618 while (!STAILQ_EMPTY(&imo_gc_list)) { 1619 imo = STAILQ_FIRST(&imo_gc_list); 1620 STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link); 1621 IN_MULTI_UNLOCK(); 1622 inp_freemoptions_internal(imo); 1623 IN_MULTI_LOCK(); 1624 } 1625 IN_MULTI_UNLOCK(); 1626 } 1627 1628 /* 1629 * Atomically get source filters on a socket for an IPv4 multicast group. 1630 * Called with INP lock held; returns with lock released. 1631 */ 1632 static int 1633 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1634 { 1635 struct __msfilterreq msfr; 1636 sockunion_t *gsa; 1637 struct ifnet *ifp; 1638 struct ip_moptions *imo; 1639 struct in_mfilter *imf; 1640 struct ip_msource *ims; 1641 struct in_msource *lims; 1642 struct sockaddr_in *psin; 1643 struct sockaddr_storage *ptss; 1644 struct sockaddr_storage *tss; 1645 int error; 1646 size_t idx, nsrcs, ncsrcs; 1647 1648 INP_WLOCK_ASSERT(inp); 1649 1650 imo = inp->inp_moptions; 1651 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1652 1653 INP_WUNLOCK(inp); 1654 1655 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1656 sizeof(struct __msfilterreq)); 1657 if (error) 1658 return (error); 1659 1660 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1661 return (EINVAL); 1662 1663 ifp = ifnet_byindex(msfr.msfr_ifindex); 1664 if (ifp == NULL) 1665 return (EINVAL); 1666 1667 INP_WLOCK(inp); 1668 1669 /* 1670 * Lookup group on the socket. 1671 */ 1672 gsa = (sockunion_t *)&msfr.msfr_group; 1673 idx = imo_match_group(imo, ifp, &gsa->sa); 1674 if (idx == -1 || imo->imo_mfilters == NULL) { 1675 INP_WUNLOCK(inp); 1676 return (EADDRNOTAVAIL); 1677 } 1678 imf = &imo->imo_mfilters[idx]; 1679 1680 /* 1681 * Ignore memberships which are in limbo. 1682 */ 1683 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1684 INP_WUNLOCK(inp); 1685 return (EAGAIN); 1686 } 1687 msfr.msfr_fmode = imf->imf_st[1]; 1688 1689 /* 1690 * If the user specified a buffer, copy out the source filter 1691 * entries to userland gracefully. 1692 * We only copy out the number of entries which userland 1693 * has asked for, but we always tell userland how big the 1694 * buffer really needs to be. 1695 */ 1696 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1697 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1698 tss = NULL; 1699 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1700 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1701 M_TEMP, M_NOWAIT | M_ZERO); 1702 if (tss == NULL) { 1703 INP_WUNLOCK(inp); 1704 return (ENOBUFS); 1705 } 1706 } 1707 1708 /* 1709 * Count number of sources in-mode at t0. 1710 * If buffer space exists and remains, copy out source entries. 1711 */ 1712 nsrcs = msfr.msfr_nsrcs; 1713 ncsrcs = 0; 1714 ptss = tss; 1715 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1716 lims = (struct in_msource *)ims; 1717 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1718 lims->imsl_st[0] != imf->imf_st[0]) 1719 continue; 1720 ++ncsrcs; 1721 if (tss != NULL && nsrcs > 0) { 1722 psin = (struct sockaddr_in *)ptss; 1723 psin->sin_family = AF_INET; 1724 psin->sin_len = sizeof(struct sockaddr_in); 1725 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1726 psin->sin_port = 0; 1727 ++ptss; 1728 --nsrcs; 1729 } 1730 } 1731 1732 INP_WUNLOCK(inp); 1733 1734 if (tss != NULL) { 1735 error = copyout(tss, msfr.msfr_srcs, 1736 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1737 free(tss, M_TEMP); 1738 if (error) 1739 return (error); 1740 } 1741 1742 msfr.msfr_nsrcs = ncsrcs; 1743 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1744 1745 return (error); 1746 } 1747 1748 /* 1749 * Return the IP multicast options in response to user getsockopt(). 1750 */ 1751 int 1752 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1753 { 1754 struct rm_priotracker in_ifa_tracker; 1755 struct ip_mreqn mreqn; 1756 struct ip_moptions *imo; 1757 struct ifnet *ifp; 1758 struct in_ifaddr *ia; 1759 int error, optval; 1760 u_char coptval; 1761 1762 INP_WLOCK(inp); 1763 imo = inp->inp_moptions; 1764 /* 1765 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1766 * or is a divert socket, reject it. 1767 */ 1768 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1769 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1770 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1771 INP_WUNLOCK(inp); 1772 return (EOPNOTSUPP); 1773 } 1774 1775 error = 0; 1776 switch (sopt->sopt_name) { 1777 case IP_MULTICAST_VIF: 1778 if (imo != NULL) 1779 optval = imo->imo_multicast_vif; 1780 else 1781 optval = -1; 1782 INP_WUNLOCK(inp); 1783 error = sooptcopyout(sopt, &optval, sizeof(int)); 1784 break; 1785 1786 case IP_MULTICAST_IF: 1787 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1788 if (imo != NULL) { 1789 ifp = imo->imo_multicast_ifp; 1790 if (!in_nullhost(imo->imo_multicast_addr)) { 1791 mreqn.imr_address = imo->imo_multicast_addr; 1792 } else if (ifp != NULL) { 1793 mreqn.imr_ifindex = ifp->if_index; 1794 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1795 if (ia != NULL) { 1796 mreqn.imr_address = 1797 IA_SIN(ia)->sin_addr; 1798 ifa_free(&ia->ia_ifa); 1799 } 1800 } 1801 } 1802 INP_WUNLOCK(inp); 1803 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1804 error = sooptcopyout(sopt, &mreqn, 1805 sizeof(struct ip_mreqn)); 1806 } else { 1807 error = sooptcopyout(sopt, &mreqn.imr_address, 1808 sizeof(struct in_addr)); 1809 } 1810 break; 1811 1812 case IP_MULTICAST_TTL: 1813 if (imo == NULL) 1814 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1815 else 1816 optval = coptval = imo->imo_multicast_ttl; 1817 INP_WUNLOCK(inp); 1818 if (sopt->sopt_valsize == sizeof(u_char)) 1819 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1820 else 1821 error = sooptcopyout(sopt, &optval, sizeof(int)); 1822 break; 1823 1824 case IP_MULTICAST_LOOP: 1825 if (imo == NULL) 1826 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1827 else 1828 optval = coptval = imo->imo_multicast_loop; 1829 INP_WUNLOCK(inp); 1830 if (sopt->sopt_valsize == sizeof(u_char)) 1831 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1832 else 1833 error = sooptcopyout(sopt, &optval, sizeof(int)); 1834 break; 1835 1836 case IP_MSFILTER: 1837 if (imo == NULL) { 1838 error = EADDRNOTAVAIL; 1839 INP_WUNLOCK(inp); 1840 } else { 1841 error = inp_get_source_filters(inp, sopt); 1842 } 1843 break; 1844 1845 default: 1846 INP_WUNLOCK(inp); 1847 error = ENOPROTOOPT; 1848 break; 1849 } 1850 1851 INP_UNLOCK_ASSERT(inp); 1852 1853 return (error); 1854 } 1855 1856 /* 1857 * Look up the ifnet to use for a multicast group membership, 1858 * given the IPv4 address of an interface, and the IPv4 group address. 1859 * 1860 * This routine exists to support legacy multicast applications 1861 * which do not understand that multicast memberships are scoped to 1862 * specific physical links in the networking stack, or which need 1863 * to join link-scope groups before IPv4 addresses are configured. 1864 * 1865 * If inp is non-NULL, use this socket's current FIB number for any 1866 * required FIB lookup. 1867 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1868 * and use its ifp; usually, this points to the default next-hop. 1869 * 1870 * If the FIB lookup fails, attempt to use the first non-loopback 1871 * interface with multicast capability in the system as a 1872 * last resort. The legacy IPv4 ASM API requires that we do 1873 * this in order to allow groups to be joined when the routing 1874 * table has not yet been populated during boot. 1875 * 1876 * Returns NULL if no ifp could be found. 1877 * 1878 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1879 * FUTURE: Implement IPv4 source-address selection. 1880 */ 1881 static struct ifnet * 1882 inp_lookup_mcast_ifp(const struct inpcb *inp, 1883 const struct sockaddr_in *gsin, const struct in_addr ina) 1884 { 1885 struct rm_priotracker in_ifa_tracker; 1886 struct ifnet *ifp; 1887 struct nhop4_basic nh4; 1888 uint32_t fibnum; 1889 1890 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1891 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1892 ("%s: not multicast", __func__)); 1893 1894 ifp = NULL; 1895 if (!in_nullhost(ina)) { 1896 INADDR_TO_IFP(ina, ifp); 1897 } else { 1898 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1899 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1900 ifp = nh4.nh_ifp; 1901 else { 1902 struct in_ifaddr *ia; 1903 struct ifnet *mifp; 1904 1905 mifp = NULL; 1906 IN_IFADDR_RLOCK(&in_ifa_tracker); 1907 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1908 mifp = ia->ia_ifp; 1909 if (!(mifp->if_flags & IFF_LOOPBACK) && 1910 (mifp->if_flags & IFF_MULTICAST)) { 1911 ifp = mifp; 1912 break; 1913 } 1914 } 1915 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1916 } 1917 } 1918 1919 return (ifp); 1920 } 1921 1922 /* 1923 * Join an IPv4 multicast group, possibly with a source. 1924 */ 1925 static int 1926 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1927 { 1928 struct group_source_req gsr; 1929 sockunion_t *gsa, *ssa; 1930 struct ifnet *ifp; 1931 struct in_mfilter *imf; 1932 struct ip_moptions *imo; 1933 struct in_multi *inm; 1934 struct in_msource *lims; 1935 size_t idx; 1936 int error, is_new; 1937 1938 ifp = NULL; 1939 imf = NULL; 1940 lims = NULL; 1941 error = 0; 1942 is_new = 0; 1943 1944 memset(&gsr, 0, sizeof(struct group_source_req)); 1945 gsa = (sockunion_t *)&gsr.gsr_group; 1946 gsa->ss.ss_family = AF_UNSPEC; 1947 ssa = (sockunion_t *)&gsr.gsr_source; 1948 ssa->ss.ss_family = AF_UNSPEC; 1949 1950 switch (sopt->sopt_name) { 1951 case IP_ADD_MEMBERSHIP: 1952 case IP_ADD_SOURCE_MEMBERSHIP: { 1953 struct ip_mreq_source mreqs; 1954 1955 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 1956 error = sooptcopyin(sopt, &mreqs, 1957 sizeof(struct ip_mreq), 1958 sizeof(struct ip_mreq)); 1959 /* 1960 * Do argument switcharoo from ip_mreq into 1961 * ip_mreq_source to avoid using two instances. 1962 */ 1963 mreqs.imr_interface = mreqs.imr_sourceaddr; 1964 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 1965 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1966 error = sooptcopyin(sopt, &mreqs, 1967 sizeof(struct ip_mreq_source), 1968 sizeof(struct ip_mreq_source)); 1969 } 1970 if (error) 1971 return (error); 1972 1973 gsa->sin.sin_family = AF_INET; 1974 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1975 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1976 1977 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1978 ssa->sin.sin_family = AF_INET; 1979 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1980 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1981 } 1982 1983 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1984 return (EINVAL); 1985 1986 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1987 mreqs.imr_interface); 1988 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1989 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1990 break; 1991 } 1992 1993 case MCAST_JOIN_GROUP: 1994 case MCAST_JOIN_SOURCE_GROUP: 1995 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 1996 error = sooptcopyin(sopt, &gsr, 1997 sizeof(struct group_req), 1998 sizeof(struct group_req)); 1999 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2000 error = sooptcopyin(sopt, &gsr, 2001 sizeof(struct group_source_req), 2002 sizeof(struct group_source_req)); 2003 } 2004 if (error) 2005 return (error); 2006 2007 if (gsa->sin.sin_family != AF_INET || 2008 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2009 return (EINVAL); 2010 2011 /* 2012 * Overwrite the port field if present, as the sockaddr 2013 * being copied in may be matched with a binary comparison. 2014 */ 2015 gsa->sin.sin_port = 0; 2016 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2017 if (ssa->sin.sin_family != AF_INET || 2018 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2019 return (EINVAL); 2020 ssa->sin.sin_port = 0; 2021 } 2022 2023 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2024 return (EINVAL); 2025 2026 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2027 return (EADDRNOTAVAIL); 2028 ifp = ifnet_byindex(gsr.gsr_interface); 2029 break; 2030 2031 default: 2032 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2033 __func__, sopt->sopt_name); 2034 return (EOPNOTSUPP); 2035 break; 2036 } 2037 2038 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2039 return (EADDRNOTAVAIL); 2040 2041 imo = inp_findmoptions(inp); 2042 idx = imo_match_group(imo, ifp, &gsa->sa); 2043 if (idx == -1) { 2044 is_new = 1; 2045 } else { 2046 inm = imo->imo_membership[idx]; 2047 imf = &imo->imo_mfilters[idx]; 2048 if (ssa->ss.ss_family != AF_UNSPEC) { 2049 /* 2050 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2051 * is an error. On an existing inclusive membership, 2052 * it just adds the source to the filter list. 2053 */ 2054 if (imf->imf_st[1] != MCAST_INCLUDE) { 2055 error = EINVAL; 2056 goto out_inp_locked; 2057 } 2058 /* 2059 * Throw out duplicates. 2060 * 2061 * XXX FIXME: This makes a naive assumption that 2062 * even if entries exist for *ssa in this imf, 2063 * they will be rejected as dupes, even if they 2064 * are not valid in the current mode (in-mode). 2065 * 2066 * in_msource is transactioned just as for anything 2067 * else in SSM -- but note naive use of inm_graft() 2068 * below for allocating new filter entries. 2069 * 2070 * This is only an issue if someone mixes the 2071 * full-state SSM API with the delta-based API, 2072 * which is discouraged in the relevant RFCs. 2073 */ 2074 lims = imo_match_source(imo, idx, &ssa->sa); 2075 if (lims != NULL /*&& 2076 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2077 error = EADDRNOTAVAIL; 2078 goto out_inp_locked; 2079 } 2080 } else { 2081 /* 2082 * MCAST_JOIN_GROUP on an existing exclusive 2083 * membership is an error; return EADDRINUSE 2084 * to preserve 4.4BSD API idempotence, and 2085 * avoid tedious detour to code below. 2086 * NOTE: This is bending RFC 3678 a bit. 2087 * 2088 * On an existing inclusive membership, this is also 2089 * an error; if you want to change filter mode, 2090 * you must use the userland API setsourcefilter(). 2091 * XXX We don't reject this for imf in UNDEFINED 2092 * state at t1, because allocation of a filter 2093 * is atomic with allocation of a membership. 2094 */ 2095 error = EINVAL; 2096 if (imf->imf_st[1] == MCAST_EXCLUDE) 2097 error = EADDRINUSE; 2098 goto out_inp_locked; 2099 } 2100 } 2101 2102 /* 2103 * Begin state merge transaction at socket layer. 2104 */ 2105 INP_WLOCK_ASSERT(inp); 2106 2107 if (is_new) { 2108 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2109 error = imo_grow(imo); 2110 if (error) 2111 goto out_inp_locked; 2112 } 2113 /* 2114 * Allocate the new slot upfront so we can deal with 2115 * grafting the new source filter in same code path 2116 * as for join-source on existing membership. 2117 */ 2118 idx = imo->imo_num_memberships; 2119 imo->imo_membership[idx] = NULL; 2120 imo->imo_num_memberships++; 2121 KASSERT(imo->imo_mfilters != NULL, 2122 ("%s: imf_mfilters vector was not allocated", __func__)); 2123 imf = &imo->imo_mfilters[idx]; 2124 KASSERT(RB_EMPTY(&imf->imf_sources), 2125 ("%s: imf_sources not empty", __func__)); 2126 } 2127 2128 /* 2129 * Graft new source into filter list for this inpcb's 2130 * membership of the group. The in_multi may not have 2131 * been allocated yet if this is a new membership, however, 2132 * the in_mfilter slot will be allocated and must be initialized. 2133 * 2134 * Note: Grafting of exclusive mode filters doesn't happen 2135 * in this path. 2136 * XXX: Should check for non-NULL lims (node exists but may 2137 * not be in-mode) for interop with full-state API. 2138 */ 2139 if (ssa->ss.ss_family != AF_UNSPEC) { 2140 /* Membership starts in IN mode */ 2141 if (is_new) { 2142 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2143 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2144 } else { 2145 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2146 } 2147 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2148 if (lims == NULL) { 2149 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2150 __func__); 2151 error = ENOMEM; 2152 goto out_imo_free; 2153 } 2154 } else { 2155 /* No address specified; Membership starts in EX mode */ 2156 if (is_new) { 2157 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2158 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2159 } 2160 } 2161 2162 /* 2163 * Begin state merge transaction at IGMP layer. 2164 */ 2165 IN_MULTI_LOCK(); 2166 2167 if (is_new) { 2168 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2169 &inm); 2170 if (error) { 2171 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2172 __func__); 2173 IN_MULTI_UNLOCK(); 2174 goto out_imo_free; 2175 } 2176 imo->imo_membership[idx] = inm; 2177 } else { 2178 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2179 error = inm_merge(inm, imf); 2180 if (error) { 2181 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2182 __func__); 2183 goto out_in_multi_locked; 2184 } 2185 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2186 error = igmp_change_state(inm); 2187 if (error) { 2188 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2189 __func__); 2190 goto out_in_multi_locked; 2191 } 2192 } 2193 2194 out_in_multi_locked: 2195 2196 IN_MULTI_UNLOCK(); 2197 2198 INP_WLOCK_ASSERT(inp); 2199 if (error) { 2200 imf_rollback(imf); 2201 if (is_new) 2202 imf_purge(imf); 2203 else 2204 imf_reap(imf); 2205 } else { 2206 imf_commit(imf); 2207 } 2208 2209 out_imo_free: 2210 if (error && is_new) { 2211 imo->imo_membership[idx] = NULL; 2212 --imo->imo_num_memberships; 2213 } 2214 2215 out_inp_locked: 2216 INP_WUNLOCK(inp); 2217 return (error); 2218 } 2219 2220 /* 2221 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2222 */ 2223 static int 2224 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2225 { 2226 struct group_source_req gsr; 2227 struct ip_mreq_source mreqs; 2228 sockunion_t *gsa, *ssa; 2229 struct ifnet *ifp; 2230 struct in_mfilter *imf; 2231 struct ip_moptions *imo; 2232 struct in_msource *ims; 2233 struct in_multi *inm; 2234 size_t idx; 2235 int error, is_final; 2236 2237 ifp = NULL; 2238 error = 0; 2239 is_final = 1; 2240 2241 memset(&gsr, 0, sizeof(struct group_source_req)); 2242 gsa = (sockunion_t *)&gsr.gsr_group; 2243 gsa->ss.ss_family = AF_UNSPEC; 2244 ssa = (sockunion_t *)&gsr.gsr_source; 2245 ssa->ss.ss_family = AF_UNSPEC; 2246 2247 switch (sopt->sopt_name) { 2248 case IP_DROP_MEMBERSHIP: 2249 case IP_DROP_SOURCE_MEMBERSHIP: 2250 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2251 error = sooptcopyin(sopt, &mreqs, 2252 sizeof(struct ip_mreq), 2253 sizeof(struct ip_mreq)); 2254 /* 2255 * Swap interface and sourceaddr arguments, 2256 * as ip_mreq and ip_mreq_source are laid 2257 * out differently. 2258 */ 2259 mreqs.imr_interface = mreqs.imr_sourceaddr; 2260 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2261 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2262 error = sooptcopyin(sopt, &mreqs, 2263 sizeof(struct ip_mreq_source), 2264 sizeof(struct ip_mreq_source)); 2265 } 2266 if (error) 2267 return (error); 2268 2269 gsa->sin.sin_family = AF_INET; 2270 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2271 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2272 2273 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2274 ssa->sin.sin_family = AF_INET; 2275 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2276 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2277 } 2278 2279 /* 2280 * Attempt to look up hinted ifp from interface address. 2281 * Fallthrough with null ifp iff lookup fails, to 2282 * preserve 4.4BSD mcast API idempotence. 2283 * XXX NOTE WELL: The RFC 3678 API is preferred because 2284 * using an IPv4 address as a key is racy. 2285 */ 2286 if (!in_nullhost(mreqs.imr_interface)) 2287 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2288 2289 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 2290 __func__, inet_ntoa(mreqs.imr_interface), ifp); 2291 2292 break; 2293 2294 case MCAST_LEAVE_GROUP: 2295 case MCAST_LEAVE_SOURCE_GROUP: 2296 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2297 error = sooptcopyin(sopt, &gsr, 2298 sizeof(struct group_req), 2299 sizeof(struct group_req)); 2300 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2301 error = sooptcopyin(sopt, &gsr, 2302 sizeof(struct group_source_req), 2303 sizeof(struct group_source_req)); 2304 } 2305 if (error) 2306 return (error); 2307 2308 if (gsa->sin.sin_family != AF_INET || 2309 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2310 return (EINVAL); 2311 2312 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2313 if (ssa->sin.sin_family != AF_INET || 2314 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2315 return (EINVAL); 2316 } 2317 2318 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2319 return (EADDRNOTAVAIL); 2320 2321 ifp = ifnet_byindex(gsr.gsr_interface); 2322 2323 if (ifp == NULL) 2324 return (EADDRNOTAVAIL); 2325 break; 2326 2327 default: 2328 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2329 __func__, sopt->sopt_name); 2330 return (EOPNOTSUPP); 2331 break; 2332 } 2333 2334 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2335 return (EINVAL); 2336 2337 /* 2338 * Find the membership in the membership array. 2339 */ 2340 imo = inp_findmoptions(inp); 2341 idx = imo_match_group(imo, ifp, &gsa->sa); 2342 if (idx == -1) { 2343 error = EADDRNOTAVAIL; 2344 goto out_inp_locked; 2345 } 2346 inm = imo->imo_membership[idx]; 2347 imf = &imo->imo_mfilters[idx]; 2348 2349 if (ssa->ss.ss_family != AF_UNSPEC) 2350 is_final = 0; 2351 2352 /* 2353 * Begin state merge transaction at socket layer. 2354 */ 2355 INP_WLOCK_ASSERT(inp); 2356 2357 /* 2358 * If we were instructed only to leave a given source, do so. 2359 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2360 */ 2361 if (is_final) { 2362 imf_leave(imf); 2363 } else { 2364 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2365 error = EADDRNOTAVAIL; 2366 goto out_inp_locked; 2367 } 2368 ims = imo_match_source(imo, idx, &ssa->sa); 2369 if (ims == NULL) { 2370 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 2371 inet_ntoa(ssa->sin.sin_addr), "not "); 2372 error = EADDRNOTAVAIL; 2373 goto out_inp_locked; 2374 } 2375 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2376 error = imf_prune(imf, &ssa->sin); 2377 if (error) { 2378 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2379 __func__); 2380 goto out_inp_locked; 2381 } 2382 } 2383 2384 /* 2385 * Begin state merge transaction at IGMP layer. 2386 */ 2387 IN_MULTI_LOCK(); 2388 2389 if (is_final) { 2390 /* 2391 * Give up the multicast address record to which 2392 * the membership points. 2393 */ 2394 (void)in_leavegroup_locked(inm, imf); 2395 } else { 2396 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2397 error = inm_merge(inm, imf); 2398 if (error) { 2399 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2400 __func__); 2401 goto out_in_multi_locked; 2402 } 2403 2404 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2405 error = igmp_change_state(inm); 2406 if (error) { 2407 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2408 __func__); 2409 } 2410 } 2411 2412 out_in_multi_locked: 2413 2414 IN_MULTI_UNLOCK(); 2415 2416 if (error) 2417 imf_rollback(imf); 2418 else 2419 imf_commit(imf); 2420 2421 imf_reap(imf); 2422 2423 if (is_final) { 2424 /* Remove the gap in the membership and filter array. */ 2425 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2426 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2427 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2428 } 2429 imo->imo_num_memberships--; 2430 } 2431 2432 out_inp_locked: 2433 INP_WUNLOCK(inp); 2434 return (error); 2435 } 2436 2437 /* 2438 * Select the interface for transmitting IPv4 multicast datagrams. 2439 * 2440 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2441 * may be passed to this socket option. An address of INADDR_ANY or an 2442 * interface index of 0 is used to remove a previous selection. 2443 * When no interface is selected, one is chosen for every send. 2444 */ 2445 static int 2446 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2447 { 2448 struct in_addr addr; 2449 struct ip_mreqn mreqn; 2450 struct ifnet *ifp; 2451 struct ip_moptions *imo; 2452 int error; 2453 2454 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2455 /* 2456 * An interface index was specified using the 2457 * Linux-derived ip_mreqn structure. 2458 */ 2459 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2460 sizeof(struct ip_mreqn)); 2461 if (error) 2462 return (error); 2463 2464 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2465 return (EINVAL); 2466 2467 if (mreqn.imr_ifindex == 0) { 2468 ifp = NULL; 2469 } else { 2470 ifp = ifnet_byindex(mreqn.imr_ifindex); 2471 if (ifp == NULL) 2472 return (EADDRNOTAVAIL); 2473 } 2474 } else { 2475 /* 2476 * An interface was specified by IPv4 address. 2477 * This is the traditional BSD usage. 2478 */ 2479 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2480 sizeof(struct in_addr)); 2481 if (error) 2482 return (error); 2483 if (in_nullhost(addr)) { 2484 ifp = NULL; 2485 } else { 2486 INADDR_TO_IFP(addr, ifp); 2487 if (ifp == NULL) 2488 return (EADDRNOTAVAIL); 2489 } 2490 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp, 2491 inet_ntoa(addr)); 2492 } 2493 2494 /* Reject interfaces which do not support multicast. */ 2495 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2496 return (EOPNOTSUPP); 2497 2498 imo = inp_findmoptions(inp); 2499 imo->imo_multicast_ifp = ifp; 2500 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2501 INP_WUNLOCK(inp); 2502 2503 return (0); 2504 } 2505 2506 /* 2507 * Atomically set source filters on a socket for an IPv4 multicast group. 2508 * 2509 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2510 */ 2511 static int 2512 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2513 { 2514 struct __msfilterreq msfr; 2515 sockunion_t *gsa; 2516 struct ifnet *ifp; 2517 struct in_mfilter *imf; 2518 struct ip_moptions *imo; 2519 struct in_multi *inm; 2520 size_t idx; 2521 int error; 2522 2523 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2524 sizeof(struct __msfilterreq)); 2525 if (error) 2526 return (error); 2527 2528 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2529 return (ENOBUFS); 2530 2531 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2532 msfr.msfr_fmode != MCAST_INCLUDE)) 2533 return (EINVAL); 2534 2535 if (msfr.msfr_group.ss_family != AF_INET || 2536 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2537 return (EINVAL); 2538 2539 gsa = (sockunion_t *)&msfr.msfr_group; 2540 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2541 return (EINVAL); 2542 2543 gsa->sin.sin_port = 0; /* ignore port */ 2544 2545 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2546 return (EADDRNOTAVAIL); 2547 2548 ifp = ifnet_byindex(msfr.msfr_ifindex); 2549 if (ifp == NULL) 2550 return (EADDRNOTAVAIL); 2551 2552 /* 2553 * Take the INP write lock. 2554 * Check if this socket is a member of this group. 2555 */ 2556 imo = inp_findmoptions(inp); 2557 idx = imo_match_group(imo, ifp, &gsa->sa); 2558 if (idx == -1 || imo->imo_mfilters == NULL) { 2559 error = EADDRNOTAVAIL; 2560 goto out_inp_locked; 2561 } 2562 inm = imo->imo_membership[idx]; 2563 imf = &imo->imo_mfilters[idx]; 2564 2565 /* 2566 * Begin state merge transaction at socket layer. 2567 */ 2568 INP_WLOCK_ASSERT(inp); 2569 2570 imf->imf_st[1] = msfr.msfr_fmode; 2571 2572 /* 2573 * Apply any new source filters, if present. 2574 * Make a copy of the user-space source vector so 2575 * that we may copy them with a single copyin. This 2576 * allows us to deal with page faults up-front. 2577 */ 2578 if (msfr.msfr_nsrcs > 0) { 2579 struct in_msource *lims; 2580 struct sockaddr_in *psin; 2581 struct sockaddr_storage *kss, *pkss; 2582 int i; 2583 2584 INP_WUNLOCK(inp); 2585 2586 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2587 __func__, (unsigned long)msfr.msfr_nsrcs); 2588 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2589 M_TEMP, M_WAITOK); 2590 error = copyin(msfr.msfr_srcs, kss, 2591 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2592 if (error) { 2593 free(kss, M_TEMP); 2594 return (error); 2595 } 2596 2597 INP_WLOCK(inp); 2598 2599 /* 2600 * Mark all source filters as UNDEFINED at t1. 2601 * Restore new group filter mode, as imf_leave() 2602 * will set it to INCLUDE. 2603 */ 2604 imf_leave(imf); 2605 imf->imf_st[1] = msfr.msfr_fmode; 2606 2607 /* 2608 * Update socket layer filters at t1, lazy-allocating 2609 * new entries. This saves a bunch of memory at the 2610 * cost of one RB_FIND() per source entry; duplicate 2611 * entries in the msfr_nsrcs vector are ignored. 2612 * If we encounter an error, rollback transaction. 2613 * 2614 * XXX This too could be replaced with a set-symmetric 2615 * difference like loop to avoid walking from root 2616 * every time, as the key space is common. 2617 */ 2618 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2619 psin = (struct sockaddr_in *)pkss; 2620 if (psin->sin_family != AF_INET) { 2621 error = EAFNOSUPPORT; 2622 break; 2623 } 2624 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2625 error = EINVAL; 2626 break; 2627 } 2628 error = imf_get_source(imf, psin, &lims); 2629 if (error) 2630 break; 2631 lims->imsl_st[1] = imf->imf_st[1]; 2632 } 2633 free(kss, M_TEMP); 2634 } 2635 2636 if (error) 2637 goto out_imf_rollback; 2638 2639 INP_WLOCK_ASSERT(inp); 2640 IN_MULTI_LOCK(); 2641 2642 /* 2643 * Begin state merge transaction at IGMP layer. 2644 */ 2645 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2646 error = inm_merge(inm, imf); 2647 if (error) { 2648 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2649 goto out_in_multi_locked; 2650 } 2651 2652 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2653 error = igmp_change_state(inm); 2654 if (error) 2655 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2656 2657 out_in_multi_locked: 2658 2659 IN_MULTI_UNLOCK(); 2660 2661 out_imf_rollback: 2662 if (error) 2663 imf_rollback(imf); 2664 else 2665 imf_commit(imf); 2666 2667 imf_reap(imf); 2668 2669 out_inp_locked: 2670 INP_WUNLOCK(inp); 2671 return (error); 2672 } 2673 2674 /* 2675 * Set the IP multicast options in response to user setsockopt(). 2676 * 2677 * Many of the socket options handled in this function duplicate the 2678 * functionality of socket options in the regular unicast API. However, 2679 * it is not possible to merge the duplicate code, because the idempotence 2680 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2681 * the effects of these options must be treated as separate and distinct. 2682 * 2683 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2684 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2685 * is refactored to no longer use vifs. 2686 */ 2687 int 2688 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2689 { 2690 struct ip_moptions *imo; 2691 int error; 2692 2693 error = 0; 2694 2695 /* 2696 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2697 * or is a divert socket, reject it. 2698 */ 2699 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2700 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2701 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2702 return (EOPNOTSUPP); 2703 2704 switch (sopt->sopt_name) { 2705 case IP_MULTICAST_VIF: { 2706 int vifi; 2707 /* 2708 * Select a multicast VIF for transmission. 2709 * Only useful if multicast forwarding is active. 2710 */ 2711 if (legal_vif_num == NULL) { 2712 error = EOPNOTSUPP; 2713 break; 2714 } 2715 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2716 if (error) 2717 break; 2718 if (!legal_vif_num(vifi) && (vifi != -1)) { 2719 error = EINVAL; 2720 break; 2721 } 2722 imo = inp_findmoptions(inp); 2723 imo->imo_multicast_vif = vifi; 2724 INP_WUNLOCK(inp); 2725 break; 2726 } 2727 2728 case IP_MULTICAST_IF: 2729 error = inp_set_multicast_if(inp, sopt); 2730 break; 2731 2732 case IP_MULTICAST_TTL: { 2733 u_char ttl; 2734 2735 /* 2736 * Set the IP time-to-live for outgoing multicast packets. 2737 * The original multicast API required a char argument, 2738 * which is inconsistent with the rest of the socket API. 2739 * We allow either a char or an int. 2740 */ 2741 if (sopt->sopt_valsize == sizeof(u_char)) { 2742 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2743 sizeof(u_char)); 2744 if (error) 2745 break; 2746 } else { 2747 u_int ittl; 2748 2749 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2750 sizeof(u_int)); 2751 if (error) 2752 break; 2753 if (ittl > 255) { 2754 error = EINVAL; 2755 break; 2756 } 2757 ttl = (u_char)ittl; 2758 } 2759 imo = inp_findmoptions(inp); 2760 imo->imo_multicast_ttl = ttl; 2761 INP_WUNLOCK(inp); 2762 break; 2763 } 2764 2765 case IP_MULTICAST_LOOP: { 2766 u_char loop; 2767 2768 /* 2769 * Set the loopback flag for outgoing multicast packets. 2770 * Must be zero or one. The original multicast API required a 2771 * char argument, which is inconsistent with the rest 2772 * of the socket API. We allow either a char or an int. 2773 */ 2774 if (sopt->sopt_valsize == sizeof(u_char)) { 2775 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2776 sizeof(u_char)); 2777 if (error) 2778 break; 2779 } else { 2780 u_int iloop; 2781 2782 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2783 sizeof(u_int)); 2784 if (error) 2785 break; 2786 loop = (u_char)iloop; 2787 } 2788 imo = inp_findmoptions(inp); 2789 imo->imo_multicast_loop = !!loop; 2790 INP_WUNLOCK(inp); 2791 break; 2792 } 2793 2794 case IP_ADD_MEMBERSHIP: 2795 case IP_ADD_SOURCE_MEMBERSHIP: 2796 case MCAST_JOIN_GROUP: 2797 case MCAST_JOIN_SOURCE_GROUP: 2798 error = inp_join_group(inp, sopt); 2799 break; 2800 2801 case IP_DROP_MEMBERSHIP: 2802 case IP_DROP_SOURCE_MEMBERSHIP: 2803 case MCAST_LEAVE_GROUP: 2804 case MCAST_LEAVE_SOURCE_GROUP: 2805 error = inp_leave_group(inp, sopt); 2806 break; 2807 2808 case IP_BLOCK_SOURCE: 2809 case IP_UNBLOCK_SOURCE: 2810 case MCAST_BLOCK_SOURCE: 2811 case MCAST_UNBLOCK_SOURCE: 2812 error = inp_block_unblock_source(inp, sopt); 2813 break; 2814 2815 case IP_MSFILTER: 2816 error = inp_set_source_filters(inp, sopt); 2817 break; 2818 2819 default: 2820 error = EOPNOTSUPP; 2821 break; 2822 } 2823 2824 INP_UNLOCK_ASSERT(inp); 2825 2826 return (error); 2827 } 2828 2829 /* 2830 * Expose IGMP's multicast filter mode and source list(s) to userland, 2831 * keyed by (ifindex, group). 2832 * The filter mode is written out as a uint32_t, followed by 2833 * 0..n of struct in_addr. 2834 * For use by ifmcstat(8). 2835 * SMPng: NOTE: unlocked read of ifindex space. 2836 */ 2837 static int 2838 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2839 { 2840 struct in_addr src, group; 2841 struct ifnet *ifp; 2842 struct ifmultiaddr *ifma; 2843 struct in_multi *inm; 2844 struct ip_msource *ims; 2845 int *name; 2846 int retval; 2847 u_int namelen; 2848 uint32_t fmode, ifindex; 2849 2850 name = (int *)arg1; 2851 namelen = arg2; 2852 2853 if (req->newptr != NULL) 2854 return (EPERM); 2855 2856 if (namelen != 2) 2857 return (EINVAL); 2858 2859 ifindex = name[0]; 2860 if (ifindex <= 0 || ifindex > V_if_index) { 2861 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2862 __func__, ifindex); 2863 return (ENOENT); 2864 } 2865 2866 group.s_addr = name[1]; 2867 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2868 CTR2(KTR_IGMPV3, "%s: group %s is not multicast", 2869 __func__, inet_ntoa(group)); 2870 return (EINVAL); 2871 } 2872 2873 ifp = ifnet_byindex(ifindex); 2874 if (ifp == NULL) { 2875 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2876 __func__, ifindex); 2877 return (ENOENT); 2878 } 2879 2880 retval = sysctl_wire_old_buffer(req, 2881 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2882 if (retval) 2883 return (retval); 2884 2885 IN_MULTI_LOCK(); 2886 2887 IF_ADDR_RLOCK(ifp); 2888 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2889 if (ifma->ifma_addr->sa_family != AF_INET || 2890 ifma->ifma_protospec == NULL) 2891 continue; 2892 inm = (struct in_multi *)ifma->ifma_protospec; 2893 if (!in_hosteq(inm->inm_addr, group)) 2894 continue; 2895 fmode = inm->inm_st[1].iss_fmode; 2896 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2897 if (retval != 0) 2898 break; 2899 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2900 #ifdef KTR 2901 struct in_addr ina; 2902 ina.s_addr = htonl(ims->ims_haddr); 2903 CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, 2904 inet_ntoa(ina)); 2905 #endif 2906 /* 2907 * Only copy-out sources which are in-mode. 2908 */ 2909 if (fmode != ims_get_mode(inm, ims, 1)) { 2910 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2911 __func__); 2912 continue; 2913 } 2914 src.s_addr = htonl(ims->ims_haddr); 2915 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2916 if (retval != 0) 2917 break; 2918 } 2919 } 2920 IF_ADDR_RUNLOCK(ifp); 2921 2922 IN_MULTI_UNLOCK(); 2923 2924 return (retval); 2925 } 2926 2927 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2928 2929 static const char *inm_modestrs[] = { "un", "in", "ex" }; 2930 2931 static const char * 2932 inm_mode_str(const int mode) 2933 { 2934 2935 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2936 return (inm_modestrs[mode]); 2937 return ("??"); 2938 } 2939 2940 static const char *inm_statestrs[] = { 2941 "not-member", 2942 "silent", 2943 "idle", 2944 "lazy", 2945 "sleeping", 2946 "awakening", 2947 "query-pending", 2948 "sg-query-pending", 2949 "leaving" 2950 }; 2951 2952 static const char * 2953 inm_state_str(const int state) 2954 { 2955 2956 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 2957 return (inm_statestrs[state]); 2958 return ("??"); 2959 } 2960 2961 /* 2962 * Dump an in_multi structure to the console. 2963 */ 2964 void 2965 inm_print(const struct in_multi *inm) 2966 { 2967 int t; 2968 2969 if ((ktr_mask & KTR_IGMPV3) == 0) 2970 return; 2971 2972 printf("%s: --- begin inm %p ---\n", __func__, inm); 2973 printf("addr %s ifp %p(%s) ifma %p\n", 2974 inet_ntoa(inm->inm_addr), 2975 inm->inm_ifp, 2976 inm->inm_ifp->if_xname, 2977 inm->inm_ifma); 2978 printf("timer %u state %s refcount %u scq.len %u\n", 2979 inm->inm_timer, 2980 inm_state_str(inm->inm_state), 2981 inm->inm_refcount, 2982 inm->inm_scq.mq_len); 2983 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 2984 inm->inm_igi, 2985 inm->inm_nsrc, 2986 inm->inm_sctimer, 2987 inm->inm_scrv); 2988 for (t = 0; t < 2; t++) { 2989 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 2990 inm_mode_str(inm->inm_st[t].iss_fmode), 2991 inm->inm_st[t].iss_asm, 2992 inm->inm_st[t].iss_ex, 2993 inm->inm_st[t].iss_in, 2994 inm->inm_st[t].iss_rec); 2995 } 2996 printf("%s: --- end inm %p ---\n", __func__, inm); 2997 } 2998 2999 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3000 3001 void 3002 inm_print(const struct in_multi *inm) 3003 { 3004 3005 } 3006 3007 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3008 3009 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3010