1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 2005 Robert N. M. Watson. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * IPv4 multicast socket, group, and socket option processing module. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/lock.h> 42 #include <sys/malloc.h> 43 #include <sys/mbuf.h> 44 #include <sys/protosw.h> 45 #include <sys/rmlock.h> 46 #include <sys/socket.h> 47 #include <sys/socketvar.h> 48 #include <sys/protosw.h> 49 #include <sys/sysctl.h> 50 #include <sys/ktr.h> 51 #include <sys/taskqueue.h> 52 #include <sys/tree.h> 53 54 #include <net/if.h> 55 #include <net/if_var.h> 56 #include <net/if_dl.h> 57 #include <net/route.h> 58 #include <net/vnet.h> 59 60 #include <netinet/in.h> 61 #include <netinet/in_systm.h> 62 #include <netinet/in_fib.h> 63 #include <netinet/in_pcb.h> 64 #include <netinet/in_var.h> 65 #include <netinet/ip_var.h> 66 #include <netinet/igmp_var.h> 67 68 #ifndef KTR_IGMPV3 69 #define KTR_IGMPV3 KTR_INET 70 #endif 71 72 #ifndef __SOCKUNION_DECLARED 73 union sockunion { 74 struct sockaddr_storage ss; 75 struct sockaddr sa; 76 struct sockaddr_dl sdl; 77 struct sockaddr_in sin; 78 }; 79 typedef union sockunion sockunion_t; 80 #define __SOCKUNION_DECLARED 81 #endif /* __SOCKUNION_DECLARED */ 82 83 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 84 "IPv4 multicast PCB-layer source filter"); 85 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 86 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 87 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 88 "IPv4 multicast IGMP-layer source filter"); 89 90 /* 91 * Locking: 92 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 93 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 94 * it can be taken by code in net/if.c also. 95 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 96 * 97 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly 98 * any need for in_multi itself to be virtualized -- it is bound to an ifp 99 * anyway no matter what happens. 100 */ 101 struct mtx in_multi_mtx; 102 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF); 103 104 /* 105 * Functions with non-static linkage defined in this file should be 106 * declared in in_var.h: 107 * imo_multi_filter() 108 * in_addmulti() 109 * in_delmulti() 110 * in_joingroup() 111 * in_joingroup_locked() 112 * in_leavegroup() 113 * in_leavegroup_locked() 114 * and ip_var.h: 115 * inp_freemoptions() 116 * inp_getmoptions() 117 * inp_setmoptions() 118 * 119 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 120 * and in_delmulti(). 121 */ 122 static void imf_commit(struct in_mfilter *); 123 static int imf_get_source(struct in_mfilter *imf, 124 const struct sockaddr_in *psin, 125 struct in_msource **); 126 static struct in_msource * 127 imf_graft(struct in_mfilter *, const uint8_t, 128 const struct sockaddr_in *); 129 static void imf_leave(struct in_mfilter *); 130 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 131 static void imf_purge(struct in_mfilter *); 132 static void imf_rollback(struct in_mfilter *); 133 static void imf_reap(struct in_mfilter *); 134 static int imo_grow(struct ip_moptions *); 135 static size_t imo_match_group(const struct ip_moptions *, 136 const struct ifnet *, const struct sockaddr *); 137 static struct in_msource * 138 imo_match_source(const struct ip_moptions *, const size_t, 139 const struct sockaddr *); 140 static void ims_merge(struct ip_msource *ims, 141 const struct in_msource *lims, const int rollback); 142 static int in_getmulti(struct ifnet *, const struct in_addr *, 143 struct in_multi **); 144 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 145 const int noalloc, struct ip_msource **pims); 146 #ifdef KTR 147 static int inm_is_ifp_detached(const struct in_multi *); 148 #endif 149 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 150 static void inm_purge(struct in_multi *); 151 static void inm_reap(struct in_multi *); 152 static struct ip_moptions * 153 inp_findmoptions(struct inpcb *); 154 static void inp_freemoptions_internal(struct ip_moptions *); 155 static void inp_gcmoptions(void *, int); 156 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 157 static int inp_join_group(struct inpcb *, struct sockopt *); 158 static int inp_leave_group(struct inpcb *, struct sockopt *); 159 static struct ifnet * 160 inp_lookup_mcast_ifp(const struct inpcb *, 161 const struct sockaddr_in *, const struct in_addr); 162 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 163 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 164 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 165 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 166 167 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 168 "IPv4 multicast"); 169 170 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 171 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 172 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 173 "Max source filters per group"); 174 175 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 176 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 177 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 178 "Max source filters per socket"); 179 180 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 181 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 182 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 183 184 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 185 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 186 "Per-interface stack-wide source filters"); 187 188 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 189 STAILQ_HEAD_INITIALIZER(imo_gc_list); 190 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 191 192 #ifdef KTR 193 /* 194 * Inline function which wraps assertions for a valid ifp. 195 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 196 * is detached. 197 */ 198 static int __inline 199 inm_is_ifp_detached(const struct in_multi *inm) 200 { 201 struct ifnet *ifp; 202 203 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 204 ifp = inm->inm_ifma->ifma_ifp; 205 if (ifp != NULL) { 206 /* 207 * Sanity check that netinet's notion of ifp is the 208 * same as net's. 209 */ 210 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 211 } 212 213 return (ifp == NULL); 214 } 215 #endif 216 217 /* 218 * Initialize an in_mfilter structure to a known state at t0, t1 219 * with an empty source filter list. 220 */ 221 static __inline void 222 imf_init(struct in_mfilter *imf, const int st0, const int st1) 223 { 224 memset(imf, 0, sizeof(struct in_mfilter)); 225 RB_INIT(&imf->imf_sources); 226 imf->imf_st[0] = st0; 227 imf->imf_st[1] = st1; 228 } 229 230 /* 231 * Function for looking up an in_multi record for an IPv4 multicast address 232 * on a given interface. ifp must be valid. If no record found, return NULL. 233 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. 234 */ 235 struct in_multi * 236 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 237 { 238 struct ifmultiaddr *ifma; 239 struct in_multi *inm; 240 241 IN_MULTI_LOCK_ASSERT(); 242 IF_ADDR_LOCK_ASSERT(ifp); 243 244 inm = NULL; 245 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 246 if (ifma->ifma_addr->sa_family == AF_INET) { 247 inm = (struct in_multi *)ifma->ifma_protospec; 248 if (inm->inm_addr.s_addr == ina.s_addr) 249 break; 250 inm = NULL; 251 } 252 } 253 return (inm); 254 } 255 256 /* 257 * Wrapper for inm_lookup_locked(). 258 * The IF_ADDR_LOCK will be taken on ifp and released on return. 259 */ 260 struct in_multi * 261 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 262 { 263 struct in_multi *inm; 264 265 IN_MULTI_LOCK_ASSERT(); 266 IF_ADDR_RLOCK(ifp); 267 inm = inm_lookup_locked(ifp, ina); 268 IF_ADDR_RUNLOCK(ifp); 269 270 return (inm); 271 } 272 273 /* 274 * Resize the ip_moptions vector to the next power-of-two minus 1. 275 * May be called with locks held; do not sleep. 276 */ 277 static int 278 imo_grow(struct ip_moptions *imo) 279 { 280 struct in_multi **nmships; 281 struct in_multi **omships; 282 struct in_mfilter *nmfilters; 283 struct in_mfilter *omfilters; 284 size_t idx; 285 size_t newmax; 286 size_t oldmax; 287 288 nmships = NULL; 289 nmfilters = NULL; 290 omships = imo->imo_membership; 291 omfilters = imo->imo_mfilters; 292 oldmax = imo->imo_max_memberships; 293 newmax = ((oldmax + 1) * 2) - 1; 294 295 if (newmax <= IP_MAX_MEMBERSHIPS) { 296 nmships = (struct in_multi **)realloc(omships, 297 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 298 nmfilters = (struct in_mfilter *)realloc(omfilters, 299 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 300 if (nmships != NULL && nmfilters != NULL) { 301 /* Initialize newly allocated source filter heads. */ 302 for (idx = oldmax; idx < newmax; idx++) { 303 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 304 MCAST_EXCLUDE); 305 } 306 imo->imo_max_memberships = newmax; 307 imo->imo_membership = nmships; 308 imo->imo_mfilters = nmfilters; 309 } 310 } 311 312 if (nmships == NULL || nmfilters == NULL) { 313 if (nmships != NULL) 314 free(nmships, M_IPMOPTS); 315 if (nmfilters != NULL) 316 free(nmfilters, M_INMFILTER); 317 return (ETOOMANYREFS); 318 } 319 320 return (0); 321 } 322 323 /* 324 * Find an IPv4 multicast group entry for this ip_moptions instance 325 * which matches the specified group, and optionally an interface. 326 * Return its index into the array, or -1 if not found. 327 */ 328 static size_t 329 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 330 const struct sockaddr *group) 331 { 332 const struct sockaddr_in *gsin; 333 struct in_multi **pinm; 334 int idx; 335 int nmships; 336 337 gsin = (const struct sockaddr_in *)group; 338 339 /* The imo_membership array may be lazy allocated. */ 340 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 341 return (-1); 342 343 nmships = imo->imo_num_memberships; 344 pinm = &imo->imo_membership[0]; 345 for (idx = 0; idx < nmships; idx++, pinm++) { 346 if (*pinm == NULL) 347 continue; 348 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 349 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 350 break; 351 } 352 } 353 if (idx >= nmships) 354 idx = -1; 355 356 return (idx); 357 } 358 359 /* 360 * Find an IPv4 multicast source entry for this imo which matches 361 * the given group index for this socket, and source address. 362 * 363 * NOTE: This does not check if the entry is in-mode, merely if 364 * it exists, which may not be the desired behaviour. 365 */ 366 static struct in_msource * 367 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 368 const struct sockaddr *src) 369 { 370 struct ip_msource find; 371 struct in_mfilter *imf; 372 struct ip_msource *ims; 373 const sockunion_t *psa; 374 375 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 376 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 377 ("%s: invalid index %d\n", __func__, (int)gidx)); 378 379 /* The imo_mfilters array may be lazy allocated. */ 380 if (imo->imo_mfilters == NULL) 381 return (NULL); 382 imf = &imo->imo_mfilters[gidx]; 383 384 /* Source trees are keyed in host byte order. */ 385 psa = (const sockunion_t *)src; 386 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 387 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 388 389 return ((struct in_msource *)ims); 390 } 391 392 /* 393 * Perform filtering for multicast datagrams on a socket by group and source. 394 * 395 * Returns 0 if a datagram should be allowed through, or various error codes 396 * if the socket was not a member of the group, or the source was muted, etc. 397 */ 398 int 399 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 400 const struct sockaddr *group, const struct sockaddr *src) 401 { 402 size_t gidx; 403 struct in_msource *ims; 404 int mode; 405 406 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 407 408 gidx = imo_match_group(imo, ifp, group); 409 if (gidx == -1) 410 return (MCAST_NOTGMEMBER); 411 412 /* 413 * Check if the source was included in an (S,G) join. 414 * Allow reception on exclusive memberships by default, 415 * reject reception on inclusive memberships by default. 416 * Exclude source only if an in-mode exclude filter exists. 417 * Include source only if an in-mode include filter exists. 418 * NOTE: We are comparing group state here at IGMP t1 (now) 419 * with socket-layer t0 (since last downcall). 420 */ 421 mode = imo->imo_mfilters[gidx].imf_st[1]; 422 ims = imo_match_source(imo, gidx, src); 423 424 if ((ims == NULL && mode == MCAST_INCLUDE) || 425 (ims != NULL && ims->imsl_st[0] != mode)) 426 return (MCAST_NOTSMEMBER); 427 428 return (MCAST_PASS); 429 } 430 431 /* 432 * Find and return a reference to an in_multi record for (ifp, group), 433 * and bump its reference count. 434 * If one does not exist, try to allocate it, and update link-layer multicast 435 * filters on ifp to listen for group. 436 * Assumes the IN_MULTI lock is held across the call. 437 * Return 0 if successful, otherwise return an appropriate error code. 438 */ 439 static int 440 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 441 struct in_multi **pinm) 442 { 443 struct sockaddr_in gsin; 444 struct ifmultiaddr *ifma; 445 struct in_ifinfo *ii; 446 struct in_multi *inm; 447 int error; 448 449 IN_MULTI_LOCK_ASSERT(); 450 451 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 452 453 inm = inm_lookup(ifp, *group); 454 if (inm != NULL) { 455 /* 456 * If we already joined this group, just bump the 457 * refcount and return it. 458 */ 459 KASSERT(inm->inm_refcount >= 1, 460 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 461 ++inm->inm_refcount; 462 *pinm = inm; 463 return (0); 464 } 465 466 memset(&gsin, 0, sizeof(gsin)); 467 gsin.sin_family = AF_INET; 468 gsin.sin_len = sizeof(struct sockaddr_in); 469 gsin.sin_addr = *group; 470 471 /* 472 * Check if a link-layer group is already associated 473 * with this network-layer group on the given ifnet. 474 */ 475 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 476 if (error != 0) 477 return (error); 478 479 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 480 IF_ADDR_WLOCK(ifp); 481 482 /* 483 * If something other than netinet is occupying the link-layer 484 * group, print a meaningful error message and back out of 485 * the allocation. 486 * Otherwise, bump the refcount on the existing network-layer 487 * group association and return it. 488 */ 489 if (ifma->ifma_protospec != NULL) { 490 inm = (struct in_multi *)ifma->ifma_protospec; 491 #ifdef INVARIANTS 492 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 493 __func__)); 494 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 495 ("%s: ifma not AF_INET", __func__)); 496 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 497 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 498 !in_hosteq(inm->inm_addr, *group)) { 499 char addrbuf[INET_ADDRSTRLEN]; 500 501 panic("%s: ifma %p is inconsistent with %p (%s)", 502 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 503 } 504 #endif 505 ++inm->inm_refcount; 506 *pinm = inm; 507 IF_ADDR_WUNLOCK(ifp); 508 return (0); 509 } 510 511 IF_ADDR_WLOCK_ASSERT(ifp); 512 513 /* 514 * A new in_multi record is needed; allocate and initialize it. 515 * We DO NOT perform an IGMP join as the in_ layer may need to 516 * push an initial source list down to IGMP to support SSM. 517 * 518 * The initial source filter state is INCLUDE, {} as per the RFC. 519 */ 520 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 521 if (inm == NULL) { 522 IF_ADDR_WUNLOCK(ifp); 523 if_delmulti_ifma(ifma); 524 return (ENOMEM); 525 } 526 inm->inm_addr = *group; 527 inm->inm_ifp = ifp; 528 inm->inm_igi = ii->ii_igmp; 529 inm->inm_ifma = ifma; 530 inm->inm_refcount = 1; 531 inm->inm_state = IGMP_NOT_MEMBER; 532 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 533 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 534 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 535 RB_INIT(&inm->inm_srcs); 536 537 ifma->ifma_protospec = inm; 538 539 *pinm = inm; 540 541 IF_ADDR_WUNLOCK(ifp); 542 return (0); 543 } 544 545 /* 546 * Drop a reference to an in_multi record. 547 * 548 * If the refcount drops to 0, free the in_multi record and 549 * delete the underlying link-layer membership. 550 */ 551 void 552 inm_release_locked(struct in_multi *inm) 553 { 554 struct ifmultiaddr *ifma; 555 556 IN_MULTI_LOCK_ASSERT(); 557 558 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 559 560 if (--inm->inm_refcount > 0) { 561 CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__, 562 inm->inm_refcount); 563 return; 564 } 565 566 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 567 568 ifma = inm->inm_ifma; 569 570 /* XXX this access is not covered by IF_ADDR_LOCK */ 571 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 572 KASSERT(ifma->ifma_protospec == inm, 573 ("%s: ifma_protospec != inm", __func__)); 574 ifma->ifma_protospec = NULL; 575 576 inm_purge(inm); 577 578 free(inm, M_IPMADDR); 579 580 if_delmulti_ifma(ifma); 581 } 582 583 /* 584 * Clear recorded source entries for a group. 585 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 586 * FIXME: Should reap. 587 */ 588 void 589 inm_clear_recorded(struct in_multi *inm) 590 { 591 struct ip_msource *ims; 592 593 IN_MULTI_LOCK_ASSERT(); 594 595 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 596 if (ims->ims_stp) { 597 ims->ims_stp = 0; 598 --inm->inm_st[1].iss_rec; 599 } 600 } 601 KASSERT(inm->inm_st[1].iss_rec == 0, 602 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 603 } 604 605 /* 606 * Record a source as pending for a Source-Group IGMPv3 query. 607 * This lives here as it modifies the shared tree. 608 * 609 * inm is the group descriptor. 610 * naddr is the address of the source to record in network-byte order. 611 * 612 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 613 * lazy-allocate a source node in response to an SG query. 614 * Otherwise, no allocation is performed. This saves some memory 615 * with the trade-off that the source will not be reported to the 616 * router if joined in the window between the query response and 617 * the group actually being joined on the local host. 618 * 619 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 620 * This turns off the allocation of a recorded source entry if 621 * the group has not been joined. 622 * 623 * Return 0 if the source didn't exist or was already marked as recorded. 624 * Return 1 if the source was marked as recorded by this function. 625 * Return <0 if any error occurred (negated errno code). 626 */ 627 int 628 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 629 { 630 struct ip_msource find; 631 struct ip_msource *ims, *nims; 632 633 IN_MULTI_LOCK_ASSERT(); 634 635 find.ims_haddr = ntohl(naddr); 636 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 637 if (ims && ims->ims_stp) 638 return (0); 639 if (ims == NULL) { 640 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 641 return (-ENOSPC); 642 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 643 M_NOWAIT | M_ZERO); 644 if (nims == NULL) 645 return (-ENOMEM); 646 nims->ims_haddr = find.ims_haddr; 647 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 648 ++inm->inm_nsrc; 649 ims = nims; 650 } 651 652 /* 653 * Mark the source as recorded and update the recorded 654 * source count. 655 */ 656 ++ims->ims_stp; 657 ++inm->inm_st[1].iss_rec; 658 659 return (1); 660 } 661 662 /* 663 * Return a pointer to an in_msource owned by an in_mfilter, 664 * given its source address. 665 * Lazy-allocate if needed. If this is a new entry its filter state is 666 * undefined at t0. 667 * 668 * imf is the filter set being modified. 669 * haddr is the source address in *host* byte-order. 670 * 671 * SMPng: May be called with locks held; malloc must not block. 672 */ 673 static int 674 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 675 struct in_msource **plims) 676 { 677 struct ip_msource find; 678 struct ip_msource *ims, *nims; 679 struct in_msource *lims; 680 int error; 681 682 error = 0; 683 ims = NULL; 684 lims = NULL; 685 686 /* key is host byte order */ 687 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 688 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 689 lims = (struct in_msource *)ims; 690 if (lims == NULL) { 691 if (imf->imf_nsrc == in_mcast_maxsocksrc) 692 return (ENOSPC); 693 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 694 M_NOWAIT | M_ZERO); 695 if (nims == NULL) 696 return (ENOMEM); 697 lims = (struct in_msource *)nims; 698 lims->ims_haddr = find.ims_haddr; 699 lims->imsl_st[0] = MCAST_UNDEFINED; 700 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 701 ++imf->imf_nsrc; 702 } 703 704 *plims = lims; 705 706 return (error); 707 } 708 709 /* 710 * Graft a source entry into an existing socket-layer filter set, 711 * maintaining any required invariants and checking allocations. 712 * 713 * The source is marked as being in the new filter mode at t1. 714 * 715 * Return the pointer to the new node, otherwise return NULL. 716 */ 717 static struct in_msource * 718 imf_graft(struct in_mfilter *imf, const uint8_t st1, 719 const struct sockaddr_in *psin) 720 { 721 struct ip_msource *nims; 722 struct in_msource *lims; 723 724 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 725 M_NOWAIT | M_ZERO); 726 if (nims == NULL) 727 return (NULL); 728 lims = (struct in_msource *)nims; 729 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 730 lims->imsl_st[0] = MCAST_UNDEFINED; 731 lims->imsl_st[1] = st1; 732 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 733 ++imf->imf_nsrc; 734 735 return (lims); 736 } 737 738 /* 739 * Prune a source entry from an existing socket-layer filter set, 740 * maintaining any required invariants and checking allocations. 741 * 742 * The source is marked as being left at t1, it is not freed. 743 * 744 * Return 0 if no error occurred, otherwise return an errno value. 745 */ 746 static int 747 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 748 { 749 struct ip_msource find; 750 struct ip_msource *ims; 751 struct in_msource *lims; 752 753 /* key is host byte order */ 754 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 755 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 756 if (ims == NULL) 757 return (ENOENT); 758 lims = (struct in_msource *)ims; 759 lims->imsl_st[1] = MCAST_UNDEFINED; 760 return (0); 761 } 762 763 /* 764 * Revert socket-layer filter set deltas at t1 to t0 state. 765 */ 766 static void 767 imf_rollback(struct in_mfilter *imf) 768 { 769 struct ip_msource *ims, *tims; 770 struct in_msource *lims; 771 772 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 773 lims = (struct in_msource *)ims; 774 if (lims->imsl_st[0] == lims->imsl_st[1]) { 775 /* no change at t1 */ 776 continue; 777 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 778 /* revert change to existing source at t1 */ 779 lims->imsl_st[1] = lims->imsl_st[0]; 780 } else { 781 /* revert source added t1 */ 782 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 783 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 784 free(ims, M_INMFILTER); 785 imf->imf_nsrc--; 786 } 787 } 788 imf->imf_st[1] = imf->imf_st[0]; 789 } 790 791 /* 792 * Mark socket-layer filter set as INCLUDE {} at t1. 793 */ 794 static void 795 imf_leave(struct in_mfilter *imf) 796 { 797 struct ip_msource *ims; 798 struct in_msource *lims; 799 800 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 801 lims = (struct in_msource *)ims; 802 lims->imsl_st[1] = MCAST_UNDEFINED; 803 } 804 imf->imf_st[1] = MCAST_INCLUDE; 805 } 806 807 /* 808 * Mark socket-layer filter set deltas as committed. 809 */ 810 static void 811 imf_commit(struct in_mfilter *imf) 812 { 813 struct ip_msource *ims; 814 struct in_msource *lims; 815 816 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 817 lims = (struct in_msource *)ims; 818 lims->imsl_st[0] = lims->imsl_st[1]; 819 } 820 imf->imf_st[0] = imf->imf_st[1]; 821 } 822 823 /* 824 * Reap unreferenced sources from socket-layer filter set. 825 */ 826 static void 827 imf_reap(struct in_mfilter *imf) 828 { 829 struct ip_msource *ims, *tims; 830 struct in_msource *lims; 831 832 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 833 lims = (struct in_msource *)ims; 834 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 835 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 836 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 837 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 838 free(ims, M_INMFILTER); 839 imf->imf_nsrc--; 840 } 841 } 842 } 843 844 /* 845 * Purge socket-layer filter set. 846 */ 847 static void 848 imf_purge(struct in_mfilter *imf) 849 { 850 struct ip_msource *ims, *tims; 851 852 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 853 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 854 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 855 free(ims, M_INMFILTER); 856 imf->imf_nsrc--; 857 } 858 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 859 KASSERT(RB_EMPTY(&imf->imf_sources), 860 ("%s: imf_sources not empty", __func__)); 861 } 862 863 /* 864 * Look up a source filter entry for a multicast group. 865 * 866 * inm is the group descriptor to work with. 867 * haddr is the host-byte-order IPv4 address to look up. 868 * noalloc may be non-zero to suppress allocation of sources. 869 * *pims will be set to the address of the retrieved or allocated source. 870 * 871 * SMPng: NOTE: may be called with locks held. 872 * Return 0 if successful, otherwise return a non-zero error code. 873 */ 874 static int 875 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 876 const int noalloc, struct ip_msource **pims) 877 { 878 struct ip_msource find; 879 struct ip_msource *ims, *nims; 880 881 find.ims_haddr = haddr; 882 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 883 if (ims == NULL && !noalloc) { 884 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 885 return (ENOSPC); 886 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 887 M_NOWAIT | M_ZERO); 888 if (nims == NULL) 889 return (ENOMEM); 890 nims->ims_haddr = haddr; 891 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 892 ++inm->inm_nsrc; 893 ims = nims; 894 #ifdef KTR 895 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 896 haddr, ims); 897 #endif 898 } 899 900 *pims = ims; 901 return (0); 902 } 903 904 /* 905 * Merge socket-layer source into IGMP-layer source. 906 * If rollback is non-zero, perform the inverse of the merge. 907 */ 908 static void 909 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 910 const int rollback) 911 { 912 int n = rollback ? -1 : 1; 913 914 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 915 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 916 __func__, n, ims->ims_haddr); 917 ims->ims_st[1].ex -= n; 918 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 919 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 920 __func__, n, ims->ims_haddr); 921 ims->ims_st[1].in -= n; 922 } 923 924 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 925 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 926 __func__, n, ims->ims_haddr); 927 ims->ims_st[1].ex += n; 928 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 929 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 930 __func__, n, ims->ims_haddr); 931 ims->ims_st[1].in += n; 932 } 933 } 934 935 /* 936 * Atomically update the global in_multi state, when a membership's 937 * filter list is being updated in any way. 938 * 939 * imf is the per-inpcb-membership group filter pointer. 940 * A fake imf may be passed for in-kernel consumers. 941 * 942 * XXX This is a candidate for a set-symmetric-difference style loop 943 * which would eliminate the repeated lookup from root of ims nodes, 944 * as they share the same key space. 945 * 946 * If any error occurred this function will back out of refcounts 947 * and return a non-zero value. 948 */ 949 static int 950 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 951 { 952 struct ip_msource *ims, *nims; 953 struct in_msource *lims; 954 int schanged, error; 955 int nsrc0, nsrc1; 956 957 schanged = 0; 958 error = 0; 959 nsrc1 = nsrc0 = 0; 960 961 /* 962 * Update the source filters first, as this may fail. 963 * Maintain count of in-mode filters at t0, t1. These are 964 * used to work out if we transition into ASM mode or not. 965 * Maintain a count of source filters whose state was 966 * actually modified by this operation. 967 */ 968 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 969 lims = (struct in_msource *)ims; 970 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 971 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 972 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 973 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 974 ++schanged; 975 if (error) 976 break; 977 ims_merge(nims, lims, 0); 978 } 979 if (error) { 980 struct ip_msource *bims; 981 982 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 983 lims = (struct in_msource *)ims; 984 if (lims->imsl_st[0] == lims->imsl_st[1]) 985 continue; 986 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 987 if (bims == NULL) 988 continue; 989 ims_merge(bims, lims, 1); 990 } 991 goto out_reap; 992 } 993 994 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 995 __func__, nsrc0, nsrc1); 996 997 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 998 if (imf->imf_st[0] == imf->imf_st[1] && 999 imf->imf_st[1] == MCAST_INCLUDE) { 1000 if (nsrc1 == 0) { 1001 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1002 --inm->inm_st[1].iss_in; 1003 } 1004 } 1005 1006 /* Handle filter mode transition on socket. */ 1007 if (imf->imf_st[0] != imf->imf_st[1]) { 1008 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1009 __func__, imf->imf_st[0], imf->imf_st[1]); 1010 1011 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1012 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1013 --inm->inm_st[1].iss_ex; 1014 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1015 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1016 --inm->inm_st[1].iss_in; 1017 } 1018 1019 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1020 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1021 inm->inm_st[1].iss_ex++; 1022 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1023 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1024 inm->inm_st[1].iss_in++; 1025 } 1026 } 1027 1028 /* 1029 * Track inm filter state in terms of listener counts. 1030 * If there are any exclusive listeners, stack-wide 1031 * membership is exclusive. 1032 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1033 * If no listeners remain, state is undefined at t1, 1034 * and the IGMP lifecycle for this group should finish. 1035 */ 1036 if (inm->inm_st[1].iss_ex > 0) { 1037 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1038 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1039 } else if (inm->inm_st[1].iss_in > 0) { 1040 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1041 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1042 } else { 1043 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1044 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1045 } 1046 1047 /* Decrement ASM listener count on transition out of ASM mode. */ 1048 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1049 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1050 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1051 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1052 --inm->inm_st[1].iss_asm; 1053 } 1054 } 1055 1056 /* Increment ASM listener count on transition to ASM mode. */ 1057 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1058 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1059 inm->inm_st[1].iss_asm++; 1060 } 1061 1062 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1063 inm_print(inm); 1064 1065 out_reap: 1066 if (schanged > 0) { 1067 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1068 inm_reap(inm); 1069 } 1070 return (error); 1071 } 1072 1073 /* 1074 * Mark an in_multi's filter set deltas as committed. 1075 * Called by IGMP after a state change has been enqueued. 1076 */ 1077 void 1078 inm_commit(struct in_multi *inm) 1079 { 1080 struct ip_msource *ims; 1081 1082 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1083 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1084 inm_print(inm); 1085 1086 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1087 ims->ims_st[0] = ims->ims_st[1]; 1088 } 1089 inm->inm_st[0] = inm->inm_st[1]; 1090 } 1091 1092 /* 1093 * Reap unreferenced nodes from an in_multi's filter set. 1094 */ 1095 static void 1096 inm_reap(struct in_multi *inm) 1097 { 1098 struct ip_msource *ims, *tims; 1099 1100 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1101 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1102 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1103 ims->ims_stp != 0) 1104 continue; 1105 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1106 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1107 free(ims, M_IPMSOURCE); 1108 inm->inm_nsrc--; 1109 } 1110 } 1111 1112 /* 1113 * Purge all source nodes from an in_multi's filter set. 1114 */ 1115 static void 1116 inm_purge(struct in_multi *inm) 1117 { 1118 struct ip_msource *ims, *tims; 1119 1120 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1121 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1122 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1123 free(ims, M_IPMSOURCE); 1124 inm->inm_nsrc--; 1125 } 1126 } 1127 1128 /* 1129 * Join a multicast group; unlocked entry point. 1130 * 1131 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1132 * is not held. Fortunately, ifp is unlikely to have been detached 1133 * at this point, so we assume it's OK to recurse. 1134 */ 1135 int 1136 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1137 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1138 { 1139 int error; 1140 1141 IN_MULTI_LOCK(); 1142 error = in_joingroup_locked(ifp, gina, imf, pinm); 1143 IN_MULTI_UNLOCK(); 1144 1145 return (error); 1146 } 1147 1148 /* 1149 * Join a multicast group; real entry point. 1150 * 1151 * Only preserves atomicity at inm level. 1152 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1153 * 1154 * If the IGMP downcall fails, the group is not joined, and an error 1155 * code is returned. 1156 */ 1157 int 1158 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1159 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1160 { 1161 struct in_mfilter timf; 1162 struct in_multi *inm; 1163 int error; 1164 1165 IN_MULTI_LOCK_ASSERT(); 1166 1167 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1168 ntohl(gina->s_addr), ifp, ifp->if_xname); 1169 1170 error = 0; 1171 inm = NULL; 1172 1173 /* 1174 * If no imf was specified (i.e. kernel consumer), 1175 * fake one up and assume it is an ASM join. 1176 */ 1177 if (imf == NULL) { 1178 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1179 imf = &timf; 1180 } 1181 1182 error = in_getmulti(ifp, gina, &inm); 1183 if (error) { 1184 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1185 return (error); 1186 } 1187 1188 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1189 error = inm_merge(inm, imf); 1190 if (error) { 1191 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1192 goto out_inm_release; 1193 } 1194 1195 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1196 error = igmp_change_state(inm); 1197 if (error) { 1198 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1199 goto out_inm_release; 1200 } 1201 1202 out_inm_release: 1203 if (error) { 1204 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1205 inm_release_locked(inm); 1206 } else { 1207 *pinm = inm; 1208 } 1209 1210 return (error); 1211 } 1212 1213 /* 1214 * Leave a multicast group; unlocked entry point. 1215 */ 1216 int 1217 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1218 { 1219 int error; 1220 1221 IN_MULTI_LOCK(); 1222 error = in_leavegroup_locked(inm, imf); 1223 IN_MULTI_UNLOCK(); 1224 1225 return (error); 1226 } 1227 1228 /* 1229 * Leave a multicast group; real entry point. 1230 * All source filters will be expunged. 1231 * 1232 * Only preserves atomicity at inm level. 1233 * 1234 * Holding the write lock for the INP which contains imf 1235 * is highly advisable. We can't assert for it as imf does not 1236 * contain a back-pointer to the owning inp. 1237 * 1238 * Note: This is not the same as inm_release(*) as this function also 1239 * makes a state change downcall into IGMP. 1240 */ 1241 int 1242 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1243 { 1244 struct in_mfilter timf; 1245 int error; 1246 1247 error = 0; 1248 1249 IN_MULTI_LOCK_ASSERT(); 1250 1251 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1252 inm, ntohl(inm->inm_addr.s_addr), 1253 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1254 imf); 1255 1256 /* 1257 * If no imf was specified (i.e. kernel consumer), 1258 * fake one up and assume it is an ASM join. 1259 */ 1260 if (imf == NULL) { 1261 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1262 imf = &timf; 1263 } 1264 1265 /* 1266 * Begin state merge transaction at IGMP layer. 1267 * 1268 * As this particular invocation should not cause any memory 1269 * to be allocated, and there is no opportunity to roll back 1270 * the transaction, it MUST NOT fail. 1271 */ 1272 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1273 error = inm_merge(inm, imf); 1274 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1275 1276 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1277 CURVNET_SET(inm->inm_ifp->if_vnet); 1278 error = igmp_change_state(inm); 1279 CURVNET_RESTORE(); 1280 if (error) 1281 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1282 1283 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1284 inm_release_locked(inm); 1285 1286 return (error); 1287 } 1288 1289 /*#ifndef BURN_BRIDGES*/ 1290 /* 1291 * Join an IPv4 multicast group in (*,G) exclusive mode. 1292 * The group must be a 224.0.0.0/24 link-scope group. 1293 * This KPI is for legacy kernel consumers only. 1294 */ 1295 struct in_multi * 1296 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1297 { 1298 struct in_multi *pinm; 1299 int error; 1300 #ifdef INVARIANTS 1301 char addrbuf[INET_ADDRSTRLEN]; 1302 #endif 1303 1304 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1305 ("%s: %s not in 224.0.0.0/24", __func__, 1306 inet_ntoa_r(*ap, addrbuf))); 1307 1308 error = in_joingroup(ifp, ap, NULL, &pinm); 1309 if (error != 0) 1310 pinm = NULL; 1311 1312 return (pinm); 1313 } 1314 1315 /* 1316 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode. 1317 * This KPI is for legacy kernel consumers only. 1318 */ 1319 void 1320 in_delmulti(struct in_multi *inm) 1321 { 1322 1323 (void)in_leavegroup(inm, NULL); 1324 } 1325 /*#endif*/ 1326 1327 /* 1328 * Block or unblock an ASM multicast source on an inpcb. 1329 * This implements the delta-based API described in RFC 3678. 1330 * 1331 * The delta-based API applies only to exclusive-mode memberships. 1332 * An IGMP downcall will be performed. 1333 * 1334 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1335 * 1336 * Return 0 if successful, otherwise return an appropriate error code. 1337 */ 1338 static int 1339 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1340 { 1341 struct group_source_req gsr; 1342 sockunion_t *gsa, *ssa; 1343 struct ifnet *ifp; 1344 struct in_mfilter *imf; 1345 struct ip_moptions *imo; 1346 struct in_msource *ims; 1347 struct in_multi *inm; 1348 size_t idx; 1349 uint16_t fmode; 1350 int error, doblock; 1351 1352 ifp = NULL; 1353 error = 0; 1354 doblock = 0; 1355 1356 memset(&gsr, 0, sizeof(struct group_source_req)); 1357 gsa = (sockunion_t *)&gsr.gsr_group; 1358 ssa = (sockunion_t *)&gsr.gsr_source; 1359 1360 switch (sopt->sopt_name) { 1361 case IP_BLOCK_SOURCE: 1362 case IP_UNBLOCK_SOURCE: { 1363 struct ip_mreq_source mreqs; 1364 1365 error = sooptcopyin(sopt, &mreqs, 1366 sizeof(struct ip_mreq_source), 1367 sizeof(struct ip_mreq_source)); 1368 if (error) 1369 return (error); 1370 1371 gsa->sin.sin_family = AF_INET; 1372 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1373 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1374 1375 ssa->sin.sin_family = AF_INET; 1376 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1377 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1378 1379 if (!in_nullhost(mreqs.imr_interface)) 1380 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1381 1382 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1383 doblock = 1; 1384 1385 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1386 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1387 break; 1388 } 1389 1390 case MCAST_BLOCK_SOURCE: 1391 case MCAST_UNBLOCK_SOURCE: 1392 error = sooptcopyin(sopt, &gsr, 1393 sizeof(struct group_source_req), 1394 sizeof(struct group_source_req)); 1395 if (error) 1396 return (error); 1397 1398 if (gsa->sin.sin_family != AF_INET || 1399 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1400 return (EINVAL); 1401 1402 if (ssa->sin.sin_family != AF_INET || 1403 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1404 return (EINVAL); 1405 1406 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1407 return (EADDRNOTAVAIL); 1408 1409 ifp = ifnet_byindex(gsr.gsr_interface); 1410 1411 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1412 doblock = 1; 1413 break; 1414 1415 default: 1416 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1417 __func__, sopt->sopt_name); 1418 return (EOPNOTSUPP); 1419 break; 1420 } 1421 1422 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1423 return (EINVAL); 1424 1425 /* 1426 * Check if we are actually a member of this group. 1427 */ 1428 imo = inp_findmoptions(inp); 1429 idx = imo_match_group(imo, ifp, &gsa->sa); 1430 if (idx == -1 || imo->imo_mfilters == NULL) { 1431 error = EADDRNOTAVAIL; 1432 goto out_inp_locked; 1433 } 1434 1435 KASSERT(imo->imo_mfilters != NULL, 1436 ("%s: imo_mfilters not allocated", __func__)); 1437 imf = &imo->imo_mfilters[idx]; 1438 inm = imo->imo_membership[idx]; 1439 1440 /* 1441 * Attempting to use the delta-based API on an 1442 * non exclusive-mode membership is an error. 1443 */ 1444 fmode = imf->imf_st[0]; 1445 if (fmode != MCAST_EXCLUDE) { 1446 error = EINVAL; 1447 goto out_inp_locked; 1448 } 1449 1450 /* 1451 * Deal with error cases up-front: 1452 * Asked to block, but already blocked; or 1453 * Asked to unblock, but nothing to unblock. 1454 * If adding a new block entry, allocate it. 1455 */ 1456 ims = imo_match_source(imo, idx, &ssa->sa); 1457 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1458 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1459 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1460 error = EADDRNOTAVAIL; 1461 goto out_inp_locked; 1462 } 1463 1464 INP_WLOCK_ASSERT(inp); 1465 1466 /* 1467 * Begin state merge transaction at socket layer. 1468 */ 1469 if (doblock) { 1470 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1471 ims = imf_graft(imf, fmode, &ssa->sin); 1472 if (ims == NULL) 1473 error = ENOMEM; 1474 } else { 1475 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1476 error = imf_prune(imf, &ssa->sin); 1477 } 1478 1479 if (error) { 1480 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1481 goto out_imf_rollback; 1482 } 1483 1484 /* 1485 * Begin state merge transaction at IGMP layer. 1486 */ 1487 IN_MULTI_LOCK(); 1488 1489 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1490 error = inm_merge(inm, imf); 1491 if (error) { 1492 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1493 goto out_in_multi_locked; 1494 } 1495 1496 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1497 error = igmp_change_state(inm); 1498 if (error) 1499 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1500 1501 out_in_multi_locked: 1502 1503 IN_MULTI_UNLOCK(); 1504 1505 out_imf_rollback: 1506 if (error) 1507 imf_rollback(imf); 1508 else 1509 imf_commit(imf); 1510 1511 imf_reap(imf); 1512 1513 out_inp_locked: 1514 INP_WUNLOCK(inp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Given an inpcb, return its multicast options structure pointer. Accepts 1520 * an unlocked inpcb pointer, but will return it locked. May sleep. 1521 * 1522 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1523 * SMPng: NOTE: Returns with the INP write lock held. 1524 */ 1525 static struct ip_moptions * 1526 inp_findmoptions(struct inpcb *inp) 1527 { 1528 struct ip_moptions *imo; 1529 struct in_multi **immp; 1530 struct in_mfilter *imfp; 1531 size_t idx; 1532 1533 INP_WLOCK(inp); 1534 if (inp->inp_moptions != NULL) 1535 return (inp->inp_moptions); 1536 1537 INP_WUNLOCK(inp); 1538 1539 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1540 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1541 M_WAITOK | M_ZERO); 1542 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1543 M_INMFILTER, M_WAITOK); 1544 1545 imo->imo_multicast_ifp = NULL; 1546 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1547 imo->imo_multicast_vif = -1; 1548 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1549 imo->imo_multicast_loop = in_mcast_loop; 1550 imo->imo_num_memberships = 0; 1551 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1552 imo->imo_membership = immp; 1553 1554 /* Initialize per-group source filters. */ 1555 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1556 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1557 imo->imo_mfilters = imfp; 1558 1559 INP_WLOCK(inp); 1560 if (inp->inp_moptions != NULL) { 1561 free(imfp, M_INMFILTER); 1562 free(immp, M_IPMOPTS); 1563 free(imo, M_IPMOPTS); 1564 return (inp->inp_moptions); 1565 } 1566 inp->inp_moptions = imo; 1567 return (imo); 1568 } 1569 1570 /* 1571 * Discard the IP multicast options (and source filters). To minimize 1572 * the amount of work done while holding locks such as the INP's 1573 * pcbinfo lock (which is used in the receive path), the free 1574 * operation is performed asynchronously in a separate task. 1575 * 1576 * SMPng: NOTE: assumes INP write lock is held. 1577 */ 1578 void 1579 inp_freemoptions(struct ip_moptions *imo) 1580 { 1581 1582 KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); 1583 IN_MULTI_LOCK(); 1584 STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link); 1585 IN_MULTI_UNLOCK(); 1586 taskqueue_enqueue(taskqueue_thread, &imo_gc_task); 1587 } 1588 1589 static void 1590 inp_freemoptions_internal(struct ip_moptions *imo) 1591 { 1592 struct in_mfilter *imf; 1593 size_t idx, nmships; 1594 1595 nmships = imo->imo_num_memberships; 1596 for (idx = 0; idx < nmships; ++idx) { 1597 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1598 if (imf) 1599 imf_leave(imf); 1600 (void)in_leavegroup(imo->imo_membership[idx], imf); 1601 if (imf) 1602 imf_purge(imf); 1603 } 1604 1605 if (imo->imo_mfilters) 1606 free(imo->imo_mfilters, M_INMFILTER); 1607 free(imo->imo_membership, M_IPMOPTS); 1608 free(imo, M_IPMOPTS); 1609 } 1610 1611 static void 1612 inp_gcmoptions(void *context, int pending) 1613 { 1614 struct ip_moptions *imo; 1615 1616 IN_MULTI_LOCK(); 1617 while (!STAILQ_EMPTY(&imo_gc_list)) { 1618 imo = STAILQ_FIRST(&imo_gc_list); 1619 STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link); 1620 IN_MULTI_UNLOCK(); 1621 inp_freemoptions_internal(imo); 1622 IN_MULTI_LOCK(); 1623 } 1624 IN_MULTI_UNLOCK(); 1625 } 1626 1627 /* 1628 * Atomically get source filters on a socket for an IPv4 multicast group. 1629 * Called with INP lock held; returns with lock released. 1630 */ 1631 static int 1632 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1633 { 1634 struct __msfilterreq msfr; 1635 sockunion_t *gsa; 1636 struct ifnet *ifp; 1637 struct ip_moptions *imo; 1638 struct in_mfilter *imf; 1639 struct ip_msource *ims; 1640 struct in_msource *lims; 1641 struct sockaddr_in *psin; 1642 struct sockaddr_storage *ptss; 1643 struct sockaddr_storage *tss; 1644 int error; 1645 size_t idx, nsrcs, ncsrcs; 1646 1647 INP_WLOCK_ASSERT(inp); 1648 1649 imo = inp->inp_moptions; 1650 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1651 1652 INP_WUNLOCK(inp); 1653 1654 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1655 sizeof(struct __msfilterreq)); 1656 if (error) 1657 return (error); 1658 1659 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1660 return (EINVAL); 1661 1662 ifp = ifnet_byindex(msfr.msfr_ifindex); 1663 if (ifp == NULL) 1664 return (EINVAL); 1665 1666 INP_WLOCK(inp); 1667 1668 /* 1669 * Lookup group on the socket. 1670 */ 1671 gsa = (sockunion_t *)&msfr.msfr_group; 1672 idx = imo_match_group(imo, ifp, &gsa->sa); 1673 if (idx == -1 || imo->imo_mfilters == NULL) { 1674 INP_WUNLOCK(inp); 1675 return (EADDRNOTAVAIL); 1676 } 1677 imf = &imo->imo_mfilters[idx]; 1678 1679 /* 1680 * Ignore memberships which are in limbo. 1681 */ 1682 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1683 INP_WUNLOCK(inp); 1684 return (EAGAIN); 1685 } 1686 msfr.msfr_fmode = imf->imf_st[1]; 1687 1688 /* 1689 * If the user specified a buffer, copy out the source filter 1690 * entries to userland gracefully. 1691 * We only copy out the number of entries which userland 1692 * has asked for, but we always tell userland how big the 1693 * buffer really needs to be. 1694 */ 1695 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1696 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1697 tss = NULL; 1698 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1699 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1700 M_TEMP, M_NOWAIT | M_ZERO); 1701 if (tss == NULL) { 1702 INP_WUNLOCK(inp); 1703 return (ENOBUFS); 1704 } 1705 } 1706 1707 /* 1708 * Count number of sources in-mode at t0. 1709 * If buffer space exists and remains, copy out source entries. 1710 */ 1711 nsrcs = msfr.msfr_nsrcs; 1712 ncsrcs = 0; 1713 ptss = tss; 1714 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1715 lims = (struct in_msource *)ims; 1716 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1717 lims->imsl_st[0] != imf->imf_st[0]) 1718 continue; 1719 ++ncsrcs; 1720 if (tss != NULL && nsrcs > 0) { 1721 psin = (struct sockaddr_in *)ptss; 1722 psin->sin_family = AF_INET; 1723 psin->sin_len = sizeof(struct sockaddr_in); 1724 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1725 psin->sin_port = 0; 1726 ++ptss; 1727 --nsrcs; 1728 } 1729 } 1730 1731 INP_WUNLOCK(inp); 1732 1733 if (tss != NULL) { 1734 error = copyout(tss, msfr.msfr_srcs, 1735 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1736 free(tss, M_TEMP); 1737 if (error) 1738 return (error); 1739 } 1740 1741 msfr.msfr_nsrcs = ncsrcs; 1742 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1743 1744 return (error); 1745 } 1746 1747 /* 1748 * Return the IP multicast options in response to user getsockopt(). 1749 */ 1750 int 1751 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1752 { 1753 struct rm_priotracker in_ifa_tracker; 1754 struct ip_mreqn mreqn; 1755 struct ip_moptions *imo; 1756 struct ifnet *ifp; 1757 struct in_ifaddr *ia; 1758 int error, optval; 1759 u_char coptval; 1760 1761 INP_WLOCK(inp); 1762 imo = inp->inp_moptions; 1763 /* 1764 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1765 * or is a divert socket, reject it. 1766 */ 1767 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1768 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1769 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1770 INP_WUNLOCK(inp); 1771 return (EOPNOTSUPP); 1772 } 1773 1774 error = 0; 1775 switch (sopt->sopt_name) { 1776 case IP_MULTICAST_VIF: 1777 if (imo != NULL) 1778 optval = imo->imo_multicast_vif; 1779 else 1780 optval = -1; 1781 INP_WUNLOCK(inp); 1782 error = sooptcopyout(sopt, &optval, sizeof(int)); 1783 break; 1784 1785 case IP_MULTICAST_IF: 1786 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1787 if (imo != NULL) { 1788 ifp = imo->imo_multicast_ifp; 1789 if (!in_nullhost(imo->imo_multicast_addr)) { 1790 mreqn.imr_address = imo->imo_multicast_addr; 1791 } else if (ifp != NULL) { 1792 mreqn.imr_ifindex = ifp->if_index; 1793 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1794 if (ia != NULL) { 1795 mreqn.imr_address = 1796 IA_SIN(ia)->sin_addr; 1797 ifa_free(&ia->ia_ifa); 1798 } 1799 } 1800 } 1801 INP_WUNLOCK(inp); 1802 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1803 error = sooptcopyout(sopt, &mreqn, 1804 sizeof(struct ip_mreqn)); 1805 } else { 1806 error = sooptcopyout(sopt, &mreqn.imr_address, 1807 sizeof(struct in_addr)); 1808 } 1809 break; 1810 1811 case IP_MULTICAST_TTL: 1812 if (imo == NULL) 1813 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1814 else 1815 optval = coptval = imo->imo_multicast_ttl; 1816 INP_WUNLOCK(inp); 1817 if (sopt->sopt_valsize == sizeof(u_char)) 1818 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1819 else 1820 error = sooptcopyout(sopt, &optval, sizeof(int)); 1821 break; 1822 1823 case IP_MULTICAST_LOOP: 1824 if (imo == NULL) 1825 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1826 else 1827 optval = coptval = imo->imo_multicast_loop; 1828 INP_WUNLOCK(inp); 1829 if (sopt->sopt_valsize == sizeof(u_char)) 1830 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1831 else 1832 error = sooptcopyout(sopt, &optval, sizeof(int)); 1833 break; 1834 1835 case IP_MSFILTER: 1836 if (imo == NULL) { 1837 error = EADDRNOTAVAIL; 1838 INP_WUNLOCK(inp); 1839 } else { 1840 error = inp_get_source_filters(inp, sopt); 1841 } 1842 break; 1843 1844 default: 1845 INP_WUNLOCK(inp); 1846 error = ENOPROTOOPT; 1847 break; 1848 } 1849 1850 INP_UNLOCK_ASSERT(inp); 1851 1852 return (error); 1853 } 1854 1855 /* 1856 * Look up the ifnet to use for a multicast group membership, 1857 * given the IPv4 address of an interface, and the IPv4 group address. 1858 * 1859 * This routine exists to support legacy multicast applications 1860 * which do not understand that multicast memberships are scoped to 1861 * specific physical links in the networking stack, or which need 1862 * to join link-scope groups before IPv4 addresses are configured. 1863 * 1864 * If inp is non-NULL, use this socket's current FIB number for any 1865 * required FIB lookup. 1866 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1867 * and use its ifp; usually, this points to the default next-hop. 1868 * 1869 * If the FIB lookup fails, attempt to use the first non-loopback 1870 * interface with multicast capability in the system as a 1871 * last resort. The legacy IPv4 ASM API requires that we do 1872 * this in order to allow groups to be joined when the routing 1873 * table has not yet been populated during boot. 1874 * 1875 * Returns NULL if no ifp could be found. 1876 * 1877 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1878 * FUTURE: Implement IPv4 source-address selection. 1879 */ 1880 static struct ifnet * 1881 inp_lookup_mcast_ifp(const struct inpcb *inp, 1882 const struct sockaddr_in *gsin, const struct in_addr ina) 1883 { 1884 struct rm_priotracker in_ifa_tracker; 1885 struct ifnet *ifp; 1886 struct nhop4_basic nh4; 1887 uint32_t fibnum; 1888 1889 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1890 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1891 ("%s: not multicast", __func__)); 1892 1893 ifp = NULL; 1894 if (!in_nullhost(ina)) { 1895 INADDR_TO_IFP(ina, ifp); 1896 } else { 1897 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1898 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1899 ifp = nh4.nh_ifp; 1900 else { 1901 struct in_ifaddr *ia; 1902 struct ifnet *mifp; 1903 1904 mifp = NULL; 1905 IN_IFADDR_RLOCK(&in_ifa_tracker); 1906 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1907 mifp = ia->ia_ifp; 1908 if (!(mifp->if_flags & IFF_LOOPBACK) && 1909 (mifp->if_flags & IFF_MULTICAST)) { 1910 ifp = mifp; 1911 break; 1912 } 1913 } 1914 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1915 } 1916 } 1917 1918 return (ifp); 1919 } 1920 1921 /* 1922 * Join an IPv4 multicast group, possibly with a source. 1923 */ 1924 static int 1925 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1926 { 1927 struct group_source_req gsr; 1928 sockunion_t *gsa, *ssa; 1929 struct ifnet *ifp; 1930 struct in_mfilter *imf; 1931 struct ip_moptions *imo; 1932 struct in_multi *inm; 1933 struct in_msource *lims; 1934 size_t idx; 1935 int error, is_new; 1936 1937 ifp = NULL; 1938 imf = NULL; 1939 lims = NULL; 1940 error = 0; 1941 is_new = 0; 1942 1943 memset(&gsr, 0, sizeof(struct group_source_req)); 1944 gsa = (sockunion_t *)&gsr.gsr_group; 1945 gsa->ss.ss_family = AF_UNSPEC; 1946 ssa = (sockunion_t *)&gsr.gsr_source; 1947 ssa->ss.ss_family = AF_UNSPEC; 1948 1949 switch (sopt->sopt_name) { 1950 case IP_ADD_MEMBERSHIP: 1951 case IP_ADD_SOURCE_MEMBERSHIP: { 1952 struct ip_mreq_source mreqs; 1953 1954 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 1955 error = sooptcopyin(sopt, &mreqs, 1956 sizeof(struct ip_mreq), 1957 sizeof(struct ip_mreq)); 1958 /* 1959 * Do argument switcharoo from ip_mreq into 1960 * ip_mreq_source to avoid using two instances. 1961 */ 1962 mreqs.imr_interface = mreqs.imr_sourceaddr; 1963 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 1964 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1965 error = sooptcopyin(sopt, &mreqs, 1966 sizeof(struct ip_mreq_source), 1967 sizeof(struct ip_mreq_source)); 1968 } 1969 if (error) 1970 return (error); 1971 1972 gsa->sin.sin_family = AF_INET; 1973 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1974 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1975 1976 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1977 ssa->sin.sin_family = AF_INET; 1978 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1979 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1980 } 1981 1982 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1983 return (EINVAL); 1984 1985 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1986 mreqs.imr_interface); 1987 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1988 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1989 break; 1990 } 1991 1992 case MCAST_JOIN_GROUP: 1993 case MCAST_JOIN_SOURCE_GROUP: 1994 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 1995 error = sooptcopyin(sopt, &gsr, 1996 sizeof(struct group_req), 1997 sizeof(struct group_req)); 1998 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 1999 error = sooptcopyin(sopt, &gsr, 2000 sizeof(struct group_source_req), 2001 sizeof(struct group_source_req)); 2002 } 2003 if (error) 2004 return (error); 2005 2006 if (gsa->sin.sin_family != AF_INET || 2007 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2008 return (EINVAL); 2009 2010 /* 2011 * Overwrite the port field if present, as the sockaddr 2012 * being copied in may be matched with a binary comparison. 2013 */ 2014 gsa->sin.sin_port = 0; 2015 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2016 if (ssa->sin.sin_family != AF_INET || 2017 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2018 return (EINVAL); 2019 ssa->sin.sin_port = 0; 2020 } 2021 2022 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2023 return (EINVAL); 2024 2025 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2026 return (EADDRNOTAVAIL); 2027 ifp = ifnet_byindex(gsr.gsr_interface); 2028 break; 2029 2030 default: 2031 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2032 __func__, sopt->sopt_name); 2033 return (EOPNOTSUPP); 2034 break; 2035 } 2036 2037 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2038 return (EADDRNOTAVAIL); 2039 2040 imo = inp_findmoptions(inp); 2041 idx = imo_match_group(imo, ifp, &gsa->sa); 2042 if (idx == -1) { 2043 is_new = 1; 2044 } else { 2045 inm = imo->imo_membership[idx]; 2046 imf = &imo->imo_mfilters[idx]; 2047 if (ssa->ss.ss_family != AF_UNSPEC) { 2048 /* 2049 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2050 * is an error. On an existing inclusive membership, 2051 * it just adds the source to the filter list. 2052 */ 2053 if (imf->imf_st[1] != MCAST_INCLUDE) { 2054 error = EINVAL; 2055 goto out_inp_locked; 2056 } 2057 /* 2058 * Throw out duplicates. 2059 * 2060 * XXX FIXME: This makes a naive assumption that 2061 * even if entries exist for *ssa in this imf, 2062 * they will be rejected as dupes, even if they 2063 * are not valid in the current mode (in-mode). 2064 * 2065 * in_msource is transactioned just as for anything 2066 * else in SSM -- but note naive use of inm_graft() 2067 * below for allocating new filter entries. 2068 * 2069 * This is only an issue if someone mixes the 2070 * full-state SSM API with the delta-based API, 2071 * which is discouraged in the relevant RFCs. 2072 */ 2073 lims = imo_match_source(imo, idx, &ssa->sa); 2074 if (lims != NULL /*&& 2075 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2076 error = EADDRNOTAVAIL; 2077 goto out_inp_locked; 2078 } 2079 } else { 2080 /* 2081 * MCAST_JOIN_GROUP on an existing exclusive 2082 * membership is an error; return EADDRINUSE 2083 * to preserve 4.4BSD API idempotence, and 2084 * avoid tedious detour to code below. 2085 * NOTE: This is bending RFC 3678 a bit. 2086 * 2087 * On an existing inclusive membership, this is also 2088 * an error; if you want to change filter mode, 2089 * you must use the userland API setsourcefilter(). 2090 * XXX We don't reject this for imf in UNDEFINED 2091 * state at t1, because allocation of a filter 2092 * is atomic with allocation of a membership. 2093 */ 2094 error = EINVAL; 2095 if (imf->imf_st[1] == MCAST_EXCLUDE) 2096 error = EADDRINUSE; 2097 goto out_inp_locked; 2098 } 2099 } 2100 2101 /* 2102 * Begin state merge transaction at socket layer. 2103 */ 2104 INP_WLOCK_ASSERT(inp); 2105 2106 if (is_new) { 2107 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2108 error = imo_grow(imo); 2109 if (error) 2110 goto out_inp_locked; 2111 } 2112 /* 2113 * Allocate the new slot upfront so we can deal with 2114 * grafting the new source filter in same code path 2115 * as for join-source on existing membership. 2116 */ 2117 idx = imo->imo_num_memberships; 2118 imo->imo_membership[idx] = NULL; 2119 imo->imo_num_memberships++; 2120 KASSERT(imo->imo_mfilters != NULL, 2121 ("%s: imf_mfilters vector was not allocated", __func__)); 2122 imf = &imo->imo_mfilters[idx]; 2123 KASSERT(RB_EMPTY(&imf->imf_sources), 2124 ("%s: imf_sources not empty", __func__)); 2125 } 2126 2127 /* 2128 * Graft new source into filter list for this inpcb's 2129 * membership of the group. The in_multi may not have 2130 * been allocated yet if this is a new membership, however, 2131 * the in_mfilter slot will be allocated and must be initialized. 2132 * 2133 * Note: Grafting of exclusive mode filters doesn't happen 2134 * in this path. 2135 * XXX: Should check for non-NULL lims (node exists but may 2136 * not be in-mode) for interop with full-state API. 2137 */ 2138 if (ssa->ss.ss_family != AF_UNSPEC) { 2139 /* Membership starts in IN mode */ 2140 if (is_new) { 2141 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2142 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2143 } else { 2144 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2145 } 2146 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2147 if (lims == NULL) { 2148 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2149 __func__); 2150 error = ENOMEM; 2151 goto out_imo_free; 2152 } 2153 } else { 2154 /* No address specified; Membership starts in EX mode */ 2155 if (is_new) { 2156 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2157 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2158 } 2159 } 2160 2161 /* 2162 * Begin state merge transaction at IGMP layer. 2163 */ 2164 IN_MULTI_LOCK(); 2165 2166 if (is_new) { 2167 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2168 &inm); 2169 if (error) { 2170 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2171 __func__); 2172 IN_MULTI_UNLOCK(); 2173 goto out_imo_free; 2174 } 2175 imo->imo_membership[idx] = inm; 2176 } else { 2177 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2178 error = inm_merge(inm, imf); 2179 if (error) { 2180 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2181 __func__); 2182 goto out_in_multi_locked; 2183 } 2184 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2185 error = igmp_change_state(inm); 2186 if (error) { 2187 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2188 __func__); 2189 goto out_in_multi_locked; 2190 } 2191 } 2192 2193 out_in_multi_locked: 2194 2195 IN_MULTI_UNLOCK(); 2196 2197 INP_WLOCK_ASSERT(inp); 2198 if (error) { 2199 imf_rollback(imf); 2200 if (is_new) 2201 imf_purge(imf); 2202 else 2203 imf_reap(imf); 2204 } else { 2205 imf_commit(imf); 2206 } 2207 2208 out_imo_free: 2209 if (error && is_new) { 2210 imo->imo_membership[idx] = NULL; 2211 --imo->imo_num_memberships; 2212 } 2213 2214 out_inp_locked: 2215 INP_WUNLOCK(inp); 2216 return (error); 2217 } 2218 2219 /* 2220 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2221 */ 2222 static int 2223 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2224 { 2225 struct group_source_req gsr; 2226 struct ip_mreq_source mreqs; 2227 sockunion_t *gsa, *ssa; 2228 struct ifnet *ifp; 2229 struct in_mfilter *imf; 2230 struct ip_moptions *imo; 2231 struct in_msource *ims; 2232 struct in_multi *inm; 2233 size_t idx; 2234 int error, is_final; 2235 2236 ifp = NULL; 2237 error = 0; 2238 is_final = 1; 2239 2240 memset(&gsr, 0, sizeof(struct group_source_req)); 2241 gsa = (sockunion_t *)&gsr.gsr_group; 2242 gsa->ss.ss_family = AF_UNSPEC; 2243 ssa = (sockunion_t *)&gsr.gsr_source; 2244 ssa->ss.ss_family = AF_UNSPEC; 2245 2246 switch (sopt->sopt_name) { 2247 case IP_DROP_MEMBERSHIP: 2248 case IP_DROP_SOURCE_MEMBERSHIP: 2249 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2250 error = sooptcopyin(sopt, &mreqs, 2251 sizeof(struct ip_mreq), 2252 sizeof(struct ip_mreq)); 2253 /* 2254 * Swap interface and sourceaddr arguments, 2255 * as ip_mreq and ip_mreq_source are laid 2256 * out differently. 2257 */ 2258 mreqs.imr_interface = mreqs.imr_sourceaddr; 2259 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2260 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2261 error = sooptcopyin(sopt, &mreqs, 2262 sizeof(struct ip_mreq_source), 2263 sizeof(struct ip_mreq_source)); 2264 } 2265 if (error) 2266 return (error); 2267 2268 gsa->sin.sin_family = AF_INET; 2269 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2270 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2271 2272 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2273 ssa->sin.sin_family = AF_INET; 2274 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2275 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2276 } 2277 2278 /* 2279 * Attempt to look up hinted ifp from interface address. 2280 * Fallthrough with null ifp iff lookup fails, to 2281 * preserve 4.4BSD mcast API idempotence. 2282 * XXX NOTE WELL: The RFC 3678 API is preferred because 2283 * using an IPv4 address as a key is racy. 2284 */ 2285 if (!in_nullhost(mreqs.imr_interface)) 2286 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2287 2288 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2289 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2290 2291 break; 2292 2293 case MCAST_LEAVE_GROUP: 2294 case MCAST_LEAVE_SOURCE_GROUP: 2295 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2296 error = sooptcopyin(sopt, &gsr, 2297 sizeof(struct group_req), 2298 sizeof(struct group_req)); 2299 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2300 error = sooptcopyin(sopt, &gsr, 2301 sizeof(struct group_source_req), 2302 sizeof(struct group_source_req)); 2303 } 2304 if (error) 2305 return (error); 2306 2307 if (gsa->sin.sin_family != AF_INET || 2308 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2309 return (EINVAL); 2310 2311 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2312 if (ssa->sin.sin_family != AF_INET || 2313 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2314 return (EINVAL); 2315 } 2316 2317 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2318 return (EADDRNOTAVAIL); 2319 2320 ifp = ifnet_byindex(gsr.gsr_interface); 2321 2322 if (ifp == NULL) 2323 return (EADDRNOTAVAIL); 2324 break; 2325 2326 default: 2327 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2328 __func__, sopt->sopt_name); 2329 return (EOPNOTSUPP); 2330 break; 2331 } 2332 2333 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2334 return (EINVAL); 2335 2336 /* 2337 * Find the membership in the membership array. 2338 */ 2339 imo = inp_findmoptions(inp); 2340 idx = imo_match_group(imo, ifp, &gsa->sa); 2341 if (idx == -1) { 2342 error = EADDRNOTAVAIL; 2343 goto out_inp_locked; 2344 } 2345 inm = imo->imo_membership[idx]; 2346 imf = &imo->imo_mfilters[idx]; 2347 2348 if (ssa->ss.ss_family != AF_UNSPEC) 2349 is_final = 0; 2350 2351 /* 2352 * Begin state merge transaction at socket layer. 2353 */ 2354 INP_WLOCK_ASSERT(inp); 2355 2356 /* 2357 * If we were instructed only to leave a given source, do so. 2358 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2359 */ 2360 if (is_final) { 2361 imf_leave(imf); 2362 } else { 2363 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2364 error = EADDRNOTAVAIL; 2365 goto out_inp_locked; 2366 } 2367 ims = imo_match_source(imo, idx, &ssa->sa); 2368 if (ims == NULL) { 2369 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2370 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2371 error = EADDRNOTAVAIL; 2372 goto out_inp_locked; 2373 } 2374 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2375 error = imf_prune(imf, &ssa->sin); 2376 if (error) { 2377 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2378 __func__); 2379 goto out_inp_locked; 2380 } 2381 } 2382 2383 /* 2384 * Begin state merge transaction at IGMP layer. 2385 */ 2386 IN_MULTI_LOCK(); 2387 2388 if (is_final) { 2389 /* 2390 * Give up the multicast address record to which 2391 * the membership points. 2392 */ 2393 (void)in_leavegroup_locked(inm, imf); 2394 } else { 2395 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2396 error = inm_merge(inm, imf); 2397 if (error) { 2398 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2399 __func__); 2400 goto out_in_multi_locked; 2401 } 2402 2403 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2404 error = igmp_change_state(inm); 2405 if (error) { 2406 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2407 __func__); 2408 } 2409 } 2410 2411 out_in_multi_locked: 2412 2413 IN_MULTI_UNLOCK(); 2414 2415 if (error) 2416 imf_rollback(imf); 2417 else 2418 imf_commit(imf); 2419 2420 imf_reap(imf); 2421 2422 if (is_final) { 2423 /* Remove the gap in the membership and filter array. */ 2424 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2425 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2426 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2427 } 2428 imo->imo_num_memberships--; 2429 } 2430 2431 out_inp_locked: 2432 INP_WUNLOCK(inp); 2433 return (error); 2434 } 2435 2436 /* 2437 * Select the interface for transmitting IPv4 multicast datagrams. 2438 * 2439 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2440 * may be passed to this socket option. An address of INADDR_ANY or an 2441 * interface index of 0 is used to remove a previous selection. 2442 * When no interface is selected, one is chosen for every send. 2443 */ 2444 static int 2445 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2446 { 2447 struct in_addr addr; 2448 struct ip_mreqn mreqn; 2449 struct ifnet *ifp; 2450 struct ip_moptions *imo; 2451 int error; 2452 2453 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2454 /* 2455 * An interface index was specified using the 2456 * Linux-derived ip_mreqn structure. 2457 */ 2458 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2459 sizeof(struct ip_mreqn)); 2460 if (error) 2461 return (error); 2462 2463 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2464 return (EINVAL); 2465 2466 if (mreqn.imr_ifindex == 0) { 2467 ifp = NULL; 2468 } else { 2469 ifp = ifnet_byindex(mreqn.imr_ifindex); 2470 if (ifp == NULL) 2471 return (EADDRNOTAVAIL); 2472 } 2473 } else { 2474 /* 2475 * An interface was specified by IPv4 address. 2476 * This is the traditional BSD usage. 2477 */ 2478 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2479 sizeof(struct in_addr)); 2480 if (error) 2481 return (error); 2482 if (in_nullhost(addr)) { 2483 ifp = NULL; 2484 } else { 2485 INADDR_TO_IFP(addr, ifp); 2486 if (ifp == NULL) 2487 return (EADDRNOTAVAIL); 2488 } 2489 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2490 ntohl(addr.s_addr)); 2491 } 2492 2493 /* Reject interfaces which do not support multicast. */ 2494 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2495 return (EOPNOTSUPP); 2496 2497 imo = inp_findmoptions(inp); 2498 imo->imo_multicast_ifp = ifp; 2499 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2500 INP_WUNLOCK(inp); 2501 2502 return (0); 2503 } 2504 2505 /* 2506 * Atomically set source filters on a socket for an IPv4 multicast group. 2507 * 2508 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2509 */ 2510 static int 2511 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2512 { 2513 struct __msfilterreq msfr; 2514 sockunion_t *gsa; 2515 struct ifnet *ifp; 2516 struct in_mfilter *imf; 2517 struct ip_moptions *imo; 2518 struct in_multi *inm; 2519 size_t idx; 2520 int error; 2521 2522 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2523 sizeof(struct __msfilterreq)); 2524 if (error) 2525 return (error); 2526 2527 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2528 return (ENOBUFS); 2529 2530 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2531 msfr.msfr_fmode != MCAST_INCLUDE)) 2532 return (EINVAL); 2533 2534 if (msfr.msfr_group.ss_family != AF_INET || 2535 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2536 return (EINVAL); 2537 2538 gsa = (sockunion_t *)&msfr.msfr_group; 2539 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2540 return (EINVAL); 2541 2542 gsa->sin.sin_port = 0; /* ignore port */ 2543 2544 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2545 return (EADDRNOTAVAIL); 2546 2547 ifp = ifnet_byindex(msfr.msfr_ifindex); 2548 if (ifp == NULL) 2549 return (EADDRNOTAVAIL); 2550 2551 /* 2552 * Take the INP write lock. 2553 * Check if this socket is a member of this group. 2554 */ 2555 imo = inp_findmoptions(inp); 2556 idx = imo_match_group(imo, ifp, &gsa->sa); 2557 if (idx == -1 || imo->imo_mfilters == NULL) { 2558 error = EADDRNOTAVAIL; 2559 goto out_inp_locked; 2560 } 2561 inm = imo->imo_membership[idx]; 2562 imf = &imo->imo_mfilters[idx]; 2563 2564 /* 2565 * Begin state merge transaction at socket layer. 2566 */ 2567 INP_WLOCK_ASSERT(inp); 2568 2569 imf->imf_st[1] = msfr.msfr_fmode; 2570 2571 /* 2572 * Apply any new source filters, if present. 2573 * Make a copy of the user-space source vector so 2574 * that we may copy them with a single copyin. This 2575 * allows us to deal with page faults up-front. 2576 */ 2577 if (msfr.msfr_nsrcs > 0) { 2578 struct in_msource *lims; 2579 struct sockaddr_in *psin; 2580 struct sockaddr_storage *kss, *pkss; 2581 int i; 2582 2583 INP_WUNLOCK(inp); 2584 2585 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2586 __func__, (unsigned long)msfr.msfr_nsrcs); 2587 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2588 M_TEMP, M_WAITOK); 2589 error = copyin(msfr.msfr_srcs, kss, 2590 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2591 if (error) { 2592 free(kss, M_TEMP); 2593 return (error); 2594 } 2595 2596 INP_WLOCK(inp); 2597 2598 /* 2599 * Mark all source filters as UNDEFINED at t1. 2600 * Restore new group filter mode, as imf_leave() 2601 * will set it to INCLUDE. 2602 */ 2603 imf_leave(imf); 2604 imf->imf_st[1] = msfr.msfr_fmode; 2605 2606 /* 2607 * Update socket layer filters at t1, lazy-allocating 2608 * new entries. This saves a bunch of memory at the 2609 * cost of one RB_FIND() per source entry; duplicate 2610 * entries in the msfr_nsrcs vector are ignored. 2611 * If we encounter an error, rollback transaction. 2612 * 2613 * XXX This too could be replaced with a set-symmetric 2614 * difference like loop to avoid walking from root 2615 * every time, as the key space is common. 2616 */ 2617 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2618 psin = (struct sockaddr_in *)pkss; 2619 if (psin->sin_family != AF_INET) { 2620 error = EAFNOSUPPORT; 2621 break; 2622 } 2623 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2624 error = EINVAL; 2625 break; 2626 } 2627 error = imf_get_source(imf, psin, &lims); 2628 if (error) 2629 break; 2630 lims->imsl_st[1] = imf->imf_st[1]; 2631 } 2632 free(kss, M_TEMP); 2633 } 2634 2635 if (error) 2636 goto out_imf_rollback; 2637 2638 INP_WLOCK_ASSERT(inp); 2639 IN_MULTI_LOCK(); 2640 2641 /* 2642 * Begin state merge transaction at IGMP layer. 2643 */ 2644 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2645 error = inm_merge(inm, imf); 2646 if (error) { 2647 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2648 goto out_in_multi_locked; 2649 } 2650 2651 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2652 error = igmp_change_state(inm); 2653 if (error) 2654 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2655 2656 out_in_multi_locked: 2657 2658 IN_MULTI_UNLOCK(); 2659 2660 out_imf_rollback: 2661 if (error) 2662 imf_rollback(imf); 2663 else 2664 imf_commit(imf); 2665 2666 imf_reap(imf); 2667 2668 out_inp_locked: 2669 INP_WUNLOCK(inp); 2670 return (error); 2671 } 2672 2673 /* 2674 * Set the IP multicast options in response to user setsockopt(). 2675 * 2676 * Many of the socket options handled in this function duplicate the 2677 * functionality of socket options in the regular unicast API. However, 2678 * it is not possible to merge the duplicate code, because the idempotence 2679 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2680 * the effects of these options must be treated as separate and distinct. 2681 * 2682 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2683 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2684 * is refactored to no longer use vifs. 2685 */ 2686 int 2687 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2688 { 2689 struct ip_moptions *imo; 2690 int error; 2691 2692 error = 0; 2693 2694 /* 2695 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2696 * or is a divert socket, reject it. 2697 */ 2698 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2699 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2700 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2701 return (EOPNOTSUPP); 2702 2703 switch (sopt->sopt_name) { 2704 case IP_MULTICAST_VIF: { 2705 int vifi; 2706 /* 2707 * Select a multicast VIF for transmission. 2708 * Only useful if multicast forwarding is active. 2709 */ 2710 if (legal_vif_num == NULL) { 2711 error = EOPNOTSUPP; 2712 break; 2713 } 2714 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2715 if (error) 2716 break; 2717 if (!legal_vif_num(vifi) && (vifi != -1)) { 2718 error = EINVAL; 2719 break; 2720 } 2721 imo = inp_findmoptions(inp); 2722 imo->imo_multicast_vif = vifi; 2723 INP_WUNLOCK(inp); 2724 break; 2725 } 2726 2727 case IP_MULTICAST_IF: 2728 error = inp_set_multicast_if(inp, sopt); 2729 break; 2730 2731 case IP_MULTICAST_TTL: { 2732 u_char ttl; 2733 2734 /* 2735 * Set the IP time-to-live for outgoing multicast packets. 2736 * The original multicast API required a char argument, 2737 * which is inconsistent with the rest of the socket API. 2738 * We allow either a char or an int. 2739 */ 2740 if (sopt->sopt_valsize == sizeof(u_char)) { 2741 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2742 sizeof(u_char)); 2743 if (error) 2744 break; 2745 } else { 2746 u_int ittl; 2747 2748 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2749 sizeof(u_int)); 2750 if (error) 2751 break; 2752 if (ittl > 255) { 2753 error = EINVAL; 2754 break; 2755 } 2756 ttl = (u_char)ittl; 2757 } 2758 imo = inp_findmoptions(inp); 2759 imo->imo_multicast_ttl = ttl; 2760 INP_WUNLOCK(inp); 2761 break; 2762 } 2763 2764 case IP_MULTICAST_LOOP: { 2765 u_char loop; 2766 2767 /* 2768 * Set the loopback flag for outgoing multicast packets. 2769 * Must be zero or one. The original multicast API required a 2770 * char argument, which is inconsistent with the rest 2771 * of the socket API. We allow either a char or an int. 2772 */ 2773 if (sopt->sopt_valsize == sizeof(u_char)) { 2774 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2775 sizeof(u_char)); 2776 if (error) 2777 break; 2778 } else { 2779 u_int iloop; 2780 2781 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2782 sizeof(u_int)); 2783 if (error) 2784 break; 2785 loop = (u_char)iloop; 2786 } 2787 imo = inp_findmoptions(inp); 2788 imo->imo_multicast_loop = !!loop; 2789 INP_WUNLOCK(inp); 2790 break; 2791 } 2792 2793 case IP_ADD_MEMBERSHIP: 2794 case IP_ADD_SOURCE_MEMBERSHIP: 2795 case MCAST_JOIN_GROUP: 2796 case MCAST_JOIN_SOURCE_GROUP: 2797 error = inp_join_group(inp, sopt); 2798 break; 2799 2800 case IP_DROP_MEMBERSHIP: 2801 case IP_DROP_SOURCE_MEMBERSHIP: 2802 case MCAST_LEAVE_GROUP: 2803 case MCAST_LEAVE_SOURCE_GROUP: 2804 error = inp_leave_group(inp, sopt); 2805 break; 2806 2807 case IP_BLOCK_SOURCE: 2808 case IP_UNBLOCK_SOURCE: 2809 case MCAST_BLOCK_SOURCE: 2810 case MCAST_UNBLOCK_SOURCE: 2811 error = inp_block_unblock_source(inp, sopt); 2812 break; 2813 2814 case IP_MSFILTER: 2815 error = inp_set_source_filters(inp, sopt); 2816 break; 2817 2818 default: 2819 error = EOPNOTSUPP; 2820 break; 2821 } 2822 2823 INP_UNLOCK_ASSERT(inp); 2824 2825 return (error); 2826 } 2827 2828 /* 2829 * Expose IGMP's multicast filter mode and source list(s) to userland, 2830 * keyed by (ifindex, group). 2831 * The filter mode is written out as a uint32_t, followed by 2832 * 0..n of struct in_addr. 2833 * For use by ifmcstat(8). 2834 * SMPng: NOTE: unlocked read of ifindex space. 2835 */ 2836 static int 2837 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2838 { 2839 struct in_addr src, group; 2840 struct ifnet *ifp; 2841 struct ifmultiaddr *ifma; 2842 struct in_multi *inm; 2843 struct ip_msource *ims; 2844 int *name; 2845 int retval; 2846 u_int namelen; 2847 uint32_t fmode, ifindex; 2848 2849 name = (int *)arg1; 2850 namelen = arg2; 2851 2852 if (req->newptr != NULL) 2853 return (EPERM); 2854 2855 if (namelen != 2) 2856 return (EINVAL); 2857 2858 ifindex = name[0]; 2859 if (ifindex <= 0 || ifindex > V_if_index) { 2860 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2861 __func__, ifindex); 2862 return (ENOENT); 2863 } 2864 2865 group.s_addr = name[1]; 2866 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2867 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2868 __func__, ntohl(group.s_addr)); 2869 return (EINVAL); 2870 } 2871 2872 ifp = ifnet_byindex(ifindex); 2873 if (ifp == NULL) { 2874 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2875 __func__, ifindex); 2876 return (ENOENT); 2877 } 2878 2879 retval = sysctl_wire_old_buffer(req, 2880 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2881 if (retval) 2882 return (retval); 2883 2884 IN_MULTI_LOCK(); 2885 2886 IF_ADDR_RLOCK(ifp); 2887 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2888 if (ifma->ifma_addr->sa_family != AF_INET || 2889 ifma->ifma_protospec == NULL) 2890 continue; 2891 inm = (struct in_multi *)ifma->ifma_protospec; 2892 if (!in_hosteq(inm->inm_addr, group)) 2893 continue; 2894 fmode = inm->inm_st[1].iss_fmode; 2895 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2896 if (retval != 0) 2897 break; 2898 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2899 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 2900 ims->ims_haddr); 2901 /* 2902 * Only copy-out sources which are in-mode. 2903 */ 2904 if (fmode != ims_get_mode(inm, ims, 1)) { 2905 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2906 __func__); 2907 continue; 2908 } 2909 src.s_addr = htonl(ims->ims_haddr); 2910 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2911 if (retval != 0) 2912 break; 2913 } 2914 } 2915 IF_ADDR_RUNLOCK(ifp); 2916 2917 IN_MULTI_UNLOCK(); 2918 2919 return (retval); 2920 } 2921 2922 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2923 2924 static const char *inm_modestrs[] = { "un", "in", "ex" }; 2925 2926 static const char * 2927 inm_mode_str(const int mode) 2928 { 2929 2930 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2931 return (inm_modestrs[mode]); 2932 return ("??"); 2933 } 2934 2935 static const char *inm_statestrs[] = { 2936 "not-member", 2937 "silent", 2938 "idle", 2939 "lazy", 2940 "sleeping", 2941 "awakening", 2942 "query-pending", 2943 "sg-query-pending", 2944 "leaving" 2945 }; 2946 2947 static const char * 2948 inm_state_str(const int state) 2949 { 2950 2951 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 2952 return (inm_statestrs[state]); 2953 return ("??"); 2954 } 2955 2956 /* 2957 * Dump an in_multi structure to the console. 2958 */ 2959 void 2960 inm_print(const struct in_multi *inm) 2961 { 2962 int t; 2963 char addrbuf[INET_ADDRSTRLEN]; 2964 2965 if ((ktr_mask & KTR_IGMPV3) == 0) 2966 return; 2967 2968 printf("%s: --- begin inm %p ---\n", __func__, inm); 2969 printf("addr %s ifp %p(%s) ifma %p\n", 2970 inet_ntoa_r(inm->inm_addr, addrbuf), 2971 inm->inm_ifp, 2972 inm->inm_ifp->if_xname, 2973 inm->inm_ifma); 2974 printf("timer %u state %s refcount %u scq.len %u\n", 2975 inm->inm_timer, 2976 inm_state_str(inm->inm_state), 2977 inm->inm_refcount, 2978 inm->inm_scq.mq_len); 2979 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 2980 inm->inm_igi, 2981 inm->inm_nsrc, 2982 inm->inm_sctimer, 2983 inm->inm_scrv); 2984 for (t = 0; t < 2; t++) { 2985 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 2986 inm_mode_str(inm->inm_st[t].iss_fmode), 2987 inm->inm_st[t].iss_asm, 2988 inm->inm_st[t].iss_ex, 2989 inm->inm_st[t].iss_in, 2990 inm->inm_st[t].iss_rec); 2991 } 2992 printf("%s: --- end inm %p ---\n", __func__, inm); 2993 } 2994 2995 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 2996 2997 void 2998 inm_print(const struct in_multi *inm) 2999 { 3000 3001 } 3002 3003 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3004 3005 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3006