1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 2005 Robert N. M. Watson. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * IPv4 multicast socket, group, and socket option processing module. 33 */ 34 35 #include <sys/cdefs.h> 36 __FBSDID("$FreeBSD$"); 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/kernel.h> 41 #include <sys/malloc.h> 42 #include <sys/mbuf.h> 43 #include <sys/protosw.h> 44 #include <sys/socket.h> 45 #include <sys/socketvar.h> 46 #include <sys/protosw.h> 47 #include <sys/sysctl.h> 48 #include <sys/ktr.h> 49 #include <sys/taskqueue.h> 50 #include <sys/tree.h> 51 52 #include <net/if.h> 53 #include <net/if_var.h> 54 #include <net/if_dl.h> 55 #include <net/route.h> 56 #include <net/vnet.h> 57 58 #include <netinet/in.h> 59 #include <netinet/in_systm.h> 60 #include <netinet/in_pcb.h> 61 #include <netinet/in_var.h> 62 #include <netinet/ip_var.h> 63 #include <netinet/igmp_var.h> 64 65 #ifndef KTR_IGMPV3 66 #define KTR_IGMPV3 KTR_INET 67 #endif 68 69 #ifndef __SOCKUNION_DECLARED 70 union sockunion { 71 struct sockaddr_storage ss; 72 struct sockaddr sa; 73 struct sockaddr_dl sdl; 74 struct sockaddr_in sin; 75 }; 76 typedef union sockunion sockunion_t; 77 #define __SOCKUNION_DECLARED 78 #endif /* __SOCKUNION_DECLARED */ 79 80 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 81 "IPv4 multicast PCB-layer source filter"); 82 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 83 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 84 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 85 "IPv4 multicast IGMP-layer source filter"); 86 87 /* 88 * Locking: 89 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 90 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 91 * it can be taken by code in net/if.c also. 92 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 93 * 94 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly 95 * any need for in_multi itself to be virtualized -- it is bound to an ifp 96 * anyway no matter what happens. 97 */ 98 struct mtx in_multi_mtx; 99 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF); 100 101 /* 102 * Functions with non-static linkage defined in this file should be 103 * declared in in_var.h: 104 * imo_multi_filter() 105 * in_addmulti() 106 * in_delmulti() 107 * in_joingroup() 108 * in_joingroup_locked() 109 * in_leavegroup() 110 * in_leavegroup_locked() 111 * and ip_var.h: 112 * inp_freemoptions() 113 * inp_getmoptions() 114 * inp_setmoptions() 115 * 116 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 117 * and in_delmulti(). 118 */ 119 static void imf_commit(struct in_mfilter *); 120 static int imf_get_source(struct in_mfilter *imf, 121 const struct sockaddr_in *psin, 122 struct in_msource **); 123 static struct in_msource * 124 imf_graft(struct in_mfilter *, const uint8_t, 125 const struct sockaddr_in *); 126 static void imf_leave(struct in_mfilter *); 127 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 128 static void imf_purge(struct in_mfilter *); 129 static void imf_rollback(struct in_mfilter *); 130 static void imf_reap(struct in_mfilter *); 131 static int imo_grow(struct ip_moptions *); 132 static size_t imo_match_group(const struct ip_moptions *, 133 const struct ifnet *, const struct sockaddr *); 134 static struct in_msource * 135 imo_match_source(const struct ip_moptions *, const size_t, 136 const struct sockaddr *); 137 static void ims_merge(struct ip_msource *ims, 138 const struct in_msource *lims, const int rollback); 139 static int in_getmulti(struct ifnet *, const struct in_addr *, 140 struct in_multi **); 141 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 142 const int noalloc, struct ip_msource **pims); 143 #ifdef KTR 144 static int inm_is_ifp_detached(const struct in_multi *); 145 #endif 146 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 147 static void inm_purge(struct in_multi *); 148 static void inm_reap(struct in_multi *); 149 static struct ip_moptions * 150 inp_findmoptions(struct inpcb *); 151 static void inp_freemoptions_internal(struct ip_moptions *); 152 static void inp_gcmoptions(void *, int); 153 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 154 static int inp_join_group(struct inpcb *, struct sockopt *); 155 static int inp_leave_group(struct inpcb *, struct sockopt *); 156 static struct ifnet * 157 inp_lookup_mcast_ifp(const struct inpcb *, 158 const struct sockaddr_in *, const struct in_addr); 159 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 160 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 161 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 162 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 163 164 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 165 "IPv4 multicast"); 166 167 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 168 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 169 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 170 "Max source filters per group"); 171 172 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 173 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 174 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 175 "Max source filters per socket"); 176 177 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 178 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 179 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 180 181 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 182 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 183 "Per-interface stack-wide source filters"); 184 185 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 186 STAILQ_HEAD_INITIALIZER(imo_gc_list); 187 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 188 189 #ifdef KTR 190 /* 191 * Inline function which wraps assertions for a valid ifp. 192 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 193 * is detached. 194 */ 195 static int __inline 196 inm_is_ifp_detached(const struct in_multi *inm) 197 { 198 struct ifnet *ifp; 199 200 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 201 ifp = inm->inm_ifma->ifma_ifp; 202 if (ifp != NULL) { 203 /* 204 * Sanity check that netinet's notion of ifp is the 205 * same as net's. 206 */ 207 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 208 } 209 210 return (ifp == NULL); 211 } 212 #endif 213 214 /* 215 * Initialize an in_mfilter structure to a known state at t0, t1 216 * with an empty source filter list. 217 */ 218 static __inline void 219 imf_init(struct in_mfilter *imf, const int st0, const int st1) 220 { 221 memset(imf, 0, sizeof(struct in_mfilter)); 222 RB_INIT(&imf->imf_sources); 223 imf->imf_st[0] = st0; 224 imf->imf_st[1] = st1; 225 } 226 227 /* 228 * Function for looking up an in_multi record for an IPv4 multicast address 229 * on a given interface. ifp must be valid. If no record found, return NULL. 230 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held. 231 */ 232 struct in_multi * 233 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 234 { 235 struct ifmultiaddr *ifma; 236 struct in_multi *inm; 237 238 IN_MULTI_LOCK_ASSERT(); 239 IF_ADDR_LOCK_ASSERT(ifp); 240 241 inm = NULL; 242 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 243 if (ifma->ifma_addr->sa_family == AF_INET) { 244 inm = (struct in_multi *)ifma->ifma_protospec; 245 if (inm->inm_addr.s_addr == ina.s_addr) 246 break; 247 inm = NULL; 248 } 249 } 250 return (inm); 251 } 252 253 /* 254 * Wrapper for inm_lookup_locked(). 255 * The IF_ADDR_LOCK will be taken on ifp and released on return. 256 */ 257 struct in_multi * 258 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 259 { 260 struct in_multi *inm; 261 262 IN_MULTI_LOCK_ASSERT(); 263 IF_ADDR_RLOCK(ifp); 264 inm = inm_lookup_locked(ifp, ina); 265 IF_ADDR_RUNLOCK(ifp); 266 267 return (inm); 268 } 269 270 /* 271 * Resize the ip_moptions vector to the next power-of-two minus 1. 272 * May be called with locks held; do not sleep. 273 */ 274 static int 275 imo_grow(struct ip_moptions *imo) 276 { 277 struct in_multi **nmships; 278 struct in_multi **omships; 279 struct in_mfilter *nmfilters; 280 struct in_mfilter *omfilters; 281 size_t idx; 282 size_t newmax; 283 size_t oldmax; 284 285 nmships = NULL; 286 nmfilters = NULL; 287 omships = imo->imo_membership; 288 omfilters = imo->imo_mfilters; 289 oldmax = imo->imo_max_memberships; 290 newmax = ((oldmax + 1) * 2) - 1; 291 292 if (newmax <= IP_MAX_MEMBERSHIPS) { 293 nmships = (struct in_multi **)realloc(omships, 294 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 295 nmfilters = (struct in_mfilter *)realloc(omfilters, 296 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 297 if (nmships != NULL && nmfilters != NULL) { 298 /* Initialize newly allocated source filter heads. */ 299 for (idx = oldmax; idx < newmax; idx++) { 300 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 301 MCAST_EXCLUDE); 302 } 303 imo->imo_max_memberships = newmax; 304 imo->imo_membership = nmships; 305 imo->imo_mfilters = nmfilters; 306 } 307 } 308 309 if (nmships == NULL || nmfilters == NULL) { 310 if (nmships != NULL) 311 free(nmships, M_IPMOPTS); 312 if (nmfilters != NULL) 313 free(nmfilters, M_INMFILTER); 314 return (ETOOMANYREFS); 315 } 316 317 return (0); 318 } 319 320 /* 321 * Find an IPv4 multicast group entry for this ip_moptions instance 322 * which matches the specified group, and optionally an interface. 323 * Return its index into the array, or -1 if not found. 324 */ 325 static size_t 326 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 327 const struct sockaddr *group) 328 { 329 const struct sockaddr_in *gsin; 330 struct in_multi **pinm; 331 int idx; 332 int nmships; 333 334 gsin = (const struct sockaddr_in *)group; 335 336 /* The imo_membership array may be lazy allocated. */ 337 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 338 return (-1); 339 340 nmships = imo->imo_num_memberships; 341 pinm = &imo->imo_membership[0]; 342 for (idx = 0; idx < nmships; idx++, pinm++) { 343 if (*pinm == NULL) 344 continue; 345 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 346 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 347 break; 348 } 349 } 350 if (idx >= nmships) 351 idx = -1; 352 353 return (idx); 354 } 355 356 /* 357 * Find an IPv4 multicast source entry for this imo which matches 358 * the given group index for this socket, and source address. 359 * 360 * NOTE: This does not check if the entry is in-mode, merely if 361 * it exists, which may not be the desired behaviour. 362 */ 363 static struct in_msource * 364 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 365 const struct sockaddr *src) 366 { 367 struct ip_msource find; 368 struct in_mfilter *imf; 369 struct ip_msource *ims; 370 const sockunion_t *psa; 371 372 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 373 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 374 ("%s: invalid index %d\n", __func__, (int)gidx)); 375 376 /* The imo_mfilters array may be lazy allocated. */ 377 if (imo->imo_mfilters == NULL) 378 return (NULL); 379 imf = &imo->imo_mfilters[gidx]; 380 381 /* Source trees are keyed in host byte order. */ 382 psa = (const sockunion_t *)src; 383 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 384 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 385 386 return ((struct in_msource *)ims); 387 } 388 389 /* 390 * Perform filtering for multicast datagrams on a socket by group and source. 391 * 392 * Returns 0 if a datagram should be allowed through, or various error codes 393 * if the socket was not a member of the group, or the source was muted, etc. 394 */ 395 int 396 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 397 const struct sockaddr *group, const struct sockaddr *src) 398 { 399 size_t gidx; 400 struct in_msource *ims; 401 int mode; 402 403 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 404 405 gidx = imo_match_group(imo, ifp, group); 406 if (gidx == -1) 407 return (MCAST_NOTGMEMBER); 408 409 /* 410 * Check if the source was included in an (S,G) join. 411 * Allow reception on exclusive memberships by default, 412 * reject reception on inclusive memberships by default. 413 * Exclude source only if an in-mode exclude filter exists. 414 * Include source only if an in-mode include filter exists. 415 * NOTE: We are comparing group state here at IGMP t1 (now) 416 * with socket-layer t0 (since last downcall). 417 */ 418 mode = imo->imo_mfilters[gidx].imf_st[1]; 419 ims = imo_match_source(imo, gidx, src); 420 421 if ((ims == NULL && mode == MCAST_INCLUDE) || 422 (ims != NULL && ims->imsl_st[0] != mode)) 423 return (MCAST_NOTSMEMBER); 424 425 return (MCAST_PASS); 426 } 427 428 /* 429 * Find and return a reference to an in_multi record for (ifp, group), 430 * and bump its reference count. 431 * If one does not exist, try to allocate it, and update link-layer multicast 432 * filters on ifp to listen for group. 433 * Assumes the IN_MULTI lock is held across the call. 434 * Return 0 if successful, otherwise return an appropriate error code. 435 */ 436 static int 437 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 438 struct in_multi **pinm) 439 { 440 struct sockaddr_in gsin; 441 struct ifmultiaddr *ifma; 442 struct in_ifinfo *ii; 443 struct in_multi *inm; 444 int error; 445 446 IN_MULTI_LOCK_ASSERT(); 447 448 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 449 450 inm = inm_lookup(ifp, *group); 451 if (inm != NULL) { 452 /* 453 * If we already joined this group, just bump the 454 * refcount and return it. 455 */ 456 KASSERT(inm->inm_refcount >= 1, 457 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 458 ++inm->inm_refcount; 459 *pinm = inm; 460 return (0); 461 } 462 463 memset(&gsin, 0, sizeof(gsin)); 464 gsin.sin_family = AF_INET; 465 gsin.sin_len = sizeof(struct sockaddr_in); 466 gsin.sin_addr = *group; 467 468 /* 469 * Check if a link-layer group is already associated 470 * with this network-layer group on the given ifnet. 471 */ 472 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 473 if (error != 0) 474 return (error); 475 476 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 477 IF_ADDR_WLOCK(ifp); 478 479 /* 480 * If something other than netinet is occupying the link-layer 481 * group, print a meaningful error message and back out of 482 * the allocation. 483 * Otherwise, bump the refcount on the existing network-layer 484 * group association and return it. 485 */ 486 if (ifma->ifma_protospec != NULL) { 487 inm = (struct in_multi *)ifma->ifma_protospec; 488 #ifdef INVARIANTS 489 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 490 __func__)); 491 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 492 ("%s: ifma not AF_INET", __func__)); 493 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 494 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 495 !in_hosteq(inm->inm_addr, *group)) 496 panic("%s: ifma %p is inconsistent with %p (%s)", 497 __func__, ifma, inm, inet_ntoa(*group)); 498 #endif 499 ++inm->inm_refcount; 500 *pinm = inm; 501 IF_ADDR_WUNLOCK(ifp); 502 return (0); 503 } 504 505 IF_ADDR_WLOCK_ASSERT(ifp); 506 507 /* 508 * A new in_multi record is needed; allocate and initialize it. 509 * We DO NOT perform an IGMP join as the in_ layer may need to 510 * push an initial source list down to IGMP to support SSM. 511 * 512 * The initial source filter state is INCLUDE, {} as per the RFC. 513 */ 514 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 515 if (inm == NULL) { 516 if_delmulti_ifma(ifma); 517 IF_ADDR_WUNLOCK(ifp); 518 return (ENOMEM); 519 } 520 inm->inm_addr = *group; 521 inm->inm_ifp = ifp; 522 inm->inm_igi = ii->ii_igmp; 523 inm->inm_ifma = ifma; 524 inm->inm_refcount = 1; 525 inm->inm_state = IGMP_NOT_MEMBER; 526 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 527 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 528 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 529 RB_INIT(&inm->inm_srcs); 530 531 ifma->ifma_protospec = inm; 532 533 *pinm = inm; 534 535 IF_ADDR_WUNLOCK(ifp); 536 return (0); 537 } 538 539 /* 540 * Drop a reference to an in_multi record. 541 * 542 * If the refcount drops to 0, free the in_multi record and 543 * delete the underlying link-layer membership. 544 */ 545 void 546 inm_release_locked(struct in_multi *inm) 547 { 548 struct ifmultiaddr *ifma; 549 550 IN_MULTI_LOCK_ASSERT(); 551 552 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 553 554 if (--inm->inm_refcount > 0) { 555 CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__, 556 inm->inm_refcount); 557 return; 558 } 559 560 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 561 562 ifma = inm->inm_ifma; 563 564 /* XXX this access is not covered by IF_ADDR_LOCK */ 565 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 566 KASSERT(ifma->ifma_protospec == inm, 567 ("%s: ifma_protospec != inm", __func__)); 568 ifma->ifma_protospec = NULL; 569 570 inm_purge(inm); 571 572 free(inm, M_IPMADDR); 573 574 if_delmulti_ifma(ifma); 575 } 576 577 /* 578 * Clear recorded source entries for a group. 579 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 580 * FIXME: Should reap. 581 */ 582 void 583 inm_clear_recorded(struct in_multi *inm) 584 { 585 struct ip_msource *ims; 586 587 IN_MULTI_LOCK_ASSERT(); 588 589 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 590 if (ims->ims_stp) { 591 ims->ims_stp = 0; 592 --inm->inm_st[1].iss_rec; 593 } 594 } 595 KASSERT(inm->inm_st[1].iss_rec == 0, 596 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 597 } 598 599 /* 600 * Record a source as pending for a Source-Group IGMPv3 query. 601 * This lives here as it modifies the shared tree. 602 * 603 * inm is the group descriptor. 604 * naddr is the address of the source to record in network-byte order. 605 * 606 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 607 * lazy-allocate a source node in response to an SG query. 608 * Otherwise, no allocation is performed. This saves some memory 609 * with the trade-off that the source will not be reported to the 610 * router if joined in the window between the query response and 611 * the group actually being joined on the local host. 612 * 613 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 614 * This turns off the allocation of a recorded source entry if 615 * the group has not been joined. 616 * 617 * Return 0 if the source didn't exist or was already marked as recorded. 618 * Return 1 if the source was marked as recorded by this function. 619 * Return <0 if any error occured (negated errno code). 620 */ 621 int 622 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 623 { 624 struct ip_msource find; 625 struct ip_msource *ims, *nims; 626 627 IN_MULTI_LOCK_ASSERT(); 628 629 find.ims_haddr = ntohl(naddr); 630 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 631 if (ims && ims->ims_stp) 632 return (0); 633 if (ims == NULL) { 634 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 635 return (-ENOSPC); 636 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 637 M_NOWAIT | M_ZERO); 638 if (nims == NULL) 639 return (-ENOMEM); 640 nims->ims_haddr = find.ims_haddr; 641 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 642 ++inm->inm_nsrc; 643 ims = nims; 644 } 645 646 /* 647 * Mark the source as recorded and update the recorded 648 * source count. 649 */ 650 ++ims->ims_stp; 651 ++inm->inm_st[1].iss_rec; 652 653 return (1); 654 } 655 656 /* 657 * Return a pointer to an in_msource owned by an in_mfilter, 658 * given its source address. 659 * Lazy-allocate if needed. If this is a new entry its filter state is 660 * undefined at t0. 661 * 662 * imf is the filter set being modified. 663 * haddr is the source address in *host* byte-order. 664 * 665 * SMPng: May be called with locks held; malloc must not block. 666 */ 667 static int 668 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 669 struct in_msource **plims) 670 { 671 struct ip_msource find; 672 struct ip_msource *ims, *nims; 673 struct in_msource *lims; 674 int error; 675 676 error = 0; 677 ims = NULL; 678 lims = NULL; 679 680 /* key is host byte order */ 681 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 682 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 683 lims = (struct in_msource *)ims; 684 if (lims == NULL) { 685 if (imf->imf_nsrc == in_mcast_maxsocksrc) 686 return (ENOSPC); 687 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 688 M_NOWAIT | M_ZERO); 689 if (nims == NULL) 690 return (ENOMEM); 691 lims = (struct in_msource *)nims; 692 lims->ims_haddr = find.ims_haddr; 693 lims->imsl_st[0] = MCAST_UNDEFINED; 694 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 695 ++imf->imf_nsrc; 696 } 697 698 *plims = lims; 699 700 return (error); 701 } 702 703 /* 704 * Graft a source entry into an existing socket-layer filter set, 705 * maintaining any required invariants and checking allocations. 706 * 707 * The source is marked as being in the new filter mode at t1. 708 * 709 * Return the pointer to the new node, otherwise return NULL. 710 */ 711 static struct in_msource * 712 imf_graft(struct in_mfilter *imf, const uint8_t st1, 713 const struct sockaddr_in *psin) 714 { 715 struct ip_msource *nims; 716 struct in_msource *lims; 717 718 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 719 M_NOWAIT | M_ZERO); 720 if (nims == NULL) 721 return (NULL); 722 lims = (struct in_msource *)nims; 723 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 724 lims->imsl_st[0] = MCAST_UNDEFINED; 725 lims->imsl_st[1] = st1; 726 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 727 ++imf->imf_nsrc; 728 729 return (lims); 730 } 731 732 /* 733 * Prune a source entry from an existing socket-layer filter set, 734 * maintaining any required invariants and checking allocations. 735 * 736 * The source is marked as being left at t1, it is not freed. 737 * 738 * Return 0 if no error occurred, otherwise return an errno value. 739 */ 740 static int 741 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 742 { 743 struct ip_msource find; 744 struct ip_msource *ims; 745 struct in_msource *lims; 746 747 /* key is host byte order */ 748 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 749 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 750 if (ims == NULL) 751 return (ENOENT); 752 lims = (struct in_msource *)ims; 753 lims->imsl_st[1] = MCAST_UNDEFINED; 754 return (0); 755 } 756 757 /* 758 * Revert socket-layer filter set deltas at t1 to t0 state. 759 */ 760 static void 761 imf_rollback(struct in_mfilter *imf) 762 { 763 struct ip_msource *ims, *tims; 764 struct in_msource *lims; 765 766 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 767 lims = (struct in_msource *)ims; 768 if (lims->imsl_st[0] == lims->imsl_st[1]) { 769 /* no change at t1 */ 770 continue; 771 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 772 /* revert change to existing source at t1 */ 773 lims->imsl_st[1] = lims->imsl_st[0]; 774 } else { 775 /* revert source added t1 */ 776 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 777 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 778 free(ims, M_INMFILTER); 779 imf->imf_nsrc--; 780 } 781 } 782 imf->imf_st[1] = imf->imf_st[0]; 783 } 784 785 /* 786 * Mark socket-layer filter set as INCLUDE {} at t1. 787 */ 788 static void 789 imf_leave(struct in_mfilter *imf) 790 { 791 struct ip_msource *ims; 792 struct in_msource *lims; 793 794 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 795 lims = (struct in_msource *)ims; 796 lims->imsl_st[1] = MCAST_UNDEFINED; 797 } 798 imf->imf_st[1] = MCAST_INCLUDE; 799 } 800 801 /* 802 * Mark socket-layer filter set deltas as committed. 803 */ 804 static void 805 imf_commit(struct in_mfilter *imf) 806 { 807 struct ip_msource *ims; 808 struct in_msource *lims; 809 810 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 811 lims = (struct in_msource *)ims; 812 lims->imsl_st[0] = lims->imsl_st[1]; 813 } 814 imf->imf_st[0] = imf->imf_st[1]; 815 } 816 817 /* 818 * Reap unreferenced sources from socket-layer filter set. 819 */ 820 static void 821 imf_reap(struct in_mfilter *imf) 822 { 823 struct ip_msource *ims, *tims; 824 struct in_msource *lims; 825 826 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 827 lims = (struct in_msource *)ims; 828 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 829 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 830 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 831 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 832 free(ims, M_INMFILTER); 833 imf->imf_nsrc--; 834 } 835 } 836 } 837 838 /* 839 * Purge socket-layer filter set. 840 */ 841 static void 842 imf_purge(struct in_mfilter *imf) 843 { 844 struct ip_msource *ims, *tims; 845 846 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 847 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 848 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 849 free(ims, M_INMFILTER); 850 imf->imf_nsrc--; 851 } 852 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 853 KASSERT(RB_EMPTY(&imf->imf_sources), 854 ("%s: imf_sources not empty", __func__)); 855 } 856 857 /* 858 * Look up a source filter entry for a multicast group. 859 * 860 * inm is the group descriptor to work with. 861 * haddr is the host-byte-order IPv4 address to look up. 862 * noalloc may be non-zero to suppress allocation of sources. 863 * *pims will be set to the address of the retrieved or allocated source. 864 * 865 * SMPng: NOTE: may be called with locks held. 866 * Return 0 if successful, otherwise return a non-zero error code. 867 */ 868 static int 869 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 870 const int noalloc, struct ip_msource **pims) 871 { 872 struct ip_msource find; 873 struct ip_msource *ims, *nims; 874 #ifdef KTR 875 struct in_addr ia; 876 #endif 877 878 find.ims_haddr = haddr; 879 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 880 if (ims == NULL && !noalloc) { 881 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 882 return (ENOSPC); 883 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 884 M_NOWAIT | M_ZERO); 885 if (nims == NULL) 886 return (ENOMEM); 887 nims->ims_haddr = haddr; 888 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 889 ++inm->inm_nsrc; 890 ims = nims; 891 #ifdef KTR 892 ia.s_addr = htonl(haddr); 893 CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__, 894 inet_ntoa(ia), ims); 895 #endif 896 } 897 898 *pims = ims; 899 return (0); 900 } 901 902 /* 903 * Merge socket-layer source into IGMP-layer source. 904 * If rollback is non-zero, perform the inverse of the merge. 905 */ 906 static void 907 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 908 const int rollback) 909 { 910 int n = rollback ? -1 : 1; 911 #ifdef KTR 912 struct in_addr ia; 913 914 ia.s_addr = htonl(ims->ims_haddr); 915 #endif 916 917 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 918 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s", 919 __func__, n, inet_ntoa(ia)); 920 ims->ims_st[1].ex -= n; 921 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 922 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s", 923 __func__, n, inet_ntoa(ia)); 924 ims->ims_st[1].in -= n; 925 } 926 927 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 928 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s", 929 __func__, n, inet_ntoa(ia)); 930 ims->ims_st[1].ex += n; 931 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 932 CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s", 933 __func__, n, inet_ntoa(ia)); 934 ims->ims_st[1].in += n; 935 } 936 } 937 938 /* 939 * Atomically update the global in_multi state, when a membership's 940 * filter list is being updated in any way. 941 * 942 * imf is the per-inpcb-membership group filter pointer. 943 * A fake imf may be passed for in-kernel consumers. 944 * 945 * XXX This is a candidate for a set-symmetric-difference style loop 946 * which would eliminate the repeated lookup from root of ims nodes, 947 * as they share the same key space. 948 * 949 * If any error occurred this function will back out of refcounts 950 * and return a non-zero value. 951 */ 952 static int 953 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 954 { 955 struct ip_msource *ims, *nims; 956 struct in_msource *lims; 957 int schanged, error; 958 int nsrc0, nsrc1; 959 960 schanged = 0; 961 error = 0; 962 nsrc1 = nsrc0 = 0; 963 964 /* 965 * Update the source filters first, as this may fail. 966 * Maintain count of in-mode filters at t0, t1. These are 967 * used to work out if we transition into ASM mode or not. 968 * Maintain a count of source filters whose state was 969 * actually modified by this operation. 970 */ 971 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 972 lims = (struct in_msource *)ims; 973 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 974 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 975 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 976 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 977 ++schanged; 978 if (error) 979 break; 980 ims_merge(nims, lims, 0); 981 } 982 if (error) { 983 struct ip_msource *bims; 984 985 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 986 lims = (struct in_msource *)ims; 987 if (lims->imsl_st[0] == lims->imsl_st[1]) 988 continue; 989 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 990 if (bims == NULL) 991 continue; 992 ims_merge(bims, lims, 1); 993 } 994 goto out_reap; 995 } 996 997 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 998 __func__, nsrc0, nsrc1); 999 1000 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1001 if (imf->imf_st[0] == imf->imf_st[1] && 1002 imf->imf_st[1] == MCAST_INCLUDE) { 1003 if (nsrc1 == 0) { 1004 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1005 --inm->inm_st[1].iss_in; 1006 } 1007 } 1008 1009 /* Handle filter mode transition on socket. */ 1010 if (imf->imf_st[0] != imf->imf_st[1]) { 1011 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1012 __func__, imf->imf_st[0], imf->imf_st[1]); 1013 1014 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1015 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1016 --inm->inm_st[1].iss_ex; 1017 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1018 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1019 --inm->inm_st[1].iss_in; 1020 } 1021 1022 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1023 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1024 inm->inm_st[1].iss_ex++; 1025 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1026 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1027 inm->inm_st[1].iss_in++; 1028 } 1029 } 1030 1031 /* 1032 * Track inm filter state in terms of listener counts. 1033 * If there are any exclusive listeners, stack-wide 1034 * membership is exclusive. 1035 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1036 * If no listeners remain, state is undefined at t1, 1037 * and the IGMP lifecycle for this group should finish. 1038 */ 1039 if (inm->inm_st[1].iss_ex > 0) { 1040 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1041 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1042 } else if (inm->inm_st[1].iss_in > 0) { 1043 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1044 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1045 } else { 1046 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1047 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1048 } 1049 1050 /* Decrement ASM listener count on transition out of ASM mode. */ 1051 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1052 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1053 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) 1054 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1055 --inm->inm_st[1].iss_asm; 1056 } 1057 1058 /* Increment ASM listener count on transition to ASM mode. */ 1059 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1060 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1061 inm->inm_st[1].iss_asm++; 1062 } 1063 1064 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1065 inm_print(inm); 1066 1067 out_reap: 1068 if (schanged > 0) { 1069 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1070 inm_reap(inm); 1071 } 1072 return (error); 1073 } 1074 1075 /* 1076 * Mark an in_multi's filter set deltas as committed. 1077 * Called by IGMP after a state change has been enqueued. 1078 */ 1079 void 1080 inm_commit(struct in_multi *inm) 1081 { 1082 struct ip_msource *ims; 1083 1084 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1085 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1086 inm_print(inm); 1087 1088 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1089 ims->ims_st[0] = ims->ims_st[1]; 1090 } 1091 inm->inm_st[0] = inm->inm_st[1]; 1092 } 1093 1094 /* 1095 * Reap unreferenced nodes from an in_multi's filter set. 1096 */ 1097 static void 1098 inm_reap(struct in_multi *inm) 1099 { 1100 struct ip_msource *ims, *tims; 1101 1102 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1103 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1104 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1105 ims->ims_stp != 0) 1106 continue; 1107 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1108 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1109 free(ims, M_IPMSOURCE); 1110 inm->inm_nsrc--; 1111 } 1112 } 1113 1114 /* 1115 * Purge all source nodes from an in_multi's filter set. 1116 */ 1117 static void 1118 inm_purge(struct in_multi *inm) 1119 { 1120 struct ip_msource *ims, *tims; 1121 1122 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1123 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1124 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1125 free(ims, M_IPMSOURCE); 1126 inm->inm_nsrc--; 1127 } 1128 } 1129 1130 /* 1131 * Join a multicast group; unlocked entry point. 1132 * 1133 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1134 * is not held. Fortunately, ifp is unlikely to have been detached 1135 * at this point, so we assume it's OK to recurse. 1136 */ 1137 int 1138 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1139 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1140 { 1141 int error; 1142 1143 IN_MULTI_LOCK(); 1144 error = in_joingroup_locked(ifp, gina, imf, pinm); 1145 IN_MULTI_UNLOCK(); 1146 1147 return (error); 1148 } 1149 1150 /* 1151 * Join a multicast group; real entry point. 1152 * 1153 * Only preserves atomicity at inm level. 1154 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1155 * 1156 * If the IGMP downcall fails, the group is not joined, and an error 1157 * code is returned. 1158 */ 1159 int 1160 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1161 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1162 { 1163 struct in_mfilter timf; 1164 struct in_multi *inm; 1165 int error; 1166 1167 IN_MULTI_LOCK_ASSERT(); 1168 1169 CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__, 1170 inet_ntoa(*gina), ifp, ifp->if_xname); 1171 1172 error = 0; 1173 inm = NULL; 1174 1175 /* 1176 * If no imf was specified (i.e. kernel consumer), 1177 * fake one up and assume it is an ASM join. 1178 */ 1179 if (imf == NULL) { 1180 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1181 imf = &timf; 1182 } 1183 1184 error = in_getmulti(ifp, gina, &inm); 1185 if (error) { 1186 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1187 return (error); 1188 } 1189 1190 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1191 error = inm_merge(inm, imf); 1192 if (error) { 1193 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1194 goto out_inm_release; 1195 } 1196 1197 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1198 error = igmp_change_state(inm); 1199 if (error) { 1200 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1201 goto out_inm_release; 1202 } 1203 1204 out_inm_release: 1205 if (error) { 1206 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1207 inm_release_locked(inm); 1208 } else { 1209 *pinm = inm; 1210 } 1211 1212 return (error); 1213 } 1214 1215 /* 1216 * Leave a multicast group; unlocked entry point. 1217 */ 1218 int 1219 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1220 { 1221 int error; 1222 1223 IN_MULTI_LOCK(); 1224 error = in_leavegroup_locked(inm, imf); 1225 IN_MULTI_UNLOCK(); 1226 1227 return (error); 1228 } 1229 1230 /* 1231 * Leave a multicast group; real entry point. 1232 * All source filters will be expunged. 1233 * 1234 * Only preserves atomicity at inm level. 1235 * 1236 * Holding the write lock for the INP which contains imf 1237 * is highly advisable. We can't assert for it as imf does not 1238 * contain a back-pointer to the owning inp. 1239 * 1240 * Note: This is not the same as inm_release(*) as this function also 1241 * makes a state change downcall into IGMP. 1242 */ 1243 int 1244 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1245 { 1246 struct in_mfilter timf; 1247 int error; 1248 1249 error = 0; 1250 1251 IN_MULTI_LOCK_ASSERT(); 1252 1253 CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__, 1254 inm, inet_ntoa(inm->inm_addr), 1255 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1256 imf); 1257 1258 /* 1259 * If no imf was specified (i.e. kernel consumer), 1260 * fake one up and assume it is an ASM join. 1261 */ 1262 if (imf == NULL) { 1263 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1264 imf = &timf; 1265 } 1266 1267 /* 1268 * Begin state merge transaction at IGMP layer. 1269 * 1270 * As this particular invocation should not cause any memory 1271 * to be allocated, and there is no opportunity to roll back 1272 * the transaction, it MUST NOT fail. 1273 */ 1274 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1275 error = inm_merge(inm, imf); 1276 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1277 1278 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1279 CURVNET_SET(inm->inm_ifp->if_vnet); 1280 error = igmp_change_state(inm); 1281 CURVNET_RESTORE(); 1282 if (error) 1283 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1284 1285 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1286 inm_release_locked(inm); 1287 1288 return (error); 1289 } 1290 1291 /*#ifndef BURN_BRIDGES*/ 1292 /* 1293 * Join an IPv4 multicast group in (*,G) exclusive mode. 1294 * The group must be a 224.0.0.0/24 link-scope group. 1295 * This KPI is for legacy kernel consumers only. 1296 */ 1297 struct in_multi * 1298 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1299 { 1300 struct in_multi *pinm; 1301 int error; 1302 1303 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1304 ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap))); 1305 1306 error = in_joingroup(ifp, ap, NULL, &pinm); 1307 if (error != 0) 1308 pinm = NULL; 1309 1310 return (pinm); 1311 } 1312 1313 /* 1314 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode. 1315 * This KPI is for legacy kernel consumers only. 1316 */ 1317 void 1318 in_delmulti(struct in_multi *inm) 1319 { 1320 1321 (void)in_leavegroup(inm, NULL); 1322 } 1323 /*#endif*/ 1324 1325 /* 1326 * Block or unblock an ASM multicast source on an inpcb. 1327 * This implements the delta-based API described in RFC 3678. 1328 * 1329 * The delta-based API applies only to exclusive-mode memberships. 1330 * An IGMP downcall will be performed. 1331 * 1332 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1333 * 1334 * Return 0 if successful, otherwise return an appropriate error code. 1335 */ 1336 static int 1337 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1338 { 1339 struct group_source_req gsr; 1340 sockunion_t *gsa, *ssa; 1341 struct ifnet *ifp; 1342 struct in_mfilter *imf; 1343 struct ip_moptions *imo; 1344 struct in_msource *ims; 1345 struct in_multi *inm; 1346 size_t idx; 1347 uint16_t fmode; 1348 int error, doblock; 1349 1350 ifp = NULL; 1351 error = 0; 1352 doblock = 0; 1353 1354 memset(&gsr, 0, sizeof(struct group_source_req)); 1355 gsa = (sockunion_t *)&gsr.gsr_group; 1356 ssa = (sockunion_t *)&gsr.gsr_source; 1357 1358 switch (sopt->sopt_name) { 1359 case IP_BLOCK_SOURCE: 1360 case IP_UNBLOCK_SOURCE: { 1361 struct ip_mreq_source mreqs; 1362 1363 error = sooptcopyin(sopt, &mreqs, 1364 sizeof(struct ip_mreq_source), 1365 sizeof(struct ip_mreq_source)); 1366 if (error) 1367 return (error); 1368 1369 gsa->sin.sin_family = AF_INET; 1370 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1371 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1372 1373 ssa->sin.sin_family = AF_INET; 1374 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1375 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1376 1377 if (!in_nullhost(mreqs.imr_interface)) 1378 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1379 1380 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1381 doblock = 1; 1382 1383 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1384 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1385 break; 1386 } 1387 1388 case MCAST_BLOCK_SOURCE: 1389 case MCAST_UNBLOCK_SOURCE: 1390 error = sooptcopyin(sopt, &gsr, 1391 sizeof(struct group_source_req), 1392 sizeof(struct group_source_req)); 1393 if (error) 1394 return (error); 1395 1396 if (gsa->sin.sin_family != AF_INET || 1397 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1398 return (EINVAL); 1399 1400 if (ssa->sin.sin_family != AF_INET || 1401 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1402 return (EINVAL); 1403 1404 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1405 return (EADDRNOTAVAIL); 1406 1407 ifp = ifnet_byindex(gsr.gsr_interface); 1408 1409 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1410 doblock = 1; 1411 break; 1412 1413 default: 1414 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1415 __func__, sopt->sopt_name); 1416 return (EOPNOTSUPP); 1417 break; 1418 } 1419 1420 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1421 return (EINVAL); 1422 1423 /* 1424 * Check if we are actually a member of this group. 1425 */ 1426 imo = inp_findmoptions(inp); 1427 idx = imo_match_group(imo, ifp, &gsa->sa); 1428 if (idx == -1 || imo->imo_mfilters == NULL) { 1429 error = EADDRNOTAVAIL; 1430 goto out_inp_locked; 1431 } 1432 1433 KASSERT(imo->imo_mfilters != NULL, 1434 ("%s: imo_mfilters not allocated", __func__)); 1435 imf = &imo->imo_mfilters[idx]; 1436 inm = imo->imo_membership[idx]; 1437 1438 /* 1439 * Attempting to use the delta-based API on an 1440 * non exclusive-mode membership is an error. 1441 */ 1442 fmode = imf->imf_st[0]; 1443 if (fmode != MCAST_EXCLUDE) { 1444 error = EINVAL; 1445 goto out_inp_locked; 1446 } 1447 1448 /* 1449 * Deal with error cases up-front: 1450 * Asked to block, but already blocked; or 1451 * Asked to unblock, but nothing to unblock. 1452 * If adding a new block entry, allocate it. 1453 */ 1454 ims = imo_match_source(imo, idx, &ssa->sa); 1455 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1456 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 1457 inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "); 1458 error = EADDRNOTAVAIL; 1459 goto out_inp_locked; 1460 } 1461 1462 INP_WLOCK_ASSERT(inp); 1463 1464 /* 1465 * Begin state merge transaction at socket layer. 1466 */ 1467 if (doblock) { 1468 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1469 ims = imf_graft(imf, fmode, &ssa->sin); 1470 if (ims == NULL) 1471 error = ENOMEM; 1472 } else { 1473 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1474 error = imf_prune(imf, &ssa->sin); 1475 } 1476 1477 if (error) { 1478 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1479 goto out_imf_rollback; 1480 } 1481 1482 /* 1483 * Begin state merge transaction at IGMP layer. 1484 */ 1485 IN_MULTI_LOCK(); 1486 1487 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1488 error = inm_merge(inm, imf); 1489 if (error) { 1490 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1491 goto out_in_multi_locked; 1492 } 1493 1494 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1495 error = igmp_change_state(inm); 1496 if (error) 1497 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1498 1499 out_in_multi_locked: 1500 1501 IN_MULTI_UNLOCK(); 1502 1503 out_imf_rollback: 1504 if (error) 1505 imf_rollback(imf); 1506 else 1507 imf_commit(imf); 1508 1509 imf_reap(imf); 1510 1511 out_inp_locked: 1512 INP_WUNLOCK(inp); 1513 return (error); 1514 } 1515 1516 /* 1517 * Given an inpcb, return its multicast options structure pointer. Accepts 1518 * an unlocked inpcb pointer, but will return it locked. May sleep. 1519 * 1520 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1521 * SMPng: NOTE: Returns with the INP write lock held. 1522 */ 1523 static struct ip_moptions * 1524 inp_findmoptions(struct inpcb *inp) 1525 { 1526 struct ip_moptions *imo; 1527 struct in_multi **immp; 1528 struct in_mfilter *imfp; 1529 size_t idx; 1530 1531 INP_WLOCK(inp); 1532 if (inp->inp_moptions != NULL) 1533 return (inp->inp_moptions); 1534 1535 INP_WUNLOCK(inp); 1536 1537 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1538 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1539 M_WAITOK | M_ZERO); 1540 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1541 M_INMFILTER, M_WAITOK); 1542 1543 imo->imo_multicast_ifp = NULL; 1544 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1545 imo->imo_multicast_vif = -1; 1546 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1547 imo->imo_multicast_loop = in_mcast_loop; 1548 imo->imo_num_memberships = 0; 1549 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1550 imo->imo_membership = immp; 1551 1552 /* Initialize per-group source filters. */ 1553 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1554 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1555 imo->imo_mfilters = imfp; 1556 1557 INP_WLOCK(inp); 1558 if (inp->inp_moptions != NULL) { 1559 free(imfp, M_INMFILTER); 1560 free(immp, M_IPMOPTS); 1561 free(imo, M_IPMOPTS); 1562 return (inp->inp_moptions); 1563 } 1564 inp->inp_moptions = imo; 1565 return (imo); 1566 } 1567 1568 /* 1569 * Discard the IP multicast options (and source filters). To minimize 1570 * the amount of work done while holding locks such as the INP's 1571 * pcbinfo lock (which is used in the receive path), the free 1572 * operation is performed asynchronously in a separate task. 1573 * 1574 * SMPng: NOTE: assumes INP write lock is held. 1575 */ 1576 void 1577 inp_freemoptions(struct ip_moptions *imo) 1578 { 1579 1580 KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); 1581 IN_MULTI_LOCK(); 1582 STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link); 1583 IN_MULTI_UNLOCK(); 1584 taskqueue_enqueue(taskqueue_thread, &imo_gc_task); 1585 } 1586 1587 static void 1588 inp_freemoptions_internal(struct ip_moptions *imo) 1589 { 1590 struct in_mfilter *imf; 1591 size_t idx, nmships; 1592 1593 nmships = imo->imo_num_memberships; 1594 for (idx = 0; idx < nmships; ++idx) { 1595 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1596 if (imf) 1597 imf_leave(imf); 1598 (void)in_leavegroup(imo->imo_membership[idx], imf); 1599 if (imf) 1600 imf_purge(imf); 1601 } 1602 1603 if (imo->imo_mfilters) 1604 free(imo->imo_mfilters, M_INMFILTER); 1605 free(imo->imo_membership, M_IPMOPTS); 1606 free(imo, M_IPMOPTS); 1607 } 1608 1609 static void 1610 inp_gcmoptions(void *context, int pending) 1611 { 1612 struct ip_moptions *imo; 1613 1614 IN_MULTI_LOCK(); 1615 while (!STAILQ_EMPTY(&imo_gc_list)) { 1616 imo = STAILQ_FIRST(&imo_gc_list); 1617 STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link); 1618 IN_MULTI_UNLOCK(); 1619 inp_freemoptions_internal(imo); 1620 IN_MULTI_LOCK(); 1621 } 1622 IN_MULTI_UNLOCK(); 1623 } 1624 1625 /* 1626 * Atomically get source filters on a socket for an IPv4 multicast group. 1627 * Called with INP lock held; returns with lock released. 1628 */ 1629 static int 1630 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1631 { 1632 struct __msfilterreq msfr; 1633 sockunion_t *gsa; 1634 struct ifnet *ifp; 1635 struct ip_moptions *imo; 1636 struct in_mfilter *imf; 1637 struct ip_msource *ims; 1638 struct in_msource *lims; 1639 struct sockaddr_in *psin; 1640 struct sockaddr_storage *ptss; 1641 struct sockaddr_storage *tss; 1642 int error; 1643 size_t idx, nsrcs, ncsrcs; 1644 1645 INP_WLOCK_ASSERT(inp); 1646 1647 imo = inp->inp_moptions; 1648 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1649 1650 INP_WUNLOCK(inp); 1651 1652 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1653 sizeof(struct __msfilterreq)); 1654 if (error) 1655 return (error); 1656 1657 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1658 return (EINVAL); 1659 1660 ifp = ifnet_byindex(msfr.msfr_ifindex); 1661 if (ifp == NULL) 1662 return (EINVAL); 1663 1664 INP_WLOCK(inp); 1665 1666 /* 1667 * Lookup group on the socket. 1668 */ 1669 gsa = (sockunion_t *)&msfr.msfr_group; 1670 idx = imo_match_group(imo, ifp, &gsa->sa); 1671 if (idx == -1 || imo->imo_mfilters == NULL) { 1672 INP_WUNLOCK(inp); 1673 return (EADDRNOTAVAIL); 1674 } 1675 imf = &imo->imo_mfilters[idx]; 1676 1677 /* 1678 * Ignore memberships which are in limbo. 1679 */ 1680 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1681 INP_WUNLOCK(inp); 1682 return (EAGAIN); 1683 } 1684 msfr.msfr_fmode = imf->imf_st[1]; 1685 1686 /* 1687 * If the user specified a buffer, copy out the source filter 1688 * entries to userland gracefully. 1689 * We only copy out the number of entries which userland 1690 * has asked for, but we always tell userland how big the 1691 * buffer really needs to be. 1692 */ 1693 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1694 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1695 tss = NULL; 1696 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1697 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1698 M_TEMP, M_NOWAIT | M_ZERO); 1699 if (tss == NULL) { 1700 INP_WUNLOCK(inp); 1701 return (ENOBUFS); 1702 } 1703 } 1704 1705 /* 1706 * Count number of sources in-mode at t0. 1707 * If buffer space exists and remains, copy out source entries. 1708 */ 1709 nsrcs = msfr.msfr_nsrcs; 1710 ncsrcs = 0; 1711 ptss = tss; 1712 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1713 lims = (struct in_msource *)ims; 1714 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1715 lims->imsl_st[0] != imf->imf_st[0]) 1716 continue; 1717 ++ncsrcs; 1718 if (tss != NULL && nsrcs > 0) { 1719 psin = (struct sockaddr_in *)ptss; 1720 psin->sin_family = AF_INET; 1721 psin->sin_len = sizeof(struct sockaddr_in); 1722 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1723 psin->sin_port = 0; 1724 ++ptss; 1725 --nsrcs; 1726 } 1727 } 1728 1729 INP_WUNLOCK(inp); 1730 1731 if (tss != NULL) { 1732 error = copyout(tss, msfr.msfr_srcs, 1733 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1734 free(tss, M_TEMP); 1735 if (error) 1736 return (error); 1737 } 1738 1739 msfr.msfr_nsrcs = ncsrcs; 1740 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1741 1742 return (error); 1743 } 1744 1745 /* 1746 * Return the IP multicast options in response to user getsockopt(). 1747 */ 1748 int 1749 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1750 { 1751 struct ip_mreqn mreqn; 1752 struct ip_moptions *imo; 1753 struct ifnet *ifp; 1754 struct in_ifaddr *ia; 1755 int error, optval; 1756 u_char coptval; 1757 1758 INP_WLOCK(inp); 1759 imo = inp->inp_moptions; 1760 /* 1761 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1762 * or is a divert socket, reject it. 1763 */ 1764 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1765 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1766 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1767 INP_WUNLOCK(inp); 1768 return (EOPNOTSUPP); 1769 } 1770 1771 error = 0; 1772 switch (sopt->sopt_name) { 1773 case IP_MULTICAST_VIF: 1774 if (imo != NULL) 1775 optval = imo->imo_multicast_vif; 1776 else 1777 optval = -1; 1778 INP_WUNLOCK(inp); 1779 error = sooptcopyout(sopt, &optval, sizeof(int)); 1780 break; 1781 1782 case IP_MULTICAST_IF: 1783 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1784 if (imo != NULL) { 1785 ifp = imo->imo_multicast_ifp; 1786 if (!in_nullhost(imo->imo_multicast_addr)) { 1787 mreqn.imr_address = imo->imo_multicast_addr; 1788 } else if (ifp != NULL) { 1789 mreqn.imr_ifindex = ifp->if_index; 1790 IFP_TO_IA(ifp, ia); 1791 if (ia != NULL) { 1792 mreqn.imr_address = 1793 IA_SIN(ia)->sin_addr; 1794 ifa_free(&ia->ia_ifa); 1795 } 1796 } 1797 } 1798 INP_WUNLOCK(inp); 1799 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1800 error = sooptcopyout(sopt, &mreqn, 1801 sizeof(struct ip_mreqn)); 1802 } else { 1803 error = sooptcopyout(sopt, &mreqn.imr_address, 1804 sizeof(struct in_addr)); 1805 } 1806 break; 1807 1808 case IP_MULTICAST_TTL: 1809 if (imo == 0) 1810 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1811 else 1812 optval = coptval = imo->imo_multicast_ttl; 1813 INP_WUNLOCK(inp); 1814 if (sopt->sopt_valsize == sizeof(u_char)) 1815 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1816 else 1817 error = sooptcopyout(sopt, &optval, sizeof(int)); 1818 break; 1819 1820 case IP_MULTICAST_LOOP: 1821 if (imo == 0) 1822 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1823 else 1824 optval = coptval = imo->imo_multicast_loop; 1825 INP_WUNLOCK(inp); 1826 if (sopt->sopt_valsize == sizeof(u_char)) 1827 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1828 else 1829 error = sooptcopyout(sopt, &optval, sizeof(int)); 1830 break; 1831 1832 case IP_MSFILTER: 1833 if (imo == NULL) { 1834 error = EADDRNOTAVAIL; 1835 INP_WUNLOCK(inp); 1836 } else { 1837 error = inp_get_source_filters(inp, sopt); 1838 } 1839 break; 1840 1841 default: 1842 INP_WUNLOCK(inp); 1843 error = ENOPROTOOPT; 1844 break; 1845 } 1846 1847 INP_UNLOCK_ASSERT(inp); 1848 1849 return (error); 1850 } 1851 1852 /* 1853 * Look up the ifnet to use for a multicast group membership, 1854 * given the IPv4 address of an interface, and the IPv4 group address. 1855 * 1856 * This routine exists to support legacy multicast applications 1857 * which do not understand that multicast memberships are scoped to 1858 * specific physical links in the networking stack, or which need 1859 * to join link-scope groups before IPv4 addresses are configured. 1860 * 1861 * If inp is non-NULL, use this socket's current FIB number for any 1862 * required FIB lookup. 1863 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1864 * and use its ifp; usually, this points to the default next-hop. 1865 * 1866 * If the FIB lookup fails, attempt to use the first non-loopback 1867 * interface with multicast capability in the system as a 1868 * last resort. The legacy IPv4 ASM API requires that we do 1869 * this in order to allow groups to be joined when the routing 1870 * table has not yet been populated during boot. 1871 * 1872 * Returns NULL if no ifp could be found. 1873 * 1874 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1875 * FUTURE: Implement IPv4 source-address selection. 1876 */ 1877 static struct ifnet * 1878 inp_lookup_mcast_ifp(const struct inpcb *inp, 1879 const struct sockaddr_in *gsin, const struct in_addr ina) 1880 { 1881 struct ifnet *ifp; 1882 1883 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1884 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1885 ("%s: not multicast", __func__)); 1886 1887 ifp = NULL; 1888 if (!in_nullhost(ina)) { 1889 INADDR_TO_IFP(ina, ifp); 1890 } else { 1891 struct route ro; 1892 1893 ro.ro_rt = NULL; 1894 memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in)); 1895 in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0); 1896 if (ro.ro_rt != NULL) { 1897 ifp = ro.ro_rt->rt_ifp; 1898 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 1899 RTFREE(ro.ro_rt); 1900 } else { 1901 struct in_ifaddr *ia; 1902 struct ifnet *mifp; 1903 1904 mifp = NULL; 1905 IN_IFADDR_RLOCK(); 1906 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1907 mifp = ia->ia_ifp; 1908 if (!(mifp->if_flags & IFF_LOOPBACK) && 1909 (mifp->if_flags & IFF_MULTICAST)) { 1910 ifp = mifp; 1911 break; 1912 } 1913 } 1914 IN_IFADDR_RUNLOCK(); 1915 } 1916 } 1917 1918 return (ifp); 1919 } 1920 1921 /* 1922 * Join an IPv4 multicast group, possibly with a source. 1923 */ 1924 static int 1925 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1926 { 1927 struct group_source_req gsr; 1928 sockunion_t *gsa, *ssa; 1929 struct ifnet *ifp; 1930 struct in_mfilter *imf; 1931 struct ip_moptions *imo; 1932 struct in_multi *inm; 1933 struct in_msource *lims; 1934 size_t idx; 1935 int error, is_new; 1936 1937 ifp = NULL; 1938 imf = NULL; 1939 lims = NULL; 1940 error = 0; 1941 is_new = 0; 1942 1943 memset(&gsr, 0, sizeof(struct group_source_req)); 1944 gsa = (sockunion_t *)&gsr.gsr_group; 1945 gsa->ss.ss_family = AF_UNSPEC; 1946 ssa = (sockunion_t *)&gsr.gsr_source; 1947 ssa->ss.ss_family = AF_UNSPEC; 1948 1949 switch (sopt->sopt_name) { 1950 case IP_ADD_MEMBERSHIP: 1951 case IP_ADD_SOURCE_MEMBERSHIP: { 1952 struct ip_mreq_source mreqs; 1953 1954 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 1955 error = sooptcopyin(sopt, &mreqs, 1956 sizeof(struct ip_mreq), 1957 sizeof(struct ip_mreq)); 1958 /* 1959 * Do argument switcharoo from ip_mreq into 1960 * ip_mreq_source to avoid using two instances. 1961 */ 1962 mreqs.imr_interface = mreqs.imr_sourceaddr; 1963 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 1964 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1965 error = sooptcopyin(sopt, &mreqs, 1966 sizeof(struct ip_mreq_source), 1967 sizeof(struct ip_mreq_source)); 1968 } 1969 if (error) 1970 return (error); 1971 1972 gsa->sin.sin_family = AF_INET; 1973 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1974 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1975 1976 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1977 ssa->sin.sin_family = AF_INET; 1978 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1979 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1980 } 1981 1982 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1983 return (EINVAL); 1984 1985 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1986 mreqs.imr_interface); 1987 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1988 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1989 break; 1990 } 1991 1992 case MCAST_JOIN_GROUP: 1993 case MCAST_JOIN_SOURCE_GROUP: 1994 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 1995 error = sooptcopyin(sopt, &gsr, 1996 sizeof(struct group_req), 1997 sizeof(struct group_req)); 1998 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 1999 error = sooptcopyin(sopt, &gsr, 2000 sizeof(struct group_source_req), 2001 sizeof(struct group_source_req)); 2002 } 2003 if (error) 2004 return (error); 2005 2006 if (gsa->sin.sin_family != AF_INET || 2007 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2008 return (EINVAL); 2009 2010 /* 2011 * Overwrite the port field if present, as the sockaddr 2012 * being copied in may be matched with a binary comparison. 2013 */ 2014 gsa->sin.sin_port = 0; 2015 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2016 if (ssa->sin.sin_family != AF_INET || 2017 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2018 return (EINVAL); 2019 ssa->sin.sin_port = 0; 2020 } 2021 2022 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2023 return (EINVAL); 2024 2025 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2026 return (EADDRNOTAVAIL); 2027 ifp = ifnet_byindex(gsr.gsr_interface); 2028 break; 2029 2030 default: 2031 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2032 __func__, sopt->sopt_name); 2033 return (EOPNOTSUPP); 2034 break; 2035 } 2036 2037 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2038 return (EADDRNOTAVAIL); 2039 2040 imo = inp_findmoptions(inp); 2041 idx = imo_match_group(imo, ifp, &gsa->sa); 2042 if (idx == -1) { 2043 is_new = 1; 2044 } else { 2045 inm = imo->imo_membership[idx]; 2046 imf = &imo->imo_mfilters[idx]; 2047 if (ssa->ss.ss_family != AF_UNSPEC) { 2048 /* 2049 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2050 * is an error. On an existing inclusive membership, 2051 * it just adds the source to the filter list. 2052 */ 2053 if (imf->imf_st[1] != MCAST_INCLUDE) { 2054 error = EINVAL; 2055 goto out_inp_locked; 2056 } 2057 /* 2058 * Throw out duplicates. 2059 * 2060 * XXX FIXME: This makes a naive assumption that 2061 * even if entries exist for *ssa in this imf, 2062 * they will be rejected as dupes, even if they 2063 * are not valid in the current mode (in-mode). 2064 * 2065 * in_msource is transactioned just as for anything 2066 * else in SSM -- but note naive use of inm_graft() 2067 * below for allocating new filter entries. 2068 * 2069 * This is only an issue if someone mixes the 2070 * full-state SSM API with the delta-based API, 2071 * which is discouraged in the relevant RFCs. 2072 */ 2073 lims = imo_match_source(imo, idx, &ssa->sa); 2074 if (lims != NULL /*&& 2075 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2076 error = EADDRNOTAVAIL; 2077 goto out_inp_locked; 2078 } 2079 } else { 2080 /* 2081 * MCAST_JOIN_GROUP on an existing exclusive 2082 * membership is an error; return EADDRINUSE 2083 * to preserve 4.4BSD API idempotence, and 2084 * avoid tedious detour to code below. 2085 * NOTE: This is bending RFC 3678 a bit. 2086 * 2087 * On an existing inclusive membership, this is also 2088 * an error; if you want to change filter mode, 2089 * you must use the userland API setsourcefilter(). 2090 * XXX We don't reject this for imf in UNDEFINED 2091 * state at t1, because allocation of a filter 2092 * is atomic with allocation of a membership. 2093 */ 2094 error = EINVAL; 2095 if (imf->imf_st[1] == MCAST_EXCLUDE) 2096 error = EADDRINUSE; 2097 goto out_inp_locked; 2098 } 2099 } 2100 2101 /* 2102 * Begin state merge transaction at socket layer. 2103 */ 2104 INP_WLOCK_ASSERT(inp); 2105 2106 if (is_new) { 2107 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2108 error = imo_grow(imo); 2109 if (error) 2110 goto out_inp_locked; 2111 } 2112 /* 2113 * Allocate the new slot upfront so we can deal with 2114 * grafting the new source filter in same code path 2115 * as for join-source on existing membership. 2116 */ 2117 idx = imo->imo_num_memberships; 2118 imo->imo_membership[idx] = NULL; 2119 imo->imo_num_memberships++; 2120 KASSERT(imo->imo_mfilters != NULL, 2121 ("%s: imf_mfilters vector was not allocated", __func__)); 2122 imf = &imo->imo_mfilters[idx]; 2123 KASSERT(RB_EMPTY(&imf->imf_sources), 2124 ("%s: imf_sources not empty", __func__)); 2125 } 2126 2127 /* 2128 * Graft new source into filter list for this inpcb's 2129 * membership of the group. The in_multi may not have 2130 * been allocated yet if this is a new membership, however, 2131 * the in_mfilter slot will be allocated and must be initialized. 2132 * 2133 * Note: Grafting of exclusive mode filters doesn't happen 2134 * in this path. 2135 * XXX: Should check for non-NULL lims (node exists but may 2136 * not be in-mode) for interop with full-state API. 2137 */ 2138 if (ssa->ss.ss_family != AF_UNSPEC) { 2139 /* Membership starts in IN mode */ 2140 if (is_new) { 2141 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2142 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2143 } else { 2144 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2145 } 2146 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2147 if (lims == NULL) { 2148 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2149 __func__); 2150 error = ENOMEM; 2151 goto out_imo_free; 2152 } 2153 } else { 2154 /* No address specified; Membership starts in EX mode */ 2155 if (is_new) { 2156 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2157 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2158 } 2159 } 2160 2161 /* 2162 * Begin state merge transaction at IGMP layer. 2163 */ 2164 IN_MULTI_LOCK(); 2165 2166 if (is_new) { 2167 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2168 &inm); 2169 if (error) { 2170 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2171 __func__); 2172 IN_MULTI_UNLOCK(); 2173 goto out_imo_free; 2174 } 2175 imo->imo_membership[idx] = inm; 2176 } else { 2177 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2178 error = inm_merge(inm, imf); 2179 if (error) { 2180 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2181 __func__); 2182 goto out_in_multi_locked; 2183 } 2184 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2185 error = igmp_change_state(inm); 2186 if (error) { 2187 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2188 __func__); 2189 goto out_in_multi_locked; 2190 } 2191 } 2192 2193 out_in_multi_locked: 2194 2195 IN_MULTI_UNLOCK(); 2196 2197 INP_WLOCK_ASSERT(inp); 2198 if (error) { 2199 imf_rollback(imf); 2200 if (is_new) 2201 imf_purge(imf); 2202 else 2203 imf_reap(imf); 2204 } else { 2205 imf_commit(imf); 2206 } 2207 2208 out_imo_free: 2209 if (error && is_new) { 2210 imo->imo_membership[idx] = NULL; 2211 --imo->imo_num_memberships; 2212 } 2213 2214 out_inp_locked: 2215 INP_WUNLOCK(inp); 2216 return (error); 2217 } 2218 2219 /* 2220 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2221 */ 2222 static int 2223 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2224 { 2225 struct group_source_req gsr; 2226 struct ip_mreq_source mreqs; 2227 sockunion_t *gsa, *ssa; 2228 struct ifnet *ifp; 2229 struct in_mfilter *imf; 2230 struct ip_moptions *imo; 2231 struct in_msource *ims; 2232 struct in_multi *inm; 2233 size_t idx; 2234 int error, is_final; 2235 2236 ifp = NULL; 2237 error = 0; 2238 is_final = 1; 2239 2240 memset(&gsr, 0, sizeof(struct group_source_req)); 2241 gsa = (sockunion_t *)&gsr.gsr_group; 2242 gsa->ss.ss_family = AF_UNSPEC; 2243 ssa = (sockunion_t *)&gsr.gsr_source; 2244 ssa->ss.ss_family = AF_UNSPEC; 2245 2246 switch (sopt->sopt_name) { 2247 case IP_DROP_MEMBERSHIP: 2248 case IP_DROP_SOURCE_MEMBERSHIP: 2249 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2250 error = sooptcopyin(sopt, &mreqs, 2251 sizeof(struct ip_mreq), 2252 sizeof(struct ip_mreq)); 2253 /* 2254 * Swap interface and sourceaddr arguments, 2255 * as ip_mreq and ip_mreq_source are laid 2256 * out differently. 2257 */ 2258 mreqs.imr_interface = mreqs.imr_sourceaddr; 2259 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2260 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2261 error = sooptcopyin(sopt, &mreqs, 2262 sizeof(struct ip_mreq_source), 2263 sizeof(struct ip_mreq_source)); 2264 } 2265 if (error) 2266 return (error); 2267 2268 gsa->sin.sin_family = AF_INET; 2269 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2270 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2271 2272 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2273 ssa->sin.sin_family = AF_INET; 2274 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2275 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2276 } 2277 2278 /* 2279 * Attempt to look up hinted ifp from interface address. 2280 * Fallthrough with null ifp iff lookup fails, to 2281 * preserve 4.4BSD mcast API idempotence. 2282 * XXX NOTE WELL: The RFC 3678 API is preferred because 2283 * using an IPv4 address as a key is racy. 2284 */ 2285 if (!in_nullhost(mreqs.imr_interface)) 2286 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2287 2288 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 2289 __func__, inet_ntoa(mreqs.imr_interface), ifp); 2290 2291 break; 2292 2293 case MCAST_LEAVE_GROUP: 2294 case MCAST_LEAVE_SOURCE_GROUP: 2295 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2296 error = sooptcopyin(sopt, &gsr, 2297 sizeof(struct group_req), 2298 sizeof(struct group_req)); 2299 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2300 error = sooptcopyin(sopt, &gsr, 2301 sizeof(struct group_source_req), 2302 sizeof(struct group_source_req)); 2303 } 2304 if (error) 2305 return (error); 2306 2307 if (gsa->sin.sin_family != AF_INET || 2308 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2309 return (EINVAL); 2310 2311 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2312 if (ssa->sin.sin_family != AF_INET || 2313 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2314 return (EINVAL); 2315 } 2316 2317 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2318 return (EADDRNOTAVAIL); 2319 2320 ifp = ifnet_byindex(gsr.gsr_interface); 2321 2322 if (ifp == NULL) 2323 return (EADDRNOTAVAIL); 2324 break; 2325 2326 default: 2327 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2328 __func__, sopt->sopt_name); 2329 return (EOPNOTSUPP); 2330 break; 2331 } 2332 2333 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2334 return (EINVAL); 2335 2336 /* 2337 * Find the membership in the membership array. 2338 */ 2339 imo = inp_findmoptions(inp); 2340 idx = imo_match_group(imo, ifp, &gsa->sa); 2341 if (idx == -1) { 2342 error = EADDRNOTAVAIL; 2343 goto out_inp_locked; 2344 } 2345 inm = imo->imo_membership[idx]; 2346 imf = &imo->imo_mfilters[idx]; 2347 2348 if (ssa->ss.ss_family != AF_UNSPEC) 2349 is_final = 0; 2350 2351 /* 2352 * Begin state merge transaction at socket layer. 2353 */ 2354 INP_WLOCK_ASSERT(inp); 2355 2356 /* 2357 * If we were instructed only to leave a given source, do so. 2358 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2359 */ 2360 if (is_final) { 2361 imf_leave(imf); 2362 } else { 2363 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2364 error = EADDRNOTAVAIL; 2365 goto out_inp_locked; 2366 } 2367 ims = imo_match_source(imo, idx, &ssa->sa); 2368 if (ims == NULL) { 2369 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 2370 inet_ntoa(ssa->sin.sin_addr), "not "); 2371 error = EADDRNOTAVAIL; 2372 goto out_inp_locked; 2373 } 2374 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2375 error = imf_prune(imf, &ssa->sin); 2376 if (error) { 2377 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2378 __func__); 2379 goto out_inp_locked; 2380 } 2381 } 2382 2383 /* 2384 * Begin state merge transaction at IGMP layer. 2385 */ 2386 IN_MULTI_LOCK(); 2387 2388 if (is_final) { 2389 /* 2390 * Give up the multicast address record to which 2391 * the membership points. 2392 */ 2393 (void)in_leavegroup_locked(inm, imf); 2394 } else { 2395 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2396 error = inm_merge(inm, imf); 2397 if (error) { 2398 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2399 __func__); 2400 goto out_in_multi_locked; 2401 } 2402 2403 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2404 error = igmp_change_state(inm); 2405 if (error) { 2406 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2407 __func__); 2408 } 2409 } 2410 2411 out_in_multi_locked: 2412 2413 IN_MULTI_UNLOCK(); 2414 2415 if (error) 2416 imf_rollback(imf); 2417 else 2418 imf_commit(imf); 2419 2420 imf_reap(imf); 2421 2422 if (is_final) { 2423 /* Remove the gap in the membership and filter array. */ 2424 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2425 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2426 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2427 } 2428 imo->imo_num_memberships--; 2429 } 2430 2431 out_inp_locked: 2432 INP_WUNLOCK(inp); 2433 return (error); 2434 } 2435 2436 /* 2437 * Select the interface for transmitting IPv4 multicast datagrams. 2438 * 2439 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2440 * may be passed to this socket option. An address of INADDR_ANY or an 2441 * interface index of 0 is used to remove a previous selection. 2442 * When no interface is selected, one is chosen for every send. 2443 */ 2444 static int 2445 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2446 { 2447 struct in_addr addr; 2448 struct ip_mreqn mreqn; 2449 struct ifnet *ifp; 2450 struct ip_moptions *imo; 2451 int error; 2452 2453 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2454 /* 2455 * An interface index was specified using the 2456 * Linux-derived ip_mreqn structure. 2457 */ 2458 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2459 sizeof(struct ip_mreqn)); 2460 if (error) 2461 return (error); 2462 2463 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2464 return (EINVAL); 2465 2466 if (mreqn.imr_ifindex == 0) { 2467 ifp = NULL; 2468 } else { 2469 ifp = ifnet_byindex(mreqn.imr_ifindex); 2470 if (ifp == NULL) 2471 return (EADDRNOTAVAIL); 2472 } 2473 } else { 2474 /* 2475 * An interface was specified by IPv4 address. 2476 * This is the traditional BSD usage. 2477 */ 2478 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2479 sizeof(struct in_addr)); 2480 if (error) 2481 return (error); 2482 if (in_nullhost(addr)) { 2483 ifp = NULL; 2484 } else { 2485 INADDR_TO_IFP(addr, ifp); 2486 if (ifp == NULL) 2487 return (EADDRNOTAVAIL); 2488 } 2489 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp, 2490 inet_ntoa(addr)); 2491 } 2492 2493 /* Reject interfaces which do not support multicast. */ 2494 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2495 return (EOPNOTSUPP); 2496 2497 imo = inp_findmoptions(inp); 2498 imo->imo_multicast_ifp = ifp; 2499 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2500 INP_WUNLOCK(inp); 2501 2502 return (0); 2503 } 2504 2505 /* 2506 * Atomically set source filters on a socket for an IPv4 multicast group. 2507 * 2508 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2509 */ 2510 static int 2511 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2512 { 2513 struct __msfilterreq msfr; 2514 sockunion_t *gsa; 2515 struct ifnet *ifp; 2516 struct in_mfilter *imf; 2517 struct ip_moptions *imo; 2518 struct in_multi *inm; 2519 size_t idx; 2520 int error; 2521 2522 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2523 sizeof(struct __msfilterreq)); 2524 if (error) 2525 return (error); 2526 2527 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2528 return (ENOBUFS); 2529 2530 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2531 msfr.msfr_fmode != MCAST_INCLUDE)) 2532 return (EINVAL); 2533 2534 if (msfr.msfr_group.ss_family != AF_INET || 2535 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2536 return (EINVAL); 2537 2538 gsa = (sockunion_t *)&msfr.msfr_group; 2539 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2540 return (EINVAL); 2541 2542 gsa->sin.sin_port = 0; /* ignore port */ 2543 2544 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2545 return (EADDRNOTAVAIL); 2546 2547 ifp = ifnet_byindex(msfr.msfr_ifindex); 2548 if (ifp == NULL) 2549 return (EADDRNOTAVAIL); 2550 2551 /* 2552 * Take the INP write lock. 2553 * Check if this socket is a member of this group. 2554 */ 2555 imo = inp_findmoptions(inp); 2556 idx = imo_match_group(imo, ifp, &gsa->sa); 2557 if (idx == -1 || imo->imo_mfilters == NULL) { 2558 error = EADDRNOTAVAIL; 2559 goto out_inp_locked; 2560 } 2561 inm = imo->imo_membership[idx]; 2562 imf = &imo->imo_mfilters[idx]; 2563 2564 /* 2565 * Begin state merge transaction at socket layer. 2566 */ 2567 INP_WLOCK_ASSERT(inp); 2568 2569 imf->imf_st[1] = msfr.msfr_fmode; 2570 2571 /* 2572 * Apply any new source filters, if present. 2573 * Make a copy of the user-space source vector so 2574 * that we may copy them with a single copyin. This 2575 * allows us to deal with page faults up-front. 2576 */ 2577 if (msfr.msfr_nsrcs > 0) { 2578 struct in_msource *lims; 2579 struct sockaddr_in *psin; 2580 struct sockaddr_storage *kss, *pkss; 2581 int i; 2582 2583 INP_WUNLOCK(inp); 2584 2585 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2586 __func__, (unsigned long)msfr.msfr_nsrcs); 2587 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2588 M_TEMP, M_WAITOK); 2589 error = copyin(msfr.msfr_srcs, kss, 2590 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2591 if (error) { 2592 free(kss, M_TEMP); 2593 return (error); 2594 } 2595 2596 INP_WLOCK(inp); 2597 2598 /* 2599 * Mark all source filters as UNDEFINED at t1. 2600 * Restore new group filter mode, as imf_leave() 2601 * will set it to INCLUDE. 2602 */ 2603 imf_leave(imf); 2604 imf->imf_st[1] = msfr.msfr_fmode; 2605 2606 /* 2607 * Update socket layer filters at t1, lazy-allocating 2608 * new entries. This saves a bunch of memory at the 2609 * cost of one RB_FIND() per source entry; duplicate 2610 * entries in the msfr_nsrcs vector are ignored. 2611 * If we encounter an error, rollback transaction. 2612 * 2613 * XXX This too could be replaced with a set-symmetric 2614 * difference like loop to avoid walking from root 2615 * every time, as the key space is common. 2616 */ 2617 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2618 psin = (struct sockaddr_in *)pkss; 2619 if (psin->sin_family != AF_INET) { 2620 error = EAFNOSUPPORT; 2621 break; 2622 } 2623 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2624 error = EINVAL; 2625 break; 2626 } 2627 error = imf_get_source(imf, psin, &lims); 2628 if (error) 2629 break; 2630 lims->imsl_st[1] = imf->imf_st[1]; 2631 } 2632 free(kss, M_TEMP); 2633 } 2634 2635 if (error) 2636 goto out_imf_rollback; 2637 2638 INP_WLOCK_ASSERT(inp); 2639 IN_MULTI_LOCK(); 2640 2641 /* 2642 * Begin state merge transaction at IGMP layer. 2643 */ 2644 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2645 error = inm_merge(inm, imf); 2646 if (error) { 2647 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2648 goto out_in_multi_locked; 2649 } 2650 2651 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2652 error = igmp_change_state(inm); 2653 if (error) 2654 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2655 2656 out_in_multi_locked: 2657 2658 IN_MULTI_UNLOCK(); 2659 2660 out_imf_rollback: 2661 if (error) 2662 imf_rollback(imf); 2663 else 2664 imf_commit(imf); 2665 2666 imf_reap(imf); 2667 2668 out_inp_locked: 2669 INP_WUNLOCK(inp); 2670 return (error); 2671 } 2672 2673 /* 2674 * Set the IP multicast options in response to user setsockopt(). 2675 * 2676 * Many of the socket options handled in this function duplicate the 2677 * functionality of socket options in the regular unicast API. However, 2678 * it is not possible to merge the duplicate code, because the idempotence 2679 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2680 * the effects of these options must be treated as separate and distinct. 2681 * 2682 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2683 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2684 * is refactored to no longer use vifs. 2685 */ 2686 int 2687 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2688 { 2689 struct ip_moptions *imo; 2690 int error; 2691 2692 error = 0; 2693 2694 /* 2695 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2696 * or is a divert socket, reject it. 2697 */ 2698 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2699 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2700 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2701 return (EOPNOTSUPP); 2702 2703 switch (sopt->sopt_name) { 2704 case IP_MULTICAST_VIF: { 2705 int vifi; 2706 /* 2707 * Select a multicast VIF for transmission. 2708 * Only useful if multicast forwarding is active. 2709 */ 2710 if (legal_vif_num == NULL) { 2711 error = EOPNOTSUPP; 2712 break; 2713 } 2714 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2715 if (error) 2716 break; 2717 if (!legal_vif_num(vifi) && (vifi != -1)) { 2718 error = EINVAL; 2719 break; 2720 } 2721 imo = inp_findmoptions(inp); 2722 imo->imo_multicast_vif = vifi; 2723 INP_WUNLOCK(inp); 2724 break; 2725 } 2726 2727 case IP_MULTICAST_IF: 2728 error = inp_set_multicast_if(inp, sopt); 2729 break; 2730 2731 case IP_MULTICAST_TTL: { 2732 u_char ttl; 2733 2734 /* 2735 * Set the IP time-to-live for outgoing multicast packets. 2736 * The original multicast API required a char argument, 2737 * which is inconsistent with the rest of the socket API. 2738 * We allow either a char or an int. 2739 */ 2740 if (sopt->sopt_valsize == sizeof(u_char)) { 2741 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2742 sizeof(u_char)); 2743 if (error) 2744 break; 2745 } else { 2746 u_int ittl; 2747 2748 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2749 sizeof(u_int)); 2750 if (error) 2751 break; 2752 if (ittl > 255) { 2753 error = EINVAL; 2754 break; 2755 } 2756 ttl = (u_char)ittl; 2757 } 2758 imo = inp_findmoptions(inp); 2759 imo->imo_multicast_ttl = ttl; 2760 INP_WUNLOCK(inp); 2761 break; 2762 } 2763 2764 case IP_MULTICAST_LOOP: { 2765 u_char loop; 2766 2767 /* 2768 * Set the loopback flag for outgoing multicast packets. 2769 * Must be zero or one. The original multicast API required a 2770 * char argument, which is inconsistent with the rest 2771 * of the socket API. We allow either a char or an int. 2772 */ 2773 if (sopt->sopt_valsize == sizeof(u_char)) { 2774 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2775 sizeof(u_char)); 2776 if (error) 2777 break; 2778 } else { 2779 u_int iloop; 2780 2781 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2782 sizeof(u_int)); 2783 if (error) 2784 break; 2785 loop = (u_char)iloop; 2786 } 2787 imo = inp_findmoptions(inp); 2788 imo->imo_multicast_loop = !!loop; 2789 INP_WUNLOCK(inp); 2790 break; 2791 } 2792 2793 case IP_ADD_MEMBERSHIP: 2794 case IP_ADD_SOURCE_MEMBERSHIP: 2795 case MCAST_JOIN_GROUP: 2796 case MCAST_JOIN_SOURCE_GROUP: 2797 error = inp_join_group(inp, sopt); 2798 break; 2799 2800 case IP_DROP_MEMBERSHIP: 2801 case IP_DROP_SOURCE_MEMBERSHIP: 2802 case MCAST_LEAVE_GROUP: 2803 case MCAST_LEAVE_SOURCE_GROUP: 2804 error = inp_leave_group(inp, sopt); 2805 break; 2806 2807 case IP_BLOCK_SOURCE: 2808 case IP_UNBLOCK_SOURCE: 2809 case MCAST_BLOCK_SOURCE: 2810 case MCAST_UNBLOCK_SOURCE: 2811 error = inp_block_unblock_source(inp, sopt); 2812 break; 2813 2814 case IP_MSFILTER: 2815 error = inp_set_source_filters(inp, sopt); 2816 break; 2817 2818 default: 2819 error = EOPNOTSUPP; 2820 break; 2821 } 2822 2823 INP_UNLOCK_ASSERT(inp); 2824 2825 return (error); 2826 } 2827 2828 /* 2829 * Expose IGMP's multicast filter mode and source list(s) to userland, 2830 * keyed by (ifindex, group). 2831 * The filter mode is written out as a uint32_t, followed by 2832 * 0..n of struct in_addr. 2833 * For use by ifmcstat(8). 2834 * SMPng: NOTE: unlocked read of ifindex space. 2835 */ 2836 static int 2837 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2838 { 2839 struct in_addr src, group; 2840 struct ifnet *ifp; 2841 struct ifmultiaddr *ifma; 2842 struct in_multi *inm; 2843 struct ip_msource *ims; 2844 int *name; 2845 int retval; 2846 u_int namelen; 2847 uint32_t fmode, ifindex; 2848 2849 name = (int *)arg1; 2850 namelen = arg2; 2851 2852 if (req->newptr != NULL) 2853 return (EPERM); 2854 2855 if (namelen != 2) 2856 return (EINVAL); 2857 2858 ifindex = name[0]; 2859 if (ifindex <= 0 || ifindex > V_if_index) { 2860 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2861 __func__, ifindex); 2862 return (ENOENT); 2863 } 2864 2865 group.s_addr = name[1]; 2866 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2867 CTR2(KTR_IGMPV3, "%s: group %s is not multicast", 2868 __func__, inet_ntoa(group)); 2869 return (EINVAL); 2870 } 2871 2872 ifp = ifnet_byindex(ifindex); 2873 if (ifp == NULL) { 2874 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2875 __func__, ifindex); 2876 return (ENOENT); 2877 } 2878 2879 retval = sysctl_wire_old_buffer(req, 2880 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2881 if (retval) 2882 return (retval); 2883 2884 IN_MULTI_LOCK(); 2885 2886 IF_ADDR_RLOCK(ifp); 2887 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2888 if (ifma->ifma_addr->sa_family != AF_INET || 2889 ifma->ifma_protospec == NULL) 2890 continue; 2891 inm = (struct in_multi *)ifma->ifma_protospec; 2892 if (!in_hosteq(inm->inm_addr, group)) 2893 continue; 2894 fmode = inm->inm_st[1].iss_fmode; 2895 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2896 if (retval != 0) 2897 break; 2898 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2899 #ifdef KTR 2900 struct in_addr ina; 2901 ina.s_addr = htonl(ims->ims_haddr); 2902 CTR2(KTR_IGMPV3, "%s: visit node %s", __func__, 2903 inet_ntoa(ina)); 2904 #endif 2905 /* 2906 * Only copy-out sources which are in-mode. 2907 */ 2908 if (fmode != ims_get_mode(inm, ims, 1)) { 2909 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2910 __func__); 2911 continue; 2912 } 2913 src.s_addr = htonl(ims->ims_haddr); 2914 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2915 if (retval != 0) 2916 break; 2917 } 2918 } 2919 IF_ADDR_RUNLOCK(ifp); 2920 2921 IN_MULTI_UNLOCK(); 2922 2923 return (retval); 2924 } 2925 2926 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2927 2928 static const char *inm_modestrs[] = { "un", "in", "ex" }; 2929 2930 static const char * 2931 inm_mode_str(const int mode) 2932 { 2933 2934 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2935 return (inm_modestrs[mode]); 2936 return ("??"); 2937 } 2938 2939 static const char *inm_statestrs[] = { 2940 "not-member", 2941 "silent", 2942 "idle", 2943 "lazy", 2944 "sleeping", 2945 "awakening", 2946 "query-pending", 2947 "sg-query-pending", 2948 "leaving" 2949 }; 2950 2951 static const char * 2952 inm_state_str(const int state) 2953 { 2954 2955 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 2956 return (inm_statestrs[state]); 2957 return ("??"); 2958 } 2959 2960 /* 2961 * Dump an in_multi structure to the console. 2962 */ 2963 void 2964 inm_print(const struct in_multi *inm) 2965 { 2966 int t; 2967 2968 if ((ktr_mask & KTR_IGMPV3) == 0) 2969 return; 2970 2971 printf("%s: --- begin inm %p ---\n", __func__, inm); 2972 printf("addr %s ifp %p(%s) ifma %p\n", 2973 inet_ntoa(inm->inm_addr), 2974 inm->inm_ifp, 2975 inm->inm_ifp->if_xname, 2976 inm->inm_ifma); 2977 printf("timer %u state %s refcount %u scq.len %u\n", 2978 inm->inm_timer, 2979 inm_state_str(inm->inm_state), 2980 inm->inm_refcount, 2981 inm->inm_scq.mq_len); 2982 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 2983 inm->inm_igi, 2984 inm->inm_nsrc, 2985 inm->inm_sctimer, 2986 inm->inm_scrv); 2987 for (t = 0; t < 2; t++) { 2988 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 2989 inm_mode_str(inm->inm_st[t].iss_fmode), 2990 inm->inm_st[t].iss_asm, 2991 inm->inm_st[t].iss_ex, 2992 inm->inm_st[t].iss_in, 2993 inm->inm_st[t].iss_rec); 2994 } 2995 printf("%s: --- end inm %p ---\n", __func__, inm); 2996 } 2997 2998 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 2999 3000 void 3001 inm_print(const struct in_multi *inm) 3002 { 3003 3004 } 3005 3006 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3007 3008 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3009