1 /*- 2 * Copyright (c) 2007-2009 Bruce Simpson. 3 * Copyright (c) 2005 Robert N. M. Watson. 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote 15 * products derived from this software without specific prior written 16 * permission. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 */ 30 31 /* 32 * IPv4 multicast socket, group, and socket option processing module. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/protosw.h>	/* XXX: duplicate include (already pulled in above) */
#include <sys/sysctl.h>
#include <sys/ktr.h>
#include <sys/taskqueue.h>
#include <sys/tree.h>

#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/vnet.h>

#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
#include <netinet/igmp_var.h>

#ifndef KTR_IGMPV3
#define KTR_IGMPV3 KTR_INET
#endif

/*
 * Convenience union for decoding the several sockaddr flavours passed
 * through the multicast socket options without repeated casts.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
	struct sockaddr_storage	ss;
	struct sockaddr		sa;
	struct sockaddr_dl	sdl;
	struct sockaddr_in	sin;
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */

static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
    "IPv4 multicast PCB-layer source filter");
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
    "IPv4 multicast IGMP-layer source filter");

/*
 * Locking:
 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
 *   it can be taken by code in net/if.c also.
 * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
 *
 * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
 * any need for in_multi itself to be virtualized -- it is bound to an ifp
 * anyway no matter what happens.
 */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);

/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_multi_filter()
 *  in_addmulti()
 *  in_delmulti()
 *  in_joingroup()
 *  in_joingroup_locked()
 *  in_leavegroup()
 *  in_leavegroup_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 *
 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
 * and in_delmulti().
 */
static void	imf_commit(struct in_mfilter *);
static int	imf_get_source(struct in_mfilter *imf,
		    const struct sockaddr_in *psin,
		    struct in_msource **);
static struct in_msource *
		imf_graft(struct in_mfilter *, const uint8_t,
		    const struct sockaddr_in *);
static void	imf_leave(struct in_mfilter *);
static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
static void	imf_purge(struct in_mfilter *);
static void	imf_rollback(struct in_mfilter *);
static void	imf_reap(struct in_mfilter *);
static int	imo_grow(struct ip_moptions *);
static size_t	imo_match_group(const struct ip_moptions *,
		    const struct ifnet *, const struct sockaddr *);
static struct in_msource *
		imo_match_source(const struct ip_moptions *, const size_t,
		    const struct sockaddr *);
static void	ims_merge(struct ip_msource *ims,
		    const struct in_msource *lims, const int rollback);
static int	in_getmulti(struct ifnet *, const struct in_addr *,
		    struct in_multi **);
static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
		    const int noalloc, struct ip_msource **pims);
static int	inm_is_ifp_detached(const struct in_multi *);
static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
static void	inm_purge(struct in_multi *);
static void	inm_reap(struct in_multi *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static void	inp_freemoptions_internal(struct ip_moptions *);
static void	inp_gcmoptions(void *, int);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static struct ifnet *
		inp_lookup_mcast_ifp(const struct inpcb *,
		    const struct sockaddr_in *, const struct in_addr);
static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);

static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
    "IPv4 multicast");

/* CTLFLAG_TUN sysctls below may also be set from the loader as tunables. */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxgrpsrc, 0,
    "Max source filters per group");
TUNABLE_ULONG("net.inet.ip.mcast.maxgrpsrc", &in_mcast_maxgrpsrc);

static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RW | CTLFLAG_TUN, &in_mcast_maxsocksrc, 0,
    "Max source filters per socket");
TUNABLE_ULONG("net.inet.ip.mcast.maxsocksrc", &in_mcast_maxsocksrc);

int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);

static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");

/*
 * Deferred reclamation of ip_moptions: freed asynchronously via a task
 * (see inp_gcmoptions()), presumably to avoid freeing under locks held
 * by the caller of inp_freemoptions().
 */
static STAILQ_HEAD(, ip_moptions) imo_gc_list =
    STAILQ_HEAD_INITIALIZER(imo_gc_list);
static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);

/*
 * Inline function which wraps assertions for a valid ifp.
 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
 * is detached.
 */
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
	struct ifnet *ifp;

	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->inm_ifma->ifma_ifp;
	if (ifp != NULL) {
		/*
		 * Sanity check that netinet's notion of ifp is the
		 * same as net's.
		 */
		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
	}

	/* Non-zero iff the link layer has dropped its ifp reference. */
	return (ifp == NULL);
}

/*
 * Initialize an in_mfilter structure to a known state at t0, t1
 * with an empty source filter list.
 */
static __inline void
imf_init(struct in_mfilter *imf, const int st0, const int st1)
{
	memset(imf, 0, sizeof(struct in_mfilter));
	RB_INIT(&imf->imf_sources);
	imf->imf_st[0] = st0;
	imf->imf_st[1] = st1;
}

/*
 * Function for looking up an in_multi record for an IPv4 multicast address
 * on a given interface. ifp must be valid. If no record found, return NULL.
 * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
 */
struct in_multi *
inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
{
	struct ifmultiaddr *ifma;
	struct in_multi *inm;

	IN_MULTI_LOCK_ASSERT();
	IF_ADDR_LOCK_ASSERT(ifp);

	inm = NULL;
	TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
		if (ifma->ifma_addr->sa_family == AF_INET) {
			/*
			 * NOTE(review): assumes every AF_INET ifma has a
			 * non-NULL ifma_protospec -- confirm no window
			 * exists where it is still NULL.
			 */
			inm = (struct in_multi *)ifma->ifma_protospec;
			if (inm->inm_addr.s_addr == ina.s_addr)
				break;
			inm = NULL;
		}
	}
	return (inm);
}

/*
 * Wrapper for inm_lookup_locked().
 * The IF_ADDR_LOCK will be taken on ifp and released on return.
255 */ 256 struct in_multi * 257 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 258 { 259 struct in_multi *inm; 260 261 IN_MULTI_LOCK_ASSERT(); 262 IF_ADDR_RLOCK(ifp); 263 inm = inm_lookup_locked(ifp, ina); 264 IF_ADDR_RUNLOCK(ifp); 265 266 return (inm); 267 } 268 269 /* 270 * Resize the ip_moptions vector to the next power-of-two minus 1. 271 * May be called with locks held; do not sleep. 272 */ 273 static int 274 imo_grow(struct ip_moptions *imo) 275 { 276 struct in_multi **nmships; 277 struct in_multi **omships; 278 struct in_mfilter *nmfilters; 279 struct in_mfilter *omfilters; 280 size_t idx; 281 size_t newmax; 282 size_t oldmax; 283 284 nmships = NULL; 285 nmfilters = NULL; 286 omships = imo->imo_membership; 287 omfilters = imo->imo_mfilters; 288 oldmax = imo->imo_max_memberships; 289 newmax = ((oldmax + 1) * 2) - 1; 290 291 if (newmax <= IP_MAX_MEMBERSHIPS) { 292 nmships = (struct in_multi **)realloc(omships, 293 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 294 nmfilters = (struct in_mfilter *)realloc(omfilters, 295 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 296 if (nmships != NULL && nmfilters != NULL) { 297 /* Initialize newly allocated source filter heads. */ 298 for (idx = oldmax; idx < newmax; idx++) { 299 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 300 MCAST_EXCLUDE); 301 } 302 imo->imo_max_memberships = newmax; 303 imo->imo_membership = nmships; 304 imo->imo_mfilters = nmfilters; 305 } 306 } 307 308 if (nmships == NULL || nmfilters == NULL) { 309 if (nmships != NULL) 310 free(nmships, M_IPMOPTS); 311 if (nmfilters != NULL) 312 free(nmfilters, M_INMFILTER); 313 return (ETOOMANYREFS); 314 } 315 316 return (0); 317 } 318 319 /* 320 * Find an IPv4 multicast group entry for this ip_moptions instance 321 * which matches the specified group, and optionally an interface. 322 * Return its index into the array, or -1 if not found. 
 */
static size_t
imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group)
{
	const struct sockaddr_in *gsin;
	struct in_multi	**pinm;
	int		  idx;
	int		  nmships;

	gsin = (const struct sockaddr_in *)group;

	/* The imo_membership array may be lazy allocated. */
	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
		return (-1);

	nmships = imo->imo_num_memberships;
	pinm = &imo->imo_membership[0];
	for (idx = 0; idx < nmships; idx++, pinm++) {
		/* Slots may be empty after a leave; skip them. */
		if (*pinm == NULL)
			continue;
		/* ifp == NULL acts as a wildcard interface match. */
		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
			break;
		}
	}
	if (idx >= nmships)
		idx = -1;

	/*
	 * NB: return type is size_t, so -1 wraps to SIZE_MAX; callers
	 * compare against -1, which converts consistently.
	 */
	return (idx);
}

/*
 * Find an IPv4 multicast source entry for this imo which matches
 * the given group index for this socket, and source address.
 *
 * NOTE: This does not check if the entry is in-mode, merely if
 * it exists, which may not be the desired behaviour.
 */
static struct in_msource *
imo_match_source(const struct ip_moptions *imo, const size_t gidx,
    const struct sockaddr *src)
{
	struct ip_msource	 find;
	struct in_mfilter	*imf;
	struct ip_msource	*ims;
	const sockunion_t	*psa;

	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
	    ("%s: invalid index %d\n", __func__, (int)gidx));

	/* The imo_mfilters array may be lazy allocated. */
	if (imo->imo_mfilters == NULL)
		return (NULL);
	imf = &imo->imo_mfilters[gidx];

	/* Source trees are keyed in host byte order. */
	psa = (const sockunion_t *)src;
	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);

	/* ip_msource is the leading member of in_msource, so this is safe. */
	return ((struct in_msource *)ims);
}

/*
 * Perform filtering for multicast datagrams on a socket by group and source.
 *
 * Returns 0 if a datagram should be allowed through, or various error codes
 * if the socket was not a member of the group, or the source was muted, etc.
 */
int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group, const struct sockaddr *src)
{
	size_t gidx;
	struct in_msource *ims;
	int mode;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));

	gidx = imo_match_group(imo, ifp, group);
	if (gidx == -1)
		return (MCAST_NOTGMEMBER);

	/*
	 * Check if the source was included in an (S,G) join.
	 * Allow reception on exclusive memberships by default,
	 * reject reception on inclusive memberships by default.
	 * Exclude source only if an in-mode exclude filter exists.
	 * Include source only if an in-mode include filter exists.
	 * NOTE: We are comparing group state here at IGMP t1 (now)
	 * with socket-layer t0 (since last downcall).
	 */
	mode = imo->imo_mfilters[gidx].imf_st[1];
	ims = imo_match_source(imo, gidx, src);

	if ((ims == NULL && mode == MCAST_INCLUDE) ||
	    (ims != NULL && ims->imsl_st[0] != mode))
		return (MCAST_NOTSMEMBER);

	return (MCAST_PASS);
}

/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer multicast
 * filters on ifp to listen for group.
 * Assumes the IN_MULTI lock is held across the call.
 * Return 0 if successful, otherwise return an appropriate error code.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in	 gsin;
	struct ifmultiaddr	*ifma;
	struct in_ifinfo	*ii;
	struct in_multi		*inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];

	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		++inm->inm_refcount;
		*pinm = inm;
		return (0);
	}

	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 * if_addmulti() returns an ifma with its own reference held.
	 */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IF_ADDR_WLOCK(ifp);

	/*
	 * If something other than netinet is occupying the link-layer
	 * group, print a meaningful error message and back out of
	 * the allocation.
	 * Otherwise, bump the refcount on the existing network-layer
	 * group association and return it.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group))
			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa(*group));
#endif
		++inm->inm_refcount;
		*pinm = inm;
		IF_ADDR_WUNLOCK(ifp);
		return (0);
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/* Back out the link-layer membership taken above. */
		if_delmulti_ifma(ifma);
		IF_ADDR_WUNLOCK(ifp);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;		/* caller's reference */
	inm->inm_state = IGMP_NOT_MEMBER;

	/*
	 * Pending state-changes per group are subject to a bounds check.
	 */
	IFQ_SET_MAXLEN(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);

	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Publish the association; visible to inm_lookup_locked(). */
	ifma->ifma_protospec = inm;

	*pinm = inm;

	IF_ADDR_WUNLOCK(ifp);
	return (0);
}

/*
 * Drop a reference to an in_multi record.
 *
 * If the refcount drops to 0, free the in_multi record and
 * delete the underlying link-layer membership.
 */
void
inm_release_locked(struct in_multi *inm)
{
	struct ifmultiaddr *ifma;

	IN_MULTI_LOCK_ASSERT();

	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);

	if (--inm->inm_refcount > 0) {
		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
		    inm->inm_refcount);
		return;
	}

	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);

	ifma = inm->inm_ifma;

	/* XXX this access is not covered by IF_ADDR_LOCK */
	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
	KASSERT(ifma->ifma_protospec == inm,
	    ("%s: ifma_protospec != inm", __func__));
	/* Unlink from the ifma before freeing so lookups cannot see us. */
	ifma->ifma_protospec = NULL;

	inm_purge(inm);

	free(inm, M_IPMADDR);

	/* Drop the link-layer membership reference taken in in_getmulti(). */
	if_delmulti_ifma(ifma);
}

/*
 * Clear recorded source entries for a group.
 * Used by the IGMP code. Caller must hold the IN_MULTI lock.
 * FIXME: Should reap.
 */
void
inm_clear_recorded(struct in_multi *inm)
{
	struct ip_msource	*ims;

	IN_MULTI_LOCK_ASSERT();

	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		if (ims->ims_stp) {
			ims->ims_stp = 0;
			--inm->inm_st[1].iss_rec;
		}
	}
	/* Every recorded source must have carried a non-zero ims_stp. */
	KASSERT(inm->inm_st[1].iss_rec == 0,
	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
}

/*
 * Record a source as pending for a Source-Group IGMPv3 query.
 * This lives here as it modifies the shared tree.
 *
 * inm is the group descriptor.
 * naddr is the address of the source to record in network-byte order.
 *
 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
 * lazy-allocate a source node in response to an SG query.
 * Otherwise, no allocation is performed. This saves some memory
 * with the trade-off that the source will not be reported to the
 * router if joined in the window between the query response and
 * the group actually being joined on the local host.
 *
 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
 * This turns off the allocation of a recorded source entry if
 * the group has not been joined.
 *
 * Return 0 if the source didn't exist or was already marked as recorded.
 * Return 1 if the source was marked as recorded by this function.
 * Return <0 if any error occurred (negated errno code).
 */
int
inm_record_source(struct in_multi *inm, const in_addr_t naddr)
{
	struct ip_msource	 find;
	struct ip_msource	*ims, *nims;

	IN_MULTI_LOCK_ASSERT();

	/* Tree keys are host byte order; naddr arrives in network order. */
	find.ims_haddr = ntohl(naddr);
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims && ims->ims_stp)
		return (0);
	if (ims == NULL) {
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (-ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (-ENOMEM);
		nims->ims_haddr = find.ims_haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
	}

	/*
	 * Mark the source as recorded and update the recorded
	 * source count.
	 */
	++ims->ims_stp;
	++inm->inm_st[1].iss_rec;

	return (1);
}

/*
 * Return a pointer to an in_msource owned by an in_mfilter,
 * given its source address.
 * Lazy-allocate if needed. If this is a new entry its filter state is
 * undefined at t0.
 *
 * imf is the filter set being modified.
 * haddr is the source address in *host* byte-order.
 *
 * SMPng: May be called with locks held; malloc must not block.
670 */ 671 static int 672 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 673 struct in_msource **plims) 674 { 675 struct ip_msource find; 676 struct ip_msource *ims, *nims; 677 struct in_msource *lims; 678 int error; 679 680 error = 0; 681 ims = NULL; 682 lims = NULL; 683 684 /* key is host byte order */ 685 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 686 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 687 lims = (struct in_msource *)ims; 688 if (lims == NULL) { 689 if (imf->imf_nsrc == in_mcast_maxsocksrc) 690 return (ENOSPC); 691 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 692 M_NOWAIT | M_ZERO); 693 if (nims == NULL) 694 return (ENOMEM); 695 lims = (struct in_msource *)nims; 696 lims->ims_haddr = find.ims_haddr; 697 lims->imsl_st[0] = MCAST_UNDEFINED; 698 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 699 ++imf->imf_nsrc; 700 } 701 702 *plims = lims; 703 704 return (error); 705 } 706 707 /* 708 * Graft a source entry into an existing socket-layer filter set, 709 * maintaining any required invariants and checking allocations. 710 * 711 * The source is marked as being in the new filter mode at t1. 712 * 713 * Return the pointer to the new node, otherwise return NULL. 714 */ 715 static struct in_msource * 716 imf_graft(struct in_mfilter *imf, const uint8_t st1, 717 const struct sockaddr_in *psin) 718 { 719 struct ip_msource *nims; 720 struct in_msource *lims; 721 722 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 723 M_NOWAIT | M_ZERO); 724 if (nims == NULL) 725 return (NULL); 726 lims = (struct in_msource *)nims; 727 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 728 lims->imsl_st[0] = MCAST_UNDEFINED; 729 lims->imsl_st[1] = st1; 730 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 731 ++imf->imf_nsrc; 732 733 return (lims); 734 } 735 736 /* 737 * Prune a source entry from an existing socket-layer filter set, 738 * maintaining any required invariants and checking allocations. 
 *
 * The source is marked as being left at t1, it is not freed.
 *
 * Return 0 if no error occurred, otherwise return an errno value.
 */
static int
imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
{
	struct ip_msource	 find;
	struct ip_msource	*ims;
	struct in_msource	*lims;

	/* key is host byte order */
	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
	if (ims == NULL)
		return (ENOENT);
	lims = (struct in_msource *)ims;
	/* Node is kept so the delta is visible until commit/rollback. */
	lims->imsl_st[1] = MCAST_UNDEFINED;
	return (0);
}

/*
 * Revert socket-layer filter set deltas at t1 to t0 state.
 */
static void
imf_rollback(struct in_mfilter *imf)
{
	struct ip_msource	*ims, *tims;
	struct in_msource	*lims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == lims->imsl_st[1]) {
			/* no change at t1 */
			continue;
		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
			/* revert change to existing source at t1 */
			lims->imsl_st[1] = lims->imsl_st[0];
		} else {
			/* revert source added t1 */
			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
			free(ims, M_INMFILTER);
			imf->imf_nsrc--;
		}
	}
	imf->imf_st[1] = imf->imf_st[0];
}

/*
 * Mark socket-layer filter set as INCLUDE {} at t1.
 */
static void
imf_leave(struct in_mfilter *imf)
{
	struct ip_msource	*ims;
	struct in_msource	*lims;

	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		lims->imsl_st[1] = MCAST_UNDEFINED;
	}
	imf->imf_st[1] = MCAST_INCLUDE;
}

/*
 * Mark socket-layer filter set deltas as committed.
807 */ 808 static void 809 imf_commit(struct in_mfilter *imf) 810 { 811 struct ip_msource *ims; 812 struct in_msource *lims; 813 814 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 815 lims = (struct in_msource *)ims; 816 lims->imsl_st[0] = lims->imsl_st[1]; 817 } 818 imf->imf_st[0] = imf->imf_st[1]; 819 } 820 821 /* 822 * Reap unreferenced sources from socket-layer filter set. 823 */ 824 static void 825 imf_reap(struct in_mfilter *imf) 826 { 827 struct ip_msource *ims, *tims; 828 struct in_msource *lims; 829 830 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 831 lims = (struct in_msource *)ims; 832 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 833 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 834 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 835 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 836 free(ims, M_INMFILTER); 837 imf->imf_nsrc--; 838 } 839 } 840 } 841 842 /* 843 * Purge socket-layer filter set. 844 */ 845 static void 846 imf_purge(struct in_mfilter *imf) 847 { 848 struct ip_msource *ims, *tims; 849 850 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 851 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 852 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 853 free(ims, M_INMFILTER); 854 imf->imf_nsrc--; 855 } 856 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 857 KASSERT(RB_EMPTY(&imf->imf_sources), 858 ("%s: imf_sources not empty", __func__)); 859 } 860 861 /* 862 * Look up a source filter entry for a multicast group. 863 * 864 * inm is the group descriptor to work with. 865 * haddr is the host-byte-order IPv4 address to look up. 866 * noalloc may be non-zero to suppress allocation of sources. 867 * *pims will be set to the address of the retrieved or allocated source. 868 * 869 * SMPng: NOTE: may be called with locks held. 870 * Return 0 if successful, otherwise return a non-zero error code. 
 */
static int
inm_get_source(struct in_multi *inm, const in_addr_t haddr,
    const int noalloc, struct ip_msource **pims)
{
	struct ip_msource find;
	struct ip_msource *ims, *nims;
#ifdef KTR
	struct in_addr ia;
#endif

	find.ims_haddr = haddr;
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims == NULL && !noalloc) {
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		nims->ims_haddr = haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
#ifdef KTR
		ia.s_addr = htonl(haddr);
		CTR3(KTR_IGMPV3, "%s: allocated %s as %p", __func__,
		    inet_ntoa(ia), ims);
#endif
	}

	/* NB: with noalloc set and no match, returns 0 with *pims == NULL. */
	*pims = ims;
	return (0);
}

/*
 * Merge socket-layer source into IGMP-layer source.
 * If rollback is non-zero, perform the inverse of the merge.
 */
static void
ims_merge(struct ip_msource *ims, const struct in_msource *lims,
    const int rollback)
{
	int n = rollback ? -1 : 1;
#ifdef KTR
	struct in_addr ia;

	ia.s_addr = htonl(ims->ims_haddr);
#endif

	/* Retract the source's contribution under its t0 mode... */
	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].ex -= n;
	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].in -= n;
	}

	/* ...and apply it under its t1 mode (inverted when rolling back). */
	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].ex += n;
	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in += %d on %s",
		    __func__, n, inet_ntoa(ia));
		ims->ims_st[1].in += n;
	}
}

/*
 * Atomically update the global in_multi state, when a membership's
 * filter list is being updated in any way.
 *
 * imf is the per-inpcb-membership group filter pointer.
 * A fake imf may be passed for in-kernel consumers.
 *
 * XXX This is a candidate for a set-symmetric-difference style loop
 * which would eliminate the repeated lookup from root of ims nodes,
 * as they share the same key space.
 *
 * If any error occurred this function will back out of refcounts
 * and return a non-zero value.
 */
static int
inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct ip_msource	*ims, *nims;
	struct in_msource	*lims;
	int			 schanged, error;
	int			 nsrc0, nsrc1;

	schanged = 0;
	error = 0;
	nsrc1 = nsrc0 = 0;

	/*
	 * Update the source filters first, as this may fail.
	 * Maintain count of in-mode filters at t0, t1. These are
	 * used to work out if we transition into ASM mode or not.
	 * Maintain a count of source filters whose state was
	 * actually modified by this operation.
	 */
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
		++schanged;
		if (error)
			break;
		ims_merge(nims, lims, 0);
	}
	if (error) {
		struct ip_msource *bims;

		/* Walk back over the already-merged sources, undoing each. */
		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
			lims = (struct in_msource *)ims;
			if (lims->imsl_st[0] == lims->imsl_st[1])
				continue;
			/* noalloc: only undo sources that exist at IGMP layer. */
			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
			if (bims == NULL)
				continue;
			ims_merge(bims, lims, 1);
		}
		goto out_reap;
	}

	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
	    __func__, nsrc0, nsrc1);

	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
	if (imf->imf_st[0] == imf->imf_st[1] &&
	    imf->imf_st[1] == MCAST_INCLUDE) {
		if (nsrc1 == 0) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}
	}

	/* Handle filter mode transition on socket. */
	if (imf->imf_st[0] != imf->imf_st[1]) {
		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
		    __func__, imf->imf_st[0], imf->imf_st[1]);

		if (imf->imf_st[0] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
			--inm->inm_st[1].iss_ex;
		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}

		if (imf->imf_st[1] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
			inm->inm_st[1].iss_ex++;
		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
			inm->inm_st[1].iss_in++;
		}
	}

	/*
	 * Track inm filter state in terms of listener counts.
	 * If there are any exclusive listeners, stack-wide
	 * membership is exclusive.
	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
	 * If no listeners remain, state is undefined at t1,
	 * and the IGMP lifecycle for this group should finish.
	 */
	if (inm->inm_st[1].iss_ex > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
	} else if (inm->inm_st[1].iss_in > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
	} else {
		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	}

	/* Decrement ASM listener count on transition out of ASM mode. */
	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
		/*
		 * NB: the inner test guards only the trace message;
		 * the decrement below is unconditional within this branch.
		 * The second disjunct is redundant (A != EX || (A == EX &&
		 * nsrc1 > 0) == A != EX || nsrc1 > 0) -- kept as-is.
		 */
		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0))
			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
		--inm->inm_st[1].iss_asm;
	}

	/* Increment ASM listener count on transition to ASM mode. */
	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
		inm->inm_st[1].iss_asm++;
	}

	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
	inm_print(inm);

out_reap:
	if (schanged > 0) {
		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
		inm_reap(inm);
	}
	return (error);
}

/*
 * Mark an in_multi's filter set deltas as committed.
 * Called by IGMP after a state change has been enqueued.
 */
void
inm_commit(struct in_multi *inm)
{
	struct ip_msource	*ims;

	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
	inm_print(inm);

	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		ims->ims_st[0] = ims->ims_st[1];
	}
	inm->inm_st[0] = inm->inm_st[1];
}

/*
 * Reap unreferenced nodes from an in_multi's filter set.
 */
static void
inm_reap(struct in_multi *inm)
{
	struct ip_msource	*ims, *tims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		/* Keep sources still referenced at t0/t1 or recorded. */
		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
		    ims->ims_stp != 0)
			continue;
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Purge all source nodes from an in_multi's filter set.
 */
static void
inm_purge(struct in_multi *inm)
{
	struct ip_msource *ims, *tims;

	/* Unconditionally free every node, unlike inm_reap(). */
	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Join a multicast group; unlocked entry point.
 *
 * Thin wrapper: takes IN_MULTI_LOCK around in_joingroup_locked().
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held.  Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int error;

	IN_MULTI_LOCK();
	error = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Join a multicast group; real entry point.
 *
 * Only preserves atomicity at inm level.
 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
 *
 * If the IGMP downcall fails, the group is not joined, and an error
 * code is returned.  On success, *pinm receives the referenced group.
 */
int
in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	struct in_mfilter timf;
	struct in_multi *inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	CTR4(KTR_IGMPV3, "%s: join %s on %p(%s))", __func__,
	    inet_ntoa(*gina), ifp, ifp->if_xname);

	error = 0;
	inm = NULL;

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
		imf = &timf;
	}

	/* Find or allocate the group; returns it referenced. */
	error = in_getmulti(ifp, gina, &inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
		return (error);
	}

	/* Fold this filter's deltas into the group's t1 state. */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		goto out_inm_release;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
		goto out_inm_release;
	}

out_inm_release:
	/* On any failure, drop the reference taken by in_getmulti(). */
	if (error) {
		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
		inm_release_locked(inm);
	} else {
		*pinm = inm;
	}

	return (error);
}

/*
 * Leave a multicast group; unlocked entry point.
 *
 * Thin wrapper: takes IN_MULTI_LOCK around in_leavegroup_locked().
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int error;

	IN_MULTI_LOCK();
	error = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Leave a multicast group; real entry point.
 * All source filters will be expunged.
 *
 * Only preserves atomicity at inm level.
 *
 * Holding the write lock for the INP which contains imf
 * is highly advisable.  We can't assert for it as imf does not
 * contain a back-pointer to the owning inp.
 *
 * Note: This is not the same as inm_release(*) as this function also
 * makes a state change downcall into IGMP.
 */
int
in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct in_mfilter timf;
	int error;

	error = 0;

	IN_MULTI_LOCK_ASSERT();

	CTR5(KTR_IGMPV3, "%s: leave inm %p, %s/%s, imf %p", __func__,
	    inm, inet_ntoa(inm->inm_addr),
	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
	    imf);

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 * (EXCLUDE at t0 -> UNDEFINED at t1 models the leave.)
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	/* Set curvnet from the ifp as callers may be in any vnet context. */
	CURVNET_SET(inm->inm_ifp->if_vnet);
	error = igmp_change_state(inm);
	CURVNET_RESTORE();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
	inm_release_locked(inm);

	return (error);
}

/*#ifndef BURN_BRIDGES*/
/*
 * Join an IPv4 multicast group in (*,G) exclusive mode.
 * The group must be a 224.0.0.0/24 link-scope group.
 * This KPI is for legacy kernel consumers only.
 *
 * Returns the joined group, or NULL on failure.
 */
struct in_multi *
in_addmulti(struct in_addr *ap, struct ifnet *ifp)
{
	struct in_multi *pinm;
	int error;

	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
	    ("%s: %s not in 224.0.0.0/24", __func__, inet_ntoa(*ap)));

	/* NULL imf means an ASM (exclusive-mode, no sources) join. */
	error = in_joingroup(ifp, ap, NULL, &pinm);
	if (error != 0)
		pinm = NULL;

	return (pinm);
}

/*
 * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
 * This KPI is for legacy kernel consumers only.
 */
void
in_delmulti(struct in_multi *inm)
{

	(void)in_leavegroup(inm, NULL);
}
/*#endif*/

/*
 * Block or unblock an ASM multicast source on an inpcb.
 * This implements the delta-based API described in RFC 3678.
 *
 * The delta-based API applies only to exclusive-mode memberships.
 * An IGMP downcall will be performed.
 *
 * SMPng: NOTE: Must take Giant as a join may create a new ifma.
 *
 * Return 0 if successful, otherwise return an appropriate error code.
1339 */ 1340 static int 1341 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1342 { 1343 struct group_source_req gsr; 1344 sockunion_t *gsa, *ssa; 1345 struct ifnet *ifp; 1346 struct in_mfilter *imf; 1347 struct ip_moptions *imo; 1348 struct in_msource *ims; 1349 struct in_multi *inm; 1350 size_t idx; 1351 uint16_t fmode; 1352 int error, doblock; 1353 1354 ifp = NULL; 1355 error = 0; 1356 doblock = 0; 1357 1358 memset(&gsr, 0, sizeof(struct group_source_req)); 1359 gsa = (sockunion_t *)&gsr.gsr_group; 1360 ssa = (sockunion_t *)&gsr.gsr_source; 1361 1362 switch (sopt->sopt_name) { 1363 case IP_BLOCK_SOURCE: 1364 case IP_UNBLOCK_SOURCE: { 1365 struct ip_mreq_source mreqs; 1366 1367 error = sooptcopyin(sopt, &mreqs, 1368 sizeof(struct ip_mreq_source), 1369 sizeof(struct ip_mreq_source)); 1370 if (error) 1371 return (error); 1372 1373 gsa->sin.sin_family = AF_INET; 1374 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1375 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1376 1377 ssa->sin.sin_family = AF_INET; 1378 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1379 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1380 1381 if (!in_nullhost(mreqs.imr_interface)) 1382 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1383 1384 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1385 doblock = 1; 1386 1387 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1388 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1389 break; 1390 } 1391 1392 case MCAST_BLOCK_SOURCE: 1393 case MCAST_UNBLOCK_SOURCE: 1394 error = sooptcopyin(sopt, &gsr, 1395 sizeof(struct group_source_req), 1396 sizeof(struct group_source_req)); 1397 if (error) 1398 return (error); 1399 1400 if (gsa->sin.sin_family != AF_INET || 1401 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1402 return (EINVAL); 1403 1404 if (ssa->sin.sin_family != AF_INET || 1405 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1406 return (EINVAL); 1407 1408 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1409 return (EADDRNOTAVAIL); 1410 
1411 ifp = ifnet_byindex(gsr.gsr_interface); 1412 1413 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1414 doblock = 1; 1415 break; 1416 1417 default: 1418 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1419 __func__, sopt->sopt_name); 1420 return (EOPNOTSUPP); 1421 break; 1422 } 1423 1424 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1425 return (EINVAL); 1426 1427 /* 1428 * Check if we are actually a member of this group. 1429 */ 1430 imo = inp_findmoptions(inp); 1431 idx = imo_match_group(imo, ifp, &gsa->sa); 1432 if (idx == -1 || imo->imo_mfilters == NULL) { 1433 error = EADDRNOTAVAIL; 1434 goto out_inp_locked; 1435 } 1436 1437 KASSERT(imo->imo_mfilters != NULL, 1438 ("%s: imo_mfilters not allocated", __func__)); 1439 imf = &imo->imo_mfilters[idx]; 1440 inm = imo->imo_membership[idx]; 1441 1442 /* 1443 * Attempting to use the delta-based API on an 1444 * non exclusive-mode membership is an error. 1445 */ 1446 fmode = imf->imf_st[0]; 1447 if (fmode != MCAST_EXCLUDE) { 1448 error = EINVAL; 1449 goto out_inp_locked; 1450 } 1451 1452 /* 1453 * Deal with error cases up-front: 1454 * Asked to block, but already blocked; or 1455 * Asked to unblock, but nothing to unblock. 1456 * If adding a new block entry, allocate it. 1457 */ 1458 ims = imo_match_source(imo, idx, &ssa->sa); 1459 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1460 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 1461 inet_ntoa(ssa->sin.sin_addr), doblock ? "" : "not "); 1462 error = EADDRNOTAVAIL; 1463 goto out_inp_locked; 1464 } 1465 1466 INP_WLOCK_ASSERT(inp); 1467 1468 /* 1469 * Begin state merge transaction at socket layer. 
1470 */ 1471 if (doblock) { 1472 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1473 ims = imf_graft(imf, fmode, &ssa->sin); 1474 if (ims == NULL) 1475 error = ENOMEM; 1476 } else { 1477 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1478 error = imf_prune(imf, &ssa->sin); 1479 } 1480 1481 if (error) { 1482 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1483 goto out_imf_rollback; 1484 } 1485 1486 /* 1487 * Begin state merge transaction at IGMP layer. 1488 */ 1489 IN_MULTI_LOCK(); 1490 1491 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1492 error = inm_merge(inm, imf); 1493 if (error) { 1494 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1495 goto out_imf_rollback; 1496 } 1497 1498 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1499 error = igmp_change_state(inm); 1500 if (error) 1501 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1502 1503 IN_MULTI_UNLOCK(); 1504 1505 out_imf_rollback: 1506 if (error) 1507 imf_rollback(imf); 1508 else 1509 imf_commit(imf); 1510 1511 imf_reap(imf); 1512 1513 out_inp_locked: 1514 INP_WUNLOCK(inp); 1515 return (error); 1516 } 1517 1518 /* 1519 * Given an inpcb, return its multicast options structure pointer. Accepts 1520 * an unlocked inpcb pointer, but will return it locked. May sleep. 1521 * 1522 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1523 * SMPng: NOTE: Returns with the INP write lock held. 
 */
static struct ip_moptions *
inp_findmoptions(struct inpcb *inp)
{
	struct ip_moptions *imo;
	struct in_multi **immp;
	struct in_mfilter *imfp;
	size_t idx;

	/* Fast path: options already allocated; return them locked. */
	INP_WLOCK(inp);
	if (inp->inp_moptions != NULL)
		return (inp->inp_moptions);

	/* Drop the lock: the allocations below may sleep (M_WAITOK). */
	INP_WUNLOCK(inp);

	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
	    M_WAITOK | M_ZERO);
	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
	    M_INMFILTER, M_WAITOK);

	imo->imo_multicast_ifp = NULL;
	imo->imo_multicast_addr.s_addr = INADDR_ANY;
	imo->imo_multicast_vif = -1;
	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
	imo->imo_multicast_loop = in_mcast_loop;
	imo->imo_num_memberships = 0;
	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
	imo->imo_membership = immp;

	/* Initialize per-group source filters. */
	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
	imo->imo_mfilters = imfp;

	/*
	 * Re-take the lock and re-check: another thread may have
	 * installed options while we slept; if so, discard ours.
	 */
	INP_WLOCK(inp);
	if (inp->inp_moptions != NULL) {
		free(imfp, M_INMFILTER);
		free(immp, M_IPMOPTS);
		free(imo, M_IPMOPTS);
		return (inp->inp_moptions);
	}
	inp->inp_moptions = imo;
	return (imo);
}

/*
 * Discard the IP multicast options (and source filters).  To minimize
 * the amount of work done while holding locks such as the INP's
 * pcbinfo lock (which is used in the receive path), the free
 * operation is performed asynchronously in a separate task.
 *
 * SMPng: NOTE: assumes INP write lock is held.
 */
void
inp_freemoptions(struct ip_moptions *imo)
{

	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
	/* Queue the options for deferred teardown by the GC task. */
	IN_MULTI_LOCK();
	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
	IN_MULTI_UNLOCK();
	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
}

/*
 * Synchronous teardown: leave every group and free all storage.
 * Runs from the GC task, never from the receive path.
 */
static void
inp_freemoptions_internal(struct ip_moptions *imo)
{
	struct in_mfilter *imf;
	size_t idx, nmships;

	nmships = imo->imo_num_memberships;
	for (idx = 0; idx < nmships; ++idx) {
		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
		if (imf)
			imf_leave(imf);
		(void)in_leavegroup(imo->imo_membership[idx], imf);
		if (imf)
			imf_purge(imf);
	}

	if (imo->imo_mfilters)
		free(imo->imo_mfilters, M_INMFILTER);
	free(imo->imo_membership, M_IPMOPTS);
	free(imo, M_IPMOPTS);
}

/*
 * GC task handler: drain imo_gc_list, freeing each entry with the
 * IN_MULTI lock dropped around the potentially-sleeping teardown.
 */
static void
inp_gcmoptions(void *context, int pending)
{
	struct ip_moptions *imo;

	IN_MULTI_LOCK();
	while (!STAILQ_EMPTY(&imo_gc_list)) {
		imo = STAILQ_FIRST(&imo_gc_list);
		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
		IN_MULTI_UNLOCK();
		inp_freemoptions_internal(imo);
		IN_MULTI_LOCK();
	}
	IN_MULTI_UNLOCK();
}

/*
 * Atomically get source filters on a socket for an IPv4 multicast group.
 * Called with INP lock held; returns with lock released.
 */
static int
inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct ip_moptions *imo;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	struct in_msource *lims;
	struct sockaddr_in *psin;
	struct sockaddr_storage *ptss;
	struct sockaddr_storage *tss;
	int error;
	size_t idx, nsrcs, ncsrcs;

	INP_WLOCK_ASSERT(inp);

	imo = inp->inp_moptions;
	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));

	/*
	 * Drop the INP lock across the copyin/ifnet lookup; it is
	 * re-taken below before imo is dereferenced further.
	 * NOTE(review): imo itself remains valid because the socket
	 * holds it for the duration of this sockopt call.
	 */
	INP_WUNLOCK(inp);

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EINVAL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EINVAL);

	INP_WLOCK(inp);

	/*
	 * Lookup group on the socket.
	 */
	gsa = (sockunion_t *)&msfr.msfr_group;
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		INP_WUNLOCK(inp);
		return (EADDRNOTAVAIL);
	}
	imf = &imo->imo_mfilters[idx];

	/*
	 * Ignore memberships which are in limbo.
	 */
	if (imf->imf_st[1] == MCAST_UNDEFINED) {
		INP_WUNLOCK(inp);
		return (EAGAIN);
	}
	msfr.msfr_fmode = imf->imf_st[1];

	/*
	 * If the user specified a buffer, copy out the source filter
	 * entries to userland gracefully.
	 * We only copy out the number of entries which userland
	 * has asked for, but we always tell userland how big the
	 * buffer really needs to be.
	 */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
	tss = NULL;
	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
		/* M_NOWAIT: cannot sleep while holding the INP lock. */
		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (tss == NULL) {
			INP_WUNLOCK(inp);
			return (ENOBUFS);
		}
	}

	/*
	 * Count number of sources in-mode at t0.
	 * If buffer space exists and remains, copy out source entries.
	 */
	nsrcs = msfr.msfr_nsrcs;
	ncsrcs = 0;
	ptss = tss;
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
		    lims->imsl_st[0] != imf->imf_st[0])
			continue;
		++ncsrcs;
		if (tss != NULL && nsrcs > 0) {
			psin = (struct sockaddr_in *)ptss;
			psin->sin_family = AF_INET;
			psin->sin_len = sizeof(struct sockaddr_in);
			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
			psin->sin_port = 0;
			++ptss;
			--nsrcs;
		}
	}

	INP_WUNLOCK(inp);

	if (tss != NULL) {
		error = copyout(tss, msfr.msfr_srcs,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		free(tss, M_TEMP);
		if (error)
			return (error);
	}

	/* Report the true in-mode source count back to userland. */
	msfr.msfr_nsrcs = ncsrcs;
	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
 */
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct ip_mreqn mreqn;
	struct ip_moptions *imo;
	struct ifnet *ifp;
	struct in_ifaddr *ia;
	int error, optval;
	u_char coptval;

	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	error = 0;
	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF:
		if (imo != NULL)
			optval = imo->imo_multicast_vif;
		else
			optval = -1;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_IF:
		memset(&mreqn, 0, sizeof(struct ip_mreqn));
		if (imo != NULL) {
			ifp = imo->imo_multicast_ifp;
			if (!in_nullhost(imo->imo_multicast_addr)) {
				mreqn.imr_address = imo->imo_multicast_addr;
			} else if (ifp != NULL) {
				mreqn.imr_ifindex = ifp->if_index;
				IFP_TO_IA(ifp, ia);
				if (ia != NULL) {
					mreqn.imr_address =
					    IA_SIN(ia)->sin_addr;
					ifa_free(&ia->ia_ifa);
				}
			}
		}
		INP_WUNLOCK(inp);
		/* Answer with the struct size the caller supplied. */
		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
			error = sooptcopyout(sopt, &mreqn,
			    sizeof(struct ip_mreqn));
		} else {
			error = sooptcopyout(sopt, &mreqn.imr_address,
			    sizeof(struct in_addr));
		}
		break;

	case IP_MULTICAST_TTL:
		if (imo == 0)
			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
		else
			optval = coptval = imo->imo_multicast_ttl;
		INP_WUNLOCK(inp);
		/* Legacy callers may pass a u_char instead of an int. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_LOOP:
		if (imo == 0)
			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
		else
			optval = coptval = imo->imo_multicast_loop;
		INP_WUNLOCK(inp);
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MSFILTER:
		if (imo == NULL) {
			error = EADDRNOTAVAIL;
			INP_WUNLOCK(inp);
		} else {
			/* Releases the INP lock internally. */
			error = inp_get_source_filters(inp, sopt);
		}
		break;

	default:
		INP_WUNLOCK(inp);
		error = ENOPROTOOPT;
		break;
	}

	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Look up the ifnet to use for a multicast group membership,
 * given the IPv4 address of an interface, and the IPv4 group address.
 *
 * This routine exists to support legacy multicast applications
 * which do not understand that multicast memberships are scoped to
 * specific physical links in the networking stack, or which need
 * to join link-scope groups before IPv4 addresses are configured.
 *
 * If inp is non-NULL, use this socket's current FIB number for any
 * required FIB lookup.
 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
 * and use its ifp; usually, this points to the default next-hop.
 *
 * If the FIB lookup fails, attempt to use the first non-loopback
 * interface with multicast capability in the system as a
 * last resort.  The legacy IPv4 ASM API requires that we do
 * this in order to allow groups to be joined when the routing
 * table has not yet been populated during boot.
 *
 * Returns NULL if no ifp could be found.
 *
 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
 * FUTURE: Implement IPv4 source-address selection.
 */
static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb *inp,
    const struct sockaddr_in *gsin, const struct in_addr ina)
{
	struct ifnet *ifp;

	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
	    ("%s: not multicast", __func__));

	ifp = NULL;
	if (!in_nullhost(ina)) {
		/* Caller named an interface by its IPv4 address. */
		INADDR_TO_IFP(ina, ifp);
	} else {
		/* Route the group address to find an outgoing ifp. */
		struct route ro;

		ro.ro_rt = NULL;
		memcpy(&ro.ro_dst, gsin, sizeof(struct sockaddr_in));
		in_rtalloc_ign(&ro, 0, inp ? inp->inp_inc.inc_fibnum : 0);
		if (ro.ro_rt != NULL) {
			ifp = ro.ro_rt->rt_ifp;
			KASSERT(ifp != NULL, ("%s: null ifp", __func__));
			RTFREE(ro.ro_rt);
		} else {
			/*
			 * No route: fall back to the first configured,
			 * multicast-capable, non-loopback interface.
			 */
			struct in_ifaddr *ia;
			struct ifnet *mifp;

			mifp = NULL;
			IN_IFADDR_RLOCK();
			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
				mifp = ia->ia_ifp;
				if (!(mifp->if_flags & IFF_LOOPBACK) &&
				     (mifp->if_flags & IFF_MULTICAST)) {
					ifp = mifp;
					break;
				}
			}
			IN_IFADDR_RUNLOCK();
		}
	}

	return (ifp);
}

/*
 * Join an IPv4 multicast group, possibly with a source.
1925 */ 1926 static int 1927 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1928 { 1929 struct group_source_req gsr; 1930 sockunion_t *gsa, *ssa; 1931 struct ifnet *ifp; 1932 struct in_mfilter *imf; 1933 struct ip_moptions *imo; 1934 struct in_multi *inm; 1935 struct in_msource *lims; 1936 size_t idx; 1937 int error, is_new; 1938 1939 ifp = NULL; 1940 imf = NULL; 1941 lims = NULL; 1942 error = 0; 1943 is_new = 0; 1944 1945 memset(&gsr, 0, sizeof(struct group_source_req)); 1946 gsa = (sockunion_t *)&gsr.gsr_group; 1947 gsa->ss.ss_family = AF_UNSPEC; 1948 ssa = (sockunion_t *)&gsr.gsr_source; 1949 ssa->ss.ss_family = AF_UNSPEC; 1950 1951 switch (sopt->sopt_name) { 1952 case IP_ADD_MEMBERSHIP: 1953 case IP_ADD_SOURCE_MEMBERSHIP: { 1954 struct ip_mreq_source mreqs; 1955 1956 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 1957 error = sooptcopyin(sopt, &mreqs, 1958 sizeof(struct ip_mreq), 1959 sizeof(struct ip_mreq)); 1960 /* 1961 * Do argument switcharoo from ip_mreq into 1962 * ip_mreq_source to avoid using two instances. 
1963 */ 1964 mreqs.imr_interface = mreqs.imr_sourceaddr; 1965 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 1966 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1967 error = sooptcopyin(sopt, &mreqs, 1968 sizeof(struct ip_mreq_source), 1969 sizeof(struct ip_mreq_source)); 1970 } 1971 if (error) 1972 return (error); 1973 1974 gsa->sin.sin_family = AF_INET; 1975 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1976 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1977 1978 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 1979 ssa->sin.sin_family = AF_INET; 1980 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1981 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1982 } 1983 1984 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1985 return (EINVAL); 1986 1987 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1988 mreqs.imr_interface); 1989 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 1990 __func__, inet_ntoa(mreqs.imr_interface), ifp); 1991 break; 1992 } 1993 1994 case MCAST_JOIN_GROUP: 1995 case MCAST_JOIN_SOURCE_GROUP: 1996 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 1997 error = sooptcopyin(sopt, &gsr, 1998 sizeof(struct group_req), 1999 sizeof(struct group_req)); 2000 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2001 error = sooptcopyin(sopt, &gsr, 2002 sizeof(struct group_source_req), 2003 sizeof(struct group_source_req)); 2004 } 2005 if (error) 2006 return (error); 2007 2008 if (gsa->sin.sin_family != AF_INET || 2009 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2010 return (EINVAL); 2011 2012 /* 2013 * Overwrite the port field if present, as the sockaddr 2014 * being copied in may be matched with a binary comparison. 
2015 */ 2016 gsa->sin.sin_port = 0; 2017 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2018 if (ssa->sin.sin_family != AF_INET || 2019 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2020 return (EINVAL); 2021 ssa->sin.sin_port = 0; 2022 } 2023 2024 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2025 return (EINVAL); 2026 2027 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2028 return (EADDRNOTAVAIL); 2029 ifp = ifnet_byindex(gsr.gsr_interface); 2030 break; 2031 2032 default: 2033 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2034 __func__, sopt->sopt_name); 2035 return (EOPNOTSUPP); 2036 break; 2037 } 2038 2039 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2040 return (EADDRNOTAVAIL); 2041 2042 imo = inp_findmoptions(inp); 2043 idx = imo_match_group(imo, ifp, &gsa->sa); 2044 if (idx == -1) { 2045 is_new = 1; 2046 } else { 2047 inm = imo->imo_membership[idx]; 2048 imf = &imo->imo_mfilters[idx]; 2049 if (ssa->ss.ss_family != AF_UNSPEC) { 2050 /* 2051 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2052 * is an error. On an existing inclusive membership, 2053 * it just adds the source to the filter list. 2054 */ 2055 if (imf->imf_st[1] != MCAST_INCLUDE) { 2056 error = EINVAL; 2057 goto out_inp_locked; 2058 } 2059 /* 2060 * Throw out duplicates. 2061 * 2062 * XXX FIXME: This makes a naive assumption that 2063 * even if entries exist for *ssa in this imf, 2064 * they will be rejected as dupes, even if they 2065 * are not valid in the current mode (in-mode). 2066 * 2067 * in_msource is transactioned just as for anything 2068 * else in SSM -- but note naive use of inm_graft() 2069 * below for allocating new filter entries. 2070 * 2071 * This is only an issue if someone mixes the 2072 * full-state SSM API with the delta-based API, 2073 * which is discouraged in the relevant RFCs. 
2074 */ 2075 lims = imo_match_source(imo, idx, &ssa->sa); 2076 if (lims != NULL /*&& 2077 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2078 error = EADDRNOTAVAIL; 2079 goto out_inp_locked; 2080 } 2081 } else { 2082 /* 2083 * MCAST_JOIN_GROUP on an existing exclusive 2084 * membership is an error; return EADDRINUSE 2085 * to preserve 4.4BSD API idempotence, and 2086 * avoid tedious detour to code below. 2087 * NOTE: This is bending RFC 3678 a bit. 2088 * 2089 * On an existing inclusive membership, this is also 2090 * an error; if you want to change filter mode, 2091 * you must use the userland API setsourcefilter(). 2092 * XXX We don't reject this for imf in UNDEFINED 2093 * state at t1, because allocation of a filter 2094 * is atomic with allocation of a membership. 2095 */ 2096 error = EINVAL; 2097 if (imf->imf_st[1] == MCAST_EXCLUDE) 2098 error = EADDRINUSE; 2099 goto out_inp_locked; 2100 } 2101 } 2102 2103 /* 2104 * Begin state merge transaction at socket layer. 2105 */ 2106 INP_WLOCK_ASSERT(inp); 2107 2108 if (is_new) { 2109 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2110 error = imo_grow(imo); 2111 if (error) 2112 goto out_inp_locked; 2113 } 2114 /* 2115 * Allocate the new slot upfront so we can deal with 2116 * grafting the new source filter in same code path 2117 * as for join-source on existing membership. 2118 */ 2119 idx = imo->imo_num_memberships; 2120 imo->imo_membership[idx] = NULL; 2121 imo->imo_num_memberships++; 2122 KASSERT(imo->imo_mfilters != NULL, 2123 ("%s: imf_mfilters vector was not allocated", __func__)); 2124 imf = &imo->imo_mfilters[idx]; 2125 KASSERT(RB_EMPTY(&imf->imf_sources), 2126 ("%s: imf_sources not empty", __func__)); 2127 } 2128 2129 /* 2130 * Graft new source into filter list for this inpcb's 2131 * membership of the group. The in_multi may not have 2132 * been allocated yet if this is a new membership, however, 2133 * the in_mfilter slot will be allocated and must be initialized. 
2134 * 2135 * Note: Grafting of exclusive mode filters doesn't happen 2136 * in this path. 2137 * XXX: Should check for non-NULL lims (node exists but may 2138 * not be in-mode) for interop with full-state API. 2139 */ 2140 if (ssa->ss.ss_family != AF_UNSPEC) { 2141 /* Membership starts in IN mode */ 2142 if (is_new) { 2143 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2144 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2145 } else { 2146 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2147 } 2148 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2149 if (lims == NULL) { 2150 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2151 __func__); 2152 error = ENOMEM; 2153 goto out_imo_free; 2154 } 2155 } else { 2156 /* No address specified; Membership starts in EX mode */ 2157 if (is_new) { 2158 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2159 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2160 } 2161 } 2162 2163 /* 2164 * Begin state merge transaction at IGMP layer. 2165 */ 2166 IN_MULTI_LOCK(); 2167 2168 if (is_new) { 2169 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2170 &inm); 2171 if (error) 2172 goto out_imo_free; 2173 imo->imo_membership[idx] = inm; 2174 } else { 2175 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2176 error = inm_merge(inm, imf); 2177 if (error) { 2178 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2179 __func__); 2180 goto out_imf_rollback; 2181 } 2182 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2183 error = igmp_change_state(inm); 2184 if (error) { 2185 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2186 __func__); 2187 goto out_imf_rollback; 2188 } 2189 } 2190 2191 IN_MULTI_UNLOCK(); 2192 2193 out_imf_rollback: 2194 INP_WLOCK_ASSERT(inp); 2195 if (error) { 2196 imf_rollback(imf); 2197 if (is_new) 2198 imf_purge(imf); 2199 else 2200 imf_reap(imf); 2201 } else { 2202 imf_commit(imf); 2203 } 2204 2205 out_imo_free: 2206 if (error && is_new) { 2207 imo->imo_membership[idx] = NULL; 2208 
--imo->imo_num_memberships; 2209 } 2210 2211 out_inp_locked: 2212 INP_WUNLOCK(inp); 2213 return (error); 2214 } 2215 2216 /* 2217 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2218 */ 2219 static int 2220 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2221 { 2222 struct group_source_req gsr; 2223 struct ip_mreq_source mreqs; 2224 sockunion_t *gsa, *ssa; 2225 struct ifnet *ifp; 2226 struct in_mfilter *imf; 2227 struct ip_moptions *imo; 2228 struct in_msource *ims; 2229 struct in_multi *inm; 2230 size_t idx; 2231 int error, is_final; 2232 2233 ifp = NULL; 2234 error = 0; 2235 is_final = 1; 2236 2237 memset(&gsr, 0, sizeof(struct group_source_req)); 2238 gsa = (sockunion_t *)&gsr.gsr_group; 2239 gsa->ss.ss_family = AF_UNSPEC; 2240 ssa = (sockunion_t *)&gsr.gsr_source; 2241 ssa->ss.ss_family = AF_UNSPEC; 2242 2243 switch (sopt->sopt_name) { 2244 case IP_DROP_MEMBERSHIP: 2245 case IP_DROP_SOURCE_MEMBERSHIP: 2246 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2247 error = sooptcopyin(sopt, &mreqs, 2248 sizeof(struct ip_mreq), 2249 sizeof(struct ip_mreq)); 2250 /* 2251 * Swap interface and sourceaddr arguments, 2252 * as ip_mreq and ip_mreq_source are laid 2253 * out differently. 2254 */ 2255 mreqs.imr_interface = mreqs.imr_sourceaddr; 2256 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2257 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2258 error = sooptcopyin(sopt, &mreqs, 2259 sizeof(struct ip_mreq_source), 2260 sizeof(struct ip_mreq_source)); 2261 } 2262 if (error) 2263 return (error); 2264 2265 gsa->sin.sin_family = AF_INET; 2266 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2267 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2268 2269 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2270 ssa->sin.sin_family = AF_INET; 2271 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2272 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2273 } 2274 2275 /* 2276 * Attempt to look up hinted ifp from interface address. 
2277 * Fallthrough with null ifp iff lookup fails, to 2278 * preserve 4.4BSD mcast API idempotence. 2279 * XXX NOTE WELL: The RFC 3678 API is preferred because 2280 * using an IPv4 address as a key is racy. 2281 */ 2282 if (!in_nullhost(mreqs.imr_interface)) 2283 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2284 2285 CTR3(KTR_IGMPV3, "%s: imr_interface = %s, ifp = %p", 2286 __func__, inet_ntoa(mreqs.imr_interface), ifp); 2287 2288 break; 2289 2290 case MCAST_LEAVE_GROUP: 2291 case MCAST_LEAVE_SOURCE_GROUP: 2292 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2293 error = sooptcopyin(sopt, &gsr, 2294 sizeof(struct group_req), 2295 sizeof(struct group_req)); 2296 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2297 error = sooptcopyin(sopt, &gsr, 2298 sizeof(struct group_source_req), 2299 sizeof(struct group_source_req)); 2300 } 2301 if (error) 2302 return (error); 2303 2304 if (gsa->sin.sin_family != AF_INET || 2305 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2306 return (EINVAL); 2307 2308 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2309 if (ssa->sin.sin_family != AF_INET || 2310 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2311 return (EINVAL); 2312 } 2313 2314 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2315 return (EADDRNOTAVAIL); 2316 2317 ifp = ifnet_byindex(gsr.gsr_interface); 2318 2319 if (ifp == NULL) 2320 return (EADDRNOTAVAIL); 2321 break; 2322 2323 default: 2324 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2325 __func__, sopt->sopt_name); 2326 return (EOPNOTSUPP); 2327 break; 2328 } 2329 2330 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2331 return (EINVAL); 2332 2333 /* 2334 * Find the membership in the membership array. 
2335 */ 2336 imo = inp_findmoptions(inp); 2337 idx = imo_match_group(imo, ifp, &gsa->sa); 2338 if (idx == -1) { 2339 error = EADDRNOTAVAIL; 2340 goto out_inp_locked; 2341 } 2342 inm = imo->imo_membership[idx]; 2343 imf = &imo->imo_mfilters[idx]; 2344 2345 if (ssa->ss.ss_family != AF_UNSPEC) 2346 is_final = 0; 2347 2348 /* 2349 * Begin state merge transaction at socket layer. 2350 */ 2351 INP_WLOCK_ASSERT(inp); 2352 2353 /* 2354 * If we were instructed only to leave a given source, do so. 2355 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2356 */ 2357 if (is_final) { 2358 imf_leave(imf); 2359 } else { 2360 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2361 error = EADDRNOTAVAIL; 2362 goto out_inp_locked; 2363 } 2364 ims = imo_match_source(imo, idx, &ssa->sa); 2365 if (ims == NULL) { 2366 CTR3(KTR_IGMPV3, "%s: source %s %spresent", __func__, 2367 inet_ntoa(ssa->sin.sin_addr), "not "); 2368 error = EADDRNOTAVAIL; 2369 goto out_inp_locked; 2370 } 2371 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2372 error = imf_prune(imf, &ssa->sin); 2373 if (error) { 2374 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2375 __func__); 2376 goto out_inp_locked; 2377 } 2378 } 2379 2380 /* 2381 * Begin state merge transaction at IGMP layer. 2382 */ 2383 IN_MULTI_LOCK(); 2384 2385 if (is_final) { 2386 /* 2387 * Give up the multicast address record to which 2388 * the membership points. 
2389 */ 2390 (void)in_leavegroup_locked(inm, imf); 2391 } else { 2392 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2393 error = inm_merge(inm, imf); 2394 if (error) { 2395 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2396 __func__); 2397 goto out_imf_rollback; 2398 } 2399 2400 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2401 error = igmp_change_state(inm); 2402 if (error) { 2403 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2404 __func__); 2405 } 2406 } 2407 2408 IN_MULTI_UNLOCK(); 2409 2410 out_imf_rollback: 2411 if (error) 2412 imf_rollback(imf); 2413 else 2414 imf_commit(imf); 2415 2416 imf_reap(imf); 2417 2418 if (is_final) { 2419 /* Remove the gap in the membership and filter array. */ 2420 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2421 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2422 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2423 } 2424 imo->imo_num_memberships--; 2425 } 2426 2427 out_inp_locked: 2428 INP_WUNLOCK(inp); 2429 return (error); 2430 } 2431 2432 /* 2433 * Select the interface for transmitting IPv4 multicast datagrams. 2434 * 2435 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2436 * may be passed to this socket option. An address of INADDR_ANY or an 2437 * interface index of 0 is used to remove a previous selection. 2438 * When no interface is selected, one is chosen for every send. 2439 */ 2440 static int 2441 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2442 { 2443 struct in_addr addr; 2444 struct ip_mreqn mreqn; 2445 struct ifnet *ifp; 2446 struct ip_moptions *imo; 2447 int error; 2448 2449 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2450 /* 2451 * An interface index was specified using the 2452 * Linux-derived ip_mreqn structure. 
2453 */ 2454 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2455 sizeof(struct ip_mreqn)); 2456 if (error) 2457 return (error); 2458 2459 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2460 return (EINVAL); 2461 2462 if (mreqn.imr_ifindex == 0) { 2463 ifp = NULL; 2464 } else { 2465 ifp = ifnet_byindex(mreqn.imr_ifindex); 2466 if (ifp == NULL) 2467 return (EADDRNOTAVAIL); 2468 } 2469 } else { 2470 /* 2471 * An interface was specified by IPv4 address. 2472 * This is the traditional BSD usage. 2473 */ 2474 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2475 sizeof(struct in_addr)); 2476 if (error) 2477 return (error); 2478 if (in_nullhost(addr)) { 2479 ifp = NULL; 2480 } else { 2481 INADDR_TO_IFP(addr, ifp); 2482 if (ifp == NULL) 2483 return (EADDRNOTAVAIL); 2484 } 2485 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = %s", __func__, ifp, 2486 inet_ntoa(addr)); 2487 } 2488 2489 /* Reject interfaces which do not support multicast. */ 2490 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2491 return (EOPNOTSUPP); 2492 2493 imo = inp_findmoptions(inp); 2494 imo->imo_multicast_ifp = ifp; 2495 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2496 INP_WUNLOCK(inp); 2497 2498 return (0); 2499 } 2500 2501 /* 2502 * Atomically set source filters on a socket for an IPv4 multicast group. 2503 * 2504 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
2505 */ 2506 static int 2507 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2508 { 2509 struct __msfilterreq msfr; 2510 sockunion_t *gsa; 2511 struct ifnet *ifp; 2512 struct in_mfilter *imf; 2513 struct ip_moptions *imo; 2514 struct in_multi *inm; 2515 size_t idx; 2516 int error; 2517 2518 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2519 sizeof(struct __msfilterreq)); 2520 if (error) 2521 return (error); 2522 2523 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2524 return (ENOBUFS); 2525 2526 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2527 msfr.msfr_fmode != MCAST_INCLUDE)) 2528 return (EINVAL); 2529 2530 if (msfr.msfr_group.ss_family != AF_INET || 2531 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2532 return (EINVAL); 2533 2534 gsa = (sockunion_t *)&msfr.msfr_group; 2535 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2536 return (EINVAL); 2537 2538 gsa->sin.sin_port = 0; /* ignore port */ 2539 2540 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2541 return (EADDRNOTAVAIL); 2542 2543 ifp = ifnet_byindex(msfr.msfr_ifindex); 2544 if (ifp == NULL) 2545 return (EADDRNOTAVAIL); 2546 2547 /* 2548 * Take the INP write lock. 2549 * Check if this socket is a member of this group. 2550 */ 2551 imo = inp_findmoptions(inp); 2552 idx = imo_match_group(imo, ifp, &gsa->sa); 2553 if (idx == -1 || imo->imo_mfilters == NULL) { 2554 error = EADDRNOTAVAIL; 2555 goto out_inp_locked; 2556 } 2557 inm = imo->imo_membership[idx]; 2558 imf = &imo->imo_mfilters[idx]; 2559 2560 /* 2561 * Begin state merge transaction at socket layer. 2562 */ 2563 INP_WLOCK_ASSERT(inp); 2564 2565 imf->imf_st[1] = msfr.msfr_fmode; 2566 2567 /* 2568 * Apply any new source filters, if present. 2569 * Make a copy of the user-space source vector so 2570 * that we may copy them with a single copyin. This 2571 * allows us to deal with page faults up-front. 
2572 */ 2573 if (msfr.msfr_nsrcs > 0) { 2574 struct in_msource *lims; 2575 struct sockaddr_in *psin; 2576 struct sockaddr_storage *kss, *pkss; 2577 int i; 2578 2579 INP_WUNLOCK(inp); 2580 2581 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2582 __func__, (unsigned long)msfr.msfr_nsrcs); 2583 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2584 M_TEMP, M_WAITOK); 2585 error = copyin(msfr.msfr_srcs, kss, 2586 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2587 if (error) { 2588 free(kss, M_TEMP); 2589 return (error); 2590 } 2591 2592 INP_WLOCK(inp); 2593 2594 /* 2595 * Mark all source filters as UNDEFINED at t1. 2596 * Restore new group filter mode, as imf_leave() 2597 * will set it to INCLUDE. 2598 */ 2599 imf_leave(imf); 2600 imf->imf_st[1] = msfr.msfr_fmode; 2601 2602 /* 2603 * Update socket layer filters at t1, lazy-allocating 2604 * new entries. This saves a bunch of memory at the 2605 * cost of one RB_FIND() per source entry; duplicate 2606 * entries in the msfr_nsrcs vector are ignored. 2607 * If we encounter an error, rollback transaction. 2608 * 2609 * XXX This too could be replaced with a set-symmetric 2610 * difference like loop to avoid walking from root 2611 * every time, as the key space is common. 2612 */ 2613 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2614 psin = (struct sockaddr_in *)pkss; 2615 if (psin->sin_family != AF_INET) { 2616 error = EAFNOSUPPORT; 2617 break; 2618 } 2619 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2620 error = EINVAL; 2621 break; 2622 } 2623 error = imf_get_source(imf, psin, &lims); 2624 if (error) 2625 break; 2626 lims->imsl_st[1] = imf->imf_st[1]; 2627 } 2628 free(kss, M_TEMP); 2629 } 2630 2631 if (error) 2632 goto out_imf_rollback; 2633 2634 INP_WLOCK_ASSERT(inp); 2635 IN_MULTI_LOCK(); 2636 2637 /* 2638 * Begin state merge transaction at IGMP layer. 
2639 */ 2640 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2641 error = inm_merge(inm, imf); 2642 if (error) { 2643 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2644 goto out_imf_rollback; 2645 } 2646 2647 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2648 error = igmp_change_state(inm); 2649 if (error) 2650 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2651 2652 IN_MULTI_UNLOCK(); 2653 2654 out_imf_rollback: 2655 if (error) 2656 imf_rollback(imf); 2657 else 2658 imf_commit(imf); 2659 2660 imf_reap(imf); 2661 2662 out_inp_locked: 2663 INP_WUNLOCK(inp); 2664 return (error); 2665 } 2666 2667 /* 2668 * Set the IP multicast options in response to user setsockopt(). 2669 * 2670 * Many of the socket options handled in this function duplicate the 2671 * functionality of socket options in the regular unicast API. However, 2672 * it is not possible to merge the duplicate code, because the idempotence 2673 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2674 * the effects of these options must be treated as separate and distinct. 2675 * 2676 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2677 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2678 * is refactored to no longer use vifs. 2679 */ 2680 int 2681 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2682 { 2683 struct ip_moptions *imo; 2684 int error; 2685 2686 error = 0; 2687 2688 /* 2689 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2690 * or is a divert socket, reject it. 2691 */ 2692 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2693 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2694 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2695 return (EOPNOTSUPP); 2696 2697 switch (sopt->sopt_name) { 2698 case IP_MULTICAST_VIF: { 2699 int vifi; 2700 /* 2701 * Select a multicast VIF for transmission. 2702 * Only useful if multicast forwarding is active. 
		 */
		/*
		 * legal_vif_num is a hook; it is NULL unless the
		 * multicast forwarding code has registered it.
		 */
		if (legal_vif_num == NULL) {
			error = EOPNOTSUPP;
			break;
		}
		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
		if (error)
			break;
		/*
		 * -1 is accepted without validation; presumably it
		 * deselects any previous VIF -- TODO confirm against
		 * the MROUTING code.
		 */
		if (!legal_vif_num(vifi) && (vifi != -1)) {
			error = EINVAL;
			break;
		}
		/* inp_findmoptions() returns with the INP write-locked. */
		imo = inp_findmoptions(inp);
		imo->imo_multicast_vif = vifi;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_IF:
		error = inp_set_multicast_if(inp, sopt);
		break;

	case IP_MULTICAST_TTL: {
		u_char ttl;

		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 * The original multicast API required a char argument,
		 * which is inconsistent with the rest of the socket API.
		 * We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int ittl;

			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			/* TTL must fit in an octet. */
			if (ittl > 255) {
				error = EINVAL;
				break;
			}
			ttl = (u_char)ittl;
		}
		imo = inp_findmoptions(inp);
		imo->imo_multicast_ttl = ttl;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_LOOP: {
		u_char loop;

		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one. The original multicast API required a
		 * char argument, which is inconsistent with the rest
		 * of the socket API. We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &loop, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int iloop;

			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			loop = (u_char)iloop;
		}
		imo = inp_findmoptions(inp);
		/* Canonicalize any non-zero value to 1. */
		imo->imo_multicast_loop = !!loop;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
		error = inp_join_group(inp, sopt);
		break;

	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		error = inp_leave_group(inp, sopt);
		break;

	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = inp_block_unblock_source(inp, sopt);
		break;

	case IP_MSFILTER:
		error = inp_set_source_filters(inp, sopt);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/* Every handler above must return with the inpcb unlocked. */
	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Expose IGMP's multicast filter mode and source list(s) to userland,
 * keyed by (ifindex, group).
 * The filter mode is written out as a uint32_t, followed by
 * 0..n of struct in_addr.
 * For use by ifmcstat(8).
 * SMPng: NOTE: unlocked read of ifindex space.
 */
static int
sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
{
	struct in_addr src, group;
	struct ifnet *ifp;
	struct ifmultiaddr *ifma;
	struct in_multi *inm;
	struct ip_msource *ims;
	int *name;
	int retval;
	u_int namelen;
	uint32_t fmode, ifindex;

	name = (int *)arg1;
	namelen = arg2;

	/* This node is read-only; reject writes outright. */
	if (req->newptr != NULL)
		return (EPERM);

	/* The MIB name is exactly (ifindex, group). */
	if (namelen != 2)
		return (EINVAL);

	ifindex = name[0];
	if (ifindex <= 0 || ifindex > V_if_index) {
		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
		    __func__, ifindex);
		return (ENOENT);
	}

	/* name[1] is the group address in network byte order. */
	group.s_addr = name[1];
	if (!IN_MULTICAST(ntohl(group.s_addr))) {
		CTR2(KTR_IGMPV3, "%s: group %s is not multicast",
		    __func__, inet_ntoa(group));
		return (EINVAL);
	}

	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL) {
		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
		    __func__, ifindex);
		return (ENOENT);
	}

	/*
	 * Wire the output buffer up-front for the worst case:
	 * the mode word plus the per-group source limit.
	 */
	retval = sysctl_wire_old_buffer(req,
	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
	if (retval)
		return (retval);

	IN_MULTI_LOCK();

	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		/* Skip non-IPv4 and not-yet-attached memberships. */
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;
		inm = (struct in_multi *)ifma->ifma_protospec;
		if (!in_hosteq(inm->inm_addr, group))
			continue;
		fmode = inm->inm_st[1].iss_fmode;
		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
		if (retval != 0)
			break;
		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
#ifdef KTR
			struct in_addr ina;
			ina.s_addr = htonl(ims->ims_haddr);
			CTR2(KTR_IGMPV3, "%s: visit node %s", __func__,
			    inet_ntoa(ina));
#endif
			/*
			 * Only copy-out sources which are in-mode.
			 */
			if (fmode != ims_get_mode(inm, ims, 1)) {
				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
				    __func__);
				continue;
			}
			/* ims_haddr is host order; convert for userland. */
			src.s_addr = htonl(ims->ims_haddr);
			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
			if (retval != 0)
				break;
		}
	}
	IF_ADDR_RUNLOCK(ifp);

	IN_MULTI_UNLOCK();

	return (retval);
}

#ifdef KTR

/* Indexed by MCAST_UNDEFINED/MCAST_INCLUDE/MCAST_EXCLUDE. */
static const char *inm_modestrs[] = { "un", "in", "ex" };

/*
 * Map an MCAST_* filter mode to a short string, or "??" if the
 * value is out of range.
 */
static const char *
inm_mode_str(const int mode)
{

	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
		return (inm_modestrs[mode]);
	return ("??");
}

/* Indexed by the IGMP_*_MEMBER state constants. */
static const char *inm_statestrs[] = {
	"not-member",
	"silent",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
	"leaving"
};

/*
 * Map an IGMP per-group state to a human-readable name, or "??"
 * if the value is out of range.
 */
static const char *
inm_state_str(const int state)
{

	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
		return (inm_statestrs[state]);
	return ("??");
}

/*
 * Dump an in_multi structure to the console.
 */
void
inm_print(const struct in_multi *inm)
{
	int t;

	/* Stay silent unless IGMPv3 tracing is enabled in ktr_mask. */
	if ((ktr_mask & KTR_IGMPV3) == 0)
		return;

	printf("%s: --- begin inm %p ---\n", __func__, inm);
	printf("addr %s ifp %p(%s) ifma %p\n",
	    inet_ntoa(inm->inm_addr),
	    inm->inm_ifp,
	    inm->inm_ifp->if_xname,
	    inm->inm_ifma);
	printf("timer %u state %s refcount %u scq.len %u\n",
	    inm->inm_timer,
	    inm_state_str(inm->inm_state),
	    inm->inm_refcount,
	    inm->inm_scq.ifq_len);
	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
	    inm->inm_igi,
	    inm->inm_nsrc,
	    inm->inm_sctimer,
	    inm->inm_scrv);
	/* Dump both recorded state snapshots (t0 and t1). */
	for (t = 0; t < 2; t++) {
		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
		    inm_mode_str(inm->inm_st[t].iss_fmode),
		    inm->inm_st[t].iss_asm,
		    inm->inm_st[t].iss_ex,
		    inm->inm_st[t].iss_in,
		    inm->inm_st[t].iss_rec);
	}
	printf("%s: --- end inm %p ---\n", __func__, inm);
}

#else /* !KTR */

/* Stub: inm_print() is a no-op when kernel tracing is compiled out. */
void
inm_print(const struct in_multi *inm)
{

}

#endif /* KTR */

RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);