1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 169 static int inp_join_group(struct inpcb *, struct sockopt *); 170 static int inp_leave_group(struct inpcb *, struct sockopt *); 171 static struct ifnet * 172 inp_lookup_mcast_ifp(const struct inpcb *, 173 const struct sockaddr_in *, const struct in_addr); 174 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 175 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 176 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 177 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 178 179 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 180 "IPv4 multicast"); 181 182 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 183 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 184 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 185 "Max source filters per group"); 186 187 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 188 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 189 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 190 "Max source filters per socket"); 191 192 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 193 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 194 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 195 196 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 197 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 198 "Per-interface stack-wide source filters"); 199 200 #ifdef KTR 201 /* 202 * Inline function which wraps assertions for a valid ifp. 203 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 204 * is detached. 205 */ 206 static int __inline 207 inm_is_ifp_detached(const struct in_multi *inm) 208 { 209 struct ifnet *ifp; 210 211 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 212 ifp = inm->inm_ifma->ifma_ifp; 213 if (ifp != NULL) { 214 /* 215 * Sanity check that netinet's notion of ifp is the 216 * same as net's. 217 */ 218 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 219 } 220 221 return (ifp == NULL); 222 } 223 #endif 224 225 static struct grouptask free_gtask; 226 static struct in_multi_head inm_free_list; 227 static void inm_release_task(void *arg __unused); 228 static void inm_init(void) 229 { 230 SLIST_INIT(&inm_free_list); 231 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 232 } 233 234 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 235 inm_init, NULL); 236 237 238 void 239 inm_release_list_deferred(struct in_multi_head *inmh) 240 { 241 242 if (SLIST_EMPTY(inmh)) 243 return; 244 mtx_lock(&in_multi_free_mtx); 245 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 246 mtx_unlock(&in_multi_free_mtx); 247 GROUPTASK_ENQUEUE(&free_gtask); 248 } 249 250 void 251 inm_disconnect(struct in_multi *inm) 252 { 253 struct ifnet *ifp; 254 struct ifmultiaddr *ifma, *ll_ifma; 255 256 ifp = inm->inm_ifp; 257 IF_ADDR_WLOCK_ASSERT(ifp); 258 ifma = inm->inm_ifma; 259 260 if_ref(ifp); 261 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 262 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 263 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 264 MPASS(ifma != ll_ifma); 265 ifma->ifma_llifma = NULL; 266 MPASS(ll_ifma->ifma_llifma == NULL); 267 MPASS(ll_ifma->ifma_ifp == ifp); 268 if (--ll_ifma->ifma_refcount == 0) { 269 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 270 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 271 if_freemulti(ll_ifma); 272 ifma_restart = true; 273 } 274 } 275 } 276 277 void 278 inm_release_deferred(struct in_multi *inm) 279 { 280 struct in_multi_head tmp; 281 282 IN_MULTI_LIST_LOCK_ASSERT(); 283 MPASS(inm->inm_refcount > 0); 284 if (--inm->inm_refcount == 0) { 285 SLIST_INIT(&tmp); 286 inm_disconnect(inm); 287 inm->inm_ifma->ifma_protospec = NULL; 288 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 289 inm_release_list_deferred(&tmp); 290 } 291 } 292 293 static void 294 inm_release_task(void *arg __unused) 295 { 296 struct in_multi_head inm_free_tmp; 297 struct in_multi *inm, *tinm; 298 299 SLIST_INIT(&inm_free_tmp); 300 mtx_lock(&in_multi_free_mtx); 301 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 302 mtx_unlock(&in_multi_free_mtx); 303 IN_MULTI_LOCK(); 304 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 305 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 306 MPASS(inm); 307 inm_release(inm); 308 } 309 IN_MULTI_UNLOCK(); 310 } 311 312 /* 313 * Initialize an in_mfilter structure to a known state at t0, t1 314 * with an empty source filter list. 315 */ 316 static __inline void 317 imf_init(struct in_mfilter *imf, const int st0, const int st1) 318 { 319 memset(imf, 0, sizeof(struct in_mfilter)); 320 RB_INIT(&imf->imf_sources); 321 imf->imf_st[0] = st0; 322 imf->imf_st[1] = st1; 323 } 324 325 /* 326 * Function for looking up an in_multi record for an IPv4 multicast address 327 * on a given interface. ifp must be valid. If no record found, return NULL. 328 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 329 */ 330 struct in_multi * 331 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 332 { 333 struct ifmultiaddr *ifma; 334 struct in_multi *inm; 335 336 IN_MULTI_LIST_LOCK_ASSERT(); 337 IF_ADDR_LOCK_ASSERT(ifp); 338 339 inm = NULL; 340 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 341 if (ifma->ifma_addr->sa_family != AF_INET || 342 ifma->ifma_protospec == NULL) 343 continue; 344 inm = (struct in_multi *)ifma->ifma_protospec; 345 if (inm->inm_addr.s_addr == ina.s_addr) 346 break; 347 inm = NULL; 348 } 349 return (inm); 350 } 351 352 /* 353 * Wrapper for inm_lookup_locked(). 354 * The IF_ADDR_LOCK will be taken on ifp and released on return. 355 */ 356 struct in_multi * 357 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 358 { 359 struct in_multi *inm; 360 361 IN_MULTI_LIST_LOCK_ASSERT(); 362 IF_ADDR_RLOCK(ifp); 363 inm = inm_lookup_locked(ifp, ina); 364 IF_ADDR_RUNLOCK(ifp); 365 366 return (inm); 367 } 368 369 /* 370 * Resize the ip_moptions vector to the next power-of-two minus 1. 371 * May be called with locks held; do not sleep. 372 */ 373 static int 374 imo_grow(struct ip_moptions *imo) 375 { 376 struct in_multi **nmships; 377 struct in_multi **omships; 378 struct in_mfilter *nmfilters; 379 struct in_mfilter *omfilters; 380 size_t idx; 381 size_t newmax; 382 size_t oldmax; 383 384 nmships = NULL; 385 nmfilters = NULL; 386 omships = imo->imo_membership; 387 omfilters = imo->imo_mfilters; 388 oldmax = imo->imo_max_memberships; 389 newmax = ((oldmax + 1) * 2) - 1; 390 391 if (newmax <= IP_MAX_MEMBERSHIPS) { 392 nmships = (struct in_multi **)realloc(omships, 393 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 394 nmfilters = (struct in_mfilter *)realloc(omfilters, 395 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 396 if (nmships != NULL && nmfilters != NULL) { 397 /* Initialize newly allocated source filter heads. */ 398 for (idx = oldmax; idx < newmax; idx++) { 399 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 400 MCAST_EXCLUDE); 401 } 402 imo->imo_max_memberships = newmax; 403 imo->imo_membership = nmships; 404 imo->imo_mfilters = nmfilters; 405 } 406 } 407 408 if (nmships == NULL || nmfilters == NULL) { 409 if (nmships != NULL) 410 free(nmships, M_IPMOPTS); 411 if (nmfilters != NULL) 412 free(nmfilters, M_INMFILTER); 413 return (ETOOMANYREFS); 414 } 415 416 return (0); 417 } 418 419 /* 420 * Find an IPv4 multicast group entry for this ip_moptions instance 421 * which matches the specified group, and optionally an interface. 422 * Return its index into the array, or -1 if not found. 423 */ 424 static size_t 425 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 426 const struct sockaddr *group) 427 { 428 const struct sockaddr_in *gsin; 429 struct in_multi **pinm; 430 int idx; 431 int nmships; 432 433 gsin = (const struct sockaddr_in *)group; 434 435 /* The imo_membership array may be lazy allocated. */ 436 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 437 return (-1); 438 439 nmships = imo->imo_num_memberships; 440 pinm = &imo->imo_membership[0]; 441 for (idx = 0; idx < nmships; idx++, pinm++) { 442 if (*pinm == NULL) 443 continue; 444 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 445 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 446 break; 447 } 448 } 449 if (idx >= nmships) 450 idx = -1; 451 452 return (idx); 453 } 454 455 /* 456 * Find an IPv4 multicast source entry for this imo which matches 457 * the given group index for this socket, and source address. 458 * 459 * NOTE: This does not check if the entry is in-mode, merely if 460 * it exists, which may not be the desired behaviour. 461 */ 462 static struct in_msource * 463 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 464 const struct sockaddr *src) 465 { 466 struct ip_msource find; 467 struct in_mfilter *imf; 468 struct ip_msource *ims; 469 const sockunion_t *psa; 470 471 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 472 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 473 ("%s: invalid index %d\n", __func__, (int)gidx)); 474 475 /* The imo_mfilters array may be lazy allocated. */ 476 if (imo->imo_mfilters == NULL) 477 return (NULL); 478 imf = &imo->imo_mfilters[gidx]; 479 480 /* Source trees are keyed in host byte order. */ 481 psa = (const sockunion_t *)src; 482 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 483 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 484 485 return ((struct in_msource *)ims); 486 } 487 488 /* 489 * Perform filtering for multicast datagrams on a socket by group and source. 490 * 491 * Returns 0 if a datagram should be allowed through, or various error codes 492 * if the socket was not a member of the group, or the source was muted, etc. 493 */ 494 int 495 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 496 const struct sockaddr *group, const struct sockaddr *src) 497 { 498 size_t gidx; 499 struct in_msource *ims; 500 int mode; 501 502 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 503 504 gidx = imo_match_group(imo, ifp, group); 505 if (gidx == -1) 506 return (MCAST_NOTGMEMBER); 507 508 /* 509 * Check if the source was included in an (S,G) join. 510 * Allow reception on exclusive memberships by default, 511 * reject reception on inclusive memberships by default. 512 * Exclude source only if an in-mode exclude filter exists. 513 * Include source only if an in-mode include filter exists. 514 * NOTE: We are comparing group state here at IGMP t1 (now) 515 * with socket-layer t0 (since last downcall). 516 */ 517 mode = imo->imo_mfilters[gidx].imf_st[1]; 518 ims = imo_match_source(imo, gidx, src); 519 520 if ((ims == NULL && mode == MCAST_INCLUDE) || 521 (ims != NULL && ims->imsl_st[0] != mode)) 522 return (MCAST_NOTSMEMBER); 523 524 return (MCAST_PASS); 525 } 526 527 /* 528 * Find and return a reference to an in_multi record for (ifp, group), 529 * and bump its reference count. 530 * If one does not exist, try to allocate it, and update link-layer multicast 531 * filters on ifp to listen for group. 532 * Assumes the IN_MULTI lock is held across the call. 533 * Return 0 if successful, otherwise return an appropriate error code. 534 */ 535 static int 536 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 537 struct in_multi **pinm) 538 { 539 struct sockaddr_in gsin; 540 struct ifmultiaddr *ifma; 541 struct in_ifinfo *ii; 542 struct in_multi *inm; 543 int error; 544 545 IN_MULTI_LOCK_ASSERT(); 546 547 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 548 IN_MULTI_LIST_LOCK(); 549 inm = inm_lookup(ifp, *group); 550 if (inm != NULL) { 551 /* 552 * If we already joined this group, just bump the 553 * refcount and return it. 554 */ 555 KASSERT(inm->inm_refcount >= 1, 556 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 557 inm_acquire_locked(inm); 558 *pinm = inm; 559 } 560 IN_MULTI_LIST_UNLOCK(); 561 if (inm != NULL) 562 return (0); 563 564 memset(&gsin, 0, sizeof(gsin)); 565 gsin.sin_family = AF_INET; 566 gsin.sin_len = sizeof(struct sockaddr_in); 567 gsin.sin_addr = *group; 568 569 /* 570 * Check if a link-layer group is already associated 571 * with this network-layer group on the given ifnet. 572 */ 573 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 574 if (error != 0) 575 return (error); 576 577 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 578 IN_MULTI_LIST_LOCK(); 579 IF_ADDR_WLOCK(ifp); 580 581 /* 582 * If something other than netinet is occupying the link-layer 583 * group, print a meaningful error message and back out of 584 * the allocation. 585 * Otherwise, bump the refcount on the existing network-layer 586 * group association and return it. 587 */ 588 if (ifma->ifma_protospec != NULL) { 589 inm = (struct in_multi *)ifma->ifma_protospec; 590 #ifdef INVARIANTS 591 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 592 __func__)); 593 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 594 ("%s: ifma not AF_INET", __func__)); 595 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 596 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 597 !in_hosteq(inm->inm_addr, *group)) { 598 char addrbuf[INET_ADDRSTRLEN]; 599 600 panic("%s: ifma %p is inconsistent with %p (%s)", 601 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 602 } 603 #endif 604 inm_acquire_locked(inm); 605 *pinm = inm; 606 goto out_locked; 607 } 608 609 IF_ADDR_WLOCK_ASSERT(ifp); 610 611 /* 612 * A new in_multi record is needed; allocate and initialize it. 613 * We DO NOT perform an IGMP join as the in_ layer may need to 614 * push an initial source list down to IGMP to support SSM. 615 * 616 * The initial source filter state is INCLUDE, {} as per the RFC. 617 */ 618 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 619 if (inm == NULL) { 620 IF_ADDR_WUNLOCK(ifp); 621 IN_MULTI_LIST_UNLOCK(); 622 if_delmulti_ifma(ifma); 623 return (ENOMEM); 624 } 625 inm->inm_addr = *group; 626 inm->inm_ifp = ifp; 627 inm->inm_igi = ii->ii_igmp; 628 inm->inm_ifma = ifma; 629 inm->inm_refcount = 1; 630 inm->inm_state = IGMP_NOT_MEMBER; 631 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 632 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 633 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 634 RB_INIT(&inm->inm_srcs); 635 636 ifma->ifma_protospec = inm; 637 638 *pinm = inm; 639 out_locked: 640 IF_ADDR_WUNLOCK(ifp); 641 IN_MULTI_LIST_UNLOCK(); 642 return (0); 643 } 644 645 /* 646 * Drop a reference to an in_multi record. 647 * 648 * If the refcount drops to 0, free the in_multi record and 649 * delete the underlying link-layer membership. 650 */ 651 static void 652 inm_release(struct in_multi *inm) 653 { 654 struct ifmultiaddr *ifma; 655 struct ifnet *ifp; 656 657 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 658 MPASS(inm->inm_refcount == 0); 659 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 660 661 ifma = inm->inm_ifma; 662 ifp = inm->inm_ifp; 663 664 /* XXX this access is not covered by IF_ADDR_LOCK */ 665 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 666 if (ifp) 667 CURVNET_SET(ifp->if_vnet); 668 inm_purge(inm); 669 free(inm, M_IPMADDR); 670 671 if_delmulti_ifma_flags(ifma, 1); 672 if (ifp) { 673 CURVNET_RESTORE(); 674 if_rele(ifp); 675 } 676 } 677 678 /* 679 * Clear recorded source entries for a group. 680 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 681 * FIXME: Should reap. 682 */ 683 void 684 inm_clear_recorded(struct in_multi *inm) 685 { 686 struct ip_msource *ims; 687 688 IN_MULTI_LIST_LOCK_ASSERT(); 689 690 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 691 if (ims->ims_stp) { 692 ims->ims_stp = 0; 693 --inm->inm_st[1].iss_rec; 694 } 695 } 696 KASSERT(inm->inm_st[1].iss_rec == 0, 697 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 698 } 699 700 /* 701 * Record a source as pending for a Source-Group IGMPv3 query. 702 * This lives here as it modifies the shared tree. 703 * 704 * inm is the group descriptor. 705 * naddr is the address of the source to record in network-byte order. 706 * 707 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 708 * lazy-allocate a source node in response to an SG query. 709 * Otherwise, no allocation is performed. This saves some memory 710 * with the trade-off that the source will not be reported to the 711 * router if joined in the window between the query response and 712 * the group actually being joined on the local host. 713 * 714 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 715 * This turns off the allocation of a recorded source entry if 716 * the group has not been joined. 717 * 718 * Return 0 if the source didn't exist or was already marked as recorded. 719 * Return 1 if the source was marked as recorded by this function. 720 * Return <0 if any error occurred (negated errno code). 721 */ 722 int 723 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 724 { 725 struct ip_msource find; 726 struct ip_msource *ims, *nims; 727 728 IN_MULTI_LIST_LOCK_ASSERT(); 729 730 find.ims_haddr = ntohl(naddr); 731 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 732 if (ims && ims->ims_stp) 733 return (0); 734 if (ims == NULL) { 735 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 736 return (-ENOSPC); 737 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 738 M_NOWAIT | M_ZERO); 739 if (nims == NULL) 740 return (-ENOMEM); 741 nims->ims_haddr = find.ims_haddr; 742 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 743 ++inm->inm_nsrc; 744 ims = nims; 745 } 746 747 /* 748 * Mark the source as recorded and update the recorded 749 * source count. 750 */ 751 ++ims->ims_stp; 752 ++inm->inm_st[1].iss_rec; 753 754 return (1); 755 } 756 757 /* 758 * Return a pointer to an in_msource owned by an in_mfilter, 759 * given its source address. 760 * Lazy-allocate if needed. If this is a new entry its filter state is 761 * undefined at t0. 762 * 763 * imf is the filter set being modified. 764 * haddr is the source address in *host* byte-order. 765 * 766 * SMPng: May be called with locks held; malloc must not block. 767 */ 768 static int 769 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 770 struct in_msource **plims) 771 { 772 struct ip_msource find; 773 struct ip_msource *ims, *nims; 774 struct in_msource *lims; 775 int error; 776 777 error = 0; 778 ims = NULL; 779 lims = NULL; 780 781 /* key is host byte order */ 782 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 783 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 784 lims = (struct in_msource *)ims; 785 if (lims == NULL) { 786 if (imf->imf_nsrc == in_mcast_maxsocksrc) 787 return (ENOSPC); 788 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 789 M_NOWAIT | M_ZERO); 790 if (nims == NULL) 791 return (ENOMEM); 792 lims = (struct in_msource *)nims; 793 lims->ims_haddr = find.ims_haddr; 794 lims->imsl_st[0] = MCAST_UNDEFINED; 795 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 796 ++imf->imf_nsrc; 797 } 798 799 *plims = lims; 800 801 return (error); 802 } 803 804 /* 805 * Graft a source entry into an existing socket-layer filter set, 806 * maintaining any required invariants and checking allocations. 807 * 808 * The source is marked as being in the new filter mode at t1. 809 * 810 * Return the pointer to the new node, otherwise return NULL. 811 */ 812 static struct in_msource * 813 imf_graft(struct in_mfilter *imf, const uint8_t st1, 814 const struct sockaddr_in *psin) 815 { 816 struct ip_msource *nims; 817 struct in_msource *lims; 818 819 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 820 M_NOWAIT | M_ZERO); 821 if (nims == NULL) 822 return (NULL); 823 lims = (struct in_msource *)nims; 824 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 825 lims->imsl_st[0] = MCAST_UNDEFINED; 826 lims->imsl_st[1] = st1; 827 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 828 ++imf->imf_nsrc; 829 830 return (lims); 831 } 832 833 /* 834 * Prune a source entry from an existing socket-layer filter set, 835 * maintaining any required invariants and checking allocations. 836 * 837 * The source is marked as being left at t1, it is not freed. 838 * 839 * Return 0 if no error occurred, otherwise return an errno value. 840 */ 841 static int 842 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 843 { 844 struct ip_msource find; 845 struct ip_msource *ims; 846 struct in_msource *lims; 847 848 /* key is host byte order */ 849 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 850 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 851 if (ims == NULL) 852 return (ENOENT); 853 lims = (struct in_msource *)ims; 854 lims->imsl_st[1] = MCAST_UNDEFINED; 855 return (0); 856 } 857 858 /* 859 * Revert socket-layer filter set deltas at t1 to t0 state. 860 */ 861 static void 862 imf_rollback(struct in_mfilter *imf) 863 { 864 struct ip_msource *ims, *tims; 865 struct in_msource *lims; 866 867 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 868 lims = (struct in_msource *)ims; 869 if (lims->imsl_st[0] == lims->imsl_st[1]) { 870 /* no change at t1 */ 871 continue; 872 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 873 /* revert change to existing source at t1 */ 874 lims->imsl_st[1] = lims->imsl_st[0]; 875 } else { 876 /* revert source added t1 */ 877 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 878 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 879 free(ims, M_INMFILTER); 880 imf->imf_nsrc--; 881 } 882 } 883 imf->imf_st[1] = imf->imf_st[0]; 884 } 885 886 /* 887 * Mark socket-layer filter set as INCLUDE {} at t1. 888 */ 889 static void 890 imf_leave(struct in_mfilter *imf) 891 { 892 struct ip_msource *ims; 893 struct in_msource *lims; 894 895 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 896 lims = (struct in_msource *)ims; 897 lims->imsl_st[1] = MCAST_UNDEFINED; 898 } 899 imf->imf_st[1] = MCAST_INCLUDE; 900 } 901 902 /* 903 * Mark socket-layer filter set deltas as committed. 904 */ 905 static void 906 imf_commit(struct in_mfilter *imf) 907 { 908 struct ip_msource *ims; 909 struct in_msource *lims; 910 911 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 912 lims = (struct in_msource *)ims; 913 lims->imsl_st[0] = lims->imsl_st[1]; 914 } 915 imf->imf_st[0] = imf->imf_st[1]; 916 } 917 918 /* 919 * Reap unreferenced sources from socket-layer filter set. 920 */ 921 static void 922 imf_reap(struct in_mfilter *imf) 923 { 924 struct ip_msource *ims, *tims; 925 struct in_msource *lims; 926 927 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 928 lims = (struct in_msource *)ims; 929 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 930 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 931 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 932 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 933 free(ims, M_INMFILTER); 934 imf->imf_nsrc--; 935 } 936 } 937 } 938 939 /* 940 * Purge socket-layer filter set. 941 */ 942 static void 943 imf_purge(struct in_mfilter *imf) 944 { 945 struct ip_msource *ims, *tims; 946 947 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 948 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 949 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 950 free(ims, M_INMFILTER); 951 imf->imf_nsrc--; 952 } 953 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 954 KASSERT(RB_EMPTY(&imf->imf_sources), 955 ("%s: imf_sources not empty", __func__)); 956 } 957 958 /* 959 * Look up a source filter entry for a multicast group. 960 * 961 * inm is the group descriptor to work with. 962 * haddr is the host-byte-order IPv4 address to look up. 963 * noalloc may be non-zero to suppress allocation of sources. 964 * *pims will be set to the address of the retrieved or allocated source. 965 * 966 * SMPng: NOTE: may be called with locks held. 967 * Return 0 if successful, otherwise return a non-zero error code. 968 */ 969 static int 970 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 971 const int noalloc, struct ip_msource **pims) 972 { 973 struct ip_msource find; 974 struct ip_msource *ims, *nims; 975 976 find.ims_haddr = haddr; 977 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 978 if (ims == NULL && !noalloc) { 979 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 980 return (ENOSPC); 981 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 982 M_NOWAIT | M_ZERO); 983 if (nims == NULL) 984 return (ENOMEM); 985 nims->ims_haddr = haddr; 986 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 987 ++inm->inm_nsrc; 988 ims = nims; 989 #ifdef KTR 990 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 991 haddr, ims); 992 #endif 993 } 994 995 *pims = ims; 996 return (0); 997 } 998 999 /* 1000 * Merge socket-layer source into IGMP-layer source. 1001 * If rollback is non-zero, perform the inverse of the merge. 1002 */ 1003 static void 1004 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 1005 const int rollback) 1006 { 1007 int n = rollback ? -1 : 1; 1008 1009 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 1010 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 1011 __func__, n, ims->ims_haddr); 1012 ims->ims_st[1].ex -= n; 1013 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 1014 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 1015 __func__, n, ims->ims_haddr); 1016 ims->ims_st[1].in -= n; 1017 } 1018 1019 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 1020 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 1021 __func__, n, ims->ims_haddr); 1022 ims->ims_st[1].ex += n; 1023 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 1024 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 1025 __func__, n, ims->ims_haddr); 1026 ims->ims_st[1].in += n; 1027 } 1028 } 1029 1030 /* 1031 * Atomically update the global in_multi state, when a membership's 1032 * filter list is being updated in any way. 1033 * 1034 * imf is the per-inpcb-membership group filter pointer. 1035 * A fake imf may be passed for in-kernel consumers. 1036 * 1037 * XXX This is a candidate for a set-symmetric-difference style loop 1038 * which would eliminate the repeated lookup from root of ims nodes, 1039 * as they share the same key space. 1040 * 1041 * If any error occurred this function will back out of refcounts 1042 * and return a non-zero value. 1043 */ 1044 static int 1045 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1046 { 1047 struct ip_msource *ims, *nims; 1048 struct in_msource *lims; 1049 int schanged, error; 1050 int nsrc0, nsrc1; 1051 1052 schanged = 0; 1053 error = 0; 1054 nsrc1 = nsrc0 = 0; 1055 IN_MULTI_LIST_LOCK_ASSERT(); 1056 1057 /* 1058 * Update the source filters first, as this may fail. 1059 * Maintain count of in-mode filters at t0, t1. These are 1060 * used to work out if we transition into ASM mode or not. 1061 * Maintain a count of source filters whose state was 1062 * actually modified by this operation. 1063 */ 1064 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1065 lims = (struct in_msource *)ims; 1066 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1067 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1068 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1069 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1070 ++schanged; 1071 if (error) 1072 break; 1073 ims_merge(nims, lims, 0); 1074 } 1075 if (error) { 1076 struct ip_msource *bims; 1077 1078 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1079 lims = (struct in_msource *)ims; 1080 if (lims->imsl_st[0] == lims->imsl_st[1]) 1081 continue; 1082 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1083 if (bims == NULL) 1084 continue; 1085 ims_merge(bims, lims, 1); 1086 } 1087 goto out_reap; 1088 } 1089 1090 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1091 __func__, nsrc0, nsrc1); 1092 1093 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1094 if (imf->imf_st[0] == imf->imf_st[1] && 1095 imf->imf_st[1] == MCAST_INCLUDE) { 1096 if (nsrc1 == 0) { 1097 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1098 --inm->inm_st[1].iss_in; 1099 } 1100 } 1101 1102 /* Handle filter mode transition on socket. */ 1103 if (imf->imf_st[0] != imf->imf_st[1]) { 1104 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1105 __func__, imf->imf_st[0], imf->imf_st[1]); 1106 1107 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1108 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1109 --inm->inm_st[1].iss_ex; 1110 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1111 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1112 --inm->inm_st[1].iss_in; 1113 } 1114 1115 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1116 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1117 inm->inm_st[1].iss_ex++; 1118 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1119 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1120 inm->inm_st[1].iss_in++; 1121 } 1122 } 1123 1124 /* 1125 * Track inm filter state in terms of listener counts. 1126 * If there are any exclusive listeners, stack-wide 1127 * membership is exclusive. 1128 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1129 * If no listeners remain, state is undefined at t1, 1130 * and the IGMP lifecycle for this group should finish. 1131 */ 1132 if (inm->inm_st[1].iss_ex > 0) { 1133 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1134 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1135 } else if (inm->inm_st[1].iss_in > 0) { 1136 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1137 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1138 } else { 1139 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1140 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1141 } 1142 1143 /* Decrement ASM listener count on transition out of ASM mode. */ 1144 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1145 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1146 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1147 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1148 --inm->inm_st[1].iss_asm; 1149 } 1150 } 1151 1152 /* Increment ASM listener count on transition to ASM mode. */ 1153 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1154 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1155 inm->inm_st[1].iss_asm++; 1156 } 1157 1158 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1159 inm_print(inm); 1160 1161 out_reap: 1162 if (schanged > 0) { 1163 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1164 inm_reap(inm); 1165 } 1166 return (error); 1167 } 1168 1169 /* 1170 * Mark an in_multi's filter set deltas as committed. 1171 * Called by IGMP after a state change has been enqueued. 1172 */ 1173 void 1174 inm_commit(struct in_multi *inm) 1175 { 1176 struct ip_msource *ims; 1177 1178 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1179 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1180 inm_print(inm); 1181 1182 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1183 ims->ims_st[0] = ims->ims_st[1]; 1184 } 1185 inm->inm_st[0] = inm->inm_st[1]; 1186 } 1187 1188 /* 1189 * Reap unreferenced nodes from an in_multi's filter set. 1190 */ 1191 static void 1192 inm_reap(struct in_multi *inm) 1193 { 1194 struct ip_msource *ims, *tims; 1195 1196 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1197 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1198 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1199 ims->ims_stp != 0) 1200 continue; 1201 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1202 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1203 free(ims, M_IPMSOURCE); 1204 inm->inm_nsrc--; 1205 } 1206 } 1207 1208 /* 1209 * Purge all source nodes from an in_multi's filter set. 1210 */ 1211 static void 1212 inm_purge(struct in_multi *inm) 1213 { 1214 struct ip_msource *ims, *tims; 1215 1216 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1217 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1218 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1219 free(ims, M_IPMSOURCE); 1220 inm->inm_nsrc--; 1221 } 1222 } 1223 1224 /* 1225 * Join a multicast group; unlocked entry point. 1226 * 1227 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1228 * is not held. Fortunately, ifp is unlikely to have been detached 1229 * at this point, so we assume it's OK to recurse. 1230 */ 1231 int 1232 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1233 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1234 { 1235 int error; 1236 1237 IN_MULTI_LOCK(); 1238 error = in_joingroup_locked(ifp, gina, imf, pinm); 1239 IN_MULTI_UNLOCK(); 1240 1241 return (error); 1242 } 1243 1244 /* 1245 * Join a multicast group; real entry point. 1246 * 1247 * Only preserves atomicity at inm level. 1248 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1249 * 1250 * If the IGMP downcall fails, the group is not joined, and an error 1251 * code is returned. 1252 */ 1253 int 1254 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1255 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1256 { 1257 struct in_mfilter timf; 1258 struct in_multi *inm; 1259 int error; 1260 1261 IN_MULTI_LOCK_ASSERT(); 1262 IN_MULTI_LIST_UNLOCK_ASSERT(); 1263 1264 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1265 ntohl(gina->s_addr), ifp, ifp->if_xname); 1266 1267 error = 0; 1268 inm = NULL; 1269 1270 /* 1271 * If no imf was specified (i.e. kernel consumer), 1272 * fake one up and assume it is an ASM join. 1273 */ 1274 if (imf == NULL) { 1275 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1276 imf = &timf; 1277 } 1278 1279 error = in_getmulti(ifp, gina, &inm); 1280 if (error) { 1281 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1282 return (error); 1283 } 1284 IN_MULTI_LIST_LOCK(); 1285 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1286 error = inm_merge(inm, imf); 1287 if (error) { 1288 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1289 goto out_inm_release; 1290 } 1291 1292 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1293 error = igmp_change_state(inm); 1294 if (error) { 1295 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1296 goto out_inm_release; 1297 } 1298 1299 out_inm_release: 1300 if (error) { 1301 1302 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1303 inm_release_deferred(inm); 1304 } else { 1305 *pinm = inm; 1306 } 1307 IN_MULTI_LIST_UNLOCK(); 1308 1309 return (error); 1310 } 1311 1312 /* 1313 * Leave a multicast group; unlocked entry point. 1314 */ 1315 int 1316 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1317 { 1318 int error; 1319 1320 IN_MULTI_LOCK(); 1321 error = in_leavegroup_locked(inm, imf); 1322 IN_MULTI_UNLOCK(); 1323 1324 return (error); 1325 } 1326 1327 /* 1328 * Leave a multicast group; real entry point. 1329 * All source filters will be expunged. 1330 * 1331 * Only preserves atomicity at inm level. 1332 * 1333 * Holding the write lock for the INP which contains imf 1334 * is highly advisable. We can't assert for it as imf does not 1335 * contain a back-pointer to the owning inp. 1336 * 1337 * Note: This is not the same as inm_release(*) as this function also 1338 * makes a state change downcall into IGMP. 1339 */ 1340 int 1341 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1342 { 1343 struct in_mfilter timf; 1344 int error; 1345 1346 error = 0; 1347 1348 IN_MULTI_LOCK_ASSERT(); 1349 IN_MULTI_LIST_UNLOCK_ASSERT(); 1350 1351 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1352 inm, ntohl(inm->inm_addr.s_addr), 1353 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1354 imf); 1355 1356 /* 1357 * If no imf was specified (i.e. kernel consumer), 1358 * fake one up and assume it is an ASM join. 1359 */ 1360 if (imf == NULL) { 1361 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1362 imf = &timf; 1363 } 1364 1365 /* 1366 * Begin state merge transaction at IGMP layer. 1367 * 1368 * As this particular invocation should not cause any memory 1369 * to be allocated, and there is no opportunity to roll back 1370 * the transaction, it MUST NOT fail. 1371 */ 1372 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1373 IN_MULTI_LIST_LOCK(); 1374 error = inm_merge(inm, imf); 1375 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1376 1377 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1378 CURVNET_SET(inm->inm_ifp->if_vnet); 1379 error = igmp_change_state(inm); 1380 IF_ADDR_WLOCK(inm->inm_ifp); 1381 inm_release_deferred(inm); 1382 IF_ADDR_WUNLOCK(inm->inm_ifp); 1383 IN_MULTI_LIST_UNLOCK(); 1384 CURVNET_RESTORE(); 1385 if (error) 1386 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1387 1388 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1389 1390 return (error); 1391 } 1392 1393 /*#ifndef BURN_BRIDGES*/ 1394 /* 1395 * Join an IPv4 multicast group in (*,G) exclusive mode. 1396 * The group must be a 224.0.0.0/24 link-scope group. 1397 * This KPI is for legacy kernel consumers only. 1398 */ 1399 struct in_multi * 1400 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1401 { 1402 struct in_multi *pinm; 1403 int error; 1404 #ifdef INVARIANTS 1405 char addrbuf[INET_ADDRSTRLEN]; 1406 #endif 1407 1408 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1409 ("%s: %s not in 224.0.0.0/24", __func__, 1410 inet_ntoa_r(*ap, addrbuf))); 1411 1412 error = in_joingroup(ifp, ap, NULL, &pinm); 1413 if (error != 0) 1414 pinm = NULL; 1415 1416 return (pinm); 1417 } 1418 1419 /* 1420 * Block or unblock an ASM multicast source on an inpcb. 1421 * This implements the delta-based API described in RFC 3678. 1422 * 1423 * The delta-based API applies only to exclusive-mode memberships. 1424 * An IGMP downcall will be performed. 1425 * 1426 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1427 * 1428 * Return 0 if successful, otherwise return an appropriate error code. 1429 */ 1430 static int 1431 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1432 { 1433 struct group_source_req gsr; 1434 sockunion_t *gsa, *ssa; 1435 struct ifnet *ifp; 1436 struct in_mfilter *imf; 1437 struct ip_moptions *imo; 1438 struct in_msource *ims; 1439 struct in_multi *inm; 1440 size_t idx; 1441 uint16_t fmode; 1442 int error, doblock; 1443 1444 ifp = NULL; 1445 error = 0; 1446 doblock = 0; 1447 1448 memset(&gsr, 0, sizeof(struct group_source_req)); 1449 gsa = (sockunion_t *)&gsr.gsr_group; 1450 ssa = (sockunion_t *)&gsr.gsr_source; 1451 1452 switch (sopt->sopt_name) { 1453 case IP_BLOCK_SOURCE: 1454 case IP_UNBLOCK_SOURCE: { 1455 struct ip_mreq_source mreqs; 1456 1457 error = sooptcopyin(sopt, &mreqs, 1458 sizeof(struct ip_mreq_source), 1459 sizeof(struct ip_mreq_source)); 1460 if (error) 1461 return (error); 1462 1463 gsa->sin.sin_family = AF_INET; 1464 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1465 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1466 1467 ssa->sin.sin_family = AF_INET; 1468 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1469 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1470 1471 if (!in_nullhost(mreqs.imr_interface)) 1472 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1473 1474 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1475 doblock = 1; 1476 1477 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1478 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1479 break; 1480 } 1481 1482 case MCAST_BLOCK_SOURCE: 1483 case MCAST_UNBLOCK_SOURCE: 1484 error = sooptcopyin(sopt, &gsr, 1485 sizeof(struct group_source_req), 1486 sizeof(struct group_source_req)); 1487 if (error) 1488 return (error); 1489 1490 if (gsa->sin.sin_family != AF_INET || 1491 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1492 return (EINVAL); 1493 1494 if (ssa->sin.sin_family != AF_INET || 1495 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1496 return (EINVAL); 1497 1498 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1499 return (EADDRNOTAVAIL); 1500 1501 ifp = ifnet_byindex(gsr.gsr_interface); 1502 1503 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1504 doblock = 1; 1505 break; 1506 1507 default: 1508 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1509 __func__, sopt->sopt_name); 1510 return (EOPNOTSUPP); 1511 break; 1512 } 1513 1514 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1515 return (EINVAL); 1516 1517 /* 1518 * Check if we are actually a member of this group. 1519 */ 1520 imo = inp_findmoptions(inp); 1521 idx = imo_match_group(imo, ifp, &gsa->sa); 1522 if (idx == -1 || imo->imo_mfilters == NULL) { 1523 error = EADDRNOTAVAIL; 1524 goto out_inp_locked; 1525 } 1526 1527 KASSERT(imo->imo_mfilters != NULL, 1528 ("%s: imo_mfilters not allocated", __func__)); 1529 imf = &imo->imo_mfilters[idx]; 1530 inm = imo->imo_membership[idx]; 1531 1532 /* 1533 * Attempting to use the delta-based API on an 1534 * non exclusive-mode membership is an error. 1535 */ 1536 fmode = imf->imf_st[0]; 1537 if (fmode != MCAST_EXCLUDE) { 1538 error = EINVAL; 1539 goto out_inp_locked; 1540 } 1541 1542 /* 1543 * Deal with error cases up-front: 1544 * Asked to block, but already blocked; or 1545 * Asked to unblock, but nothing to unblock. 1546 * If adding a new block entry, allocate it. 1547 */ 1548 ims = imo_match_source(imo, idx, &ssa->sa); 1549 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1550 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1551 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1552 error = EADDRNOTAVAIL; 1553 goto out_inp_locked; 1554 } 1555 1556 INP_WLOCK_ASSERT(inp); 1557 1558 /* 1559 * Begin state merge transaction at socket layer. 1560 */ 1561 if (doblock) { 1562 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1563 ims = imf_graft(imf, fmode, &ssa->sin); 1564 if (ims == NULL) 1565 error = ENOMEM; 1566 } else { 1567 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1568 error = imf_prune(imf, &ssa->sin); 1569 } 1570 1571 if (error) { 1572 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1573 goto out_imf_rollback; 1574 } 1575 1576 /* 1577 * Begin state merge transaction at IGMP layer. 1578 */ 1579 IN_MULTI_LOCK(); 1580 IN_MULTI_LIST_LOCK(); 1581 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1582 error = inm_merge(inm, imf); 1583 if (error) { 1584 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1585 goto out_in_multi_locked; 1586 } 1587 1588 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1589 error = igmp_change_state(inm); 1590 if (error) 1591 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1592 1593 out_in_multi_locked: 1594 1595 IN_MULTI_UNLOCK(); 1596 IN_MULTI_UNLOCK(); 1597 out_imf_rollback: 1598 if (error) 1599 imf_rollback(imf); 1600 else 1601 imf_commit(imf); 1602 1603 imf_reap(imf); 1604 1605 out_inp_locked: 1606 INP_WUNLOCK(inp); 1607 return (error); 1608 } 1609 1610 /* 1611 * Given an inpcb, return its multicast options structure pointer. Accepts 1612 * an unlocked inpcb pointer, but will return it locked. May sleep. 1613 * 1614 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1615 * SMPng: NOTE: Returns with the INP write lock held. 1616 */ 1617 static struct ip_moptions * 1618 inp_findmoptions(struct inpcb *inp) 1619 { 1620 struct ip_moptions *imo; 1621 struct in_multi **immp; 1622 struct in_mfilter *imfp; 1623 size_t idx; 1624 1625 INP_WLOCK(inp); 1626 if (inp->inp_moptions != NULL) 1627 return (inp->inp_moptions); 1628 1629 INP_WUNLOCK(inp); 1630 1631 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1632 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1633 M_WAITOK | M_ZERO); 1634 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1635 M_INMFILTER, M_WAITOK); 1636 1637 imo->imo_multicast_ifp = NULL; 1638 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1639 imo->imo_multicast_vif = -1; 1640 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1641 imo->imo_multicast_loop = in_mcast_loop; 1642 imo->imo_num_memberships = 0; 1643 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1644 imo->imo_membership = immp; 1645 1646 /* Initialize per-group source filters. */ 1647 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1648 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1649 imo->imo_mfilters = imfp; 1650 1651 INP_WLOCK(inp); 1652 if (inp->inp_moptions != NULL) { 1653 free(imfp, M_INMFILTER); 1654 free(immp, M_IPMOPTS); 1655 free(imo, M_IPMOPTS); 1656 return (inp->inp_moptions); 1657 } 1658 inp->inp_moptions = imo; 1659 return (imo); 1660 } 1661 1662 static void 1663 inp_gcmoptions(epoch_context_t ctx) 1664 { 1665 struct ip_moptions *imo; 1666 struct in_mfilter *imf; 1667 size_t idx, nmships; 1668 1669 imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx); 1670 1671 nmships = imo->imo_num_memberships; 1672 for (idx = 0; idx < nmships; ++idx) { 1673 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1674 if (imf) 1675 imf_leave(imf); 1676 (void)in_leavegroup(imo->imo_membership[idx], imf); 1677 if (imf) 1678 imf_purge(imf); 1679 } 1680 1681 if (imo->imo_mfilters) 1682 free(imo->imo_mfilters, M_INMFILTER); 1683 free(imo->imo_membership, M_IPMOPTS); 1684 free(imo, M_IPMOPTS); 1685 } 1686 1687 /* 1688 * Discard the IP multicast options (and source filters). To minimize 1689 * the amount of work done while holding locks such as the INP's 1690 * pcbinfo lock (which is used in the receive path), the free 1691 * operation is deferred to the epoch callback task. 1692 */ 1693 void 1694 inp_freemoptions(struct ip_moptions *imo) 1695 { 1696 if (imo == NULL) 1697 return; 1698 epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions); 1699 } 1700 1701 /* 1702 * Atomically get source filters on a socket for an IPv4 multicast group. 1703 * Called with INP lock held; returns with lock released. 1704 */ 1705 static int 1706 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1707 { 1708 struct __msfilterreq msfr; 1709 sockunion_t *gsa; 1710 struct ifnet *ifp; 1711 struct ip_moptions *imo; 1712 struct in_mfilter *imf; 1713 struct ip_msource *ims; 1714 struct in_msource *lims; 1715 struct sockaddr_in *psin; 1716 struct sockaddr_storage *ptss; 1717 struct sockaddr_storage *tss; 1718 int error; 1719 size_t idx, nsrcs, ncsrcs; 1720 1721 INP_WLOCK_ASSERT(inp); 1722 1723 imo = inp->inp_moptions; 1724 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1725 1726 INP_WUNLOCK(inp); 1727 1728 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1729 sizeof(struct __msfilterreq)); 1730 if (error) 1731 return (error); 1732 1733 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1734 return (EINVAL); 1735 1736 ifp = ifnet_byindex(msfr.msfr_ifindex); 1737 if (ifp == NULL) 1738 return (EINVAL); 1739 1740 INP_WLOCK(inp); 1741 1742 /* 1743 * Lookup group on the socket. 1744 */ 1745 gsa = (sockunion_t *)&msfr.msfr_group; 1746 idx = imo_match_group(imo, ifp, &gsa->sa); 1747 if (idx == -1 || imo->imo_mfilters == NULL) { 1748 INP_WUNLOCK(inp); 1749 return (EADDRNOTAVAIL); 1750 } 1751 imf = &imo->imo_mfilters[idx]; 1752 1753 /* 1754 * Ignore memberships which are in limbo. 1755 */ 1756 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1757 INP_WUNLOCK(inp); 1758 return (EAGAIN); 1759 } 1760 msfr.msfr_fmode = imf->imf_st[1]; 1761 1762 /* 1763 * If the user specified a buffer, copy out the source filter 1764 * entries to userland gracefully. 1765 * We only copy out the number of entries which userland 1766 * has asked for, but we always tell userland how big the 1767 * buffer really needs to be. 1768 */ 1769 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1770 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1771 tss = NULL; 1772 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1773 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1774 M_TEMP, M_NOWAIT | M_ZERO); 1775 if (tss == NULL) { 1776 INP_WUNLOCK(inp); 1777 return (ENOBUFS); 1778 } 1779 } 1780 1781 /* 1782 * Count number of sources in-mode at t0. 1783 * If buffer space exists and remains, copy out source entries. 1784 */ 1785 nsrcs = msfr.msfr_nsrcs; 1786 ncsrcs = 0; 1787 ptss = tss; 1788 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1789 lims = (struct in_msource *)ims; 1790 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1791 lims->imsl_st[0] != imf->imf_st[0]) 1792 continue; 1793 ++ncsrcs; 1794 if (tss != NULL && nsrcs > 0) { 1795 psin = (struct sockaddr_in *)ptss; 1796 psin->sin_family = AF_INET; 1797 psin->sin_len = sizeof(struct sockaddr_in); 1798 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1799 psin->sin_port = 0; 1800 ++ptss; 1801 --nsrcs; 1802 } 1803 } 1804 1805 INP_WUNLOCK(inp); 1806 1807 if (tss != NULL) { 1808 error = copyout(tss, msfr.msfr_srcs, 1809 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1810 free(tss, M_TEMP); 1811 if (error) 1812 return (error); 1813 } 1814 1815 msfr.msfr_nsrcs = ncsrcs; 1816 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1817 1818 return (error); 1819 } 1820 1821 /* 1822 * Return the IP multicast options in response to user getsockopt(). 1823 */ 1824 int 1825 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1826 { 1827 struct rm_priotracker in_ifa_tracker; 1828 struct ip_mreqn mreqn; 1829 struct ip_moptions *imo; 1830 struct ifnet *ifp; 1831 struct in_ifaddr *ia; 1832 int error, optval; 1833 u_char coptval; 1834 1835 INP_WLOCK(inp); 1836 imo = inp->inp_moptions; 1837 /* 1838 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1839 * or is a divert socket, reject it. 1840 */ 1841 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1842 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1843 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1844 INP_WUNLOCK(inp); 1845 return (EOPNOTSUPP); 1846 } 1847 1848 error = 0; 1849 switch (sopt->sopt_name) { 1850 case IP_MULTICAST_VIF: 1851 if (imo != NULL) 1852 optval = imo->imo_multicast_vif; 1853 else 1854 optval = -1; 1855 INP_WUNLOCK(inp); 1856 error = sooptcopyout(sopt, &optval, sizeof(int)); 1857 break; 1858 1859 case IP_MULTICAST_IF: 1860 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1861 if (imo != NULL) { 1862 ifp = imo->imo_multicast_ifp; 1863 if (!in_nullhost(imo->imo_multicast_addr)) { 1864 mreqn.imr_address = imo->imo_multicast_addr; 1865 } else if (ifp != NULL) { 1866 mreqn.imr_ifindex = ifp->if_index; 1867 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1868 if (ia != NULL) { 1869 mreqn.imr_address = 1870 IA_SIN(ia)->sin_addr; 1871 ifa_free(&ia->ia_ifa); 1872 } 1873 } 1874 } 1875 INP_WUNLOCK(inp); 1876 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1877 error = sooptcopyout(sopt, &mreqn, 1878 sizeof(struct ip_mreqn)); 1879 } else { 1880 error = sooptcopyout(sopt, &mreqn.imr_address, 1881 sizeof(struct in_addr)); 1882 } 1883 break; 1884 1885 case IP_MULTICAST_TTL: 1886 if (imo == NULL) 1887 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1888 else 1889 optval = coptval = imo->imo_multicast_ttl; 1890 INP_WUNLOCK(inp); 1891 if (sopt->sopt_valsize == sizeof(u_char)) 1892 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1893 else 1894 error = sooptcopyout(sopt, &optval, sizeof(int)); 1895 break; 1896 1897 case IP_MULTICAST_LOOP: 1898 if (imo == NULL) 1899 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1900 else 1901 optval = coptval = imo->imo_multicast_loop; 1902 INP_WUNLOCK(inp); 1903 if (sopt->sopt_valsize == sizeof(u_char)) 1904 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1905 else 1906 error = sooptcopyout(sopt, &optval, sizeof(int)); 1907 break; 1908 1909 case IP_MSFILTER: 1910 if (imo == NULL) { 1911 error = EADDRNOTAVAIL; 1912 INP_WUNLOCK(inp); 1913 } else { 1914 error = inp_get_source_filters(inp, sopt); 1915 } 1916 break; 1917 1918 default: 1919 INP_WUNLOCK(inp); 1920 error = ENOPROTOOPT; 1921 break; 1922 } 1923 1924 INP_UNLOCK_ASSERT(inp); 1925 1926 return (error); 1927 } 1928 1929 /* 1930 * Look up the ifnet to use for a multicast group membership, 1931 * given the IPv4 address of an interface, and the IPv4 group address. 1932 * 1933 * This routine exists to support legacy multicast applications 1934 * which do not understand that multicast memberships are scoped to 1935 * specific physical links in the networking stack, or which need 1936 * to join link-scope groups before IPv4 addresses are configured. 1937 * 1938 * If inp is non-NULL, use this socket's current FIB number for any 1939 * required FIB lookup. 1940 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1941 * and use its ifp; usually, this points to the default next-hop. 1942 * 1943 * If the FIB lookup fails, attempt to use the first non-loopback 1944 * interface with multicast capability in the system as a 1945 * last resort. The legacy IPv4 ASM API requires that we do 1946 * this in order to allow groups to be joined when the routing 1947 * table has not yet been populated during boot. 1948 * 1949 * Returns NULL if no ifp could be found. 1950 * 1951 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1952 * FUTURE: Implement IPv4 source-address selection. 1953 */ 1954 static struct ifnet * 1955 inp_lookup_mcast_ifp(const struct inpcb *inp, 1956 const struct sockaddr_in *gsin, const struct in_addr ina) 1957 { 1958 struct rm_priotracker in_ifa_tracker; 1959 struct ifnet *ifp; 1960 struct nhop4_basic nh4; 1961 uint32_t fibnum; 1962 1963 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1964 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1965 ("%s: not multicast", __func__)); 1966 1967 ifp = NULL; 1968 if (!in_nullhost(ina)) { 1969 INADDR_TO_IFP(ina, ifp); 1970 } else { 1971 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1972 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1973 ifp = nh4.nh_ifp; 1974 else { 1975 struct in_ifaddr *ia; 1976 struct ifnet *mifp; 1977 1978 mifp = NULL; 1979 IN_IFADDR_RLOCK(&in_ifa_tracker); 1980 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1981 mifp = ia->ia_ifp; 1982 if (!(mifp->if_flags & IFF_LOOPBACK) && 1983 (mifp->if_flags & IFF_MULTICAST)) { 1984 ifp = mifp; 1985 break; 1986 } 1987 } 1988 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1989 } 1990 } 1991 1992 return (ifp); 1993 } 1994 1995 /* 1996 * Join an IPv4 multicast group, possibly with a source. 1997 */ 1998 static int 1999 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2000 { 2001 struct group_source_req gsr; 2002 sockunion_t *gsa, *ssa; 2003 struct ifnet *ifp; 2004 struct in_mfilter *imf; 2005 struct ip_moptions *imo; 2006 struct in_multi *inm; 2007 struct in_msource *lims; 2008 size_t idx; 2009 int error, is_new; 2010 2011 ifp = NULL; 2012 imf = NULL; 2013 lims = NULL; 2014 error = 0; 2015 is_new = 0; 2016 2017 memset(&gsr, 0, sizeof(struct group_source_req)); 2018 gsa = (sockunion_t *)&gsr.gsr_group; 2019 gsa->ss.ss_family = AF_UNSPEC; 2020 ssa = (sockunion_t *)&gsr.gsr_source; 2021 ssa->ss.ss_family = AF_UNSPEC; 2022 2023 switch (sopt->sopt_name) { 2024 case IP_ADD_MEMBERSHIP: 2025 case IP_ADD_SOURCE_MEMBERSHIP: { 2026 struct ip_mreq_source mreqs; 2027 2028 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2029 error = sooptcopyin(sopt, &mreqs, 2030 sizeof(struct ip_mreq), 2031 sizeof(struct ip_mreq)); 2032 /* 2033 * Do argument switcharoo from ip_mreq into 2034 * ip_mreq_source to avoid using two instances. 2035 */ 2036 mreqs.imr_interface = mreqs.imr_sourceaddr; 2037 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2038 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2039 error = sooptcopyin(sopt, &mreqs, 2040 sizeof(struct ip_mreq_source), 2041 sizeof(struct ip_mreq_source)); 2042 } 2043 if (error) 2044 return (error); 2045 2046 gsa->sin.sin_family = AF_INET; 2047 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2048 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2049 2050 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2051 ssa->sin.sin_family = AF_INET; 2052 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2053 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2054 } 2055 2056 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2057 return (EINVAL); 2058 2059 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2060 mreqs.imr_interface); 2061 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2062 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2063 break; 2064 } 2065 2066 case MCAST_JOIN_GROUP: 2067 case MCAST_JOIN_SOURCE_GROUP: 2068 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2069 error = sooptcopyin(sopt, &gsr, 2070 sizeof(struct group_req), 2071 sizeof(struct group_req)); 2072 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2073 error = sooptcopyin(sopt, &gsr, 2074 sizeof(struct group_source_req), 2075 sizeof(struct group_source_req)); 2076 } 2077 if (error) 2078 return (error); 2079 2080 if (gsa->sin.sin_family != AF_INET || 2081 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2082 return (EINVAL); 2083 2084 /* 2085 * Overwrite the port field if present, as the sockaddr 2086 * being copied in may be matched with a binary comparison. 2087 */ 2088 gsa->sin.sin_port = 0; 2089 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2090 if (ssa->sin.sin_family != AF_INET || 2091 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2092 return (EINVAL); 2093 ssa->sin.sin_port = 0; 2094 } 2095 2096 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2097 return (EINVAL); 2098 2099 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2100 return (EADDRNOTAVAIL); 2101 ifp = ifnet_byindex(gsr.gsr_interface); 2102 break; 2103 2104 default: 2105 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2106 __func__, sopt->sopt_name); 2107 return (EOPNOTSUPP); 2108 break; 2109 } 2110 2111 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2112 return (EADDRNOTAVAIL); 2113 2114 imo = inp_findmoptions(inp); 2115 idx = imo_match_group(imo, ifp, &gsa->sa); 2116 if (idx == -1) { 2117 is_new = 1; 2118 } else { 2119 inm = imo->imo_membership[idx]; 2120 imf = &imo->imo_mfilters[idx]; 2121 if (ssa->ss.ss_family != AF_UNSPEC) { 2122 /* 2123 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2124 * is an error. On an existing inclusive membership, 2125 * it just adds the source to the filter list. 2126 */ 2127 if (imf->imf_st[1] != MCAST_INCLUDE) { 2128 error = EINVAL; 2129 goto out_inp_locked; 2130 } 2131 /* 2132 * Throw out duplicates. 2133 * 2134 * XXX FIXME: This makes a naive assumption that 2135 * even if entries exist for *ssa in this imf, 2136 * they will be rejected as dupes, even if they 2137 * are not valid in the current mode (in-mode). 2138 * 2139 * in_msource is transactioned just as for anything 2140 * else in SSM -- but note naive use of inm_graft() 2141 * below for allocating new filter entries. 2142 * 2143 * This is only an issue if someone mixes the 2144 * full-state SSM API with the delta-based API, 2145 * which is discouraged in the relevant RFCs. 2146 */ 2147 lims = imo_match_source(imo, idx, &ssa->sa); 2148 if (lims != NULL /*&& 2149 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2150 error = EADDRNOTAVAIL; 2151 goto out_inp_locked; 2152 } 2153 } else { 2154 /* 2155 * MCAST_JOIN_GROUP on an existing exclusive 2156 * membership is an error; return EADDRINUSE 2157 * to preserve 4.4BSD API idempotence, and 2158 * avoid tedious detour to code below. 2159 * NOTE: This is bending RFC 3678 a bit. 2160 * 2161 * On an existing inclusive membership, this is also 2162 * an error; if you want to change filter mode, 2163 * you must use the userland API setsourcefilter(). 2164 * XXX We don't reject this for imf in UNDEFINED 2165 * state at t1, because allocation of a filter 2166 * is atomic with allocation of a membership. 2167 */ 2168 error = EINVAL; 2169 if (imf->imf_st[1] == MCAST_EXCLUDE) 2170 error = EADDRINUSE; 2171 goto out_inp_locked; 2172 } 2173 } 2174 2175 /* 2176 * Begin state merge transaction at socket layer. 2177 */ 2178 INP_WLOCK_ASSERT(inp); 2179 2180 if (is_new) { 2181 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2182 error = imo_grow(imo); 2183 if (error) 2184 goto out_inp_locked; 2185 } 2186 /* 2187 * Allocate the new slot upfront so we can deal with 2188 * grafting the new source filter in same code path 2189 * as for join-source on existing membership. 2190 */ 2191 idx = imo->imo_num_memberships; 2192 imo->imo_membership[idx] = NULL; 2193 imo->imo_num_memberships++; 2194 KASSERT(imo->imo_mfilters != NULL, 2195 ("%s: imf_mfilters vector was not allocated", __func__)); 2196 imf = &imo->imo_mfilters[idx]; 2197 KASSERT(RB_EMPTY(&imf->imf_sources), 2198 ("%s: imf_sources not empty", __func__)); 2199 } 2200 2201 /* 2202 * Graft new source into filter list for this inpcb's 2203 * membership of the group. The in_multi may not have 2204 * been allocated yet if this is a new membership, however, 2205 * the in_mfilter slot will be allocated and must be initialized. 2206 * 2207 * Note: Grafting of exclusive mode filters doesn't happen 2208 * in this path. 2209 * XXX: Should check for non-NULL lims (node exists but may 2210 * not be in-mode) for interop with full-state API. 2211 */ 2212 if (ssa->ss.ss_family != AF_UNSPEC) { 2213 /* Membership starts in IN mode */ 2214 if (is_new) { 2215 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2216 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2217 } else { 2218 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2219 } 2220 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2221 if (lims == NULL) { 2222 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2223 __func__); 2224 error = ENOMEM; 2225 goto out_imo_free; 2226 } 2227 } else { 2228 /* No address specified; Membership starts in EX mode */ 2229 if (is_new) { 2230 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2231 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2232 } 2233 } 2234 2235 /* 2236 * Begin state merge transaction at IGMP layer. 2237 */ 2238 in_pcbref(inp); 2239 INP_WUNLOCK(inp); 2240 IN_MULTI_LOCK(); 2241 2242 if (is_new) { 2243 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2244 &inm); 2245 if (error) { 2246 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2247 __func__); 2248 IN_MULTI_LIST_UNLOCK(); 2249 goto out_imo_free; 2250 } 2251 imo->imo_membership[idx] = inm; 2252 } else { 2253 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2254 IN_MULTI_LIST_LOCK(); 2255 error = inm_merge(inm, imf); 2256 if (error) { 2257 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2258 __func__); 2259 IN_MULTI_LIST_UNLOCK(); 2260 goto out_in_multi_locked; 2261 } 2262 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2263 error = igmp_change_state(inm); 2264 IN_MULTI_LIST_UNLOCK(); 2265 if (error) { 2266 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2267 __func__); 2268 goto out_in_multi_locked; 2269 } 2270 } 2271 2272 out_in_multi_locked: 2273 2274 IN_MULTI_UNLOCK(); 2275 INP_WLOCK(inp); 2276 if (in_pcbrele_wlocked(inp)) 2277 return (ENXIO); 2278 if (error) { 2279 imf_rollback(imf); 2280 if (is_new) 2281 imf_purge(imf); 2282 else 2283 imf_reap(imf); 2284 } else { 2285 imf_commit(imf); 2286 } 2287 2288 out_imo_free: 2289 if (error && is_new) { 2290 imo->imo_membership[idx] = NULL; 2291 --imo->imo_num_memberships; 2292 } 2293 2294 out_inp_locked: 2295 INP_WUNLOCK(inp); 2296 return (error); 2297 } 2298 2299 /* 2300 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2301 */ 2302 static int 2303 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2304 { 2305 struct group_source_req gsr; 2306 struct ip_mreq_source mreqs; 2307 sockunion_t *gsa, *ssa; 2308 struct ifnet *ifp; 2309 struct in_mfilter *imf; 2310 struct ip_moptions *imo; 2311 struct in_msource *ims; 2312 struct in_multi *inm; 2313 size_t idx; 2314 int error, is_final; 2315 2316 ifp = NULL; 2317 error = 0; 2318 is_final = 1; 2319 2320 memset(&gsr, 0, sizeof(struct group_source_req)); 2321 gsa = (sockunion_t *)&gsr.gsr_group; 2322 gsa->ss.ss_family = AF_UNSPEC; 2323 ssa = (sockunion_t *)&gsr.gsr_source; 2324 ssa->ss.ss_family = AF_UNSPEC; 2325 2326 switch (sopt->sopt_name) { 2327 case IP_DROP_MEMBERSHIP: 2328 case IP_DROP_SOURCE_MEMBERSHIP: 2329 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2330 error = sooptcopyin(sopt, &mreqs, 2331 sizeof(struct ip_mreq), 2332 sizeof(struct ip_mreq)); 2333 /* 2334 * Swap interface and sourceaddr arguments, 2335 * as ip_mreq and ip_mreq_source are laid 2336 * out differently. 2337 */ 2338 mreqs.imr_interface = mreqs.imr_sourceaddr; 2339 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2340 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2341 error = sooptcopyin(sopt, &mreqs, 2342 sizeof(struct ip_mreq_source), 2343 sizeof(struct ip_mreq_source)); 2344 } 2345 if (error) 2346 return (error); 2347 2348 gsa->sin.sin_family = AF_INET; 2349 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2350 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2351 2352 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2353 ssa->sin.sin_family = AF_INET; 2354 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2355 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2356 } 2357 2358 /* 2359 * Attempt to look up hinted ifp from interface address. 2360 * Fallthrough with null ifp iff lookup fails, to 2361 * preserve 4.4BSD mcast API idempotence. 2362 * XXX NOTE WELL: The RFC 3678 API is preferred because 2363 * using an IPv4 address as a key is racy. 2364 */ 2365 if (!in_nullhost(mreqs.imr_interface)) 2366 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2367 2368 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2369 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2370 2371 break; 2372 2373 case MCAST_LEAVE_GROUP: 2374 case MCAST_LEAVE_SOURCE_GROUP: 2375 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2376 error = sooptcopyin(sopt, &gsr, 2377 sizeof(struct group_req), 2378 sizeof(struct group_req)); 2379 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2380 error = sooptcopyin(sopt, &gsr, 2381 sizeof(struct group_source_req), 2382 sizeof(struct group_source_req)); 2383 } 2384 if (error) 2385 return (error); 2386 2387 if (gsa->sin.sin_family != AF_INET || 2388 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2389 return (EINVAL); 2390 2391 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2392 if (ssa->sin.sin_family != AF_INET || 2393 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2394 return (EINVAL); 2395 } 2396 2397 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2398 return (EADDRNOTAVAIL); 2399 2400 ifp = ifnet_byindex(gsr.gsr_interface); 2401 2402 if (ifp == NULL) 2403 return (EADDRNOTAVAIL); 2404 break; 2405 2406 default: 2407 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2408 __func__, sopt->sopt_name); 2409 return (EOPNOTSUPP); 2410 break; 2411 } 2412 2413 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2414 return (EINVAL); 2415 2416 /* 2417 * Find the membership in the membership array. 2418 */ 2419 imo = inp_findmoptions(inp); 2420 idx = imo_match_group(imo, ifp, &gsa->sa); 2421 if (idx == -1) { 2422 error = EADDRNOTAVAIL; 2423 goto out_inp_locked; 2424 } 2425 inm = imo->imo_membership[idx]; 2426 imf = &imo->imo_mfilters[idx]; 2427 2428 if (ssa->ss.ss_family != AF_UNSPEC) 2429 is_final = 0; 2430 2431 /* 2432 * Begin state merge transaction at socket layer. 2433 */ 2434 INP_WLOCK_ASSERT(inp); 2435 2436 /* 2437 * If we were instructed only to leave a given source, do so. 2438 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2439 */ 2440 if (is_final) { 2441 imf_leave(imf); 2442 } else { 2443 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2444 error = EADDRNOTAVAIL; 2445 goto out_inp_locked; 2446 } 2447 ims = imo_match_source(imo, idx, &ssa->sa); 2448 if (ims == NULL) { 2449 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2450 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2451 error = EADDRNOTAVAIL; 2452 goto out_inp_locked; 2453 } 2454 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2455 error = imf_prune(imf, &ssa->sin); 2456 if (error) { 2457 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2458 __func__); 2459 goto out_inp_locked; 2460 } 2461 } 2462 2463 /* 2464 * Begin state merge transaction at IGMP layer. 2465 */ 2466 in_pcbref(inp); 2467 INP_WUNLOCK(inp); 2468 IN_MULTI_LOCK(); 2469 2470 if (is_final) { 2471 /* 2472 * Give up the multicast address record to which 2473 * the membership points. 2474 */ 2475 (void)in_leavegroup_locked(inm, imf); 2476 } else { 2477 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2478 IN_MULTI_LIST_LOCK(); 2479 error = inm_merge(inm, imf); 2480 if (error) { 2481 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2482 __func__); 2483 goto out_in_multi_locked; 2484 } 2485 2486 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2487 error = igmp_change_state(inm); 2488 IN_MULTI_LIST_UNLOCK(); 2489 if (error) { 2490 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2491 __func__); 2492 } 2493 } 2494 2495 out_in_multi_locked: 2496 2497 IN_MULTI_UNLOCK(); 2498 INP_WLOCK(inp); 2499 if (in_pcbrele_wlocked(inp)) 2500 return (ENXIO); 2501 2502 if (error) 2503 imf_rollback(imf); 2504 else 2505 imf_commit(imf); 2506 2507 imf_reap(imf); 2508 2509 if (is_final) { 2510 /* Remove the gap in the membership and filter array. */ 2511 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2512 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2513 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2514 } 2515 imo->imo_num_memberships--; 2516 } 2517 2518 out_inp_locked: 2519 INP_WUNLOCK(inp); 2520 return (error); 2521 } 2522 2523 /* 2524 * Select the interface for transmitting IPv4 multicast datagrams. 2525 * 2526 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2527 * may be passed to this socket option. An address of INADDR_ANY or an 2528 * interface index of 0 is used to remove a previous selection. 2529 * When no interface is selected, one is chosen for every send. 2530 */ 2531 static int 2532 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2533 { 2534 struct in_addr addr; 2535 struct ip_mreqn mreqn; 2536 struct ifnet *ifp; 2537 struct ip_moptions *imo; 2538 int error; 2539 2540 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2541 /* 2542 * An interface index was specified using the 2543 * Linux-derived ip_mreqn structure. 2544 */ 2545 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2546 sizeof(struct ip_mreqn)); 2547 if (error) 2548 return (error); 2549 2550 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2551 return (EINVAL); 2552 2553 if (mreqn.imr_ifindex == 0) { 2554 ifp = NULL; 2555 } else { 2556 ifp = ifnet_byindex(mreqn.imr_ifindex); 2557 if (ifp == NULL) 2558 return (EADDRNOTAVAIL); 2559 } 2560 } else { 2561 /* 2562 * An interface was specified by IPv4 address. 2563 * This is the traditional BSD usage. 2564 */ 2565 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2566 sizeof(struct in_addr)); 2567 if (error) 2568 return (error); 2569 if (in_nullhost(addr)) { 2570 ifp = NULL; 2571 } else { 2572 INADDR_TO_IFP(addr, ifp); 2573 if (ifp == NULL) 2574 return (EADDRNOTAVAIL); 2575 } 2576 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2577 ntohl(addr.s_addr)); 2578 } 2579 2580 /* Reject interfaces which do not support multicast. */ 2581 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2582 return (EOPNOTSUPP); 2583 2584 imo = inp_findmoptions(inp); 2585 imo->imo_multicast_ifp = ifp; 2586 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2587 INP_WUNLOCK(inp); 2588 2589 return (0); 2590 } 2591 2592 /* 2593 * Atomically set source filters on a socket for an IPv4 multicast group. 2594 * 2595 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2596 */ 2597 static int 2598 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2599 { 2600 struct __msfilterreq msfr; 2601 sockunion_t *gsa; 2602 struct ifnet *ifp; 2603 struct in_mfilter *imf; 2604 struct ip_moptions *imo; 2605 struct in_multi *inm; 2606 size_t idx; 2607 int error; 2608 2609 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2610 sizeof(struct __msfilterreq)); 2611 if (error) 2612 return (error); 2613 2614 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2615 return (ENOBUFS); 2616 2617 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2618 msfr.msfr_fmode != MCAST_INCLUDE)) 2619 return (EINVAL); 2620 2621 if (msfr.msfr_group.ss_family != AF_INET || 2622 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2623 return (EINVAL); 2624 2625 gsa = (sockunion_t *)&msfr.msfr_group; 2626 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2627 return (EINVAL); 2628 2629 gsa->sin.sin_port = 0; /* ignore port */ 2630 2631 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2632 return (EADDRNOTAVAIL); 2633 2634 ifp = ifnet_byindex(msfr.msfr_ifindex); 2635 if (ifp == NULL) 2636 return (EADDRNOTAVAIL); 2637 2638 /* 2639 * Take the INP write lock. 2640 * Check if this socket is a member of this group. 2641 */ 2642 imo = inp_findmoptions(inp); 2643 idx = imo_match_group(imo, ifp, &gsa->sa); 2644 if (idx == -1 || imo->imo_mfilters == NULL) { 2645 error = EADDRNOTAVAIL; 2646 goto out_inp_locked; 2647 } 2648 inm = imo->imo_membership[idx]; 2649 imf = &imo->imo_mfilters[idx]; 2650 2651 /* 2652 * Begin state merge transaction at socket layer. 2653 */ 2654 INP_WLOCK_ASSERT(inp); 2655 2656 imf->imf_st[1] = msfr.msfr_fmode; 2657 2658 /* 2659 * Apply any new source filters, if present. 2660 * Make a copy of the user-space source vector so 2661 * that we may copy them with a single copyin. This 2662 * allows us to deal with page faults up-front. 2663 */ 2664 if (msfr.msfr_nsrcs > 0) { 2665 struct in_msource *lims; 2666 struct sockaddr_in *psin; 2667 struct sockaddr_storage *kss, *pkss; 2668 int i; 2669 2670 INP_WUNLOCK(inp); 2671 2672 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2673 __func__, (unsigned long)msfr.msfr_nsrcs); 2674 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2675 M_TEMP, M_WAITOK); 2676 error = copyin(msfr.msfr_srcs, kss, 2677 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2678 if (error) { 2679 free(kss, M_TEMP); 2680 return (error); 2681 } 2682 2683 INP_WLOCK(inp); 2684 2685 /* 2686 * Mark all source filters as UNDEFINED at t1. 2687 * Restore new group filter mode, as imf_leave() 2688 * will set it to INCLUDE. 2689 */ 2690 imf_leave(imf); 2691 imf->imf_st[1] = msfr.msfr_fmode; 2692 2693 /* 2694 * Update socket layer filters at t1, lazy-allocating 2695 * new entries. This saves a bunch of memory at the 2696 * cost of one RB_FIND() per source entry; duplicate 2697 * entries in the msfr_nsrcs vector are ignored. 2698 * If we encounter an error, rollback transaction. 2699 * 2700 * XXX This too could be replaced with a set-symmetric 2701 * difference like loop to avoid walking from root 2702 * every time, as the key space is common. 2703 */ 2704 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2705 psin = (struct sockaddr_in *)pkss; 2706 if (psin->sin_family != AF_INET) { 2707 error = EAFNOSUPPORT; 2708 break; 2709 } 2710 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2711 error = EINVAL; 2712 break; 2713 } 2714 error = imf_get_source(imf, psin, &lims); 2715 if (error) 2716 break; 2717 lims->imsl_st[1] = imf->imf_st[1]; 2718 } 2719 free(kss, M_TEMP); 2720 } 2721 2722 if (error) 2723 goto out_imf_rollback; 2724 2725 INP_WLOCK_ASSERT(inp); 2726 IN_MULTI_LOCK(); 2727 IN_MULTI_LIST_LOCK(); 2728 2729 /* 2730 * Begin state merge transaction at IGMP layer. 2731 */ 2732 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2733 error = inm_merge(inm, imf); 2734 if (error) { 2735 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2736 IN_MULTI_LIST_UNLOCK(); 2737 goto out_in_multi_locked; 2738 } 2739 2740 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2741 error = igmp_change_state(inm); 2742 IN_MULTI_LIST_UNLOCK(); 2743 if (error) 2744 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2745 2746 out_in_multi_locked: 2747 2748 IN_MULTI_UNLOCK(); 2749 2750 out_imf_rollback: 2751 if (error) 2752 imf_rollback(imf); 2753 else 2754 imf_commit(imf); 2755 2756 imf_reap(imf); 2757 2758 out_inp_locked: 2759 INP_WUNLOCK(inp); 2760 return (error); 2761 } 2762 2763 /* 2764 * Set the IP multicast options in response to user setsockopt(). 2765 * 2766 * Many of the socket options handled in this function duplicate the 2767 * functionality of socket options in the regular unicast API. However, 2768 * it is not possible to merge the duplicate code, because the idempotence 2769 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2770 * the effects of these options must be treated as separate and distinct. 2771 * 2772 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2773 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2774 * is refactored to no longer use vifs. 2775 */ 2776 int 2777 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2778 { 2779 struct ip_moptions *imo; 2780 int error; 2781 2782 error = 0; 2783 2784 /* 2785 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2786 * or is a divert socket, reject it. 2787 */ 2788 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2789 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2790 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2791 return (EOPNOTSUPP); 2792 2793 switch (sopt->sopt_name) { 2794 case IP_MULTICAST_VIF: { 2795 int vifi; 2796 /* 2797 * Select a multicast VIF for transmission. 2798 * Only useful if multicast forwarding is active. 2799 */ 2800 if (legal_vif_num == NULL) { 2801 error = EOPNOTSUPP; 2802 break; 2803 } 2804 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2805 if (error) 2806 break; 2807 if (!legal_vif_num(vifi) && (vifi != -1)) { 2808 error = EINVAL; 2809 break; 2810 } 2811 imo = inp_findmoptions(inp); 2812 imo->imo_multicast_vif = vifi; 2813 INP_WUNLOCK(inp); 2814 break; 2815 } 2816 2817 case IP_MULTICAST_IF: 2818 error = inp_set_multicast_if(inp, sopt); 2819 break; 2820 2821 case IP_MULTICAST_TTL: { 2822 u_char ttl; 2823 2824 /* 2825 * Set the IP time-to-live for outgoing multicast packets. 2826 * The original multicast API required a char argument, 2827 * which is inconsistent with the rest of the socket API. 2828 * We allow either a char or an int. 2829 */ 2830 if (sopt->sopt_valsize == sizeof(u_char)) { 2831 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2832 sizeof(u_char)); 2833 if (error) 2834 break; 2835 } else { 2836 u_int ittl; 2837 2838 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2839 sizeof(u_int)); 2840 if (error) 2841 break; 2842 if (ittl > 255) { 2843 error = EINVAL; 2844 break; 2845 } 2846 ttl = (u_char)ittl; 2847 } 2848 imo = inp_findmoptions(inp); 2849 imo->imo_multicast_ttl = ttl; 2850 INP_WUNLOCK(inp); 2851 break; 2852 } 2853 2854 case IP_MULTICAST_LOOP: { 2855 u_char loop; 2856 2857 /* 2858 * Set the loopback flag for outgoing multicast packets. 2859 * Must be zero or one. The original multicast API required a 2860 * char argument, which is inconsistent with the rest 2861 * of the socket API. We allow either a char or an int. 2862 */ 2863 if (sopt->sopt_valsize == sizeof(u_char)) { 2864 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2865 sizeof(u_char)); 2866 if (error) 2867 break; 2868 } else { 2869 u_int iloop; 2870 2871 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2872 sizeof(u_int)); 2873 if (error) 2874 break; 2875 loop = (u_char)iloop; 2876 } 2877 imo = inp_findmoptions(inp); 2878 imo->imo_multicast_loop = !!loop; 2879 INP_WUNLOCK(inp); 2880 break; 2881 } 2882 2883 case IP_ADD_MEMBERSHIP: 2884 case IP_ADD_SOURCE_MEMBERSHIP: 2885 case MCAST_JOIN_GROUP: 2886 case MCAST_JOIN_SOURCE_GROUP: 2887 error = inp_join_group(inp, sopt); 2888 break; 2889 2890 case IP_DROP_MEMBERSHIP: 2891 case IP_DROP_SOURCE_MEMBERSHIP: 2892 case MCAST_LEAVE_GROUP: 2893 case MCAST_LEAVE_SOURCE_GROUP: 2894 error = inp_leave_group(inp, sopt); 2895 break; 2896 2897 case IP_BLOCK_SOURCE: 2898 case IP_UNBLOCK_SOURCE: 2899 case MCAST_BLOCK_SOURCE: 2900 case MCAST_UNBLOCK_SOURCE: 2901 error = inp_block_unblock_source(inp, sopt); 2902 break; 2903 2904 case IP_MSFILTER: 2905 error = inp_set_source_filters(inp, sopt); 2906 break; 2907 2908 default: 2909 error = EOPNOTSUPP; 2910 break; 2911 } 2912 2913 INP_UNLOCK_ASSERT(inp); 2914 2915 return (error); 2916 } 2917 2918 /* 2919 * Expose IGMP's multicast filter mode and source list(s) to userland, 2920 * keyed by (ifindex, group). 2921 * The filter mode is written out as a uint32_t, followed by 2922 * 0..n of struct in_addr. 2923 * For use by ifmcstat(8). 2924 * SMPng: NOTE: unlocked read of ifindex space. 2925 */ 2926 static int 2927 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2928 { 2929 struct in_addr src, group; 2930 struct ifnet *ifp; 2931 struct ifmultiaddr *ifma; 2932 struct in_multi *inm; 2933 struct ip_msource *ims; 2934 int *name; 2935 int retval; 2936 u_int namelen; 2937 uint32_t fmode, ifindex; 2938 2939 name = (int *)arg1; 2940 namelen = arg2; 2941 2942 if (req->newptr != NULL) 2943 return (EPERM); 2944 2945 if (namelen != 2) 2946 return (EINVAL); 2947 2948 ifindex = name[0]; 2949 if (ifindex <= 0 || ifindex > V_if_index) { 2950 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2951 __func__, ifindex); 2952 return (ENOENT); 2953 } 2954 2955 group.s_addr = name[1]; 2956 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2957 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2958 __func__, ntohl(group.s_addr)); 2959 return (EINVAL); 2960 } 2961 2962 ifp = ifnet_byindex(ifindex); 2963 if (ifp == NULL) { 2964 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2965 __func__, ifindex); 2966 return (ENOENT); 2967 } 2968 2969 retval = sysctl_wire_old_buffer(req, 2970 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2971 if (retval) 2972 return (retval); 2973 2974 IN_MULTI_LIST_LOCK(); 2975 2976 IF_ADDR_RLOCK(ifp); 2977 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2978 if (ifma->ifma_addr->sa_family != AF_INET || 2979 ifma->ifma_protospec == NULL) 2980 continue; 2981 inm = (struct in_multi *)ifma->ifma_protospec; 2982 if (!in_hosteq(inm->inm_addr, group)) 2983 continue; 2984 fmode = inm->inm_st[1].iss_fmode; 2985 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2986 if (retval != 0) 2987 break; 2988 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2989 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 2990 ims->ims_haddr); 2991 /* 2992 * Only copy-out sources which are in-mode. 2993 */ 2994 if (fmode != ims_get_mode(inm, ims, 1)) { 2995 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2996 __func__); 2997 continue; 2998 } 2999 src.s_addr = htonl(ims->ims_haddr); 3000 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 3001 if (retval != 0) 3002 break; 3003 } 3004 } 3005 IF_ADDR_RUNLOCK(ifp); 3006 3007 IN_MULTI_LIST_UNLOCK(); 3008 3009 return (retval); 3010 } 3011 3012 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 3013 3014 static const char *inm_modestrs[] = { "un", "in", "ex" }; 3015 3016 static const char * 3017 inm_mode_str(const int mode) 3018 { 3019 3020 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 3021 return (inm_modestrs[mode]); 3022 return ("??"); 3023 } 3024 3025 static const char *inm_statestrs[] = { 3026 "not-member", 3027 "silent", 3028 "idle", 3029 "lazy", 3030 "sleeping", 3031 "awakening", 3032 "query-pending", 3033 "sg-query-pending", 3034 "leaving" 3035 }; 3036 3037 static const char * 3038 inm_state_str(const int state) 3039 { 3040 3041 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3042 return (inm_statestrs[state]); 3043 return ("??"); 3044 } 3045 3046 /* 3047 * Dump an in_multi structure to the console. 3048 */ 3049 void 3050 inm_print(const struct in_multi *inm) 3051 { 3052 int t; 3053 char addrbuf[INET_ADDRSTRLEN]; 3054 3055 if ((ktr_mask & KTR_IGMPV3) == 0) 3056 return; 3057 3058 printf("%s: --- begin inm %p ---\n", __func__, inm); 3059 printf("addr %s ifp %p(%s) ifma %p\n", 3060 inet_ntoa_r(inm->inm_addr, addrbuf), 3061 inm->inm_ifp, 3062 inm->inm_ifp->if_xname, 3063 inm->inm_ifma); 3064 printf("timer %u state %s refcount %u scq.len %u\n", 3065 inm->inm_timer, 3066 inm_state_str(inm->inm_state), 3067 inm->inm_refcount, 3068 inm->inm_scq.mq_len); 3069 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3070 inm->inm_igi, 3071 inm->inm_nsrc, 3072 inm->inm_sctimer, 3073 inm->inm_scrv); 3074 for (t = 0; t < 2; t++) { 3075 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3076 inm_mode_str(inm->inm_st[t].iss_fmode), 3077 inm->inm_st[t].iss_asm, 3078 inm->inm_st[t].iss_ex, 3079 inm->inm_st[t].iss_in, 3080 inm->inm_st[t].iss_rec); 3081 } 3082 printf("%s: --- end inm %p ---\n", __func__, inm); 3083 } 3084 3085 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3086 3087 void 3088 inm_print(const struct in_multi *inm) 3089 { 3090 3091 } 3092 3093 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3094 3095 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3096