1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. 
There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct 
in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 169 static int inp_join_group(struct inpcb *, struct sockopt *); 170 static int inp_leave_group(struct inpcb *, struct sockopt *); 171 static struct ifnet * 172 inp_lookup_mcast_ifp(const struct inpcb *, 173 const struct sockaddr_in *, const struct in_addr); 174 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 175 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 176 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 177 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 178 179 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 180 "IPv4 multicast"); 181 182 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 183 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 184 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 185 "Max source filters per group"); 186 187 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 188 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 189 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 190 "Max source filters per socket"); 191 192 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 193 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 194 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 195 196 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 197 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 198 "Per-interface stack-wide source filters"); 199 
200 #ifdef KTR 201 /* 202 * Inline function which wraps assertions for a valid ifp. 203 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 204 * is detached. 205 */ 206 static int __inline 207 inm_is_ifp_detached(const struct in_multi *inm) 208 { 209 struct ifnet *ifp; 210 211 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 212 ifp = inm->inm_ifma->ifma_ifp; 213 if (ifp != NULL) { 214 /* 215 * Sanity check that netinet's notion of ifp is the 216 * same as net's. 217 */ 218 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 219 } 220 221 return (ifp == NULL); 222 } 223 #endif 224 225 static struct grouptask free_gtask; 226 static struct in_multi_head inm_free_list; 227 static void inm_release_task(void *arg __unused); 228 static void inm_init(void) 229 { 230 SLIST_INIT(&inm_free_list); 231 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 232 } 233 234 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 235 inm_init, NULL); 236 237 238 void 239 inm_release_list_deferred(struct in_multi_head *inmh) 240 { 241 242 if (SLIST_EMPTY(inmh)) 243 return; 244 mtx_lock(&in_multi_free_mtx); 245 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 246 mtx_unlock(&in_multi_free_mtx); 247 GROUPTASK_ENQUEUE(&free_gtask); 248 } 249 250 void 251 inm_disconnect(struct in_multi *inm) 252 { 253 struct ifnet *ifp; 254 struct ifmultiaddr *ifma, *ll_ifma; 255 256 ifp = inm->inm_ifp; 257 IF_ADDR_WLOCK_ASSERT(ifp); 258 ifma = inm->inm_ifma; 259 260 if_ref(ifp); 261 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 262 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 263 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 264 MPASS(ifma != ll_ifma); 265 ifma->ifma_llifma = NULL; 266 MPASS(ll_ifma->ifma_llifma == NULL); 267 MPASS(ll_ifma->ifma_ifp == ifp); 268 if (--ll_ifma->ifma_refcount == 0) { 269 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 270 MCDPRINTF("removed 
ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 271 if_freemulti(ll_ifma); 272 ifma_restart = true; 273 } 274 } 275 } 276 277 void 278 inm_release_deferred(struct in_multi *inm) 279 { 280 struct in_multi_head tmp; 281 282 IN_MULTI_LIST_LOCK_ASSERT(); 283 MPASS(inm->inm_refcount > 0); 284 if (--inm->inm_refcount == 0) { 285 SLIST_INIT(&tmp); 286 inm_disconnect(inm); 287 inm->inm_ifma->ifma_protospec = NULL; 288 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 289 inm_release_list_deferred(&tmp); 290 } 291 } 292 293 static void 294 inm_release_task(void *arg __unused) 295 { 296 struct in_multi_head inm_free_tmp; 297 struct in_multi *inm, *tinm; 298 299 SLIST_INIT(&inm_free_tmp); 300 mtx_lock(&in_multi_free_mtx); 301 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 302 mtx_unlock(&in_multi_free_mtx); 303 IN_MULTI_LOCK(); 304 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 305 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 306 MPASS(inm); 307 inm_release(inm); 308 } 309 IN_MULTI_UNLOCK(); 310 } 311 312 /* 313 * Initialize an in_mfilter structure to a known state at t0, t1 314 * with an empty source filter list. 315 */ 316 static __inline void 317 imf_init(struct in_mfilter *imf, const int st0, const int st1) 318 { 319 memset(imf, 0, sizeof(struct in_mfilter)); 320 RB_INIT(&imf->imf_sources); 321 imf->imf_st[0] = st0; 322 imf->imf_st[1] = st1; 323 } 324 325 /* 326 * Function for looking up an in_multi record for an IPv4 multicast address 327 * on a given interface. ifp must be valid. If no record found, return NULL. 328 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 
329 */ 330 struct in_multi * 331 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 332 { 333 struct ifmultiaddr *ifma; 334 struct in_multi *inm; 335 336 IN_MULTI_LIST_LOCK_ASSERT(); 337 IF_ADDR_LOCK_ASSERT(ifp); 338 339 inm = NULL; 340 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 341 if (ifma->ifma_addr->sa_family != AF_INET || 342 ifma->ifma_protospec == NULL) 343 continue; 344 inm = (struct in_multi *)ifma->ifma_protospec; 345 if (inm->inm_addr.s_addr == ina.s_addr) 346 break; 347 inm = NULL; 348 } 349 return (inm); 350 } 351 352 /* 353 * Wrapper for inm_lookup_locked(). 354 * The IF_ADDR_LOCK will be taken on ifp and released on return. 355 */ 356 struct in_multi * 357 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 358 { 359 struct in_multi *inm; 360 361 IN_MULTI_LIST_LOCK_ASSERT(); 362 IF_ADDR_RLOCK(ifp); 363 inm = inm_lookup_locked(ifp, ina); 364 IF_ADDR_RUNLOCK(ifp); 365 366 return (inm); 367 } 368 369 /* 370 * Resize the ip_moptions vector to the next power-of-two minus 1. 371 * May be called with locks held; do not sleep. 372 */ 373 static int 374 imo_grow(struct ip_moptions *imo) 375 { 376 struct in_multi **nmships; 377 struct in_multi **omships; 378 struct in_mfilter *nmfilters; 379 struct in_mfilter *omfilters; 380 size_t idx; 381 size_t newmax; 382 size_t oldmax; 383 384 nmships = NULL; 385 nmfilters = NULL; 386 omships = imo->imo_membership; 387 omfilters = imo->imo_mfilters; 388 oldmax = imo->imo_max_memberships; 389 newmax = ((oldmax + 1) * 2) - 1; 390 391 if (newmax <= IP_MAX_MEMBERSHIPS) { 392 nmships = (struct in_multi **)realloc(omships, 393 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 394 nmfilters = (struct in_mfilter *)realloc(omfilters, 395 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 396 if (nmships != NULL && nmfilters != NULL) { 397 /* Initialize newly allocated source filter heads. 
*/ 398 for (idx = oldmax; idx < newmax; idx++) { 399 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 400 MCAST_EXCLUDE); 401 } 402 imo->imo_max_memberships = newmax; 403 imo->imo_membership = nmships; 404 imo->imo_mfilters = nmfilters; 405 } 406 } 407 408 if (nmships == NULL || nmfilters == NULL) { 409 if (nmships != NULL) 410 free(nmships, M_IPMOPTS); 411 if (nmfilters != NULL) 412 free(nmfilters, M_INMFILTER); 413 return (ETOOMANYREFS); 414 } 415 416 return (0); 417 } 418 419 /* 420 * Find an IPv4 multicast group entry for this ip_moptions instance 421 * which matches the specified group, and optionally an interface. 422 * Return its index into the array, or -1 if not found. 423 */ 424 static size_t 425 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 426 const struct sockaddr *group) 427 { 428 const struct sockaddr_in *gsin; 429 struct in_multi **pinm; 430 int idx; 431 int nmships; 432 433 gsin = (const struct sockaddr_in *)group; 434 435 /* The imo_membership array may be lazy allocated. */ 436 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 437 return (-1); 438 439 nmships = imo->imo_num_memberships; 440 pinm = &imo->imo_membership[0]; 441 for (idx = 0; idx < nmships; idx++, pinm++) { 442 if (*pinm == NULL) 443 continue; 444 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 445 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 446 break; 447 } 448 } 449 if (idx >= nmships) 450 idx = -1; 451 452 return (idx); 453 } 454 455 /* 456 * Find an IPv4 multicast source entry for this imo which matches 457 * the given group index for this socket, and source address. 458 * 459 * NOTE: This does not check if the entry is in-mode, merely if 460 * it exists, which may not be the desired behaviour. 
461 */ 462 static struct in_msource * 463 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 464 const struct sockaddr *src) 465 { 466 struct ip_msource find; 467 struct in_mfilter *imf; 468 struct ip_msource *ims; 469 const sockunion_t *psa; 470 471 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 472 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 473 ("%s: invalid index %d\n", __func__, (int)gidx)); 474 475 /* The imo_mfilters array may be lazy allocated. */ 476 if (imo->imo_mfilters == NULL) 477 return (NULL); 478 imf = &imo->imo_mfilters[gidx]; 479 480 /* Source trees are keyed in host byte order. */ 481 psa = (const sockunion_t *)src; 482 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 483 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 484 485 return ((struct in_msource *)ims); 486 } 487 488 /* 489 * Perform filtering for multicast datagrams on a socket by group and source. 490 * 491 * Returns 0 if a datagram should be allowed through, or various error codes 492 * if the socket was not a member of the group, or the source was muted, etc. 493 */ 494 int 495 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 496 const struct sockaddr *group, const struct sockaddr *src) 497 { 498 size_t gidx; 499 struct in_msource *ims; 500 int mode; 501 502 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 503 504 gidx = imo_match_group(imo, ifp, group); 505 if (gidx == -1) 506 return (MCAST_NOTGMEMBER); 507 508 /* 509 * Check if the source was included in an (S,G) join. 510 * Allow reception on exclusive memberships by default, 511 * reject reception on inclusive memberships by default. 512 * Exclude source only if an in-mode exclude filter exists. 513 * Include source only if an in-mode include filter exists. 514 * NOTE: We are comparing group state here at IGMP t1 (now) 515 * with socket-layer t0 (since last downcall). 
516 */ 517 mode = imo->imo_mfilters[gidx].imf_st[1]; 518 ims = imo_match_source(imo, gidx, src); 519 520 if ((ims == NULL && mode == MCAST_INCLUDE) || 521 (ims != NULL && ims->imsl_st[0] != mode)) 522 return (MCAST_NOTSMEMBER); 523 524 return (MCAST_PASS); 525 } 526 527 /* 528 * Find and return a reference to an in_multi record for (ifp, group), 529 * and bump its reference count. 530 * If one does not exist, try to allocate it, and update link-layer multicast 531 * filters on ifp to listen for group. 532 * Assumes the IN_MULTI lock is held across the call. 533 * Return 0 if successful, otherwise return an appropriate error code. 534 */ 535 static int 536 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 537 struct in_multi **pinm) 538 { 539 struct sockaddr_in gsin; 540 struct ifmultiaddr *ifma; 541 struct in_ifinfo *ii; 542 struct in_multi *inm; 543 int error; 544 545 IN_MULTI_LOCK_ASSERT(); 546 547 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 548 IN_MULTI_LIST_LOCK(); 549 inm = inm_lookup(ifp, *group); 550 if (inm != NULL) { 551 /* 552 * If we already joined this group, just bump the 553 * refcount and return it. 554 */ 555 KASSERT(inm->inm_refcount >= 1, 556 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 557 inm_acquire_locked(inm); 558 *pinm = inm; 559 } 560 IN_MULTI_LIST_UNLOCK(); 561 if (inm != NULL) 562 return (0); 563 564 memset(&gsin, 0, sizeof(gsin)); 565 gsin.sin_family = AF_INET; 566 gsin.sin_len = sizeof(struct sockaddr_in); 567 gsin.sin_addr = *group; 568 569 /* 570 * Check if a link-layer group is already associated 571 * with this network-layer group on the given ifnet. 
572 */ 573 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 574 if (error != 0) 575 return (error); 576 577 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 578 IN_MULTI_LIST_LOCK(); 579 IF_ADDR_WLOCK(ifp); 580 581 /* 582 * If something other than netinet is occupying the link-layer 583 * group, print a meaningful error message and back out of 584 * the allocation. 585 * Otherwise, bump the refcount on the existing network-layer 586 * group association and return it. 587 */ 588 if (ifma->ifma_protospec != NULL) { 589 inm = (struct in_multi *)ifma->ifma_protospec; 590 #ifdef INVARIANTS 591 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 592 __func__)); 593 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 594 ("%s: ifma not AF_INET", __func__)); 595 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 596 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 597 !in_hosteq(inm->inm_addr, *group)) { 598 char addrbuf[INET_ADDRSTRLEN]; 599 600 panic("%s: ifma %p is inconsistent with %p (%s)", 601 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 602 } 603 #endif 604 inm_acquire_locked(inm); 605 *pinm = inm; 606 goto out_locked; 607 } 608 609 IF_ADDR_WLOCK_ASSERT(ifp); 610 611 /* 612 * A new in_multi record is needed; allocate and initialize it. 613 * We DO NOT perform an IGMP join as the in_ layer may need to 614 * push an initial source list down to IGMP to support SSM. 615 * 616 * The initial source filter state is INCLUDE, {} as per the RFC. 
617 */ 618 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 619 if (inm == NULL) { 620 IF_ADDR_WUNLOCK(ifp); 621 IN_MULTI_LIST_UNLOCK(); 622 if_delmulti_ifma(ifma); 623 return (ENOMEM); 624 } 625 inm->inm_addr = *group; 626 inm->inm_ifp = ifp; 627 inm->inm_igi = ii->ii_igmp; 628 inm->inm_ifma = ifma; 629 inm->inm_refcount = 1; 630 inm->inm_state = IGMP_NOT_MEMBER; 631 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 632 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 633 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 634 RB_INIT(&inm->inm_srcs); 635 636 ifma->ifma_protospec = inm; 637 638 *pinm = inm; 639 out_locked: 640 IF_ADDR_WUNLOCK(ifp); 641 IN_MULTI_LIST_UNLOCK(); 642 return (0); 643 } 644 645 /* 646 * Drop a reference to an in_multi record. 647 * 648 * If the refcount drops to 0, free the in_multi record and 649 * delete the underlying link-layer membership. 650 */ 651 static void 652 inm_release(struct in_multi *inm) 653 { 654 struct ifmultiaddr *ifma; 655 struct ifnet *ifp; 656 657 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 658 MPASS(inm->inm_refcount == 0); 659 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 660 661 ifma = inm->inm_ifma; 662 ifp = inm->inm_ifp; 663 664 /* XXX this access is not covered by IF_ADDR_LOCK */ 665 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 666 if (ifp != NULL) { 667 CURVNET_SET(ifp->if_vnet); 668 inm_purge(inm); 669 free(inm, M_IPMADDR); 670 if_delmulti_ifma_flags(ifma, 1); 671 CURVNET_RESTORE(); 672 if_rele(ifp); 673 } else { 674 inm_purge(inm); 675 free(inm, M_IPMADDR); 676 if_delmulti_ifma_flags(ifma, 1); 677 } 678 } 679 680 /* 681 * Clear recorded source entries for a group. 682 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 683 * FIXME: Should reap. 
684 */ 685 void 686 inm_clear_recorded(struct in_multi *inm) 687 { 688 struct ip_msource *ims; 689 690 IN_MULTI_LIST_LOCK_ASSERT(); 691 692 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 693 if (ims->ims_stp) { 694 ims->ims_stp = 0; 695 --inm->inm_st[1].iss_rec; 696 } 697 } 698 KASSERT(inm->inm_st[1].iss_rec == 0, 699 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 700 } 701 702 /* 703 * Record a source as pending for a Source-Group IGMPv3 query. 704 * This lives here as it modifies the shared tree. 705 * 706 * inm is the group descriptor. 707 * naddr is the address of the source to record in network-byte order. 708 * 709 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 710 * lazy-allocate a source node in response to an SG query. 711 * Otherwise, no allocation is performed. This saves some memory 712 * with the trade-off that the source will not be reported to the 713 * router if joined in the window between the query response and 714 * the group actually being joined on the local host. 715 * 716 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 717 * This turns off the allocation of a recorded source entry if 718 * the group has not been joined. 719 * 720 * Return 0 if the source didn't exist or was already marked as recorded. 721 * Return 1 if the source was marked as recorded by this function. 722 * Return <0 if any error occurred (negated errno code). 
723 */ 724 int 725 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 726 { 727 struct ip_msource find; 728 struct ip_msource *ims, *nims; 729 730 IN_MULTI_LIST_LOCK_ASSERT(); 731 732 find.ims_haddr = ntohl(naddr); 733 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 734 if (ims && ims->ims_stp) 735 return (0); 736 if (ims == NULL) { 737 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 738 return (-ENOSPC); 739 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 740 M_NOWAIT | M_ZERO); 741 if (nims == NULL) 742 return (-ENOMEM); 743 nims->ims_haddr = find.ims_haddr; 744 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 745 ++inm->inm_nsrc; 746 ims = nims; 747 } 748 749 /* 750 * Mark the source as recorded and update the recorded 751 * source count. 752 */ 753 ++ims->ims_stp; 754 ++inm->inm_st[1].iss_rec; 755 756 return (1); 757 } 758 759 /* 760 * Return a pointer to an in_msource owned by an in_mfilter, 761 * given its source address. 762 * Lazy-allocate if needed. If this is a new entry its filter state is 763 * undefined at t0. 764 * 765 * imf is the filter set being modified. 766 * haddr is the source address in *host* byte-order. 767 * 768 * SMPng: May be called with locks held; malloc must not block. 
769 */ 770 static int 771 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 772 struct in_msource **plims) 773 { 774 struct ip_msource find; 775 struct ip_msource *ims, *nims; 776 struct in_msource *lims; 777 int error; 778 779 error = 0; 780 ims = NULL; 781 lims = NULL; 782 783 /* key is host byte order */ 784 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 785 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 786 lims = (struct in_msource *)ims; 787 if (lims == NULL) { 788 if (imf->imf_nsrc == in_mcast_maxsocksrc) 789 return (ENOSPC); 790 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 791 M_NOWAIT | M_ZERO); 792 if (nims == NULL) 793 return (ENOMEM); 794 lims = (struct in_msource *)nims; 795 lims->ims_haddr = find.ims_haddr; 796 lims->imsl_st[0] = MCAST_UNDEFINED; 797 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 798 ++imf->imf_nsrc; 799 } 800 801 *plims = lims; 802 803 return (error); 804 } 805 806 /* 807 * Graft a source entry into an existing socket-layer filter set, 808 * maintaining any required invariants and checking allocations. 809 * 810 * The source is marked as being in the new filter mode at t1. 811 * 812 * Return the pointer to the new node, otherwise return NULL. 813 */ 814 static struct in_msource * 815 imf_graft(struct in_mfilter *imf, const uint8_t st1, 816 const struct sockaddr_in *psin) 817 { 818 struct ip_msource *nims; 819 struct in_msource *lims; 820 821 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 822 M_NOWAIT | M_ZERO); 823 if (nims == NULL) 824 return (NULL); 825 lims = (struct in_msource *)nims; 826 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 827 lims->imsl_st[0] = MCAST_UNDEFINED; 828 lims->imsl_st[1] = st1; 829 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 830 ++imf->imf_nsrc; 831 832 return (lims); 833 } 834 835 /* 836 * Prune a source entry from an existing socket-layer filter set, 837 * maintaining any required invariants and checking allocations. 
838 * 839 * The source is marked as being left at t1, it is not freed. 840 * 841 * Return 0 if no error occurred, otherwise return an errno value. 842 */ 843 static int 844 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 845 { 846 struct ip_msource find; 847 struct ip_msource *ims; 848 struct in_msource *lims; 849 850 /* key is host byte order */ 851 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 852 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 853 if (ims == NULL) 854 return (ENOENT); 855 lims = (struct in_msource *)ims; 856 lims->imsl_st[1] = MCAST_UNDEFINED; 857 return (0); 858 } 859 860 /* 861 * Revert socket-layer filter set deltas at t1 to t0 state. 862 */ 863 static void 864 imf_rollback(struct in_mfilter *imf) 865 { 866 struct ip_msource *ims, *tims; 867 struct in_msource *lims; 868 869 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 870 lims = (struct in_msource *)ims; 871 if (lims->imsl_st[0] == lims->imsl_st[1]) { 872 /* no change at t1 */ 873 continue; 874 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 875 /* revert change to existing source at t1 */ 876 lims->imsl_st[1] = lims->imsl_st[0]; 877 } else { 878 /* revert source added t1 */ 879 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 880 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 881 free(ims, M_INMFILTER); 882 imf->imf_nsrc--; 883 } 884 } 885 imf->imf_st[1] = imf->imf_st[0]; 886 } 887 888 /* 889 * Mark socket-layer filter set as INCLUDE {} at t1. 890 */ 891 static void 892 imf_leave(struct in_mfilter *imf) 893 { 894 struct ip_msource *ims; 895 struct in_msource *lims; 896 897 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 898 lims = (struct in_msource *)ims; 899 lims->imsl_st[1] = MCAST_UNDEFINED; 900 } 901 imf->imf_st[1] = MCAST_INCLUDE; 902 } 903 904 /* 905 * Mark socket-layer filter set deltas as committed. 
906 */ 907 static void 908 imf_commit(struct in_mfilter *imf) 909 { 910 struct ip_msource *ims; 911 struct in_msource *lims; 912 913 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 914 lims = (struct in_msource *)ims; 915 lims->imsl_st[0] = lims->imsl_st[1]; 916 } 917 imf->imf_st[0] = imf->imf_st[1]; 918 } 919 920 /* 921 * Reap unreferenced sources from socket-layer filter set. 922 */ 923 static void 924 imf_reap(struct in_mfilter *imf) 925 { 926 struct ip_msource *ims, *tims; 927 struct in_msource *lims; 928 929 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 930 lims = (struct in_msource *)ims; 931 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 932 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 933 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 934 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 935 free(ims, M_INMFILTER); 936 imf->imf_nsrc--; 937 } 938 } 939 } 940 941 /* 942 * Purge socket-layer filter set. 943 */ 944 static void 945 imf_purge(struct in_mfilter *imf) 946 { 947 struct ip_msource *ims, *tims; 948 949 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 950 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 951 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 952 free(ims, M_INMFILTER); 953 imf->imf_nsrc--; 954 } 955 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 956 KASSERT(RB_EMPTY(&imf->imf_sources), 957 ("%s: imf_sources not empty", __func__)); 958 } 959 960 /* 961 * Look up a source filter entry for a multicast group. 962 * 963 * inm is the group descriptor to work with. 964 * haddr is the host-byte-order IPv4 address to look up. 965 * noalloc may be non-zero to suppress allocation of sources. 966 * *pims will be set to the address of the retrieved or allocated source. 967 * 968 * SMPng: NOTE: may be called with locks held. 969 * Return 0 if successful, otherwise return a non-zero error code. 
970 */ 971 static int 972 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 973 const int noalloc, struct ip_msource **pims) 974 { 975 struct ip_msource find; 976 struct ip_msource *ims, *nims; 977 978 find.ims_haddr = haddr; 979 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 980 if (ims == NULL && !noalloc) { 981 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 982 return (ENOSPC); 983 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 984 M_NOWAIT | M_ZERO); 985 if (nims == NULL) 986 return (ENOMEM); 987 nims->ims_haddr = haddr; 988 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 989 ++inm->inm_nsrc; 990 ims = nims; 991 #ifdef KTR 992 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 993 haddr, ims); 994 #endif 995 } 996 997 *pims = ims; 998 return (0); 999 } 1000 1001 /* 1002 * Merge socket-layer source into IGMP-layer source. 1003 * If rollback is non-zero, perform the inverse of the merge. 1004 */ 1005 static void 1006 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 1007 const int rollback) 1008 { 1009 int n = rollback ? -1 : 1; 1010 1011 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 1012 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 1013 __func__, n, ims->ims_haddr); 1014 ims->ims_st[1].ex -= n; 1015 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 1016 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 1017 __func__, n, ims->ims_haddr); 1018 ims->ims_st[1].in -= n; 1019 } 1020 1021 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 1022 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 1023 __func__, n, ims->ims_haddr); 1024 ims->ims_st[1].ex += n; 1025 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 1026 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 1027 __func__, n, ims->ims_haddr); 1028 ims->ims_st[1].in += n; 1029 } 1030 } 1031 1032 /* 1033 * Atomically update the global in_multi state, when a membership's 1034 * filter list is being updated in any way. 1035 * 1036 * imf is the per-inpcb-membership group filter pointer. 
1037 * A fake imf may be passed for in-kernel consumers. 1038 * 1039 * XXX This is a candidate for a set-symmetric-difference style loop 1040 * which would eliminate the repeated lookup from root of ims nodes, 1041 * as they share the same key space. 1042 * 1043 * If any error occurred this function will back out of refcounts 1044 * and return a non-zero value. 1045 */ 1046 static int 1047 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1048 { 1049 struct ip_msource *ims, *nims; 1050 struct in_msource *lims; 1051 int schanged, error; 1052 int nsrc0, nsrc1; 1053 1054 schanged = 0; 1055 error = 0; 1056 nsrc1 = nsrc0 = 0; 1057 IN_MULTI_LIST_LOCK_ASSERT(); 1058 1059 /* 1060 * Update the source filters first, as this may fail. 1061 * Maintain count of in-mode filters at t0, t1. These are 1062 * used to work out if we transition into ASM mode or not. 1063 * Maintain a count of source filters whose state was 1064 * actually modified by this operation. 1065 */ 1066 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1067 lims = (struct in_msource *)ims; 1068 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1069 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1070 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1071 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1072 ++schanged; 1073 if (error) 1074 break; 1075 ims_merge(nims, lims, 0); 1076 } 1077 if (error) { 1078 struct ip_msource *bims; 1079 1080 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1081 lims = (struct in_msource *)ims; 1082 if (lims->imsl_st[0] == lims->imsl_st[1]) 1083 continue; 1084 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1085 if (bims == NULL) 1086 continue; 1087 ims_merge(bims, lims, 1); 1088 } 1089 goto out_reap; 1090 } 1091 1092 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1093 __func__, nsrc0, nsrc1); 1094 1095 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. 
*/ 1096 if (imf->imf_st[0] == imf->imf_st[1] && 1097 imf->imf_st[1] == MCAST_INCLUDE) { 1098 if (nsrc1 == 0) { 1099 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1100 --inm->inm_st[1].iss_in; 1101 } 1102 } 1103 1104 /* Handle filter mode transition on socket. */ 1105 if (imf->imf_st[0] != imf->imf_st[1]) { 1106 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1107 __func__, imf->imf_st[0], imf->imf_st[1]); 1108 1109 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1110 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1111 --inm->inm_st[1].iss_ex; 1112 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1113 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1114 --inm->inm_st[1].iss_in; 1115 } 1116 1117 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1118 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1119 inm->inm_st[1].iss_ex++; 1120 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1121 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1122 inm->inm_st[1].iss_in++; 1123 } 1124 } 1125 1126 /* 1127 * Track inm filter state in terms of listener counts. 1128 * If there are any exclusive listeners, stack-wide 1129 * membership is exclusive. 1130 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1131 * If no listeners remain, state is undefined at t1, 1132 * and the IGMP lifecycle for this group should finish. 1133 */ 1134 if (inm->inm_st[1].iss_ex > 0) { 1135 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1136 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1137 } else if (inm->inm_st[1].iss_in > 0) { 1138 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1139 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1140 } else { 1141 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1142 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1143 } 1144 1145 /* Decrement ASM listener count on transition out of ASM mode. 
*/ 1146 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1147 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1148 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1149 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1150 --inm->inm_st[1].iss_asm; 1151 } 1152 } 1153 1154 /* Increment ASM listener count on transition to ASM mode. */ 1155 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1156 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1157 inm->inm_st[1].iss_asm++; 1158 } 1159 1160 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1161 inm_print(inm); 1162 1163 out_reap: 1164 if (schanged > 0) { 1165 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1166 inm_reap(inm); 1167 } 1168 return (error); 1169 } 1170 1171 /* 1172 * Mark an in_multi's filter set deltas as committed. 1173 * Called by IGMP after a state change has been enqueued. 1174 */ 1175 void 1176 inm_commit(struct in_multi *inm) 1177 { 1178 struct ip_msource *ims; 1179 1180 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1181 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1182 inm_print(inm); 1183 1184 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1185 ims->ims_st[0] = ims->ims_st[1]; 1186 } 1187 inm->inm_st[0] = inm->inm_st[1]; 1188 } 1189 1190 /* 1191 * Reap unreferenced nodes from an in_multi's filter set. 1192 */ 1193 static void 1194 inm_reap(struct in_multi *inm) 1195 { 1196 struct ip_msource *ims, *tims; 1197 1198 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1199 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1200 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1201 ims->ims_stp != 0) 1202 continue; 1203 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1204 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1205 free(ims, M_IPMSOURCE); 1206 inm->inm_nsrc--; 1207 } 1208 } 1209 1210 /* 1211 * Purge all source nodes from an in_multi's filter set. 
1212 */ 1213 static void 1214 inm_purge(struct in_multi *inm) 1215 { 1216 struct ip_msource *ims, *tims; 1217 1218 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1219 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1220 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1221 free(ims, M_IPMSOURCE); 1222 inm->inm_nsrc--; 1223 } 1224 } 1225 1226 /* 1227 * Join a multicast group; unlocked entry point. 1228 * 1229 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1230 * is not held. Fortunately, ifp is unlikely to have been detached 1231 * at this point, so we assume it's OK to recurse. 1232 */ 1233 int 1234 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1235 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1236 { 1237 int error; 1238 1239 IN_MULTI_LOCK(); 1240 error = in_joingroup_locked(ifp, gina, imf, pinm); 1241 IN_MULTI_UNLOCK(); 1242 1243 return (error); 1244 } 1245 1246 /* 1247 * Join a multicast group; real entry point. 1248 * 1249 * Only preserves atomicity at inm level. 1250 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1251 * 1252 * If the IGMP downcall fails, the group is not joined, and an error 1253 * code is returned. 1254 */ 1255 int 1256 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1257 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1258 { 1259 struct in_mfilter timf; 1260 struct in_multi *inm; 1261 int error; 1262 1263 IN_MULTI_LOCK_ASSERT(); 1264 IN_MULTI_LIST_UNLOCK_ASSERT(); 1265 1266 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1267 ntohl(gina->s_addr), ifp, ifp->if_xname); 1268 1269 error = 0; 1270 inm = NULL; 1271 1272 /* 1273 * If no imf was specified (i.e. kernel consumer), 1274 * fake one up and assume it is an ASM join. 
1275 */ 1276 if (imf == NULL) { 1277 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1278 imf = &timf; 1279 } 1280 1281 error = in_getmulti(ifp, gina, &inm); 1282 if (error) { 1283 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1284 return (error); 1285 } 1286 IN_MULTI_LIST_LOCK(); 1287 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1288 error = inm_merge(inm, imf); 1289 if (error) { 1290 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1291 goto out_inm_release; 1292 } 1293 1294 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1295 error = igmp_change_state(inm); 1296 if (error) { 1297 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1298 goto out_inm_release; 1299 } 1300 1301 out_inm_release: 1302 if (error) { 1303 1304 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1305 inm_release_deferred(inm); 1306 } else { 1307 *pinm = inm; 1308 } 1309 IN_MULTI_LIST_UNLOCK(); 1310 1311 return (error); 1312 } 1313 1314 /* 1315 * Leave a multicast group; unlocked entry point. 1316 */ 1317 int 1318 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1319 { 1320 int error; 1321 1322 IN_MULTI_LOCK(); 1323 error = in_leavegroup_locked(inm, imf); 1324 IN_MULTI_UNLOCK(); 1325 1326 return (error); 1327 } 1328 1329 /* 1330 * Leave a multicast group; real entry point. 1331 * All source filters will be expunged. 1332 * 1333 * Only preserves atomicity at inm level. 1334 * 1335 * Holding the write lock for the INP which contains imf 1336 * is highly advisable. We can't assert for it as imf does not 1337 * contain a back-pointer to the owning inp. 1338 * 1339 * Note: This is not the same as inm_release(*) as this function also 1340 * makes a state change downcall into IGMP. 
1341 */ 1342 int 1343 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1344 { 1345 struct in_mfilter timf; 1346 int error; 1347 1348 error = 0; 1349 1350 IN_MULTI_LOCK_ASSERT(); 1351 IN_MULTI_LIST_UNLOCK_ASSERT(); 1352 1353 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1354 inm, ntohl(inm->inm_addr.s_addr), 1355 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1356 imf); 1357 1358 /* 1359 * If no imf was specified (i.e. kernel consumer), 1360 * fake one up and assume it is an ASM join. 1361 */ 1362 if (imf == NULL) { 1363 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1364 imf = &timf; 1365 } 1366 1367 /* 1368 * Begin state merge transaction at IGMP layer. 1369 * 1370 * As this particular invocation should not cause any memory 1371 * to be allocated, and there is no opportunity to roll back 1372 * the transaction, it MUST NOT fail. 1373 */ 1374 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1375 IN_MULTI_LIST_LOCK(); 1376 error = inm_merge(inm, imf); 1377 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1378 1379 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1380 CURVNET_SET(inm->inm_ifp->if_vnet); 1381 error = igmp_change_state(inm); 1382 IF_ADDR_WLOCK(inm->inm_ifp); 1383 inm_release_deferred(inm); 1384 IF_ADDR_WUNLOCK(inm->inm_ifp); 1385 IN_MULTI_LIST_UNLOCK(); 1386 CURVNET_RESTORE(); 1387 if (error) 1388 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1389 1390 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1391 1392 return (error); 1393 } 1394 1395 /*#ifndef BURN_BRIDGES*/ 1396 /* 1397 * Join an IPv4 multicast group in (*,G) exclusive mode. 1398 * The group must be a 224.0.0.0/24 link-scope group. 1399 * This KPI is for legacy kernel consumers only. 
1400 */ 1401 struct in_multi * 1402 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1403 { 1404 struct in_multi *pinm; 1405 int error; 1406 #ifdef INVARIANTS 1407 char addrbuf[INET_ADDRSTRLEN]; 1408 #endif 1409 1410 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1411 ("%s: %s not in 224.0.0.0/24", __func__, 1412 inet_ntoa_r(*ap, addrbuf))); 1413 1414 error = in_joingroup(ifp, ap, NULL, &pinm); 1415 if (error != 0) 1416 pinm = NULL; 1417 1418 return (pinm); 1419 } 1420 1421 /* 1422 * Block or unblock an ASM multicast source on an inpcb. 1423 * This implements the delta-based API described in RFC 3678. 1424 * 1425 * The delta-based API applies only to exclusive-mode memberships. 1426 * An IGMP downcall will be performed. 1427 * 1428 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1429 * 1430 * Return 0 if successful, otherwise return an appropriate error code. 1431 */ 1432 static int 1433 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1434 { 1435 struct group_source_req gsr; 1436 sockunion_t *gsa, *ssa; 1437 struct ifnet *ifp; 1438 struct in_mfilter *imf; 1439 struct ip_moptions *imo; 1440 struct in_msource *ims; 1441 struct in_multi *inm; 1442 size_t idx; 1443 uint16_t fmode; 1444 int error, doblock; 1445 1446 ifp = NULL; 1447 error = 0; 1448 doblock = 0; 1449 1450 memset(&gsr, 0, sizeof(struct group_source_req)); 1451 gsa = (sockunion_t *)&gsr.gsr_group; 1452 ssa = (sockunion_t *)&gsr.gsr_source; 1453 1454 switch (sopt->sopt_name) { 1455 case IP_BLOCK_SOURCE: 1456 case IP_UNBLOCK_SOURCE: { 1457 struct ip_mreq_source mreqs; 1458 1459 error = sooptcopyin(sopt, &mreqs, 1460 sizeof(struct ip_mreq_source), 1461 sizeof(struct ip_mreq_source)); 1462 if (error) 1463 return (error); 1464 1465 gsa->sin.sin_family = AF_INET; 1466 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1467 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1468 1469 ssa->sin.sin_family = AF_INET; 1470 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1471 ssa->sin.sin_addr = 
mreqs.imr_sourceaddr; 1472 1473 if (!in_nullhost(mreqs.imr_interface)) 1474 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1475 1476 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1477 doblock = 1; 1478 1479 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1480 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1481 break; 1482 } 1483 1484 case MCAST_BLOCK_SOURCE: 1485 case MCAST_UNBLOCK_SOURCE: 1486 error = sooptcopyin(sopt, &gsr, 1487 sizeof(struct group_source_req), 1488 sizeof(struct group_source_req)); 1489 if (error) 1490 return (error); 1491 1492 if (gsa->sin.sin_family != AF_INET || 1493 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1494 return (EINVAL); 1495 1496 if (ssa->sin.sin_family != AF_INET || 1497 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1498 return (EINVAL); 1499 1500 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1501 return (EADDRNOTAVAIL); 1502 1503 ifp = ifnet_byindex(gsr.gsr_interface); 1504 1505 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1506 doblock = 1; 1507 break; 1508 1509 default: 1510 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1511 __func__, sopt->sopt_name); 1512 return (EOPNOTSUPP); 1513 break; 1514 } 1515 1516 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1517 return (EINVAL); 1518 1519 /* 1520 * Check if we are actually a member of this group. 1521 */ 1522 imo = inp_findmoptions(inp); 1523 idx = imo_match_group(imo, ifp, &gsa->sa); 1524 if (idx == -1 || imo->imo_mfilters == NULL) { 1525 error = EADDRNOTAVAIL; 1526 goto out_inp_locked; 1527 } 1528 1529 KASSERT(imo->imo_mfilters != NULL, 1530 ("%s: imo_mfilters not allocated", __func__)); 1531 imf = &imo->imo_mfilters[idx]; 1532 inm = imo->imo_membership[idx]; 1533 1534 /* 1535 * Attempting to use the delta-based API on an 1536 * non exclusive-mode membership is an error. 
1537 */ 1538 fmode = imf->imf_st[0]; 1539 if (fmode != MCAST_EXCLUDE) { 1540 error = EINVAL; 1541 goto out_inp_locked; 1542 } 1543 1544 /* 1545 * Deal with error cases up-front: 1546 * Asked to block, but already blocked; or 1547 * Asked to unblock, but nothing to unblock. 1548 * If adding a new block entry, allocate it. 1549 */ 1550 ims = imo_match_source(imo, idx, &ssa->sa); 1551 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1552 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1553 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1554 error = EADDRNOTAVAIL; 1555 goto out_inp_locked; 1556 } 1557 1558 INP_WLOCK_ASSERT(inp); 1559 1560 /* 1561 * Begin state merge transaction at socket layer. 1562 */ 1563 if (doblock) { 1564 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1565 ims = imf_graft(imf, fmode, &ssa->sin); 1566 if (ims == NULL) 1567 error = ENOMEM; 1568 } else { 1569 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1570 error = imf_prune(imf, &ssa->sin); 1571 } 1572 1573 if (error) { 1574 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1575 goto out_imf_rollback; 1576 } 1577 1578 /* 1579 * Begin state merge transaction at IGMP layer. 
1580 */ 1581 IN_MULTI_LOCK(); 1582 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1583 IN_MULTI_LIST_LOCK(); 1584 error = inm_merge(inm, imf); 1585 if (error) { 1586 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1587 IN_MULTI_LIST_UNLOCK(); 1588 goto out_in_multi_locked; 1589 } 1590 1591 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1592 error = igmp_change_state(inm); 1593 IN_MULTI_LIST_UNLOCK(); 1594 if (error) 1595 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1596 1597 out_in_multi_locked: 1598 1599 IN_MULTI_UNLOCK(); 1600 out_imf_rollback: 1601 if (error) 1602 imf_rollback(imf); 1603 else 1604 imf_commit(imf); 1605 1606 imf_reap(imf); 1607 1608 out_inp_locked: 1609 INP_WUNLOCK(inp); 1610 return (error); 1611 } 1612 1613 /* 1614 * Given an inpcb, return its multicast options structure pointer. Accepts 1615 * an unlocked inpcb pointer, but will return it locked. May sleep. 1616 * 1617 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1618 * SMPng: NOTE: Returns with the INP write lock held. 
1619 */ 1620 static struct ip_moptions * 1621 inp_findmoptions(struct inpcb *inp) 1622 { 1623 struct ip_moptions *imo; 1624 struct in_multi **immp; 1625 struct in_mfilter *imfp; 1626 size_t idx; 1627 1628 INP_WLOCK(inp); 1629 if (inp->inp_moptions != NULL) 1630 return (inp->inp_moptions); 1631 1632 INP_WUNLOCK(inp); 1633 1634 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1635 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1636 M_WAITOK | M_ZERO); 1637 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1638 M_INMFILTER, M_WAITOK); 1639 1640 imo->imo_multicast_ifp = NULL; 1641 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1642 imo->imo_multicast_vif = -1; 1643 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1644 imo->imo_multicast_loop = in_mcast_loop; 1645 imo->imo_num_memberships = 0; 1646 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1647 imo->imo_membership = immp; 1648 1649 /* Initialize per-group source filters. */ 1650 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1651 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1652 imo->imo_mfilters = imfp; 1653 1654 INP_WLOCK(inp); 1655 if (inp->inp_moptions != NULL) { 1656 free(imfp, M_INMFILTER); 1657 free(immp, M_IPMOPTS); 1658 free(imo, M_IPMOPTS); 1659 return (inp->inp_moptions); 1660 } 1661 inp->inp_moptions = imo; 1662 return (imo); 1663 } 1664 1665 static void 1666 inp_gcmoptions(epoch_context_t ctx) 1667 { 1668 struct ip_moptions *imo; 1669 struct in_mfilter *imf; 1670 struct in_multi *inm; 1671 struct ifnet *ifp; 1672 size_t idx, nmships; 1673 1674 imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx); 1675 1676 nmships = imo->imo_num_memberships; 1677 for (idx = 0; idx < nmships; ++idx) { 1678 imf = imo->imo_mfilters ? 
&imo->imo_mfilters[idx] : NULL; 1679 if (imf) 1680 imf_leave(imf); 1681 inm = imo->imo_membership[idx]; 1682 ifp = inm->inm_ifp; 1683 if (ifp != NULL) { 1684 CURVNET_SET(ifp->if_vnet); 1685 (void)in_leavegroup(inm, imf); 1686 CURVNET_RESTORE(); 1687 } else { 1688 (void)in_leavegroup(inm, imf); 1689 } 1690 if (imf) 1691 imf_purge(imf); 1692 } 1693 1694 if (imo->imo_mfilters) 1695 free(imo->imo_mfilters, M_INMFILTER); 1696 free(imo->imo_membership, M_IPMOPTS); 1697 free(imo, M_IPMOPTS); 1698 } 1699 1700 /* 1701 * Discard the IP multicast options (and source filters). To minimize 1702 * the amount of work done while holding locks such as the INP's 1703 * pcbinfo lock (which is used in the receive path), the free 1704 * operation is deferred to the epoch callback task. 1705 */ 1706 void 1707 inp_freemoptions(struct ip_moptions *imo) 1708 { 1709 if (imo == NULL) 1710 return; 1711 epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions); 1712 } 1713 1714 /* 1715 * Atomically get source filters on a socket for an IPv4 multicast group. 1716 * Called with INP lock held; returns with lock released. 
1717 */ 1718 static int 1719 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1720 { 1721 struct __msfilterreq msfr; 1722 sockunion_t *gsa; 1723 struct ifnet *ifp; 1724 struct ip_moptions *imo; 1725 struct in_mfilter *imf; 1726 struct ip_msource *ims; 1727 struct in_msource *lims; 1728 struct sockaddr_in *psin; 1729 struct sockaddr_storage *ptss; 1730 struct sockaddr_storage *tss; 1731 int error; 1732 size_t idx, nsrcs, ncsrcs; 1733 1734 INP_WLOCK_ASSERT(inp); 1735 1736 imo = inp->inp_moptions; 1737 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1738 1739 INP_WUNLOCK(inp); 1740 1741 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1742 sizeof(struct __msfilterreq)); 1743 if (error) 1744 return (error); 1745 1746 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1747 return (EINVAL); 1748 1749 ifp = ifnet_byindex(msfr.msfr_ifindex); 1750 if (ifp == NULL) 1751 return (EINVAL); 1752 1753 INP_WLOCK(inp); 1754 1755 /* 1756 * Lookup group on the socket. 1757 */ 1758 gsa = (sockunion_t *)&msfr.msfr_group; 1759 idx = imo_match_group(imo, ifp, &gsa->sa); 1760 if (idx == -1 || imo->imo_mfilters == NULL) { 1761 INP_WUNLOCK(inp); 1762 return (EADDRNOTAVAIL); 1763 } 1764 imf = &imo->imo_mfilters[idx]; 1765 1766 /* 1767 * Ignore memberships which are in limbo. 1768 */ 1769 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1770 INP_WUNLOCK(inp); 1771 return (EAGAIN); 1772 } 1773 msfr.msfr_fmode = imf->imf_st[1]; 1774 1775 /* 1776 * If the user specified a buffer, copy out the source filter 1777 * entries to userland gracefully. 1778 * We only copy out the number of entries which userland 1779 * has asked for, but we always tell userland how big the 1780 * buffer really needs to be. 
1781 */ 1782 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1783 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1784 tss = NULL; 1785 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1786 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1787 M_TEMP, M_NOWAIT | M_ZERO); 1788 if (tss == NULL) { 1789 INP_WUNLOCK(inp); 1790 return (ENOBUFS); 1791 } 1792 } 1793 1794 /* 1795 * Count number of sources in-mode at t0. 1796 * If buffer space exists and remains, copy out source entries. 1797 */ 1798 nsrcs = msfr.msfr_nsrcs; 1799 ncsrcs = 0; 1800 ptss = tss; 1801 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1802 lims = (struct in_msource *)ims; 1803 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1804 lims->imsl_st[0] != imf->imf_st[0]) 1805 continue; 1806 ++ncsrcs; 1807 if (tss != NULL && nsrcs > 0) { 1808 psin = (struct sockaddr_in *)ptss; 1809 psin->sin_family = AF_INET; 1810 psin->sin_len = sizeof(struct sockaddr_in); 1811 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1812 psin->sin_port = 0; 1813 ++ptss; 1814 --nsrcs; 1815 } 1816 } 1817 1818 INP_WUNLOCK(inp); 1819 1820 if (tss != NULL) { 1821 error = copyout(tss, msfr.msfr_srcs, 1822 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1823 free(tss, M_TEMP); 1824 if (error) 1825 return (error); 1826 } 1827 1828 msfr.msfr_nsrcs = ncsrcs; 1829 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1830 1831 return (error); 1832 } 1833 1834 /* 1835 * Return the IP multicast options in response to user getsockopt(). 1836 */ 1837 int 1838 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1839 { 1840 struct rm_priotracker in_ifa_tracker; 1841 struct ip_mreqn mreqn; 1842 struct ip_moptions *imo; 1843 struct ifnet *ifp; 1844 struct in_ifaddr *ia; 1845 int error, optval; 1846 u_char coptval; 1847 1848 INP_WLOCK(inp); 1849 imo = inp->inp_moptions; 1850 /* 1851 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1852 * or is a divert socket, reject it. 
1853 */ 1854 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1855 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1856 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1857 INP_WUNLOCK(inp); 1858 return (EOPNOTSUPP); 1859 } 1860 1861 error = 0; 1862 switch (sopt->sopt_name) { 1863 case IP_MULTICAST_VIF: 1864 if (imo != NULL) 1865 optval = imo->imo_multicast_vif; 1866 else 1867 optval = -1; 1868 INP_WUNLOCK(inp); 1869 error = sooptcopyout(sopt, &optval, sizeof(int)); 1870 break; 1871 1872 case IP_MULTICAST_IF: 1873 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1874 if (imo != NULL) { 1875 ifp = imo->imo_multicast_ifp; 1876 if (!in_nullhost(imo->imo_multicast_addr)) { 1877 mreqn.imr_address = imo->imo_multicast_addr; 1878 } else if (ifp != NULL) { 1879 mreqn.imr_ifindex = ifp->if_index; 1880 NET_EPOCH_ENTER(); 1881 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1882 if (ia != NULL) 1883 mreqn.imr_address = 1884 IA_SIN(ia)->sin_addr; 1885 NET_EPOCH_EXIT(); 1886 } 1887 } 1888 INP_WUNLOCK(inp); 1889 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1890 error = sooptcopyout(sopt, &mreqn, 1891 sizeof(struct ip_mreqn)); 1892 } else { 1893 error = sooptcopyout(sopt, &mreqn.imr_address, 1894 sizeof(struct in_addr)); 1895 } 1896 break; 1897 1898 case IP_MULTICAST_TTL: 1899 if (imo == NULL) 1900 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1901 else 1902 optval = coptval = imo->imo_multicast_ttl; 1903 INP_WUNLOCK(inp); 1904 if (sopt->sopt_valsize == sizeof(u_char)) 1905 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1906 else 1907 error = sooptcopyout(sopt, &optval, sizeof(int)); 1908 break; 1909 1910 case IP_MULTICAST_LOOP: 1911 if (imo == NULL) 1912 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1913 else 1914 optval = coptval = imo->imo_multicast_loop; 1915 INP_WUNLOCK(inp); 1916 if (sopt->sopt_valsize == sizeof(u_char)) 1917 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1918 else 1919 error = sooptcopyout(sopt, &optval, sizeof(int)); 1920 
break; 1921 1922 case IP_MSFILTER: 1923 if (imo == NULL) { 1924 error = EADDRNOTAVAIL; 1925 INP_WUNLOCK(inp); 1926 } else { 1927 error = inp_get_source_filters(inp, sopt); 1928 } 1929 break; 1930 1931 default: 1932 INP_WUNLOCK(inp); 1933 error = ENOPROTOOPT; 1934 break; 1935 } 1936 1937 INP_UNLOCK_ASSERT(inp); 1938 1939 return (error); 1940 } 1941 1942 /* 1943 * Look up the ifnet to use for a multicast group membership, 1944 * given the IPv4 address of an interface, and the IPv4 group address. 1945 * 1946 * This routine exists to support legacy multicast applications 1947 * which do not understand that multicast memberships are scoped to 1948 * specific physical links in the networking stack, or which need 1949 * to join link-scope groups before IPv4 addresses are configured. 1950 * 1951 * If inp is non-NULL, use this socket's current FIB number for any 1952 * required FIB lookup. 1953 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1954 * and use its ifp; usually, this points to the default next-hop. 1955 * 1956 * If the FIB lookup fails, attempt to use the first non-loopback 1957 * interface with multicast capability in the system as a 1958 * last resort. The legacy IPv4 ASM API requires that we do 1959 * this in order to allow groups to be joined when the routing 1960 * table has not yet been populated during boot. 1961 * 1962 * Returns NULL if no ifp could be found. 1963 * 1964 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1965 * FUTURE: Implement IPv4 source-address selection. 
1966 */ 1967 static struct ifnet * 1968 inp_lookup_mcast_ifp(const struct inpcb *inp, 1969 const struct sockaddr_in *gsin, const struct in_addr ina) 1970 { 1971 struct rm_priotracker in_ifa_tracker; 1972 struct ifnet *ifp; 1973 struct nhop4_basic nh4; 1974 uint32_t fibnum; 1975 1976 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1977 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1978 ("%s: not multicast", __func__)); 1979 1980 ifp = NULL; 1981 if (!in_nullhost(ina)) { 1982 INADDR_TO_IFP(ina, ifp); 1983 } else { 1984 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1985 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1986 ifp = nh4.nh_ifp; 1987 else { 1988 struct in_ifaddr *ia; 1989 struct ifnet *mifp; 1990 1991 mifp = NULL; 1992 IN_IFADDR_RLOCK(&in_ifa_tracker); 1993 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1994 mifp = ia->ia_ifp; 1995 if (!(mifp->if_flags & IFF_LOOPBACK) && 1996 (mifp->if_flags & IFF_MULTICAST)) { 1997 ifp = mifp; 1998 break; 1999 } 2000 } 2001 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2002 } 2003 } 2004 2005 return (ifp); 2006 } 2007 2008 /* 2009 * Join an IPv4 multicast group, possibly with a source. 
2010 */ 2011 static int 2012 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2013 { 2014 struct group_source_req gsr; 2015 sockunion_t *gsa, *ssa; 2016 struct ifnet *ifp; 2017 struct in_mfilter *imf; 2018 struct ip_moptions *imo; 2019 struct in_multi *inm; 2020 struct in_msource *lims; 2021 size_t idx; 2022 int error, is_new; 2023 2024 ifp = NULL; 2025 imf = NULL; 2026 lims = NULL; 2027 error = 0; 2028 is_new = 0; 2029 2030 memset(&gsr, 0, sizeof(struct group_source_req)); 2031 gsa = (sockunion_t *)&gsr.gsr_group; 2032 gsa->ss.ss_family = AF_UNSPEC; 2033 ssa = (sockunion_t *)&gsr.gsr_source; 2034 ssa->ss.ss_family = AF_UNSPEC; 2035 2036 switch (sopt->sopt_name) { 2037 case IP_ADD_MEMBERSHIP: 2038 case IP_ADD_SOURCE_MEMBERSHIP: { 2039 struct ip_mreq_source mreqs; 2040 2041 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2042 error = sooptcopyin(sopt, &mreqs, 2043 sizeof(struct ip_mreq), 2044 sizeof(struct ip_mreq)); 2045 /* 2046 * Do argument switcharoo from ip_mreq into 2047 * ip_mreq_source to avoid using two instances. 
2048 */ 2049 mreqs.imr_interface = mreqs.imr_sourceaddr; 2050 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2051 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2052 error = sooptcopyin(sopt, &mreqs, 2053 sizeof(struct ip_mreq_source), 2054 sizeof(struct ip_mreq_source)); 2055 } 2056 if (error) 2057 return (error); 2058 2059 gsa->sin.sin_family = AF_INET; 2060 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2061 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2062 2063 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2064 ssa->sin.sin_family = AF_INET; 2065 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2066 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2067 } 2068 2069 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2070 return (EINVAL); 2071 2072 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2073 mreqs.imr_interface); 2074 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2075 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2076 break; 2077 } 2078 2079 case MCAST_JOIN_GROUP: 2080 case MCAST_JOIN_SOURCE_GROUP: 2081 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2082 error = sooptcopyin(sopt, &gsr, 2083 sizeof(struct group_req), 2084 sizeof(struct group_req)); 2085 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2086 error = sooptcopyin(sopt, &gsr, 2087 sizeof(struct group_source_req), 2088 sizeof(struct group_source_req)); 2089 } 2090 if (error) 2091 return (error); 2092 2093 if (gsa->sin.sin_family != AF_INET || 2094 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2095 return (EINVAL); 2096 2097 /* 2098 * Overwrite the port field if present, as the sockaddr 2099 * being copied in may be matched with a binary comparison. 
2100 */ 2101 gsa->sin.sin_port = 0; 2102 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2103 if (ssa->sin.sin_family != AF_INET || 2104 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2105 return (EINVAL); 2106 ssa->sin.sin_port = 0; 2107 } 2108 2109 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2110 return (EINVAL); 2111 2112 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2113 return (EADDRNOTAVAIL); 2114 ifp = ifnet_byindex(gsr.gsr_interface); 2115 break; 2116 2117 default: 2118 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2119 __func__, sopt->sopt_name); 2120 return (EOPNOTSUPP); 2121 break; 2122 } 2123 2124 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2125 return (EADDRNOTAVAIL); 2126 2127 imo = inp_findmoptions(inp); 2128 idx = imo_match_group(imo, ifp, &gsa->sa); 2129 if (idx == -1) { 2130 is_new = 1; 2131 } else { 2132 inm = imo->imo_membership[idx]; 2133 imf = &imo->imo_mfilters[idx]; 2134 if (ssa->ss.ss_family != AF_UNSPEC) { 2135 /* 2136 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2137 * is an error. On an existing inclusive membership, 2138 * it just adds the source to the filter list. 2139 */ 2140 if (imf->imf_st[1] != MCAST_INCLUDE) { 2141 error = EINVAL; 2142 goto out_inp_locked; 2143 } 2144 /* 2145 * Throw out duplicates. 2146 * 2147 * XXX FIXME: This makes a naive assumption that 2148 * even if entries exist for *ssa in this imf, 2149 * they will be rejected as dupes, even if they 2150 * are not valid in the current mode (in-mode). 2151 * 2152 * in_msource is transactioned just as for anything 2153 * else in SSM -- but note naive use of inm_graft() 2154 * below for allocating new filter entries. 2155 * 2156 * This is only an issue if someone mixes the 2157 * full-state SSM API with the delta-based API, 2158 * which is discouraged in the relevant RFCs. 
2159 */ 2160 lims = imo_match_source(imo, idx, &ssa->sa); 2161 if (lims != NULL /*&& 2162 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2163 error = EADDRNOTAVAIL; 2164 goto out_inp_locked; 2165 } 2166 } else { 2167 /* 2168 * MCAST_JOIN_GROUP on an existing exclusive 2169 * membership is an error; return EADDRINUSE 2170 * to preserve 4.4BSD API idempotence, and 2171 * avoid tedious detour to code below. 2172 * NOTE: This is bending RFC 3678 a bit. 2173 * 2174 * On an existing inclusive membership, this is also 2175 * an error; if you want to change filter mode, 2176 * you must use the userland API setsourcefilter(). 2177 * XXX We don't reject this for imf in UNDEFINED 2178 * state at t1, because allocation of a filter 2179 * is atomic with allocation of a membership. 2180 */ 2181 error = EINVAL; 2182 if (imf->imf_st[1] == MCAST_EXCLUDE) 2183 error = EADDRINUSE; 2184 goto out_inp_locked; 2185 } 2186 } 2187 2188 /* 2189 * Begin state merge transaction at socket layer. 2190 */ 2191 INP_WLOCK_ASSERT(inp); 2192 2193 if (is_new) { 2194 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2195 error = imo_grow(imo); 2196 if (error) 2197 goto out_inp_locked; 2198 } 2199 /* 2200 * Allocate the new slot upfront so we can deal with 2201 * grafting the new source filter in same code path 2202 * as for join-source on existing membership. 2203 */ 2204 idx = imo->imo_num_memberships; 2205 imo->imo_membership[idx] = NULL; 2206 imo->imo_num_memberships++; 2207 KASSERT(imo->imo_mfilters != NULL, 2208 ("%s: imf_mfilters vector was not allocated", __func__)); 2209 imf = &imo->imo_mfilters[idx]; 2210 KASSERT(RB_EMPTY(&imf->imf_sources), 2211 ("%s: imf_sources not empty", __func__)); 2212 } 2213 2214 /* 2215 * Graft new source into filter list for this inpcb's 2216 * membership of the group. The in_multi may not have 2217 * been allocated yet if this is a new membership, however, 2218 * the in_mfilter slot will be allocated and must be initialized. 
2219 * 2220 * Note: Grafting of exclusive mode filters doesn't happen 2221 * in this path. 2222 * XXX: Should check for non-NULL lims (node exists but may 2223 * not be in-mode) for interop with full-state API. 2224 */ 2225 if (ssa->ss.ss_family != AF_UNSPEC) { 2226 /* Membership starts in IN mode */ 2227 if (is_new) { 2228 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2229 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2230 } else { 2231 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2232 } 2233 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2234 if (lims == NULL) { 2235 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2236 __func__); 2237 error = ENOMEM; 2238 goto out_imo_free; 2239 } 2240 } else { 2241 /* No address specified; Membership starts in EX mode */ 2242 if (is_new) { 2243 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2244 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2245 } 2246 } 2247 2248 /* 2249 * Begin state merge transaction at IGMP layer. 2250 */ 2251 in_pcbref(inp); 2252 INP_WUNLOCK(inp); 2253 IN_MULTI_LOCK(); 2254 2255 if (is_new) { 2256 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2257 &inm); 2258 if (error) { 2259 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2260 __func__); 2261 IN_MULTI_LIST_UNLOCK(); 2262 goto out_imo_free; 2263 } 2264 imo->imo_membership[idx] = inm; 2265 } else { 2266 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2267 IN_MULTI_LIST_LOCK(); 2268 error = inm_merge(inm, imf); 2269 if (error) { 2270 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2271 __func__); 2272 IN_MULTI_LIST_UNLOCK(); 2273 goto out_in_multi_locked; 2274 } 2275 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2276 error = igmp_change_state(inm); 2277 IN_MULTI_LIST_UNLOCK(); 2278 if (error) { 2279 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2280 __func__); 2281 goto out_in_multi_locked; 2282 } 2283 } 2284 2285 out_in_multi_locked: 2286 2287 IN_MULTI_UNLOCK(); 2288 INP_WLOCK(inp); 2289 if 
(in_pcbrele_wlocked(inp)) 2290 return (ENXIO); 2291 if (error) { 2292 imf_rollback(imf); 2293 if (is_new) 2294 imf_purge(imf); 2295 else 2296 imf_reap(imf); 2297 } else { 2298 imf_commit(imf); 2299 } 2300 2301 out_imo_free: 2302 if (error && is_new) { 2303 imo->imo_membership[idx] = NULL; 2304 --imo->imo_num_memberships; 2305 } 2306 2307 out_inp_locked: 2308 INP_WUNLOCK(inp); 2309 return (error); 2310 } 2311 2312 /* 2313 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2314 */ 2315 static int 2316 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2317 { 2318 struct group_source_req gsr; 2319 struct ip_mreq_source mreqs; 2320 sockunion_t *gsa, *ssa; 2321 struct ifnet *ifp; 2322 struct in_mfilter *imf; 2323 struct ip_moptions *imo; 2324 struct in_msource *ims; 2325 struct in_multi *inm; 2326 size_t idx; 2327 int error, is_final; 2328 2329 ifp = NULL; 2330 error = 0; 2331 is_final = 1; 2332 2333 memset(&gsr, 0, sizeof(struct group_source_req)); 2334 gsa = (sockunion_t *)&gsr.gsr_group; 2335 gsa->ss.ss_family = AF_UNSPEC; 2336 ssa = (sockunion_t *)&gsr.gsr_source; 2337 ssa->ss.ss_family = AF_UNSPEC; 2338 2339 switch (sopt->sopt_name) { 2340 case IP_DROP_MEMBERSHIP: 2341 case IP_DROP_SOURCE_MEMBERSHIP: 2342 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2343 error = sooptcopyin(sopt, &mreqs, 2344 sizeof(struct ip_mreq), 2345 sizeof(struct ip_mreq)); 2346 /* 2347 * Swap interface and sourceaddr arguments, 2348 * as ip_mreq and ip_mreq_source are laid 2349 * out differently. 
2350 */ 2351 mreqs.imr_interface = mreqs.imr_sourceaddr; 2352 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2353 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2354 error = sooptcopyin(sopt, &mreqs, 2355 sizeof(struct ip_mreq_source), 2356 sizeof(struct ip_mreq_source)); 2357 } 2358 if (error) 2359 return (error); 2360 2361 gsa->sin.sin_family = AF_INET; 2362 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2363 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2364 2365 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2366 ssa->sin.sin_family = AF_INET; 2367 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2368 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2369 } 2370 2371 /* 2372 * Attempt to look up hinted ifp from interface address. 2373 * Fallthrough with null ifp iff lookup fails, to 2374 * preserve 4.4BSD mcast API idempotence. 2375 * XXX NOTE WELL: The RFC 3678 API is preferred because 2376 * using an IPv4 address as a key is racy. 2377 */ 2378 if (!in_nullhost(mreqs.imr_interface)) 2379 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2380 2381 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2382 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2383 2384 break; 2385 2386 case MCAST_LEAVE_GROUP: 2387 case MCAST_LEAVE_SOURCE_GROUP: 2388 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2389 error = sooptcopyin(sopt, &gsr, 2390 sizeof(struct group_req), 2391 sizeof(struct group_req)); 2392 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2393 error = sooptcopyin(sopt, &gsr, 2394 sizeof(struct group_source_req), 2395 sizeof(struct group_source_req)); 2396 } 2397 if (error) 2398 return (error); 2399 2400 if (gsa->sin.sin_family != AF_INET || 2401 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2402 return (EINVAL); 2403 2404 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2405 if (ssa->sin.sin_family != AF_INET || 2406 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2407 return (EINVAL); 2408 } 2409 2410 if (gsr.gsr_interface == 0 || V_if_index < 
gsr.gsr_interface) 2411 return (EADDRNOTAVAIL); 2412 2413 ifp = ifnet_byindex(gsr.gsr_interface); 2414 2415 if (ifp == NULL) 2416 return (EADDRNOTAVAIL); 2417 break; 2418 2419 default: 2420 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2421 __func__, sopt->sopt_name); 2422 return (EOPNOTSUPP); 2423 break; 2424 } 2425 2426 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2427 return (EINVAL); 2428 2429 /* 2430 * Find the membership in the membership array. 2431 */ 2432 imo = inp_findmoptions(inp); 2433 idx = imo_match_group(imo, ifp, &gsa->sa); 2434 if (idx == -1) { 2435 error = EADDRNOTAVAIL; 2436 goto out_inp_locked; 2437 } 2438 inm = imo->imo_membership[idx]; 2439 imf = &imo->imo_mfilters[idx]; 2440 2441 if (ssa->ss.ss_family != AF_UNSPEC) 2442 is_final = 0; 2443 2444 /* 2445 * Begin state merge transaction at socket layer. 2446 */ 2447 INP_WLOCK_ASSERT(inp); 2448 2449 /* 2450 * If we were instructed only to leave a given source, do so. 2451 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2452 */ 2453 if (is_final) { 2454 imf_leave(imf); 2455 } else { 2456 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2457 error = EADDRNOTAVAIL; 2458 goto out_inp_locked; 2459 } 2460 ims = imo_match_source(imo, idx, &ssa->sa); 2461 if (ims == NULL) { 2462 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2463 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2464 error = EADDRNOTAVAIL; 2465 goto out_inp_locked; 2466 } 2467 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2468 error = imf_prune(imf, &ssa->sin); 2469 if (error) { 2470 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2471 __func__); 2472 goto out_inp_locked; 2473 } 2474 } 2475 2476 /* 2477 * Begin state merge transaction at IGMP layer. 2478 */ 2479 in_pcbref(inp); 2480 INP_WUNLOCK(inp); 2481 IN_MULTI_LOCK(); 2482 2483 if (is_final) { 2484 /* 2485 * Give up the multicast address record to which 2486 * the membership points. 
2487 */ 2488 (void)in_leavegroup_locked(inm, imf); 2489 } else { 2490 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2491 IN_MULTI_LIST_LOCK(); 2492 error = inm_merge(inm, imf); 2493 if (error) { 2494 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2495 __func__); 2496 IN_MULTI_LIST_UNLOCK(); 2497 goto out_in_multi_locked; 2498 } 2499 2500 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2501 error = igmp_change_state(inm); 2502 IN_MULTI_LIST_UNLOCK(); 2503 if (error) { 2504 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2505 __func__); 2506 } 2507 } 2508 2509 out_in_multi_locked: 2510 2511 IN_MULTI_UNLOCK(); 2512 INP_WLOCK(inp); 2513 if (in_pcbrele_wlocked(inp)) 2514 return (ENXIO); 2515 2516 if (error) 2517 imf_rollback(imf); 2518 else 2519 imf_commit(imf); 2520 2521 imf_reap(imf); 2522 2523 if (is_final) { 2524 /* Remove the gap in the membership and filter array. */ 2525 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2526 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2527 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2528 } 2529 imo->imo_num_memberships--; 2530 } 2531 2532 out_inp_locked: 2533 INP_WUNLOCK(inp); 2534 return (error); 2535 } 2536 2537 /* 2538 * Select the interface for transmitting IPv4 multicast datagrams. 2539 * 2540 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2541 * may be passed to this socket option. An address of INADDR_ANY or an 2542 * interface index of 0 is used to remove a previous selection. 2543 * When no interface is selected, one is chosen for every send. 2544 */ 2545 static int 2546 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2547 { 2548 struct in_addr addr; 2549 struct ip_mreqn mreqn; 2550 struct ifnet *ifp; 2551 struct ip_moptions *imo; 2552 int error; 2553 2554 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2555 /* 2556 * An interface index was specified using the 2557 * Linux-derived ip_mreqn structure. 
2558 */ 2559 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2560 sizeof(struct ip_mreqn)); 2561 if (error) 2562 return (error); 2563 2564 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2565 return (EINVAL); 2566 2567 if (mreqn.imr_ifindex == 0) { 2568 ifp = NULL; 2569 } else { 2570 ifp = ifnet_byindex(mreqn.imr_ifindex); 2571 if (ifp == NULL) 2572 return (EADDRNOTAVAIL); 2573 } 2574 } else { 2575 /* 2576 * An interface was specified by IPv4 address. 2577 * This is the traditional BSD usage. 2578 */ 2579 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2580 sizeof(struct in_addr)); 2581 if (error) 2582 return (error); 2583 if (in_nullhost(addr)) { 2584 ifp = NULL; 2585 } else { 2586 INADDR_TO_IFP(addr, ifp); 2587 if (ifp == NULL) 2588 return (EADDRNOTAVAIL); 2589 } 2590 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2591 ntohl(addr.s_addr)); 2592 } 2593 2594 /* Reject interfaces which do not support multicast. */ 2595 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2596 return (EOPNOTSUPP); 2597 2598 imo = inp_findmoptions(inp); 2599 imo->imo_multicast_ifp = ifp; 2600 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2601 INP_WUNLOCK(inp); 2602 2603 return (0); 2604 } 2605 2606 /* 2607 * Atomically set source filters on a socket for an IPv4 multicast group. 2608 * 2609 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
2610 */ 2611 static int 2612 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2613 { 2614 struct __msfilterreq msfr; 2615 sockunion_t *gsa; 2616 struct ifnet *ifp; 2617 struct in_mfilter *imf; 2618 struct ip_moptions *imo; 2619 struct in_multi *inm; 2620 size_t idx; 2621 int error; 2622 2623 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2624 sizeof(struct __msfilterreq)); 2625 if (error) 2626 return (error); 2627 2628 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2629 return (ENOBUFS); 2630 2631 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2632 msfr.msfr_fmode != MCAST_INCLUDE)) 2633 return (EINVAL); 2634 2635 if (msfr.msfr_group.ss_family != AF_INET || 2636 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2637 return (EINVAL); 2638 2639 gsa = (sockunion_t *)&msfr.msfr_group; 2640 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2641 return (EINVAL); 2642 2643 gsa->sin.sin_port = 0; /* ignore port */ 2644 2645 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2646 return (EADDRNOTAVAIL); 2647 2648 ifp = ifnet_byindex(msfr.msfr_ifindex); 2649 if (ifp == NULL) 2650 return (EADDRNOTAVAIL); 2651 2652 /* 2653 * Take the INP write lock. 2654 * Check if this socket is a member of this group. 2655 */ 2656 imo = inp_findmoptions(inp); 2657 idx = imo_match_group(imo, ifp, &gsa->sa); 2658 if (idx == -1 || imo->imo_mfilters == NULL) { 2659 error = EADDRNOTAVAIL; 2660 goto out_inp_locked; 2661 } 2662 inm = imo->imo_membership[idx]; 2663 imf = &imo->imo_mfilters[idx]; 2664 2665 /* 2666 * Begin state merge transaction at socket layer. 2667 */ 2668 INP_WLOCK_ASSERT(inp); 2669 2670 imf->imf_st[1] = msfr.msfr_fmode; 2671 2672 /* 2673 * Apply any new source filters, if present. 2674 * Make a copy of the user-space source vector so 2675 * that we may copy them with a single copyin. This 2676 * allows us to deal with page faults up-front. 
2677 */ 2678 if (msfr.msfr_nsrcs > 0) { 2679 struct in_msource *lims; 2680 struct sockaddr_in *psin; 2681 struct sockaddr_storage *kss, *pkss; 2682 int i; 2683 2684 INP_WUNLOCK(inp); 2685 2686 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2687 __func__, (unsigned long)msfr.msfr_nsrcs); 2688 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2689 M_TEMP, M_WAITOK); 2690 error = copyin(msfr.msfr_srcs, kss, 2691 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2692 if (error) { 2693 free(kss, M_TEMP); 2694 return (error); 2695 } 2696 2697 INP_WLOCK(inp); 2698 2699 /* 2700 * Mark all source filters as UNDEFINED at t1. 2701 * Restore new group filter mode, as imf_leave() 2702 * will set it to INCLUDE. 2703 */ 2704 imf_leave(imf); 2705 imf->imf_st[1] = msfr.msfr_fmode; 2706 2707 /* 2708 * Update socket layer filters at t1, lazy-allocating 2709 * new entries. This saves a bunch of memory at the 2710 * cost of one RB_FIND() per source entry; duplicate 2711 * entries in the msfr_nsrcs vector are ignored. 2712 * If we encounter an error, rollback transaction. 2713 * 2714 * XXX This too could be replaced with a set-symmetric 2715 * difference like loop to avoid walking from root 2716 * every time, as the key space is common. 2717 */ 2718 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2719 psin = (struct sockaddr_in *)pkss; 2720 if (psin->sin_family != AF_INET) { 2721 error = EAFNOSUPPORT; 2722 break; 2723 } 2724 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2725 error = EINVAL; 2726 break; 2727 } 2728 error = imf_get_source(imf, psin, &lims); 2729 if (error) 2730 break; 2731 lims->imsl_st[1] = imf->imf_st[1]; 2732 } 2733 free(kss, M_TEMP); 2734 } 2735 2736 if (error) 2737 goto out_imf_rollback; 2738 2739 INP_WLOCK_ASSERT(inp); 2740 IN_MULTI_LOCK(); 2741 2742 /* 2743 * Begin state merge transaction at IGMP layer. 
2744 */ 2745 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2746 IN_MULTI_LIST_LOCK(); 2747 error = inm_merge(inm, imf); 2748 if (error) { 2749 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2750 IN_MULTI_LIST_UNLOCK(); 2751 goto out_in_multi_locked; 2752 } 2753 2754 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2755 error = igmp_change_state(inm); 2756 IN_MULTI_LIST_UNLOCK(); 2757 if (error) 2758 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2759 2760 out_in_multi_locked: 2761 2762 IN_MULTI_UNLOCK(); 2763 2764 out_imf_rollback: 2765 if (error) 2766 imf_rollback(imf); 2767 else 2768 imf_commit(imf); 2769 2770 imf_reap(imf); 2771 2772 out_inp_locked: 2773 INP_WUNLOCK(inp); 2774 return (error); 2775 } 2776 2777 /* 2778 * Set the IP multicast options in response to user setsockopt(). 2779 * 2780 * Many of the socket options handled in this function duplicate the 2781 * functionality of socket options in the regular unicast API. However, 2782 * it is not possible to merge the duplicate code, because the idempotence 2783 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2784 * the effects of these options must be treated as separate and distinct. 2785 * 2786 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2787 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2788 * is refactored to no longer use vifs. 2789 */ 2790 int 2791 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2792 { 2793 struct ip_moptions *imo; 2794 int error; 2795 2796 error = 0; 2797 2798 /* 2799 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2800 * or is a divert socket, reject it. 
2801 */ 2802 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2803 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2804 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2805 return (EOPNOTSUPP); 2806 2807 switch (sopt->sopt_name) { 2808 case IP_MULTICAST_VIF: { 2809 int vifi; 2810 /* 2811 * Select a multicast VIF for transmission. 2812 * Only useful if multicast forwarding is active. 2813 */ 2814 if (legal_vif_num == NULL) { 2815 error = EOPNOTSUPP; 2816 break; 2817 } 2818 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2819 if (error) 2820 break; 2821 if (!legal_vif_num(vifi) && (vifi != -1)) { 2822 error = EINVAL; 2823 break; 2824 } 2825 imo = inp_findmoptions(inp); 2826 imo->imo_multicast_vif = vifi; 2827 INP_WUNLOCK(inp); 2828 break; 2829 } 2830 2831 case IP_MULTICAST_IF: 2832 error = inp_set_multicast_if(inp, sopt); 2833 break; 2834 2835 case IP_MULTICAST_TTL: { 2836 u_char ttl; 2837 2838 /* 2839 * Set the IP time-to-live for outgoing multicast packets. 2840 * The original multicast API required a char argument, 2841 * which is inconsistent with the rest of the socket API. 2842 * We allow either a char or an int. 2843 */ 2844 if (sopt->sopt_valsize == sizeof(u_char)) { 2845 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2846 sizeof(u_char)); 2847 if (error) 2848 break; 2849 } else { 2850 u_int ittl; 2851 2852 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2853 sizeof(u_int)); 2854 if (error) 2855 break; 2856 if (ittl > 255) { 2857 error = EINVAL; 2858 break; 2859 } 2860 ttl = (u_char)ittl; 2861 } 2862 imo = inp_findmoptions(inp); 2863 imo->imo_multicast_ttl = ttl; 2864 INP_WUNLOCK(inp); 2865 break; 2866 } 2867 2868 case IP_MULTICAST_LOOP: { 2869 u_char loop; 2870 2871 /* 2872 * Set the loopback flag for outgoing multicast packets. 2873 * Must be zero or one. The original multicast API required a 2874 * char argument, which is inconsistent with the rest 2875 * of the socket API. We allow either a char or an int. 
2876 */ 2877 if (sopt->sopt_valsize == sizeof(u_char)) { 2878 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2879 sizeof(u_char)); 2880 if (error) 2881 break; 2882 } else { 2883 u_int iloop; 2884 2885 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2886 sizeof(u_int)); 2887 if (error) 2888 break; 2889 loop = (u_char)iloop; 2890 } 2891 imo = inp_findmoptions(inp); 2892 imo->imo_multicast_loop = !!loop; 2893 INP_WUNLOCK(inp); 2894 break; 2895 } 2896 2897 case IP_ADD_MEMBERSHIP: 2898 case IP_ADD_SOURCE_MEMBERSHIP: 2899 case MCAST_JOIN_GROUP: 2900 case MCAST_JOIN_SOURCE_GROUP: 2901 error = inp_join_group(inp, sopt); 2902 break; 2903 2904 case IP_DROP_MEMBERSHIP: 2905 case IP_DROP_SOURCE_MEMBERSHIP: 2906 case MCAST_LEAVE_GROUP: 2907 case MCAST_LEAVE_SOURCE_GROUP: 2908 error = inp_leave_group(inp, sopt); 2909 break; 2910 2911 case IP_BLOCK_SOURCE: 2912 case IP_UNBLOCK_SOURCE: 2913 case MCAST_BLOCK_SOURCE: 2914 case MCAST_UNBLOCK_SOURCE: 2915 error = inp_block_unblock_source(inp, sopt); 2916 break; 2917 2918 case IP_MSFILTER: 2919 error = inp_set_source_filters(inp, sopt); 2920 break; 2921 2922 default: 2923 error = EOPNOTSUPP; 2924 break; 2925 } 2926 2927 INP_UNLOCK_ASSERT(inp); 2928 2929 return (error); 2930 } 2931 2932 /* 2933 * Expose IGMP's multicast filter mode and source list(s) to userland, 2934 * keyed by (ifindex, group). 2935 * The filter mode is written out as a uint32_t, followed by 2936 * 0..n of struct in_addr. 2937 * For use by ifmcstat(8). 2938 * SMPng: NOTE: unlocked read of ifindex space. 
2939 */ 2940 static int 2941 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2942 { 2943 struct in_addr src, group; 2944 struct ifnet *ifp; 2945 struct ifmultiaddr *ifma; 2946 struct in_multi *inm; 2947 struct ip_msource *ims; 2948 int *name; 2949 int retval; 2950 u_int namelen; 2951 uint32_t fmode, ifindex; 2952 2953 name = (int *)arg1; 2954 namelen = arg2; 2955 2956 if (req->newptr != NULL) 2957 return (EPERM); 2958 2959 if (namelen != 2) 2960 return (EINVAL); 2961 2962 ifindex = name[0]; 2963 if (ifindex <= 0 || ifindex > V_if_index) { 2964 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2965 __func__, ifindex); 2966 return (ENOENT); 2967 } 2968 2969 group.s_addr = name[1]; 2970 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2971 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2972 __func__, ntohl(group.s_addr)); 2973 return (EINVAL); 2974 } 2975 2976 ifp = ifnet_byindex(ifindex); 2977 if (ifp == NULL) { 2978 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2979 __func__, ifindex); 2980 return (ENOENT); 2981 } 2982 2983 retval = sysctl_wire_old_buffer(req, 2984 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2985 if (retval) 2986 return (retval); 2987 2988 IN_MULTI_LIST_LOCK(); 2989 2990 IF_ADDR_RLOCK(ifp); 2991 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2992 if (ifma->ifma_addr->sa_family != AF_INET || 2993 ifma->ifma_protospec == NULL) 2994 continue; 2995 inm = (struct in_multi *)ifma->ifma_protospec; 2996 if (!in_hosteq(inm->inm_addr, group)) 2997 continue; 2998 fmode = inm->inm_st[1].iss_fmode; 2999 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 3000 if (retval != 0) 3001 break; 3002 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 3003 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 3004 ims->ims_haddr); 3005 /* 3006 * Only copy-out sources which are in-mode. 
#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)

/* Short filter-mode names, indexed by mode value. */
static const char *inm_modestrs[] = { "un", "in", "ex" };

/*
 * Map a filter mode to its short name; anything outside
 * MCAST_UNDEFINED..MCAST_EXCLUDE yields "??".
 */
static const char *
inm_mode_str(const int mode)
{

	if (mode < MCAST_UNDEFINED || mode > MCAST_EXCLUDE)
		return ("??");
	return (inm_modestrs[mode]);
}

/* Membership state names, indexed by state value. */
static const char *inm_statestrs[] = {
	"not-member",
	"silent",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
	"leaving"
};

/*
 * Map a membership state to its name; anything outside
 * IGMP_NOT_MEMBER..IGMP_LEAVING_MEMBER yields "??".
 */
static const char *
inm_state_str(const int state)
{

	if (state < IGMP_NOT_MEMBER || state > IGMP_LEAVING_MEMBER)
		return ("??");
	return (inm_statestrs[state]);
}

/*
 * Print the contents of an in_multi with printf().
 * Produces no output unless KTR_IGMPV3 is set in ktr_mask.
 */
void
inm_print(const struct in_multi *inm)
{
	char addrbuf[INET_ADDRSTRLEN];
	int slot;

	if (!(ktr_mask & KTR_IGMPV3))
		return;

	printf("%s: --- begin inm %p ---\n", __func__, inm);
	printf("addr %s ifp %p(%s) ifma %p\n",
	    inet_ntoa_r(inm->inm_addr, addrbuf),
	    inm->inm_ifp,
	    inm->inm_ifp->if_xname,
	    inm->inm_ifma);
	printf("timer %u state %s refcount %u scq.len %u\n",
	    inm->inm_timer,
	    inm_state_str(inm->inm_state),
	    inm->inm_refcount,
	    inm->inm_scq.mq_len);
	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
	    inm->inm_igi,
	    inm->inm_nsrc,
	    inm->inm_sctimer,
	    inm->inm_scrv);
	/* One line for each of the two inm_st[] entries. */
	for (slot = 0; slot < 2; ++slot) {
		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", slot,
		    inm_mode_str(inm->inm_st[slot].iss_fmode),
		    inm->inm_st[slot].iss_asm,
		    inm->inm_st[slot].iss_ex,
		    inm->inm_st[slot].iss_in,
		    inm->inm_st[slot].iss_rec);
	}
	printf("%s: --- end inm %p ---\n", __func__, inm);
}

#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */

/* Empty stand-in used when the KTR_IGMPV3 trace class is not compiled in. */
void
inm_print(const struct in_multi *inm)
{

}

#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */