1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. 
There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct 
in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 169 static int inp_join_group(struct inpcb *, struct sockopt *); 170 static int inp_leave_group(struct inpcb *, struct sockopt *); 171 static struct ifnet * 172 inp_lookup_mcast_ifp(const struct inpcb *, 173 const struct sockaddr_in *, const struct in_addr); 174 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 175 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 176 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 177 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 178 179 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 180 "IPv4 multicast"); 181 182 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 183 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 184 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 185 "Max source filters per group"); 186 187 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 188 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 189 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 190 "Max source filters per socket"); 191 192 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 193 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 194 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 195 196 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 197 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 198 "Per-interface stack-wide source filters"); 199 
#ifdef KTR
/*
 * Inline function which wraps assertions for a valid ifp.
 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
 * is detached.
 *
 * Returns non-zero if the ifma attached to inm no longer has an ifp,
 * zero otherwise.  inm->inm_ifma must not be NULL.
 */
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
	struct ifnet *ifp;

	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->inm_ifma->ifma_ifp;
	if (ifp != NULL) {
		/*
		 * Sanity check that netinet's notion of ifp is the
		 * same as net's.
		 */
		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
	}

	return (ifp == NULL);
}
#endif

/*
 * Deferred release machinery.
 *
 * in_multi records whose refcount has dropped to zero are queued on
 * inm_free_list (protected by in_multi_free_mtx) and torn down later by
 * inm_release_task(), which is run via the free_gtask group task.
 */
static struct grouptask free_gtask;
static struct in_multi_head inm_free_list;
static void inm_release_task(void *arg __unused);

/*
 * One-time initialization, run from SYSINIT: empty the global free list
 * and register inm_release_task() as the handler for free_gtask.
 */
static void inm_init(void)
{
	SLIST_INIT(&inm_free_list);
	taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task");
}

SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
	inm_init, NULL);


/*
 * Hand a list of in_multi records over for deferred release.
 *
 * The entries of inmh are spliced onto the global free list under
 * in_multi_free_mtx and the release task is enqueued.  An empty list is
 * a no-op (no lock taken, no task enqueued).
 */
void
inm_release_list_deferred(struct in_multi_head *inmh)
{

	if (SLIST_EMPTY(inmh))
		return;
	mtx_lock(&in_multi_free_mtx);
	SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
	mtx_unlock(&in_multi_free_mtx);
	GROUPTASK_ENQUEUE(&free_gtask);
}

/*
 * Unlink inm's ifmultiaddr (and, if this was its last user, the associated
 * link-layer ifmultiaddr) from the interface's multicast address list.
 *
 * The caller must hold the IF_ADDR write lock on inm->inm_ifp.
 * A reference on the ifp is taken here; inm_release() drops a reference
 * with if_rele() when it finally frees the record.
 */
void
inm_disconnect(struct in_multi *inm)
{
	struct ifnet *ifp;
	struct ifmultiaddr *ifma, *ll_ifma;

	ifp = inm->inm_ifp;
	IF_ADDR_WLOCK_ASSERT(ifp);
	ifma = inm->inm_ifma;

	/* Keep the ifp alive until the deferred inm_release() has run. */
	if_ref(ifp);
	CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link);
	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
		MPASS(ifma != ll_ifma);
		/* Sever the network-layer -> link-layer association. */
		ifma->ifma_llifma = NULL;
		MPASS(ll_ifma->ifma_llifma == NULL);
		MPASS(ll_ifma->ifma_ifp == ifp);
		if (--ll_ifma->ifma_refcount == 0) {
			/* Last user of the link-layer entry: unlink and free it. */
			CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link);
			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
			if_freemulti(ll_ifma);
			/*
			 * NOTE(review): presumably tells code walking
			 * if_multiaddrs elsewhere to restart its scan;
			 * the consumer is not visible here -- confirm.
			 */
			ifma_restart = true;
		}
	}
}

/*
 * Drop one reference on inm; when it was the last one, disconnect the
 * record from its interface and queue it for deferred freeing.
 *
 * The caller must hold the IN_MULTI_LIST_LOCK.  Because this calls
 * inm_disconnect(), the IF_ADDR write lock on inm->inm_ifp must also be
 * held whenever the refcount may reach zero.
 */
void
inm_release_deferred(struct in_multi *inm)
{
	struct in_multi_head tmp;

	IN_MULTI_LIST_LOCK_ASSERT();
	MPASS(inm->inm_refcount > 0);
	if (--inm->inm_refcount == 0) {
		SLIST_INIT(&tmp);
		inm_disconnect(inm);
		/* The ifma must no longer lead back to this record. */
		inm->inm_ifma->ifma_protospec = NULL;
		SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
		inm_release_list_deferred(&tmp);
	}
}

/*
 * Group task handler: free every in_multi queued for deferred release.
 *
 * The global free list is moved to a private list while holding
 * in_multi_free_mtx, so that the (potentially slow) inm_release() calls
 * run without that mutex held, under IN_MULTI_LOCK only.
 */
static void
inm_release_task(void *arg __unused)
{
	struct in_multi_head inm_free_tmp;
	struct in_multi *inm, *tinm;

	SLIST_INIT(&inm_free_tmp);
	mtx_lock(&in_multi_free_mtx);
	SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele);
	mtx_unlock(&in_multi_free_mtx);
	IN_MULTI_LOCK();
	SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) {
		/* inm is always the current head, so pop it before freeing. */
		SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele);
		MPASS(inm);
		inm_release(inm);
	}
	IN_MULTI_UNLOCK();
}

/*
 * Initialize an in_mfilter structure to a known state at t0, t1
 * with an empty source filter list.
 *
 * The whole structure is zeroed first, so the source count starts at 0;
 * st0 and st1 become the filter modes at t0 and t1 respectively.
 */
static __inline void
imf_init(struct in_mfilter *imf, const int st0, const int st1)
{
	memset(imf, 0, sizeof(struct in_mfilter));
	RB_INIT(&imf->imf_sources);
	imf->imf_st[0] = st0;
	imf->imf_st[1] = st1;
}
329 */ 330 struct in_multi * 331 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 332 { 333 struct ifmultiaddr *ifma; 334 struct in_multi *inm; 335 336 IN_MULTI_LIST_LOCK_ASSERT(); 337 IF_ADDR_LOCK_ASSERT(ifp); 338 339 inm = NULL; 340 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 341 if (ifma->ifma_addr->sa_family != AF_INET || 342 ifma->ifma_protospec == NULL) 343 continue; 344 inm = (struct in_multi *)ifma->ifma_protospec; 345 if (inm->inm_addr.s_addr == ina.s_addr) 346 break; 347 inm = NULL; 348 } 349 return (inm); 350 } 351 352 /* 353 * Wrapper for inm_lookup_locked(). 354 * The IF_ADDR_LOCK will be taken on ifp and released on return. 355 */ 356 struct in_multi * 357 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 358 { 359 struct in_multi *inm; 360 361 IN_MULTI_LIST_LOCK_ASSERT(); 362 IF_ADDR_RLOCK(ifp); 363 inm = inm_lookup_locked(ifp, ina); 364 IF_ADDR_RUNLOCK(ifp); 365 366 return (inm); 367 } 368 369 /* 370 * Resize the ip_moptions vector to the next power-of-two minus 1. 371 * May be called with locks held; do not sleep. 372 */ 373 static int 374 imo_grow(struct ip_moptions *imo) 375 { 376 struct in_multi **nmships; 377 struct in_multi **omships; 378 struct in_mfilter *nmfilters; 379 struct in_mfilter *omfilters; 380 size_t idx; 381 size_t newmax; 382 size_t oldmax; 383 384 nmships = NULL; 385 nmfilters = NULL; 386 omships = imo->imo_membership; 387 omfilters = imo->imo_mfilters; 388 oldmax = imo->imo_max_memberships; 389 newmax = ((oldmax + 1) * 2) - 1; 390 391 if (newmax <= IP_MAX_MEMBERSHIPS) { 392 nmships = (struct in_multi **)realloc(omships, 393 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 394 nmfilters = (struct in_mfilter *)realloc(omfilters, 395 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 396 if (nmships != NULL && nmfilters != NULL) { 397 /* Initialize newly allocated source filter heads. 
*/ 398 for (idx = oldmax; idx < newmax; idx++) { 399 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 400 MCAST_EXCLUDE); 401 } 402 imo->imo_max_memberships = newmax; 403 imo->imo_membership = nmships; 404 imo->imo_mfilters = nmfilters; 405 } 406 } 407 408 if (nmships == NULL || nmfilters == NULL) { 409 if (nmships != NULL) 410 free(nmships, M_IPMOPTS); 411 if (nmfilters != NULL) 412 free(nmfilters, M_INMFILTER); 413 return (ETOOMANYREFS); 414 } 415 416 return (0); 417 } 418 419 /* 420 * Find an IPv4 multicast group entry for this ip_moptions instance 421 * which matches the specified group, and optionally an interface. 422 * Return its index into the array, or -1 if not found. 423 */ 424 static size_t 425 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 426 const struct sockaddr *group) 427 { 428 const struct sockaddr_in *gsin; 429 struct in_multi **pinm; 430 int idx; 431 int nmships; 432 433 gsin = (const struct sockaddr_in *)group; 434 435 /* The imo_membership array may be lazy allocated. */ 436 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 437 return (-1); 438 439 nmships = imo->imo_num_memberships; 440 pinm = &imo->imo_membership[0]; 441 for (idx = 0; idx < nmships; idx++, pinm++) { 442 if (*pinm == NULL) 443 continue; 444 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 445 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 446 break; 447 } 448 } 449 if (idx >= nmships) 450 idx = -1; 451 452 return (idx); 453 } 454 455 /* 456 * Find an IPv4 multicast source entry for this imo which matches 457 * the given group index for this socket, and source address. 458 * 459 * NOTE: This does not check if the entry is in-mode, merely if 460 * it exists, which may not be the desired behaviour. 
/*
 * Find an IPv4 multicast source entry for this imo which matches
 * the given group index for this socket, and source address.
 *
 * NOTE: This does not check if the entry is in-mode, merely if
 * it exists, which may not be the desired behaviour.
 *
 * gidx must be a valid membership index (as returned by
 * imo_match_group()); src must be an AF_INET address.
 * Returns NULL if no filter array exists or the source is not listed.
 */
static struct in_msource *
imo_match_source(const struct ip_moptions *imo, const size_t gidx,
    const struct sockaddr *src)
{
	struct ip_msource	 find;
	struct in_mfilter	*imf;
	struct ip_msource	*ims;
	const sockunion_t	*psa;

	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
	    ("%s: invalid index %d\n", __func__, (int)gidx));

	/* The imo_mfilters array may be lazy allocated. */
	if (imo->imo_mfilters == NULL)
		return (NULL);
	imf = &imo->imo_mfilters[gidx];

	/* Source trees are keyed in host byte order. */
	psa = (const sockunion_t *)src;
	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);

	/* Socket-layer tree nodes are really struct in_msource. */
	return ((struct in_msource *)ims);
}

/*
 * Perform filtering for multicast datagrams on a socket by group and source.
 *
 * Returns 0 if a datagram should be allowed through, or various error codes
 * if the socket was not a member of the group, or the source was muted, etc.
 *
 * Specifically: MCAST_NOTGMEMBER when the socket has no membership for
 * (ifp, group), MCAST_NOTSMEMBER when the source is filtered out, and
 * MCAST_PASS otherwise.  ifp must not be NULL.
 */
int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group, const struct sockaddr *src)
{
	size_t gidx;
	struct in_msource *ims;
	int mode;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));

	gidx = imo_match_group(imo, ifp, group);
	if (gidx == -1)
		return (MCAST_NOTGMEMBER);

	/*
	 * Check if the source was included in an (S,G) join.
	 * Allow reception on exclusive memberships by default,
	 * reject reception on inclusive memberships by default.
	 * Exclude source only if an in-mode exclude filter exists.
	 * Include source only if an in-mode include filter exists.
	 * NOTE: We are comparing group state here at IGMP t1 (now)
	 * with socket-layer t0 (since last downcall).
	 */
	/*
	 * NOTE(review): imo_mfilters is dereferenced here before
	 * imo_match_source() performs its NULL check on the same array;
	 * this assumes the filter array always exists once a membership
	 * does -- confirm against the allocation path.
	 */
	mode = imo->imo_mfilters[gidx].imf_st[1];
	ims = imo_match_source(imo, gidx, src);

	if ((ims == NULL && mode == MCAST_INCLUDE) ||
	    (ims != NULL && ims->imsl_st[0] != mode))
		return (MCAST_NOTSMEMBER);

	return (MCAST_PASS);
}

/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer multicast
 * filters on ifp to listen for group.
 * Assumes the IN_MULTI lock is held across the call.
 * Return 0 if successful, otherwise return an appropriate error code.
 *
 * On success *pinm points at the (new or existing) record.  The function
 * takes and releases the IN_MULTI_LIST_LOCK and the IF_ADDR write lock
 * itself.  Possible errors are whatever if_addmulti() returns, or ENOMEM
 * if the record could not be allocated.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in	 gsin;
	struct ifmultiaddr	*ifma;
	struct in_ifinfo	*ii;
	struct in_multi		*inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
	IN_MULTI_LIST_LOCK();
	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		inm_acquire_locked(inm);
		*pinm = inm;
	}
	IN_MULTI_LIST_UNLOCK();
	if (inm != NULL)
		return (0);

	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 */
	/* Called with the list lock dropped; it is re-taken just below. */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IN_MULTI_LIST_LOCK();
	IF_ADDR_WLOCK(ifp);

	/*
	 * If something other than netinet is occupying the link-layer
	 * group, print a meaningful error message and back out of
	 * the allocation.
	 * Otherwise, bump the refcount on the existing network-layer
	 * group association and return it.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group)) {
			char addrbuf[INET_ADDRSTRLEN];

			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
		}
#endif
		inm_acquire_locked(inm);
		*pinm = inm;
		goto out_locked;
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/* Back out the link-layer membership obtained above. */
		IF_ADDR_WUNLOCK(ifp);
		IN_MULTI_LIST_UNLOCK();
		if_delmulti_ifma(ifma);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;
	inm->inm_state = IGMP_NOT_MEMBER;
	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Publish the record so inm_lookup_locked() can find it. */
	ifma->ifma_protospec = inm;

	*pinm = inm;
 out_locked:
	IF_ADDR_WUNLOCK(ifp);
	IN_MULTI_LIST_UNLOCK();
	return (0);
}

/*
 * Free an in_multi record whose reference count has already reached 0,
 * and delete the underlying link-layer membership.
 *
 * The source tree is purged, the record itself is freed and the ifma is
 * handed to if_delmulti_ifma_flags().  When the record still has an ifp,
 * the work is done in that ifp's vnet context and one ifp reference is
 * dropped with if_rele() afterwards.
 */
static void
inm_release(struct in_multi *inm)
{
	struct ifmultiaddr *ifma;
	struct ifnet *ifp;

	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
	MPASS(inm->inm_refcount == 0);
	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);

	/* Cache these now; inm is freed before they are last used. */
	ifma = inm->inm_ifma;
	ifp = inm->inm_ifp;

	/* XXX this access is not covered by IF_ADDR_LOCK */
	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
	if (ifp != NULL) {
		CURVNET_SET(ifp->if_vnet);
		inm_purge(inm);
		free(inm, M_IPMADDR);
		if_delmulti_ifma_flags(ifma, 1);
		CURVNET_RESTORE();
		if_rele(ifp);
	} else {
		inm_purge(inm);
		free(inm, M_IPMADDR);
		if_delmulti_ifma_flags(ifma, 1);
	}
}
684 */ 685 void 686 inm_clear_recorded(struct in_multi *inm) 687 { 688 struct ip_msource *ims; 689 690 IN_MULTI_LIST_LOCK_ASSERT(); 691 692 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 693 if (ims->ims_stp) { 694 ims->ims_stp = 0; 695 --inm->inm_st[1].iss_rec; 696 } 697 } 698 KASSERT(inm->inm_st[1].iss_rec == 0, 699 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 700 } 701 702 /* 703 * Record a source as pending for a Source-Group IGMPv3 query. 704 * This lives here as it modifies the shared tree. 705 * 706 * inm is the group descriptor. 707 * naddr is the address of the source to record in network-byte order. 708 * 709 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 710 * lazy-allocate a source node in response to an SG query. 711 * Otherwise, no allocation is performed. This saves some memory 712 * with the trade-off that the source will not be reported to the 713 * router if joined in the window between the query response and 714 * the group actually being joined on the local host. 715 * 716 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 717 * This turns off the allocation of a recorded source entry if 718 * the group has not been joined. 719 * 720 * Return 0 if the source didn't exist or was already marked as recorded. 721 * Return 1 if the source was marked as recorded by this function. 722 * Return <0 if any error occurred (negated errno code). 
723 */ 724 int 725 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 726 { 727 struct ip_msource find; 728 struct ip_msource *ims, *nims; 729 730 IN_MULTI_LIST_LOCK_ASSERT(); 731 732 find.ims_haddr = ntohl(naddr); 733 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 734 if (ims && ims->ims_stp) 735 return (0); 736 if (ims == NULL) { 737 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 738 return (-ENOSPC); 739 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 740 M_NOWAIT | M_ZERO); 741 if (nims == NULL) 742 return (-ENOMEM); 743 nims->ims_haddr = find.ims_haddr; 744 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 745 ++inm->inm_nsrc; 746 ims = nims; 747 } 748 749 /* 750 * Mark the source as recorded and update the recorded 751 * source count. 752 */ 753 ++ims->ims_stp; 754 ++inm->inm_st[1].iss_rec; 755 756 return (1); 757 } 758 759 /* 760 * Return a pointer to an in_msource owned by an in_mfilter, 761 * given its source address. 762 * Lazy-allocate if needed. If this is a new entry its filter state is 763 * undefined at t0. 764 * 765 * imf is the filter set being modified. 766 * haddr is the source address in *host* byte-order. 767 * 768 * SMPng: May be called with locks held; malloc must not block. 
769 */ 770 static int 771 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 772 struct in_msource **plims) 773 { 774 struct ip_msource find; 775 struct ip_msource *ims, *nims; 776 struct in_msource *lims; 777 int error; 778 779 error = 0; 780 ims = NULL; 781 lims = NULL; 782 783 /* key is host byte order */ 784 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 785 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 786 lims = (struct in_msource *)ims; 787 if (lims == NULL) { 788 if (imf->imf_nsrc == in_mcast_maxsocksrc) 789 return (ENOSPC); 790 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 791 M_NOWAIT | M_ZERO); 792 if (nims == NULL) 793 return (ENOMEM); 794 lims = (struct in_msource *)nims; 795 lims->ims_haddr = find.ims_haddr; 796 lims->imsl_st[0] = MCAST_UNDEFINED; 797 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 798 ++imf->imf_nsrc; 799 } 800 801 *plims = lims; 802 803 return (error); 804 } 805 806 /* 807 * Graft a source entry into an existing socket-layer filter set, 808 * maintaining any required invariants and checking allocations. 809 * 810 * The source is marked as being in the new filter mode at t1. 811 * 812 * Return the pointer to the new node, otherwise return NULL. 813 */ 814 static struct in_msource * 815 imf_graft(struct in_mfilter *imf, const uint8_t st1, 816 const struct sockaddr_in *psin) 817 { 818 struct ip_msource *nims; 819 struct in_msource *lims; 820 821 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 822 M_NOWAIT | M_ZERO); 823 if (nims == NULL) 824 return (NULL); 825 lims = (struct in_msource *)nims; 826 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 827 lims->imsl_st[0] = MCAST_UNDEFINED; 828 lims->imsl_st[1] = st1; 829 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 830 ++imf->imf_nsrc; 831 832 return (lims); 833 } 834 835 /* 836 * Prune a source entry from an existing socket-layer filter set, 837 * maintaining any required invariants and checking allocations. 
838 * 839 * The source is marked as being left at t1, it is not freed. 840 * 841 * Return 0 if no error occurred, otherwise return an errno value. 842 */ 843 static int 844 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 845 { 846 struct ip_msource find; 847 struct ip_msource *ims; 848 struct in_msource *lims; 849 850 /* key is host byte order */ 851 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 852 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 853 if (ims == NULL) 854 return (ENOENT); 855 lims = (struct in_msource *)ims; 856 lims->imsl_st[1] = MCAST_UNDEFINED; 857 return (0); 858 } 859 860 /* 861 * Revert socket-layer filter set deltas at t1 to t0 state. 862 */ 863 static void 864 imf_rollback(struct in_mfilter *imf) 865 { 866 struct ip_msource *ims, *tims; 867 struct in_msource *lims; 868 869 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 870 lims = (struct in_msource *)ims; 871 if (lims->imsl_st[0] == lims->imsl_st[1]) { 872 /* no change at t1 */ 873 continue; 874 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 875 /* revert change to existing source at t1 */ 876 lims->imsl_st[1] = lims->imsl_st[0]; 877 } else { 878 /* revert source added t1 */ 879 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 880 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 881 free(ims, M_INMFILTER); 882 imf->imf_nsrc--; 883 } 884 } 885 imf->imf_st[1] = imf->imf_st[0]; 886 } 887 888 /* 889 * Mark socket-layer filter set as INCLUDE {} at t1. 890 */ 891 static void 892 imf_leave(struct in_mfilter *imf) 893 { 894 struct ip_msource *ims; 895 struct in_msource *lims; 896 897 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 898 lims = (struct in_msource *)ims; 899 lims->imsl_st[1] = MCAST_UNDEFINED; 900 } 901 imf->imf_st[1] = MCAST_INCLUDE; 902 } 903 904 /* 905 * Mark socket-layer filter set deltas as committed. 
906 */ 907 static void 908 imf_commit(struct in_mfilter *imf) 909 { 910 struct ip_msource *ims; 911 struct in_msource *lims; 912 913 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 914 lims = (struct in_msource *)ims; 915 lims->imsl_st[0] = lims->imsl_st[1]; 916 } 917 imf->imf_st[0] = imf->imf_st[1]; 918 } 919 920 /* 921 * Reap unreferenced sources from socket-layer filter set. 922 */ 923 static void 924 imf_reap(struct in_mfilter *imf) 925 { 926 struct ip_msource *ims, *tims; 927 struct in_msource *lims; 928 929 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 930 lims = (struct in_msource *)ims; 931 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 932 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 933 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 934 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 935 free(ims, M_INMFILTER); 936 imf->imf_nsrc--; 937 } 938 } 939 } 940 941 /* 942 * Purge socket-layer filter set. 943 */ 944 static void 945 imf_purge(struct in_mfilter *imf) 946 { 947 struct ip_msource *ims, *tims; 948 949 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 950 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 951 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 952 free(ims, M_INMFILTER); 953 imf->imf_nsrc--; 954 } 955 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 956 KASSERT(RB_EMPTY(&imf->imf_sources), 957 ("%s: imf_sources not empty", __func__)); 958 } 959 960 /* 961 * Look up a source filter entry for a multicast group. 962 * 963 * inm is the group descriptor to work with. 964 * haddr is the host-byte-order IPv4 address to look up. 965 * noalloc may be non-zero to suppress allocation of sources. 966 * *pims will be set to the address of the retrieved or allocated source. 967 * 968 * SMPng: NOTE: may be called with locks held. 969 * Return 0 if successful, otherwise return a non-zero error code. 
970 */ 971 static int 972 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 973 const int noalloc, struct ip_msource **pims) 974 { 975 struct ip_msource find; 976 struct ip_msource *ims, *nims; 977 978 find.ims_haddr = haddr; 979 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 980 if (ims == NULL && !noalloc) { 981 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 982 return (ENOSPC); 983 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 984 M_NOWAIT | M_ZERO); 985 if (nims == NULL) 986 return (ENOMEM); 987 nims->ims_haddr = haddr; 988 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 989 ++inm->inm_nsrc; 990 ims = nims; 991 #ifdef KTR 992 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 993 haddr, ims); 994 #endif 995 } 996 997 *pims = ims; 998 return (0); 999 } 1000 1001 /* 1002 * Merge socket-layer source into IGMP-layer source. 1003 * If rollback is non-zero, perform the inverse of the merge. 1004 */ 1005 static void 1006 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 1007 const int rollback) 1008 { 1009 int n = rollback ? -1 : 1; 1010 1011 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 1012 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 1013 __func__, n, ims->ims_haddr); 1014 ims->ims_st[1].ex -= n; 1015 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 1016 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 1017 __func__, n, ims->ims_haddr); 1018 ims->ims_st[1].in -= n; 1019 } 1020 1021 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 1022 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 1023 __func__, n, ims->ims_haddr); 1024 ims->ims_st[1].ex += n; 1025 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 1026 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 1027 __func__, n, ims->ims_haddr); 1028 ims->ims_st[1].in += n; 1029 } 1030 } 1031 1032 /* 1033 * Atomically update the global in_multi state, when a membership's 1034 * filter list is being updated in any way. 1035 * 1036 * imf is the per-inpcb-membership group filter pointer. 
1037 * A fake imf may be passed for in-kernel consumers. 1038 * 1039 * XXX This is a candidate for a set-symmetric-difference style loop 1040 * which would eliminate the repeated lookup from root of ims nodes, 1041 * as they share the same key space. 1042 * 1043 * If any error occurred this function will back out of refcounts 1044 * and return a non-zero value. 1045 */ 1046 static int 1047 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1048 { 1049 struct ip_msource *ims, *nims; 1050 struct in_msource *lims; 1051 int schanged, error; 1052 int nsrc0, nsrc1; 1053 1054 schanged = 0; 1055 error = 0; 1056 nsrc1 = nsrc0 = 0; 1057 IN_MULTI_LIST_LOCK_ASSERT(); 1058 1059 /* 1060 * Update the source filters first, as this may fail. 1061 * Maintain count of in-mode filters at t0, t1. These are 1062 * used to work out if we transition into ASM mode or not. 1063 * Maintain a count of source filters whose state was 1064 * actually modified by this operation. 1065 */ 1066 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1067 lims = (struct in_msource *)ims; 1068 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1069 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1070 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1071 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1072 ++schanged; 1073 if (error) 1074 break; 1075 ims_merge(nims, lims, 0); 1076 } 1077 if (error) { 1078 struct ip_msource *bims; 1079 1080 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1081 lims = (struct in_msource *)ims; 1082 if (lims->imsl_st[0] == lims->imsl_st[1]) 1083 continue; 1084 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1085 if (bims == NULL) 1086 continue; 1087 ims_merge(bims, lims, 1); 1088 } 1089 goto out_reap; 1090 } 1091 1092 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1093 __func__, nsrc0, nsrc1); 1094 1095 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. 
*/ 1096 if (imf->imf_st[0] == imf->imf_st[1] && 1097 imf->imf_st[1] == MCAST_INCLUDE) { 1098 if (nsrc1 == 0) { 1099 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1100 --inm->inm_st[1].iss_in; 1101 } 1102 } 1103 1104 /* Handle filter mode transition on socket. */ 1105 if (imf->imf_st[0] != imf->imf_st[1]) { 1106 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1107 __func__, imf->imf_st[0], imf->imf_st[1]); 1108 1109 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1110 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1111 --inm->inm_st[1].iss_ex; 1112 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1113 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1114 --inm->inm_st[1].iss_in; 1115 } 1116 1117 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1118 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1119 inm->inm_st[1].iss_ex++; 1120 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1121 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1122 inm->inm_st[1].iss_in++; 1123 } 1124 } 1125 1126 /* 1127 * Track inm filter state in terms of listener counts. 1128 * If there are any exclusive listeners, stack-wide 1129 * membership is exclusive. 1130 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1131 * If no listeners remain, state is undefined at t1, 1132 * and the IGMP lifecycle for this group should finish. 1133 */ 1134 if (inm->inm_st[1].iss_ex > 0) { 1135 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1136 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1137 } else if (inm->inm_st[1].iss_in > 0) { 1138 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1139 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1140 } else { 1141 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1142 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1143 } 1144 1145 /* Decrement ASM listener count on transition out of ASM mode. 
*/ 1146 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1147 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1148 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1149 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1150 --inm->inm_st[1].iss_asm; 1151 } 1152 } 1153 1154 /* Increment ASM listener count on transition to ASM mode. */ 1155 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1156 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1157 inm->inm_st[1].iss_asm++; 1158 } 1159 1160 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1161 inm_print(inm); 1162 1163 out_reap: 1164 if (schanged > 0) { 1165 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1166 inm_reap(inm); 1167 } 1168 return (error); 1169 } 1170 1171 /* 1172 * Mark an in_multi's filter set deltas as committed. 1173 * Called by IGMP after a state change has been enqueued. 1174 */ 1175 void 1176 inm_commit(struct in_multi *inm) 1177 { 1178 struct ip_msource *ims; 1179 1180 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1181 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1182 inm_print(inm); 1183 1184 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1185 ims->ims_st[0] = ims->ims_st[1]; 1186 } 1187 inm->inm_st[0] = inm->inm_st[1]; 1188 } 1189 1190 /* 1191 * Reap unreferenced nodes from an in_multi's filter set. 1192 */ 1193 static void 1194 inm_reap(struct in_multi *inm) 1195 { 1196 struct ip_msource *ims, *tims; 1197 1198 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1199 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1200 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1201 ims->ims_stp != 0) 1202 continue; 1203 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1204 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1205 free(ims, M_IPMSOURCE); 1206 inm->inm_nsrc--; 1207 } 1208 } 1209 1210 /* 1211 * Purge all source nodes from an in_multi's filter set. 
1212 */ 1213 static void 1214 inm_purge(struct in_multi *inm) 1215 { 1216 struct ip_msource *ims, *tims; 1217 1218 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1219 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1220 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1221 free(ims, M_IPMSOURCE); 1222 inm->inm_nsrc--; 1223 } 1224 } 1225 1226 /* 1227 * Join a multicast group; unlocked entry point. 1228 * 1229 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1230 * is not held. Fortunately, ifp is unlikely to have been detached 1231 * at this point, so we assume it's OK to recurse. 1232 */ 1233 int 1234 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1235 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1236 { 1237 int error; 1238 1239 IN_MULTI_LOCK(); 1240 error = in_joingroup_locked(ifp, gina, imf, pinm); 1241 IN_MULTI_UNLOCK(); 1242 1243 return (error); 1244 } 1245 1246 /* 1247 * Join a multicast group; real entry point. 1248 * 1249 * Only preserves atomicity at inm level. 1250 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1251 * 1252 * If the IGMP downcall fails, the group is not joined, and an error 1253 * code is returned. 1254 */ 1255 int 1256 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1257 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1258 { 1259 struct in_mfilter timf; 1260 struct in_multi *inm; 1261 int error; 1262 1263 IN_MULTI_LOCK_ASSERT(); 1264 IN_MULTI_LIST_UNLOCK_ASSERT(); 1265 1266 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1267 ntohl(gina->s_addr), ifp, ifp->if_xname); 1268 1269 error = 0; 1270 inm = NULL; 1271 1272 /* 1273 * If no imf was specified (i.e. kernel consumer), 1274 * fake one up and assume it is an ASM join. 
1275 */ 1276 if (imf == NULL) { 1277 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1278 imf = &timf; 1279 } 1280 1281 error = in_getmulti(ifp, gina, &inm); 1282 if (error) { 1283 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1284 return (error); 1285 } 1286 IN_MULTI_LIST_LOCK(); 1287 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1288 error = inm_merge(inm, imf); 1289 if (error) { 1290 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1291 goto out_inm_release; 1292 } 1293 1294 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1295 error = igmp_change_state(inm); 1296 if (error) { 1297 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1298 goto out_inm_release; 1299 } 1300 1301 out_inm_release: 1302 if (error) { 1303 1304 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1305 inm_release_deferred(inm); 1306 } else { 1307 *pinm = inm; 1308 } 1309 IN_MULTI_LIST_UNLOCK(); 1310 1311 return (error); 1312 } 1313 1314 /* 1315 * Leave a multicast group; unlocked entry point. 1316 */ 1317 int 1318 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1319 { 1320 int error; 1321 1322 IN_MULTI_LOCK(); 1323 error = in_leavegroup_locked(inm, imf); 1324 IN_MULTI_UNLOCK(); 1325 1326 return (error); 1327 } 1328 1329 /* 1330 * Leave a multicast group; real entry point. 1331 * All source filters will be expunged. 1332 * 1333 * Only preserves atomicity at inm level. 1334 * 1335 * Holding the write lock for the INP which contains imf 1336 * is highly advisable. We can't assert for it as imf does not 1337 * contain a back-pointer to the owning inp. 1338 * 1339 * Note: This is not the same as inm_release(*) as this function also 1340 * makes a state change downcall into IGMP. 
 */
int
in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct in_mfilter timf;
	int error;

	error = 0;

	/* IN_MULTI_LOCK must be held; the list lock is taken below. */
	IN_MULTI_LOCK_ASSERT();
	IN_MULTI_LIST_UNLOCK_ASSERT();

	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
	    inm, ntohl(inm->inm_addr.s_addr),
	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
	    imf);

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM leave
	 * (exclude mode at t0, undefined at t1).
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	IN_MULTI_LIST_LOCK();
	error = inm_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	/*
	 * NOTE(review): inm->inm_ifp is dereferenced unconditionally here,
	 * although the trace above allows for a detached ifp -- confirm
	 * callers never reach this point with a detached interface.
	 */
	CURVNET_SET(inm->inm_ifp->if_vnet);
	error = igmp_change_state(inm);
	/* The reference is dropped whether or not the downcall succeeded. */
	IF_ADDR_WLOCK(inm->inm_ifp);
	inm_release_deferred(inm);
	IF_ADDR_WUNLOCK(inm->inm_ifp);
	IN_MULTI_LIST_UNLOCK();
	CURVNET_RESTORE();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	/* Trace only; the release itself already happened above. */
	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);

	return (error);
}

/*#ifndef BURN_BRIDGES*/
/*
 * Join an IPv4 multicast group in (*,G) exclusive mode.
 * The group must be a 224.0.0.0/24 link-scope group.
 * This KPI is for legacy kernel consumers only.
1400 */ 1401 struct in_multi * 1402 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1403 { 1404 struct in_multi *pinm; 1405 int error; 1406 #ifdef INVARIANTS 1407 char addrbuf[INET_ADDRSTRLEN]; 1408 #endif 1409 1410 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1411 ("%s: %s not in 224.0.0.0/24", __func__, 1412 inet_ntoa_r(*ap, addrbuf))); 1413 1414 error = in_joingroup(ifp, ap, NULL, &pinm); 1415 if (error != 0) 1416 pinm = NULL; 1417 1418 return (pinm); 1419 } 1420 1421 /* 1422 * Block or unblock an ASM multicast source on an inpcb. 1423 * This implements the delta-based API described in RFC 3678. 1424 * 1425 * The delta-based API applies only to exclusive-mode memberships. 1426 * An IGMP downcall will be performed. 1427 * 1428 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1429 * 1430 * Return 0 if successful, otherwise return an appropriate error code. 1431 */ 1432 static int 1433 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1434 { 1435 struct group_source_req gsr; 1436 sockunion_t *gsa, *ssa; 1437 struct ifnet *ifp; 1438 struct in_mfilter *imf; 1439 struct ip_moptions *imo; 1440 struct in_msource *ims; 1441 struct in_multi *inm; 1442 size_t idx; 1443 uint16_t fmode; 1444 int error, doblock; 1445 1446 ifp = NULL; 1447 error = 0; 1448 doblock = 0; 1449 1450 memset(&gsr, 0, sizeof(struct group_source_req)); 1451 gsa = (sockunion_t *)&gsr.gsr_group; 1452 ssa = (sockunion_t *)&gsr.gsr_source; 1453 1454 switch (sopt->sopt_name) { 1455 case IP_BLOCK_SOURCE: 1456 case IP_UNBLOCK_SOURCE: { 1457 struct ip_mreq_source mreqs; 1458 1459 error = sooptcopyin(sopt, &mreqs, 1460 sizeof(struct ip_mreq_source), 1461 sizeof(struct ip_mreq_source)); 1462 if (error) 1463 return (error); 1464 1465 gsa->sin.sin_family = AF_INET; 1466 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1467 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1468 1469 ssa->sin.sin_family = AF_INET; 1470 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1471 ssa->sin.sin_addr = 
mreqs.imr_sourceaddr; 1472 1473 if (!in_nullhost(mreqs.imr_interface)) 1474 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1475 1476 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1477 doblock = 1; 1478 1479 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1480 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1481 break; 1482 } 1483 1484 case MCAST_BLOCK_SOURCE: 1485 case MCAST_UNBLOCK_SOURCE: 1486 error = sooptcopyin(sopt, &gsr, 1487 sizeof(struct group_source_req), 1488 sizeof(struct group_source_req)); 1489 if (error) 1490 return (error); 1491 1492 if (gsa->sin.sin_family != AF_INET || 1493 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1494 return (EINVAL); 1495 1496 if (ssa->sin.sin_family != AF_INET || 1497 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1498 return (EINVAL); 1499 1500 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1501 return (EADDRNOTAVAIL); 1502 1503 ifp = ifnet_byindex(gsr.gsr_interface); 1504 1505 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1506 doblock = 1; 1507 break; 1508 1509 default: 1510 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1511 __func__, sopt->sopt_name); 1512 return (EOPNOTSUPP); 1513 break; 1514 } 1515 1516 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1517 return (EINVAL); 1518 1519 /* 1520 * Check if we are actually a member of this group. 1521 */ 1522 imo = inp_findmoptions(inp); 1523 idx = imo_match_group(imo, ifp, &gsa->sa); 1524 if (idx == -1 || imo->imo_mfilters == NULL) { 1525 error = EADDRNOTAVAIL; 1526 goto out_inp_locked; 1527 } 1528 1529 KASSERT(imo->imo_mfilters != NULL, 1530 ("%s: imo_mfilters not allocated", __func__)); 1531 imf = &imo->imo_mfilters[idx]; 1532 inm = imo->imo_membership[idx]; 1533 1534 /* 1535 * Attempting to use the delta-based API on an 1536 * non exclusive-mode membership is an error. 
1537 */ 1538 fmode = imf->imf_st[0]; 1539 if (fmode != MCAST_EXCLUDE) { 1540 error = EINVAL; 1541 goto out_inp_locked; 1542 } 1543 1544 /* 1545 * Deal with error cases up-front: 1546 * Asked to block, but already blocked; or 1547 * Asked to unblock, but nothing to unblock. 1548 * If adding a new block entry, allocate it. 1549 */ 1550 ims = imo_match_source(imo, idx, &ssa->sa); 1551 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1552 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1553 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1554 error = EADDRNOTAVAIL; 1555 goto out_inp_locked; 1556 } 1557 1558 INP_WLOCK_ASSERT(inp); 1559 1560 /* 1561 * Begin state merge transaction at socket layer. 1562 */ 1563 if (doblock) { 1564 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1565 ims = imf_graft(imf, fmode, &ssa->sin); 1566 if (ims == NULL) 1567 error = ENOMEM; 1568 } else { 1569 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1570 error = imf_prune(imf, &ssa->sin); 1571 } 1572 1573 if (error) { 1574 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1575 goto out_imf_rollback; 1576 } 1577 1578 /* 1579 * Begin state merge transaction at IGMP layer. 
1580 */ 1581 IN_MULTI_LOCK(); 1582 IN_MULTI_LIST_LOCK(); 1583 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1584 error = inm_merge(inm, imf); 1585 if (error) { 1586 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1587 goto out_in_multi_locked; 1588 } 1589 1590 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1591 error = igmp_change_state(inm); 1592 if (error) 1593 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1594 1595 out_in_multi_locked: 1596 1597 IN_MULTI_UNLOCK(); 1598 IN_MULTI_UNLOCK(); 1599 out_imf_rollback: 1600 if (error) 1601 imf_rollback(imf); 1602 else 1603 imf_commit(imf); 1604 1605 imf_reap(imf); 1606 1607 out_inp_locked: 1608 INP_WUNLOCK(inp); 1609 return (error); 1610 } 1611 1612 /* 1613 * Given an inpcb, return its multicast options structure pointer. Accepts 1614 * an unlocked inpcb pointer, but will return it locked. May sleep. 1615 * 1616 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1617 * SMPng: NOTE: Returns with the INP write lock held. 1618 */ 1619 static struct ip_moptions * 1620 inp_findmoptions(struct inpcb *inp) 1621 { 1622 struct ip_moptions *imo; 1623 struct in_multi **immp; 1624 struct in_mfilter *imfp; 1625 size_t idx; 1626 1627 INP_WLOCK(inp); 1628 if (inp->inp_moptions != NULL) 1629 return (inp->inp_moptions); 1630 1631 INP_WUNLOCK(inp); 1632 1633 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1634 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1635 M_WAITOK | M_ZERO); 1636 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1637 M_INMFILTER, M_WAITOK); 1638 1639 imo->imo_multicast_ifp = NULL; 1640 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1641 imo->imo_multicast_vif = -1; 1642 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1643 imo->imo_multicast_loop = in_mcast_loop; 1644 imo->imo_num_memberships = 0; 1645 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1646 imo->imo_membership = immp; 1647 1648 /* Initialize per-group source filters. 
*/ 1649 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1650 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1651 imo->imo_mfilters = imfp; 1652 1653 INP_WLOCK(inp); 1654 if (inp->inp_moptions != NULL) { 1655 free(imfp, M_INMFILTER); 1656 free(immp, M_IPMOPTS); 1657 free(imo, M_IPMOPTS); 1658 return (inp->inp_moptions); 1659 } 1660 inp->inp_moptions = imo; 1661 return (imo); 1662 } 1663 1664 static void 1665 inp_gcmoptions(epoch_context_t ctx) 1666 { 1667 struct ip_moptions *imo; 1668 struct in_mfilter *imf; 1669 struct in_multi *inm; 1670 struct ifnet *ifp; 1671 size_t idx, nmships; 1672 1673 imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx); 1674 1675 nmships = imo->imo_num_memberships; 1676 for (idx = 0; idx < nmships; ++idx) { 1677 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1678 if (imf) 1679 imf_leave(imf); 1680 inm = imo->imo_membership[idx]; 1681 ifp = inm->inm_ifp; 1682 if (ifp != NULL) { 1683 CURVNET_SET(ifp->if_vnet); 1684 (void)in_leavegroup(inm, imf); 1685 CURVNET_RESTORE(); 1686 } else { 1687 (void)in_leavegroup(inm, imf); 1688 } 1689 if (imf) 1690 imf_purge(imf); 1691 } 1692 1693 if (imo->imo_mfilters) 1694 free(imo->imo_mfilters, M_INMFILTER); 1695 free(imo->imo_membership, M_IPMOPTS); 1696 free(imo, M_IPMOPTS); 1697 } 1698 1699 /* 1700 * Discard the IP multicast options (and source filters). To minimize 1701 * the amount of work done while holding locks such as the INP's 1702 * pcbinfo lock (which is used in the receive path), the free 1703 * operation is deferred to the epoch callback task. 1704 */ 1705 void 1706 inp_freemoptions(struct ip_moptions *imo) 1707 { 1708 if (imo == NULL) 1709 return; 1710 epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions); 1711 } 1712 1713 /* 1714 * Atomically get source filters on a socket for an IPv4 multicast group. 1715 * Called with INP lock held; returns with lock released. 
 *
 * The request is read from, and the reply written to, sopt as a
 * struct __msfilterreq.  On return msfr_fmode holds the filter mode at t1
 * and msfr_nsrcs the number of in-mode sources, which may exceed the
 * number of entries actually copied to msfr_srcs.
 */
static int
inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq msfr;
	sockunion_t *gsa;
	struct ifnet *ifp;
	struct ip_moptions *imo;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	struct in_msource *lims;
	struct sockaddr_in *psin;
	struct sockaddr_storage *ptss;
	struct sockaddr_storage *tss;
	int error;
	size_t idx, nsrcs, ncsrcs;

	INP_WLOCK_ASSERT(inp);

	imo = inp->inp_moptions;
	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));

	/* The lock is dropped while the request is copied in and validated. */
	INP_WUNLOCK(inp);

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EINVAL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EINVAL);

	INP_WLOCK(inp);

	/*
	 * Lookup group on the socket.
	 */
	gsa = (sockunion_t *)&msfr.msfr_group;
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		INP_WUNLOCK(inp);
		return (EADDRNOTAVAIL);
	}
	imf = &imo->imo_mfilters[idx];

	/*
	 * Ignore memberships which are in limbo.
	 */
	if (imf->imf_st[1] == MCAST_UNDEFINED) {
		INP_WUNLOCK(inp);
		return (EAGAIN);
	}
	msfr.msfr_fmode = imf->imf_st[1];

	/*
	 * If the user specified a buffer, copy out the source filter
	 * entries to userland gracefully.
	 * We only copy out the number of entries which userland
	 * has asked for, but we always tell userland how big the
	 * buffer really needs to be.
	 */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
	tss = NULL;
	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
		/* Zeroed staging buffer, allocated without sleeping. */
		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (tss == NULL) {
			INP_WUNLOCK(inp);
			return (ENOBUFS);
		}
	}

	/*
	 * Count number of sources in-mode at t0.
	 * If buffer space exists and remains, copy out source entries.
	 */
	nsrcs = msfr.msfr_nsrcs;
	ncsrcs = 0;
	ptss = tss;
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
		    lims->imsl_st[0] != imf->imf_st[0])
			continue;
		/* Counted even when there is no room left to stage it. */
		++ncsrcs;
		if (tss != NULL && nsrcs > 0) {
			psin = (struct sockaddr_in *)ptss;
			psin->sin_family = AF_INET;
			psin->sin_len = sizeof(struct sockaddr_in);
			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
			psin->sin_port = 0;
			++ptss;
			--nsrcs;
		}
	}

	/* Everything below works on local copies only. */
	INP_WUNLOCK(inp);

	if (tss != NULL) {
		/*
		 * The whole staging buffer is copied out; slots that were
		 * not filled above remain zeroed from the allocation.
		 */
		error = copyout(tss, msfr.msfr_srcs,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		free(tss, M_TEMP);
		if (error)
			return (error);
	}

	msfr.msfr_nsrcs = ncsrcs;
	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
 *
 * Handles IP_MULTICAST_VIF, IP_MULTICAST_IF, IP_MULTICAST_TTL,
 * IP_MULTICAST_LOOP and IP_MSFILTER; any other option name yields
 * ENOPROTOOPT.  The INP write lock is taken here and released on every
 * path before returning.
 */
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct rm_priotracker in_ifa_tracker;
	struct ip_mreqn mreqn;
	struct ip_moptions *imo;
	struct ifnet *ifp;
	struct in_ifaddr *ia;
	int error, optval;
	u_char coptval;

	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	error = 0;
	/*
	 * Each case snapshots what it needs under the lock and releases
	 * the lock before copying the result out.
	 */
	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF:
		if (imo != NULL)
			optval = imo->imo_multicast_vif;
		else
			optval = -1;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_IF:
		memset(&mreqn, 0, sizeof(struct ip_mreqn));
		if (imo != NULL) {
			ifp = imo->imo_multicast_ifp;
			/* A configured address takes precedence over the ifp. */
			if (!in_nullhost(imo->imo_multicast_addr)) {
				mreqn.imr_address = imo->imo_multicast_addr;
			} else if (ifp != NULL) {
				mreqn.imr_ifindex = ifp->if_index;
				NET_EPOCH_ENTER();
				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
				if (ia != NULL)
					mreqn.imr_address =
					    IA_SIN(ia)->sin_addr;
				NET_EPOCH_EXIT();
			}
		}
		INP_WUNLOCK(inp);
		/* Reply shape follows the size of the caller's buffer. */
		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
			error = sooptcopyout(sopt, &mreqn,
			    sizeof(struct ip_mreqn));
		} else {
			error = sooptcopyout(sopt, &mreqn.imr_address,
			    sizeof(struct in_addr));
		}
		break;

	case IP_MULTICAST_TTL:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
		else
			optval = coptval = imo->imo_multicast_ttl;
		INP_WUNLOCK(inp);
		/* Answer as u_char or int depending on the caller's buffer. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_LOOP:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
		else
			optval = coptval = imo->imo_multicast_loop;
		INP_WUNLOCK(inp);
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MSFILTER:
		if (imo == NULL) {
			error = EADDRNOTAVAIL;
			INP_WUNLOCK(inp);
		} else {
			/* Releases the INP lock itself on every path. */
			error = inp_get_source_filters(inp, sopt);
		}
		break;

	default:
		INP_WUNLOCK(inp);
		error = ENOPROTOOPT;
		break;
	}

	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Look up the ifnet to use for a multicast group membership,
 * given the IPv4 address of an interface, and the IPv4 group address.
 *
 * This routine exists to support legacy multicast applications
 * which do not understand that multicast memberships are scoped to
 * specific physical links in the networking stack, or which need
 * to join link-scope groups before IPv4 addresses are configured.
 *
 * If inp is non-NULL, use this socket's current FIB number for any
 * required FIB lookup.
 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
 * and use its ifp; usually, this points to the default next-hop.
 *
 * If the FIB lookup fails, attempt to use the first non-loopback
 * interface with multicast capability in the system as a
 * last resort. The legacy IPv4 ASM API requires that we do
 * this in order to allow groups to be joined when the routing
 * table has not yet been populated during boot.
 *
 * Returns NULL if no ifp could be found.
 *
 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
 * FUTURE: Implement IPv4 source-address selection.
1965 */ 1966 static struct ifnet * 1967 inp_lookup_mcast_ifp(const struct inpcb *inp, 1968 const struct sockaddr_in *gsin, const struct in_addr ina) 1969 { 1970 struct rm_priotracker in_ifa_tracker; 1971 struct ifnet *ifp; 1972 struct nhop4_basic nh4; 1973 uint32_t fibnum; 1974 1975 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1976 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1977 ("%s: not multicast", __func__)); 1978 1979 ifp = NULL; 1980 if (!in_nullhost(ina)) { 1981 INADDR_TO_IFP(ina, ifp); 1982 } else { 1983 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1984 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1985 ifp = nh4.nh_ifp; 1986 else { 1987 struct in_ifaddr *ia; 1988 struct ifnet *mifp; 1989 1990 mifp = NULL; 1991 IN_IFADDR_RLOCK(&in_ifa_tracker); 1992 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1993 mifp = ia->ia_ifp; 1994 if (!(mifp->if_flags & IFF_LOOPBACK) && 1995 (mifp->if_flags & IFF_MULTICAST)) { 1996 ifp = mifp; 1997 break; 1998 } 1999 } 2000 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2001 } 2002 } 2003 2004 return (ifp); 2005 } 2006 2007 /* 2008 * Join an IPv4 multicast group, possibly with a source. 
2009 */ 2010 static int 2011 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2012 { 2013 struct group_source_req gsr; 2014 sockunion_t *gsa, *ssa; 2015 struct ifnet *ifp; 2016 struct in_mfilter *imf; 2017 struct ip_moptions *imo; 2018 struct in_multi *inm; 2019 struct in_msource *lims; 2020 size_t idx; 2021 int error, is_new; 2022 2023 ifp = NULL; 2024 imf = NULL; 2025 lims = NULL; 2026 error = 0; 2027 is_new = 0; 2028 2029 memset(&gsr, 0, sizeof(struct group_source_req)); 2030 gsa = (sockunion_t *)&gsr.gsr_group; 2031 gsa->ss.ss_family = AF_UNSPEC; 2032 ssa = (sockunion_t *)&gsr.gsr_source; 2033 ssa->ss.ss_family = AF_UNSPEC; 2034 2035 switch (sopt->sopt_name) { 2036 case IP_ADD_MEMBERSHIP: 2037 case IP_ADD_SOURCE_MEMBERSHIP: { 2038 struct ip_mreq_source mreqs; 2039 2040 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2041 error = sooptcopyin(sopt, &mreqs, 2042 sizeof(struct ip_mreq), 2043 sizeof(struct ip_mreq)); 2044 /* 2045 * Do argument switcharoo from ip_mreq into 2046 * ip_mreq_source to avoid using two instances. 
2047 */ 2048 mreqs.imr_interface = mreqs.imr_sourceaddr; 2049 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2050 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2051 error = sooptcopyin(sopt, &mreqs, 2052 sizeof(struct ip_mreq_source), 2053 sizeof(struct ip_mreq_source)); 2054 } 2055 if (error) 2056 return (error); 2057 2058 gsa->sin.sin_family = AF_INET; 2059 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2060 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2061 2062 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2063 ssa->sin.sin_family = AF_INET; 2064 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2065 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2066 } 2067 2068 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2069 return (EINVAL); 2070 2071 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2072 mreqs.imr_interface); 2073 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2074 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2075 break; 2076 } 2077 2078 case MCAST_JOIN_GROUP: 2079 case MCAST_JOIN_SOURCE_GROUP: 2080 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2081 error = sooptcopyin(sopt, &gsr, 2082 sizeof(struct group_req), 2083 sizeof(struct group_req)); 2084 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2085 error = sooptcopyin(sopt, &gsr, 2086 sizeof(struct group_source_req), 2087 sizeof(struct group_source_req)); 2088 } 2089 if (error) 2090 return (error); 2091 2092 if (gsa->sin.sin_family != AF_INET || 2093 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2094 return (EINVAL); 2095 2096 /* 2097 * Overwrite the port field if present, as the sockaddr 2098 * being copied in may be matched with a binary comparison. 
2099 */ 2100 gsa->sin.sin_port = 0; 2101 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2102 if (ssa->sin.sin_family != AF_INET || 2103 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2104 return (EINVAL); 2105 ssa->sin.sin_port = 0; 2106 } 2107 2108 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2109 return (EINVAL); 2110 2111 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2112 return (EADDRNOTAVAIL); 2113 ifp = ifnet_byindex(gsr.gsr_interface); 2114 break; 2115 2116 default: 2117 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2118 __func__, sopt->sopt_name); 2119 return (EOPNOTSUPP); 2120 break; 2121 } 2122 2123 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2124 return (EADDRNOTAVAIL); 2125 2126 imo = inp_findmoptions(inp); 2127 idx = imo_match_group(imo, ifp, &gsa->sa); 2128 if (idx == -1) { 2129 is_new = 1; 2130 } else { 2131 inm = imo->imo_membership[idx]; 2132 imf = &imo->imo_mfilters[idx]; 2133 if (ssa->ss.ss_family != AF_UNSPEC) { 2134 /* 2135 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2136 * is an error. On an existing inclusive membership, 2137 * it just adds the source to the filter list. 2138 */ 2139 if (imf->imf_st[1] != MCAST_INCLUDE) { 2140 error = EINVAL; 2141 goto out_inp_locked; 2142 } 2143 /* 2144 * Throw out duplicates. 2145 * 2146 * XXX FIXME: This makes a naive assumption that 2147 * even if entries exist for *ssa in this imf, 2148 * they will be rejected as dupes, even if they 2149 * are not valid in the current mode (in-mode). 2150 * 2151 * in_msource is transactioned just as for anything 2152 * else in SSM -- but note naive use of inm_graft() 2153 * below for allocating new filter entries. 2154 * 2155 * This is only an issue if someone mixes the 2156 * full-state SSM API with the delta-based API, 2157 * which is discouraged in the relevant RFCs. 
2158 */ 2159 lims = imo_match_source(imo, idx, &ssa->sa); 2160 if (lims != NULL /*&& 2161 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2162 error = EADDRNOTAVAIL; 2163 goto out_inp_locked; 2164 } 2165 } else { 2166 /* 2167 * MCAST_JOIN_GROUP on an existing exclusive 2168 * membership is an error; return EADDRINUSE 2169 * to preserve 4.4BSD API idempotence, and 2170 * avoid tedious detour to code below. 2171 * NOTE: This is bending RFC 3678 a bit. 2172 * 2173 * On an existing inclusive membership, this is also 2174 * an error; if you want to change filter mode, 2175 * you must use the userland API setsourcefilter(). 2176 * XXX We don't reject this for imf in UNDEFINED 2177 * state at t1, because allocation of a filter 2178 * is atomic with allocation of a membership. 2179 */ 2180 error = EINVAL; 2181 if (imf->imf_st[1] == MCAST_EXCLUDE) 2182 error = EADDRINUSE; 2183 goto out_inp_locked; 2184 } 2185 } 2186 2187 /* 2188 * Begin state merge transaction at socket layer. 2189 */ 2190 INP_WLOCK_ASSERT(inp); 2191 2192 if (is_new) { 2193 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2194 error = imo_grow(imo); 2195 if (error) 2196 goto out_inp_locked; 2197 } 2198 /* 2199 * Allocate the new slot upfront so we can deal with 2200 * grafting the new source filter in same code path 2201 * as for join-source on existing membership. 2202 */ 2203 idx = imo->imo_num_memberships; 2204 imo->imo_membership[idx] = NULL; 2205 imo->imo_num_memberships++; 2206 KASSERT(imo->imo_mfilters != NULL, 2207 ("%s: imf_mfilters vector was not allocated", __func__)); 2208 imf = &imo->imo_mfilters[idx]; 2209 KASSERT(RB_EMPTY(&imf->imf_sources), 2210 ("%s: imf_sources not empty", __func__)); 2211 } 2212 2213 /* 2214 * Graft new source into filter list for this inpcb's 2215 * membership of the group. The in_multi may not have 2216 * been allocated yet if this is a new membership, however, 2217 * the in_mfilter slot will be allocated and must be initialized. 
2218 * 2219 * Note: Grafting of exclusive mode filters doesn't happen 2220 * in this path. 2221 * XXX: Should check for non-NULL lims (node exists but may 2222 * not be in-mode) for interop with full-state API. 2223 */ 2224 if (ssa->ss.ss_family != AF_UNSPEC) { 2225 /* Membership starts in IN mode */ 2226 if (is_new) { 2227 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2228 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2229 } else { 2230 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2231 } 2232 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2233 if (lims == NULL) { 2234 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2235 __func__); 2236 error = ENOMEM; 2237 goto out_imo_free; 2238 } 2239 } else { 2240 /* No address specified; Membership starts in EX mode */ 2241 if (is_new) { 2242 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2243 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2244 } 2245 } 2246 2247 /* 2248 * Begin state merge transaction at IGMP layer. 2249 */ 2250 in_pcbref(inp); 2251 INP_WUNLOCK(inp); 2252 IN_MULTI_LOCK(); 2253 2254 if (is_new) { 2255 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2256 &inm); 2257 if (error) { 2258 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2259 __func__); 2260 IN_MULTI_LIST_UNLOCK(); 2261 goto out_imo_free; 2262 } 2263 imo->imo_membership[idx] = inm; 2264 } else { 2265 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2266 IN_MULTI_LIST_LOCK(); 2267 error = inm_merge(inm, imf); 2268 if (error) { 2269 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2270 __func__); 2271 IN_MULTI_LIST_UNLOCK(); 2272 goto out_in_multi_locked; 2273 } 2274 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2275 error = igmp_change_state(inm); 2276 IN_MULTI_LIST_UNLOCK(); 2277 if (error) { 2278 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2279 __func__); 2280 goto out_in_multi_locked; 2281 } 2282 } 2283 2284 out_in_multi_locked: 2285 2286 IN_MULTI_UNLOCK(); 2287 INP_WLOCK(inp); 2288 if 
(in_pcbrele_wlocked(inp)) 2289 return (ENXIO); 2290 if (error) { 2291 imf_rollback(imf); 2292 if (is_new) 2293 imf_purge(imf); 2294 else 2295 imf_reap(imf); 2296 } else { 2297 imf_commit(imf); 2298 } 2299 2300 out_imo_free: 2301 if (error && is_new) { 2302 imo->imo_membership[idx] = NULL; 2303 --imo->imo_num_memberships; 2304 } 2305 2306 out_inp_locked: 2307 INP_WUNLOCK(inp); 2308 return (error); 2309 } 2310 2311 /* 2312 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2313 */ 2314 static int 2315 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2316 { 2317 struct group_source_req gsr; 2318 struct ip_mreq_source mreqs; 2319 sockunion_t *gsa, *ssa; 2320 struct ifnet *ifp; 2321 struct in_mfilter *imf; 2322 struct ip_moptions *imo; 2323 struct in_msource *ims; 2324 struct in_multi *inm; 2325 size_t idx; 2326 int error, is_final; 2327 2328 ifp = NULL; 2329 error = 0; 2330 is_final = 1; 2331 2332 memset(&gsr, 0, sizeof(struct group_source_req)); 2333 gsa = (sockunion_t *)&gsr.gsr_group; 2334 gsa->ss.ss_family = AF_UNSPEC; 2335 ssa = (sockunion_t *)&gsr.gsr_source; 2336 ssa->ss.ss_family = AF_UNSPEC; 2337 2338 switch (sopt->sopt_name) { 2339 case IP_DROP_MEMBERSHIP: 2340 case IP_DROP_SOURCE_MEMBERSHIP: 2341 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2342 error = sooptcopyin(sopt, &mreqs, 2343 sizeof(struct ip_mreq), 2344 sizeof(struct ip_mreq)); 2345 /* 2346 * Swap interface and sourceaddr arguments, 2347 * as ip_mreq and ip_mreq_source are laid 2348 * out differently. 
2349 */ 2350 mreqs.imr_interface = mreqs.imr_sourceaddr; 2351 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2352 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2353 error = sooptcopyin(sopt, &mreqs, 2354 sizeof(struct ip_mreq_source), 2355 sizeof(struct ip_mreq_source)); 2356 } 2357 if (error) 2358 return (error); 2359 2360 gsa->sin.sin_family = AF_INET; 2361 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2362 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2363 2364 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2365 ssa->sin.sin_family = AF_INET; 2366 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2367 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2368 } 2369 2370 /* 2371 * Attempt to look up hinted ifp from interface address. 2372 * Fallthrough with null ifp iff lookup fails, to 2373 * preserve 4.4BSD mcast API idempotence. 2374 * XXX NOTE WELL: The RFC 3678 API is preferred because 2375 * using an IPv4 address as a key is racy. 2376 */ 2377 if (!in_nullhost(mreqs.imr_interface)) 2378 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2379 2380 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2381 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2382 2383 break; 2384 2385 case MCAST_LEAVE_GROUP: 2386 case MCAST_LEAVE_SOURCE_GROUP: 2387 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2388 error = sooptcopyin(sopt, &gsr, 2389 sizeof(struct group_req), 2390 sizeof(struct group_req)); 2391 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2392 error = sooptcopyin(sopt, &gsr, 2393 sizeof(struct group_source_req), 2394 sizeof(struct group_source_req)); 2395 } 2396 if (error) 2397 return (error); 2398 2399 if (gsa->sin.sin_family != AF_INET || 2400 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2401 return (EINVAL); 2402 2403 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2404 if (ssa->sin.sin_family != AF_INET || 2405 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2406 return (EINVAL); 2407 } 2408 2409 if (gsr.gsr_interface == 0 || V_if_index < 
gsr.gsr_interface) 2410 return (EADDRNOTAVAIL); 2411 2412 ifp = ifnet_byindex(gsr.gsr_interface); 2413 2414 if (ifp == NULL) 2415 return (EADDRNOTAVAIL); 2416 break; 2417 2418 default: 2419 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2420 __func__, sopt->sopt_name); 2421 return (EOPNOTSUPP); 2422 break; 2423 } 2424 2425 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2426 return (EINVAL); 2427 2428 /* 2429 * Find the membership in the membership array. 2430 */ 2431 imo = inp_findmoptions(inp); 2432 idx = imo_match_group(imo, ifp, &gsa->sa); 2433 if (idx == -1) { 2434 error = EADDRNOTAVAIL; 2435 goto out_inp_locked; 2436 } 2437 inm = imo->imo_membership[idx]; 2438 imf = &imo->imo_mfilters[idx]; 2439 2440 if (ssa->ss.ss_family != AF_UNSPEC) 2441 is_final = 0; 2442 2443 /* 2444 * Begin state merge transaction at socket layer. 2445 */ 2446 INP_WLOCK_ASSERT(inp); 2447 2448 /* 2449 * If we were instructed only to leave a given source, do so. 2450 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2451 */ 2452 if (is_final) { 2453 imf_leave(imf); 2454 } else { 2455 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2456 error = EADDRNOTAVAIL; 2457 goto out_inp_locked; 2458 } 2459 ims = imo_match_source(imo, idx, &ssa->sa); 2460 if (ims == NULL) { 2461 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2462 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2463 error = EADDRNOTAVAIL; 2464 goto out_inp_locked; 2465 } 2466 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2467 error = imf_prune(imf, &ssa->sin); 2468 if (error) { 2469 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2470 __func__); 2471 goto out_inp_locked; 2472 } 2473 } 2474 2475 /* 2476 * Begin state merge transaction at IGMP layer. 2477 */ 2478 in_pcbref(inp); 2479 INP_WUNLOCK(inp); 2480 IN_MULTI_LOCK(); 2481 2482 if (is_final) { 2483 /* 2484 * Give up the multicast address record to which 2485 * the membership points. 
2486 */ 2487 (void)in_leavegroup_locked(inm, imf); 2488 } else { 2489 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2490 IN_MULTI_LIST_LOCK(); 2491 error = inm_merge(inm, imf); 2492 if (error) { 2493 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2494 __func__); 2495 goto out_in_multi_locked; 2496 } 2497 2498 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2499 error = igmp_change_state(inm); 2500 IN_MULTI_LIST_UNLOCK(); 2501 if (error) { 2502 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2503 __func__); 2504 } 2505 } 2506 2507 out_in_multi_locked: 2508 2509 IN_MULTI_UNLOCK(); 2510 INP_WLOCK(inp); 2511 if (in_pcbrele_wlocked(inp)) 2512 return (ENXIO); 2513 2514 if (error) 2515 imf_rollback(imf); 2516 else 2517 imf_commit(imf); 2518 2519 imf_reap(imf); 2520 2521 if (is_final) { 2522 /* Remove the gap in the membership and filter array. */ 2523 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2524 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2525 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2526 } 2527 imo->imo_num_memberships--; 2528 } 2529 2530 out_inp_locked: 2531 INP_WUNLOCK(inp); 2532 return (error); 2533 } 2534 2535 /* 2536 * Select the interface for transmitting IPv4 multicast datagrams. 2537 * 2538 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2539 * may be passed to this socket option. An address of INADDR_ANY or an 2540 * interface index of 0 is used to remove a previous selection. 2541 * When no interface is selected, one is chosen for every send. 2542 */ 2543 static int 2544 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2545 { 2546 struct in_addr addr; 2547 struct ip_mreqn mreqn; 2548 struct ifnet *ifp; 2549 struct ip_moptions *imo; 2550 int error; 2551 2552 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2553 /* 2554 * An interface index was specified using the 2555 * Linux-derived ip_mreqn structure. 
2556 */ 2557 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2558 sizeof(struct ip_mreqn)); 2559 if (error) 2560 return (error); 2561 2562 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2563 return (EINVAL); 2564 2565 if (mreqn.imr_ifindex == 0) { 2566 ifp = NULL; 2567 } else { 2568 ifp = ifnet_byindex(mreqn.imr_ifindex); 2569 if (ifp == NULL) 2570 return (EADDRNOTAVAIL); 2571 } 2572 } else { 2573 /* 2574 * An interface was specified by IPv4 address. 2575 * This is the traditional BSD usage. 2576 */ 2577 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2578 sizeof(struct in_addr)); 2579 if (error) 2580 return (error); 2581 if (in_nullhost(addr)) { 2582 ifp = NULL; 2583 } else { 2584 INADDR_TO_IFP(addr, ifp); 2585 if (ifp == NULL) 2586 return (EADDRNOTAVAIL); 2587 } 2588 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2589 ntohl(addr.s_addr)); 2590 } 2591 2592 /* Reject interfaces which do not support multicast. */ 2593 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2594 return (EOPNOTSUPP); 2595 2596 imo = inp_findmoptions(inp); 2597 imo->imo_multicast_ifp = ifp; 2598 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2599 INP_WUNLOCK(inp); 2600 2601 return (0); 2602 } 2603 2604 /* 2605 * Atomically set source filters on a socket for an IPv4 multicast group. 2606 * 2607 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 
2608 */ 2609 static int 2610 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2611 { 2612 struct __msfilterreq msfr; 2613 sockunion_t *gsa; 2614 struct ifnet *ifp; 2615 struct in_mfilter *imf; 2616 struct ip_moptions *imo; 2617 struct in_multi *inm; 2618 size_t idx; 2619 int error; 2620 2621 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2622 sizeof(struct __msfilterreq)); 2623 if (error) 2624 return (error); 2625 2626 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2627 return (ENOBUFS); 2628 2629 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2630 msfr.msfr_fmode != MCAST_INCLUDE)) 2631 return (EINVAL); 2632 2633 if (msfr.msfr_group.ss_family != AF_INET || 2634 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2635 return (EINVAL); 2636 2637 gsa = (sockunion_t *)&msfr.msfr_group; 2638 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2639 return (EINVAL); 2640 2641 gsa->sin.sin_port = 0; /* ignore port */ 2642 2643 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2644 return (EADDRNOTAVAIL); 2645 2646 ifp = ifnet_byindex(msfr.msfr_ifindex); 2647 if (ifp == NULL) 2648 return (EADDRNOTAVAIL); 2649 2650 /* 2651 * Take the INP write lock. 2652 * Check if this socket is a member of this group. 2653 */ 2654 imo = inp_findmoptions(inp); 2655 idx = imo_match_group(imo, ifp, &gsa->sa); 2656 if (idx == -1 || imo->imo_mfilters == NULL) { 2657 error = EADDRNOTAVAIL; 2658 goto out_inp_locked; 2659 } 2660 inm = imo->imo_membership[idx]; 2661 imf = &imo->imo_mfilters[idx]; 2662 2663 /* 2664 * Begin state merge transaction at socket layer. 2665 */ 2666 INP_WLOCK_ASSERT(inp); 2667 2668 imf->imf_st[1] = msfr.msfr_fmode; 2669 2670 /* 2671 * Apply any new source filters, if present. 2672 * Make a copy of the user-space source vector so 2673 * that we may copy them with a single copyin. This 2674 * allows us to deal with page faults up-front. 
2675 */ 2676 if (msfr.msfr_nsrcs > 0) { 2677 struct in_msource *lims; 2678 struct sockaddr_in *psin; 2679 struct sockaddr_storage *kss, *pkss; 2680 int i; 2681 2682 INP_WUNLOCK(inp); 2683 2684 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2685 __func__, (unsigned long)msfr.msfr_nsrcs); 2686 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2687 M_TEMP, M_WAITOK); 2688 error = copyin(msfr.msfr_srcs, kss, 2689 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2690 if (error) { 2691 free(kss, M_TEMP); 2692 return (error); 2693 } 2694 2695 INP_WLOCK(inp); 2696 2697 /* 2698 * Mark all source filters as UNDEFINED at t1. 2699 * Restore new group filter mode, as imf_leave() 2700 * will set it to INCLUDE. 2701 */ 2702 imf_leave(imf); 2703 imf->imf_st[1] = msfr.msfr_fmode; 2704 2705 /* 2706 * Update socket layer filters at t1, lazy-allocating 2707 * new entries. This saves a bunch of memory at the 2708 * cost of one RB_FIND() per source entry; duplicate 2709 * entries in the msfr_nsrcs vector are ignored. 2710 * If we encounter an error, rollback transaction. 2711 * 2712 * XXX This too could be replaced with a set-symmetric 2713 * difference like loop to avoid walking from root 2714 * every time, as the key space is common. 2715 */ 2716 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2717 psin = (struct sockaddr_in *)pkss; 2718 if (psin->sin_family != AF_INET) { 2719 error = EAFNOSUPPORT; 2720 break; 2721 } 2722 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2723 error = EINVAL; 2724 break; 2725 } 2726 error = imf_get_source(imf, psin, &lims); 2727 if (error) 2728 break; 2729 lims->imsl_st[1] = imf->imf_st[1]; 2730 } 2731 free(kss, M_TEMP); 2732 } 2733 2734 if (error) 2735 goto out_imf_rollback; 2736 2737 INP_WLOCK_ASSERT(inp); 2738 IN_MULTI_LOCK(); 2739 IN_MULTI_LIST_LOCK(); 2740 2741 /* 2742 * Begin state merge transaction at IGMP layer. 
2743 */ 2744 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2745 error = inm_merge(inm, imf); 2746 if (error) { 2747 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2748 IN_MULTI_LIST_UNLOCK(); 2749 goto out_in_multi_locked; 2750 } 2751 2752 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2753 error = igmp_change_state(inm); 2754 IN_MULTI_LIST_UNLOCK(); 2755 if (error) 2756 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2757 2758 out_in_multi_locked: 2759 2760 IN_MULTI_UNLOCK(); 2761 2762 out_imf_rollback: 2763 if (error) 2764 imf_rollback(imf); 2765 else 2766 imf_commit(imf); 2767 2768 imf_reap(imf); 2769 2770 out_inp_locked: 2771 INP_WUNLOCK(inp); 2772 return (error); 2773 } 2774 2775 /* 2776 * Set the IP multicast options in response to user setsockopt(). 2777 * 2778 * Many of the socket options handled in this function duplicate the 2779 * functionality of socket options in the regular unicast API. However, 2780 * it is not possible to merge the duplicate code, because the idempotence 2781 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2782 * the effects of these options must be treated as separate and distinct. 2783 * 2784 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2785 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2786 * is refactored to no longer use vifs. 2787 */ 2788 int 2789 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2790 { 2791 struct ip_moptions *imo; 2792 int error; 2793 2794 error = 0; 2795 2796 /* 2797 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2798 * or is a divert socket, reject it. 2799 */ 2800 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2801 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2802 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2803 return (EOPNOTSUPP); 2804 2805 switch (sopt->sopt_name) { 2806 case IP_MULTICAST_VIF: { 2807 int vifi; 2808 /* 2809 * Select a multicast VIF for transmission. 
2810 * Only useful if multicast forwarding is active. 2811 */ 2812 if (legal_vif_num == NULL) { 2813 error = EOPNOTSUPP; 2814 break; 2815 } 2816 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2817 if (error) 2818 break; 2819 if (!legal_vif_num(vifi) && (vifi != -1)) { 2820 error = EINVAL; 2821 break; 2822 } 2823 imo = inp_findmoptions(inp); 2824 imo->imo_multicast_vif = vifi; 2825 INP_WUNLOCK(inp); 2826 break; 2827 } 2828 2829 case IP_MULTICAST_IF: 2830 error = inp_set_multicast_if(inp, sopt); 2831 break; 2832 2833 case IP_MULTICAST_TTL: { 2834 u_char ttl; 2835 2836 /* 2837 * Set the IP time-to-live for outgoing multicast packets. 2838 * The original multicast API required a char argument, 2839 * which is inconsistent with the rest of the socket API. 2840 * We allow either a char or an int. 2841 */ 2842 if (sopt->sopt_valsize == sizeof(u_char)) { 2843 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2844 sizeof(u_char)); 2845 if (error) 2846 break; 2847 } else { 2848 u_int ittl; 2849 2850 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2851 sizeof(u_int)); 2852 if (error) 2853 break; 2854 if (ittl > 255) { 2855 error = EINVAL; 2856 break; 2857 } 2858 ttl = (u_char)ittl; 2859 } 2860 imo = inp_findmoptions(inp); 2861 imo->imo_multicast_ttl = ttl; 2862 INP_WUNLOCK(inp); 2863 break; 2864 } 2865 2866 case IP_MULTICAST_LOOP: { 2867 u_char loop; 2868 2869 /* 2870 * Set the loopback flag for outgoing multicast packets. 2871 * Must be zero or one. The original multicast API required a 2872 * char argument, which is inconsistent with the rest 2873 * of the socket API. We allow either a char or an int. 
2874 */ 2875 if (sopt->sopt_valsize == sizeof(u_char)) { 2876 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2877 sizeof(u_char)); 2878 if (error) 2879 break; 2880 } else { 2881 u_int iloop; 2882 2883 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2884 sizeof(u_int)); 2885 if (error) 2886 break; 2887 loop = (u_char)iloop; 2888 } 2889 imo = inp_findmoptions(inp); 2890 imo->imo_multicast_loop = !!loop; 2891 INP_WUNLOCK(inp); 2892 break; 2893 } 2894 2895 case IP_ADD_MEMBERSHIP: 2896 case IP_ADD_SOURCE_MEMBERSHIP: 2897 case MCAST_JOIN_GROUP: 2898 case MCAST_JOIN_SOURCE_GROUP: 2899 error = inp_join_group(inp, sopt); 2900 break; 2901 2902 case IP_DROP_MEMBERSHIP: 2903 case IP_DROP_SOURCE_MEMBERSHIP: 2904 case MCAST_LEAVE_GROUP: 2905 case MCAST_LEAVE_SOURCE_GROUP: 2906 error = inp_leave_group(inp, sopt); 2907 break; 2908 2909 case IP_BLOCK_SOURCE: 2910 case IP_UNBLOCK_SOURCE: 2911 case MCAST_BLOCK_SOURCE: 2912 case MCAST_UNBLOCK_SOURCE: 2913 error = inp_block_unblock_source(inp, sopt); 2914 break; 2915 2916 case IP_MSFILTER: 2917 error = inp_set_source_filters(inp, sopt); 2918 break; 2919 2920 default: 2921 error = EOPNOTSUPP; 2922 break; 2923 } 2924 2925 INP_UNLOCK_ASSERT(inp); 2926 2927 return (error); 2928 } 2929 2930 /* 2931 * Expose IGMP's multicast filter mode and source list(s) to userland, 2932 * keyed by (ifindex, group). 2933 * The filter mode is written out as a uint32_t, followed by 2934 * 0..n of struct in_addr. 2935 * For use by ifmcstat(8). 2936 * SMPng: NOTE: unlocked read of ifindex space. 
2937 */ 2938 static int 2939 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2940 { 2941 struct in_addr src, group; 2942 struct ifnet *ifp; 2943 struct ifmultiaddr *ifma; 2944 struct in_multi *inm; 2945 struct ip_msource *ims; 2946 int *name; 2947 int retval; 2948 u_int namelen; 2949 uint32_t fmode, ifindex; 2950 2951 name = (int *)arg1; 2952 namelen = arg2; 2953 2954 if (req->newptr != NULL) 2955 return (EPERM); 2956 2957 if (namelen != 2) 2958 return (EINVAL); 2959 2960 ifindex = name[0]; 2961 if (ifindex <= 0 || ifindex > V_if_index) { 2962 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2963 __func__, ifindex); 2964 return (ENOENT); 2965 } 2966 2967 group.s_addr = name[1]; 2968 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2969 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2970 __func__, ntohl(group.s_addr)); 2971 return (EINVAL); 2972 } 2973 2974 ifp = ifnet_byindex(ifindex); 2975 if (ifp == NULL) { 2976 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2977 __func__, ifindex); 2978 return (ENOENT); 2979 } 2980 2981 retval = sysctl_wire_old_buffer(req, 2982 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2983 if (retval) 2984 return (retval); 2985 2986 IN_MULTI_LIST_LOCK(); 2987 2988 IF_ADDR_RLOCK(ifp); 2989 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2990 if (ifma->ifma_addr->sa_family != AF_INET || 2991 ifma->ifma_protospec == NULL) 2992 continue; 2993 inm = (struct in_multi *)ifma->ifma_protospec; 2994 if (!in_hosteq(inm->inm_addr, group)) 2995 continue; 2996 fmode = inm->inm_st[1].iss_fmode; 2997 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2998 if (retval != 0) 2999 break; 3000 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 3001 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 3002 ims->ims_haddr); 3003 /* 3004 * Only copy-out sources which are in-mode. 
3005 */ 3006 if (fmode != ims_get_mode(inm, ims, 1)) { 3007 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 3008 __func__); 3009 continue; 3010 } 3011 src.s_addr = htonl(ims->ims_haddr); 3012 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 3013 if (retval != 0) 3014 break; 3015 } 3016 } 3017 IF_ADDR_RUNLOCK(ifp); 3018 3019 IN_MULTI_LIST_UNLOCK(); 3020 3021 return (retval); 3022 } 3023 3024 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 3025 3026 static const char *inm_modestrs[] = { "un", "in", "ex" }; 3027 3028 static const char * 3029 inm_mode_str(const int mode) 3030 { 3031 3032 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 3033 return (inm_modestrs[mode]); 3034 return ("??"); 3035 } 3036 3037 static const char *inm_statestrs[] = { 3038 "not-member", 3039 "silent", 3040 "idle", 3041 "lazy", 3042 "sleeping", 3043 "awakening", 3044 "query-pending", 3045 "sg-query-pending", 3046 "leaving" 3047 }; 3048 3049 static const char * 3050 inm_state_str(const int state) 3051 { 3052 3053 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3054 return (inm_statestrs[state]); 3055 return ("??"); 3056 } 3057 3058 /* 3059 * Dump an in_multi structure to the console. 
3060 */ 3061 void 3062 inm_print(const struct in_multi *inm) 3063 { 3064 int t; 3065 char addrbuf[INET_ADDRSTRLEN]; 3066 3067 if ((ktr_mask & KTR_IGMPV3) == 0) 3068 return; 3069 3070 printf("%s: --- begin inm %p ---\n", __func__, inm); 3071 printf("addr %s ifp %p(%s) ifma %p\n", 3072 inet_ntoa_r(inm->inm_addr, addrbuf), 3073 inm->inm_ifp, 3074 inm->inm_ifp->if_xname, 3075 inm->inm_ifma); 3076 printf("timer %u state %s refcount %u scq.len %u\n", 3077 inm->inm_timer, 3078 inm_state_str(inm->inm_state), 3079 inm->inm_refcount, 3080 inm->inm_scq.mq_len); 3081 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3082 inm->inm_igi, 3083 inm->inm_nsrc, 3084 inm->inm_sctimer, 3085 inm->inm_scrv); 3086 for (t = 0; t < 2; t++) { 3087 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3088 inm_mode_str(inm->inm_st[t].iss_fmode), 3089 inm->inm_st[t].iss_asm, 3090 inm->inm_st[t].iss_ex, 3091 inm->inm_st[t].iss_in, 3092 inm->inm_st[t].iss_rec); 3093 } 3094 printf("%s: --- end inm %p ---\n", __func__, inm); 3095 } 3096 3097 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3098 3099 void 3100 inm_print(const struct in_multi *inm) 3101 { 3102 3103 } 3104 3105 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3106 3107 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3108