1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * 98 * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK, 99 * IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 100 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 101 * it can be taken by code in net/if.c also. 102 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 103 * 104 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 105 * any need for in_multi itself to be virtualized -- it is bound to an ifp 106 * anyway no matter what happens. 107 */ 108 struct mtx in_multi_list_mtx; 109 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 110 111 struct mtx in_multi_free_mtx; 112 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 113 114 struct sx in_multi_sx; 115 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 116 117 int ifma_restart; 118 119 /* 120 * Functions with non-static linkage defined in this file should be 121 * declared in in_var.h: 122 * imo_multi_filter() 123 * in_addmulti() 124 * in_delmulti() 125 * in_joingroup() 126 * in_joingroup_locked() 127 * in_leavegroup() 128 * in_leavegroup_locked() 129 * and ip_var.h: 130 * inp_freemoptions() 131 * inp_getmoptions() 132 * inp_setmoptions() 133 * 134 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 135 * and in_delmulti(). 136 */ 137 static void imf_commit(struct in_mfilter *); 138 static int imf_get_source(struct in_mfilter *imf, 139 const struct sockaddr_in *psin, 140 struct in_msource **); 141 static struct in_msource * 142 imf_graft(struct in_mfilter *, const uint8_t, 143 const struct sockaddr_in *); 144 static void imf_leave(struct in_mfilter *); 145 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 146 static void imf_purge(struct in_mfilter *); 147 static void imf_rollback(struct in_mfilter *); 148 static void imf_reap(struct in_mfilter *); 149 static struct in_mfilter * 150 imo_match_group(const struct ip_moptions *, 151 const struct ifnet *, const struct sockaddr *); 152 static struct in_msource * 153 imo_match_source(struct in_mfilter *, const struct sockaddr *); 154 static void ims_merge(struct ip_msource *ims, 155 const struct in_msource *lims, const int rollback); 156 static int in_getmulti(struct ifnet *, const struct in_addr *, 157 struct in_multi **); 158 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 159 const int noalloc, struct ip_msource **pims); 160 #ifdef KTR 161 static int inm_is_ifp_detached(const struct in_multi *); 162 #endif 163 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 164 static void inm_purge(struct in_multi *); 165 static void inm_reap(struct in_multi *); 166 static void inm_release(struct in_multi *); 167 static struct ip_moptions * 168 inp_findmoptions(struct inpcb *); 169 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 170 static int inp_join_group(struct inpcb *, struct sockopt *); 171 static int inp_leave_group(struct inpcb *, struct sockopt *); 172 static struct ifnet * 173 inp_lookup_mcast_ifp(const struct inpcb *, 174 const struct sockaddr_in *, const struct in_addr); 175 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 176 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 177 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 178 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 179 180 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 181 "IPv4 multicast"); 182 183 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 184 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 185 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 186 "Max source filters per group"); 187 188 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 189 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 190 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 191 "Max source filters per socket"); 192 193 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 194 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 195 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 196 197 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 198 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 199 "Per-interface stack-wide source filters"); 200 201 #ifdef KTR 202 /* 203 * Inline function which wraps assertions for a valid ifp. 204 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 205 * is detached. 206 */ 207 static int __inline 208 inm_is_ifp_detached(const struct in_multi *inm) 209 { 210 struct ifnet *ifp; 211 212 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 213 ifp = inm->inm_ifma->ifma_ifp; 214 if (ifp != NULL) { 215 /* 216 * Sanity check that netinet's notion of ifp is the 217 * same as net's. 218 */ 219 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 220 } 221 222 return (ifp == NULL); 223 } 224 #endif 225 226 static struct grouptask free_gtask; 227 static struct in_multi_head inm_free_list; 228 static void inm_release_task(void *arg __unused); 229 static void inm_init(void) 230 { 231 SLIST_INIT(&inm_free_list); 232 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 233 } 234 235 #ifdef EARLY_AP_STARTUP 236 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 237 inm_init, NULL); 238 #else 239 SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST, 240 inm_init, NULL); 241 #endif 242 243 244 void 245 inm_release_list_deferred(struct in_multi_head *inmh) 246 { 247 248 if (SLIST_EMPTY(inmh)) 249 return; 250 mtx_lock(&in_multi_free_mtx); 251 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 252 mtx_unlock(&in_multi_free_mtx); 253 GROUPTASK_ENQUEUE(&free_gtask); 254 } 255 256 void 257 inm_disconnect(struct in_multi *inm) 258 { 259 struct ifnet *ifp; 260 struct ifmultiaddr *ifma, *ll_ifma; 261 262 ifp = inm->inm_ifp; 263 IF_ADDR_WLOCK_ASSERT(ifp); 264 ifma = inm->inm_ifma; 265 266 if_ref(ifp); 267 if (ifma->ifma_flags & IFMA_F_ENQUEUED) { 268 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 269 ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 270 } 271 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 272 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 273 MPASS(ifma != ll_ifma); 274 ifma->ifma_llifma = NULL; 275 MPASS(ll_ifma->ifma_llifma == NULL); 276 MPASS(ll_ifma->ifma_ifp == ifp); 277 if (--ll_ifma->ifma_refcount == 0) { 278 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { 279 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 280 ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 281 } 282 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 283 if_freemulti(ll_ifma); 284 ifma_restart = true; 285 } 286 } 287 } 288 289 void 290 inm_release_deferred(struct in_multi *inm) 291 { 292 struct in_multi_head tmp; 293 294 IN_MULTI_LIST_LOCK_ASSERT(); 295 MPASS(inm->inm_refcount > 0); 296 if (--inm->inm_refcount == 0) { 297 SLIST_INIT(&tmp); 298 inm_disconnect(inm); 299 inm->inm_ifma->ifma_protospec = NULL; 300 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 301 inm_release_list_deferred(&tmp); 302 } 303 } 304 305 static void 306 inm_release_task(void *arg __unused) 307 { 308 struct in_multi_head inm_free_tmp; 309 struct in_multi *inm, *tinm; 310 311 SLIST_INIT(&inm_free_tmp); 312 mtx_lock(&in_multi_free_mtx); 313 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 314 mtx_unlock(&in_multi_free_mtx); 315 IN_MULTI_LOCK(); 316 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 317 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 318 MPASS(inm); 319 inm_release(inm); 320 } 321 IN_MULTI_UNLOCK(); 322 } 323 324 /* 325 * Initialize an in_mfilter structure to a known state at t0, t1 326 * with an empty source filter list. 327 */ 328 static __inline void 329 imf_init(struct in_mfilter *imf, const int st0, const int st1) 330 { 331 memset(imf, 0, sizeof(struct in_mfilter)); 332 RB_INIT(&imf->imf_sources); 333 imf->imf_st[0] = st0; 334 imf->imf_st[1] = st1; 335 } 336 337 struct in_mfilter * 338 ip_mfilter_alloc(const int mflags, const int st0, const int st1) 339 { 340 struct in_mfilter *imf; 341 342 imf = malloc(sizeof(*imf), M_INMFILTER, mflags); 343 if (imf != NULL) 344 imf_init(imf, st0, st1); 345 346 return (imf); 347 } 348 349 void 350 ip_mfilter_free(struct in_mfilter *imf) 351 { 352 353 imf_purge(imf); 354 free(imf, M_INMFILTER); 355 } 356 357 /* 358 * Function for looking up an in_multi record for an IPv4 multicast address 359 * on a given interface. ifp must be valid. If no record found, return NULL. 360 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 361 */ 362 struct in_multi * 363 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 364 { 365 struct ifmultiaddr *ifma; 366 struct in_multi *inm; 367 368 IN_MULTI_LIST_LOCK_ASSERT(); 369 IF_ADDR_LOCK_ASSERT(ifp); 370 371 inm = NULL; 372 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 373 if (ifma->ifma_addr->sa_family != AF_INET || 374 ifma->ifma_protospec == NULL) 375 continue; 376 inm = (struct in_multi *)ifma->ifma_protospec; 377 if (inm->inm_addr.s_addr == ina.s_addr) 378 break; 379 inm = NULL; 380 } 381 return (inm); 382 } 383 384 /* 385 * Wrapper for inm_lookup_locked(). 386 * The IF_ADDR_LOCK will be taken on ifp and released on return. 387 */ 388 struct in_multi * 389 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 390 { 391 struct epoch_tracker et; 392 struct in_multi *inm; 393 394 IN_MULTI_LIST_LOCK_ASSERT(); 395 NET_EPOCH_ENTER(et); 396 397 inm = inm_lookup_locked(ifp, ina); 398 NET_EPOCH_EXIT(et); 399 400 return (inm); 401 } 402 403 /* 404 * Find an IPv4 multicast group entry for this ip_moptions instance 405 * which matches the specified group, and optionally an interface. 406 * Return its index into the array, or -1 if not found. 407 */ 408 static struct in_mfilter * 409 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 410 const struct sockaddr *group) 411 { 412 const struct sockaddr_in *gsin; 413 struct in_mfilter *imf; 414 struct in_multi *inm; 415 416 gsin = (const struct sockaddr_in *)group; 417 418 IP_MFILTER_FOREACH(imf, &imo->imo_head) { 419 inm = imf->imf_inm; 420 if (inm == NULL) 421 continue; 422 if ((ifp == NULL || (inm->inm_ifp == ifp)) && 423 in_hosteq(inm->inm_addr, gsin->sin_addr)) { 424 break; 425 } 426 } 427 return (imf); 428 } 429 430 /* 431 * Find an IPv4 multicast source entry for this imo which matches 432 * the given group index for this socket, and source address. 433 * 434 * NOTE: This does not check if the entry is in-mode, merely if 435 * it exists, which may not be the desired behaviour. 436 */ 437 static struct in_msource * 438 imo_match_source(struct in_mfilter *imf, const struct sockaddr *src) 439 { 440 struct ip_msource find; 441 struct ip_msource *ims; 442 const sockunion_t *psa; 443 444 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 445 446 /* Source trees are keyed in host byte order. */ 447 psa = (const sockunion_t *)src; 448 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 449 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 450 451 return ((struct in_msource *)ims); 452 } 453 454 /* 455 * Perform filtering for multicast datagrams on a socket by group and source. 456 * 457 * Returns 0 if a datagram should be allowed through, or various error codes 458 * if the socket was not a member of the group, or the source was muted, etc. 459 */ 460 int 461 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 462 const struct sockaddr *group, const struct sockaddr *src) 463 { 464 struct in_mfilter *imf; 465 struct in_msource *ims; 466 int mode; 467 468 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 469 470 imf = imo_match_group(imo, ifp, group); 471 if (imf == NULL) 472 return (MCAST_NOTGMEMBER); 473 474 /* 475 * Check if the source was included in an (S,G) join. 476 * Allow reception on exclusive memberships by default, 477 * reject reception on inclusive memberships by default. 478 * Exclude source only if an in-mode exclude filter exists. 479 * Include source only if an in-mode include filter exists. 480 * NOTE: We are comparing group state here at IGMP t1 (now) 481 * with socket-layer t0 (since last downcall). 482 */ 483 mode = imf->imf_st[1]; 484 ims = imo_match_source(imf, src); 485 486 if ((ims == NULL && mode == MCAST_INCLUDE) || 487 (ims != NULL && ims->imsl_st[0] != mode)) 488 return (MCAST_NOTSMEMBER); 489 490 return (MCAST_PASS); 491 } 492 493 /* 494 * Find and return a reference to an in_multi record for (ifp, group), 495 * and bump its reference count. 496 * If one does not exist, try to allocate it, and update link-layer multicast 497 * filters on ifp to listen for group. 498 * Assumes the IN_MULTI lock is held across the call. 499 * Return 0 if successful, otherwise return an appropriate error code. 500 */ 501 static int 502 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 503 struct in_multi **pinm) 504 { 505 struct sockaddr_in gsin; 506 struct ifmultiaddr *ifma; 507 struct in_ifinfo *ii; 508 struct in_multi *inm; 509 int error; 510 511 IN_MULTI_LOCK_ASSERT(); 512 513 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 514 IN_MULTI_LIST_LOCK(); 515 inm = inm_lookup(ifp, *group); 516 if (inm != NULL) { 517 /* 518 * If we already joined this group, just bump the 519 * refcount and return it. 520 */ 521 KASSERT(inm->inm_refcount >= 1, 522 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 523 inm_acquire_locked(inm); 524 *pinm = inm; 525 } 526 IN_MULTI_LIST_UNLOCK(); 527 if (inm != NULL) 528 return (0); 529 530 memset(&gsin, 0, sizeof(gsin)); 531 gsin.sin_family = AF_INET; 532 gsin.sin_len = sizeof(struct sockaddr_in); 533 gsin.sin_addr = *group; 534 535 /* 536 * Check if a link-layer group is already associated 537 * with this network-layer group on the given ifnet. 538 */ 539 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 540 if (error != 0) 541 return (error); 542 543 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 544 IN_MULTI_LIST_LOCK(); 545 IF_ADDR_WLOCK(ifp); 546 547 /* 548 * If something other than netinet is occupying the link-layer 549 * group, print a meaningful error message and back out of 550 * the allocation. 551 * Otherwise, bump the refcount on the existing network-layer 552 * group association and return it. 553 */ 554 if (ifma->ifma_protospec != NULL) { 555 inm = (struct in_multi *)ifma->ifma_protospec; 556 #ifdef INVARIANTS 557 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 558 __func__)); 559 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 560 ("%s: ifma not AF_INET", __func__)); 561 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 562 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 563 !in_hosteq(inm->inm_addr, *group)) { 564 char addrbuf[INET_ADDRSTRLEN]; 565 566 panic("%s: ifma %p is inconsistent with %p (%s)", 567 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 568 } 569 #endif 570 inm_acquire_locked(inm); 571 *pinm = inm; 572 goto out_locked; 573 } 574 575 IF_ADDR_WLOCK_ASSERT(ifp); 576 577 /* 578 * A new in_multi record is needed; allocate and initialize it. 579 * We DO NOT perform an IGMP join as the in_ layer may need to 580 * push an initial source list down to IGMP to support SSM. 581 * 582 * The initial source filter state is INCLUDE, {} as per the RFC. 583 */ 584 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 585 if (inm == NULL) { 586 IF_ADDR_WUNLOCK(ifp); 587 IN_MULTI_LIST_UNLOCK(); 588 if_delmulti_ifma(ifma); 589 return (ENOMEM); 590 } 591 inm->inm_addr = *group; 592 inm->inm_ifp = ifp; 593 inm->inm_igi = ii->ii_igmp; 594 inm->inm_ifma = ifma; 595 inm->inm_refcount = 1; 596 inm->inm_state = IGMP_NOT_MEMBER; 597 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 598 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 599 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 600 RB_INIT(&inm->inm_srcs); 601 602 ifma->ifma_protospec = inm; 603 604 *pinm = inm; 605 out_locked: 606 IF_ADDR_WUNLOCK(ifp); 607 IN_MULTI_LIST_UNLOCK(); 608 return (0); 609 } 610 611 /* 612 * Drop a reference to an in_multi record. 613 * 614 * If the refcount drops to 0, free the in_multi record and 615 * delete the underlying link-layer membership. 616 */ 617 static void 618 inm_release(struct in_multi *inm) 619 { 620 struct ifmultiaddr *ifma; 621 struct ifnet *ifp; 622 623 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 624 MPASS(inm->inm_refcount == 0); 625 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 626 627 ifma = inm->inm_ifma; 628 ifp = inm->inm_ifp; 629 630 /* XXX this access is not covered by IF_ADDR_LOCK */ 631 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 632 if (ifp != NULL) { 633 CURVNET_SET(ifp->if_vnet); 634 inm_purge(inm); 635 free(inm, M_IPMADDR); 636 if_delmulti_ifma_flags(ifma, 1); 637 CURVNET_RESTORE(); 638 if_rele(ifp); 639 } else { 640 inm_purge(inm); 641 free(inm, M_IPMADDR); 642 if_delmulti_ifma_flags(ifma, 1); 643 } 644 } 645 646 /* 647 * Clear recorded source entries for a group. 648 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 649 * FIXME: Should reap. 650 */ 651 void 652 inm_clear_recorded(struct in_multi *inm) 653 { 654 struct ip_msource *ims; 655 656 IN_MULTI_LIST_LOCK_ASSERT(); 657 658 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 659 if (ims->ims_stp) { 660 ims->ims_stp = 0; 661 --inm->inm_st[1].iss_rec; 662 } 663 } 664 KASSERT(inm->inm_st[1].iss_rec == 0, 665 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 666 } 667 668 /* 669 * Record a source as pending for a Source-Group IGMPv3 query. 670 * This lives here as it modifies the shared tree. 671 * 672 * inm is the group descriptor. 673 * naddr is the address of the source to record in network-byte order. 674 * 675 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 676 * lazy-allocate a source node in response to an SG query. 677 * Otherwise, no allocation is performed. This saves some memory 678 * with the trade-off that the source will not be reported to the 679 * router if joined in the window between the query response and 680 * the group actually being joined on the local host. 681 * 682 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 683 * This turns off the allocation of a recorded source entry if 684 * the group has not been joined. 685 * 686 * Return 0 if the source didn't exist or was already marked as recorded. 687 * Return 1 if the source was marked as recorded by this function. 688 * Return <0 if any error occurred (negated errno code). 689 */ 690 int 691 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 692 { 693 struct ip_msource find; 694 struct ip_msource *ims, *nims; 695 696 IN_MULTI_LIST_LOCK_ASSERT(); 697 698 find.ims_haddr = ntohl(naddr); 699 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 700 if (ims && ims->ims_stp) 701 return (0); 702 if (ims == NULL) { 703 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 704 return (-ENOSPC); 705 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 706 M_NOWAIT | M_ZERO); 707 if (nims == NULL) 708 return (-ENOMEM); 709 nims->ims_haddr = find.ims_haddr; 710 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 711 ++inm->inm_nsrc; 712 ims = nims; 713 } 714 715 /* 716 * Mark the source as recorded and update the recorded 717 * source count. 718 */ 719 ++ims->ims_stp; 720 ++inm->inm_st[1].iss_rec; 721 722 return (1); 723 } 724 725 /* 726 * Return a pointer to an in_msource owned by an in_mfilter, 727 * given its source address. 728 * Lazy-allocate if needed. If this is a new entry its filter state is 729 * undefined at t0. 730 * 731 * imf is the filter set being modified. 732 * haddr is the source address in *host* byte-order. 733 * 734 * SMPng: May be called with locks held; malloc must not block. 735 */ 736 static int 737 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 738 struct in_msource **plims) 739 { 740 struct ip_msource find; 741 struct ip_msource *ims, *nims; 742 struct in_msource *lims; 743 int error; 744 745 error = 0; 746 ims = NULL; 747 lims = NULL; 748 749 /* key is host byte order */ 750 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 751 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 752 lims = (struct in_msource *)ims; 753 if (lims == NULL) { 754 if (imf->imf_nsrc == in_mcast_maxsocksrc) 755 return (ENOSPC); 756 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 757 M_NOWAIT | M_ZERO); 758 if (nims == NULL) 759 return (ENOMEM); 760 lims = (struct in_msource *)nims; 761 lims->ims_haddr = find.ims_haddr; 762 lims->imsl_st[0] = MCAST_UNDEFINED; 763 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 764 ++imf->imf_nsrc; 765 } 766 767 *plims = lims; 768 769 return (error); 770 } 771 772 /* 773 * Graft a source entry into an existing socket-layer filter set, 774 * maintaining any required invariants and checking allocations. 775 * 776 * The source is marked as being in the new filter mode at t1. 777 * 778 * Return the pointer to the new node, otherwise return NULL. 779 */ 780 static struct in_msource * 781 imf_graft(struct in_mfilter *imf, const uint8_t st1, 782 const struct sockaddr_in *psin) 783 { 784 struct ip_msource *nims; 785 struct in_msource *lims; 786 787 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 788 M_NOWAIT | M_ZERO); 789 if (nims == NULL) 790 return (NULL); 791 lims = (struct in_msource *)nims; 792 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 793 lims->imsl_st[0] = MCAST_UNDEFINED; 794 lims->imsl_st[1] = st1; 795 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 796 ++imf->imf_nsrc; 797 798 return (lims); 799 } 800 801 /* 802 * Prune a source entry from an existing socket-layer filter set, 803 * maintaining any required invariants and checking allocations. 804 * 805 * The source is marked as being left at t1, it is not freed. 806 * 807 * Return 0 if no error occurred, otherwise return an errno value. 808 */ 809 static int 810 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 811 { 812 struct ip_msource find; 813 struct ip_msource *ims; 814 struct in_msource *lims; 815 816 /* key is host byte order */ 817 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 818 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 819 if (ims == NULL) 820 return (ENOENT); 821 lims = (struct in_msource *)ims; 822 lims->imsl_st[1] = MCAST_UNDEFINED; 823 return (0); 824 } 825 826 /* 827 * Revert socket-layer filter set deltas at t1 to t0 state. 828 */ 829 static void 830 imf_rollback(struct in_mfilter *imf) 831 { 832 struct ip_msource *ims, *tims; 833 struct in_msource *lims; 834 835 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 836 lims = (struct in_msource *)ims; 837 if (lims->imsl_st[0] == lims->imsl_st[1]) { 838 /* no change at t1 */ 839 continue; 840 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 841 /* revert change to existing source at t1 */ 842 lims->imsl_st[1] = lims->imsl_st[0]; 843 } else { 844 /* revert source added t1 */ 845 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 846 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 847 free(ims, M_INMFILTER); 848 imf->imf_nsrc--; 849 } 850 } 851 imf->imf_st[1] = imf->imf_st[0]; 852 } 853 854 /* 855 * Mark socket-layer filter set as INCLUDE {} at t1. 856 */ 857 static void 858 imf_leave(struct in_mfilter *imf) 859 { 860 struct ip_msource *ims; 861 struct in_msource *lims; 862 863 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 864 lims = (struct in_msource *)ims; 865 lims->imsl_st[1] = MCAST_UNDEFINED; 866 } 867 imf->imf_st[1] = MCAST_INCLUDE; 868 } 869 870 /* 871 * Mark socket-layer filter set deltas as committed. 872 */ 873 static void 874 imf_commit(struct in_mfilter *imf) 875 { 876 struct ip_msource *ims; 877 struct in_msource *lims; 878 879 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 880 lims = (struct in_msource *)ims; 881 lims->imsl_st[0] = lims->imsl_st[1]; 882 } 883 imf->imf_st[0] = imf->imf_st[1]; 884 } 885 886 /* 887 * Reap unreferenced sources from socket-layer filter set. 888 */ 889 static void 890 imf_reap(struct in_mfilter *imf) 891 { 892 struct ip_msource *ims, *tims; 893 struct in_msource *lims; 894 895 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 896 lims = (struct in_msource *)ims; 897 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 898 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 899 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 900 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 901 free(ims, M_INMFILTER); 902 imf->imf_nsrc--; 903 } 904 } 905 } 906 907 /* 908 * Purge socket-layer filter set. 909 */ 910 static void 911 imf_purge(struct in_mfilter *imf) 912 { 913 struct ip_msource *ims, *tims; 914 915 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 916 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 917 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 918 free(ims, M_INMFILTER); 919 imf->imf_nsrc--; 920 } 921 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 922 KASSERT(RB_EMPTY(&imf->imf_sources), 923 ("%s: imf_sources not empty", __func__)); 924 } 925 926 /* 927 * Look up a source filter entry for a multicast group. 928 * 929 * inm is the group descriptor to work with. 930 * haddr is the host-byte-order IPv4 address to look up. 931 * noalloc may be non-zero to suppress allocation of sources. 932 * *pims will be set to the address of the retrieved or allocated source. 933 * 934 * SMPng: NOTE: may be called with locks held. 935 * Return 0 if successful, otherwise return a non-zero error code. 936 */ 937 static int 938 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 939 const int noalloc, struct ip_msource **pims) 940 { 941 struct ip_msource find; 942 struct ip_msource *ims, *nims; 943 944 find.ims_haddr = haddr; 945 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 946 if (ims == NULL && !noalloc) { 947 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 948 return (ENOSPC); 949 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 950 M_NOWAIT | M_ZERO); 951 if (nims == NULL) 952 return (ENOMEM); 953 nims->ims_haddr = haddr; 954 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 955 ++inm->inm_nsrc; 956 ims = nims; 957 #ifdef KTR 958 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 959 haddr, ims); 960 #endif 961 } 962 963 *pims = ims; 964 return (0); 965 } 966 967 /* 968 * Merge socket-layer source into IGMP-layer source. 969 * If rollback is non-zero, perform the inverse of the merge. 970 */ 971 static void 972 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 973 const int rollback) 974 { 975 int n = rollback ? -1 : 1; 976 977 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 978 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 979 __func__, n, ims->ims_haddr); 980 ims->ims_st[1].ex -= n; 981 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 982 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 983 __func__, n, ims->ims_haddr); 984 ims->ims_st[1].in -= n; 985 } 986 987 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 988 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 989 __func__, n, ims->ims_haddr); 990 ims->ims_st[1].ex += n; 991 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 992 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 993 __func__, n, ims->ims_haddr); 994 ims->ims_st[1].in += n; 995 } 996 } 997 998 /* 999 * Atomically update the global in_multi state, when a membership's 1000 * filter list is being updated in any way. 1001 * 1002 * imf is the per-inpcb-membership group filter pointer. 1003 * A fake imf may be passed for in-kernel consumers. 1004 * 1005 * XXX This is a candidate for a set-symmetric-difference style loop 1006 * which would eliminate the repeated lookup from root of ims nodes, 1007 * as they share the same key space. 1008 * 1009 * If any error occurred this function will back out of refcounts 1010 * and return a non-zero value. 1011 */ 1012 static int 1013 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1014 { 1015 struct ip_msource *ims, *nims; 1016 struct in_msource *lims; 1017 int schanged, error; 1018 int nsrc0, nsrc1; 1019 1020 schanged = 0; 1021 error = 0; 1022 nsrc1 = nsrc0 = 0; 1023 IN_MULTI_LIST_LOCK_ASSERT(); 1024 1025 /* 1026 * Update the source filters first, as this may fail. 1027 * Maintain count of in-mode filters at t0, t1. These are 1028 * used to work out if we transition into ASM mode or not. 1029 * Maintain a count of source filters whose state was 1030 * actually modified by this operation. 1031 */ 1032 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1033 lims = (struct in_msource *)ims; 1034 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1035 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1036 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1037 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1038 ++schanged; 1039 if (error) 1040 break; 1041 ims_merge(nims, lims, 0); 1042 } 1043 if (error) { 1044 struct ip_msource *bims; 1045 1046 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1047 lims = (struct in_msource *)ims; 1048 if (lims->imsl_st[0] == lims->imsl_st[1]) 1049 continue; 1050 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1051 if (bims == NULL) 1052 continue; 1053 ims_merge(bims, lims, 1); 1054 } 1055 goto out_reap; 1056 } 1057 1058 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1059 __func__, nsrc0, nsrc1); 1060 1061 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1062 if (imf->imf_st[0] == imf->imf_st[1] && 1063 imf->imf_st[1] == MCAST_INCLUDE) { 1064 if (nsrc1 == 0) { 1065 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1066 --inm->inm_st[1].iss_in; 1067 } 1068 } 1069 1070 /* Handle filter mode transition on socket. */ 1071 if (imf->imf_st[0] != imf->imf_st[1]) { 1072 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1073 __func__, imf->imf_st[0], imf->imf_st[1]); 1074 1075 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1076 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1077 --inm->inm_st[1].iss_ex; 1078 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1079 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1080 --inm->inm_st[1].iss_in; 1081 } 1082 1083 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1084 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1085 inm->inm_st[1].iss_ex++; 1086 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1087 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1088 inm->inm_st[1].iss_in++; 1089 } 1090 } 1091 1092 /* 1093 * Track inm filter state in terms of listener counts. 1094 * If there are any exclusive listeners, stack-wide 1095 * membership is exclusive. 1096 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1097 * If no listeners remain, state is undefined at t1, 1098 * and the IGMP lifecycle for this group should finish. 1099 */ 1100 if (inm->inm_st[1].iss_ex > 0) { 1101 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1102 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1103 } else if (inm->inm_st[1].iss_in > 0) { 1104 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1105 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1106 } else { 1107 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1108 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1109 } 1110 1111 /* Decrement ASM listener count on transition out of ASM mode. */ 1112 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1113 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1114 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1115 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1116 --inm->inm_st[1].iss_asm; 1117 } 1118 } 1119 1120 /* Increment ASM listener count on transition to ASM mode. */ 1121 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1122 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1123 inm->inm_st[1].iss_asm++; 1124 } 1125 1126 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1127 inm_print(inm); 1128 1129 out_reap: 1130 if (schanged > 0) { 1131 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1132 inm_reap(inm); 1133 } 1134 return (error); 1135 } 1136 1137 /* 1138 * Mark an in_multi's filter set deltas as committed. 1139 * Called by IGMP after a state change has been enqueued. 1140 */ 1141 void 1142 inm_commit(struct in_multi *inm) 1143 { 1144 struct ip_msource *ims; 1145 1146 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1147 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1148 inm_print(inm); 1149 1150 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1151 ims->ims_st[0] = ims->ims_st[1]; 1152 } 1153 inm->inm_st[0] = inm->inm_st[1]; 1154 } 1155 1156 /* 1157 * Reap unreferenced nodes from an in_multi's filter set. 1158 */ 1159 static void 1160 inm_reap(struct in_multi *inm) 1161 { 1162 struct ip_msource *ims, *tims; 1163 1164 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1165 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1166 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1167 ims->ims_stp != 0) 1168 continue; 1169 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1170 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1171 free(ims, M_IPMSOURCE); 1172 inm->inm_nsrc--; 1173 } 1174 } 1175 1176 /* 1177 * Purge all source nodes from an in_multi's filter set. 1178 */ 1179 static void 1180 inm_purge(struct in_multi *inm) 1181 { 1182 struct ip_msource *ims, *tims; 1183 1184 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1185 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1186 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1187 free(ims, M_IPMSOURCE); 1188 inm->inm_nsrc--; 1189 } 1190 } 1191 1192 /* 1193 * Join a multicast group; unlocked entry point. 1194 * 1195 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1196 * is not held. Fortunately, ifp is unlikely to have been detached 1197 * at this point, so we assume it's OK to recurse. 1198 */ 1199 int 1200 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1201 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1202 { 1203 int error; 1204 1205 IN_MULTI_LOCK(); 1206 error = in_joingroup_locked(ifp, gina, imf, pinm); 1207 IN_MULTI_UNLOCK(); 1208 1209 return (error); 1210 } 1211 1212 /* 1213 * Join a multicast group; real entry point. 1214 * 1215 * Only preserves atomicity at inm level. 1216 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1217 * 1218 * If the IGMP downcall fails, the group is not joined, and an error 1219 * code is returned. 1220 */ 1221 int 1222 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1223 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1224 { 1225 struct in_mfilter timf; 1226 struct in_multi *inm; 1227 int error; 1228 1229 IN_MULTI_LOCK_ASSERT(); 1230 IN_MULTI_LIST_UNLOCK_ASSERT(); 1231 1232 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1233 ntohl(gina->s_addr), ifp, ifp->if_xname); 1234 1235 error = 0; 1236 inm = NULL; 1237 1238 /* 1239 * If no imf was specified (i.e. kernel consumer), 1240 * fake one up and assume it is an ASM join. 1241 */ 1242 if (imf == NULL) { 1243 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1244 imf = &timf; 1245 } 1246 1247 error = in_getmulti(ifp, gina, &inm); 1248 if (error) { 1249 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1250 return (error); 1251 } 1252 IN_MULTI_LIST_LOCK(); 1253 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1254 error = inm_merge(inm, imf); 1255 if (error) { 1256 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1257 goto out_inm_release; 1258 } 1259 1260 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1261 error = igmp_change_state(inm); 1262 if (error) { 1263 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1264 goto out_inm_release; 1265 } 1266 1267 out_inm_release: 1268 if (error) { 1269 1270 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1271 IF_ADDR_WLOCK(ifp); 1272 inm_release_deferred(inm); 1273 IF_ADDR_WUNLOCK(ifp); 1274 } else { 1275 *pinm = inm; 1276 } 1277 IN_MULTI_LIST_UNLOCK(); 1278 1279 return (error); 1280 } 1281 1282 /* 1283 * Leave a multicast group; unlocked entry point. 1284 */ 1285 int 1286 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1287 { 1288 int error; 1289 1290 IN_MULTI_LOCK(); 1291 error = in_leavegroup_locked(inm, imf); 1292 IN_MULTI_UNLOCK(); 1293 1294 return (error); 1295 } 1296 1297 /* 1298 * Leave a multicast group; real entry point. 1299 * All source filters will be expunged. 1300 * 1301 * Only preserves atomicity at inm level. 1302 * 1303 * Holding the write lock for the INP which contains imf 1304 * is highly advisable. We can't assert for it as imf does not 1305 * contain a back-pointer to the owning inp. 1306 * 1307 * Note: This is not the same as inm_release(*) as this function also 1308 * makes a state change downcall into IGMP. 1309 */ 1310 int 1311 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1312 { 1313 struct in_mfilter timf; 1314 int error; 1315 1316 IN_MULTI_LOCK_ASSERT(); 1317 IN_MULTI_LIST_UNLOCK_ASSERT(); 1318 1319 error = 0; 1320 1321 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1322 inm, ntohl(inm->inm_addr.s_addr), 1323 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1324 imf); 1325 1326 /* 1327 * If no imf was specified (i.e. kernel consumer), 1328 * fake one up and assume it is an ASM join. 1329 */ 1330 if (imf == NULL) { 1331 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1332 imf = &timf; 1333 } 1334 1335 /* 1336 * Begin state merge transaction at IGMP layer. 1337 * 1338 * As this particular invocation should not cause any memory 1339 * to be allocated, and there is no opportunity to roll back 1340 * the transaction, it MUST NOT fail. 1341 */ 1342 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1343 IN_MULTI_LIST_LOCK(); 1344 error = inm_merge(inm, imf); 1345 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1346 1347 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1348 CURVNET_SET(inm->inm_ifp->if_vnet); 1349 error = igmp_change_state(inm); 1350 IF_ADDR_WLOCK(inm->inm_ifp); 1351 inm_release_deferred(inm); 1352 IF_ADDR_WUNLOCK(inm->inm_ifp); 1353 IN_MULTI_LIST_UNLOCK(); 1354 CURVNET_RESTORE(); 1355 if (error) 1356 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1357 1358 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1359 1360 return (error); 1361 } 1362 1363 /*#ifndef BURN_BRIDGES*/ 1364 /* 1365 * Join an IPv4 multicast group in (*,G) exclusive mode. 1366 * The group must be a 224.0.0.0/24 link-scope group. 1367 * This KPI is for legacy kernel consumers only. 1368 */ 1369 struct in_multi * 1370 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1371 { 1372 struct in_multi *pinm; 1373 int error; 1374 #ifdef INVARIANTS 1375 char addrbuf[INET_ADDRSTRLEN]; 1376 #endif 1377 1378 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1379 ("%s: %s not in 224.0.0.0/24", __func__, 1380 inet_ntoa_r(*ap, addrbuf))); 1381 1382 error = in_joingroup(ifp, ap, NULL, &pinm); 1383 if (error != 0) 1384 pinm = NULL; 1385 1386 return (pinm); 1387 } 1388 1389 /* 1390 * Block or unblock an ASM multicast source on an inpcb. 1391 * This implements the delta-based API described in RFC 3678. 1392 * 1393 * The delta-based API applies only to exclusive-mode memberships. 1394 * An IGMP downcall will be performed. 1395 * 1396 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1397 * 1398 * Return 0 if successful, otherwise return an appropriate error code. 1399 */ 1400 static int 1401 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1402 { 1403 struct group_source_req gsr; 1404 struct rm_priotracker in_ifa_tracker; 1405 sockunion_t *gsa, *ssa; 1406 struct ifnet *ifp; 1407 struct in_mfilter *imf; 1408 struct ip_moptions *imo; 1409 struct in_msource *ims; 1410 struct in_multi *inm; 1411 uint16_t fmode; 1412 int error, doblock; 1413 1414 ifp = NULL; 1415 error = 0; 1416 doblock = 0; 1417 1418 memset(&gsr, 0, sizeof(struct group_source_req)); 1419 gsa = (sockunion_t *)&gsr.gsr_group; 1420 ssa = (sockunion_t *)&gsr.gsr_source; 1421 1422 switch (sopt->sopt_name) { 1423 case IP_BLOCK_SOURCE: 1424 case IP_UNBLOCK_SOURCE: { 1425 struct ip_mreq_source mreqs; 1426 1427 error = sooptcopyin(sopt, &mreqs, 1428 sizeof(struct ip_mreq_source), 1429 sizeof(struct ip_mreq_source)); 1430 if (error) 1431 return (error); 1432 1433 gsa->sin.sin_family = AF_INET; 1434 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1435 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1436 1437 ssa->sin.sin_family = AF_INET; 1438 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1439 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1440 1441 if (!in_nullhost(mreqs.imr_interface)) { 1442 IN_IFADDR_RLOCK(&in_ifa_tracker); 1443 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1444 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1445 } 1446 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1447 doblock = 1; 1448 1449 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1450 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1451 break; 1452 } 1453 1454 case MCAST_BLOCK_SOURCE: 1455 case MCAST_UNBLOCK_SOURCE: 1456 error = sooptcopyin(sopt, &gsr, 1457 sizeof(struct group_source_req), 1458 sizeof(struct group_source_req)); 1459 if (error) 1460 return (error); 1461 1462 if (gsa->sin.sin_family != AF_INET || 1463 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1464 return (EINVAL); 1465 1466 if (ssa->sin.sin_family != AF_INET || 1467 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1468 return (EINVAL); 1469 1470 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1471 return (EADDRNOTAVAIL); 1472 1473 ifp = ifnet_byindex(gsr.gsr_interface); 1474 1475 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1476 doblock = 1; 1477 break; 1478 1479 default: 1480 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1481 __func__, sopt->sopt_name); 1482 return (EOPNOTSUPP); 1483 break; 1484 } 1485 1486 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1487 return (EINVAL); 1488 1489 IN_MULTI_LOCK(); 1490 1491 /* 1492 * Check if we are actually a member of this group. 1493 */ 1494 imo = inp_findmoptions(inp); 1495 imf = imo_match_group(imo, ifp, &gsa->sa); 1496 if (imf == NULL) { 1497 error = EADDRNOTAVAIL; 1498 goto out_inp_locked; 1499 } 1500 inm = imf->imf_inm; 1501 1502 /* 1503 * Attempting to use the delta-based API on an 1504 * non exclusive-mode membership is an error. 1505 */ 1506 fmode = imf->imf_st[0]; 1507 if (fmode != MCAST_EXCLUDE) { 1508 error = EINVAL; 1509 goto out_inp_locked; 1510 } 1511 1512 /* 1513 * Deal with error cases up-front: 1514 * Asked to block, but already blocked; or 1515 * Asked to unblock, but nothing to unblock. 1516 * If adding a new block entry, allocate it. 1517 */ 1518 ims = imo_match_source(imf, &ssa->sa); 1519 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1520 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1521 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1522 error = EADDRNOTAVAIL; 1523 goto out_inp_locked; 1524 } 1525 1526 INP_WLOCK_ASSERT(inp); 1527 1528 /* 1529 * Begin state merge transaction at socket layer. 1530 */ 1531 if (doblock) { 1532 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1533 ims = imf_graft(imf, fmode, &ssa->sin); 1534 if (ims == NULL) 1535 error = ENOMEM; 1536 } else { 1537 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1538 error = imf_prune(imf, &ssa->sin); 1539 } 1540 1541 if (error) { 1542 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1543 goto out_imf_rollback; 1544 } 1545 1546 /* 1547 * Begin state merge transaction at IGMP layer. 1548 */ 1549 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1550 IN_MULTI_LIST_LOCK(); 1551 error = inm_merge(inm, imf); 1552 if (error) { 1553 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1554 IN_MULTI_LIST_UNLOCK(); 1555 goto out_imf_rollback; 1556 } 1557 1558 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1559 error = igmp_change_state(inm); 1560 IN_MULTI_LIST_UNLOCK(); 1561 if (error) 1562 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1563 1564 out_imf_rollback: 1565 if (error) 1566 imf_rollback(imf); 1567 else 1568 imf_commit(imf); 1569 1570 imf_reap(imf); 1571 1572 out_inp_locked: 1573 INP_WUNLOCK(inp); 1574 IN_MULTI_UNLOCK(); 1575 return (error); 1576 } 1577 1578 /* 1579 * Given an inpcb, return its multicast options structure pointer. Accepts 1580 * an unlocked inpcb pointer, but will return it locked. May sleep. 1581 * 1582 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1583 * SMPng: NOTE: Returns with the INP write lock held. 1584 */ 1585 static struct ip_moptions * 1586 inp_findmoptions(struct inpcb *inp) 1587 { 1588 struct ip_moptions *imo; 1589 1590 INP_WLOCK(inp); 1591 if (inp->inp_moptions != NULL) 1592 return (inp->inp_moptions); 1593 1594 INP_WUNLOCK(inp); 1595 1596 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1597 1598 imo->imo_multicast_ifp = NULL; 1599 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1600 imo->imo_multicast_vif = -1; 1601 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1602 imo->imo_multicast_loop = in_mcast_loop; 1603 STAILQ_INIT(&imo->imo_head); 1604 1605 INP_WLOCK(inp); 1606 if (inp->inp_moptions != NULL) { 1607 free(imo, M_IPMOPTS); 1608 return (inp->inp_moptions); 1609 } 1610 inp->inp_moptions = imo; 1611 return (imo); 1612 } 1613 1614 static void 1615 inp_gcmoptions(struct ip_moptions *imo) 1616 { 1617 struct in_mfilter *imf; 1618 struct in_multi *inm; 1619 struct ifnet *ifp; 1620 1621 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1622 ip_mfilter_remove(&imo->imo_head, imf); 1623 1624 imf_leave(imf); 1625 if ((inm = imf->imf_inm) != NULL) { 1626 if ((ifp = inm->inm_ifp) != NULL) { 1627 CURVNET_SET(ifp->if_vnet); 1628 (void)in_leavegroup(inm, imf); 1629 CURVNET_RESTORE(); 1630 } else { 1631 (void)in_leavegroup(inm, imf); 1632 } 1633 } 1634 ip_mfilter_free(imf); 1635 } 1636 free(imo, M_IPMOPTS); 1637 } 1638 1639 /* 1640 * Discard the IP multicast options (and source filters). To minimize 1641 * the amount of work done while holding locks such as the INP's 1642 * pcbinfo lock (which is used in the receive path), the free 1643 * operation is deferred to the epoch callback task. 1644 */ 1645 void 1646 inp_freemoptions(struct ip_moptions *imo) 1647 { 1648 if (imo == NULL) 1649 return; 1650 inp_gcmoptions(imo); 1651 } 1652 1653 /* 1654 * Atomically get source filters on a socket for an IPv4 multicast group. 1655 * Called with INP lock held; returns with lock released. 1656 */ 1657 static int 1658 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1659 { 1660 struct __msfilterreq msfr; 1661 sockunion_t *gsa; 1662 struct ifnet *ifp; 1663 struct ip_moptions *imo; 1664 struct in_mfilter *imf; 1665 struct ip_msource *ims; 1666 struct in_msource *lims; 1667 struct sockaddr_in *psin; 1668 struct sockaddr_storage *ptss; 1669 struct sockaddr_storage *tss; 1670 int error; 1671 size_t nsrcs, ncsrcs; 1672 1673 INP_WLOCK_ASSERT(inp); 1674 1675 imo = inp->inp_moptions; 1676 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1677 1678 INP_WUNLOCK(inp); 1679 1680 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1681 sizeof(struct __msfilterreq)); 1682 if (error) 1683 return (error); 1684 1685 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1686 return (EINVAL); 1687 1688 ifp = ifnet_byindex(msfr.msfr_ifindex); 1689 if (ifp == NULL) 1690 return (EINVAL); 1691 1692 INP_WLOCK(inp); 1693 1694 /* 1695 * Lookup group on the socket. 1696 */ 1697 gsa = (sockunion_t *)&msfr.msfr_group; 1698 imf = imo_match_group(imo, ifp, &gsa->sa); 1699 if (imf == NULL) { 1700 INP_WUNLOCK(inp); 1701 return (EADDRNOTAVAIL); 1702 } 1703 1704 /* 1705 * Ignore memberships which are in limbo. 1706 */ 1707 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1708 INP_WUNLOCK(inp); 1709 return (EAGAIN); 1710 } 1711 msfr.msfr_fmode = imf->imf_st[1]; 1712 1713 /* 1714 * If the user specified a buffer, copy out the source filter 1715 * entries to userland gracefully. 1716 * We only copy out the number of entries which userland 1717 * has asked for, but we always tell userland how big the 1718 * buffer really needs to be. 1719 */ 1720 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1721 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1722 tss = NULL; 1723 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1724 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1725 M_TEMP, M_NOWAIT | M_ZERO); 1726 if (tss == NULL) { 1727 INP_WUNLOCK(inp); 1728 return (ENOBUFS); 1729 } 1730 } 1731 1732 /* 1733 * Count number of sources in-mode at t0. 1734 * If buffer space exists and remains, copy out source entries. 1735 */ 1736 nsrcs = msfr.msfr_nsrcs; 1737 ncsrcs = 0; 1738 ptss = tss; 1739 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1740 lims = (struct in_msource *)ims; 1741 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1742 lims->imsl_st[0] != imf->imf_st[0]) 1743 continue; 1744 ++ncsrcs; 1745 if (tss != NULL && nsrcs > 0) { 1746 psin = (struct sockaddr_in *)ptss; 1747 psin->sin_family = AF_INET; 1748 psin->sin_len = sizeof(struct sockaddr_in); 1749 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1750 psin->sin_port = 0; 1751 ++ptss; 1752 --nsrcs; 1753 } 1754 } 1755 1756 INP_WUNLOCK(inp); 1757 1758 if (tss != NULL) { 1759 error = copyout(tss, msfr.msfr_srcs, 1760 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1761 free(tss, M_TEMP); 1762 if (error) 1763 return (error); 1764 } 1765 1766 msfr.msfr_nsrcs = ncsrcs; 1767 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1768 1769 return (error); 1770 } 1771 1772 /* 1773 * Return the IP multicast options in response to user getsockopt(). 1774 */ 1775 int 1776 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1777 { 1778 struct rm_priotracker in_ifa_tracker; 1779 struct ip_mreqn mreqn; 1780 struct ip_moptions *imo; 1781 struct ifnet *ifp; 1782 struct in_ifaddr *ia; 1783 int error, optval; 1784 u_char coptval; 1785 1786 INP_WLOCK(inp); 1787 imo = inp->inp_moptions; 1788 /* 1789 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1790 * or is a divert socket, reject it. 1791 */ 1792 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1793 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1794 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1795 INP_WUNLOCK(inp); 1796 return (EOPNOTSUPP); 1797 } 1798 1799 error = 0; 1800 switch (sopt->sopt_name) { 1801 case IP_MULTICAST_VIF: 1802 if (imo != NULL) 1803 optval = imo->imo_multicast_vif; 1804 else 1805 optval = -1; 1806 INP_WUNLOCK(inp); 1807 error = sooptcopyout(sopt, &optval, sizeof(int)); 1808 break; 1809 1810 case IP_MULTICAST_IF: 1811 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1812 if (imo != NULL) { 1813 ifp = imo->imo_multicast_ifp; 1814 if (!in_nullhost(imo->imo_multicast_addr)) { 1815 mreqn.imr_address = imo->imo_multicast_addr; 1816 } else if (ifp != NULL) { 1817 struct epoch_tracker et; 1818 1819 mreqn.imr_ifindex = ifp->if_index; 1820 NET_EPOCH_ENTER(et); 1821 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1822 if (ia != NULL) 1823 mreqn.imr_address = 1824 IA_SIN(ia)->sin_addr; 1825 NET_EPOCH_EXIT(et); 1826 } 1827 } 1828 INP_WUNLOCK(inp); 1829 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1830 error = sooptcopyout(sopt, &mreqn, 1831 sizeof(struct ip_mreqn)); 1832 } else { 1833 error = sooptcopyout(sopt, &mreqn.imr_address, 1834 sizeof(struct in_addr)); 1835 } 1836 break; 1837 1838 case IP_MULTICAST_TTL: 1839 if (imo == NULL) 1840 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1841 else 1842 optval = coptval = imo->imo_multicast_ttl; 1843 INP_WUNLOCK(inp); 1844 if (sopt->sopt_valsize == sizeof(u_char)) 1845 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1846 else 1847 error = sooptcopyout(sopt, &optval, sizeof(int)); 1848 break; 1849 1850 case IP_MULTICAST_LOOP: 1851 if (imo == NULL) 1852 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1853 else 1854 optval = coptval = imo->imo_multicast_loop; 1855 INP_WUNLOCK(inp); 1856 if (sopt->sopt_valsize == sizeof(u_char)) 1857 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1858 else 1859 error = sooptcopyout(sopt, &optval, sizeof(int)); 1860 break; 1861 1862 case IP_MSFILTER: 1863 if (imo == NULL) { 1864 error = EADDRNOTAVAIL; 1865 INP_WUNLOCK(inp); 1866 } else { 1867 error = inp_get_source_filters(inp, sopt); 1868 } 1869 break; 1870 1871 default: 1872 INP_WUNLOCK(inp); 1873 error = ENOPROTOOPT; 1874 break; 1875 } 1876 1877 INP_UNLOCK_ASSERT(inp); 1878 1879 return (error); 1880 } 1881 1882 /* 1883 * Look up the ifnet to use for a multicast group membership, 1884 * given the IPv4 address of an interface, and the IPv4 group address. 1885 * 1886 * This routine exists to support legacy multicast applications 1887 * which do not understand that multicast memberships are scoped to 1888 * specific physical links in the networking stack, or which need 1889 * to join link-scope groups before IPv4 addresses are configured. 1890 * 1891 * If inp is non-NULL, use this socket's current FIB number for any 1892 * required FIB lookup. 1893 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1894 * and use its ifp; usually, this points to the default next-hop. 1895 * 1896 * If the FIB lookup fails, attempt to use the first non-loopback 1897 * interface with multicast capability in the system as a 1898 * last resort. The legacy IPv4 ASM API requires that we do 1899 * this in order to allow groups to be joined when the routing 1900 * table has not yet been populated during boot. 1901 * 1902 * Returns NULL if no ifp could be found. 1903 * 1904 * FUTURE: Implement IPv4 source-address selection. 1905 */ 1906 static struct ifnet * 1907 inp_lookup_mcast_ifp(const struct inpcb *inp, 1908 const struct sockaddr_in *gsin, const struct in_addr ina) 1909 { 1910 struct rm_priotracker in_ifa_tracker; 1911 struct ifnet *ifp; 1912 struct nhop4_basic nh4; 1913 uint32_t fibnum; 1914 1915 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1916 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1917 ("%s: not multicast", __func__)); 1918 1919 ifp = NULL; 1920 if (!in_nullhost(ina)) { 1921 IN_IFADDR_RLOCK(&in_ifa_tracker); 1922 INADDR_TO_IFP(ina, ifp); 1923 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1924 } else { 1925 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1926 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1927 ifp = nh4.nh_ifp; 1928 else { 1929 struct in_ifaddr *ia; 1930 struct ifnet *mifp; 1931 1932 mifp = NULL; 1933 IN_IFADDR_RLOCK(&in_ifa_tracker); 1934 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1935 mifp = ia->ia_ifp; 1936 if (!(mifp->if_flags & IFF_LOOPBACK) && 1937 (mifp->if_flags & IFF_MULTICAST)) { 1938 ifp = mifp; 1939 break; 1940 } 1941 } 1942 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1943 } 1944 } 1945 1946 return (ifp); 1947 } 1948 1949 /* 1950 * Join an IPv4 multicast group, possibly with a source. 1951 */ 1952 static int 1953 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1954 { 1955 struct group_source_req gsr; 1956 sockunion_t *gsa, *ssa; 1957 struct ifnet *ifp; 1958 struct in_mfilter *imf; 1959 struct ip_moptions *imo; 1960 struct in_multi *inm; 1961 struct in_msource *lims; 1962 int error, is_new; 1963 1964 ifp = NULL; 1965 lims = NULL; 1966 error = 0; 1967 1968 memset(&gsr, 0, sizeof(struct group_source_req)); 1969 gsa = (sockunion_t *)&gsr.gsr_group; 1970 gsa->ss.ss_family = AF_UNSPEC; 1971 ssa = (sockunion_t *)&gsr.gsr_source; 1972 ssa->ss.ss_family = AF_UNSPEC; 1973 1974 switch (sopt->sopt_name) { 1975 case IP_ADD_MEMBERSHIP: { 1976 struct ip_mreqn mreqn; 1977 1978 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) 1979 error = sooptcopyin(sopt, &mreqn, 1980 sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); 1981 else 1982 error = sooptcopyin(sopt, &mreqn, 1983 sizeof(struct ip_mreq), sizeof(struct ip_mreq)); 1984 if (error) 1985 return (error); 1986 1987 gsa->sin.sin_family = AF_INET; 1988 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1989 gsa->sin.sin_addr = mreqn.imr_multiaddr; 1990 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1991 return (EINVAL); 1992 1993 if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && 1994 mreqn.imr_ifindex != 0) 1995 ifp = ifnet_byindex(mreqn.imr_ifindex); 1996 else 1997 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1998 mreqn.imr_address); 1999 break; 2000 } 2001 case IP_ADD_SOURCE_MEMBERSHIP: { 2002 struct ip_mreq_source mreqs; 2003 2004 error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), 2005 sizeof(struct ip_mreq_source)); 2006 if (error) 2007 return (error); 2008 2009 gsa->sin.sin_family = ssa->sin.sin_family = AF_INET; 2010 gsa->sin.sin_len = ssa->sin.sin_len = 2011 sizeof(struct sockaddr_in); 2012 2013 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2014 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2015 return (EINVAL); 2016 2017 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2018 2019 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2020 mreqs.imr_interface); 2021 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2022 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2023 break; 2024 } 2025 2026 case MCAST_JOIN_GROUP: 2027 case MCAST_JOIN_SOURCE_GROUP: 2028 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2029 error = sooptcopyin(sopt, &gsr, 2030 sizeof(struct group_req), 2031 sizeof(struct group_req)); 2032 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2033 error = sooptcopyin(sopt, &gsr, 2034 sizeof(struct group_source_req), 2035 sizeof(struct group_source_req)); 2036 } 2037 if (error) 2038 return (error); 2039 2040 if (gsa->sin.sin_family != AF_INET || 2041 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2042 return (EINVAL); 2043 2044 /* 2045 * Overwrite the port field if present, as the sockaddr 2046 * being copied in may be matched with a binary comparison. 2047 */ 2048 gsa->sin.sin_port = 0; 2049 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2050 if (ssa->sin.sin_family != AF_INET || 2051 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2052 return (EINVAL); 2053 ssa->sin.sin_port = 0; 2054 } 2055 2056 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2057 return (EINVAL); 2058 2059 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2060 return (EADDRNOTAVAIL); 2061 ifp = ifnet_byindex(gsr.gsr_interface); 2062 break; 2063 2064 default: 2065 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2066 __func__, sopt->sopt_name); 2067 return (EOPNOTSUPP); 2068 break; 2069 } 2070 2071 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2072 return (EADDRNOTAVAIL); 2073 2074 IN_MULTI_LOCK(); 2075 2076 /* 2077 * Find the membership in the membership list. 2078 */ 2079 imo = inp_findmoptions(inp); 2080 imf = imo_match_group(imo, ifp, &gsa->sa); 2081 if (imf == NULL) { 2082 is_new = 1; 2083 inm = NULL; 2084 2085 if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) { 2086 error = ENOMEM; 2087 goto out_inp_locked; 2088 } 2089 } else { 2090 is_new = 0; 2091 inm = imf->imf_inm; 2092 2093 if (ssa->ss.ss_family != AF_UNSPEC) { 2094 /* 2095 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2096 * is an error. On an existing inclusive membership, 2097 * it just adds the source to the filter list. 2098 */ 2099 if (imf->imf_st[1] != MCAST_INCLUDE) { 2100 error = EINVAL; 2101 goto out_inp_locked; 2102 } 2103 /* 2104 * Throw out duplicates. 2105 * 2106 * XXX FIXME: This makes a naive assumption that 2107 * even if entries exist for *ssa in this imf, 2108 * they will be rejected as dupes, even if they 2109 * are not valid in the current mode (in-mode). 2110 * 2111 * in_msource is transactioned just as for anything 2112 * else in SSM -- but note naive use of inm_graft() 2113 * below for allocating new filter entries. 2114 * 2115 * This is only an issue if someone mixes the 2116 * full-state SSM API with the delta-based API, 2117 * which is discouraged in the relevant RFCs. 2118 */ 2119 lims = imo_match_source(imf, &ssa->sa); 2120 if (lims != NULL /*&& 2121 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2122 error = EADDRNOTAVAIL; 2123 goto out_inp_locked; 2124 } 2125 } else { 2126 /* 2127 * MCAST_JOIN_GROUP on an existing exclusive 2128 * membership is an error; return EADDRINUSE 2129 * to preserve 4.4BSD API idempotence, and 2130 * avoid tedious detour to code below. 2131 * NOTE: This is bending RFC 3678 a bit. 2132 * 2133 * On an existing inclusive membership, this is also 2134 * an error; if you want to change filter mode, 2135 * you must use the userland API setsourcefilter(). 2136 * XXX We don't reject this for imf in UNDEFINED 2137 * state at t1, because allocation of a filter 2138 * is atomic with allocation of a membership. 2139 */ 2140 error = EINVAL; 2141 if (imf->imf_st[1] == MCAST_EXCLUDE) 2142 error = EADDRINUSE; 2143 goto out_inp_locked; 2144 } 2145 } 2146 2147 /* 2148 * Begin state merge transaction at socket layer. 2149 */ 2150 INP_WLOCK_ASSERT(inp); 2151 2152 /* 2153 * Graft new source into filter list for this inpcb's 2154 * membership of the group. The in_multi may not have 2155 * been allocated yet if this is a new membership, however, 2156 * the in_mfilter slot will be allocated and must be initialized. 2157 * 2158 * Note: Grafting of exclusive mode filters doesn't happen 2159 * in this path. 2160 * XXX: Should check for non-NULL lims (node exists but may 2161 * not be in-mode) for interop with full-state API. 2162 */ 2163 if (ssa->ss.ss_family != AF_UNSPEC) { 2164 /* Membership starts in IN mode */ 2165 if (is_new) { 2166 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2167 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); 2168 if (imf == NULL) { 2169 error = ENOMEM; 2170 goto out_inp_locked; 2171 } 2172 } else { 2173 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2174 } 2175 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2176 if (lims == NULL) { 2177 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2178 __func__); 2179 error = ENOMEM; 2180 goto out_inp_locked; 2181 } 2182 } else { 2183 /* No address specified; Membership starts in EX mode */ 2184 if (is_new) { 2185 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2186 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); 2187 if (imf == NULL) { 2188 error = ENOMEM; 2189 goto out_inp_locked; 2190 } 2191 } 2192 } 2193 2194 /* 2195 * Begin state merge transaction at IGMP layer. 2196 */ 2197 if (is_new) { 2198 in_pcbref(inp); 2199 INP_WUNLOCK(inp); 2200 2201 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2202 &imf->imf_inm); 2203 2204 INP_WLOCK(inp); 2205 if (in_pcbrele_wlocked(inp)) { 2206 error = ENXIO; 2207 goto out_inp_unlocked; 2208 } 2209 if (error) { 2210 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2211 __func__); 2212 goto out_inp_locked; 2213 } 2214 /* 2215 * NOTE: Refcount from in_joingroup_locked() 2216 * is protecting membership. 2217 */ 2218 ip_mfilter_insert(&imo->imo_head, imf); 2219 } else { 2220 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2221 IN_MULTI_LIST_LOCK(); 2222 error = inm_merge(inm, imf); 2223 if (error) { 2224 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2225 __func__); 2226 IN_MULTI_LIST_UNLOCK(); 2227 imf_rollback(imf); 2228 imf_reap(imf); 2229 goto out_inp_locked; 2230 } 2231 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2232 error = igmp_change_state(inm); 2233 IN_MULTI_LIST_UNLOCK(); 2234 if (error) { 2235 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2236 __func__); 2237 imf_rollback(imf); 2238 imf_reap(imf); 2239 goto out_inp_locked; 2240 } 2241 } 2242 2243 imf_commit(imf); 2244 imf = NULL; 2245 2246 out_inp_locked: 2247 INP_WUNLOCK(inp); 2248 out_inp_unlocked: 2249 IN_MULTI_UNLOCK(); 2250 2251 if (is_new && imf) { 2252 if (imf->imf_inm != NULL) { 2253 IN_MULTI_LIST_LOCK(); 2254 IF_ADDR_WLOCK(ifp); 2255 inm_release_deferred(imf->imf_inm); 2256 IF_ADDR_WUNLOCK(ifp); 2257 IN_MULTI_LIST_UNLOCK(); 2258 } 2259 ip_mfilter_free(imf); 2260 } 2261 return (error); 2262 } 2263 2264 /* 2265 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2266 */ 2267 static int 2268 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2269 { 2270 struct group_source_req gsr; 2271 struct ip_mreq_source mreqs; 2272 struct rm_priotracker in_ifa_tracker; 2273 sockunion_t *gsa, *ssa; 2274 struct ifnet *ifp; 2275 struct in_mfilter *imf; 2276 struct ip_moptions *imo; 2277 struct in_msource *ims; 2278 struct in_multi *inm; 2279 int error; 2280 bool is_final; 2281 2282 ifp = NULL; 2283 error = 0; 2284 is_final = true; 2285 2286 memset(&gsr, 0, sizeof(struct group_source_req)); 2287 gsa = (sockunion_t *)&gsr.gsr_group; 2288 gsa->ss.ss_family = AF_UNSPEC; 2289 ssa = (sockunion_t *)&gsr.gsr_source; 2290 ssa->ss.ss_family = AF_UNSPEC; 2291 2292 switch (sopt->sopt_name) { 2293 case IP_DROP_MEMBERSHIP: 2294 case IP_DROP_SOURCE_MEMBERSHIP: 2295 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2296 error = sooptcopyin(sopt, &mreqs, 2297 sizeof(struct ip_mreq), 2298 sizeof(struct ip_mreq)); 2299 /* 2300 * Swap interface and sourceaddr arguments, 2301 * as ip_mreq and ip_mreq_source are laid 2302 * out differently. 2303 */ 2304 mreqs.imr_interface = mreqs.imr_sourceaddr; 2305 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2306 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2307 error = sooptcopyin(sopt, &mreqs, 2308 sizeof(struct ip_mreq_source), 2309 sizeof(struct ip_mreq_source)); 2310 } 2311 if (error) 2312 return (error); 2313 2314 gsa->sin.sin_family = AF_INET; 2315 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2316 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2317 2318 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2319 ssa->sin.sin_family = AF_INET; 2320 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2321 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2322 } 2323 2324 /* 2325 * Attempt to look up hinted ifp from interface address. 2326 * Fallthrough with null ifp iff lookup fails, to 2327 * preserve 4.4BSD mcast API idempotence. 2328 * XXX NOTE WELL: The RFC 3678 API is preferred because 2329 * using an IPv4 address as a key is racy. 2330 */ 2331 if (!in_nullhost(mreqs.imr_interface)) { 2332 IN_IFADDR_RLOCK(&in_ifa_tracker); 2333 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2334 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2335 } 2336 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2337 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2338 2339 break; 2340 2341 case MCAST_LEAVE_GROUP: 2342 case MCAST_LEAVE_SOURCE_GROUP: 2343 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2344 error = sooptcopyin(sopt, &gsr, 2345 sizeof(struct group_req), 2346 sizeof(struct group_req)); 2347 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2348 error = sooptcopyin(sopt, &gsr, 2349 sizeof(struct group_source_req), 2350 sizeof(struct group_source_req)); 2351 } 2352 if (error) 2353 return (error); 2354 2355 if (gsa->sin.sin_family != AF_INET || 2356 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2357 return (EINVAL); 2358 2359 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2360 if (ssa->sin.sin_family != AF_INET || 2361 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2362 return (EINVAL); 2363 } 2364 2365 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2366 return (EADDRNOTAVAIL); 2367 2368 ifp = ifnet_byindex(gsr.gsr_interface); 2369 2370 if (ifp == NULL) 2371 return (EADDRNOTAVAIL); 2372 break; 2373 2374 default: 2375 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2376 __func__, sopt->sopt_name); 2377 return (EOPNOTSUPP); 2378 break; 2379 } 2380 2381 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2382 return (EINVAL); 2383 2384 IN_MULTI_LOCK(); 2385 2386 /* 2387 * Find the membership in the membership list. 2388 */ 2389 imo = inp_findmoptions(inp); 2390 imf = imo_match_group(imo, ifp, &gsa->sa); 2391 if (imf == NULL) { 2392 error = EADDRNOTAVAIL; 2393 goto out_inp_locked; 2394 } 2395 inm = imf->imf_inm; 2396 2397 if (ssa->ss.ss_family != AF_UNSPEC) 2398 is_final = false; 2399 2400 /* 2401 * Begin state merge transaction at socket layer. 2402 */ 2403 INP_WLOCK_ASSERT(inp); 2404 2405 /* 2406 * If we were instructed only to leave a given source, do so. 2407 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2408 */ 2409 if (is_final) { 2410 ip_mfilter_remove(&imo->imo_head, imf); 2411 imf_leave(imf); 2412 2413 /* 2414 * Give up the multicast address record to which 2415 * the membership points. 2416 */ 2417 (void) in_leavegroup_locked(imf->imf_inm, imf); 2418 } else { 2419 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2420 error = EADDRNOTAVAIL; 2421 goto out_inp_locked; 2422 } 2423 ims = imo_match_source(imf, &ssa->sa); 2424 if (ims == NULL) { 2425 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2426 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2427 error = EADDRNOTAVAIL; 2428 goto out_inp_locked; 2429 } 2430 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2431 error = imf_prune(imf, &ssa->sin); 2432 if (error) { 2433 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2434 __func__); 2435 goto out_inp_locked; 2436 } 2437 } 2438 2439 /* 2440 * Begin state merge transaction at IGMP layer. 2441 */ 2442 if (!is_final) { 2443 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2444 IN_MULTI_LIST_LOCK(); 2445 error = inm_merge(inm, imf); 2446 if (error) { 2447 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2448 __func__); 2449 IN_MULTI_LIST_UNLOCK(); 2450 imf_rollback(imf); 2451 imf_reap(imf); 2452 goto out_inp_locked; 2453 } 2454 2455 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2456 error = igmp_change_state(inm); 2457 IN_MULTI_LIST_UNLOCK(); 2458 if (error) { 2459 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2460 __func__); 2461 imf_rollback(imf); 2462 imf_reap(imf); 2463 goto out_inp_locked; 2464 } 2465 } 2466 imf_commit(imf); 2467 imf_reap(imf); 2468 2469 out_inp_locked: 2470 INP_WUNLOCK(inp); 2471 2472 if (is_final && imf) 2473 ip_mfilter_free(imf); 2474 2475 IN_MULTI_UNLOCK(); 2476 return (error); 2477 } 2478 2479 /* 2480 * Select the interface for transmitting IPv4 multicast datagrams. 2481 * 2482 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2483 * may be passed to this socket option. An address of INADDR_ANY or an 2484 * interface index of 0 is used to remove a previous selection. 2485 * When no interface is selected, one is chosen for every send. 2486 */ 2487 static int 2488 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2489 { 2490 struct rm_priotracker in_ifa_tracker; 2491 struct in_addr addr; 2492 struct ip_mreqn mreqn; 2493 struct ifnet *ifp; 2494 struct ip_moptions *imo; 2495 int error; 2496 2497 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2498 /* 2499 * An interface index was specified using the 2500 * Linux-derived ip_mreqn structure. 2501 */ 2502 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2503 sizeof(struct ip_mreqn)); 2504 if (error) 2505 return (error); 2506 2507 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2508 return (EINVAL); 2509 2510 if (mreqn.imr_ifindex == 0) { 2511 ifp = NULL; 2512 } else { 2513 ifp = ifnet_byindex(mreqn.imr_ifindex); 2514 if (ifp == NULL) 2515 return (EADDRNOTAVAIL); 2516 } 2517 } else { 2518 /* 2519 * An interface was specified by IPv4 address. 2520 * This is the traditional BSD usage. 2521 */ 2522 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2523 sizeof(struct in_addr)); 2524 if (error) 2525 return (error); 2526 if (in_nullhost(addr)) { 2527 ifp = NULL; 2528 } else { 2529 IN_IFADDR_RLOCK(&in_ifa_tracker); 2530 INADDR_TO_IFP(addr, ifp); 2531 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2532 if (ifp == NULL) 2533 return (EADDRNOTAVAIL); 2534 } 2535 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2536 ntohl(addr.s_addr)); 2537 } 2538 2539 /* Reject interfaces which do not support multicast. */ 2540 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2541 return (EOPNOTSUPP); 2542 2543 imo = inp_findmoptions(inp); 2544 imo->imo_multicast_ifp = ifp; 2545 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2546 INP_WUNLOCK(inp); 2547 2548 return (0); 2549 } 2550 2551 /* 2552 * Atomically set source filters on a socket for an IPv4 multicast group. 2553 * 2554 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2555 */ 2556 static int 2557 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2558 { 2559 struct __msfilterreq msfr; 2560 sockunion_t *gsa; 2561 struct ifnet *ifp; 2562 struct in_mfilter *imf; 2563 struct ip_moptions *imo; 2564 struct in_multi *inm; 2565 int error; 2566 2567 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2568 sizeof(struct __msfilterreq)); 2569 if (error) 2570 return (error); 2571 2572 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2573 return (ENOBUFS); 2574 2575 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2576 msfr.msfr_fmode != MCAST_INCLUDE)) 2577 return (EINVAL); 2578 2579 if (msfr.msfr_group.ss_family != AF_INET || 2580 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2581 return (EINVAL); 2582 2583 gsa = (sockunion_t *)&msfr.msfr_group; 2584 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2585 return (EINVAL); 2586 2587 gsa->sin.sin_port = 0; /* ignore port */ 2588 2589 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2590 return (EADDRNOTAVAIL); 2591 2592 ifp = ifnet_byindex(msfr.msfr_ifindex); 2593 if (ifp == NULL) 2594 return (EADDRNOTAVAIL); 2595 2596 IN_MULTI_LOCK(); 2597 2598 /* 2599 * Take the INP write lock. 2600 * Check if this socket is a member of this group. 2601 */ 2602 imo = inp_findmoptions(inp); 2603 imf = imo_match_group(imo, ifp, &gsa->sa); 2604 if (imf == NULL) { 2605 error = EADDRNOTAVAIL; 2606 goto out_inp_locked; 2607 } 2608 inm = imf->imf_inm; 2609 2610 /* 2611 * Begin state merge transaction at socket layer. 2612 */ 2613 INP_WLOCK_ASSERT(inp); 2614 2615 imf->imf_st[1] = msfr.msfr_fmode; 2616 2617 /* 2618 * Apply any new source filters, if present. 2619 * Make a copy of the user-space source vector so 2620 * that we may copy them with a single copyin. This 2621 * allows us to deal with page faults up-front. 2622 */ 2623 if (msfr.msfr_nsrcs > 0) { 2624 struct in_msource *lims; 2625 struct sockaddr_in *psin; 2626 struct sockaddr_storage *kss, *pkss; 2627 int i; 2628 2629 INP_WUNLOCK(inp); 2630 2631 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2632 __func__, (unsigned long)msfr.msfr_nsrcs); 2633 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2634 M_TEMP, M_WAITOK); 2635 error = copyin(msfr.msfr_srcs, kss, 2636 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2637 if (error) { 2638 free(kss, M_TEMP); 2639 return (error); 2640 } 2641 2642 INP_WLOCK(inp); 2643 2644 /* 2645 * Mark all source filters as UNDEFINED at t1. 2646 * Restore new group filter mode, as imf_leave() 2647 * will set it to INCLUDE. 2648 */ 2649 imf_leave(imf); 2650 imf->imf_st[1] = msfr.msfr_fmode; 2651 2652 /* 2653 * Update socket layer filters at t1, lazy-allocating 2654 * new entries. This saves a bunch of memory at the 2655 * cost of one RB_FIND() per source entry; duplicate 2656 * entries in the msfr_nsrcs vector are ignored. 2657 * If we encounter an error, rollback transaction. 2658 * 2659 * XXX This too could be replaced with a set-symmetric 2660 * difference like loop to avoid walking from root 2661 * every time, as the key space is common. 2662 */ 2663 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2664 psin = (struct sockaddr_in *)pkss; 2665 if (psin->sin_family != AF_INET) { 2666 error = EAFNOSUPPORT; 2667 break; 2668 } 2669 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2670 error = EINVAL; 2671 break; 2672 } 2673 error = imf_get_source(imf, psin, &lims); 2674 if (error) 2675 break; 2676 lims->imsl_st[1] = imf->imf_st[1]; 2677 } 2678 free(kss, M_TEMP); 2679 } 2680 2681 if (error) 2682 goto out_imf_rollback; 2683 2684 INP_WLOCK_ASSERT(inp); 2685 2686 /* 2687 * Begin state merge transaction at IGMP layer. 2688 */ 2689 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2690 IN_MULTI_LIST_LOCK(); 2691 error = inm_merge(inm, imf); 2692 if (error) { 2693 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2694 IN_MULTI_LIST_UNLOCK(); 2695 goto out_imf_rollback; 2696 } 2697 2698 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2699 error = igmp_change_state(inm); 2700 IN_MULTI_LIST_UNLOCK(); 2701 if (error) 2702 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2703 2704 out_imf_rollback: 2705 if (error) 2706 imf_rollback(imf); 2707 else 2708 imf_commit(imf); 2709 2710 imf_reap(imf); 2711 2712 out_inp_locked: 2713 INP_WUNLOCK(inp); 2714 IN_MULTI_UNLOCK(); 2715 return (error); 2716 } 2717 2718 /* 2719 * Set the IP multicast options in response to user setsockopt(). 2720 * 2721 * Many of the socket options handled in this function duplicate the 2722 * functionality of socket options in the regular unicast API. However, 2723 * it is not possible to merge the duplicate code, because the idempotence 2724 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2725 * the effects of these options must be treated as separate and distinct. 2726 * 2727 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2728 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2729 * is refactored to no longer use vifs. 2730 */ 2731 int 2732 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2733 { 2734 struct ip_moptions *imo; 2735 int error; 2736 2737 error = 0; 2738 2739 /* 2740 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2741 * or is a divert socket, reject it. 2742 */ 2743 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2744 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2745 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2746 return (EOPNOTSUPP); 2747 2748 switch (sopt->sopt_name) { 2749 case IP_MULTICAST_VIF: { 2750 int vifi; 2751 /* 2752 * Select a multicast VIF for transmission. 2753 * Only useful if multicast forwarding is active. 2754 */ 2755 if (legal_vif_num == NULL) { 2756 error = EOPNOTSUPP; 2757 break; 2758 } 2759 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2760 if (error) 2761 break; 2762 if (!legal_vif_num(vifi) && (vifi != -1)) { 2763 error = EINVAL; 2764 break; 2765 } 2766 imo = inp_findmoptions(inp); 2767 imo->imo_multicast_vif = vifi; 2768 INP_WUNLOCK(inp); 2769 break; 2770 } 2771 2772 case IP_MULTICAST_IF: 2773 error = inp_set_multicast_if(inp, sopt); 2774 break; 2775 2776 case IP_MULTICAST_TTL: { 2777 u_char ttl; 2778 2779 /* 2780 * Set the IP time-to-live for outgoing multicast packets. 2781 * The original multicast API required a char argument, 2782 * which is inconsistent with the rest of the socket API. 2783 * We allow either a char or an int. 2784 */ 2785 if (sopt->sopt_valsize == sizeof(u_char)) { 2786 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2787 sizeof(u_char)); 2788 if (error) 2789 break; 2790 } else { 2791 u_int ittl; 2792 2793 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2794 sizeof(u_int)); 2795 if (error) 2796 break; 2797 if (ittl > 255) { 2798 error = EINVAL; 2799 break; 2800 } 2801 ttl = (u_char)ittl; 2802 } 2803 imo = inp_findmoptions(inp); 2804 imo->imo_multicast_ttl = ttl; 2805 INP_WUNLOCK(inp); 2806 break; 2807 } 2808 2809 case IP_MULTICAST_LOOP: { 2810 u_char loop; 2811 2812 /* 2813 * Set the loopback flag for outgoing multicast packets. 2814 * Must be zero or one. The original multicast API required a 2815 * char argument, which is inconsistent with the rest 2816 * of the socket API. We allow either a char or an int. 2817 */ 2818 if (sopt->sopt_valsize == sizeof(u_char)) { 2819 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2820 sizeof(u_char)); 2821 if (error) 2822 break; 2823 } else { 2824 u_int iloop; 2825 2826 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2827 sizeof(u_int)); 2828 if (error) 2829 break; 2830 loop = (u_char)iloop; 2831 } 2832 imo = inp_findmoptions(inp); 2833 imo->imo_multicast_loop = !!loop; 2834 INP_WUNLOCK(inp); 2835 break; 2836 } 2837 2838 case IP_ADD_MEMBERSHIP: 2839 case IP_ADD_SOURCE_MEMBERSHIP: 2840 case MCAST_JOIN_GROUP: 2841 case MCAST_JOIN_SOURCE_GROUP: 2842 error = inp_join_group(inp, sopt); 2843 break; 2844 2845 case IP_DROP_MEMBERSHIP: 2846 case IP_DROP_SOURCE_MEMBERSHIP: 2847 case MCAST_LEAVE_GROUP: 2848 case MCAST_LEAVE_SOURCE_GROUP: 2849 error = inp_leave_group(inp, sopt); 2850 break; 2851 2852 case IP_BLOCK_SOURCE: 2853 case IP_UNBLOCK_SOURCE: 2854 case MCAST_BLOCK_SOURCE: 2855 case MCAST_UNBLOCK_SOURCE: 2856 error = inp_block_unblock_source(inp, sopt); 2857 break; 2858 2859 case IP_MSFILTER: 2860 error = inp_set_source_filters(inp, sopt); 2861 break; 2862 2863 default: 2864 error = EOPNOTSUPP; 2865 break; 2866 } 2867 2868 INP_UNLOCK_ASSERT(inp); 2869 2870 return (error); 2871 } 2872 2873 /* 2874 * Expose IGMP's multicast filter mode and source list(s) to userland, 2875 * keyed by (ifindex, group). 2876 * The filter mode is written out as a uint32_t, followed by 2877 * 0..n of struct in_addr. 2878 * For use by ifmcstat(8). 2879 * SMPng: NOTE: unlocked read of ifindex space. 2880 */ 2881 static int 2882 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2883 { 2884 struct in_addr src, group; 2885 struct epoch_tracker et; 2886 struct ifnet *ifp; 2887 struct ifmultiaddr *ifma; 2888 struct in_multi *inm; 2889 struct ip_msource *ims; 2890 int *name; 2891 int retval; 2892 u_int namelen; 2893 uint32_t fmode, ifindex; 2894 2895 name = (int *)arg1; 2896 namelen = arg2; 2897 2898 if (req->newptr != NULL) 2899 return (EPERM); 2900 2901 if (namelen != 2) 2902 return (EINVAL); 2903 2904 ifindex = name[0]; 2905 if (ifindex <= 0 || ifindex > V_if_index) { 2906 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2907 __func__, ifindex); 2908 return (ENOENT); 2909 } 2910 2911 group.s_addr = name[1]; 2912 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2913 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2914 __func__, ntohl(group.s_addr)); 2915 return (EINVAL); 2916 } 2917 2918 NET_EPOCH_ENTER(et); 2919 ifp = ifnet_byindex(ifindex); 2920 if (ifp == NULL) { 2921 NET_EPOCH_EXIT(et); 2922 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2923 __func__, ifindex); 2924 return (ENOENT); 2925 } 2926 2927 retval = sysctl_wire_old_buffer(req, 2928 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2929 if (retval) { 2930 NET_EPOCH_EXIT(et); 2931 return (retval); 2932 } 2933 2934 IN_MULTI_LIST_LOCK(); 2935 2936 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2937 if (ifma->ifma_addr->sa_family != AF_INET || 2938 ifma->ifma_protospec == NULL) 2939 continue; 2940 inm = (struct in_multi *)ifma->ifma_protospec; 2941 if (!in_hosteq(inm->inm_addr, group)) 2942 continue; 2943 fmode = inm->inm_st[1].iss_fmode; 2944 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2945 if (retval != 0) 2946 break; 2947 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2948 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 2949 ims->ims_haddr); 2950 /* 2951 * Only copy-out sources which are in-mode. 2952 */ 2953 if (fmode != ims_get_mode(inm, ims, 1)) { 2954 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2955 __func__); 2956 continue; 2957 } 2958 src.s_addr = htonl(ims->ims_haddr); 2959 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2960 if (retval != 0) 2961 break; 2962 } 2963 } 2964 2965 IN_MULTI_LIST_UNLOCK(); 2966 NET_EPOCH_EXIT(et); 2967 2968 return (retval); 2969 } 2970 2971 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2972 2973 static const char *inm_modestrs[] = { 2974 [MCAST_UNDEFINED] = "un", 2975 [MCAST_INCLUDE] = "in", 2976 [MCAST_EXCLUDE] = "ex", 2977 }; 2978 _Static_assert(MCAST_UNDEFINED == 0 && 2979 MCAST_EXCLUDE + 1 == nitems(inm_modestrs), 2980 "inm_modestrs: no longer matches #defines"); 2981 2982 static const char * 2983 inm_mode_str(const int mode) 2984 { 2985 2986 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2987 return (inm_modestrs[mode]); 2988 return ("??"); 2989 } 2990 2991 static const char *inm_statestrs[] = { 2992 [IGMP_NOT_MEMBER] = "not-member", 2993 [IGMP_SILENT_MEMBER] = "silent", 2994 [IGMP_REPORTING_MEMBER] = "reporting", 2995 [IGMP_IDLE_MEMBER] = "idle", 2996 [IGMP_LAZY_MEMBER] = "lazy", 2997 [IGMP_SLEEPING_MEMBER] = "sleeping", 2998 [IGMP_AWAKENING_MEMBER] = "awakening", 2999 [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending", 3000 [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending", 3001 [IGMP_LEAVING_MEMBER] = "leaving", 3002 }; 3003 _Static_assert(IGMP_NOT_MEMBER == 0 && 3004 IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs), 3005 "inm_statetrs: no longer matches #defines"); 3006 3007 static const char * 3008 inm_state_str(const int state) 3009 { 3010 3011 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3012 return (inm_statestrs[state]); 3013 return ("??"); 3014 } 3015 3016 /* 3017 * Dump an in_multi structure to the console. 3018 */ 3019 void 3020 inm_print(const struct in_multi *inm) 3021 { 3022 int t; 3023 char addrbuf[INET_ADDRSTRLEN]; 3024 3025 if ((ktr_mask & KTR_IGMPV3) == 0) 3026 return; 3027 3028 printf("%s: --- begin inm %p ---\n", __func__, inm); 3029 printf("addr %s ifp %p(%s) ifma %p\n", 3030 inet_ntoa_r(inm->inm_addr, addrbuf), 3031 inm->inm_ifp, 3032 inm->inm_ifp->if_xname, 3033 inm->inm_ifma); 3034 printf("timer %u state %s refcount %u scq.len %u\n", 3035 inm->inm_timer, 3036 inm_state_str(inm->inm_state), 3037 inm->inm_refcount, 3038 inm->inm_scq.mq_len); 3039 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3040 inm->inm_igi, 3041 inm->inm_nsrc, 3042 inm->inm_sctimer, 3043 inm->inm_scrv); 3044 for (t = 0; t < 2; t++) { 3045 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3046 inm_mode_str(inm->inm_st[t].iss_fmode), 3047 inm->inm_st[t].iss_asm, 3048 inm->inm_st[t].iss_ex, 3049 inm->inm_st[t].iss_in, 3050 inm->inm_st[t].iss_rec); 3051 } 3052 printf("%s: --- end inm %p ---\n", __func__, inm); 3053 } 3054 3055 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3056 3057 void 3058 inm_print(const struct in_multi *inm) 3059 { 3060 3061 } 3062 3063 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3064 3065 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3066