1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * 98 * - Lock order is: Giant, IN_MULTI_LOCK, INP_WLOCK, 99 * IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 100 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 101 * it can be taken by code in net/if.c also. 102 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 103 * 104 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 105 * any need for in_multi itself to be virtualized -- it is bound to an ifp 106 * anyway no matter what happens. 107 */ 108 struct mtx in_multi_list_mtx; 109 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 110 111 struct mtx in_multi_free_mtx; 112 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 113 114 struct sx in_multi_sx; 115 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 116 117 int ifma_restart; 118 119 /* 120 * Functions with non-static linkage defined in this file should be 121 * declared in in_var.h: 122 * imo_multi_filter() 123 * in_addmulti() 124 * in_delmulti() 125 * in_joingroup() 126 * in_joingroup_locked() 127 * in_leavegroup() 128 * in_leavegroup_locked() 129 * and ip_var.h: 130 * inp_freemoptions() 131 * inp_getmoptions() 132 * inp_setmoptions() 133 * 134 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 135 * and in_delmulti(). 136 */ 137 static void imf_commit(struct in_mfilter *); 138 static int imf_get_source(struct in_mfilter *imf, 139 const struct sockaddr_in *psin, 140 struct in_msource **); 141 static struct in_msource * 142 imf_graft(struct in_mfilter *, const uint8_t, 143 const struct sockaddr_in *); 144 static void imf_leave(struct in_mfilter *); 145 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 146 static void imf_purge(struct in_mfilter *); 147 static void imf_rollback(struct in_mfilter *); 148 static void imf_reap(struct in_mfilter *); 149 static struct in_mfilter * 150 imo_match_group(const struct ip_moptions *, 151 const struct ifnet *, const struct sockaddr *); 152 static struct in_msource * 153 imo_match_source(struct in_mfilter *, const struct sockaddr *); 154 static void ims_merge(struct ip_msource *ims, 155 const struct in_msource *lims, const int rollback); 156 static int in_getmulti(struct ifnet *, const struct in_addr *, 157 struct in_multi **); 158 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 159 const int noalloc, struct ip_msource **pims); 160 #ifdef KTR 161 static int inm_is_ifp_detached(const struct in_multi *); 162 #endif 163 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 164 static void inm_purge(struct in_multi *); 165 static void inm_reap(struct in_multi *); 166 static void inm_release(struct in_multi *); 167 static struct ip_moptions * 168 inp_findmoptions(struct inpcb *); 169 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 170 static int inp_join_group(struct inpcb *, struct sockopt *); 171 static int inp_leave_group(struct inpcb *, struct sockopt *); 172 static struct ifnet * 173 inp_lookup_mcast_ifp(const struct inpcb *, 174 const struct sockaddr_in *, const struct in_addr); 175 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 176 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 177 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 178 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 179 180 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 181 "IPv4 multicast"); 182 183 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 184 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 185 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 186 "Max source filters per group"); 187 188 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 189 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 190 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 191 "Max source filters per socket"); 192 193 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 194 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 195 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 196 197 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 198 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 199 "Per-interface stack-wide source filters"); 200 201 #ifdef KTR 202 /* 203 * Inline function which wraps assertions for a valid ifp. 204 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 205 * is detached. 206 */ 207 static int __inline 208 inm_is_ifp_detached(const struct in_multi *inm) 209 { 210 struct ifnet *ifp; 211 212 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 213 ifp = inm->inm_ifma->ifma_ifp; 214 if (ifp != NULL) { 215 /* 216 * Sanity check that netinet's notion of ifp is the 217 * same as net's. 218 */ 219 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 220 } 221 222 return (ifp == NULL); 223 } 224 #endif 225 226 static struct grouptask free_gtask; 227 static struct in_multi_head inm_free_list; 228 static void inm_release_task(void *arg __unused); 229 static void inm_init(void) 230 { 231 SLIST_INIT(&inm_free_list); 232 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 233 } 234 235 #ifdef EARLY_AP_STARTUP 236 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 237 inm_init, NULL); 238 #else 239 SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST, 240 inm_init, NULL); 241 #endif 242 243 244 void 245 inm_release_list_deferred(struct in_multi_head *inmh) 246 { 247 248 if (SLIST_EMPTY(inmh)) 249 return; 250 mtx_lock(&in_multi_free_mtx); 251 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 252 mtx_unlock(&in_multi_free_mtx); 253 GROUPTASK_ENQUEUE(&free_gtask); 254 } 255 256 void 257 inm_disconnect(struct in_multi *inm) 258 { 259 struct ifnet *ifp; 260 struct ifmultiaddr *ifma, *ll_ifma; 261 262 ifp = inm->inm_ifp; 263 IF_ADDR_WLOCK_ASSERT(ifp); 264 ifma = inm->inm_ifma; 265 266 if_ref(ifp); 267 if (ifma->ifma_flags & IFMA_F_ENQUEUED) { 268 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 269 ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 270 } 271 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 272 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 273 MPASS(ifma != ll_ifma); 274 ifma->ifma_llifma = NULL; 275 MPASS(ll_ifma->ifma_llifma == NULL); 276 MPASS(ll_ifma->ifma_ifp == ifp); 277 if (--ll_ifma->ifma_refcount == 0) { 278 if (ll_ifma->ifma_flags & IFMA_F_ENQUEUED) { 279 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 280 ll_ifma->ifma_flags &= ~IFMA_F_ENQUEUED; 281 } 282 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 283 if_freemulti(ll_ifma); 284 ifma_restart = true; 285 } 286 } 287 } 288 289 void 290 inm_release_deferred(struct in_multi *inm) 291 { 292 struct in_multi_head tmp; 293 294 IN_MULTI_LIST_LOCK_ASSERT(); 295 MPASS(inm->inm_refcount > 0); 296 if (--inm->inm_refcount == 0) { 297 SLIST_INIT(&tmp); 298 inm_disconnect(inm); 299 inm->inm_ifma->ifma_protospec = NULL; 300 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 301 inm_release_list_deferred(&tmp); 302 } 303 } 304 305 static void 306 inm_release_task(void *arg __unused) 307 { 308 struct in_multi_head inm_free_tmp; 309 struct in_multi *inm, *tinm; 310 311 SLIST_INIT(&inm_free_tmp); 312 mtx_lock(&in_multi_free_mtx); 313 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 314 mtx_unlock(&in_multi_free_mtx); 315 IN_MULTI_LOCK(); 316 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 317 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 318 MPASS(inm); 319 inm_release(inm); 320 } 321 IN_MULTI_UNLOCK(); 322 } 323 324 /* 325 * Initialize an in_mfilter structure to a known state at t0, t1 326 * with an empty source filter list. 327 */ 328 static __inline void 329 imf_init(struct in_mfilter *imf, const int st0, const int st1) 330 { 331 memset(imf, 0, sizeof(struct in_mfilter)); 332 RB_INIT(&imf->imf_sources); 333 imf->imf_st[0] = st0; 334 imf->imf_st[1] = st1; 335 } 336 337 struct in_mfilter * 338 ip_mfilter_alloc(const int mflags, const int st0, const int st1) 339 { 340 struct in_mfilter *imf; 341 342 imf = malloc(sizeof(*imf), M_INMFILTER, mflags); 343 if (imf != NULL) 344 imf_init(imf, st0, st1); 345 346 return (imf); 347 } 348 349 void 350 ip_mfilter_free(struct in_mfilter *imf) 351 { 352 353 imf_purge(imf); 354 free(imf, M_INMFILTER); 355 } 356 357 /* 358 * Function for looking up an in_multi record for an IPv4 multicast address 359 * on a given interface. ifp must be valid. If no record found, return NULL. 360 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 361 */ 362 struct in_multi * 363 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 364 { 365 struct ifmultiaddr *ifma; 366 struct in_multi *inm; 367 368 IN_MULTI_LIST_LOCK_ASSERT(); 369 IF_ADDR_LOCK_ASSERT(ifp); 370 371 inm = NULL; 372 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 373 if (ifma->ifma_addr->sa_family != AF_INET || 374 ifma->ifma_protospec == NULL) 375 continue; 376 inm = (struct in_multi *)ifma->ifma_protospec; 377 if (inm->inm_addr.s_addr == ina.s_addr) 378 break; 379 inm = NULL; 380 } 381 return (inm); 382 } 383 384 /* 385 * Wrapper for inm_lookup_locked(). 386 * The IF_ADDR_LOCK will be taken on ifp and released on return. 387 */ 388 struct in_multi * 389 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 390 { 391 struct epoch_tracker et; 392 struct in_multi *inm; 393 394 IN_MULTI_LIST_LOCK_ASSERT(); 395 NET_EPOCH_ENTER(et); 396 397 inm = inm_lookup_locked(ifp, ina); 398 NET_EPOCH_EXIT(et); 399 400 return (inm); 401 } 402 403 /* 404 * Find an IPv4 multicast group entry for this ip_moptions instance 405 * which matches the specified group, and optionally an interface. 406 * Return its index into the array, or -1 if not found. 407 */ 408 static struct in_mfilter * 409 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 410 const struct sockaddr *group) 411 { 412 const struct sockaddr_in *gsin; 413 struct in_mfilter *imf; 414 struct in_multi *inm; 415 416 gsin = (const struct sockaddr_in *)group; 417 418 IP_MFILTER_FOREACH(imf, &imo->imo_head) { 419 inm = imf->imf_inm; 420 if (inm == NULL) 421 continue; 422 if ((ifp == NULL || (inm->inm_ifp == ifp)) && 423 in_hosteq(inm->inm_addr, gsin->sin_addr)) { 424 break; 425 } 426 } 427 return (imf); 428 } 429 430 /* 431 * Find an IPv4 multicast source entry for this imo which matches 432 * the given group index for this socket, and source address. 433 * 434 * NOTE: This does not check if the entry is in-mode, merely if 435 * it exists, which may not be the desired behaviour. 436 */ 437 static struct in_msource * 438 imo_match_source(struct in_mfilter *imf, const struct sockaddr *src) 439 { 440 struct ip_msource find; 441 struct ip_msource *ims; 442 const sockunion_t *psa; 443 444 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 445 446 /* Source trees are keyed in host byte order. */ 447 psa = (const sockunion_t *)src; 448 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 449 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 450 451 return ((struct in_msource *)ims); 452 } 453 454 /* 455 * Perform filtering for multicast datagrams on a socket by group and source. 456 * 457 * Returns 0 if a datagram should be allowed through, or various error codes 458 * if the socket was not a member of the group, or the source was muted, etc. 459 */ 460 int 461 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 462 const struct sockaddr *group, const struct sockaddr *src) 463 { 464 struct in_mfilter *imf; 465 struct in_msource *ims; 466 int mode; 467 468 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 469 470 imf = imo_match_group(imo, ifp, group); 471 if (imf == NULL) 472 return (MCAST_NOTGMEMBER); 473 474 /* 475 * Check if the source was included in an (S,G) join. 476 * Allow reception on exclusive memberships by default, 477 * reject reception on inclusive memberships by default. 478 * Exclude source only if an in-mode exclude filter exists. 479 * Include source only if an in-mode include filter exists. 480 * NOTE: We are comparing group state here at IGMP t1 (now) 481 * with socket-layer t0 (since last downcall). 482 */ 483 mode = imf->imf_st[1]; 484 ims = imo_match_source(imf, src); 485 486 if ((ims == NULL && mode == MCAST_INCLUDE) || 487 (ims != NULL && ims->imsl_st[0] != mode)) 488 return (MCAST_NOTSMEMBER); 489 490 return (MCAST_PASS); 491 } 492 493 /* 494 * Find and return a reference to an in_multi record for (ifp, group), 495 * and bump its reference count. 496 * If one does not exist, try to allocate it, and update link-layer multicast 497 * filters on ifp to listen for group. 498 * Assumes the IN_MULTI lock is held across the call. 499 * Return 0 if successful, otherwise return an appropriate error code. 500 */ 501 static int 502 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 503 struct in_multi **pinm) 504 { 505 struct sockaddr_in gsin; 506 struct ifmultiaddr *ifma; 507 struct in_ifinfo *ii; 508 struct in_multi *inm; 509 int error; 510 511 IN_MULTI_LOCK_ASSERT(); 512 513 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 514 IN_MULTI_LIST_LOCK(); 515 inm = inm_lookup(ifp, *group); 516 if (inm != NULL) { 517 /* 518 * If we already joined this group, just bump the 519 * refcount and return it. 520 */ 521 KASSERT(inm->inm_refcount >= 1, 522 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 523 inm_acquire_locked(inm); 524 *pinm = inm; 525 } 526 IN_MULTI_LIST_UNLOCK(); 527 if (inm != NULL) 528 return (0); 529 530 memset(&gsin, 0, sizeof(gsin)); 531 gsin.sin_family = AF_INET; 532 gsin.sin_len = sizeof(struct sockaddr_in); 533 gsin.sin_addr = *group; 534 535 /* 536 * Check if a link-layer group is already associated 537 * with this network-layer group on the given ifnet. 538 */ 539 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 540 if (error != 0) 541 return (error); 542 543 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 544 IN_MULTI_LIST_LOCK(); 545 IF_ADDR_WLOCK(ifp); 546 547 /* 548 * If something other than netinet is occupying the link-layer 549 * group, print a meaningful error message and back out of 550 * the allocation. 551 * Otherwise, bump the refcount on the existing network-layer 552 * group association and return it. 553 */ 554 if (ifma->ifma_protospec != NULL) { 555 inm = (struct in_multi *)ifma->ifma_protospec; 556 #ifdef INVARIANTS 557 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 558 __func__)); 559 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 560 ("%s: ifma not AF_INET", __func__)); 561 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 562 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 563 !in_hosteq(inm->inm_addr, *group)) { 564 char addrbuf[INET_ADDRSTRLEN]; 565 566 panic("%s: ifma %p is inconsistent with %p (%s)", 567 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 568 } 569 #endif 570 inm_acquire_locked(inm); 571 *pinm = inm; 572 goto out_locked; 573 } 574 575 IF_ADDR_WLOCK_ASSERT(ifp); 576 577 /* 578 * A new in_multi record is needed; allocate and initialize it. 579 * We DO NOT perform an IGMP join as the in_ layer may need to 580 * push an initial source list down to IGMP to support SSM. 581 * 582 * The initial source filter state is INCLUDE, {} as per the RFC. 583 */ 584 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 585 if (inm == NULL) { 586 IF_ADDR_WUNLOCK(ifp); 587 IN_MULTI_LIST_UNLOCK(); 588 if_delmulti_ifma(ifma); 589 return (ENOMEM); 590 } 591 inm->inm_addr = *group; 592 inm->inm_ifp = ifp; 593 inm->inm_igi = ii->ii_igmp; 594 inm->inm_ifma = ifma; 595 inm->inm_refcount = 1; 596 inm->inm_state = IGMP_NOT_MEMBER; 597 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 598 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 599 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 600 RB_INIT(&inm->inm_srcs); 601 602 ifma->ifma_protospec = inm; 603 604 *pinm = inm; 605 out_locked: 606 IF_ADDR_WUNLOCK(ifp); 607 IN_MULTI_LIST_UNLOCK(); 608 return (0); 609 } 610 611 /* 612 * Drop a reference to an in_multi record. 613 * 614 * If the refcount drops to 0, free the in_multi record and 615 * delete the underlying link-layer membership. 616 */ 617 static void 618 inm_release(struct in_multi *inm) 619 { 620 struct ifmultiaddr *ifma; 621 struct ifnet *ifp; 622 623 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 624 MPASS(inm->inm_refcount == 0); 625 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 626 627 ifma = inm->inm_ifma; 628 ifp = inm->inm_ifp; 629 630 /* XXX this access is not covered by IF_ADDR_LOCK */ 631 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 632 if (ifp != NULL) { 633 CURVNET_SET(ifp->if_vnet); 634 inm_purge(inm); 635 free(inm, M_IPMADDR); 636 if_delmulti_ifma_flags(ifma, 1); 637 CURVNET_RESTORE(); 638 if_rele(ifp); 639 } else { 640 inm_purge(inm); 641 free(inm, M_IPMADDR); 642 if_delmulti_ifma_flags(ifma, 1); 643 } 644 } 645 646 /* 647 * Clear recorded source entries for a group. 648 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 649 * FIXME: Should reap. 650 */ 651 void 652 inm_clear_recorded(struct in_multi *inm) 653 { 654 struct ip_msource *ims; 655 656 IN_MULTI_LIST_LOCK_ASSERT(); 657 658 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 659 if (ims->ims_stp) { 660 ims->ims_stp = 0; 661 --inm->inm_st[1].iss_rec; 662 } 663 } 664 KASSERT(inm->inm_st[1].iss_rec == 0, 665 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 666 } 667 668 /* 669 * Record a source as pending for a Source-Group IGMPv3 query. 670 * This lives here as it modifies the shared tree. 671 * 672 * inm is the group descriptor. 673 * naddr is the address of the source to record in network-byte order. 674 * 675 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 676 * lazy-allocate a source node in response to an SG query. 677 * Otherwise, no allocation is performed. This saves some memory 678 * with the trade-off that the source will not be reported to the 679 * router if joined in the window between the query response and 680 * the group actually being joined on the local host. 681 * 682 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 683 * This turns off the allocation of a recorded source entry if 684 * the group has not been joined. 685 * 686 * Return 0 if the source didn't exist or was already marked as recorded. 687 * Return 1 if the source was marked as recorded by this function. 688 * Return <0 if any error occurred (negated errno code). 689 */ 690 int 691 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 692 { 693 struct ip_msource find; 694 struct ip_msource *ims, *nims; 695 696 IN_MULTI_LIST_LOCK_ASSERT(); 697 698 find.ims_haddr = ntohl(naddr); 699 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 700 if (ims && ims->ims_stp) 701 return (0); 702 if (ims == NULL) { 703 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 704 return (-ENOSPC); 705 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 706 M_NOWAIT | M_ZERO); 707 if (nims == NULL) 708 return (-ENOMEM); 709 nims->ims_haddr = find.ims_haddr; 710 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 711 ++inm->inm_nsrc; 712 ims = nims; 713 } 714 715 /* 716 * Mark the source as recorded and update the recorded 717 * source count. 718 */ 719 ++ims->ims_stp; 720 ++inm->inm_st[1].iss_rec; 721 722 return (1); 723 } 724 725 /* 726 * Return a pointer to an in_msource owned by an in_mfilter, 727 * given its source address. 728 * Lazy-allocate if needed. If this is a new entry its filter state is 729 * undefined at t0. 730 * 731 * imf is the filter set being modified. 732 * haddr is the source address in *host* byte-order. 733 * 734 * SMPng: May be called with locks held; malloc must not block. 735 */ 736 static int 737 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 738 struct in_msource **plims) 739 { 740 struct ip_msource find; 741 struct ip_msource *ims, *nims; 742 struct in_msource *lims; 743 int error; 744 745 error = 0; 746 ims = NULL; 747 lims = NULL; 748 749 /* key is host byte order */ 750 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 751 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 752 lims = (struct in_msource *)ims; 753 if (lims == NULL) { 754 if (imf->imf_nsrc == in_mcast_maxsocksrc) 755 return (ENOSPC); 756 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 757 M_NOWAIT | M_ZERO); 758 if (nims == NULL) 759 return (ENOMEM); 760 lims = (struct in_msource *)nims; 761 lims->ims_haddr = find.ims_haddr; 762 lims->imsl_st[0] = MCAST_UNDEFINED; 763 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 764 ++imf->imf_nsrc; 765 } 766 767 *plims = lims; 768 769 return (error); 770 } 771 772 /* 773 * Graft a source entry into an existing socket-layer filter set, 774 * maintaining any required invariants and checking allocations. 775 * 776 * The source is marked as being in the new filter mode at t1. 777 * 778 * Return the pointer to the new node, otherwise return NULL. 779 */ 780 static struct in_msource * 781 imf_graft(struct in_mfilter *imf, const uint8_t st1, 782 const struct sockaddr_in *psin) 783 { 784 struct ip_msource *nims; 785 struct in_msource *lims; 786 787 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 788 M_NOWAIT | M_ZERO); 789 if (nims == NULL) 790 return (NULL); 791 lims = (struct in_msource *)nims; 792 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 793 lims->imsl_st[0] = MCAST_UNDEFINED; 794 lims->imsl_st[1] = st1; 795 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 796 ++imf->imf_nsrc; 797 798 return (lims); 799 } 800 801 /* 802 * Prune a source entry from an existing socket-layer filter set, 803 * maintaining any required invariants and checking allocations. 804 * 805 * The source is marked as being left at t1, it is not freed. 806 * 807 * Return 0 if no error occurred, otherwise return an errno value. 808 */ 809 static int 810 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 811 { 812 struct ip_msource find; 813 struct ip_msource *ims; 814 struct in_msource *lims; 815 816 /* key is host byte order */ 817 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 818 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 819 if (ims == NULL) 820 return (ENOENT); 821 lims = (struct in_msource *)ims; 822 lims->imsl_st[1] = MCAST_UNDEFINED; 823 return (0); 824 } 825 826 /* 827 * Revert socket-layer filter set deltas at t1 to t0 state. 828 */ 829 static void 830 imf_rollback(struct in_mfilter *imf) 831 { 832 struct ip_msource *ims, *tims; 833 struct in_msource *lims; 834 835 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 836 lims = (struct in_msource *)ims; 837 if (lims->imsl_st[0] == lims->imsl_st[1]) { 838 /* no change at t1 */ 839 continue; 840 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 841 /* revert change to existing source at t1 */ 842 lims->imsl_st[1] = lims->imsl_st[0]; 843 } else { 844 /* revert source added t1 */ 845 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 846 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 847 free(ims, M_INMFILTER); 848 imf->imf_nsrc--; 849 } 850 } 851 imf->imf_st[1] = imf->imf_st[0]; 852 } 853 854 /* 855 * Mark socket-layer filter set as INCLUDE {} at t1. 856 */ 857 static void 858 imf_leave(struct in_mfilter *imf) 859 { 860 struct ip_msource *ims; 861 struct in_msource *lims; 862 863 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 864 lims = (struct in_msource *)ims; 865 lims->imsl_st[1] = MCAST_UNDEFINED; 866 } 867 imf->imf_st[1] = MCAST_INCLUDE; 868 } 869 870 /* 871 * Mark socket-layer filter set deltas as committed. 872 */ 873 static void 874 imf_commit(struct in_mfilter *imf) 875 { 876 struct ip_msource *ims; 877 struct in_msource *lims; 878 879 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 880 lims = (struct in_msource *)ims; 881 lims->imsl_st[0] = lims->imsl_st[1]; 882 } 883 imf->imf_st[0] = imf->imf_st[1]; 884 } 885 886 /* 887 * Reap unreferenced sources from socket-layer filter set. 888 */ 889 static void 890 imf_reap(struct in_mfilter *imf) 891 { 892 struct ip_msource *ims, *tims; 893 struct in_msource *lims; 894 895 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 896 lims = (struct in_msource *)ims; 897 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 898 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 899 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 900 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 901 free(ims, M_INMFILTER); 902 imf->imf_nsrc--; 903 } 904 } 905 } 906 907 /* 908 * Purge socket-layer filter set. 909 */ 910 static void 911 imf_purge(struct in_mfilter *imf) 912 { 913 struct ip_msource *ims, *tims; 914 915 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 916 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 917 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 918 free(ims, M_INMFILTER); 919 imf->imf_nsrc--; 920 } 921 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 922 KASSERT(RB_EMPTY(&imf->imf_sources), 923 ("%s: imf_sources not empty", __func__)); 924 } 925 926 /* 927 * Look up a source filter entry for a multicast group. 928 * 929 * inm is the group descriptor to work with. 930 * haddr is the host-byte-order IPv4 address to look up. 931 * noalloc may be non-zero to suppress allocation of sources. 932 * *pims will be set to the address of the retrieved or allocated source. 933 * 934 * SMPng: NOTE: may be called with locks held. 935 * Return 0 if successful, otherwise return a non-zero error code. 936 */ 937 static int 938 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 939 const int noalloc, struct ip_msource **pims) 940 { 941 struct ip_msource find; 942 struct ip_msource *ims, *nims; 943 944 find.ims_haddr = haddr; 945 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 946 if (ims == NULL && !noalloc) { 947 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 948 return (ENOSPC); 949 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 950 M_NOWAIT | M_ZERO); 951 if (nims == NULL) 952 return (ENOMEM); 953 nims->ims_haddr = haddr; 954 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 955 ++inm->inm_nsrc; 956 ims = nims; 957 #ifdef KTR 958 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 959 haddr, ims); 960 #endif 961 } 962 963 *pims = ims; 964 return (0); 965 } 966 967 /* 968 * Merge socket-layer source into IGMP-layer source. 969 * If rollback is non-zero, perform the inverse of the merge. 970 */ 971 static void 972 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 973 const int rollback) 974 { 975 int n = rollback ? -1 : 1; 976 977 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 978 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 979 __func__, n, ims->ims_haddr); 980 ims->ims_st[1].ex -= n; 981 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 982 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 983 __func__, n, ims->ims_haddr); 984 ims->ims_st[1].in -= n; 985 } 986 987 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 988 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 989 __func__, n, ims->ims_haddr); 990 ims->ims_st[1].ex += n; 991 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 992 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 993 __func__, n, ims->ims_haddr); 994 ims->ims_st[1].in += n; 995 } 996 } 997 998 /* 999 * Atomically update the global in_multi state, when a membership's 1000 * filter list is being updated in any way. 1001 * 1002 * imf is the per-inpcb-membership group filter pointer. 1003 * A fake imf may be passed for in-kernel consumers. 1004 * 1005 * XXX This is a candidate for a set-symmetric-difference style loop 1006 * which would eliminate the repeated lookup from root of ims nodes, 1007 * as they share the same key space. 1008 * 1009 * If any error occurred this function will back out of refcounts 1010 * and return a non-zero value. 1011 */ 1012 static int 1013 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1014 { 1015 struct ip_msource *ims, *nims; 1016 struct in_msource *lims; 1017 int schanged, error; 1018 int nsrc0, nsrc1; 1019 1020 schanged = 0; 1021 error = 0; 1022 nsrc1 = nsrc0 = 0; 1023 IN_MULTI_LIST_LOCK_ASSERT(); 1024 1025 /* 1026 * Update the source filters first, as this may fail. 1027 * Maintain count of in-mode filters at t0, t1. These are 1028 * used to work out if we transition into ASM mode or not. 1029 * Maintain a count of source filters whose state was 1030 * actually modified by this operation. 1031 */ 1032 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1033 lims = (struct in_msource *)ims; 1034 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1035 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1036 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1037 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1038 ++schanged; 1039 if (error) 1040 break; 1041 ims_merge(nims, lims, 0); 1042 } 1043 if (error) { 1044 struct ip_msource *bims; 1045 1046 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1047 lims = (struct in_msource *)ims; 1048 if (lims->imsl_st[0] == lims->imsl_st[1]) 1049 continue; 1050 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1051 if (bims == NULL) 1052 continue; 1053 ims_merge(bims, lims, 1); 1054 } 1055 goto out_reap; 1056 } 1057 1058 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1059 __func__, nsrc0, nsrc1); 1060 1061 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1062 if (imf->imf_st[0] == imf->imf_st[1] && 1063 imf->imf_st[1] == MCAST_INCLUDE) { 1064 if (nsrc1 == 0) { 1065 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1066 --inm->inm_st[1].iss_in; 1067 } 1068 } 1069 1070 /* Handle filter mode transition on socket. */ 1071 if (imf->imf_st[0] != imf->imf_st[1]) { 1072 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1073 __func__, imf->imf_st[0], imf->imf_st[1]); 1074 1075 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1076 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1077 --inm->inm_st[1].iss_ex; 1078 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1079 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1080 --inm->inm_st[1].iss_in; 1081 } 1082 1083 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1084 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1085 inm->inm_st[1].iss_ex++; 1086 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1087 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1088 inm->inm_st[1].iss_in++; 1089 } 1090 } 1091 1092 /* 1093 * Track inm filter state in terms of listener counts. 1094 * If there are any exclusive listeners, stack-wide 1095 * membership is exclusive. 1096 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1097 * If no listeners remain, state is undefined at t1, 1098 * and the IGMP lifecycle for this group should finish. 1099 */ 1100 if (inm->inm_st[1].iss_ex > 0) { 1101 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1102 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1103 } else if (inm->inm_st[1].iss_in > 0) { 1104 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1105 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1106 } else { 1107 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1108 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1109 } 1110 1111 /* Decrement ASM listener count on transition out of ASM mode. */ 1112 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1113 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1114 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1115 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1116 --inm->inm_st[1].iss_asm; 1117 } 1118 } 1119 1120 /* Increment ASM listener count on transition to ASM mode. */ 1121 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1122 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1123 inm->inm_st[1].iss_asm++; 1124 } 1125 1126 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1127 inm_print(inm); 1128 1129 out_reap: 1130 if (schanged > 0) { 1131 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1132 inm_reap(inm); 1133 } 1134 return (error); 1135 } 1136 1137 /* 1138 * Mark an in_multi's filter set deltas as committed. 1139 * Called by IGMP after a state change has been enqueued. 1140 */ 1141 void 1142 inm_commit(struct in_multi *inm) 1143 { 1144 struct ip_msource *ims; 1145 1146 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1147 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1148 inm_print(inm); 1149 1150 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1151 ims->ims_st[0] = ims->ims_st[1]; 1152 } 1153 inm->inm_st[0] = inm->inm_st[1]; 1154 } 1155 1156 /* 1157 * Reap unreferenced nodes from an in_multi's filter set. 1158 */ 1159 static void 1160 inm_reap(struct in_multi *inm) 1161 { 1162 struct ip_msource *ims, *tims; 1163 1164 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1165 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1166 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1167 ims->ims_stp != 0) 1168 continue; 1169 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1170 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1171 free(ims, M_IPMSOURCE); 1172 inm->inm_nsrc--; 1173 } 1174 } 1175 1176 /* 1177 * Purge all source nodes from an in_multi's filter set. 1178 */ 1179 static void 1180 inm_purge(struct in_multi *inm) 1181 { 1182 struct ip_msource *ims, *tims; 1183 1184 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1185 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1186 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1187 free(ims, M_IPMSOURCE); 1188 inm->inm_nsrc--; 1189 } 1190 } 1191 1192 /* 1193 * Join a multicast group; unlocked entry point. 1194 * 1195 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1196 * is not held. Fortunately, ifp is unlikely to have been detached 1197 * at this point, so we assume it's OK to recurse. 1198 */ 1199 int 1200 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1201 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1202 { 1203 int error; 1204 1205 IN_MULTI_LOCK(); 1206 error = in_joingroup_locked(ifp, gina, imf, pinm); 1207 IN_MULTI_UNLOCK(); 1208 1209 return (error); 1210 } 1211 1212 /* 1213 * Join a multicast group; real entry point. 1214 * 1215 * Only preserves atomicity at inm level. 1216 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1217 * 1218 * If the IGMP downcall fails, the group is not joined, and an error 1219 * code is returned. 1220 */ 1221 int 1222 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1223 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1224 { 1225 struct in_mfilter timf; 1226 struct in_multi *inm; 1227 int error; 1228 1229 IN_MULTI_LOCK_ASSERT(); 1230 IN_MULTI_LIST_UNLOCK_ASSERT(); 1231 1232 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1233 ntohl(gina->s_addr), ifp, ifp->if_xname); 1234 1235 error = 0; 1236 inm = NULL; 1237 1238 /* 1239 * If no imf was specified (i.e. kernel consumer), 1240 * fake one up and assume it is an ASM join. 1241 */ 1242 if (imf == NULL) { 1243 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1244 imf = &timf; 1245 } 1246 1247 error = in_getmulti(ifp, gina, &inm); 1248 if (error) { 1249 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1250 return (error); 1251 } 1252 IN_MULTI_LIST_LOCK(); 1253 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1254 error = inm_merge(inm, imf); 1255 if (error) { 1256 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1257 goto out_inm_release; 1258 } 1259 1260 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1261 error = igmp_change_state(inm); 1262 if (error) { 1263 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1264 goto out_inm_release; 1265 } 1266 1267 out_inm_release: 1268 if (error) { 1269 1270 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1271 inm_release_deferred(inm); 1272 } else { 1273 *pinm = inm; 1274 } 1275 IN_MULTI_LIST_UNLOCK(); 1276 1277 return (error); 1278 } 1279 1280 /* 1281 * Leave a multicast group; unlocked entry point. 1282 */ 1283 int 1284 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1285 { 1286 int error; 1287 1288 IN_MULTI_LOCK(); 1289 error = in_leavegroup_locked(inm, imf); 1290 IN_MULTI_UNLOCK(); 1291 1292 return (error); 1293 } 1294 1295 /* 1296 * Leave a multicast group; real entry point. 1297 * All source filters will be expunged. 1298 * 1299 * Only preserves atomicity at inm level. 1300 * 1301 * Holding the write lock for the INP which contains imf 1302 * is highly advisable. We can't assert for it as imf does not 1303 * contain a back-pointer to the owning inp. 1304 * 1305 * Note: This is not the same as inm_release(*) as this function also 1306 * makes a state change downcall into IGMP. 1307 */ 1308 int 1309 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1310 { 1311 struct in_mfilter timf; 1312 int error; 1313 1314 IN_MULTI_LOCK_ASSERT(); 1315 IN_MULTI_LIST_UNLOCK_ASSERT(); 1316 1317 error = 0; 1318 1319 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1320 inm, ntohl(inm->inm_addr.s_addr), 1321 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1322 imf); 1323 1324 /* 1325 * If no imf was specified (i.e. kernel consumer), 1326 * fake one up and assume it is an ASM join. 1327 */ 1328 if (imf == NULL) { 1329 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1330 imf = &timf; 1331 } 1332 1333 /* 1334 * Begin state merge transaction at IGMP layer. 1335 * 1336 * As this particular invocation should not cause any memory 1337 * to be allocated, and there is no opportunity to roll back 1338 * the transaction, it MUST NOT fail. 1339 */ 1340 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1341 IN_MULTI_LIST_LOCK(); 1342 error = inm_merge(inm, imf); 1343 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1344 1345 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1346 CURVNET_SET(inm->inm_ifp->if_vnet); 1347 error = igmp_change_state(inm); 1348 IF_ADDR_WLOCK(inm->inm_ifp); 1349 inm_release_deferred(inm); 1350 IF_ADDR_WUNLOCK(inm->inm_ifp); 1351 IN_MULTI_LIST_UNLOCK(); 1352 CURVNET_RESTORE(); 1353 if (error) 1354 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1355 1356 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1357 1358 return (error); 1359 } 1360 1361 /*#ifndef BURN_BRIDGES*/ 1362 /* 1363 * Join an IPv4 multicast group in (*,G) exclusive mode. 1364 * The group must be a 224.0.0.0/24 link-scope group. 1365 * This KPI is for legacy kernel consumers only. 1366 */ 1367 struct in_multi * 1368 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1369 { 1370 struct in_multi *pinm; 1371 int error; 1372 #ifdef INVARIANTS 1373 char addrbuf[INET_ADDRSTRLEN]; 1374 #endif 1375 1376 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1377 ("%s: %s not in 224.0.0.0/24", __func__, 1378 inet_ntoa_r(*ap, addrbuf))); 1379 1380 error = in_joingroup(ifp, ap, NULL, &pinm); 1381 if (error != 0) 1382 pinm = NULL; 1383 1384 return (pinm); 1385 } 1386 1387 /* 1388 * Block or unblock an ASM multicast source on an inpcb. 1389 * This implements the delta-based API described in RFC 3678. 1390 * 1391 * The delta-based API applies only to exclusive-mode memberships. 1392 * An IGMP downcall will be performed. 1393 * 1394 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1395 * 1396 * Return 0 if successful, otherwise return an appropriate error code. 1397 */ 1398 static int 1399 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1400 { 1401 struct group_source_req gsr; 1402 struct rm_priotracker in_ifa_tracker; 1403 sockunion_t *gsa, *ssa; 1404 struct ifnet *ifp; 1405 struct in_mfilter *imf; 1406 struct ip_moptions *imo; 1407 struct in_msource *ims; 1408 struct in_multi *inm; 1409 uint16_t fmode; 1410 int error, doblock; 1411 1412 ifp = NULL; 1413 error = 0; 1414 doblock = 0; 1415 1416 memset(&gsr, 0, sizeof(struct group_source_req)); 1417 gsa = (sockunion_t *)&gsr.gsr_group; 1418 ssa = (sockunion_t *)&gsr.gsr_source; 1419 1420 switch (sopt->sopt_name) { 1421 case IP_BLOCK_SOURCE: 1422 case IP_UNBLOCK_SOURCE: { 1423 struct ip_mreq_source mreqs; 1424 1425 error = sooptcopyin(sopt, &mreqs, 1426 sizeof(struct ip_mreq_source), 1427 sizeof(struct ip_mreq_source)); 1428 if (error) 1429 return (error); 1430 1431 gsa->sin.sin_family = AF_INET; 1432 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1433 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1434 1435 ssa->sin.sin_family = AF_INET; 1436 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1437 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1438 1439 if (!in_nullhost(mreqs.imr_interface)) { 1440 IN_IFADDR_RLOCK(&in_ifa_tracker); 1441 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1442 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1443 } 1444 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1445 doblock = 1; 1446 1447 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1448 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1449 break; 1450 } 1451 1452 case MCAST_BLOCK_SOURCE: 1453 case MCAST_UNBLOCK_SOURCE: 1454 error = sooptcopyin(sopt, &gsr, 1455 sizeof(struct group_source_req), 1456 sizeof(struct group_source_req)); 1457 if (error) 1458 return (error); 1459 1460 if (gsa->sin.sin_family != AF_INET || 1461 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1462 return (EINVAL); 1463 1464 if (ssa->sin.sin_family != AF_INET || 1465 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1466 return (EINVAL); 1467 1468 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1469 return (EADDRNOTAVAIL); 1470 1471 ifp = ifnet_byindex(gsr.gsr_interface); 1472 1473 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1474 doblock = 1; 1475 break; 1476 1477 default: 1478 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1479 __func__, sopt->sopt_name); 1480 return (EOPNOTSUPP); 1481 break; 1482 } 1483 1484 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1485 return (EINVAL); 1486 1487 IN_MULTI_LOCK(); 1488 1489 /* 1490 * Check if we are actually a member of this group. 1491 */ 1492 imo = inp_findmoptions(inp); 1493 imf = imo_match_group(imo, ifp, &gsa->sa); 1494 if (imf == NULL) { 1495 error = EADDRNOTAVAIL; 1496 goto out_inp_locked; 1497 } 1498 inm = imf->imf_inm; 1499 1500 /* 1501 * Attempting to use the delta-based API on an 1502 * non exclusive-mode membership is an error. 1503 */ 1504 fmode = imf->imf_st[0]; 1505 if (fmode != MCAST_EXCLUDE) { 1506 error = EINVAL; 1507 goto out_inp_locked; 1508 } 1509 1510 /* 1511 * Deal with error cases up-front: 1512 * Asked to block, but already blocked; or 1513 * Asked to unblock, but nothing to unblock. 1514 * If adding a new block entry, allocate it. 1515 */ 1516 ims = imo_match_source(imf, &ssa->sa); 1517 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1518 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1519 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1520 error = EADDRNOTAVAIL; 1521 goto out_inp_locked; 1522 } 1523 1524 INP_WLOCK_ASSERT(inp); 1525 1526 /* 1527 * Begin state merge transaction at socket layer. 1528 */ 1529 if (doblock) { 1530 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1531 ims = imf_graft(imf, fmode, &ssa->sin); 1532 if (ims == NULL) 1533 error = ENOMEM; 1534 } else { 1535 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1536 error = imf_prune(imf, &ssa->sin); 1537 } 1538 1539 if (error) { 1540 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1541 goto out_imf_rollback; 1542 } 1543 1544 /* 1545 * Begin state merge transaction at IGMP layer. 1546 */ 1547 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1548 IN_MULTI_LIST_LOCK(); 1549 error = inm_merge(inm, imf); 1550 if (error) { 1551 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1552 IN_MULTI_LIST_UNLOCK(); 1553 goto out_imf_rollback; 1554 } 1555 1556 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1557 error = igmp_change_state(inm); 1558 IN_MULTI_LIST_UNLOCK(); 1559 if (error) 1560 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1561 1562 out_imf_rollback: 1563 if (error) 1564 imf_rollback(imf); 1565 else 1566 imf_commit(imf); 1567 1568 imf_reap(imf); 1569 1570 out_inp_locked: 1571 INP_WUNLOCK(inp); 1572 IN_MULTI_UNLOCK(); 1573 return (error); 1574 } 1575 1576 /* 1577 * Given an inpcb, return its multicast options structure pointer. Accepts 1578 * an unlocked inpcb pointer, but will return it locked. May sleep. 1579 * 1580 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1581 * SMPng: NOTE: Returns with the INP write lock held. 1582 */ 1583 static struct ip_moptions * 1584 inp_findmoptions(struct inpcb *inp) 1585 { 1586 struct ip_moptions *imo; 1587 1588 INP_WLOCK(inp); 1589 if (inp->inp_moptions != NULL) 1590 return (inp->inp_moptions); 1591 1592 INP_WUNLOCK(inp); 1593 1594 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1595 1596 imo->imo_multicast_ifp = NULL; 1597 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1598 imo->imo_multicast_vif = -1; 1599 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1600 imo->imo_multicast_loop = in_mcast_loop; 1601 STAILQ_INIT(&imo->imo_head); 1602 1603 INP_WLOCK(inp); 1604 if (inp->inp_moptions != NULL) { 1605 free(imo, M_IPMOPTS); 1606 return (inp->inp_moptions); 1607 } 1608 inp->inp_moptions = imo; 1609 return (imo); 1610 } 1611 1612 static void 1613 inp_gcmoptions(struct ip_moptions *imo) 1614 { 1615 struct in_mfilter *imf; 1616 struct in_multi *inm; 1617 struct ifnet *ifp; 1618 1619 while ((imf = ip_mfilter_first(&imo->imo_head)) != NULL) { 1620 ip_mfilter_remove(&imo->imo_head, imf); 1621 1622 imf_leave(imf); 1623 if ((inm = imf->imf_inm) != NULL) { 1624 if ((ifp = inm->inm_ifp) != NULL) { 1625 CURVNET_SET(ifp->if_vnet); 1626 (void)in_leavegroup(inm, imf); 1627 CURVNET_RESTORE(); 1628 } else { 1629 (void)in_leavegroup(inm, imf); 1630 } 1631 } 1632 ip_mfilter_free(imf); 1633 } 1634 free(imo, M_IPMOPTS); 1635 } 1636 1637 /* 1638 * Discard the IP multicast options (and source filters). To minimize 1639 * the amount of work done while holding locks such as the INP's 1640 * pcbinfo lock (which is used in the receive path), the free 1641 * operation is deferred to the epoch callback task. 1642 */ 1643 void 1644 inp_freemoptions(struct ip_moptions *imo) 1645 { 1646 if (imo == NULL) 1647 return; 1648 inp_gcmoptions(imo); 1649 } 1650 1651 /* 1652 * Atomically get source filters on a socket for an IPv4 multicast group. 1653 * Called with INP lock held; returns with lock released. 1654 */ 1655 static int 1656 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1657 { 1658 struct __msfilterreq msfr; 1659 sockunion_t *gsa; 1660 struct ifnet *ifp; 1661 struct ip_moptions *imo; 1662 struct in_mfilter *imf; 1663 struct ip_msource *ims; 1664 struct in_msource *lims; 1665 struct sockaddr_in *psin; 1666 struct sockaddr_storage *ptss; 1667 struct sockaddr_storage *tss; 1668 int error; 1669 size_t nsrcs, ncsrcs; 1670 1671 INP_WLOCK_ASSERT(inp); 1672 1673 imo = inp->inp_moptions; 1674 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1675 1676 INP_WUNLOCK(inp); 1677 1678 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1679 sizeof(struct __msfilterreq)); 1680 if (error) 1681 return (error); 1682 1683 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1684 return (EINVAL); 1685 1686 ifp = ifnet_byindex(msfr.msfr_ifindex); 1687 if (ifp == NULL) 1688 return (EINVAL); 1689 1690 INP_WLOCK(inp); 1691 1692 /* 1693 * Lookup group on the socket. 1694 */ 1695 gsa = (sockunion_t *)&msfr.msfr_group; 1696 imf = imo_match_group(imo, ifp, &gsa->sa); 1697 if (imf == NULL) { 1698 INP_WUNLOCK(inp); 1699 return (EADDRNOTAVAIL); 1700 } 1701 1702 /* 1703 * Ignore memberships which are in limbo. 1704 */ 1705 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1706 INP_WUNLOCK(inp); 1707 return (EAGAIN); 1708 } 1709 msfr.msfr_fmode = imf->imf_st[1]; 1710 1711 /* 1712 * If the user specified a buffer, copy out the source filter 1713 * entries to userland gracefully. 1714 * We only copy out the number of entries which userland 1715 * has asked for, but we always tell userland how big the 1716 * buffer really needs to be. 1717 */ 1718 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1719 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1720 tss = NULL; 1721 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1722 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1723 M_TEMP, M_NOWAIT | M_ZERO); 1724 if (tss == NULL) { 1725 INP_WUNLOCK(inp); 1726 return (ENOBUFS); 1727 } 1728 } 1729 1730 /* 1731 * Count number of sources in-mode at t0. 1732 * If buffer space exists and remains, copy out source entries. 1733 */ 1734 nsrcs = msfr.msfr_nsrcs; 1735 ncsrcs = 0; 1736 ptss = tss; 1737 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1738 lims = (struct in_msource *)ims; 1739 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1740 lims->imsl_st[0] != imf->imf_st[0]) 1741 continue; 1742 ++ncsrcs; 1743 if (tss != NULL && nsrcs > 0) { 1744 psin = (struct sockaddr_in *)ptss; 1745 psin->sin_family = AF_INET; 1746 psin->sin_len = sizeof(struct sockaddr_in); 1747 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1748 psin->sin_port = 0; 1749 ++ptss; 1750 --nsrcs; 1751 } 1752 } 1753 1754 INP_WUNLOCK(inp); 1755 1756 if (tss != NULL) { 1757 error = copyout(tss, msfr.msfr_srcs, 1758 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1759 free(tss, M_TEMP); 1760 if (error) 1761 return (error); 1762 } 1763 1764 msfr.msfr_nsrcs = ncsrcs; 1765 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1766 1767 return (error); 1768 } 1769 1770 /* 1771 * Return the IP multicast options in response to user getsockopt(). 1772 */ 1773 int 1774 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1775 { 1776 struct rm_priotracker in_ifa_tracker; 1777 struct ip_mreqn mreqn; 1778 struct ip_moptions *imo; 1779 struct ifnet *ifp; 1780 struct in_ifaddr *ia; 1781 int error, optval; 1782 u_char coptval; 1783 1784 INP_WLOCK(inp); 1785 imo = inp->inp_moptions; 1786 /* 1787 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1788 * or is a divert socket, reject it. 1789 */ 1790 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1791 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1792 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1793 INP_WUNLOCK(inp); 1794 return (EOPNOTSUPP); 1795 } 1796 1797 error = 0; 1798 switch (sopt->sopt_name) { 1799 case IP_MULTICAST_VIF: 1800 if (imo != NULL) 1801 optval = imo->imo_multicast_vif; 1802 else 1803 optval = -1; 1804 INP_WUNLOCK(inp); 1805 error = sooptcopyout(sopt, &optval, sizeof(int)); 1806 break; 1807 1808 case IP_MULTICAST_IF: 1809 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1810 if (imo != NULL) { 1811 ifp = imo->imo_multicast_ifp; 1812 if (!in_nullhost(imo->imo_multicast_addr)) { 1813 mreqn.imr_address = imo->imo_multicast_addr; 1814 } else if (ifp != NULL) { 1815 struct epoch_tracker et; 1816 1817 mreqn.imr_ifindex = ifp->if_index; 1818 NET_EPOCH_ENTER(et); 1819 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1820 if (ia != NULL) 1821 mreqn.imr_address = 1822 IA_SIN(ia)->sin_addr; 1823 NET_EPOCH_EXIT(et); 1824 } 1825 } 1826 INP_WUNLOCK(inp); 1827 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1828 error = sooptcopyout(sopt, &mreqn, 1829 sizeof(struct ip_mreqn)); 1830 } else { 1831 error = sooptcopyout(sopt, &mreqn.imr_address, 1832 sizeof(struct in_addr)); 1833 } 1834 break; 1835 1836 case IP_MULTICAST_TTL: 1837 if (imo == NULL) 1838 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1839 else 1840 optval = coptval = imo->imo_multicast_ttl; 1841 INP_WUNLOCK(inp); 1842 if (sopt->sopt_valsize == sizeof(u_char)) 1843 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1844 else 1845 error = sooptcopyout(sopt, &optval, sizeof(int)); 1846 break; 1847 1848 case IP_MULTICAST_LOOP: 1849 if (imo == NULL) 1850 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1851 else 1852 optval = coptval = imo->imo_multicast_loop; 1853 INP_WUNLOCK(inp); 1854 if (sopt->sopt_valsize == sizeof(u_char)) 1855 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1856 else 1857 error = sooptcopyout(sopt, &optval, sizeof(int)); 1858 break; 1859 1860 case IP_MSFILTER: 1861 if (imo == NULL) { 1862 error = EADDRNOTAVAIL; 1863 INP_WUNLOCK(inp); 1864 } else { 1865 error = inp_get_source_filters(inp, sopt); 1866 } 1867 break; 1868 1869 default: 1870 INP_WUNLOCK(inp); 1871 error = ENOPROTOOPT; 1872 break; 1873 } 1874 1875 INP_UNLOCK_ASSERT(inp); 1876 1877 return (error); 1878 } 1879 1880 /* 1881 * Look up the ifnet to use for a multicast group membership, 1882 * given the IPv4 address of an interface, and the IPv4 group address. 1883 * 1884 * This routine exists to support legacy multicast applications 1885 * which do not understand that multicast memberships are scoped to 1886 * specific physical links in the networking stack, or which need 1887 * to join link-scope groups before IPv4 addresses are configured. 1888 * 1889 * If inp is non-NULL, use this socket's current FIB number for any 1890 * required FIB lookup. 1891 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1892 * and use its ifp; usually, this points to the default next-hop. 1893 * 1894 * If the FIB lookup fails, attempt to use the first non-loopback 1895 * interface with multicast capability in the system as a 1896 * last resort. The legacy IPv4 ASM API requires that we do 1897 * this in order to allow groups to be joined when the routing 1898 * table has not yet been populated during boot. 1899 * 1900 * Returns NULL if no ifp could be found. 1901 * 1902 * FUTURE: Implement IPv4 source-address selection. 1903 */ 1904 static struct ifnet * 1905 inp_lookup_mcast_ifp(const struct inpcb *inp, 1906 const struct sockaddr_in *gsin, const struct in_addr ina) 1907 { 1908 struct rm_priotracker in_ifa_tracker; 1909 struct ifnet *ifp; 1910 struct nhop4_basic nh4; 1911 uint32_t fibnum; 1912 1913 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1914 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1915 ("%s: not multicast", __func__)); 1916 1917 ifp = NULL; 1918 if (!in_nullhost(ina)) { 1919 IN_IFADDR_RLOCK(&in_ifa_tracker); 1920 INADDR_TO_IFP(ina, ifp); 1921 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1922 } else { 1923 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1924 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1925 ifp = nh4.nh_ifp; 1926 else { 1927 struct in_ifaddr *ia; 1928 struct ifnet *mifp; 1929 1930 mifp = NULL; 1931 IN_IFADDR_RLOCK(&in_ifa_tracker); 1932 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1933 mifp = ia->ia_ifp; 1934 if (!(mifp->if_flags & IFF_LOOPBACK) && 1935 (mifp->if_flags & IFF_MULTICAST)) { 1936 ifp = mifp; 1937 break; 1938 } 1939 } 1940 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 1941 } 1942 } 1943 1944 return (ifp); 1945 } 1946 1947 /* 1948 * Join an IPv4 multicast group, possibly with a source. 1949 */ 1950 static int 1951 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 1952 { 1953 struct group_source_req gsr; 1954 sockunion_t *gsa, *ssa; 1955 struct ifnet *ifp; 1956 struct in_mfilter *imf; 1957 struct ip_moptions *imo; 1958 struct in_multi *inm; 1959 struct in_msource *lims; 1960 int error, is_new; 1961 1962 ifp = NULL; 1963 lims = NULL; 1964 error = 0; 1965 1966 memset(&gsr, 0, sizeof(struct group_source_req)); 1967 gsa = (sockunion_t *)&gsr.gsr_group; 1968 gsa->ss.ss_family = AF_UNSPEC; 1969 ssa = (sockunion_t *)&gsr.gsr_source; 1970 ssa->ss.ss_family = AF_UNSPEC; 1971 1972 switch (sopt->sopt_name) { 1973 case IP_ADD_MEMBERSHIP: { 1974 struct ip_mreqn mreqn; 1975 1976 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) 1977 error = sooptcopyin(sopt, &mreqn, 1978 sizeof(struct ip_mreqn), sizeof(struct ip_mreqn)); 1979 else 1980 error = sooptcopyin(sopt, &mreqn, 1981 sizeof(struct ip_mreq), sizeof(struct ip_mreq)); 1982 if (error) 1983 return (error); 1984 1985 gsa->sin.sin_family = AF_INET; 1986 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1987 gsa->sin.sin_addr = mreqn.imr_multiaddr; 1988 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1989 return (EINVAL); 1990 1991 if (sopt->sopt_valsize == sizeof(struct ip_mreqn) && 1992 mreqn.imr_ifindex != 0) 1993 ifp = ifnet_byindex(mreqn.imr_ifindex); 1994 else 1995 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 1996 mreqn.imr_address); 1997 break; 1998 } 1999 case IP_ADD_SOURCE_MEMBERSHIP: { 2000 struct ip_mreq_source mreqs; 2001 2002 error = sooptcopyin(sopt, &mreqs, sizeof(struct ip_mreq_source), 2003 sizeof(struct ip_mreq_source)); 2004 if (error) 2005 return (error); 2006 2007 gsa->sin.sin_family = ssa->sin.sin_family = AF_INET; 2008 gsa->sin.sin_len = ssa->sin.sin_len = 2009 sizeof(struct sockaddr_in); 2010 2011 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2012 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2013 return (EINVAL); 2014 2015 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2016 2017 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2018 mreqs.imr_interface); 2019 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2020 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2021 break; 2022 } 2023 2024 case MCAST_JOIN_GROUP: 2025 case MCAST_JOIN_SOURCE_GROUP: 2026 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2027 error = sooptcopyin(sopt, &gsr, 2028 sizeof(struct group_req), 2029 sizeof(struct group_req)); 2030 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2031 error = sooptcopyin(sopt, &gsr, 2032 sizeof(struct group_source_req), 2033 sizeof(struct group_source_req)); 2034 } 2035 if (error) 2036 return (error); 2037 2038 if (gsa->sin.sin_family != AF_INET || 2039 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2040 return (EINVAL); 2041 2042 /* 2043 * Overwrite the port field if present, as the sockaddr 2044 * being copied in may be matched with a binary comparison. 2045 */ 2046 gsa->sin.sin_port = 0; 2047 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2048 if (ssa->sin.sin_family != AF_INET || 2049 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2050 return (EINVAL); 2051 ssa->sin.sin_port = 0; 2052 } 2053 2054 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2055 return (EINVAL); 2056 2057 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2058 return (EADDRNOTAVAIL); 2059 ifp = ifnet_byindex(gsr.gsr_interface); 2060 break; 2061 2062 default: 2063 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2064 __func__, sopt->sopt_name); 2065 return (EOPNOTSUPP); 2066 break; 2067 } 2068 2069 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2070 return (EADDRNOTAVAIL); 2071 2072 IN_MULTI_LOCK(); 2073 2074 /* 2075 * Find the membership in the membership list. 2076 */ 2077 imo = inp_findmoptions(inp); 2078 imf = imo_match_group(imo, ifp, &gsa->sa); 2079 if (imf == NULL) { 2080 is_new = 1; 2081 inm = NULL; 2082 2083 if (ip_mfilter_count(&imo->imo_head) >= IP_MAX_MEMBERSHIPS) { 2084 error = ENOMEM; 2085 goto out_inp_locked; 2086 } 2087 } else { 2088 is_new = 0; 2089 inm = imf->imf_inm; 2090 2091 if (ssa->ss.ss_family != AF_UNSPEC) { 2092 /* 2093 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2094 * is an error. On an existing inclusive membership, 2095 * it just adds the source to the filter list. 2096 */ 2097 if (imf->imf_st[1] != MCAST_INCLUDE) { 2098 error = EINVAL; 2099 goto out_inp_locked; 2100 } 2101 /* 2102 * Throw out duplicates. 2103 * 2104 * XXX FIXME: This makes a naive assumption that 2105 * even if entries exist for *ssa in this imf, 2106 * they will be rejected as dupes, even if they 2107 * are not valid in the current mode (in-mode). 2108 * 2109 * in_msource is transactioned just as for anything 2110 * else in SSM -- but note naive use of inm_graft() 2111 * below for allocating new filter entries. 2112 * 2113 * This is only an issue if someone mixes the 2114 * full-state SSM API with the delta-based API, 2115 * which is discouraged in the relevant RFCs. 2116 */ 2117 lims = imo_match_source(imf, &ssa->sa); 2118 if (lims != NULL /*&& 2119 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2120 error = EADDRNOTAVAIL; 2121 goto out_inp_locked; 2122 } 2123 } else { 2124 /* 2125 * MCAST_JOIN_GROUP on an existing exclusive 2126 * membership is an error; return EADDRINUSE 2127 * to preserve 4.4BSD API idempotence, and 2128 * avoid tedious detour to code below. 2129 * NOTE: This is bending RFC 3678 a bit. 2130 * 2131 * On an existing inclusive membership, this is also 2132 * an error; if you want to change filter mode, 2133 * you must use the userland API setsourcefilter(). 2134 * XXX We don't reject this for imf in UNDEFINED 2135 * state at t1, because allocation of a filter 2136 * is atomic with allocation of a membership. 2137 */ 2138 error = EINVAL; 2139 if (imf->imf_st[1] == MCAST_EXCLUDE) 2140 error = EADDRINUSE; 2141 goto out_inp_locked; 2142 } 2143 } 2144 2145 /* 2146 * Begin state merge transaction at socket layer. 2147 */ 2148 INP_WLOCK_ASSERT(inp); 2149 2150 /* 2151 * Graft new source into filter list for this inpcb's 2152 * membership of the group. The in_multi may not have 2153 * been allocated yet if this is a new membership, however, 2154 * the in_mfilter slot will be allocated and must be initialized. 2155 * 2156 * Note: Grafting of exclusive mode filters doesn't happen 2157 * in this path. 2158 * XXX: Should check for non-NULL lims (node exists but may 2159 * not be in-mode) for interop with full-state API. 2160 */ 2161 if (ssa->ss.ss_family != AF_UNSPEC) { 2162 /* Membership starts in IN mode */ 2163 if (is_new) { 2164 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2165 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_INCLUDE); 2166 if (imf == NULL) { 2167 error = ENOMEM; 2168 goto out_inp_locked; 2169 } 2170 } else { 2171 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2172 } 2173 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2174 if (lims == NULL) { 2175 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2176 __func__); 2177 error = ENOMEM; 2178 goto out_inp_locked; 2179 } 2180 } else { 2181 /* No address specified; Membership starts in EX mode */ 2182 if (is_new) { 2183 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2184 imf = ip_mfilter_alloc(M_NOWAIT, MCAST_UNDEFINED, MCAST_EXCLUDE); 2185 if (imf == NULL) { 2186 error = ENOMEM; 2187 goto out_inp_locked; 2188 } 2189 } 2190 } 2191 2192 /* 2193 * Begin state merge transaction at IGMP layer. 2194 */ 2195 if (is_new) { 2196 in_pcbref(inp); 2197 INP_WUNLOCK(inp); 2198 2199 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2200 &imf->imf_inm); 2201 2202 INP_WLOCK(inp); 2203 if (in_pcbrele_wlocked(inp)) { 2204 error = ENXIO; 2205 goto out_inp_unlocked; 2206 } 2207 if (error) { 2208 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2209 __func__); 2210 goto out_inp_locked; 2211 } 2212 inm_acquire(imf->imf_inm); 2213 } else { 2214 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2215 IN_MULTI_LIST_LOCK(); 2216 error = inm_merge(inm, imf); 2217 if (error) { 2218 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2219 __func__); 2220 IN_MULTI_LIST_UNLOCK(); 2221 imf_rollback(imf); 2222 imf_reap(imf); 2223 goto out_inp_locked; 2224 } 2225 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2226 error = igmp_change_state(inm); 2227 IN_MULTI_LIST_UNLOCK(); 2228 if (error) { 2229 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2230 __func__); 2231 imf_rollback(imf); 2232 imf_reap(imf); 2233 goto out_inp_locked; 2234 } 2235 } 2236 if (is_new) 2237 ip_mfilter_insert(&imo->imo_head, imf); 2238 2239 imf_commit(imf); 2240 imf = NULL; 2241 2242 out_inp_locked: 2243 INP_WUNLOCK(inp); 2244 out_inp_unlocked: 2245 IN_MULTI_UNLOCK(); 2246 2247 if (is_new && imf) { 2248 if (imf->imf_inm != NULL) { 2249 IN_MULTI_LIST_LOCK(); 2250 inm_release_deferred(imf->imf_inm); 2251 IN_MULTI_LIST_UNLOCK(); 2252 } 2253 ip_mfilter_free(imf); 2254 } 2255 return (error); 2256 } 2257 2258 /* 2259 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2260 */ 2261 static int 2262 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2263 { 2264 struct group_source_req gsr; 2265 struct ip_mreq_source mreqs; 2266 struct rm_priotracker in_ifa_tracker; 2267 sockunion_t *gsa, *ssa; 2268 struct ifnet *ifp; 2269 struct in_mfilter *imf; 2270 struct ip_moptions *imo; 2271 struct in_msource *ims; 2272 struct in_multi *inm; 2273 int error; 2274 bool is_final; 2275 2276 ifp = NULL; 2277 error = 0; 2278 is_final = true; 2279 2280 memset(&gsr, 0, sizeof(struct group_source_req)); 2281 gsa = (sockunion_t *)&gsr.gsr_group; 2282 gsa->ss.ss_family = AF_UNSPEC; 2283 ssa = (sockunion_t *)&gsr.gsr_source; 2284 ssa->ss.ss_family = AF_UNSPEC; 2285 2286 switch (sopt->sopt_name) { 2287 case IP_DROP_MEMBERSHIP: 2288 case IP_DROP_SOURCE_MEMBERSHIP: 2289 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2290 error = sooptcopyin(sopt, &mreqs, 2291 sizeof(struct ip_mreq), 2292 sizeof(struct ip_mreq)); 2293 /* 2294 * Swap interface and sourceaddr arguments, 2295 * as ip_mreq and ip_mreq_source are laid 2296 * out differently. 2297 */ 2298 mreqs.imr_interface = mreqs.imr_sourceaddr; 2299 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2300 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2301 error = sooptcopyin(sopt, &mreqs, 2302 sizeof(struct ip_mreq_source), 2303 sizeof(struct ip_mreq_source)); 2304 } 2305 if (error) 2306 return (error); 2307 2308 gsa->sin.sin_family = AF_INET; 2309 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2310 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2311 2312 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2313 ssa->sin.sin_family = AF_INET; 2314 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2315 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2316 } 2317 2318 /* 2319 * Attempt to look up hinted ifp from interface address. 2320 * Fallthrough with null ifp iff lookup fails, to 2321 * preserve 4.4BSD mcast API idempotence. 2322 * XXX NOTE WELL: The RFC 3678 API is preferred because 2323 * using an IPv4 address as a key is racy. 2324 */ 2325 if (!in_nullhost(mreqs.imr_interface)) { 2326 IN_IFADDR_RLOCK(&in_ifa_tracker); 2327 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2328 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2329 } 2330 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2331 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2332 2333 break; 2334 2335 case MCAST_LEAVE_GROUP: 2336 case MCAST_LEAVE_SOURCE_GROUP: 2337 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2338 error = sooptcopyin(sopt, &gsr, 2339 sizeof(struct group_req), 2340 sizeof(struct group_req)); 2341 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2342 error = sooptcopyin(sopt, &gsr, 2343 sizeof(struct group_source_req), 2344 sizeof(struct group_source_req)); 2345 } 2346 if (error) 2347 return (error); 2348 2349 if (gsa->sin.sin_family != AF_INET || 2350 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2351 return (EINVAL); 2352 2353 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2354 if (ssa->sin.sin_family != AF_INET || 2355 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2356 return (EINVAL); 2357 } 2358 2359 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2360 return (EADDRNOTAVAIL); 2361 2362 ifp = ifnet_byindex(gsr.gsr_interface); 2363 2364 if (ifp == NULL) 2365 return (EADDRNOTAVAIL); 2366 break; 2367 2368 default: 2369 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2370 __func__, sopt->sopt_name); 2371 return (EOPNOTSUPP); 2372 break; 2373 } 2374 2375 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2376 return (EINVAL); 2377 2378 IN_MULTI_LOCK(); 2379 2380 /* 2381 * Find the membership in the membership list. 2382 */ 2383 imo = inp_findmoptions(inp); 2384 imf = imo_match_group(imo, ifp, &gsa->sa); 2385 if (imf == NULL) { 2386 error = EADDRNOTAVAIL; 2387 goto out_inp_locked; 2388 } 2389 inm = imf->imf_inm; 2390 2391 if (ssa->ss.ss_family != AF_UNSPEC) 2392 is_final = false; 2393 2394 /* 2395 * Begin state merge transaction at socket layer. 2396 */ 2397 INP_WLOCK_ASSERT(inp); 2398 2399 /* 2400 * If we were instructed only to leave a given source, do so. 2401 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2402 */ 2403 if (is_final) { 2404 ip_mfilter_remove(&imo->imo_head, imf); 2405 imf_leave(imf); 2406 } else { 2407 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2408 error = EADDRNOTAVAIL; 2409 goto out_inp_locked; 2410 } 2411 ims = imo_match_source(imf, &ssa->sa); 2412 if (ims == NULL) { 2413 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2414 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2415 error = EADDRNOTAVAIL; 2416 goto out_inp_locked; 2417 } 2418 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2419 error = imf_prune(imf, &ssa->sin); 2420 if (error) { 2421 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2422 __func__); 2423 goto out_inp_locked; 2424 } 2425 } 2426 2427 /* 2428 * Begin state merge transaction at IGMP layer. 2429 */ 2430 if (!is_final) { 2431 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2432 IN_MULTI_LIST_LOCK(); 2433 error = inm_merge(inm, imf); 2434 if (error) { 2435 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2436 __func__); 2437 IN_MULTI_LIST_UNLOCK(); 2438 imf_rollback(imf); 2439 imf_reap(imf); 2440 goto out_inp_locked; 2441 } 2442 2443 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2444 error = igmp_change_state(inm); 2445 IN_MULTI_LIST_UNLOCK(); 2446 if (error) { 2447 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2448 __func__); 2449 imf_rollback(imf); 2450 imf_reap(imf); 2451 goto out_inp_locked; 2452 } 2453 } 2454 imf_commit(imf); 2455 imf_reap(imf); 2456 2457 out_inp_locked: 2458 INP_WUNLOCK(inp); 2459 2460 if (is_final && imf) { 2461 /* 2462 * Give up the multicast address record to which 2463 * the membership points. 2464 */ 2465 (void) in_leavegroup_locked(imf->imf_inm, imf); 2466 ip_mfilter_free(imf); 2467 } 2468 2469 IN_MULTI_UNLOCK(); 2470 return (error); 2471 } 2472 2473 /* 2474 * Select the interface for transmitting IPv4 multicast datagrams. 2475 * 2476 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2477 * may be passed to this socket option. An address of INADDR_ANY or an 2478 * interface index of 0 is used to remove a previous selection. 2479 * When no interface is selected, one is chosen for every send. 2480 */ 2481 static int 2482 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2483 { 2484 struct rm_priotracker in_ifa_tracker; 2485 struct in_addr addr; 2486 struct ip_mreqn mreqn; 2487 struct ifnet *ifp; 2488 struct ip_moptions *imo; 2489 int error; 2490 2491 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2492 /* 2493 * An interface index was specified using the 2494 * Linux-derived ip_mreqn structure. 2495 */ 2496 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2497 sizeof(struct ip_mreqn)); 2498 if (error) 2499 return (error); 2500 2501 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2502 return (EINVAL); 2503 2504 if (mreqn.imr_ifindex == 0) { 2505 ifp = NULL; 2506 } else { 2507 ifp = ifnet_byindex(mreqn.imr_ifindex); 2508 if (ifp == NULL) 2509 return (EADDRNOTAVAIL); 2510 } 2511 } else { 2512 /* 2513 * An interface was specified by IPv4 address. 2514 * This is the traditional BSD usage. 2515 */ 2516 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2517 sizeof(struct in_addr)); 2518 if (error) 2519 return (error); 2520 if (in_nullhost(addr)) { 2521 ifp = NULL; 2522 } else { 2523 IN_IFADDR_RLOCK(&in_ifa_tracker); 2524 INADDR_TO_IFP(addr, ifp); 2525 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2526 if (ifp == NULL) 2527 return (EADDRNOTAVAIL); 2528 } 2529 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2530 ntohl(addr.s_addr)); 2531 } 2532 2533 /* Reject interfaces which do not support multicast. */ 2534 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2535 return (EOPNOTSUPP); 2536 2537 imo = inp_findmoptions(inp); 2538 imo->imo_multicast_ifp = ifp; 2539 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2540 INP_WUNLOCK(inp); 2541 2542 return (0); 2543 } 2544 2545 /* 2546 * Atomically set source filters on a socket for an IPv4 multicast group. 2547 * 2548 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2549 */ 2550 static int 2551 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2552 { 2553 struct __msfilterreq msfr; 2554 sockunion_t *gsa; 2555 struct ifnet *ifp; 2556 struct in_mfilter *imf; 2557 struct ip_moptions *imo; 2558 struct in_multi *inm; 2559 int error; 2560 2561 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2562 sizeof(struct __msfilterreq)); 2563 if (error) 2564 return (error); 2565 2566 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2567 return (ENOBUFS); 2568 2569 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2570 msfr.msfr_fmode != MCAST_INCLUDE)) 2571 return (EINVAL); 2572 2573 if (msfr.msfr_group.ss_family != AF_INET || 2574 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2575 return (EINVAL); 2576 2577 gsa = (sockunion_t *)&msfr.msfr_group; 2578 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2579 return (EINVAL); 2580 2581 gsa->sin.sin_port = 0; /* ignore port */ 2582 2583 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2584 return (EADDRNOTAVAIL); 2585 2586 ifp = ifnet_byindex(msfr.msfr_ifindex); 2587 if (ifp == NULL) 2588 return (EADDRNOTAVAIL); 2589 2590 IN_MULTI_LOCK(); 2591 2592 /* 2593 * Take the INP write lock. 2594 * Check if this socket is a member of this group. 2595 */ 2596 imo = inp_findmoptions(inp); 2597 imf = imo_match_group(imo, ifp, &gsa->sa); 2598 if (imf == NULL) { 2599 error = EADDRNOTAVAIL; 2600 goto out_inp_locked; 2601 } 2602 inm = imf->imf_inm; 2603 2604 /* 2605 * Begin state merge transaction at socket layer. 2606 */ 2607 INP_WLOCK_ASSERT(inp); 2608 2609 imf->imf_st[1] = msfr.msfr_fmode; 2610 2611 /* 2612 * Apply any new source filters, if present. 2613 * Make a copy of the user-space source vector so 2614 * that we may copy them with a single copyin. This 2615 * allows us to deal with page faults up-front. 2616 */ 2617 if (msfr.msfr_nsrcs > 0) { 2618 struct in_msource *lims; 2619 struct sockaddr_in *psin; 2620 struct sockaddr_storage *kss, *pkss; 2621 int i; 2622 2623 INP_WUNLOCK(inp); 2624 2625 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2626 __func__, (unsigned long)msfr.msfr_nsrcs); 2627 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2628 M_TEMP, M_WAITOK); 2629 error = copyin(msfr.msfr_srcs, kss, 2630 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2631 if (error) { 2632 free(kss, M_TEMP); 2633 return (error); 2634 } 2635 2636 INP_WLOCK(inp); 2637 2638 /* 2639 * Mark all source filters as UNDEFINED at t1. 2640 * Restore new group filter mode, as imf_leave() 2641 * will set it to INCLUDE. 2642 */ 2643 imf_leave(imf); 2644 imf->imf_st[1] = msfr.msfr_fmode; 2645 2646 /* 2647 * Update socket layer filters at t1, lazy-allocating 2648 * new entries. This saves a bunch of memory at the 2649 * cost of one RB_FIND() per source entry; duplicate 2650 * entries in the msfr_nsrcs vector are ignored. 2651 * If we encounter an error, rollback transaction. 2652 * 2653 * XXX This too could be replaced with a set-symmetric 2654 * difference like loop to avoid walking from root 2655 * every time, as the key space is common. 2656 */ 2657 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2658 psin = (struct sockaddr_in *)pkss; 2659 if (psin->sin_family != AF_INET) { 2660 error = EAFNOSUPPORT; 2661 break; 2662 } 2663 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2664 error = EINVAL; 2665 break; 2666 } 2667 error = imf_get_source(imf, psin, &lims); 2668 if (error) 2669 break; 2670 lims->imsl_st[1] = imf->imf_st[1]; 2671 } 2672 free(kss, M_TEMP); 2673 } 2674 2675 if (error) 2676 goto out_imf_rollback; 2677 2678 INP_WLOCK_ASSERT(inp); 2679 2680 /* 2681 * Begin state merge transaction at IGMP layer. 2682 */ 2683 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2684 IN_MULTI_LIST_LOCK(); 2685 error = inm_merge(inm, imf); 2686 if (error) { 2687 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2688 IN_MULTI_LIST_UNLOCK(); 2689 goto out_imf_rollback; 2690 } 2691 2692 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2693 error = igmp_change_state(inm); 2694 IN_MULTI_LIST_UNLOCK(); 2695 if (error) 2696 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2697 2698 out_imf_rollback: 2699 if (error) 2700 imf_rollback(imf); 2701 else 2702 imf_commit(imf); 2703 2704 imf_reap(imf); 2705 2706 out_inp_locked: 2707 INP_WUNLOCK(inp); 2708 IN_MULTI_UNLOCK(); 2709 return (error); 2710 } 2711 2712 /* 2713 * Set the IP multicast options in response to user setsockopt(). 2714 * 2715 * Many of the socket options handled in this function duplicate the 2716 * functionality of socket options in the regular unicast API. However, 2717 * it is not possible to merge the duplicate code, because the idempotence 2718 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2719 * the effects of these options must be treated as separate and distinct. 2720 * 2721 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2722 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2723 * is refactored to no longer use vifs. 2724 */ 2725 int 2726 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2727 { 2728 struct ip_moptions *imo; 2729 int error; 2730 2731 error = 0; 2732 2733 /* 2734 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2735 * or is a divert socket, reject it. 2736 */ 2737 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2738 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2739 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2740 return (EOPNOTSUPP); 2741 2742 switch (sopt->sopt_name) { 2743 case IP_MULTICAST_VIF: { 2744 int vifi; 2745 /* 2746 * Select a multicast VIF for transmission. 2747 * Only useful if multicast forwarding is active. 2748 */ 2749 if (legal_vif_num == NULL) { 2750 error = EOPNOTSUPP; 2751 break; 2752 } 2753 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2754 if (error) 2755 break; 2756 if (!legal_vif_num(vifi) && (vifi != -1)) { 2757 error = EINVAL; 2758 break; 2759 } 2760 imo = inp_findmoptions(inp); 2761 imo->imo_multicast_vif = vifi; 2762 INP_WUNLOCK(inp); 2763 break; 2764 } 2765 2766 case IP_MULTICAST_IF: 2767 error = inp_set_multicast_if(inp, sopt); 2768 break; 2769 2770 case IP_MULTICAST_TTL: { 2771 u_char ttl; 2772 2773 /* 2774 * Set the IP time-to-live for outgoing multicast packets. 2775 * The original multicast API required a char argument, 2776 * which is inconsistent with the rest of the socket API. 2777 * We allow either a char or an int. 2778 */ 2779 if (sopt->sopt_valsize == sizeof(u_char)) { 2780 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2781 sizeof(u_char)); 2782 if (error) 2783 break; 2784 } else { 2785 u_int ittl; 2786 2787 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2788 sizeof(u_int)); 2789 if (error) 2790 break; 2791 if (ittl > 255) { 2792 error = EINVAL; 2793 break; 2794 } 2795 ttl = (u_char)ittl; 2796 } 2797 imo = inp_findmoptions(inp); 2798 imo->imo_multicast_ttl = ttl; 2799 INP_WUNLOCK(inp); 2800 break; 2801 } 2802 2803 case IP_MULTICAST_LOOP: { 2804 u_char loop; 2805 2806 /* 2807 * Set the loopback flag for outgoing multicast packets. 2808 * Must be zero or one. The original multicast API required a 2809 * char argument, which is inconsistent with the rest 2810 * of the socket API. We allow either a char or an int. 2811 */ 2812 if (sopt->sopt_valsize == sizeof(u_char)) { 2813 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2814 sizeof(u_char)); 2815 if (error) 2816 break; 2817 } else { 2818 u_int iloop; 2819 2820 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2821 sizeof(u_int)); 2822 if (error) 2823 break; 2824 loop = (u_char)iloop; 2825 } 2826 imo = inp_findmoptions(inp); 2827 imo->imo_multicast_loop = !!loop; 2828 INP_WUNLOCK(inp); 2829 break; 2830 } 2831 2832 case IP_ADD_MEMBERSHIP: 2833 case IP_ADD_SOURCE_MEMBERSHIP: 2834 case MCAST_JOIN_GROUP: 2835 case MCAST_JOIN_SOURCE_GROUP: 2836 error = inp_join_group(inp, sopt); 2837 break; 2838 2839 case IP_DROP_MEMBERSHIP: 2840 case IP_DROP_SOURCE_MEMBERSHIP: 2841 case MCAST_LEAVE_GROUP: 2842 case MCAST_LEAVE_SOURCE_GROUP: 2843 error = inp_leave_group(inp, sopt); 2844 break; 2845 2846 case IP_BLOCK_SOURCE: 2847 case IP_UNBLOCK_SOURCE: 2848 case MCAST_BLOCK_SOURCE: 2849 case MCAST_UNBLOCK_SOURCE: 2850 error = inp_block_unblock_source(inp, sopt); 2851 break; 2852 2853 case IP_MSFILTER: 2854 error = inp_set_source_filters(inp, sopt); 2855 break; 2856 2857 default: 2858 error = EOPNOTSUPP; 2859 break; 2860 } 2861 2862 INP_UNLOCK_ASSERT(inp); 2863 2864 return (error); 2865 } 2866 2867 /* 2868 * Expose IGMP's multicast filter mode and source list(s) to userland, 2869 * keyed by (ifindex, group). 2870 * The filter mode is written out as a uint32_t, followed by 2871 * 0..n of struct in_addr. 2872 * For use by ifmcstat(8). 2873 * SMPng: NOTE: unlocked read of ifindex space. 2874 */ 2875 static int 2876 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2877 { 2878 struct in_addr src, group; 2879 struct epoch_tracker et; 2880 struct ifnet *ifp; 2881 struct ifmultiaddr *ifma; 2882 struct in_multi *inm; 2883 struct ip_msource *ims; 2884 int *name; 2885 int retval; 2886 u_int namelen; 2887 uint32_t fmode, ifindex; 2888 2889 name = (int *)arg1; 2890 namelen = arg2; 2891 2892 if (req->newptr != NULL) 2893 return (EPERM); 2894 2895 if (namelen != 2) 2896 return (EINVAL); 2897 2898 ifindex = name[0]; 2899 if (ifindex <= 0 || ifindex > V_if_index) { 2900 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2901 __func__, ifindex); 2902 return (ENOENT); 2903 } 2904 2905 group.s_addr = name[1]; 2906 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2907 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2908 __func__, ntohl(group.s_addr)); 2909 return (EINVAL); 2910 } 2911 2912 NET_EPOCH_ENTER(et); 2913 ifp = ifnet_byindex(ifindex); 2914 if (ifp == NULL) { 2915 NET_EPOCH_EXIT(et); 2916 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2917 __func__, ifindex); 2918 return (ENOENT); 2919 } 2920 2921 retval = sysctl_wire_old_buffer(req, 2922 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2923 if (retval) { 2924 NET_EPOCH_EXIT(et); 2925 return (retval); 2926 } 2927 2928 IN_MULTI_LIST_LOCK(); 2929 2930 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2931 if (ifma->ifma_addr->sa_family != AF_INET || 2932 ifma->ifma_protospec == NULL) 2933 continue; 2934 inm = (struct in_multi *)ifma->ifma_protospec; 2935 if (!in_hosteq(inm->inm_addr, group)) 2936 continue; 2937 fmode = inm->inm_st[1].iss_fmode; 2938 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 2939 if (retval != 0) 2940 break; 2941 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 2942 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 2943 ims->ims_haddr); 2944 /* 2945 * Only copy-out sources which are in-mode. 2946 */ 2947 if (fmode != ims_get_mode(inm, ims, 1)) { 2948 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 2949 __func__); 2950 continue; 2951 } 2952 src.s_addr = htonl(ims->ims_haddr); 2953 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 2954 if (retval != 0) 2955 break; 2956 } 2957 } 2958 2959 IN_MULTI_LIST_UNLOCK(); 2960 NET_EPOCH_EXIT(et); 2961 2962 return (retval); 2963 } 2964 2965 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 2966 2967 static const char *inm_modestrs[] = { 2968 [MCAST_UNDEFINED] = "un", 2969 [MCAST_INCLUDE] = "in", 2970 [MCAST_EXCLUDE] = "ex", 2971 }; 2972 _Static_assert(MCAST_UNDEFINED == 0 && 2973 MCAST_EXCLUDE + 1 == nitems(inm_modestrs), 2974 "inm_modestrs: no longer matches #defines"); 2975 2976 static const char * 2977 inm_mode_str(const int mode) 2978 { 2979 2980 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 2981 return (inm_modestrs[mode]); 2982 return ("??"); 2983 } 2984 2985 static const char *inm_statestrs[] = { 2986 [IGMP_NOT_MEMBER] = "not-member", 2987 [IGMP_SILENT_MEMBER] = "silent", 2988 [IGMP_REPORTING_MEMBER] = "reporting", 2989 [IGMP_IDLE_MEMBER] = "idle", 2990 [IGMP_LAZY_MEMBER] = "lazy", 2991 [IGMP_SLEEPING_MEMBER] = "sleeping", 2992 [IGMP_AWAKENING_MEMBER] = "awakening", 2993 [IGMP_G_QUERY_PENDING_MEMBER] = "query-pending", 2994 [IGMP_SG_QUERY_PENDING_MEMBER] = "sg-query-pending", 2995 [IGMP_LEAVING_MEMBER] = "leaving", 2996 }; 2997 _Static_assert(IGMP_NOT_MEMBER == 0 && 2998 IGMP_LEAVING_MEMBER + 1 == nitems(inm_statestrs), 2999 "inm_statetrs: no longer matches #defines"); 3000 3001 static const char * 3002 inm_state_str(const int state) 3003 { 3004 3005 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3006 return (inm_statestrs[state]); 3007 return ("??"); 3008 } 3009 3010 /* 3011 * Dump an in_multi structure to the console. 3012 */ 3013 void 3014 inm_print(const struct in_multi *inm) 3015 { 3016 int t; 3017 char addrbuf[INET_ADDRSTRLEN]; 3018 3019 if ((ktr_mask & KTR_IGMPV3) == 0) 3020 return; 3021 3022 printf("%s: --- begin inm %p ---\n", __func__, inm); 3023 printf("addr %s ifp %p(%s) ifma %p\n", 3024 inet_ntoa_r(inm->inm_addr, addrbuf), 3025 inm->inm_ifp, 3026 inm->inm_ifp->if_xname, 3027 inm->inm_ifma); 3028 printf("timer %u state %s refcount %u scq.len %u\n", 3029 inm->inm_timer, 3030 inm_state_str(inm->inm_state), 3031 inm->inm_refcount, 3032 inm->inm_scq.mq_len); 3033 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3034 inm->inm_igi, 3035 inm->inm_nsrc, 3036 inm->inm_sctimer, 3037 inm->inm_scrv); 3038 for (t = 0; t < 2; t++) { 3039 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3040 inm_mode_str(inm->inm_st[t].iss_fmode), 3041 inm->inm_st[t].iss_asm, 3042 inm->inm_st[t].iss_ex, 3043 inm->inm_st[t].iss_in, 3044 inm->inm_st[t].iss_rec); 3045 } 3046 printf("%s: --- end inm %p ---\n", __func__, inm); 3047 } 3048 3049 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3050 3051 void 3052 inm_print(const struct in_multi *inm) 3053 { 3054 3055 } 3056 3057 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3058 3059 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3060