1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 169 static int inp_join_group(struct inpcb *, struct sockopt *); 170 static int inp_leave_group(struct inpcb *, struct sockopt *); 171 static struct ifnet * 172 inp_lookup_mcast_ifp(const struct inpcb *, 173 const struct sockaddr_in *, const struct in_addr); 174 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 175 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 176 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 177 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 178 179 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 180 "IPv4 multicast"); 181 182 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 183 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 184 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 185 "Max source filters per group"); 186 187 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 188 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 189 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 190 "Max source filters per socket"); 191 192 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 193 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 194 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 195 196 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 197 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 198 "Per-interface stack-wide source filters"); 199 200 #ifdef KTR 201 /* 202 * Inline function which wraps assertions for a valid ifp. 203 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 204 * is detached. 205 */ 206 static int __inline 207 inm_is_ifp_detached(const struct in_multi *inm) 208 { 209 struct ifnet *ifp; 210 211 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 212 ifp = inm->inm_ifma->ifma_ifp; 213 if (ifp != NULL) { 214 /* 215 * Sanity check that netinet's notion of ifp is the 216 * same as net's. 217 */ 218 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 219 } 220 221 return (ifp == NULL); 222 } 223 #endif 224 225 static struct grouptask free_gtask; 226 static struct in_multi_head inm_free_list; 227 static void inm_release_task(void *arg __unused); 228 static void inm_init(void) 229 { 230 SLIST_INIT(&inm_free_list); 231 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 232 } 233 234 #ifdef EARLY_AP_STARTUP 235 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 236 inm_init, NULL); 237 #else 238 SYSINIT(inm_init, SI_SUB_ROOT_CONF - 1, SI_ORDER_FIRST, 239 inm_init, NULL); 240 #endif 241 242 243 void 244 inm_release_list_deferred(struct in_multi_head *inmh) 245 { 246 247 if (SLIST_EMPTY(inmh)) 248 return; 249 mtx_lock(&in_multi_free_mtx); 250 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 251 mtx_unlock(&in_multi_free_mtx); 252 GROUPTASK_ENQUEUE(&free_gtask); 253 } 254 255 void 256 inm_disconnect(struct in_multi *inm) 257 { 258 struct ifnet *ifp; 259 struct ifmultiaddr *ifma, *ll_ifma; 260 261 ifp = inm->inm_ifp; 262 IF_ADDR_WLOCK_ASSERT(ifp); 263 ifma = inm->inm_ifma; 264 265 if_ref(ifp); 266 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifmultiaddr, ifma_link); 267 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 268 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 269 MPASS(ifma != ll_ifma); 270 ifma->ifma_llifma = NULL; 271 MPASS(ll_ifma->ifma_llifma == NULL); 272 MPASS(ll_ifma->ifma_ifp == ifp); 273 if (--ll_ifma->ifma_refcount == 0) { 274 CK_STAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifmultiaddr, ifma_link); 275 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 276 if_freemulti(ll_ifma); 277 ifma_restart = true; 278 } 279 } 280 } 281 282 void 283 inm_release_deferred(struct in_multi *inm) 284 { 285 struct in_multi_head tmp; 286 287 IN_MULTI_LIST_LOCK_ASSERT(); 288 MPASS(inm->inm_refcount > 0); 289 if (--inm->inm_refcount == 0) { 290 SLIST_INIT(&tmp); 291 inm_disconnect(inm); 292 inm->inm_ifma->ifma_protospec = NULL; 293 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 294 inm_release_list_deferred(&tmp); 295 } 296 } 297 298 static void 299 inm_release_task(void *arg __unused) 300 { 301 struct in_multi_head inm_free_tmp; 302 struct in_multi *inm, *tinm; 303 304 SLIST_INIT(&inm_free_tmp); 305 mtx_lock(&in_multi_free_mtx); 306 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 307 mtx_unlock(&in_multi_free_mtx); 308 IN_MULTI_LOCK(); 309 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 310 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 311 MPASS(inm); 312 inm_release(inm); 313 } 314 IN_MULTI_UNLOCK(); 315 } 316 317 /* 318 * Initialize an in_mfilter structure to a known state at t0, t1 319 * with an empty source filter list. 320 */ 321 static __inline void 322 imf_init(struct in_mfilter *imf, const int st0, const int st1) 323 { 324 memset(imf, 0, sizeof(struct in_mfilter)); 325 RB_INIT(&imf->imf_sources); 326 imf->imf_st[0] = st0; 327 imf->imf_st[1] = st1; 328 } 329 330 /* 331 * Function for looking up an in_multi record for an IPv4 multicast address 332 * on a given interface. ifp must be valid. If no record found, return NULL. 333 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 334 */ 335 struct in_multi * 336 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 337 { 338 struct ifmultiaddr *ifma; 339 struct in_multi *inm; 340 341 IN_MULTI_LIST_LOCK_ASSERT(); 342 IF_ADDR_LOCK_ASSERT(ifp); 343 344 inm = NULL; 345 CK_STAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 346 if (ifma->ifma_addr->sa_family != AF_INET || 347 ifma->ifma_protospec == NULL) 348 continue; 349 inm = (struct in_multi *)ifma->ifma_protospec; 350 if (inm->inm_addr.s_addr == ina.s_addr) 351 break; 352 inm = NULL; 353 } 354 return (inm); 355 } 356 357 /* 358 * Wrapper for inm_lookup_locked(). 359 * The IF_ADDR_LOCK will be taken on ifp and released on return. 360 */ 361 struct in_multi * 362 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 363 { 364 struct in_multi *inm; 365 366 IN_MULTI_LIST_LOCK_ASSERT(); 367 IF_ADDR_RLOCK(ifp); 368 inm = inm_lookup_locked(ifp, ina); 369 IF_ADDR_RUNLOCK(ifp); 370 371 return (inm); 372 } 373 374 /* 375 * Resize the ip_moptions vector to the next power-of-two minus 1. 376 * May be called with locks held; do not sleep. 377 */ 378 static int 379 imo_grow(struct ip_moptions *imo) 380 { 381 struct in_multi **nmships; 382 struct in_multi **omships; 383 struct in_mfilter *nmfilters; 384 struct in_mfilter *omfilters; 385 size_t idx; 386 size_t newmax; 387 size_t oldmax; 388 389 nmships = NULL; 390 nmfilters = NULL; 391 omships = imo->imo_membership; 392 omfilters = imo->imo_mfilters; 393 oldmax = imo->imo_max_memberships; 394 newmax = ((oldmax + 1) * 2) - 1; 395 396 if (newmax <= IP_MAX_MEMBERSHIPS) { 397 nmships = (struct in_multi **)realloc(omships, 398 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 399 nmfilters = (struct in_mfilter *)realloc(omfilters, 400 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 401 if (nmships != NULL && nmfilters != NULL) { 402 /* Initialize newly allocated source filter heads. */ 403 for (idx = oldmax; idx < newmax; idx++) { 404 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 405 MCAST_EXCLUDE); 406 } 407 imo->imo_max_memberships = newmax; 408 imo->imo_membership = nmships; 409 imo->imo_mfilters = nmfilters; 410 } 411 } 412 413 if (nmships == NULL || nmfilters == NULL) { 414 if (nmships != NULL) 415 free(nmships, M_IPMOPTS); 416 if (nmfilters != NULL) 417 free(nmfilters, M_INMFILTER); 418 return (ETOOMANYREFS); 419 } 420 421 return (0); 422 } 423 424 /* 425 * Find an IPv4 multicast group entry for this ip_moptions instance 426 * which matches the specified group, and optionally an interface. 427 * Return its index into the array, or -1 if not found. 428 */ 429 static size_t 430 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 431 const struct sockaddr *group) 432 { 433 const struct sockaddr_in *gsin; 434 struct in_multi **pinm; 435 int idx; 436 int nmships; 437 438 gsin = (const struct sockaddr_in *)group; 439 440 /* The imo_membership array may be lazy allocated. */ 441 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 442 return (-1); 443 444 nmships = imo->imo_num_memberships; 445 pinm = &imo->imo_membership[0]; 446 for (idx = 0; idx < nmships; idx++, pinm++) { 447 if (*pinm == NULL) 448 continue; 449 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 450 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 451 break; 452 } 453 } 454 if (idx >= nmships) 455 idx = -1; 456 457 return (idx); 458 } 459 460 /* 461 * Find an IPv4 multicast source entry for this imo which matches 462 * the given group index for this socket, and source address. 463 * 464 * NOTE: This does not check if the entry is in-mode, merely if 465 * it exists, which may not be the desired behaviour. 466 */ 467 static struct in_msource * 468 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 469 const struct sockaddr *src) 470 { 471 struct ip_msource find; 472 struct in_mfilter *imf; 473 struct ip_msource *ims; 474 const sockunion_t *psa; 475 476 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 477 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 478 ("%s: invalid index %d\n", __func__, (int)gidx)); 479 480 /* The imo_mfilters array may be lazy allocated. */ 481 if (imo->imo_mfilters == NULL) 482 return (NULL); 483 imf = &imo->imo_mfilters[gidx]; 484 485 /* Source trees are keyed in host byte order. */ 486 psa = (const sockunion_t *)src; 487 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 488 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 489 490 return ((struct in_msource *)ims); 491 } 492 493 /* 494 * Perform filtering for multicast datagrams on a socket by group and source. 495 * 496 * Returns 0 if a datagram should be allowed through, or various error codes 497 * if the socket was not a member of the group, or the source was muted, etc. 498 */ 499 int 500 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 501 const struct sockaddr *group, const struct sockaddr *src) 502 { 503 size_t gidx; 504 struct in_msource *ims; 505 int mode; 506 507 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 508 509 gidx = imo_match_group(imo, ifp, group); 510 if (gidx == -1) 511 return (MCAST_NOTGMEMBER); 512 513 /* 514 * Check if the source was included in an (S,G) join. 515 * Allow reception on exclusive memberships by default, 516 * reject reception on inclusive memberships by default. 517 * Exclude source only if an in-mode exclude filter exists. 518 * Include source only if an in-mode include filter exists. 519 * NOTE: We are comparing group state here at IGMP t1 (now) 520 * with socket-layer t0 (since last downcall). 521 */ 522 mode = imo->imo_mfilters[gidx].imf_st[1]; 523 ims = imo_match_source(imo, gidx, src); 524 525 if ((ims == NULL && mode == MCAST_INCLUDE) || 526 (ims != NULL && ims->imsl_st[0] != mode)) 527 return (MCAST_NOTSMEMBER); 528 529 return (MCAST_PASS); 530 } 531 532 /* 533 * Find and return a reference to an in_multi record for (ifp, group), 534 * and bump its reference count. 535 * If one does not exist, try to allocate it, and update link-layer multicast 536 * filters on ifp to listen for group. 537 * Assumes the IN_MULTI lock is held across the call. 538 * Return 0 if successful, otherwise return an appropriate error code. 539 */ 540 static int 541 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 542 struct in_multi **pinm) 543 { 544 struct sockaddr_in gsin; 545 struct ifmultiaddr *ifma; 546 struct in_ifinfo *ii; 547 struct in_multi *inm; 548 int error; 549 550 IN_MULTI_LOCK_ASSERT(); 551 552 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 553 IN_MULTI_LIST_LOCK(); 554 inm = inm_lookup(ifp, *group); 555 if (inm != NULL) { 556 /* 557 * If we already joined this group, just bump the 558 * refcount and return it. 559 */ 560 KASSERT(inm->inm_refcount >= 1, 561 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 562 inm_acquire_locked(inm); 563 *pinm = inm; 564 } 565 IN_MULTI_LIST_UNLOCK(); 566 if (inm != NULL) 567 return (0); 568 569 memset(&gsin, 0, sizeof(gsin)); 570 gsin.sin_family = AF_INET; 571 gsin.sin_len = sizeof(struct sockaddr_in); 572 gsin.sin_addr = *group; 573 574 /* 575 * Check if a link-layer group is already associated 576 * with this network-layer group on the given ifnet. 577 */ 578 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 579 if (error != 0) 580 return (error); 581 582 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 583 IN_MULTI_LIST_LOCK(); 584 IF_ADDR_WLOCK(ifp); 585 586 /* 587 * If something other than netinet is occupying the link-layer 588 * group, print a meaningful error message and back out of 589 * the allocation. 590 * Otherwise, bump the refcount on the existing network-layer 591 * group association and return it. 592 */ 593 if (ifma->ifma_protospec != NULL) { 594 inm = (struct in_multi *)ifma->ifma_protospec; 595 #ifdef INVARIANTS 596 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 597 __func__)); 598 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 599 ("%s: ifma not AF_INET", __func__)); 600 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 601 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 602 !in_hosteq(inm->inm_addr, *group)) { 603 char addrbuf[INET_ADDRSTRLEN]; 604 605 panic("%s: ifma %p is inconsistent with %p (%s)", 606 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 607 } 608 #endif 609 inm_acquire_locked(inm); 610 *pinm = inm; 611 goto out_locked; 612 } 613 614 IF_ADDR_WLOCK_ASSERT(ifp); 615 616 /* 617 * A new in_multi record is needed; allocate and initialize it. 618 * We DO NOT perform an IGMP join as the in_ layer may need to 619 * push an initial source list down to IGMP to support SSM. 620 * 621 * The initial source filter state is INCLUDE, {} as per the RFC. 622 */ 623 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 624 if (inm == NULL) { 625 IF_ADDR_WUNLOCK(ifp); 626 IN_MULTI_LIST_UNLOCK(); 627 if_delmulti_ifma(ifma); 628 return (ENOMEM); 629 } 630 inm->inm_addr = *group; 631 inm->inm_ifp = ifp; 632 inm->inm_igi = ii->ii_igmp; 633 inm->inm_ifma = ifma; 634 inm->inm_refcount = 1; 635 inm->inm_state = IGMP_NOT_MEMBER; 636 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 637 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 638 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 639 RB_INIT(&inm->inm_srcs); 640 641 ifma->ifma_protospec = inm; 642 643 *pinm = inm; 644 out_locked: 645 IF_ADDR_WUNLOCK(ifp); 646 IN_MULTI_LIST_UNLOCK(); 647 return (0); 648 } 649 650 /* 651 * Drop a reference to an in_multi record. 652 * 653 * If the refcount drops to 0, free the in_multi record and 654 * delete the underlying link-layer membership. 655 */ 656 static void 657 inm_release(struct in_multi *inm) 658 { 659 struct ifmultiaddr *ifma; 660 struct ifnet *ifp; 661 662 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 663 MPASS(inm->inm_refcount == 0); 664 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 665 666 ifma = inm->inm_ifma; 667 ifp = inm->inm_ifp; 668 669 /* XXX this access is not covered by IF_ADDR_LOCK */ 670 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 671 if (ifp != NULL) { 672 CURVNET_SET(ifp->if_vnet); 673 inm_purge(inm); 674 free(inm, M_IPMADDR); 675 if_delmulti_ifma_flags(ifma, 1); 676 CURVNET_RESTORE(); 677 if_rele(ifp); 678 } else { 679 inm_purge(inm); 680 free(inm, M_IPMADDR); 681 if_delmulti_ifma_flags(ifma, 1); 682 } 683 } 684 685 /* 686 * Clear recorded source entries for a group. 687 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 688 * FIXME: Should reap. 689 */ 690 void 691 inm_clear_recorded(struct in_multi *inm) 692 { 693 struct ip_msource *ims; 694 695 IN_MULTI_LIST_LOCK_ASSERT(); 696 697 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 698 if (ims->ims_stp) { 699 ims->ims_stp = 0; 700 --inm->inm_st[1].iss_rec; 701 } 702 } 703 KASSERT(inm->inm_st[1].iss_rec == 0, 704 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 705 } 706 707 /* 708 * Record a source as pending for a Source-Group IGMPv3 query. 709 * This lives here as it modifies the shared tree. 710 * 711 * inm is the group descriptor. 712 * naddr is the address of the source to record in network-byte order. 713 * 714 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 715 * lazy-allocate a source node in response to an SG query. 716 * Otherwise, no allocation is performed. This saves some memory 717 * with the trade-off that the source will not be reported to the 718 * router if joined in the window between the query response and 719 * the group actually being joined on the local host. 720 * 721 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 722 * This turns off the allocation of a recorded source entry if 723 * the group has not been joined. 724 * 725 * Return 0 if the source didn't exist or was already marked as recorded. 726 * Return 1 if the source was marked as recorded by this function. 727 * Return <0 if any error occurred (negated errno code). 728 */ 729 int 730 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 731 { 732 struct ip_msource find; 733 struct ip_msource *ims, *nims; 734 735 IN_MULTI_LIST_LOCK_ASSERT(); 736 737 find.ims_haddr = ntohl(naddr); 738 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 739 if (ims && ims->ims_stp) 740 return (0); 741 if (ims == NULL) { 742 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 743 return (-ENOSPC); 744 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 745 M_NOWAIT | M_ZERO); 746 if (nims == NULL) 747 return (-ENOMEM); 748 nims->ims_haddr = find.ims_haddr; 749 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 750 ++inm->inm_nsrc; 751 ims = nims; 752 } 753 754 /* 755 * Mark the source as recorded and update the recorded 756 * source count. 757 */ 758 ++ims->ims_stp; 759 ++inm->inm_st[1].iss_rec; 760 761 return (1); 762 } 763 764 /* 765 * Return a pointer to an in_msource owned by an in_mfilter, 766 * given its source address. 767 * Lazy-allocate if needed. If this is a new entry its filter state is 768 * undefined at t0. 769 * 770 * imf is the filter set being modified. 771 * haddr is the source address in *host* byte-order. 772 * 773 * SMPng: May be called with locks held; malloc must not block. 774 */ 775 static int 776 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 777 struct in_msource **plims) 778 { 779 struct ip_msource find; 780 struct ip_msource *ims, *nims; 781 struct in_msource *lims; 782 int error; 783 784 error = 0; 785 ims = NULL; 786 lims = NULL; 787 788 /* key is host byte order */ 789 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 790 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 791 lims = (struct in_msource *)ims; 792 if (lims == NULL) { 793 if (imf->imf_nsrc == in_mcast_maxsocksrc) 794 return (ENOSPC); 795 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 796 M_NOWAIT | M_ZERO); 797 if (nims == NULL) 798 return (ENOMEM); 799 lims = (struct in_msource *)nims; 800 lims->ims_haddr = find.ims_haddr; 801 lims->imsl_st[0] = MCAST_UNDEFINED; 802 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 803 ++imf->imf_nsrc; 804 } 805 806 *plims = lims; 807 808 return (error); 809 } 810 811 /* 812 * Graft a source entry into an existing socket-layer filter set, 813 * maintaining any required invariants and checking allocations. 814 * 815 * The source is marked as being in the new filter mode at t1. 816 * 817 * Return the pointer to the new node, otherwise return NULL. 818 */ 819 static struct in_msource * 820 imf_graft(struct in_mfilter *imf, const uint8_t st1, 821 const struct sockaddr_in *psin) 822 { 823 struct ip_msource *nims; 824 struct in_msource *lims; 825 826 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 827 M_NOWAIT | M_ZERO); 828 if (nims == NULL) 829 return (NULL); 830 lims = (struct in_msource *)nims; 831 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 832 lims->imsl_st[0] = MCAST_UNDEFINED; 833 lims->imsl_st[1] = st1; 834 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 835 ++imf->imf_nsrc; 836 837 return (lims); 838 } 839 840 /* 841 * Prune a source entry from an existing socket-layer filter set, 842 * maintaining any required invariants and checking allocations. 843 * 844 * The source is marked as being left at t1, it is not freed. 845 * 846 * Return 0 if no error occurred, otherwise return an errno value. 847 */ 848 static int 849 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 850 { 851 struct ip_msource find; 852 struct ip_msource *ims; 853 struct in_msource *lims; 854 855 /* key is host byte order */ 856 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 857 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 858 if (ims == NULL) 859 return (ENOENT); 860 lims = (struct in_msource *)ims; 861 lims->imsl_st[1] = MCAST_UNDEFINED; 862 return (0); 863 } 864 865 /* 866 * Revert socket-layer filter set deltas at t1 to t0 state. 867 */ 868 static void 869 imf_rollback(struct in_mfilter *imf) 870 { 871 struct ip_msource *ims, *tims; 872 struct in_msource *lims; 873 874 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 875 lims = (struct in_msource *)ims; 876 if (lims->imsl_st[0] == lims->imsl_st[1]) { 877 /* no change at t1 */ 878 continue; 879 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 880 /* revert change to existing source at t1 */ 881 lims->imsl_st[1] = lims->imsl_st[0]; 882 } else { 883 /* revert source added t1 */ 884 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 885 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 886 free(ims, M_INMFILTER); 887 imf->imf_nsrc--; 888 } 889 } 890 imf->imf_st[1] = imf->imf_st[0]; 891 } 892 893 /* 894 * Mark socket-layer filter set as INCLUDE {} at t1. 895 */ 896 static void 897 imf_leave(struct in_mfilter *imf) 898 { 899 struct ip_msource *ims; 900 struct in_msource *lims; 901 902 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 903 lims = (struct in_msource *)ims; 904 lims->imsl_st[1] = MCAST_UNDEFINED; 905 } 906 imf->imf_st[1] = MCAST_INCLUDE; 907 } 908 909 /* 910 * Mark socket-layer filter set deltas as committed. 911 */ 912 static void 913 imf_commit(struct in_mfilter *imf) 914 { 915 struct ip_msource *ims; 916 struct in_msource *lims; 917 918 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 919 lims = (struct in_msource *)ims; 920 lims->imsl_st[0] = lims->imsl_st[1]; 921 } 922 imf->imf_st[0] = imf->imf_st[1]; 923 } 924 925 /* 926 * Reap unreferenced sources from socket-layer filter set. 927 */ 928 static void 929 imf_reap(struct in_mfilter *imf) 930 { 931 struct ip_msource *ims, *tims; 932 struct in_msource *lims; 933 934 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 935 lims = (struct in_msource *)ims; 936 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 937 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 938 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 939 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 940 free(ims, M_INMFILTER); 941 imf->imf_nsrc--; 942 } 943 } 944 } 945 946 /* 947 * Purge socket-layer filter set. 948 */ 949 static void 950 imf_purge(struct in_mfilter *imf) 951 { 952 struct ip_msource *ims, *tims; 953 954 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 955 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 956 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 957 free(ims, M_INMFILTER); 958 imf->imf_nsrc--; 959 } 960 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 961 KASSERT(RB_EMPTY(&imf->imf_sources), 962 ("%s: imf_sources not empty", __func__)); 963 } 964 965 /* 966 * Look up a source filter entry for a multicast group. 967 * 968 * inm is the group descriptor to work with. 969 * haddr is the host-byte-order IPv4 address to look up. 970 * noalloc may be non-zero to suppress allocation of sources. 971 * *pims will be set to the address of the retrieved or allocated source. 972 * 973 * SMPng: NOTE: may be called with locks held. 974 * Return 0 if successful, otherwise return a non-zero error code. 975 */ 976 static int 977 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 978 const int noalloc, struct ip_msource **pims) 979 { 980 struct ip_msource find; 981 struct ip_msource *ims, *nims; 982 983 find.ims_haddr = haddr; 984 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 985 if (ims == NULL && !noalloc) { 986 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 987 return (ENOSPC); 988 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 989 M_NOWAIT | M_ZERO); 990 if (nims == NULL) 991 return (ENOMEM); 992 nims->ims_haddr = haddr; 993 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 994 ++inm->inm_nsrc; 995 ims = nims; 996 #ifdef KTR 997 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 998 haddr, ims); 999 #endif 1000 } 1001 1002 *pims = ims; 1003 return (0); 1004 } 1005 1006 /* 1007 * Merge socket-layer source into IGMP-layer source. 1008 * If rollback is non-zero, perform the inverse of the merge. 1009 */ 1010 static void 1011 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 1012 const int rollback) 1013 { 1014 int n = rollback ? -1 : 1; 1015 1016 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 1017 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 1018 __func__, n, ims->ims_haddr); 1019 ims->ims_st[1].ex -= n; 1020 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 1021 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 1022 __func__, n, ims->ims_haddr); 1023 ims->ims_st[1].in -= n; 1024 } 1025 1026 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 1027 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 1028 __func__, n, ims->ims_haddr); 1029 ims->ims_st[1].ex += n; 1030 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 1031 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 1032 __func__, n, ims->ims_haddr); 1033 ims->ims_st[1].in += n; 1034 } 1035 } 1036 1037 /* 1038 * Atomically update the global in_multi state, when a membership's 1039 * filter list is being updated in any way. 1040 * 1041 * imf is the per-inpcb-membership group filter pointer. 1042 * A fake imf may be passed for in-kernel consumers. 1043 * 1044 * XXX This is a candidate for a set-symmetric-difference style loop 1045 * which would eliminate the repeated lookup from root of ims nodes, 1046 * as they share the same key space. 1047 * 1048 * If any error occurred this function will back out of refcounts 1049 * and return a non-zero value. 1050 */ 1051 static int 1052 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1053 { 1054 struct ip_msource *ims, *nims; 1055 struct in_msource *lims; 1056 int schanged, error; 1057 int nsrc0, nsrc1; 1058 1059 schanged = 0; 1060 error = 0; 1061 nsrc1 = nsrc0 = 0; 1062 IN_MULTI_LIST_LOCK_ASSERT(); 1063 1064 /* 1065 * Update the source filters first, as this may fail. 1066 * Maintain count of in-mode filters at t0, t1. These are 1067 * used to work out if we transition into ASM mode or not. 1068 * Maintain a count of source filters whose state was 1069 * actually modified by this operation. 1070 */ 1071 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1072 lims = (struct in_msource *)ims; 1073 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1074 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1075 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1076 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1077 ++schanged; 1078 if (error) 1079 break; 1080 ims_merge(nims, lims, 0); 1081 } 1082 if (error) { 1083 struct ip_msource *bims; 1084 1085 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1086 lims = (struct in_msource *)ims; 1087 if (lims->imsl_st[0] == lims->imsl_st[1]) 1088 continue; 1089 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1090 if (bims == NULL) 1091 continue; 1092 ims_merge(bims, lims, 1); 1093 } 1094 goto out_reap; 1095 } 1096 1097 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1098 __func__, nsrc0, nsrc1); 1099 1100 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1101 if (imf->imf_st[0] == imf->imf_st[1] && 1102 imf->imf_st[1] == MCAST_INCLUDE) { 1103 if (nsrc1 == 0) { 1104 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1105 --inm->inm_st[1].iss_in; 1106 } 1107 } 1108 1109 /* Handle filter mode transition on socket. */ 1110 if (imf->imf_st[0] != imf->imf_st[1]) { 1111 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1112 __func__, imf->imf_st[0], imf->imf_st[1]); 1113 1114 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1115 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1116 --inm->inm_st[1].iss_ex; 1117 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1118 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1119 --inm->inm_st[1].iss_in; 1120 } 1121 1122 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1123 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1124 inm->inm_st[1].iss_ex++; 1125 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1126 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1127 inm->inm_st[1].iss_in++; 1128 } 1129 } 1130 1131 /* 1132 * Track inm filter state in terms of listener counts. 1133 * If there are any exclusive listeners, stack-wide 1134 * membership is exclusive. 1135 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1136 * If no listeners remain, state is undefined at t1, 1137 * and the IGMP lifecycle for this group should finish. 1138 */ 1139 if (inm->inm_st[1].iss_ex > 0) { 1140 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1141 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1142 } else if (inm->inm_st[1].iss_in > 0) { 1143 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1144 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1145 } else { 1146 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1147 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1148 } 1149 1150 /* Decrement ASM listener count on transition out of ASM mode. */ 1151 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1152 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1153 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1154 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1155 --inm->inm_st[1].iss_asm; 1156 } 1157 } 1158 1159 /* Increment ASM listener count on transition to ASM mode. */ 1160 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1161 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1162 inm->inm_st[1].iss_asm++; 1163 } 1164 1165 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1166 inm_print(inm); 1167 1168 out_reap: 1169 if (schanged > 0) { 1170 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1171 inm_reap(inm); 1172 } 1173 return (error); 1174 } 1175 1176 /* 1177 * Mark an in_multi's filter set deltas as committed. 1178 * Called by IGMP after a state change has been enqueued. 1179 */ 1180 void 1181 inm_commit(struct in_multi *inm) 1182 { 1183 struct ip_msource *ims; 1184 1185 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1186 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1187 inm_print(inm); 1188 1189 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1190 ims->ims_st[0] = ims->ims_st[1]; 1191 } 1192 inm->inm_st[0] = inm->inm_st[1]; 1193 } 1194 1195 /* 1196 * Reap unreferenced nodes from an in_multi's filter set. 1197 */ 1198 static void 1199 inm_reap(struct in_multi *inm) 1200 { 1201 struct ip_msource *ims, *tims; 1202 1203 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1204 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1205 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1206 ims->ims_stp != 0) 1207 continue; 1208 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1209 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1210 free(ims, M_IPMSOURCE); 1211 inm->inm_nsrc--; 1212 } 1213 } 1214 1215 /* 1216 * Purge all source nodes from an in_multi's filter set. 1217 */ 1218 static void 1219 inm_purge(struct in_multi *inm) 1220 { 1221 struct ip_msource *ims, *tims; 1222 1223 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1224 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1225 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1226 free(ims, M_IPMSOURCE); 1227 inm->inm_nsrc--; 1228 } 1229 } 1230 1231 /* 1232 * Join a multicast group; unlocked entry point. 1233 * 1234 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1235 * is not held. Fortunately, ifp is unlikely to have been detached 1236 * at this point, so we assume it's OK to recurse. 1237 */ 1238 int 1239 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1240 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1241 { 1242 int error; 1243 1244 IN_MULTI_LOCK(); 1245 error = in_joingroup_locked(ifp, gina, imf, pinm); 1246 IN_MULTI_UNLOCK(); 1247 1248 return (error); 1249 } 1250 1251 /* 1252 * Join a multicast group; real entry point. 1253 * 1254 * Only preserves atomicity at inm level. 1255 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1256 * 1257 * If the IGMP downcall fails, the group is not joined, and an error 1258 * code is returned. 1259 */ 1260 int 1261 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1262 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1263 { 1264 struct in_mfilter timf; 1265 struct in_multi *inm; 1266 int error; 1267 1268 IN_MULTI_LOCK_ASSERT(); 1269 IN_MULTI_LIST_UNLOCK_ASSERT(); 1270 1271 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1272 ntohl(gina->s_addr), ifp, ifp->if_xname); 1273 1274 error = 0; 1275 inm = NULL; 1276 1277 /* 1278 * If no imf was specified (i.e. kernel consumer), 1279 * fake one up and assume it is an ASM join. 1280 */ 1281 if (imf == NULL) { 1282 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1283 imf = &timf; 1284 } 1285 1286 error = in_getmulti(ifp, gina, &inm); 1287 if (error) { 1288 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1289 return (error); 1290 } 1291 IN_MULTI_LIST_LOCK(); 1292 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1293 error = inm_merge(inm, imf); 1294 if (error) { 1295 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1296 goto out_inm_release; 1297 } 1298 1299 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1300 error = igmp_change_state(inm); 1301 if (error) { 1302 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1303 goto out_inm_release; 1304 } 1305 1306 out_inm_release: 1307 if (error) { 1308 1309 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1310 inm_release_deferred(inm); 1311 } else { 1312 *pinm = inm; 1313 } 1314 IN_MULTI_LIST_UNLOCK(); 1315 1316 return (error); 1317 } 1318 1319 /* 1320 * Leave a multicast group; unlocked entry point. 1321 */ 1322 int 1323 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1324 { 1325 int error; 1326 1327 IN_MULTI_LOCK(); 1328 error = in_leavegroup_locked(inm, imf); 1329 IN_MULTI_UNLOCK(); 1330 1331 return (error); 1332 } 1333 1334 /* 1335 * Leave a multicast group; real entry point. 1336 * All source filters will be expunged. 1337 * 1338 * Only preserves atomicity at inm level. 1339 * 1340 * Holding the write lock for the INP which contains imf 1341 * is highly advisable. We can't assert for it as imf does not 1342 * contain a back-pointer to the owning inp. 1343 * 1344 * Note: This is not the same as inm_release(*) as this function also 1345 * makes a state change downcall into IGMP. 1346 */ 1347 int 1348 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1349 { 1350 struct in_mfilter timf; 1351 int error; 1352 1353 error = 0; 1354 1355 IN_MULTI_LOCK_ASSERT(); 1356 IN_MULTI_LIST_UNLOCK_ASSERT(); 1357 1358 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1359 inm, ntohl(inm->inm_addr.s_addr), 1360 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1361 imf); 1362 1363 /* 1364 * If no imf was specified (i.e. kernel consumer), 1365 * fake one up and assume it is an ASM join. 1366 */ 1367 if (imf == NULL) { 1368 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1369 imf = &timf; 1370 } 1371 1372 /* 1373 * Begin state merge transaction at IGMP layer. 1374 * 1375 * As this particular invocation should not cause any memory 1376 * to be allocated, and there is no opportunity to roll back 1377 * the transaction, it MUST NOT fail. 1378 */ 1379 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1380 IN_MULTI_LIST_LOCK(); 1381 error = inm_merge(inm, imf); 1382 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1383 1384 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1385 CURVNET_SET(inm->inm_ifp->if_vnet); 1386 error = igmp_change_state(inm); 1387 IF_ADDR_WLOCK(inm->inm_ifp); 1388 inm_release_deferred(inm); 1389 IF_ADDR_WUNLOCK(inm->inm_ifp); 1390 IN_MULTI_LIST_UNLOCK(); 1391 CURVNET_RESTORE(); 1392 if (error) 1393 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1394 1395 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1396 1397 return (error); 1398 } 1399 1400 /*#ifndef BURN_BRIDGES*/ 1401 /* 1402 * Join an IPv4 multicast group in (*,G) exclusive mode. 1403 * The group must be a 224.0.0.0/24 link-scope group. 1404 * This KPI is for legacy kernel consumers only. 1405 */ 1406 struct in_multi * 1407 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1408 { 1409 struct in_multi *pinm; 1410 int error; 1411 #ifdef INVARIANTS 1412 char addrbuf[INET_ADDRSTRLEN]; 1413 #endif 1414 1415 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1416 ("%s: %s not in 224.0.0.0/24", __func__, 1417 inet_ntoa_r(*ap, addrbuf))); 1418 1419 error = in_joingroup(ifp, ap, NULL, &pinm); 1420 if (error != 0) 1421 pinm = NULL; 1422 1423 return (pinm); 1424 } 1425 1426 /* 1427 * Block or unblock an ASM multicast source on an inpcb. 1428 * This implements the delta-based API described in RFC 3678. 1429 * 1430 * The delta-based API applies only to exclusive-mode memberships. 1431 * An IGMP downcall will be performed. 1432 * 1433 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1434 * 1435 * Return 0 if successful, otherwise return an appropriate error code. 1436 */ 1437 static int 1438 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1439 { 1440 struct group_source_req gsr; 1441 sockunion_t *gsa, *ssa; 1442 struct ifnet *ifp; 1443 struct in_mfilter *imf; 1444 struct ip_moptions *imo; 1445 struct in_msource *ims; 1446 struct in_multi *inm; 1447 size_t idx; 1448 uint16_t fmode; 1449 int error, doblock; 1450 1451 ifp = NULL; 1452 error = 0; 1453 doblock = 0; 1454 1455 memset(&gsr, 0, sizeof(struct group_source_req)); 1456 gsa = (sockunion_t *)&gsr.gsr_group; 1457 ssa = (sockunion_t *)&gsr.gsr_source; 1458 1459 switch (sopt->sopt_name) { 1460 case IP_BLOCK_SOURCE: 1461 case IP_UNBLOCK_SOURCE: { 1462 struct ip_mreq_source mreqs; 1463 1464 error = sooptcopyin(sopt, &mreqs, 1465 sizeof(struct ip_mreq_source), 1466 sizeof(struct ip_mreq_source)); 1467 if (error) 1468 return (error); 1469 1470 gsa->sin.sin_family = AF_INET; 1471 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1472 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1473 1474 ssa->sin.sin_family = AF_INET; 1475 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1476 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1477 1478 if (!in_nullhost(mreqs.imr_interface)) 1479 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1480 1481 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1482 doblock = 1; 1483 1484 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1485 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1486 break; 1487 } 1488 1489 case MCAST_BLOCK_SOURCE: 1490 case MCAST_UNBLOCK_SOURCE: 1491 error = sooptcopyin(sopt, &gsr, 1492 sizeof(struct group_source_req), 1493 sizeof(struct group_source_req)); 1494 if (error) 1495 return (error); 1496 1497 if (gsa->sin.sin_family != AF_INET || 1498 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1499 return (EINVAL); 1500 1501 if (ssa->sin.sin_family != AF_INET || 1502 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1503 return (EINVAL); 1504 1505 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1506 return (EADDRNOTAVAIL); 1507 1508 ifp = ifnet_byindex(gsr.gsr_interface); 1509 1510 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1511 doblock = 1; 1512 break; 1513 1514 default: 1515 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1516 __func__, sopt->sopt_name); 1517 return (EOPNOTSUPP); 1518 break; 1519 } 1520 1521 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1522 return (EINVAL); 1523 1524 /* 1525 * Check if we are actually a member of this group. 1526 */ 1527 imo = inp_findmoptions(inp); 1528 idx = imo_match_group(imo, ifp, &gsa->sa); 1529 if (idx == -1 || imo->imo_mfilters == NULL) { 1530 error = EADDRNOTAVAIL; 1531 goto out_inp_locked; 1532 } 1533 1534 KASSERT(imo->imo_mfilters != NULL, 1535 ("%s: imo_mfilters not allocated", __func__)); 1536 imf = &imo->imo_mfilters[idx]; 1537 inm = imo->imo_membership[idx]; 1538 1539 /* 1540 * Attempting to use the delta-based API on an 1541 * non exclusive-mode membership is an error. 1542 */ 1543 fmode = imf->imf_st[0]; 1544 if (fmode != MCAST_EXCLUDE) { 1545 error = EINVAL; 1546 goto out_inp_locked; 1547 } 1548 1549 /* 1550 * Deal with error cases up-front: 1551 * Asked to block, but already blocked; or 1552 * Asked to unblock, but nothing to unblock. 1553 * If adding a new block entry, allocate it. 1554 */ 1555 ims = imo_match_source(imo, idx, &ssa->sa); 1556 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1557 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1558 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1559 error = EADDRNOTAVAIL; 1560 goto out_inp_locked; 1561 } 1562 1563 INP_WLOCK_ASSERT(inp); 1564 1565 /* 1566 * Begin state merge transaction at socket layer. 1567 */ 1568 if (doblock) { 1569 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1570 ims = imf_graft(imf, fmode, &ssa->sin); 1571 if (ims == NULL) 1572 error = ENOMEM; 1573 } else { 1574 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1575 error = imf_prune(imf, &ssa->sin); 1576 } 1577 1578 if (error) { 1579 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1580 goto out_imf_rollback; 1581 } 1582 1583 /* 1584 * Begin state merge transaction at IGMP layer. 1585 */ 1586 IN_MULTI_LOCK(); 1587 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1588 IN_MULTI_LIST_LOCK(); 1589 error = inm_merge(inm, imf); 1590 if (error) { 1591 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1592 IN_MULTI_LIST_UNLOCK(); 1593 goto out_in_multi_locked; 1594 } 1595 1596 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1597 error = igmp_change_state(inm); 1598 IN_MULTI_LIST_UNLOCK(); 1599 if (error) 1600 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1601 1602 out_in_multi_locked: 1603 1604 IN_MULTI_UNLOCK(); 1605 out_imf_rollback: 1606 if (error) 1607 imf_rollback(imf); 1608 else 1609 imf_commit(imf); 1610 1611 imf_reap(imf); 1612 1613 out_inp_locked: 1614 INP_WUNLOCK(inp); 1615 return (error); 1616 } 1617 1618 /* 1619 * Given an inpcb, return its multicast options structure pointer. Accepts 1620 * an unlocked inpcb pointer, but will return it locked. May sleep. 1621 * 1622 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1623 * SMPng: NOTE: Returns with the INP write lock held. 1624 */ 1625 static struct ip_moptions * 1626 inp_findmoptions(struct inpcb *inp) 1627 { 1628 struct ip_moptions *imo; 1629 struct in_multi **immp; 1630 struct in_mfilter *imfp; 1631 size_t idx; 1632 1633 INP_WLOCK(inp); 1634 if (inp->inp_moptions != NULL) 1635 return (inp->inp_moptions); 1636 1637 INP_WUNLOCK(inp); 1638 1639 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1640 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1641 M_WAITOK | M_ZERO); 1642 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1643 M_INMFILTER, M_WAITOK); 1644 1645 imo->imo_multicast_ifp = NULL; 1646 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1647 imo->imo_multicast_vif = -1; 1648 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1649 imo->imo_multicast_loop = in_mcast_loop; 1650 imo->imo_num_memberships = 0; 1651 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1652 imo->imo_membership = immp; 1653 1654 /* Initialize per-group source filters. */ 1655 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1656 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1657 imo->imo_mfilters = imfp; 1658 1659 INP_WLOCK(inp); 1660 if (inp->inp_moptions != NULL) { 1661 free(imfp, M_INMFILTER); 1662 free(immp, M_IPMOPTS); 1663 free(imo, M_IPMOPTS); 1664 return (inp->inp_moptions); 1665 } 1666 inp->inp_moptions = imo; 1667 return (imo); 1668 } 1669 1670 static void 1671 inp_gcmoptions(epoch_context_t ctx) 1672 { 1673 struct ip_moptions *imo; 1674 struct in_mfilter *imf; 1675 struct in_multi *inm; 1676 struct ifnet *ifp; 1677 size_t idx, nmships; 1678 1679 imo = __containerof(ctx, struct ip_moptions, imo_epoch_ctx); 1680 1681 nmships = imo->imo_num_memberships; 1682 for (idx = 0; idx < nmships; ++idx) { 1683 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1684 if (imf) 1685 imf_leave(imf); 1686 inm = imo->imo_membership[idx]; 1687 ifp = inm->inm_ifp; 1688 if (ifp != NULL) { 1689 CURVNET_SET(ifp->if_vnet); 1690 (void)in_leavegroup(inm, imf); 1691 CURVNET_RESTORE(); 1692 } else { 1693 (void)in_leavegroup(inm, imf); 1694 } 1695 if (imf) 1696 imf_purge(imf); 1697 } 1698 1699 if (imo->imo_mfilters) 1700 free(imo->imo_mfilters, M_INMFILTER); 1701 free(imo->imo_membership, M_IPMOPTS); 1702 free(imo, M_IPMOPTS); 1703 } 1704 1705 /* 1706 * Discard the IP multicast options (and source filters). To minimize 1707 * the amount of work done while holding locks such as the INP's 1708 * pcbinfo lock (which is used in the receive path), the free 1709 * operation is deferred to the epoch callback task. 1710 */ 1711 void 1712 inp_freemoptions(struct ip_moptions *imo) 1713 { 1714 if (imo == NULL) 1715 return; 1716 epoch_call(net_epoch_preempt, &imo->imo_epoch_ctx, inp_gcmoptions); 1717 } 1718 1719 /* 1720 * Atomically get source filters on a socket for an IPv4 multicast group. 1721 * Called with INP lock held; returns with lock released. 1722 */ 1723 static int 1724 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1725 { 1726 struct __msfilterreq msfr; 1727 sockunion_t *gsa; 1728 struct ifnet *ifp; 1729 struct ip_moptions *imo; 1730 struct in_mfilter *imf; 1731 struct ip_msource *ims; 1732 struct in_msource *lims; 1733 struct sockaddr_in *psin; 1734 struct sockaddr_storage *ptss; 1735 struct sockaddr_storage *tss; 1736 int error; 1737 size_t idx, nsrcs, ncsrcs; 1738 1739 INP_WLOCK_ASSERT(inp); 1740 1741 imo = inp->inp_moptions; 1742 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1743 1744 INP_WUNLOCK(inp); 1745 1746 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1747 sizeof(struct __msfilterreq)); 1748 if (error) 1749 return (error); 1750 1751 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1752 return (EINVAL); 1753 1754 ifp = ifnet_byindex(msfr.msfr_ifindex); 1755 if (ifp == NULL) 1756 return (EINVAL); 1757 1758 INP_WLOCK(inp); 1759 1760 /* 1761 * Lookup group on the socket. 1762 */ 1763 gsa = (sockunion_t *)&msfr.msfr_group; 1764 idx = imo_match_group(imo, ifp, &gsa->sa); 1765 if (idx == -1 || imo->imo_mfilters == NULL) { 1766 INP_WUNLOCK(inp); 1767 return (EADDRNOTAVAIL); 1768 } 1769 imf = &imo->imo_mfilters[idx]; 1770 1771 /* 1772 * Ignore memberships which are in limbo. 1773 */ 1774 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1775 INP_WUNLOCK(inp); 1776 return (EAGAIN); 1777 } 1778 msfr.msfr_fmode = imf->imf_st[1]; 1779 1780 /* 1781 * If the user specified a buffer, copy out the source filter 1782 * entries to userland gracefully. 1783 * We only copy out the number of entries which userland 1784 * has asked for, but we always tell userland how big the 1785 * buffer really needs to be. 1786 */ 1787 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1788 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1789 tss = NULL; 1790 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1791 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1792 M_TEMP, M_NOWAIT | M_ZERO); 1793 if (tss == NULL) { 1794 INP_WUNLOCK(inp); 1795 return (ENOBUFS); 1796 } 1797 } 1798 1799 /* 1800 * Count number of sources in-mode at t0. 1801 * If buffer space exists and remains, copy out source entries. 1802 */ 1803 nsrcs = msfr.msfr_nsrcs; 1804 ncsrcs = 0; 1805 ptss = tss; 1806 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1807 lims = (struct in_msource *)ims; 1808 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1809 lims->imsl_st[0] != imf->imf_st[0]) 1810 continue; 1811 ++ncsrcs; 1812 if (tss != NULL && nsrcs > 0) { 1813 psin = (struct sockaddr_in *)ptss; 1814 psin->sin_family = AF_INET; 1815 psin->sin_len = sizeof(struct sockaddr_in); 1816 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1817 psin->sin_port = 0; 1818 ++ptss; 1819 --nsrcs; 1820 } 1821 } 1822 1823 INP_WUNLOCK(inp); 1824 1825 if (tss != NULL) { 1826 error = copyout(tss, msfr.msfr_srcs, 1827 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1828 free(tss, M_TEMP); 1829 if (error) 1830 return (error); 1831 } 1832 1833 msfr.msfr_nsrcs = ncsrcs; 1834 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1835 1836 return (error); 1837 } 1838 1839 /* 1840 * Return the IP multicast options in response to user getsockopt(). 1841 */ 1842 int 1843 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1844 { 1845 struct rm_priotracker in_ifa_tracker; 1846 struct ip_mreqn mreqn; 1847 struct ip_moptions *imo; 1848 struct ifnet *ifp; 1849 struct in_ifaddr *ia; 1850 int error, optval; 1851 u_char coptval; 1852 1853 INP_WLOCK(inp); 1854 imo = inp->inp_moptions; 1855 /* 1856 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1857 * or is a divert socket, reject it. 1858 */ 1859 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1860 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1861 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1862 INP_WUNLOCK(inp); 1863 return (EOPNOTSUPP); 1864 } 1865 1866 error = 0; 1867 switch (sopt->sopt_name) { 1868 case IP_MULTICAST_VIF: 1869 if (imo != NULL) 1870 optval = imo->imo_multicast_vif; 1871 else 1872 optval = -1; 1873 INP_WUNLOCK(inp); 1874 error = sooptcopyout(sopt, &optval, sizeof(int)); 1875 break; 1876 1877 case IP_MULTICAST_IF: 1878 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1879 if (imo != NULL) { 1880 ifp = imo->imo_multicast_ifp; 1881 if (!in_nullhost(imo->imo_multicast_addr)) { 1882 mreqn.imr_address = imo->imo_multicast_addr; 1883 } else if (ifp != NULL) { 1884 mreqn.imr_ifindex = ifp->if_index; 1885 NET_EPOCH_ENTER(); 1886 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1887 if (ia != NULL) 1888 mreqn.imr_address = 1889 IA_SIN(ia)->sin_addr; 1890 NET_EPOCH_EXIT(); 1891 } 1892 } 1893 INP_WUNLOCK(inp); 1894 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1895 error = sooptcopyout(sopt, &mreqn, 1896 sizeof(struct ip_mreqn)); 1897 } else { 1898 error = sooptcopyout(sopt, &mreqn.imr_address, 1899 sizeof(struct in_addr)); 1900 } 1901 break; 1902 1903 case IP_MULTICAST_TTL: 1904 if (imo == NULL) 1905 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1906 else 1907 optval = coptval = imo->imo_multicast_ttl; 1908 INP_WUNLOCK(inp); 1909 if (sopt->sopt_valsize == sizeof(u_char)) 1910 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1911 else 1912 error = sooptcopyout(sopt, &optval, sizeof(int)); 1913 break; 1914 1915 case IP_MULTICAST_LOOP: 1916 if (imo == NULL) 1917 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1918 else 1919 optval = coptval = imo->imo_multicast_loop; 1920 INP_WUNLOCK(inp); 1921 if (sopt->sopt_valsize == sizeof(u_char)) 1922 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1923 else 1924 error = sooptcopyout(sopt, &optval, sizeof(int)); 1925 break; 1926 1927 case IP_MSFILTER: 1928 if (imo == NULL) { 1929 error = EADDRNOTAVAIL; 1930 INP_WUNLOCK(inp); 1931 } else { 1932 error = inp_get_source_filters(inp, sopt); 1933 } 1934 break; 1935 1936 default: 1937 INP_WUNLOCK(inp); 1938 error = ENOPROTOOPT; 1939 break; 1940 } 1941 1942 INP_UNLOCK_ASSERT(inp); 1943 1944 return (error); 1945 } 1946 1947 /* 1948 * Look up the ifnet to use for a multicast group membership, 1949 * given the IPv4 address of an interface, and the IPv4 group address. 1950 * 1951 * This routine exists to support legacy multicast applications 1952 * which do not understand that multicast memberships are scoped to 1953 * specific physical links in the networking stack, or which need 1954 * to join link-scope groups before IPv4 addresses are configured. 1955 * 1956 * If inp is non-NULL, use this socket's current FIB number for any 1957 * required FIB lookup. 1958 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1959 * and use its ifp; usually, this points to the default next-hop. 1960 * 1961 * If the FIB lookup fails, attempt to use the first non-loopback 1962 * interface with multicast capability in the system as a 1963 * last resort. The legacy IPv4 ASM API requires that we do 1964 * this in order to allow groups to be joined when the routing 1965 * table has not yet been populated during boot. 1966 * 1967 * Returns NULL if no ifp could be found. 1968 * 1969 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1970 * FUTURE: Implement IPv4 source-address selection. 1971 */ 1972 static struct ifnet * 1973 inp_lookup_mcast_ifp(const struct inpcb *inp, 1974 const struct sockaddr_in *gsin, const struct in_addr ina) 1975 { 1976 struct rm_priotracker in_ifa_tracker; 1977 struct ifnet *ifp; 1978 struct nhop4_basic nh4; 1979 uint32_t fibnum; 1980 1981 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 1982 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 1983 ("%s: not multicast", __func__)); 1984 1985 ifp = NULL; 1986 if (!in_nullhost(ina)) { 1987 INADDR_TO_IFP(ina, ifp); 1988 } else { 1989 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 1990 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 1991 ifp = nh4.nh_ifp; 1992 else { 1993 struct in_ifaddr *ia; 1994 struct ifnet *mifp; 1995 1996 mifp = NULL; 1997 IN_IFADDR_RLOCK(&in_ifa_tracker); 1998 CK_STAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 1999 mifp = ia->ia_ifp; 2000 if (!(mifp->if_flags & IFF_LOOPBACK) && 2001 (mifp->if_flags & IFF_MULTICAST)) { 2002 ifp = mifp; 2003 break; 2004 } 2005 } 2006 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2007 } 2008 } 2009 2010 return (ifp); 2011 } 2012 2013 /* 2014 * Join an IPv4 multicast group, possibly with a source. 2015 */ 2016 static int 2017 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2018 { 2019 struct group_source_req gsr; 2020 sockunion_t *gsa, *ssa; 2021 struct ifnet *ifp; 2022 struct in_mfilter *imf; 2023 struct ip_moptions *imo; 2024 struct in_multi *inm; 2025 struct in_msource *lims; 2026 size_t idx; 2027 int error, is_new; 2028 2029 ifp = NULL; 2030 imf = NULL; 2031 lims = NULL; 2032 error = 0; 2033 is_new = 0; 2034 2035 memset(&gsr, 0, sizeof(struct group_source_req)); 2036 gsa = (sockunion_t *)&gsr.gsr_group; 2037 gsa->ss.ss_family = AF_UNSPEC; 2038 ssa = (sockunion_t *)&gsr.gsr_source; 2039 ssa->ss.ss_family = AF_UNSPEC; 2040 2041 switch (sopt->sopt_name) { 2042 case IP_ADD_MEMBERSHIP: 2043 case IP_ADD_SOURCE_MEMBERSHIP: { 2044 struct ip_mreq_source mreqs; 2045 2046 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2047 error = sooptcopyin(sopt, &mreqs, 2048 sizeof(struct ip_mreq), 2049 sizeof(struct ip_mreq)); 2050 /* 2051 * Do argument switcharoo from ip_mreq into 2052 * ip_mreq_source to avoid using two instances. 2053 */ 2054 mreqs.imr_interface = mreqs.imr_sourceaddr; 2055 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2056 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2057 error = sooptcopyin(sopt, &mreqs, 2058 sizeof(struct ip_mreq_source), 2059 sizeof(struct ip_mreq_source)); 2060 } 2061 if (error) 2062 return (error); 2063 2064 gsa->sin.sin_family = AF_INET; 2065 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2066 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2067 2068 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2069 ssa->sin.sin_family = AF_INET; 2070 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2071 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2072 } 2073 2074 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2075 return (EINVAL); 2076 2077 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2078 mreqs.imr_interface); 2079 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2080 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2081 break; 2082 } 2083 2084 case MCAST_JOIN_GROUP: 2085 case MCAST_JOIN_SOURCE_GROUP: 2086 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2087 error = sooptcopyin(sopt, &gsr, 2088 sizeof(struct group_req), 2089 sizeof(struct group_req)); 2090 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2091 error = sooptcopyin(sopt, &gsr, 2092 sizeof(struct group_source_req), 2093 sizeof(struct group_source_req)); 2094 } 2095 if (error) 2096 return (error); 2097 2098 if (gsa->sin.sin_family != AF_INET || 2099 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2100 return (EINVAL); 2101 2102 /* 2103 * Overwrite the port field if present, as the sockaddr 2104 * being copied in may be matched with a binary comparison. 2105 */ 2106 gsa->sin.sin_port = 0; 2107 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2108 if (ssa->sin.sin_family != AF_INET || 2109 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2110 return (EINVAL); 2111 ssa->sin.sin_port = 0; 2112 } 2113 2114 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2115 return (EINVAL); 2116 2117 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2118 return (EADDRNOTAVAIL); 2119 ifp = ifnet_byindex(gsr.gsr_interface); 2120 break; 2121 2122 default: 2123 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2124 __func__, sopt->sopt_name); 2125 return (EOPNOTSUPP); 2126 break; 2127 } 2128 2129 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2130 return (EADDRNOTAVAIL); 2131 2132 imo = inp_findmoptions(inp); 2133 idx = imo_match_group(imo, ifp, &gsa->sa); 2134 if (idx == -1) { 2135 is_new = 1; 2136 } else { 2137 inm = imo->imo_membership[idx]; 2138 imf = &imo->imo_mfilters[idx]; 2139 if (ssa->ss.ss_family != AF_UNSPEC) { 2140 /* 2141 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2142 * is an error. On an existing inclusive membership, 2143 * it just adds the source to the filter list. 2144 */ 2145 if (imf->imf_st[1] != MCAST_INCLUDE) { 2146 error = EINVAL; 2147 goto out_inp_locked; 2148 } 2149 /* 2150 * Throw out duplicates. 2151 * 2152 * XXX FIXME: This makes a naive assumption that 2153 * even if entries exist for *ssa in this imf, 2154 * they will be rejected as dupes, even if they 2155 * are not valid in the current mode (in-mode). 2156 * 2157 * in_msource is transactioned just as for anything 2158 * else in SSM -- but note naive use of inm_graft() 2159 * below for allocating new filter entries. 2160 * 2161 * This is only an issue if someone mixes the 2162 * full-state SSM API with the delta-based API, 2163 * which is discouraged in the relevant RFCs. 2164 */ 2165 lims = imo_match_source(imo, idx, &ssa->sa); 2166 if (lims != NULL /*&& 2167 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2168 error = EADDRNOTAVAIL; 2169 goto out_inp_locked; 2170 } 2171 } else { 2172 /* 2173 * MCAST_JOIN_GROUP on an existing exclusive 2174 * membership is an error; return EADDRINUSE 2175 * to preserve 4.4BSD API idempotence, and 2176 * avoid tedious detour to code below. 2177 * NOTE: This is bending RFC 3678 a bit. 2178 * 2179 * On an existing inclusive membership, this is also 2180 * an error; if you want to change filter mode, 2181 * you must use the userland API setsourcefilter(). 2182 * XXX We don't reject this for imf in UNDEFINED 2183 * state at t1, because allocation of a filter 2184 * is atomic with allocation of a membership. 2185 */ 2186 error = EINVAL; 2187 if (imf->imf_st[1] == MCAST_EXCLUDE) 2188 error = EADDRINUSE; 2189 goto out_inp_locked; 2190 } 2191 } 2192 2193 /* 2194 * Begin state merge transaction at socket layer. 2195 */ 2196 INP_WLOCK_ASSERT(inp); 2197 2198 if (is_new) { 2199 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2200 error = imo_grow(imo); 2201 if (error) 2202 goto out_inp_locked; 2203 } 2204 /* 2205 * Allocate the new slot upfront so we can deal with 2206 * grafting the new source filter in same code path 2207 * as for join-source on existing membership. 2208 */ 2209 idx = imo->imo_num_memberships; 2210 imo->imo_membership[idx] = NULL; 2211 imo->imo_num_memberships++; 2212 KASSERT(imo->imo_mfilters != NULL, 2213 ("%s: imf_mfilters vector was not allocated", __func__)); 2214 imf = &imo->imo_mfilters[idx]; 2215 KASSERT(RB_EMPTY(&imf->imf_sources), 2216 ("%s: imf_sources not empty", __func__)); 2217 } 2218 2219 /* 2220 * Graft new source into filter list for this inpcb's 2221 * membership of the group. The in_multi may not have 2222 * been allocated yet if this is a new membership, however, 2223 * the in_mfilter slot will be allocated and must be initialized. 2224 * 2225 * Note: Grafting of exclusive mode filters doesn't happen 2226 * in this path. 2227 * XXX: Should check for non-NULL lims (node exists but may 2228 * not be in-mode) for interop with full-state API. 2229 */ 2230 if (ssa->ss.ss_family != AF_UNSPEC) { 2231 /* Membership starts in IN mode */ 2232 if (is_new) { 2233 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2234 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2235 } else { 2236 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2237 } 2238 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2239 if (lims == NULL) { 2240 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2241 __func__); 2242 error = ENOMEM; 2243 goto out_imo_free; 2244 } 2245 } else { 2246 /* No address specified; Membership starts in EX mode */ 2247 if (is_new) { 2248 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2249 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2250 } 2251 } 2252 2253 /* 2254 * Begin state merge transaction at IGMP layer. 2255 */ 2256 in_pcbref(inp); 2257 INP_WUNLOCK(inp); 2258 IN_MULTI_LOCK(); 2259 2260 if (is_new) { 2261 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2262 &inm); 2263 if (error) { 2264 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2265 __func__); 2266 IN_MULTI_LIST_UNLOCK(); 2267 goto out_imo_free; 2268 } 2269 imo->imo_membership[idx] = inm; 2270 } else { 2271 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2272 IN_MULTI_LIST_LOCK(); 2273 error = inm_merge(inm, imf); 2274 if (error) { 2275 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2276 __func__); 2277 IN_MULTI_LIST_UNLOCK(); 2278 goto out_in_multi_locked; 2279 } 2280 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2281 error = igmp_change_state(inm); 2282 IN_MULTI_LIST_UNLOCK(); 2283 if (error) { 2284 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2285 __func__); 2286 goto out_in_multi_locked; 2287 } 2288 } 2289 2290 out_in_multi_locked: 2291 2292 IN_MULTI_UNLOCK(); 2293 INP_WLOCK(inp); 2294 if (in_pcbrele_wlocked(inp)) 2295 return (ENXIO); 2296 if (error) { 2297 imf_rollback(imf); 2298 if (is_new) 2299 imf_purge(imf); 2300 else 2301 imf_reap(imf); 2302 } else { 2303 imf_commit(imf); 2304 } 2305 2306 out_imo_free: 2307 if (error && is_new) { 2308 imo->imo_membership[idx] = NULL; 2309 --imo->imo_num_memberships; 2310 } 2311 2312 out_inp_locked: 2313 INP_WUNLOCK(inp); 2314 return (error); 2315 } 2316 2317 /* 2318 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2319 */ 2320 static int 2321 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2322 { 2323 struct group_source_req gsr; 2324 struct ip_mreq_source mreqs; 2325 sockunion_t *gsa, *ssa; 2326 struct ifnet *ifp; 2327 struct in_mfilter *imf; 2328 struct ip_moptions *imo; 2329 struct in_msource *ims; 2330 struct in_multi *inm; 2331 size_t idx; 2332 int error, is_final; 2333 2334 ifp = NULL; 2335 error = 0; 2336 is_final = 1; 2337 2338 memset(&gsr, 0, sizeof(struct group_source_req)); 2339 gsa = (sockunion_t *)&gsr.gsr_group; 2340 gsa->ss.ss_family = AF_UNSPEC; 2341 ssa = (sockunion_t *)&gsr.gsr_source; 2342 ssa->ss.ss_family = AF_UNSPEC; 2343 2344 switch (sopt->sopt_name) { 2345 case IP_DROP_MEMBERSHIP: 2346 case IP_DROP_SOURCE_MEMBERSHIP: 2347 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2348 error = sooptcopyin(sopt, &mreqs, 2349 sizeof(struct ip_mreq), 2350 sizeof(struct ip_mreq)); 2351 /* 2352 * Swap interface and sourceaddr arguments, 2353 * as ip_mreq and ip_mreq_source are laid 2354 * out differently. 2355 */ 2356 mreqs.imr_interface = mreqs.imr_sourceaddr; 2357 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2358 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2359 error = sooptcopyin(sopt, &mreqs, 2360 sizeof(struct ip_mreq_source), 2361 sizeof(struct ip_mreq_source)); 2362 } 2363 if (error) 2364 return (error); 2365 2366 gsa->sin.sin_family = AF_INET; 2367 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2368 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2369 2370 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2371 ssa->sin.sin_family = AF_INET; 2372 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2373 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2374 } 2375 2376 /* 2377 * Attempt to look up hinted ifp from interface address. 2378 * Fallthrough with null ifp iff lookup fails, to 2379 * preserve 4.4BSD mcast API idempotence. 2380 * XXX NOTE WELL: The RFC 3678 API is preferred because 2381 * using an IPv4 address as a key is racy. 2382 */ 2383 if (!in_nullhost(mreqs.imr_interface)) 2384 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2385 2386 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2387 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2388 2389 break; 2390 2391 case MCAST_LEAVE_GROUP: 2392 case MCAST_LEAVE_SOURCE_GROUP: 2393 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2394 error = sooptcopyin(sopt, &gsr, 2395 sizeof(struct group_req), 2396 sizeof(struct group_req)); 2397 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2398 error = sooptcopyin(sopt, &gsr, 2399 sizeof(struct group_source_req), 2400 sizeof(struct group_source_req)); 2401 } 2402 if (error) 2403 return (error); 2404 2405 if (gsa->sin.sin_family != AF_INET || 2406 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2407 return (EINVAL); 2408 2409 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2410 if (ssa->sin.sin_family != AF_INET || 2411 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2412 return (EINVAL); 2413 } 2414 2415 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2416 return (EADDRNOTAVAIL); 2417 2418 ifp = ifnet_byindex(gsr.gsr_interface); 2419 2420 if (ifp == NULL) 2421 return (EADDRNOTAVAIL); 2422 break; 2423 2424 default: 2425 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2426 __func__, sopt->sopt_name); 2427 return (EOPNOTSUPP); 2428 break; 2429 } 2430 2431 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2432 return (EINVAL); 2433 2434 /* 2435 * Find the membership in the membership array. 2436 */ 2437 imo = inp_findmoptions(inp); 2438 idx = imo_match_group(imo, ifp, &gsa->sa); 2439 if (idx == -1) { 2440 error = EADDRNOTAVAIL; 2441 goto out_inp_locked; 2442 } 2443 inm = imo->imo_membership[idx]; 2444 imf = &imo->imo_mfilters[idx]; 2445 2446 if (ssa->ss.ss_family != AF_UNSPEC) 2447 is_final = 0; 2448 2449 /* 2450 * Begin state merge transaction at socket layer. 2451 */ 2452 INP_WLOCK_ASSERT(inp); 2453 2454 /* 2455 * If we were instructed only to leave a given source, do so. 2456 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2457 */ 2458 if (is_final) { 2459 imf_leave(imf); 2460 } else { 2461 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2462 error = EADDRNOTAVAIL; 2463 goto out_inp_locked; 2464 } 2465 ims = imo_match_source(imo, idx, &ssa->sa); 2466 if (ims == NULL) { 2467 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2468 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2469 error = EADDRNOTAVAIL; 2470 goto out_inp_locked; 2471 } 2472 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2473 error = imf_prune(imf, &ssa->sin); 2474 if (error) { 2475 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2476 __func__); 2477 goto out_inp_locked; 2478 } 2479 } 2480 2481 /* 2482 * Begin state merge transaction at IGMP layer. 2483 */ 2484 in_pcbref(inp); 2485 INP_WUNLOCK(inp); 2486 IN_MULTI_LOCK(); 2487 2488 if (is_final) { 2489 /* 2490 * Give up the multicast address record to which 2491 * the membership points. 2492 */ 2493 (void)in_leavegroup_locked(inm, imf); 2494 } else { 2495 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2496 IN_MULTI_LIST_LOCK(); 2497 error = inm_merge(inm, imf); 2498 if (error) { 2499 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2500 __func__); 2501 IN_MULTI_LIST_UNLOCK(); 2502 goto out_in_multi_locked; 2503 } 2504 2505 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2506 error = igmp_change_state(inm); 2507 IN_MULTI_LIST_UNLOCK(); 2508 if (error) { 2509 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2510 __func__); 2511 } 2512 } 2513 2514 out_in_multi_locked: 2515 2516 IN_MULTI_UNLOCK(); 2517 INP_WLOCK(inp); 2518 if (in_pcbrele_wlocked(inp)) 2519 return (ENXIO); 2520 2521 if (error) 2522 imf_rollback(imf); 2523 else 2524 imf_commit(imf); 2525 2526 imf_reap(imf); 2527 2528 if (is_final) { 2529 /* Remove the gap in the membership and filter array. */ 2530 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2531 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2532 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2533 } 2534 imo->imo_num_memberships--; 2535 } 2536 2537 out_inp_locked: 2538 INP_WUNLOCK(inp); 2539 return (error); 2540 } 2541 2542 /* 2543 * Select the interface for transmitting IPv4 multicast datagrams. 2544 * 2545 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2546 * may be passed to this socket option. An address of INADDR_ANY or an 2547 * interface index of 0 is used to remove a previous selection. 2548 * When no interface is selected, one is chosen for every send. 2549 */ 2550 static int 2551 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2552 { 2553 struct in_addr addr; 2554 struct ip_mreqn mreqn; 2555 struct ifnet *ifp; 2556 struct ip_moptions *imo; 2557 int error; 2558 2559 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2560 /* 2561 * An interface index was specified using the 2562 * Linux-derived ip_mreqn structure. 2563 */ 2564 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2565 sizeof(struct ip_mreqn)); 2566 if (error) 2567 return (error); 2568 2569 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2570 return (EINVAL); 2571 2572 if (mreqn.imr_ifindex == 0) { 2573 ifp = NULL; 2574 } else { 2575 ifp = ifnet_byindex(mreqn.imr_ifindex); 2576 if (ifp == NULL) 2577 return (EADDRNOTAVAIL); 2578 } 2579 } else { 2580 /* 2581 * An interface was specified by IPv4 address. 2582 * This is the traditional BSD usage. 2583 */ 2584 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2585 sizeof(struct in_addr)); 2586 if (error) 2587 return (error); 2588 if (in_nullhost(addr)) { 2589 ifp = NULL; 2590 } else { 2591 INADDR_TO_IFP(addr, ifp); 2592 if (ifp == NULL) 2593 return (EADDRNOTAVAIL); 2594 } 2595 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2596 ntohl(addr.s_addr)); 2597 } 2598 2599 /* Reject interfaces which do not support multicast. */ 2600 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2601 return (EOPNOTSUPP); 2602 2603 imo = inp_findmoptions(inp); 2604 imo->imo_multicast_ifp = ifp; 2605 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2606 INP_WUNLOCK(inp); 2607 2608 return (0); 2609 } 2610 2611 /* 2612 * Atomically set source filters on a socket for an IPv4 multicast group. 2613 * 2614 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2615 */ 2616 static int 2617 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2618 { 2619 struct __msfilterreq msfr; 2620 sockunion_t *gsa; 2621 struct ifnet *ifp; 2622 struct in_mfilter *imf; 2623 struct ip_moptions *imo; 2624 struct in_multi *inm; 2625 size_t idx; 2626 int error; 2627 2628 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2629 sizeof(struct __msfilterreq)); 2630 if (error) 2631 return (error); 2632 2633 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2634 return (ENOBUFS); 2635 2636 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2637 msfr.msfr_fmode != MCAST_INCLUDE)) 2638 return (EINVAL); 2639 2640 if (msfr.msfr_group.ss_family != AF_INET || 2641 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2642 return (EINVAL); 2643 2644 gsa = (sockunion_t *)&msfr.msfr_group; 2645 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2646 return (EINVAL); 2647 2648 gsa->sin.sin_port = 0; /* ignore port */ 2649 2650 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2651 return (EADDRNOTAVAIL); 2652 2653 ifp = ifnet_byindex(msfr.msfr_ifindex); 2654 if (ifp == NULL) 2655 return (EADDRNOTAVAIL); 2656 2657 /* 2658 * Take the INP write lock. 2659 * Check if this socket is a member of this group. 2660 */ 2661 imo = inp_findmoptions(inp); 2662 idx = imo_match_group(imo, ifp, &gsa->sa); 2663 if (idx == -1 || imo->imo_mfilters == NULL) { 2664 error = EADDRNOTAVAIL; 2665 goto out_inp_locked; 2666 } 2667 inm = imo->imo_membership[idx]; 2668 imf = &imo->imo_mfilters[idx]; 2669 2670 /* 2671 * Begin state merge transaction at socket layer. 2672 */ 2673 INP_WLOCK_ASSERT(inp); 2674 2675 imf->imf_st[1] = msfr.msfr_fmode; 2676 2677 /* 2678 * Apply any new source filters, if present. 2679 * Make a copy of the user-space source vector so 2680 * that we may copy them with a single copyin. This 2681 * allows us to deal with page faults up-front. 2682 */ 2683 if (msfr.msfr_nsrcs > 0) { 2684 struct in_msource *lims; 2685 struct sockaddr_in *psin; 2686 struct sockaddr_storage *kss, *pkss; 2687 int i; 2688 2689 INP_WUNLOCK(inp); 2690 2691 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2692 __func__, (unsigned long)msfr.msfr_nsrcs); 2693 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2694 M_TEMP, M_WAITOK); 2695 error = copyin(msfr.msfr_srcs, kss, 2696 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2697 if (error) { 2698 free(kss, M_TEMP); 2699 return (error); 2700 } 2701 2702 INP_WLOCK(inp); 2703 2704 /* 2705 * Mark all source filters as UNDEFINED at t1. 2706 * Restore new group filter mode, as imf_leave() 2707 * will set it to INCLUDE. 2708 */ 2709 imf_leave(imf); 2710 imf->imf_st[1] = msfr.msfr_fmode; 2711 2712 /* 2713 * Update socket layer filters at t1, lazy-allocating 2714 * new entries. This saves a bunch of memory at the 2715 * cost of one RB_FIND() per source entry; duplicate 2716 * entries in the msfr_nsrcs vector are ignored. 2717 * If we encounter an error, rollback transaction. 2718 * 2719 * XXX This too could be replaced with a set-symmetric 2720 * difference like loop to avoid walking from root 2721 * every time, as the key space is common. 2722 */ 2723 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2724 psin = (struct sockaddr_in *)pkss; 2725 if (psin->sin_family != AF_INET) { 2726 error = EAFNOSUPPORT; 2727 break; 2728 } 2729 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2730 error = EINVAL; 2731 break; 2732 } 2733 error = imf_get_source(imf, psin, &lims); 2734 if (error) 2735 break; 2736 lims->imsl_st[1] = imf->imf_st[1]; 2737 } 2738 free(kss, M_TEMP); 2739 } 2740 2741 if (error) 2742 goto out_imf_rollback; 2743 2744 INP_WLOCK_ASSERT(inp); 2745 IN_MULTI_LOCK(); 2746 2747 /* 2748 * Begin state merge transaction at IGMP layer. 2749 */ 2750 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2751 IN_MULTI_LIST_LOCK(); 2752 error = inm_merge(inm, imf); 2753 if (error) { 2754 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2755 IN_MULTI_LIST_UNLOCK(); 2756 goto out_in_multi_locked; 2757 } 2758 2759 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2760 error = igmp_change_state(inm); 2761 IN_MULTI_LIST_UNLOCK(); 2762 if (error) 2763 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2764 2765 out_in_multi_locked: 2766 2767 IN_MULTI_UNLOCK(); 2768 2769 out_imf_rollback: 2770 if (error) 2771 imf_rollback(imf); 2772 else 2773 imf_commit(imf); 2774 2775 imf_reap(imf); 2776 2777 out_inp_locked: 2778 INP_WUNLOCK(inp); 2779 return (error); 2780 } 2781 2782 /* 2783 * Set the IP multicast options in response to user setsockopt(). 2784 * 2785 * Many of the socket options handled in this function duplicate the 2786 * functionality of socket options in the regular unicast API. However, 2787 * it is not possible to merge the duplicate code, because the idempotence 2788 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2789 * the effects of these options must be treated as separate and distinct. 2790 * 2791 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2792 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2793 * is refactored to no longer use vifs. 2794 */ 2795 int 2796 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2797 { 2798 struct ip_moptions *imo; 2799 int error; 2800 2801 error = 0; 2802 2803 /* 2804 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2805 * or is a divert socket, reject it. 2806 */ 2807 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2808 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2809 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2810 return (EOPNOTSUPP); 2811 2812 switch (sopt->sopt_name) { 2813 case IP_MULTICAST_VIF: { 2814 int vifi; 2815 /* 2816 * Select a multicast VIF for transmission. 2817 * Only useful if multicast forwarding is active. 2818 */ 2819 if (legal_vif_num == NULL) { 2820 error = EOPNOTSUPP; 2821 break; 2822 } 2823 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2824 if (error) 2825 break; 2826 if (!legal_vif_num(vifi) && (vifi != -1)) { 2827 error = EINVAL; 2828 break; 2829 } 2830 imo = inp_findmoptions(inp); 2831 imo->imo_multicast_vif = vifi; 2832 INP_WUNLOCK(inp); 2833 break; 2834 } 2835 2836 case IP_MULTICAST_IF: 2837 error = inp_set_multicast_if(inp, sopt); 2838 break; 2839 2840 case IP_MULTICAST_TTL: { 2841 u_char ttl; 2842 2843 /* 2844 * Set the IP time-to-live for outgoing multicast packets. 2845 * The original multicast API required a char argument, 2846 * which is inconsistent with the rest of the socket API. 2847 * We allow either a char or an int. 2848 */ 2849 if (sopt->sopt_valsize == sizeof(u_char)) { 2850 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2851 sizeof(u_char)); 2852 if (error) 2853 break; 2854 } else { 2855 u_int ittl; 2856 2857 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2858 sizeof(u_int)); 2859 if (error) 2860 break; 2861 if (ittl > 255) { 2862 error = EINVAL; 2863 break; 2864 } 2865 ttl = (u_char)ittl; 2866 } 2867 imo = inp_findmoptions(inp); 2868 imo->imo_multicast_ttl = ttl; 2869 INP_WUNLOCK(inp); 2870 break; 2871 } 2872 2873 case IP_MULTICAST_LOOP: { 2874 u_char loop; 2875 2876 /* 2877 * Set the loopback flag for outgoing multicast packets. 2878 * Must be zero or one. The original multicast API required a 2879 * char argument, which is inconsistent with the rest 2880 * of the socket API. We allow either a char or an int. 2881 */ 2882 if (sopt->sopt_valsize == sizeof(u_char)) { 2883 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2884 sizeof(u_char)); 2885 if (error) 2886 break; 2887 } else { 2888 u_int iloop; 2889 2890 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2891 sizeof(u_int)); 2892 if (error) 2893 break; 2894 loop = (u_char)iloop; 2895 } 2896 imo = inp_findmoptions(inp); 2897 imo->imo_multicast_loop = !!loop; 2898 INP_WUNLOCK(inp); 2899 break; 2900 } 2901 2902 case IP_ADD_MEMBERSHIP: 2903 case IP_ADD_SOURCE_MEMBERSHIP: 2904 case MCAST_JOIN_GROUP: 2905 case MCAST_JOIN_SOURCE_GROUP: 2906 error = inp_join_group(inp, sopt); 2907 break; 2908 2909 case IP_DROP_MEMBERSHIP: 2910 case IP_DROP_SOURCE_MEMBERSHIP: 2911 case MCAST_LEAVE_GROUP: 2912 case MCAST_LEAVE_SOURCE_GROUP: 2913 error = inp_leave_group(inp, sopt); 2914 break; 2915 2916 case IP_BLOCK_SOURCE: 2917 case IP_UNBLOCK_SOURCE: 2918 case MCAST_BLOCK_SOURCE: 2919 case MCAST_UNBLOCK_SOURCE: 2920 error = inp_block_unblock_source(inp, sopt); 2921 break; 2922 2923 case IP_MSFILTER: 2924 error = inp_set_source_filters(inp, sopt); 2925 break; 2926 2927 default: 2928 error = EOPNOTSUPP; 2929 break; 2930 } 2931 2932 INP_UNLOCK_ASSERT(inp); 2933 2934 return (error); 2935 } 2936 2937 /* 2938 * Expose IGMP's multicast filter mode and source list(s) to userland, 2939 * keyed by (ifindex, group). 2940 * The filter mode is written out as a uint32_t, followed by 2941 * 0..n of struct in_addr. 2942 * For use by ifmcstat(8). 2943 * SMPng: NOTE: unlocked read of ifindex space. 2944 */ 2945 static int 2946 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2947 { 2948 struct in_addr src, group; 2949 struct ifnet *ifp; 2950 struct ifmultiaddr *ifma; 2951 struct in_multi *inm; 2952 struct ip_msource *ims; 2953 int *name; 2954 int retval; 2955 u_int namelen; 2956 uint32_t fmode, ifindex; 2957 2958 name = (int *)arg1; 2959 namelen = arg2; 2960 2961 if (req->newptr != NULL) 2962 return (EPERM); 2963 2964 if (namelen != 2) 2965 return (EINVAL); 2966 2967 ifindex = name[0]; 2968 if (ifindex <= 0 || ifindex > V_if_index) { 2969 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2970 __func__, ifindex); 2971 return (ENOENT); 2972 } 2973 2974 group.s_addr = name[1]; 2975 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2976 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2977 __func__, ntohl(group.s_addr)); 2978 return (EINVAL); 2979 } 2980 2981 ifp = ifnet_byindex(ifindex); 2982 if (ifp == NULL) { 2983 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2984 __func__, ifindex); 2985 return (ENOENT); 2986 } 2987 2988 retval = sysctl_wire_old_buffer(req, 2989 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 2990 if (retval) 2991 return (retval); 2992 2993 IN_MULTI_LIST_LOCK(); 2994 2995 IF_ADDR_RLOCK(ifp); 2996 CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 2997 if (ifma->ifma_addr->sa_family != AF_INET || 2998 ifma->ifma_protospec == NULL) 2999 continue; 3000 inm = (struct in_multi *)ifma->ifma_protospec; 3001 if (!in_hosteq(inm->inm_addr, group)) 3002 continue; 3003 fmode = inm->inm_st[1].iss_fmode; 3004 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 3005 if (retval != 0) 3006 break; 3007 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 3008 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 3009 ims->ims_haddr); 3010 /* 3011 * Only copy-out sources which are in-mode. 3012 */ 3013 if (fmode != ims_get_mode(inm, ims, 1)) { 3014 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 3015 __func__); 3016 continue; 3017 } 3018 src.s_addr = htonl(ims->ims_haddr); 3019 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 3020 if (retval != 0) 3021 break; 3022 } 3023 } 3024 IF_ADDR_RUNLOCK(ifp); 3025 3026 IN_MULTI_LIST_UNLOCK(); 3027 3028 return (retval); 3029 } 3030 3031 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 3032 3033 static const char *inm_modestrs[] = { "un", "in", "ex" }; 3034 3035 static const char * 3036 inm_mode_str(const int mode) 3037 { 3038 3039 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 3040 return (inm_modestrs[mode]); 3041 return ("??"); 3042 } 3043 3044 static const char *inm_statestrs[] = { 3045 "not-member", 3046 "silent", 3047 "idle", 3048 "lazy", 3049 "sleeping", 3050 "awakening", 3051 "query-pending", 3052 "sg-query-pending", 3053 "leaving" 3054 }; 3055 3056 static const char * 3057 inm_state_str(const int state) 3058 { 3059 3060 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3061 return (inm_statestrs[state]); 3062 return ("??"); 3063 } 3064 3065 /* 3066 * Dump an in_multi structure to the console. 3067 */ 3068 void 3069 inm_print(const struct in_multi *inm) 3070 { 3071 int t; 3072 char addrbuf[INET_ADDRSTRLEN]; 3073 3074 if ((ktr_mask & KTR_IGMPV3) == 0) 3075 return; 3076 3077 printf("%s: --- begin inm %p ---\n", __func__, inm); 3078 printf("addr %s ifp %p(%s) ifma %p\n", 3079 inet_ntoa_r(inm->inm_addr, addrbuf), 3080 inm->inm_ifp, 3081 inm->inm_ifp->if_xname, 3082 inm->inm_ifma); 3083 printf("timer %u state %s refcount %u scq.len %u\n", 3084 inm->inm_timer, 3085 inm_state_str(inm->inm_state), 3086 inm->inm_refcount, 3087 inm->inm_scq.mq_len); 3088 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3089 inm->inm_igi, 3090 inm->inm_nsrc, 3091 inm->inm_sctimer, 3092 inm->inm_scrv); 3093 for (t = 0; t < 2; t++) { 3094 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3095 inm_mode_str(inm->inm_st[t].iss_fmode), 3096 inm->inm_st[t].iss_asm, 3097 inm->inm_st[t].iss_ex, 3098 inm->inm_st[t].iss_in, 3099 inm->inm_st[t].iss_rec); 3100 } 3101 printf("%s: --- end inm %p ---\n", __func__, inm); 3102 } 3103 3104 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3105 3106 void 3107 inm_print(const struct in_multi *inm) 3108 { 3109 3110 } 3111 3112 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3113 3114 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3115