1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static void inp_freemoptions_internal(struct ip_moptions *); 169 static void inp_gcmoptions(void *, int); 170 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 171 static int inp_join_group(struct inpcb *, struct sockopt *); 172 static int inp_leave_group(struct inpcb *, struct sockopt *); 173 static struct ifnet * 174 inp_lookup_mcast_ifp(const struct inpcb *, 175 const struct sockaddr_in *, const struct in_addr); 176 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 177 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 178 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 179 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 180 181 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 182 "IPv4 multicast"); 183 184 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 185 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 186 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 187 "Max source filters per group"); 188 189 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 190 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 191 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 192 "Max source filters per socket"); 193 194 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 195 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 196 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 197 198 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 199 CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters, 200 "Per-interface stack-wide source filters"); 201 202 static STAILQ_HEAD(, ip_moptions) imo_gc_list = 203 STAILQ_HEAD_INITIALIZER(imo_gc_list); 204 static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL); 205 206 #ifdef KTR 207 /* 208 * Inline function which wraps assertions for a valid ifp. 209 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp 210 * is detached. 211 */ 212 static int __inline 213 inm_is_ifp_detached(const struct in_multi *inm) 214 { 215 struct ifnet *ifp; 216 217 KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); 218 ifp = inm->inm_ifma->ifma_ifp; 219 if (ifp != NULL) { 220 /* 221 * Sanity check that netinet's notion of ifp is the 222 * same as net's. 223 */ 224 KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); 225 } 226 227 return (ifp == NULL); 228 } 229 #endif 230 231 static struct grouptask free_gtask; 232 static struct in_multi_head inm_free_list; 233 static void inm_release_task(void *arg __unused); 234 static void inm_init(void) 235 { 236 SLIST_INIT(&inm_free_list); 237 taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task"); 238 } 239 240 SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST, 241 inm_init, NULL); 242 243 244 void 245 inm_release_list_deferred(struct in_multi_head *inmh) 246 { 247 248 if (SLIST_EMPTY(inmh)) 249 return; 250 mtx_lock(&in_multi_free_mtx); 251 SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele); 252 mtx_unlock(&in_multi_free_mtx); 253 GROUPTASK_ENQUEUE(&free_gtask); 254 } 255 256 void 257 inm_disconnect(struct in_multi *inm) 258 { 259 struct ifnet *ifp; 260 struct ifmultiaddr *ifma, *ll_ifma; 261 262 ifp = inm->inm_ifp; 263 IF_ADDR_WLOCK_ASSERT(ifp); 264 ifma = inm->inm_ifma; 265 266 if_ref(ifp); 267 TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link); 268 MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname); 269 if ((ll_ifma = ifma->ifma_llifma) != NULL) { 270 MPASS(ifma != ll_ifma); 271 ifma->ifma_llifma = NULL; 272 MPASS(ll_ifma->ifma_llifma == NULL); 273 MPASS(ll_ifma->ifma_ifp == ifp); 274 if (--ll_ifma->ifma_refcount == 0) { 275 TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link); 276 MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname); 277 if_freemulti(ll_ifma); 278 ifma_restart = true; 279 } 280 } 281 } 282 283 void 284 inm_release_deferred(struct in_multi *inm) 285 { 286 struct in_multi_head tmp; 287 288 IN_MULTI_LIST_LOCK_ASSERT(); 289 MPASS(inm->inm_refcount > 0); 290 if (--inm->inm_refcount == 0) { 291 SLIST_INIT(&tmp); 292 inm_disconnect(inm); 293 inm->inm_ifma->ifma_protospec = NULL; 294 SLIST_INSERT_HEAD(&tmp, inm, inm_nrele); 295 inm_release_list_deferred(&tmp); 296 } 297 } 298 299 static void 300 inm_release_task(void *arg __unused) 301 { 302 struct in_multi_head inm_free_tmp; 303 struct in_multi *inm, *tinm; 304 305 SLIST_INIT(&inm_free_tmp); 306 mtx_lock(&in_multi_free_mtx); 307 SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele); 308 mtx_unlock(&in_multi_free_mtx); 309 IN_MULTI_LOCK(); 310 SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) { 311 SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele); 312 MPASS(inm); 313 inm_release(inm); 314 } 315 IN_MULTI_UNLOCK(); 316 } 317 318 /* 319 * Initialize an in_mfilter structure to a known state at t0, t1 320 * with an empty source filter list. 321 */ 322 static __inline void 323 imf_init(struct in_mfilter *imf, const int st0, const int st1) 324 { 325 memset(imf, 0, sizeof(struct in_mfilter)); 326 RB_INIT(&imf->imf_sources); 327 imf->imf_st[0] = st0; 328 imf->imf_st[1] = st1; 329 } 330 331 /* 332 * Function for looking up an in_multi record for an IPv4 multicast address 333 * on a given interface. ifp must be valid. If no record found, return NULL. 334 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held. 335 */ 336 struct in_multi * 337 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 338 { 339 struct ifmultiaddr *ifma; 340 struct in_multi *inm; 341 342 IN_MULTI_LIST_LOCK_ASSERT(); 343 IF_ADDR_LOCK_ASSERT(ifp); 344 345 inm = NULL; 346 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 347 if (ifma->ifma_addr->sa_family == AF_INET) { 348 inm = (struct in_multi *)ifma->ifma_protospec; 349 if (inm->inm_addr.s_addr == ina.s_addr) 350 break; 351 inm = NULL; 352 } 353 } 354 return (inm); 355 } 356 357 /* 358 * Wrapper for inm_lookup_locked(). 359 * The IF_ADDR_LOCK will be taken on ifp and released on return. 360 */ 361 struct in_multi * 362 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 363 { 364 struct in_multi *inm; 365 366 IN_MULTI_LIST_LOCK_ASSERT(); 367 IF_ADDR_RLOCK(ifp); 368 inm = inm_lookup_locked(ifp, ina); 369 IF_ADDR_RUNLOCK(ifp); 370 371 return (inm); 372 } 373 374 /* 375 * Resize the ip_moptions vector to the next power-of-two minus 1. 376 * May be called with locks held; do not sleep. 377 */ 378 static int 379 imo_grow(struct ip_moptions *imo) 380 { 381 struct in_multi **nmships; 382 struct in_multi **omships; 383 struct in_mfilter *nmfilters; 384 struct in_mfilter *omfilters; 385 size_t idx; 386 size_t newmax; 387 size_t oldmax; 388 389 nmships = NULL; 390 nmfilters = NULL; 391 omships = imo->imo_membership; 392 omfilters = imo->imo_mfilters; 393 oldmax = imo->imo_max_memberships; 394 newmax = ((oldmax + 1) * 2) - 1; 395 396 if (newmax <= IP_MAX_MEMBERSHIPS) { 397 nmships = (struct in_multi **)realloc(omships, 398 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 399 nmfilters = (struct in_mfilter *)realloc(omfilters, 400 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 401 if (nmships != NULL && nmfilters != NULL) { 402 /* Initialize newly allocated source filter heads. */ 403 for (idx = oldmax; idx < newmax; idx++) { 404 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 405 MCAST_EXCLUDE); 406 } 407 imo->imo_max_memberships = newmax; 408 imo->imo_membership = nmships; 409 imo->imo_mfilters = nmfilters; 410 } 411 } 412 413 if (nmships == NULL || nmfilters == NULL) { 414 if (nmships != NULL) 415 free(nmships, M_IPMOPTS); 416 if (nmfilters != NULL) 417 free(nmfilters, M_INMFILTER); 418 return (ETOOMANYREFS); 419 } 420 421 return (0); 422 } 423 424 /* 425 * Find an IPv4 multicast group entry for this ip_moptions instance 426 * which matches the specified group, and optionally an interface. 427 * Return its index into the array, or -1 if not found. 428 */ 429 static size_t 430 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 431 const struct sockaddr *group) 432 { 433 const struct sockaddr_in *gsin; 434 struct in_multi **pinm; 435 int idx; 436 int nmships; 437 438 gsin = (const struct sockaddr_in *)group; 439 440 /* The imo_membership array may be lazy allocated. */ 441 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 442 return (-1); 443 444 nmships = imo->imo_num_memberships; 445 pinm = &imo->imo_membership[0]; 446 for (idx = 0; idx < nmships; idx++, pinm++) { 447 if (*pinm == NULL) 448 continue; 449 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 450 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 451 break; 452 } 453 } 454 if (idx >= nmships) 455 idx = -1; 456 457 return (idx); 458 } 459 460 /* 461 * Find an IPv4 multicast source entry for this imo which matches 462 * the given group index for this socket, and source address. 463 * 464 * NOTE: This does not check if the entry is in-mode, merely if 465 * it exists, which may not be the desired behaviour. 466 */ 467 static struct in_msource * 468 imo_match_source(const struct ip_moptions *imo, const size_t gidx, 469 const struct sockaddr *src) 470 { 471 struct ip_msource find; 472 struct in_mfilter *imf; 473 struct ip_msource *ims; 474 const sockunion_t *psa; 475 476 KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__)); 477 KASSERT(gidx != -1 && gidx < imo->imo_num_memberships, 478 ("%s: invalid index %d\n", __func__, (int)gidx)); 479 480 /* The imo_mfilters array may be lazy allocated. */ 481 if (imo->imo_mfilters == NULL) 482 return (NULL); 483 imf = &imo->imo_mfilters[gidx]; 484 485 /* Source trees are keyed in host byte order. */ 486 psa = (const sockunion_t *)src; 487 find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr); 488 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 489 490 return ((struct in_msource *)ims); 491 } 492 493 /* 494 * Perform filtering for multicast datagrams on a socket by group and source. 495 * 496 * Returns 0 if a datagram should be allowed through, or various error codes 497 * if the socket was not a member of the group, or the source was muted, etc. 498 */ 499 int 500 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp, 501 const struct sockaddr *group, const struct sockaddr *src) 502 { 503 size_t gidx; 504 struct in_msource *ims; 505 int mode; 506 507 KASSERT(ifp != NULL, ("%s: null ifp", __func__)); 508 509 gidx = imo_match_group(imo, ifp, group); 510 if (gidx == -1) 511 return (MCAST_NOTGMEMBER); 512 513 /* 514 * Check if the source was included in an (S,G) join. 515 * Allow reception on exclusive memberships by default, 516 * reject reception on inclusive memberships by default. 517 * Exclude source only if an in-mode exclude filter exists. 518 * Include source only if an in-mode include filter exists. 519 * NOTE: We are comparing group state here at IGMP t1 (now) 520 * with socket-layer t0 (since last downcall). 521 */ 522 mode = imo->imo_mfilters[gidx].imf_st[1]; 523 ims = imo_match_source(imo, gidx, src); 524 525 if ((ims == NULL && mode == MCAST_INCLUDE) || 526 (ims != NULL && ims->imsl_st[0] != mode)) 527 return (MCAST_NOTSMEMBER); 528 529 return (MCAST_PASS); 530 } 531 532 /* 533 * Find and return a reference to an in_multi record for (ifp, group), 534 * and bump its reference count. 535 * If one does not exist, try to allocate it, and update link-layer multicast 536 * filters on ifp to listen for group. 537 * Assumes the IN_MULTI lock is held across the call. 538 * Return 0 if successful, otherwise return an appropriate error code. 539 */ 540 static int 541 in_getmulti(struct ifnet *ifp, const struct in_addr *group, 542 struct in_multi **pinm) 543 { 544 struct sockaddr_in gsin; 545 struct ifmultiaddr *ifma; 546 struct in_ifinfo *ii; 547 struct in_multi *inm; 548 int error; 549 550 IN_MULTI_LOCK_ASSERT(); 551 552 ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET]; 553 IN_MULTI_LIST_LOCK(); 554 inm = inm_lookup(ifp, *group); 555 if (inm != NULL) { 556 /* 557 * If we already joined this group, just bump the 558 * refcount and return it. 559 */ 560 KASSERT(inm->inm_refcount >= 1, 561 ("%s: bad refcount %d", __func__, inm->inm_refcount)); 562 inm_acquire_locked(inm); 563 *pinm = inm; 564 } 565 IN_MULTI_LIST_UNLOCK(); 566 if (inm != NULL) 567 return (0); 568 569 memset(&gsin, 0, sizeof(gsin)); 570 gsin.sin_family = AF_INET; 571 gsin.sin_len = sizeof(struct sockaddr_in); 572 gsin.sin_addr = *group; 573 574 /* 575 * Check if a link-layer group is already associated 576 * with this network-layer group on the given ifnet. 577 */ 578 error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma); 579 if (error != 0) 580 return (error); 581 582 /* XXX ifma_protospec must be covered by IF_ADDR_LOCK */ 583 IN_MULTI_LIST_LOCK(); 584 IF_ADDR_WLOCK(ifp); 585 586 /* 587 * If something other than netinet is occupying the link-layer 588 * group, print a meaningful error message and back out of 589 * the allocation. 590 * Otherwise, bump the refcount on the existing network-layer 591 * group association and return it. 592 */ 593 if (ifma->ifma_protospec != NULL) { 594 inm = (struct in_multi *)ifma->ifma_protospec; 595 #ifdef INVARIANTS 596 KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr", 597 __func__)); 598 KASSERT(ifma->ifma_addr->sa_family == AF_INET, 599 ("%s: ifma not AF_INET", __func__)); 600 KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__)); 601 if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || 602 !in_hosteq(inm->inm_addr, *group)) { 603 char addrbuf[INET_ADDRSTRLEN]; 604 605 panic("%s: ifma %p is inconsistent with %p (%s)", 606 __func__, ifma, inm, inet_ntoa_r(*group, addrbuf)); 607 } 608 #endif 609 inm_acquire_locked(inm); 610 *pinm = inm; 611 goto out_locked; 612 } 613 614 IF_ADDR_WLOCK_ASSERT(ifp); 615 616 /* 617 * A new in_multi record is needed; allocate and initialize it. 618 * We DO NOT perform an IGMP join as the in_ layer may need to 619 * push an initial source list down to IGMP to support SSM. 620 * 621 * The initial source filter state is INCLUDE, {} as per the RFC. 622 */ 623 inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO); 624 if (inm == NULL) { 625 IF_ADDR_WUNLOCK(ifp); 626 IN_MULTI_LIST_UNLOCK(); 627 if_delmulti_ifma(ifma); 628 return (ENOMEM); 629 } 630 inm->inm_addr = *group; 631 inm->inm_ifp = ifp; 632 inm->inm_igi = ii->ii_igmp; 633 inm->inm_ifma = ifma; 634 inm->inm_refcount = 1; 635 inm->inm_state = IGMP_NOT_MEMBER; 636 mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES); 637 inm->inm_st[0].iss_fmode = MCAST_UNDEFINED; 638 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 639 RB_INIT(&inm->inm_srcs); 640 641 ifma->ifma_protospec = inm; 642 643 *pinm = inm; 644 out_locked: 645 IF_ADDR_WUNLOCK(ifp); 646 IN_MULTI_LIST_UNLOCK(); 647 return (0); 648 } 649 650 /* 651 * Drop a reference to an in_multi record. 652 * 653 * If the refcount drops to 0, free the in_multi record and 654 * delete the underlying link-layer membership. 655 */ 656 static void 657 inm_release(struct in_multi *inm) 658 { 659 struct ifmultiaddr *ifma; 660 struct ifnet *ifp; 661 662 CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount); 663 MPASS(inm->inm_refcount == 0); 664 CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm); 665 666 ifma = inm->inm_ifma; 667 ifp = inm->inm_ifp; 668 669 /* XXX this access is not covered by IF_ADDR_LOCK */ 670 CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma); 671 if (ifp) 672 CURVNET_SET(ifp->if_vnet); 673 inm_purge(inm); 674 free(inm, M_IPMADDR); 675 676 if_delmulti_ifma_flags(ifma, 1); 677 if (ifp) { 678 CURVNET_RESTORE(); 679 if_rele(ifp); 680 } 681 } 682 683 /* 684 * Clear recorded source entries for a group. 685 * Used by the IGMP code. Caller must hold the IN_MULTI lock. 686 * FIXME: Should reap. 687 */ 688 void 689 inm_clear_recorded(struct in_multi *inm) 690 { 691 struct ip_msource *ims; 692 693 IN_MULTI_LIST_LOCK_ASSERT(); 694 695 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 696 if (ims->ims_stp) { 697 ims->ims_stp = 0; 698 --inm->inm_st[1].iss_rec; 699 } 700 } 701 KASSERT(inm->inm_st[1].iss_rec == 0, 702 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 703 } 704 705 /* 706 * Record a source as pending for a Source-Group IGMPv3 query. 707 * This lives here as it modifies the shared tree. 708 * 709 * inm is the group descriptor. 710 * naddr is the address of the source to record in network-byte order. 711 * 712 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 713 * lazy-allocate a source node in response to an SG query. 714 * Otherwise, no allocation is performed. This saves some memory 715 * with the trade-off that the source will not be reported to the 716 * router if joined in the window between the query response and 717 * the group actually being joined on the local host. 718 * 719 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 720 * This turns off the allocation of a recorded source entry if 721 * the group has not been joined. 722 * 723 * Return 0 if the source didn't exist or was already marked as recorded. 724 * Return 1 if the source was marked as recorded by this function. 725 * Return <0 if any error occurred (negated errno code). 726 */ 727 int 728 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 729 { 730 struct ip_msource find; 731 struct ip_msource *ims, *nims; 732 733 IN_MULTI_LIST_LOCK_ASSERT(); 734 735 find.ims_haddr = ntohl(naddr); 736 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 737 if (ims && ims->ims_stp) 738 return (0); 739 if (ims == NULL) { 740 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 741 return (-ENOSPC); 742 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 743 M_NOWAIT | M_ZERO); 744 if (nims == NULL) 745 return (-ENOMEM); 746 nims->ims_haddr = find.ims_haddr; 747 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 748 ++inm->inm_nsrc; 749 ims = nims; 750 } 751 752 /* 753 * Mark the source as recorded and update the recorded 754 * source count. 755 */ 756 ++ims->ims_stp; 757 ++inm->inm_st[1].iss_rec; 758 759 return (1); 760 } 761 762 /* 763 * Return a pointer to an in_msource owned by an in_mfilter, 764 * given its source address. 765 * Lazy-allocate if needed. If this is a new entry its filter state is 766 * undefined at t0. 767 * 768 * imf is the filter set being modified. 769 * haddr is the source address in *host* byte-order. 770 * 771 * SMPng: May be called with locks held; malloc must not block. 772 */ 773 static int 774 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 775 struct in_msource **plims) 776 { 777 struct ip_msource find; 778 struct ip_msource *ims, *nims; 779 struct in_msource *lims; 780 int error; 781 782 error = 0; 783 ims = NULL; 784 lims = NULL; 785 786 /* key is host byte order */ 787 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 788 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 789 lims = (struct in_msource *)ims; 790 if (lims == NULL) { 791 if (imf->imf_nsrc == in_mcast_maxsocksrc) 792 return (ENOSPC); 793 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 794 M_NOWAIT | M_ZERO); 795 if (nims == NULL) 796 return (ENOMEM); 797 lims = (struct in_msource *)nims; 798 lims->ims_haddr = find.ims_haddr; 799 lims->imsl_st[0] = MCAST_UNDEFINED; 800 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 801 ++imf->imf_nsrc; 802 } 803 804 *plims = lims; 805 806 return (error); 807 } 808 809 /* 810 * Graft a source entry into an existing socket-layer filter set, 811 * maintaining any required invariants and checking allocations. 812 * 813 * The source is marked as being in the new filter mode at t1. 814 * 815 * Return the pointer to the new node, otherwise return NULL. 816 */ 817 static struct in_msource * 818 imf_graft(struct in_mfilter *imf, const uint8_t st1, 819 const struct sockaddr_in *psin) 820 { 821 struct ip_msource *nims; 822 struct in_msource *lims; 823 824 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 825 M_NOWAIT | M_ZERO); 826 if (nims == NULL) 827 return (NULL); 828 lims = (struct in_msource *)nims; 829 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 830 lims->imsl_st[0] = MCAST_UNDEFINED; 831 lims->imsl_st[1] = st1; 832 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 833 ++imf->imf_nsrc; 834 835 return (lims); 836 } 837 838 /* 839 * Prune a source entry from an existing socket-layer filter set, 840 * maintaining any required invariants and checking allocations. 841 * 842 * The source is marked as being left at t1, it is not freed. 843 * 844 * Return 0 if no error occurred, otherwise return an errno value. 845 */ 846 static int 847 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 848 { 849 struct ip_msource find; 850 struct ip_msource *ims; 851 struct in_msource *lims; 852 853 /* key is host byte order */ 854 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 855 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 856 if (ims == NULL) 857 return (ENOENT); 858 lims = (struct in_msource *)ims; 859 lims->imsl_st[1] = MCAST_UNDEFINED; 860 return (0); 861 } 862 863 /* 864 * Revert socket-layer filter set deltas at t1 to t0 state. 865 */ 866 static void 867 imf_rollback(struct in_mfilter *imf) 868 { 869 struct ip_msource *ims, *tims; 870 struct in_msource *lims; 871 872 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 873 lims = (struct in_msource *)ims; 874 if (lims->imsl_st[0] == lims->imsl_st[1]) { 875 /* no change at t1 */ 876 continue; 877 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 878 /* revert change to existing source at t1 */ 879 lims->imsl_st[1] = lims->imsl_st[0]; 880 } else { 881 /* revert source added t1 */ 882 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 883 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 884 free(ims, M_INMFILTER); 885 imf->imf_nsrc--; 886 } 887 } 888 imf->imf_st[1] = imf->imf_st[0]; 889 } 890 891 /* 892 * Mark socket-layer filter set as INCLUDE {} at t1. 893 */ 894 static void 895 imf_leave(struct in_mfilter *imf) 896 { 897 struct ip_msource *ims; 898 struct in_msource *lims; 899 900 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 901 lims = (struct in_msource *)ims; 902 lims->imsl_st[1] = MCAST_UNDEFINED; 903 } 904 imf->imf_st[1] = MCAST_INCLUDE; 905 } 906 907 /* 908 * Mark socket-layer filter set deltas as committed. 909 */ 910 static void 911 imf_commit(struct in_mfilter *imf) 912 { 913 struct ip_msource *ims; 914 struct in_msource *lims; 915 916 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 917 lims = (struct in_msource *)ims; 918 lims->imsl_st[0] = lims->imsl_st[1]; 919 } 920 imf->imf_st[0] = imf->imf_st[1]; 921 } 922 923 /* 924 * Reap unreferenced sources from socket-layer filter set. 925 */ 926 static void 927 imf_reap(struct in_mfilter *imf) 928 { 929 struct ip_msource *ims, *tims; 930 struct in_msource *lims; 931 932 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 933 lims = (struct in_msource *)ims; 934 if ((lims->imsl_st[0] == MCAST_UNDEFINED) && 935 (lims->imsl_st[1] == MCAST_UNDEFINED)) { 936 CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims); 937 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 938 free(ims, M_INMFILTER); 939 imf->imf_nsrc--; 940 } 941 } 942 } 943 944 /* 945 * Purge socket-layer filter set. 946 */ 947 static void 948 imf_purge(struct in_mfilter *imf) 949 { 950 struct ip_msource *ims, *tims; 951 952 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 953 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 954 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 955 free(ims, M_INMFILTER); 956 imf->imf_nsrc--; 957 } 958 imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED; 959 KASSERT(RB_EMPTY(&imf->imf_sources), 960 ("%s: imf_sources not empty", __func__)); 961 } 962 963 /* 964 * Look up a source filter entry for a multicast group. 965 * 966 * inm is the group descriptor to work with. 967 * haddr is the host-byte-order IPv4 address to look up. 968 * noalloc may be non-zero to suppress allocation of sources. 969 * *pims will be set to the address of the retrieved or allocated source. 970 * 971 * SMPng: NOTE: may be called with locks held. 972 * Return 0 if successful, otherwise return a non-zero error code. 973 */ 974 static int 975 inm_get_source(struct in_multi *inm, const in_addr_t haddr, 976 const int noalloc, struct ip_msource **pims) 977 { 978 struct ip_msource find; 979 struct ip_msource *ims, *nims; 980 981 find.ims_haddr = haddr; 982 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 983 if (ims == NULL && !noalloc) { 984 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 985 return (ENOSPC); 986 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 987 M_NOWAIT | M_ZERO); 988 if (nims == NULL) 989 return (ENOMEM); 990 nims->ims_haddr = haddr; 991 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 992 ++inm->inm_nsrc; 993 ims = nims; 994 #ifdef KTR 995 CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__, 996 haddr, ims); 997 #endif 998 } 999 1000 *pims = ims; 1001 return (0); 1002 } 1003 1004 /* 1005 * Merge socket-layer source into IGMP-layer source. 1006 * If rollback is non-zero, perform the inverse of the merge. 1007 */ 1008 static void 1009 ims_merge(struct ip_msource *ims, const struct in_msource *lims, 1010 const int rollback) 1011 { 1012 int n = rollback ? -1 : 1; 1013 1014 if (lims->imsl_st[0] == MCAST_EXCLUDE) { 1015 CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x", 1016 __func__, n, ims->ims_haddr); 1017 ims->ims_st[1].ex -= n; 1018 } else if (lims->imsl_st[0] == MCAST_INCLUDE) { 1019 CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x", 1020 __func__, n, ims->ims_haddr); 1021 ims->ims_st[1].in -= n; 1022 } 1023 1024 if (lims->imsl_st[1] == MCAST_EXCLUDE) { 1025 CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x", 1026 __func__, n, ims->ims_haddr); 1027 ims->ims_st[1].ex += n; 1028 } else if (lims->imsl_st[1] == MCAST_INCLUDE) { 1029 CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x", 1030 __func__, n, ims->ims_haddr); 1031 ims->ims_st[1].in += n; 1032 } 1033 } 1034 1035 /* 1036 * Atomically update the global in_multi state, when a membership's 1037 * filter list is being updated in any way. 1038 * 1039 * imf is the per-inpcb-membership group filter pointer. 1040 * A fake imf may be passed for in-kernel consumers. 1041 * 1042 * XXX This is a candidate for a set-symmetric-difference style loop 1043 * which would eliminate the repeated lookup from root of ims nodes, 1044 * as they share the same key space. 1045 * 1046 * If any error occurred this function will back out of refcounts 1047 * and return a non-zero value. 1048 */ 1049 static int 1050 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1051 { 1052 struct ip_msource *ims, *nims; 1053 struct in_msource *lims; 1054 int schanged, error; 1055 int nsrc0, nsrc1; 1056 1057 schanged = 0; 1058 error = 0; 1059 nsrc1 = nsrc0 = 0; 1060 IN_MULTI_LIST_LOCK_ASSERT(); 1061 1062 /* 1063 * Update the source filters first, as this may fail. 1064 * Maintain count of in-mode filters at t0, t1. These are 1065 * used to work out if we transition into ASM mode or not. 1066 * Maintain a count of source filters whose state was 1067 * actually modified by this operation. 1068 */ 1069 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1070 lims = (struct in_msource *)ims; 1071 if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++; 1072 if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++; 1073 if (lims->imsl_st[0] == lims->imsl_st[1]) continue; 1074 error = inm_get_source(inm, lims->ims_haddr, 0, &nims); 1075 ++schanged; 1076 if (error) 1077 break; 1078 ims_merge(nims, lims, 0); 1079 } 1080 if (error) { 1081 struct ip_msource *bims; 1082 1083 RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) { 1084 lims = (struct in_msource *)ims; 1085 if (lims->imsl_st[0] == lims->imsl_st[1]) 1086 continue; 1087 (void)inm_get_source(inm, lims->ims_haddr, 1, &bims); 1088 if (bims == NULL) 1089 continue; 1090 ims_merge(bims, lims, 1); 1091 } 1092 goto out_reap; 1093 } 1094 1095 CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1", 1096 __func__, nsrc0, nsrc1); 1097 1098 /* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */ 1099 if (imf->imf_st[0] == imf->imf_st[1] && 1100 imf->imf_st[1] == MCAST_INCLUDE) { 1101 if (nsrc1 == 0) { 1102 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1103 --inm->inm_st[1].iss_in; 1104 } 1105 } 1106 1107 /* Handle filter mode transition on socket. */ 1108 if (imf->imf_st[0] != imf->imf_st[1]) { 1109 CTR3(KTR_IGMPV3, "%s: imf transition %d to %d", 1110 __func__, imf->imf_st[0], imf->imf_st[1]); 1111 1112 if (imf->imf_st[0] == MCAST_EXCLUDE) { 1113 CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__); 1114 --inm->inm_st[1].iss_ex; 1115 } else if (imf->imf_st[0] == MCAST_INCLUDE) { 1116 CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__); 1117 --inm->inm_st[1].iss_in; 1118 } 1119 1120 if (imf->imf_st[1] == MCAST_EXCLUDE) { 1121 CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__); 1122 inm->inm_st[1].iss_ex++; 1123 } else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) { 1124 CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__); 1125 inm->inm_st[1].iss_in++; 1126 } 1127 } 1128 1129 /* 1130 * Track inm filter state in terms of listener counts. 1131 * If there are any exclusive listeners, stack-wide 1132 * membership is exclusive. 1133 * Otherwise, if only inclusive listeners, stack-wide is inclusive. 1134 * If no listeners remain, state is undefined at t1, 1135 * and the IGMP lifecycle for this group should finish. 1136 */ 1137 if (inm->inm_st[1].iss_ex > 0) { 1138 CTR1(KTR_IGMPV3, "%s: transition to EX", __func__); 1139 inm->inm_st[1].iss_fmode = MCAST_EXCLUDE; 1140 } else if (inm->inm_st[1].iss_in > 0) { 1141 CTR1(KTR_IGMPV3, "%s: transition to IN", __func__); 1142 inm->inm_st[1].iss_fmode = MCAST_INCLUDE; 1143 } else { 1144 CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__); 1145 inm->inm_st[1].iss_fmode = MCAST_UNDEFINED; 1146 } 1147 1148 /* Decrement ASM listener count on transition out of ASM mode. */ 1149 if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) { 1150 if ((imf->imf_st[1] != MCAST_EXCLUDE) || 1151 (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) { 1152 CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__); 1153 --inm->inm_st[1].iss_asm; 1154 } 1155 } 1156 1157 /* Increment ASM listener count on transition to ASM mode. */ 1158 if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) { 1159 CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__); 1160 inm->inm_st[1].iss_asm++; 1161 } 1162 1163 CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm); 1164 inm_print(inm); 1165 1166 out_reap: 1167 if (schanged > 0) { 1168 CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__); 1169 inm_reap(inm); 1170 } 1171 return (error); 1172 } 1173 1174 /* 1175 * Mark an in_multi's filter set deltas as committed. 1176 * Called by IGMP after a state change has been enqueued. 1177 */ 1178 void 1179 inm_commit(struct in_multi *inm) 1180 { 1181 struct ip_msource *ims; 1182 1183 CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm); 1184 CTR1(KTR_IGMPV3, "%s: pre commit:", __func__); 1185 inm_print(inm); 1186 1187 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 1188 ims->ims_st[0] = ims->ims_st[1]; 1189 } 1190 inm->inm_st[0] = inm->inm_st[1]; 1191 } 1192 1193 /* 1194 * Reap unreferenced nodes from an in_multi's filter set. 1195 */ 1196 static void 1197 inm_reap(struct in_multi *inm) 1198 { 1199 struct ip_msource *ims, *tims; 1200 1201 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1202 if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 || 1203 ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 || 1204 ims->ims_stp != 0) 1205 continue; 1206 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1207 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1208 free(ims, M_IPMSOURCE); 1209 inm->inm_nsrc--; 1210 } 1211 } 1212 1213 /* 1214 * Purge all source nodes from an in_multi's filter set. 1215 */ 1216 static void 1217 inm_purge(struct in_multi *inm) 1218 { 1219 struct ip_msource *ims, *tims; 1220 1221 RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) { 1222 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 1223 RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims); 1224 free(ims, M_IPMSOURCE); 1225 inm->inm_nsrc--; 1226 } 1227 } 1228 1229 /* 1230 * Join a multicast group; unlocked entry point. 1231 * 1232 * SMPng: XXX: in_joingroup() is called from in_control() when Giant 1233 * is not held. Fortunately, ifp is unlikely to have been detached 1234 * at this point, so we assume it's OK to recurse. 1235 */ 1236 int 1237 in_joingroup(struct ifnet *ifp, const struct in_addr *gina, 1238 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1239 { 1240 int error; 1241 1242 IN_MULTI_LOCK(); 1243 error = in_joingroup_locked(ifp, gina, imf, pinm); 1244 IN_MULTI_UNLOCK(); 1245 1246 return (error); 1247 } 1248 1249 /* 1250 * Join a multicast group; real entry point. 1251 * 1252 * Only preserves atomicity at inm level. 1253 * NOTE: imf argument cannot be const due to sys/tree.h limitations. 1254 * 1255 * If the IGMP downcall fails, the group is not joined, and an error 1256 * code is returned. 1257 */ 1258 int 1259 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina, 1260 /*const*/ struct in_mfilter *imf, struct in_multi **pinm) 1261 { 1262 struct in_mfilter timf; 1263 struct in_multi *inm; 1264 int error; 1265 1266 IN_MULTI_LOCK_ASSERT(); 1267 IN_MULTI_LIST_UNLOCK_ASSERT(); 1268 1269 CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__, 1270 ntohl(gina->s_addr), ifp, ifp->if_xname); 1271 1272 error = 0; 1273 inm = NULL; 1274 1275 /* 1276 * If no imf was specified (i.e. kernel consumer), 1277 * fake one up and assume it is an ASM join. 1278 */ 1279 if (imf == NULL) { 1280 imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE); 1281 imf = &timf; 1282 } 1283 1284 error = in_getmulti(ifp, gina, &inm); 1285 if (error) { 1286 CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__); 1287 return (error); 1288 } 1289 IN_MULTI_LIST_LOCK(); 1290 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1291 error = inm_merge(inm, imf); 1292 if (error) { 1293 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1294 goto out_inm_release; 1295 } 1296 1297 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1298 error = igmp_change_state(inm); 1299 if (error) { 1300 CTR1(KTR_IGMPV3, "%s: failed to update source", __func__); 1301 goto out_inm_release; 1302 } 1303 1304 out_inm_release: 1305 if (error) { 1306 1307 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1308 inm_release_deferred(inm); 1309 } else { 1310 *pinm = inm; 1311 } 1312 IN_MULTI_LIST_UNLOCK(); 1313 1314 return (error); 1315 } 1316 1317 /* 1318 * Leave a multicast group; unlocked entry point. 1319 */ 1320 int 1321 in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1322 { 1323 int error; 1324 1325 IN_MULTI_LOCK(); 1326 error = in_leavegroup_locked(inm, imf); 1327 IN_MULTI_UNLOCK(); 1328 1329 return (error); 1330 } 1331 1332 /* 1333 * Leave a multicast group; real entry point. 1334 * All source filters will be expunged. 1335 * 1336 * Only preserves atomicity at inm level. 1337 * 1338 * Holding the write lock for the INP which contains imf 1339 * is highly advisable. We can't assert for it as imf does not 1340 * contain a back-pointer to the owning inp. 1341 * 1342 * Note: This is not the same as inm_release(*) as this function also 1343 * makes a state change downcall into IGMP. 1344 */ 1345 int 1346 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf) 1347 { 1348 struct in_mfilter timf; 1349 int error; 1350 1351 error = 0; 1352 1353 IN_MULTI_LOCK_ASSERT(); 1354 IN_MULTI_LIST_UNLOCK_ASSERT(); 1355 1356 CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__, 1357 inm, ntohl(inm->inm_addr.s_addr), 1358 (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname), 1359 imf); 1360 1361 /* 1362 * If no imf was specified (i.e. kernel consumer), 1363 * fake one up and assume it is an ASM join. 1364 */ 1365 if (imf == NULL) { 1366 imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED); 1367 imf = &timf; 1368 } 1369 1370 /* 1371 * Begin state merge transaction at IGMP layer. 1372 * 1373 * As this particular invocation should not cause any memory 1374 * to be allocated, and there is no opportunity to roll back 1375 * the transaction, it MUST NOT fail. 1376 */ 1377 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1378 IN_MULTI_LIST_LOCK(); 1379 error = inm_merge(inm, imf); 1380 KASSERT(error == 0, ("%s: failed to merge inm state", __func__)); 1381 1382 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1383 CURVNET_SET(inm->inm_ifp->if_vnet); 1384 error = igmp_change_state(inm); 1385 IF_ADDR_WLOCK(inm->inm_ifp); 1386 inm_release_deferred(inm); 1387 IF_ADDR_WUNLOCK(inm->inm_ifp); 1388 IN_MULTI_LIST_UNLOCK(); 1389 CURVNET_RESTORE(); 1390 if (error) 1391 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1392 1393 CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm); 1394 1395 return (error); 1396 } 1397 1398 /*#ifndef BURN_BRIDGES*/ 1399 /* 1400 * Join an IPv4 multicast group in (*,G) exclusive mode. 1401 * The group must be a 224.0.0.0/24 link-scope group. 1402 * This KPI is for legacy kernel consumers only. 1403 */ 1404 struct in_multi * 1405 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1406 { 1407 struct in_multi *pinm; 1408 int error; 1409 #ifdef INVARIANTS 1410 char addrbuf[INET_ADDRSTRLEN]; 1411 #endif 1412 1413 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1414 ("%s: %s not in 224.0.0.0/24", __func__, 1415 inet_ntoa_r(*ap, addrbuf))); 1416 1417 error = in_joingroup(ifp, ap, NULL, &pinm); 1418 if (error != 0) 1419 pinm = NULL; 1420 1421 return (pinm); 1422 } 1423 1424 /* 1425 * Block or unblock an ASM multicast source on an inpcb. 1426 * This implements the delta-based API described in RFC 3678. 1427 * 1428 * The delta-based API applies only to exclusive-mode memberships. 1429 * An IGMP downcall will be performed. 1430 * 1431 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1432 * 1433 * Return 0 if successful, otherwise return an appropriate error code. 1434 */ 1435 static int 1436 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1437 { 1438 struct group_source_req gsr; 1439 sockunion_t *gsa, *ssa; 1440 struct ifnet *ifp; 1441 struct in_mfilter *imf; 1442 struct ip_moptions *imo; 1443 struct in_msource *ims; 1444 struct in_multi *inm; 1445 size_t idx; 1446 uint16_t fmode; 1447 int error, doblock; 1448 1449 ifp = NULL; 1450 error = 0; 1451 doblock = 0; 1452 1453 memset(&gsr, 0, sizeof(struct group_source_req)); 1454 gsa = (sockunion_t *)&gsr.gsr_group; 1455 ssa = (sockunion_t *)&gsr.gsr_source; 1456 1457 switch (sopt->sopt_name) { 1458 case IP_BLOCK_SOURCE: 1459 case IP_UNBLOCK_SOURCE: { 1460 struct ip_mreq_source mreqs; 1461 1462 error = sooptcopyin(sopt, &mreqs, 1463 sizeof(struct ip_mreq_source), 1464 sizeof(struct ip_mreq_source)); 1465 if (error) 1466 return (error); 1467 1468 gsa->sin.sin_family = AF_INET; 1469 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1470 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1471 1472 ssa->sin.sin_family = AF_INET; 1473 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1474 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 1475 1476 if (!in_nullhost(mreqs.imr_interface)) 1477 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1478 1479 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1480 doblock = 1; 1481 1482 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1483 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1484 break; 1485 } 1486 1487 case MCAST_BLOCK_SOURCE: 1488 case MCAST_UNBLOCK_SOURCE: 1489 error = sooptcopyin(sopt, &gsr, 1490 sizeof(struct group_source_req), 1491 sizeof(struct group_source_req)); 1492 if (error) 1493 return (error); 1494 1495 if (gsa->sin.sin_family != AF_INET || 1496 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1497 return (EINVAL); 1498 1499 if (ssa->sin.sin_family != AF_INET || 1500 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1501 return (EINVAL); 1502 1503 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1504 return (EADDRNOTAVAIL); 1505 1506 ifp = ifnet_byindex(gsr.gsr_interface); 1507 1508 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1509 doblock = 1; 1510 break; 1511 1512 default: 1513 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1514 __func__, sopt->sopt_name); 1515 return (EOPNOTSUPP); 1516 break; 1517 } 1518 1519 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1520 return (EINVAL); 1521 1522 /* 1523 * Check if we are actually a member of this group. 1524 */ 1525 imo = inp_findmoptions(inp); 1526 idx = imo_match_group(imo, ifp, &gsa->sa); 1527 if (idx == -1 || imo->imo_mfilters == NULL) { 1528 error = EADDRNOTAVAIL; 1529 goto out_inp_locked; 1530 } 1531 1532 KASSERT(imo->imo_mfilters != NULL, 1533 ("%s: imo_mfilters not allocated", __func__)); 1534 imf = &imo->imo_mfilters[idx]; 1535 inm = imo->imo_membership[idx]; 1536 1537 /* 1538 * Attempting to use the delta-based API on an 1539 * non exclusive-mode membership is an error. 1540 */ 1541 fmode = imf->imf_st[0]; 1542 if (fmode != MCAST_EXCLUDE) { 1543 error = EINVAL; 1544 goto out_inp_locked; 1545 } 1546 1547 /* 1548 * Deal with error cases up-front: 1549 * Asked to block, but already blocked; or 1550 * Asked to unblock, but nothing to unblock. 1551 * If adding a new block entry, allocate it. 1552 */ 1553 ims = imo_match_source(imo, idx, &ssa->sa); 1554 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1555 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1556 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1557 error = EADDRNOTAVAIL; 1558 goto out_inp_locked; 1559 } 1560 1561 INP_WLOCK_ASSERT(inp); 1562 1563 /* 1564 * Begin state merge transaction at socket layer. 1565 */ 1566 if (doblock) { 1567 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1568 ims = imf_graft(imf, fmode, &ssa->sin); 1569 if (ims == NULL) 1570 error = ENOMEM; 1571 } else { 1572 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1573 error = imf_prune(imf, &ssa->sin); 1574 } 1575 1576 if (error) { 1577 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1578 goto out_imf_rollback; 1579 } 1580 1581 /* 1582 * Begin state merge transaction at IGMP layer. 1583 */ 1584 IN_MULTI_LOCK(); 1585 IN_MULTI_LIST_LOCK(); 1586 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1587 error = inm_merge(inm, imf); 1588 if (error) { 1589 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1590 goto out_in_multi_locked; 1591 } 1592 1593 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1594 error = igmp_change_state(inm); 1595 if (error) 1596 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1597 1598 out_in_multi_locked: 1599 1600 IN_MULTI_UNLOCK(); 1601 IN_MULTI_UNLOCK(); 1602 out_imf_rollback: 1603 if (error) 1604 imf_rollback(imf); 1605 else 1606 imf_commit(imf); 1607 1608 imf_reap(imf); 1609 1610 out_inp_locked: 1611 INP_WUNLOCK(inp); 1612 return (error); 1613 } 1614 1615 /* 1616 * Given an inpcb, return its multicast options structure pointer. Accepts 1617 * an unlocked inpcb pointer, but will return it locked. May sleep. 1618 * 1619 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1620 * SMPng: NOTE: Returns with the INP write lock held. 1621 */ 1622 static struct ip_moptions * 1623 inp_findmoptions(struct inpcb *inp) 1624 { 1625 struct ip_moptions *imo; 1626 struct in_multi **immp; 1627 struct in_mfilter *imfp; 1628 size_t idx; 1629 1630 INP_WLOCK(inp); 1631 if (inp->inp_moptions != NULL) 1632 return (inp->inp_moptions); 1633 1634 INP_WUNLOCK(inp); 1635 1636 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1637 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1638 M_WAITOK | M_ZERO); 1639 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1640 M_INMFILTER, M_WAITOK); 1641 1642 imo->imo_multicast_ifp = NULL; 1643 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1644 imo->imo_multicast_vif = -1; 1645 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1646 imo->imo_multicast_loop = in_mcast_loop; 1647 imo->imo_num_memberships = 0; 1648 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1649 imo->imo_membership = immp; 1650 1651 /* Initialize per-group source filters. */ 1652 for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) 1653 imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE); 1654 imo->imo_mfilters = imfp; 1655 1656 INP_WLOCK(inp); 1657 if (inp->inp_moptions != NULL) { 1658 free(imfp, M_INMFILTER); 1659 free(immp, M_IPMOPTS); 1660 free(imo, M_IPMOPTS); 1661 return (inp->inp_moptions); 1662 } 1663 inp->inp_moptions = imo; 1664 return (imo); 1665 } 1666 1667 /* 1668 * Discard the IP multicast options (and source filters). To minimize 1669 * the amount of work done while holding locks such as the INP's 1670 * pcbinfo lock (which is used in the receive path), the free 1671 * operation is performed asynchronously in a separate task. 1672 * 1673 * SMPng: NOTE: assumes INP write lock is held. 1674 */ 1675 void 1676 inp_freemoptions(struct ip_moptions *imo, struct inpcbinfo *pcbinfo) 1677 { 1678 int wlock; 1679 1680 if (imo == NULL) 1681 return; 1682 1683 INP_INFO_LOCK_ASSERT(pcbinfo); 1684 wlock = INP_INFO_WLOCKED(pcbinfo); 1685 if (wlock) 1686 INP_INFO_WUNLOCK(pcbinfo); 1687 else 1688 INP_INFO_RUNLOCK(pcbinfo); 1689 1690 KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); 1691 IN_MULTI_LIST_LOCK(); 1692 STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link); 1693 IN_MULTI_LIST_UNLOCK(); 1694 taskqueue_enqueue(taskqueue_thread, &imo_gc_task); 1695 if (wlock) 1696 INP_INFO_WLOCK(pcbinfo); 1697 else 1698 INP_INFO_RLOCK(pcbinfo); 1699 } 1700 1701 static void 1702 inp_freemoptions_internal(struct ip_moptions *imo) 1703 { 1704 struct in_mfilter *imf; 1705 size_t idx, nmships; 1706 1707 nmships = imo->imo_num_memberships; 1708 for (idx = 0; idx < nmships; ++idx) { 1709 imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL; 1710 if (imf) 1711 imf_leave(imf); 1712 (void)in_leavegroup(imo->imo_membership[idx], imf); 1713 if (imf) 1714 imf_purge(imf); 1715 } 1716 1717 if (imo->imo_mfilters) 1718 free(imo->imo_mfilters, M_INMFILTER); 1719 free(imo->imo_membership, M_IPMOPTS); 1720 free(imo, M_IPMOPTS); 1721 } 1722 1723 static void 1724 inp_gcmoptions(void *context, int pending) 1725 { 1726 struct ip_moptions *imo; 1727 1728 IN_MULTI_LIST_LOCK(); 1729 while (!STAILQ_EMPTY(&imo_gc_list)) { 1730 imo = STAILQ_FIRST(&imo_gc_list); 1731 STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link); 1732 IN_MULTI_LIST_UNLOCK(); 1733 inp_freemoptions_internal(imo); 1734 IN_MULTI_LIST_LOCK(); 1735 } 1736 IN_MULTI_LIST_UNLOCK(); 1737 } 1738 1739 /* 1740 * Atomically get source filters on a socket for an IPv4 multicast group. 1741 * Called with INP lock held; returns with lock released. 1742 */ 1743 static int 1744 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) 1745 { 1746 struct __msfilterreq msfr; 1747 sockunion_t *gsa; 1748 struct ifnet *ifp; 1749 struct ip_moptions *imo; 1750 struct in_mfilter *imf; 1751 struct ip_msource *ims; 1752 struct in_msource *lims; 1753 struct sockaddr_in *psin; 1754 struct sockaddr_storage *ptss; 1755 struct sockaddr_storage *tss; 1756 int error; 1757 size_t idx, nsrcs, ncsrcs; 1758 1759 INP_WLOCK_ASSERT(inp); 1760 1761 imo = inp->inp_moptions; 1762 KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); 1763 1764 INP_WUNLOCK(inp); 1765 1766 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 1767 sizeof(struct __msfilterreq)); 1768 if (error) 1769 return (error); 1770 1771 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 1772 return (EINVAL); 1773 1774 ifp = ifnet_byindex(msfr.msfr_ifindex); 1775 if (ifp == NULL) 1776 return (EINVAL); 1777 1778 INP_WLOCK(inp); 1779 1780 /* 1781 * Lookup group on the socket. 1782 */ 1783 gsa = (sockunion_t *)&msfr.msfr_group; 1784 idx = imo_match_group(imo, ifp, &gsa->sa); 1785 if (idx == -1 || imo->imo_mfilters == NULL) { 1786 INP_WUNLOCK(inp); 1787 return (EADDRNOTAVAIL); 1788 } 1789 imf = &imo->imo_mfilters[idx]; 1790 1791 /* 1792 * Ignore memberships which are in limbo. 1793 */ 1794 if (imf->imf_st[1] == MCAST_UNDEFINED) { 1795 INP_WUNLOCK(inp); 1796 return (EAGAIN); 1797 } 1798 msfr.msfr_fmode = imf->imf_st[1]; 1799 1800 /* 1801 * If the user specified a buffer, copy out the source filter 1802 * entries to userland gracefully. 1803 * We only copy out the number of entries which userland 1804 * has asked for, but we always tell userland how big the 1805 * buffer really needs to be. 1806 */ 1807 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 1808 msfr.msfr_nsrcs = in_mcast_maxsocksrc; 1809 tss = NULL; 1810 if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { 1811 tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 1812 M_TEMP, M_NOWAIT | M_ZERO); 1813 if (tss == NULL) { 1814 INP_WUNLOCK(inp); 1815 return (ENOBUFS); 1816 } 1817 } 1818 1819 /* 1820 * Count number of sources in-mode at t0. 1821 * If buffer space exists and remains, copy out source entries. 1822 */ 1823 nsrcs = msfr.msfr_nsrcs; 1824 ncsrcs = 0; 1825 ptss = tss; 1826 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 1827 lims = (struct in_msource *)ims; 1828 if (lims->imsl_st[0] == MCAST_UNDEFINED || 1829 lims->imsl_st[0] != imf->imf_st[0]) 1830 continue; 1831 ++ncsrcs; 1832 if (tss != NULL && nsrcs > 0) { 1833 psin = (struct sockaddr_in *)ptss; 1834 psin->sin_family = AF_INET; 1835 psin->sin_len = sizeof(struct sockaddr_in); 1836 psin->sin_addr.s_addr = htonl(lims->ims_haddr); 1837 psin->sin_port = 0; 1838 ++ptss; 1839 --nsrcs; 1840 } 1841 } 1842 1843 INP_WUNLOCK(inp); 1844 1845 if (tss != NULL) { 1846 error = copyout(tss, msfr.msfr_srcs, 1847 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 1848 free(tss, M_TEMP); 1849 if (error) 1850 return (error); 1851 } 1852 1853 msfr.msfr_nsrcs = ncsrcs; 1854 error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); 1855 1856 return (error); 1857 } 1858 1859 /* 1860 * Return the IP multicast options in response to user getsockopt(). 1861 */ 1862 int 1863 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) 1864 { 1865 struct rm_priotracker in_ifa_tracker; 1866 struct ip_mreqn mreqn; 1867 struct ip_moptions *imo; 1868 struct ifnet *ifp; 1869 struct in_ifaddr *ia; 1870 int error, optval; 1871 u_char coptval; 1872 1873 INP_WLOCK(inp); 1874 imo = inp->inp_moptions; 1875 /* 1876 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 1877 * or is a divert socket, reject it. 1878 */ 1879 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 1880 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 1881 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) { 1882 INP_WUNLOCK(inp); 1883 return (EOPNOTSUPP); 1884 } 1885 1886 error = 0; 1887 switch (sopt->sopt_name) { 1888 case IP_MULTICAST_VIF: 1889 if (imo != NULL) 1890 optval = imo->imo_multicast_vif; 1891 else 1892 optval = -1; 1893 INP_WUNLOCK(inp); 1894 error = sooptcopyout(sopt, &optval, sizeof(int)); 1895 break; 1896 1897 case IP_MULTICAST_IF: 1898 memset(&mreqn, 0, sizeof(struct ip_mreqn)); 1899 if (imo != NULL) { 1900 ifp = imo->imo_multicast_ifp; 1901 if (!in_nullhost(imo->imo_multicast_addr)) { 1902 mreqn.imr_address = imo->imo_multicast_addr; 1903 } else if (ifp != NULL) { 1904 mreqn.imr_ifindex = ifp->if_index; 1905 IFP_TO_IA(ifp, ia, &in_ifa_tracker); 1906 if (ia != NULL) { 1907 mreqn.imr_address = 1908 IA_SIN(ia)->sin_addr; 1909 ifa_free(&ia->ia_ifa); 1910 } 1911 } 1912 } 1913 INP_WUNLOCK(inp); 1914 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 1915 error = sooptcopyout(sopt, &mreqn, 1916 sizeof(struct ip_mreqn)); 1917 } else { 1918 error = sooptcopyout(sopt, &mreqn.imr_address, 1919 sizeof(struct in_addr)); 1920 } 1921 break; 1922 1923 case IP_MULTICAST_TTL: 1924 if (imo == NULL) 1925 optval = coptval = IP_DEFAULT_MULTICAST_TTL; 1926 else 1927 optval = coptval = imo->imo_multicast_ttl; 1928 INP_WUNLOCK(inp); 1929 if (sopt->sopt_valsize == sizeof(u_char)) 1930 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1931 else 1932 error = sooptcopyout(sopt, &optval, sizeof(int)); 1933 break; 1934 1935 case IP_MULTICAST_LOOP: 1936 if (imo == NULL) 1937 optval = coptval = IP_DEFAULT_MULTICAST_LOOP; 1938 else 1939 optval = coptval = imo->imo_multicast_loop; 1940 INP_WUNLOCK(inp); 1941 if (sopt->sopt_valsize == sizeof(u_char)) 1942 error = sooptcopyout(sopt, &coptval, sizeof(u_char)); 1943 else 1944 error = sooptcopyout(sopt, &optval, sizeof(int)); 1945 break; 1946 1947 case IP_MSFILTER: 1948 if (imo == NULL) { 1949 error = EADDRNOTAVAIL; 1950 INP_WUNLOCK(inp); 1951 } else { 1952 error = inp_get_source_filters(inp, sopt); 1953 } 1954 break; 1955 1956 default: 1957 INP_WUNLOCK(inp); 1958 error = ENOPROTOOPT; 1959 break; 1960 } 1961 1962 INP_UNLOCK_ASSERT(inp); 1963 1964 return (error); 1965 } 1966 1967 /* 1968 * Look up the ifnet to use for a multicast group membership, 1969 * given the IPv4 address of an interface, and the IPv4 group address. 1970 * 1971 * This routine exists to support legacy multicast applications 1972 * which do not understand that multicast memberships are scoped to 1973 * specific physical links in the networking stack, or which need 1974 * to join link-scope groups before IPv4 addresses are configured. 1975 * 1976 * If inp is non-NULL, use this socket's current FIB number for any 1977 * required FIB lookup. 1978 * If ina is INADDR_ANY, look up the group address in the unicast FIB, 1979 * and use its ifp; usually, this points to the default next-hop. 1980 * 1981 * If the FIB lookup fails, attempt to use the first non-loopback 1982 * interface with multicast capability in the system as a 1983 * last resort. The legacy IPv4 ASM API requires that we do 1984 * this in order to allow groups to be joined when the routing 1985 * table has not yet been populated during boot. 1986 * 1987 * Returns NULL if no ifp could be found. 1988 * 1989 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. 1990 * FUTURE: Implement IPv4 source-address selection. 1991 */ 1992 static struct ifnet * 1993 inp_lookup_mcast_ifp(const struct inpcb *inp, 1994 const struct sockaddr_in *gsin, const struct in_addr ina) 1995 { 1996 struct rm_priotracker in_ifa_tracker; 1997 struct ifnet *ifp; 1998 struct nhop4_basic nh4; 1999 uint32_t fibnum; 2000 2001 KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__)); 2002 KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)), 2003 ("%s: not multicast", __func__)); 2004 2005 ifp = NULL; 2006 if (!in_nullhost(ina)) { 2007 INADDR_TO_IFP(ina, ifp); 2008 } else { 2009 fibnum = inp ? inp->inp_inc.inc_fibnum : 0; 2010 if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) 2011 ifp = nh4.nh_ifp; 2012 else { 2013 struct in_ifaddr *ia; 2014 struct ifnet *mifp; 2015 2016 mifp = NULL; 2017 IN_IFADDR_RLOCK(&in_ifa_tracker); 2018 TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) { 2019 mifp = ia->ia_ifp; 2020 if (!(mifp->if_flags & IFF_LOOPBACK) && 2021 (mifp->if_flags & IFF_MULTICAST)) { 2022 ifp = mifp; 2023 break; 2024 } 2025 } 2026 IN_IFADDR_RUNLOCK(&in_ifa_tracker); 2027 } 2028 } 2029 2030 return (ifp); 2031 } 2032 2033 /* 2034 * Join an IPv4 multicast group, possibly with a source. 2035 */ 2036 static int 2037 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2038 { 2039 struct group_source_req gsr; 2040 sockunion_t *gsa, *ssa; 2041 struct ifnet *ifp; 2042 struct in_mfilter *imf; 2043 struct ip_moptions *imo; 2044 struct in_multi *inm; 2045 struct in_msource *lims; 2046 size_t idx; 2047 int error, is_new; 2048 2049 ifp = NULL; 2050 imf = NULL; 2051 lims = NULL; 2052 error = 0; 2053 is_new = 0; 2054 2055 memset(&gsr, 0, sizeof(struct group_source_req)); 2056 gsa = (sockunion_t *)&gsr.gsr_group; 2057 gsa->ss.ss_family = AF_UNSPEC; 2058 ssa = (sockunion_t *)&gsr.gsr_source; 2059 ssa->ss.ss_family = AF_UNSPEC; 2060 2061 switch (sopt->sopt_name) { 2062 case IP_ADD_MEMBERSHIP: 2063 case IP_ADD_SOURCE_MEMBERSHIP: { 2064 struct ip_mreq_source mreqs; 2065 2066 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2067 error = sooptcopyin(sopt, &mreqs, 2068 sizeof(struct ip_mreq), 2069 sizeof(struct ip_mreq)); 2070 /* 2071 * Do argument switcharoo from ip_mreq into 2072 * ip_mreq_source to avoid using two instances. 2073 */ 2074 mreqs.imr_interface = mreqs.imr_sourceaddr; 2075 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2076 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2077 error = sooptcopyin(sopt, &mreqs, 2078 sizeof(struct ip_mreq_source), 2079 sizeof(struct ip_mreq_source)); 2080 } 2081 if (error) 2082 return (error); 2083 2084 gsa->sin.sin_family = AF_INET; 2085 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2086 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2087 2088 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2089 ssa->sin.sin_family = AF_INET; 2090 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2091 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2092 } 2093 2094 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2095 return (EINVAL); 2096 2097 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2098 mreqs.imr_interface); 2099 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2100 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2101 break; 2102 } 2103 2104 case MCAST_JOIN_GROUP: 2105 case MCAST_JOIN_SOURCE_GROUP: 2106 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2107 error = sooptcopyin(sopt, &gsr, 2108 sizeof(struct group_req), 2109 sizeof(struct group_req)); 2110 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2111 error = sooptcopyin(sopt, &gsr, 2112 sizeof(struct group_source_req), 2113 sizeof(struct group_source_req)); 2114 } 2115 if (error) 2116 return (error); 2117 2118 if (gsa->sin.sin_family != AF_INET || 2119 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2120 return (EINVAL); 2121 2122 /* 2123 * Overwrite the port field if present, as the sockaddr 2124 * being copied in may be matched with a binary comparison. 2125 */ 2126 gsa->sin.sin_port = 0; 2127 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2128 if (ssa->sin.sin_family != AF_INET || 2129 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2130 return (EINVAL); 2131 ssa->sin.sin_port = 0; 2132 } 2133 2134 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2135 return (EINVAL); 2136 2137 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2138 return (EADDRNOTAVAIL); 2139 ifp = ifnet_byindex(gsr.gsr_interface); 2140 break; 2141 2142 default: 2143 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2144 __func__, sopt->sopt_name); 2145 return (EOPNOTSUPP); 2146 break; 2147 } 2148 2149 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2150 return (EADDRNOTAVAIL); 2151 2152 imo = inp_findmoptions(inp); 2153 idx = imo_match_group(imo, ifp, &gsa->sa); 2154 if (idx == -1) { 2155 is_new = 1; 2156 } else { 2157 inm = imo->imo_membership[idx]; 2158 imf = &imo->imo_mfilters[idx]; 2159 if (ssa->ss.ss_family != AF_UNSPEC) { 2160 /* 2161 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2162 * is an error. On an existing inclusive membership, 2163 * it just adds the source to the filter list. 2164 */ 2165 if (imf->imf_st[1] != MCAST_INCLUDE) { 2166 error = EINVAL; 2167 goto out_inp_locked; 2168 } 2169 /* 2170 * Throw out duplicates. 2171 * 2172 * XXX FIXME: This makes a naive assumption that 2173 * even if entries exist for *ssa in this imf, 2174 * they will be rejected as dupes, even if they 2175 * are not valid in the current mode (in-mode). 2176 * 2177 * in_msource is transactioned just as for anything 2178 * else in SSM -- but note naive use of inm_graft() 2179 * below for allocating new filter entries. 2180 * 2181 * This is only an issue if someone mixes the 2182 * full-state SSM API with the delta-based API, 2183 * which is discouraged in the relevant RFCs. 2184 */ 2185 lims = imo_match_source(imo, idx, &ssa->sa); 2186 if (lims != NULL /*&& 2187 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2188 error = EADDRNOTAVAIL; 2189 goto out_inp_locked; 2190 } 2191 } else { 2192 /* 2193 * MCAST_JOIN_GROUP on an existing exclusive 2194 * membership is an error; return EADDRINUSE 2195 * to preserve 4.4BSD API idempotence, and 2196 * avoid tedious detour to code below. 2197 * NOTE: This is bending RFC 3678 a bit. 2198 * 2199 * On an existing inclusive membership, this is also 2200 * an error; if you want to change filter mode, 2201 * you must use the userland API setsourcefilter(). 2202 * XXX We don't reject this for imf in UNDEFINED 2203 * state at t1, because allocation of a filter 2204 * is atomic with allocation of a membership. 2205 */ 2206 error = EINVAL; 2207 if (imf->imf_st[1] == MCAST_EXCLUDE) 2208 error = EADDRINUSE; 2209 goto out_inp_locked; 2210 } 2211 } 2212 2213 /* 2214 * Begin state merge transaction at socket layer. 2215 */ 2216 INP_WLOCK_ASSERT(inp); 2217 2218 if (is_new) { 2219 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2220 error = imo_grow(imo); 2221 if (error) 2222 goto out_inp_locked; 2223 } 2224 /* 2225 * Allocate the new slot upfront so we can deal with 2226 * grafting the new source filter in same code path 2227 * as for join-source on existing membership. 2228 */ 2229 idx = imo->imo_num_memberships; 2230 imo->imo_membership[idx] = NULL; 2231 imo->imo_num_memberships++; 2232 KASSERT(imo->imo_mfilters != NULL, 2233 ("%s: imf_mfilters vector was not allocated", __func__)); 2234 imf = &imo->imo_mfilters[idx]; 2235 KASSERT(RB_EMPTY(&imf->imf_sources), 2236 ("%s: imf_sources not empty", __func__)); 2237 } 2238 2239 /* 2240 * Graft new source into filter list for this inpcb's 2241 * membership of the group. The in_multi may not have 2242 * been allocated yet if this is a new membership, however, 2243 * the in_mfilter slot will be allocated and must be initialized. 2244 * 2245 * Note: Grafting of exclusive mode filters doesn't happen 2246 * in this path. 2247 * XXX: Should check for non-NULL lims (node exists but may 2248 * not be in-mode) for interop with full-state API. 2249 */ 2250 if (ssa->ss.ss_family != AF_UNSPEC) { 2251 /* Membership starts in IN mode */ 2252 if (is_new) { 2253 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2254 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2255 } else { 2256 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2257 } 2258 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2259 if (lims == NULL) { 2260 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2261 __func__); 2262 error = ENOMEM; 2263 goto out_imo_free; 2264 } 2265 } else { 2266 /* No address specified; Membership starts in EX mode */ 2267 if (is_new) { 2268 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2269 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2270 } 2271 } 2272 2273 /* 2274 * Begin state merge transaction at IGMP layer. 2275 */ 2276 in_pcbref(inp); 2277 INP_WUNLOCK(inp); 2278 IN_MULTI_LOCK(); 2279 2280 if (is_new) { 2281 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2282 &inm); 2283 if (error) { 2284 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2285 __func__); 2286 IN_MULTI_LIST_UNLOCK(); 2287 goto out_imo_free; 2288 } 2289 imo->imo_membership[idx] = inm; 2290 } else { 2291 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2292 IN_MULTI_LIST_LOCK(); 2293 error = inm_merge(inm, imf); 2294 if (error) { 2295 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2296 __func__); 2297 IN_MULTI_LIST_UNLOCK(); 2298 goto out_in_multi_locked; 2299 } 2300 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2301 error = igmp_change_state(inm); 2302 IN_MULTI_LIST_UNLOCK(); 2303 if (error) { 2304 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2305 __func__); 2306 goto out_in_multi_locked; 2307 } 2308 } 2309 2310 out_in_multi_locked: 2311 2312 IN_MULTI_UNLOCK(); 2313 INP_WLOCK(inp); 2314 if (in_pcbrele_wlocked(inp)) 2315 return (ENXIO); 2316 if (error) { 2317 imf_rollback(imf); 2318 if (is_new) 2319 imf_purge(imf); 2320 else 2321 imf_reap(imf); 2322 } else { 2323 imf_commit(imf); 2324 } 2325 2326 out_imo_free: 2327 if (error && is_new) { 2328 imo->imo_membership[idx] = NULL; 2329 --imo->imo_num_memberships; 2330 } 2331 2332 out_inp_locked: 2333 INP_WUNLOCK(inp); 2334 return (error); 2335 } 2336 2337 /* 2338 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2339 */ 2340 static int 2341 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2342 { 2343 struct group_source_req gsr; 2344 struct ip_mreq_source mreqs; 2345 sockunion_t *gsa, *ssa; 2346 struct ifnet *ifp; 2347 struct in_mfilter *imf; 2348 struct ip_moptions *imo; 2349 struct in_msource *ims; 2350 struct in_multi *inm; 2351 size_t idx; 2352 int error, is_final; 2353 2354 ifp = NULL; 2355 error = 0; 2356 is_final = 1; 2357 2358 memset(&gsr, 0, sizeof(struct group_source_req)); 2359 gsa = (sockunion_t *)&gsr.gsr_group; 2360 gsa->ss.ss_family = AF_UNSPEC; 2361 ssa = (sockunion_t *)&gsr.gsr_source; 2362 ssa->ss.ss_family = AF_UNSPEC; 2363 2364 switch (sopt->sopt_name) { 2365 case IP_DROP_MEMBERSHIP: 2366 case IP_DROP_SOURCE_MEMBERSHIP: 2367 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2368 error = sooptcopyin(sopt, &mreqs, 2369 sizeof(struct ip_mreq), 2370 sizeof(struct ip_mreq)); 2371 /* 2372 * Swap interface and sourceaddr arguments, 2373 * as ip_mreq and ip_mreq_source are laid 2374 * out differently. 2375 */ 2376 mreqs.imr_interface = mreqs.imr_sourceaddr; 2377 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2378 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2379 error = sooptcopyin(sopt, &mreqs, 2380 sizeof(struct ip_mreq_source), 2381 sizeof(struct ip_mreq_source)); 2382 } 2383 if (error) 2384 return (error); 2385 2386 gsa->sin.sin_family = AF_INET; 2387 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2388 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2389 2390 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2391 ssa->sin.sin_family = AF_INET; 2392 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2393 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2394 } 2395 2396 /* 2397 * Attempt to look up hinted ifp from interface address. 2398 * Fallthrough with null ifp iff lookup fails, to 2399 * preserve 4.4BSD mcast API idempotence. 2400 * XXX NOTE WELL: The RFC 3678 API is preferred because 2401 * using an IPv4 address as a key is racy. 2402 */ 2403 if (!in_nullhost(mreqs.imr_interface)) 2404 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2405 2406 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2407 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2408 2409 break; 2410 2411 case MCAST_LEAVE_GROUP: 2412 case MCAST_LEAVE_SOURCE_GROUP: 2413 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2414 error = sooptcopyin(sopt, &gsr, 2415 sizeof(struct group_req), 2416 sizeof(struct group_req)); 2417 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2418 error = sooptcopyin(sopt, &gsr, 2419 sizeof(struct group_source_req), 2420 sizeof(struct group_source_req)); 2421 } 2422 if (error) 2423 return (error); 2424 2425 if (gsa->sin.sin_family != AF_INET || 2426 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2427 return (EINVAL); 2428 2429 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2430 if (ssa->sin.sin_family != AF_INET || 2431 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2432 return (EINVAL); 2433 } 2434 2435 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2436 return (EADDRNOTAVAIL); 2437 2438 ifp = ifnet_byindex(gsr.gsr_interface); 2439 2440 if (ifp == NULL) 2441 return (EADDRNOTAVAIL); 2442 break; 2443 2444 default: 2445 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2446 __func__, sopt->sopt_name); 2447 return (EOPNOTSUPP); 2448 break; 2449 } 2450 2451 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2452 return (EINVAL); 2453 2454 /* 2455 * Find the membership in the membership array. 2456 */ 2457 imo = inp_findmoptions(inp); 2458 idx = imo_match_group(imo, ifp, &gsa->sa); 2459 if (idx == -1) { 2460 error = EADDRNOTAVAIL; 2461 goto out_inp_locked; 2462 } 2463 inm = imo->imo_membership[idx]; 2464 imf = &imo->imo_mfilters[idx]; 2465 2466 if (ssa->ss.ss_family != AF_UNSPEC) 2467 is_final = 0; 2468 2469 /* 2470 * Begin state merge transaction at socket layer. 2471 */ 2472 INP_WLOCK_ASSERT(inp); 2473 2474 /* 2475 * If we were instructed only to leave a given source, do so. 2476 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2477 */ 2478 if (is_final) { 2479 imf_leave(imf); 2480 } else { 2481 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2482 error = EADDRNOTAVAIL; 2483 goto out_inp_locked; 2484 } 2485 ims = imo_match_source(imo, idx, &ssa->sa); 2486 if (ims == NULL) { 2487 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2488 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2489 error = EADDRNOTAVAIL; 2490 goto out_inp_locked; 2491 } 2492 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2493 error = imf_prune(imf, &ssa->sin); 2494 if (error) { 2495 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2496 __func__); 2497 goto out_inp_locked; 2498 } 2499 } 2500 2501 /* 2502 * Begin state merge transaction at IGMP layer. 2503 */ 2504 IN_MULTI_LOCK(); 2505 2506 if (is_final) { 2507 /* 2508 * Give up the multicast address record to which 2509 * the membership points. 2510 */ 2511 (void)in_leavegroup_locked(inm, imf); 2512 } else { 2513 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2514 IN_MULTI_LIST_LOCK(); 2515 error = inm_merge(inm, imf); 2516 if (error) { 2517 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2518 __func__); 2519 goto out_in_multi_locked; 2520 } 2521 2522 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2523 error = igmp_change_state(inm); 2524 IN_MULTI_LIST_UNLOCK(); 2525 if (error) { 2526 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2527 __func__); 2528 } 2529 } 2530 2531 out_in_multi_locked: 2532 2533 IN_MULTI_UNLOCK(); 2534 2535 if (error) 2536 imf_rollback(imf); 2537 else 2538 imf_commit(imf); 2539 2540 imf_reap(imf); 2541 2542 if (is_final) { 2543 /* Remove the gap in the membership and filter array. */ 2544 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2545 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2546 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2547 } 2548 imo->imo_num_memberships--; 2549 } 2550 2551 out_inp_locked: 2552 INP_WUNLOCK(inp); 2553 return (error); 2554 } 2555 2556 /* 2557 * Select the interface for transmitting IPv4 multicast datagrams. 2558 * 2559 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2560 * may be passed to this socket option. An address of INADDR_ANY or an 2561 * interface index of 0 is used to remove a previous selection. 2562 * When no interface is selected, one is chosen for every send. 2563 */ 2564 static int 2565 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2566 { 2567 struct in_addr addr; 2568 struct ip_mreqn mreqn; 2569 struct ifnet *ifp; 2570 struct ip_moptions *imo; 2571 int error; 2572 2573 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2574 /* 2575 * An interface index was specified using the 2576 * Linux-derived ip_mreqn structure. 2577 */ 2578 error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), 2579 sizeof(struct ip_mreqn)); 2580 if (error) 2581 return (error); 2582 2583 if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex) 2584 return (EINVAL); 2585 2586 if (mreqn.imr_ifindex == 0) { 2587 ifp = NULL; 2588 } else { 2589 ifp = ifnet_byindex(mreqn.imr_ifindex); 2590 if (ifp == NULL) 2591 return (EADDRNOTAVAIL); 2592 } 2593 } else { 2594 /* 2595 * An interface was specified by IPv4 address. 2596 * This is the traditional BSD usage. 2597 */ 2598 error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), 2599 sizeof(struct in_addr)); 2600 if (error) 2601 return (error); 2602 if (in_nullhost(addr)) { 2603 ifp = NULL; 2604 } else { 2605 INADDR_TO_IFP(addr, ifp); 2606 if (ifp == NULL) 2607 return (EADDRNOTAVAIL); 2608 } 2609 CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp, 2610 ntohl(addr.s_addr)); 2611 } 2612 2613 /* Reject interfaces which do not support multicast. */ 2614 if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) 2615 return (EOPNOTSUPP); 2616 2617 imo = inp_findmoptions(inp); 2618 imo->imo_multicast_ifp = ifp; 2619 imo->imo_multicast_addr.s_addr = INADDR_ANY; 2620 INP_WUNLOCK(inp); 2621 2622 return (0); 2623 } 2624 2625 /* 2626 * Atomically set source filters on a socket for an IPv4 multicast group. 2627 * 2628 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 2629 */ 2630 static int 2631 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) 2632 { 2633 struct __msfilterreq msfr; 2634 sockunion_t *gsa; 2635 struct ifnet *ifp; 2636 struct in_mfilter *imf; 2637 struct ip_moptions *imo; 2638 struct in_multi *inm; 2639 size_t idx; 2640 int error; 2641 2642 error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), 2643 sizeof(struct __msfilterreq)); 2644 if (error) 2645 return (error); 2646 2647 if (msfr.msfr_nsrcs > in_mcast_maxsocksrc) 2648 return (ENOBUFS); 2649 2650 if ((msfr.msfr_fmode != MCAST_EXCLUDE && 2651 msfr.msfr_fmode != MCAST_INCLUDE)) 2652 return (EINVAL); 2653 2654 if (msfr.msfr_group.ss_family != AF_INET || 2655 msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) 2656 return (EINVAL); 2657 2658 gsa = (sockunion_t *)&msfr.msfr_group; 2659 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2660 return (EINVAL); 2661 2662 gsa->sin.sin_port = 0; /* ignore port */ 2663 2664 if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) 2665 return (EADDRNOTAVAIL); 2666 2667 ifp = ifnet_byindex(msfr.msfr_ifindex); 2668 if (ifp == NULL) 2669 return (EADDRNOTAVAIL); 2670 2671 /* 2672 * Take the INP write lock. 2673 * Check if this socket is a member of this group. 2674 */ 2675 imo = inp_findmoptions(inp); 2676 idx = imo_match_group(imo, ifp, &gsa->sa); 2677 if (idx == -1 || imo->imo_mfilters == NULL) { 2678 error = EADDRNOTAVAIL; 2679 goto out_inp_locked; 2680 } 2681 inm = imo->imo_membership[idx]; 2682 imf = &imo->imo_mfilters[idx]; 2683 2684 /* 2685 * Begin state merge transaction at socket layer. 2686 */ 2687 INP_WLOCK_ASSERT(inp); 2688 2689 imf->imf_st[1] = msfr.msfr_fmode; 2690 2691 /* 2692 * Apply any new source filters, if present. 2693 * Make a copy of the user-space source vector so 2694 * that we may copy them with a single copyin. This 2695 * allows us to deal with page faults up-front. 2696 */ 2697 if (msfr.msfr_nsrcs > 0) { 2698 struct in_msource *lims; 2699 struct sockaddr_in *psin; 2700 struct sockaddr_storage *kss, *pkss; 2701 int i; 2702 2703 INP_WUNLOCK(inp); 2704 2705 CTR2(KTR_IGMPV3, "%s: loading %lu source list entries", 2706 __func__, (unsigned long)msfr.msfr_nsrcs); 2707 kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, 2708 M_TEMP, M_WAITOK); 2709 error = copyin(msfr.msfr_srcs, kss, 2710 sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); 2711 if (error) { 2712 free(kss, M_TEMP); 2713 return (error); 2714 } 2715 2716 INP_WLOCK(inp); 2717 2718 /* 2719 * Mark all source filters as UNDEFINED at t1. 2720 * Restore new group filter mode, as imf_leave() 2721 * will set it to INCLUDE. 2722 */ 2723 imf_leave(imf); 2724 imf->imf_st[1] = msfr.msfr_fmode; 2725 2726 /* 2727 * Update socket layer filters at t1, lazy-allocating 2728 * new entries. This saves a bunch of memory at the 2729 * cost of one RB_FIND() per source entry; duplicate 2730 * entries in the msfr_nsrcs vector are ignored. 2731 * If we encounter an error, rollback transaction. 2732 * 2733 * XXX This too could be replaced with a set-symmetric 2734 * difference like loop to avoid walking from root 2735 * every time, as the key space is common. 2736 */ 2737 for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) { 2738 psin = (struct sockaddr_in *)pkss; 2739 if (psin->sin_family != AF_INET) { 2740 error = EAFNOSUPPORT; 2741 break; 2742 } 2743 if (psin->sin_len != sizeof(struct sockaddr_in)) { 2744 error = EINVAL; 2745 break; 2746 } 2747 error = imf_get_source(imf, psin, &lims); 2748 if (error) 2749 break; 2750 lims->imsl_st[1] = imf->imf_st[1]; 2751 } 2752 free(kss, M_TEMP); 2753 } 2754 2755 if (error) 2756 goto out_imf_rollback; 2757 2758 INP_WLOCK_ASSERT(inp); 2759 IN_MULTI_LOCK(); 2760 IN_MULTI_LIST_LOCK(); 2761 2762 /* 2763 * Begin state merge transaction at IGMP layer. 2764 */ 2765 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2766 error = inm_merge(inm, imf); 2767 if (error) { 2768 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 2769 IN_MULTI_LIST_UNLOCK(); 2770 goto out_in_multi_locked; 2771 } 2772 2773 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2774 error = igmp_change_state(inm); 2775 IN_MULTI_LIST_UNLOCK(); 2776 if (error) 2777 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 2778 2779 out_in_multi_locked: 2780 2781 IN_MULTI_UNLOCK(); 2782 2783 out_imf_rollback: 2784 if (error) 2785 imf_rollback(imf); 2786 else 2787 imf_commit(imf); 2788 2789 imf_reap(imf); 2790 2791 out_inp_locked: 2792 INP_WUNLOCK(inp); 2793 return (error); 2794 } 2795 2796 /* 2797 * Set the IP multicast options in response to user setsockopt(). 2798 * 2799 * Many of the socket options handled in this function duplicate the 2800 * functionality of socket options in the regular unicast API. However, 2801 * it is not possible to merge the duplicate code, because the idempotence 2802 * of the IPv4 multicast part of the BSD Sockets API must be preserved; 2803 * the effects of these options must be treated as separate and distinct. 2804 * 2805 * SMPng: XXX: Unlocked read of inp_socket believed OK. 2806 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING 2807 * is refactored to no longer use vifs. 2808 */ 2809 int 2810 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt) 2811 { 2812 struct ip_moptions *imo; 2813 int error; 2814 2815 error = 0; 2816 2817 /* 2818 * If socket is neither of type SOCK_RAW or SOCK_DGRAM, 2819 * or is a divert socket, reject it. 2820 */ 2821 if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT || 2822 (inp->inp_socket->so_proto->pr_type != SOCK_RAW && 2823 inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) 2824 return (EOPNOTSUPP); 2825 2826 switch (sopt->sopt_name) { 2827 case IP_MULTICAST_VIF: { 2828 int vifi; 2829 /* 2830 * Select a multicast VIF for transmission. 2831 * Only useful if multicast forwarding is active. 2832 */ 2833 if (legal_vif_num == NULL) { 2834 error = EOPNOTSUPP; 2835 break; 2836 } 2837 error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int)); 2838 if (error) 2839 break; 2840 if (!legal_vif_num(vifi) && (vifi != -1)) { 2841 error = EINVAL; 2842 break; 2843 } 2844 imo = inp_findmoptions(inp); 2845 imo->imo_multicast_vif = vifi; 2846 INP_WUNLOCK(inp); 2847 break; 2848 } 2849 2850 case IP_MULTICAST_IF: 2851 error = inp_set_multicast_if(inp, sopt); 2852 break; 2853 2854 case IP_MULTICAST_TTL: { 2855 u_char ttl; 2856 2857 /* 2858 * Set the IP time-to-live for outgoing multicast packets. 2859 * The original multicast API required a char argument, 2860 * which is inconsistent with the rest of the socket API. 2861 * We allow either a char or an int. 2862 */ 2863 if (sopt->sopt_valsize == sizeof(u_char)) { 2864 error = sooptcopyin(sopt, &ttl, sizeof(u_char), 2865 sizeof(u_char)); 2866 if (error) 2867 break; 2868 } else { 2869 u_int ittl; 2870 2871 error = sooptcopyin(sopt, &ittl, sizeof(u_int), 2872 sizeof(u_int)); 2873 if (error) 2874 break; 2875 if (ittl > 255) { 2876 error = EINVAL; 2877 break; 2878 } 2879 ttl = (u_char)ittl; 2880 } 2881 imo = inp_findmoptions(inp); 2882 imo->imo_multicast_ttl = ttl; 2883 INP_WUNLOCK(inp); 2884 break; 2885 } 2886 2887 case IP_MULTICAST_LOOP: { 2888 u_char loop; 2889 2890 /* 2891 * Set the loopback flag for outgoing multicast packets. 2892 * Must be zero or one. The original multicast API required a 2893 * char argument, which is inconsistent with the rest 2894 * of the socket API. We allow either a char or an int. 2895 */ 2896 if (sopt->sopt_valsize == sizeof(u_char)) { 2897 error = sooptcopyin(sopt, &loop, sizeof(u_char), 2898 sizeof(u_char)); 2899 if (error) 2900 break; 2901 } else { 2902 u_int iloop; 2903 2904 error = sooptcopyin(sopt, &iloop, sizeof(u_int), 2905 sizeof(u_int)); 2906 if (error) 2907 break; 2908 loop = (u_char)iloop; 2909 } 2910 imo = inp_findmoptions(inp); 2911 imo->imo_multicast_loop = !!loop; 2912 INP_WUNLOCK(inp); 2913 break; 2914 } 2915 2916 case IP_ADD_MEMBERSHIP: 2917 case IP_ADD_SOURCE_MEMBERSHIP: 2918 case MCAST_JOIN_GROUP: 2919 case MCAST_JOIN_SOURCE_GROUP: 2920 error = inp_join_group(inp, sopt); 2921 break; 2922 2923 case IP_DROP_MEMBERSHIP: 2924 case IP_DROP_SOURCE_MEMBERSHIP: 2925 case MCAST_LEAVE_GROUP: 2926 case MCAST_LEAVE_SOURCE_GROUP: 2927 error = inp_leave_group(inp, sopt); 2928 break; 2929 2930 case IP_BLOCK_SOURCE: 2931 case IP_UNBLOCK_SOURCE: 2932 case MCAST_BLOCK_SOURCE: 2933 case MCAST_UNBLOCK_SOURCE: 2934 error = inp_block_unblock_source(inp, sopt); 2935 break; 2936 2937 case IP_MSFILTER: 2938 error = inp_set_source_filters(inp, sopt); 2939 break; 2940 2941 default: 2942 error = EOPNOTSUPP; 2943 break; 2944 } 2945 2946 INP_UNLOCK_ASSERT(inp); 2947 2948 return (error); 2949 } 2950 2951 /* 2952 * Expose IGMP's multicast filter mode and source list(s) to userland, 2953 * keyed by (ifindex, group). 2954 * The filter mode is written out as a uint32_t, followed by 2955 * 0..n of struct in_addr. 2956 * For use by ifmcstat(8). 2957 * SMPng: NOTE: unlocked read of ifindex space. 2958 */ 2959 static int 2960 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS) 2961 { 2962 struct in_addr src, group; 2963 struct ifnet *ifp; 2964 struct ifmultiaddr *ifma; 2965 struct in_multi *inm; 2966 struct ip_msource *ims; 2967 int *name; 2968 int retval; 2969 u_int namelen; 2970 uint32_t fmode, ifindex; 2971 2972 name = (int *)arg1; 2973 namelen = arg2; 2974 2975 if (req->newptr != NULL) 2976 return (EPERM); 2977 2978 if (namelen != 2) 2979 return (EINVAL); 2980 2981 ifindex = name[0]; 2982 if (ifindex <= 0 || ifindex > V_if_index) { 2983 CTR2(KTR_IGMPV3, "%s: ifindex %u out of range", 2984 __func__, ifindex); 2985 return (ENOENT); 2986 } 2987 2988 group.s_addr = name[1]; 2989 if (!IN_MULTICAST(ntohl(group.s_addr))) { 2990 CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast", 2991 __func__, ntohl(group.s_addr)); 2992 return (EINVAL); 2993 } 2994 2995 ifp = ifnet_byindex(ifindex); 2996 if (ifp == NULL) { 2997 CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u", 2998 __func__, ifindex); 2999 return (ENOENT); 3000 } 3001 3002 retval = sysctl_wire_old_buffer(req, 3003 sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr))); 3004 if (retval) 3005 return (retval); 3006 3007 IN_MULTI_LIST_LOCK(); 3008 3009 IF_ADDR_RLOCK(ifp); 3010 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3011 if (ifma->ifma_addr->sa_family != AF_INET || 3012 ifma->ifma_protospec == NULL) 3013 continue; 3014 inm = (struct in_multi *)ifma->ifma_protospec; 3015 if (!in_hosteq(inm->inm_addr, group)) 3016 continue; 3017 fmode = inm->inm_st[1].iss_fmode; 3018 retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); 3019 if (retval != 0) 3020 break; 3021 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 3022 CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__, 3023 ims->ims_haddr); 3024 /* 3025 * Only copy-out sources which are in-mode. 3026 */ 3027 if (fmode != ims_get_mode(inm, ims, 1)) { 3028 CTR1(KTR_IGMPV3, "%s: skip non-in-mode", 3029 __func__); 3030 continue; 3031 } 3032 src.s_addr = htonl(ims->ims_haddr); 3033 retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr)); 3034 if (retval != 0) 3035 break; 3036 } 3037 } 3038 IF_ADDR_RUNLOCK(ifp); 3039 3040 IN_MULTI_LIST_UNLOCK(); 3041 3042 return (retval); 3043 } 3044 3045 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3) 3046 3047 static const char *inm_modestrs[] = { "un", "in", "ex" }; 3048 3049 static const char * 3050 inm_mode_str(const int mode) 3051 { 3052 3053 if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE) 3054 return (inm_modestrs[mode]); 3055 return ("??"); 3056 } 3057 3058 static const char *inm_statestrs[] = { 3059 "not-member", 3060 "silent", 3061 "idle", 3062 "lazy", 3063 "sleeping", 3064 "awakening", 3065 "query-pending", 3066 "sg-query-pending", 3067 "leaving" 3068 }; 3069 3070 static const char * 3071 inm_state_str(const int state) 3072 { 3073 3074 if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER) 3075 return (inm_statestrs[state]); 3076 return ("??"); 3077 } 3078 3079 /* 3080 * Dump an in_multi structure to the console. 3081 */ 3082 void 3083 inm_print(const struct in_multi *inm) 3084 { 3085 int t; 3086 char addrbuf[INET_ADDRSTRLEN]; 3087 3088 if ((ktr_mask & KTR_IGMPV3) == 0) 3089 return; 3090 3091 printf("%s: --- begin inm %p ---\n", __func__, inm); 3092 printf("addr %s ifp %p(%s) ifma %p\n", 3093 inet_ntoa_r(inm->inm_addr, addrbuf), 3094 inm->inm_ifp, 3095 inm->inm_ifp->if_xname, 3096 inm->inm_ifma); 3097 printf("timer %u state %s refcount %u scq.len %u\n", 3098 inm->inm_timer, 3099 inm_state_str(inm->inm_state), 3100 inm->inm_refcount, 3101 inm->inm_scq.mq_len); 3102 printf("igi %p nsrc %lu sctimer %u scrv %u\n", 3103 inm->inm_igi, 3104 inm->inm_nsrc, 3105 inm->inm_sctimer, 3106 inm->inm_scrv); 3107 for (t = 0; t < 2; t++) { 3108 printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t, 3109 inm_mode_str(inm->inm_st[t].iss_fmode), 3110 inm->inm_st[t].iss_asm, 3111 inm->inm_st[t].iss_ex, 3112 inm->inm_st[t].iss_in, 3113 inm->inm_st[t].iss_rec); 3114 } 3115 printf("%s: --- end inm %p ---\n", __func__, inm); 3116 } 3117 3118 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */ 3119 3120 void 3121 inm_print(const struct in_multi *inm) 3122 { 3123 3124 } 3125 3126 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */ 3127 3128 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp); 3129