1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2007-2009 Bruce Simpson. 5 * Copyright (c) 2005 Robert N. M. Watson. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote 17 * products derived from this software without specific prior written 18 * permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 /* 34 * IPv4 multicast socket, group, and socket option processing module. 
35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include <sys/param.h> 41 #include <sys/systm.h> 42 #include <sys/kernel.h> 43 #include <sys/lock.h> 44 #include <sys/malloc.h> 45 #include <sys/mbuf.h> 46 #include <sys/protosw.h> 47 #include <sys/rmlock.h> 48 #include <sys/socket.h> 49 #include <sys/socketvar.h> 50 #include <sys/protosw.h> 51 #include <sys/sysctl.h> 52 #include <sys/ktr.h> 53 #include <sys/taskqueue.h> 54 #include <sys/gtaskqueue.h> 55 #include <sys/tree.h> 56 57 #include <net/if.h> 58 #include <net/if_var.h> 59 #include <net/if_dl.h> 60 #include <net/route.h> 61 #include <net/vnet.h> 62 63 #include <net/ethernet.h> 64 65 #include <netinet/in.h> 66 #include <netinet/in_systm.h> 67 #include <netinet/in_fib.h> 68 #include <netinet/in_pcb.h> 69 #include <netinet/in_var.h> 70 #include <netinet/ip_var.h> 71 #include <netinet/igmp_var.h> 72 73 #ifndef KTR_IGMPV3 74 #define KTR_IGMPV3 KTR_INET 75 #endif 76 77 #ifndef __SOCKUNION_DECLARED 78 union sockunion { 79 struct sockaddr_storage ss; 80 struct sockaddr sa; 81 struct sockaddr_dl sdl; 82 struct sockaddr_in sin; 83 }; 84 typedef union sockunion sockunion_t; 85 #define __SOCKUNION_DECLARED 86 #endif /* __SOCKUNION_DECLARED */ 87 88 static MALLOC_DEFINE(M_INMFILTER, "in_mfilter", 89 "IPv4 multicast PCB-layer source filter"); 90 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); 91 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); 92 static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource", 93 "IPv4 multicast IGMP-layer source filter"); 94 95 /* 96 * Locking: 97 * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LIST_LOCK, IGMP_LOCK, IF_ADDR_LOCK. 98 * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however 99 * it can be taken by code in net/if.c also. 100 * - ip_moptions and in_mfilter are covered by the INP_WLOCK. 101 * 102 * struct in_multi is covered by IN_MULTI_LIST_LOCK. 
There isn't strictly 103 * any need for in_multi itself to be virtualized -- it is bound to an ifp 104 * anyway no matter what happens. 105 */ 106 struct mtx in_multi_list_mtx; 107 MTX_SYSINIT(in_multi_mtx, &in_multi_list_mtx, "in_multi_list_mtx", MTX_DEF); 108 109 struct mtx in_multi_free_mtx; 110 MTX_SYSINIT(in_multi_free_mtx, &in_multi_free_mtx, "in_multi_free_mtx", MTX_DEF); 111 112 struct sx in_multi_sx; 113 SX_SYSINIT(in_multi_sx, &in_multi_sx, "in_multi_sx"); 114 115 int ifma_restart; 116 117 /* 118 * Functions with non-static linkage defined in this file should be 119 * declared in in_var.h: 120 * imo_multi_filter() 121 * in_addmulti() 122 * in_delmulti() 123 * in_joingroup() 124 * in_joingroup_locked() 125 * in_leavegroup() 126 * in_leavegroup_locked() 127 * and ip_var.h: 128 * inp_freemoptions() 129 * inp_getmoptions() 130 * inp_setmoptions() 131 * 132 * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti() 133 * and in_delmulti(). 134 */ 135 static void imf_commit(struct in_mfilter *); 136 static int imf_get_source(struct in_mfilter *imf, 137 const struct sockaddr_in *psin, 138 struct in_msource **); 139 static struct in_msource * 140 imf_graft(struct in_mfilter *, const uint8_t, 141 const struct sockaddr_in *); 142 static void imf_leave(struct in_mfilter *); 143 static int imf_prune(struct in_mfilter *, const struct sockaddr_in *); 144 static void imf_purge(struct in_mfilter *); 145 static void imf_rollback(struct in_mfilter *); 146 static void imf_reap(struct in_mfilter *); 147 static int imo_grow(struct ip_moptions *); 148 static size_t imo_match_group(const struct ip_moptions *, 149 const struct ifnet *, const struct sockaddr *); 150 static struct in_msource * 151 imo_match_source(const struct ip_moptions *, const size_t, 152 const struct sockaddr *); 153 static void ims_merge(struct ip_msource *ims, 154 const struct in_msource *lims, const int rollback); 155 static int in_getmulti(struct ifnet *, const struct in_addr *, 156 struct 
in_multi **); 157 static int inm_get_source(struct in_multi *inm, const in_addr_t haddr, 158 const int noalloc, struct ip_msource **pims); 159 #ifdef KTR 160 static int inm_is_ifp_detached(const struct in_multi *); 161 #endif 162 static int inm_merge(struct in_multi *, /*const*/ struct in_mfilter *); 163 static void inm_purge(struct in_multi *); 164 static void inm_reap(struct in_multi *); 165 static void inm_release(struct in_multi *); 166 static struct ip_moptions * 167 inp_findmoptions(struct inpcb *); 168 static void inp_freemoptions_internal(struct ip_moptions *); 169 static void inp_gcmoptions(void *, int); 170 static int inp_get_source_filters(struct inpcb *, struct sockopt *); 171 static int inp_join_group(struct inpcb *, struct sockopt *); 172 static int inp_leave_group(struct inpcb *, struct sockopt *); 173 static struct ifnet * 174 inp_lookup_mcast_ifp(const struct inpcb *, 175 const struct sockaddr_in *, const struct in_addr); 176 static int inp_block_unblock_source(struct inpcb *, struct sockopt *); 177 static int inp_set_multicast_if(struct inpcb *, struct sockopt *); 178 static int inp_set_source_filters(struct inpcb *, struct sockopt *); 179 static int sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS); 180 181 static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, 182 "IPv4 multicast"); 183 184 static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER; 185 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc, 186 CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0, 187 "Max source filters per group"); 188 189 static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER; 190 SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc, 191 CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0, 192 "Max source filters per socket"); 193 194 int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP; 195 SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN, 196 &in_mcast_loop, 0, "Loopback multicast datagrams by default"); 197 198 static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters, 
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");

/*
 * Dead ip_moptions are queued here and reclaimed asynchronously by
 * imo_gc_task; see inp_gcmoptions().
 */
static STAILQ_HEAD(, ip_moptions) imo_gc_list =
    STAILQ_HEAD_INITIALIZER(imo_gc_list);
static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);

#ifdef KTR
/*
 * Inline function which wraps assertions for a valid ifp.
 * The ifnet layer will set the ifma's ifp pointer to NULL if the ifp
 * is detached.
 *
 * Returns non-zero when the underlying ifma has been orphaned from
 * its ifnet.  Only compiled under KTR; used by trace-only call sites.
 */
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
	struct ifnet *ifp;

	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
	ifp = inm->inm_ifma->ifma_ifp;
	if (ifp != NULL) {
		/*
		 * Sanity check that netinet's notion of ifp is the
		 * same as net's.
		 */
		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
	}

	return (ifp == NULL);
}
#endif

/*
 * Deferred in_multi release machinery: freeing an in_multi may require
 * sleeping (link-layer teardown), so zero-refcount records are queued on
 * inm_free_list and released from a grouptask context instead of inline.
 */
static struct grouptask free_gtask;
static struct in_multi_head inm_free_list;
static void inm_release_task(void *arg __unused);
static void inm_init(void)
{
	SLIST_INIT(&inm_free_list);
	taskqgroup_config_gtask_init(NULL, &free_gtask, inm_release_task, "inm release task");
}

SYSINIT(inm_init, SI_SUB_SMP + 1, SI_ORDER_FIRST,
	inm_init, NULL);


/*
 * Splice the list of dead in_multi records in *inmh onto the global
 * free list and kick the release task.  Safe to call with an empty
 * list; no work is scheduled in that case.
 */
void
inm_release_list_deferred(struct in_multi_head *inmh)
{

	if (SLIST_EMPTY(inmh))
		return;
	mtx_lock(&in_multi_free_mtx);
	SLIST_CONCAT(&inm_free_list, inmh, in_multi, inm_nrele);
	mtx_unlock(&in_multi_free_mtx);
	GROUPTASK_ENQUEUE(&free_gtask);
}

/*
 * Detach inm's link-layer state from its ifnet.  Caller must hold the
 * IF_ADDR write lock on inm->inm_ifp (asserted below).  Takes an extra
 * reference on the ifp; inm_release() drops it after teardown.
 */
void
inm_disconnect(struct in_multi *inm)
{
	struct ifnet *ifp;
	struct ifmultiaddr *ifma, *ll_ifma;

	ifp = inm->inm_ifp;
	IF_ADDR_WLOCK_ASSERT(ifp);
	ifma = inm->inm_ifma;

	/* Keep ifp alive until inm_release() finishes with it. */
	if_ref(ifp);
	TAILQ_REMOVE(&ifp->if_multiaddrs, ifma, ifma_link);
	MCDPRINTF("removed ifma: %p from %s\n", ifma, ifp->if_xname);
	if ((ll_ifma = ifma->ifma_llifma) != NULL) {
		MPASS(ifma != ll_ifma);
		ifma->ifma_llifma = NULL;
		MPASS(ll_ifma->ifma_llifma == NULL);
		MPASS(ll_ifma->ifma_ifp == ifp);
		if (--ll_ifma->ifma_refcount == 0) {
			TAILQ_REMOVE(&ifp->if_multiaddrs, ll_ifma, ifma_link);
			MCDPRINTF("removed ll_ifma: %p from %s\n", ll_ifma, ifp->if_xname);
			if_freemulti(ll_ifma);
			/*
			 * NOTE(review): ifma_restart is declared int but
			 * assigned 'true' here -- confirm this matches the
			 * flag's consumers in net/if.c.
			 */
			ifma_restart = true;
		}
	}
}

/*
 * Drop a reference to inm.  If it reaches zero, disconnect it from its
 * ifnet and hand it to the deferred-release task rather than freeing
 * inline.  Caller must hold IN_MULTI_LIST_LOCK (asserted).
 */
void
inm_release_deferred(struct in_multi *inm)
{
	struct in_multi_head tmp;

	IN_MULTI_LIST_LOCK_ASSERT();
	MPASS(inm->inm_refcount > 0);
	if (--inm->inm_refcount == 0) {
		SLIST_INIT(&tmp);
		inm_disconnect(inm);
		inm->inm_ifma->ifma_protospec = NULL;
		SLIST_INSERT_HEAD(&tmp, inm, inm_nrele);
		inm_release_list_deferred(&tmp);
	}
}

/*
 * Grouptask handler: drain the global free list and release each
 * in_multi under the IN_MULTI lock.
 */
static void
inm_release_task(void *arg __unused)
{
	struct in_multi_head inm_free_tmp;
	struct in_multi *inm, *tinm;

	/* Steal the whole list under the mutex, then work unlocked. */
	SLIST_INIT(&inm_free_tmp);
	mtx_lock(&in_multi_free_mtx);
	SLIST_CONCAT(&inm_free_tmp, &inm_free_list, in_multi, inm_nrele);
	mtx_unlock(&in_multi_free_mtx);
	IN_MULTI_LOCK();
	SLIST_FOREACH_SAFE(inm, &inm_free_tmp, inm_nrele, tinm) {
		SLIST_REMOVE_HEAD(&inm_free_tmp, inm_nrele);
		MPASS(inm);
		inm_release(inm);
	}
	IN_MULTI_UNLOCK();
}

/*
 * Initialize an in_mfilter structure to a known state at t0, t1
 * with an empty source filter list.
 */
static __inline void
imf_init(struct in_mfilter *imf, const int st0, const int st1)
{
	memset(imf, 0, sizeof(struct in_mfilter));
	RB_INIT(&imf->imf_sources);
	imf->imf_st[0] = st0;
	imf->imf_st[1] = st1;
}

/*
 * Function for looking up an in_multi record for an IPv4 multicast address
 * on a given interface. ifp must be valid. If no record found, return NULL.
 * The IN_MULTI_LIST_LOCK and IF_ADDR_LOCK on ifp must be held.
335 */ 336 struct in_multi * 337 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina) 338 { 339 struct ifmultiaddr *ifma; 340 struct in_multi *inm; 341 342 IN_MULTI_LIST_LOCK_ASSERT(); 343 IF_ADDR_LOCK_ASSERT(ifp); 344 345 inm = NULL; 346 TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { 347 if (ifma->ifma_addr->sa_family != AF_INET || 348 ifma->ifma_protospec == NULL) 349 continue; 350 inm = (struct in_multi *)ifma->ifma_protospec; 351 if (inm->inm_addr.s_addr == ina.s_addr) 352 break; 353 inm = NULL; 354 } 355 return (inm); 356 } 357 358 /* 359 * Wrapper for inm_lookup_locked(). 360 * The IF_ADDR_LOCK will be taken on ifp and released on return. 361 */ 362 struct in_multi * 363 inm_lookup(struct ifnet *ifp, const struct in_addr ina) 364 { 365 struct in_multi *inm; 366 367 IN_MULTI_LIST_LOCK_ASSERT(); 368 IF_ADDR_RLOCK(ifp); 369 inm = inm_lookup_locked(ifp, ina); 370 IF_ADDR_RUNLOCK(ifp); 371 372 return (inm); 373 } 374 375 /* 376 * Resize the ip_moptions vector to the next power-of-two minus 1. 377 * May be called with locks held; do not sleep. 378 */ 379 static int 380 imo_grow(struct ip_moptions *imo) 381 { 382 struct in_multi **nmships; 383 struct in_multi **omships; 384 struct in_mfilter *nmfilters; 385 struct in_mfilter *omfilters; 386 size_t idx; 387 size_t newmax; 388 size_t oldmax; 389 390 nmships = NULL; 391 nmfilters = NULL; 392 omships = imo->imo_membership; 393 omfilters = imo->imo_mfilters; 394 oldmax = imo->imo_max_memberships; 395 newmax = ((oldmax + 1) * 2) - 1; 396 397 if (newmax <= IP_MAX_MEMBERSHIPS) { 398 nmships = (struct in_multi **)realloc(omships, 399 sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT); 400 nmfilters = (struct in_mfilter *)realloc(omfilters, 401 sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT); 402 if (nmships != NULL && nmfilters != NULL) { 403 /* Initialize newly allocated source filter heads. 
*/ 404 for (idx = oldmax; idx < newmax; idx++) { 405 imf_init(&nmfilters[idx], MCAST_UNDEFINED, 406 MCAST_EXCLUDE); 407 } 408 imo->imo_max_memberships = newmax; 409 imo->imo_membership = nmships; 410 imo->imo_mfilters = nmfilters; 411 } 412 } 413 414 if (nmships == NULL || nmfilters == NULL) { 415 if (nmships != NULL) 416 free(nmships, M_IPMOPTS); 417 if (nmfilters != NULL) 418 free(nmfilters, M_INMFILTER); 419 return (ETOOMANYREFS); 420 } 421 422 return (0); 423 } 424 425 /* 426 * Find an IPv4 multicast group entry for this ip_moptions instance 427 * which matches the specified group, and optionally an interface. 428 * Return its index into the array, or -1 if not found. 429 */ 430 static size_t 431 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp, 432 const struct sockaddr *group) 433 { 434 const struct sockaddr_in *gsin; 435 struct in_multi **pinm; 436 int idx; 437 int nmships; 438 439 gsin = (const struct sockaddr_in *)group; 440 441 /* The imo_membership array may be lazy allocated. */ 442 if (imo->imo_membership == NULL || imo->imo_num_memberships == 0) 443 return (-1); 444 445 nmships = imo->imo_num_memberships; 446 pinm = &imo->imo_membership[0]; 447 for (idx = 0; idx < nmships; idx++, pinm++) { 448 if (*pinm == NULL) 449 continue; 450 if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) && 451 in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) { 452 break; 453 } 454 } 455 if (idx >= nmships) 456 idx = -1; 457 458 return (idx); 459 } 460 461 /* 462 * Find an IPv4 multicast source entry for this imo which matches 463 * the given group index for this socket, and source address. 464 * 465 * NOTE: This does not check if the entry is in-mode, merely if 466 * it exists, which may not be the desired behaviour. 
 */
static struct in_msource *
imo_match_source(const struct ip_moptions *imo, const size_t gidx,
    const struct sockaddr *src)
{
	struct ip_msource find;
	struct in_mfilter *imf;
	struct ip_msource *ims;
	const sockunion_t *psa;

	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
	    ("%s: invalid index %d\n", __func__, (int)gidx));

	/* The imo_mfilters array may be lazy allocated. */
	if (imo->imo_mfilters == NULL)
		return (NULL);
	imf = &imo->imo_mfilters[gidx];

	/* Source trees are keyed in host byte order. */
	psa = (const sockunion_t *)src;
	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);

	/* May be NULL if no matching source entry exists. */
	return ((struct in_msource *)ims);
}

/*
 * Perform filtering for multicast datagrams on a socket by group and source.
 *
 * Returns 0 (MCAST_PASS) if a datagram should be allowed through, or
 * various error codes if the socket was not a member of the group, or the
 * source was muted, etc.
 */
int
imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
    const struct sockaddr *group, const struct sockaddr *src)
{
	size_t gidx;
	struct in_msource *ims;
	int mode;

	KASSERT(ifp != NULL, ("%s: null ifp", __func__));

	gidx = imo_match_group(imo, ifp, group);
	if (gidx == -1)
		return (MCAST_NOTGMEMBER);

	/*
	 * Check if the source was included in an (S,G) join.
	 * Allow reception on exclusive memberships by default,
	 * reject reception on inclusive memberships by default.
	 * Exclude source only if an in-mode exclude filter exists.
	 * Include source only if an in-mode include filter exists.
	 * NOTE: We are comparing group state here at IGMP t1 (now)
	 * with socket-layer t0 (since last downcall).
	 */
	mode = imo->imo_mfilters[gidx].imf_st[1];
	ims = imo_match_source(imo, gidx, src);

	if ((ims == NULL && mode == MCAST_INCLUDE) ||
	    (ims != NULL && ims->imsl_st[0] != mode))
		return (MCAST_NOTSMEMBER);

	return (MCAST_PASS);
}

/*
 * Find and return a reference to an in_multi record for (ifp, group),
 * and bump its reference count.
 * If one does not exist, try to allocate it, and update link-layer multicast
 * filters on ifp to listen for group.
 * Assumes the IN_MULTI lock is held across the call.
 * Return 0 if successful, otherwise return an appropriate error code.
 */
static int
in_getmulti(struct ifnet *ifp, const struct in_addr *group,
    struct in_multi **pinm)
{
	struct sockaddr_in gsin;
	struct ifmultiaddr *ifma;
	struct in_ifinfo *ii;
	struct in_multi *inm;
	int error;

	IN_MULTI_LOCK_ASSERT();

	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
	IN_MULTI_LIST_LOCK();
	inm = inm_lookup(ifp, *group);
	if (inm != NULL) {
		/*
		 * If we already joined this group, just bump the
		 * refcount and return it.
		 */
		KASSERT(inm->inm_refcount >= 1,
		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
		inm_acquire_locked(inm);
		*pinm = inm;
	}
	IN_MULTI_LIST_UNLOCK();
	if (inm != NULL)
		return (0);

	memset(&gsin, 0, sizeof(gsin));
	gsin.sin_family = AF_INET;
	gsin.sin_len = sizeof(struct sockaddr_in);
	gsin.sin_addr = *group;

	/*
	 * Check if a link-layer group is already associated
	 * with this network-layer group on the given ifnet.
	 */
	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
	if (error != 0)
		return (error);

	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
	IN_MULTI_LIST_LOCK();
	IF_ADDR_WLOCK(ifp);

	/*
	 * If something other than netinet is occupying the link-layer
	 * group, print a meaningful error message and back out of
	 * the allocation.
	 * Otherwise, bump the refcount on the existing network-layer
	 * group association and return it.
	 */
	if (ifma->ifma_protospec != NULL) {
		inm = (struct in_multi *)ifma->ifma_protospec;
#ifdef INVARIANTS
		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
		    __func__));
		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
		    ("%s: ifma not AF_INET", __func__));
		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
		    !in_hosteq(inm->inm_addr, *group)) {
			char addrbuf[INET_ADDRSTRLEN];

			panic("%s: ifma %p is inconsistent with %p (%s)",
			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
		}
#endif
		inm_acquire_locked(inm);
		*pinm = inm;
		goto out_locked;
	}

	IF_ADDR_WLOCK_ASSERT(ifp);

	/*
	 * A new in_multi record is needed; allocate and initialize it.
	 * We DO NOT perform an IGMP join as the in_ layer may need to
	 * push an initial source list down to IGMP to support SSM.
	 *
	 * The initial source filter state is INCLUDE, {} as per the RFC.
	 */
	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
	if (inm == NULL) {
		/* Back out the link-layer membership taken above. */
		IF_ADDR_WUNLOCK(ifp);
		IN_MULTI_LIST_UNLOCK();
		if_delmulti_ifma(ifma);
		return (ENOMEM);
	}
	inm->inm_addr = *group;
	inm->inm_ifp = ifp;
	inm->inm_igi = ii->ii_igmp;
	inm->inm_ifma = ifma;
	inm->inm_refcount = 1;
	inm->inm_state = IGMP_NOT_MEMBER;
	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	RB_INIT(&inm->inm_srcs);

	/* Publish the new record on the ifma while both locks are held. */
	ifma->ifma_protospec = inm;

	*pinm = inm;
 out_locked:
	IF_ADDR_WUNLOCK(ifp);
	IN_MULTI_LIST_UNLOCK();
	return (0);
}

/*
 * Drop a reference to an in_multi record.
 *
 * If the refcount drops to 0, free the in_multi record and
 * delete the underlying link-layer membership.
 * Called only from the deferred-release task; the record has already
 * been disconnected from its ifnet by inm_disconnect().
 */
static void
inm_release(struct in_multi *inm)
{
	struct ifmultiaddr *ifma;
	struct ifnet *ifp;

	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
	MPASS(inm->inm_refcount == 0);
	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);

	ifma = inm->inm_ifma;
	ifp = inm->inm_ifp;

	/* XXX this access is not covered by IF_ADDR_LOCK */
	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
	if (ifp)
		CURVNET_SET(ifp->if_vnet);
	inm_purge(inm);
	free(inm, M_IPMADDR);

	if_delmulti_ifma_flags(ifma, 1);
	if (ifp) {
		CURVNET_RESTORE();
		/* Drop the reference taken by inm_disconnect(). */
		if_rele(ifp);
	}
}

/*
 * Clear recorded source entries for a group.
 * Used by the IGMP code. Caller must hold the IN_MULTI_LIST lock
 * (asserted in the function body).
 * FIXME: Should reap.
688 */ 689 void 690 inm_clear_recorded(struct in_multi *inm) 691 { 692 struct ip_msource *ims; 693 694 IN_MULTI_LIST_LOCK_ASSERT(); 695 696 RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) { 697 if (ims->ims_stp) { 698 ims->ims_stp = 0; 699 --inm->inm_st[1].iss_rec; 700 } 701 } 702 KASSERT(inm->inm_st[1].iss_rec == 0, 703 ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec)); 704 } 705 706 /* 707 * Record a source as pending for a Source-Group IGMPv3 query. 708 * This lives here as it modifies the shared tree. 709 * 710 * inm is the group descriptor. 711 * naddr is the address of the source to record in network-byte order. 712 * 713 * If the net.inet.igmp.sgalloc sysctl is non-zero, we will 714 * lazy-allocate a source node in response to an SG query. 715 * Otherwise, no allocation is performed. This saves some memory 716 * with the trade-off that the source will not be reported to the 717 * router if joined in the window between the query response and 718 * the group actually being joined on the local host. 719 * 720 * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed. 721 * This turns off the allocation of a recorded source entry if 722 * the group has not been joined. 723 * 724 * Return 0 if the source didn't exist or was already marked as recorded. 725 * Return 1 if the source was marked as recorded by this function. 726 * Return <0 if any error occurred (negated errno code). 
727 */ 728 int 729 inm_record_source(struct in_multi *inm, const in_addr_t naddr) 730 { 731 struct ip_msource find; 732 struct ip_msource *ims, *nims; 733 734 IN_MULTI_LIST_LOCK_ASSERT(); 735 736 find.ims_haddr = ntohl(naddr); 737 ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find); 738 if (ims && ims->ims_stp) 739 return (0); 740 if (ims == NULL) { 741 if (inm->inm_nsrc == in_mcast_maxgrpsrc) 742 return (-ENOSPC); 743 nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE, 744 M_NOWAIT | M_ZERO); 745 if (nims == NULL) 746 return (-ENOMEM); 747 nims->ims_haddr = find.ims_haddr; 748 RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims); 749 ++inm->inm_nsrc; 750 ims = nims; 751 } 752 753 /* 754 * Mark the source as recorded and update the recorded 755 * source count. 756 */ 757 ++ims->ims_stp; 758 ++inm->inm_st[1].iss_rec; 759 760 return (1); 761 } 762 763 /* 764 * Return a pointer to an in_msource owned by an in_mfilter, 765 * given its source address. 766 * Lazy-allocate if needed. If this is a new entry its filter state is 767 * undefined at t0. 768 * 769 * imf is the filter set being modified. 770 * haddr is the source address in *host* byte-order. 771 * 772 * SMPng: May be called with locks held; malloc must not block. 
773 */ 774 static int 775 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin, 776 struct in_msource **plims) 777 { 778 struct ip_msource find; 779 struct ip_msource *ims, *nims; 780 struct in_msource *lims; 781 int error; 782 783 error = 0; 784 ims = NULL; 785 lims = NULL; 786 787 /* key is host byte order */ 788 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 789 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 790 lims = (struct in_msource *)ims; 791 if (lims == NULL) { 792 if (imf->imf_nsrc == in_mcast_maxsocksrc) 793 return (ENOSPC); 794 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 795 M_NOWAIT | M_ZERO); 796 if (nims == NULL) 797 return (ENOMEM); 798 lims = (struct in_msource *)nims; 799 lims->ims_haddr = find.ims_haddr; 800 lims->imsl_st[0] = MCAST_UNDEFINED; 801 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 802 ++imf->imf_nsrc; 803 } 804 805 *plims = lims; 806 807 return (error); 808 } 809 810 /* 811 * Graft a source entry into an existing socket-layer filter set, 812 * maintaining any required invariants and checking allocations. 813 * 814 * The source is marked as being in the new filter mode at t1. 815 * 816 * Return the pointer to the new node, otherwise return NULL. 817 */ 818 static struct in_msource * 819 imf_graft(struct in_mfilter *imf, const uint8_t st1, 820 const struct sockaddr_in *psin) 821 { 822 struct ip_msource *nims; 823 struct in_msource *lims; 824 825 nims = malloc(sizeof(struct in_msource), M_INMFILTER, 826 M_NOWAIT | M_ZERO); 827 if (nims == NULL) 828 return (NULL); 829 lims = (struct in_msource *)nims; 830 lims->ims_haddr = ntohl(psin->sin_addr.s_addr); 831 lims->imsl_st[0] = MCAST_UNDEFINED; 832 lims->imsl_st[1] = st1; 833 RB_INSERT(ip_msource_tree, &imf->imf_sources, nims); 834 ++imf->imf_nsrc; 835 836 return (lims); 837 } 838 839 /* 840 * Prune a source entry from an existing socket-layer filter set, 841 * maintaining any required invariants and checking allocations. 
842 * 843 * The source is marked as being left at t1, it is not freed. 844 * 845 * Return 0 if no error occurred, otherwise return an errno value. 846 */ 847 static int 848 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin) 849 { 850 struct ip_msource find; 851 struct ip_msource *ims; 852 struct in_msource *lims; 853 854 /* key is host byte order */ 855 find.ims_haddr = ntohl(psin->sin_addr.s_addr); 856 ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find); 857 if (ims == NULL) 858 return (ENOENT); 859 lims = (struct in_msource *)ims; 860 lims->imsl_st[1] = MCAST_UNDEFINED; 861 return (0); 862 } 863 864 /* 865 * Revert socket-layer filter set deltas at t1 to t0 state. 866 */ 867 static void 868 imf_rollback(struct in_mfilter *imf) 869 { 870 struct ip_msource *ims, *tims; 871 struct in_msource *lims; 872 873 RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) { 874 lims = (struct in_msource *)ims; 875 if (lims->imsl_st[0] == lims->imsl_st[1]) { 876 /* no change at t1 */ 877 continue; 878 } else if (lims->imsl_st[0] != MCAST_UNDEFINED) { 879 /* revert change to existing source at t1 */ 880 lims->imsl_st[1] = lims->imsl_st[0]; 881 } else { 882 /* revert source added t1 */ 883 CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims); 884 RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims); 885 free(ims, M_INMFILTER); 886 imf->imf_nsrc--; 887 } 888 } 889 imf->imf_st[1] = imf->imf_st[0]; 890 } 891 892 /* 893 * Mark socket-layer filter set as INCLUDE {} at t1. 894 */ 895 static void 896 imf_leave(struct in_mfilter *imf) 897 { 898 struct ip_msource *ims; 899 struct in_msource *lims; 900 901 RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) { 902 lims = (struct in_msource *)ims; 903 lims->imsl_st[1] = MCAST_UNDEFINED; 904 } 905 imf->imf_st[1] = MCAST_INCLUDE; 906 } 907 908 /* 909 * Mark socket-layer filter set deltas as committed. 
 */
static void
imf_commit(struct in_mfilter *imf)
{
	struct ip_msource *ims;
	struct in_msource *lims;

	/* Promote every t1 state to t0; t1 becomes the new baseline. */
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		lims->imsl_st[0] = lims->imsl_st[1];
	}
	imf->imf_st[0] = imf->imf_st[1];
}

/*
 * Reap unreferenced sources from socket-layer filter set.
 * A source is unreferenced when it is undefined at both t0 and t1.
 */
static void
imf_reap(struct in_mfilter *imf)
{
	struct ip_msource *ims, *tims;
	struct in_msource *lims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
		lims = (struct in_msource *)ims;
		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
			free(ims, M_INMFILTER);
			imf->imf_nsrc--;
		}
	}
}

/*
 * Purge socket-layer filter set.
 * Frees every source node unconditionally and resets both filter
 * modes to undefined.
 */
static void
imf_purge(struct in_mfilter *imf)
{
	struct ip_msource *ims, *tims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
		free(ims, M_INMFILTER);
		imf->imf_nsrc--;
	}
	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
	KASSERT(RB_EMPTY(&imf->imf_sources),
	    ("%s: imf_sources not empty", __func__));
}

/*
 * Look up a source filter entry for a multicast group.
 *
 * inm is the group descriptor to work with.
 * haddr is the host-byte-order IPv4 address to look up.
 * noalloc may be non-zero to suppress allocation of sources.
 * *pims will be set to the address of the retrieved or allocated source
 * (or NULL when not found and noalloc was set).
 *
 * SMPng: NOTE: may be called with locks held.
 * Return 0 if successful, otherwise return a non-zero error code.
 */
static int
inm_get_source(struct in_multi *inm, const in_addr_t haddr,
    const int noalloc, struct ip_msource **pims)
{
	struct ip_msource find;
	struct ip_msource *ims, *nims;

	find.ims_haddr = haddr;
	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
	if (ims == NULL && !noalloc) {
		/* Lazy-allocate, subject to the per-group source limit. */
		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
			return (ENOSPC);
		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
		    M_NOWAIT | M_ZERO);
		if (nims == NULL)
			return (ENOMEM);
		nims->ims_haddr = haddr;
		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
		++inm->inm_nsrc;
		ims = nims;
#ifdef KTR
		CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
		    haddr, ims);
#endif
	}

	*pims = ims;
	return (0);
}

/*
 * Merge socket-layer source into IGMP-layer source.
 * If rollback is non-zero, perform the inverse of the merge.
 * Adjusts the per-source t1 include/exclude listener counts by
 * subtracting the t0 contribution and adding the t1 contribution
 * (with the sign flipped on rollback).
 */
static void
ims_merge(struct ip_msource *ims, const struct in_msource *lims,
    const int rollback)
{
	/* n negated on rollback makes each += / -= self-inverse. */
	int n = rollback ? -1 : 1;

	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].ex -= n;
	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].in -= n;
	}

	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].ex += n;
	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
		CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
		    __func__, n, ims->ims_haddr);
		ims->ims_st[1].in += n;
	}
}

/*
 * Atomically update the global in_multi state, when a membership's
 * filter list is being updated in any way.
 *
 * imf is the per-inpcb-membership group filter pointer.
 * A fake imf may be passed for in-kernel consumers.
 *
 * XXX This is a candidate for a set-symmetric-difference style loop
 * which would eliminate the repeated lookup from root of ims nodes,
 * as they share the same key space.
 *
 * If any error occurred this function will back out of refcounts
 * and return a non-zero value.
 */
static int
inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct ip_msource	*ims, *nims;
	struct in_msource	*lims;
	int			 schanged, error;
	int			 nsrc0, nsrc1;

	schanged = 0;
	error = 0;
	nsrc1 = nsrc0 = 0;
	IN_MULTI_LIST_LOCK_ASSERT();

	/*
	 * Update the source filters first, as this may fail.
	 * Maintain count of in-mode filters at t0, t1.  These are
	 * used to work out if we transition into ASM mode or not.
	 * Maintain a count of source filters whose state was
	 * actually modified by this operation.
	 */
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == imf->imf_st[0])
			nsrc0++;
		if (lims->imsl_st[1] == imf->imf_st[1])
			nsrc1++;
		/* Skip sources whose state did not change. */
		if (lims->imsl_st[0] == lims->imsl_st[1])
			continue;
		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
		++schanged;
		if (error)
			break;
		ims_merge(nims, lims, 0);
	}
	if (error) {
		struct ip_msource *bims;

		/*
		 * Back out every delta already applied, walking in
		 * reverse from the node that failed.  Lookups here use
		 * noalloc; a missing node simply has nothing to undo.
		 */
		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
			lims = (struct in_msource *)ims;
			if (lims->imsl_st[0] == lims->imsl_st[1])
				continue;
			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
			if (bims == NULL)
				continue;
			ims_merge(bims, lims, 1);
		}
		goto out_reap;
	}

	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
	    __func__, nsrc0, nsrc1);

	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
	if (imf->imf_st[0] == imf->imf_st[1] &&
	    imf->imf_st[1] == MCAST_INCLUDE) {
		if (nsrc1 == 0) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}
	}

	/* Handle filter mode transition on socket. */
	if (imf->imf_st[0] != imf->imf_st[1]) {
		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
		    __func__, imf->imf_st[0], imf->imf_st[1]);

		if (imf->imf_st[0] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
			--inm->inm_st[1].iss_ex;
		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
			--inm->inm_st[1].iss_in;
		}

		if (imf->imf_st[1] == MCAST_EXCLUDE) {
			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
			inm->inm_st[1].iss_ex++;
		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
			inm->inm_st[1].iss_in++;
		}
	}

	/*
	 * Track inm filter state in terms of listener counts.
	 * If there are any exclusive listeners, stack-wide
	 * membership is exclusive.
	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
	 * If no listeners remain, state is undefined at t1,
	 * and the IGMP lifecycle for this group should finish.
	 */
	if (inm->inm_st[1].iss_ex > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
	} else if (inm->inm_st[1].iss_in > 0) {
		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
	} else {
		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
	}

	/* Decrement ASM listener count on transition out of ASM mode. */
	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
			--inm->inm_st[1].iss_asm;
		}
	}

	/* Increment ASM listener count on transition to ASM mode. */
	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
		inm->inm_st[1].iss_asm++;
	}

	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
	inm_print(inm);

out_reap:
	/* Reap is reached on both success and rollback paths. */
	if (schanged > 0) {
		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
		inm_reap(inm);
	}
	return (error);
}

/*
 * Mark an in_multi's filter set deltas as committed.
 * Called by IGMP after a state change has been enqueued.
 */
void
inm_commit(struct in_multi *inm)
{
	struct ip_msource	*ims;

	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
	inm_print(inm);

	/* Promote pending (t1) counters to committed (t0). */
	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
		ims->ims_st[0] = ims->ims_st[1];
	}
	inm->inm_st[0] = inm->inm_st[1];
}

/*
 * Reap unreferenced nodes from an in_multi's filter set.
 */
static void
inm_reap(struct in_multi *inm)
{
	struct ip_msource	*ims, *tims;

	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		/*
		 * Keep any source still referenced at t0 or t1, or with
		 * a non-zero ims_stp (pending transmission state).
		 */
		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
		    ims->ims_stp != 0)
			continue;
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Purge all source nodes from an in_multi's filter set.
 */
static void
inm_purge(struct in_multi *inm)
{
	struct ip_msource	*ims, *tims;

	/* Unconditionally free every source node. */
	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
		free(ims, M_IPMSOURCE);
		inm->inm_nsrc--;
	}
}

/*
 * Join a multicast group; unlocked entry point.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held.  Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int error;

	IN_MULTI_LOCK();
	error = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Join a multicast group; real entry point.
 *
 * Only preserves atomicity at inm level.
 * NOTE: imf argument cannot be const due to sys/tree.h limitations.
 *
 * If the IGMP downcall fails, the group is not joined, and an error
 * code is returned.
 */
int
in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	struct in_mfilter	 timf;
	struct in_multi		*inm;
	int			 error;

	IN_MULTI_LOCK_ASSERT();
	IN_MULTI_LIST_UNLOCK_ASSERT();

	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
	    ntohl(gina->s_addr), ifp, ifp->if_xname);

	error = 0;
	inm = NULL;

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 * timf lives on the stack; it is never stored past this call.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
		imf = &timf;
	}

	error = in_getmulti(ifp, gina, &inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
		return (error);
	}
	IN_MULTI_LIST_LOCK();
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		goto out_inm_release;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
		goto out_inm_release;
	}

out_inm_release:
	/* Success path also falls through here; error selects the branch. */
	if (error) {

		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
		inm_release_deferred(inm);
	} else {
		*pinm = inm;
	}
	IN_MULTI_LIST_UNLOCK();

	return (error);
}

/*
 * Leave a multicast group; unlocked entry point.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int error;

	IN_MULTI_LOCK();
	error = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();

	return (error);
}

/*
 * Leave a multicast group; real entry point.
 * All source filters will be expunged.
 *
 * Only preserves atomicity at inm level.
 *
 * Holding the write lock for the INP which contains imf
 * is highly advisable.  We can't assert for it as imf does not
 * contain a back-pointer to the owning inp.
 *
 * Note: This is not the same as inm_release(*) as this function also
 * makes a state change downcall into IGMP.
 */
int
in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	struct in_mfilter	 timf;
	int			 error;

	error = 0;

	IN_MULTI_LOCK_ASSERT();
	IN_MULTI_LIST_UNLOCK_ASSERT();

	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
	    inm, ntohl(inm->inm_addr.s_addr),
	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
	    imf);

	/*
	 * If no imf was specified (i.e. kernel consumer),
	 * fake one up and assume it is an ASM join.
	 */
	if (imf == NULL) {
		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
		imf = &timf;
	}

	/*
	 * Begin state merge transaction at IGMP layer.
	 *
	 * As this particular invocation should not cause any memory
	 * to be allocated, and there is no opportunity to roll back
	 * the transaction, it MUST NOT fail.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	IN_MULTI_LIST_LOCK();
	error = inm_merge(inm, imf);
	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	/* Set the vnet context from the ifp for the downcall. */
	CURVNET_SET(inm->inm_ifp->if_vnet);
	error = igmp_change_state(inm);
	IF_ADDR_WLOCK(inm->inm_ifp);
	inm_release_deferred(inm);
	IF_ADDR_WUNLOCK(inm->inm_ifp);
	IN_MULTI_LIST_UNLOCK();
	CURVNET_RESTORE();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);

	return (error);
}

/*#ifndef BURN_BRIDGES*/
/*
 * Join an IPv4 multicast group in (*,G) exclusive mode.
 * The group must be a 224.0.0.0/24 link-scope group.
 * This KPI is for legacy kernel consumers only.
1404 */ 1405 struct in_multi * 1406 in_addmulti(struct in_addr *ap, struct ifnet *ifp) 1407 { 1408 struct in_multi *pinm; 1409 int error; 1410 #ifdef INVARIANTS 1411 char addrbuf[INET_ADDRSTRLEN]; 1412 #endif 1413 1414 KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)), 1415 ("%s: %s not in 224.0.0.0/24", __func__, 1416 inet_ntoa_r(*ap, addrbuf))); 1417 1418 error = in_joingroup(ifp, ap, NULL, &pinm); 1419 if (error != 0) 1420 pinm = NULL; 1421 1422 return (pinm); 1423 } 1424 1425 /* 1426 * Block or unblock an ASM multicast source on an inpcb. 1427 * This implements the delta-based API described in RFC 3678. 1428 * 1429 * The delta-based API applies only to exclusive-mode memberships. 1430 * An IGMP downcall will be performed. 1431 * 1432 * SMPng: NOTE: Must take Giant as a join may create a new ifma. 1433 * 1434 * Return 0 if successful, otherwise return an appropriate error code. 1435 */ 1436 static int 1437 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) 1438 { 1439 struct group_source_req gsr; 1440 sockunion_t *gsa, *ssa; 1441 struct ifnet *ifp; 1442 struct in_mfilter *imf; 1443 struct ip_moptions *imo; 1444 struct in_msource *ims; 1445 struct in_multi *inm; 1446 size_t idx; 1447 uint16_t fmode; 1448 int error, doblock; 1449 1450 ifp = NULL; 1451 error = 0; 1452 doblock = 0; 1453 1454 memset(&gsr, 0, sizeof(struct group_source_req)); 1455 gsa = (sockunion_t *)&gsr.gsr_group; 1456 ssa = (sockunion_t *)&gsr.gsr_source; 1457 1458 switch (sopt->sopt_name) { 1459 case IP_BLOCK_SOURCE: 1460 case IP_UNBLOCK_SOURCE: { 1461 struct ip_mreq_source mreqs; 1462 1463 error = sooptcopyin(sopt, &mreqs, 1464 sizeof(struct ip_mreq_source), 1465 sizeof(struct ip_mreq_source)); 1466 if (error) 1467 return (error); 1468 1469 gsa->sin.sin_family = AF_INET; 1470 gsa->sin.sin_len = sizeof(struct sockaddr_in); 1471 gsa->sin.sin_addr = mreqs.imr_multiaddr; 1472 1473 ssa->sin.sin_family = AF_INET; 1474 ssa->sin.sin_len = sizeof(struct sockaddr_in); 1475 ssa->sin.sin_addr = 
mreqs.imr_sourceaddr; 1476 1477 if (!in_nullhost(mreqs.imr_interface)) 1478 INADDR_TO_IFP(mreqs.imr_interface, ifp); 1479 1480 if (sopt->sopt_name == IP_BLOCK_SOURCE) 1481 doblock = 1; 1482 1483 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 1484 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 1485 break; 1486 } 1487 1488 case MCAST_BLOCK_SOURCE: 1489 case MCAST_UNBLOCK_SOURCE: 1490 error = sooptcopyin(sopt, &gsr, 1491 sizeof(struct group_source_req), 1492 sizeof(struct group_source_req)); 1493 if (error) 1494 return (error); 1495 1496 if (gsa->sin.sin_family != AF_INET || 1497 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 1498 return (EINVAL); 1499 1500 if (ssa->sin.sin_family != AF_INET || 1501 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 1502 return (EINVAL); 1503 1504 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 1505 return (EADDRNOTAVAIL); 1506 1507 ifp = ifnet_byindex(gsr.gsr_interface); 1508 1509 if (sopt->sopt_name == MCAST_BLOCK_SOURCE) 1510 doblock = 1; 1511 break; 1512 1513 default: 1514 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 1515 __func__, sopt->sopt_name); 1516 return (EOPNOTSUPP); 1517 break; 1518 } 1519 1520 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 1521 return (EINVAL); 1522 1523 /* 1524 * Check if we are actually a member of this group. 1525 */ 1526 imo = inp_findmoptions(inp); 1527 idx = imo_match_group(imo, ifp, &gsa->sa); 1528 if (idx == -1 || imo->imo_mfilters == NULL) { 1529 error = EADDRNOTAVAIL; 1530 goto out_inp_locked; 1531 } 1532 1533 KASSERT(imo->imo_mfilters != NULL, 1534 ("%s: imo_mfilters not allocated", __func__)); 1535 imf = &imo->imo_mfilters[idx]; 1536 inm = imo->imo_membership[idx]; 1537 1538 /* 1539 * Attempting to use the delta-based API on an 1540 * non exclusive-mode membership is an error. 
1541 */ 1542 fmode = imf->imf_st[0]; 1543 if (fmode != MCAST_EXCLUDE) { 1544 error = EINVAL; 1545 goto out_inp_locked; 1546 } 1547 1548 /* 1549 * Deal with error cases up-front: 1550 * Asked to block, but already blocked; or 1551 * Asked to unblock, but nothing to unblock. 1552 * If adding a new block entry, allocate it. 1553 */ 1554 ims = imo_match_source(imo, idx, &ssa->sa); 1555 if ((ims != NULL && doblock) || (ims == NULL && !doblock)) { 1556 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__, 1557 ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not "); 1558 error = EADDRNOTAVAIL; 1559 goto out_inp_locked; 1560 } 1561 1562 INP_WLOCK_ASSERT(inp); 1563 1564 /* 1565 * Begin state merge transaction at socket layer. 1566 */ 1567 if (doblock) { 1568 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 1569 ims = imf_graft(imf, fmode, &ssa->sin); 1570 if (ims == NULL) 1571 error = ENOMEM; 1572 } else { 1573 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 1574 error = imf_prune(imf, &ssa->sin); 1575 } 1576 1577 if (error) { 1578 CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__); 1579 goto out_imf_rollback; 1580 } 1581 1582 /* 1583 * Begin state merge transaction at IGMP layer. 
1584 */ 1585 IN_MULTI_LOCK(); 1586 IN_MULTI_LIST_LOCK(); 1587 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 1588 error = inm_merge(inm, imf); 1589 if (error) { 1590 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__); 1591 goto out_in_multi_locked; 1592 } 1593 1594 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 1595 error = igmp_change_state(inm); 1596 if (error) 1597 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__); 1598 1599 out_in_multi_locked: 1600 1601 IN_MULTI_UNLOCK(); 1602 IN_MULTI_UNLOCK(); 1603 out_imf_rollback: 1604 if (error) 1605 imf_rollback(imf); 1606 else 1607 imf_commit(imf); 1608 1609 imf_reap(imf); 1610 1611 out_inp_locked: 1612 INP_WUNLOCK(inp); 1613 return (error); 1614 } 1615 1616 /* 1617 * Given an inpcb, return its multicast options structure pointer. Accepts 1618 * an unlocked inpcb pointer, but will return it locked. May sleep. 1619 * 1620 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held. 1621 * SMPng: NOTE: Returns with the INP write lock held. 1622 */ 1623 static struct ip_moptions * 1624 inp_findmoptions(struct inpcb *inp) 1625 { 1626 struct ip_moptions *imo; 1627 struct in_multi **immp; 1628 struct in_mfilter *imfp; 1629 size_t idx; 1630 1631 INP_WLOCK(inp); 1632 if (inp->inp_moptions != NULL) 1633 return (inp->inp_moptions); 1634 1635 INP_WUNLOCK(inp); 1636 1637 imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); 1638 immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS, 1639 M_WAITOK | M_ZERO); 1640 imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, 1641 M_INMFILTER, M_WAITOK); 1642 1643 imo->imo_multicast_ifp = NULL; 1644 imo->imo_multicast_addr.s_addr = INADDR_ANY; 1645 imo->imo_multicast_vif = -1; 1646 imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; 1647 imo->imo_multicast_loop = in_mcast_loop; 1648 imo->imo_num_memberships = 0; 1649 imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; 1650 imo->imo_membership = immp; 1651 1652 /* Initialize per-group source filters. 
 */
	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
	imo->imo_mfilters = imfp;

	/* Recheck under the lock: another thread may have won the race. */
	INP_WLOCK(inp);
	if (inp->inp_moptions != NULL) {
		free(imfp, M_INMFILTER);
		free(immp, M_IPMOPTS);
		free(imo, M_IPMOPTS);
		return (inp->inp_moptions);
	}
	inp->inp_moptions = imo;
	return (imo);
}

/*
 * Discard the IP multicast options (and source filters).  To minimize
 * the amount of work done while holding locks such as the INP's
 * pcbinfo lock (which is used in the receive path), the free
 * operation is performed asynchronously in a separate task.
 *
 * SMPng: NOTE: assumes INP write lock is held.
 */
void
inp_freemoptions(struct ip_moptions *imo, struct inpcbinfo *pcbinfo)
{
	int wlock;

	if (imo == NULL)
		return;

	/*
	 * Drop the pcbinfo lock (remembering whether it was held for
	 * write) while queueing imo for deferred free, then reacquire
	 * it in the same mode before returning to the caller.
	 */
	INP_INFO_LOCK_ASSERT(pcbinfo);
	wlock = INP_INFO_WLOCKED(pcbinfo);
	if (wlock)
		INP_INFO_WUNLOCK(pcbinfo);
	else
		INP_INFO_RUNLOCK(pcbinfo);

	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
	IN_MULTI_LIST_LOCK();
	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
	IN_MULTI_LIST_UNLOCK();
	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
	if (wlock)
		INP_INFO_WLOCK(pcbinfo);
	else
		INP_INFO_RLOCK(pcbinfo);
}

/*
 * Synchronous worker for the deferred free: leave every membership
 * (purging its source filters) and release all storage.
 */
static void
inp_freemoptions_internal(struct ip_moptions *imo)
{
	struct in_mfilter	*imf;
	size_t			 idx, nmships;

	nmships = imo->imo_num_memberships;
	for (idx = 0; idx < nmships; ++idx) {
		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
		if (imf)
			imf_leave(imf);
		(void)in_leavegroup(imo->imo_membership[idx], imf);
		if (imf)
			imf_purge(imf);
	}

	if (imo->imo_mfilters)
		free(imo->imo_mfilters, M_INMFILTER);
	free(imo->imo_membership, M_IPMOPTS);
	free(imo, M_IPMOPTS);
}

/*
 * Task handler: drain imo_gc_list, dropping the list lock around
 * each free since inp_freemoptions_internal() may sleep.
 */
static void
inp_gcmoptions(void *context, int pending)
{
	struct ip_moptions *imo;

	IN_MULTI_LIST_LOCK();
	while (!STAILQ_EMPTY(&imo_gc_list)) {
		imo = STAILQ_FIRST(&imo_gc_list);
		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
		IN_MULTI_LIST_UNLOCK();
		inp_freemoptions_internal(imo);
		IN_MULTI_LIST_LOCK();
	}
	IN_MULTI_LIST_UNLOCK();
}

/*
 * Atomically get source filters on a socket for an IPv4 multicast group.
 * Called with INP lock held; returns with lock released.
 */
static int
inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq	 msfr;
	sockunion_t		*gsa;
	struct ifnet		*ifp;
	struct ip_moptions	*imo;
	struct in_mfilter	*imf;
	struct ip_msource	*ims;
	struct in_msource	*lims;
	struct sockaddr_in	*psin;
	struct sockaddr_storage	*ptss;
	struct sockaddr_storage	*tss;
	int			 error;
	size_t			 idx, nsrcs, ncsrcs;

	INP_WLOCK_ASSERT(inp);

	imo = inp->inp_moptions;
	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));

	/* Drop the INP lock across the copyin from userland. */
	INP_WUNLOCK(inp);

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EINVAL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EINVAL);

	INP_WLOCK(inp);

	/*
	 * Lookup group on the socket.
	 */
	gsa = (sockunion_t *)&msfr.msfr_group;
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		INP_WUNLOCK(inp);
		return (EADDRNOTAVAIL);
	}
	imf = &imo->imo_mfilters[idx];

	/*
	 * Ignore memberships which are in limbo.
	 */
	if (imf->imf_st[1] == MCAST_UNDEFINED) {
		INP_WUNLOCK(inp);
		return (EAGAIN);
	}
	msfr.msfr_fmode = imf->imf_st[1];

	/*
	 * If the user specified a buffer, copy out the source filter
	 * entries to userland gracefully.
	 * We only copy out the number of entries which userland
	 * has asked for, but we always tell userland how big the
	 * buffer really needs to be.
	 */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
	tss = NULL;
	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
		/* M_NOWAIT: the INP lock is held here. */
		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_NOWAIT | M_ZERO);
		if (tss == NULL) {
			INP_WUNLOCK(inp);
			return (ENOBUFS);
		}
	}

	/*
	 * Count number of sources in-mode at t0.
	 * If buffer space exists and remains, copy out source entries.
	 */
	nsrcs = msfr.msfr_nsrcs;
	ncsrcs = 0;
	ptss = tss;
	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
		lims = (struct in_msource *)ims;
		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
		    lims->imsl_st[0] != imf->imf_st[0])
			continue;
		++ncsrcs;
		if (tss != NULL && nsrcs > 0) {
			psin = (struct sockaddr_in *)ptss;
			psin->sin_family = AF_INET;
			psin->sin_len = sizeof(struct sockaddr_in);
			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
			psin->sin_port = 0;
			++ptss;
			--nsrcs;
		}
	}

	INP_WUNLOCK(inp);

	if (tss != NULL) {
		error = copyout(tss, msfr.msfr_srcs,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		free(tss, M_TEMP);
		if (error)
			return (error);
	}

	/* Always report the total in-mode count, even when truncated. */
	msfr.msfr_nsrcs = ncsrcs;
	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));

	return (error);
}

/*
 * Return the IP multicast options in response to user getsockopt().
 */
int
inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct rm_priotracker	 in_ifa_tracker;
	struct ip_mreqn		 mreqn;
	struct ip_moptions	*imo;
	struct ifnet		*ifp;
	struct in_ifaddr	*ia;
	int			 error, optval;
	u_char			 coptval;

	INP_WLOCK(inp);
	imo = inp->inp_moptions;
	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
		INP_WUNLOCK(inp);
		return (EOPNOTSUPP);
	}

	error = 0;
	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF:
		if (imo != NULL)
			optval = imo->imo_multicast_vif;
		else
			optval = -1;
		INP_WUNLOCK(inp);
		error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_IF:
		memset(&mreqn, 0, sizeof(struct ip_mreqn));
		if (imo != NULL) {
			ifp = imo->imo_multicast_ifp;
			if (!in_nullhost(imo->imo_multicast_addr)) {
				mreqn.imr_address = imo->imo_multicast_addr;
			} else if (ifp != NULL) {
				mreqn.imr_ifindex = ifp->if_index;
				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
				if (ia != NULL) {
					mreqn.imr_address =
					    IA_SIN(ia)->sin_addr;
					ifa_free(&ia->ia_ifa);
				}
			}
		}
		INP_WUNLOCK(inp);
		/* Reply format depends on the size the caller asked for. */
		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
			error = sooptcopyout(sopt, &mreqn,
			    sizeof(struct ip_mreqn));
		} else {
			error = sooptcopyout(sopt, &mreqn.imr_address,
			    sizeof(struct in_addr));
		}
		break;

	case IP_MULTICAST_TTL:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
		else
			optval = coptval = imo->imo_multicast_ttl;
		INP_WUNLOCK(inp);
		/* Accept both u_char (legacy) and int reply sizes. */
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MULTICAST_LOOP:
		if (imo == NULL)
			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
		else
			optval = coptval = imo->imo_multicast_loop;
		INP_WUNLOCK(inp);
		if (sopt->sopt_valsize == sizeof(u_char))
			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
		else
			error = sooptcopyout(sopt, &optval, sizeof(int));
		break;

	case IP_MSFILTER:
		if (imo == NULL) {
			error = EADDRNOTAVAIL;
			INP_WUNLOCK(inp);
		} else {
			/* inp_get_source_filters() releases the INP lock. */
			error = inp_get_source_filters(inp, sopt);
		}
		break;

	default:
		INP_WUNLOCK(inp);
		error = ENOPROTOOPT;
		break;
	}

	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Look up the ifnet to use for a multicast group membership,
 * given the IPv4 address of an interface, and the IPv4 group address.
 *
 * This routine exists to support legacy multicast applications
 * which do not understand that multicast memberships are scoped to
 * specific physical links in the networking stack, or which need
 * to join link-scope groups before IPv4 addresses are configured.
 *
 * If inp is non-NULL, use this socket's current FIB number for any
 * required FIB lookup.
 * If ina is INADDR_ANY, look up the group address in the unicast FIB,
 * and use its ifp; usually, this points to the default next-hop.
 *
 * If the FIB lookup fails, attempt to use the first non-loopback
 * interface with multicast capability in the system as a
 * last resort.  The legacy IPv4 ASM API requires that we do
 * this in order to allow groups to be joined when the routing
 * table has not yet been populated during boot.
 *
 * Returns NULL if no ifp could be found.
 *
 * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
 * FUTURE: Implement IPv4 source-address selection.
 */
static struct ifnet *
inp_lookup_mcast_ifp(const struct inpcb *inp,
    const struct sockaddr_in *gsin, const struct in_addr ina)
{
	struct rm_priotracker	 in_ifa_tracker;
	struct ifnet		*ifp;
	struct nhop4_basic	 nh4;
	uint32_t		 fibnum;

	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
	    ("%s: not multicast", __func__));

	ifp = NULL;
	if (!in_nullhost(ina)) {
		/* Explicit interface address given: use it directly. */
		INADDR_TO_IFP(ina, ifp);
	} else {
		fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
		if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
			ifp = nh4.nh_ifp;
		else {
			struct in_ifaddr *ia;
			struct ifnet *mifp;

			/*
			 * FIB lookup failed; fall back to the first
			 * non-loopback, multicast-capable interface.
			 */
			mifp = NULL;
			IN_IFADDR_RLOCK(&in_ifa_tracker);
			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
				mifp = ia->ia_ifp;
				if (!(mifp->if_flags & IFF_LOOPBACK) &&
				     (mifp->if_flags & IFF_MULTICAST)) {
					ifp = mifp;
					break;
				}
			}
			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
		}
	}

	return (ifp);
}

/*
 * Join an IPv4 multicast group, possibly with a source.
2036 */ 2037 static int 2038 inp_join_group(struct inpcb *inp, struct sockopt *sopt) 2039 { 2040 struct group_source_req gsr; 2041 sockunion_t *gsa, *ssa; 2042 struct ifnet *ifp; 2043 struct in_mfilter *imf; 2044 struct ip_moptions *imo; 2045 struct in_multi *inm; 2046 struct in_msource *lims; 2047 size_t idx; 2048 int error, is_new; 2049 2050 ifp = NULL; 2051 imf = NULL; 2052 lims = NULL; 2053 error = 0; 2054 is_new = 0; 2055 2056 memset(&gsr, 0, sizeof(struct group_source_req)); 2057 gsa = (sockunion_t *)&gsr.gsr_group; 2058 gsa->ss.ss_family = AF_UNSPEC; 2059 ssa = (sockunion_t *)&gsr.gsr_source; 2060 ssa->ss.ss_family = AF_UNSPEC; 2061 2062 switch (sopt->sopt_name) { 2063 case IP_ADD_MEMBERSHIP: 2064 case IP_ADD_SOURCE_MEMBERSHIP: { 2065 struct ip_mreq_source mreqs; 2066 2067 if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { 2068 error = sooptcopyin(sopt, &mreqs, 2069 sizeof(struct ip_mreq), 2070 sizeof(struct ip_mreq)); 2071 /* 2072 * Do argument switcharoo from ip_mreq into 2073 * ip_mreq_source to avoid using two instances. 
2074 */ 2075 mreqs.imr_interface = mreqs.imr_sourceaddr; 2076 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2077 } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2078 error = sooptcopyin(sopt, &mreqs, 2079 sizeof(struct ip_mreq_source), 2080 sizeof(struct ip_mreq_source)); 2081 } 2082 if (error) 2083 return (error); 2084 2085 gsa->sin.sin_family = AF_INET; 2086 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2087 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2088 2089 if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { 2090 ssa->sin.sin_family = AF_INET; 2091 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2092 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2093 } 2094 2095 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2096 return (EINVAL); 2097 2098 ifp = inp_lookup_mcast_ifp(inp, &gsa->sin, 2099 mreqs.imr_interface); 2100 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2101 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2102 break; 2103 } 2104 2105 case MCAST_JOIN_GROUP: 2106 case MCAST_JOIN_SOURCE_GROUP: 2107 if (sopt->sopt_name == MCAST_JOIN_GROUP) { 2108 error = sooptcopyin(sopt, &gsr, 2109 sizeof(struct group_req), 2110 sizeof(struct group_req)); 2111 } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2112 error = sooptcopyin(sopt, &gsr, 2113 sizeof(struct group_source_req), 2114 sizeof(struct group_source_req)); 2115 } 2116 if (error) 2117 return (error); 2118 2119 if (gsa->sin.sin_family != AF_INET || 2120 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2121 return (EINVAL); 2122 2123 /* 2124 * Overwrite the port field if present, as the sockaddr 2125 * being copied in may be matched with a binary comparison. 
2126 */ 2127 gsa->sin.sin_port = 0; 2128 if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { 2129 if (ssa->sin.sin_family != AF_INET || 2130 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2131 return (EINVAL); 2132 ssa->sin.sin_port = 0; 2133 } 2134 2135 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2136 return (EINVAL); 2137 2138 if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) 2139 return (EADDRNOTAVAIL); 2140 ifp = ifnet_byindex(gsr.gsr_interface); 2141 break; 2142 2143 default: 2144 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2145 __func__, sopt->sopt_name); 2146 return (EOPNOTSUPP); 2147 break; 2148 } 2149 2150 if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) 2151 return (EADDRNOTAVAIL); 2152 2153 imo = inp_findmoptions(inp); 2154 idx = imo_match_group(imo, ifp, &gsa->sa); 2155 if (idx == -1) { 2156 is_new = 1; 2157 } else { 2158 inm = imo->imo_membership[idx]; 2159 imf = &imo->imo_mfilters[idx]; 2160 if (ssa->ss.ss_family != AF_UNSPEC) { 2161 /* 2162 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership 2163 * is an error. On an existing inclusive membership, 2164 * it just adds the source to the filter list. 2165 */ 2166 if (imf->imf_st[1] != MCAST_INCLUDE) { 2167 error = EINVAL; 2168 goto out_inp_locked; 2169 } 2170 /* 2171 * Throw out duplicates. 2172 * 2173 * XXX FIXME: This makes a naive assumption that 2174 * even if entries exist for *ssa in this imf, 2175 * they will be rejected as dupes, even if they 2176 * are not valid in the current mode (in-mode). 2177 * 2178 * in_msource is transactioned just as for anything 2179 * else in SSM -- but note naive use of inm_graft() 2180 * below for allocating new filter entries. 2181 * 2182 * This is only an issue if someone mixes the 2183 * full-state SSM API with the delta-based API, 2184 * which is discouraged in the relevant RFCs. 
2185 */ 2186 lims = imo_match_source(imo, idx, &ssa->sa); 2187 if (lims != NULL /*&& 2188 lims->imsl_st[1] == MCAST_INCLUDE*/) { 2189 error = EADDRNOTAVAIL; 2190 goto out_inp_locked; 2191 } 2192 } else { 2193 /* 2194 * MCAST_JOIN_GROUP on an existing exclusive 2195 * membership is an error; return EADDRINUSE 2196 * to preserve 4.4BSD API idempotence, and 2197 * avoid tedious detour to code below. 2198 * NOTE: This is bending RFC 3678 a bit. 2199 * 2200 * On an existing inclusive membership, this is also 2201 * an error; if you want to change filter mode, 2202 * you must use the userland API setsourcefilter(). 2203 * XXX We don't reject this for imf in UNDEFINED 2204 * state at t1, because allocation of a filter 2205 * is atomic with allocation of a membership. 2206 */ 2207 error = EINVAL; 2208 if (imf->imf_st[1] == MCAST_EXCLUDE) 2209 error = EADDRINUSE; 2210 goto out_inp_locked; 2211 } 2212 } 2213 2214 /* 2215 * Begin state merge transaction at socket layer. 2216 */ 2217 INP_WLOCK_ASSERT(inp); 2218 2219 if (is_new) { 2220 if (imo->imo_num_memberships == imo->imo_max_memberships) { 2221 error = imo_grow(imo); 2222 if (error) 2223 goto out_inp_locked; 2224 } 2225 /* 2226 * Allocate the new slot upfront so we can deal with 2227 * grafting the new source filter in same code path 2228 * as for join-source on existing membership. 2229 */ 2230 idx = imo->imo_num_memberships; 2231 imo->imo_membership[idx] = NULL; 2232 imo->imo_num_memberships++; 2233 KASSERT(imo->imo_mfilters != NULL, 2234 ("%s: imf_mfilters vector was not allocated", __func__)); 2235 imf = &imo->imo_mfilters[idx]; 2236 KASSERT(RB_EMPTY(&imf->imf_sources), 2237 ("%s: imf_sources not empty", __func__)); 2238 } 2239 2240 /* 2241 * Graft new source into filter list for this inpcb's 2242 * membership of the group. The in_multi may not have 2243 * been allocated yet if this is a new membership, however, 2244 * the in_mfilter slot will be allocated and must be initialized. 
2245 * 2246 * Note: Grafting of exclusive mode filters doesn't happen 2247 * in this path. 2248 * XXX: Should check for non-NULL lims (node exists but may 2249 * not be in-mode) for interop with full-state API. 2250 */ 2251 if (ssa->ss.ss_family != AF_UNSPEC) { 2252 /* Membership starts in IN mode */ 2253 if (is_new) { 2254 CTR1(KTR_IGMPV3, "%s: new join w/source", __func__); 2255 imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE); 2256 } else { 2257 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow"); 2258 } 2259 lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin); 2260 if (lims == NULL) { 2261 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2262 __func__); 2263 error = ENOMEM; 2264 goto out_imo_free; 2265 } 2266 } else { 2267 /* No address specified; Membership starts in EX mode */ 2268 if (is_new) { 2269 CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__); 2270 imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE); 2271 } 2272 } 2273 2274 /* 2275 * Begin state merge transaction at IGMP layer. 2276 */ 2277 in_pcbref(inp); 2278 INP_WUNLOCK(inp); 2279 IN_MULTI_LOCK(); 2280 2281 if (is_new) { 2282 error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf, 2283 &inm); 2284 if (error) { 2285 CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed", 2286 __func__); 2287 IN_MULTI_LIST_UNLOCK(); 2288 goto out_imo_free; 2289 } 2290 imo->imo_membership[idx] = inm; 2291 } else { 2292 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2293 IN_MULTI_LIST_LOCK(); 2294 error = inm_merge(inm, imf); 2295 if (error) { 2296 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2297 __func__); 2298 IN_MULTI_LIST_UNLOCK(); 2299 goto out_in_multi_locked; 2300 } 2301 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2302 error = igmp_change_state(inm); 2303 IN_MULTI_LIST_UNLOCK(); 2304 if (error) { 2305 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2306 __func__); 2307 goto out_in_multi_locked; 2308 } 2309 } 2310 2311 out_in_multi_locked: 2312 2313 IN_MULTI_UNLOCK(); 2314 INP_WLOCK(inp); 2315 if 
(in_pcbrele_wlocked(inp)) 2316 return (ENXIO); 2317 if (error) { 2318 imf_rollback(imf); 2319 if (is_new) 2320 imf_purge(imf); 2321 else 2322 imf_reap(imf); 2323 } else { 2324 imf_commit(imf); 2325 } 2326 2327 out_imo_free: 2328 if (error && is_new) { 2329 imo->imo_membership[idx] = NULL; 2330 --imo->imo_num_memberships; 2331 } 2332 2333 out_inp_locked: 2334 INP_WUNLOCK(inp); 2335 return (error); 2336 } 2337 2338 /* 2339 * Leave an IPv4 multicast group on an inpcb, possibly with a source. 2340 */ 2341 static int 2342 inp_leave_group(struct inpcb *inp, struct sockopt *sopt) 2343 { 2344 struct group_source_req gsr; 2345 struct ip_mreq_source mreqs; 2346 sockunion_t *gsa, *ssa; 2347 struct ifnet *ifp; 2348 struct in_mfilter *imf; 2349 struct ip_moptions *imo; 2350 struct in_msource *ims; 2351 struct in_multi *inm; 2352 size_t idx; 2353 int error, is_final; 2354 2355 ifp = NULL; 2356 error = 0; 2357 is_final = 1; 2358 2359 memset(&gsr, 0, sizeof(struct group_source_req)); 2360 gsa = (sockunion_t *)&gsr.gsr_group; 2361 gsa->ss.ss_family = AF_UNSPEC; 2362 ssa = (sockunion_t *)&gsr.gsr_source; 2363 ssa->ss.ss_family = AF_UNSPEC; 2364 2365 switch (sopt->sopt_name) { 2366 case IP_DROP_MEMBERSHIP: 2367 case IP_DROP_SOURCE_MEMBERSHIP: 2368 if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { 2369 error = sooptcopyin(sopt, &mreqs, 2370 sizeof(struct ip_mreq), 2371 sizeof(struct ip_mreq)); 2372 /* 2373 * Swap interface and sourceaddr arguments, 2374 * as ip_mreq and ip_mreq_source are laid 2375 * out differently. 
2376 */ 2377 mreqs.imr_interface = mreqs.imr_sourceaddr; 2378 mreqs.imr_sourceaddr.s_addr = INADDR_ANY; 2379 } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2380 error = sooptcopyin(sopt, &mreqs, 2381 sizeof(struct ip_mreq_source), 2382 sizeof(struct ip_mreq_source)); 2383 } 2384 if (error) 2385 return (error); 2386 2387 gsa->sin.sin_family = AF_INET; 2388 gsa->sin.sin_len = sizeof(struct sockaddr_in); 2389 gsa->sin.sin_addr = mreqs.imr_multiaddr; 2390 2391 if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { 2392 ssa->sin.sin_family = AF_INET; 2393 ssa->sin.sin_len = sizeof(struct sockaddr_in); 2394 ssa->sin.sin_addr = mreqs.imr_sourceaddr; 2395 } 2396 2397 /* 2398 * Attempt to look up hinted ifp from interface address. 2399 * Fallthrough with null ifp iff lookup fails, to 2400 * preserve 4.4BSD mcast API idempotence. 2401 * XXX NOTE WELL: The RFC 3678 API is preferred because 2402 * using an IPv4 address as a key is racy. 2403 */ 2404 if (!in_nullhost(mreqs.imr_interface)) 2405 INADDR_TO_IFP(mreqs.imr_interface, ifp); 2406 2407 CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", 2408 __func__, ntohl(mreqs.imr_interface.s_addr), ifp); 2409 2410 break; 2411 2412 case MCAST_LEAVE_GROUP: 2413 case MCAST_LEAVE_SOURCE_GROUP: 2414 if (sopt->sopt_name == MCAST_LEAVE_GROUP) { 2415 error = sooptcopyin(sopt, &gsr, 2416 sizeof(struct group_req), 2417 sizeof(struct group_req)); 2418 } else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2419 error = sooptcopyin(sopt, &gsr, 2420 sizeof(struct group_source_req), 2421 sizeof(struct group_source_req)); 2422 } 2423 if (error) 2424 return (error); 2425 2426 if (gsa->sin.sin_family != AF_INET || 2427 gsa->sin.sin_len != sizeof(struct sockaddr_in)) 2428 return (EINVAL); 2429 2430 if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { 2431 if (ssa->sin.sin_family != AF_INET || 2432 ssa->sin.sin_len != sizeof(struct sockaddr_in)) 2433 return (EINVAL); 2434 } 2435 2436 if (gsr.gsr_interface == 0 || V_if_index < 
gsr.gsr_interface) 2437 return (EADDRNOTAVAIL); 2438 2439 ifp = ifnet_byindex(gsr.gsr_interface); 2440 2441 if (ifp == NULL) 2442 return (EADDRNOTAVAIL); 2443 break; 2444 2445 default: 2446 CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d", 2447 __func__, sopt->sopt_name); 2448 return (EOPNOTSUPP); 2449 break; 2450 } 2451 2452 if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) 2453 return (EINVAL); 2454 2455 /* 2456 * Find the membership in the membership array. 2457 */ 2458 imo = inp_findmoptions(inp); 2459 idx = imo_match_group(imo, ifp, &gsa->sa); 2460 if (idx == -1) { 2461 error = EADDRNOTAVAIL; 2462 goto out_inp_locked; 2463 } 2464 inm = imo->imo_membership[idx]; 2465 imf = &imo->imo_mfilters[idx]; 2466 2467 if (ssa->ss.ss_family != AF_UNSPEC) 2468 is_final = 0; 2469 2470 /* 2471 * Begin state merge transaction at socket layer. 2472 */ 2473 INP_WLOCK_ASSERT(inp); 2474 2475 /* 2476 * If we were instructed only to leave a given source, do so. 2477 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships. 2478 */ 2479 if (is_final) { 2480 imf_leave(imf); 2481 } else { 2482 if (imf->imf_st[0] == MCAST_EXCLUDE) { 2483 error = EADDRNOTAVAIL; 2484 goto out_inp_locked; 2485 } 2486 ims = imo_match_source(imo, idx, &ssa->sa); 2487 if (ims == NULL) { 2488 CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", 2489 __func__, ntohl(ssa->sin.sin_addr.s_addr), "not "); 2490 error = EADDRNOTAVAIL; 2491 goto out_inp_locked; 2492 } 2493 CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block"); 2494 error = imf_prune(imf, &ssa->sin); 2495 if (error) { 2496 CTR1(KTR_IGMPV3, "%s: merge imf state failed", 2497 __func__); 2498 goto out_inp_locked; 2499 } 2500 } 2501 2502 /* 2503 * Begin state merge transaction at IGMP layer. 2504 */ 2505 in_pcbref(inp); 2506 INP_WUNLOCK(inp); 2507 IN_MULTI_LOCK(); 2508 2509 if (is_final) { 2510 /* 2511 * Give up the multicast address record to which 2512 * the membership points. 
2513 */ 2514 (void)in_leavegroup_locked(inm, imf); 2515 } else { 2516 CTR1(KTR_IGMPV3, "%s: merge inm state", __func__); 2517 IN_MULTI_LIST_LOCK(); 2518 error = inm_merge(inm, imf); 2519 if (error) { 2520 CTR1(KTR_IGMPV3, "%s: failed to merge inm state", 2521 __func__); 2522 goto out_in_multi_locked; 2523 } 2524 2525 CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__); 2526 error = igmp_change_state(inm); 2527 IN_MULTI_LIST_UNLOCK(); 2528 if (error) { 2529 CTR1(KTR_IGMPV3, "%s: failed igmp downcall", 2530 __func__); 2531 } 2532 } 2533 2534 out_in_multi_locked: 2535 2536 IN_MULTI_UNLOCK(); 2537 INP_WLOCK(inp); 2538 if (in_pcbrele_wlocked(inp)) 2539 return (ENXIO); 2540 2541 if (error) 2542 imf_rollback(imf); 2543 else 2544 imf_commit(imf); 2545 2546 imf_reap(imf); 2547 2548 if (is_final) { 2549 /* Remove the gap in the membership and filter array. */ 2550 for (++idx; idx < imo->imo_num_memberships; ++idx) { 2551 imo->imo_membership[idx-1] = imo->imo_membership[idx]; 2552 imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx]; 2553 } 2554 imo->imo_num_memberships--; 2555 } 2556 2557 out_inp_locked: 2558 INP_WUNLOCK(inp); 2559 return (error); 2560 } 2561 2562 /* 2563 * Select the interface for transmitting IPv4 multicast datagrams. 2564 * 2565 * Either an instance of struct in_addr or an instance of struct ip_mreqn 2566 * may be passed to this socket option. An address of INADDR_ANY or an 2567 * interface index of 0 is used to remove a previous selection. 2568 * When no interface is selected, one is chosen for every send. 2569 */ 2570 static int 2571 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) 2572 { 2573 struct in_addr addr; 2574 struct ip_mreqn mreqn; 2575 struct ifnet *ifp; 2576 struct ip_moptions *imo; 2577 int error; 2578 2579 if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { 2580 /* 2581 * An interface index was specified using the 2582 * Linux-derived ip_mreqn structure. 
 */
		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
		    sizeof(struct ip_mreqn));
		if (error)
			return (error);

		/* Reject indices outside the currently valid range. */
		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
			return (EINVAL);

		/* Index 0 clears any previous interface selection. */
		if (mreqn.imr_ifindex == 0) {
			ifp = NULL;
		} else {
			ifp = ifnet_byindex(mreqn.imr_ifindex);
			if (ifp == NULL)
				return (EADDRNOTAVAIL);
		}
	} else {
		/*
		 * An interface was specified by IPv4 address.
		 * This is the traditional BSD usage.
		 */
		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
		    sizeof(struct in_addr));
		if (error)
			return (error);
		/* INADDR_ANY likewise clears the previous selection. */
		if (in_nullhost(addr)) {
			ifp = NULL;
		} else {
			INADDR_TO_IFP(addr, ifp);
			if (ifp == NULL)
				return (EADDRNOTAVAIL);
		}
		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp,
		    ntohl(addr.s_addr));
	}

	/* Reject interfaces which do not support multicast. */
	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
		return (EOPNOTSUPP);

	/* inp_findmoptions() returns with the INP write lock held. */
	imo = inp_findmoptions(inp);
	imo->imo_multicast_ifp = ifp;
	imo->imo_multicast_addr.s_addr = INADDR_ANY;
	INP_WUNLOCK(inp);

	return (0);
}

/*
 * Atomically set source filters on a socket for an IPv4 multicast group.
 *
 * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
 */
static int
inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
{
	struct __msfilterreq	 msfr;
	sockunion_t		*gsa;
	struct ifnet		*ifp;
	struct in_mfilter	*imf;
	struct ip_moptions	*imo;
	struct in_multi		*inm;
	size_t			 idx;
	int			 error;

	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
	    sizeof(struct __msfilterreq));
	if (error)
		return (error);

	/* Bound the per-socket source list before any allocation. */
	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
		return (ENOBUFS);

	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
	     msfr.msfr_fmode != MCAST_INCLUDE))
		return (EINVAL);

	if (msfr.msfr_group.ss_family != AF_INET ||
	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
		return (EINVAL);

	gsa = (sockunion_t *)&msfr.msfr_group;
	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
		return (EINVAL);

	gsa->sin.sin_port = 0;	/* ignore port */

	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
		return (EADDRNOTAVAIL);

	ifp = ifnet_byindex(msfr.msfr_ifindex);
	if (ifp == NULL)
		return (EADDRNOTAVAIL);

	/*
	 * Take the INP write lock.
	 * Check if this socket is a member of this group.
	 */
	imo = inp_findmoptions(inp);
	idx = imo_match_group(imo, ifp, &gsa->sa);
	if (idx == -1 || imo->imo_mfilters == NULL) {
		error = EADDRNOTAVAIL;
		goto out_inp_locked;
	}
	inm = imo->imo_membership[idx];
	imf = &imo->imo_mfilters[idx];

	/*
	 * Begin state merge transaction at socket layer.
	 */
	INP_WLOCK_ASSERT(inp);

	imf->imf_st[1] = msfr.msfr_fmode;

	/*
	 * Apply any new source filters, if present.
	 * Make a copy of the user-space source vector so
	 * that we may copy them with a single copyin. This
	 * allows us to deal with page faults up-front.
	 */
	if (msfr.msfr_nsrcs > 0) {
		struct in_msource	*lims;
		struct sockaddr_in	*psin;
		struct sockaddr_storage	*kss, *pkss;
		int			 i;

		/*
		 * NOTE(review): the INP lock is dropped across the
		 * malloc/copyin below and reacquired afterwards, but
		 * unlike inp_join_group()/inp_leave_group() no
		 * in_pcbref() is taken here — confirm the PCB cannot
		 * be reclaimed in this window.
		 */
		INP_WUNLOCK(inp);

		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
		    __func__, (unsigned long)msfr.msfr_nsrcs);
		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
		    M_TEMP, M_WAITOK);
		error = copyin(msfr.msfr_srcs, kss,
		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
		if (error) {
			free(kss, M_TEMP);
			return (error);
		}

		INP_WLOCK(inp);

		/*
		 * Mark all source filters as UNDEFINED at t1.
		 * Restore new group filter mode, as imf_leave()
		 * will set it to INCLUDE.
		 */
		imf_leave(imf);
		imf->imf_st[1] = msfr.msfr_fmode;

		/*
		 * Update socket layer filters at t1, lazy-allocating
		 * new entries. This saves a bunch of memory at the
		 * cost of one RB_FIND() per source entry; duplicate
		 * entries in the msfr_nsrcs vector are ignored.
		 * If we encounter an error, rollback transaction.
		 *
		 * XXX This too could be replaced with a set-symmetric
		 * difference like loop to avoid walking from root
		 * every time, as the key space is common.
		 */
		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
			psin = (struct sockaddr_in *)pkss;
			if (psin->sin_family != AF_INET) {
				error = EAFNOSUPPORT;
				break;
			}
			if (psin->sin_len != sizeof(struct sockaddr_in)) {
				error = EINVAL;
				break;
			}
			error = imf_get_source(imf, psin, &lims);
			if (error)
				break;
			lims->imsl_st[1] = imf->imf_st[1];
		}
		free(kss, M_TEMP);
	}

	if (error)
		goto out_imf_rollback;

	INP_WLOCK_ASSERT(inp);
	IN_MULTI_LOCK();
	IN_MULTI_LIST_LOCK();

	/*
	 * Begin state merge transaction at IGMP layer.
	 */
	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
	error = inm_merge(inm, imf);
	if (error) {
		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
		IN_MULTI_LIST_UNLOCK();
		goto out_in_multi_locked;
	}

	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
	error = igmp_change_state(inm);
	IN_MULTI_LIST_UNLOCK();
	if (error)
		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);

out_in_multi_locked:

	IN_MULTI_UNLOCK();

out_imf_rollback:
	if (error)
		imf_rollback(imf);
	else
		imf_commit(imf);

	imf_reap(imf);

out_inp_locked:
	INP_WUNLOCK(inp);
	return (error);
}

/*
 * Set the IP multicast options in response to user setsockopt().
 *
 * Many of the socket options handled in this function duplicate the
 * functionality of socket options in the regular unicast API. However,
 * it is not possible to merge the duplicate code, because the idempotence
 * of the IPv4 multicast part of the BSD Sockets API must be preserved;
 * the effects of these options must be treated as separate and distinct.
 *
 * SMPng: XXX: Unlocked read of inp_socket believed OK.
 * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
 * is refactored to no longer use vifs.
 */
int
inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
{
	struct ip_moptions	*imo;
	int			 error;

	error = 0;

	/*
	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
	 * or is a divert socket, reject it.
	 */
	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
		return (EOPNOTSUPP);

	switch (sopt->sopt_name) {
	case IP_MULTICAST_VIF: {
		int vifi;
		/*
		 * Select a multicast VIF for transmission.
		 * Only useful if multicast forwarding is active.
		 */
		if (legal_vif_num == NULL) {
			/* MROUTING is not loaded; option unsupported. */
			error = EOPNOTSUPP;
			break;
		}
		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
		if (error)
			break;
		/* -1 deselects the VIF; anything else must be valid. */
		if (!legal_vif_num(vifi) && (vifi != -1)) {
			error = EINVAL;
			break;
		}
		/* inp_findmoptions() returns with the INP write lock held. */
		imo = inp_findmoptions(inp);
		imo->imo_multicast_vif = vifi;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_IF:
		error = inp_set_multicast_if(inp, sopt);
		break;

	case IP_MULTICAST_TTL: {
		u_char ttl;

		/*
		 * Set the IP time-to-live for outgoing multicast packets.
		 * The original multicast API required a char argument,
		 * which is inconsistent with the rest of the socket API.
		 * We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int ittl;

			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			if (ittl > 255) {
				error = EINVAL;
				break;
			}
			ttl = (u_char)ittl;
		}
		imo = inp_findmoptions(inp);
		imo->imo_multicast_ttl = ttl;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_MULTICAST_LOOP: {
		u_char loop;

		/*
		 * Set the loopback flag for outgoing multicast packets.
		 * Must be zero or one. The original multicast API required a
		 * char argument, which is inconsistent with the rest
		 * of the socket API. We allow either a char or an int.
		 */
		if (sopt->sopt_valsize == sizeof(u_char)) {
			error = sooptcopyin(sopt, &loop, sizeof(u_char),
			    sizeof(u_char));
			if (error)
				break;
		} else {
			u_int iloop;

			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
			    sizeof(u_int));
			if (error)
				break;
			loop = (u_char)iloop;
		}
		imo = inp_findmoptions(inp);
		/* Normalize any non-zero value to 1. */
		imo->imo_multicast_loop = !!loop;
		INP_WUNLOCK(inp);
		break;
	}

	case IP_ADD_MEMBERSHIP:
	case IP_ADD_SOURCE_MEMBERSHIP:
	case MCAST_JOIN_GROUP:
	case MCAST_JOIN_SOURCE_GROUP:
		error = inp_join_group(inp, sopt);
		break;

	case IP_DROP_MEMBERSHIP:
	case IP_DROP_SOURCE_MEMBERSHIP:
	case MCAST_LEAVE_GROUP:
	case MCAST_LEAVE_SOURCE_GROUP:
		error = inp_leave_group(inp, sopt);
		break;

	case IP_BLOCK_SOURCE:
	case IP_UNBLOCK_SOURCE:
	case MCAST_BLOCK_SOURCE:
	case MCAST_UNBLOCK_SOURCE:
		error = inp_block_unblock_source(inp, sopt);
		break;

	case IP_MSFILTER:
		error = inp_set_source_filters(inp, sopt);
		break;

	default:
		error = EOPNOTSUPP;
		break;
	}

	/* Every handler above must return with the INP lock released. */
	INP_UNLOCK_ASSERT(inp);

	return (error);
}

/*
 * Expose IGMP's multicast filter mode and source list(s) to userland,
 * keyed by (ifindex, group).
 * The filter mode is written out as a uint32_t, followed by
 * 0..n of struct in_addr.
 * For use by ifmcstat(8).
 * SMPng: NOTE: unlocked read of ifindex space.
 */
static int
sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
{
	struct in_addr			 src, group;
	struct ifnet			*ifp;
	struct ifmultiaddr		*ifma;
	struct in_multi			*inm;
	struct ip_msource		*ims;
	int				*name;
	int				 retval;
	u_int				 namelen;
	uint32_t			 fmode, ifindex;

	name = (int *)arg1;
	namelen = arg2;

	/* This node is read-only; reject any attempt to set it. */
	if (req->newptr != NULL)
		return (EPERM);

	/* Exactly two name components: ifindex, group address. */
	if (namelen != 2)
		return (EINVAL);

	ifindex = name[0];
	if (ifindex <= 0 || ifindex > V_if_index) {
		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
		    __func__, ifindex);
		return (ENOENT);
	}

	group.s_addr = name[1];
	if (!IN_MULTICAST(ntohl(group.s_addr))) {
		CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
		    __func__, ntohl(group.s_addr));
		return (EINVAL);
	}

	ifp = ifnet_byindex(ifindex);
	if (ifp == NULL) {
		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
		    __func__, ifindex);
		return (ENOENT);
	}

	/* Wire the worst-case output size before taking locks. */
	retval = sysctl_wire_old_buffer(req,
	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
	if (retval)
		return (retval);

	IN_MULTI_LIST_LOCK();

	IF_ADDR_RLOCK(ifp);
	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
		if (ifma->ifma_addr->sa_family != AF_INET ||
		    ifma->ifma_protospec == NULL)
			continue;
		inm = (struct in_multi *)ifma->ifma_protospec;
		if (!in_hosteq(inm->inm_addr, group))
			continue;
		fmode = inm->inm_st[1].iss_fmode;
		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
		if (retval != 0)
			break;
		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
			    ims->ims_haddr);
			/*
			 * Only copy-out sources which are in-mode.
			 */
			if (fmode != ims_get_mode(inm, ims, 1)) {
				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
				    __func__);
				continue;
			}
			src.s_addr = htonl(ims->ims_haddr);
			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
			/*
			 * NOTE(review): a failure here only breaks the
			 * inner loop; the outer TAILQ_FOREACH keeps
			 * iterating.  Presumably harmless because at most
			 * one ifma matches the group — confirm.
			 */
			if (retval != 0)
				break;
		}
	}
	IF_ADDR_RUNLOCK(ifp);

	IN_MULTI_LIST_UNLOCK();

	return (retval);
}

#if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)

/* Printable names for MCAST_UNDEFINED/INCLUDE/EXCLUDE, indexed by mode. */
static const char *inm_modestrs[] = { "un", "in", "ex" };

static const char *
inm_mode_str(const int mode)
{

	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
		return (inm_modestrs[mode]);
	return ("??");
}

/* Printable names for the IGMP per-group state machine, indexed by state. */
static const char *inm_statestrs[] = {
	"not-member",
	"silent",
	"idle",
	"lazy",
	"sleeping",
	"awakening",
	"query-pending",
	"sg-query-pending",
	"leaving"
};

static const char *
inm_state_str(const int state)
{

	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
		return (inm_statestrs[state]);
	return ("??");
}

/*
 * Dump an in_multi structure to the console.
 */
void
inm_print(const struct in_multi *inm)
{
	int t;
	char addrbuf[INET_ADDRSTRLEN];

	/* Cheap early-out when IGMPv3 tracing is not enabled at runtime. */
	if ((ktr_mask & KTR_IGMPV3) == 0)
		return;

	printf("%s: --- begin inm %p ---\n", __func__, inm);
	printf("addr %s ifp %p(%s) ifma %p\n",
	    inet_ntoa_r(inm->inm_addr, addrbuf),
	    inm->inm_ifp,
	    inm->inm_ifp->if_xname,
	    inm->inm_ifma);
	printf("timer %u state %s refcount %u scq.len %u\n",
	    inm->inm_timer,
	    inm_state_str(inm->inm_state),
	    inm->inm_refcount,
	    inm->inm_scq.mq_len);
	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
	    inm->inm_igi,
	    inm->inm_nsrc,
	    inm->inm_sctimer,
	    inm->inm_scrv);
	/* Dump both snapshots: t0 (committed) and t1 (pending). */
	for (t = 0; t < 2; t++) {
		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
		    inm_mode_str(inm->inm_st[t].iss_fmode),
		    inm->inm_st[t].iss_asm,
		    inm->inm_st[t].iss_ex,
		    inm->inm_st[t].iss_in,
		    inm->inm_st[t].iss_rec);
	}
	printf("%s: --- end inm %p ---\n", __func__, inm);
}

#else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */

/* Stub when IGMPv3 KTR tracing is compiled out. */
void
inm_print(const struct in_multi *inm)
{

}

#endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */

RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);