xref: /freebsd/sys/netinet/in_mcast.c (revision 97cb52fa9aefd90fad38790fded50905aeeb9b9e)
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2007-2009 Bruce Simpson.
5  * Copyright (c) 2005 Robert N. M. Watson.
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  * 3. The name of the author may not be used to endorse or promote
17  *    products derived from this software without specific prior written
18  *    permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
21  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
24  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30  * SUCH DAMAGE.
31  */
32 
33 /*
34  * IPv4 multicast socket, group, and socket option processing module.
35  */
36 
37 #include <sys/cdefs.h>
38 __FBSDID("$FreeBSD$");
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/kernel.h>
43 #include <sys/lock.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/protosw.h>
47 #include <sys/rmlock.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/protosw.h>
51 #include <sys/sysctl.h>
52 #include <sys/ktr.h>
53 #include <sys/taskqueue.h>
54 #include <sys/tree.h>
55 
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_dl.h>
59 #include <net/route.h>
60 #include <net/vnet.h>
61 
62 #include <netinet/in.h>
63 #include <netinet/in_systm.h>
64 #include <netinet/in_fib.h>
65 #include <netinet/in_pcb.h>
66 #include <netinet/in_var.h>
67 #include <netinet/ip_var.h>
68 #include <netinet/igmp_var.h>
69 
70 #ifndef KTR_IGMPV3
71 #define KTR_IGMPV3 KTR_INET
72 #endif
73 
74 #ifndef __SOCKUNION_DECLARED
75 union sockunion {
76 	struct sockaddr_storage	ss;
77 	struct sockaddr		sa;
78 	struct sockaddr_dl	sdl;
79 	struct sockaddr_in	sin;
80 };
81 typedef union sockunion sockunion_t;
82 #define __SOCKUNION_DECLARED
83 #endif /* __SOCKUNION_DECLARED */
84 
/*
 * Malloc types private to this file.
 */
/* in_msource nodes and the per-socket in_mfilter array. */
static MALLOC_DEFINE(M_INMFILTER, "in_mfilter",
    "IPv4 multicast PCB-layer source filter");
/* struct in_multi group records (see in_getmulti()). */
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
/* The per-socket membership pointer array (see imo_grow()). */
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
/* ip_msource nodes hanging off an in_multi's inm_srcs tree. */
static MALLOC_DEFINE(M_IPMSOURCE, "ip_msource",
    "IPv4 multicast IGMP-layer source filter");
91 
92 /*
93  * Locking:
94  * - Lock order is: Giant, INP_WLOCK, IN_MULTI_LOCK, IGMP_LOCK, IF_ADDR_LOCK.
95  * - The IF_ADDR_LOCK is implicitly taken by inm_lookup() earlier, however
96  *   it can be taken by code in net/if.c also.
97  * - ip_moptions and in_mfilter are covered by the INP_WLOCK.
98  *
99  * struct in_multi is covered by IN_MULTI_LOCK. There isn't strictly
100  * any need for in_multi itself to be virtualized -- it is bound to an ifp
101  * anyway no matter what happens.
102  */
/*
 * Global mutex with non-static linkage, initialized at boot by MTX_SYSINIT
 * as a default (MTX_DEF) mutex named "in_multi_mtx".
 * NOTE(review): presumably the mutex behind the IN_MULTI_LOCK macros --
 * confirm against in_var.h.
 */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF);
105 
106 /*
107  * Functions with non-static linkage defined in this file should be
108  * declared in in_var.h:
109  *  imo_multi_filter()
110  *  in_addmulti()
111  *  in_delmulti()
112  *  in_joingroup()
113  *  in_joingroup_locked()
114  *  in_leavegroup()
115  *  in_leavegroup_locked()
116  * and ip_var.h:
117  *  inp_freemoptions()
118  *  inp_getmoptions()
119  *  inp_setmoptions()
120  *
121  * XXX: Both carp and pf need to use the legacy (*,G) KPIs in_addmulti()
122  * and in_delmulti().
123  */
124 static void	imf_commit(struct in_mfilter *);
125 static int	imf_get_source(struct in_mfilter *imf,
126 		    const struct sockaddr_in *psin,
127 		    struct in_msource **);
128 static struct in_msource *
129 		imf_graft(struct in_mfilter *, const uint8_t,
130 		    const struct sockaddr_in *);
131 static void	imf_leave(struct in_mfilter *);
132 static int	imf_prune(struct in_mfilter *, const struct sockaddr_in *);
133 static void	imf_purge(struct in_mfilter *);
134 static void	imf_rollback(struct in_mfilter *);
135 static void	imf_reap(struct in_mfilter *);
136 static int	imo_grow(struct ip_moptions *);
137 static size_t	imo_match_group(const struct ip_moptions *,
138 		    const struct ifnet *, const struct sockaddr *);
139 static struct in_msource *
140 		imo_match_source(const struct ip_moptions *, const size_t,
141 		    const struct sockaddr *);
142 static void	ims_merge(struct ip_msource *ims,
143 		    const struct in_msource *lims, const int rollback);
144 static int	in_getmulti(struct ifnet *, const struct in_addr *,
145 		    struct in_multi **);
146 static int	inm_get_source(struct in_multi *inm, const in_addr_t haddr,
147 		    const int noalloc, struct ip_msource **pims);
148 #ifdef KTR
149 static int	inm_is_ifp_detached(const struct in_multi *);
150 #endif
151 static int	inm_merge(struct in_multi *, /*const*/ struct in_mfilter *);
152 static void	inm_purge(struct in_multi *);
153 static void	inm_reap(struct in_multi *);
154 static struct ip_moptions *
155 		inp_findmoptions(struct inpcb *);
156 static void	inp_freemoptions_internal(struct ip_moptions *);
157 static void	inp_gcmoptions(void *, int);
158 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
159 static int	inp_join_group(struct inpcb *, struct sockopt *);
160 static int	inp_leave_group(struct inpcb *, struct sockopt *);
161 static struct ifnet *
162 		inp_lookup_mcast_ifp(const struct inpcb *,
163 		    const struct sockaddr_in *, const struct in_addr);
164 static int	inp_block_unblock_source(struct inpcb *, struct sockopt *);
165 static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
166 static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
167 static int	sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS);
168 
/* Parent node for the multicast knobs declared below. */
static SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0,
    "IPv4 multicast");

/*
 * Upper bound compared against an in_multi's inm_nsrc before a new
 * ip_msource is allocated (inm_record_source(), inm_get_source()).
 */
static u_long in_mcast_maxgrpsrc = IP_MAX_GROUP_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxgrpsrc,
    CTLFLAG_RWTUN, &in_mcast_maxgrpsrc, 0,
    "Max source filters per group");

/*
 * Upper bound compared against an in_mfilter's imf_nsrc before a new
 * in_msource is allocated (imf_get_source()).
 */
static u_long in_mcast_maxsocksrc = IP_MAX_SOCK_SRC_FILTER;
SYSCTL_ULONG(_net_inet_ip_mcast, OID_AUTO, maxsocksrc,
    CTLFLAG_RWTUN, &in_mcast_maxsocksrc, 0,
    "Max source filters per socket");

/* Non-static: default loopback setting, readable from other files. */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RWTUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");

/* Read-only node whose requests are served by sysctl_ip_mcast_filters(). */
static SYSCTL_NODE(_net_inet_ip_mcast, OID_AUTO, filters,
    CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_ip_mcast_filters,
    "Per-interface stack-wide source filters");

/*
 * List of ip_moptions and the task whose handler is inp_gcmoptions().
 * NOTE(review): presumably used to defer freeing of ip_moptions to a
 * taskqueue -- the producers/consumers are outside this excerpt.
 */
static STAILQ_HEAD(, ip_moptions) imo_gc_list =
    STAILQ_HEAD_INITIALIZER(imo_gc_list);
static struct task imo_gc_task = TASK_INITIALIZER(0, inp_gcmoptions, NULL);
193 
#ifdef KTR
/*
 * Report whether the interface behind a group record has gone away.
 *
 * The answer is taken from the link-layer membership (inm_ifma): a NULL
 * ifma_ifp there means the ifnet layer detached the interface.
 * Returns 1 if detached, 0 otherwise. While the interface is still
 * attached, assert that it is the same ifp that netinet recorded.
 */
static int __inline
inm_is_ifp_detached(const struct in_multi *inm)
{
	struct ifnet *ll_ifp;

	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));

	ll_ifp = inm->inm_ifma->ifma_ifp;
	if (ll_ifp == NULL)
		return (1);

	/* netinet and net must agree on which interface this is. */
	KASSERT(inm->inm_ifp == ll_ifp, ("%s: bad ifp", __func__));
	return (0);
}
#endif
218 
219 /*
220  * Initialize an in_mfilter structure to a known state at t0, t1
221  * with an empty source filter list.
222  */
223 static __inline void
224 imf_init(struct in_mfilter *imf, const int st0, const int st1)
225 {
226 	memset(imf, 0, sizeof(struct in_mfilter));
227 	RB_INIT(&imf->imf_sources);
228 	imf->imf_st[0] = st0;
229 	imf->imf_st[1] = st1;
230 }
231 
232 /*
233  * Function for looking up an in_multi record for an IPv4 multicast address
234  * on a given interface. ifp must be valid. If no record found, return NULL.
235  * The IN_MULTI_LOCK and IF_ADDR_LOCK on ifp must be held.
236  */
237 struct in_multi *
238 inm_lookup_locked(struct ifnet *ifp, const struct in_addr ina)
239 {
240 	struct ifmultiaddr *ifma;
241 	struct in_multi *inm;
242 
243 	IN_MULTI_LOCK_ASSERT();
244 	IF_ADDR_LOCK_ASSERT(ifp);
245 
246 	inm = NULL;
247 	TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) {
248 		if (ifma->ifma_addr->sa_family == AF_INET) {
249 			inm = (struct in_multi *)ifma->ifma_protospec;
250 			if (inm->inm_addr.s_addr == ina.s_addr)
251 				break;
252 			inm = NULL;
253 		}
254 	}
255 	return (inm);
256 }
257 
258 /*
259  * Wrapper for inm_lookup_locked().
260  * The IF_ADDR_LOCK will be taken on ifp and released on return.
261  */
262 struct in_multi *
263 inm_lookup(struct ifnet *ifp, const struct in_addr ina)
264 {
265 	struct in_multi *inm;
266 
267 	IN_MULTI_LOCK_ASSERT();
268 	IF_ADDR_RLOCK(ifp);
269 	inm = inm_lookup_locked(ifp, ina);
270 	IF_ADDR_RUNLOCK(ifp);
271 
272 	return (inm);
273 }
274 
275 /*
276  * Resize the ip_moptions vector to the next power-of-two minus 1.
277  * May be called with locks held; do not sleep.
278  */
279 static int
280 imo_grow(struct ip_moptions *imo)
281 {
282 	struct in_multi		**nmships;
283 	struct in_multi		**omships;
284 	struct in_mfilter	 *nmfilters;
285 	struct in_mfilter	 *omfilters;
286 	size_t			  idx;
287 	size_t			  newmax;
288 	size_t			  oldmax;
289 
290 	nmships = NULL;
291 	nmfilters = NULL;
292 	omships = imo->imo_membership;
293 	omfilters = imo->imo_mfilters;
294 	oldmax = imo->imo_max_memberships;
295 	newmax = ((oldmax + 1) * 2) - 1;
296 
297 	if (newmax <= IP_MAX_MEMBERSHIPS) {
298 		nmships = (struct in_multi **)realloc(omships,
299 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
300 		nmfilters = (struct in_mfilter *)realloc(omfilters,
301 		    sizeof(struct in_mfilter) * newmax, M_INMFILTER, M_NOWAIT);
302 		if (nmships != NULL && nmfilters != NULL) {
303 			/* Initialize newly allocated source filter heads. */
304 			for (idx = oldmax; idx < newmax; idx++) {
305 				imf_init(&nmfilters[idx], MCAST_UNDEFINED,
306 				    MCAST_EXCLUDE);
307 			}
308 			imo->imo_max_memberships = newmax;
309 			imo->imo_membership = nmships;
310 			imo->imo_mfilters = nmfilters;
311 		}
312 	}
313 
314 	if (nmships == NULL || nmfilters == NULL) {
315 		if (nmships != NULL)
316 			free(nmships, M_IPMOPTS);
317 		if (nmfilters != NULL)
318 			free(nmfilters, M_INMFILTER);
319 		return (ETOOMANYREFS);
320 	}
321 
322 	return (0);
323 }
324 
325 /*
326  * Find an IPv4 multicast group entry for this ip_moptions instance
327  * which matches the specified group, and optionally an interface.
328  * Return its index into the array, or -1 if not found.
329  */
330 static size_t
331 imo_match_group(const struct ip_moptions *imo, const struct ifnet *ifp,
332     const struct sockaddr *group)
333 {
334 	const struct sockaddr_in *gsin;
335 	struct in_multi	**pinm;
336 	int		  idx;
337 	int		  nmships;
338 
339 	gsin = (const struct sockaddr_in *)group;
340 
341 	/* The imo_membership array may be lazy allocated. */
342 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
343 		return (-1);
344 
345 	nmships = imo->imo_num_memberships;
346 	pinm = &imo->imo_membership[0];
347 	for (idx = 0; idx < nmships; idx++, pinm++) {
348 		if (*pinm == NULL)
349 			continue;
350 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
351 		    in_hosteq((*pinm)->inm_addr, gsin->sin_addr)) {
352 			break;
353 		}
354 	}
355 	if (idx >= nmships)
356 		idx = -1;
357 
358 	return (idx);
359 }
360 
361 /*
362  * Find an IPv4 multicast source entry for this imo which matches
363  * the given group index for this socket, and source address.
364  *
365  * NOTE: This does not check if the entry is in-mode, merely if
366  * it exists, which may not be the desired behaviour.
367  */
368 static struct in_msource *
369 imo_match_source(const struct ip_moptions *imo, const size_t gidx,
370     const struct sockaddr *src)
371 {
372 	struct ip_msource	 find;
373 	struct in_mfilter	*imf;
374 	struct ip_msource	*ims;
375 	const sockunion_t	*psa;
376 
377 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
378 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
379 	    ("%s: invalid index %d\n", __func__, (int)gidx));
380 
381 	/* The imo_mfilters array may be lazy allocated. */
382 	if (imo->imo_mfilters == NULL)
383 		return (NULL);
384 	imf = &imo->imo_mfilters[gidx];
385 
386 	/* Source trees are keyed in host byte order. */
387 	psa = (const sockunion_t *)src;
388 	find.ims_haddr = ntohl(psa->sin.sin_addr.s_addr);
389 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
390 
391 	return ((struct in_msource *)ims);
392 }
393 
394 /*
395  * Perform filtering for multicast datagrams on a socket by group and source.
396  *
397  * Returns 0 if a datagram should be allowed through, or various error codes
398  * if the socket was not a member of the group, or the source was muted, etc.
399  */
400 int
401 imo_multi_filter(const struct ip_moptions *imo, const struct ifnet *ifp,
402     const struct sockaddr *group, const struct sockaddr *src)
403 {
404 	size_t gidx;
405 	struct in_msource *ims;
406 	int mode;
407 
408 	KASSERT(ifp != NULL, ("%s: null ifp", __func__));
409 
410 	gidx = imo_match_group(imo, ifp, group);
411 	if (gidx == -1)
412 		return (MCAST_NOTGMEMBER);
413 
414 	/*
415 	 * Check if the source was included in an (S,G) join.
416 	 * Allow reception on exclusive memberships by default,
417 	 * reject reception on inclusive memberships by default.
418 	 * Exclude source only if an in-mode exclude filter exists.
419 	 * Include source only if an in-mode include filter exists.
420 	 * NOTE: We are comparing group state here at IGMP t1 (now)
421 	 * with socket-layer t0 (since last downcall).
422 	 */
423 	mode = imo->imo_mfilters[gidx].imf_st[1];
424 	ims = imo_match_source(imo, gidx, src);
425 
426 	if ((ims == NULL && mode == MCAST_INCLUDE) ||
427 	    (ims != NULL && ims->imsl_st[0] != mode))
428 		return (MCAST_NOTSMEMBER);
429 
430 	return (MCAST_PASS);
431 }
432 
433 /*
434  * Find and return a reference to an in_multi record for (ifp, group),
435  * and bump its reference count.
436  * If one does not exist, try to allocate it, and update link-layer multicast
437  * filters on ifp to listen for group.
438  * Assumes the IN_MULTI lock is held across the call.
439  * Return 0 if successful, otherwise return an appropriate error code.
440  */
441 static int
442 in_getmulti(struct ifnet *ifp, const struct in_addr *group,
443     struct in_multi **pinm)
444 {
445 	struct sockaddr_in	 gsin;
446 	struct ifmultiaddr	*ifma;
447 	struct in_ifinfo	*ii;
448 	struct in_multi		*inm;
449 	int error;
450 
451 	IN_MULTI_LOCK_ASSERT();
452 
453 	ii = (struct in_ifinfo *)ifp->if_afdata[AF_INET];
454 
455 	inm = inm_lookup(ifp, *group);
456 	if (inm != NULL) {
457 		/*
458 		 * If we already joined this group, just bump the
459 		 * refcount and return it.
460 		 */
461 		KASSERT(inm->inm_refcount >= 1,
462 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
463 		++inm->inm_refcount;
464 		*pinm = inm;
465 		return (0);
466 	}
467 
468 	memset(&gsin, 0, sizeof(gsin));
469 	gsin.sin_family = AF_INET;
470 	gsin.sin_len = sizeof(struct sockaddr_in);
471 	gsin.sin_addr = *group;
472 
473 	/*
474 	 * Check if a link-layer group is already associated
475 	 * with this network-layer group on the given ifnet.
476 	 */
477 	error = if_addmulti(ifp, (struct sockaddr *)&gsin, &ifma);
478 	if (error != 0)
479 		return (error);
480 
481 	/* XXX ifma_protospec must be covered by IF_ADDR_LOCK */
482 	IF_ADDR_WLOCK(ifp);
483 
484 	/*
485 	 * If something other than netinet is occupying the link-layer
486 	 * group, print a meaningful error message and back out of
487 	 * the allocation.
488 	 * Otherwise, bump the refcount on the existing network-layer
489 	 * group association and return it.
490 	 */
491 	if (ifma->ifma_protospec != NULL) {
492 		inm = (struct in_multi *)ifma->ifma_protospec;
493 #ifdef INVARIANTS
494 		KASSERT(ifma->ifma_addr != NULL, ("%s: no ifma_addr",
495 		    __func__));
496 		KASSERT(ifma->ifma_addr->sa_family == AF_INET,
497 		    ("%s: ifma not AF_INET", __func__));
498 		KASSERT(inm != NULL, ("%s: no ifma_protospec", __func__));
499 		if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
500 		    !in_hosteq(inm->inm_addr, *group)) {
501 			char addrbuf[INET_ADDRSTRLEN];
502 
503 			panic("%s: ifma %p is inconsistent with %p (%s)",
504 			    __func__, ifma, inm, inet_ntoa_r(*group, addrbuf));
505 		}
506 #endif
507 		++inm->inm_refcount;
508 		*pinm = inm;
509 		IF_ADDR_WUNLOCK(ifp);
510 		return (0);
511 	}
512 
513 	IF_ADDR_WLOCK_ASSERT(ifp);
514 
515 	/*
516 	 * A new in_multi record is needed; allocate and initialize it.
517 	 * We DO NOT perform an IGMP join as the in_ layer may need to
518 	 * push an initial source list down to IGMP to support SSM.
519 	 *
520 	 * The initial source filter state is INCLUDE, {} as per the RFC.
521 	 */
522 	inm = malloc(sizeof(*inm), M_IPMADDR, M_NOWAIT | M_ZERO);
523 	if (inm == NULL) {
524 		IF_ADDR_WUNLOCK(ifp);
525 		if_delmulti_ifma(ifma);
526 		return (ENOMEM);
527 	}
528 	inm->inm_addr = *group;
529 	inm->inm_ifp = ifp;
530 	inm->inm_igi = ii->ii_igmp;
531 	inm->inm_ifma = ifma;
532 	inm->inm_refcount = 1;
533 	inm->inm_state = IGMP_NOT_MEMBER;
534 	mbufq_init(&inm->inm_scq, IGMP_MAX_STATE_CHANGES);
535 	inm->inm_st[0].iss_fmode = MCAST_UNDEFINED;
536 	inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
537 	RB_INIT(&inm->inm_srcs);
538 
539 	ifma->ifma_protospec = inm;
540 
541 	*pinm = inm;
542 
543 	IF_ADDR_WUNLOCK(ifp);
544 	return (0);
545 }
546 
547 /*
548  * Drop a reference to an in_multi record.
549  *
550  * If the refcount drops to 0, free the in_multi record and
551  * delete the underlying link-layer membership.
552  */
553 void
554 inm_release_locked(struct in_multi *inm)
555 {
556 	struct ifmultiaddr *ifma;
557 
558 	IN_MULTI_LOCK_ASSERT();
559 
560 	CTR2(KTR_IGMPV3, "%s: refcount is %d", __func__, inm->inm_refcount);
561 
562 	if (--inm->inm_refcount > 0) {
563 		CTR2(KTR_IGMPV3, "%s: refcount is now %d", __func__,
564 		    inm->inm_refcount);
565 		return;
566 	}
567 
568 	CTR2(KTR_IGMPV3, "%s: freeing inm %p", __func__, inm);
569 
570 	ifma = inm->inm_ifma;
571 
572 	/* XXX this access is not covered by IF_ADDR_LOCK */
573 	CTR2(KTR_IGMPV3, "%s: purging ifma %p", __func__, ifma);
574 	KASSERT(ifma->ifma_protospec == inm,
575 	    ("%s: ifma_protospec != inm", __func__));
576 	ifma->ifma_protospec = NULL;
577 
578 	inm_purge(inm);
579 
580 	free(inm, M_IPMADDR);
581 
582 	if_delmulti_ifma(ifma);
583 }
584 
585 /*
586  * Clear recorded source entries for a group.
587  * Used by the IGMP code. Caller must hold the IN_MULTI lock.
588  * FIXME: Should reap.
589  */
590 void
591 inm_clear_recorded(struct in_multi *inm)
592 {
593 	struct ip_msource	*ims;
594 
595 	IN_MULTI_LOCK_ASSERT();
596 
597 	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
598 		if (ims->ims_stp) {
599 			ims->ims_stp = 0;
600 			--inm->inm_st[1].iss_rec;
601 		}
602 	}
603 	KASSERT(inm->inm_st[1].iss_rec == 0,
604 	    ("%s: iss_rec %d not 0", __func__, inm->inm_st[1].iss_rec));
605 }
606 
607 /*
608  * Record a source as pending for a Source-Group IGMPv3 query.
609  * This lives here as it modifies the shared tree.
610  *
611  * inm is the group descriptor.
612  * naddr is the address of the source to record in network-byte order.
613  *
614  * If the net.inet.igmp.sgalloc sysctl is non-zero, we will
615  * lazy-allocate a source node in response to an SG query.
616  * Otherwise, no allocation is performed. This saves some memory
617  * with the trade-off that the source will not be reported to the
618  * router if joined in the window between the query response and
619  * the group actually being joined on the local host.
620  *
621  * VIMAGE: XXX: Currently the igmp_sgalloc feature has been removed.
622  * This turns off the allocation of a recorded source entry if
623  * the group has not been joined.
624  *
625  * Return 0 if the source didn't exist or was already marked as recorded.
626  * Return 1 if the source was marked as recorded by this function.
627  * Return <0 if any error occurred (negated errno code).
628  */
629 int
630 inm_record_source(struct in_multi *inm, const in_addr_t naddr)
631 {
632 	struct ip_msource	 find;
633 	struct ip_msource	*ims, *nims;
634 
635 	IN_MULTI_LOCK_ASSERT();
636 
637 	find.ims_haddr = ntohl(naddr);
638 	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
639 	if (ims && ims->ims_stp)
640 		return (0);
641 	if (ims == NULL) {
642 		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
643 			return (-ENOSPC);
644 		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
645 		    M_NOWAIT | M_ZERO);
646 		if (nims == NULL)
647 			return (-ENOMEM);
648 		nims->ims_haddr = find.ims_haddr;
649 		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
650 		++inm->inm_nsrc;
651 		ims = nims;
652 	}
653 
654 	/*
655 	 * Mark the source as recorded and update the recorded
656 	 * source count.
657 	 */
658 	++ims->ims_stp;
659 	++inm->inm_st[1].iss_rec;
660 
661 	return (1);
662 }
663 
664 /*
665  * Return a pointer to an in_msource owned by an in_mfilter,
666  * given its source address.
667  * Lazy-allocate if needed. If this is a new entry its filter state is
668  * undefined at t0.
669  *
670  * imf is the filter set being modified.
671  * haddr is the source address in *host* byte-order.
672  *
673  * SMPng: May be called with locks held; malloc must not block.
674  */
675 static int
676 imf_get_source(struct in_mfilter *imf, const struct sockaddr_in *psin,
677     struct in_msource **plims)
678 {
679 	struct ip_msource	 find;
680 	struct ip_msource	*ims, *nims;
681 	struct in_msource	*lims;
682 	int			 error;
683 
684 	error = 0;
685 	ims = NULL;
686 	lims = NULL;
687 
688 	/* key is host byte order */
689 	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
690 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
691 	lims = (struct in_msource *)ims;
692 	if (lims == NULL) {
693 		if (imf->imf_nsrc == in_mcast_maxsocksrc)
694 			return (ENOSPC);
695 		nims = malloc(sizeof(struct in_msource), M_INMFILTER,
696 		    M_NOWAIT | M_ZERO);
697 		if (nims == NULL)
698 			return (ENOMEM);
699 		lims = (struct in_msource *)nims;
700 		lims->ims_haddr = find.ims_haddr;
701 		lims->imsl_st[0] = MCAST_UNDEFINED;
702 		RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
703 		++imf->imf_nsrc;
704 	}
705 
706 	*plims = lims;
707 
708 	return (error);
709 }
710 
711 /*
712  * Graft a source entry into an existing socket-layer filter set,
713  * maintaining any required invariants and checking allocations.
714  *
715  * The source is marked as being in the new filter mode at t1.
716  *
717  * Return the pointer to the new node, otherwise return NULL.
718  */
719 static struct in_msource *
720 imf_graft(struct in_mfilter *imf, const uint8_t st1,
721     const struct sockaddr_in *psin)
722 {
723 	struct ip_msource	*nims;
724 	struct in_msource	*lims;
725 
726 	nims = malloc(sizeof(struct in_msource), M_INMFILTER,
727 	    M_NOWAIT | M_ZERO);
728 	if (nims == NULL)
729 		return (NULL);
730 	lims = (struct in_msource *)nims;
731 	lims->ims_haddr = ntohl(psin->sin_addr.s_addr);
732 	lims->imsl_st[0] = MCAST_UNDEFINED;
733 	lims->imsl_st[1] = st1;
734 	RB_INSERT(ip_msource_tree, &imf->imf_sources, nims);
735 	++imf->imf_nsrc;
736 
737 	return (lims);
738 }
739 
740 /*
741  * Prune a source entry from an existing socket-layer filter set,
742  * maintaining any required invariants and checking allocations.
743  *
744  * The source is marked as being left at t1, it is not freed.
745  *
746  * Return 0 if no error occurred, otherwise return an errno value.
747  */
748 static int
749 imf_prune(struct in_mfilter *imf, const struct sockaddr_in *psin)
750 {
751 	struct ip_msource	 find;
752 	struct ip_msource	*ims;
753 	struct in_msource	*lims;
754 
755 	/* key is host byte order */
756 	find.ims_haddr = ntohl(psin->sin_addr.s_addr);
757 	ims = RB_FIND(ip_msource_tree, &imf->imf_sources, &find);
758 	if (ims == NULL)
759 		return (ENOENT);
760 	lims = (struct in_msource *)ims;
761 	lims->imsl_st[1] = MCAST_UNDEFINED;
762 	return (0);
763 }
764 
765 /*
766  * Revert socket-layer filter set deltas at t1 to t0 state.
767  */
768 static void
769 imf_rollback(struct in_mfilter *imf)
770 {
771 	struct ip_msource	*ims, *tims;
772 	struct in_msource	*lims;
773 
774 	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
775 		lims = (struct in_msource *)ims;
776 		if (lims->imsl_st[0] == lims->imsl_st[1]) {
777 			/* no change at t1 */
778 			continue;
779 		} else if (lims->imsl_st[0] != MCAST_UNDEFINED) {
780 			/* revert change to existing source at t1 */
781 			lims->imsl_st[1] = lims->imsl_st[0];
782 		} else {
783 			/* revert source added t1 */
784 			CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
785 			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
786 			free(ims, M_INMFILTER);
787 			imf->imf_nsrc--;
788 		}
789 	}
790 	imf->imf_st[1] = imf->imf_st[0];
791 }
792 
793 /*
794  * Mark socket-layer filter set as INCLUDE {} at t1.
795  */
796 static void
797 imf_leave(struct in_mfilter *imf)
798 {
799 	struct ip_msource	*ims;
800 	struct in_msource	*lims;
801 
802 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
803 		lims = (struct in_msource *)ims;
804 		lims->imsl_st[1] = MCAST_UNDEFINED;
805 	}
806 	imf->imf_st[1] = MCAST_INCLUDE;
807 }
808 
809 /*
810  * Mark socket-layer filter set deltas as committed.
811  */
812 static void
813 imf_commit(struct in_mfilter *imf)
814 {
815 	struct ip_msource	*ims;
816 	struct in_msource	*lims;
817 
818 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
819 		lims = (struct in_msource *)ims;
820 		lims->imsl_st[0] = lims->imsl_st[1];
821 	}
822 	imf->imf_st[0] = imf->imf_st[1];
823 }
824 
825 /*
826  * Reap unreferenced sources from socket-layer filter set.
827  */
828 static void
829 imf_reap(struct in_mfilter *imf)
830 {
831 	struct ip_msource	*ims, *tims;
832 	struct in_msource	*lims;
833 
834 	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
835 		lims = (struct in_msource *)ims;
836 		if ((lims->imsl_st[0] == MCAST_UNDEFINED) &&
837 		    (lims->imsl_st[1] == MCAST_UNDEFINED)) {
838 			CTR2(KTR_IGMPV3, "%s: free lims %p", __func__, ims);
839 			RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
840 			free(ims, M_INMFILTER);
841 			imf->imf_nsrc--;
842 		}
843 	}
844 }
845 
846 /*
847  * Purge socket-layer filter set.
848  */
849 static void
850 imf_purge(struct in_mfilter *imf)
851 {
852 	struct ip_msource	*ims, *tims;
853 
854 	RB_FOREACH_SAFE(ims, ip_msource_tree, &imf->imf_sources, tims) {
855 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
856 		RB_REMOVE(ip_msource_tree, &imf->imf_sources, ims);
857 		free(ims, M_INMFILTER);
858 		imf->imf_nsrc--;
859 	}
860 	imf->imf_st[0] = imf->imf_st[1] = MCAST_UNDEFINED;
861 	KASSERT(RB_EMPTY(&imf->imf_sources),
862 	    ("%s: imf_sources not empty", __func__));
863 }
864 
865 /*
866  * Look up a source filter entry for a multicast group.
867  *
868  * inm is the group descriptor to work with.
869  * haddr is the host-byte-order IPv4 address to look up.
870  * noalloc may be non-zero to suppress allocation of sources.
871  * *pims will be set to the address of the retrieved or allocated source.
872  *
873  * SMPng: NOTE: may be called with locks held.
874  * Return 0 if successful, otherwise return a non-zero error code.
875  */
876 static int
877 inm_get_source(struct in_multi *inm, const in_addr_t haddr,
878     const int noalloc, struct ip_msource **pims)
879 {
880 	struct ip_msource	 find;
881 	struct ip_msource	*ims, *nims;
882 
883 	find.ims_haddr = haddr;
884 	ims = RB_FIND(ip_msource_tree, &inm->inm_srcs, &find);
885 	if (ims == NULL && !noalloc) {
886 		if (inm->inm_nsrc == in_mcast_maxgrpsrc)
887 			return (ENOSPC);
888 		nims = malloc(sizeof(struct ip_msource), M_IPMSOURCE,
889 		    M_NOWAIT | M_ZERO);
890 		if (nims == NULL)
891 			return (ENOMEM);
892 		nims->ims_haddr = haddr;
893 		RB_INSERT(ip_msource_tree, &inm->inm_srcs, nims);
894 		++inm->inm_nsrc;
895 		ims = nims;
896 #ifdef KTR
897 		CTR3(KTR_IGMPV3, "%s: allocated 0x%08x as %p", __func__,
898 		    haddr, ims);
899 #endif
900 	}
901 
902 	*pims = ims;
903 	return (0);
904 }
905 
906 /*
907  * Merge socket-layer source into IGMP-layer source.
908  * If rollback is non-zero, perform the inverse of the merge.
909  */
910 static void
911 ims_merge(struct ip_msource *ims, const struct in_msource *lims,
912     const int rollback)
913 {
914 	int n = rollback ? -1 : 1;
915 
916 	if (lims->imsl_st[0] == MCAST_EXCLUDE) {
917 		CTR3(KTR_IGMPV3, "%s: t1 ex -= %d on 0x%08x",
918 		    __func__, n, ims->ims_haddr);
919 		ims->ims_st[1].ex -= n;
920 	} else if (lims->imsl_st[0] == MCAST_INCLUDE) {
921 		CTR3(KTR_IGMPV3, "%s: t1 in -= %d on 0x%08x",
922 		    __func__, n, ims->ims_haddr);
923 		ims->ims_st[1].in -= n;
924 	}
925 
926 	if (lims->imsl_st[1] == MCAST_EXCLUDE) {
927 		CTR3(KTR_IGMPV3, "%s: t1 ex += %d on 0x%08x",
928 		    __func__, n, ims->ims_haddr);
929 		ims->ims_st[1].ex += n;
930 	} else if (lims->imsl_st[1] == MCAST_INCLUDE) {
931 		CTR3(KTR_IGMPV3, "%s: t1 in += %d on 0x%08x",
932 		    __func__, n, ims->ims_haddr);
933 		ims->ims_st[1].in += n;
934 	}
935 }
936 
937 /*
938  * Atomically update the global in_multi state, when a membership's
939  * filter list is being updated in any way.
940  *
941  * imf is the per-inpcb-membership group filter pointer.
942  * A fake imf may be passed for in-kernel consumers.
943  *
944  * XXX This is a candidate for a set-symmetric-difference style loop
945  * which would eliminate the repeated lookup from root of ims nodes,
946  * as they share the same key space.
947  *
948  * If any error occurred this function will back out of refcounts
949  * and return a non-zero value.
950  */
951 static int
952 inm_merge(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
953 {
954 	struct ip_msource	*ims, *nims;
955 	struct in_msource	*lims;
956 	int			 schanged, error;
957 	int			 nsrc0, nsrc1;
958 
959 	schanged = 0;
960 	error = 0;
961 	nsrc1 = nsrc0 = 0;
962 
963 	/*
964 	 * Update the source filters first, as this may fail.
965 	 * Maintain count of in-mode filters at t0, t1. These are
966 	 * used to work out if we transition into ASM mode or not.
967 	 * Maintain a count of source filters whose state was
968 	 * actually modified by this operation.
969 	 */
970 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
971 		lims = (struct in_msource *)ims;
972 		if (lims->imsl_st[0] == imf->imf_st[0]) nsrc0++;
973 		if (lims->imsl_st[1] == imf->imf_st[1]) nsrc1++;
974 		if (lims->imsl_st[0] == lims->imsl_st[1]) continue;
975 		error = inm_get_source(inm, lims->ims_haddr, 0, &nims);
976 		++schanged;
977 		if (error)
978 			break;
979 		ims_merge(nims, lims, 0);
980 	}
981 	if (error) {
982 		struct ip_msource *bims;
983 
984 		RB_FOREACH_REVERSE_FROM(ims, ip_msource_tree, nims) {
985 			lims = (struct in_msource *)ims;
986 			if (lims->imsl_st[0] == lims->imsl_st[1])
987 				continue;
988 			(void)inm_get_source(inm, lims->ims_haddr, 1, &bims);
989 			if (bims == NULL)
990 				continue;
991 			ims_merge(bims, lims, 1);
992 		}
993 		goto out_reap;
994 	}
995 
996 	CTR3(KTR_IGMPV3, "%s: imf filters in-mode: %d at t0, %d at t1",
997 	    __func__, nsrc0, nsrc1);
998 
999 	/* Handle transition between INCLUDE {n} and INCLUDE {} on socket. */
1000 	if (imf->imf_st[0] == imf->imf_st[1] &&
1001 	    imf->imf_st[1] == MCAST_INCLUDE) {
1002 		if (nsrc1 == 0) {
1003 			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1004 			--inm->inm_st[1].iss_in;
1005 		}
1006 	}
1007 
1008 	/* Handle filter mode transition on socket. */
1009 	if (imf->imf_st[0] != imf->imf_st[1]) {
1010 		CTR3(KTR_IGMPV3, "%s: imf transition %d to %d",
1011 		    __func__, imf->imf_st[0], imf->imf_st[1]);
1012 
1013 		if (imf->imf_st[0] == MCAST_EXCLUDE) {
1014 			CTR1(KTR_IGMPV3, "%s: --ex on inm at t1", __func__);
1015 			--inm->inm_st[1].iss_ex;
1016 		} else if (imf->imf_st[0] == MCAST_INCLUDE) {
1017 			CTR1(KTR_IGMPV3, "%s: --in on inm at t1", __func__);
1018 			--inm->inm_st[1].iss_in;
1019 		}
1020 
1021 		if (imf->imf_st[1] == MCAST_EXCLUDE) {
1022 			CTR1(KTR_IGMPV3, "%s: ex++ on inm at t1", __func__);
1023 			inm->inm_st[1].iss_ex++;
1024 		} else if (imf->imf_st[1] == MCAST_INCLUDE && nsrc1 > 0) {
1025 			CTR1(KTR_IGMPV3, "%s: in++ on inm at t1", __func__);
1026 			inm->inm_st[1].iss_in++;
1027 		}
1028 	}
1029 
1030 	/*
1031 	 * Track inm filter state in terms of listener counts.
1032 	 * If there are any exclusive listeners, stack-wide
1033 	 * membership is exclusive.
1034 	 * Otherwise, if only inclusive listeners, stack-wide is inclusive.
1035 	 * If no listeners remain, state is undefined at t1,
1036 	 * and the IGMP lifecycle for this group should finish.
1037 	 */
1038 	if (inm->inm_st[1].iss_ex > 0) {
1039 		CTR1(KTR_IGMPV3, "%s: transition to EX", __func__);
1040 		inm->inm_st[1].iss_fmode = MCAST_EXCLUDE;
1041 	} else if (inm->inm_st[1].iss_in > 0) {
1042 		CTR1(KTR_IGMPV3, "%s: transition to IN", __func__);
1043 		inm->inm_st[1].iss_fmode = MCAST_INCLUDE;
1044 	} else {
1045 		CTR1(KTR_IGMPV3, "%s: transition to UNDEF", __func__);
1046 		inm->inm_st[1].iss_fmode = MCAST_UNDEFINED;
1047 	}
1048 
1049 	/* Decrement ASM listener count on transition out of ASM mode. */
1050 	if (imf->imf_st[0] == MCAST_EXCLUDE && nsrc0 == 0) {
1051 		if ((imf->imf_st[1] != MCAST_EXCLUDE) ||
1052 		    (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 > 0)) {
1053 			CTR1(KTR_IGMPV3, "%s: --asm on inm at t1", __func__);
1054 			--inm->inm_st[1].iss_asm;
1055 		}
1056 	}
1057 
1058 	/* Increment ASM listener count on transition to ASM mode. */
1059 	if (imf->imf_st[1] == MCAST_EXCLUDE && nsrc1 == 0) {
1060 		CTR1(KTR_IGMPV3, "%s: asm++ on inm at t1", __func__);
1061 		inm->inm_st[1].iss_asm++;
1062 	}
1063 
1064 	CTR3(KTR_IGMPV3, "%s: merged imf %p to inm %p", __func__, imf, inm);
1065 	inm_print(inm);
1066 
1067 out_reap:
1068 	if (schanged > 0) {
1069 		CTR1(KTR_IGMPV3, "%s: sources changed; reaping", __func__);
1070 		inm_reap(inm);
1071 	}
1072 	return (error);
1073 }
1074 
1075 /*
1076  * Mark an in_multi's filter set deltas as committed.
1077  * Called by IGMP after a state change has been enqueued.
1078  */
1079 void
1080 inm_commit(struct in_multi *inm)
1081 {
1082 	struct ip_msource	*ims;
1083 
1084 	CTR2(KTR_IGMPV3, "%s: commit inm %p", __func__, inm);
1085 	CTR1(KTR_IGMPV3, "%s: pre commit:", __func__);
1086 	inm_print(inm);
1087 
1088 	RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
1089 		ims->ims_st[0] = ims->ims_st[1];
1090 	}
1091 	inm->inm_st[0] = inm->inm_st[1];
1092 }
1093 
1094 /*
1095  * Reap unreferenced nodes from an in_multi's filter set.
1096  */
1097 static void
1098 inm_reap(struct in_multi *inm)
1099 {
1100 	struct ip_msource	*ims, *tims;
1101 
1102 	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1103 		if (ims->ims_st[0].ex > 0 || ims->ims_st[0].in > 0 ||
1104 		    ims->ims_st[1].ex > 0 || ims->ims_st[1].in > 0 ||
1105 		    ims->ims_stp != 0)
1106 			continue;
1107 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1108 		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1109 		free(ims, M_IPMSOURCE);
1110 		inm->inm_nsrc--;
1111 	}
1112 }
1113 
1114 /*
1115  * Purge all source nodes from an in_multi's filter set.
1116  */
1117 static void
1118 inm_purge(struct in_multi *inm)
1119 {
1120 	struct ip_msource	*ims, *tims;
1121 
1122 	RB_FOREACH_SAFE(ims, ip_msource_tree, &inm->inm_srcs, tims) {
1123 		CTR2(KTR_IGMPV3, "%s: free ims %p", __func__, ims);
1124 		RB_REMOVE(ip_msource_tree, &inm->inm_srcs, ims);
1125 		free(ims, M_IPMSOURCE);
1126 		inm->inm_nsrc--;
1127 	}
1128 }
1129 
/*
 * Join a multicast group; unlocked entry point.
 *
 * Takes IN_MULTI_LOCK around in_joingroup_locked() and returns its result.
 *
 * SMPng: XXX: in_joingroup() is called from in_control() when Giant
 * is not held. Fortunately, ifp is unlikely to have been detached
 * at this point, so we assume it's OK to recurse.
 */
int
in_joingroup(struct ifnet *ifp, const struct in_addr *gina,
    /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
{
	int rv;

	IN_MULTI_LOCK();
	rv = in_joingroup_locked(ifp, gina, imf, pinm);
	IN_MULTI_UNLOCK();
	return (rv);
}
1149 
1150 /*
1151  * Join a multicast group; real entry point.
1152  *
1153  * Only preserves atomicity at inm level.
1154  * NOTE: imf argument cannot be const due to sys/tree.h limitations.
1155  *
1156  * If the IGMP downcall fails, the group is not joined, and an error
1157  * code is returned.
1158  */
1159 int
1160 in_joingroup_locked(struct ifnet *ifp, const struct in_addr *gina,
1161     /*const*/ struct in_mfilter *imf, struct in_multi **pinm)
1162 {
1163 	struct in_mfilter	 timf;
1164 	struct in_multi		*inm;
1165 	int			 error;
1166 
1167 	IN_MULTI_LOCK_ASSERT();
1168 
1169 	CTR4(KTR_IGMPV3, "%s: join 0x%08x on %p(%s))", __func__,
1170 	    ntohl(gina->s_addr), ifp, ifp->if_xname);
1171 
1172 	error = 0;
1173 	inm = NULL;
1174 
1175 	/*
1176 	 * If no imf was specified (i.e. kernel consumer),
1177 	 * fake one up and assume it is an ASM join.
1178 	 */
1179 	if (imf == NULL) {
1180 		imf_init(&timf, MCAST_UNDEFINED, MCAST_EXCLUDE);
1181 		imf = &timf;
1182 	}
1183 
1184 	error = in_getmulti(ifp, gina, &inm);
1185 	if (error) {
1186 		CTR1(KTR_IGMPV3, "%s: in_getmulti() failure", __func__);
1187 		return (error);
1188 	}
1189 
1190 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1191 	error = inm_merge(inm, imf);
1192 	if (error) {
1193 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1194 		goto out_inm_release;
1195 	}
1196 
1197 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1198 	error = igmp_change_state(inm);
1199 	if (error) {
1200 		CTR1(KTR_IGMPV3, "%s: failed to update source", __func__);
1201 		goto out_inm_release;
1202 	}
1203 
1204 out_inm_release:
1205 	if (error) {
1206 		CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1207 		inm_release_locked(inm);
1208 	} else {
1209 		*pinm = inm;
1210 	}
1211 
1212 	return (error);
1213 }
1214 
/*
 * Leave a multicast group; unlocked entry point.
 * Takes IN_MULTI_LOCK around in_leavegroup_locked() and returns its result.
 */
int
in_leavegroup(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
{
	int rv;

	IN_MULTI_LOCK();
	rv = in_leavegroup_locked(inm, imf);
	IN_MULTI_UNLOCK();
	return (rv);
}
1229 
1230 /*
1231  * Leave a multicast group; real entry point.
1232  * All source filters will be expunged.
1233  *
1234  * Only preserves atomicity at inm level.
1235  *
1236  * Holding the write lock for the INP which contains imf
1237  * is highly advisable. We can't assert for it as imf does not
1238  * contain a back-pointer to the owning inp.
1239  *
1240  * Note: This is not the same as inm_release(*) as this function also
1241  * makes a state change downcall into IGMP.
1242  */
1243 int
1244 in_leavegroup_locked(struct in_multi *inm, /*const*/ struct in_mfilter *imf)
1245 {
1246 	struct in_mfilter	 timf;
1247 	int			 error;
1248 
1249 	error = 0;
1250 
1251 	IN_MULTI_LOCK_ASSERT();
1252 
1253 	CTR5(KTR_IGMPV3, "%s: leave inm %p, 0x%08x/%s, imf %p", __func__,
1254 	    inm, ntohl(inm->inm_addr.s_addr),
1255 	    (inm_is_ifp_detached(inm) ? "null" : inm->inm_ifp->if_xname),
1256 	    imf);
1257 
1258 	/*
1259 	 * If no imf was specified (i.e. kernel consumer),
1260 	 * fake one up and assume it is an ASM join.
1261 	 */
1262 	if (imf == NULL) {
1263 		imf_init(&timf, MCAST_EXCLUDE, MCAST_UNDEFINED);
1264 		imf = &timf;
1265 	}
1266 
1267 	/*
1268 	 * Begin state merge transaction at IGMP layer.
1269 	 *
1270 	 * As this particular invocation should not cause any memory
1271 	 * to be allocated, and there is no opportunity to roll back
1272 	 * the transaction, it MUST NOT fail.
1273 	 */
1274 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1275 	error = inm_merge(inm, imf);
1276 	KASSERT(error == 0, ("%s: failed to merge inm state", __func__));
1277 
1278 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1279 	CURVNET_SET(inm->inm_ifp->if_vnet);
1280 	error = igmp_change_state(inm);
1281 	CURVNET_RESTORE();
1282 	if (error)
1283 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1284 
1285 	CTR2(KTR_IGMPV3, "%s: dropping ref on %p", __func__, inm);
1286 	inm_release_locked(inm);
1287 
1288 	return (error);
1289 }
1290 
1291 /*#ifndef BURN_BRIDGES*/
1292 /*
1293  * Join an IPv4 multicast group in (*,G) exclusive mode.
1294  * The group must be a 224.0.0.0/24 link-scope group.
1295  * This KPI is for legacy kernel consumers only.
1296  */
1297 struct in_multi *
1298 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
1299 {
1300 	struct in_multi *pinm;
1301 	int error;
1302 #ifdef INVARIANTS
1303 	char addrbuf[INET_ADDRSTRLEN];
1304 #endif
1305 
1306 	KASSERT(IN_LOCAL_GROUP(ntohl(ap->s_addr)),
1307 	    ("%s: %s not in 224.0.0.0/24", __func__,
1308 	    inet_ntoa_r(*ap, addrbuf)));
1309 
1310 	error = in_joingroup(ifp, ap, NULL, &pinm);
1311 	if (error != 0)
1312 		pinm = NULL;
1313 
1314 	return (pinm);
1315 }
1316 
1317 /*
1318  * Leave an IPv4 multicast group, assumed to be in exclusive (*,G) mode.
1319  * This KPI is for legacy kernel consumers only.
1320  */
1321 void
1322 in_delmulti(struct in_multi *inm)
1323 {
1324 
1325 	(void)in_leavegroup(inm, NULL);
1326 }
1327 /*#endif*/
1328 
1329 /*
1330  * Block or unblock an ASM multicast source on an inpcb.
1331  * This implements the delta-based API described in RFC 3678.
1332  *
1333  * The delta-based API applies only to exclusive-mode memberships.
1334  * An IGMP downcall will be performed.
1335  *
1336  * SMPng: NOTE: Must take Giant as a join may create a new ifma.
1337  *
1338  * Return 0 if successful, otherwise return an appropriate error code.
1339  */
1340 static int
1341 inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt)
1342 {
1343 	struct group_source_req		 gsr;
1344 	sockunion_t			*gsa, *ssa;
1345 	struct ifnet			*ifp;
1346 	struct in_mfilter		*imf;
1347 	struct ip_moptions		*imo;
1348 	struct in_msource		*ims;
1349 	struct in_multi			*inm;
1350 	size_t				 idx;
1351 	uint16_t			 fmode;
1352 	int				 error, doblock;
1353 
1354 	ifp = NULL;
1355 	error = 0;
1356 	doblock = 0;
1357 
1358 	memset(&gsr, 0, sizeof(struct group_source_req));
1359 	gsa = (sockunion_t *)&gsr.gsr_group;
1360 	ssa = (sockunion_t *)&gsr.gsr_source;
1361 
1362 	switch (sopt->sopt_name) {
1363 	case IP_BLOCK_SOURCE:
1364 	case IP_UNBLOCK_SOURCE: {
1365 		struct ip_mreq_source	 mreqs;
1366 
1367 		error = sooptcopyin(sopt, &mreqs,
1368 		    sizeof(struct ip_mreq_source),
1369 		    sizeof(struct ip_mreq_source));
1370 		if (error)
1371 			return (error);
1372 
1373 		gsa->sin.sin_family = AF_INET;
1374 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1375 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1376 
1377 		ssa->sin.sin_family = AF_INET;
1378 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
1379 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1380 
1381 		if (!in_nullhost(mreqs.imr_interface))
1382 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1383 
1384 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
1385 			doblock = 1;
1386 
1387 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
1388 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
1389 		break;
1390 	    }
1391 
1392 	case MCAST_BLOCK_SOURCE:
1393 	case MCAST_UNBLOCK_SOURCE:
1394 		error = sooptcopyin(sopt, &gsr,
1395 		    sizeof(struct group_source_req),
1396 		    sizeof(struct group_source_req));
1397 		if (error)
1398 			return (error);
1399 
1400 		if (gsa->sin.sin_family != AF_INET ||
1401 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1402 			return (EINVAL);
1403 
1404 		if (ssa->sin.sin_family != AF_INET ||
1405 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1406 			return (EINVAL);
1407 
1408 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1409 			return (EADDRNOTAVAIL);
1410 
1411 		ifp = ifnet_byindex(gsr.gsr_interface);
1412 
1413 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
1414 			doblock = 1;
1415 		break;
1416 
1417 	default:
1418 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
1419 		    __func__, sopt->sopt_name);
1420 		return (EOPNOTSUPP);
1421 		break;
1422 	}
1423 
1424 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1425 		return (EINVAL);
1426 
1427 	/*
1428 	 * Check if we are actually a member of this group.
1429 	 */
1430 	imo = inp_findmoptions(inp);
1431 	idx = imo_match_group(imo, ifp, &gsa->sa);
1432 	if (idx == -1 || imo->imo_mfilters == NULL) {
1433 		error = EADDRNOTAVAIL;
1434 		goto out_inp_locked;
1435 	}
1436 
1437 	KASSERT(imo->imo_mfilters != NULL,
1438 	    ("%s: imo_mfilters not allocated", __func__));
1439 	imf = &imo->imo_mfilters[idx];
1440 	inm = imo->imo_membership[idx];
1441 
1442 	/*
1443 	 * Attempting to use the delta-based API on an
1444 	 * non exclusive-mode membership is an error.
1445 	 */
1446 	fmode = imf->imf_st[0];
1447 	if (fmode != MCAST_EXCLUDE) {
1448 		error = EINVAL;
1449 		goto out_inp_locked;
1450 	}
1451 
1452 	/*
1453 	 * Deal with error cases up-front:
1454 	 *  Asked to block, but already blocked; or
1455 	 *  Asked to unblock, but nothing to unblock.
1456 	 * If adding a new block entry, allocate it.
1457 	 */
1458 	ims = imo_match_source(imo, idx, &ssa->sa);
1459 	if ((ims != NULL && doblock) || (ims == NULL && !doblock)) {
1460 		CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent", __func__,
1461 		    ntohl(ssa->sin.sin_addr.s_addr), doblock ? "" : "not ");
1462 		error = EADDRNOTAVAIL;
1463 		goto out_inp_locked;
1464 	}
1465 
1466 	INP_WLOCK_ASSERT(inp);
1467 
1468 	/*
1469 	 * Begin state merge transaction at socket layer.
1470 	 */
1471 	if (doblock) {
1472 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
1473 		ims = imf_graft(imf, fmode, &ssa->sin);
1474 		if (ims == NULL)
1475 			error = ENOMEM;
1476 	} else {
1477 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
1478 		error = imf_prune(imf, &ssa->sin);
1479 	}
1480 
1481 	if (error) {
1482 		CTR1(KTR_IGMPV3, "%s: merge imf state failed", __func__);
1483 		goto out_imf_rollback;
1484 	}
1485 
1486 	/*
1487 	 * Begin state merge transaction at IGMP layer.
1488 	 */
1489 	IN_MULTI_LOCK();
1490 
1491 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
1492 	error = inm_merge(inm, imf);
1493 	if (error) {
1494 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
1495 		goto out_in_multi_locked;
1496 	}
1497 
1498 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
1499 	error = igmp_change_state(inm);
1500 	if (error)
1501 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
1502 
1503 out_in_multi_locked:
1504 
1505 	IN_MULTI_UNLOCK();
1506 
1507 out_imf_rollback:
1508 	if (error)
1509 		imf_rollback(imf);
1510 	else
1511 		imf_commit(imf);
1512 
1513 	imf_reap(imf);
1514 
1515 out_inp_locked:
1516 	INP_WUNLOCK(inp);
1517 	return (error);
1518 }
1519 
1520 /*
1521  * Given an inpcb, return its multicast options structure pointer.  Accepts
1522  * an unlocked inpcb pointer, but will return it locked.  May sleep.
1523  *
1524  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
1525  * SMPng: NOTE: Returns with the INP write lock held.
1526  */
1527 static struct ip_moptions *
1528 inp_findmoptions(struct inpcb *inp)
1529 {
1530 	struct ip_moptions	 *imo;
1531 	struct in_multi		**immp;
1532 	struct in_mfilter	 *imfp;
1533 	size_t			  idx;
1534 
1535 	INP_WLOCK(inp);
1536 	if (inp->inp_moptions != NULL)
1537 		return (inp->inp_moptions);
1538 
1539 	INP_WUNLOCK(inp);
1540 
1541 	imo = malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK);
1542 	immp = malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, M_IPMOPTS,
1543 	    M_WAITOK | M_ZERO);
1544 	imfp = malloc(sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
1545 	    M_INMFILTER, M_WAITOK);
1546 
1547 	imo->imo_multicast_ifp = NULL;
1548 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1549 	imo->imo_multicast_vif = -1;
1550 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1551 	imo->imo_multicast_loop = in_mcast_loop;
1552 	imo->imo_num_memberships = 0;
1553 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
1554 	imo->imo_membership = immp;
1555 
1556 	/* Initialize per-group source filters. */
1557 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++)
1558 		imf_init(&imfp[idx], MCAST_UNDEFINED, MCAST_EXCLUDE);
1559 	imo->imo_mfilters = imfp;
1560 
1561 	INP_WLOCK(inp);
1562 	if (inp->inp_moptions != NULL) {
1563 		free(imfp, M_INMFILTER);
1564 		free(immp, M_IPMOPTS);
1565 		free(imo, M_IPMOPTS);
1566 		return (inp->inp_moptions);
1567 	}
1568 	inp->inp_moptions = imo;
1569 	return (imo);
1570 }
1571 
1572 /*
1573  * Discard the IP multicast options (and source filters).  To minimize
1574  * the amount of work done while holding locks such as the INP's
1575  * pcbinfo lock (which is used in the receive path), the free
1576  * operation is performed asynchronously in a separate task.
1577  *
1578  * SMPng: NOTE: assumes INP write lock is held.
1579  */
1580 void
1581 inp_freemoptions(struct ip_moptions *imo)
1582 {
1583 
1584 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
1585 	IN_MULTI_LOCK();
1586 	STAILQ_INSERT_TAIL(&imo_gc_list, imo, imo_link);
1587 	IN_MULTI_UNLOCK();
1588 	taskqueue_enqueue(taskqueue_thread, &imo_gc_task);
1589 }
1590 
1591 static void
1592 inp_freemoptions_internal(struct ip_moptions *imo)
1593 {
1594 	struct in_mfilter	*imf;
1595 	size_t			 idx, nmships;
1596 
1597 	nmships = imo->imo_num_memberships;
1598 	for (idx = 0; idx < nmships; ++idx) {
1599 		imf = imo->imo_mfilters ? &imo->imo_mfilters[idx] : NULL;
1600 		if (imf)
1601 			imf_leave(imf);
1602 		(void)in_leavegroup(imo->imo_membership[idx], imf);
1603 		if (imf)
1604 			imf_purge(imf);
1605 	}
1606 
1607 	if (imo->imo_mfilters)
1608 		free(imo->imo_mfilters, M_INMFILTER);
1609 	free(imo->imo_membership, M_IPMOPTS);
1610 	free(imo, M_IPMOPTS);
1611 }
1612 
1613 static void
1614 inp_gcmoptions(void *context, int pending)
1615 {
1616 	struct ip_moptions *imo;
1617 
1618 	IN_MULTI_LOCK();
1619 	while (!STAILQ_EMPTY(&imo_gc_list)) {
1620 		imo = STAILQ_FIRST(&imo_gc_list);
1621 		STAILQ_REMOVE_HEAD(&imo_gc_list, imo_link);
1622 		IN_MULTI_UNLOCK();
1623 		inp_freemoptions_internal(imo);
1624 		IN_MULTI_LOCK();
1625 	}
1626 	IN_MULTI_UNLOCK();
1627 }
1628 
1629 /*
1630  * Atomically get source filters on a socket for an IPv4 multicast group.
1631  * Called with INP lock held; returns with lock released.
1632  */
1633 static int
1634 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
1635 {
1636 	struct __msfilterreq	 msfr;
1637 	sockunion_t		*gsa;
1638 	struct ifnet		*ifp;
1639 	struct ip_moptions	*imo;
1640 	struct in_mfilter	*imf;
1641 	struct ip_msource	*ims;
1642 	struct in_msource	*lims;
1643 	struct sockaddr_in	*psin;
1644 	struct sockaddr_storage	*ptss;
1645 	struct sockaddr_storage	*tss;
1646 	int			 error;
1647 	size_t			 idx, nsrcs, ncsrcs;
1648 
1649 	INP_WLOCK_ASSERT(inp);
1650 
1651 	imo = inp->inp_moptions;
1652 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
1653 
1654 	INP_WUNLOCK(inp);
1655 
1656 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1657 	    sizeof(struct __msfilterreq));
1658 	if (error)
1659 		return (error);
1660 
1661 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1662 		return (EINVAL);
1663 
1664 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1665 	if (ifp == NULL)
1666 		return (EINVAL);
1667 
1668 	INP_WLOCK(inp);
1669 
1670 	/*
1671 	 * Lookup group on the socket.
1672 	 */
1673 	gsa = (sockunion_t *)&msfr.msfr_group;
1674 	idx = imo_match_group(imo, ifp, &gsa->sa);
1675 	if (idx == -1 || imo->imo_mfilters == NULL) {
1676 		INP_WUNLOCK(inp);
1677 		return (EADDRNOTAVAIL);
1678 	}
1679 	imf = &imo->imo_mfilters[idx];
1680 
1681 	/*
1682 	 * Ignore memberships which are in limbo.
1683 	 */
1684 	if (imf->imf_st[1] == MCAST_UNDEFINED) {
1685 		INP_WUNLOCK(inp);
1686 		return (EAGAIN);
1687 	}
1688 	msfr.msfr_fmode = imf->imf_st[1];
1689 
1690 	/*
1691 	 * If the user specified a buffer, copy out the source filter
1692 	 * entries to userland gracefully.
1693 	 * We only copy out the number of entries which userland
1694 	 * has asked for, but we always tell userland how big the
1695 	 * buffer really needs to be.
1696 	 */
1697 	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
1698 		msfr.msfr_nsrcs = in_mcast_maxsocksrc;
1699 	tss = NULL;
1700 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
1701 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1702 		    M_TEMP, M_NOWAIT | M_ZERO);
1703 		if (tss == NULL) {
1704 			INP_WUNLOCK(inp);
1705 			return (ENOBUFS);
1706 		}
1707 	}
1708 
1709 	/*
1710 	 * Count number of sources in-mode at t0.
1711 	 * If buffer space exists and remains, copy out source entries.
1712 	 */
1713 	nsrcs = msfr.msfr_nsrcs;
1714 	ncsrcs = 0;
1715 	ptss = tss;
1716 	RB_FOREACH(ims, ip_msource_tree, &imf->imf_sources) {
1717 		lims = (struct in_msource *)ims;
1718 		if (lims->imsl_st[0] == MCAST_UNDEFINED ||
1719 		    lims->imsl_st[0] != imf->imf_st[0])
1720 			continue;
1721 		++ncsrcs;
1722 		if (tss != NULL && nsrcs > 0) {
1723 			psin = (struct sockaddr_in *)ptss;
1724 			psin->sin_family = AF_INET;
1725 			psin->sin_len = sizeof(struct sockaddr_in);
1726 			psin->sin_addr.s_addr = htonl(lims->ims_haddr);
1727 			psin->sin_port = 0;
1728 			++ptss;
1729 			--nsrcs;
1730 		}
1731 	}
1732 
1733 	INP_WUNLOCK(inp);
1734 
1735 	if (tss != NULL) {
1736 		error = copyout(tss, msfr.msfr_srcs,
1737 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1738 		free(tss, M_TEMP);
1739 		if (error)
1740 			return (error);
1741 	}
1742 
1743 	msfr.msfr_nsrcs = ncsrcs;
1744 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
1745 
1746 	return (error);
1747 }
1748 
1749 /*
1750  * Return the IP multicast options in response to user getsockopt().
1751  */
1752 int
1753 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
1754 {
1755 	struct rm_priotracker	 in_ifa_tracker;
1756 	struct ip_mreqn		 mreqn;
1757 	struct ip_moptions	*imo;
1758 	struct ifnet		*ifp;
1759 	struct in_ifaddr	*ia;
1760 	int			 error, optval;
1761 	u_char			 coptval;
1762 
1763 	INP_WLOCK(inp);
1764 	imo = inp->inp_moptions;
1765 	/*
1766 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1767 	 * or is a divert socket, reject it.
1768 	 */
1769 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1770 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1771 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
1772 		INP_WUNLOCK(inp);
1773 		return (EOPNOTSUPP);
1774 	}
1775 
1776 	error = 0;
1777 	switch (sopt->sopt_name) {
1778 	case IP_MULTICAST_VIF:
1779 		if (imo != NULL)
1780 			optval = imo->imo_multicast_vif;
1781 		else
1782 			optval = -1;
1783 		INP_WUNLOCK(inp);
1784 		error = sooptcopyout(sopt, &optval, sizeof(int));
1785 		break;
1786 
1787 	case IP_MULTICAST_IF:
1788 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
1789 		if (imo != NULL) {
1790 			ifp = imo->imo_multicast_ifp;
1791 			if (!in_nullhost(imo->imo_multicast_addr)) {
1792 				mreqn.imr_address = imo->imo_multicast_addr;
1793 			} else if (ifp != NULL) {
1794 				mreqn.imr_ifindex = ifp->if_index;
1795 				IFP_TO_IA(ifp, ia, &in_ifa_tracker);
1796 				if (ia != NULL) {
1797 					mreqn.imr_address =
1798 					    IA_SIN(ia)->sin_addr;
1799 					ifa_free(&ia->ia_ifa);
1800 				}
1801 			}
1802 		}
1803 		INP_WUNLOCK(inp);
1804 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1805 			error = sooptcopyout(sopt, &mreqn,
1806 			    sizeof(struct ip_mreqn));
1807 		} else {
1808 			error = sooptcopyout(sopt, &mreqn.imr_address,
1809 			    sizeof(struct in_addr));
1810 		}
1811 		break;
1812 
1813 	case IP_MULTICAST_TTL:
1814 		if (imo == NULL)
1815 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1816 		else
1817 			optval = coptval = imo->imo_multicast_ttl;
1818 		INP_WUNLOCK(inp);
1819 		if (sopt->sopt_valsize == sizeof(u_char))
1820 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1821 		else
1822 			error = sooptcopyout(sopt, &optval, sizeof(int));
1823 		break;
1824 
1825 	case IP_MULTICAST_LOOP:
1826 		if (imo == NULL)
1827 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1828 		else
1829 			optval = coptval = imo->imo_multicast_loop;
1830 		INP_WUNLOCK(inp);
1831 		if (sopt->sopt_valsize == sizeof(u_char))
1832 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
1833 		else
1834 			error = sooptcopyout(sopt, &optval, sizeof(int));
1835 		break;
1836 
1837 	case IP_MSFILTER:
1838 		if (imo == NULL) {
1839 			error = EADDRNOTAVAIL;
1840 			INP_WUNLOCK(inp);
1841 		} else {
1842 			error = inp_get_source_filters(inp, sopt);
1843 		}
1844 		break;
1845 
1846 	default:
1847 		INP_WUNLOCK(inp);
1848 		error = ENOPROTOOPT;
1849 		break;
1850 	}
1851 
1852 	INP_UNLOCK_ASSERT(inp);
1853 
1854 	return (error);
1855 }
1856 
1857 /*
1858  * Look up the ifnet to use for a multicast group membership,
1859  * given the IPv4 address of an interface, and the IPv4 group address.
1860  *
1861  * This routine exists to support legacy multicast applications
1862  * which do not understand that multicast memberships are scoped to
1863  * specific physical links in the networking stack, or which need
1864  * to join link-scope groups before IPv4 addresses are configured.
1865  *
1866  * If inp is non-NULL, use this socket's current FIB number for any
1867  * required FIB lookup.
1868  * If ina is INADDR_ANY, look up the group address in the unicast FIB,
1869  * and use its ifp; usually, this points to the default next-hop.
1870  *
1871  * If the FIB lookup fails, attempt to use the first non-loopback
1872  * interface with multicast capability in the system as a
1873  * last resort. The legacy IPv4 ASM API requires that we do
1874  * this in order to allow groups to be joined when the routing
1875  * table has not yet been populated during boot.
1876  *
1877  * Returns NULL if no ifp could be found.
1878  *
1879  * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP.
1880  * FUTURE: Implement IPv4 source-address selection.
1881  */
1882 static struct ifnet *
1883 inp_lookup_mcast_ifp(const struct inpcb *inp,
1884     const struct sockaddr_in *gsin, const struct in_addr ina)
1885 {
1886 	struct rm_priotracker in_ifa_tracker;
1887 	struct ifnet *ifp;
1888 	struct nhop4_basic nh4;
1889 	uint32_t fibnum;
1890 
1891 	KASSERT(gsin->sin_family == AF_INET, ("%s: not AF_INET", __func__));
1892 	KASSERT(IN_MULTICAST(ntohl(gsin->sin_addr.s_addr)),
1893 	    ("%s: not multicast", __func__));
1894 
1895 	ifp = NULL;
1896 	if (!in_nullhost(ina)) {
1897 		INADDR_TO_IFP(ina, ifp);
1898 	} else {
1899 		fibnum = inp ? inp->inp_inc.inc_fibnum : 0;
1900 		if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0)
1901 			ifp = nh4.nh_ifp;
1902 		else {
1903 			struct in_ifaddr *ia;
1904 			struct ifnet *mifp;
1905 
1906 			mifp = NULL;
1907 			IN_IFADDR_RLOCK(&in_ifa_tracker);
1908 			TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1909 				mifp = ia->ia_ifp;
1910 				if (!(mifp->if_flags & IFF_LOOPBACK) &&
1911 				     (mifp->if_flags & IFF_MULTICAST)) {
1912 					ifp = mifp;
1913 					break;
1914 				}
1915 			}
1916 			IN_IFADDR_RUNLOCK(&in_ifa_tracker);
1917 		}
1918 	}
1919 
1920 	return (ifp);
1921 }
1922 
1923 /*
1924  * Join an IPv4 multicast group, possibly with a source.
1925  */
1926 static int
1927 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
1928 {
1929 	struct group_source_req		 gsr;
1930 	sockunion_t			*gsa, *ssa;
1931 	struct ifnet			*ifp;
1932 	struct in_mfilter		*imf;
1933 	struct ip_moptions		*imo;
1934 	struct in_multi			*inm;
1935 	struct in_msource		*lims;
1936 	size_t				 idx;
1937 	int				 error, is_new;
1938 
1939 	ifp = NULL;
1940 	imf = NULL;
1941 	lims = NULL;
1942 	error = 0;
1943 	is_new = 0;
1944 
1945 	memset(&gsr, 0, sizeof(struct group_source_req));
1946 	gsa = (sockunion_t *)&gsr.gsr_group;
1947 	gsa->ss.ss_family = AF_UNSPEC;
1948 	ssa = (sockunion_t *)&gsr.gsr_source;
1949 	ssa->ss.ss_family = AF_UNSPEC;
1950 
1951 	switch (sopt->sopt_name) {
1952 	case IP_ADD_MEMBERSHIP:
1953 	case IP_ADD_SOURCE_MEMBERSHIP: {
1954 		struct ip_mreq_source	 mreqs;
1955 
1956 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1957 			error = sooptcopyin(sopt, &mreqs,
1958 			    sizeof(struct ip_mreq),
1959 			    sizeof(struct ip_mreq));
1960 			/*
1961 			 * Do argument switcharoo from ip_mreq into
1962 			 * ip_mreq_source to avoid using two instances.
1963 			 */
1964 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1965 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1966 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1967 			error = sooptcopyin(sopt, &mreqs,
1968 			    sizeof(struct ip_mreq_source),
1969 			    sizeof(struct ip_mreq_source));
1970 		}
1971 		if (error)
1972 			return (error);
1973 
1974 		gsa->sin.sin_family = AF_INET;
1975 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1976 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1977 
1978 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1979 			ssa->sin.sin_family = AF_INET;
1980 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1981 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1982 		}
1983 
1984 		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1985 			return (EINVAL);
1986 
1987 		ifp = inp_lookup_mcast_ifp(inp, &gsa->sin,
1988 		    mreqs.imr_interface);
1989 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
1990 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
1991 		break;
1992 	}
1993 
1994 	case MCAST_JOIN_GROUP:
1995 	case MCAST_JOIN_SOURCE_GROUP:
1996 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1997 			error = sooptcopyin(sopt, &gsr,
1998 			    sizeof(struct group_req),
1999 			    sizeof(struct group_req));
2000 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2001 			error = sooptcopyin(sopt, &gsr,
2002 			    sizeof(struct group_source_req),
2003 			    sizeof(struct group_source_req));
2004 		}
2005 		if (error)
2006 			return (error);
2007 
2008 		if (gsa->sin.sin_family != AF_INET ||
2009 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2010 			return (EINVAL);
2011 
2012 		/*
2013 		 * Overwrite the port field if present, as the sockaddr
2014 		 * being copied in may be matched with a binary comparison.
2015 		 */
2016 		gsa->sin.sin_port = 0;
2017 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
2018 			if (ssa->sin.sin_family != AF_INET ||
2019 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2020 				return (EINVAL);
2021 			ssa->sin.sin_port = 0;
2022 		}
2023 
2024 		if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2025 			return (EINVAL);
2026 
2027 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2028 			return (EADDRNOTAVAIL);
2029 		ifp = ifnet_byindex(gsr.gsr_interface);
2030 		break;
2031 
2032 	default:
2033 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2034 		    __func__, sopt->sopt_name);
2035 		return (EOPNOTSUPP);
2036 		break;
2037 	}
2038 
2039 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
2040 		return (EADDRNOTAVAIL);
2041 
2042 	imo = inp_findmoptions(inp);
2043 	idx = imo_match_group(imo, ifp, &gsa->sa);
2044 	if (idx == -1) {
2045 		is_new = 1;
2046 	} else {
2047 		inm = imo->imo_membership[idx];
2048 		imf = &imo->imo_mfilters[idx];
2049 		if (ssa->ss.ss_family != AF_UNSPEC) {
2050 			/*
2051 			 * MCAST_JOIN_SOURCE_GROUP on an exclusive membership
2052 			 * is an error. On an existing inclusive membership,
2053 			 * it just adds the source to the filter list.
2054 			 */
2055 			if (imf->imf_st[1] != MCAST_INCLUDE) {
2056 				error = EINVAL;
2057 				goto out_inp_locked;
2058 			}
2059 			/*
2060 			 * Throw out duplicates.
2061 			 *
2062 			 * XXX FIXME: This makes a naive assumption that
2063 			 * even if entries exist for *ssa in this imf,
2064 			 * they will be rejected as dupes, even if they
2065 			 * are not valid in the current mode (in-mode).
2066 			 *
2067 			 * in_msource is transactioned just as for anything
2068 			 * else in SSM -- but note naive use of inm_graft()
2069 			 * below for allocating new filter entries.
2070 			 *
2071 			 * This is only an issue if someone mixes the
2072 			 * full-state SSM API with the delta-based API,
2073 			 * which is discouraged in the relevant RFCs.
2074 			 */
2075 			lims = imo_match_source(imo, idx, &ssa->sa);
2076 			if (lims != NULL /*&&
2077 			    lims->imsl_st[1] == MCAST_INCLUDE*/) {
2078 				error = EADDRNOTAVAIL;
2079 				goto out_inp_locked;
2080 			}
2081 		} else {
2082 			/*
2083 			 * MCAST_JOIN_GROUP on an existing exclusive
2084 			 * membership is an error; return EADDRINUSE
2085 			 * to preserve 4.4BSD API idempotence, and
2086 			 * avoid tedious detour to code below.
2087 			 * NOTE: This is bending RFC 3678 a bit.
2088 			 *
2089 			 * On an existing inclusive membership, this is also
2090 			 * an error; if you want to change filter mode,
2091 			 * you must use the userland API setsourcefilter().
2092 			 * XXX We don't reject this for imf in UNDEFINED
2093 			 * state at t1, because allocation of a filter
2094 			 * is atomic with allocation of a membership.
2095 			 */
2096 			error = EINVAL;
2097 			if (imf->imf_st[1] == MCAST_EXCLUDE)
2098 				error = EADDRINUSE;
2099 			goto out_inp_locked;
2100 		}
2101 	}
2102 
2103 	/*
2104 	 * Begin state merge transaction at socket layer.
2105 	 */
2106 	INP_WLOCK_ASSERT(inp);
2107 
2108 	if (is_new) {
2109 		if (imo->imo_num_memberships == imo->imo_max_memberships) {
2110 			error = imo_grow(imo);
2111 			if (error)
2112 				goto out_inp_locked;
2113 		}
2114 		/*
2115 		 * Allocate the new slot upfront so we can deal with
2116 		 * grafting the new source filter in same code path
2117 		 * as for join-source on existing membership.
2118 		 */
2119 		idx = imo->imo_num_memberships;
2120 		imo->imo_membership[idx] = NULL;
2121 		imo->imo_num_memberships++;
2122 		KASSERT(imo->imo_mfilters != NULL,
2123 		    ("%s: imf_mfilters vector was not allocated", __func__));
2124 		imf = &imo->imo_mfilters[idx];
2125 		KASSERT(RB_EMPTY(&imf->imf_sources),
2126 		    ("%s: imf_sources not empty", __func__));
2127 	}
2128 
2129 	/*
2130 	 * Graft new source into filter list for this inpcb's
2131 	 * membership of the group. The in_multi may not have
2132 	 * been allocated yet if this is a new membership, however,
2133 	 * the in_mfilter slot will be allocated and must be initialized.
2134 	 *
2135 	 * Note: Grafting of exclusive mode filters doesn't happen
2136 	 * in this path.
2137 	 * XXX: Should check for non-NULL lims (node exists but may
2138 	 * not be in-mode) for interop with full-state API.
2139 	 */
2140 	if (ssa->ss.ss_family != AF_UNSPEC) {
2141 		/* Membership starts in IN mode */
2142 		if (is_new) {
2143 			CTR1(KTR_IGMPV3, "%s: new join w/source", __func__);
2144 			imf_init(imf, MCAST_UNDEFINED, MCAST_INCLUDE);
2145 		} else {
2146 			CTR2(KTR_IGMPV3, "%s: %s source", __func__, "allow");
2147 		}
2148 		lims = imf_graft(imf, MCAST_INCLUDE, &ssa->sin);
2149 		if (lims == NULL) {
2150 			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2151 			    __func__);
2152 			error = ENOMEM;
2153 			goto out_imo_free;
2154 		}
2155 	} else {
2156 		/* No address specified; Membership starts in EX mode */
2157 		if (is_new) {
2158 			CTR1(KTR_IGMPV3, "%s: new join w/o source", __func__);
2159 			imf_init(imf, MCAST_UNDEFINED, MCAST_EXCLUDE);
2160 		}
2161 	}
2162 
2163 	/*
2164 	 * Begin state merge transaction at IGMP layer.
2165 	 */
2166 	IN_MULTI_LOCK();
2167 
2168 	if (is_new) {
2169 		error = in_joingroup_locked(ifp, &gsa->sin.sin_addr, imf,
2170 		    &inm);
2171 		if (error) {
2172                         CTR1(KTR_IGMPV3, "%s: in_joingroup_locked failed",
2173                             __func__);
2174                         IN_MULTI_UNLOCK();
2175 			goto out_imo_free;
2176                 }
2177 		imo->imo_membership[idx] = inm;
2178 	} else {
2179 		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2180 		error = inm_merge(inm, imf);
2181 		if (error) {
2182 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2183 			    __func__);
2184 			goto out_in_multi_locked;
2185 		}
2186 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2187 		error = igmp_change_state(inm);
2188 		if (error) {
2189 			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2190 			    __func__);
2191 			goto out_in_multi_locked;
2192 		}
2193 	}
2194 
2195 out_in_multi_locked:
2196 
2197 	IN_MULTI_UNLOCK();
2198 
2199 	INP_WLOCK_ASSERT(inp);
2200 	if (error) {
2201 		imf_rollback(imf);
2202 		if (is_new)
2203 			imf_purge(imf);
2204 		else
2205 			imf_reap(imf);
2206 	} else {
2207 		imf_commit(imf);
2208 	}
2209 
2210 out_imo_free:
2211 	if (error && is_new) {
2212 		imo->imo_membership[idx] = NULL;
2213 		--imo->imo_num_memberships;
2214 	}
2215 
2216 out_inp_locked:
2217 	INP_WUNLOCK(inp);
2218 	return (error);
2219 }
2220 
2221 /*
2222  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
2223  */
2224 static int
2225 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
2226 {
2227 	struct group_source_req		 gsr;
2228 	struct ip_mreq_source		 mreqs;
2229 	sockunion_t			*gsa, *ssa;
2230 	struct ifnet			*ifp;
2231 	struct in_mfilter		*imf;
2232 	struct ip_moptions		*imo;
2233 	struct in_msource		*ims;
2234 	struct in_multi			*inm;
2235 	size_t				 idx;
2236 	int				 error, is_final;
2237 
2238 	ifp = NULL;
2239 	error = 0;
2240 	is_final = 1;
2241 
2242 	memset(&gsr, 0, sizeof(struct group_source_req));
2243 	gsa = (sockunion_t *)&gsr.gsr_group;
2244 	gsa->ss.ss_family = AF_UNSPEC;
2245 	ssa = (sockunion_t *)&gsr.gsr_source;
2246 	ssa->ss.ss_family = AF_UNSPEC;
2247 
2248 	switch (sopt->sopt_name) {
2249 	case IP_DROP_MEMBERSHIP:
2250 	case IP_DROP_SOURCE_MEMBERSHIP:
2251 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
2252 			error = sooptcopyin(sopt, &mreqs,
2253 			    sizeof(struct ip_mreq),
2254 			    sizeof(struct ip_mreq));
2255 			/*
2256 			 * Swap interface and sourceaddr arguments,
2257 			 * as ip_mreq and ip_mreq_source are laid
2258 			 * out differently.
2259 			 */
2260 			mreqs.imr_interface = mreqs.imr_sourceaddr;
2261 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
2262 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2263 			error = sooptcopyin(sopt, &mreqs,
2264 			    sizeof(struct ip_mreq_source),
2265 			    sizeof(struct ip_mreq_source));
2266 		}
2267 		if (error)
2268 			return (error);
2269 
2270 		gsa->sin.sin_family = AF_INET;
2271 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
2272 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
2273 
2274 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
2275 			ssa->sin.sin_family = AF_INET;
2276 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
2277 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
2278 		}
2279 
2280 		/*
2281 		 * Attempt to look up hinted ifp from interface address.
2282 		 * Fallthrough with null ifp iff lookup fails, to
2283 		 * preserve 4.4BSD mcast API idempotence.
2284 		 * XXX NOTE WELL: The RFC 3678 API is preferred because
2285 		 * using an IPv4 address as a key is racy.
2286 		 */
2287 		if (!in_nullhost(mreqs.imr_interface))
2288 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
2289 
2290 		CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p",
2291 		    __func__, ntohl(mreqs.imr_interface.s_addr), ifp);
2292 
2293 		break;
2294 
2295 	case MCAST_LEAVE_GROUP:
2296 	case MCAST_LEAVE_SOURCE_GROUP:
2297 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
2298 			error = sooptcopyin(sopt, &gsr,
2299 			    sizeof(struct group_req),
2300 			    sizeof(struct group_req));
2301 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2302 			error = sooptcopyin(sopt, &gsr,
2303 			    sizeof(struct group_source_req),
2304 			    sizeof(struct group_source_req));
2305 		}
2306 		if (error)
2307 			return (error);
2308 
2309 		if (gsa->sin.sin_family != AF_INET ||
2310 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
2311 			return (EINVAL);
2312 
2313 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
2314 			if (ssa->sin.sin_family != AF_INET ||
2315 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
2316 				return (EINVAL);
2317 		}
2318 
2319 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
2320 			return (EADDRNOTAVAIL);
2321 
2322 		ifp = ifnet_byindex(gsr.gsr_interface);
2323 
2324 		if (ifp == NULL)
2325 			return (EADDRNOTAVAIL);
2326 		break;
2327 
2328 	default:
2329 		CTR2(KTR_IGMPV3, "%s: unknown sopt_name %d",
2330 		    __func__, sopt->sopt_name);
2331 		return (EOPNOTSUPP);
2332 		break;
2333 	}
2334 
2335 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2336 		return (EINVAL);
2337 
2338 	/*
2339 	 * Find the membership in the membership array.
2340 	 */
2341 	imo = inp_findmoptions(inp);
2342 	idx = imo_match_group(imo, ifp, &gsa->sa);
2343 	if (idx == -1) {
2344 		error = EADDRNOTAVAIL;
2345 		goto out_inp_locked;
2346 	}
2347 	inm = imo->imo_membership[idx];
2348 	imf = &imo->imo_mfilters[idx];
2349 
2350 	if (ssa->ss.ss_family != AF_UNSPEC)
2351 		is_final = 0;
2352 
2353 	/*
2354 	 * Begin state merge transaction at socket layer.
2355 	 */
2356 	INP_WLOCK_ASSERT(inp);
2357 
2358 	/*
2359 	 * If we were instructed only to leave a given source, do so.
2360 	 * MCAST_LEAVE_SOURCE_GROUP is only valid for inclusive memberships.
2361 	 */
2362 	if (is_final) {
2363 		imf_leave(imf);
2364 	} else {
2365 		if (imf->imf_st[0] == MCAST_EXCLUDE) {
2366 			error = EADDRNOTAVAIL;
2367 			goto out_inp_locked;
2368 		}
2369 		ims = imo_match_source(imo, idx, &ssa->sa);
2370 		if (ims == NULL) {
2371 			CTR3(KTR_IGMPV3, "%s: source 0x%08x %spresent",
2372 			    __func__, ntohl(ssa->sin.sin_addr.s_addr), "not ");
2373 			error = EADDRNOTAVAIL;
2374 			goto out_inp_locked;
2375 		}
2376 		CTR2(KTR_IGMPV3, "%s: %s source", __func__, "block");
2377 		error = imf_prune(imf, &ssa->sin);
2378 		if (error) {
2379 			CTR1(KTR_IGMPV3, "%s: merge imf state failed",
2380 			    __func__);
2381 			goto out_inp_locked;
2382 		}
2383 	}
2384 
2385 	/*
2386 	 * Begin state merge transaction at IGMP layer.
2387 	 */
2388 	IN_MULTI_LOCK();
2389 
2390 	if (is_final) {
2391 		/*
2392 		 * Give up the multicast address record to which
2393 		 * the membership points.
2394 		 */
2395 		(void)in_leavegroup_locked(inm, imf);
2396 	} else {
2397 		CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2398 		error = inm_merge(inm, imf);
2399 		if (error) {
2400 			CTR1(KTR_IGMPV3, "%s: failed to merge inm state",
2401 			    __func__);
2402 			goto out_in_multi_locked;
2403 		}
2404 
2405 		CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2406 		error = igmp_change_state(inm);
2407 		if (error) {
2408 			CTR1(KTR_IGMPV3, "%s: failed igmp downcall",
2409 			    __func__);
2410 		}
2411 	}
2412 
2413 out_in_multi_locked:
2414 
2415 	IN_MULTI_UNLOCK();
2416 
2417 	if (error)
2418 		imf_rollback(imf);
2419 	else
2420 		imf_commit(imf);
2421 
2422 	imf_reap(imf);
2423 
2424 	if (is_final) {
2425 		/* Remove the gap in the membership and filter array. */
2426 		for (++idx; idx < imo->imo_num_memberships; ++idx) {
2427 			imo->imo_membership[idx-1] = imo->imo_membership[idx];
2428 			imo->imo_mfilters[idx-1] = imo->imo_mfilters[idx];
2429 		}
2430 		imo->imo_num_memberships--;
2431 	}
2432 
2433 out_inp_locked:
2434 	INP_WUNLOCK(inp);
2435 	return (error);
2436 }
2437 
2438 /*
2439  * Select the interface for transmitting IPv4 multicast datagrams.
2440  *
2441  * Either an instance of struct in_addr or an instance of struct ip_mreqn
2442  * may be passed to this socket option. An address of INADDR_ANY or an
2443  * interface index of 0 is used to remove a previous selection.
2444  * When no interface is selected, one is chosen for every send.
2445  */
2446 static int
2447 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
2448 {
2449 	struct in_addr		 addr;
2450 	struct ip_mreqn		 mreqn;
2451 	struct ifnet		*ifp;
2452 	struct ip_moptions	*imo;
2453 	int			 error;
2454 
2455 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
2456 		/*
2457 		 * An interface index was specified using the
2458 		 * Linux-derived ip_mreqn structure.
2459 		 */
2460 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
2461 		    sizeof(struct ip_mreqn));
2462 		if (error)
2463 			return (error);
2464 
2465 		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
2466 			return (EINVAL);
2467 
2468 		if (mreqn.imr_ifindex == 0) {
2469 			ifp = NULL;
2470 		} else {
2471 			ifp = ifnet_byindex(mreqn.imr_ifindex);
2472 			if (ifp == NULL)
2473 				return (EADDRNOTAVAIL);
2474 		}
2475 	} else {
2476 		/*
2477 		 * An interface was specified by IPv4 address.
2478 		 * This is the traditional BSD usage.
2479 		 */
2480 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
2481 		    sizeof(struct in_addr));
2482 		if (error)
2483 			return (error);
2484 		if (in_nullhost(addr)) {
2485 			ifp = NULL;
2486 		} else {
2487 			INADDR_TO_IFP(addr, ifp);
2488 			if (ifp == NULL)
2489 				return (EADDRNOTAVAIL);
2490 		}
2491 		CTR3(KTR_IGMPV3, "%s: ifp = %p, addr = 0x%08x", __func__, ifp,
2492 		    ntohl(addr.s_addr));
2493 	}
2494 
2495 	/* Reject interfaces which do not support multicast. */
2496 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
2497 		return (EOPNOTSUPP);
2498 
2499 	imo = inp_findmoptions(inp);
2500 	imo->imo_multicast_ifp = ifp;
2501 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
2502 	INP_WUNLOCK(inp);
2503 
2504 	return (0);
2505 }
2506 
2507 /*
2508  * Atomically set source filters on a socket for an IPv4 multicast group.
2509  *
2510  * SMPng: NOTE: Potentially calls malloc(M_WAITOK) with Giant held.
2511  */
2512 static int
2513 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
2514 {
2515 	struct __msfilterreq	 msfr;
2516 	sockunion_t		*gsa;
2517 	struct ifnet		*ifp;
2518 	struct in_mfilter	*imf;
2519 	struct ip_moptions	*imo;
2520 	struct in_multi		*inm;
2521 	size_t			 idx;
2522 	int			 error;
2523 
2524 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
2525 	    sizeof(struct __msfilterreq));
2526 	if (error)
2527 		return (error);
2528 
2529 	if (msfr.msfr_nsrcs > in_mcast_maxsocksrc)
2530 		return (ENOBUFS);
2531 
2532 	if ((msfr.msfr_fmode != MCAST_EXCLUDE &&
2533 	     msfr.msfr_fmode != MCAST_INCLUDE))
2534 		return (EINVAL);
2535 
2536 	if (msfr.msfr_group.ss_family != AF_INET ||
2537 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
2538 		return (EINVAL);
2539 
2540 	gsa = (sockunion_t *)&msfr.msfr_group;
2541 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
2542 		return (EINVAL);
2543 
2544 	gsa->sin.sin_port = 0;	/* ignore port */
2545 
2546 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
2547 		return (EADDRNOTAVAIL);
2548 
2549 	ifp = ifnet_byindex(msfr.msfr_ifindex);
2550 	if (ifp == NULL)
2551 		return (EADDRNOTAVAIL);
2552 
2553 	/*
2554 	 * Take the INP write lock.
2555 	 * Check if this socket is a member of this group.
2556 	 */
2557 	imo = inp_findmoptions(inp);
2558 	idx = imo_match_group(imo, ifp, &gsa->sa);
2559 	if (idx == -1 || imo->imo_mfilters == NULL) {
2560 		error = EADDRNOTAVAIL;
2561 		goto out_inp_locked;
2562 	}
2563 	inm = imo->imo_membership[idx];
2564 	imf = &imo->imo_mfilters[idx];
2565 
2566 	/*
2567 	 * Begin state merge transaction at socket layer.
2568 	 */
2569 	INP_WLOCK_ASSERT(inp);
2570 
2571 	imf->imf_st[1] = msfr.msfr_fmode;
2572 
2573 	/*
2574 	 * Apply any new source filters, if present.
2575 	 * Make a copy of the user-space source vector so
2576 	 * that we may copy them with a single copyin. This
2577 	 * allows us to deal with page faults up-front.
2578 	 */
2579 	if (msfr.msfr_nsrcs > 0) {
2580 		struct in_msource	*lims;
2581 		struct sockaddr_in	*psin;
2582 		struct sockaddr_storage	*kss, *pkss;
2583 		int			 i;
2584 
2585 		INP_WUNLOCK(inp);
2586 
2587 		CTR2(KTR_IGMPV3, "%s: loading %lu source list entries",
2588 		    __func__, (unsigned long)msfr.msfr_nsrcs);
2589 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
2590 		    M_TEMP, M_WAITOK);
2591 		error = copyin(msfr.msfr_srcs, kss,
2592 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
2593 		if (error) {
2594 			free(kss, M_TEMP);
2595 			return (error);
2596 		}
2597 
2598 		INP_WLOCK(inp);
2599 
2600 		/*
2601 		 * Mark all source filters as UNDEFINED at t1.
2602 		 * Restore new group filter mode, as imf_leave()
2603 		 * will set it to INCLUDE.
2604 		 */
2605 		imf_leave(imf);
2606 		imf->imf_st[1] = msfr.msfr_fmode;
2607 
2608 		/*
2609 		 * Update socket layer filters at t1, lazy-allocating
2610 		 * new entries. This saves a bunch of memory at the
2611 		 * cost of one RB_FIND() per source entry; duplicate
2612 		 * entries in the msfr_nsrcs vector are ignored.
2613 		 * If we encounter an error, rollback transaction.
2614 		 *
2615 		 * XXX This too could be replaced with a set-symmetric
2616 		 * difference like loop to avoid walking from root
2617 		 * every time, as the key space is common.
2618 		 */
2619 		for (i = 0, pkss = kss; i < msfr.msfr_nsrcs; i++, pkss++) {
2620 			psin = (struct sockaddr_in *)pkss;
2621 			if (psin->sin_family != AF_INET) {
2622 				error = EAFNOSUPPORT;
2623 				break;
2624 			}
2625 			if (psin->sin_len != sizeof(struct sockaddr_in)) {
2626 				error = EINVAL;
2627 				break;
2628 			}
2629 			error = imf_get_source(imf, psin, &lims);
2630 			if (error)
2631 				break;
2632 			lims->imsl_st[1] = imf->imf_st[1];
2633 		}
2634 		free(kss, M_TEMP);
2635 	}
2636 
2637 	if (error)
2638 		goto out_imf_rollback;
2639 
2640 	INP_WLOCK_ASSERT(inp);
2641 	IN_MULTI_LOCK();
2642 
2643 	/*
2644 	 * Begin state merge transaction at IGMP layer.
2645 	 */
2646 	CTR1(KTR_IGMPV3, "%s: merge inm state", __func__);
2647 	error = inm_merge(inm, imf);
2648 	if (error) {
2649 		CTR1(KTR_IGMPV3, "%s: failed to merge inm state", __func__);
2650 		goto out_in_multi_locked;
2651 	}
2652 
2653 	CTR1(KTR_IGMPV3, "%s: doing igmp downcall", __func__);
2654 	error = igmp_change_state(inm);
2655 	if (error)
2656 		CTR1(KTR_IGMPV3, "%s: failed igmp downcall", __func__);
2657 
2658 out_in_multi_locked:
2659 
2660 	IN_MULTI_UNLOCK();
2661 
2662 out_imf_rollback:
2663 	if (error)
2664 		imf_rollback(imf);
2665 	else
2666 		imf_commit(imf);
2667 
2668 	imf_reap(imf);
2669 
2670 out_inp_locked:
2671 	INP_WUNLOCK(inp);
2672 	return (error);
2673 }
2674 
2675 /*
2676  * Set the IP multicast options in response to user setsockopt().
2677  *
2678  * Many of the socket options handled in this function duplicate the
2679  * functionality of socket options in the regular unicast API. However,
2680  * it is not possible to merge the duplicate code, because the idempotence
2681  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
2682  * the effects of these options must be treated as separate and distinct.
2683  *
2684  * SMPng: XXX: Unlocked read of inp_socket believed OK.
2685  * FUTURE: The IP_MULTICAST_VIF option may be eliminated if MROUTING
2686  * is refactored to no longer use vifs.
2687  */
2688 int
2689 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
2690 {
2691 	struct ip_moptions	*imo;
2692 	int			 error;
2693 
2694 	error = 0;
2695 
2696 	/*
2697 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
2698 	 * or is a divert socket, reject it.
2699 	 */
2700 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
2701 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
2702 	     inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
2703 		return (EOPNOTSUPP);
2704 
2705 	switch (sopt->sopt_name) {
2706 	case IP_MULTICAST_VIF: {
2707 		int vifi;
2708 		/*
2709 		 * Select a multicast VIF for transmission.
2710 		 * Only useful if multicast forwarding is active.
2711 		 */
2712 		if (legal_vif_num == NULL) {
2713 			error = EOPNOTSUPP;
2714 			break;
2715 		}
2716 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
2717 		if (error)
2718 			break;
2719 		if (!legal_vif_num(vifi) && (vifi != -1)) {
2720 			error = EINVAL;
2721 			break;
2722 		}
2723 		imo = inp_findmoptions(inp);
2724 		imo->imo_multicast_vif = vifi;
2725 		INP_WUNLOCK(inp);
2726 		break;
2727 	}
2728 
2729 	case IP_MULTICAST_IF:
2730 		error = inp_set_multicast_if(inp, sopt);
2731 		break;
2732 
2733 	case IP_MULTICAST_TTL: {
2734 		u_char ttl;
2735 
2736 		/*
2737 		 * Set the IP time-to-live for outgoing multicast packets.
2738 		 * The original multicast API required a char argument,
2739 		 * which is inconsistent with the rest of the socket API.
2740 		 * We allow either a char or an int.
2741 		 */
2742 		if (sopt->sopt_valsize == sizeof(u_char)) {
2743 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
2744 			    sizeof(u_char));
2745 			if (error)
2746 				break;
2747 		} else {
2748 			u_int ittl;
2749 
2750 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
2751 			    sizeof(u_int));
2752 			if (error)
2753 				break;
2754 			if (ittl > 255) {
2755 				error = EINVAL;
2756 				break;
2757 			}
2758 			ttl = (u_char)ittl;
2759 		}
2760 		imo = inp_findmoptions(inp);
2761 		imo->imo_multicast_ttl = ttl;
2762 		INP_WUNLOCK(inp);
2763 		break;
2764 	}
2765 
2766 	case IP_MULTICAST_LOOP: {
2767 		u_char loop;
2768 
2769 		/*
2770 		 * Set the loopback flag for outgoing multicast packets.
2771 		 * Must be zero or one.  The original multicast API required a
2772 		 * char argument, which is inconsistent with the rest
2773 		 * of the socket API.  We allow either a char or an int.
2774 		 */
2775 		if (sopt->sopt_valsize == sizeof(u_char)) {
2776 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
2777 			    sizeof(u_char));
2778 			if (error)
2779 				break;
2780 		} else {
2781 			u_int iloop;
2782 
2783 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
2784 					    sizeof(u_int));
2785 			if (error)
2786 				break;
2787 			loop = (u_char)iloop;
2788 		}
2789 		imo = inp_findmoptions(inp);
2790 		imo->imo_multicast_loop = !!loop;
2791 		INP_WUNLOCK(inp);
2792 		break;
2793 	}
2794 
2795 	case IP_ADD_MEMBERSHIP:
2796 	case IP_ADD_SOURCE_MEMBERSHIP:
2797 	case MCAST_JOIN_GROUP:
2798 	case MCAST_JOIN_SOURCE_GROUP:
2799 		error = inp_join_group(inp, sopt);
2800 		break;
2801 
2802 	case IP_DROP_MEMBERSHIP:
2803 	case IP_DROP_SOURCE_MEMBERSHIP:
2804 	case MCAST_LEAVE_GROUP:
2805 	case MCAST_LEAVE_SOURCE_GROUP:
2806 		error = inp_leave_group(inp, sopt);
2807 		break;
2808 
2809 	case IP_BLOCK_SOURCE:
2810 	case IP_UNBLOCK_SOURCE:
2811 	case MCAST_BLOCK_SOURCE:
2812 	case MCAST_UNBLOCK_SOURCE:
2813 		error = inp_block_unblock_source(inp, sopt);
2814 		break;
2815 
2816 	case IP_MSFILTER:
2817 		error = inp_set_source_filters(inp, sopt);
2818 		break;
2819 
2820 	default:
2821 		error = EOPNOTSUPP;
2822 		break;
2823 	}
2824 
2825 	INP_UNLOCK_ASSERT(inp);
2826 
2827 	return (error);
2828 }
2829 
2830 /*
2831  * Expose IGMP's multicast filter mode and source list(s) to userland,
2832  * keyed by (ifindex, group).
2833  * The filter mode is written out as a uint32_t, followed by
2834  * 0..n of struct in_addr.
2835  * For use by ifmcstat(8).
2836  * SMPng: NOTE: unlocked read of ifindex space.
2837  */
2838 static int
2839 sysctl_ip_mcast_filters(SYSCTL_HANDLER_ARGS)
2840 {
2841 	struct in_addr			 src, group;
2842 	struct ifnet			*ifp;
2843 	struct ifmultiaddr		*ifma;
2844 	struct in_multi			*inm;
2845 	struct ip_msource		*ims;
2846 	int				*name;
2847 	int				 retval;
2848 	u_int				 namelen;
2849 	uint32_t			 fmode, ifindex;
2850 
2851 	name = (int *)arg1;
2852 	namelen = arg2;
2853 
2854 	if (req->newptr != NULL)
2855 		return (EPERM);
2856 
2857 	if (namelen != 2)
2858 		return (EINVAL);
2859 
2860 	ifindex = name[0];
2861 	if (ifindex <= 0 || ifindex > V_if_index) {
2862 		CTR2(KTR_IGMPV3, "%s: ifindex %u out of range",
2863 		    __func__, ifindex);
2864 		return (ENOENT);
2865 	}
2866 
2867 	group.s_addr = name[1];
2868 	if (!IN_MULTICAST(ntohl(group.s_addr))) {
2869 		CTR2(KTR_IGMPV3, "%s: group 0x%08x is not multicast",
2870 		    __func__, ntohl(group.s_addr));
2871 		return (EINVAL);
2872 	}
2873 
2874 	ifp = ifnet_byindex(ifindex);
2875 	if (ifp == NULL) {
2876 		CTR2(KTR_IGMPV3, "%s: no ifp for ifindex %u",
2877 		    __func__, ifindex);
2878 		return (ENOENT);
2879 	}
2880 
2881 	retval = sysctl_wire_old_buffer(req,
2882 	    sizeof(uint32_t) + (in_mcast_maxgrpsrc * sizeof(struct in_addr)));
2883 	if (retval)
2884 		return (retval);
2885 
2886 	IN_MULTI_LOCK();
2887 
2888 	IF_ADDR_RLOCK(ifp);
2889 	TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2890 		if (ifma->ifma_addr->sa_family != AF_INET ||
2891 		    ifma->ifma_protospec == NULL)
2892 			continue;
2893 		inm = (struct in_multi *)ifma->ifma_protospec;
2894 		if (!in_hosteq(inm->inm_addr, group))
2895 			continue;
2896 		fmode = inm->inm_st[1].iss_fmode;
2897 		retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t));
2898 		if (retval != 0)
2899 			break;
2900 		RB_FOREACH(ims, ip_msource_tree, &inm->inm_srcs) {
2901 			CTR2(KTR_IGMPV3, "%s: visit node 0x%08x", __func__,
2902 			    ims->ims_haddr);
2903 			/*
2904 			 * Only copy-out sources which are in-mode.
2905 			 */
2906 			if (fmode != ims_get_mode(inm, ims, 1)) {
2907 				CTR1(KTR_IGMPV3, "%s: skip non-in-mode",
2908 				    __func__);
2909 				continue;
2910 			}
2911 			src.s_addr = htonl(ims->ims_haddr);
2912 			retval = SYSCTL_OUT(req, &src, sizeof(struct in_addr));
2913 			if (retval != 0)
2914 				break;
2915 		}
2916 	}
2917 	IF_ADDR_RUNLOCK(ifp);
2918 
2919 	IN_MULTI_UNLOCK();
2920 
2921 	return (retval);
2922 }
2923 
2924 #if defined(KTR) && (KTR_COMPILE & KTR_IGMPV3)
2925 
2926 static const char *inm_modestrs[] = { "un", "in", "ex" };
2927 
2928 static const char *
2929 inm_mode_str(const int mode)
2930 {
2931 
2932 	if (mode >= MCAST_UNDEFINED && mode <= MCAST_EXCLUDE)
2933 		return (inm_modestrs[mode]);
2934 	return ("??");
2935 }
2936 
2937 static const char *inm_statestrs[] = {
2938 	"not-member",
2939 	"silent",
2940 	"idle",
2941 	"lazy",
2942 	"sleeping",
2943 	"awakening",
2944 	"query-pending",
2945 	"sg-query-pending",
2946 	"leaving"
2947 };
2948 
2949 static const char *
2950 inm_state_str(const int state)
2951 {
2952 
2953 	if (state >= IGMP_NOT_MEMBER && state <= IGMP_LEAVING_MEMBER)
2954 		return (inm_statestrs[state]);
2955 	return ("??");
2956 }
2957 
2958 /*
2959  * Dump an in_multi structure to the console.
2960  */
2961 void
2962 inm_print(const struct in_multi *inm)
2963 {
2964 	int t;
2965 	char addrbuf[INET_ADDRSTRLEN];
2966 
2967 	if ((ktr_mask & KTR_IGMPV3) == 0)
2968 		return;
2969 
2970 	printf("%s: --- begin inm %p ---\n", __func__, inm);
2971 	printf("addr %s ifp %p(%s) ifma %p\n",
2972 	    inet_ntoa_r(inm->inm_addr, addrbuf),
2973 	    inm->inm_ifp,
2974 	    inm->inm_ifp->if_xname,
2975 	    inm->inm_ifma);
2976 	printf("timer %u state %s refcount %u scq.len %u\n",
2977 	    inm->inm_timer,
2978 	    inm_state_str(inm->inm_state),
2979 	    inm->inm_refcount,
2980 	    inm->inm_scq.mq_len);
2981 	printf("igi %p nsrc %lu sctimer %u scrv %u\n",
2982 	    inm->inm_igi,
2983 	    inm->inm_nsrc,
2984 	    inm->inm_sctimer,
2985 	    inm->inm_scrv);
2986 	for (t = 0; t < 2; t++) {
2987 		printf("t%d: fmode %s asm %u ex %u in %u rec %u\n", t,
2988 		    inm_mode_str(inm->inm_st[t].iss_fmode),
2989 		    inm->inm_st[t].iss_asm,
2990 		    inm->inm_st[t].iss_ex,
2991 		    inm->inm_st[t].iss_in,
2992 		    inm->inm_st[t].iss_rec);
2993 	}
2994 	printf("%s: --- end inm %p ---\n", __func__, inm);
2995 }
2996 
2997 #else /* !KTR || !(KTR_COMPILE & KTR_IGMPV3) */
2998 
/*
 * Stub used when KTR tracing for IGMPv3 is not compiled in; it keeps
 * the symbol available but prints nothing.
 */
void
inm_print(const struct in_multi *inm)
{

	(void)inm;
}
3004 
3005 #endif /* KTR && (KTR_COMPILE & KTR_IGMPV3) */
3006 
/*
 * Emit the red-black tree routines for ip_msource_tree: nodes are
 * struct ip_msource, linked through ims_link and compared with
 * ip_msource_cmp.
 */
3007 RB_GENERATE(ip_msource_tree, ip_msource, ims_link, ip_msource_cmp);
3008