xref: /freebsd/sys/netinet/in_mcast.c (revision 8655c70597b0e0918c82114b1186df5669b83eb6)
1 /*-
2  * Copyright (c) 2007 Bruce M. Simpson.
3  * Copyright (c) 2005 Robert N. M. Watson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  *    products derived from this software without specific prior written
16  *    permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * IPv4 multicast socket, group, and socket option processing module.
33  * Until further notice, this file requires INET to compile.
34  * TODO: Make this infrastructure independent of address family.
35  * TODO: Teach netinet6 to use this code.
36  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include "opt_route.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/malloc.h>
48 #include <sys/mbuf.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/sysctl.h>
53 #include <sys/vimage.h>
54 
55 #include <net/if.h>
56 #include <net/if_dl.h>
57 #include <net/route.h>
58 #include <net/vnet.h>
59 
60 #include <netinet/in.h>
61 #include <netinet/in_systm.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 #include <netinet/igmp_var.h>
66 #include <netinet/vinet.h>
67 
68 #ifndef __SOCKUNION_DECLARED
69 union sockunion {
70 	struct sockaddr_storage	ss;
71 	struct sockaddr		sa;
72 	struct sockaddr_dl	sdl;
73 	struct sockaddr_in	sin;
74 #ifdef INET6
75 	struct sockaddr_in6	sin6;
76 #endif
77 };
78 typedef union sockunion sockunion_t;
79 #define __SOCKUNION_DECLARED
80 #endif /* __SOCKUNION_DECLARED */
81 
/*
 * Kernel malloc types used by this file:
 *  M_IPMADDR   - struct in_multi group records (see in_addmulti()).
 *  M_IPMOPTS   - struct ip_moptions and its imo_membership vector.
 *  M_IPMSOURCE - struct in_msource entries and the imo_mfilters vector.
 */
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");

/*
 * The IPv4 multicast list (in_multihead and associated structures) is
 * protected by the global in_multi_mtx.  See in_var.h for more details.  For
 * now, in_multi_mtx is marked as recursive (MTX_RECURSE) because IGMP calls
 * back into ip_output() to send IGMP packets while holding the lock; this
 * is probably not desirable.
 */
#ifdef VIMAGE_GLOBALS
struct in_multihead in_multihead;	/* XXX BSS initialization */
#endif
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
98 
/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_match_group()
 *  imo_match_source()
 *  in_addmulti()
 *  in_delmulti()
 *  in_delmulti_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 */

/*
 * Forward declarations of the file-local (static) helpers.
 * The imo_*() helpers operate on an ip_moptions instance; the inp_*()
 * helpers take the inpcb together with the socket option request.
 */
static int	imo_grow(struct ip_moptions *);
static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
123 
/* Parent sysctl node: net.inet.ip.mcast. */
SYSCTL_NODE(_net_inet_ip, OID_AUTO, mcast, CTLFLAG_RW, 0, "IPv4 multicast");

/*
 * Initial value of imo_multicast_loop for a newly allocated ip_moptions
 * (see inp_findmoptions()).  Adjustable at run time through the
 * net.inet.ip.mcast.loop sysctl and registered as a tunable of the
 * same name.
 */
int in_mcast_loop = IP_DEFAULT_MULTICAST_LOOP;
SYSCTL_INT(_net_inet_ip_mcast, OID_AUTO, loop, CTLFLAG_RW | CTLFLAG_TUN,
    &in_mcast_loop, 0, "Loopback multicast datagrams by default");
TUNABLE_INT("net.inet.ip.mcast.loop", &in_mcast_loop);
130 
131 /*
132  * Resize the ip_moptions vector to the next power-of-two minus 1.
133  * May be called with locks held; do not sleep.
134  */
135 static int
136 imo_grow(struct ip_moptions *imo)
137 {
138 	struct in_multi		**nmships;
139 	struct in_multi		**omships;
140 	struct in_mfilter	 *nmfilters;
141 	struct in_mfilter	 *omfilters;
142 	size_t			  idx;
143 	size_t			  newmax;
144 	size_t			  oldmax;
145 
146 	nmships = NULL;
147 	nmfilters = NULL;
148 	omships = imo->imo_membership;
149 	omfilters = imo->imo_mfilters;
150 	oldmax = imo->imo_max_memberships;
151 	newmax = ((oldmax + 1) * 2) - 1;
152 
153 	if (newmax <= IP_MAX_MEMBERSHIPS) {
154 		nmships = (struct in_multi **)realloc(omships,
155 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
156 		nmfilters = (struct in_mfilter *)realloc(omfilters,
157 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
158 		if (nmships != NULL && nmfilters != NULL) {
159 			/* Initialize newly allocated source filter heads. */
160 			for (idx = oldmax; idx < newmax; idx++) {
161 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
162 				nmfilters[idx].imf_nsources = 0;
163 				TAILQ_INIT(&nmfilters[idx].imf_sources);
164 			}
165 			imo->imo_max_memberships = newmax;
166 			imo->imo_membership = nmships;
167 			imo->imo_mfilters = nmfilters;
168 		}
169 	}
170 
171 	if (nmships == NULL || nmfilters == NULL) {
172 		if (nmships != NULL)
173 			free(nmships, M_IPMOPTS);
174 		if (nmfilters != NULL)
175 			free(nmfilters, M_IPMSOURCE);
176 		return (ETOOMANYREFS);
177 	}
178 
179 	return (0);
180 }
181 
182 /*
183  * Add a source to a multicast filter list.
184  * Assumes the associated inpcb is locked.
185  */
186 static int
187 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
188 {
189 	struct in_msource	*ims, *nims;
190 	struct in_mfilter	*imf;
191 
192 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
193 	KASSERT(imo->imo_mfilters != NULL,
194 	    ("%s: imo_mfilters vector not allocated", __func__));
195 
196 	imf = &imo->imo_mfilters[gidx];
197 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
198 		return (ENOBUFS);
199 
200 	ims = imo_match_source(imo, gidx, &src->sa);
201 	if (ims != NULL)
202 		return (EADDRNOTAVAIL);
203 
204 	/* Do not sleep with inp lock held. */
205 	nims = malloc(sizeof(struct in_msource),
206 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
207 	if (nims == NULL)
208 		return (ENOBUFS);
209 
210 	nims->ims_addr = src->ss;
211 	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
212 	imf->imf_nsources++;
213 
214 	return (0);
215 }
216 
217 static int
218 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
219 {
220 	struct in_msource	*ims;
221 	struct in_mfilter	*imf;
222 
223 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
224 	KASSERT(imo->imo_mfilters != NULL,
225 	    ("%s: imo_mfilters vector not allocated", __func__));
226 
227 	imf = &imo->imo_mfilters[gidx];
228 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
229 		return (ENOBUFS);
230 
231 	ims = imo_match_source(imo, gidx, &src->sa);
232 	if (ims == NULL)
233 		return (EADDRNOTAVAIL);
234 
235 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
236 	free(ims, M_IPMSOURCE);
237 	imf->imf_nsources--;
238 
239 	return (0);
240 }
241 
242 /*
243  * Find an IPv4 multicast group entry for this ip_moptions instance
244  * which matches the specified group, and optionally an interface.
245  * Return its index into the array, or -1 if not found.
246  */
247 size_t
248 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
249     struct sockaddr *group)
250 {
251 	sockunion_t	 *gsa;
252 	struct in_multi	**pinm;
253 	int		  idx;
254 	int		  nmships;
255 
256 	gsa = (sockunion_t *)group;
257 
258 	/* The imo_membership array may be lazy allocated. */
259 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
260 		return (-1);
261 
262 	nmships = imo->imo_num_memberships;
263 	pinm = &imo->imo_membership[0];
264 	for (idx = 0; idx < nmships; idx++, pinm++) {
265 		if (*pinm == NULL)
266 			continue;
267 #if 0
268 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
269 		    ifp, inet_ntoa(gsa->sin.sin_addr));
270 		printf("against %p, %s\n",
271 		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
272 #endif
273 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
274 		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
275 			break;
276 		}
277 	}
278 	if (idx >= nmships)
279 		idx = -1;
280 
281 	return (idx);
282 }
283 
284 /*
285  * Find a multicast source entry for this imo which matches
286  * the given group index for this socket, and source address.
287  */
288 struct in_msource *
289 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
290 {
291 	struct in_mfilter	*imf;
292 	struct in_msource	*ims, *pims;
293 
294 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
295 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
296 	    ("%s: invalid index %d\n", __func__, (int)gidx));
297 
298 	/* The imo_mfilters array may be lazy allocated. */
299 	if (imo->imo_mfilters == NULL)
300 		return (NULL);
301 
302 	pims = NULL;
303 	imf = &imo->imo_mfilters[gidx];
304 	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
305 		/*
306 		 * Perform bitwise comparison of two IPv4 addresses.
307 		 * TODO: Do the same for IPv6.
308 		 * Do not use sa_equal() for this as it is not aware of
309 		 * deeper structure in sockaddr_in or sockaddr_in6.
310 		 */
311 		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
312 		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
313 			pims = ims;
314 			break;
315 		}
316 	}
317 
318 	return (pims);
319 }
320 
321 /*
322  * Join an IPv4 multicast group.
323  */
324 struct in_multi *
325 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
326 {
327 	INIT_VNET_INET(ifp->if_vnet);
328 	struct in_multi *inm;
329 
330 	inm = NULL;
331 
332 	IFF_LOCKGIANT(ifp);
333 	IN_MULTI_LOCK();
334 
335 	IN_LOOKUP_MULTI(*ap, ifp, inm);
336 	if (inm != NULL) {
337 		/*
338 		 * If we already joined this group, just bump the
339 		 * refcount and return it.
340 		 */
341 		KASSERT(inm->inm_refcount >= 1,
342 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
343 		++inm->inm_refcount;
344 	} else do {
345 		sockunion_t		 gsa;
346 		struct ifmultiaddr	*ifma;
347 		struct in_multi		*ninm;
348 		int			 error;
349 
350 		memset(&gsa, 0, sizeof(gsa));
351 		gsa.sin.sin_family = AF_INET;
352 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
353 		gsa.sin.sin_addr = *ap;
354 
355 		/*
356 		 * Check if a link-layer group is already associated
357 		 * with this network-layer group on the given ifnet.
358 		 * If so, bump the refcount on the existing network-layer
359 		 * group association and return it.
360 		 */
361 		error = if_addmulti(ifp, &gsa.sa, &ifma);
362 		if (error)
363 			break;
364 		if (ifma->ifma_protospec != NULL) {
365 			inm = (struct in_multi *)ifma->ifma_protospec;
366 #ifdef INVARIANTS
367 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
368 			    inm->inm_addr.s_addr != ap->s_addr)
369 				panic("%s: ifma is inconsistent", __func__);
370 #endif
371 			++inm->inm_refcount;
372 			break;
373 		}
374 
375 		/*
376 		 * A new membership is needed; construct it and
377 		 * perform the IGMP join.
378 		 */
379 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
380 		if (ninm == NULL) {
381 			if_delmulti_ifma(ifma);
382 			break;
383 		}
384 		ninm->inm_addr = *ap;
385 		ninm->inm_ifp = ifp;
386 		ninm->inm_ifma = ifma;
387 		ninm->inm_refcount = 1;
388 		ifma->ifma_protospec = ninm;
389 		LIST_INSERT_HEAD(&V_in_multihead, ninm, inm_link);
390 
391 		igmp_joingroup(ninm);
392 
393 		inm = ninm;
394 	} while (0);
395 
396 	IN_MULTI_UNLOCK();
397 	IFF_UNLOCKGIANT(ifp);
398 
399 	return (inm);
400 }
401 
402 /*
403  * Leave an IPv4 multicast group.
404  * It is OK to call this routine if the underlying ifnet went away.
405  *
406  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
407  * will set ifma_ifp to NULL when the associated ifnet instance is detached
408  * from the system.
409  *
410  * The only reason we need to violate layers and check ifma_ifp here at all
411  * is because certain hardware drivers still require Giant to be held,
412  * and it must always be taken before other locks.
413  */
414 void
415 in_delmulti(struct in_multi *inm)
416 {
417 	struct ifnet *ifp;
418 
419 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
420 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
421 	ifp = inm->inm_ifma->ifma_ifp;
422 
423 	if (ifp != NULL) {
424 		/*
425 		 * Sanity check that netinet's notion of ifp is the
426 		 * same as net's.
427 		 */
428 		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
429 		IFF_LOCKGIANT(ifp);
430 	}
431 
432 	IN_MULTI_LOCK();
433 	in_delmulti_locked(inm);
434 	IN_MULTI_UNLOCK();
435 
436 	if (ifp != NULL)
437 		IFF_UNLOCKGIANT(ifp);
438 }
439 
440 /*
441  * Delete a multicast address record, with locks held.
442  *
443  * It is OK to call this routine if the ifp went away.
444  * Assumes that caller holds the IN_MULTI lock, and that
445  * Giant was taken before other locks if required by the hardware.
446  */
447 void
448 in_delmulti_locked(struct in_multi *inm)
449 {
450 	struct ifmultiaddr *ifma;
451 
452 	IN_MULTI_LOCK_ASSERT();
453 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
454 
455 	if (--inm->inm_refcount == 0) {
456 		igmp_leavegroup(inm);
457 
458 		ifma = inm->inm_ifma;
459 #ifdef DIAGNOSTIC
460 		if (bootverbose)
461 			printf("%s: purging ifma %p\n", __func__, ifma);
462 #endif
463 		KASSERT(ifma->ifma_protospec == inm,
464 		    ("%s: ifma_protospec != inm", __func__));
465 		ifma->ifma_protospec = NULL;
466 
467 		LIST_REMOVE(inm, inm_link);
468 		free(inm, M_IPMADDR);
469 
470 		if_delmulti_ifma(ifma);
471 	}
472 }
473 
474 /*
475  * Block or unblock an ASM/SSM multicast source on an inpcb.
476  */
477 static int
478 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
479 {
480 	INIT_VNET_NET(curvnet);
481 	INIT_VNET_INET(curvnet);
482 	struct group_source_req		 gsr;
483 	sockunion_t			*gsa, *ssa;
484 	struct ifnet			*ifp;
485 	struct in_mfilter		*imf;
486 	struct ip_moptions		*imo;
487 	struct in_msource		*ims;
488 	size_t				 idx;
489 	int				 error;
490 	int				 block;
491 
492 	ifp = NULL;
493 	error = 0;
494 	block = 0;
495 
496 	memset(&gsr, 0, sizeof(struct group_source_req));
497 	gsa = (sockunion_t *)&gsr.gsr_group;
498 	ssa = (sockunion_t *)&gsr.gsr_source;
499 
500 	switch (sopt->sopt_name) {
501 	case IP_BLOCK_SOURCE:
502 	case IP_UNBLOCK_SOURCE: {
503 		struct ip_mreq_source	 mreqs;
504 
505 		error = sooptcopyin(sopt, &mreqs,
506 		    sizeof(struct ip_mreq_source),
507 		    sizeof(struct ip_mreq_source));
508 		if (error)
509 			return (error);
510 
511 		gsa->sin.sin_family = AF_INET;
512 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
513 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
514 
515 		ssa->sin.sin_family = AF_INET;
516 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
517 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
518 
519 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
520 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
521 
522 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
523 			block = 1;
524 
525 #ifdef DIAGNOSTIC
526 		if (bootverbose) {
527 			printf("%s: imr_interface = %s, ifp = %p\n",
528 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
529 		}
530 #endif
531 		break;
532 	    }
533 
534 	case MCAST_BLOCK_SOURCE:
535 	case MCAST_UNBLOCK_SOURCE:
536 		error = sooptcopyin(sopt, &gsr,
537 		    sizeof(struct group_source_req),
538 		    sizeof(struct group_source_req));
539 		if (error)
540 			return (error);
541 
542 		if (gsa->sin.sin_family != AF_INET ||
543 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
544 			return (EINVAL);
545 
546 		if (ssa->sin.sin_family != AF_INET ||
547 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
548 			return (EINVAL);
549 
550 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
551 			return (EADDRNOTAVAIL);
552 
553 		ifp = ifnet_byindex(gsr.gsr_interface);
554 
555 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
556 			block = 1;
557 		break;
558 
559 	default:
560 #ifdef DIAGNOSTIC
561 		if (bootverbose) {
562 			printf("%s: unknown sopt_name %d\n", __func__,
563 			    sopt->sopt_name);
564 		}
565 #endif
566 		return (EOPNOTSUPP);
567 		break;
568 	}
569 
570 	/* XXX INET6 */
571 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
572 		return (EINVAL);
573 
574 	/*
575 	 * Check if we are actually a member of this group.
576 	 */
577 	imo = inp_findmoptions(inp);
578 	idx = imo_match_group(imo, ifp, &gsa->sa);
579 	if (idx == -1 || imo->imo_mfilters == NULL) {
580 		error = EADDRNOTAVAIL;
581 		goto out_locked;
582 	}
583 
584 	KASSERT(imo->imo_mfilters != NULL,
585 	    ("%s: imo_mfilters not allocated", __func__));
586 	imf = &imo->imo_mfilters[idx];
587 
588 	/*
589 	 * SSM multicast truth table for block/unblock operations.
590 	 *
591 	 * Operation   Filter Mode  Entry exists?   Action
592 	 *
593 	 * block       exclude      no              add source to filter
594 	 * unblock     include      no              add source to filter
595 	 * block       include      no              EINVAL
596 	 * unblock     exclude      no              EINVAL
597 	 * block       exclude      yes             EADDRNOTAVAIL
598 	 * unblock     include      yes             EADDRNOTAVAIL
599 	 * block       include      yes             remove source from filter
600 	 * unblock     exclude      yes             remove source from filter
601 	 *
602 	 * FreeBSD does not explicitly distinguish between ASM and SSM
603 	 * mode sockets; all sockets are assumed to have a filter list.
604 	 */
605 #ifdef DIAGNOSTIC
606 	if (bootverbose) {
607 		printf("%s: imf_fmode is %s\n", __func__,
608 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
609 	}
610 #endif
611 	ims = imo_match_source(imo, idx, &ssa->sa);
612 	if (ims == NULL) {
613 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
614 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
615 #ifdef DIAGNOSTIC
616 			if (bootverbose) {
617 				printf("%s: adding %s to filter list\n",
618 				    __func__, inet_ntoa(ssa->sin.sin_addr));
619 			}
620 #endif
621 			error = imo_join_source(imo, idx, ssa);
622 		}
623 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
624 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
625 			/*
626 			 * If the socket is in inclusive mode:
627 			 *  the source is already blocked as it has no entry.
628 			 * If the socket is in exclusive mode:
629 			 *  the source is already unblocked as it has no entry.
630 			 */
631 #ifdef DIAGNOSTIC
632 			if (bootverbose) {
633 				printf("%s: ims %p; %s already [un]blocked\n",
634 				    __func__, ims,
635 				    inet_ntoa(ssa->sin.sin_addr));
636 			}
637 #endif
638 			error = EINVAL;
639 		}
640 	} else {
641 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
642 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
643 			/*
644 			 * If the socket is in exclusive mode:
645 			 *  the source is already blocked as it has an entry.
646 			 * If the socket is in inclusive mode:
647 			 *  the source is already unblocked as it has an entry.
648 			 */
649 #ifdef DIAGNOSTIC
650 			if (bootverbose) {
651 				printf("%s: ims %p; %s already [un]blocked\n",
652 				    __func__, ims,
653 				    inet_ntoa(ssa->sin.sin_addr));
654 			}
655 #endif
656 			error = EADDRNOTAVAIL;
657 		}
658 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
659 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
660 #ifdef DIAGNOSTIC
661 			if (bootverbose) {
662 				printf("%s: removing %s from filter list\n",
663 				    __func__, inet_ntoa(ssa->sin.sin_addr));
664 			}
665 #endif
666 			error = imo_leave_source(imo, idx, ssa);
667 		}
668 	}
669 
670 out_locked:
671 	INP_WUNLOCK(inp);
672 	return (error);
673 }
674 
675 /*
676  * Given an inpcb, return its multicast options structure pointer.  Accepts
677  * an unlocked inpcb pointer, but will return it locked.  May sleep.
678  */
679 static struct ip_moptions *
680 inp_findmoptions(struct inpcb *inp)
681 {
682 	struct ip_moptions	 *imo;
683 	struct in_multi		**immp;
684 	struct in_mfilter	 *imfp;
685 	size_t			  idx;
686 
687 	INP_WLOCK(inp);
688 	if (inp->inp_moptions != NULL)
689 		return (inp->inp_moptions);
690 
691 	INP_WUNLOCK(inp);
692 
693 	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
694 	    M_WAITOK);
695 	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
696 	    M_IPMOPTS, M_WAITOK | M_ZERO);
697 	imfp = (struct in_mfilter *)malloc(
698 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
699 	    M_IPMSOURCE, M_WAITOK);
700 
701 	imo->imo_multicast_ifp = NULL;
702 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
703 	imo->imo_multicast_vif = -1;
704 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
705 	imo->imo_multicast_loop = in_mcast_loop;
706 	imo->imo_num_memberships = 0;
707 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
708 	imo->imo_membership = immp;
709 
710 	/* Initialize per-group source filters. */
711 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
712 		imfp[idx].imf_fmode = MCAST_EXCLUDE;
713 		imfp[idx].imf_nsources = 0;
714 		TAILQ_INIT(&imfp[idx].imf_sources);
715 	}
716 	imo->imo_mfilters = imfp;
717 
718 	INP_WLOCK(inp);
719 	if (inp->inp_moptions != NULL) {
720 		free(imfp, M_IPMSOURCE);
721 		free(immp, M_IPMOPTS);
722 		free(imo, M_IPMOPTS);
723 		return (inp->inp_moptions);
724 	}
725 	inp->inp_moptions = imo;
726 	return (imo);
727 }
728 
729 /*
730  * Discard the IP multicast options (and source filters).
731  */
732 void
733 inp_freemoptions(struct ip_moptions *imo)
734 {
735 	struct in_mfilter	*imf;
736 	struct in_msource	*ims, *tims;
737 	size_t			 idx, nmships;
738 
739 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
740 
741 	nmships = imo->imo_num_memberships;
742 	for (idx = 0; idx < nmships; ++idx) {
743 		in_delmulti(imo->imo_membership[idx]);
744 
745 		if (imo->imo_mfilters != NULL) {
746 			imf = &imo->imo_mfilters[idx];
747 			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
748 			    ims_next, tims) {
749 				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
750 				free(ims, M_IPMSOURCE);
751 				imf->imf_nsources--;
752 			}
753 			KASSERT(imf->imf_nsources == 0,
754 			    ("%s: did not free all imf_nsources", __func__));
755 		}
756 	}
757 
758 	if (imo->imo_mfilters != NULL)
759 		free(imo->imo_mfilters, M_IPMSOURCE);
760 	free(imo->imo_membership, M_IPMOPTS);
761 	free(imo, M_IPMOPTS);
762 }
763 
764 /*
765  * Atomically get source filters on a socket for an IPv4 multicast group.
766  * Called with INP lock held; returns with lock released.
767  */
768 static int
769 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
770 {
771 	INIT_VNET_NET(curvnet);
772 	struct __msfilterreq	 msfr;
773 	sockunion_t		*gsa;
774 	struct ifnet		*ifp;
775 	struct ip_moptions	*imo;
776 	struct in_mfilter	*imf;
777 	struct in_msource	*ims;
778 	struct sockaddr_storage	*ptss;
779 	struct sockaddr_storage	*tss;
780 	int			 error;
781 	size_t			 idx;
782 
783 	INP_WLOCK_ASSERT(inp);
784 
785 	imo = inp->inp_moptions;
786 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
787 
788 	INP_WUNLOCK(inp);
789 
790 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
791 	    sizeof(struct __msfilterreq));
792 	if (error)
793 		return (error);
794 
795 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
796 		return (EINVAL);
797 
798 	ifp = ifnet_byindex(msfr.msfr_ifindex);
799 	if (ifp == NULL)
800 		return (EINVAL);
801 
802 	INP_WLOCK(inp);
803 
804 	/*
805 	 * Lookup group on the socket.
806 	 */
807 	gsa = (sockunion_t *)&msfr.msfr_group;
808 	idx = imo_match_group(imo, ifp, &gsa->sa);
809 	if (idx == -1 || imo->imo_mfilters == NULL) {
810 		INP_WUNLOCK(inp);
811 		return (EADDRNOTAVAIL);
812 	}
813 
814 	imf = &imo->imo_mfilters[idx];
815 	msfr.msfr_fmode = imf->imf_fmode;
816 	msfr.msfr_nsrcs = imf->imf_nsources;
817 
818 	/*
819 	 * If the user specified a buffer, copy out the source filter
820 	 * entries to userland gracefully.
821 	 * msfr.msfr_nsrcs is always set to the total number of filter
822 	 * entries which the kernel currently has for this group.
823 	 */
824 	tss = NULL;
825 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
826 		/*
827 		 * Make a copy of the source vector so that we do not
828 		 * thrash the inpcb lock whilst copying it out.
829 		 * We only copy out the number of entries which userland
830 		 * has asked for, but we always tell userland how big the
831 		 * buffer really needs to be.
832 		 */
833 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
834 		    M_TEMP, M_NOWAIT);
835 		if (tss == NULL) {
836 			error = ENOBUFS;
837 		} else {
838 			ptss = tss;
839 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
840 				memcpy(ptss++, &ims->ims_addr,
841 				    sizeof(struct sockaddr_storage));
842 			}
843 		}
844 	}
845 
846 	INP_WUNLOCK(inp);
847 
848 	if (tss != NULL) {
849 		error = copyout(tss, msfr.msfr_srcs,
850 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
851 		free(tss, M_TEMP);
852 	}
853 
854 	if (error)
855 		return (error);
856 
857 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
858 
859 	return (error);
860 }
861 
862 /*
863  * Return the IP multicast options in response to user getsockopt().
864  */
865 int
866 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
867 {
868 	INIT_VNET_INET(curvnet);
869 	struct ip_mreqn		 mreqn;
870 	struct ip_moptions	*imo;
871 	struct ifnet		*ifp;
872 	struct in_ifaddr	*ia;
873 	int			 error, optval;
874 	u_char			 coptval;
875 
876 	INP_WLOCK(inp);
877 	imo = inp->inp_moptions;
878 	/*
879 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
880 	 * or is a divert socket, reject it.
881 	 */
882 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
883 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
884 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
885 		INP_WUNLOCK(inp);
886 		return (EOPNOTSUPP);
887 	}
888 
889 	error = 0;
890 	switch (sopt->sopt_name) {
891 	case IP_MULTICAST_VIF:
892 		if (imo != NULL)
893 			optval = imo->imo_multicast_vif;
894 		else
895 			optval = -1;
896 		INP_WUNLOCK(inp);
897 		error = sooptcopyout(sopt, &optval, sizeof(int));
898 		break;
899 
900 	case IP_MULTICAST_IF:
901 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
902 		if (imo != NULL) {
903 			ifp = imo->imo_multicast_ifp;
904 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
905 				mreqn.imr_address = imo->imo_multicast_addr;
906 			} else if (ifp != NULL) {
907 				mreqn.imr_ifindex = ifp->if_index;
908 				IFP_TO_IA(ifp, ia);
909 				if (ia != NULL) {
910 					mreqn.imr_address =
911 					    IA_SIN(ia)->sin_addr;
912 				}
913 			}
914 		}
915 		INP_WUNLOCK(inp);
916 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
917 			error = sooptcopyout(sopt, &mreqn,
918 			    sizeof(struct ip_mreqn));
919 		} else {
920 			error = sooptcopyout(sopt, &mreqn.imr_address,
921 			    sizeof(struct in_addr));
922 		}
923 		break;
924 
925 	case IP_MULTICAST_TTL:
926 		if (imo == 0)
927 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
928 		else
929 			optval = coptval = imo->imo_multicast_ttl;
930 		INP_WUNLOCK(inp);
931 		if (sopt->sopt_valsize == sizeof(u_char))
932 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
933 		else
934 			error = sooptcopyout(sopt, &optval, sizeof(int));
935 		break;
936 
937 	case IP_MULTICAST_LOOP:
938 		if (imo == 0)
939 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
940 		else
941 			optval = coptval = imo->imo_multicast_loop;
942 		INP_WUNLOCK(inp);
943 		if (sopt->sopt_valsize == sizeof(u_char))
944 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
945 		else
946 			error = sooptcopyout(sopt, &optval, sizeof(int));
947 		break;
948 
949 	case IP_MSFILTER:
950 		if (imo == NULL) {
951 			error = EADDRNOTAVAIL;
952 			INP_WUNLOCK(inp);
953 		} else {
954 			error = inp_get_source_filters(inp, sopt);
955 		}
956 		break;
957 
958 	default:
959 		INP_WUNLOCK(inp);
960 		error = ENOPROTOOPT;
961 		break;
962 	}
963 
964 	INP_UNLOCK_ASSERT(inp);
965 
966 	return (error);
967 }
968 
969 /*
970  * Join an IPv4 multicast group, possibly with a source.
971  */
972 static int
973 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
974 {
975 	INIT_VNET_NET(curvnet);
976 	INIT_VNET_INET(curvnet);
977 	struct group_source_req		 gsr;
978 	sockunion_t			*gsa, *ssa;
979 	struct ifnet			*ifp;
980 	struct in_mfilter		*imf;
981 	struct ip_moptions		*imo;
982 	struct in_multi			*inm;
983 	size_t				 idx;
984 	int				 error;
985 
986 	ifp = NULL;
987 	error = 0;
988 
989 	memset(&gsr, 0, sizeof(struct group_source_req));
990 	gsa = (sockunion_t *)&gsr.gsr_group;
991 	gsa->ss.ss_family = AF_UNSPEC;
992 	ssa = (sockunion_t *)&gsr.gsr_source;
993 	ssa->ss.ss_family = AF_UNSPEC;
994 
995 	switch (sopt->sopt_name) {
996 	case IP_ADD_MEMBERSHIP:
997 	case IP_ADD_SOURCE_MEMBERSHIP: {
998 		struct ip_mreq_source	 mreqs;
999 
1000 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
1001 			error = sooptcopyin(sopt, &mreqs,
1002 			    sizeof(struct ip_mreq),
1003 			    sizeof(struct ip_mreq));
1004 			/*
1005 			 * Do argument switcharoo from ip_mreq into
1006 			 * ip_mreq_source to avoid using two instances.
1007 			 */
1008 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1009 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1010 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1011 			error = sooptcopyin(sopt, &mreqs,
1012 			    sizeof(struct ip_mreq_source),
1013 			    sizeof(struct ip_mreq_source));
1014 		}
1015 		if (error)
1016 			return (error);
1017 
1018 		gsa->sin.sin_family = AF_INET;
1019 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1020 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1021 
1022 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1023 			ssa->sin.sin_family = AF_INET;
1024 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1025 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1026 		}
1027 
1028 		/*
1029 		 * Obtain ifp. If no interface address was provided,
1030 		 * use the interface of the route in the unicast FIB for
1031 		 * the given multicast destination; usually, this is the
1032 		 * default route.
1033 		 * If this lookup fails, attempt to use the first non-loopback
1034 		 * interface with multicast capability in the system as a
1035 		 * last resort. The legacy IPv4 ASM API requires that we do
1036 		 * this in order to allow groups to be joined when the routing
1037 		 * table has not yet been populated during boot.
1038 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1039 		 * reject the IPv4 multicast join.
1040 		 */
1041 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1042 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1043 		} else {
1044 			struct route ro;
1045 
1046 			ro.ro_rt = NULL;
1047 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1048 			in_rtalloc_ign(&ro, 0,
1049 			   inp->inp_inc.inc_fibnum);
1050 			if (ro.ro_rt != NULL) {
1051 				ifp = ro.ro_rt->rt_ifp;
1052 				KASSERT(ifp != NULL, ("%s: null ifp",
1053 				    __func__));
1054 				RTFREE(ro.ro_rt);
1055 			} else {
1056 				struct in_ifaddr *ia;
1057 				struct ifnet *mfp = NULL;
1058 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1059 					mfp = ia->ia_ifp;
1060 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1061 					     (mfp->if_flags & IFF_MULTICAST)) {
1062 						ifp = mfp;
1063 						break;
1064 					}
1065 				}
1066 			}
1067 		}
1068 #ifdef DIAGNOSTIC
1069 		if (bootverbose) {
1070 			printf("%s: imr_interface = %s, ifp = %p\n",
1071 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1072 		}
1073 #endif
1074 		break;
1075 	}
1076 
1077 	case MCAST_JOIN_GROUP:
1078 	case MCAST_JOIN_SOURCE_GROUP:
1079 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1080 			error = sooptcopyin(sopt, &gsr,
1081 			    sizeof(struct group_req),
1082 			    sizeof(struct group_req));
1083 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1084 			error = sooptcopyin(sopt, &gsr,
1085 			    sizeof(struct group_source_req),
1086 			    sizeof(struct group_source_req));
1087 		}
1088 		if (error)
1089 			return (error);
1090 
1091 		if (gsa->sin.sin_family != AF_INET ||
1092 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1093 			return (EINVAL);
1094 
1095 		/*
1096 		 * Overwrite the port field if present, as the sockaddr
1097 		 * being copied in may be matched with a binary comparison.
1098 		 * XXX INET6
1099 		 */
1100 		gsa->sin.sin_port = 0;
1101 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1102 			if (ssa->sin.sin_family != AF_INET ||
1103 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1104 				return (EINVAL);
1105 			ssa->sin.sin_port = 0;
1106 		}
1107 
1108 		/*
1109 		 * Obtain the ifp.
1110 		 */
1111 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1112 			return (EADDRNOTAVAIL);
1113 		ifp = ifnet_byindex(gsr.gsr_interface);
1114 
1115 		break;
1116 
1117 	default:
1118 #ifdef DIAGNOSTIC
1119 		if (bootverbose) {
1120 			printf("%s: unknown sopt_name %d\n", __func__,
1121 			    sopt->sopt_name);
1122 		}
1123 #endif
1124 		return (EOPNOTSUPP);
1125 		break;
1126 	}
1127 
1128 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1129 		return (EINVAL);
1130 
1131 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1132 		return (EADDRNOTAVAIL);
1133 
1134 	/*
1135 	 * Check if we already hold membership of this group for this inpcb.
1136 	 * If so, we do not need to perform the initial join.
1137 	 */
1138 	imo = inp_findmoptions(inp);
1139 	idx = imo_match_group(imo, ifp, &gsa->sa);
1140 	if (idx != -1) {
1141 		if (ssa->ss.ss_family != AF_UNSPEC) {
1142 			/*
1143 			 * Attempting to join an ASM group (when already
1144 			 * an ASM or SSM member) is an error.
1145 			 */
1146 			error = EADDRNOTAVAIL;
1147 		} else {
1148 			imf = &imo->imo_mfilters[idx];
1149 			if (imf->imf_nsources == 0) {
1150 				/*
1151 				 * Attempting to join an SSM group (when
1152 				 * already an ASM member) is an error.
1153 				 */
1154 				error = EINVAL;
1155 			} else {
1156 				/*
1157 				 * Attempting to join an SSM group (when
1158 				 * already an SSM member) means "add this
1159 				 * source to the inclusive filter list".
1160 				 */
1161 				error = imo_join_source(imo, idx, ssa);
1162 			}
1163 		}
1164 		goto out_locked;
1165 	}
1166 
1167 	/*
1168 	 * Call imo_grow() to reallocate the membership and source filter
1169 	 * vectors if they are full. If the size would exceed the hard limit,
1170 	 * then we know we've really run out of entries. We keep the INP
1171 	 * lock held to avoid introducing a race condition.
1172 	 */
1173 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1174 		error = imo_grow(imo);
1175 		if (error)
1176 			goto out_locked;
1177 	}
1178 
1179 	/*
1180 	 * So far, so good: perform the layer 3 join, layer 2 join,
1181 	 * and make an IGMP announcement if needed.
1182 	 */
1183 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1184 	if (inm == NULL) {
1185 		error = ENOBUFS;
1186 		goto out_locked;
1187 	}
1188 	idx = imo->imo_num_memberships;
1189 	imo->imo_membership[idx] = inm;
1190 	imo->imo_num_memberships++;
1191 
1192 	KASSERT(imo->imo_mfilters != NULL,
1193 	    ("%s: imf_mfilters vector was not allocated", __func__));
1194 	imf = &imo->imo_mfilters[idx];
1195 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1196 	    ("%s: imf_sources not empty", __func__));
1197 
1198 	/*
1199 	 * If this is a new SSM group join (i.e. a source was specified
1200 	 * with this group), add this source to the filter list.
1201 	 */
1202 	if (ssa->ss.ss_family != AF_UNSPEC) {
1203 		/*
1204 		 * An initial SSM join implies that this socket's membership
1205 		 * of the multicast group is now in inclusive mode.
1206 		 */
1207 		imf->imf_fmode = MCAST_INCLUDE;
1208 
1209 		error = imo_join_source(imo, idx, ssa);
1210 		if (error) {
1211 			/*
1212 			 * Drop inp lock before calling in_delmulti(),
1213 			 * to prevent a lock order reversal.
1214 			 */
1215 			--imo->imo_num_memberships;
1216 			INP_WUNLOCK(inp);
1217 			in_delmulti(inm);
1218 			return (error);
1219 		}
1220 	}
1221 
1222 out_locked:
1223 	INP_WUNLOCK(inp);
1224 	return (error);
1225 }
1226 
1227 /*
1228  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1229  */
1230 static int
1231 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1232 {
1233 	INIT_VNET_NET(curvnet);
1234 	INIT_VNET_INET(curvnet);
1235 	struct group_source_req		 gsr;
1236 	struct ip_mreq_source		 mreqs;
1237 	sockunion_t			*gsa, *ssa;
1238 	struct ifnet			*ifp;
1239 	struct in_mfilter		*imf;
1240 	struct ip_moptions		*imo;
1241 	struct in_msource		*ims, *tims;
1242 	struct in_multi			*inm;
1243 	size_t				 idx;
1244 	int				 error;
1245 
1246 	ifp = NULL;
1247 	error = 0;
1248 
1249 	memset(&gsr, 0, sizeof(struct group_source_req));
1250 	gsa = (sockunion_t *)&gsr.gsr_group;
1251 	gsa->ss.ss_family = AF_UNSPEC;
1252 	ssa = (sockunion_t *)&gsr.gsr_source;
1253 	ssa->ss.ss_family = AF_UNSPEC;
1254 
1255 	switch (sopt->sopt_name) {
1256 	case IP_DROP_MEMBERSHIP:
1257 	case IP_DROP_SOURCE_MEMBERSHIP:
1258 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1259 			error = sooptcopyin(sopt, &mreqs,
1260 			    sizeof(struct ip_mreq),
1261 			    sizeof(struct ip_mreq));
1262 			/*
1263 			 * Swap interface and sourceaddr arguments,
1264 			 * as ip_mreq and ip_mreq_source are laid
1265 			 * out differently.
1266 			 */
1267 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1268 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1269 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1270 			error = sooptcopyin(sopt, &mreqs,
1271 			    sizeof(struct ip_mreq_source),
1272 			    sizeof(struct ip_mreq_source));
1273 		}
1274 		if (error)
1275 			return (error);
1276 
1277 		gsa->sin.sin_family = AF_INET;
1278 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1279 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1280 
1281 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1282 			ssa->sin.sin_family = AF_INET;
1283 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1284 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1285 		}
1286 
1287 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1288 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1289 
1290 #ifdef DIAGNOSTIC
1291 		if (bootverbose) {
1292 			printf("%s: imr_interface = %s, ifp = %p\n",
1293 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1294 		}
1295 #endif
1296 		break;
1297 
1298 	case MCAST_LEAVE_GROUP:
1299 	case MCAST_LEAVE_SOURCE_GROUP:
1300 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1301 			error = sooptcopyin(sopt, &gsr,
1302 			    sizeof(struct group_req),
1303 			    sizeof(struct group_req));
1304 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1305 			error = sooptcopyin(sopt, &gsr,
1306 			    sizeof(struct group_source_req),
1307 			    sizeof(struct group_source_req));
1308 		}
1309 		if (error)
1310 			return (error);
1311 
1312 		if (gsa->sin.sin_family != AF_INET ||
1313 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1314 			return (EINVAL);
1315 
1316 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1317 			if (ssa->sin.sin_family != AF_INET ||
1318 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1319 				return (EINVAL);
1320 		}
1321 
1322 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1323 			return (EADDRNOTAVAIL);
1324 
1325 		ifp = ifnet_byindex(gsr.gsr_interface);
1326 		break;
1327 
1328 	default:
1329 #ifdef DIAGNOSTIC
1330 		if (bootverbose) {
1331 			printf("%s: unknown sopt_name %d\n", __func__,
1332 			    sopt->sopt_name);
1333 		}
1334 #endif
1335 		return (EOPNOTSUPP);
1336 		break;
1337 	}
1338 
1339 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1340 		return (EINVAL);
1341 
1342 	/*
1343 	 * Find the membership in the membership array.
1344 	 */
1345 	imo = inp_findmoptions(inp);
1346 	idx = imo_match_group(imo, ifp, &gsa->sa);
1347 	if (idx == -1) {
1348 		error = EADDRNOTAVAIL;
1349 		goto out_locked;
1350 	}
1351 	imf = &imo->imo_mfilters[idx];
1352 
1353 	/*
1354 	 * If we were instructed only to leave a given source, do so.
1355 	 */
1356 	if (ssa->ss.ss_family != AF_UNSPEC) {
1357 		if (imf->imf_nsources == 0 ||
1358 		    imf->imf_fmode == MCAST_EXCLUDE) {
1359 			/*
1360 			 * Attempting to SSM leave an ASM group
1361 			 * is an error; should use *_BLOCK_SOURCE instead.
1362 			 * Attempting to SSM leave a source in a group when
1363 			 * the socket is in 'exclude mode' is also an error.
1364 			 */
1365 			error = EINVAL;
1366 		} else {
1367 			error = imo_leave_source(imo, idx, ssa);
1368 		}
1369 		/*
1370 		 * If an error occurred, or this source is not the last
1371 		 * source in the group, do not leave the whole group.
1372 		 */
1373 		if (error || imf->imf_nsources > 0)
1374 			goto out_locked;
1375 	}
1376 
1377 	/*
1378 	 * Give up the multicast address record to which the membership points.
1379 	 */
1380 	inm = imo->imo_membership[idx];
1381 	in_delmulti(inm);
1382 
1383 	/*
1384 	 * Free any source filters for this group if they exist.
1385 	 * Revert inpcb to the default MCAST_EXCLUDE state.
1386 	 */
1387 	if (imo->imo_mfilters != NULL) {
1388 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1389 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1390 			free(ims, M_IPMSOURCE);
1391 			imf->imf_nsources--;
1392 		}
1393 		KASSERT(imf->imf_nsources == 0,
1394 		    ("%s: imf_nsources not 0", __func__));
1395 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1396 		    ("%s: imf_sources not empty", __func__));
1397 		imf->imf_fmode = MCAST_EXCLUDE;
1398 	}
1399 
1400 	/*
1401 	 * Remove the gap in the membership array.
1402 	 */
1403 	for (++idx; idx < imo->imo_num_memberships; ++idx)
1404 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1405 	imo->imo_num_memberships--;
1406 
1407 out_locked:
1408 	INP_WUNLOCK(inp);
1409 	return (error);
1410 }
1411 
1412 /*
1413  * Select the interface for transmitting IPv4 multicast datagrams.
1414  *
1415  * Either an instance of struct in_addr or an instance of struct ip_mreqn
1416  * may be passed to this socket option. An address of INADDR_ANY or an
1417  * interface index of 0 is used to remove a previous selection.
1418  * When no interface is selected, one is chosen for every send.
1419  */
1420 static int
1421 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1422 {
1423 	INIT_VNET_NET(curvnet);
1424 	struct in_addr		 addr;
1425 	struct ip_mreqn		 mreqn;
1426 	struct ifnet		*ifp;
1427 	struct ip_moptions	*imo;
1428 	int			 error;
1429 
1430 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1431 		/*
1432 		 * An interface index was specified using the
1433 		 * Linux-derived ip_mreqn structure.
1434 		 */
1435 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1436 		    sizeof(struct ip_mreqn));
1437 		if (error)
1438 			return (error);
1439 
1440 		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
1441 			return (EINVAL);
1442 
1443 		if (mreqn.imr_ifindex == 0) {
1444 			ifp = NULL;
1445 		} else {
1446 			ifp = ifnet_byindex(mreqn.imr_ifindex);
1447 			if (ifp == NULL)
1448 				return (EADDRNOTAVAIL);
1449 		}
1450 	} else {
1451 		/*
1452 		 * An interface was specified by IPv4 address.
1453 		 * This is the traditional BSD usage.
1454 		 */
1455 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1456 		    sizeof(struct in_addr));
1457 		if (error)
1458 			return (error);
1459 		if (addr.s_addr == INADDR_ANY) {
1460 			ifp = NULL;
1461 		} else {
1462 			INADDR_TO_IFP(addr, ifp);
1463 			if (ifp == NULL)
1464 				return (EADDRNOTAVAIL);
1465 		}
1466 #ifdef DIAGNOSTIC
1467 		if (bootverbose) {
1468 			printf("%s: ifp = %p, addr = %s\n",
1469 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1470 		}
1471 #endif
1472 	}
1473 
1474 	/* Reject interfaces which do not support multicast. */
1475 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1476 		return (EOPNOTSUPP);
1477 
1478 	imo = inp_findmoptions(inp);
1479 	imo->imo_multicast_ifp = ifp;
1480 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1481 	INP_WUNLOCK(inp);
1482 
1483 	return (0);
1484 }
1485 
1486 /*
1487  * Atomically set source filters on a socket for an IPv4 multicast group.
1488  */
1489 static int
1490 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1491 {
1492 	INIT_VNET_NET(curvnet);
1493 	struct __msfilterreq	 msfr;
1494 	sockunion_t		*gsa;
1495 	struct ifnet		*ifp;
1496 	struct in_mfilter	*imf;
1497 	struct ip_moptions	*imo;
1498 	struct in_msource	*ims, *tims;
1499 	size_t			 idx;
1500 	int			 error;
1501 
1502 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1503 	    sizeof(struct __msfilterreq));
1504 	if (error)
1505 		return (error);
1506 
1507 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1508 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1509 	     msfr.msfr_fmode != MCAST_INCLUDE))
1510 		return (EINVAL);
1511 
1512 	if (msfr.msfr_group.ss_family != AF_INET ||
1513 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1514 		return (EINVAL);
1515 
1516 	gsa = (sockunion_t *)&msfr.msfr_group;
1517 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1518 		return (EINVAL);
1519 
1520 	gsa->sin.sin_port = 0;	/* ignore port */
1521 
1522 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1523 		return (EADDRNOTAVAIL);
1524 
1525 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1526 	if (ifp == NULL)
1527 		return (EADDRNOTAVAIL);
1528 
1529 	/*
1530 	 * Take the INP lock.
1531 	 * Check if this socket is a member of this group.
1532 	 */
1533 	imo = inp_findmoptions(inp);
1534 	idx = imo_match_group(imo, ifp, &gsa->sa);
1535 	if (idx == -1 || imo->imo_mfilters == NULL) {
1536 		error = EADDRNOTAVAIL;
1537 		goto out_locked;
1538 	}
1539 	imf = &imo->imo_mfilters[idx];
1540 
1541 #ifdef DIAGNOSTIC
1542 	if (bootverbose)
1543 		printf("%s: clearing source list\n", __func__);
1544 #endif
1545 
1546 	/*
1547 	 * Remove any existing source filters.
1548 	 */
1549 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1550 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1551 		free(ims, M_IPMSOURCE);
1552 		imf->imf_nsources--;
1553 	}
1554 	KASSERT(imf->imf_nsources == 0,
1555 	    ("%s: source list not cleared", __func__));
1556 
1557 	/*
1558 	 * Apply any new source filters, if present.
1559 	 */
1560 	if (msfr.msfr_nsrcs > 0) {
1561 		struct in_msource	**pnims;
1562 		struct in_msource	*nims;
1563 		struct sockaddr_storage	*kss;
1564 		struct sockaddr_storage	*pkss;
1565 		sockunion_t		*psu;
1566 		int			 i, j;
1567 
1568 		/*
1569 		 * Drop the inp lock so we may sleep if we need to
1570 		 * in order to satisfy a malloc request.
1571 		 * We will re-take it before changing socket state.
1572 		 */
1573 		INP_WUNLOCK(inp);
1574 #ifdef DIAGNOSTIC
1575 		if (bootverbose) {
1576 			printf("%s: loading %lu source list entries\n",
1577 			    __func__, (unsigned long)msfr.msfr_nsrcs);
1578 		}
1579 #endif
1580 		/*
1581 		 * Make a copy of the user-space source vector so
1582 		 * that we may copy them with a single copyin. This
1583 		 * allows us to deal with page faults up-front.
1584 		 */
1585 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1586 		    M_TEMP, M_WAITOK);
1587 		error = copyin(msfr.msfr_srcs, kss,
1588 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1589 		if (error) {
1590 			free(kss, M_TEMP);
1591 			return (error);
1592 		}
1593 
1594 		/*
1595 		 * Perform argument checking on every sockaddr_storage
1596 		 * structure in the vector provided to us. Overwrite
1597 		 * fields which should not apply to source entries.
1598 		 * TODO: Check for duplicate sources on this pass.
1599 		 */
1600 		psu = (sockunion_t *)kss;
1601 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1602 			switch (psu->ss.ss_family) {
1603 			case AF_INET:
1604 				if (psu->sin.sin_len !=
1605 				    sizeof(struct sockaddr_in)) {
1606 					error = EINVAL;
1607 				} else {
1608 					psu->sin.sin_port = 0;
1609 				}
1610 				break;
1611 #ifdef notyet
1612 			case AF_INET6;
1613 				if (psu->sin6.sin6_len !=
1614 				    sizeof(struct sockaddr_in6)) {
1615 					error = EINVAL;
1616 				} else {
1617 					psu->sin6.sin6_port = 0;
1618 					psu->sin6.sin6_flowinfo = 0;
1619 				}
1620 				break;
1621 #endif
1622 			default:
1623 				error = EAFNOSUPPORT;
1624 				break;
1625 			}
1626 			if (error)
1627 				break;
1628 		}
1629 		if (error) {
1630 			free(kss, M_TEMP);
1631 			return (error);
1632 		}
1633 
1634 		/*
1635 		 * Allocate a block to track all the in_msource
1636 		 * entries we are about to allocate, in case we
1637 		 * abruptly need to free them.
1638 		 */
1639 		pnims = malloc(sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1640 		    M_TEMP, M_WAITOK | M_ZERO);
1641 
1642 		/*
1643 		 * Allocate up to nsrcs individual chunks.
1644 		 * If we encounter an error, backtrack out of
1645 		 * all allocations cleanly; updates must be atomic.
1646 		 */
1647 		pkss = kss;
1648 		nims = NULL;
1649 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1650 			nims = malloc(sizeof(struct in_msource) *
1651 			    msfr.msfr_nsrcs, M_IPMSOURCE, M_WAITOK | M_ZERO);
1652 			pnims[i] = nims;
1653 		}
1654 		if (i < msfr.msfr_nsrcs) {
1655 			for (j = 0; j < i; j++) {
1656 				if (pnims[j] != NULL)
1657 					free(pnims[j], M_IPMSOURCE);
1658 			}
1659 			free(pnims, M_TEMP);
1660 			free(kss, M_TEMP);
1661 			return (ENOBUFS);
1662 		}
1663 
1664 		INP_UNLOCK_ASSERT(inp);
1665 
1666 		/*
1667 		 * Finally, apply the filters to the socket.
1668 		 * Re-take the inp lock; we are changing socket state.
1669 		 */
1670 		pkss = kss;
1671 		INP_WLOCK(inp);
1672 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1673 			memcpy(&(pnims[i]->ims_addr), pkss,
1674 			    sizeof(struct sockaddr_storage));
1675 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1676 			    ims_next);
1677 			imf->imf_nsources++;
1678 		}
1679 		free(pnims, M_TEMP);
1680 		free(kss, M_TEMP);
1681 	}
1682 
1683 	/*
1684 	 * Update the filter mode on the socket before releasing the inpcb.
1685 	 */
1686 	INP_WLOCK_ASSERT(inp);
1687 	imf->imf_fmode = msfr.msfr_fmode;
1688 
1689 out_locked:
1690 	INP_WUNLOCK(inp);
1691 	return (error);
1692 }
1693 
1694 /*
1695  * Set the IP multicast options in response to user setsockopt().
1696  *
1697  * Many of the socket options handled in this function duplicate the
1698  * functionality of socket options in the regular unicast API. However,
1699  * it is not possible to merge the duplicate code, because the idempotence
1700  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1701  * the effects of these options must be treated as separate and distinct.
1702  */
1703 int
1704 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1705 {
1706 	struct ip_moptions	*imo;
1707 	int			 error;
1708 
1709 	error = 0;
1710 
1711 	/*
1712 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1713 	 * or is a divert socket, reject it.
1714 	 * XXX Unlocked read of inp_socket believed OK.
1715 	 */
1716 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1717 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1718 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1719 		return (EOPNOTSUPP);
1720 
1721 	switch (sopt->sopt_name) {
1722 	case IP_MULTICAST_VIF: {
1723 		int vifi;
1724 		/*
1725 		 * Select a multicast VIF for transmission.
1726 		 * Only useful if multicast forwarding is active.
1727 		 */
1728 		if (legal_vif_num == NULL) {
1729 			error = EOPNOTSUPP;
1730 			break;
1731 		}
1732 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1733 		if (error)
1734 			break;
1735 		if (!legal_vif_num(vifi) && (vifi != -1)) {
1736 			error = EINVAL;
1737 			break;
1738 		}
1739 		imo = inp_findmoptions(inp);
1740 		imo->imo_multicast_vif = vifi;
1741 		INP_WUNLOCK(inp);
1742 		break;
1743 	}
1744 
1745 	case IP_MULTICAST_IF:
1746 		error = inp_set_multicast_if(inp, sopt);
1747 		break;
1748 
1749 	case IP_MULTICAST_TTL: {
1750 		u_char ttl;
1751 
1752 		/*
1753 		 * Set the IP time-to-live for outgoing multicast packets.
1754 		 * The original multicast API required a char argument,
1755 		 * which is inconsistent with the rest of the socket API.
1756 		 * We allow either a char or an int.
1757 		 */
1758 		if (sopt->sopt_valsize == sizeof(u_char)) {
1759 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1760 			    sizeof(u_char));
1761 			if (error)
1762 				break;
1763 		} else {
1764 			u_int ittl;
1765 
1766 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1767 			    sizeof(u_int));
1768 			if (error)
1769 				break;
1770 			if (ittl > 255) {
1771 				error = EINVAL;
1772 				break;
1773 			}
1774 			ttl = (u_char)ittl;
1775 		}
1776 		imo = inp_findmoptions(inp);
1777 		imo->imo_multicast_ttl = ttl;
1778 		INP_WUNLOCK(inp);
1779 		break;
1780 	}
1781 
1782 	case IP_MULTICAST_LOOP: {
1783 		u_char loop;
1784 
1785 		/*
1786 		 * Set the loopback flag for outgoing multicast packets.
1787 		 * Must be zero or one.  The original multicast API required a
1788 		 * char argument, which is inconsistent with the rest
1789 		 * of the socket API.  We allow either a char or an int.
1790 		 */
1791 		if (sopt->sopt_valsize == sizeof(u_char)) {
1792 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1793 			    sizeof(u_char));
1794 			if (error)
1795 				break;
1796 		} else {
1797 			u_int iloop;
1798 
1799 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1800 					    sizeof(u_int));
1801 			if (error)
1802 				break;
1803 			loop = (u_char)iloop;
1804 		}
1805 		imo = inp_findmoptions(inp);
1806 		imo->imo_multicast_loop = !!loop;
1807 		INP_WUNLOCK(inp);
1808 		break;
1809 	}
1810 
1811 	case IP_ADD_MEMBERSHIP:
1812 	case IP_ADD_SOURCE_MEMBERSHIP:
1813 	case MCAST_JOIN_GROUP:
1814 	case MCAST_JOIN_SOURCE_GROUP:
1815 		error = inp_join_group(inp, sopt);
1816 		break;
1817 
1818 	case IP_DROP_MEMBERSHIP:
1819 	case IP_DROP_SOURCE_MEMBERSHIP:
1820 	case MCAST_LEAVE_GROUP:
1821 	case MCAST_LEAVE_SOURCE_GROUP:
1822 		error = inp_leave_group(inp, sopt);
1823 		break;
1824 
1825 	case IP_BLOCK_SOURCE:
1826 	case IP_UNBLOCK_SOURCE:
1827 	case MCAST_BLOCK_SOURCE:
1828 	case MCAST_UNBLOCK_SOURCE:
1829 		error = inp_change_source_filter(inp, sopt);
1830 		break;
1831 
1832 	case IP_MSFILTER:
1833 		error = inp_set_source_filters(inp, sopt);
1834 		break;
1835 
1836 	default:
1837 		error = EOPNOTSUPP;
1838 		break;
1839 	}
1840 
1841 	INP_UNLOCK_ASSERT(inp);
1842 
1843 	return (error);
1844 }
1845