xref: /freebsd/sys/netinet/in_mcast.c (revision 33644623554bb0fc57ed3c7d874193a498679b22)
1 /*-
2  * Copyright (c) 2007 Bruce M. Simpson.
3  * Copyright (c) 2005 Robert N. M. Watson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  *    products derived from this software without specific prior written
16  *    permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * IPv4 multicast socket, group, and socket option processing module.
33  * Until further notice, this file requires INET to compile.
34  * TODO: Make this infrastructure independent of address family.
35  * TODO: Teach netinet6 to use this code.
36  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 #include <sys/vimage.h>
52 
53 #include <net/if.h>
54 #include <net/if_dl.h>
55 #include <net/route.h>
56 
57 #include <netinet/in.h>
58 #include <netinet/in_systm.h>
59 #include <netinet/in_pcb.h>
60 #include <netinet/in_var.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/igmp_var.h>
63 
/*
 * Overlay of the socket address structures handled in this file.  It lets
 * a single object be viewed generically (ss, sa) or as a specific address
 * family (sin, sin6) or link-layer address (sdl) without a cast at every
 * use.  The __SOCKUNION_DECLARED guard skips this definition when the
 * macro has already been defined elsewhere.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
	struct sockaddr_storage	ss;
	struct sockaddr		sa;
	struct sockaddr_dl	sdl;
	struct sockaddr_in	sin;
#ifdef INET6
	struct sockaddr_in6	sin6;
#endif
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
77 
/*
 * Kernel malloc types for the allocations made in this file:
 *  M_IPMADDR   - struct in_multi group records.
 *  M_IPMOPTS   - struct ip_moptions and its membership vector.
 *  M_IPMSOURCE - struct in_msource entries and the in_mfilter vector.
 */
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
81 
/*
 * The IPv4 multicast list (in_multihead and its associated structures) is
 * protected by the global in_multi_mtx.  See in_var.h for more details.
 * For now, in_multi_mtx is marked as recursible because IGMP calls back
 * into ip_output() to send IGMP packets while holding the lock; this is
 * probably not desirable.
 */
#ifdef VIMAGE_GLOBALS
struct in_multihead in_multihead;	/* XXX BSS initialization */
#endif
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
94 
/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_match_group()
 *  imo_match_source()
 *  in_addmulti()
 *  in_delmulti()
 *  in_delmulti_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 *
 * The prototypes below are for the file-local helpers only.
 */
static int	imo_grow(struct ip_moptions *);
static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
119 
120 /*
121  * Resize the ip_moptions vector to the next power-of-two minus 1.
122  * May be called with locks held; do not sleep.
123  */
124 static int
125 imo_grow(struct ip_moptions *imo)
126 {
127 	struct in_multi		**nmships;
128 	struct in_multi		**omships;
129 	struct in_mfilter	 *nmfilters;
130 	struct in_mfilter	 *omfilters;
131 	size_t			  idx;
132 	size_t			  newmax;
133 	size_t			  oldmax;
134 
135 	nmships = NULL;
136 	nmfilters = NULL;
137 	omships = imo->imo_membership;
138 	omfilters = imo->imo_mfilters;
139 	oldmax = imo->imo_max_memberships;
140 	newmax = ((oldmax + 1) * 2) - 1;
141 
142 	if (newmax <= IP_MAX_MEMBERSHIPS) {
143 		nmships = (struct in_multi **)realloc(omships,
144 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
145 		nmfilters = (struct in_mfilter *)realloc(omfilters,
146 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
147 		if (nmships != NULL && nmfilters != NULL) {
148 			/* Initialize newly allocated source filter heads. */
149 			for (idx = oldmax; idx < newmax; idx++) {
150 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
151 				nmfilters[idx].imf_nsources = 0;
152 				TAILQ_INIT(&nmfilters[idx].imf_sources);
153 			}
154 			imo->imo_max_memberships = newmax;
155 			imo->imo_membership = nmships;
156 			imo->imo_mfilters = nmfilters;
157 		}
158 	}
159 
160 	if (nmships == NULL || nmfilters == NULL) {
161 		if (nmships != NULL)
162 			free(nmships, M_IPMOPTS);
163 		if (nmfilters != NULL)
164 			free(nmfilters, M_IPMSOURCE);
165 		return (ETOOMANYREFS);
166 	}
167 
168 	return (0);
169 }
170 
171 /*
172  * Add a source to a multicast filter list.
173  * Assumes the associated inpcb is locked.
174  */
175 static int
176 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
177 {
178 	struct in_msource	*ims, *nims;
179 	struct in_mfilter	*imf;
180 
181 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
182 	KASSERT(imo->imo_mfilters != NULL,
183 	    ("%s: imo_mfilters vector not allocated", __func__));
184 
185 	imf = &imo->imo_mfilters[gidx];
186 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
187 		return (ENOBUFS);
188 
189 	ims = imo_match_source(imo, gidx, &src->sa);
190 	if (ims != NULL)
191 		return (EADDRNOTAVAIL);
192 
193 	/* Do not sleep with inp lock held. */
194 	nims = malloc(sizeof(struct in_msource),
195 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
196 	if (nims == NULL)
197 		return (ENOBUFS);
198 
199 	nims->ims_addr = src->ss;
200 	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
201 	imf->imf_nsources++;
202 
203 	return (0);
204 }
205 
206 static int
207 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
208 {
209 	struct in_msource	*ims;
210 	struct in_mfilter	*imf;
211 
212 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
213 	KASSERT(imo->imo_mfilters != NULL,
214 	    ("%s: imo_mfilters vector not allocated", __func__));
215 
216 	imf = &imo->imo_mfilters[gidx];
217 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
218 		return (ENOBUFS);
219 
220 	ims = imo_match_source(imo, gidx, &src->sa);
221 	if (ims == NULL)
222 		return (EADDRNOTAVAIL);
223 
224 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
225 	free(ims, M_IPMSOURCE);
226 	imf->imf_nsources--;
227 
228 	return (0);
229 }
230 
231 /*
232  * Find an IPv4 multicast group entry for this ip_moptions instance
233  * which matches the specified group, and optionally an interface.
234  * Return its index into the array, or -1 if not found.
235  */
236 size_t
237 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
238     struct sockaddr *group)
239 {
240 	sockunion_t	 *gsa;
241 	struct in_multi	**pinm;
242 	int		  idx;
243 	int		  nmships;
244 
245 	gsa = (sockunion_t *)group;
246 
247 	/* The imo_membership array may be lazy allocated. */
248 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
249 		return (-1);
250 
251 	nmships = imo->imo_num_memberships;
252 	pinm = &imo->imo_membership[0];
253 	for (idx = 0; idx < nmships; idx++, pinm++) {
254 		if (*pinm == NULL)
255 			continue;
256 #if 0
257 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
258 		    ifp, inet_ntoa(gsa->sin.sin_addr));
259 		printf("against %p, %s\n",
260 		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
261 #endif
262 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
263 		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
264 			break;
265 		}
266 	}
267 	if (idx >= nmships)
268 		idx = -1;
269 
270 	return (idx);
271 }
272 
273 /*
274  * Find a multicast source entry for this imo which matches
275  * the given group index for this socket, and source address.
276  */
277 struct in_msource *
278 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
279 {
280 	struct in_mfilter	*imf;
281 	struct in_msource	*ims, *pims;
282 
283 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
284 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
285 	    ("%s: invalid index %d\n", __func__, (int)gidx));
286 
287 	/* The imo_mfilters array may be lazy allocated. */
288 	if (imo->imo_mfilters == NULL)
289 		return (NULL);
290 
291 	pims = NULL;
292 	imf = &imo->imo_mfilters[gidx];
293 	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
294 		/*
295 		 * Perform bitwise comparison of two IPv4 addresses.
296 		 * TODO: Do the same for IPv6.
297 		 * Do not use sa_equal() for this as it is not aware of
298 		 * deeper structure in sockaddr_in or sockaddr_in6.
299 		 */
300 		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
301 		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
302 			pims = ims;
303 			break;
304 		}
305 	}
306 
307 	return (pims);
308 }
309 
310 /*
311  * Join an IPv4 multicast group.
312  */
313 struct in_multi *
314 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
315 {
316 	INIT_VNET_INET(ifp->if_vnet);
317 	struct in_multi *inm;
318 
319 	inm = NULL;
320 
321 	IFF_LOCKGIANT(ifp);
322 	IN_MULTI_LOCK();
323 
324 	IN_LOOKUP_MULTI(*ap, ifp, inm);
325 	if (inm != NULL) {
326 		/*
327 		 * If we already joined this group, just bump the
328 		 * refcount and return it.
329 		 */
330 		KASSERT(inm->inm_refcount >= 1,
331 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
332 		++inm->inm_refcount;
333 	} else do {
334 		sockunion_t		 gsa;
335 		struct ifmultiaddr	*ifma;
336 		struct in_multi		*ninm;
337 		int			 error;
338 
339 		memset(&gsa, 0, sizeof(gsa));
340 		gsa.sin.sin_family = AF_INET;
341 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
342 		gsa.sin.sin_addr = *ap;
343 
344 		/*
345 		 * Check if a link-layer group is already associated
346 		 * with this network-layer group on the given ifnet.
347 		 * If so, bump the refcount on the existing network-layer
348 		 * group association and return it.
349 		 */
350 		error = if_addmulti(ifp, &gsa.sa, &ifma);
351 		if (error)
352 			break;
353 		if (ifma->ifma_protospec != NULL) {
354 			inm = (struct in_multi *)ifma->ifma_protospec;
355 #ifdef INVARIANTS
356 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
357 			    inm->inm_addr.s_addr != ap->s_addr)
358 				panic("%s: ifma is inconsistent", __func__);
359 #endif
360 			++inm->inm_refcount;
361 			break;
362 		}
363 
364 		/*
365 		 * A new membership is needed; construct it and
366 		 * perform the IGMP join.
367 		 */
368 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
369 		if (ninm == NULL) {
370 			if_delmulti_ifma(ifma);
371 			break;
372 		}
373 		ninm->inm_addr = *ap;
374 		ninm->inm_ifp = ifp;
375 		ninm->inm_ifma = ifma;
376 		ninm->inm_refcount = 1;
377 		ifma->ifma_protospec = ninm;
378 		LIST_INSERT_HEAD(&V_in_multihead, ninm, inm_link);
379 
380 		igmp_joingroup(ninm);
381 
382 		inm = ninm;
383 	} while (0);
384 
385 	IN_MULTI_UNLOCK();
386 	IFF_UNLOCKGIANT(ifp);
387 
388 	return (inm);
389 }
390 
391 /*
392  * Leave an IPv4 multicast group.
393  * It is OK to call this routine if the underlying ifnet went away.
394  *
395  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
396  * will set ifma_ifp to NULL when the associated ifnet instance is detached
397  * from the system.
398  *
399  * The only reason we need to violate layers and check ifma_ifp here at all
400  * is because certain hardware drivers still require Giant to be held,
401  * and it must always be taken before other locks.
402  */
403 void
404 in_delmulti(struct in_multi *inm)
405 {
406 	struct ifnet *ifp;
407 
408 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
409 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
410 	ifp = inm->inm_ifma->ifma_ifp;
411 
412 	if (ifp != NULL) {
413 		/*
414 		 * Sanity check that netinet's notion of ifp is the
415 		 * same as net's.
416 		 */
417 		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
418 		IFF_LOCKGIANT(ifp);
419 	}
420 
421 	IN_MULTI_LOCK();
422 	in_delmulti_locked(inm);
423 	IN_MULTI_UNLOCK();
424 
425 	if (ifp != NULL)
426 		IFF_UNLOCKGIANT(ifp);
427 }
428 
429 /*
430  * Delete a multicast address record, with locks held.
431  *
432  * It is OK to call this routine if the ifp went away.
433  * Assumes that caller holds the IN_MULTI lock, and that
434  * Giant was taken before other locks if required by the hardware.
435  */
436 void
437 in_delmulti_locked(struct in_multi *inm)
438 {
439 	struct ifmultiaddr *ifma;
440 
441 	IN_MULTI_LOCK_ASSERT();
442 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
443 
444 	if (--inm->inm_refcount == 0) {
445 		igmp_leavegroup(inm);
446 
447 		ifma = inm->inm_ifma;
448 #ifdef DIAGNOSTIC
449 		if (bootverbose)
450 			printf("%s: purging ifma %p\n", __func__, ifma);
451 #endif
452 		KASSERT(ifma->ifma_protospec == inm,
453 		    ("%s: ifma_protospec != inm", __func__));
454 		ifma->ifma_protospec = NULL;
455 
456 		LIST_REMOVE(inm, inm_link);
457 		free(inm, M_IPMADDR);
458 
459 		if_delmulti_ifma(ifma);
460 	}
461 }
462 
463 /*
464  * Block or unblock an ASM/SSM multicast source on an inpcb.
465  */
466 static int
467 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
468 {
469 	INIT_VNET_NET(curvnet);
470 	INIT_VNET_INET(curvnet);
471 	struct group_source_req		 gsr;
472 	sockunion_t			*gsa, *ssa;
473 	struct ifnet			*ifp;
474 	struct in_mfilter		*imf;
475 	struct ip_moptions		*imo;
476 	struct in_msource		*ims;
477 	size_t				 idx;
478 	int				 error;
479 	int				 block;
480 
481 	ifp = NULL;
482 	error = 0;
483 	block = 0;
484 
485 	memset(&gsr, 0, sizeof(struct group_source_req));
486 	gsa = (sockunion_t *)&gsr.gsr_group;
487 	ssa = (sockunion_t *)&gsr.gsr_source;
488 
489 	switch (sopt->sopt_name) {
490 	case IP_BLOCK_SOURCE:
491 	case IP_UNBLOCK_SOURCE: {
492 		struct ip_mreq_source	 mreqs;
493 
494 		error = sooptcopyin(sopt, &mreqs,
495 		    sizeof(struct ip_mreq_source),
496 		    sizeof(struct ip_mreq_source));
497 		if (error)
498 			return (error);
499 
500 		gsa->sin.sin_family = AF_INET;
501 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
502 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
503 
504 		ssa->sin.sin_family = AF_INET;
505 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
506 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
507 
508 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
509 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
510 
511 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
512 			block = 1;
513 
514 #ifdef DIAGNOSTIC
515 		if (bootverbose) {
516 			printf("%s: imr_interface = %s, ifp = %p\n",
517 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
518 		}
519 #endif
520 		break;
521 	    }
522 
523 	case MCAST_BLOCK_SOURCE:
524 	case MCAST_UNBLOCK_SOURCE:
525 		error = sooptcopyin(sopt, &gsr,
526 		    sizeof(struct group_source_req),
527 		    sizeof(struct group_source_req));
528 		if (error)
529 			return (error);
530 
531 		if (gsa->sin.sin_family != AF_INET ||
532 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
533 			return (EINVAL);
534 
535 		if (ssa->sin.sin_family != AF_INET ||
536 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
537 			return (EINVAL);
538 
539 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
540 			return (EADDRNOTAVAIL);
541 
542 		ifp = ifnet_byindex(gsr.gsr_interface);
543 
544 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
545 			block = 1;
546 		break;
547 
548 	default:
549 #ifdef DIAGNOSTIC
550 		if (bootverbose) {
551 			printf("%s: unknown sopt_name %d\n", __func__,
552 			    sopt->sopt_name);
553 		}
554 #endif
555 		return (EOPNOTSUPP);
556 		break;
557 	}
558 
559 	/* XXX INET6 */
560 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
561 		return (EINVAL);
562 
563 	/*
564 	 * Check if we are actually a member of this group.
565 	 */
566 	imo = inp_findmoptions(inp);
567 	idx = imo_match_group(imo, ifp, &gsa->sa);
568 	if (idx == -1 || imo->imo_mfilters == NULL) {
569 		error = EADDRNOTAVAIL;
570 		goto out_locked;
571 	}
572 
573 	KASSERT(imo->imo_mfilters != NULL,
574 	    ("%s: imo_mfilters not allocated", __func__));
575 	imf = &imo->imo_mfilters[idx];
576 
577 	/*
578 	 * SSM multicast truth table for block/unblock operations.
579 	 *
580 	 * Operation   Filter Mode  Entry exists?   Action
581 	 *
582 	 * block       exclude      no              add source to filter
583 	 * unblock     include      no              add source to filter
584 	 * block       include      no              EINVAL
585 	 * unblock     exclude      no              EINVAL
586 	 * block       exclude      yes             EADDRNOTAVAIL
587 	 * unblock     include      yes             EADDRNOTAVAIL
588 	 * block       include      yes             remove source from filter
589 	 * unblock     exclude      yes             remove source from filter
590 	 *
591 	 * FreeBSD does not explicitly distinguish between ASM and SSM
592 	 * mode sockets; all sockets are assumed to have a filter list.
593 	 */
594 #ifdef DIAGNOSTIC
595 	if (bootverbose) {
596 		printf("%s: imf_fmode is %s\n", __func__,
597 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
598 	}
599 #endif
600 	ims = imo_match_source(imo, idx, &ssa->sa);
601 	if (ims == NULL) {
602 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
603 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
604 #ifdef DIAGNOSTIC
605 			if (bootverbose) {
606 				printf("%s: adding %s to filter list\n",
607 				    __func__, inet_ntoa(ssa->sin.sin_addr));
608 			}
609 #endif
610 			error = imo_join_source(imo, idx, ssa);
611 		}
612 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
613 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
614 			/*
615 			 * If the socket is in inclusive mode:
616 			 *  the source is already blocked as it has no entry.
617 			 * If the socket is in exclusive mode:
618 			 *  the source is already unblocked as it has no entry.
619 			 */
620 #ifdef DIAGNOSTIC
621 			if (bootverbose) {
622 				printf("%s: ims %p; %s already [un]blocked\n",
623 				    __func__, ims,
624 				    inet_ntoa(ssa->sin.sin_addr));
625 			}
626 #endif
627 			error = EINVAL;
628 		}
629 	} else {
630 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
631 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
632 			/*
633 			 * If the socket is in exclusive mode:
634 			 *  the source is already blocked as it has an entry.
635 			 * If the socket is in inclusive mode:
636 			 *  the source is already unblocked as it has an entry.
637 			 */
638 #ifdef DIAGNOSTIC
639 			if (bootverbose) {
640 				printf("%s: ims %p; %s already [un]blocked\n",
641 				    __func__, ims,
642 				    inet_ntoa(ssa->sin.sin_addr));
643 			}
644 #endif
645 			error = EADDRNOTAVAIL;
646 		}
647 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
648 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
649 #ifdef DIAGNOSTIC
650 			if (bootverbose) {
651 				printf("%s: removing %s from filter list\n",
652 				    __func__, inet_ntoa(ssa->sin.sin_addr));
653 			}
654 #endif
655 			error = imo_leave_source(imo, idx, ssa);
656 		}
657 	}
658 
659 out_locked:
660 	INP_WUNLOCK(inp);
661 	return (error);
662 }
663 
664 /*
665  * Given an inpcb, return its multicast options structure pointer.  Accepts
666  * an unlocked inpcb pointer, but will return it locked.  May sleep.
667  */
668 static struct ip_moptions *
669 inp_findmoptions(struct inpcb *inp)
670 {
671 	struct ip_moptions	 *imo;
672 	struct in_multi		**immp;
673 	struct in_mfilter	 *imfp;
674 	size_t			  idx;
675 
676 	INP_WLOCK(inp);
677 	if (inp->inp_moptions != NULL)
678 		return (inp->inp_moptions);
679 
680 	INP_WUNLOCK(inp);
681 
682 	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
683 	    M_WAITOK);
684 	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
685 	    M_IPMOPTS, M_WAITOK | M_ZERO);
686 	imfp = (struct in_mfilter *)malloc(
687 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
688 	    M_IPMSOURCE, M_WAITOK);
689 
690 	imo->imo_multicast_ifp = NULL;
691 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
692 	imo->imo_multicast_vif = -1;
693 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
694 	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
695 	imo->imo_num_memberships = 0;
696 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
697 	imo->imo_membership = immp;
698 
699 	/* Initialize per-group source filters. */
700 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
701 		imfp[idx].imf_fmode = MCAST_EXCLUDE;
702 		imfp[idx].imf_nsources = 0;
703 		TAILQ_INIT(&imfp[idx].imf_sources);
704 	}
705 	imo->imo_mfilters = imfp;
706 
707 	INP_WLOCK(inp);
708 	if (inp->inp_moptions != NULL) {
709 		free(imfp, M_IPMSOURCE);
710 		free(immp, M_IPMOPTS);
711 		free(imo, M_IPMOPTS);
712 		return (inp->inp_moptions);
713 	}
714 	inp->inp_moptions = imo;
715 	return (imo);
716 }
717 
718 /*
719  * Discard the IP multicast options (and source filters).
720  */
721 void
722 inp_freemoptions(struct ip_moptions *imo)
723 {
724 	struct in_mfilter	*imf;
725 	struct in_msource	*ims, *tims;
726 	size_t			 idx, nmships;
727 
728 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
729 
730 	nmships = imo->imo_num_memberships;
731 	for (idx = 0; idx < nmships; ++idx) {
732 		in_delmulti(imo->imo_membership[idx]);
733 
734 		if (imo->imo_mfilters != NULL) {
735 			imf = &imo->imo_mfilters[idx];
736 			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
737 			    ims_next, tims) {
738 				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
739 				free(ims, M_IPMSOURCE);
740 				imf->imf_nsources--;
741 			}
742 			KASSERT(imf->imf_nsources == 0,
743 			    ("%s: did not free all imf_nsources", __func__));
744 		}
745 	}
746 
747 	if (imo->imo_mfilters != NULL)
748 		free(imo->imo_mfilters, M_IPMSOURCE);
749 	free(imo->imo_membership, M_IPMOPTS);
750 	free(imo, M_IPMOPTS);
751 }
752 
753 /*
754  * Atomically get source filters on a socket for an IPv4 multicast group.
755  * Called with INP lock held; returns with lock released.
756  */
757 static int
758 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
759 {
760 	INIT_VNET_NET(curvnet);
761 	struct __msfilterreq	 msfr;
762 	sockunion_t		*gsa;
763 	struct ifnet		*ifp;
764 	struct ip_moptions	*imo;
765 	struct in_mfilter	*imf;
766 	struct in_msource	*ims;
767 	struct sockaddr_storage	*ptss;
768 	struct sockaddr_storage	*tss;
769 	int			 error;
770 	size_t			 idx;
771 
772 	INP_WLOCK_ASSERT(inp);
773 
774 	imo = inp->inp_moptions;
775 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
776 
777 	INP_WUNLOCK(inp);
778 
779 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
780 	    sizeof(struct __msfilterreq));
781 	if (error)
782 		return (error);
783 
784 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
785 		return (EINVAL);
786 
787 	ifp = ifnet_byindex(msfr.msfr_ifindex);
788 	if (ifp == NULL)
789 		return (EINVAL);
790 
791 	INP_WLOCK(inp);
792 
793 	/*
794 	 * Lookup group on the socket.
795 	 */
796 	gsa = (sockunion_t *)&msfr.msfr_group;
797 	idx = imo_match_group(imo, ifp, &gsa->sa);
798 	if (idx == -1 || imo->imo_mfilters == NULL) {
799 		INP_WUNLOCK(inp);
800 		return (EADDRNOTAVAIL);
801 	}
802 
803 	imf = &imo->imo_mfilters[idx];
804 	msfr.msfr_fmode = imf->imf_fmode;
805 	msfr.msfr_nsrcs = imf->imf_nsources;
806 
807 	/*
808 	 * If the user specified a buffer, copy out the source filter
809 	 * entries to userland gracefully.
810 	 * msfr.msfr_nsrcs is always set to the total number of filter
811 	 * entries which the kernel currently has for this group.
812 	 */
813 	tss = NULL;
814 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
815 		/*
816 		 * Make a copy of the source vector so that we do not
817 		 * thrash the inpcb lock whilst copying it out.
818 		 * We only copy out the number of entries which userland
819 		 * has asked for, but we always tell userland how big the
820 		 * buffer really needs to be.
821 		 */
822 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
823 		    M_TEMP, M_NOWAIT);
824 		if (tss == NULL) {
825 			error = ENOBUFS;
826 		} else {
827 			ptss = tss;
828 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
829 				memcpy(ptss++, &ims->ims_addr,
830 				    sizeof(struct sockaddr_storage));
831 			}
832 		}
833 	}
834 
835 	INP_WUNLOCK(inp);
836 
837 	if (tss != NULL) {
838 		error = copyout(tss, msfr.msfr_srcs,
839 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
840 		free(tss, M_TEMP);
841 	}
842 
843 	if (error)
844 		return (error);
845 
846 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
847 
848 	return (error);
849 }
850 
851 /*
852  * Return the IP multicast options in response to user getsockopt().
853  */
854 int
855 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
856 {
857 	INIT_VNET_INET(curvnet);
858 	struct ip_mreqn		 mreqn;
859 	struct ip_moptions	*imo;
860 	struct ifnet		*ifp;
861 	struct in_ifaddr	*ia;
862 	int			 error, optval;
863 	u_char			 coptval;
864 
865 	INP_WLOCK(inp);
866 	imo = inp->inp_moptions;
867 	/*
868 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
869 	 * or is a divert socket, reject it.
870 	 */
871 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
872 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
873 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
874 		INP_WUNLOCK(inp);
875 		return (EOPNOTSUPP);
876 	}
877 
878 	error = 0;
879 	switch (sopt->sopt_name) {
880 	case IP_MULTICAST_VIF:
881 		if (imo != NULL)
882 			optval = imo->imo_multicast_vif;
883 		else
884 			optval = -1;
885 		INP_WUNLOCK(inp);
886 		error = sooptcopyout(sopt, &optval, sizeof(int));
887 		break;
888 
889 	case IP_MULTICAST_IF:
890 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
891 		if (imo != NULL) {
892 			ifp = imo->imo_multicast_ifp;
893 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
894 				mreqn.imr_address = imo->imo_multicast_addr;
895 			} else if (ifp != NULL) {
896 				mreqn.imr_ifindex = ifp->if_index;
897 				IFP_TO_IA(ifp, ia);
898 				if (ia != NULL) {
899 					mreqn.imr_address =
900 					    IA_SIN(ia)->sin_addr;
901 				}
902 			}
903 		}
904 		INP_WUNLOCK(inp);
905 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
906 			error = sooptcopyout(sopt, &mreqn,
907 			    sizeof(struct ip_mreqn));
908 		} else {
909 			error = sooptcopyout(sopt, &mreqn.imr_address,
910 			    sizeof(struct in_addr));
911 		}
912 		break;
913 
914 	case IP_MULTICAST_TTL:
915 		if (imo == 0)
916 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
917 		else
918 			optval = coptval = imo->imo_multicast_ttl;
919 		INP_WUNLOCK(inp);
920 		if (sopt->sopt_valsize == sizeof(u_char))
921 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
922 		else
923 			error = sooptcopyout(sopt, &optval, sizeof(int));
924 		break;
925 
926 	case IP_MULTICAST_LOOP:
927 		if (imo == 0)
928 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
929 		else
930 			optval = coptval = imo->imo_multicast_loop;
931 		INP_WUNLOCK(inp);
932 		if (sopt->sopt_valsize == sizeof(u_char))
933 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
934 		else
935 			error = sooptcopyout(sopt, &optval, sizeof(int));
936 		break;
937 
938 	case IP_MSFILTER:
939 		if (imo == NULL) {
940 			error = EADDRNOTAVAIL;
941 			INP_WUNLOCK(inp);
942 		} else {
943 			error = inp_get_source_filters(inp, sopt);
944 		}
945 		break;
946 
947 	default:
948 		INP_WUNLOCK(inp);
949 		error = ENOPROTOOPT;
950 		break;
951 	}
952 
953 	INP_UNLOCK_ASSERT(inp);
954 
955 	return (error);
956 }
957 
958 /*
959  * Join an IPv4 multicast group, possibly with a source.
960  */
961 static int
962 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
963 {
964 	INIT_VNET_NET(curvnet);
965 	INIT_VNET_INET(curvnet);
966 	struct group_source_req		 gsr;
967 	sockunion_t			*gsa, *ssa;
968 	struct ifnet			*ifp;
969 	struct in_mfilter		*imf;
970 	struct ip_moptions		*imo;
971 	struct in_multi			*inm;
972 	size_t				 idx;
973 	int				 error;
974 
975 	ifp = NULL;
976 	error = 0;
977 
978 	memset(&gsr, 0, sizeof(struct group_source_req));
979 	gsa = (sockunion_t *)&gsr.gsr_group;
980 	gsa->ss.ss_family = AF_UNSPEC;
981 	ssa = (sockunion_t *)&gsr.gsr_source;
982 	ssa->ss.ss_family = AF_UNSPEC;
983 
984 	switch (sopt->sopt_name) {
985 	case IP_ADD_MEMBERSHIP:
986 	case IP_ADD_SOURCE_MEMBERSHIP: {
987 		struct ip_mreq_source	 mreqs;
988 
989 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
990 			error = sooptcopyin(sopt, &mreqs,
991 			    sizeof(struct ip_mreq),
992 			    sizeof(struct ip_mreq));
993 			/*
994 			 * Do argument switcharoo from ip_mreq into
995 			 * ip_mreq_source to avoid using two instances.
996 			 */
997 			mreqs.imr_interface = mreqs.imr_sourceaddr;
998 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
999 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1000 			error = sooptcopyin(sopt, &mreqs,
1001 			    sizeof(struct ip_mreq_source),
1002 			    sizeof(struct ip_mreq_source));
1003 		}
1004 		if (error)
1005 			return (error);
1006 
1007 		gsa->sin.sin_family = AF_INET;
1008 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1009 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1010 
1011 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1012 			ssa->sin.sin_family = AF_INET;
1013 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1014 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1015 		}
1016 
1017 		/*
1018 		 * Obtain ifp. If no interface address was provided,
1019 		 * use the interface of the route in the unicast FIB for
1020 		 * the given multicast destination; usually, this is the
1021 		 * default route.
1022 		 * If this lookup fails, attempt to use the first non-loopback
1023 		 * interface with multicast capability in the system as a
1024 		 * last resort. The legacy IPv4 ASM API requires that we do
1025 		 * this in order to allow groups to be joined when the routing
1026 		 * table has not yet been populated during boot.
1027 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1028 		 * reject the IPv4 multicast join.
1029 		 */
1030 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1031 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1032 		} else {
1033 			struct route ro;
1034 
1035 			ro.ro_rt = NULL;
1036 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1037 			in_rtalloc_ign(&ro, RTF_CLONING,
1038 			   inp->inp_inc.inc_fibnum);
1039 			if (ro.ro_rt != NULL) {
1040 				ifp = ro.ro_rt->rt_ifp;
1041 				KASSERT(ifp != NULL, ("%s: null ifp",
1042 				    __func__));
1043 				RTFREE(ro.ro_rt);
1044 			} else {
1045 				struct in_ifaddr *ia;
1046 				struct ifnet *mfp = NULL;
1047 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1048 					mfp = ia->ia_ifp;
1049 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1050 					     (mfp->if_flags & IFF_MULTICAST)) {
1051 						ifp = mfp;
1052 						break;
1053 					}
1054 				}
1055 			}
1056 		}
1057 #ifdef DIAGNOSTIC
1058 		if (bootverbose) {
1059 			printf("%s: imr_interface = %s, ifp = %p\n",
1060 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1061 		}
1062 #endif
1063 		break;
1064 	}
1065 
1066 	case MCAST_JOIN_GROUP:
1067 	case MCAST_JOIN_SOURCE_GROUP:
1068 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1069 			error = sooptcopyin(sopt, &gsr,
1070 			    sizeof(struct group_req),
1071 			    sizeof(struct group_req));
1072 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1073 			error = sooptcopyin(sopt, &gsr,
1074 			    sizeof(struct group_source_req),
1075 			    sizeof(struct group_source_req));
1076 		}
1077 		if (error)
1078 			return (error);
1079 
1080 		if (gsa->sin.sin_family != AF_INET ||
1081 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1082 			return (EINVAL);
1083 
1084 		/*
1085 		 * Overwrite the port field if present, as the sockaddr
1086 		 * being copied in may be matched with a binary comparison.
1087 		 * XXX INET6
1088 		 */
1089 		gsa->sin.sin_port = 0;
1090 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1091 			if (ssa->sin.sin_family != AF_INET ||
1092 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1093 				return (EINVAL);
1094 			ssa->sin.sin_port = 0;
1095 		}
1096 
1097 		/*
1098 		 * Obtain the ifp.
1099 		 */
1100 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1101 			return (EADDRNOTAVAIL);
1102 		ifp = ifnet_byindex(gsr.gsr_interface);
1103 
1104 		break;
1105 
1106 	default:
1107 #ifdef DIAGNOSTIC
1108 		if (bootverbose) {
1109 			printf("%s: unknown sopt_name %d\n", __func__,
1110 			    sopt->sopt_name);
1111 		}
1112 #endif
1113 		return (EOPNOTSUPP);
1114 		break;
1115 	}
1116 
1117 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1118 		return (EINVAL);
1119 
1120 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1121 		return (EADDRNOTAVAIL);
1122 
1123 	/*
1124 	 * Check if we already hold membership of this group for this inpcb.
1125 	 * If so, we do not need to perform the initial join.
1126 	 */
1127 	imo = inp_findmoptions(inp);
1128 	idx = imo_match_group(imo, ifp, &gsa->sa);
1129 	if (idx != -1) {
1130 		if (ssa->ss.ss_family != AF_UNSPEC) {
1131 			/*
1132 			 * Attempting to join an ASM group (when already
1133 			 * an ASM or SSM member) is an error.
1134 			 */
1135 			error = EADDRNOTAVAIL;
1136 		} else {
1137 			imf = &imo->imo_mfilters[idx];
1138 			if (imf->imf_nsources == 0) {
1139 				/*
1140 				 * Attempting to join an SSM group (when
1141 				 * already an ASM member) is an error.
1142 				 */
1143 				error = EINVAL;
1144 			} else {
1145 				/*
1146 				 * Attempting to join an SSM group (when
1147 				 * already an SSM member) means "add this
1148 				 * source to the inclusive filter list".
1149 				 */
1150 				error = imo_join_source(imo, idx, ssa);
1151 			}
1152 		}
1153 		goto out_locked;
1154 	}
1155 
1156 	/*
1157 	 * Call imo_grow() to reallocate the membership and source filter
1158 	 * vectors if they are full. If the size would exceed the hard limit,
1159 	 * then we know we've really run out of entries. We keep the INP
1160 	 * lock held to avoid introducing a race condition.
1161 	 */
1162 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1163 		error = imo_grow(imo);
1164 		if (error)
1165 			goto out_locked;
1166 	}
1167 
1168 	/*
1169 	 * So far, so good: perform the layer 3 join, layer 2 join,
1170 	 * and make an IGMP announcement if needed.
1171 	 */
1172 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1173 	if (inm == NULL) {
1174 		error = ENOBUFS;
1175 		goto out_locked;
1176 	}
1177 	idx = imo->imo_num_memberships;
1178 	imo->imo_membership[idx] = inm;
1179 	imo->imo_num_memberships++;
1180 
1181 	KASSERT(imo->imo_mfilters != NULL,
1182 	    ("%s: imf_mfilters vector was not allocated", __func__));
1183 	imf = &imo->imo_mfilters[idx];
1184 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1185 	    ("%s: imf_sources not empty", __func__));
1186 
1187 	/*
1188 	 * If this is a new SSM group join (i.e. a source was specified
1189 	 * with this group), add this source to the filter list.
1190 	 */
1191 	if (ssa->ss.ss_family != AF_UNSPEC) {
1192 		/*
1193 		 * An initial SSM join implies that this socket's membership
1194 		 * of the multicast group is now in inclusive mode.
1195 		 */
1196 		imf->imf_fmode = MCAST_INCLUDE;
1197 
1198 		error = imo_join_source(imo, idx, ssa);
1199 		if (error) {
1200 			/*
1201 			 * Drop inp lock before calling in_delmulti(),
1202 			 * to prevent a lock order reversal.
1203 			 */
1204 			--imo->imo_num_memberships;
1205 			INP_WUNLOCK(inp);
1206 			in_delmulti(inm);
1207 			return (error);
1208 		}
1209 	}
1210 
1211 out_locked:
1212 	INP_WUNLOCK(inp);
1213 	return (error);
1214 }
1215 
1216 /*
1217  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1218  */
1219 static int
1220 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1221 {
1222 	INIT_VNET_NET(curvnet);
1223 	INIT_VNET_INET(curvnet);
1224 	struct group_source_req		 gsr;
1225 	struct ip_mreq_source		 mreqs;
1226 	sockunion_t			*gsa, *ssa;
1227 	struct ifnet			*ifp;
1228 	struct in_mfilter		*imf;
1229 	struct ip_moptions		*imo;
1230 	struct in_msource		*ims, *tims;
1231 	struct in_multi			*inm;
1232 	size_t				 idx;
1233 	int				 error;
1234 
1235 	ifp = NULL;
1236 	error = 0;
1237 
1238 	memset(&gsr, 0, sizeof(struct group_source_req));
1239 	gsa = (sockunion_t *)&gsr.gsr_group;
1240 	gsa->ss.ss_family = AF_UNSPEC;
1241 	ssa = (sockunion_t *)&gsr.gsr_source;
1242 	ssa->ss.ss_family = AF_UNSPEC;
1243 
1244 	switch (sopt->sopt_name) {
1245 	case IP_DROP_MEMBERSHIP:
1246 	case IP_DROP_SOURCE_MEMBERSHIP:
1247 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1248 			error = sooptcopyin(sopt, &mreqs,
1249 			    sizeof(struct ip_mreq),
1250 			    sizeof(struct ip_mreq));
1251 			/*
1252 			 * Swap interface and sourceaddr arguments,
1253 			 * as ip_mreq and ip_mreq_source are laid
1254 			 * out differently.
1255 			 */
1256 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1257 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1258 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1259 			error = sooptcopyin(sopt, &mreqs,
1260 			    sizeof(struct ip_mreq_source),
1261 			    sizeof(struct ip_mreq_source));
1262 		}
1263 		if (error)
1264 			return (error);
1265 
1266 		gsa->sin.sin_family = AF_INET;
1267 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1268 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1269 
1270 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1271 			ssa->sin.sin_family = AF_INET;
1272 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1273 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1274 		}
1275 
1276 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1277 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1278 
1279 #ifdef DIAGNOSTIC
1280 		if (bootverbose) {
1281 			printf("%s: imr_interface = %s, ifp = %p\n",
1282 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1283 		}
1284 #endif
1285 		break;
1286 
1287 	case MCAST_LEAVE_GROUP:
1288 	case MCAST_LEAVE_SOURCE_GROUP:
1289 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1290 			error = sooptcopyin(sopt, &gsr,
1291 			    sizeof(struct group_req),
1292 			    sizeof(struct group_req));
1293 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1294 			error = sooptcopyin(sopt, &gsr,
1295 			    sizeof(struct group_source_req),
1296 			    sizeof(struct group_source_req));
1297 		}
1298 		if (error)
1299 			return (error);
1300 
1301 		if (gsa->sin.sin_family != AF_INET ||
1302 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1303 			return (EINVAL);
1304 
1305 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1306 			if (ssa->sin.sin_family != AF_INET ||
1307 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1308 				return (EINVAL);
1309 		}
1310 
1311 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1312 			return (EADDRNOTAVAIL);
1313 
1314 		ifp = ifnet_byindex(gsr.gsr_interface);
1315 		break;
1316 
1317 	default:
1318 #ifdef DIAGNOSTIC
1319 		if (bootverbose) {
1320 			printf("%s: unknown sopt_name %d\n", __func__,
1321 			    sopt->sopt_name);
1322 		}
1323 #endif
1324 		return (EOPNOTSUPP);
1325 		break;
1326 	}
1327 
1328 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1329 		return (EINVAL);
1330 
1331 	/*
1332 	 * Find the membership in the membership array.
1333 	 */
1334 	imo = inp_findmoptions(inp);
1335 	idx = imo_match_group(imo, ifp, &gsa->sa);
1336 	if (idx == -1) {
1337 		error = EADDRNOTAVAIL;
1338 		goto out_locked;
1339 	}
1340 	imf = &imo->imo_mfilters[idx];
1341 
1342 	/*
1343 	 * If we were instructed only to leave a given source, do so.
1344 	 */
1345 	if (ssa->ss.ss_family != AF_UNSPEC) {
1346 		if (imf->imf_nsources == 0 ||
1347 		    imf->imf_fmode == MCAST_EXCLUDE) {
1348 			/*
1349 			 * Attempting to SSM leave an ASM group
1350 			 * is an error; should use *_BLOCK_SOURCE instead.
1351 			 * Attempting to SSM leave a source in a group when
1352 			 * the socket is in 'exclude mode' is also an error.
1353 			 */
1354 			error = EINVAL;
1355 		} else {
1356 			error = imo_leave_source(imo, idx, ssa);
1357 		}
1358 		/*
1359 		 * If an error occurred, or this source is not the last
1360 		 * source in the group, do not leave the whole group.
1361 		 */
1362 		if (error || imf->imf_nsources > 0)
1363 			goto out_locked;
1364 	}
1365 
1366 	/*
1367 	 * Give up the multicast address record to which the membership points.
1368 	 */
1369 	inm = imo->imo_membership[idx];
1370 	in_delmulti(inm);
1371 
1372 	/*
1373 	 * Free any source filters for this group if they exist.
1374 	 * Revert inpcb to the default MCAST_EXCLUDE state.
1375 	 */
1376 	if (imo->imo_mfilters != NULL) {
1377 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1378 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1379 			free(ims, M_IPMSOURCE);
1380 			imf->imf_nsources--;
1381 		}
1382 		KASSERT(imf->imf_nsources == 0,
1383 		    ("%s: imf_nsources not 0", __func__));
1384 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1385 		    ("%s: imf_sources not empty", __func__));
1386 		imf->imf_fmode = MCAST_EXCLUDE;
1387 	}
1388 
1389 	/*
1390 	 * Remove the gap in the membership array.
1391 	 */
1392 	for (++idx; idx < imo->imo_num_memberships; ++idx)
1393 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1394 	imo->imo_num_memberships--;
1395 
1396 out_locked:
1397 	INP_WUNLOCK(inp);
1398 	return (error);
1399 }
1400 
1401 /*
1402  * Select the interface for transmitting IPv4 multicast datagrams.
1403  *
1404  * Either an instance of struct in_addr or an instance of struct ip_mreqn
1405  * may be passed to this socket option. An address of INADDR_ANY or an
1406  * interface index of 0 is used to remove a previous selection.
1407  * When no interface is selected, one is chosen for every send.
1408  */
1409 static int
1410 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1411 {
1412 	INIT_VNET_NET(curvnet);
1413 	struct in_addr		 addr;
1414 	struct ip_mreqn		 mreqn;
1415 	struct ifnet		*ifp;
1416 	struct ip_moptions	*imo;
1417 	int			 error;
1418 
1419 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1420 		/*
1421 		 * An interface index was specified using the
1422 		 * Linux-derived ip_mreqn structure.
1423 		 */
1424 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1425 		    sizeof(struct ip_mreqn));
1426 		if (error)
1427 			return (error);
1428 
1429 		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
1430 			return (EINVAL);
1431 
1432 		if (mreqn.imr_ifindex == 0) {
1433 			ifp = NULL;
1434 		} else {
1435 			ifp = ifnet_byindex(mreqn.imr_ifindex);
1436 			if (ifp == NULL)
1437 				return (EADDRNOTAVAIL);
1438 		}
1439 	} else {
1440 		/*
1441 		 * An interface was specified by IPv4 address.
1442 		 * This is the traditional BSD usage.
1443 		 */
1444 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1445 		    sizeof(struct in_addr));
1446 		if (error)
1447 			return (error);
1448 		if (addr.s_addr == INADDR_ANY) {
1449 			ifp = NULL;
1450 		} else {
1451 			INADDR_TO_IFP(addr, ifp);
1452 			if (ifp == NULL)
1453 				return (EADDRNOTAVAIL);
1454 		}
1455 #ifdef DIAGNOSTIC
1456 		if (bootverbose) {
1457 			printf("%s: ifp = %p, addr = %s\n",
1458 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1459 		}
1460 #endif
1461 	}
1462 
1463 	/* Reject interfaces which do not support multicast. */
1464 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1465 		return (EOPNOTSUPP);
1466 
1467 	imo = inp_findmoptions(inp);
1468 	imo->imo_multicast_ifp = ifp;
1469 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1470 	INP_WUNLOCK(inp);
1471 
1472 	return (0);
1473 }
1474 
1475 /*
1476  * Atomically set source filters on a socket for an IPv4 multicast group.
1477  */
1478 static int
1479 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1480 {
1481 	INIT_VNET_NET(curvnet);
1482 	struct __msfilterreq	 msfr;
1483 	sockunion_t		*gsa;
1484 	struct ifnet		*ifp;
1485 	struct in_mfilter	*imf;
1486 	struct ip_moptions	*imo;
1487 	struct in_msource	*ims, *tims;
1488 	size_t			 idx;
1489 	int			 error;
1490 
1491 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1492 	    sizeof(struct __msfilterreq));
1493 	if (error)
1494 		return (error);
1495 
1496 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1497 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1498 	     msfr.msfr_fmode != MCAST_INCLUDE))
1499 		return (EINVAL);
1500 
1501 	if (msfr.msfr_group.ss_family != AF_INET ||
1502 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1503 		return (EINVAL);
1504 
1505 	gsa = (sockunion_t *)&msfr.msfr_group;
1506 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1507 		return (EINVAL);
1508 
1509 	gsa->sin.sin_port = 0;	/* ignore port */
1510 
1511 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1512 		return (EADDRNOTAVAIL);
1513 
1514 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1515 	if (ifp == NULL)
1516 		return (EADDRNOTAVAIL);
1517 
1518 	/*
1519 	 * Take the INP lock.
1520 	 * Check if this socket is a member of this group.
1521 	 */
1522 	imo = inp_findmoptions(inp);
1523 	idx = imo_match_group(imo, ifp, &gsa->sa);
1524 	if (idx == -1 || imo->imo_mfilters == NULL) {
1525 		error = EADDRNOTAVAIL;
1526 		goto out_locked;
1527 	}
1528 	imf = &imo->imo_mfilters[idx];
1529 
1530 #ifdef DIAGNOSTIC
1531 	if (bootverbose)
1532 		printf("%s: clearing source list\n", __func__);
1533 #endif
1534 
1535 	/*
1536 	 * Remove any existing source filters.
1537 	 */
1538 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1539 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1540 		free(ims, M_IPMSOURCE);
1541 		imf->imf_nsources--;
1542 	}
1543 	KASSERT(imf->imf_nsources == 0,
1544 	    ("%s: source list not cleared", __func__));
1545 
1546 	/*
1547 	 * Apply any new source filters, if present.
1548 	 */
1549 	if (msfr.msfr_nsrcs > 0) {
1550 		struct in_msource	**pnims;
1551 		struct in_msource	*nims;
1552 		struct sockaddr_storage	*kss;
1553 		struct sockaddr_storage	*pkss;
1554 		sockunion_t		*psu;
1555 		int			 i, j;
1556 
1557 		/*
1558 		 * Drop the inp lock so we may sleep if we need to
1559 		 * in order to satisfy a malloc request.
1560 		 * We will re-take it before changing socket state.
1561 		 */
1562 		INP_WUNLOCK(inp);
1563 #ifdef DIAGNOSTIC
1564 		if (bootverbose) {
1565 			printf("%s: loading %lu source list entries\n",
1566 			    __func__, (unsigned long)msfr.msfr_nsrcs);
1567 		}
1568 #endif
1569 		/*
1570 		 * Make a copy of the user-space source vector so
1571 		 * that we may copy them with a single copyin. This
1572 		 * allows us to deal with page faults up-front.
1573 		 */
1574 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1575 		    M_TEMP, M_WAITOK);
1576 		error = copyin(msfr.msfr_srcs, kss,
1577 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1578 		if (error) {
1579 			free(kss, M_TEMP);
1580 			return (error);
1581 		}
1582 
1583 		/*
1584 		 * Perform argument checking on every sockaddr_storage
1585 		 * structure in the vector provided to us. Overwrite
1586 		 * fields which should not apply to source entries.
1587 		 * TODO: Check for duplicate sources on this pass.
1588 		 */
1589 		psu = (sockunion_t *)kss;
1590 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1591 			switch (psu->ss.ss_family) {
1592 			case AF_INET:
1593 				if (psu->sin.sin_len !=
1594 				    sizeof(struct sockaddr_in)) {
1595 					error = EINVAL;
1596 				} else {
1597 					psu->sin.sin_port = 0;
1598 				}
1599 				break;
1600 #ifdef notyet
1601 			case AF_INET6;
1602 				if (psu->sin6.sin6_len !=
1603 				    sizeof(struct sockaddr_in6)) {
1604 					error = EINVAL;
1605 				} else {
1606 					psu->sin6.sin6_port = 0;
1607 					psu->sin6.sin6_flowinfo = 0;
1608 				}
1609 				break;
1610 #endif
1611 			default:
1612 				error = EAFNOSUPPORT;
1613 				break;
1614 			}
1615 			if (error)
1616 				break;
1617 		}
1618 		if (error) {
1619 			free(kss, M_TEMP);
1620 			return (error);
1621 		}
1622 
1623 		/*
1624 		 * Allocate a block to track all the in_msource
1625 		 * entries we are about to allocate, in case we
1626 		 * abruptly need to free them.
1627 		 */
1628 		pnims = malloc(sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1629 		    M_TEMP, M_WAITOK | M_ZERO);
1630 
1631 		/*
1632 		 * Allocate up to nsrcs individual chunks.
1633 		 * If we encounter an error, backtrack out of
1634 		 * all allocations cleanly; updates must be atomic.
1635 		 */
1636 		pkss = kss;
1637 		nims = NULL;
1638 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1639 			nims = malloc(sizeof(struct in_msource) *
1640 			    msfr.msfr_nsrcs, M_IPMSOURCE, M_WAITOK | M_ZERO);
1641 			pnims[i] = nims;
1642 		}
1643 		if (i < msfr.msfr_nsrcs) {
1644 			for (j = 0; j < i; j++) {
1645 				if (pnims[j] != NULL)
1646 					free(pnims[j], M_IPMSOURCE);
1647 			}
1648 			free(pnims, M_TEMP);
1649 			free(kss, M_TEMP);
1650 			return (ENOBUFS);
1651 		}
1652 
1653 		INP_UNLOCK_ASSERT(inp);
1654 
1655 		/*
1656 		 * Finally, apply the filters to the socket.
1657 		 * Re-take the inp lock; we are changing socket state.
1658 		 */
1659 		pkss = kss;
1660 		INP_WLOCK(inp);
1661 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1662 			memcpy(&(pnims[i]->ims_addr), pkss,
1663 			    sizeof(struct sockaddr_storage));
1664 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1665 			    ims_next);
1666 			imf->imf_nsources++;
1667 		}
1668 		free(pnims, M_TEMP);
1669 		free(kss, M_TEMP);
1670 	}
1671 
1672 	/*
1673 	 * Update the filter mode on the socket before releasing the inpcb.
1674 	 */
1675 	INP_WLOCK_ASSERT(inp);
1676 	imf->imf_fmode = msfr.msfr_fmode;
1677 
1678 out_locked:
1679 	INP_WUNLOCK(inp);
1680 	return (error);
1681 }
1682 
1683 /*
1684  * Set the IP multicast options in response to user setsockopt().
1685  *
1686  * Many of the socket options handled in this function duplicate the
1687  * functionality of socket options in the regular unicast API. However,
1688  * it is not possible to merge the duplicate code, because the idempotence
1689  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1690  * the effects of these options must be treated as separate and distinct.
1691  */
1692 int
1693 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1694 {
1695 	struct ip_moptions	*imo;
1696 	int			 error;
1697 
1698 	error = 0;
1699 
1700 	/*
1701 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1702 	 * or is a divert socket, reject it.
1703 	 * XXX Unlocked read of inp_socket believed OK.
1704 	 */
1705 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1706 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1707 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1708 		return (EOPNOTSUPP);
1709 
1710 	switch (sopt->sopt_name) {
1711 	case IP_MULTICAST_VIF: {
1712 		int vifi;
1713 		/*
1714 		 * Select a multicast VIF for transmission.
1715 		 * Only useful if multicast forwarding is active.
1716 		 */
1717 		if (legal_vif_num == NULL) {
1718 			error = EOPNOTSUPP;
1719 			break;
1720 		}
1721 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1722 		if (error)
1723 			break;
1724 		if (!legal_vif_num(vifi) && (vifi != -1)) {
1725 			error = EINVAL;
1726 			break;
1727 		}
1728 		imo = inp_findmoptions(inp);
1729 		imo->imo_multicast_vif = vifi;
1730 		INP_WUNLOCK(inp);
1731 		break;
1732 	}
1733 
1734 	case IP_MULTICAST_IF:
1735 		error = inp_set_multicast_if(inp, sopt);
1736 		break;
1737 
1738 	case IP_MULTICAST_TTL: {
1739 		u_char ttl;
1740 
1741 		/*
1742 		 * Set the IP time-to-live for outgoing multicast packets.
1743 		 * The original multicast API required a char argument,
1744 		 * which is inconsistent with the rest of the socket API.
1745 		 * We allow either a char or an int.
1746 		 */
1747 		if (sopt->sopt_valsize == sizeof(u_char)) {
1748 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1749 			    sizeof(u_char));
1750 			if (error)
1751 				break;
1752 		} else {
1753 			u_int ittl;
1754 
1755 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1756 			    sizeof(u_int));
1757 			if (error)
1758 				break;
1759 			if (ittl > 255) {
1760 				error = EINVAL;
1761 				break;
1762 			}
1763 			ttl = (u_char)ittl;
1764 		}
1765 		imo = inp_findmoptions(inp);
1766 		imo->imo_multicast_ttl = ttl;
1767 		INP_WUNLOCK(inp);
1768 		break;
1769 	}
1770 
1771 	case IP_MULTICAST_LOOP: {
1772 		u_char loop;
1773 
1774 		/*
1775 		 * Set the loopback flag for outgoing multicast packets.
1776 		 * Must be zero or one.  The original multicast API required a
1777 		 * char argument, which is inconsistent with the rest
1778 		 * of the socket API.  We allow either a char or an int.
1779 		 */
1780 		if (sopt->sopt_valsize == sizeof(u_char)) {
1781 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1782 			    sizeof(u_char));
1783 			if (error)
1784 				break;
1785 		} else {
1786 			u_int iloop;
1787 
1788 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1789 					    sizeof(u_int));
1790 			if (error)
1791 				break;
1792 			loop = (u_char)iloop;
1793 		}
1794 		imo = inp_findmoptions(inp);
1795 		imo->imo_multicast_loop = !!loop;
1796 		INP_WUNLOCK(inp);
1797 		break;
1798 	}
1799 
1800 	case IP_ADD_MEMBERSHIP:
1801 	case IP_ADD_SOURCE_MEMBERSHIP:
1802 	case MCAST_JOIN_GROUP:
1803 	case MCAST_JOIN_SOURCE_GROUP:
1804 		error = inp_join_group(inp, sopt);
1805 		break;
1806 
1807 	case IP_DROP_MEMBERSHIP:
1808 	case IP_DROP_SOURCE_MEMBERSHIP:
1809 	case MCAST_LEAVE_GROUP:
1810 	case MCAST_LEAVE_SOURCE_GROUP:
1811 		error = inp_leave_group(inp, sopt);
1812 		break;
1813 
1814 	case IP_BLOCK_SOURCE:
1815 	case IP_UNBLOCK_SOURCE:
1816 	case MCAST_BLOCK_SOURCE:
1817 	case MCAST_UNBLOCK_SOURCE:
1818 		error = inp_change_source_filter(inp, sopt);
1819 		break;
1820 
1821 	case IP_MSFILTER:
1822 		error = inp_set_source_filters(inp, sopt);
1823 		break;
1824 
1825 	default:
1826 		error = EOPNOTSUPP;
1827 		break;
1828 	}
1829 
1830 	INP_UNLOCK_ASSERT(inp);
1831 
1832 	return (error);
1833 }
1834