xref: /freebsd/sys/netinet/in_mcast.c (revision 721351876cd4d3a8a700f62d2061331fa951a488)
1 /*-
2  * Copyright (c) 2007 Bruce M. Simpson.
3  * Copyright (c) 2005 Robert N. M. Watson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  *    products derived from this software without specific prior written
16  *    permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * IPv4 multicast socket, group, and socket option processing module.
33  * Until further notice, this file requires INET to compile.
34  * TODO: Make this infrastructure independent of address family.
35  * TODO: Teach netinet6 to use this code.
36  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 
52 #include <net/if.h>
53 #include <net/if_dl.h>
54 #include <net/route.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_pcb.h>
59 #include <netinet/in_var.h>
60 #include <netinet/ip_var.h>
61 #include <netinet/igmp_var.h>
62 
/*
 * Overlay of the socket address structures handled in this file, so that a
 * single object can be viewed as generic storage (ss), a generic sockaddr
 * (sa), a link-layer address (sdl), an IPv4 address (sin) or, when INET6 is
 * defined, an IPv6 address (sin6).
 * The __SOCKUNION_DECLARED guard keeps the union from being declared twice.
 */
63 #ifndef __SOCKUNION_DECLARED
64 union sockunion {
65 	struct sockaddr_storage	ss;
66 	struct sockaddr		sa;
67 	struct sockaddr_dl	sdl;
68 	struct sockaddr_in	sin;
69 #ifdef INET6
70 	struct sockaddr_in6	sin6;
71 #endif
72 };
73 typedef union sockunion sockunion_t;
74 #define __SOCKUNION_DECLARED
75 #endif /* __SOCKUNION_DECLARED */
76 
/*
 * Malloc types used by the allocations in this file:
 *  M_IPMADDR   - struct in_multi group records (in_addmulti()).
 *  M_IPMOPTS   - struct ip_moptions and its imo_membership vector.
 *  M_IPMSOURCE - the imo_mfilters vector and struct in_msource entries.
 */
77 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
78 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
79 static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
80 
81 /*
82  * The IPv4 multicast list (in_multihead and associated structures) is
83  * protected by the global in_multi_mtx.  See in_var.h for more details.  For
84  * now, in_multi_mtx is marked as recursive (MTX_RECURSE) because IGMP calls
85  * back into ip_output() to send IGMP packets while holding the lock; this
86  * is probably not desirable.
87  */
88 struct in_multihead in_multihead;	/* XXX BSS initialization */
89 struct mtx in_multi_mtx;
90 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
91 
92 /*
93  * Functions with non-static linkage defined in this file should be
94  * declared in in_var.h:
95  *  imo_match_group()
96  *  imo_match_source()
97  *  in_addmulti()
98  *  in_delmulti()
99  *  in_delmulti_locked()
100  * and ip_var.h:
101  *  inp_freemoptions()
102  *  inp_getmoptions()
103  *  inp_setmoptions()
104  */
/*
 * Forward declarations of the file-local (static) helpers: the imo_*
 * routines operate on a struct ip_moptions, the inp_* routines on an inpcb
 * and, except for inp_findmoptions(), on a socket option request.
 */
105 static int	imo_grow(struct ip_moptions *);
106 static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
107 static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
108 static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
109 static struct ip_moptions *
110 		inp_findmoptions(struct inpcb *);
111 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
112 static int	inp_join_group(struct inpcb *, struct sockopt *);
113 static int	inp_leave_group(struct inpcb *, struct sockopt *);
114 static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
115 static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
116 
117 /*
118  * Resize the ip_moptions vector to the next power-of-two minus 1.
119  * May be called with locks held; do not sleep.
120  */
121 static int
122 imo_grow(struct ip_moptions *imo)
123 {
124 	struct in_multi		**nmships;
125 	struct in_multi		**omships;
126 	struct in_mfilter	 *nmfilters;
127 	struct in_mfilter	 *omfilters;
128 	size_t			  idx;
129 	size_t			  newmax;
130 	size_t			  oldmax;
131 
132 	nmships = NULL;
133 	nmfilters = NULL;
134 	omships = imo->imo_membership;
135 	omfilters = imo->imo_mfilters;
136 	oldmax = imo->imo_max_memberships;
137 	newmax = ((oldmax + 1) * 2) - 1;
138 
139 	if (newmax <= IP_MAX_MEMBERSHIPS) {
140 		nmships = (struct in_multi **)realloc(omships,
141 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
142 		nmfilters = (struct in_mfilter *)realloc(omfilters,
143 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
144 		if (nmships != NULL && nmfilters != NULL) {
145 			/* Initialize newly allocated source filter heads. */
146 			for (idx = oldmax; idx < newmax; idx++) {
147 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
148 				nmfilters[idx].imf_nsources = 0;
149 				TAILQ_INIT(&nmfilters[idx].imf_sources);
150 			}
151 			imo->imo_max_memberships = newmax;
152 			imo->imo_membership = nmships;
153 			imo->imo_mfilters = nmfilters;
154 		}
155 	}
156 
157 	if (nmships == NULL || nmfilters == NULL) {
158 		if (nmships != NULL)
159 			free(nmships, M_IPMOPTS);
160 		if (nmfilters != NULL)
161 			free(nmfilters, M_IPMSOURCE);
162 		return (ETOOMANYREFS);
163 	}
164 
165 	return (0);
166 }
167 
168 /*
169  * Add a source to a multicast filter list.
170  * Assumes the associated inpcb is locked.
171  */
172 static int
173 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
174 {
175 	struct in_msource	*ims, *nims;
176 	struct in_mfilter	*imf;
177 
178 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
179 	KASSERT(imo->imo_mfilters != NULL,
180 	    ("%s: imo_mfilters vector not allocated", __func__));
181 
182 	imf = &imo->imo_mfilters[gidx];
183 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
184 		return (ENOBUFS);
185 
186 	ims = imo_match_source(imo, gidx, &src->sa);
187 	if (ims != NULL)
188 		return (EADDRNOTAVAIL);
189 
190 	/* Do not sleep with inp lock held. */
191 	MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
192 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
193 	if (nims == NULL)
194 		return (ENOBUFS);
195 
196 	nims->ims_addr = src->ss;
197 	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
198 	imf->imf_nsources++;
199 
200 	return (0);
201 }
202 
203 static int
204 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
205 {
206 	struct in_msource	*ims;
207 	struct in_mfilter	*imf;
208 
209 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
210 	KASSERT(imo->imo_mfilters != NULL,
211 	    ("%s: imo_mfilters vector not allocated", __func__));
212 
213 	imf = &imo->imo_mfilters[gidx];
214 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
215 		return (ENOBUFS);
216 
217 	ims = imo_match_source(imo, gidx, &src->sa);
218 	if (ims == NULL)
219 		return (EADDRNOTAVAIL);
220 
221 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
222 	FREE(ims, M_IPMSOURCE);
223 	imf->imf_nsources--;
224 
225 	return (0);
226 }
227 
228 /*
229  * Find an IPv4 multicast group entry for this ip_moptions instance
230  * which matches the specified group, and optionally an interface.
231  * Return its index into the array, or -1 if not found.
232  */
233 size_t
234 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
235     struct sockaddr *group)
236 {
237 	sockunion_t	 *gsa;
238 	struct in_multi	**pinm;
239 	int		  idx;
240 	int		  nmships;
241 
242 	gsa = (sockunion_t *)group;
243 
244 	/* The imo_membership array may be lazy allocated. */
245 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
246 		return (-1);
247 
248 	nmships = imo->imo_num_memberships;
249 	pinm = &imo->imo_membership[0];
250 	for (idx = 0; idx < nmships; idx++, pinm++) {
251 		if (*pinm == NULL)
252 			continue;
253 #if 0
254 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
255 		    ifp, inet_ntoa(gsa->sin.sin_addr));
256 		printf("against %p, %s\n",
257 		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
258 #endif
259 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
260 		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
261 			break;
262 		}
263 	}
264 	if (idx >= nmships)
265 		idx = -1;
266 
267 	return (idx);
268 }
269 
270 /*
271  * Find a multicast source entry for this imo which matches
272  * the given group index for this socket, and source address.
273  */
274 struct in_msource *
275 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
276 {
277 	struct in_mfilter	*imf;
278 	struct in_msource	*ims, *pims;
279 
280 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
281 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
282 	    ("%s: invalid index %d\n", __func__, (int)gidx));
283 
284 	/* The imo_mfilters array may be lazy allocated. */
285 	if (imo->imo_mfilters == NULL)
286 		return (NULL);
287 
288 	pims = NULL;
289 	imf = &imo->imo_mfilters[gidx];
290 	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
291 		/*
292 		 * Perform bitwise comparison of two IPv4 addresses.
293 		 * TODO: Do the same for IPv6.
294 		 * Do not use sa_equal() for this as it is not aware of
295 		 * deeper structure in sockaddr_in or sockaddr_in6.
296 		 */
297 		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
298 		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
299 			pims = ims;
300 			break;
301 		}
302 	}
303 
304 	return (pims);
305 }
306 
307 /*
308  * Join an IPv4 multicast group.
309  */
310 struct in_multi *
311 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
312 {
313 	struct in_multi *inm;
314 
315 	inm = NULL;
316 
317 	IFF_LOCKGIANT(ifp);
318 	IN_MULTI_LOCK();
319 
320 	IN_LOOKUP_MULTI(*ap, ifp, inm);
321 	if (inm != NULL) {
322 		/*
323 		 * If we already joined this group, just bump the
324 		 * refcount and return it.
325 		 */
326 		KASSERT(inm->inm_refcount >= 1,
327 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
328 		++inm->inm_refcount;
329 	} else do {
330 		sockunion_t		 gsa;
331 		struct ifmultiaddr	*ifma;
332 		struct in_multi		*ninm;
333 		int			 error;
334 
335 		memset(&gsa, 0, sizeof(gsa));
336 		gsa.sin.sin_family = AF_INET;
337 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
338 		gsa.sin.sin_addr = *ap;
339 
340 		/*
341 		 * Check if a link-layer group is already associated
342 		 * with this network-layer group on the given ifnet.
343 		 * If so, bump the refcount on the existing network-layer
344 		 * group association and return it.
345 		 */
346 		error = if_addmulti(ifp, &gsa.sa, &ifma);
347 		if (error)
348 			break;
349 		if (ifma->ifma_protospec != NULL) {
350 			inm = (struct in_multi *)ifma->ifma_protospec;
351 #ifdef INVARIANTS
352 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
353 			    inm->inm_addr.s_addr != ap->s_addr)
354 				panic("%s: ifma is inconsistent", __func__);
355 #endif
356 			++inm->inm_refcount;
357 			break;
358 		}
359 
360 		/*
361 		 * A new membership is needed; construct it and
362 		 * perform the IGMP join.
363 		 */
364 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
365 		if (ninm == NULL) {
366 			if_delmulti_ifma(ifma);
367 			break;
368 		}
369 		ninm->inm_addr = *ap;
370 		ninm->inm_ifp = ifp;
371 		ninm->inm_ifma = ifma;
372 		ninm->inm_refcount = 1;
373 		ifma->ifma_protospec = ninm;
374 		LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
375 
376 		igmp_joingroup(ninm);
377 
378 		inm = ninm;
379 	} while (0);
380 
381 	IN_MULTI_UNLOCK();
382 	IFF_UNLOCKGIANT(ifp);
383 
384 	return (inm);
385 }
386 
387 /*
388  * Leave an IPv4 multicast group.
389  * It is OK to call this routine if the underlying ifnet went away.
390  *
391  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
392  * will set ifma_ifp to NULL when the associated ifnet instance is detached
393  * from the system.
394  *
395  * The only reason we need to violate layers and check ifma_ifp here at all
396  * is because certain hardware drivers still require Giant to be held,
397  * and it must always be taken before other locks.
398  */
399 void
400 in_delmulti(struct in_multi *inm)
401 {
402 	struct ifnet *ifp;
403 
404 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
405 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
406 	ifp = inm->inm_ifma->ifma_ifp;
407 
408 	if (ifp != NULL) {
409 		/*
410 		 * Sanity check that netinet's notion of ifp is the
411 		 * same as net's.
412 		 */
413 		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
414 		IFF_LOCKGIANT(ifp);
415 	}
416 
417 	IN_MULTI_LOCK();
418 	in_delmulti_locked(inm);
419 	IN_MULTI_UNLOCK();
420 
421 	if (ifp != NULL)
422 		IFF_UNLOCKGIANT(ifp);
423 }
424 
425 /*
426  * Delete a multicast address record, with locks held.
427  *
428  * It is OK to call this routine if the ifp went away.
429  * Assumes that caller holds the IN_MULTI lock, and that
430  * Giant was taken before other locks if required by the hardware.
431  */
432 void
433 in_delmulti_locked(struct in_multi *inm)
434 {
435 	struct ifmultiaddr *ifma;
436 
437 	IN_MULTI_LOCK_ASSERT();
438 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
439 
440 	if (--inm->inm_refcount == 0) {
441 		igmp_leavegroup(inm);
442 
443 		ifma = inm->inm_ifma;
444 #ifdef DIAGNOSTIC
445 		if (bootverbose)
446 			printf("%s: purging ifma %p\n", __func__, ifma);
447 #endif
448 		KASSERT(ifma->ifma_protospec == inm,
449 		    ("%s: ifma_protospec != inm", __func__));
450 		ifma->ifma_protospec = NULL;
451 
452 		LIST_REMOVE(inm, inm_link);
453 		free(inm, M_IPMADDR);
454 
455 		if_delmulti_ifma(ifma);
456 	}
457 }
458 
459 /*
460  * Block or unblock an ASM/SSM multicast source on an inpcb.
461  */
462 static int
463 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
464 {
465 	struct group_source_req		 gsr;
466 	sockunion_t			*gsa, *ssa;
467 	struct ifnet			*ifp;
468 	struct in_mfilter		*imf;
469 	struct ip_moptions		*imo;
470 	struct in_msource		*ims;
471 	size_t				 idx;
472 	int				 error;
473 	int				 block;
474 
475 	ifp = NULL;
476 	error = 0;
477 	block = 0;
478 
479 	memset(&gsr, 0, sizeof(struct group_source_req));
480 	gsa = (sockunion_t *)&gsr.gsr_group;
481 	ssa = (sockunion_t *)&gsr.gsr_source;
482 
483 	switch (sopt->sopt_name) {
484 	case IP_BLOCK_SOURCE:
485 	case IP_UNBLOCK_SOURCE: {
486 		struct ip_mreq_source	 mreqs;
487 
488 		error = sooptcopyin(sopt, &mreqs,
489 		    sizeof(struct ip_mreq_source),
490 		    sizeof(struct ip_mreq_source));
491 		if (error)
492 			return (error);
493 
494 		gsa->sin.sin_family = AF_INET;
495 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
496 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
497 
498 		ssa->sin.sin_family = AF_INET;
499 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
500 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
501 
502 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
503 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
504 
505 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
506 			block = 1;
507 
508 #ifdef DIAGNOSTIC
509 		if (bootverbose) {
510 			printf("%s: imr_interface = %s, ifp = %p\n",
511 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
512 		}
513 #endif
514 		break;
515 	    }
516 
517 	case MCAST_BLOCK_SOURCE:
518 	case MCAST_UNBLOCK_SOURCE:
519 		error = sooptcopyin(sopt, &gsr,
520 		    sizeof(struct group_source_req),
521 		    sizeof(struct group_source_req));
522 		if (error)
523 			return (error);
524 
525 		if (gsa->sin.sin_family != AF_INET ||
526 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
527 			return (EINVAL);
528 
529 		if (ssa->sin.sin_family != AF_INET ||
530 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
531 			return (EINVAL);
532 
533 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
534 			return (EADDRNOTAVAIL);
535 
536 		ifp = ifnet_byindex(gsr.gsr_interface);
537 
538 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
539 			block = 1;
540 		break;
541 
542 	default:
543 #ifdef DIAGNOSTIC
544 		if (bootverbose) {
545 			printf("%s: unknown sopt_name %d\n", __func__,
546 			    sopt->sopt_name);
547 		}
548 #endif
549 		return (EOPNOTSUPP);
550 		break;
551 	}
552 
553 	/* XXX INET6 */
554 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
555 		return (EINVAL);
556 
557 	/*
558 	 * Check if we are actually a member of this group.
559 	 */
560 	imo = inp_findmoptions(inp);
561 	idx = imo_match_group(imo, ifp, &gsa->sa);
562 	if (idx == -1 || imo->imo_mfilters == NULL) {
563 		error = EADDRNOTAVAIL;
564 		goto out_locked;
565 	}
566 
567 	KASSERT(imo->imo_mfilters != NULL,
568 	    ("%s: imo_mfilters not allocated", __func__));
569 	imf = &imo->imo_mfilters[idx];
570 
571 	/*
572 	 * SSM multicast truth table for block/unblock operations.
573 	 *
574 	 * Operation   Filter Mode  Entry exists?   Action
575 	 *
576 	 * block       exclude      no              add source to filter
577 	 * unblock     include      no              add source to filter
578 	 * block       include      no              EINVAL
579 	 * unblock     exclude      no              EINVAL
580 	 * block       exclude      yes             EADDRNOTAVAIL
581 	 * unblock     include      yes             EADDRNOTAVAIL
582 	 * block       include      yes             remove source from filter
583 	 * unblock     exclude      yes             remove source from filter
584 	 *
585 	 * FreeBSD does not explicitly distinguish between ASM and SSM
586 	 * mode sockets; all sockets are assumed to have a filter list.
587 	 */
588 #ifdef DIAGNOSTIC
589 	if (bootverbose) {
590 		printf("%s: imf_fmode is %s\n", __func__,
591 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
592 	}
593 #endif
594 	ims = imo_match_source(imo, idx, &ssa->sa);
595 	if (ims == NULL) {
596 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
597 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
598 #ifdef DIAGNOSTIC
599 			if (bootverbose) {
600 				printf("%s: adding %s to filter list\n",
601 				    __func__, inet_ntoa(ssa->sin.sin_addr));
602 			}
603 #endif
604 			error = imo_join_source(imo, idx, ssa);
605 		}
606 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
607 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
608 			/*
609 			 * If the socket is in inclusive mode:
610 			 *  the source is already blocked as it has no entry.
611 			 * If the socket is in exclusive mode:
612 			 *  the source is already unblocked as it has no entry.
613 			 */
614 #ifdef DIAGNOSTIC
615 			if (bootverbose) {
616 				printf("%s: ims %p; %s already [un]blocked\n",
617 				    __func__, ims,
618 				    inet_ntoa(ssa->sin.sin_addr));
619 			}
620 #endif
621 			error = EINVAL;
622 		}
623 	} else {
624 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
625 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
626 			/*
627 			 * If the socket is in exclusive mode:
628 			 *  the source is already blocked as it has an entry.
629 			 * If the socket is in inclusive mode:
630 			 *  the source is already unblocked as it has an entry.
631 			 */
632 #ifdef DIAGNOSTIC
633 			if (bootverbose) {
634 				printf("%s: ims %p; %s already [un]blocked\n",
635 				    __func__, ims,
636 				    inet_ntoa(ssa->sin.sin_addr));
637 			}
638 #endif
639 			error = EADDRNOTAVAIL;
640 		}
641 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
642 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
643 #ifdef DIAGNOSTIC
644 			if (bootverbose) {
645 				printf("%s: removing %s from filter list\n",
646 				    __func__, inet_ntoa(ssa->sin.sin_addr));
647 			}
648 #endif
649 			error = imo_leave_source(imo, idx, ssa);
650 		}
651 	}
652 
653 out_locked:
654 	INP_WUNLOCK(inp);
655 	return (error);
656 }
657 
658 /*
659  * Given an inpcb, return its multicast options structure pointer.  Accepts
660  * an unlocked inpcb pointer, but will return it locked.  May sleep.
661  */
662 static struct ip_moptions *
663 inp_findmoptions(struct inpcb *inp)
664 {
665 	struct ip_moptions	 *imo;
666 	struct in_multi		**immp;
667 	struct in_mfilter	 *imfp;
668 	size_t			  idx;
669 
670 	INP_WLOCK(inp);
671 	if (inp->inp_moptions != NULL)
672 		return (inp->inp_moptions);
673 
674 	INP_WUNLOCK(inp);
675 
676 	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
677 	    M_WAITOK);
678 	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
679 	    M_IPMOPTS, M_WAITOK | M_ZERO);
680 	imfp = (struct in_mfilter *)malloc(
681 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
682 	    M_IPMSOURCE, M_WAITOK);
683 
684 	imo->imo_multicast_ifp = NULL;
685 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
686 	imo->imo_multicast_vif = -1;
687 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
688 	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
689 	imo->imo_num_memberships = 0;
690 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
691 	imo->imo_membership = immp;
692 
693 	/* Initialize per-group source filters. */
694 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
695 		imfp[idx].imf_fmode = MCAST_EXCLUDE;
696 		imfp[idx].imf_nsources = 0;
697 		TAILQ_INIT(&imfp[idx].imf_sources);
698 	}
699 	imo->imo_mfilters = imfp;
700 
701 	INP_WLOCK(inp);
702 	if (inp->inp_moptions != NULL) {
703 		free(imfp, M_IPMSOURCE);
704 		free(immp, M_IPMOPTS);
705 		free(imo, M_IPMOPTS);
706 		return (inp->inp_moptions);
707 	}
708 	inp->inp_moptions = imo;
709 	return (imo);
710 }
711 
712 /*
713  * Discard the IP multicast options (and source filters).
714  */
715 void
716 inp_freemoptions(struct ip_moptions *imo)
717 {
718 	struct in_mfilter	*imf;
719 	struct in_msource	*ims, *tims;
720 	size_t			 idx, nmships;
721 
722 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
723 
724 	nmships = imo->imo_num_memberships;
725 	for (idx = 0; idx < nmships; ++idx) {
726 		in_delmulti(imo->imo_membership[idx]);
727 
728 		if (imo->imo_mfilters != NULL) {
729 			imf = &imo->imo_mfilters[idx];
730 			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
731 			    ims_next, tims) {
732 				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
733 				FREE(ims, M_IPMSOURCE);
734 				imf->imf_nsources--;
735 			}
736 			KASSERT(imf->imf_nsources == 0,
737 			    ("%s: did not free all imf_nsources", __func__));
738 		}
739 	}
740 
741 	if (imo->imo_mfilters != NULL)
742 		free(imo->imo_mfilters, M_IPMSOURCE);
743 	free(imo->imo_membership, M_IPMOPTS);
744 	free(imo, M_IPMOPTS);
745 }
746 
747 /*
748  * Atomically get source filters on a socket for an IPv4 multicast group.
749  * Called with INP lock held; returns with lock released.
750  */
751 static int
752 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
753 {
754 	struct __msfilterreq	 msfr;
755 	sockunion_t		*gsa;
756 	struct ifnet		*ifp;
757 	struct ip_moptions	*imo;
758 	struct in_mfilter	*imf;
759 	struct in_msource	*ims;
760 	struct sockaddr_storage	*ptss;
761 	struct sockaddr_storage	*tss;
762 	int			 error;
763 	size_t			 idx;
764 
765 	INP_WLOCK_ASSERT(inp);
766 
767 	imo = inp->inp_moptions;
768 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
769 
770 	INP_WUNLOCK(inp);
771 
772 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
773 	    sizeof(struct __msfilterreq));
774 	if (error)
775 		return (error);
776 
777 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
778 		return (EINVAL);
779 
780 	ifp = ifnet_byindex(msfr.msfr_ifindex);
781 	if (ifp == NULL)
782 		return (EINVAL);
783 
784 	INP_WLOCK(inp);
785 
786 	/*
787 	 * Lookup group on the socket.
788 	 */
789 	gsa = (sockunion_t *)&msfr.msfr_group;
790 	idx = imo_match_group(imo, ifp, &gsa->sa);
791 	if (idx == -1 || imo->imo_mfilters == NULL) {
792 		INP_WUNLOCK(inp);
793 		return (EADDRNOTAVAIL);
794 	}
795 
796 	imf = &imo->imo_mfilters[idx];
797 	msfr.msfr_fmode = imf->imf_fmode;
798 	msfr.msfr_nsrcs = imf->imf_nsources;
799 
800 	/*
801 	 * If the user specified a buffer, copy out the source filter
802 	 * entries to userland gracefully.
803 	 * msfr.msfr_nsrcs is always set to the total number of filter
804 	 * entries which the kernel currently has for this group.
805 	 */
806 	tss = NULL;
807 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
808 		/*
809 		 * Make a copy of the source vector so that we do not
810 		 * thrash the inpcb lock whilst copying it out.
811 		 * We only copy out the number of entries which userland
812 		 * has asked for, but we always tell userland how big the
813 		 * buffer really needs to be.
814 		 */
815 		MALLOC(tss, struct sockaddr_storage *,
816 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
817 		    M_TEMP, M_NOWAIT);
818 		if (tss == NULL) {
819 			error = ENOBUFS;
820 		} else {
821 			ptss = tss;
822 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
823 				memcpy(ptss++, &ims->ims_addr,
824 				    sizeof(struct sockaddr_storage));
825 			}
826 		}
827 	}
828 
829 	INP_WUNLOCK(inp);
830 
831 	if (tss != NULL) {
832 		error = copyout(tss, msfr.msfr_srcs,
833 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
834 		FREE(tss, M_TEMP);
835 	}
836 
837 	if (error)
838 		return (error);
839 
840 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
841 
842 	return (error);
843 }
844 
845 /*
846  * Return the IP multicast options in response to user getsockopt().
847  */
848 int
849 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
850 {
851 	struct ip_mreqn		 mreqn;
852 	struct ip_moptions	*imo;
853 	struct ifnet		*ifp;
854 	struct in_ifaddr	*ia;
855 	int			 error, optval;
856 	u_char			 coptval;
857 
858 	INP_WLOCK(inp);
859 	imo = inp->inp_moptions;
860 	/*
861 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
862 	 * or is a divert socket, reject it.
863 	 */
864 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
865 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
866 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
867 		INP_WUNLOCK(inp);
868 		return (EOPNOTSUPP);
869 	}
870 
871 	error = 0;
872 	switch (sopt->sopt_name) {
873 	case IP_MULTICAST_VIF:
874 		if (imo != NULL)
875 			optval = imo->imo_multicast_vif;
876 		else
877 			optval = -1;
878 		INP_WUNLOCK(inp);
879 		error = sooptcopyout(sopt, &optval, sizeof(int));
880 		break;
881 
882 	case IP_MULTICAST_IF:
883 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
884 		if (imo != NULL) {
885 			ifp = imo->imo_multicast_ifp;
886 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
887 				mreqn.imr_address = imo->imo_multicast_addr;
888 			} else if (ifp != NULL) {
889 				mreqn.imr_ifindex = ifp->if_index;
890 				IFP_TO_IA(ifp, ia);
891 				if (ia != NULL) {
892 					mreqn.imr_address =
893 					    IA_SIN(ia)->sin_addr;
894 				}
895 			}
896 		}
897 		INP_WUNLOCK(inp);
898 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
899 			error = sooptcopyout(sopt, &mreqn,
900 			    sizeof(struct ip_mreqn));
901 		} else {
902 			error = sooptcopyout(sopt, &mreqn.imr_address,
903 			    sizeof(struct in_addr));
904 		}
905 		break;
906 
907 	case IP_MULTICAST_TTL:
908 		if (imo == 0)
909 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
910 		else
911 			optval = coptval = imo->imo_multicast_ttl;
912 		INP_WUNLOCK(inp);
913 		if (sopt->sopt_valsize == sizeof(u_char))
914 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
915 		else
916 			error = sooptcopyout(sopt, &optval, sizeof(int));
917 		break;
918 
919 	case IP_MULTICAST_LOOP:
920 		if (imo == 0)
921 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
922 		else
923 			optval = coptval = imo->imo_multicast_loop;
924 		INP_WUNLOCK(inp);
925 		if (sopt->sopt_valsize == sizeof(u_char))
926 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
927 		else
928 			error = sooptcopyout(sopt, &optval, sizeof(int));
929 		break;
930 
931 	case IP_MSFILTER:
932 		if (imo == NULL) {
933 			error = EADDRNOTAVAIL;
934 			INP_WUNLOCK(inp);
935 		} else {
936 			error = inp_get_source_filters(inp, sopt);
937 		}
938 		break;
939 
940 	default:
941 		INP_WUNLOCK(inp);
942 		error = ENOPROTOOPT;
943 		break;
944 	}
945 
946 	INP_UNLOCK_ASSERT(inp);
947 
948 	return (error);
949 }
950 
951 /*
952  * Join an IPv4 multicast group, possibly with a source.
953  */
954 static int
955 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
956 {
957 	struct group_source_req		 gsr;
958 	sockunion_t			*gsa, *ssa;
959 	struct ifnet			*ifp;
960 	struct in_mfilter		*imf;
961 	struct ip_moptions		*imo;
962 	struct in_multi			*inm;
963 	size_t				 idx;
964 	int				 error;
965 
966 	ifp = NULL;
967 	error = 0;
968 
969 	memset(&gsr, 0, sizeof(struct group_source_req));
970 	gsa = (sockunion_t *)&gsr.gsr_group;
971 	gsa->ss.ss_family = AF_UNSPEC;
972 	ssa = (sockunion_t *)&gsr.gsr_source;
973 	ssa->ss.ss_family = AF_UNSPEC;
974 
975 	switch (sopt->sopt_name) {
976 	case IP_ADD_MEMBERSHIP:
977 	case IP_ADD_SOURCE_MEMBERSHIP: {
978 		struct ip_mreq_source	 mreqs;
979 
980 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
981 			error = sooptcopyin(sopt, &mreqs,
982 			    sizeof(struct ip_mreq),
983 			    sizeof(struct ip_mreq));
984 			/*
985 			 * Do argument switcharoo from ip_mreq into
986 			 * ip_mreq_source to avoid using two instances.
987 			 */
988 			mreqs.imr_interface = mreqs.imr_sourceaddr;
989 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
990 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
991 			error = sooptcopyin(sopt, &mreqs,
992 			    sizeof(struct ip_mreq_source),
993 			    sizeof(struct ip_mreq_source));
994 		}
995 		if (error)
996 			return (error);
997 
998 		gsa->sin.sin_family = AF_INET;
999 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1000 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1001 
1002 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1003 			ssa->sin.sin_family = AF_INET;
1004 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1005 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1006 		}
1007 
1008 		/*
1009 		 * Obtain ifp. If no interface address was provided,
1010 		 * use the interface of the route in the unicast FIB for
1011 		 * the given multicast destination; usually, this is the
1012 		 * default route.
1013 		 * If this lookup fails, attempt to use the first non-loopback
1014 		 * interface with multicast capability in the system as a
1015 		 * last resort. The legacy IPv4 ASM API requires that we do
1016 		 * this in order to allow groups to be joined when the routing
1017 		 * table has not yet been populated during boot.
1018 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1019 		 * reject the IPv4 multicast join.
1020 		 */
1021 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1022 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1023 		} else {
1024 			struct route ro;
1025 
1026 			ro.ro_rt = NULL;
1027 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1028 			in_rtalloc_ign(&ro, RTF_CLONING,
1029 			   inp->inp_inc.inc_fibnum);
1030 			if (ro.ro_rt != NULL) {
1031 				ifp = ro.ro_rt->rt_ifp;
1032 				KASSERT(ifp != NULL, ("%s: null ifp",
1033 				    __func__));
1034 				RTFREE(ro.ro_rt);
1035 			} else {
1036 				struct in_ifaddr *ia;
1037 				struct ifnet *mfp = NULL;
1038 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1039 					mfp = ia->ia_ifp;
1040 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1041 					     (mfp->if_flags & IFF_MULTICAST)) {
1042 						ifp = mfp;
1043 						break;
1044 					}
1045 				}
1046 			}
1047 		}
1048 #ifdef DIAGNOSTIC
1049 		if (bootverbose) {
1050 			printf("%s: imr_interface = %s, ifp = %p\n",
1051 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1052 		}
1053 #endif
1054 		break;
1055 	}
1056 
1057 	case MCAST_JOIN_GROUP:
1058 	case MCAST_JOIN_SOURCE_GROUP:
1059 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1060 			error = sooptcopyin(sopt, &gsr,
1061 			    sizeof(struct group_req),
1062 			    sizeof(struct group_req));
1063 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1064 			error = sooptcopyin(sopt, &gsr,
1065 			    sizeof(struct group_source_req),
1066 			    sizeof(struct group_source_req));
1067 		}
1068 		if (error)
1069 			return (error);
1070 
1071 		if (gsa->sin.sin_family != AF_INET ||
1072 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1073 			return (EINVAL);
1074 
1075 		/*
1076 		 * Overwrite the port field if present, as the sockaddr
1077 		 * being copied in may be matched with a binary comparison.
1078 		 * XXX INET6
1079 		 */
1080 		gsa->sin.sin_port = 0;
1081 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1082 			if (ssa->sin.sin_family != AF_INET ||
1083 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1084 				return (EINVAL);
1085 			ssa->sin.sin_port = 0;
1086 		}
1087 
1088 		/*
1089 		 * Obtain the ifp.
1090 		 */
1091 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1092 			return (EADDRNOTAVAIL);
1093 		ifp = ifnet_byindex(gsr.gsr_interface);
1094 
1095 		break;
1096 
1097 	default:
1098 #ifdef DIAGNOSTIC
1099 		if (bootverbose) {
1100 			printf("%s: unknown sopt_name %d\n", __func__,
1101 			    sopt->sopt_name);
1102 		}
1103 #endif
1104 		return (EOPNOTSUPP);
1105 		break;
1106 	}
1107 
1108 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1109 		return (EINVAL);
1110 
1111 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1112 		return (EADDRNOTAVAIL);
1113 
1114 	/*
1115 	 * Check if we already hold membership of this group for this inpcb.
1116 	 * If so, we do not need to perform the initial join.
1117 	 */
1118 	imo = inp_findmoptions(inp);
1119 	idx = imo_match_group(imo, ifp, &gsa->sa);
1120 	if (idx != -1) {
1121 		if (ssa->ss.ss_family != AF_UNSPEC) {
1122 			/*
1123 			 * Attempting to join an ASM group (when already
1124 			 * an ASM or SSM member) is an error.
1125 			 */
1126 			error = EADDRNOTAVAIL;
1127 		} else {
1128 			imf = &imo->imo_mfilters[idx];
1129 			if (imf->imf_nsources == 0) {
1130 				/*
1131 				 * Attempting to join an SSM group (when
1132 				 * already an ASM member) is an error.
1133 				 */
1134 				error = EINVAL;
1135 			} else {
1136 				/*
1137 				 * Attempting to join an SSM group (when
1138 				 * already an SSM member) means "add this
1139 				 * source to the inclusive filter list".
1140 				 */
1141 				error = imo_join_source(imo, idx, ssa);
1142 			}
1143 		}
1144 		goto out_locked;
1145 	}
1146 
1147 	/*
1148 	 * Call imo_grow() to reallocate the membership and source filter
1149 	 * vectors if they are full. If the size would exceed the hard limit,
1150 	 * then we know we've really run out of entries. We keep the INP
1151 	 * lock held to avoid introducing a race condition.
1152 	 */
1153 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1154 		error = imo_grow(imo);
1155 		if (error)
1156 			goto out_locked;
1157 	}
1158 
1159 	/*
1160 	 * So far, so good: perform the layer 3 join, layer 2 join,
1161 	 * and make an IGMP announcement if needed.
1162 	 */
1163 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1164 	if (inm == NULL) {
1165 		error = ENOBUFS;
1166 		goto out_locked;
1167 	}
1168 	idx = imo->imo_num_memberships;
1169 	imo->imo_membership[idx] = inm;
1170 	imo->imo_num_memberships++;
1171 
1172 	KASSERT(imo->imo_mfilters != NULL,
1173 	    ("%s: imf_mfilters vector was not allocated", __func__));
1174 	imf = &imo->imo_mfilters[idx];
1175 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1176 	    ("%s: imf_sources not empty", __func__));
1177 
1178 	/*
1179 	 * If this is a new SSM group join (i.e. a source was specified
1180 	 * with this group), add this source to the filter list.
1181 	 */
1182 	if (ssa->ss.ss_family != AF_UNSPEC) {
1183 		/*
1184 		 * An initial SSM join implies that this socket's membership
1185 		 * of the multicast group is now in inclusive mode.
1186 		 */
1187 		imf->imf_fmode = MCAST_INCLUDE;
1188 
1189 		error = imo_join_source(imo, idx, ssa);
1190 		if (error) {
1191 			/*
1192 			 * Drop inp lock before calling in_delmulti(),
1193 			 * to prevent a lock order reversal.
1194 			 */
1195 			--imo->imo_num_memberships;
1196 			INP_WUNLOCK(inp);
1197 			in_delmulti(inm);
1198 			return (error);
1199 		}
1200 	}
1201 
1202 out_locked:
1203 	INP_WUNLOCK(inp);
1204 	return (error);
1205 }
1206 
1207 /*
1208  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1209  */
1210 static int
1211 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1212 {
1213 	struct group_source_req		 gsr;
1214 	struct ip_mreq_source		 mreqs;
1215 	sockunion_t			*gsa, *ssa;
1216 	struct ifnet			*ifp;
1217 	struct in_mfilter		*imf;
1218 	struct ip_moptions		*imo;
1219 	struct in_msource		*ims, *tims;
1220 	struct in_multi			*inm;
1221 	size_t				 idx;
1222 	int				 error;
1223 
1224 	ifp = NULL;
1225 	error = 0;
1226 
1227 	memset(&gsr, 0, sizeof(struct group_source_req));
1228 	gsa = (sockunion_t *)&gsr.gsr_group;
1229 	gsa->ss.ss_family = AF_UNSPEC;
1230 	ssa = (sockunion_t *)&gsr.gsr_source;
1231 	ssa->ss.ss_family = AF_UNSPEC;
1232 
1233 	switch (sopt->sopt_name) {
1234 	case IP_DROP_MEMBERSHIP:
1235 	case IP_DROP_SOURCE_MEMBERSHIP:
1236 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1237 			error = sooptcopyin(sopt, &mreqs,
1238 			    sizeof(struct ip_mreq),
1239 			    sizeof(struct ip_mreq));
1240 			/*
1241 			 * Swap interface and sourceaddr arguments,
1242 			 * as ip_mreq and ip_mreq_source are laid
1243 			 * out differently.
1244 			 */
1245 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1246 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1247 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1248 			error = sooptcopyin(sopt, &mreqs,
1249 			    sizeof(struct ip_mreq_source),
1250 			    sizeof(struct ip_mreq_source));
1251 		}
1252 		if (error)
1253 			return (error);
1254 
1255 		gsa->sin.sin_family = AF_INET;
1256 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1257 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1258 
1259 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1260 			ssa->sin.sin_family = AF_INET;
1261 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1262 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1263 		}
1264 
1265 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1266 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1267 
1268 #ifdef DIAGNOSTIC
1269 		if (bootverbose) {
1270 			printf("%s: imr_interface = %s, ifp = %p\n",
1271 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1272 		}
1273 #endif
1274 		break;
1275 
1276 	case MCAST_LEAVE_GROUP:
1277 	case MCAST_LEAVE_SOURCE_GROUP:
1278 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1279 			error = sooptcopyin(sopt, &gsr,
1280 			    sizeof(struct group_req),
1281 			    sizeof(struct group_req));
1282 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1283 			error = sooptcopyin(sopt, &gsr,
1284 			    sizeof(struct group_source_req),
1285 			    sizeof(struct group_source_req));
1286 		}
1287 		if (error)
1288 			return (error);
1289 
1290 		if (gsa->sin.sin_family != AF_INET ||
1291 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1292 			return (EINVAL);
1293 
1294 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1295 			if (ssa->sin.sin_family != AF_INET ||
1296 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1297 				return (EINVAL);
1298 		}
1299 
1300 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1301 			return (EADDRNOTAVAIL);
1302 
1303 		ifp = ifnet_byindex(gsr.gsr_interface);
1304 		break;
1305 
1306 	default:
1307 #ifdef DIAGNOSTIC
1308 		if (bootverbose) {
1309 			printf("%s: unknown sopt_name %d\n", __func__,
1310 			    sopt->sopt_name);
1311 		}
1312 #endif
1313 		return (EOPNOTSUPP);
1314 		break;
1315 	}
1316 
1317 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1318 		return (EINVAL);
1319 
1320 	/*
1321 	 * Find the membership in the membership array.
1322 	 */
1323 	imo = inp_findmoptions(inp);
1324 	idx = imo_match_group(imo, ifp, &gsa->sa);
1325 	if (idx == -1) {
1326 		error = EADDRNOTAVAIL;
1327 		goto out_locked;
1328 	}
1329 	imf = &imo->imo_mfilters[idx];
1330 
1331 	/*
1332 	 * If we were instructed only to leave a given source, do so.
1333 	 */
1334 	if (ssa->ss.ss_family != AF_UNSPEC) {
1335 		if (imf->imf_nsources == 0 ||
1336 		    imf->imf_fmode == MCAST_EXCLUDE) {
1337 			/*
1338 			 * Attempting to SSM leave an ASM group
1339 			 * is an error; should use *_BLOCK_SOURCE instead.
1340 			 * Attempting to SSM leave a source in a group when
1341 			 * the socket is in 'exclude mode' is also an error.
1342 			 */
1343 			error = EINVAL;
1344 		} else {
1345 			error = imo_leave_source(imo, idx, ssa);
1346 		}
1347 		/*
1348 		 * If an error occurred, or this source is not the last
1349 		 * source in the group, do not leave the whole group.
1350 		 */
1351 		if (error || imf->imf_nsources > 0)
1352 			goto out_locked;
1353 	}
1354 
1355 	/*
1356 	 * Give up the multicast address record to which the membership points.
1357 	 */
1358 	inm = imo->imo_membership[idx];
1359 	in_delmulti(inm);
1360 
1361 	/*
1362 	 * Free any source filters for this group if they exist.
1363 	 * Revert inpcb to the default MCAST_EXCLUDE state.
1364 	 */
1365 	if (imo->imo_mfilters != NULL) {
1366 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1367 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1368 			FREE(ims, M_IPMSOURCE);
1369 			imf->imf_nsources--;
1370 		}
1371 		KASSERT(imf->imf_nsources == 0,
1372 		    ("%s: imf_nsources not 0", __func__));
1373 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1374 		    ("%s: imf_sources not empty", __func__));
1375 		imf->imf_fmode = MCAST_EXCLUDE;
1376 	}
1377 
1378 	/*
1379 	 * Remove the gap in the membership array.
1380 	 */
1381 	for (++idx; idx < imo->imo_num_memberships; ++idx)
1382 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1383 	imo->imo_num_memberships--;
1384 
1385 out_locked:
1386 	INP_WUNLOCK(inp);
1387 	return (error);
1388 }
1389 
1390 /*
1391  * Select the interface for transmitting IPv4 multicast datagrams.
1392  *
1393  * Either an instance of struct in_addr or an instance of struct ip_mreqn
1394  * may be passed to this socket option. An address of INADDR_ANY or an
1395  * interface index of 0 is used to remove a previous selection.
1396  * When no interface is selected, one is chosen for every send.
1397  */
1398 static int
1399 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1400 {
1401 	struct in_addr		 addr;
1402 	struct ip_mreqn		 mreqn;
1403 	struct ifnet		*ifp;
1404 	struct ip_moptions	*imo;
1405 	int			 error;
1406 
1407 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1408 		/*
1409 		 * An interface index was specified using the
1410 		 * Linux-derived ip_mreqn structure.
1411 		 */
1412 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1413 		    sizeof(struct ip_mreqn));
1414 		if (error)
1415 			return (error);
1416 
1417 		if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
1418 			return (EINVAL);
1419 
1420 		if (mreqn.imr_ifindex == 0) {
1421 			ifp = NULL;
1422 		} else {
1423 			ifp = ifnet_byindex(mreqn.imr_ifindex);
1424 			if (ifp == NULL)
1425 				return (EADDRNOTAVAIL);
1426 		}
1427 	} else {
1428 		/*
1429 		 * An interface was specified by IPv4 address.
1430 		 * This is the traditional BSD usage.
1431 		 */
1432 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1433 		    sizeof(struct in_addr));
1434 		if (error)
1435 			return (error);
1436 		if (addr.s_addr == INADDR_ANY) {
1437 			ifp = NULL;
1438 		} else {
1439 			INADDR_TO_IFP(addr, ifp);
1440 			if (ifp == NULL)
1441 				return (EADDRNOTAVAIL);
1442 		}
1443 #ifdef DIAGNOSTIC
1444 		if (bootverbose) {
1445 			printf("%s: ifp = %p, addr = %s\n",
1446 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1447 		}
1448 #endif
1449 	}
1450 
1451 	/* Reject interfaces which do not support multicast. */
1452 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1453 		return (EOPNOTSUPP);
1454 
1455 	imo = inp_findmoptions(inp);
1456 	imo->imo_multicast_ifp = ifp;
1457 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1458 	INP_WUNLOCK(inp);
1459 
1460 	return (0);
1461 }
1462 
1463 /*
1464  * Atomically set source filters on a socket for an IPv4 multicast group.
1465  */
1466 static int
1467 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1468 {
1469 	struct __msfilterreq	 msfr;
1470 	sockunion_t		*gsa;
1471 	struct ifnet		*ifp;
1472 	struct in_mfilter	*imf;
1473 	struct ip_moptions	*imo;
1474 	struct in_msource	*ims, *tims;
1475 	size_t			 idx;
1476 	int			 error;
1477 
1478 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1479 	    sizeof(struct __msfilterreq));
1480 	if (error)
1481 		return (error);
1482 
1483 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1484 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1485 	     msfr.msfr_fmode != MCAST_INCLUDE))
1486 		return (EINVAL);
1487 
1488 	if (msfr.msfr_group.ss_family != AF_INET ||
1489 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1490 		return (EINVAL);
1491 
1492 	gsa = (sockunion_t *)&msfr.msfr_group;
1493 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1494 		return (EINVAL);
1495 
1496 	gsa->sin.sin_port = 0;	/* ignore port */
1497 
1498 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
1499 		return (EADDRNOTAVAIL);
1500 
1501 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1502 	if (ifp == NULL)
1503 		return (EADDRNOTAVAIL);
1504 
1505 	/*
1506 	 * Take the INP lock.
1507 	 * Check if this socket is a member of this group.
1508 	 */
1509 	imo = inp_findmoptions(inp);
1510 	idx = imo_match_group(imo, ifp, &gsa->sa);
1511 	if (idx == -1 || imo->imo_mfilters == NULL) {
1512 		error = EADDRNOTAVAIL;
1513 		goto out_locked;
1514 	}
1515 	imf = &imo->imo_mfilters[idx];
1516 
1517 #ifdef DIAGNOSTIC
1518 	if (bootverbose)
1519 		printf("%s: clearing source list\n", __func__);
1520 #endif
1521 
1522 	/*
1523 	 * Remove any existing source filters.
1524 	 */
1525 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1526 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1527 		FREE(ims, M_IPMSOURCE);
1528 		imf->imf_nsources--;
1529 	}
1530 	KASSERT(imf->imf_nsources == 0,
1531 	    ("%s: source list not cleared", __func__));
1532 
1533 	/*
1534 	 * Apply any new source filters, if present.
1535 	 */
1536 	if (msfr.msfr_nsrcs > 0) {
1537 		struct in_msource	**pnims;
1538 		struct in_msource	*nims;
1539 		struct sockaddr_storage	*kss;
1540 		struct sockaddr_storage	*pkss;
1541 		sockunion_t		*psu;
1542 		int			 i, j;
1543 
1544 		/*
1545 		 * Drop the inp lock so we may sleep if we need to
1546 		 * in order to satisfy a malloc request.
1547 		 * We will re-take it before changing socket state.
1548 		 */
1549 		INP_WUNLOCK(inp);
1550 #ifdef DIAGNOSTIC
1551 		if (bootverbose) {
1552 			printf("%s: loading %lu source list entries\n",
1553 			    __func__, (unsigned long)msfr.msfr_nsrcs);
1554 		}
1555 #endif
1556 		/*
1557 		 * Make a copy of the user-space source vector so
1558 		 * that we may copy them with a single copyin. This
1559 		 * allows us to deal with page faults up-front.
1560 		 */
1561 		MALLOC(kss, struct sockaddr_storage *,
1562 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1563 		    M_TEMP, M_WAITOK);
1564 		error = copyin(msfr.msfr_srcs, kss,
1565 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1566 		if (error) {
1567 			FREE(kss, M_TEMP);
1568 			return (error);
1569 		}
1570 
1571 		/*
1572 		 * Perform argument checking on every sockaddr_storage
1573 		 * structure in the vector provided to us. Overwrite
1574 		 * fields which should not apply to source entries.
1575 		 * TODO: Check for duplicate sources on this pass.
1576 		 */
1577 		psu = (sockunion_t *)kss;
1578 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1579 			switch (psu->ss.ss_family) {
1580 			case AF_INET:
1581 				if (psu->sin.sin_len !=
1582 				    sizeof(struct sockaddr_in)) {
1583 					error = EINVAL;
1584 				} else {
1585 					psu->sin.sin_port = 0;
1586 				}
1587 				break;
1588 #ifdef notyet
1589 			case AF_INET6;
1590 				if (psu->sin6.sin6_len !=
1591 				    sizeof(struct sockaddr_in6)) {
1592 					error = EINVAL;
1593 				} else {
1594 					psu->sin6.sin6_port = 0;
1595 					psu->sin6.sin6_flowinfo = 0;
1596 				}
1597 				break;
1598 #endif
1599 			default:
1600 				error = EAFNOSUPPORT;
1601 				break;
1602 			}
1603 			if (error)
1604 				break;
1605 		}
1606 		if (error) {
1607 			FREE(kss, M_TEMP);
1608 			return (error);
1609 		}
1610 
1611 		/*
1612 		 * Allocate a block to track all the in_msource
1613 		 * entries we are about to allocate, in case we
1614 		 * abruptly need to free them.
1615 		 */
1616 		MALLOC(pnims, struct in_msource **,
1617 		    sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1618 		    M_TEMP, M_WAITOK | M_ZERO);
1619 
1620 		/*
1621 		 * Allocate up to nsrcs individual chunks.
1622 		 * If we encounter an error, backtrack out of
1623 		 * all allocations cleanly; updates must be atomic.
1624 		 */
1625 		pkss = kss;
1626 		nims = NULL;
1627 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1628 			MALLOC(nims, struct in_msource *,
1629 			    sizeof(struct in_msource) * msfr.msfr_nsrcs,
1630 			    M_IPMSOURCE, M_WAITOK | M_ZERO);
1631 			pnims[i] = nims;
1632 		}
1633 		if (i < msfr.msfr_nsrcs) {
1634 			for (j = 0; j < i; j++) {
1635 				if (pnims[j] != NULL)
1636 					FREE(pnims[j], M_IPMSOURCE);
1637 			}
1638 			FREE(pnims, M_TEMP);
1639 			FREE(kss, M_TEMP);
1640 			return (ENOBUFS);
1641 		}
1642 
1643 		INP_UNLOCK_ASSERT(inp);
1644 
1645 		/*
1646 		 * Finally, apply the filters to the socket.
1647 		 * Re-take the inp lock; we are changing socket state.
1648 		 */
1649 		pkss = kss;
1650 		INP_WLOCK(inp);
1651 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1652 			memcpy(&(pnims[i]->ims_addr), pkss,
1653 			    sizeof(struct sockaddr_storage));
1654 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1655 			    ims_next);
1656 			imf->imf_nsources++;
1657 		}
1658 		FREE(pnims, M_TEMP);
1659 		FREE(kss, M_TEMP);
1660 	}
1661 
1662 	/*
1663 	 * Update the filter mode on the socket before releasing the inpcb.
1664 	 */
1665 	INP_WLOCK_ASSERT(inp);
1666 	imf->imf_fmode = msfr.msfr_fmode;
1667 
1668 out_locked:
1669 	INP_WUNLOCK(inp);
1670 	return (error);
1671 }
1672 
1673 /*
1674  * Set the IP multicast options in response to user setsockopt().
1675  *
1676  * Many of the socket options handled in this function duplicate the
1677  * functionality of socket options in the regular unicast API. However,
1678  * it is not possible to merge the duplicate code, because the idempotence
1679  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1680  * the effects of these options must be treated as separate and distinct.
1681  */
1682 int
1683 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1684 {
1685 	struct ip_moptions	*imo;
1686 	int			 error;
1687 
1688 	error = 0;
1689 
1690 	/*
1691 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1692 	 * or is a divert socket, reject it.
1693 	 * XXX Unlocked read of inp_socket believed OK.
1694 	 */
1695 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1696 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1697 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1698 		return (EOPNOTSUPP);
1699 
1700 	switch (sopt->sopt_name) {
1701 	case IP_MULTICAST_VIF: {
1702 		int vifi;
1703 		/*
1704 		 * Select a multicast VIF for transmission.
1705 		 * Only useful if multicast forwarding is active.
1706 		 */
1707 		if (legal_vif_num == NULL) {
1708 			error = EOPNOTSUPP;
1709 			break;
1710 		}
1711 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1712 		if (error)
1713 			break;
1714 		if (!legal_vif_num(vifi) && (vifi != -1)) {
1715 			error = EINVAL;
1716 			break;
1717 		}
1718 		imo = inp_findmoptions(inp);
1719 		imo->imo_multicast_vif = vifi;
1720 		INP_WUNLOCK(inp);
1721 		break;
1722 	}
1723 
1724 	case IP_MULTICAST_IF:
1725 		error = inp_set_multicast_if(inp, sopt);
1726 		break;
1727 
1728 	case IP_MULTICAST_TTL: {
1729 		u_char ttl;
1730 
1731 		/*
1732 		 * Set the IP time-to-live for outgoing multicast packets.
1733 		 * The original multicast API required a char argument,
1734 		 * which is inconsistent with the rest of the socket API.
1735 		 * We allow either a char or an int.
1736 		 */
1737 		if (sopt->sopt_valsize == sizeof(u_char)) {
1738 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1739 			    sizeof(u_char));
1740 			if (error)
1741 				break;
1742 		} else {
1743 			u_int ittl;
1744 
1745 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1746 			    sizeof(u_int));
1747 			if (error)
1748 				break;
1749 			if (ittl > 255) {
1750 				error = EINVAL;
1751 				break;
1752 			}
1753 			ttl = (u_char)ittl;
1754 		}
1755 		imo = inp_findmoptions(inp);
1756 		imo->imo_multicast_ttl = ttl;
1757 		INP_WUNLOCK(inp);
1758 		break;
1759 	}
1760 
1761 	case IP_MULTICAST_LOOP: {
1762 		u_char loop;
1763 
1764 		/*
1765 		 * Set the loopback flag for outgoing multicast packets.
1766 		 * Must be zero or one.  The original multicast API required a
1767 		 * char argument, which is inconsistent with the rest
1768 		 * of the socket API.  We allow either a char or an int.
1769 		 */
1770 		if (sopt->sopt_valsize == sizeof(u_char)) {
1771 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1772 			    sizeof(u_char));
1773 			if (error)
1774 				break;
1775 		} else {
1776 			u_int iloop;
1777 
1778 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1779 					    sizeof(u_int));
1780 			if (error)
1781 				break;
1782 			loop = (u_char)iloop;
1783 		}
1784 		imo = inp_findmoptions(inp);
1785 		imo->imo_multicast_loop = !!loop;
1786 		INP_WUNLOCK(inp);
1787 		break;
1788 	}
1789 
1790 	case IP_ADD_MEMBERSHIP:
1791 	case IP_ADD_SOURCE_MEMBERSHIP:
1792 	case MCAST_JOIN_GROUP:
1793 	case MCAST_JOIN_SOURCE_GROUP:
1794 		error = inp_join_group(inp, sopt);
1795 		break;
1796 
1797 	case IP_DROP_MEMBERSHIP:
1798 	case IP_DROP_SOURCE_MEMBERSHIP:
1799 	case MCAST_LEAVE_GROUP:
1800 	case MCAST_LEAVE_SOURCE_GROUP:
1801 		error = inp_leave_group(inp, sopt);
1802 		break;
1803 
1804 	case IP_BLOCK_SOURCE:
1805 	case IP_UNBLOCK_SOURCE:
1806 	case MCAST_BLOCK_SOURCE:
1807 	case MCAST_UNBLOCK_SOURCE:
1808 		error = inp_change_source_filter(inp, sopt);
1809 		break;
1810 
1811 	case IP_MSFILTER:
1812 		error = inp_set_source_filters(inp, sopt);
1813 		break;
1814 
1815 	default:
1816 		error = EOPNOTSUPP;
1817 		break;
1818 	}
1819 
1820 	INP_UNLOCK_ASSERT(inp);
1821 
1822 	return (error);
1823 }
1824