xref: /freebsd/sys/netinet/in_mcast.c (revision b3aaa0cc21c63d388230c7ef2a80abd631ff20d5)
1 /*-
2  * Copyright (c) 2007 Bruce M. Simpson.
3  * Copyright (c) 2005 Robert N. M. Watson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  *    products derived from this software without specific prior written
16  *    permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * IPv4 multicast socket, group, and socket option processing module.
33  * Until further notice, this file requires INET to compile.
34  * TODO: Make this infrastructure independent of address family.
35  * TODO: Teach netinet6 to use this code.
36  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 #include <sys/vimage.h>
52 
53 #include <net/if.h>
54 #include <net/if_dl.h>
55 #include <net/route.h>
56 #include <net/vnet.h>
57 
58 #include <netinet/in.h>
59 #include <netinet/in_systm.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip_var.h>
63 #include <netinet/igmp_var.h>
64 #include <netinet/vinet.h>
65 
66 #ifndef __SOCKUNION_DECLARED
67 union sockunion {
68 	struct sockaddr_storage	ss;
69 	struct sockaddr		sa;
70 	struct sockaddr_dl	sdl;
71 	struct sockaddr_in	sin;
72 #ifdef INET6
73 	struct sockaddr_in6	sin6;
74 #endif
75 };
76 typedef union sockunion sockunion_t;
77 #define __SOCKUNION_DECLARED
78 #endif /* __SOCKUNION_DECLARED */
79 
80 static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
81 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
82 static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
83 
84 /*
85  * The IPv4 multicast list (in_multihead and associated structures) are
86  * protected by the global in_multi_mtx.  See in_var.h for more details.  For
87  * now, in_multi_mtx is marked as recursible due to IGMP's calling back into
88  * ip_output() to send IGMP packets while holding the lock; this probably is
89  * not quite desirable.
90  */
91 #ifdef VIMAGE_GLOBALS
92 struct in_multihead in_multihead;	/* XXX BSS initialization */
93 #endif
94 struct mtx in_multi_mtx;
95 MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
96 
97 /*
98  * Functions with non-static linkage defined in this file should be
99  * declared in in_var.h:
100  *  imo_match_group()
101  *  imo_match_source()
102  *  in_addmulti()
103  *  in_delmulti()
104  *  in_delmulti_locked()
105  * and ip_var.h:
106  *  inp_freemoptions()
107  *  inp_getmoptions()
108  *  inp_setmoptions()
109  */
110 static int	imo_grow(struct ip_moptions *);
111 static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
112 static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
113 static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
114 static struct ip_moptions *
115 		inp_findmoptions(struct inpcb *);
116 static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
117 static int	inp_join_group(struct inpcb *, struct sockopt *);
118 static int	inp_leave_group(struct inpcb *, struct sockopt *);
119 static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
120 static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
121 
122 /*
123  * Resize the ip_moptions vector to the next power-of-two minus 1.
124  * May be called with locks held; do not sleep.
125  */
126 static int
127 imo_grow(struct ip_moptions *imo)
128 {
129 	struct in_multi		**nmships;
130 	struct in_multi		**omships;
131 	struct in_mfilter	 *nmfilters;
132 	struct in_mfilter	 *omfilters;
133 	size_t			  idx;
134 	size_t			  newmax;
135 	size_t			  oldmax;
136 
137 	nmships = NULL;
138 	nmfilters = NULL;
139 	omships = imo->imo_membership;
140 	omfilters = imo->imo_mfilters;
141 	oldmax = imo->imo_max_memberships;
142 	newmax = ((oldmax + 1) * 2) - 1;
143 
144 	if (newmax <= IP_MAX_MEMBERSHIPS) {
145 		nmships = (struct in_multi **)realloc(omships,
146 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
147 		nmfilters = (struct in_mfilter *)realloc(omfilters,
148 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
149 		if (nmships != NULL && nmfilters != NULL) {
150 			/* Initialize newly allocated source filter heads. */
151 			for (idx = oldmax; idx < newmax; idx++) {
152 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
153 				nmfilters[idx].imf_nsources = 0;
154 				TAILQ_INIT(&nmfilters[idx].imf_sources);
155 			}
156 			imo->imo_max_memberships = newmax;
157 			imo->imo_membership = nmships;
158 			imo->imo_mfilters = nmfilters;
159 		}
160 	}
161 
162 	if (nmships == NULL || nmfilters == NULL) {
163 		if (nmships != NULL)
164 			free(nmships, M_IPMOPTS);
165 		if (nmfilters != NULL)
166 			free(nmfilters, M_IPMSOURCE);
167 		return (ETOOMANYREFS);
168 	}
169 
170 	return (0);
171 }
172 
173 /*
174  * Add a source to a multicast filter list.
175  * Assumes the associated inpcb is locked.
176  */
177 static int
178 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
179 {
180 	struct in_msource	*ims, *nims;
181 	struct in_mfilter	*imf;
182 
183 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
184 	KASSERT(imo->imo_mfilters != NULL,
185 	    ("%s: imo_mfilters vector not allocated", __func__));
186 
187 	imf = &imo->imo_mfilters[gidx];
188 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
189 		return (ENOBUFS);
190 
191 	ims = imo_match_source(imo, gidx, &src->sa);
192 	if (ims != NULL)
193 		return (EADDRNOTAVAIL);
194 
195 	/* Do not sleep with inp lock held. */
196 	nims = malloc(sizeof(struct in_msource),
197 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
198 	if (nims == NULL)
199 		return (ENOBUFS);
200 
201 	nims->ims_addr = src->ss;
202 	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
203 	imf->imf_nsources++;
204 
205 	return (0);
206 }
207 
208 static int
209 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
210 {
211 	struct in_msource	*ims;
212 	struct in_mfilter	*imf;
213 
214 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
215 	KASSERT(imo->imo_mfilters != NULL,
216 	    ("%s: imo_mfilters vector not allocated", __func__));
217 
218 	imf = &imo->imo_mfilters[gidx];
219 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
220 		return (ENOBUFS);
221 
222 	ims = imo_match_source(imo, gidx, &src->sa);
223 	if (ims == NULL)
224 		return (EADDRNOTAVAIL);
225 
226 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
227 	free(ims, M_IPMSOURCE);
228 	imf->imf_nsources--;
229 
230 	return (0);
231 }
232 
233 /*
234  * Find an IPv4 multicast group entry for this ip_moptions instance
235  * which matches the specified group, and optionally an interface.
236  * Return its index into the array, or -1 if not found.
237  */
238 size_t
239 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
240     struct sockaddr *group)
241 {
242 	sockunion_t	 *gsa;
243 	struct in_multi	**pinm;
244 	int		  idx;
245 	int		  nmships;
246 
247 	gsa = (sockunion_t *)group;
248 
249 	/* The imo_membership array may be lazy allocated. */
250 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
251 		return (-1);
252 
253 	nmships = imo->imo_num_memberships;
254 	pinm = &imo->imo_membership[0];
255 	for (idx = 0; idx < nmships; idx++, pinm++) {
256 		if (*pinm == NULL)
257 			continue;
258 #if 0
259 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
260 		    ifp, inet_ntoa(gsa->sin.sin_addr));
261 		printf("against %p, %s\n",
262 		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
263 #endif
264 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
265 		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
266 			break;
267 		}
268 	}
269 	if (idx >= nmships)
270 		idx = -1;
271 
272 	return (idx);
273 }
274 
275 /*
276  * Find a multicast source entry for this imo which matches
277  * the given group index for this socket, and source address.
278  */
279 struct in_msource *
280 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
281 {
282 	struct in_mfilter	*imf;
283 	struct in_msource	*ims, *pims;
284 
285 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
286 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
287 	    ("%s: invalid index %d\n", __func__, (int)gidx));
288 
289 	/* The imo_mfilters array may be lazy allocated. */
290 	if (imo->imo_mfilters == NULL)
291 		return (NULL);
292 
293 	pims = NULL;
294 	imf = &imo->imo_mfilters[gidx];
295 	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
296 		/*
297 		 * Perform bitwise comparison of two IPv4 addresses.
298 		 * TODO: Do the same for IPv6.
299 		 * Do not use sa_equal() for this as it is not aware of
300 		 * deeper structure in sockaddr_in or sockaddr_in6.
301 		 */
302 		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
303 		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
304 			pims = ims;
305 			break;
306 		}
307 	}
308 
309 	return (pims);
310 }
311 
312 /*
313  * Join an IPv4 multicast group.
314  */
315 struct in_multi *
316 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
317 {
318 	INIT_VNET_INET(ifp->if_vnet);
319 	struct in_multi *inm;
320 
321 	inm = NULL;
322 
323 	IFF_LOCKGIANT(ifp);
324 	IN_MULTI_LOCK();
325 
326 	IN_LOOKUP_MULTI(*ap, ifp, inm);
327 	if (inm != NULL) {
328 		/*
329 		 * If we already joined this group, just bump the
330 		 * refcount and return it.
331 		 */
332 		KASSERT(inm->inm_refcount >= 1,
333 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
334 		++inm->inm_refcount;
335 	} else do {
336 		sockunion_t		 gsa;
337 		struct ifmultiaddr	*ifma;
338 		struct in_multi		*ninm;
339 		int			 error;
340 
341 		memset(&gsa, 0, sizeof(gsa));
342 		gsa.sin.sin_family = AF_INET;
343 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
344 		gsa.sin.sin_addr = *ap;
345 
346 		/*
347 		 * Check if a link-layer group is already associated
348 		 * with this network-layer group on the given ifnet.
349 		 * If so, bump the refcount on the existing network-layer
350 		 * group association and return it.
351 		 */
352 		error = if_addmulti(ifp, &gsa.sa, &ifma);
353 		if (error)
354 			break;
355 		if (ifma->ifma_protospec != NULL) {
356 			inm = (struct in_multi *)ifma->ifma_protospec;
357 #ifdef INVARIANTS
358 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
359 			    inm->inm_addr.s_addr != ap->s_addr)
360 				panic("%s: ifma is inconsistent", __func__);
361 #endif
362 			++inm->inm_refcount;
363 			break;
364 		}
365 
366 		/*
367 		 * A new membership is needed; construct it and
368 		 * perform the IGMP join.
369 		 */
370 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
371 		if (ninm == NULL) {
372 			if_delmulti_ifma(ifma);
373 			break;
374 		}
375 		ninm->inm_addr = *ap;
376 		ninm->inm_ifp = ifp;
377 		ninm->inm_ifma = ifma;
378 		ninm->inm_refcount = 1;
379 		ifma->ifma_protospec = ninm;
380 		LIST_INSERT_HEAD(&V_in_multihead, ninm, inm_link);
381 
382 		igmp_joingroup(ninm);
383 
384 		inm = ninm;
385 	} while (0);
386 
387 	IN_MULTI_UNLOCK();
388 	IFF_UNLOCKGIANT(ifp);
389 
390 	return (inm);
391 }
392 
393 /*
394  * Leave an IPv4 multicast group.
395  * It is OK to call this routine if the underlying ifnet went away.
396  *
397  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
398  * will set ifma_ifp to NULL when the associated ifnet instance is detached
399  * from the system.
400  *
401  * The only reason we need to violate layers and check ifma_ifp here at all
402  * is because certain hardware drivers still require Giant to be held,
403  * and it must always be taken before other locks.
404  */
405 void
406 in_delmulti(struct in_multi *inm)
407 {
408 	struct ifnet *ifp;
409 
410 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
411 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
412 	ifp = inm->inm_ifma->ifma_ifp;
413 
414 	if (ifp != NULL) {
415 		/*
416 		 * Sanity check that netinet's notion of ifp is the
417 		 * same as net's.
418 		 */
419 		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
420 		IFF_LOCKGIANT(ifp);
421 	}
422 
423 	IN_MULTI_LOCK();
424 	in_delmulti_locked(inm);
425 	IN_MULTI_UNLOCK();
426 
427 	if (ifp != NULL)
428 		IFF_UNLOCKGIANT(ifp);
429 }
430 
431 /*
432  * Delete a multicast address record, with locks held.
433  *
434  * It is OK to call this routine if the ifp went away.
435  * Assumes that caller holds the IN_MULTI lock, and that
436  * Giant was taken before other locks if required by the hardware.
437  */
438 void
439 in_delmulti_locked(struct in_multi *inm)
440 {
441 	struct ifmultiaddr *ifma;
442 
443 	IN_MULTI_LOCK_ASSERT();
444 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
445 
446 	if (--inm->inm_refcount == 0) {
447 		igmp_leavegroup(inm);
448 
449 		ifma = inm->inm_ifma;
450 #ifdef DIAGNOSTIC
451 		if (bootverbose)
452 			printf("%s: purging ifma %p\n", __func__, ifma);
453 #endif
454 		KASSERT(ifma->ifma_protospec == inm,
455 		    ("%s: ifma_protospec != inm", __func__));
456 		ifma->ifma_protospec = NULL;
457 
458 		LIST_REMOVE(inm, inm_link);
459 		free(inm, M_IPMADDR);
460 
461 		if_delmulti_ifma(ifma);
462 	}
463 }
464 
465 /*
466  * Block or unblock an ASM/SSM multicast source on an inpcb.
467  */
468 static int
469 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
470 {
471 	INIT_VNET_NET(curvnet);
472 	INIT_VNET_INET(curvnet);
473 	struct group_source_req		 gsr;
474 	sockunion_t			*gsa, *ssa;
475 	struct ifnet			*ifp;
476 	struct in_mfilter		*imf;
477 	struct ip_moptions		*imo;
478 	struct in_msource		*ims;
479 	size_t				 idx;
480 	int				 error;
481 	int				 block;
482 
483 	ifp = NULL;
484 	error = 0;
485 	block = 0;
486 
487 	memset(&gsr, 0, sizeof(struct group_source_req));
488 	gsa = (sockunion_t *)&gsr.gsr_group;
489 	ssa = (sockunion_t *)&gsr.gsr_source;
490 
491 	switch (sopt->sopt_name) {
492 	case IP_BLOCK_SOURCE:
493 	case IP_UNBLOCK_SOURCE: {
494 		struct ip_mreq_source	 mreqs;
495 
496 		error = sooptcopyin(sopt, &mreqs,
497 		    sizeof(struct ip_mreq_source),
498 		    sizeof(struct ip_mreq_source));
499 		if (error)
500 			return (error);
501 
502 		gsa->sin.sin_family = AF_INET;
503 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
504 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
505 
506 		ssa->sin.sin_family = AF_INET;
507 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
508 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
509 
510 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
511 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
512 
513 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
514 			block = 1;
515 
516 #ifdef DIAGNOSTIC
517 		if (bootverbose) {
518 			printf("%s: imr_interface = %s, ifp = %p\n",
519 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
520 		}
521 #endif
522 		break;
523 	    }
524 
525 	case MCAST_BLOCK_SOURCE:
526 	case MCAST_UNBLOCK_SOURCE:
527 		error = sooptcopyin(sopt, &gsr,
528 		    sizeof(struct group_source_req),
529 		    sizeof(struct group_source_req));
530 		if (error)
531 			return (error);
532 
533 		if (gsa->sin.sin_family != AF_INET ||
534 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
535 			return (EINVAL);
536 
537 		if (ssa->sin.sin_family != AF_INET ||
538 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
539 			return (EINVAL);
540 
541 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
542 			return (EADDRNOTAVAIL);
543 
544 		ifp = ifnet_byindex(gsr.gsr_interface);
545 
546 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
547 			block = 1;
548 		break;
549 
550 	default:
551 #ifdef DIAGNOSTIC
552 		if (bootverbose) {
553 			printf("%s: unknown sopt_name %d\n", __func__,
554 			    sopt->sopt_name);
555 		}
556 #endif
557 		return (EOPNOTSUPP);
558 		break;
559 	}
560 
561 	/* XXX INET6 */
562 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
563 		return (EINVAL);
564 
565 	/*
566 	 * Check if we are actually a member of this group.
567 	 */
568 	imo = inp_findmoptions(inp);
569 	idx = imo_match_group(imo, ifp, &gsa->sa);
570 	if (idx == -1 || imo->imo_mfilters == NULL) {
571 		error = EADDRNOTAVAIL;
572 		goto out_locked;
573 	}
574 
575 	KASSERT(imo->imo_mfilters != NULL,
576 	    ("%s: imo_mfilters not allocated", __func__));
577 	imf = &imo->imo_mfilters[idx];
578 
579 	/*
580 	 * SSM multicast truth table for block/unblock operations.
581 	 *
582 	 * Operation   Filter Mode  Entry exists?   Action
583 	 *
584 	 * block       exclude      no              add source to filter
585 	 * unblock     include      no              add source to filter
586 	 * block       include      no              EINVAL
587 	 * unblock     exclude      no              EINVAL
588 	 * block       exclude      yes             EADDRNOTAVAIL
589 	 * unblock     include      yes             EADDRNOTAVAIL
590 	 * block       include      yes             remove source from filter
591 	 * unblock     exclude      yes             remove source from filter
592 	 *
593 	 * FreeBSD does not explicitly distinguish between ASM and SSM
594 	 * mode sockets; all sockets are assumed to have a filter list.
595 	 */
596 #ifdef DIAGNOSTIC
597 	if (bootverbose) {
598 		printf("%s: imf_fmode is %s\n", __func__,
599 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
600 	}
601 #endif
602 	ims = imo_match_source(imo, idx, &ssa->sa);
603 	if (ims == NULL) {
604 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
605 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
606 #ifdef DIAGNOSTIC
607 			if (bootverbose) {
608 				printf("%s: adding %s to filter list\n",
609 				    __func__, inet_ntoa(ssa->sin.sin_addr));
610 			}
611 #endif
612 			error = imo_join_source(imo, idx, ssa);
613 		}
614 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
615 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
616 			/*
617 			 * If the socket is in inclusive mode:
618 			 *  the source is already blocked as it has no entry.
619 			 * If the socket is in exclusive mode:
620 			 *  the source is already unblocked as it has no entry.
621 			 */
622 #ifdef DIAGNOSTIC
623 			if (bootverbose) {
624 				printf("%s: ims %p; %s already [un]blocked\n",
625 				    __func__, ims,
626 				    inet_ntoa(ssa->sin.sin_addr));
627 			}
628 #endif
629 			error = EINVAL;
630 		}
631 	} else {
632 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
633 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
634 			/*
635 			 * If the socket is in exclusive mode:
636 			 *  the source is already blocked as it has an entry.
637 			 * If the socket is in inclusive mode:
638 			 *  the source is already unblocked as it has an entry.
639 			 */
640 #ifdef DIAGNOSTIC
641 			if (bootverbose) {
642 				printf("%s: ims %p; %s already [un]blocked\n",
643 				    __func__, ims,
644 				    inet_ntoa(ssa->sin.sin_addr));
645 			}
646 #endif
647 			error = EADDRNOTAVAIL;
648 		}
649 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
650 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
651 #ifdef DIAGNOSTIC
652 			if (bootverbose) {
653 				printf("%s: removing %s from filter list\n",
654 				    __func__, inet_ntoa(ssa->sin.sin_addr));
655 			}
656 #endif
657 			error = imo_leave_source(imo, idx, ssa);
658 		}
659 	}
660 
661 out_locked:
662 	INP_WUNLOCK(inp);
663 	return (error);
664 }
665 
666 /*
667  * Given an inpcb, return its multicast options structure pointer.  Accepts
668  * an unlocked inpcb pointer, but will return it locked.  May sleep.
669  */
670 static struct ip_moptions *
671 inp_findmoptions(struct inpcb *inp)
672 {
673 	struct ip_moptions	 *imo;
674 	struct in_multi		**immp;
675 	struct in_mfilter	 *imfp;
676 	size_t			  idx;
677 
678 	INP_WLOCK(inp);
679 	if (inp->inp_moptions != NULL)
680 		return (inp->inp_moptions);
681 
682 	INP_WUNLOCK(inp);
683 
684 	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
685 	    M_WAITOK);
686 	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
687 	    M_IPMOPTS, M_WAITOK | M_ZERO);
688 	imfp = (struct in_mfilter *)malloc(
689 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
690 	    M_IPMSOURCE, M_WAITOK);
691 
692 	imo->imo_multicast_ifp = NULL;
693 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
694 	imo->imo_multicast_vif = -1;
695 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
696 	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
697 	imo->imo_num_memberships = 0;
698 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
699 	imo->imo_membership = immp;
700 
701 	/* Initialize per-group source filters. */
702 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
703 		imfp[idx].imf_fmode = MCAST_EXCLUDE;
704 		imfp[idx].imf_nsources = 0;
705 		TAILQ_INIT(&imfp[idx].imf_sources);
706 	}
707 	imo->imo_mfilters = imfp;
708 
709 	INP_WLOCK(inp);
710 	if (inp->inp_moptions != NULL) {
711 		free(imfp, M_IPMSOURCE);
712 		free(immp, M_IPMOPTS);
713 		free(imo, M_IPMOPTS);
714 		return (inp->inp_moptions);
715 	}
716 	inp->inp_moptions = imo;
717 	return (imo);
718 }
719 
720 /*
721  * Discard the IP multicast options (and source filters).
722  */
723 void
724 inp_freemoptions(struct ip_moptions *imo)
725 {
726 	struct in_mfilter	*imf;
727 	struct in_msource	*ims, *tims;
728 	size_t			 idx, nmships;
729 
730 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
731 
732 	nmships = imo->imo_num_memberships;
733 	for (idx = 0; idx < nmships; ++idx) {
734 		in_delmulti(imo->imo_membership[idx]);
735 
736 		if (imo->imo_mfilters != NULL) {
737 			imf = &imo->imo_mfilters[idx];
738 			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
739 			    ims_next, tims) {
740 				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
741 				free(ims, M_IPMSOURCE);
742 				imf->imf_nsources--;
743 			}
744 			KASSERT(imf->imf_nsources == 0,
745 			    ("%s: did not free all imf_nsources", __func__));
746 		}
747 	}
748 
749 	if (imo->imo_mfilters != NULL)
750 		free(imo->imo_mfilters, M_IPMSOURCE);
751 	free(imo->imo_membership, M_IPMOPTS);
752 	free(imo, M_IPMOPTS);
753 }
754 
755 /*
756  * Atomically get source filters on a socket for an IPv4 multicast group.
757  * Called with INP lock held; returns with lock released.
758  */
759 static int
760 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
761 {
762 	INIT_VNET_NET(curvnet);
763 	struct __msfilterreq	 msfr;
764 	sockunion_t		*gsa;
765 	struct ifnet		*ifp;
766 	struct ip_moptions	*imo;
767 	struct in_mfilter	*imf;
768 	struct in_msource	*ims;
769 	struct sockaddr_storage	*ptss;
770 	struct sockaddr_storage	*tss;
771 	int			 error;
772 	size_t			 idx;
773 
774 	INP_WLOCK_ASSERT(inp);
775 
776 	imo = inp->inp_moptions;
777 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
778 
779 	INP_WUNLOCK(inp);
780 
781 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
782 	    sizeof(struct __msfilterreq));
783 	if (error)
784 		return (error);
785 
786 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
787 		return (EINVAL);
788 
789 	ifp = ifnet_byindex(msfr.msfr_ifindex);
790 	if (ifp == NULL)
791 		return (EINVAL);
792 
793 	INP_WLOCK(inp);
794 
795 	/*
796 	 * Lookup group on the socket.
797 	 */
798 	gsa = (sockunion_t *)&msfr.msfr_group;
799 	idx = imo_match_group(imo, ifp, &gsa->sa);
800 	if (idx == -1 || imo->imo_mfilters == NULL) {
801 		INP_WUNLOCK(inp);
802 		return (EADDRNOTAVAIL);
803 	}
804 
805 	imf = &imo->imo_mfilters[idx];
806 	msfr.msfr_fmode = imf->imf_fmode;
807 	msfr.msfr_nsrcs = imf->imf_nsources;
808 
809 	/*
810 	 * If the user specified a buffer, copy out the source filter
811 	 * entries to userland gracefully.
812 	 * msfr.msfr_nsrcs is always set to the total number of filter
813 	 * entries which the kernel currently has for this group.
814 	 */
815 	tss = NULL;
816 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
817 		/*
818 		 * Make a copy of the source vector so that we do not
819 		 * thrash the inpcb lock whilst copying it out.
820 		 * We only copy out the number of entries which userland
821 		 * has asked for, but we always tell userland how big the
822 		 * buffer really needs to be.
823 		 */
824 		tss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
825 		    M_TEMP, M_NOWAIT);
826 		if (tss == NULL) {
827 			error = ENOBUFS;
828 		} else {
829 			ptss = tss;
830 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
831 				memcpy(ptss++, &ims->ims_addr,
832 				    sizeof(struct sockaddr_storage));
833 			}
834 		}
835 	}
836 
837 	INP_WUNLOCK(inp);
838 
839 	if (tss != NULL) {
840 		error = copyout(tss, msfr.msfr_srcs,
841 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
842 		free(tss, M_TEMP);
843 	}
844 
845 	if (error)
846 		return (error);
847 
848 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
849 
850 	return (error);
851 }
852 
853 /*
854  * Return the IP multicast options in response to user getsockopt().
855  */
856 int
857 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
858 {
859 	INIT_VNET_INET(curvnet);
860 	struct ip_mreqn		 mreqn;
861 	struct ip_moptions	*imo;
862 	struct ifnet		*ifp;
863 	struct in_ifaddr	*ia;
864 	int			 error, optval;
865 	u_char			 coptval;
866 
867 	INP_WLOCK(inp);
868 	imo = inp->inp_moptions;
869 	/*
870 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
871 	 * or is a divert socket, reject it.
872 	 */
873 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
874 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
875 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
876 		INP_WUNLOCK(inp);
877 		return (EOPNOTSUPP);
878 	}
879 
880 	error = 0;
881 	switch (sopt->sopt_name) {
882 	case IP_MULTICAST_VIF:
883 		if (imo != NULL)
884 			optval = imo->imo_multicast_vif;
885 		else
886 			optval = -1;
887 		INP_WUNLOCK(inp);
888 		error = sooptcopyout(sopt, &optval, sizeof(int));
889 		break;
890 
891 	case IP_MULTICAST_IF:
892 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
893 		if (imo != NULL) {
894 			ifp = imo->imo_multicast_ifp;
895 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
896 				mreqn.imr_address = imo->imo_multicast_addr;
897 			} else if (ifp != NULL) {
898 				mreqn.imr_ifindex = ifp->if_index;
899 				IFP_TO_IA(ifp, ia);
900 				if (ia != NULL) {
901 					mreqn.imr_address =
902 					    IA_SIN(ia)->sin_addr;
903 				}
904 			}
905 		}
906 		INP_WUNLOCK(inp);
907 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
908 			error = sooptcopyout(sopt, &mreqn,
909 			    sizeof(struct ip_mreqn));
910 		} else {
911 			error = sooptcopyout(sopt, &mreqn.imr_address,
912 			    sizeof(struct in_addr));
913 		}
914 		break;
915 
916 	case IP_MULTICAST_TTL:
917 		if (imo == 0)
918 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
919 		else
920 			optval = coptval = imo->imo_multicast_ttl;
921 		INP_WUNLOCK(inp);
922 		if (sopt->sopt_valsize == sizeof(u_char))
923 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
924 		else
925 			error = sooptcopyout(sopt, &optval, sizeof(int));
926 		break;
927 
928 	case IP_MULTICAST_LOOP:
929 		if (imo == 0)
930 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
931 		else
932 			optval = coptval = imo->imo_multicast_loop;
933 		INP_WUNLOCK(inp);
934 		if (sopt->sopt_valsize == sizeof(u_char))
935 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
936 		else
937 			error = sooptcopyout(sopt, &optval, sizeof(int));
938 		break;
939 
940 	case IP_MSFILTER:
941 		if (imo == NULL) {
942 			error = EADDRNOTAVAIL;
943 			INP_WUNLOCK(inp);
944 		} else {
945 			error = inp_get_source_filters(inp, sopt);
946 		}
947 		break;
948 
949 	default:
950 		INP_WUNLOCK(inp);
951 		error = ENOPROTOOPT;
952 		break;
953 	}
954 
955 	INP_UNLOCK_ASSERT(inp);
956 
957 	return (error);
958 }
959 
960 /*
961  * Join an IPv4 multicast group, possibly with a source.
962  */
963 static int
964 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
965 {
966 	INIT_VNET_NET(curvnet);
967 	INIT_VNET_INET(curvnet);
968 	struct group_source_req		 gsr;
969 	sockunion_t			*gsa, *ssa;
970 	struct ifnet			*ifp;
971 	struct in_mfilter		*imf;
972 	struct ip_moptions		*imo;
973 	struct in_multi			*inm;
974 	size_t				 idx;
975 	int				 error;
976 
977 	ifp = NULL;
978 	error = 0;
979 
980 	memset(&gsr, 0, sizeof(struct group_source_req));
981 	gsa = (sockunion_t *)&gsr.gsr_group;
982 	gsa->ss.ss_family = AF_UNSPEC;
983 	ssa = (sockunion_t *)&gsr.gsr_source;
984 	ssa->ss.ss_family = AF_UNSPEC;
985 
986 	switch (sopt->sopt_name) {
987 	case IP_ADD_MEMBERSHIP:
988 	case IP_ADD_SOURCE_MEMBERSHIP: {
989 		struct ip_mreq_source	 mreqs;
990 
991 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
992 			error = sooptcopyin(sopt, &mreqs,
993 			    sizeof(struct ip_mreq),
994 			    sizeof(struct ip_mreq));
995 			/*
996 			 * Do argument switcharoo from ip_mreq into
997 			 * ip_mreq_source to avoid using two instances.
998 			 */
999 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1000 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1001 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1002 			error = sooptcopyin(sopt, &mreqs,
1003 			    sizeof(struct ip_mreq_source),
1004 			    sizeof(struct ip_mreq_source));
1005 		}
1006 		if (error)
1007 			return (error);
1008 
1009 		gsa->sin.sin_family = AF_INET;
1010 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1011 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1012 
1013 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1014 			ssa->sin.sin_family = AF_INET;
1015 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1016 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1017 		}
1018 
1019 		/*
1020 		 * Obtain ifp. If no interface address was provided,
1021 		 * use the interface of the route in the unicast FIB for
1022 		 * the given multicast destination; usually, this is the
1023 		 * default route.
1024 		 * If this lookup fails, attempt to use the first non-loopback
1025 		 * interface with multicast capability in the system as a
1026 		 * last resort. The legacy IPv4 ASM API requires that we do
1027 		 * this in order to allow groups to be joined when the routing
1028 		 * table has not yet been populated during boot.
1029 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1030 		 * reject the IPv4 multicast join.
1031 		 */
1032 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1033 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1034 		} else {
1035 			struct route ro;
1036 
1037 			ro.ro_rt = NULL;
1038 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1039 			in_rtalloc_ign(&ro, 0,
1040 			   inp->inp_inc.inc_fibnum);
1041 			if (ro.ro_rt != NULL) {
1042 				ifp = ro.ro_rt->rt_ifp;
1043 				KASSERT(ifp != NULL, ("%s: null ifp",
1044 				    __func__));
1045 				RTFREE(ro.ro_rt);
1046 			} else {
1047 				struct in_ifaddr *ia;
1048 				struct ifnet *mfp = NULL;
1049 				TAILQ_FOREACH(ia, &V_in_ifaddrhead, ia_link) {
1050 					mfp = ia->ia_ifp;
1051 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1052 					     (mfp->if_flags & IFF_MULTICAST)) {
1053 						ifp = mfp;
1054 						break;
1055 					}
1056 				}
1057 			}
1058 		}
1059 #ifdef DIAGNOSTIC
1060 		if (bootverbose) {
1061 			printf("%s: imr_interface = %s, ifp = %p\n",
1062 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1063 		}
1064 #endif
1065 		break;
1066 	}
1067 
1068 	case MCAST_JOIN_GROUP:
1069 	case MCAST_JOIN_SOURCE_GROUP:
1070 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1071 			error = sooptcopyin(sopt, &gsr,
1072 			    sizeof(struct group_req),
1073 			    sizeof(struct group_req));
1074 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1075 			error = sooptcopyin(sopt, &gsr,
1076 			    sizeof(struct group_source_req),
1077 			    sizeof(struct group_source_req));
1078 		}
1079 		if (error)
1080 			return (error);
1081 
1082 		if (gsa->sin.sin_family != AF_INET ||
1083 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1084 			return (EINVAL);
1085 
1086 		/*
1087 		 * Overwrite the port field if present, as the sockaddr
1088 		 * being copied in may be matched with a binary comparison.
1089 		 * XXX INET6
1090 		 */
1091 		gsa->sin.sin_port = 0;
1092 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1093 			if (ssa->sin.sin_family != AF_INET ||
1094 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1095 				return (EINVAL);
1096 			ssa->sin.sin_port = 0;
1097 		}
1098 
1099 		/*
1100 		 * Obtain the ifp.
1101 		 */
1102 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1103 			return (EADDRNOTAVAIL);
1104 		ifp = ifnet_byindex(gsr.gsr_interface);
1105 
1106 		break;
1107 
1108 	default:
1109 #ifdef DIAGNOSTIC
1110 		if (bootverbose) {
1111 			printf("%s: unknown sopt_name %d\n", __func__,
1112 			    sopt->sopt_name);
1113 		}
1114 #endif
1115 		return (EOPNOTSUPP);
1116 		break;
1117 	}
1118 
1119 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1120 		return (EINVAL);
1121 
1122 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1123 		return (EADDRNOTAVAIL);
1124 
1125 	/*
1126 	 * Check if we already hold membership of this group for this inpcb.
1127 	 * If so, we do not need to perform the initial join.
1128 	 */
1129 	imo = inp_findmoptions(inp);
1130 	idx = imo_match_group(imo, ifp, &gsa->sa);
1131 	if (idx != -1) {
1132 		if (ssa->ss.ss_family != AF_UNSPEC) {
1133 			/*
1134 			 * Attempting to join an ASM group (when already
1135 			 * an ASM or SSM member) is an error.
1136 			 */
1137 			error = EADDRNOTAVAIL;
1138 		} else {
1139 			imf = &imo->imo_mfilters[idx];
1140 			if (imf->imf_nsources == 0) {
1141 				/*
1142 				 * Attempting to join an SSM group (when
1143 				 * already an ASM member) is an error.
1144 				 */
1145 				error = EINVAL;
1146 			} else {
1147 				/*
1148 				 * Attempting to join an SSM group (when
1149 				 * already an SSM member) means "add this
1150 				 * source to the inclusive filter list".
1151 				 */
1152 				error = imo_join_source(imo, idx, ssa);
1153 			}
1154 		}
1155 		goto out_locked;
1156 	}
1157 
1158 	/*
1159 	 * Call imo_grow() to reallocate the membership and source filter
1160 	 * vectors if they are full. If the size would exceed the hard limit,
1161 	 * then we know we've really run out of entries. We keep the INP
1162 	 * lock held to avoid introducing a race condition.
1163 	 */
1164 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1165 		error = imo_grow(imo);
1166 		if (error)
1167 			goto out_locked;
1168 	}
1169 
1170 	/*
1171 	 * So far, so good: perform the layer 3 join, layer 2 join,
1172 	 * and make an IGMP announcement if needed.
1173 	 */
1174 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1175 	if (inm == NULL) {
1176 		error = ENOBUFS;
1177 		goto out_locked;
1178 	}
1179 	idx = imo->imo_num_memberships;
1180 	imo->imo_membership[idx] = inm;
1181 	imo->imo_num_memberships++;
1182 
1183 	KASSERT(imo->imo_mfilters != NULL,
1184 	    ("%s: imf_mfilters vector was not allocated", __func__));
1185 	imf = &imo->imo_mfilters[idx];
1186 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1187 	    ("%s: imf_sources not empty", __func__));
1188 
1189 	/*
1190 	 * If this is a new SSM group join (i.e. a source was specified
1191 	 * with this group), add this source to the filter list.
1192 	 */
1193 	if (ssa->ss.ss_family != AF_UNSPEC) {
1194 		/*
1195 		 * An initial SSM join implies that this socket's membership
1196 		 * of the multicast group is now in inclusive mode.
1197 		 */
1198 		imf->imf_fmode = MCAST_INCLUDE;
1199 
1200 		error = imo_join_source(imo, idx, ssa);
1201 		if (error) {
1202 			/*
1203 			 * Drop inp lock before calling in_delmulti(),
1204 			 * to prevent a lock order reversal.
1205 			 */
1206 			--imo->imo_num_memberships;
1207 			INP_WUNLOCK(inp);
1208 			in_delmulti(inm);
1209 			return (error);
1210 		}
1211 	}
1212 
1213 out_locked:
1214 	INP_WUNLOCK(inp);
1215 	return (error);
1216 }
1217 
1218 /*
1219  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1220  */
1221 static int
1222 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1223 {
1224 	INIT_VNET_NET(curvnet);
1225 	INIT_VNET_INET(curvnet);
1226 	struct group_source_req		 gsr;
1227 	struct ip_mreq_source		 mreqs;
1228 	sockunion_t			*gsa, *ssa;
1229 	struct ifnet			*ifp;
1230 	struct in_mfilter		*imf;
1231 	struct ip_moptions		*imo;
1232 	struct in_msource		*ims, *tims;
1233 	struct in_multi			*inm;
1234 	size_t				 idx;
1235 	int				 error;
1236 
1237 	ifp = NULL;
1238 	error = 0;
1239 
1240 	memset(&gsr, 0, sizeof(struct group_source_req));
1241 	gsa = (sockunion_t *)&gsr.gsr_group;
1242 	gsa->ss.ss_family = AF_UNSPEC;
1243 	ssa = (sockunion_t *)&gsr.gsr_source;
1244 	ssa->ss.ss_family = AF_UNSPEC;
1245 
1246 	switch (sopt->sopt_name) {
1247 	case IP_DROP_MEMBERSHIP:
1248 	case IP_DROP_SOURCE_MEMBERSHIP:
1249 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1250 			error = sooptcopyin(sopt, &mreqs,
1251 			    sizeof(struct ip_mreq),
1252 			    sizeof(struct ip_mreq));
1253 			/*
1254 			 * Swap interface and sourceaddr arguments,
1255 			 * as ip_mreq and ip_mreq_source are laid
1256 			 * out differently.
1257 			 */
1258 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1259 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1260 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1261 			error = sooptcopyin(sopt, &mreqs,
1262 			    sizeof(struct ip_mreq_source),
1263 			    sizeof(struct ip_mreq_source));
1264 		}
1265 		if (error)
1266 			return (error);
1267 
1268 		gsa->sin.sin_family = AF_INET;
1269 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1270 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1271 
1272 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1273 			ssa->sin.sin_family = AF_INET;
1274 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1275 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1276 		}
1277 
1278 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1279 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1280 
1281 #ifdef DIAGNOSTIC
1282 		if (bootverbose) {
1283 			printf("%s: imr_interface = %s, ifp = %p\n",
1284 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1285 		}
1286 #endif
1287 		break;
1288 
1289 	case MCAST_LEAVE_GROUP:
1290 	case MCAST_LEAVE_SOURCE_GROUP:
1291 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1292 			error = sooptcopyin(sopt, &gsr,
1293 			    sizeof(struct group_req),
1294 			    sizeof(struct group_req));
1295 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1296 			error = sooptcopyin(sopt, &gsr,
1297 			    sizeof(struct group_source_req),
1298 			    sizeof(struct group_source_req));
1299 		}
1300 		if (error)
1301 			return (error);
1302 
1303 		if (gsa->sin.sin_family != AF_INET ||
1304 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1305 			return (EINVAL);
1306 
1307 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1308 			if (ssa->sin.sin_family != AF_INET ||
1309 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1310 				return (EINVAL);
1311 		}
1312 
1313 		if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface)
1314 			return (EADDRNOTAVAIL);
1315 
1316 		ifp = ifnet_byindex(gsr.gsr_interface);
1317 		break;
1318 
1319 	default:
1320 #ifdef DIAGNOSTIC
1321 		if (bootverbose) {
1322 			printf("%s: unknown sopt_name %d\n", __func__,
1323 			    sopt->sopt_name);
1324 		}
1325 #endif
1326 		return (EOPNOTSUPP);
1327 		break;
1328 	}
1329 
1330 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1331 		return (EINVAL);
1332 
1333 	/*
1334 	 * Find the membership in the membership array.
1335 	 */
1336 	imo = inp_findmoptions(inp);
1337 	idx = imo_match_group(imo, ifp, &gsa->sa);
1338 	if (idx == -1) {
1339 		error = EADDRNOTAVAIL;
1340 		goto out_locked;
1341 	}
1342 	imf = &imo->imo_mfilters[idx];
1343 
1344 	/*
1345 	 * If we were instructed only to leave a given source, do so.
1346 	 */
1347 	if (ssa->ss.ss_family != AF_UNSPEC) {
1348 		if (imf->imf_nsources == 0 ||
1349 		    imf->imf_fmode == MCAST_EXCLUDE) {
1350 			/*
1351 			 * Attempting to SSM leave an ASM group
1352 			 * is an error; should use *_BLOCK_SOURCE instead.
1353 			 * Attempting to SSM leave a source in a group when
1354 			 * the socket is in 'exclude mode' is also an error.
1355 			 */
1356 			error = EINVAL;
1357 		} else {
1358 			error = imo_leave_source(imo, idx, ssa);
1359 		}
1360 		/*
1361 		 * If an error occurred, or this source is not the last
1362 		 * source in the group, do not leave the whole group.
1363 		 */
1364 		if (error || imf->imf_nsources > 0)
1365 			goto out_locked;
1366 	}
1367 
1368 	/*
1369 	 * Give up the multicast address record to which the membership points.
1370 	 */
1371 	inm = imo->imo_membership[idx];
1372 	in_delmulti(inm);
1373 
1374 	/*
1375 	 * Free any source filters for this group if they exist.
1376 	 * Revert inpcb to the default MCAST_EXCLUDE state.
1377 	 */
1378 	if (imo->imo_mfilters != NULL) {
1379 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1380 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1381 			free(ims, M_IPMSOURCE);
1382 			imf->imf_nsources--;
1383 		}
1384 		KASSERT(imf->imf_nsources == 0,
1385 		    ("%s: imf_nsources not 0", __func__));
1386 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1387 		    ("%s: imf_sources not empty", __func__));
1388 		imf->imf_fmode = MCAST_EXCLUDE;
1389 	}
1390 
1391 	/*
1392 	 * Remove the gap in the membership array.
1393 	 */
1394 	for (++idx; idx < imo->imo_num_memberships; ++idx)
1395 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1396 	imo->imo_num_memberships--;
1397 
1398 out_locked:
1399 	INP_WUNLOCK(inp);
1400 	return (error);
1401 }
1402 
1403 /*
1404  * Select the interface for transmitting IPv4 multicast datagrams.
1405  *
1406  * Either an instance of struct in_addr or an instance of struct ip_mreqn
1407  * may be passed to this socket option. An address of INADDR_ANY or an
1408  * interface index of 0 is used to remove a previous selection.
1409  * When no interface is selected, one is chosen for every send.
1410  */
1411 static int
1412 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1413 {
1414 	INIT_VNET_NET(curvnet);
1415 	struct in_addr		 addr;
1416 	struct ip_mreqn		 mreqn;
1417 	struct ifnet		*ifp;
1418 	struct ip_moptions	*imo;
1419 	int			 error;
1420 
1421 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1422 		/*
1423 		 * An interface index was specified using the
1424 		 * Linux-derived ip_mreqn structure.
1425 		 */
1426 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1427 		    sizeof(struct ip_mreqn));
1428 		if (error)
1429 			return (error);
1430 
1431 		if (mreqn.imr_ifindex < 0 || V_if_index < mreqn.imr_ifindex)
1432 			return (EINVAL);
1433 
1434 		if (mreqn.imr_ifindex == 0) {
1435 			ifp = NULL;
1436 		} else {
1437 			ifp = ifnet_byindex(mreqn.imr_ifindex);
1438 			if (ifp == NULL)
1439 				return (EADDRNOTAVAIL);
1440 		}
1441 	} else {
1442 		/*
1443 		 * An interface was specified by IPv4 address.
1444 		 * This is the traditional BSD usage.
1445 		 */
1446 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1447 		    sizeof(struct in_addr));
1448 		if (error)
1449 			return (error);
1450 		if (addr.s_addr == INADDR_ANY) {
1451 			ifp = NULL;
1452 		} else {
1453 			INADDR_TO_IFP(addr, ifp);
1454 			if (ifp == NULL)
1455 				return (EADDRNOTAVAIL);
1456 		}
1457 #ifdef DIAGNOSTIC
1458 		if (bootverbose) {
1459 			printf("%s: ifp = %p, addr = %s\n",
1460 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1461 		}
1462 #endif
1463 	}
1464 
1465 	/* Reject interfaces which do not support multicast. */
1466 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1467 		return (EOPNOTSUPP);
1468 
1469 	imo = inp_findmoptions(inp);
1470 	imo->imo_multicast_ifp = ifp;
1471 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1472 	INP_WUNLOCK(inp);
1473 
1474 	return (0);
1475 }
1476 
1477 /*
1478  * Atomically set source filters on a socket for an IPv4 multicast group.
1479  */
1480 static int
1481 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1482 {
1483 	INIT_VNET_NET(curvnet);
1484 	struct __msfilterreq	 msfr;
1485 	sockunion_t		*gsa;
1486 	struct ifnet		*ifp;
1487 	struct in_mfilter	*imf;
1488 	struct ip_moptions	*imo;
1489 	struct in_msource	*ims, *tims;
1490 	size_t			 idx;
1491 	int			 error;
1492 
1493 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1494 	    sizeof(struct __msfilterreq));
1495 	if (error)
1496 		return (error);
1497 
1498 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1499 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1500 	     msfr.msfr_fmode != MCAST_INCLUDE))
1501 		return (EINVAL);
1502 
1503 	if (msfr.msfr_group.ss_family != AF_INET ||
1504 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1505 		return (EINVAL);
1506 
1507 	gsa = (sockunion_t *)&msfr.msfr_group;
1508 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1509 		return (EINVAL);
1510 
1511 	gsa->sin.sin_port = 0;	/* ignore port */
1512 
1513 	if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex)
1514 		return (EADDRNOTAVAIL);
1515 
1516 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1517 	if (ifp == NULL)
1518 		return (EADDRNOTAVAIL);
1519 
1520 	/*
1521 	 * Take the INP lock.
1522 	 * Check if this socket is a member of this group.
1523 	 */
1524 	imo = inp_findmoptions(inp);
1525 	idx = imo_match_group(imo, ifp, &gsa->sa);
1526 	if (idx == -1 || imo->imo_mfilters == NULL) {
1527 		error = EADDRNOTAVAIL;
1528 		goto out_locked;
1529 	}
1530 	imf = &imo->imo_mfilters[idx];
1531 
1532 #ifdef DIAGNOSTIC
1533 	if (bootverbose)
1534 		printf("%s: clearing source list\n", __func__);
1535 #endif
1536 
1537 	/*
1538 	 * Remove any existing source filters.
1539 	 */
1540 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1541 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1542 		free(ims, M_IPMSOURCE);
1543 		imf->imf_nsources--;
1544 	}
1545 	KASSERT(imf->imf_nsources == 0,
1546 	    ("%s: source list not cleared", __func__));
1547 
1548 	/*
1549 	 * Apply any new source filters, if present.
1550 	 */
1551 	if (msfr.msfr_nsrcs > 0) {
1552 		struct in_msource	**pnims;
1553 		struct in_msource	*nims;
1554 		struct sockaddr_storage	*kss;
1555 		struct sockaddr_storage	*pkss;
1556 		sockunion_t		*psu;
1557 		int			 i, j;
1558 
1559 		/*
1560 		 * Drop the inp lock so we may sleep if we need to
1561 		 * in order to satisfy a malloc request.
1562 		 * We will re-take it before changing socket state.
1563 		 */
1564 		INP_WUNLOCK(inp);
1565 #ifdef DIAGNOSTIC
1566 		if (bootverbose) {
1567 			printf("%s: loading %lu source list entries\n",
1568 			    __func__, (unsigned long)msfr.msfr_nsrcs);
1569 		}
1570 #endif
1571 		/*
1572 		 * Make a copy of the user-space source vector so
1573 		 * that we may copy them with a single copyin. This
1574 		 * allows us to deal with page faults up-front.
1575 		 */
1576 		kss = malloc(sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1577 		    M_TEMP, M_WAITOK);
1578 		error = copyin(msfr.msfr_srcs, kss,
1579 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1580 		if (error) {
1581 			free(kss, M_TEMP);
1582 			return (error);
1583 		}
1584 
1585 		/*
1586 		 * Perform argument checking on every sockaddr_storage
1587 		 * structure in the vector provided to us. Overwrite
1588 		 * fields which should not apply to source entries.
1589 		 * TODO: Check for duplicate sources on this pass.
1590 		 */
1591 		psu = (sockunion_t *)kss;
1592 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1593 			switch (psu->ss.ss_family) {
1594 			case AF_INET:
1595 				if (psu->sin.sin_len !=
1596 				    sizeof(struct sockaddr_in)) {
1597 					error = EINVAL;
1598 				} else {
1599 					psu->sin.sin_port = 0;
1600 				}
1601 				break;
1602 #ifdef notyet
1603 			case AF_INET6;
1604 				if (psu->sin6.sin6_len !=
1605 				    sizeof(struct sockaddr_in6)) {
1606 					error = EINVAL;
1607 				} else {
1608 					psu->sin6.sin6_port = 0;
1609 					psu->sin6.sin6_flowinfo = 0;
1610 				}
1611 				break;
1612 #endif
1613 			default:
1614 				error = EAFNOSUPPORT;
1615 				break;
1616 			}
1617 			if (error)
1618 				break;
1619 		}
1620 		if (error) {
1621 			free(kss, M_TEMP);
1622 			return (error);
1623 		}
1624 
1625 		/*
1626 		 * Allocate a block to track all the in_msource
1627 		 * entries we are about to allocate, in case we
1628 		 * abruptly need to free them.
1629 		 */
1630 		pnims = malloc(sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1631 		    M_TEMP, M_WAITOK | M_ZERO);
1632 
1633 		/*
1634 		 * Allocate up to nsrcs individual chunks.
1635 		 * If we encounter an error, backtrack out of
1636 		 * all allocations cleanly; updates must be atomic.
1637 		 */
1638 		pkss = kss;
1639 		nims = NULL;
1640 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1641 			nims = malloc(sizeof(struct in_msource) *
1642 			    msfr.msfr_nsrcs, M_IPMSOURCE, M_WAITOK | M_ZERO);
1643 			pnims[i] = nims;
1644 		}
1645 		if (i < msfr.msfr_nsrcs) {
1646 			for (j = 0; j < i; j++) {
1647 				if (pnims[j] != NULL)
1648 					free(pnims[j], M_IPMSOURCE);
1649 			}
1650 			free(pnims, M_TEMP);
1651 			free(kss, M_TEMP);
1652 			return (ENOBUFS);
1653 		}
1654 
1655 		INP_UNLOCK_ASSERT(inp);
1656 
1657 		/*
1658 		 * Finally, apply the filters to the socket.
1659 		 * Re-take the inp lock; we are changing socket state.
1660 		 */
1661 		pkss = kss;
1662 		INP_WLOCK(inp);
1663 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1664 			memcpy(&(pnims[i]->ims_addr), pkss,
1665 			    sizeof(struct sockaddr_storage));
1666 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1667 			    ims_next);
1668 			imf->imf_nsources++;
1669 		}
1670 		free(pnims, M_TEMP);
1671 		free(kss, M_TEMP);
1672 	}
1673 
1674 	/*
1675 	 * Update the filter mode on the socket before releasing the inpcb.
1676 	 */
1677 	INP_WLOCK_ASSERT(inp);
1678 	imf->imf_fmode = msfr.msfr_fmode;
1679 
1680 out_locked:
1681 	INP_WUNLOCK(inp);
1682 	return (error);
1683 }
1684 
1685 /*
1686  * Set the IP multicast options in response to user setsockopt().
1687  *
1688  * Many of the socket options handled in this function duplicate the
1689  * functionality of socket options in the regular unicast API. However,
1690  * it is not possible to merge the duplicate code, because the idempotence
1691  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1692  * the effects of these options must be treated as separate and distinct.
1693  */
1694 int
1695 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1696 {
1697 	struct ip_moptions	*imo;
1698 	int			 error;
1699 
1700 	error = 0;
1701 
1702 	/*
1703 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1704 	 * or is a divert socket, reject it.
1705 	 * XXX Unlocked read of inp_socket believed OK.
1706 	 */
1707 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1708 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1709 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1710 		return (EOPNOTSUPP);
1711 
1712 	switch (sopt->sopt_name) {
1713 	case IP_MULTICAST_VIF: {
1714 		int vifi;
1715 		/*
1716 		 * Select a multicast VIF for transmission.
1717 		 * Only useful if multicast forwarding is active.
1718 		 */
1719 		if (legal_vif_num == NULL) {
1720 			error = EOPNOTSUPP;
1721 			break;
1722 		}
1723 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1724 		if (error)
1725 			break;
1726 		if (!legal_vif_num(vifi) && (vifi != -1)) {
1727 			error = EINVAL;
1728 			break;
1729 		}
1730 		imo = inp_findmoptions(inp);
1731 		imo->imo_multicast_vif = vifi;
1732 		INP_WUNLOCK(inp);
1733 		break;
1734 	}
1735 
1736 	case IP_MULTICAST_IF:
1737 		error = inp_set_multicast_if(inp, sopt);
1738 		break;
1739 
1740 	case IP_MULTICAST_TTL: {
1741 		u_char ttl;
1742 
1743 		/*
1744 		 * Set the IP time-to-live for outgoing multicast packets.
1745 		 * The original multicast API required a char argument,
1746 		 * which is inconsistent with the rest of the socket API.
1747 		 * We allow either a char or an int.
1748 		 */
1749 		if (sopt->sopt_valsize == sizeof(u_char)) {
1750 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1751 			    sizeof(u_char));
1752 			if (error)
1753 				break;
1754 		} else {
1755 			u_int ittl;
1756 
1757 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1758 			    sizeof(u_int));
1759 			if (error)
1760 				break;
1761 			if (ittl > 255) {
1762 				error = EINVAL;
1763 				break;
1764 			}
1765 			ttl = (u_char)ittl;
1766 		}
1767 		imo = inp_findmoptions(inp);
1768 		imo->imo_multicast_ttl = ttl;
1769 		INP_WUNLOCK(inp);
1770 		break;
1771 	}
1772 
1773 	case IP_MULTICAST_LOOP: {
1774 		u_char loop;
1775 
1776 		/*
1777 		 * Set the loopback flag for outgoing multicast packets.
1778 		 * Must be zero or one.  The original multicast API required a
1779 		 * char argument, which is inconsistent with the rest
1780 		 * of the socket API.  We allow either a char or an int.
1781 		 */
1782 		if (sopt->sopt_valsize == sizeof(u_char)) {
1783 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1784 			    sizeof(u_char));
1785 			if (error)
1786 				break;
1787 		} else {
1788 			u_int iloop;
1789 
1790 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1791 					    sizeof(u_int));
1792 			if (error)
1793 				break;
1794 			loop = (u_char)iloop;
1795 		}
1796 		imo = inp_findmoptions(inp);
1797 		imo->imo_multicast_loop = !!loop;
1798 		INP_WUNLOCK(inp);
1799 		break;
1800 	}
1801 
1802 	case IP_ADD_MEMBERSHIP:
1803 	case IP_ADD_SOURCE_MEMBERSHIP:
1804 	case MCAST_JOIN_GROUP:
1805 	case MCAST_JOIN_SOURCE_GROUP:
1806 		error = inp_join_group(inp, sopt);
1807 		break;
1808 
1809 	case IP_DROP_MEMBERSHIP:
1810 	case IP_DROP_SOURCE_MEMBERSHIP:
1811 	case MCAST_LEAVE_GROUP:
1812 	case MCAST_LEAVE_SOURCE_GROUP:
1813 		error = inp_leave_group(inp, sopt);
1814 		break;
1815 
1816 	case IP_BLOCK_SOURCE:
1817 	case IP_UNBLOCK_SOURCE:
1818 	case MCAST_BLOCK_SOURCE:
1819 	case MCAST_UNBLOCK_SOURCE:
1820 		error = inp_change_source_filter(inp, sopt);
1821 		break;
1822 
1823 	case IP_MSFILTER:
1824 		error = inp_set_source_filters(inp, sopt);
1825 		break;
1826 
1827 	default:
1828 		error = EOPNOTSUPP;
1829 		break;
1830 	}
1831 
1832 	INP_UNLOCK_ASSERT(inp);
1833 
1834 	return (error);
1835 }
1836