xref: /freebsd/sys/netinet/in_mcast.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /*-
2  * Copyright (c) 2007 Bruce M. Simpson.
3  * Copyright (c) 2005 Robert N. M. Watson.
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote
15  *    products derived from this software without specific prior written
16  *    permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 /*
32  * IPv4 multicast socket, group, and socket option processing module.
33  * Until further notice, this file requires INET to compile.
34  * TODO: Make this infrastructure independent of address family.
35  * TODO: Teach netinet6 to use this code.
36  * TODO: Hook up SSM logic to IGMPv3/MLDv2.
37  */
38 
39 #include <sys/cdefs.h>
40 __FBSDID("$FreeBSD$");
41 
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/sysctl.h>
51 
52 #include <net/if.h>
53 #include <net/if_dl.h>
54 #include <net/route.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_pcb.h>
59 #include <netinet/in_var.h>
60 #include <netinet/ip_var.h>
61 #include <netinet/igmp_var.h>
62 
/*
 * Union overlaying the socket address structures handled in this file,
 * so that a single object can be viewed as a generic sockaddr, a
 * link-layer address, an IPv4 address or (with INET6) an IPv6 address.
 * The __SOCKUNION_DECLARED guard keeps it from being declared twice.
 */
#ifndef __SOCKUNION_DECLARED
union sockunion {
	struct sockaddr_storage	ss;
	struct sockaddr		sa;
	struct sockaddr_dl	sdl;
	struct sockaddr_in	sin;
#ifdef INET6
	struct sockaddr_in6	sin6;
#endif
};
typedef union sockunion sockunion_t;
#define __SOCKUNION_DECLARED
#endif /* __SOCKUNION_DECLARED */
76 
/*
 * Kernel malloc types used by this file:
 *  M_IPMADDR   - in_multi group records (see in_addmulti()).
 *  M_IPMOPTS   - ip_moptions structures and their membership vectors.
 *  M_IPMSOURCE - in_mfilter vectors and in_msource filter entries.
 */
static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group");
static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options");
static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter");
80 
/*
 * The IPv4 multicast list (in_multihead and its associated structures) is
 * protected by the global in_multi_mtx.  See in_var.h for more details.  For
 * now, in_multi_mtx is marked as recursive (MTX_RECURSE) because IGMP calls
 * back into ip_output() to send IGMP packets while holding the lock; this
 * is probably not desirable.
 */
struct in_multihead in_multihead;	/* XXX BSS initialization */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
91 
/*
 * Functions with non-static linkage defined in this file should be
 * declared in in_var.h:
 *  imo_match_group()
 *  imo_match_source()
 *  in_addmulti()
 *  in_delmulti()
 *  in_delmulti_locked()
 * and ip_var.h:
 *  inp_freemoptions()
 *  inp_getmoptions()
 *  inp_setmoptions()
 */
/* Prototypes for the functions with static linkage in this file. */
static int	imo_grow(struct ip_moptions *);
static int	imo_join_source(struct ip_moptions *, size_t, sockunion_t *);
static int	imo_leave_source(struct ip_moptions *, size_t, sockunion_t *);
static int	inp_change_source_filter(struct inpcb *, struct sockopt *);
static struct ip_moptions *
		inp_findmoptions(struct inpcb *);
static int	inp_get_source_filters(struct inpcb *, struct sockopt *);
static int	inp_join_group(struct inpcb *, struct sockopt *);
static int	inp_leave_group(struct inpcb *, struct sockopt *);
static int	inp_set_multicast_if(struct inpcb *, struct sockopt *);
static int	inp_set_source_filters(struct inpcb *, struct sockopt *);
116 
117 /*
118  * Resize the ip_moptions vector to the next power-of-two minus 1.
119  * May be called with locks held; do not sleep.
120  */
121 static int
122 imo_grow(struct ip_moptions *imo)
123 {
124 	struct in_multi		**nmships;
125 	struct in_multi		**omships;
126 	struct in_mfilter	 *nmfilters;
127 	struct in_mfilter	 *omfilters;
128 	size_t			  idx;
129 	size_t			  newmax;
130 	size_t			  oldmax;
131 
132 	nmships = NULL;
133 	nmfilters = NULL;
134 	omships = imo->imo_membership;
135 	omfilters = imo->imo_mfilters;
136 	oldmax = imo->imo_max_memberships;
137 	newmax = ((oldmax + 1) * 2) - 1;
138 
139 	if (newmax <= IP_MAX_MEMBERSHIPS) {
140 		nmships = (struct in_multi **)realloc(omships,
141 		    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
142 		nmfilters = (struct in_mfilter *)realloc(omfilters,
143 		    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
144 		if (nmships != NULL && nmfilters != NULL) {
145 			/* Initialize newly allocated source filter heads. */
146 			for (idx = oldmax; idx < newmax; idx++) {
147 				nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
148 				nmfilters[idx].imf_nsources = 0;
149 				TAILQ_INIT(&nmfilters[idx].imf_sources);
150 			}
151 			imo->imo_max_memberships = newmax;
152 			imo->imo_membership = nmships;
153 			imo->imo_mfilters = nmfilters;
154 		}
155 	}
156 
157 	if (nmships == NULL || nmfilters == NULL) {
158 		if (nmships != NULL)
159 			free(nmships, M_IPMOPTS);
160 		if (nmfilters != NULL)
161 			free(nmfilters, M_IPMSOURCE);
162 		return (ETOOMANYREFS);
163 	}
164 
165 	return (0);
166 }
167 
168 /*
169  * Add a source to a multicast filter list.
170  * Assumes the associated inpcb is locked.
171  */
172 static int
173 imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
174 {
175 	struct in_msource	*ims, *nims;
176 	struct in_mfilter	*imf;
177 
178 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
179 	KASSERT(imo->imo_mfilters != NULL,
180 	    ("%s: imo_mfilters vector not allocated", __func__));
181 
182 	imf = &imo->imo_mfilters[gidx];
183 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
184 		return (ENOBUFS);
185 
186 	ims = imo_match_source(imo, gidx, &src->sa);
187 	if (ims != NULL)
188 		return (EADDRNOTAVAIL);
189 
190 	/* Do not sleep with inp lock held. */
191 	MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
192 	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
193 	if (nims == NULL)
194 		return (ENOBUFS);
195 
196 	nims->ims_addr = src->ss;
197 	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
198 	imf->imf_nsources++;
199 
200 	return (0);
201 }
202 
203 static int
204 imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
205 {
206 	struct in_msource	*ims;
207 	struct in_mfilter	*imf;
208 
209 	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
210 	KASSERT(imo->imo_mfilters != NULL,
211 	    ("%s: imo_mfilters vector not allocated", __func__));
212 
213 	imf = &imo->imo_mfilters[gidx];
214 	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
215 		return (ENOBUFS);
216 
217 	ims = imo_match_source(imo, gidx, &src->sa);
218 	if (ims == NULL)
219 		return (EADDRNOTAVAIL);
220 
221 	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
222 	FREE(ims, M_IPMSOURCE);
223 	imf->imf_nsources--;
224 
225 	return (0);
226 }
227 
228 /*
229  * Find an IPv4 multicast group entry for this ip_moptions instance
230  * which matches the specified group, and optionally an interface.
231  * Return its index into the array, or -1 if not found.
232  */
233 size_t
234 imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
235     struct sockaddr *group)
236 {
237 	sockunion_t	 *gsa;
238 	struct in_multi	**pinm;
239 	int		  idx;
240 	int		  nmships;
241 
242 	gsa = (sockunion_t *)group;
243 
244 	/* The imo_membership array may be lazy allocated. */
245 	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
246 		return (-1);
247 
248 	nmships = imo->imo_num_memberships;
249 	pinm = &imo->imo_membership[0];
250 	for (idx = 0; idx < nmships; idx++, pinm++) {
251 		if (*pinm == NULL)
252 			continue;
253 #if 0
254 		printf("%s: trying ifp = %p, inaddr = %s ", __func__,
255 		    ifp, inet_ntoa(gsa->sin.sin_addr));
256 		printf("against %p, %s\n",
257 		    (*pinm)->inm_ifp, inet_ntoa((*pinm)->inm_addr));
258 #endif
259 		if ((ifp == NULL || ((*pinm)->inm_ifp == ifp)) &&
260 		    (*pinm)->inm_addr.s_addr == gsa->sin.sin_addr.s_addr) {
261 			break;
262 		}
263 	}
264 	if (idx >= nmships)
265 		idx = -1;
266 
267 	return (idx);
268 }
269 
270 /*
271  * Find a multicast source entry for this imo which matches
272  * the given group index for this socket, and source address.
273  */
274 struct in_msource *
275 imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
276 {
277 	struct in_mfilter	*imf;
278 	struct in_msource	*ims, *pims;
279 
280 	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
281 	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
282 	    ("%s: invalid index %d\n", __func__, (int)gidx));
283 
284 	/* The imo_mfilters array may be lazy allocated. */
285 	if (imo->imo_mfilters == NULL)
286 		return (NULL);
287 
288 	pims = NULL;
289 	imf = &imo->imo_mfilters[gidx];
290 	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
291 		/*
292 		 * Perform bitwise comparison of two IPv4 addresses.
293 		 * TODO: Do the same for IPv6.
294 		 * Do not use sa_equal() for this as it is not aware of
295 		 * deeper structure in sockaddr_in or sockaddr_in6.
296 		 */
297 		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
298 		    ((struct sockaddr_in *)src)->sin_addr.s_addr) {
299 			pims = ims;
300 			break;
301 		}
302 	}
303 
304 	return (pims);
305 }
306 
307 /*
308  * Join an IPv4 multicast group.
309  */
310 struct in_multi *
311 in_addmulti(struct in_addr *ap, struct ifnet *ifp)
312 {
313 	struct in_multi *inm;
314 
315 	inm = NULL;
316 
317 	IFF_LOCKGIANT(ifp);
318 	IN_MULTI_LOCK();
319 
320 	IN_LOOKUP_MULTI(*ap, ifp, inm);
321 	if (inm != NULL) {
322 		/*
323 		 * If we already joined this group, just bump the
324 		 * refcount and return it.
325 		 */
326 		KASSERT(inm->inm_refcount >= 1,
327 		    ("%s: bad refcount %d", __func__, inm->inm_refcount));
328 		++inm->inm_refcount;
329 	} else do {
330 		sockunion_t		 gsa;
331 		struct ifmultiaddr	*ifma;
332 		struct in_multi		*ninm;
333 		int			 error;
334 
335 		memset(&gsa, 0, sizeof(gsa));
336 		gsa.sin.sin_family = AF_INET;
337 		gsa.sin.sin_len = sizeof(struct sockaddr_in);
338 		gsa.sin.sin_addr = *ap;
339 
340 		/*
341 		 * Check if a link-layer group is already associated
342 		 * with this network-layer group on the given ifnet.
343 		 * If so, bump the refcount on the existing network-layer
344 		 * group association and return it.
345 		 */
346 		error = if_addmulti(ifp, &gsa.sa, &ifma);
347 		if (error)
348 			break;
349 		if (ifma->ifma_protospec != NULL) {
350 			inm = (struct in_multi *)ifma->ifma_protospec;
351 #ifdef INVARIANTS
352 			if (inm->inm_ifma != ifma || inm->inm_ifp != ifp ||
353 			    inm->inm_addr.s_addr != ap->s_addr)
354 				panic("%s: ifma is inconsistent", __func__);
355 #endif
356 			++inm->inm_refcount;
357 			break;
358 		}
359 
360 		/*
361 		 * A new membership is needed; construct it and
362 		 * perform the IGMP join.
363 		 */
364 		ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO);
365 		if (ninm == NULL) {
366 			if_delmulti_ifma(ifma);
367 			break;
368 		}
369 		ninm->inm_addr = *ap;
370 		ninm->inm_ifp = ifp;
371 		ninm->inm_ifma = ifma;
372 		ninm->inm_refcount = 1;
373 		ifma->ifma_protospec = ninm;
374 		LIST_INSERT_HEAD(&in_multihead, ninm, inm_link);
375 
376 		igmp_joingroup(ninm);
377 
378 		inm = ninm;
379 	} while (0);
380 
381 	IN_MULTI_UNLOCK();
382 	IFF_UNLOCKGIANT(ifp);
383 
384 	return (inm);
385 }
386 
387 /*
388  * Leave an IPv4 multicast group.
389  * It is OK to call this routine if the underlying ifnet went away.
390  *
391  * XXX: To deal with the ifp going away, we cheat; the link-layer code in net
392  * will set ifma_ifp to NULL when the associated ifnet instance is detached
393  * from the system.
394  *
395  * The only reason we need to violate layers and check ifma_ifp here at all
396  * is because certain hardware drivers still require Giant to be held,
397  * and it must always be taken before other locks.
398  */
399 void
400 in_delmulti(struct in_multi *inm)
401 {
402 	struct ifnet *ifp;
403 
404 	KASSERT(inm != NULL, ("%s: inm is NULL", __func__));
405 	KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__));
406 	ifp = inm->inm_ifma->ifma_ifp;
407 
408 	if (ifp != NULL) {
409 		/*
410 		 * Sanity check that netinet's notion of ifp is the
411 		 * same as net's.
412 		 */
413 		KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__));
414 		IFF_LOCKGIANT(ifp);
415 	}
416 
417 	IN_MULTI_LOCK();
418 	in_delmulti_locked(inm);
419 	IN_MULTI_UNLOCK();
420 
421 	if (ifp != NULL)
422 		IFF_UNLOCKGIANT(ifp);
423 }
424 
425 /*
426  * Delete a multicast address record, with locks held.
427  *
428  * It is OK to call this routine if the ifp went away.
429  * Assumes that caller holds the IN_MULTI lock, and that
430  * Giant was taken before other locks if required by the hardware.
431  */
432 void
433 in_delmulti_locked(struct in_multi *inm)
434 {
435 	struct ifmultiaddr *ifma;
436 
437 	IN_MULTI_LOCK_ASSERT();
438 	KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__));
439 
440 	if (--inm->inm_refcount == 0) {
441 		igmp_leavegroup(inm);
442 
443 		ifma = inm->inm_ifma;
444 #ifdef DIAGNOSTIC
445 		if (bootverbose)
446 			printf("%s: purging ifma %p\n", __func__, ifma);
447 #endif
448 		KASSERT(ifma->ifma_protospec == inm,
449 		    ("%s: ifma_protospec != inm", __func__));
450 		ifma->ifma_protospec = NULL;
451 
452 		LIST_REMOVE(inm, inm_link);
453 		free(inm, M_IPMADDR);
454 
455 		if_delmulti_ifma(ifma);
456 	}
457 }
458 
459 /*
460  * Block or unblock an ASM/SSM multicast source on an inpcb.
461  */
462 static int
463 inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt)
464 {
465 	struct group_source_req		 gsr;
466 	sockunion_t			*gsa, *ssa;
467 	struct ifnet			*ifp;
468 	struct in_mfilter		*imf;
469 	struct ip_moptions		*imo;
470 	struct in_msource		*ims;
471 	size_t				 idx;
472 	int				 error;
473 	int				 block;
474 
475 	ifp = NULL;
476 	error = 0;
477 	block = 0;
478 
479 	memset(&gsr, 0, sizeof(struct group_source_req));
480 	gsa = (sockunion_t *)&gsr.gsr_group;
481 	ssa = (sockunion_t *)&gsr.gsr_source;
482 
483 	switch (sopt->sopt_name) {
484 	case IP_BLOCK_SOURCE:
485 	case IP_UNBLOCK_SOURCE: {
486 		struct ip_mreq_source	 mreqs;
487 
488 		error = sooptcopyin(sopt, &mreqs,
489 		    sizeof(struct ip_mreq_source),
490 		    sizeof(struct ip_mreq_source));
491 		if (error)
492 			return (error);
493 
494 		gsa->sin.sin_family = AF_INET;
495 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
496 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
497 
498 		ssa->sin.sin_family = AF_INET;
499 		ssa->sin.sin_len = sizeof(struct sockaddr_in);
500 		ssa->sin.sin_addr = mreqs.imr_sourceaddr;
501 
502 		if (mreqs.imr_interface.s_addr != INADDR_ANY)
503 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
504 
505 		if (sopt->sopt_name == IP_BLOCK_SOURCE)
506 			block = 1;
507 
508 #ifdef DIAGNOSTIC
509 		if (bootverbose) {
510 			printf("%s: imr_interface = %s, ifp = %p\n",
511 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
512 		}
513 #endif
514 		break;
515 	    }
516 
517 	case MCAST_BLOCK_SOURCE:
518 	case MCAST_UNBLOCK_SOURCE:
519 		error = sooptcopyin(sopt, &gsr,
520 		    sizeof(struct group_source_req),
521 		    sizeof(struct group_source_req));
522 		if (error)
523 			return (error);
524 
525 		if (gsa->sin.sin_family != AF_INET ||
526 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
527 			return (EINVAL);
528 
529 		if (ssa->sin.sin_family != AF_INET ||
530 		    ssa->sin.sin_len != sizeof(struct sockaddr_in))
531 			return (EINVAL);
532 
533 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
534 			return (EADDRNOTAVAIL);
535 
536 		ifp = ifnet_byindex(gsr.gsr_interface);
537 
538 		if (sopt->sopt_name == MCAST_BLOCK_SOURCE)
539 			block = 1;
540 		break;
541 
542 	default:
543 #ifdef DIAGNOSTIC
544 		if (bootverbose) {
545 			printf("%s: unknown sopt_name %d\n", __func__,
546 			    sopt->sopt_name);
547 		}
548 #endif
549 		return (EOPNOTSUPP);
550 		break;
551 	}
552 
553 	/* XXX INET6 */
554 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
555 		return (EINVAL);
556 
557 	/*
558 	 * Check if we are actually a member of this group.
559 	 */
560 	imo = inp_findmoptions(inp);
561 	idx = imo_match_group(imo, ifp, &gsa->sa);
562 	if (idx == -1 || imo->imo_mfilters == NULL) {
563 		error = EADDRNOTAVAIL;
564 		goto out_locked;
565 	}
566 
567 	KASSERT(imo->imo_mfilters != NULL,
568 	    ("%s: imo_mfilters not allocated", __func__));
569 	imf = &imo->imo_mfilters[idx];
570 
571 	/*
572 	 * SSM multicast truth table for block/unblock operations.
573 	 *
574 	 * Operation   Filter Mode  Entry exists?   Action
575 	 *
576 	 * block       exclude      no              add source to filter
577 	 * unblock     include      no              add source to filter
578 	 * block       include      no              EINVAL
579 	 * unblock     exclude      no              EINVAL
580 	 * block       exclude      yes             EADDRNOTAVAIL
581 	 * unblock     include      yes             EADDRNOTAVAIL
582 	 * block       include      yes             remove source from filter
583 	 * unblock     exclude      yes             remove source from filter
584 	 *
585 	 * FreeBSD does not explicitly distinguish between ASM and SSM
586 	 * mode sockets; all sockets are assumed to have a filter list.
587 	 */
588 #ifdef DIAGNOSTIC
589 	if (bootverbose) {
590 		printf("%s: imf_fmode is %s\n", __func__,
591 		    imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude");
592 	}
593 #endif
594 	ims = imo_match_source(imo, idx, &ssa->sa);
595 	if (ims == NULL) {
596 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
597 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
598 #ifdef DIAGNOSTIC
599 			if (bootverbose) {
600 				printf("%s: adding %s to filter list\n",
601 				    __func__, inet_ntoa(ssa->sin.sin_addr));
602 			}
603 #endif
604 			error = imo_join_source(imo, idx, ssa);
605 		}
606 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
607 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
608 			/*
609 			 * If the socket is in inclusive mode:
610 			 *  the source is already blocked as it has no entry.
611 			 * If the socket is in exclusive mode:
612 			 *  the source is already unblocked as it has no entry.
613 			 */
614 #ifdef DIAGNOSTIC
615 			if (bootverbose) {
616 				printf("%s: ims %p; %s already [un]blocked\n",
617 				    __func__, ims,
618 				    inet_ntoa(ssa->sin.sin_addr));
619 			}
620 #endif
621 			error = EINVAL;
622 		}
623 	} else {
624 		if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) ||
625 		    (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) {
626 			/*
627 			 * If the socket is in exclusive mode:
628 			 *  the source is already blocked as it has an entry.
629 			 * If the socket is in inclusive mode:
630 			 *  the source is already unblocked as it has an entry.
631 			 */
632 #ifdef DIAGNOSTIC
633 			if (bootverbose) {
634 				printf("%s: ims %p; %s already [un]blocked\n",
635 				    __func__, ims,
636 				    inet_ntoa(ssa->sin.sin_addr));
637 			}
638 #endif
639 			error = EADDRNOTAVAIL;
640 		}
641 		if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) ||
642 		    (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) {
643 #ifdef DIAGNOSTIC
644 			if (bootverbose) {
645 				printf("%s: removing %s from filter list\n",
646 				    __func__, inet_ntoa(ssa->sin.sin_addr));
647 			}
648 #endif
649 			error = imo_leave_source(imo, idx, ssa);
650 		}
651 	}
652 
653 out_locked:
654 	INP_UNLOCK(inp);
655 	return (error);
656 }
657 
658 /*
659  * Given an inpcb, return its multicast options structure pointer.  Accepts
660  * an unlocked inpcb pointer, but will return it locked.  May sleep.
661  */
662 static struct ip_moptions *
663 inp_findmoptions(struct inpcb *inp)
664 {
665 	struct ip_moptions	 *imo;
666 	struct in_multi		**immp;
667 	struct in_mfilter	 *imfp;
668 	size_t			  idx;
669 
670 	INP_LOCK(inp);
671 	if (inp->inp_moptions != NULL)
672 		return (inp->inp_moptions);
673 
674 	INP_UNLOCK(inp);
675 
676 	imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS,
677 	    M_WAITOK);
678 	immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS,
679 	    M_IPMOPTS, M_WAITOK | M_ZERO);
680 	imfp = (struct in_mfilter *)malloc(
681 	    sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS,
682 	    M_IPMSOURCE, M_WAITOK);
683 
684 	imo->imo_multicast_ifp = NULL;
685 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
686 	imo->imo_multicast_vif = -1;
687 	imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
688 	imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
689 	imo->imo_num_memberships = 0;
690 	imo->imo_max_memberships = IP_MIN_MEMBERSHIPS;
691 	imo->imo_membership = immp;
692 
693 	/* Initialize per-group source filters. */
694 	for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) {
695 		imfp[idx].imf_fmode = MCAST_EXCLUDE;
696 		imfp[idx].imf_nsources = 0;
697 		TAILQ_INIT(&imfp[idx].imf_sources);
698 	}
699 	imo->imo_mfilters = imfp;
700 
701 	INP_LOCK(inp);
702 	if (inp->inp_moptions != NULL) {
703 		free(imfp, M_IPMSOURCE);
704 		free(immp, M_IPMOPTS);
705 		free(imo, M_IPMOPTS);
706 		return (inp->inp_moptions);
707 	}
708 	inp->inp_moptions = imo;
709 	return (imo);
710 }
711 
712 /*
713  * Discard the IP multicast options (and source filters).
714  */
715 void
716 inp_freemoptions(struct ip_moptions *imo)
717 {
718 	struct in_mfilter	*imf;
719 	struct in_msource	*ims, *tims;
720 	size_t			 idx, nmships;
721 
722 	KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__));
723 
724 	nmships = imo->imo_num_memberships;
725 	for (idx = 0; idx < nmships; ++idx) {
726 		in_delmulti(imo->imo_membership[idx]);
727 
728 		if (imo->imo_mfilters != NULL) {
729 			imf = &imo->imo_mfilters[idx];
730 			TAILQ_FOREACH_SAFE(ims, &imf->imf_sources,
731 			    ims_next, tims) {
732 				TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
733 				FREE(ims, M_IPMSOURCE);
734 				imf->imf_nsources--;
735 			}
736 			KASSERT(imf->imf_nsources == 0,
737 			    ("%s: did not free all imf_nsources", __func__));
738 		}
739 	}
740 
741 	if (imo->imo_mfilters != NULL)
742 		free(imo->imo_mfilters, M_IPMSOURCE);
743 	free(imo->imo_membership, M_IPMOPTS);
744 	free(imo, M_IPMOPTS);
745 }
746 
747 /*
748  * Atomically get source filters on a socket for an IPv4 multicast group.
749  * Called with INP lock held; returns with lock released.
750  */
751 static int
752 inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt)
753 {
754 	struct __msfilterreq	 msfr;
755 	sockunion_t		*gsa;
756 	struct ifnet		*ifp;
757 	struct ip_moptions	*imo;
758 	struct in_mfilter	*imf;
759 	struct in_msource	*ims;
760 	struct sockaddr_storage	*ptss;
761 	struct sockaddr_storage	*tss;
762 	int			 error;
763 	size_t			 idx;
764 
765 	INP_LOCK_ASSERT(inp);
766 
767 	imo = inp->inp_moptions;
768 	KASSERT(imo != NULL, ("%s: null ip_moptions", __func__));
769 
770 	INP_UNLOCK(inp);
771 
772 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
773 	    sizeof(struct __msfilterreq));
774 	if (error)
775 		return (error);
776 
777 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
778 		return (EINVAL);
779 
780 	ifp = ifnet_byindex(msfr.msfr_ifindex);
781 	if (ifp == NULL)
782 		return (EINVAL);
783 
784 	INP_LOCK(inp);
785 
786 	/*
787 	 * Lookup group on the socket.
788 	 */
789 	gsa = (sockunion_t *)&msfr.msfr_group;
790 	idx = imo_match_group(imo, ifp, &gsa->sa);
791 	if (idx == -1 || imo->imo_mfilters == NULL) {
792 		INP_UNLOCK(inp);
793 		return (EADDRNOTAVAIL);
794 	}
795 
796 	imf = &imo->imo_mfilters[idx];
797 	msfr.msfr_fmode = imf->imf_fmode;
798 	msfr.msfr_nsrcs = imf->imf_nsources;
799 
800 	/*
801 	 * If the user specified a buffer, copy out the source filter
802 	 * entries to userland gracefully.
803 	 * msfr.msfr_nsrcs is always set to the total number of filter
804 	 * entries which the kernel currently has for this group.
805 	 */
806 	tss = NULL;
807 	if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) {
808 		/*
809 		 * Make a copy of the source vector so that we do not
810 		 * thrash the inpcb lock whilst copying it out.
811 		 * We only copy out the number of entries which userland
812 		 * has asked for, but we always tell userland how big the
813 		 * buffer really needs to be.
814 		 */
815 		MALLOC(tss, struct sockaddr_storage *,
816 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
817 		    M_TEMP, M_NOWAIT);
818 		if (tss == NULL) {
819 			error = ENOBUFS;
820 		} else {
821 			ptss = tss;
822 			TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
823 				memcpy(ptss++, &ims->ims_addr,
824 				    sizeof(struct sockaddr_storage));
825 			}
826 		}
827 	}
828 
829 	INP_UNLOCK(inp);
830 
831 	if (tss != NULL) {
832 		error = copyout(tss, msfr.msfr_srcs,
833 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
834 		FREE(tss, M_TEMP);
835 	}
836 
837 	if (error)
838 		return (error);
839 
840 	error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq));
841 
842 	return (error);
843 }
844 
845 /*
846  * Return the IP multicast options in response to user getsockopt().
847  */
848 int
849 inp_getmoptions(struct inpcb *inp, struct sockopt *sopt)
850 {
851 	struct ip_mreqn		 mreqn;
852 	struct ip_moptions	*imo;
853 	struct ifnet		*ifp;
854 	struct in_ifaddr	*ia;
855 	int			 error, optval;
856 	u_char			 coptval;
857 
858 	INP_LOCK(inp);
859 	imo = inp->inp_moptions;
860 	/*
861 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
862 	 * or is a divert socket, reject it.
863 	 */
864 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
865 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
866 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM)) {
867 		INP_UNLOCK(inp);
868 		return (EOPNOTSUPP);
869 	}
870 
871 	error = 0;
872 	switch (sopt->sopt_name) {
873 	case IP_MULTICAST_VIF:
874 		if (imo != NULL)
875 			optval = imo->imo_multicast_vif;
876 		else
877 			optval = -1;
878 		INP_UNLOCK(inp);
879 		error = sooptcopyout(sopt, &optval, sizeof(int));
880 		break;
881 
882 	case IP_MULTICAST_IF:
883 		memset(&mreqn, 0, sizeof(struct ip_mreqn));
884 		if (imo != NULL) {
885 			ifp = imo->imo_multicast_ifp;
886 			if (imo->imo_multicast_addr.s_addr != INADDR_ANY) {
887 				mreqn.imr_address = imo->imo_multicast_addr;
888 			} else if (ifp != NULL) {
889 				mreqn.imr_ifindex = ifp->if_index;
890 				IFP_TO_IA(ifp, ia);
891 				if (ia != NULL) {
892 					mreqn.imr_address =
893 					    IA_SIN(ia)->sin_addr;
894 				}
895 			}
896 		}
897 		INP_UNLOCK(inp);
898 		if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
899 			error = sooptcopyout(sopt, &mreqn,
900 			    sizeof(struct ip_mreqn));
901 		} else {
902 			error = sooptcopyout(sopt, &mreqn.imr_address,
903 			    sizeof(struct in_addr));
904 		}
905 		break;
906 
907 	case IP_MULTICAST_TTL:
908 		if (imo == 0)
909 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
910 		else
911 			optval = coptval = imo->imo_multicast_ttl;
912 		INP_UNLOCK(inp);
913 		if (sopt->sopt_valsize == sizeof(u_char))
914 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
915 		else
916 			error = sooptcopyout(sopt, &optval, sizeof(int));
917 		break;
918 
919 	case IP_MULTICAST_LOOP:
920 		if (imo == 0)
921 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
922 		else
923 			optval = coptval = imo->imo_multicast_loop;
924 		INP_UNLOCK(inp);
925 		if (sopt->sopt_valsize == sizeof(u_char))
926 			error = sooptcopyout(sopt, &coptval, sizeof(u_char));
927 		else
928 			error = sooptcopyout(sopt, &optval, sizeof(int));
929 		break;
930 
931 	case IP_MSFILTER:
932 		if (imo == NULL) {
933 			error = EADDRNOTAVAIL;
934 			INP_UNLOCK(inp);
935 		} else {
936 			error = inp_get_source_filters(inp, sopt);
937 		}
938 		break;
939 
940 	default:
941 		INP_UNLOCK(inp);
942 		error = ENOPROTOOPT;
943 		break;
944 	}
945 
946 	INP_UNLOCK_ASSERT(inp);
947 
948 	return (error);
949 }
950 
951 /*
952  * Join an IPv4 multicast group, possibly with a source.
953  */
954 static int
955 inp_join_group(struct inpcb *inp, struct sockopt *sopt)
956 {
957 	struct group_source_req		 gsr;
958 	sockunion_t			*gsa, *ssa;
959 	struct ifnet			*ifp;
960 	struct in_mfilter		*imf;
961 	struct ip_moptions		*imo;
962 	struct in_multi			*inm;
963 	size_t				 idx;
964 	int				 error;
965 
966 	ifp = NULL;
967 	error = 0;
968 
969 	memset(&gsr, 0, sizeof(struct group_source_req));
970 	gsa = (sockunion_t *)&gsr.gsr_group;
971 	gsa->ss.ss_family = AF_UNSPEC;
972 	ssa = (sockunion_t *)&gsr.gsr_source;
973 	ssa->ss.ss_family = AF_UNSPEC;
974 
975 	switch (sopt->sopt_name) {
976 	case IP_ADD_MEMBERSHIP:
977 	case IP_ADD_SOURCE_MEMBERSHIP: {
978 		struct ip_mreq_source	 mreqs;
979 
980 		if (sopt->sopt_name == IP_ADD_MEMBERSHIP) {
981 			error = sooptcopyin(sopt, &mreqs,
982 			    sizeof(struct ip_mreq),
983 			    sizeof(struct ip_mreq));
984 			/*
985 			 * Do argument switcharoo from ip_mreq into
986 			 * ip_mreq_source to avoid using two instances.
987 			 */
988 			mreqs.imr_interface = mreqs.imr_sourceaddr;
989 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
990 		} else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
991 			error = sooptcopyin(sopt, &mreqs,
992 			    sizeof(struct ip_mreq_source),
993 			    sizeof(struct ip_mreq_source));
994 		}
995 		if (error)
996 			return (error);
997 
998 		gsa->sin.sin_family = AF_INET;
999 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1000 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1001 
1002 		if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) {
1003 			ssa->sin.sin_family = AF_INET;
1004 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1005 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1006 		}
1007 
1008 		/*
1009 		 * Obtain ifp. If no interface address was provided,
1010 		 * use the interface of the route in the unicast FIB for
1011 		 * the given multicast destination; usually, this is the
1012 		 * default route.
1013 		 * If this lookup fails, attempt to use the first non-loopback
1014 		 * interface with multicast capability in the system as a
1015 		 * last resort. The legacy IPv4 ASM API requires that we do
1016 		 * this in order to allow groups to be joined when the routing
1017 		 * table has not yet been populated during boot.
1018 		 * If all of these conditions fail, return EADDRNOTAVAIL, and
1019 		 * reject the IPv4 multicast join.
1020 		 */
1021 		if (mreqs.imr_interface.s_addr != INADDR_ANY) {
1022 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1023 		} else {
1024 			struct route ro;
1025 
1026 			ro.ro_rt = NULL;
1027 			*(struct sockaddr_in *)&ro.ro_dst = gsa->sin;
1028 			rtalloc_ign(&ro, RTF_CLONING);
1029 			if (ro.ro_rt != NULL) {
1030 				ifp = ro.ro_rt->rt_ifp;
1031 				KASSERT(ifp != NULL, ("%s: null ifp",
1032 				    __func__));
1033 				RTFREE(ro.ro_rt);
1034 			} else {
1035 				struct in_ifaddr *ia;
1036 				struct ifnet *mfp = NULL;
1037 				TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link) {
1038 					mfp = ia->ia_ifp;
1039 					if (!(mfp->if_flags & IFF_LOOPBACK) &&
1040 					     (mfp->if_flags & IFF_MULTICAST)) {
1041 						ifp = mfp;
1042 						break;
1043 					}
1044 				}
1045 			}
1046 		}
1047 #ifdef DIAGNOSTIC
1048 		if (bootverbose) {
1049 			printf("%s: imr_interface = %s, ifp = %p\n",
1050 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1051 		}
1052 #endif
1053 		break;
1054 	}
1055 
1056 	case MCAST_JOIN_GROUP:
1057 	case MCAST_JOIN_SOURCE_GROUP:
1058 		if (sopt->sopt_name == MCAST_JOIN_GROUP) {
1059 			error = sooptcopyin(sopt, &gsr,
1060 			    sizeof(struct group_req),
1061 			    sizeof(struct group_req));
1062 		} else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1063 			error = sooptcopyin(sopt, &gsr,
1064 			    sizeof(struct group_source_req),
1065 			    sizeof(struct group_source_req));
1066 		}
1067 		if (error)
1068 			return (error);
1069 
1070 		if (gsa->sin.sin_family != AF_INET ||
1071 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1072 			return (EINVAL);
1073 
1074 		/*
1075 		 * Overwrite the port field if present, as the sockaddr
1076 		 * being copied in may be matched with a binary comparison.
1077 		 * XXX INET6
1078 		 */
1079 		gsa->sin.sin_port = 0;
1080 		if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) {
1081 			if (ssa->sin.sin_family != AF_INET ||
1082 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1083 				return (EINVAL);
1084 			ssa->sin.sin_port = 0;
1085 		}
1086 
1087 		/*
1088 		 * Obtain the ifp.
1089 		 */
1090 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1091 			return (EADDRNOTAVAIL);
1092 		ifp = ifnet_byindex(gsr.gsr_interface);
1093 
1094 		break;
1095 
1096 	default:
1097 #ifdef DIAGNOSTIC
1098 		if (bootverbose) {
1099 			printf("%s: unknown sopt_name %d\n", __func__,
1100 			    sopt->sopt_name);
1101 		}
1102 #endif
1103 		return (EOPNOTSUPP);
1104 		break;
1105 	}
1106 
1107 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1108 		return (EINVAL);
1109 
1110 	if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0)
1111 		return (EADDRNOTAVAIL);
1112 
1113 	/*
1114 	 * Check if we already hold membership of this group for this inpcb.
1115 	 * If so, we do not need to perform the initial join.
1116 	 */
1117 	imo = inp_findmoptions(inp);
1118 	idx = imo_match_group(imo, ifp, &gsa->sa);
1119 	if (idx != -1) {
1120 		if (ssa->ss.ss_family != AF_UNSPEC) {
1121 			/*
1122 			 * Attempting to join an ASM group (when already
1123 			 * an ASM or SSM member) is an error.
1124 			 */
1125 			error = EADDRNOTAVAIL;
1126 		} else {
1127 			imf = &imo->imo_mfilters[idx];
1128 			if (imf->imf_nsources == 0) {
1129 				/*
1130 				 * Attempting to join an SSM group (when
1131 				 * already an ASM member) is an error.
1132 				 */
1133 				error = EINVAL;
1134 			} else {
1135 				/*
1136 				 * Attempting to join an SSM group (when
1137 				 * already an SSM member) means "add this
1138 				 * source to the inclusive filter list".
1139 				 */
1140 				error = imo_join_source(imo, idx, ssa);
1141 			}
1142 		}
1143 		goto out_locked;
1144 	}
1145 
1146 	/*
1147 	 * Call imo_grow() to reallocate the membership and source filter
1148 	 * vectors if they are full. If the size would exceed the hard limit,
1149 	 * then we know we've really run out of entries. We keep the INP
1150 	 * lock held to avoid introducing a race condition.
1151 	 */
1152 	if (imo->imo_num_memberships == imo->imo_max_memberships) {
1153 		error = imo_grow(imo);
1154 		if (error)
1155 			goto out_locked;
1156 	}
1157 
1158 	/*
1159 	 * So far, so good: perform the layer 3 join, layer 2 join,
1160 	 * and make an IGMP announcement if needed.
1161 	 */
1162 	inm = in_addmulti(&gsa->sin.sin_addr, ifp);
1163 	if (inm == NULL) {
1164 		error = ENOBUFS;
1165 		goto out_locked;
1166 	}
1167 	idx = imo->imo_num_memberships;
1168 	imo->imo_membership[idx] = inm;
1169 	imo->imo_num_memberships++;
1170 
1171 	KASSERT(imo->imo_mfilters != NULL,
1172 	    ("%s: imf_mfilters vector was not allocated", __func__));
1173 	imf = &imo->imo_mfilters[idx];
1174 	KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1175 	    ("%s: imf_sources not empty", __func__));
1176 
1177 	/*
1178 	 * If this is a new SSM group join (i.e. a source was specified
1179 	 * with this group), add this source to the filter list.
1180 	 */
1181 	if (ssa->ss.ss_family != AF_UNSPEC) {
1182 		/*
1183 		 * An initial SSM join implies that this socket's membership
1184 		 * of the multicast group is now in inclusive mode.
1185 		 */
1186 		imf->imf_fmode = MCAST_INCLUDE;
1187 
1188 		error = imo_join_source(imo, idx, ssa);
1189 		if (error) {
1190 			/*
1191 			 * Drop inp lock before calling in_delmulti(),
1192 			 * to prevent a lock order reversal.
1193 			 */
1194 			--imo->imo_num_memberships;
1195 			INP_UNLOCK(inp);
1196 			in_delmulti(inm);
1197 			return (error);
1198 		}
1199 	}
1200 
1201 out_locked:
1202 	INP_UNLOCK(inp);
1203 	return (error);
1204 }
1205 
1206 /*
1207  * Leave an IPv4 multicast group on an inpcb, possibly with a source.
1208  */
1209 static int
1210 inp_leave_group(struct inpcb *inp, struct sockopt *sopt)
1211 {
1212 	struct group_source_req		 gsr;
1213 	struct ip_mreq_source		 mreqs;
1214 	sockunion_t			*gsa, *ssa;
1215 	struct ifnet			*ifp;
1216 	struct in_mfilter		*imf;
1217 	struct ip_moptions		*imo;
1218 	struct in_msource		*ims, *tims;
1219 	struct in_multi			*inm;
1220 	size_t				 idx;
1221 	int				 error;
1222 
1223 	ifp = NULL;
1224 	error = 0;
1225 
1226 	memset(&gsr, 0, sizeof(struct group_source_req));
1227 	gsa = (sockunion_t *)&gsr.gsr_group;
1228 	gsa->ss.ss_family = AF_UNSPEC;
1229 	ssa = (sockunion_t *)&gsr.gsr_source;
1230 	ssa->ss.ss_family = AF_UNSPEC;
1231 
1232 	switch (sopt->sopt_name) {
1233 	case IP_DROP_MEMBERSHIP:
1234 	case IP_DROP_SOURCE_MEMBERSHIP:
1235 		if (sopt->sopt_name == IP_DROP_MEMBERSHIP) {
1236 			error = sooptcopyin(sopt, &mreqs,
1237 			    sizeof(struct ip_mreq),
1238 			    sizeof(struct ip_mreq));
1239 			/*
1240 			 * Swap interface and sourceaddr arguments,
1241 			 * as ip_mreq and ip_mreq_source are laid
1242 			 * out differently.
1243 			 */
1244 			mreqs.imr_interface = mreqs.imr_sourceaddr;
1245 			mreqs.imr_sourceaddr.s_addr = INADDR_ANY;
1246 		} else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1247 			error = sooptcopyin(sopt, &mreqs,
1248 			    sizeof(struct ip_mreq_source),
1249 			    sizeof(struct ip_mreq_source));
1250 		}
1251 		if (error)
1252 			return (error);
1253 
1254 		gsa->sin.sin_family = AF_INET;
1255 		gsa->sin.sin_len = sizeof(struct sockaddr_in);
1256 		gsa->sin.sin_addr = mreqs.imr_multiaddr;
1257 
1258 		if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) {
1259 			ssa->sin.sin_family = AF_INET;
1260 			ssa->sin.sin_len = sizeof(struct sockaddr_in);
1261 			ssa->sin.sin_addr = mreqs.imr_sourceaddr;
1262 		}
1263 
1264 		if (gsa->sin.sin_addr.s_addr != INADDR_ANY)
1265 			INADDR_TO_IFP(mreqs.imr_interface, ifp);
1266 
1267 #ifdef DIAGNOSTIC
1268 		if (bootverbose) {
1269 			printf("%s: imr_interface = %s, ifp = %p\n",
1270 			    __func__, inet_ntoa(mreqs.imr_interface), ifp);
1271 		}
1272 #endif
1273 		break;
1274 
1275 	case MCAST_LEAVE_GROUP:
1276 	case MCAST_LEAVE_SOURCE_GROUP:
1277 		if (sopt->sopt_name == MCAST_LEAVE_GROUP) {
1278 			error = sooptcopyin(sopt, &gsr,
1279 			    sizeof(struct group_req),
1280 			    sizeof(struct group_req));
1281 		} else if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1282 			error = sooptcopyin(sopt, &gsr,
1283 			    sizeof(struct group_source_req),
1284 			    sizeof(struct group_source_req));
1285 		}
1286 		if (error)
1287 			return (error);
1288 
1289 		if (gsa->sin.sin_family != AF_INET ||
1290 		    gsa->sin.sin_len != sizeof(struct sockaddr_in))
1291 			return (EINVAL);
1292 
1293 		if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) {
1294 			if (ssa->sin.sin_family != AF_INET ||
1295 			    ssa->sin.sin_len != sizeof(struct sockaddr_in))
1296 				return (EINVAL);
1297 		}
1298 
1299 		if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface)
1300 			return (EADDRNOTAVAIL);
1301 
1302 		ifp = ifnet_byindex(gsr.gsr_interface);
1303 		break;
1304 
1305 	default:
1306 #ifdef DIAGNOSTIC
1307 		if (bootverbose) {
1308 			printf("%s: unknown sopt_name %d\n", __func__,
1309 			    sopt->sopt_name);
1310 		}
1311 #endif
1312 		return (EOPNOTSUPP);
1313 		break;
1314 	}
1315 
1316 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1317 		return (EINVAL);
1318 
1319 	/*
1320 	 * Find the membership in the membership array.
1321 	 */
1322 	imo = inp_findmoptions(inp);
1323 	idx = imo_match_group(imo, ifp, &gsa->sa);
1324 	if (idx == -1) {
1325 		error = EADDRNOTAVAIL;
1326 		goto out_locked;
1327 	}
1328 	imf = &imo->imo_mfilters[idx];
1329 
1330 	/*
1331 	 * If we were instructed only to leave a given source, do so.
1332 	 */
1333 	if (ssa->ss.ss_family != AF_UNSPEC) {
1334 		if (imf->imf_nsources == 0 ||
1335 		    imf->imf_fmode == MCAST_EXCLUDE) {
1336 			/*
1337 			 * Attempting to SSM leave an ASM group
1338 			 * is an error; should use *_BLOCK_SOURCE instead.
1339 			 * Attempting to SSM leave a source in a group when
1340 			 * the socket is in 'exclude mode' is also an error.
1341 			 */
1342 			error = EINVAL;
1343 		} else {
1344 			error = imo_leave_source(imo, idx, ssa);
1345 		}
1346 		/*
1347 		 * If an error occurred, or this source is not the last
1348 		 * source in the group, do not leave the whole group.
1349 		 */
1350 		if (error || imf->imf_nsources > 0)
1351 			goto out_locked;
1352 	}
1353 
1354 	/*
1355 	 * Give up the multicast address record to which the membership points.
1356 	 */
1357 	inm = imo->imo_membership[idx];
1358 	in_delmulti(inm);
1359 
1360 	/*
1361 	 * Free any source filters for this group if they exist.
1362 	 * Revert inpcb to the default MCAST_EXCLUDE state.
1363 	 */
1364 	if (imo->imo_mfilters != NULL) {
1365 		TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1366 			TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1367 			FREE(ims, M_IPMSOURCE);
1368 			imf->imf_nsources--;
1369 		}
1370 		KASSERT(imf->imf_nsources == 0,
1371 		    ("%s: imf_nsources not 0", __func__));
1372 		KASSERT(TAILQ_EMPTY(&imf->imf_sources),
1373 		    ("%s: imf_sources not empty", __func__));
1374 		imf->imf_fmode = MCAST_EXCLUDE;
1375 	}
1376 
1377 	/*
1378 	 * Remove the gap in the membership array.
1379 	 */
1380 	for (++idx; idx < imo->imo_num_memberships; ++idx)
1381 		imo->imo_membership[idx-1] = imo->imo_membership[idx];
1382 	imo->imo_num_memberships--;
1383 
1384 out_locked:
1385 	INP_UNLOCK(inp);
1386 	return (error);
1387 }
1388 
1389 /*
1390  * Select the interface for transmitting IPv4 multicast datagrams.
1391  *
1392  * Either an instance of struct in_addr or an instance of struct ip_mreqn
1393  * may be passed to this socket option. An address of INADDR_ANY or an
1394  * interface index of 0 is used to remove a previous selection.
1395  * When no interface is selected, one is chosen for every send.
1396  */
1397 static int
1398 inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt)
1399 {
1400 	struct in_addr		 addr;
1401 	struct ip_mreqn		 mreqn;
1402 	struct ifnet		*ifp;
1403 	struct ip_moptions	*imo;
1404 	int			 error;
1405 
1406 	if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) {
1407 		/*
1408 		 * An interface index was specified using the
1409 		 * Linux-derived ip_mreqn structure.
1410 		 */
1411 		error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn),
1412 		    sizeof(struct ip_mreqn));
1413 		if (error)
1414 			return (error);
1415 
1416 		if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex)
1417 			return (EINVAL);
1418 
1419 		if (mreqn.imr_ifindex == 0) {
1420 			ifp = NULL;
1421 		} else {
1422 			ifp = ifnet_byindex(mreqn.imr_ifindex);
1423 			if (ifp == NULL)
1424 				return (EADDRNOTAVAIL);
1425 		}
1426 	} else {
1427 		/*
1428 		 * An interface was specified by IPv4 address.
1429 		 * This is the traditional BSD usage.
1430 		 */
1431 		error = sooptcopyin(sopt, &addr, sizeof(struct in_addr),
1432 		    sizeof(struct in_addr));
1433 		if (error)
1434 			return (error);
1435 		if (addr.s_addr == INADDR_ANY) {
1436 			ifp = NULL;
1437 		} else {
1438 			INADDR_TO_IFP(addr, ifp);
1439 			if (ifp == NULL)
1440 				return (EADDRNOTAVAIL);
1441 		}
1442 #ifdef DIAGNOSTIC
1443 		if (bootverbose) {
1444 			printf("%s: ifp = %p, addr = %s\n",
1445 			    __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */
1446 		}
1447 #endif
1448 	}
1449 
1450 	/* Reject interfaces which do not support multicast. */
1451 	if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0)
1452 		return (EOPNOTSUPP);
1453 
1454 	imo = inp_findmoptions(inp);
1455 	imo->imo_multicast_ifp = ifp;
1456 	imo->imo_multicast_addr.s_addr = INADDR_ANY;
1457 	INP_UNLOCK(inp);
1458 
1459 	return (0);
1460 }
1461 
1462 /*
1463  * Atomically set source filters on a socket for an IPv4 multicast group.
1464  */
1465 static int
1466 inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt)
1467 {
1468 	struct __msfilterreq	 msfr;
1469 	sockunion_t		*gsa;
1470 	struct ifnet		*ifp;
1471 	struct in_mfilter	*imf;
1472 	struct ip_moptions	*imo;
1473 	struct in_msource	*ims, *tims;
1474 	size_t			 idx;
1475 	int			 error;
1476 
1477 	error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq),
1478 	    sizeof(struct __msfilterreq));
1479 	if (error)
1480 		return (error);
1481 
1482 	if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER ||
1483 	    (msfr.msfr_fmode != MCAST_EXCLUDE &&
1484 	     msfr.msfr_fmode != MCAST_INCLUDE))
1485 		return (EINVAL);
1486 
1487 	if (msfr.msfr_group.ss_family != AF_INET ||
1488 	    msfr.msfr_group.ss_len != sizeof(struct sockaddr_in))
1489 		return (EINVAL);
1490 
1491 	gsa = (sockunion_t *)&msfr.msfr_group;
1492 	if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr)))
1493 		return (EINVAL);
1494 
1495 	gsa->sin.sin_port = 0;	/* ignore port */
1496 
1497 	if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex)
1498 		return (EADDRNOTAVAIL);
1499 
1500 	ifp = ifnet_byindex(msfr.msfr_ifindex);
1501 	if (ifp == NULL)
1502 		return (EADDRNOTAVAIL);
1503 
1504 	/*
1505 	 * Take the INP lock.
1506 	 * Check if this socket is a member of this group.
1507 	 */
1508 	imo = inp_findmoptions(inp);
1509 	idx = imo_match_group(imo, ifp, &gsa->sa);
1510 	if (idx == -1 || imo->imo_mfilters == NULL) {
1511 		error = EADDRNOTAVAIL;
1512 		goto out_locked;
1513 	}
1514 	imf = &imo->imo_mfilters[idx];
1515 
1516 #ifdef DIAGNOSTIC
1517 	if (bootverbose)
1518 		printf("%s: clearing source list\n", __func__);
1519 #endif
1520 
1521 	/*
1522 	 * Remove any existing source filters.
1523 	 */
1524 	TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) {
1525 		TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
1526 		FREE(ims, M_IPMSOURCE);
1527 		imf->imf_nsources--;
1528 	}
1529 	KASSERT(imf->imf_nsources == 0,
1530 	    ("%s: source list not cleared", __func__));
1531 
1532 	/*
1533 	 * Apply any new source filters, if present.
1534 	 */
1535 	if (msfr.msfr_nsrcs > 0) {
1536 		struct in_msource	**pnims;
1537 		struct in_msource	*nims;
1538 		struct sockaddr_storage	*kss;
1539 		struct sockaddr_storage	*pkss;
1540 		sockunion_t		*psu;
1541 		int			 i, j;
1542 
1543 		/*
1544 		 * Drop the inp lock so we may sleep if we need to
1545 		 * in order to satisfy a malloc request.
1546 		 * We will re-take it before changing socket state.
1547 		 */
1548 		INP_UNLOCK(inp);
1549 #ifdef DIAGNOSTIC
1550 		if (bootverbose) {
1551 			printf("%s: loading %lu source list entries\n",
1552 			    __func__, (unsigned long)msfr.msfr_nsrcs);
1553 		}
1554 #endif
1555 		/*
1556 		 * Make a copy of the user-space source vector so
1557 		 * that we may copy them with a single copyin. This
1558 		 * allows us to deal with page faults up-front.
1559 		 */
1560 		MALLOC(kss, struct sockaddr_storage *,
1561 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs,
1562 		    M_TEMP, M_WAITOK);
1563 		error = copyin(msfr.msfr_srcs, kss,
1564 		    sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs);
1565 		if (error) {
1566 			FREE(kss, M_TEMP);
1567 			return (error);
1568 		}
1569 
1570 		/*
1571 		 * Perform argument checking on every sockaddr_storage
1572 		 * structure in the vector provided to us. Overwrite
1573 		 * fields which should not apply to source entries.
1574 		 * TODO: Check for duplicate sources on this pass.
1575 		 */
1576 		psu = (sockunion_t *)kss;
1577 		for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) {
1578 			switch (psu->ss.ss_family) {
1579 			case AF_INET:
1580 				if (psu->sin.sin_len !=
1581 				    sizeof(struct sockaddr_in)) {
1582 					error = EINVAL;
1583 				} else {
1584 					psu->sin.sin_port = 0;
1585 				}
1586 				break;
1587 #ifdef notyet
1588 			case AF_INET6;
1589 				if (psu->sin6.sin6_len !=
1590 				    sizeof(struct sockaddr_in6)) {
1591 					error = EINVAL;
1592 				} else {
1593 					psu->sin6.sin6_port = 0;
1594 					psu->sin6.sin6_flowinfo = 0;
1595 				}
1596 				break;
1597 #endif
1598 			default:
1599 				error = EAFNOSUPPORT;
1600 				break;
1601 			}
1602 			if (error)
1603 				break;
1604 		}
1605 		if (error) {
1606 			FREE(kss, M_TEMP);
1607 			return (error);
1608 		}
1609 
1610 		/*
1611 		 * Allocate a block to track all the in_msource
1612 		 * entries we are about to allocate, in case we
1613 		 * abruptly need to free them.
1614 		 */
1615 		MALLOC(pnims, struct in_msource **,
1616 		    sizeof(struct in_msource *) * msfr.msfr_nsrcs,
1617 		    M_TEMP, M_WAITOK | M_ZERO);
1618 
1619 		/*
1620 		 * Allocate up to nsrcs individual chunks.
1621 		 * If we encounter an error, backtrack out of
1622 		 * all allocations cleanly; updates must be atomic.
1623 		 */
1624 		pkss = kss;
1625 		nims = NULL;
1626 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1627 			MALLOC(nims, struct in_msource *,
1628 			    sizeof(struct in_msource) * msfr.msfr_nsrcs,
1629 			    M_IPMSOURCE, M_WAITOK | M_ZERO);
1630 			pnims[i] = nims;
1631 		}
1632 		if (i < msfr.msfr_nsrcs) {
1633 			for (j = 0; j < i; j++) {
1634 				if (pnims[j] != NULL)
1635 					FREE(pnims[j], M_IPMSOURCE);
1636 			}
1637 			FREE(pnims, M_TEMP);
1638 			FREE(kss, M_TEMP);
1639 			return (ENOBUFS);
1640 		}
1641 
1642 		INP_UNLOCK_ASSERT(inp);
1643 
1644 		/*
1645 		 * Finally, apply the filters to the socket.
1646 		 * Re-take the inp lock; we are changing socket state.
1647 		 */
1648 		pkss = kss;
1649 		INP_LOCK(inp);
1650 		for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) {
1651 			memcpy(&(pnims[i]->ims_addr), pkss,
1652 			    sizeof(struct sockaddr_storage));
1653 			TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i],
1654 			    ims_next);
1655 			imf->imf_nsources++;
1656 		}
1657 		FREE(pnims, M_TEMP);
1658 		FREE(kss, M_TEMP);
1659 	}
1660 
1661 	/*
1662 	 * Update the filter mode on the socket before releasing the inpcb.
1663 	 */
1664 	INP_LOCK_ASSERT(inp);
1665 	imf->imf_fmode = msfr.msfr_fmode;
1666 
1667 out_locked:
1668 	INP_UNLOCK(inp);
1669 	return (error);
1670 }
1671 
1672 /*
1673  * Set the IP multicast options in response to user setsockopt().
1674  *
1675  * Many of the socket options handled in this function duplicate the
1676  * functionality of socket options in the regular unicast API. However,
1677  * it is not possible to merge the duplicate code, because the idempotence
1678  * of the IPv4 multicast part of the BSD Sockets API must be preserved;
1679  * the effects of these options must be treated as separate and distinct.
1680  */
1681 int
1682 inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
1683 {
1684 	struct ip_moptions	*imo;
1685 	int			 error;
1686 
1687 	error = 0;
1688 
1689 	/*
1690 	 * If socket is neither of type SOCK_RAW or SOCK_DGRAM,
1691 	 * or is a divert socket, reject it.
1692 	 * XXX Unlocked read of inp_socket believed OK.
1693 	 */
1694 	if (inp->inp_socket->so_proto->pr_protocol == IPPROTO_DIVERT ||
1695 	    (inp->inp_socket->so_proto->pr_type != SOCK_RAW &&
1696 	    inp->inp_socket->so_proto->pr_type != SOCK_DGRAM))
1697 		return (EOPNOTSUPP);
1698 
1699 	switch (sopt->sopt_name) {
1700 	case IP_MULTICAST_VIF: {
1701 		int vifi;
1702 		/*
1703 		 * Select a multicast VIF for transmission.
1704 		 * Only useful if multicast forwarding is active.
1705 		 */
1706 		if (legal_vif_num == NULL) {
1707 			error = EOPNOTSUPP;
1708 			break;
1709 		}
1710 		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
1711 		if (error)
1712 			break;
1713 		if (!legal_vif_num(vifi) && (vifi != -1)) {
1714 			error = EINVAL;
1715 			break;
1716 		}
1717 		imo = inp_findmoptions(inp);
1718 		imo->imo_multicast_vif = vifi;
1719 		INP_UNLOCK(inp);
1720 		break;
1721 	}
1722 
1723 	case IP_MULTICAST_IF:
1724 		error = inp_set_multicast_if(inp, sopt);
1725 		break;
1726 
1727 	case IP_MULTICAST_TTL: {
1728 		u_char ttl;
1729 
1730 		/*
1731 		 * Set the IP time-to-live for outgoing multicast packets.
1732 		 * The original multicast API required a char argument,
1733 		 * which is inconsistent with the rest of the socket API.
1734 		 * We allow either a char or an int.
1735 		 */
1736 		if (sopt->sopt_valsize == sizeof(u_char)) {
1737 			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
1738 			    sizeof(u_char));
1739 			if (error)
1740 				break;
1741 		} else {
1742 			u_int ittl;
1743 
1744 			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
1745 			    sizeof(u_int));
1746 			if (error)
1747 				break;
1748 			if (ittl > 255) {
1749 				error = EINVAL;
1750 				break;
1751 			}
1752 			ttl = (u_char)ittl;
1753 		}
1754 		imo = inp_findmoptions(inp);
1755 		imo->imo_multicast_ttl = ttl;
1756 		INP_UNLOCK(inp);
1757 		break;
1758 	}
1759 
1760 	case IP_MULTICAST_LOOP: {
1761 		u_char loop;
1762 
1763 		/*
1764 		 * Set the loopback flag for outgoing multicast packets.
1765 		 * Must be zero or one.  The original multicast API required a
1766 		 * char argument, which is inconsistent with the rest
1767 		 * of the socket API.  We allow either a char or an int.
1768 		 */
1769 		if (sopt->sopt_valsize == sizeof(u_char)) {
1770 			error = sooptcopyin(sopt, &loop, sizeof(u_char),
1771 			    sizeof(u_char));
1772 			if (error)
1773 				break;
1774 		} else {
1775 			u_int iloop;
1776 
1777 			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
1778 					    sizeof(u_int));
1779 			if (error)
1780 				break;
1781 			loop = (u_char)iloop;
1782 		}
1783 		imo = inp_findmoptions(inp);
1784 		imo->imo_multicast_loop = !!loop;
1785 		INP_UNLOCK(inp);
1786 		break;
1787 	}
1788 
1789 	case IP_ADD_MEMBERSHIP:
1790 	case IP_ADD_SOURCE_MEMBERSHIP:
1791 	case MCAST_JOIN_GROUP:
1792 	case MCAST_JOIN_SOURCE_GROUP:
1793 		error = inp_join_group(inp, sopt);
1794 		break;
1795 
1796 	case IP_DROP_MEMBERSHIP:
1797 	case IP_DROP_SOURCE_MEMBERSHIP:
1798 	case MCAST_LEAVE_GROUP:
1799 	case MCAST_LEAVE_SOURCE_GROUP:
1800 		error = inp_leave_group(inp, sopt);
1801 		break;
1802 
1803 	case IP_BLOCK_SOURCE:
1804 	case IP_UNBLOCK_SOURCE:
1805 	case MCAST_BLOCK_SOURCE:
1806 	case MCAST_UNBLOCK_SOURCE:
1807 		error = inp_change_source_filter(inp, sopt);
1808 		break;
1809 
1810 	case IP_MSFILTER:
1811 		error = inp_set_source_filters(inp, sopt);
1812 		break;
1813 
1814 	default:
1815 		error = EOPNOTSUPP;
1816 		break;
1817 	}
1818 
1819 	INP_UNLOCK_ASSERT(inp);
1820 
1821 	return (error);
1822 }
1823