xref: /freebsd/sys/netinet/ip_output.c (revision a8445737e740901f5f2c8d24c12ef7fc8b00134e)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34  *	$Id: ip_output.c,v 1.81 1998/08/23 03:07:14 wollman Exp $
35  */
36 
37 #define _IP_VHL
38 
39 #include "opt_ipfw.h"
40 #include "opt_ipdivert.h"
41 #include "opt_ipfilter.h"
42 
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 
51 #include <net/if.h>
52 #include <net/route.h>
53 
54 #include <netinet/in.h>
55 #include <netinet/in_systm.h>
56 #include <netinet/ip.h>
57 #include <netinet/in_pcb.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip_var.h>
60 
61 #ifdef vax
62 #include <machine/mtpr.h>
63 #endif
64 #include <machine/in_cksum.h>
65 
66 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
67 
68 #if !defined(COMPAT_IPFW) || COMPAT_IPFW == 1
69 #undef COMPAT_IPFW
70 #define COMPAT_IPFW 1
71 #else
72 #undef COMPAT_IPFW
73 #endif
74 
75 #ifdef COMPAT_IPFW
76 #include <netinet/ip_fw.h>
77 #endif
78 
79 #ifdef IPFIREWALL_FORWARD_DEBUG
80 #define print_ip(a)	 printf("%ld.%ld.%ld.%ld",(ntohl(a.s_addr)>>24)&0xFF,\
81 				 		  (ntohl(a.s_addr)>>16)&0xFF,\
82 						  (ntohl(a.s_addr)>>8)&0xFF,\
83 						  (ntohl(a.s_addr))&0xFF);
84 #endif
85 
86 u_short ip_id;
87 
88 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
89 static void	ip_mloopback
90 	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
91 static int	ip_getmoptions
92 	__P((struct sockopt *, struct ip_moptions *));
93 static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
94 static int	ip_setmoptions
95 	__P((struct sockopt *, struct ip_moptions **));
96 
97 #if defined(IPFILTER_LKM) || defined(IPFILTER)
98 int	ip_optcopy __P((struct ip *, struct ip *));
99 extern int (*fr_checkp) __P((struct ip *, int, struct ifnet *, int, struct mbuf **));
100 #else
101 static int	ip_optcopy __P((struct ip *, struct ip *));
102 #endif
103 
104 
105 extern	struct protosw inetsw[];
106 
107 /*
108  * IP output.  The packet in mbuf chain m contains a skeletal IP
109  * header (with len, off, ttl, proto, tos, src, dst).
110  * The mbuf chain containing the packet will be freed.
111  * The mbuf opt, if present, will not be freed.
112  */
113 int
114 ip_output(m0, opt, ro, flags, imo)
115 	struct mbuf *m0;
116 	struct mbuf *opt;
117 	struct route *ro;
118 	int flags;
119 	struct ip_moptions *imo;
120 {
121 	struct ip *ip, *mhip;
122 	struct ifnet *ifp;
123 	struct mbuf *m = m0;
124 	int hlen = sizeof (struct ip);
125 	int len, off, error = 0;
126 	struct sockaddr_in *dst;
127 	struct in_ifaddr *ia;
128 	int isbroadcast;
129 #ifdef IPFIREWALL_FORWARD
130 	int fwd_rewrite_src = 0;
131 #endif
132 
133 #ifdef	DIAGNOSTIC
134 	if ((m->m_flags & M_PKTHDR) == 0)
135 		panic("ip_output no HDR");
136 	if (!ro)
137 		panic("ip_output no route, proto = %d",
138 		      mtod(m, struct ip *)->ip_p);
139 #endif
140 	if (opt) {
141 		m = ip_insertoptions(m, opt, &len);
142 		hlen = len;
143 	}
144 	ip = mtod(m, struct ip *);
145 	/*
146 	 * Fill in IP header.
147 	 */
148 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
149 		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
150 		ip->ip_off &= IP_DF;
151 		ip->ip_id = htons(ip_id++);
152 		ipstat.ips_localout++;
153 	} else {
154 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
155 	}
156 
157 	dst = (struct sockaddr_in *)&ro->ro_dst;
158 	/*
159 	 * If there is a cached route,
160 	 * check that it is to the same destination
161 	 * and is still up.  If not, free it and try again.
162 	 */
163 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
164 	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
165 		RTFREE(ro->ro_rt);
166 		ro->ro_rt = (struct rtentry *)0;
167 	}
168 	if (ro->ro_rt == 0) {
169 		dst->sin_family = AF_INET;
170 		dst->sin_len = sizeof(*dst);
171 		dst->sin_addr = ip->ip_dst;
172 	}
173 	/*
174 	 * If routing to interface only,
175 	 * short circuit routing lookup.
176 	 */
177 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
178 #define sintosa(sin)	((struct sockaddr *)(sin))
179 	if (flags & IP_ROUTETOIF) {
180 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
181 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
182 			ipstat.ips_noroute++;
183 			error = ENETUNREACH;
184 			goto bad;
185 		}
186 		ifp = ia->ia_ifp;
187 		ip->ip_ttl = 1;
188 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
189 	} else {
190 		/*
191 		 * If this is the case, we probably don't want to allocate
192 		 * a protocol-cloned route since we didn't get one from the
193 		 * ULP.  This lets TCP do its thing, while not burdening
194 		 * forwarding or ICMP with the overhead of cloning a route.
195 		 * Of course, we still want to do any cloning requested by
196 		 * the link layer, as this is probably required in all cases
197 		 * for correct operation (as it is for ARP).
198 		 */
199 		if (ro->ro_rt == 0)
200 			rtalloc_ign(ro, RTF_PRCLONING);
201 		if (ro->ro_rt == 0) {
202 			ipstat.ips_noroute++;
203 			error = EHOSTUNREACH;
204 			goto bad;
205 		}
206 		ia = ifatoia(ro->ro_rt->rt_ifa);
207 		ifp = ro->ro_rt->rt_ifp;
208 		ro->ro_rt->rt_use++;
209 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
210 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
211 		if (ro->ro_rt->rt_flags & RTF_HOST)
212 			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
213 		else
214 			isbroadcast = in_broadcast(dst->sin_addr, ifp);
215 	}
216 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
217 		struct in_multi *inm;
218 
219 		m->m_flags |= M_MCAST;
220 		/*
221 		 * IP destination address is multicast.  Make sure "dst"
222 		 * still points to the address in "ro".  (It may have been
223 		 * changed to point to a gateway address, above.)
224 		 */
225 		dst = (struct sockaddr_in *)&ro->ro_dst;
226 		/*
227 		 * See if the caller provided any multicast options
228 		 */
229 		if (imo != NULL) {
230 			ip->ip_ttl = imo->imo_multicast_ttl;
231 			if (imo->imo_multicast_ifp != NULL)
232 				ifp = imo->imo_multicast_ifp;
233 			if (imo->imo_multicast_vif != -1)
234 				ip->ip_src.s_addr =
235 				    ip_mcast_src(imo->imo_multicast_vif);
236 		} else
237 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
238 		/*
239 		 * Confirm that the outgoing interface supports multicast.
240 		 */
241 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
242 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
243 				ipstat.ips_noroute++;
244 				error = ENETUNREACH;
245 				goto bad;
246 			}
247 		}
248 		/*
249 		 * If source address not specified yet, use address
250 		 * of outgoing interface.
251 		 */
252 		if (ip->ip_src.s_addr == INADDR_ANY) {
253 			register struct in_ifaddr *ia1;
254 
255 			for (ia1 = in_ifaddrhead.tqh_first; ia1;
256 			     ia1 = ia1->ia_link.tqe_next)
257 				if (ia1->ia_ifp == ifp) {
258 					ip->ip_src = IA_SIN(ia1)->sin_addr;
259 					break;
260 				}
261 		}
262 
263 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
264 		if (inm != NULL &&
265 		   (imo == NULL || imo->imo_multicast_loop)) {
266 			/*
267 			 * If we belong to the destination multicast group
268 			 * on the outgoing interface, and the caller did not
269 			 * forbid loopback, loop back a copy.
270 			 */
271 			ip_mloopback(ifp, m, dst, hlen);
272 		}
273 		else {
274 			/*
275 			 * If we are acting as a multicast router, perform
276 			 * multicast forwarding as if the packet had just
277 			 * arrived on the interface to which we are about
278 			 * to send.  The multicast forwarding function
279 			 * recursively calls this function, using the
280 			 * IP_FORWARDING flag to prevent infinite recursion.
281 			 *
282 			 * Multicasts that are looped back by ip_mloopback(),
283 			 * above, will be forwarded by the ip_input() routine,
284 			 * if necessary.
285 			 */
286 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
287 				/*
288 				 * Check if rsvp daemon is running. If not, don't
289 				 * set ip_moptions. This ensures that the packet
290 				 * is multicast and not just sent down one link
291 				 * as prescribed by rsvpd.
292 				 */
293 				if (!rsvp_on)
294 				  imo = NULL;
295 				if (ip_mforward(ip, ifp, m, imo) != 0) {
296 					m_freem(m);
297 					goto done;
298 				}
299 			}
300 		}
301 
302 		/*
303 		 * Multicasts with a time-to-live of zero may be looped-
304 		 * back, above, but must not be transmitted on a network.
305 		 * Also, multicasts addressed to the loopback interface
306 		 * are not sent -- the above call to ip_mloopback() will
307 		 * loop back a copy if this host actually belongs to the
308 		 * destination group on the loopback interface.
309 		 */
310 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
311 			m_freem(m);
312 			goto done;
313 		}
314 
315 		goto sendit;
316 	}
317 #ifndef notdef
318 	/*
319 	 * If source address not specified yet, use address
320 	 * of outgoing interface.
321 	 */
322 	if (ip->ip_src.s_addr == INADDR_ANY) {
323 		ip->ip_src = IA_SIN(ia)->sin_addr;
324 #ifdef IPFIREWALL_FORWARD
325 		/* Keep note that we did this - if the firewall changes
326 		 * the next-hop, our interface may change, changing the
327 		 * default source IP. It's a shame so much effort happens
328 		 * twice. Oh well.
329 		 */
330 		fwd_rewrite_src++;
331 #endif /* IPFIREWALL_FORWARD */
332 	}
333 #endif /* notdef */
334 	/*
335 	 * Verify that we have any chance at all of being able to queue
336 	 *      the packet or packet fragments
337 	 */
338 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
339 		ifp->if_snd.ifq_maxlen) {
340 			error = ENOBUFS;
341 			goto bad;
342 	}
343 
344 	/*
345 	 * Look for broadcast address and
346 	 * and verify user is allowed to send
347 	 * such a packet.
348 	 */
349 	if (isbroadcast) {
350 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
351 			error = EADDRNOTAVAIL;
352 			goto bad;
353 		}
354 		if ((flags & IP_ALLOWBROADCAST) == 0) {
355 			error = EACCES;
356 			goto bad;
357 		}
358 		/* don't allow broadcast messages to be fragmented */
359 		if ((u_short)ip->ip_len > ifp->if_mtu) {
360 			error = EMSGSIZE;
361 			goto bad;
362 		}
363 		m->m_flags |= M_BCAST;
364 	} else {
365 		m->m_flags &= ~M_BCAST;
366 	}
367 
368 sendit:
369 	/*
370 	 * IpHack's section.
371 	 * - Xlate: translate packet's addr/port (NAT).
372 	 * - Firewall: deny/allow/etc.
373 	 * - Wrap: fake packet's addr/port <unimpl.>
374 	 * - Encapsulate: put it in another IP and send out. <unimp.>
375 	 */
376 #if defined(IPFILTER) || defined(IPFILTER_LKM)
377 	if (fr_checkp) {
378 		struct  mbuf    *m1 = m;
379 
380 		if ((error = (*fr_checkp)(ip, hlen, ifp, 1, &m1)) || !m1)
381 			goto done;
382 		ip = mtod(m = m1, struct ip *);
383 	}
384 #endif
385 
386 #ifdef COMPAT_IPFW
387         if (ip_nat_ptr && !(*ip_nat_ptr)(&ip, &m, ifp, IP_NAT_OUT)) {
388 		error = EACCES;
389 		goto done;
390 	}
391 
392 	/*
393 	 * Check with the firewall...
394 	 */
395 	if (ip_fw_chk_ptr) {
396 #ifdef IPFIREWALL_FORWARD
397 		struct sockaddr_in *old = dst;
398 #endif
399 #ifdef IPDIVERT
400 		ip_divert_port = (*ip_fw_chk_ptr)(&ip,
401 		    hlen, ifp, &ip_divert_cookie, &m, &dst);
402 		if (ip_divert_port) {		/* Divert packet */
403 			(*inetsw[ip_protox[IPPROTO_DIVERT]].pr_input)(m, 0);
404 			goto done;
405 		}
406 #else	/* !IPDIVERT */
407 		u_int16_t 	dummy = 0;
408 		/* If ipfw says divert, we have to just drop packet */
409 		if ((*ip_fw_chk_ptr)(&ip, hlen, ifp, &dummy, &m, &dst)) {
410 			m_freem(m);
411 			goto done;
412 		}
413 #endif	/* !IPDIVERT */
414 		if (!m) {
415 			error = EACCES;
416 			goto done;
417 		}
418 #ifdef IPFIREWALL_FORWARD
419 		/* Here we check dst to make sure it's directly reachable on the
420 		 * interface we previously thought it was.
421 		 * If it isn't (which may be likely in some situations) we have
422 		 * to re-route it (ie, find a route for the next-hop and the
423 		 * associated interface) and set them here. This is nested
424 		 * forwarding which in most cases is undesirable, except where
425 		 * such control is nigh impossible. So we do it here.
426 		 * And I'm babbling.
427 		 */
428 		if (old != dst) {
429 			struct in_ifaddr *ia;
430 
431 			/* It's changed... */
432 			/* There must be a better way to do this next line... */
433 			static struct route sro_fwd, *ro_fwd = &sro_fwd;
434 #ifdef IPFIREWALL_FORWARD_DEBUG
435 			printf("IPFIREWALL_FORWARD: New dst ip: ");
436 			print_ip(dst->sin_addr);
437 			printf("\n");
438 #endif
439 			/*
440 			 * We need to figure out if we have been forwarded
441 			 * to a local socket. If so then we should somehow
442 			 * "loop back" to ip_input, and get directed to the
443 			 * PCB as if we had received this packet. This is
444 			 * because it may be dificult to identify the packets
445 			 * you want to forward until they are being output
446 			 * and have selected an interface. (e.g. locally
447 			 * initiated packets) If we used the loopback inteface,
448 			 * we would not be able to control what happens
449 			 * as the packet runs through ip_input() as
450 			 * it is done through a ISR.
451 			 */
452 			for (ia = TAILQ_FIRST(&in_ifaddrhead); ia;
453 					ia = TAILQ_NEXT(ia, ia_link)) {
454 				/*
455 				 * If the addr to forward to is one
456 				 * of ours, we pretend to
457 				 * be the destination for this packet.
458 				 */
459 				if (IA_SIN(ia)->sin_addr.s_addr ==
460 						 dst->sin_addr.s_addr)
461 					break;
462 			}
463 			if (ia) {
464 				/* tell ip_input "dont filter" */
465 				ip_fw_fwd_addr = dst;
466 				if (m->m_pkthdr.rcvif == NULL)
467 					m->m_pkthdr.rcvif = ifunit("lo0");
468 				ip->ip_len = htons((u_short)ip->ip_len);
469 				ip->ip_off = htons((u_short)ip->ip_off);
470 				ip->ip_sum = 0;
471 				if (ip->ip_vhl == IP_VHL_BORING) {
472 					ip->ip_sum = in_cksum_hdr(ip);
473 				} else {
474 					ip->ip_sum = in_cksum(m, hlen);
475 				}
476 				ip_input(m);
477 				goto done;
478 			}
479 			/* Some of the logic for this was
480 			 * nicked from above.
481 			 *
482 			 * This rewrites the cached route in a local PCB.
483 			 * Is this what we want to do?
484 			 */
485 			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
486 
487 			ro_fwd->ro_rt = 0;
488 			rtalloc_ign(ro_fwd, RTF_PRCLONING);
489 
490 			if (ro_fwd->ro_rt == 0) {
491 				ipstat.ips_noroute++;
492 				error = EHOSTUNREACH;
493 				goto bad;
494 			}
495 
496 			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
497 			ifp = ro_fwd->ro_rt->rt_ifp;
498 			ro_fwd->ro_rt->rt_use++;
499 			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
500 				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
501 			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
502 				isbroadcast =
503 				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
504 			else
505 				isbroadcast = in_broadcast(dst->sin_addr, ifp);
506 			RTFREE(ro->ro_rt);
507 			ro->ro_rt = ro_fwd->ro_rt;
508 			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
509 
510 			/*
511 			 * If we added a default src ip earlier,
512 			 * which would have been gotten from the-then
513 			 * interface, do it again, from the new one.
514 			 */
515 			if (fwd_rewrite_src)
516 				ip->ip_src = IA_SIN(ia)->sin_addr;
517 		}
518 #endif /* IPFIREWALL_FORWARD */
519 	}
520 #endif /* COMPAT_IPFW */
521 
522 
523 	/*
524 	 * If small enough for interface, can just send directly.
525 	 */
526 	if ((u_short)ip->ip_len <= ifp->if_mtu) {
527 		ip->ip_len = htons((u_short)ip->ip_len);
528 		ip->ip_off = htons((u_short)ip->ip_off);
529 		ip->ip_sum = 0;
530 		if (ip->ip_vhl == IP_VHL_BORING) {
531 			ip->ip_sum = in_cksum_hdr(ip);
532 		} else {
533 			ip->ip_sum = in_cksum(m, hlen);
534 		}
535 		error = (*ifp->if_output)(ifp, m,
536 				(struct sockaddr *)dst, ro->ro_rt);
537 		goto done;
538 	}
539 	/*
540 	 * Too large for interface; fragment if possible.
541 	 * Must be able to put at least 8 bytes per fragment.
542 	 */
543 	if (ip->ip_off & IP_DF) {
544 		error = EMSGSIZE;
545 		/*
546 		 * This case can happen if the user changed the MTU
547 		 * of an interface after enabling IP on it.  Because
548 		 * most netifs don't keep track of routes pointing to
549 		 * them, there is no way for one to update all its
550 		 * routes when the MTU is changed.
551 		 */
552 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
553 		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
554 		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
555 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
556 		}
557 		ipstat.ips_cantfrag++;
558 		goto bad;
559 	}
560 	len = (ifp->if_mtu - hlen) &~ 7;
561 	if (len < 8) {
562 		error = EMSGSIZE;
563 		goto bad;
564 	}
565 
566     {
567 	int mhlen, firstlen = len;
568 	struct mbuf **mnext = &m->m_nextpkt;
569 
570 	/*
571 	 * Loop through length of segment after first fragment,
572 	 * make new header and copy data of each part and link onto chain.
573 	 */
574 	m0 = m;
575 	mhlen = sizeof (struct ip);
576 	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
577 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
578 		if (m == 0) {
579 			error = ENOBUFS;
580 			ipstat.ips_odropped++;
581 			goto sendorfree;
582 		}
583 		m->m_flags |= (m0->m_flags & M_MCAST);
584 		m->m_data += max_linkhdr;
585 		mhip = mtod(m, struct ip *);
586 		*mhip = *ip;
587 		if (hlen > sizeof (struct ip)) {
588 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
589 			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
590 		}
591 		m->m_len = mhlen;
592 		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
593 		if (ip->ip_off & IP_MF)
594 			mhip->ip_off |= IP_MF;
595 		if (off + len >= (u_short)ip->ip_len)
596 			len = (u_short)ip->ip_len - off;
597 		else
598 			mhip->ip_off |= IP_MF;
599 		mhip->ip_len = htons((u_short)(len + mhlen));
600 		m->m_next = m_copy(m0, off, len);
601 		if (m->m_next == 0) {
602 			(void) m_free(m);
603 			error = ENOBUFS;	/* ??? */
604 			ipstat.ips_odropped++;
605 			goto sendorfree;
606 		}
607 		m->m_pkthdr.len = mhlen + len;
608 		m->m_pkthdr.rcvif = (struct ifnet *)0;
609 		mhip->ip_off = htons((u_short)mhip->ip_off);
610 		mhip->ip_sum = 0;
611 		if (mhip->ip_vhl == IP_VHL_BORING) {
612 			mhip->ip_sum = in_cksum_hdr(mhip);
613 		} else {
614 			mhip->ip_sum = in_cksum(m, mhlen);
615 		}
616 		*mnext = m;
617 		mnext = &m->m_nextpkt;
618 		ipstat.ips_ofragments++;
619 	}
620 	/*
621 	 * Update first fragment by trimming what's been copied out
622 	 * and updating header, then send each fragment (in order).
623 	 */
624 	m = m0;
625 	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
626 	m->m_pkthdr.len = hlen + firstlen;
627 	ip->ip_len = htons((u_short)m->m_pkthdr.len);
628 	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
629 	ip->ip_sum = 0;
630 	if (ip->ip_vhl == IP_VHL_BORING) {
631 		ip->ip_sum = in_cksum_hdr(ip);
632 	} else {
633 		ip->ip_sum = in_cksum(m, hlen);
634 	}
635 sendorfree:
636 	for (m = m0; m; m = m0) {
637 		m0 = m->m_nextpkt;
638 		m->m_nextpkt = 0;
639 		if (error == 0)
640 			error = (*ifp->if_output)(ifp, m,
641 			    (struct sockaddr *)dst, ro->ro_rt);
642 		else
643 			m_freem(m);
644 	}
645 
646 	if (error == 0)
647 		ipstat.ips_fragmented++;
648     }
649 done:
650 	return (error);
651 bad:
652 	m_freem(m0);
653 	goto done;
654 }
655 
656 /*
657  * Insert IP options into preformed packet.
658  * Adjust IP destination as required for IP source routing,
659  * as indicated by a non-zero in_addr at the start of the options.
660  *
661  * XXX This routine assumes that the packet has no options in place.
662  */
663 static struct mbuf *
664 ip_insertoptions(m, opt, phlen)
665 	register struct mbuf *m;
666 	struct mbuf *opt;
667 	int *phlen;
668 {
669 	register struct ipoption *p = mtod(opt, struct ipoption *);
670 	struct mbuf *n;
671 	register struct ip *ip = mtod(m, struct ip *);
672 	unsigned optlen;
673 
674 	optlen = opt->m_len - sizeof(p->ipopt_dst);
675 	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
676 		return (m);		/* XXX should fail */
677 	if (p->ipopt_dst.s_addr)
678 		ip->ip_dst = p->ipopt_dst;
679 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
680 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
681 		if (n == 0)
682 			return (m);
683 		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
684 		m->m_len -= sizeof(struct ip);
685 		m->m_data += sizeof(struct ip);
686 		n->m_next = m;
687 		m = n;
688 		m->m_len = optlen + sizeof(struct ip);
689 		m->m_data += max_linkhdr;
690 		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
691 	} else {
692 		m->m_data -= optlen;
693 		m->m_len += optlen;
694 		m->m_pkthdr.len += optlen;
695 		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
696 	}
697 	ip = mtod(m, struct ip *);
698 	bcopy(p->ipopt_list, ip + 1, optlen);
699 	*phlen = sizeof(struct ip) + optlen;
700 	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
701 	ip->ip_len += optlen;
702 	return (m);
703 }
704 
705 /*
706  * Copy options from ip to jp,
707  * omitting those not copied during fragmentation.
708  */
709 #if !defined(IPFILTER) && !defined(IPFILTER_LKM)
710 static
711 #endif
712 int
713 ip_optcopy(ip, jp)
714 	struct ip *ip, *jp;
715 {
716 	register u_char *cp, *dp;
717 	int opt, optlen, cnt;
718 
719 	cp = (u_char *)(ip + 1);
720 	dp = (u_char *)(jp + 1);
721 	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
722 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
723 		opt = cp[0];
724 		if (opt == IPOPT_EOL)
725 			break;
726 		if (opt == IPOPT_NOP) {
727 			/* Preserve for IP mcast tunnel's LSRR alignment. */
728 			*dp++ = IPOPT_NOP;
729 			optlen = 1;
730 			continue;
731 		} else
732 			optlen = cp[IPOPT_OLEN];
733 		/* bogus lengths should have been caught by ip_dooptions */
734 		if (optlen > cnt)
735 			optlen = cnt;
736 		if (IPOPT_COPIED(opt)) {
737 			bcopy(cp, dp, optlen);
738 			dp += optlen;
739 		}
740 	}
741 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
742 		*dp++ = IPOPT_EOL;
743 	return (optlen);
744 }
745 
746 /*
747  * IP socket option processing.
748  */
749 int
750 ip_ctloutput(so, sopt)
751 	struct socket *so;
752 	struct sockopt *sopt;
753 {
754 	struct	inpcb *inp = sotoinpcb(so);
755 	int	error, optval;
756 
757 	error = optval = 0;
758 	if (sopt->sopt_level != IPPROTO_IP) {
759 		return (EINVAL);
760 	}
761 
762 	switch (sopt->sopt_dir) {
763 	case SOPT_SET:
764 		switch (sopt->sopt_name) {
765 		case IP_OPTIONS:
766 #ifdef notyet
767 		case IP_RETOPTS:
768 #endif
769 		{
770 			struct mbuf *m;
771 			if (sopt->sopt_valsize > MLEN) {
772 				error = EMSGSIZE;
773 				break;
774 			}
775 			MGET(m, sopt->sopt_p ? M_WAIT : M_DONTWAIT, MT_HEADER);
776 			if (m == 0) {
777 				error = ENOBUFS;
778 				break;
779 			}
780 			m->m_len = sopt->sopt_valsize;
781 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
782 					    m->m_len);
783 
784 			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
785 					   m));
786 		}
787 
788 		case IP_TOS:
789 		case IP_TTL:
790 		case IP_RECVOPTS:
791 		case IP_RECVRETOPTS:
792 		case IP_RECVDSTADDR:
793 		case IP_RECVIF:
794 			error = sooptcopyin(sopt, &optval, sizeof optval,
795 					    sizeof optval);
796 			if (error)
797 				break;
798 
799 			switch (sopt->sopt_name) {
800 			case IP_TOS:
801 				inp->inp_ip_tos = optval;
802 				break;
803 
804 			case IP_TTL:
805 				inp->inp_ip_ttl = optval;
806 				break;
807 #define	OPTSET(bit) \
808 	if (optval) \
809 		inp->inp_flags |= bit; \
810 	else \
811 		inp->inp_flags &= ~bit;
812 
813 			case IP_RECVOPTS:
814 				OPTSET(INP_RECVOPTS);
815 				break;
816 
817 			case IP_RECVRETOPTS:
818 				OPTSET(INP_RECVRETOPTS);
819 				break;
820 
821 			case IP_RECVDSTADDR:
822 				OPTSET(INP_RECVDSTADDR);
823 				break;
824 
825 			case IP_RECVIF:
826 				OPTSET(INP_RECVIF);
827 				break;
828 			}
829 			break;
830 #undef OPTSET
831 
832 		case IP_MULTICAST_IF:
833 		case IP_MULTICAST_VIF:
834 		case IP_MULTICAST_TTL:
835 		case IP_MULTICAST_LOOP:
836 		case IP_ADD_MEMBERSHIP:
837 		case IP_DROP_MEMBERSHIP:
838 			error = ip_setmoptions(sopt, &inp->inp_moptions);
839 			break;
840 
841 		case IP_PORTRANGE:
842 			error = sooptcopyin(sopt, &optval, sizeof optval,
843 					    sizeof optval);
844 			if (error)
845 				break;
846 
847 			switch (optval) {
848 			case IP_PORTRANGE_DEFAULT:
849 				inp->inp_flags &= ~(INP_LOWPORT);
850 				inp->inp_flags &= ~(INP_HIGHPORT);
851 				break;
852 
853 			case IP_PORTRANGE_HIGH:
854 				inp->inp_flags &= ~(INP_LOWPORT);
855 				inp->inp_flags |= INP_HIGHPORT;
856 				break;
857 
858 			case IP_PORTRANGE_LOW:
859 				inp->inp_flags &= ~(INP_HIGHPORT);
860 				inp->inp_flags |= INP_LOWPORT;
861 				break;
862 
863 			default:
864 				error = EINVAL;
865 				break;
866 			}
867 			break;
868 
869 		default:
870 			error = ENOPROTOOPT;
871 			break;
872 		}
873 		break;
874 
875 	case SOPT_GET:
876 		switch (sopt->sopt_name) {
877 		case IP_OPTIONS:
878 		case IP_RETOPTS:
879 			if (inp->inp_options)
880 				error = sooptcopyout(sopt,
881 						     mtod(inp->inp_options,
882 							  char *),
883 						     inp->inp_options->m_len);
884 			else
885 				sopt->sopt_valsize = 0;
886 			break;
887 
888 		case IP_TOS:
889 		case IP_TTL:
890 		case IP_RECVOPTS:
891 		case IP_RECVRETOPTS:
892 		case IP_RECVDSTADDR:
893 		case IP_RECVIF:
894 		case IP_PORTRANGE:
895 			switch (sopt->sopt_name) {
896 
897 			case IP_TOS:
898 				optval = inp->inp_ip_tos;
899 				break;
900 
901 			case IP_TTL:
902 				optval = inp->inp_ip_ttl;
903 				break;
904 
905 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
906 
907 			case IP_RECVOPTS:
908 				optval = OPTBIT(INP_RECVOPTS);
909 				break;
910 
911 			case IP_RECVRETOPTS:
912 				optval = OPTBIT(INP_RECVRETOPTS);
913 				break;
914 
915 			case IP_RECVDSTADDR:
916 				optval = OPTBIT(INP_RECVDSTADDR);
917 				break;
918 
919 			case IP_RECVIF:
920 				optval = OPTBIT(INP_RECVIF);
921 				break;
922 
923 			case IP_PORTRANGE:
924 				if (inp->inp_flags & INP_HIGHPORT)
925 					optval = IP_PORTRANGE_HIGH;
926 				else if (inp->inp_flags & INP_LOWPORT)
927 					optval = IP_PORTRANGE_LOW;
928 				else
929 					optval = 0;
930 				break;
931 			}
932 			error = sooptcopyout(sopt, &optval, sizeof optval);
933 			break;
934 
935 		case IP_MULTICAST_IF:
936 		case IP_MULTICAST_VIF:
937 		case IP_MULTICAST_TTL:
938 		case IP_MULTICAST_LOOP:
939 		case IP_ADD_MEMBERSHIP:
940 		case IP_DROP_MEMBERSHIP:
941 			error = ip_getmoptions(sopt, inp->inp_moptions);
942 			break;
943 
944 		default:
945 			error = ENOPROTOOPT;
946 			break;
947 		}
948 		break;
949 	}
950 	return (error);
951 }
952 
953 /*
954  * Set up IP options in pcb for insertion in output packets.
955  * Store in mbuf with pointer in pcbopt, adding pseudo-option
956  * with destination address if source routed.
957  */
958 static int
959 ip_pcbopts(optname, pcbopt, m)
960 	int optname;
961 	struct mbuf **pcbopt;
962 	register struct mbuf *m;
963 {
964 	register int cnt, optlen;
965 	register u_char *cp;
966 	u_char opt;
967 
968 	/* turn off any old options */
969 	if (*pcbopt)
970 		(void)m_free(*pcbopt);
971 	*pcbopt = 0;
972 	if (m == (struct mbuf *)0 || m->m_len == 0) {
973 		/*
974 		 * Only turning off any previous options.
975 		 */
976 		if (m)
977 			(void)m_free(m);
978 		return (0);
979 	}
980 
981 #ifndef	vax
982 	if (m->m_len % sizeof(int32_t))
983 		goto bad;
984 #endif
985 	/*
986 	 * IP first-hop destination address will be stored before
987 	 * actual options; move other options back
988 	 * and clear it when none present.
989 	 */
990 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
991 		goto bad;
992 	cnt = m->m_len;
993 	m->m_len += sizeof(struct in_addr);
994 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
995 	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
996 	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
997 
998 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
999 		opt = cp[IPOPT_OPTVAL];
1000 		if (opt == IPOPT_EOL)
1001 			break;
1002 		if (opt == IPOPT_NOP)
1003 			optlen = 1;
1004 		else {
1005 			optlen = cp[IPOPT_OLEN];
1006 			if (optlen <= IPOPT_OLEN || optlen > cnt)
1007 				goto bad;
1008 		}
1009 		switch (opt) {
1010 
1011 		default:
1012 			break;
1013 
1014 		case IPOPT_LSRR:
1015 		case IPOPT_SSRR:
1016 			/*
1017 			 * user process specifies route as:
1018 			 *	->A->B->C->D
1019 			 * D must be our final destination (but we can't
1020 			 * check that since we may not have connected yet).
1021 			 * A is first hop destination, which doesn't appear in
1022 			 * actual IP option, but is stored before the options.
1023 			 */
1024 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1025 				goto bad;
1026 			m->m_len -= sizeof(struct in_addr);
1027 			cnt -= sizeof(struct in_addr);
1028 			optlen -= sizeof(struct in_addr);
1029 			cp[IPOPT_OLEN] = optlen;
1030 			/*
1031 			 * Move first hop before start of options.
1032 			 */
1033 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1034 			    sizeof(struct in_addr));
1035 			/*
1036 			 * Then copy rest of options back
1037 			 * to close up the deleted entry.
1038 			 */
1039 			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1040 			    sizeof(struct in_addr)),
1041 			    (caddr_t)&cp[IPOPT_OFFSET+1],
1042 			    (unsigned)cnt + sizeof(struct in_addr));
1043 			break;
1044 		}
1045 	}
1046 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1047 		goto bad;
1048 	*pcbopt = m;
1049 	return (0);
1050 
1051 bad:
1052 	(void)m_free(m);
1053 	return (EINVAL);
1054 }
1055 
1056 /*
1057  * XXX
1058  * The whole multicast option thing needs to be re-thought.
1059  * Several of these options are equally applicable to non-multicast
1060  * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1061  * standard option (IP_TTL).
1062  */
1063 /*
1064  * Set the IP multicast options in response to user setsockopt().
1065  */
1066 static int
1067 ip_setmoptions(sopt, imop)
1068 	struct sockopt *sopt;
1069 	struct ip_moptions **imop;
1070 {
1071 	int error = 0;
1072 	int i;
1073 	struct in_addr addr;
1074 	struct ip_mreq mreq;
1075 	struct ifnet *ifp;
1076 	struct ip_moptions *imo = *imop;
1077 	struct route ro;
1078 	struct sockaddr_in *dst;
1079 	int s;
1080 
1081 	if (imo == NULL) {
1082 		/*
1083 		 * No multicast option buffer attached to the pcb;
1084 		 * allocate one and initialize to default values.
1085 		 */
1086 		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1087 		    M_WAITOK);
1088 
1089 		if (imo == NULL)
1090 			return (ENOBUFS);
1091 		*imop = imo;
1092 		imo->imo_multicast_ifp = NULL;
1093 		imo->imo_multicast_vif = -1;
1094 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1095 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1096 		imo->imo_num_memberships = 0;
1097 	}
1098 
1099 	switch (sopt->sopt_name) {
1100 	/* store an index number for the vif you wanna use in the send */
1101 	case IP_MULTICAST_VIF:
1102 		if (legal_vif_num == 0) {
1103 			error = EOPNOTSUPP;
1104 			break;
1105 		}
1106 		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1107 		if (error)
1108 			break;
1109 		if (!legal_vif_num(i) && (i != -1)) {
1110 			error = EINVAL;
1111 			break;
1112 		}
1113 		imo->imo_multicast_vif = i;
1114 		break;
1115 
1116 	case IP_MULTICAST_IF:
1117 		/*
1118 		 * Select the interface for outgoing multicast packets.
1119 		 */
1120 		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1121 		if (error)
1122 			break;
1123 		/*
1124 		 * INADDR_ANY is used to remove a previous selection.
1125 		 * When no interface is selected, a default one is
1126 		 * chosen every time a multicast packet is sent.
1127 		 */
1128 		if (addr.s_addr == INADDR_ANY) {
1129 			imo->imo_multicast_ifp = NULL;
1130 			break;
1131 		}
1132 		/*
1133 		 * The selected interface is identified by its local
1134 		 * IP address.  Find the interface and confirm that
1135 		 * it supports multicasting.
1136 		 */
1137 		s = splimp();
1138 		INADDR_TO_IFP(addr, ifp);
1139 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1140 			splx(s);
1141 			error = EADDRNOTAVAIL;
1142 			break;
1143 		}
1144 		imo->imo_multicast_ifp = ifp;
1145 		splx(s);
1146 		break;
1147 
1148 	case IP_MULTICAST_TTL:
1149 		/*
1150 		 * Set the IP time-to-live for outgoing multicast packets.
1151 		 * The original multicast API required a char argument,
1152 		 * which is inconsistent with the rest of the socket API.
1153 		 * We allow either a char or an int.
1154 		 */
1155 		if (sopt->sopt_valsize == 1) {
1156 			u_char ttl;
1157 			error = sooptcopyin(sopt, &ttl, 1, 1);
1158 			if (error)
1159 				break;
1160 			imo->imo_multicast_ttl = ttl;
1161 		} else {
1162 			u_int ttl;
1163 			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1164 					    sizeof ttl);
1165 			if (error)
1166 				break;
1167 			if (ttl > 255)
1168 				error = EINVAL;
1169 			else
1170 				imo->imo_multicast_ttl = ttl;
1171 		}
1172 		break;
1173 
1174 	case IP_MULTICAST_LOOP:
1175 		/*
1176 		 * Set the loopback flag for outgoing multicast packets.
1177 		 * Must be zero or one.  The original multicast API required a
1178 		 * char argument, which is inconsistent with the rest
1179 		 * of the socket API.  We allow either a char or an int.
1180 		 */
1181 		if (sopt->sopt_valsize == 1) {
1182 			u_char loop;
1183 			error = sooptcopyin(sopt, &loop, 1, 1);
1184 			if (error)
1185 				break;
1186 			imo->imo_multicast_loop = !!loop;
1187 		} else {
1188 			u_int loop;
1189 			error = sooptcopyin(sopt, &loop, sizeof loop,
1190 					    sizeof loop);
1191 			if (error)
1192 				break;
1193 			imo->imo_multicast_loop = !!loop;
1194 		}
1195 		break;
1196 
1197 	case IP_ADD_MEMBERSHIP:
1198 		/*
1199 		 * Add a multicast group membership.
1200 		 * Group must be a valid IP multicast address.
1201 		 */
1202 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1203 		if (error)
1204 			break;
1205 
1206 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1207 			error = EINVAL;
1208 			break;
1209 		}
1210 		s = splimp();
1211 		/*
1212 		 * If no interface address was provided, use the interface of
1213 		 * the route to the given multicast address.
1214 		 */
1215 		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1216 			bzero((caddr_t)&ro, sizeof(ro));
1217 			dst = (struct sockaddr_in *)&ro.ro_dst;
1218 			dst->sin_len = sizeof(*dst);
1219 			dst->sin_family = AF_INET;
1220 			dst->sin_addr = mreq.imr_multiaddr;
1221 			rtalloc(&ro);
1222 			if (ro.ro_rt == NULL) {
1223 				error = EADDRNOTAVAIL;
1224 				splx(s);
1225 				break;
1226 			}
1227 			ifp = ro.ro_rt->rt_ifp;
1228 			rtfree(ro.ro_rt);
1229 		}
1230 		else {
1231 			INADDR_TO_IFP(mreq.imr_interface, ifp);
1232 		}
1233 
1234 		/*
1235 		 * See if we found an interface, and confirm that it
1236 		 * supports multicast.
1237 		 */
1238 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1239 			error = EADDRNOTAVAIL;
1240 			splx(s);
1241 			break;
1242 		}
1243 		/*
1244 		 * See if the membership already exists or if all the
1245 		 * membership slots are full.
1246 		 */
1247 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1248 			if (imo->imo_membership[i]->inm_ifp == ifp &&
1249 			    imo->imo_membership[i]->inm_addr.s_addr
1250 						== mreq.imr_multiaddr.s_addr)
1251 				break;
1252 		}
1253 		if (i < imo->imo_num_memberships) {
1254 			error = EADDRINUSE;
1255 			splx(s);
1256 			break;
1257 		}
1258 		if (i == IP_MAX_MEMBERSHIPS) {
1259 			error = ETOOMANYREFS;
1260 			splx(s);
1261 			break;
1262 		}
1263 		/*
1264 		 * Everything looks good; add a new record to the multicast
1265 		 * address list for the given interface.
1266 		 */
1267 		if ((imo->imo_membership[i] =
1268 		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1269 			error = ENOBUFS;
1270 			splx(s);
1271 			break;
1272 		}
1273 		++imo->imo_num_memberships;
1274 		splx(s);
1275 		break;
1276 
1277 	case IP_DROP_MEMBERSHIP:
1278 		/*
1279 		 * Drop a multicast group membership.
1280 		 * Group must be a valid IP multicast address.
1281 		 */
1282 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1283 		if (error)
1284 			break;
1285 
1286 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1287 			error = EINVAL;
1288 			break;
1289 		}
1290 
1291 		s = splimp();
1292 		/*
1293 		 * If an interface address was specified, get a pointer
1294 		 * to its ifnet structure.
1295 		 */
1296 		if (mreq.imr_interface.s_addr == INADDR_ANY)
1297 			ifp = NULL;
1298 		else {
1299 			INADDR_TO_IFP(mreq.imr_interface, ifp);
1300 			if (ifp == NULL) {
1301 				error = EADDRNOTAVAIL;
1302 				splx(s);
1303 				break;
1304 			}
1305 		}
1306 		/*
1307 		 * Find the membership in the membership array.
1308 		 */
1309 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1310 			if ((ifp == NULL ||
1311 			     imo->imo_membership[i]->inm_ifp == ifp) &&
1312 			     imo->imo_membership[i]->inm_addr.s_addr ==
1313 			     mreq.imr_multiaddr.s_addr)
1314 				break;
1315 		}
1316 		if (i == imo->imo_num_memberships) {
1317 			error = EADDRNOTAVAIL;
1318 			splx(s);
1319 			break;
1320 		}
1321 		/*
1322 		 * Give up the multicast address record to which the
1323 		 * membership points.
1324 		 */
1325 		in_delmulti(imo->imo_membership[i]);
1326 		/*
1327 		 * Remove the gap in the membership array.
1328 		 */
1329 		for (++i; i < imo->imo_num_memberships; ++i)
1330 			imo->imo_membership[i-1] = imo->imo_membership[i];
1331 		--imo->imo_num_memberships;
1332 		splx(s);
1333 		break;
1334 
1335 	default:
1336 		error = EOPNOTSUPP;
1337 		break;
1338 	}
1339 
1340 	/*
1341 	 * If all options have default values, no need to keep the mbuf.
1342 	 */
1343 	if (imo->imo_multicast_ifp == NULL &&
1344 	    imo->imo_multicast_vif == -1 &&
1345 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1346 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1347 	    imo->imo_num_memberships == 0) {
1348 		free(*imop, M_IPMOPTS);
1349 		*imop = NULL;
1350 	}
1351 
1352 	return (error);
1353 }
1354 
1355 /*
1356  * Return the IP multicast options in response to user getsockopt().
1357  */
1358 static int
1359 ip_getmoptions(sopt, imo)
1360 	struct sockopt *sopt;
1361 	register struct ip_moptions *imo;
1362 {
1363 	struct in_addr addr;
1364 	struct in_ifaddr *ia;
1365 	int error, optval;
1366 	u_char coptval;
1367 
1368 	error = 0;
1369 	switch (sopt->sopt_name) {
1370 	case IP_MULTICAST_VIF:
1371 		if (imo != NULL)
1372 			optval = imo->imo_multicast_vif;
1373 		else
1374 			optval = -1;
1375 		error = sooptcopyout(sopt, &optval, sizeof optval);
1376 		break;
1377 
1378 	case IP_MULTICAST_IF:
1379 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1380 			addr.s_addr = INADDR_ANY;
1381 		else {
1382 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1383 			addr.s_addr = (ia == NULL) ? INADDR_ANY
1384 				: IA_SIN(ia)->sin_addr.s_addr;
1385 		}
1386 		error = sooptcopyout(sopt, &addr, sizeof addr);
1387 		break;
1388 
1389 	case IP_MULTICAST_TTL:
1390 		if (imo == 0)
1391 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1392 		else
1393 			optval = coptval = imo->imo_multicast_ttl;
1394 		if (sopt->sopt_valsize == 1)
1395 			error = sooptcopyout(sopt, &coptval, 1);
1396 		else
1397 			error = sooptcopyout(sopt, &optval, sizeof optval);
1398 		break;
1399 
1400 	case IP_MULTICAST_LOOP:
1401 		if (imo == 0)
1402 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1403 		else
1404 			optval = coptval = imo->imo_multicast_loop;
1405 		if (sopt->sopt_valsize == 1)
1406 			error = sooptcopyout(sopt, &coptval, 1);
1407 		else
1408 			error = sooptcopyout(sopt, &optval, sizeof optval);
1409 		break;
1410 
1411 	default:
1412 		error = ENOPROTOOPT;
1413 		break;
1414 	}
1415 	return (error);
1416 }
1417 
1418 /*
1419  * Discard the IP multicast options.
1420  */
1421 void
1422 ip_freemoptions(imo)
1423 	register struct ip_moptions *imo;
1424 {
1425 	register int i;
1426 
1427 	if (imo != NULL) {
1428 		for (i = 0; i < imo->imo_num_memberships; ++i)
1429 			in_delmulti(imo->imo_membership[i]);
1430 		free(imo, M_IPMOPTS);
1431 	}
1432 }
1433 
1434 /*
1435  * Routine called from ip_output() to loop back a copy of an IP multicast
1436  * packet to the input queue of a specified interface.  Note that this
1437  * calls the output routine of the loopback "driver", but with an interface
1438  * pointer that might NOT be a loopback interface -- evil, but easier than
1439  * replicating that code here.
1440  */
1441 static void
1442 ip_mloopback(ifp, m, dst, hlen)
1443 	struct ifnet *ifp;
1444 	register struct mbuf *m;
1445 	register struct sockaddr_in *dst;
1446 	int hlen;
1447 {
1448 	register struct ip *ip;
1449 	struct mbuf *copym;
1450 
1451 	copym = m_copy(m, 0, M_COPYALL);
1452 	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1453 		copym = m_pullup(copym, hlen);
1454 	if (copym != NULL) {
1455 		/*
1456 		 * We don't bother to fragment if the IP length is greater
1457 		 * than the interface's MTU.  Can this possibly matter?
1458 		 */
1459 		ip = mtod(copym, struct ip *);
1460 		ip->ip_len = htons((u_short)ip->ip_len);
1461 		ip->ip_off = htons((u_short)ip->ip_off);
1462 		ip->ip_sum = 0;
1463 		if (ip->ip_vhl == IP_VHL_BORING) {
1464 			ip->ip_sum = in_cksum_hdr(ip);
1465 		} else {
1466 			ip->ip_sum = in_cksum(copym, hlen);
1467 		}
1468 		/*
1469 		 * NB:
1470 		 * It's not clear whether there are any lingering
1471 		 * reentrancy problems in other areas which might
1472 		 * be exposed by using ip_input directly (in
1473 		 * particular, everything which modifies the packet
1474 		 * in-place).  Yet another option is using the
1475 		 * protosw directly to deliver the looped back
1476 		 * packet.  For the moment, we'll err on the side
1477 		 * of safety by using if_simloop().
1478 		 */
1479 #if 1 /* XXX */
1480 		if (dst->sin_family != AF_INET) {
1481 			printf("ip_mloopback: bad address family %d\n",
1482 						dst->sin_family);
1483 			dst->sin_family = AF_INET;
1484 		}
1485 #endif
1486 
1487 #ifdef notdef
1488 		copym->m_pkthdr.rcvif = ifp;
1489 		ip_input(copym);
1490 #else
1491 		if_simloop(ifp, copym, (struct sockaddr *)dst, 0);
1492 #endif
1493 	}
1494 }
1495