xref: /freebsd/sys/netinet/ip_output.c (revision 05c7a37afb48ddd5ee1bd921a5d46fe59cc70b15)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34  *	$Id: ip_output.c,v 1.33 1996/03/26 18:56:51 fenner Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/queue.h>
39 #include <sys/systm.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/errno.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 
47 #include <net/if.h>
48 #include <net/route.h>
49 
50 #include <netinet/in.h>
51 #include <netinet/in_systm.h>
52 #include <netinet/ip.h>
53 #include <netinet/in_pcb.h>
54 #include <netinet/in_var.h>
55 #include <netinet/ip_var.h>
56 
57 #ifdef vax
58 #include <machine/mtpr.h>
59 #endif
60 
61 u_short ip_id;
62 
63 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
64 static void	ip_mloopback
65 	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *));
66 static int	ip_getmoptions
67 	__P((int, struct ip_moptions *, struct mbuf **));
68 static int	ip_optcopy __P((struct ip *, struct ip *));
69 static int	ip_pcbopts __P((struct mbuf **, struct mbuf *));
70 static int	ip_setmoptions
71 	__P((int, struct ip_moptions **, struct mbuf *));
72 
73 /*
74  * IP output.  The packet in mbuf chain m contains a skeletal IP
75  * header (with len, off, ttl, proto, tos, src, dst).
76  * The mbuf chain containing the packet will be freed.
77  * The mbuf opt, if present, will not be freed.
78  */
79 int
80 ip_output(m0, opt, ro, flags, imo)
81 	struct mbuf *m0;
82 	struct mbuf *opt;
83 	struct route *ro;
84 	int flags;
85 	struct ip_moptions *imo;
86 {
87 	struct ip *ip, *mhip;
88 	struct ifnet *ifp;
89 	struct mbuf *m = m0;
90 	int hlen = sizeof (struct ip);
91 	int len, off, error = 0;
92 	/*
93 	 * It might seem obvious at first glance that one could easily
94 	 * make a one-behind cache out of this by simply making `iproute'
95 	 * static and eliminating the bzero() below.  However, this turns
96 	 * out not to work, for two reasons:
97 	 *
98 	 * 1) This routine needs to be reentrant.  It can be called
99 	 * recursively from encapsulating network interfaces, and it
100 	 * is always called recursively from ip_mforward().
101 	 *
102 	 * 2) You turn out not to gain much.  There is already a one-
103 	 * behind cache implemented for the specific case of forwarding,
104 	 * and sends on a connected socket will use a route associated
105 	 * with the PCB.  The only cases left are sends on unconnected
106 	 * and raw sockets, and if these cases are really significant,
107 	 * something is seriously wrong.
108 	 */
109 	struct route iproute;
110 	struct sockaddr_in *dst;
111 	struct in_ifaddr *ia;
112 
113 #ifdef	DIAGNOSTIC
114 	if ((m->m_flags & M_PKTHDR) == 0)
115 		panic("ip_output no HDR");
116 #endif
117 	if (opt) {
118 		m = ip_insertoptions(m, opt, &len);
119 		hlen = len;
120 	}
121 	ip = mtod(m, struct ip *);
122 	/*
123 	 * Fill in IP header.
124 	 */
125 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
126 		ip->ip_v = IPVERSION;
127 		ip->ip_off &= IP_DF;
128 		ip->ip_id = htons(ip_id++);
129 		ip->ip_hl = hlen >> 2;
130 		ipstat.ips_localout++;
131 	} else {
132 		hlen = ip->ip_hl << 2;
133 	}
134 	/*
135 	 * Route packet.
136 	 */
137 	if (ro == 0) {
138 		ro = &iproute;
139 		bzero((caddr_t)ro, sizeof (*ro));
140 	}
141 	dst = (struct sockaddr_in *)&ro->ro_dst;
142 	/*
143 	 * If there is a cached route,
144 	 * check that it is to the same destination
145 	 * and is still up.  If not, free it and try again.
146 	 */
147 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
148 	   dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
149 		RTFREE(ro->ro_rt);
150 		ro->ro_rt = (struct rtentry *)0;
151 	}
152 	if (ro->ro_rt == 0) {
153 		dst->sin_family = AF_INET;
154 		dst->sin_len = sizeof(*dst);
155 		dst->sin_addr = ip->ip_dst;
156 	}
157 	/*
158 	 * If routing to interface only,
159 	 * short circuit routing lookup.
160 	 */
161 #define ifatoia(ifa)	((struct in_ifaddr *)(ifa))
162 #define sintosa(sin)	((struct sockaddr *)(sin))
163 	if (flags & IP_ROUTETOIF) {
164 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
165 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
166 			ipstat.ips_noroute++;
167 			error = ENETUNREACH;
168 			goto bad;
169 		}
170 		ifp = ia->ia_ifp;
171 		ip->ip_ttl = 1;
172 	} else {
173 		/*
174 		 * If this is the case, we probably don't want to allocate
175 		 * a protocol-cloned route since we didn't get one from the
176 		 * ULP.  This lets TCP do its thing, while not burdening
177 		 * forwarding or ICMP with the overhead of cloning a route.
178 		 * Of course, we still want to do any cloning requested by
179 		 * the link layer, as this is probably required in all cases
180 		 * for correct operation (as it is for ARP).
181 		 */
182 		if (ro->ro_rt == 0)
183 			rtalloc_ign(ro, RTF_PRCLONING);
184 		if (ro->ro_rt == 0) {
185 			ipstat.ips_noroute++;
186 			error = EHOSTUNREACH;
187 			goto bad;
188 		}
189 		ia = ifatoia(ro->ro_rt->rt_ifa);
190 		ifp = ro->ro_rt->rt_ifp;
191 		ro->ro_rt->rt_use++;
192 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
193 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
194 	}
195 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
196 		struct in_multi *inm;
197 
198 		m->m_flags |= M_MCAST;
199 		/*
200 		 * IP destination address is multicast.  Make sure "dst"
201 		 * still points to the address in "ro".  (It may have been
202 		 * changed to point to a gateway address, above.)
203 		 */
204 		dst = (struct sockaddr_in *)&ro->ro_dst;
205 		/*
206 		 * See if the caller provided any multicast options
207 		 */
208 		if (imo != NULL) {
209 			ip->ip_ttl = imo->imo_multicast_ttl;
210 			if (imo->imo_multicast_ifp != NULL)
211 				ifp = imo->imo_multicast_ifp;
212 			if (imo->imo_multicast_vif != -1)
213 				ip->ip_src.s_addr =
214 				    ip_mcast_src(imo->imo_multicast_vif);
215 		} else
216 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
217 		/*
218 		 * Confirm that the outgoing interface supports multicast.
219 		 */
220 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
221 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
222 				ipstat.ips_noroute++;
223 				error = ENETUNREACH;
224 				goto bad;
225 			}
226 		}
227 		/*
228 		 * If source address not specified yet, use address
229 		 * of outgoing interface.
230 		 */
231 		if (ip->ip_src.s_addr == INADDR_ANY) {
232 			register struct in_ifaddr *ia;
233 
234 			for (ia = in_ifaddr; ia; ia = ia->ia_next)
235 				if (ia->ia_ifp == ifp) {
236 					ip->ip_src = IA_SIN(ia)->sin_addr;
237 					break;
238 				}
239 		}
240 
241 		IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
242 		if (inm != NULL &&
243 		   (imo == NULL || imo->imo_multicast_loop)) {
244 			/*
245 			 * If we belong to the destination multicast group
246 			 * on the outgoing interface, and the caller did not
247 			 * forbid loopback, loop back a copy.
248 			 */
249 			ip_mloopback(ifp, m, dst);
250 		}
251 		else {
252 			/*
253 			 * If we are acting as a multicast router, perform
254 			 * multicast forwarding as if the packet had just
255 			 * arrived on the interface to which we are about
256 			 * to send.  The multicast forwarding function
257 			 * recursively calls this function, using the
258 			 * IP_FORWARDING flag to prevent infinite recursion.
259 			 *
260 			 * Multicasts that are looped back by ip_mloopback(),
261 			 * above, will be forwarded by the ip_input() routine,
262 			 * if necessary.
263 			 */
264 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
265 				/*
266 				 * Check if rsvp daemon is running. If not, don't
267 				 * set ip_moptions. This ensures that the packet
268 				 * is multicast and not just sent down one link
269 				 * as prescribed by rsvpd.
270 				 */
271 				if (!rsvp_on)
272 				  imo = NULL;
273 				if (ip_mforward(ip, ifp, m, imo) != 0) {
274 					m_freem(m);
275 					goto done;
276 				}
277 			}
278 		}
279 
280 		/*
281 		 * Multicasts with a time-to-live of zero may be looped-
282 		 * back, above, but must not be transmitted on a network.
283 		 * Also, multicasts addressed to the loopback interface
284 		 * are not sent -- the above call to ip_mloopback() will
285 		 * loop back a copy if this host actually belongs to the
286 		 * destination group on the loopback interface.
287 		 */
288 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
289 			m_freem(m);
290 			goto done;
291 		}
292 
293 		goto sendit;
294 	}
295 #ifndef notdef
296 	/*
297 	 * If source address not specified yet, use address
298 	 * of outgoing interface.
299 	 */
300 	if (ip->ip_src.s_addr == INADDR_ANY)
301 		ip->ip_src = IA_SIN(ia)->sin_addr;
302 #endif
303 	/*
304 	 * Verify that we have any chance at all of being able to queue
305 	 *      the packet or packet fragments
306 	 */
307 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
308 		ifp->if_snd.ifq_maxlen) {
309 			error = ENOBUFS;
310 			goto bad;
311 	}
312 
313 	/*
314 	 * Look for broadcast address and
315 	 * and verify user is allowed to send
316 	 * such a packet.
317 	 */
318 	if (in_broadcast(dst->sin_addr, ifp)) {
319 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
320 			error = EADDRNOTAVAIL;
321 			goto bad;
322 		}
323 		if ((flags & IP_ALLOWBROADCAST) == 0) {
324 			error = EACCES;
325 			goto bad;
326 		}
327 		/* don't allow broadcast messages to be fragmented */
328 		if ((u_short)ip->ip_len > ifp->if_mtu) {
329 			error = EMSGSIZE;
330 			goto bad;
331 		}
332 		m->m_flags |= M_BCAST;
333 	} else
334 		m->m_flags &= ~M_BCAST;
335 
336 sendit:
337 	/*
338 	 * Check with the firewall...
339 	 */
340 	if (ip_fw_chk_ptr && !(*ip_fw_chk_ptr)(&ip, hlen, ifp, 1, &m)) {
341 		error = EACCES;
342 		goto done;
343 	}
344 
345 	/*
346 	 * If small enough for interface, can just send directly.
347 	 */
348 	if ((u_short)ip->ip_len <= ifp->if_mtu) {
349 		ip->ip_len = htons((u_short)ip->ip_len);
350 		ip->ip_off = htons((u_short)ip->ip_off);
351 		ip->ip_sum = 0;
352 		ip->ip_sum = in_cksum(m, hlen);
353 		error = (*ifp->if_output)(ifp, m,
354 				(struct sockaddr *)dst, ro->ro_rt);
355 		goto done;
356 	}
357 	/*
358 	 * Too large for interface; fragment if possible.
359 	 * Must be able to put at least 8 bytes per fragment.
360 	 */
361 	if (ip->ip_off & IP_DF) {
362 		error = EMSGSIZE;
363 #if 1
364 		/*
365 		 * This case can happen if the user changed the MTU
366 		 * of an interface after enabling IP on it.  Because
367 		 * most netifs don't keep track of routes pointing to
368 		 * them, there is no way for one to update all its
369 		 * routes when the MTU is changed.
370 		 */
371 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
372 		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
373 		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
374 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
375 		}
376 #endif
377 		ipstat.ips_cantfrag++;
378 		goto bad;
379 	}
380 	len = (ifp->if_mtu - hlen) &~ 7;
381 	if (len < 8) {
382 		error = EMSGSIZE;
383 		goto bad;
384 	}
385 
386     {
387 	int mhlen, firstlen = len;
388 	struct mbuf **mnext = &m->m_nextpkt;
389 
390 	/*
391 	 * Loop through length of segment after first fragment,
392 	 * make new header and copy data of each part and link onto chain.
393 	 */
394 	m0 = m;
395 	mhlen = sizeof (struct ip);
396 	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
397 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
398 		if (m == 0) {
399 			error = ENOBUFS;
400 			ipstat.ips_odropped++;
401 			goto sendorfree;
402 		}
403 		m->m_data += max_linkhdr;
404 		mhip = mtod(m, struct ip *);
405 		*mhip = *ip;
406 		if (hlen > sizeof (struct ip)) {
407 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
408 			mhip->ip_hl = mhlen >> 2;
409 		}
410 		m->m_len = mhlen;
411 		mhip->ip_off = ((off - hlen) >> 3) + (ip->ip_off & ~IP_MF);
412 		if (ip->ip_off & IP_MF)
413 			mhip->ip_off |= IP_MF;
414 		if (off + len >= (u_short)ip->ip_len)
415 			len = (u_short)ip->ip_len - off;
416 		else
417 			mhip->ip_off |= IP_MF;
418 		mhip->ip_len = htons((u_short)(len + mhlen));
419 		m->m_next = m_copy(m0, off, len);
420 		if (m->m_next == 0) {
421 			(void) m_free(m);
422 			error = ENOBUFS;	/* ??? */
423 			ipstat.ips_odropped++;
424 			goto sendorfree;
425 		}
426 		m->m_pkthdr.len = mhlen + len;
427 		m->m_pkthdr.rcvif = (struct ifnet *)0;
428 		mhip->ip_off = htons((u_short)mhip->ip_off);
429 		mhip->ip_sum = 0;
430 		mhip->ip_sum = in_cksum(m, mhlen);
431 		*mnext = m;
432 		mnext = &m->m_nextpkt;
433 		ipstat.ips_ofragments++;
434 	}
435 	/*
436 	 * Update first fragment by trimming what's been copied out
437 	 * and updating header, then send each fragment (in order).
438 	 */
439 	m = m0;
440 	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
441 	m->m_pkthdr.len = hlen + firstlen;
442 	ip->ip_len = htons((u_short)m->m_pkthdr.len);
443 	ip->ip_off = htons((u_short)(ip->ip_off | IP_MF));
444 	ip->ip_sum = 0;
445 	ip->ip_sum = in_cksum(m, hlen);
446 sendorfree:
447 	for (m = m0; m; m = m0) {
448 		m0 = m->m_nextpkt;
449 		m->m_nextpkt = 0;
450 		if (error == 0)
451 			error = (*ifp->if_output)(ifp, m,
452 			    (struct sockaddr *)dst, ro->ro_rt);
453 		else
454 			m_freem(m);
455 	}
456 
457 	if (error == 0)
458 		ipstat.ips_fragmented++;
459     }
460 done:
461 	if (ro == &iproute && (flags & IP_ROUTETOIF) == 0 && ro->ro_rt)
462 		RTFREE(ro->ro_rt);
463 
464 	return (error);
465 bad:
466 	m_freem(m0);
467 	goto done;
468 }
469 
470 /*
471  * Insert IP options into preformed packet.
472  * Adjust IP destination as required for IP source routing,
473  * as indicated by a non-zero in_addr at the start of the options.
474  *
475  * XXX This routine assumes that the packet has no options in place.
476  */
477 static struct mbuf *
478 ip_insertoptions(m, opt, phlen)
479 	register struct mbuf *m;
480 	struct mbuf *opt;
481 	int *phlen;
482 {
483 	register struct ipoption *p = mtod(opt, struct ipoption *);
484 	struct mbuf *n;
485 	register struct ip *ip = mtod(m, struct ip *);
486 	unsigned optlen;
487 
488 	optlen = opt->m_len - sizeof(p->ipopt_dst);
489 	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
490 		return (m);		/* XXX should fail */
491 	if (p->ipopt_dst.s_addr)
492 		ip->ip_dst = p->ipopt_dst;
493 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
494 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
495 		if (n == 0)
496 			return (m);
497 		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
498 		m->m_len -= sizeof(struct ip);
499 		m->m_data += sizeof(struct ip);
500 		n->m_next = m;
501 		m = n;
502 		m->m_len = optlen + sizeof(struct ip);
503 		m->m_data += max_linkhdr;
504 		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
505 	} else {
506 		m->m_data -= optlen;
507 		m->m_len += optlen;
508 		m->m_pkthdr.len += optlen;
509 		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
510 	}
511 	ip = mtod(m, struct ip *);
512 	(void)memcpy(ip + 1, p->ipopt_list, (unsigned)optlen);
513 	*phlen = sizeof(struct ip) + optlen;
514 	ip->ip_hl = *phlen >> 2;
515 	ip->ip_len += optlen;
516 	return (m);
517 }
518 
519 /*
520  * Copy options from ip to jp,
521  * omitting those not copied during fragmentation.
522  */
523 static int
524 ip_optcopy(ip, jp)
525 	struct ip *ip, *jp;
526 {
527 	register u_char *cp, *dp;
528 	int opt, optlen, cnt;
529 
530 	cp = (u_char *)(ip + 1);
531 	dp = (u_char *)(jp + 1);
532 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
533 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
534 		opt = cp[0];
535 		if (opt == IPOPT_EOL)
536 			break;
537 		if (opt == IPOPT_NOP) {
538 			/* Preserve for IP mcast tunnel's LSRR alignment. */
539 			*dp++ = IPOPT_NOP;
540 			optlen = 1;
541 			continue;
542 		} else
543 			optlen = cp[IPOPT_OLEN];
544 		/* bogus lengths should have been caught by ip_dooptions */
545 		if (optlen > cnt)
546 			optlen = cnt;
547 		if (IPOPT_COPIED(opt)) {
548 			(void)memcpy(dp, cp, (unsigned)optlen);
549 			dp += optlen;
550 		}
551 	}
552 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
553 		*dp++ = IPOPT_EOL;
554 	return (optlen);
555 }
556 
557 /*
558  * IP socket option processing.
559  */
560 int
561 ip_ctloutput(op, so, level, optname, mp)
562 	int op;
563 	struct socket *so;
564 	int level, optname;
565 	struct mbuf **mp;
566 {
567 	register struct inpcb *inp = sotoinpcb(so);
568 	register struct mbuf *m = *mp;
569 	register int optval = 0;
570 	int error = 0;
571 
572 	if (level != IPPROTO_IP) {
573 		error = EINVAL;
574 		if (op == PRCO_SETOPT && *mp)
575 			(void) m_free(*mp);
576 	} else switch (op) {
577 
578 	case PRCO_SETOPT:
579 		switch (optname) {
580 		case IP_OPTIONS:
581 #ifdef notyet
582 		case IP_RETOPTS:
583 			return (ip_pcbopts(optname, &inp->inp_options, m));
584 #else
585 			return (ip_pcbopts(&inp->inp_options, m));
586 #endif
587 
588 		case IP_TOS:
589 		case IP_TTL:
590 		case IP_RECVOPTS:
591 		case IP_RECVRETOPTS:
592 		case IP_RECVDSTADDR:
593 			if (m == 0 || m->m_len != sizeof(int))
594 				error = EINVAL;
595 			else {
596 				optval = *mtod(m, int *);
597 				switch (optname) {
598 
599 				case IP_TOS:
600 					inp->inp_ip.ip_tos = optval;
601 					break;
602 
603 				case IP_TTL:
604 					inp->inp_ip.ip_ttl = optval;
605 					break;
606 #define	OPTSET(bit) \
607 	if (optval) \
608 		inp->inp_flags |= bit; \
609 	else \
610 		inp->inp_flags &= ~bit;
611 
612 				case IP_RECVOPTS:
613 					OPTSET(INP_RECVOPTS);
614 					break;
615 
616 				case IP_RECVRETOPTS:
617 					OPTSET(INP_RECVRETOPTS);
618 					break;
619 
620 				case IP_RECVDSTADDR:
621 					OPTSET(INP_RECVDSTADDR);
622 					break;
623 				}
624 			}
625 			break;
626 #undef OPTSET
627 
628 		case IP_MULTICAST_IF:
629 		case IP_MULTICAST_VIF:
630 		case IP_MULTICAST_TTL:
631 		case IP_MULTICAST_LOOP:
632 		case IP_ADD_MEMBERSHIP:
633 		case IP_DROP_MEMBERSHIP:
634 			error = ip_setmoptions(optname, &inp->inp_moptions, m);
635 			break;
636 
637 		case IP_PORTRANGE:
638 			if (m == 0 || m->m_len != sizeof(int))
639 				error = EINVAL;
640 			else {
641 				optval = *mtod(m, int *);
642 
643 				switch (optval) {
644 
645 				case IP_PORTRANGE_DEFAULT:
646 					inp->inp_flags &= ~(INP_LOWPORT);
647 					inp->inp_flags &= ~(INP_HIGHPORT);
648 					break;
649 
650 				case IP_PORTRANGE_HIGH:
651 					inp->inp_flags &= ~(INP_LOWPORT);
652 					inp->inp_flags |= INP_HIGHPORT;
653 					break;
654 
655 				case IP_PORTRANGE_LOW:
656 					inp->inp_flags &= ~(INP_HIGHPORT);
657 					inp->inp_flags |= INP_LOWPORT;
658 					break;
659 
660 				default:
661 					error = EINVAL;
662 					break;
663 				}
664 			}
665 
666 		default:
667 			error = ENOPROTOOPT;
668 			break;
669 		}
670 		if (m)
671 			(void)m_free(m);
672 		break;
673 
674 	case PRCO_GETOPT:
675 		switch (optname) {
676 		case IP_OPTIONS:
677 		case IP_RETOPTS:
678 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
679 			if (inp->inp_options) {
680 				m->m_len = inp->inp_options->m_len;
681 				(void)memcpy(mtod(m, void *),
682 				    mtod(inp->inp_options, void *), (unsigned)m->m_len);
683 			} else
684 				m->m_len = 0;
685 			break;
686 
687 		case IP_TOS:
688 		case IP_TTL:
689 		case IP_RECVOPTS:
690 		case IP_RECVRETOPTS:
691 		case IP_RECVDSTADDR:
692 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
693 			m->m_len = sizeof(int);
694 			switch (optname) {
695 
696 			case IP_TOS:
697 				optval = inp->inp_ip.ip_tos;
698 				break;
699 
700 			case IP_TTL:
701 				optval = inp->inp_ip.ip_ttl;
702 				break;
703 
704 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
705 
706 			case IP_RECVOPTS:
707 				optval = OPTBIT(INP_RECVOPTS);
708 				break;
709 
710 			case IP_RECVRETOPTS:
711 				optval = OPTBIT(INP_RECVRETOPTS);
712 				break;
713 
714 			case IP_RECVDSTADDR:
715 				optval = OPTBIT(INP_RECVDSTADDR);
716 				break;
717 			}
718 			*mtod(m, int *) = optval;
719 			break;
720 
721 		case IP_MULTICAST_IF:
722 		case IP_MULTICAST_VIF:
723 		case IP_MULTICAST_TTL:
724 		case IP_MULTICAST_LOOP:
725 		case IP_ADD_MEMBERSHIP:
726 		case IP_DROP_MEMBERSHIP:
727 			error = ip_getmoptions(optname, inp->inp_moptions, mp);
728 			break;
729 
730 		case IP_PORTRANGE:
731 			*mp = m = m_get(M_WAIT, MT_SOOPTS);
732 			m->m_len = sizeof(int);
733 
734 			if (inp->inp_flags & INP_HIGHPORT)
735 				optval = IP_PORTRANGE_HIGH;
736 			else if (inp->inp_flags & INP_LOWPORT)
737 				optval = IP_PORTRANGE_LOW;
738 			else
739 				optval = 0;
740 
741 			*mtod(m, int *) = optval;
742 			break;
743 
744 		default:
745 			error = ENOPROTOOPT;
746 			break;
747 		}
748 		break;
749 	}
750 	return (error);
751 }
752 
753 /*
754  * Set up IP options in pcb for insertion in output packets.
755  * Store in mbuf with pointer in pcbopt, adding pseudo-option
756  * with destination address if source routed.
757  */
758 static int
759 #ifdef notyet
760 ip_pcbopts(optname, pcbopt, m)
761 	int optname;
762 #else
763 ip_pcbopts(pcbopt, m)
764 #endif
765 	struct mbuf **pcbopt;
766 	register struct mbuf *m;
767 {
768 	register cnt, optlen;
769 	register u_char *cp;
770 	u_char opt;
771 
772 	/* turn off any old options */
773 	if (*pcbopt)
774 		(void)m_free(*pcbopt);
775 	*pcbopt = 0;
776 	if (m == (struct mbuf *)0 || m->m_len == 0) {
777 		/*
778 		 * Only turning off any previous options.
779 		 */
780 		if (m)
781 			(void)m_free(m);
782 		return (0);
783 	}
784 
785 #ifndef	vax
786 	if (m->m_len % sizeof(long))
787 		goto bad;
788 #endif
789 	/*
790 	 * IP first-hop destination address will be stored before
791 	 * actual options; move other options back
792 	 * and clear it when none present.
793 	 */
794 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
795 		goto bad;
796 	cnt = m->m_len;
797 	m->m_len += sizeof(struct in_addr);
798 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
799 	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
800 	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
801 
802 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
803 		opt = cp[IPOPT_OPTVAL];
804 		if (opt == IPOPT_EOL)
805 			break;
806 		if (opt == IPOPT_NOP)
807 			optlen = 1;
808 		else {
809 			optlen = cp[IPOPT_OLEN];
810 			if (optlen <= IPOPT_OLEN || optlen > cnt)
811 				goto bad;
812 		}
813 		switch (opt) {
814 
815 		default:
816 			break;
817 
818 		case IPOPT_LSRR:
819 		case IPOPT_SSRR:
820 			/*
821 			 * user process specifies route as:
822 			 *	->A->B->C->D
823 			 * D must be our final destination (but we can't
824 			 * check that since we may not have connected yet).
825 			 * A is first hop destination, which doesn't appear in
826 			 * actual IP option, but is stored before the options.
827 			 */
828 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
829 				goto bad;
830 			m->m_len -= sizeof(struct in_addr);
831 			cnt -= sizeof(struct in_addr);
832 			optlen -= sizeof(struct in_addr);
833 			cp[IPOPT_OLEN] = optlen;
834 			/*
835 			 * Move first hop before start of options.
836 			 */
837 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
838 			    sizeof(struct in_addr));
839 			/*
840 			 * Then copy rest of options back
841 			 * to close up the deleted entry.
842 			 */
843 			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
844 			    sizeof(struct in_addr)),
845 			    (caddr_t)&cp[IPOPT_OFFSET+1],
846 			    (unsigned)cnt + sizeof(struct in_addr));
847 			break;
848 		}
849 	}
850 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
851 		goto bad;
852 	*pcbopt = m;
853 	return (0);
854 
855 bad:
856 	(void)m_free(m);
857 	return (EINVAL);
858 }
859 
860 /*
861  * Set the IP multicast options in response to user setsockopt().
862  */
863 static int
864 ip_setmoptions(optname, imop, m)
865 	int optname;
866 	struct ip_moptions **imop;
867 	struct mbuf *m;
868 {
869 	register int error = 0;
870 	u_char loop;
871 	register int i;
872 	struct in_addr addr;
873 	register struct ip_mreq *mreq;
874 	register struct ifnet *ifp;
875 	register struct ip_moptions *imo = *imop;
876 	struct route ro;
877 	register struct sockaddr_in *dst;
878 	int s;
879 
880 	if (imo == NULL) {
881 		/*
882 		 * No multicast option buffer attached to the pcb;
883 		 * allocate one and initialize to default values.
884 		 */
885 		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
886 		    M_WAITOK);
887 
888 		if (imo == NULL)
889 			return (ENOBUFS);
890 		*imop = imo;
891 		imo->imo_multicast_ifp = NULL;
892 		imo->imo_multicast_vif = -1;
893 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
894 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
895 		imo->imo_num_memberships = 0;
896 	}
897 
898 	switch (optname) {
899 	/* store an index number for the vif you wanna use in the send */
900 	case IP_MULTICAST_VIF:
901 		if (!legal_vif_num) {
902 			error = EOPNOTSUPP;
903 			break;
904 		}
905 		if (m == NULL || m->m_len != sizeof(int)) {
906 			error = EINVAL;
907 			break;
908 		}
909 		i = *(mtod(m, int *));
910 		if (!legal_vif_num(i) && (i != -1)) {
911 			error = EINVAL;
912 			break;
913 		}
914 		imo->imo_multicast_vif = i;
915 		break;
916 
917 	case IP_MULTICAST_IF:
918 		/*
919 		 * Select the interface for outgoing multicast packets.
920 		 */
921 		if (m == NULL || m->m_len != sizeof(struct in_addr)) {
922 			error = EINVAL;
923 			break;
924 		}
925 		addr = *(mtod(m, struct in_addr *));
926 		/*
927 		 * INADDR_ANY is used to remove a previous selection.
928 		 * When no interface is selected, a default one is
929 		 * chosen every time a multicast packet is sent.
930 		 */
931 		if (addr.s_addr == INADDR_ANY) {
932 			imo->imo_multicast_ifp = NULL;
933 			break;
934 		}
935 		/*
936 		 * The selected interface is identified by its local
937 		 * IP address.  Find the interface and confirm that
938 		 * it supports multicasting.
939 		 */
940 		s = splimp();
941 		INADDR_TO_IFP(addr, ifp);
942 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
943 			splx(s);
944 			error = EADDRNOTAVAIL;
945 			break;
946 		}
947 		imo->imo_multicast_ifp = ifp;
948 		splx(s);
949 		break;
950 
951 	case IP_MULTICAST_TTL:
952 		/*
953 		 * Set the IP time-to-live for outgoing multicast packets.
954 		 */
955 		if (m == NULL || m->m_len != 1) {
956 			error = EINVAL;
957 			break;
958 		}
959 		imo->imo_multicast_ttl = *(mtod(m, u_char *));
960 		break;
961 
962 	case IP_MULTICAST_LOOP:
963 		/*
964 		 * Set the loopback flag for outgoing multicast packets.
965 		 * Must be zero or one.
966 		 */
967 		if (m == NULL || m->m_len != 1 ||
968 		   (loop = *(mtod(m, u_char *))) > 1) {
969 			error = EINVAL;
970 			break;
971 		}
972 		imo->imo_multicast_loop = loop;
973 		break;
974 
975 	case IP_ADD_MEMBERSHIP:
976 		/*
977 		 * Add a multicast group membership.
978 		 * Group must be a valid IP multicast address.
979 		 */
980 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
981 			error = EINVAL;
982 			break;
983 		}
984 		mreq = mtod(m, struct ip_mreq *);
985 		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
986 			error = EINVAL;
987 			break;
988 		}
989 		s = splimp();
990 		/*
991 		 * If no interface address was provided, use the interface of
992 		 * the route to the given multicast address.
993 		 */
994 		if (mreq->imr_interface.s_addr == INADDR_ANY) {
995 			bzero((caddr_t)&ro, sizeof(ro));
996 			dst = (struct sockaddr_in *)&ro.ro_dst;
997 			dst->sin_len = sizeof(*dst);
998 			dst->sin_family = AF_INET;
999 			dst->sin_addr = mreq->imr_multiaddr;
1000 			rtalloc(&ro);
1001 			if (ro.ro_rt == NULL) {
1002 				error = EADDRNOTAVAIL;
1003 				splx(s);
1004 				break;
1005 			}
1006 			ifp = ro.ro_rt->rt_ifp;
1007 			rtfree(ro.ro_rt);
1008 		}
1009 		else {
1010 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1011 		}
1012 
1013 		/*
1014 		 * See if we found an interface, and confirm that it
1015 		 * supports multicast.
1016 		 */
1017 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1018 			error = EADDRNOTAVAIL;
1019 			splx(s);
1020 			break;
1021 		}
1022 		/*
1023 		 * See if the membership already exists or if all the
1024 		 * membership slots are full.
1025 		 */
1026 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1027 			if (imo->imo_membership[i]->inm_ifp == ifp &&
1028 			    imo->imo_membership[i]->inm_addr.s_addr
1029 						== mreq->imr_multiaddr.s_addr)
1030 				break;
1031 		}
1032 		if (i < imo->imo_num_memberships) {
1033 			error = EADDRINUSE;
1034 			splx(s);
1035 			break;
1036 		}
1037 		if (i == IP_MAX_MEMBERSHIPS) {
1038 			error = ETOOMANYREFS;
1039 			splx(s);
1040 			break;
1041 		}
1042 		/*
1043 		 * Everything looks good; add a new record to the multicast
1044 		 * address list for the given interface.
1045 		 */
1046 		if ((imo->imo_membership[i] =
1047 		    in_addmulti(&mreq->imr_multiaddr, ifp)) == NULL) {
1048 			error = ENOBUFS;
1049 			splx(s);
1050 			break;
1051 		}
1052 		++imo->imo_num_memberships;
1053 		splx(s);
1054 		break;
1055 
1056 	case IP_DROP_MEMBERSHIP:
1057 		/*
1058 		 * Drop a multicast group membership.
1059 		 * Group must be a valid IP multicast address.
1060 		 */
1061 		if (m == NULL || m->m_len != sizeof(struct ip_mreq)) {
1062 			error = EINVAL;
1063 			break;
1064 		}
1065 		mreq = mtod(m, struct ip_mreq *);
1066 		if (!IN_MULTICAST(ntohl(mreq->imr_multiaddr.s_addr))) {
1067 			error = EINVAL;
1068 			break;
1069 		}
1070 
1071 		s = splimp();
1072 		/*
1073 		 * If an interface address was specified, get a pointer
1074 		 * to its ifnet structure.
1075 		 */
1076 		if (mreq->imr_interface.s_addr == INADDR_ANY)
1077 			ifp = NULL;
1078 		else {
1079 			INADDR_TO_IFP(mreq->imr_interface, ifp);
1080 			if (ifp == NULL) {
1081 				error = EADDRNOTAVAIL;
1082 				splx(s);
1083 				break;
1084 			}
1085 		}
1086 		/*
1087 		 * Find the membership in the membership array.
1088 		 */
1089 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1090 			if ((ifp == NULL ||
1091 			     imo->imo_membership[i]->inm_ifp == ifp) &&
1092 			     imo->imo_membership[i]->inm_addr.s_addr ==
1093 			     mreq->imr_multiaddr.s_addr)
1094 				break;
1095 		}
1096 		if (i == imo->imo_num_memberships) {
1097 			error = EADDRNOTAVAIL;
1098 			splx(s);
1099 			break;
1100 		}
1101 		/*
1102 		 * Give up the multicast address record to which the
1103 		 * membership points.
1104 		 */
1105 		in_delmulti(imo->imo_membership[i]);
1106 		/*
1107 		 * Remove the gap in the membership array.
1108 		 */
1109 		for (++i; i < imo->imo_num_memberships; ++i)
1110 			imo->imo_membership[i-1] = imo->imo_membership[i];
1111 		--imo->imo_num_memberships;
1112 		splx(s);
1113 		break;
1114 
1115 	default:
1116 		error = EOPNOTSUPP;
1117 		break;
1118 	}
1119 
1120 	/*
1121 	 * If all options have default values, no need to keep the mbuf.
1122 	 */
1123 	if (imo->imo_multicast_ifp == NULL &&
1124 	    imo->imo_multicast_vif == -1 &&
1125 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1126 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1127 	    imo->imo_num_memberships == 0) {
1128 		free(*imop, M_IPMOPTS);
1129 		*imop = NULL;
1130 	}
1131 
1132 	return (error);
1133 }
1134 
1135 /*
1136  * Return the IP multicast options in response to user getsockopt().
1137  */
1138 static int
1139 ip_getmoptions(optname, imo, mp)
1140 	int optname;
1141 	register struct ip_moptions *imo;
1142 	register struct mbuf **mp;
1143 {
1144 	u_char *ttl;
1145 	u_char *loop;
1146 	struct in_addr *addr;
1147 	struct in_ifaddr *ia;
1148 
1149 	*mp = m_get(M_WAIT, MT_SOOPTS);
1150 
1151 	switch (optname) {
1152 
1153 	case IP_MULTICAST_VIF:
1154 		if (imo != NULL)
1155 			*(mtod(*mp, int *)) = imo->imo_multicast_vif;
1156 		else
1157 			*(mtod(*mp, int *)) = -1;
1158 		(*mp)->m_len = sizeof(int);
1159 		return(0);
1160 
1161 	case IP_MULTICAST_IF:
1162 		addr = mtod(*mp, struct in_addr *);
1163 		(*mp)->m_len = sizeof(struct in_addr);
1164 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1165 			addr->s_addr = INADDR_ANY;
1166 		else {
1167 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1168 			addr->s_addr = (ia == NULL) ? INADDR_ANY
1169 					: IA_SIN(ia)->sin_addr.s_addr;
1170 		}
1171 		return (0);
1172 
1173 	case IP_MULTICAST_TTL:
1174 		ttl = mtod(*mp, u_char *);
1175 		(*mp)->m_len = 1;
1176 		*ttl = (imo == NULL) ? IP_DEFAULT_MULTICAST_TTL
1177 				     : imo->imo_multicast_ttl;
1178 		return (0);
1179 
1180 	case IP_MULTICAST_LOOP:
1181 		loop = mtod(*mp, u_char *);
1182 		(*mp)->m_len = 1;
1183 		*loop = (imo == NULL) ? IP_DEFAULT_MULTICAST_LOOP
1184 				      : imo->imo_multicast_loop;
1185 		return (0);
1186 
1187 	default:
1188 		return (EOPNOTSUPP);
1189 	}
1190 }
1191 
1192 /*
1193  * Discard the IP multicast options.
1194  */
1195 void
1196 ip_freemoptions(imo)
1197 	register struct ip_moptions *imo;
1198 {
1199 	register int i;
1200 
1201 	if (imo != NULL) {
1202 		for (i = 0; i < imo->imo_num_memberships; ++i)
1203 			in_delmulti(imo->imo_membership[i]);
1204 		free(imo, M_IPMOPTS);
1205 	}
1206 }
1207 
1208 /*
1209  * Routine called from ip_output() to loop back a copy of an IP multicast
1210  * packet to the input queue of a specified interface.  Note that this
1211  * calls the output routine of the loopback "driver", but with an interface
1212  * pointer that might NOT be a loopback interface -- evil, but easier than
1213  * replicating that code here.
1214  */
1215 static void
1216 ip_mloopback(ifp, m, dst)
1217 	struct ifnet *ifp;
1218 	register struct mbuf *m;
1219 	register struct sockaddr_in *dst;
1220 {
1221 	register struct ip *ip;
1222 	struct mbuf *copym;
1223 
1224 	copym = m_copy(m, 0, M_COPYALL);
1225 	if (copym != NULL) {
1226 		/*
1227 		 * We don't bother to fragment if the IP length is greater
1228 		 * than the interface's MTU.  Can this possibly matter?
1229 		 */
1230 		ip = mtod(copym, struct ip *);
1231 		ip->ip_len = htons((u_short)ip->ip_len);
1232 		ip->ip_off = htons((u_short)ip->ip_off);
1233 		ip->ip_sum = 0;
1234 		ip->ip_sum = in_cksum(copym, ip->ip_hl << 2);
1235 		(void) looutput(ifp, copym, (struct sockaddr *)dst, NULL);
1236 	}
1237 }
1238