xref: /freebsd/sys/netinet/ip_output.c (revision 17d6c636720d00f77e5d098daf4c278f89d84f7b)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ip_output.c	8.3 (Berkeley) 1/21/94
34  * $FreeBSD$
35  */
36 
37 #define _IP_VHL
38 
39 #include "opt_ipfw.h"
40 #include "opt_ipdn.h"
41 #include "opt_ipdivert.h"
42 #include "opt_ipfilter.h"
43 #include "opt_ipsec.h"
44 #include "opt_pfil_hooks.h"
45 #include "opt_random_ip_id.h"
46 
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/malloc.h>
51 #include <sys/mbuf.h>
52 #include <sys/protosw.h>
53 #include <sys/socket.h>
54 #include <sys/socketvar.h>
55 
56 #include <net/if.h>
57 #include <net/route.h>
58 
59 #include <netinet/in.h>
60 #include <netinet/in_systm.h>
61 #include <netinet/ip.h>
62 #include <netinet/in_pcb.h>
63 #include <netinet/in_var.h>
64 #include <netinet/ip_var.h>
65 
66 #include <machine/in_cksum.h>
67 
68 static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options");
69 
70 #ifdef IPSEC
71 #include <netinet6/ipsec.h>
72 #include <netkey/key.h>
73 #ifdef IPSEC_DEBUG
74 #include <netkey/key_debug.h>
75 #else
76 #define	KEYDEBUG(lev,arg)
77 #endif
78 #endif /*IPSEC*/
79 
80 #include <netinet/ip_fw.h>
81 #include <netinet/ip_dummynet.h>
82 
#ifdef IPFIREWALL_FORWARD_DEBUG
/*
 * Debug helper: print the IPv4 address held in a struct in_addr (network
 * byte order) in dotted-quad form.
 *
 * Every octet is cast to u_long so that the argument type always agrees
 * with the %lu conversion, whatever integer type ntohl() returns on the
 * platform.  The argument is parenthesized so that any lvalue expression
 * may be passed.
 *
 * NOTE: the expansion intentionally still ends in a semicolon, as the
 * original macro did, so that existing call sites keep compiling.
 */
#define print_ip(a)	printf("%lu.%lu.%lu.%lu",			\
			    (u_long)((ntohl((a).s_addr) >> 24) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 16) & 0xFF),	\
			    (u_long)((ntohl((a).s_addr) >> 8) & 0xFF),	\
			    (u_long)(ntohl((a).s_addr) & 0xFF));
#endif
89 
/*
 * Counter that ip_output() uses to fill in the IP identification field
 * of locally originated packets when RANDOM_IP_ID is not defined.
 */
90 u_short ip_id;
91 
/* Forward declarations of the helpers that are private to this file. */
92 static struct mbuf *ip_insertoptions __P((struct mbuf *, struct mbuf *, int *));
93 static struct ifnet *ip_multicast_if __P((struct in_addr *, int *));
94 static void	ip_mloopback
95 	__P((struct ifnet *, struct mbuf *, struct sockaddr_in *, int));
96 static int	ip_getmoptions
97 	__P((struct sockopt *, struct ip_moptions *));
98 static int	ip_pcbopts __P((int, struct mbuf **, struct mbuf *));
99 static int	ip_setmoptions
100 	__P((struct sockopt *, struct ip_moptions **));
101 
/* Not static: used by the fragmentation code in ip_output() below. */
102 int	ip_optcopy __P((struct ip *, struct ip *));
103 
104 
/*
 * Protocol switch table; ip_output() indexes it to find the packet
 * filter hook list of the IP protocol entry when PFIL_HOOKS is defined.
 */
105 extern	struct protosw inetsw[];
106 
107 /*
108  * IP output.  The packet in mbuf chain m contains a skeletal IP
109  * header (with len, off, ttl, proto, tos, src, dst).
110  * The mbuf chain containing the packet will be freed.
111  * The mbuf opt, if present, will not be freed.
112  */
113 int
114 ip_output(m0, opt, ro, flags, imo)
115 	struct mbuf *m0;
116 	struct mbuf *opt;
117 	struct route *ro;
118 	int flags;
119 	struct ip_moptions *imo;
120 {
121 	struct ip *ip, *mhip;
122 	struct ifnet *ifp;
123 	struct mbuf *m = m0;
124 	int hlen = sizeof (struct ip);
125 	int len, off, error = 0;
126 	struct route iproute;
127 	struct sockaddr_in *dst;
128 	struct in_ifaddr *ia;
129 	int isbroadcast, sw_csum;
130 	struct in_addr pkt_dst;
131 #ifdef IPSEC
132 	struct socket *so = NULL;
133 	struct secpolicy *sp = NULL;
134 #endif
135 	u_int16_t divert_cookie;		/* firewall cookie */
136 #ifdef PFIL_HOOKS
137 	struct packet_filter_hook *pfh;
138 	struct mbuf *m1;
139 	int rv;
140 #endif /* PFIL_HOOKS */
141 #ifdef IPFIREWALL_FORWARD
142 	int fwd_rewrite_src = 0;
143 #endif
144 	struct ip_fw *rule = NULL;
145 
146 #ifdef IPDIVERT
147 	/* Get and reset firewall cookie */
148 	divert_cookie = ip_divert_cookie;
149 	ip_divert_cookie = 0;
150 #else
151 	divert_cookie = 0;
152 #endif
153 
154         /*
155          * dummynet packet are prepended a vestigial mbuf with
156          * m_type = MT_DUMMYNET and m_data pointing to the matching
157          * rule.
158          */
159         if (m->m_type == MT_DUMMYNET) {
160             /*
161              * the packet was already tagged, so part of the
162              * processing was already done, and we need to go down.
163              * Get parameters from the header.
164              */
165             rule = (struct ip_fw *)(m->m_data) ;
166 	    opt = NULL ;
167 	    ro = & ( ((struct dn_pkt *)m)->ro ) ;
168 	    imo = NULL ;
169 	    dst = ((struct dn_pkt *)m)->dn_dst ;
170 	    ifp = ((struct dn_pkt *)m)->ifp ;
171 	    flags = ((struct dn_pkt *)m)->flags ;
172 
173             m0 = m = m->m_next ;
174 #ifdef IPSEC
175 	    so = ipsec_getsocket(m);
176 	    (void)ipsec_setsocket(m, NULL);
177 #endif
178             ip = mtod(m, struct ip *);
179             hlen = IP_VHL_HL(ip->ip_vhl) << 2 ;
180             ia = ifatoia(ro->ro_rt->rt_ifa);
181             goto sendit;
182         } else
183             rule = NULL ;
184 #ifdef IPSEC
185 	so = ipsec_getsocket(m);
186 	(void)ipsec_setsocket(m, NULL);
187 #endif
188 
189 #ifdef	DIAGNOSTIC
190 	if ((m->m_flags & M_PKTHDR) == 0)
191 		panic("ip_output no HDR");
192 #endif
193 	if (opt) {
194 		m = ip_insertoptions(m, opt, &len);
195 		hlen = len;
196 	}
197 	ip = mtod(m, struct ip *);
198 	pkt_dst = ip_fw_fwd_addr == NULL
199 		? ip->ip_dst : ip_fw_fwd_addr->sin_addr;
200 
201 	/*
202 	 * Fill in IP header.
203 	 */
204 	if ((flags & (IP_FORWARDING|IP_RAWOUTPUT)) == 0) {
205 		ip->ip_vhl = IP_MAKE_VHL(IPVERSION, hlen >> 2);
206 		ip->ip_off &= IP_DF;
207 #ifdef RANDOM_IP_ID
208 		ip->ip_id = ip_randomid();
209 #else
210 		ip->ip_id = htons(ip_id++);
211 #endif
212 		ipstat.ips_localout++;
213 	} else {
214 		hlen = IP_VHL_HL(ip->ip_vhl) << 2;
215 	}
216 
217 	/* Route packet. */
218 	if (ro == NULL) {
219 		ro = &iproute;
220 		bzero(ro, sizeof(*ro));
221 	}
222 	dst = (struct sockaddr_in *)&ro->ro_dst;
223 	/*
224 	 * If there is a cached route,
225 	 * check that it is to the same destination
226 	 * and is still up.  If not, free it and try again.
227 	 */
228 	if (ro->ro_rt && ((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
229 	   dst->sin_addr.s_addr != pkt_dst.s_addr)) {
230 		RTFREE(ro->ro_rt);
231 		ro->ro_rt = (struct rtentry *)0;
232 	}
233 	if (ro->ro_rt == 0) {
234 		dst->sin_family = AF_INET;
235 		dst->sin_len = sizeof(*dst);
236 		dst->sin_addr = pkt_dst;
237 	}
238 	/*
239 	 * If routing to interface only,
240 	 * short circuit routing lookup.
241 	 */
242 	if (flags & IP_ROUTETOIF) {
243 		if ((ia = ifatoia(ifa_ifwithdstaddr(sintosa(dst)))) == 0 &&
244 		    (ia = ifatoia(ifa_ifwithnet(sintosa(dst)))) == 0) {
245 			ipstat.ips_noroute++;
246 			error = ENETUNREACH;
247 			goto bad;
248 		}
249 		ifp = ia->ia_ifp;
250 		ip->ip_ttl = 1;
251 		isbroadcast = in_broadcast(dst->sin_addr, ifp);
252 	} else if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) &&
253 	    imo != NULL && imo->imo_multicast_ifp != NULL) {
254 		/*
255 		 * Bypass the normal routing lookup for multicast
256 		 * packets if the interface is specified.
257 		 */
258 		ifp = imo->imo_multicast_ifp;
259 		IFP_TO_IA(ifp, ia);
260 		isbroadcast = 0;	/* fool gcc */
261 	} else {
262 		/*
263 		 * If this is the case, we probably don't want to allocate
264 		 * a protocol-cloned route since we didn't get one from the
265 		 * ULP.  This lets TCP do its thing, while not burdening
266 		 * forwarding or ICMP with the overhead of cloning a route.
267 		 * Of course, we still want to do any cloning requested by
268 		 * the link layer, as this is probably required in all cases
269 		 * for correct operation (as it is for ARP).
270 		 */
271 		if (ro->ro_rt == 0)
272 			rtalloc_ign(ro, RTF_PRCLONING);
273 		if (ro->ro_rt == 0) {
274 			ipstat.ips_noroute++;
275 			error = EHOSTUNREACH;
276 			goto bad;
277 		}
278 		ia = ifatoia(ro->ro_rt->rt_ifa);
279 		ifp = ro->ro_rt->rt_ifp;
280 		ro->ro_rt->rt_use++;
281 		if (ro->ro_rt->rt_flags & RTF_GATEWAY)
282 			dst = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
283 		if (ro->ro_rt->rt_flags & RTF_HOST)
284 			isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
285 		else
286 			isbroadcast = in_broadcast(dst->sin_addr, ifp);
287 	}
288 	if (IN_MULTICAST(ntohl(pkt_dst.s_addr))) {
289 		struct in_multi *inm;
290 
291 		m->m_flags |= M_MCAST;
292 		/*
293 		 * IP destination address is multicast.  Make sure "dst"
294 		 * still points to the address in "ro".  (It may have been
295 		 * changed to point to a gateway address, above.)
296 		 */
297 		dst = (struct sockaddr_in *)&ro->ro_dst;
298 		/*
299 		 * See if the caller provided any multicast options
300 		 */
301 		if (imo != NULL) {
302 			ip->ip_ttl = imo->imo_multicast_ttl;
303 			if (imo->imo_multicast_vif != -1)
304 				ip->ip_src.s_addr =
305 				    ip_mcast_src(imo->imo_multicast_vif);
306 		} else
307 			ip->ip_ttl = IP_DEFAULT_MULTICAST_TTL;
308 		/*
309 		 * Confirm that the outgoing interface supports multicast.
310 		 */
311 		if ((imo == NULL) || (imo->imo_multicast_vif == -1)) {
312 			if ((ifp->if_flags & IFF_MULTICAST) == 0) {
313 				ipstat.ips_noroute++;
314 				error = ENETUNREACH;
315 				goto bad;
316 			}
317 		}
318 		/*
319 		 * If source address not specified yet, use address
320 		 * of outgoing interface.
321 		 */
322 		if (ip->ip_src.s_addr == INADDR_ANY) {
323 			/* Interface may have no addresses. */
324 			if (ia != NULL)
325 				ip->ip_src = IA_SIN(ia)->sin_addr;
326 		}
327 
328 		if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
329 			/*
330 			 * XXX
331 			 * delayed checksums are not currently
332 			 * compatible with IP multicast routing
333 			 */
334 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
335 				in_delayed_cksum(m);
336 				m->m_pkthdr.csum_flags &=
337 					~CSUM_DELAY_DATA;
338 			}
339 		}
340 		IN_LOOKUP_MULTI(pkt_dst, ifp, inm);
341 		if (inm != NULL &&
342 		   (imo == NULL || imo->imo_multicast_loop)) {
343 			/*
344 			 * If we belong to the destination multicast group
345 			 * on the outgoing interface, and the caller did not
346 			 * forbid loopback, loop back a copy.
347 			 */
348 			ip_mloopback(ifp, m, dst, hlen);
349 		}
350 		else {
351 			/*
352 			 * If we are acting as a multicast router, perform
353 			 * multicast forwarding as if the packet had just
354 			 * arrived on the interface to which we are about
355 			 * to send.  The multicast forwarding function
356 			 * recursively calls this function, using the
357 			 * IP_FORWARDING flag to prevent infinite recursion.
358 			 *
359 			 * Multicasts that are looped back by ip_mloopback(),
360 			 * above, will be forwarded by the ip_input() routine,
361 			 * if necessary.
362 			 */
363 			if (ip_mrouter && (flags & IP_FORWARDING) == 0) {
364 				/*
365 				 * Check if rsvp daemon is running. If not, don't
366 				 * set ip_moptions. This ensures that the packet
367 				 * is multicast and not just sent down one link
368 				 * as prescribed by rsvpd.
369 				 */
370 				if (!rsvp_on)
371 				  imo = NULL;
372 				if (ip_mforward(ip, ifp, m, imo) != 0) {
373 					m_freem(m);
374 					goto done;
375 				}
376 			}
377 		}
378 
379 		/*
380 		 * Multicasts with a time-to-live of zero may be looped-
381 		 * back, above, but must not be transmitted on a network.
382 		 * Also, multicasts addressed to the loopback interface
383 		 * are not sent -- the above call to ip_mloopback() will
384 		 * loop back a copy if this host actually belongs to the
385 		 * destination group on the loopback interface.
386 		 */
387 		if (ip->ip_ttl == 0 || ifp->if_flags & IFF_LOOPBACK) {
388 			m_freem(m);
389 			goto done;
390 		}
391 
392 		goto sendit;
393 	}
394 #ifndef notdef
395 	/*
396 	 * If source address not specified yet, use address
397 	 * of outgoing interface.
398 	 */
399 	if (ip->ip_src.s_addr == INADDR_ANY) {
400 		/* Interface may have no addresses. */
401 		if (ia != NULL) {
402 			ip->ip_src = IA_SIN(ia)->sin_addr;
403 #ifdef IPFIREWALL_FORWARD
404 			/* Keep note that we did this - if the firewall changes
405 		 	* the next-hop, our interface may change, changing the
406 		 	* default source IP. It's a shame so much effort happens
407 		 	* twice. Oh well.
408 		 	*/
409 			fwd_rewrite_src++;
410 #endif /* IPFIREWALL_FORWARD */
411 		}
412 	}
413 #endif /* notdef */
414 	/*
415 	 * Verify that we have any chance at all of being able to queue
416 	 *      the packet or packet fragments
417 	 */
418 	if ((ifp->if_snd.ifq_len + ip->ip_len / ifp->if_mtu + 1) >=
419 		ifp->if_snd.ifq_maxlen) {
420 			error = ENOBUFS;
421 			ipstat.ips_odropped++;
422 			goto bad;
423 	}
424 
425 	/*
426 	 * Look for broadcast address and
427 	 * and verify user is allowed to send
428 	 * such a packet.
429 	 */
430 	if (isbroadcast) {
431 		if ((ifp->if_flags & IFF_BROADCAST) == 0) {
432 			error = EADDRNOTAVAIL;
433 			goto bad;
434 		}
435 		if ((flags & IP_ALLOWBROADCAST) == 0) {
436 			error = EACCES;
437 			goto bad;
438 		}
439 		/* don't allow broadcast messages to be fragmented */
440 		if ((u_short)ip->ip_len > ifp->if_mtu) {
441 			error = EMSGSIZE;
442 			goto bad;
443 		}
444 		m->m_flags |= M_BCAST;
445 	} else {
446 		m->m_flags &= ~M_BCAST;
447 	}
448 
449 sendit:
450 #ifdef IPSEC
451 	/* get SP for this packet */
452 	if (so == NULL)
453 		sp = ipsec4_getpolicybyaddr(m, IPSEC_DIR_OUTBOUND, flags, &error);
454 	else
455 		sp = ipsec4_getpolicybysock(m, IPSEC_DIR_OUTBOUND, so, &error);
456 
457 	if (sp == NULL) {
458 		ipsecstat.out_inval++;
459 		goto bad;
460 	}
461 
462 	error = 0;
463 
464 	/* check policy */
465 	switch (sp->policy) {
466 	case IPSEC_POLICY_DISCARD:
467 		/*
468 		 * This packet is just discarded.
469 		 */
470 		ipsecstat.out_polvio++;
471 		goto bad;
472 
473 	case IPSEC_POLICY_BYPASS:
474 	case IPSEC_POLICY_NONE:
475 		/* no need to do IPsec. */
476 		goto skip_ipsec;
477 
478 	case IPSEC_POLICY_IPSEC:
479 		if (sp->req == NULL) {
480 			/* acquire a policy */
481 			error = key_spdacquire(sp);
482 			goto bad;
483 		}
484 		break;
485 
486 	case IPSEC_POLICY_ENTRUST:
487 	default:
488 		printf("ip_output: Invalid policy found. %d\n", sp->policy);
489 	}
490     {
491 	struct ipsec_output_state state;
492 	bzero(&state, sizeof(state));
493 	state.m = m;
494 	if (flags & IP_ROUTETOIF) {
495 		state.ro = &iproute;
496 		bzero(&iproute, sizeof(iproute));
497 	} else
498 		state.ro = ro;
499 	state.dst = (struct sockaddr *)dst;
500 
501 	ip->ip_sum = 0;
502 
503 	/*
504 	 * XXX
505 	 * delayed checksums are not currently compatible with IPsec
506 	 */
507 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
508 		in_delayed_cksum(m);
509 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
510 	}
511 
512 	HTONS(ip->ip_len);
513 	HTONS(ip->ip_off);
514 
515 	error = ipsec4_output(&state, sp, flags);
516 
517 	m = state.m;
518 	if (flags & IP_ROUTETOIF) {
519 		/*
520 		 * if we have tunnel mode SA, we may need to ignore
521 		 * IP_ROUTETOIF.
522 		 */
523 		if (state.ro != &iproute || state.ro->ro_rt != NULL) {
524 			flags &= ~IP_ROUTETOIF;
525 			ro = state.ro;
526 		}
527 	} else
528 		ro = state.ro;
529 	dst = (struct sockaddr_in *)state.dst;
530 	if (error) {
531 		/* mbuf is already reclaimed in ipsec4_output. */
532 		m0 = NULL;
533 		switch (error) {
534 		case EHOSTUNREACH:
535 		case ENETUNREACH:
536 		case EMSGSIZE:
537 		case ENOBUFS:
538 		case ENOMEM:
539 			break;
540 		default:
541 			printf("ip4_output (ipsec): error code %d\n", error);
542 			/*fall through*/
543 		case ENOENT:
544 			/* don't show these error codes to the user */
545 			error = 0;
546 			break;
547 		}
548 		goto bad;
549 	}
550     }
551 
552 	/* be sure to update variables that are affected by ipsec4_output() */
553 	ip = mtod(m, struct ip *);
554 #ifdef _IP_VHL
555 	hlen = IP_VHL_HL(ip->ip_vhl) << 2;
556 #else
557 	hlen = ip->ip_hl << 2;
558 #endif
559 	if (ro->ro_rt == NULL) {
560 		if ((flags & IP_ROUTETOIF) == 0) {
561 			printf("ip_output: "
562 				"can't update route after IPsec processing\n");
563 			error = EHOSTUNREACH;	/*XXX*/
564 			goto bad;
565 		}
566 	} else {
567 		ia = ifatoia(ro->ro_rt->rt_ifa);
568 		ifp = ro->ro_rt->rt_ifp;
569 	}
570 
571 	/* make it flipped, again. */
572 	NTOHS(ip->ip_len);
573 	NTOHS(ip->ip_off);
574 skip_ipsec:
575 #endif /*IPSEC*/
576 
577 	/*
578 	 * IpHack's section.
579 	 * - Xlate: translate packet's addr/port (NAT).
580 	 * - Firewall: deny/allow/etc.
581 	 * - Wrap: fake packet's addr/port <unimpl.>
582 	 * - Encapsulate: put it in another IP and send out. <unimp.>
583 	 */
584 #ifdef PFIL_HOOKS
585 	/*
586 	 * Run through list of hooks for output packets.
587 	 */
588 	m1 = m;
589 	pfh = pfil_hook_get(PFIL_OUT, &inetsw[ip_protox[IPPROTO_IP]].pr_pfh);
590 	for (; pfh; pfh = TAILQ_NEXT(pfh, pfil_link))
591 		if (pfh->pfil_func) {
592 			rv = pfh->pfil_func(ip, hlen, ifp, 1, &m1);
593 			if (rv) {
594 				error = EHOSTUNREACH;
595 				goto done;
596 			}
597 			m = m1;
598 			if (m == NULL)
599 				goto done;
600 			ip = mtod(m, struct ip *);
601 		}
602 #endif /* PFIL_HOOKS */
603 
604 	/*
605 	 * Check with the firewall...
606 	 * but not if we are already being fwd'd from a firewall.
607 	 */
608 	if (fw_enable && IPFW_LOADED && !ip_fw_fwd_addr) {
609 		struct sockaddr_in *old = dst;
610 
611 		off = ip_fw_chk_ptr(&ip,
612 		    hlen, ifp, &divert_cookie, &m, &rule, &dst);
613                 /*
614                  * On return we must do the following:
615                  * m == NULL         -> drop the pkt (old interface, deprecated)
616                  * (off & IP_FW_PORT_DENY_FLAG)	-> drop the pkt (new interface)
617                  * 1<=off<= 0xffff		-> DIVERT
618                  * (off & IP_FW_PORT_DYNT_FLAG)	-> send to a DUMMYNET pipe
619                  * (off & IP_FW_PORT_TEE_FLAG)	-> TEE the packet
620                  * dst != old			-> IPFIREWALL_FORWARD
621                  * off==0, dst==old		-> accept
622                  * If some of the above modules are not compiled in, then
623                  * we should't have to check the corresponding condition
624                  * (because the ipfw control socket should not accept
625                  * unsupported rules), but better play safe and drop
626                  * packets in case of doubt.
627                  */
628 		if (off & IP_FW_PORT_DENY_FLAG) { /* XXX new interface-denied */
629 			if (m)
630 				m_freem(m);
631 			error = EACCES;
632 			goto done;
633 		}
634 		if (!m) {			/* firewall said to reject */
635 			static int __debug=10;
636 
637 			if (__debug > 0) {
638 				printf(
639 				    "firewall returns NULL, please update!\n");
640 				__debug--;
641 			}
642 			error = EACCES;
643 			goto done;
644 		}
645 		if (off == 0 && dst == old)		/* common case */
646 			goto pass;
647                 if (DUMMYNET_LOADED && (off & IP_FW_PORT_DYNT_FLAG) != 0) {
648 			/*
649 			 * pass the pkt to dummynet. Need to include
650 			 * pipe number, m, ifp, ro, dst because these are
651 			 * not recomputed in the next pass.
652 			 * All other parameters have been already used and
653 			 * so they are not needed anymore.
654 			 * XXX note: if the ifp or ro entry are deleted
655 			 * while a pkt is in dummynet, we are in trouble!
656 			 */
657 			error = ip_dn_io_ptr(off & 0xffff, DN_TO_IP_OUT, m,
658 			    ifp, ro, dst, rule, flags);
659 			goto done;
660 		}
661 #ifdef IPDIVERT
662 		if (off != 0 && (off & IP_FW_PORT_DYNT_FLAG) == 0) {
663 			struct mbuf *clone = NULL;
664 
665 			/* Clone packet if we're doing a 'tee' */
666 			if ((off & IP_FW_PORT_TEE_FLAG) != 0)
667 				clone = m_dup(m, M_DONTWAIT);
668 
669 			/*
670 			 * XXX
671 			 * delayed checksums are not currently compatible
672 			 * with divert sockets.
673 			 */
674 			if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
675 				in_delayed_cksum(m);
676 				m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
677 			}
678 
679 			/* Restore packet header fields to original values */
680 			HTONS(ip->ip_len);
681 			HTONS(ip->ip_off);
682 
683 			/* Deliver packet to divert input routine */
684 			ip_divert_cookie = divert_cookie;
685 			divert_packet(m, 0, off & 0xffff);
686 
687 			/* If 'tee', continue with original packet */
688 			if (clone != NULL) {
689 				m = clone;
690 				ip = mtod(m, struct ip *);
691 				goto pass;
692 			}
693 			goto done;
694 		}
695 #endif
696 
697 #ifdef IPFIREWALL_FORWARD
698 		/* Here we check dst to make sure it's directly reachable on the
699 		 * interface we previously thought it was.
700 		 * If it isn't (which may be likely in some situations) we have
701 		 * to re-route it (ie, find a route for the next-hop and the
702 		 * associated interface) and set them here. This is nested
703 		 * forwarding which in most cases is undesirable, except where
704 		 * such control is nigh impossible. So we do it here.
705 		 * And I'm babbling.
706 		 */
707 		if (off == 0 && old != dst) {
708 			struct in_ifaddr *ia;
709 
710 			/* It's changed... */
711 			/* There must be a better way to do this next line... */
712 			static struct route sro_fwd, *ro_fwd = &sro_fwd;
713 #ifdef IPFIREWALL_FORWARD_DEBUG
714 			printf("IPFIREWALL_FORWARD: New dst ip: ");
715 			print_ip(dst->sin_addr);
716 			printf("\n");
717 #endif
718 			/*
719 			 * We need to figure out if we have been forwarded
720 			 * to a local socket. If so then we should somehow
721 			 * "loop back" to ip_input, and get directed to the
722 			 * PCB as if we had received this packet. This is
723 			 * because it may be dificult to identify the packets
724 			 * you want to forward until they are being output
725 			 * and have selected an interface. (e.g. locally
726 			 * initiated packets) If we used the loopback inteface,
727 			 * we would not be able to control what happens
728 			 * as the packet runs through ip_input() as
729 			 * it is done through a ISR.
730 			 */
731 			LIST_FOREACH(ia,
732 			    INADDR_HASH(dst->sin_addr.s_addr), ia_hash) {
733 				/*
734 				 * If the addr to forward to is one
735 				 * of ours, we pretend to
736 				 * be the destination for this packet.
737 				 */
738 				if (IA_SIN(ia)->sin_addr.s_addr ==
739 						 dst->sin_addr.s_addr)
740 					break;
741 			}
742 			if (ia) {
743 				/* tell ip_input "dont filter" */
744 				ip_fw_fwd_addr = dst;
745 				if (m->m_pkthdr.rcvif == NULL)
746 					m->m_pkthdr.rcvif = ifunit("lo0");
747 				if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
748 					m->m_pkthdr.csum_flags |=
749 					    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
750 					m0->m_pkthdr.csum_data = 0xffff;
751 				}
752 				m->m_pkthdr.csum_flags |=
753 				    CSUM_IP_CHECKED | CSUM_IP_VALID;
754 				HTONS(ip->ip_len);
755 				HTONS(ip->ip_off);
756 				ip_input(m);
757 				goto done;
758 			}
759 			/* Some of the logic for this was
760 			 * nicked from above.
761 			 *
762 			 * This rewrites the cached route in a local PCB.
763 			 * Is this what we want to do?
764 			 */
765 			bcopy(dst, &ro_fwd->ro_dst, sizeof(*dst));
766 
767 			ro_fwd->ro_rt = 0;
768 			rtalloc_ign(ro_fwd, RTF_PRCLONING);
769 
770 			if (ro_fwd->ro_rt == 0) {
771 				ipstat.ips_noroute++;
772 				error = EHOSTUNREACH;
773 				goto bad;
774 			}
775 
776 			ia = ifatoia(ro_fwd->ro_rt->rt_ifa);
777 			ifp = ro_fwd->ro_rt->rt_ifp;
778 			ro_fwd->ro_rt->rt_use++;
779 			if (ro_fwd->ro_rt->rt_flags & RTF_GATEWAY)
780 				dst = (struct sockaddr_in *)ro_fwd->ro_rt->rt_gateway;
781 			if (ro_fwd->ro_rt->rt_flags & RTF_HOST)
782 				isbroadcast =
783 				    (ro_fwd->ro_rt->rt_flags & RTF_BROADCAST);
784 			else
785 				isbroadcast = in_broadcast(dst->sin_addr, ifp);
786 			if (ro->ro_rt)
787 				RTFREE(ro->ro_rt);
788 			ro->ro_rt = ro_fwd->ro_rt;
789 			dst = (struct sockaddr_in *)&ro_fwd->ro_dst;
790 
791 			/*
792 			 * If we added a default src ip earlier,
793 			 * which would have been gotten from the-then
794 			 * interface, do it again, from the new one.
795 			 */
796 			if (fwd_rewrite_src)
797 				ip->ip_src = IA_SIN(ia)->sin_addr;
798 			goto pass ;
799 		}
800 #endif /* IPFIREWALL_FORWARD */
801                 /*
802                  * if we get here, none of the above matches, and
803                  * we have to drop the pkt
804                  */
805 		m_freem(m);
806                 error = EACCES; /* not sure this is the right error msg */
807                 goto done;
808 	}
809 
810 	ip_fw_fwd_addr = NULL;
811 pass:
812 	m->m_pkthdr.csum_flags |= CSUM_IP;
813 	sw_csum = m->m_pkthdr.csum_flags & ~ifp->if_hwassist;
814 	if (sw_csum & CSUM_DELAY_DATA) {
815 		in_delayed_cksum(m);
816 		sw_csum &= ~CSUM_DELAY_DATA;
817 	}
818 	m->m_pkthdr.csum_flags &= ifp->if_hwassist;
819 
820 	/*
821 	 * If small enough for interface, or the interface will take
822 	 * care of the fragmentation for us, can just send directly.
823 	 */
824 	if ((u_short)ip->ip_len <= ifp->if_mtu ||
825 	    ifp->if_hwassist & CSUM_FRAGMENT) {
826 		HTONS(ip->ip_len);
827 		HTONS(ip->ip_off);
828 		ip->ip_sum = 0;
829 		if (sw_csum & CSUM_DELAY_IP) {
830 			if (ip->ip_vhl == IP_VHL_BORING) {
831 				ip->ip_sum = in_cksum_hdr(ip);
832 			} else {
833 				ip->ip_sum = in_cksum(m, hlen);
834 			}
835 		}
836 
837 		/* Record statistics for this interface address. */
838 		if (!(flags & IP_FORWARDING) && ia) {
839 			ia->ia_ifa.if_opackets++;
840 			ia->ia_ifa.if_obytes += m->m_pkthdr.len;
841 		}
842 
843 #ifdef IPSEC
844 		/* clean ipsec history once it goes out of the node */
845 		ipsec_delaux(m);
846 #endif
847 
848 		error = (*ifp->if_output)(ifp, m,
849 				(struct sockaddr *)dst, ro->ro_rt);
850 		goto done;
851 	}
852 	/*
853 	 * Too large for interface; fragment if possible.
854 	 * Must be able to put at least 8 bytes per fragment.
855 	 */
856 	if (ip->ip_off & IP_DF) {
857 		error = EMSGSIZE;
858 		/*
859 		 * This case can happen if the user changed the MTU
860 		 * of an interface after enabling IP on it.  Because
861 		 * most netifs don't keep track of routes pointing to
862 		 * them, there is no way for one to update all its
863 		 * routes when the MTU is changed.
864 		 */
865 		if ((ro->ro_rt->rt_flags & (RTF_UP | RTF_HOST))
866 		    && !(ro->ro_rt->rt_rmx.rmx_locks & RTV_MTU)
867 		    && (ro->ro_rt->rt_rmx.rmx_mtu > ifp->if_mtu)) {
868 			ro->ro_rt->rt_rmx.rmx_mtu = ifp->if_mtu;
869 		}
870 		ipstat.ips_cantfrag++;
871 		goto bad;
872 	}
873 	len = (ifp->if_mtu - hlen) &~ 7;
874 	if (len < 8) {
875 		error = EMSGSIZE;
876 		goto bad;
877 	}
878 
879 	/*
880 	 * if the interface will not calculate checksums on
881 	 * fragmented packets, then do it here.
882 	 */
883 	if (m->m_pkthdr.csum_flags & CSUM_DELAY_DATA &&
884 	    (ifp->if_hwassist & CSUM_IP_FRAGS) == 0) {
885 		in_delayed_cksum(m);
886 		m->m_pkthdr.csum_flags &= ~CSUM_DELAY_DATA;
887 	}
888 
889     {
890 	int mhlen, firstlen = len;
891 	struct mbuf **mnext = &m->m_nextpkt;
892 	int nfrags = 1;
893 
894 	/*
895 	 * Loop through length of segment after first fragment,
896 	 * make new header and copy data of each part and link onto chain.
897 	 */
898 	m0 = m;
899 	mhlen = sizeof (struct ip);
900 	for (off = hlen + len; off < (u_short)ip->ip_len; off += len) {
901 		MGETHDR(m, M_DONTWAIT, MT_HEADER);
902 		if (m == 0) {
903 			error = ENOBUFS;
904 			ipstat.ips_odropped++;
905 			goto sendorfree;
906 		}
907 		m->m_flags |= (m0->m_flags & M_MCAST) | M_FRAG;
908 		m->m_data += max_linkhdr;
909 		mhip = mtod(m, struct ip *);
910 		*mhip = *ip;
911 		if (hlen > sizeof (struct ip)) {
912 			mhlen = ip_optcopy(ip, mhip) + sizeof (struct ip);
913 			mhip->ip_vhl = IP_MAKE_VHL(IPVERSION, mhlen >> 2);
914 		}
915 		m->m_len = mhlen;
916 		mhip->ip_off = ((off - hlen) >> 3) + ip->ip_off;
917 		if (off + len >= (u_short)ip->ip_len)
918 			len = (u_short)ip->ip_len - off;
919 		else
920 			mhip->ip_off |= IP_MF;
921 		mhip->ip_len = htons((u_short)(len + mhlen));
922 		m->m_next = m_copy(m0, off, len);
923 		if (m->m_next == 0) {
924 			(void) m_free(m);
925 			error = ENOBUFS;	/* ??? */
926 			ipstat.ips_odropped++;
927 			goto sendorfree;
928 		}
929 		m->m_pkthdr.len = mhlen + len;
930 		m->m_pkthdr.rcvif = (struct ifnet *)0;
931 		m->m_pkthdr.csum_flags = m0->m_pkthdr.csum_flags;
932 		HTONS(mhip->ip_off);
933 		mhip->ip_sum = 0;
934 		if (sw_csum & CSUM_DELAY_IP) {
935 			if (mhip->ip_vhl == IP_VHL_BORING) {
936 				mhip->ip_sum = in_cksum_hdr(mhip);
937 			} else {
938 				mhip->ip_sum = in_cksum(m, mhlen);
939 			}
940 		}
941 		*mnext = m;
942 		mnext = &m->m_nextpkt;
943 		nfrags++;
944 	}
945 	ipstat.ips_ofragments += nfrags;
946 
947 	/* set first/last markers for fragment chain */
948 	m->m_flags |= M_LASTFRAG;
949 	m0->m_flags |= M_FIRSTFRAG | M_FRAG;
950 	m0->m_pkthdr.csum_data = nfrags;
951 
952 	/*
953 	 * Update first fragment by trimming what's been copied out
954 	 * and updating header, then send each fragment (in order).
955 	 */
956 	m = m0;
957 	m_adj(m, hlen + firstlen - (u_short)ip->ip_len);
958 	m->m_pkthdr.len = hlen + firstlen;
959 	ip->ip_len = htons((u_short)m->m_pkthdr.len);
960 	ip->ip_off |= IP_MF;
961 	HTONS(ip->ip_off);
962 	ip->ip_sum = 0;
963 	if (sw_csum & CSUM_DELAY_IP) {
964 		if (ip->ip_vhl == IP_VHL_BORING) {
965 			ip->ip_sum = in_cksum_hdr(ip);
966 		} else {
967 			ip->ip_sum = in_cksum(m, hlen);
968 		}
969 	}
970 sendorfree:
971 	for (m = m0; m; m = m0) {
972 		m0 = m->m_nextpkt;
973 		m->m_nextpkt = 0;
974 #ifdef IPSEC
975 		/* clean ipsec history once it goes out of the node */
976 		ipsec_delaux(m);
977 #endif
978 		if (error == 0) {
979 			/* Record statistics for this interface address. */
980 			if (ia != NULL) {
981 				ia->ia_ifa.if_opackets++;
982 				ia->ia_ifa.if_obytes += m->m_pkthdr.len;
983 			}
984 
985 			error = (*ifp->if_output)(ifp, m,
986 			    (struct sockaddr *)dst, ro->ro_rt);
987 		} else
988 			m_freem(m);
989 	}
990 
991 	if (error == 0)
992 		ipstat.ips_fragmented++;
993     }
994 done:
995 #ifdef IPSEC
996 	if (ro == &iproute && ro->ro_rt) {
997 		RTFREE(ro->ro_rt);
998 		ro->ro_rt = NULL;
999 	}
1000 	if (sp != NULL) {
1001 		KEYDEBUG(KEYDEBUG_IPSEC_STAMP,
1002 			printf("DP ip_output call free SP:%p\n", sp));
1003 		key_freesp(sp);
1004 	}
1005 #endif /* IPSEC */
1006 	return (error);
1007 bad:
1008 	m_freem(m);
1009 	goto done;
1010 }
1011 
1012 void
1013 in_delayed_cksum(struct mbuf *m)
1014 {
1015 	struct ip *ip;
1016 	u_short csum, offset;
1017 
1018 	ip = mtod(m, struct ip *);
1019 	offset = IP_VHL_HL(ip->ip_vhl) << 2 ;
1020 	csum = in_cksum_skip(m, ip->ip_len, offset);
1021 	if (m->m_pkthdr.csum_flags & CSUM_UDP && csum == 0)
1022 		csum = 0xffff;
1023 	offset += m->m_pkthdr.csum_data;	/* checksum offset */
1024 
1025 	if (offset + sizeof(u_short) > m->m_len) {
1026 		printf("delayed m_pullup, m->len: %d  off: %d  p: %d\n",
1027 		    m->m_len, offset, ip->ip_p);
1028 		/*
1029 		 * XXX
1030 		 * this shouldn't happen, but if it does, the
1031 		 * correct behavior may be to insert the checksum
1032 		 * in the existing chain instead of rearranging it.
1033 		 */
1034 		m = m_pullup(m, offset + sizeof(u_short));
1035 	}
1036 	*(u_short *)(m->m_data + offset) = csum;
1037 }
1038 
1039 /*
1040  * Insert IP options into preformed packet.
1041  * Adjust IP destination as required for IP source routing,
1042  * as indicated by a non-zero in_addr at the start of the options.
1043  *
1044  * XXX This routine assumes that the packet has no options in place.
1045  */
1046 static struct mbuf *
1047 ip_insertoptions(m, opt, phlen)
1048 	register struct mbuf *m;
1049 	struct mbuf *opt;
1050 	int *phlen;
1051 {
1052 	register struct ipoption *p = mtod(opt, struct ipoption *);
1053 	struct mbuf *n;
1054 	register struct ip *ip = mtod(m, struct ip *);
1055 	unsigned optlen;
1056 
1057 	optlen = opt->m_len - sizeof(p->ipopt_dst);
1058 	if (optlen + (u_short)ip->ip_len > IP_MAXPACKET)
1059 		return (m);		/* XXX should fail */
1060 	if (p->ipopt_dst.s_addr)
1061 		ip->ip_dst = p->ipopt_dst;
1062 	if (m->m_flags & M_EXT || m->m_data - optlen < m->m_pktdat) {
1063 		MGETHDR(n, M_DONTWAIT, MT_HEADER);
1064 		if (n == 0)
1065 			return (m);
1066 		n->m_pkthdr.rcvif = (struct ifnet *)0;
1067 		n->m_pkthdr.len = m->m_pkthdr.len + optlen;
1068 		m->m_len -= sizeof(struct ip);
1069 		m->m_data += sizeof(struct ip);
1070 		n->m_next = m;
1071 		m = n;
1072 		m->m_len = optlen + sizeof(struct ip);
1073 		m->m_data += max_linkhdr;
1074 		(void)memcpy(mtod(m, void *), ip, sizeof(struct ip));
1075 	} else {
1076 		m->m_data -= optlen;
1077 		m->m_len += optlen;
1078 		m->m_pkthdr.len += optlen;
1079 		ovbcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip));
1080 	}
1081 	ip = mtod(m, struct ip *);
1082 	bcopy(p->ipopt_list, ip + 1, optlen);
1083 	*phlen = sizeof(struct ip) + optlen;
1084 	ip->ip_vhl = IP_MAKE_VHL(IPVERSION, *phlen >> 2);
1085 	ip->ip_len += optlen;
1086 	return (m);
1087 }
1088 
1089 /*
1090  * Copy options from ip to jp,
1091  * omitting those not copied during fragmentation.
1092  */
1093 int
1094 ip_optcopy(ip, jp)
1095 	struct ip *ip, *jp;
1096 {
1097 	register u_char *cp, *dp;
1098 	int opt, optlen, cnt;
1099 
1100 	cp = (u_char *)(ip + 1);
1101 	dp = (u_char *)(jp + 1);
1102 	cnt = (IP_VHL_HL(ip->ip_vhl) << 2) - sizeof (struct ip);
1103 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1104 		opt = cp[0];
1105 		if (opt == IPOPT_EOL)
1106 			break;
1107 		if (opt == IPOPT_NOP) {
1108 			/* Preserve for IP mcast tunnel's LSRR alignment. */
1109 			*dp++ = IPOPT_NOP;
1110 			optlen = 1;
1111 			continue;
1112 		}
1113 #ifdef DIAGNOSTIC
1114 		if (cnt < IPOPT_OLEN + sizeof(*cp))
1115 			panic("malformed IPv4 option passed to ip_optcopy");
1116 #endif
1117 		optlen = cp[IPOPT_OLEN];
1118 #ifdef DIAGNOSTIC
1119 		if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1120 			panic("malformed IPv4 option passed to ip_optcopy");
1121 #endif
1122 		/* bogus lengths should have been caught by ip_dooptions */
1123 		if (optlen > cnt)
1124 			optlen = cnt;
1125 		if (IPOPT_COPIED(opt)) {
1126 			bcopy(cp, dp, optlen);
1127 			dp += optlen;
1128 		}
1129 	}
1130 	for (optlen = dp - (u_char *)(jp+1); optlen & 0x3; optlen++)
1131 		*dp++ = IPOPT_EOL;
1132 	return (optlen);
1133 }
1134 
1135 /*
1136  * IP socket option processing.
1137  */
1138 int
1139 ip_ctloutput(so, sopt)
1140 	struct socket *so;
1141 	struct sockopt *sopt;
1142 {
1143 	struct	inpcb *inp = sotoinpcb(so);
1144 	int	error, optval;
1145 
1146 	error = optval = 0;
1147 	if (sopt->sopt_level != IPPROTO_IP) {
1148 		return (EINVAL);
1149 	}
1150 
1151 	switch (sopt->sopt_dir) {
1152 	case SOPT_SET:
1153 		switch (sopt->sopt_name) {
1154 		case IP_OPTIONS:
1155 #ifdef notyet
1156 		case IP_RETOPTS:
1157 #endif
1158 		{
1159 			struct mbuf *m;
1160 			if (sopt->sopt_valsize > MLEN) {
1161 				error = EMSGSIZE;
1162 				break;
1163 			}
1164 			MGET(m, sopt->sopt_td ? M_TRYWAIT : M_DONTWAIT, MT_HEADER);
1165 			if (m == 0) {
1166 				error = ENOBUFS;
1167 				break;
1168 			}
1169 			m->m_len = sopt->sopt_valsize;
1170 			error = sooptcopyin(sopt, mtod(m, char *), m->m_len,
1171 					    m->m_len);
1172 
1173 			return (ip_pcbopts(sopt->sopt_name, &inp->inp_options,
1174 					   m));
1175 		}
1176 
1177 		case IP_TOS:
1178 		case IP_TTL:
1179 		case IP_RECVOPTS:
1180 		case IP_RECVRETOPTS:
1181 		case IP_RECVDSTADDR:
1182 		case IP_RECVIF:
1183 		case IP_FAITH:
1184 			error = sooptcopyin(sopt, &optval, sizeof optval,
1185 					    sizeof optval);
1186 			if (error)
1187 				break;
1188 
1189 			switch (sopt->sopt_name) {
1190 			case IP_TOS:
1191 				inp->inp_ip_tos = optval;
1192 				break;
1193 
1194 			case IP_TTL:
1195 				inp->inp_ip_ttl = optval;
1196 				break;
1197 #define	OPTSET(bit) \
1198 	if (optval) \
1199 		inp->inp_flags |= bit; \
1200 	else \
1201 		inp->inp_flags &= ~bit;
1202 
1203 			case IP_RECVOPTS:
1204 				OPTSET(INP_RECVOPTS);
1205 				break;
1206 
1207 			case IP_RECVRETOPTS:
1208 				OPTSET(INP_RECVRETOPTS);
1209 				break;
1210 
1211 			case IP_RECVDSTADDR:
1212 				OPTSET(INP_RECVDSTADDR);
1213 				break;
1214 
1215 			case IP_RECVIF:
1216 				OPTSET(INP_RECVIF);
1217 				break;
1218 
1219 			case IP_FAITH:
1220 				OPTSET(INP_FAITH);
1221 				break;
1222 			}
1223 			break;
1224 #undef OPTSET
1225 
1226 		case IP_MULTICAST_IF:
1227 		case IP_MULTICAST_VIF:
1228 		case IP_MULTICAST_TTL:
1229 		case IP_MULTICAST_LOOP:
1230 		case IP_ADD_MEMBERSHIP:
1231 		case IP_DROP_MEMBERSHIP:
1232 			error = ip_setmoptions(sopt, &inp->inp_moptions);
1233 			break;
1234 
1235 		case IP_PORTRANGE:
1236 			error = sooptcopyin(sopt, &optval, sizeof optval,
1237 					    sizeof optval);
1238 			if (error)
1239 				break;
1240 
1241 			switch (optval) {
1242 			case IP_PORTRANGE_DEFAULT:
1243 				inp->inp_flags &= ~(INP_LOWPORT);
1244 				inp->inp_flags &= ~(INP_HIGHPORT);
1245 				break;
1246 
1247 			case IP_PORTRANGE_HIGH:
1248 				inp->inp_flags &= ~(INP_LOWPORT);
1249 				inp->inp_flags |= INP_HIGHPORT;
1250 				break;
1251 
1252 			case IP_PORTRANGE_LOW:
1253 				inp->inp_flags &= ~(INP_HIGHPORT);
1254 				inp->inp_flags |= INP_LOWPORT;
1255 				break;
1256 
1257 			default:
1258 				error = EINVAL;
1259 				break;
1260 			}
1261 			break;
1262 
1263 #ifdef IPSEC
1264 		case IP_IPSEC_POLICY:
1265 		{
1266 			caddr_t req;
1267 			size_t len = 0;
1268 			int priv;
1269 			struct mbuf *m;
1270 			int optname;
1271 
1272 			if ((error = soopt_getm(sopt, &m)) != 0) /* XXX */
1273 				break;
1274 			if ((error = soopt_mcopyin(sopt, m)) != 0) /* XXX */
1275 				break;
1276 			priv = (sopt->sopt_td != NULL &&
1277 				suser_td(sopt->sopt_td) != 0) ? 0 : 1;
1278 			req = mtod(m, caddr_t);
1279 			len = m->m_len;
1280 			optname = sopt->sopt_name;
1281 			error = ipsec4_set_policy(inp, optname, req, len, priv);
1282 			m_freem(m);
1283 			break;
1284 		}
1285 #endif /*IPSEC*/
1286 
1287 		default:
1288 			error = ENOPROTOOPT;
1289 			break;
1290 		}
1291 		break;
1292 
1293 	case SOPT_GET:
1294 		switch (sopt->sopt_name) {
1295 		case IP_OPTIONS:
1296 		case IP_RETOPTS:
1297 			if (inp->inp_options)
1298 				error = sooptcopyout(sopt,
1299 						     mtod(inp->inp_options,
1300 							  char *),
1301 						     inp->inp_options->m_len);
1302 			else
1303 				sopt->sopt_valsize = 0;
1304 			break;
1305 
1306 		case IP_TOS:
1307 		case IP_TTL:
1308 		case IP_RECVOPTS:
1309 		case IP_RECVRETOPTS:
1310 		case IP_RECVDSTADDR:
1311 		case IP_RECVIF:
1312 		case IP_PORTRANGE:
1313 		case IP_FAITH:
1314 			switch (sopt->sopt_name) {
1315 
1316 			case IP_TOS:
1317 				optval = inp->inp_ip_tos;
1318 				break;
1319 
1320 			case IP_TTL:
1321 				optval = inp->inp_ip_ttl;
1322 				break;
1323 
1324 #define	OPTBIT(bit)	(inp->inp_flags & bit ? 1 : 0)
1325 
1326 			case IP_RECVOPTS:
1327 				optval = OPTBIT(INP_RECVOPTS);
1328 				break;
1329 
1330 			case IP_RECVRETOPTS:
1331 				optval = OPTBIT(INP_RECVRETOPTS);
1332 				break;
1333 
1334 			case IP_RECVDSTADDR:
1335 				optval = OPTBIT(INP_RECVDSTADDR);
1336 				break;
1337 
1338 			case IP_RECVIF:
1339 				optval = OPTBIT(INP_RECVIF);
1340 				break;
1341 
1342 			case IP_PORTRANGE:
1343 				if (inp->inp_flags & INP_HIGHPORT)
1344 					optval = IP_PORTRANGE_HIGH;
1345 				else if (inp->inp_flags & INP_LOWPORT)
1346 					optval = IP_PORTRANGE_LOW;
1347 				else
1348 					optval = 0;
1349 				break;
1350 
1351 			case IP_FAITH:
1352 				optval = OPTBIT(INP_FAITH);
1353 				break;
1354 			}
1355 			error = sooptcopyout(sopt, &optval, sizeof optval);
1356 			break;
1357 
1358 		case IP_MULTICAST_IF:
1359 		case IP_MULTICAST_VIF:
1360 		case IP_MULTICAST_TTL:
1361 		case IP_MULTICAST_LOOP:
1362 		case IP_ADD_MEMBERSHIP:
1363 		case IP_DROP_MEMBERSHIP:
1364 			error = ip_getmoptions(sopt, inp->inp_moptions);
1365 			break;
1366 
1367 #ifdef IPSEC
1368 		case IP_IPSEC_POLICY:
1369 		{
1370 			struct mbuf *m = NULL;
1371 			caddr_t req = NULL;
1372 			size_t len = 0;
1373 
1374 			if (m != 0) {
1375 				req = mtod(m, caddr_t);
1376 				len = m->m_len;
1377 			}
1378 			error = ipsec4_get_policy(sotoinpcb(so), req, len, &m);
1379 			if (error == 0)
1380 				error = soopt_mcopyout(sopt, m); /* XXX */
1381 			if (error == 0)
1382 				m_freem(m);
1383 			break;
1384 		}
1385 #endif /*IPSEC*/
1386 
1387 		default:
1388 			error = ENOPROTOOPT;
1389 			break;
1390 		}
1391 		break;
1392 	}
1393 	return (error);
1394 }
1395 
1396 /*
1397  * Set up IP options in pcb for insertion in output packets.
1398  * Store in mbuf with pointer in pcbopt, adding pseudo-option
1399  * with destination address if source routed.
1400  */
1401 static int
1402 ip_pcbopts(optname, pcbopt, m)
1403 	int optname;
1404 	struct mbuf **pcbopt;
1405 	register struct mbuf *m;
1406 {
1407 	register int cnt, optlen;
1408 	register u_char *cp;
1409 	u_char opt;
1410 
1411 	/* turn off any old options */
1412 	if (*pcbopt)
1413 		(void)m_free(*pcbopt);
1414 	*pcbopt = 0;
1415 	if (m == (struct mbuf *)0 || m->m_len == 0) {
1416 		/*
1417 		 * Only turning off any previous options.
1418 		 */
1419 		if (m)
1420 			(void)m_free(m);
1421 		return (0);
1422 	}
1423 
1424 	if (m->m_len % sizeof(int32_t))
1425 		goto bad;
1426 	/*
1427 	 * IP first-hop destination address will be stored before
1428 	 * actual options; move other options back
1429 	 * and clear it when none present.
1430 	 */
1431 	if (m->m_data + m->m_len + sizeof(struct in_addr) >= &m->m_dat[MLEN])
1432 		goto bad;
1433 	cnt = m->m_len;
1434 	m->m_len += sizeof(struct in_addr);
1435 	cp = mtod(m, u_char *) + sizeof(struct in_addr);
1436 	ovbcopy(mtod(m, caddr_t), (caddr_t)cp, (unsigned)cnt);
1437 	bzero(mtod(m, caddr_t), sizeof(struct in_addr));
1438 
1439 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
1440 		opt = cp[IPOPT_OPTVAL];
1441 		if (opt == IPOPT_EOL)
1442 			break;
1443 		if (opt == IPOPT_NOP)
1444 			optlen = 1;
1445 		else {
1446 			if (cnt < IPOPT_OLEN + sizeof(*cp))
1447 				goto bad;
1448 			optlen = cp[IPOPT_OLEN];
1449 			if (optlen < IPOPT_OLEN + sizeof(*cp) || optlen > cnt)
1450 				goto bad;
1451 		}
1452 		switch (opt) {
1453 
1454 		default:
1455 			break;
1456 
1457 		case IPOPT_LSRR:
1458 		case IPOPT_SSRR:
1459 			/*
1460 			 * user process specifies route as:
1461 			 *	->A->B->C->D
1462 			 * D must be our final destination (but we can't
1463 			 * check that since we may not have connected yet).
1464 			 * A is first hop destination, which doesn't appear in
1465 			 * actual IP option, but is stored before the options.
1466 			 */
1467 			if (optlen < IPOPT_MINOFF - 1 + sizeof(struct in_addr))
1468 				goto bad;
1469 			m->m_len -= sizeof(struct in_addr);
1470 			cnt -= sizeof(struct in_addr);
1471 			optlen -= sizeof(struct in_addr);
1472 			cp[IPOPT_OLEN] = optlen;
1473 			/*
1474 			 * Move first hop before start of options.
1475 			 */
1476 			bcopy((caddr_t)&cp[IPOPT_OFFSET+1], mtod(m, caddr_t),
1477 			    sizeof(struct in_addr));
1478 			/*
1479 			 * Then copy rest of options back
1480 			 * to close up the deleted entry.
1481 			 */
1482 			ovbcopy((caddr_t)(&cp[IPOPT_OFFSET+1] +
1483 			    sizeof(struct in_addr)),
1484 			    (caddr_t)&cp[IPOPT_OFFSET+1],
1485 			    (unsigned)cnt + sizeof(struct in_addr));
1486 			break;
1487 		}
1488 	}
1489 	if (m->m_len > MAX_IPOPTLEN + sizeof(struct in_addr))
1490 		goto bad;
1491 	*pcbopt = m;
1492 	return (0);
1493 
1494 bad:
1495 	(void)m_free(m);
1496 	return (EINVAL);
1497 }
1498 
1499 /*
1500  * XXX
1501  * The whole multicast option thing needs to be re-thought.
1502  * Several of these options are equally applicable to non-multicast
1503  * transmission, and one (IP_MULTICAST_TTL) totally duplicates a
1504  * standard option (IP_TTL).
1505  */
1506 
1507 /*
1508  * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index.
1509  */
1510 static struct ifnet *
1511 ip_multicast_if(a, ifindexp)
1512 	struct in_addr *a;
1513 	int *ifindexp;
1514 {
1515 	int ifindex;
1516 	struct ifnet *ifp;
1517 
1518 	if (ifindexp)
1519 		*ifindexp = 0;
1520 	if (ntohl(a->s_addr) >> 24 == 0) {
1521 		ifindex = ntohl(a->s_addr) & 0xffffff;
1522 		if (ifindex < 0 || if_index < ifindex)
1523 			return NULL;
1524 		ifp = ifnet_byindex(ifindex);
1525 		if (ifindexp)
1526 			*ifindexp = ifindex;
1527 	} else {
1528 		INADDR_TO_IFP(*a, ifp);
1529 	}
1530 	return ifp;
1531 }
1532 
1533 /*
1534  * Set the IP multicast options in response to user setsockopt().
1535  */
1536 static int
1537 ip_setmoptions(sopt, imop)
1538 	struct sockopt *sopt;
1539 	struct ip_moptions **imop;
1540 {
1541 	int error = 0;
1542 	int i;
1543 	struct in_addr addr;
1544 	struct ip_mreq mreq;
1545 	struct ifnet *ifp;
1546 	struct ip_moptions *imo = *imop;
1547 	struct route ro;
1548 	struct sockaddr_in *dst;
1549 	int ifindex;
1550 	int s;
1551 
1552 	if (imo == NULL) {
1553 		/*
1554 		 * No multicast option buffer attached to the pcb;
1555 		 * allocate one and initialize to default values.
1556 		 */
1557 		imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS,
1558 		    M_WAITOK);
1559 
1560 		if (imo == NULL)
1561 			return (ENOBUFS);
1562 		*imop = imo;
1563 		imo->imo_multicast_ifp = NULL;
1564 		imo->imo_multicast_addr.s_addr = INADDR_ANY;
1565 		imo->imo_multicast_vif = -1;
1566 		imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL;
1567 		imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP;
1568 		imo->imo_num_memberships = 0;
1569 	}
1570 
1571 	switch (sopt->sopt_name) {
1572 	/* store an index number for the vif you wanna use in the send */
1573 	case IP_MULTICAST_VIF:
1574 		if (legal_vif_num == 0) {
1575 			error = EOPNOTSUPP;
1576 			break;
1577 		}
1578 		error = sooptcopyin(sopt, &i, sizeof i, sizeof i);
1579 		if (error)
1580 			break;
1581 		if (!legal_vif_num(i) && (i != -1)) {
1582 			error = EINVAL;
1583 			break;
1584 		}
1585 		imo->imo_multicast_vif = i;
1586 		break;
1587 
1588 	case IP_MULTICAST_IF:
1589 		/*
1590 		 * Select the interface for outgoing multicast packets.
1591 		 */
1592 		error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr);
1593 		if (error)
1594 			break;
1595 		/*
1596 		 * INADDR_ANY is used to remove a previous selection.
1597 		 * When no interface is selected, a default one is
1598 		 * chosen every time a multicast packet is sent.
1599 		 */
1600 		if (addr.s_addr == INADDR_ANY) {
1601 			imo->imo_multicast_ifp = NULL;
1602 			break;
1603 		}
1604 		/*
1605 		 * The selected interface is identified by its local
1606 		 * IP address.  Find the interface and confirm that
1607 		 * it supports multicasting.
1608 		 */
1609 		s = splimp();
1610 		ifp = ip_multicast_if(&addr, &ifindex);
1611 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1612 			splx(s);
1613 			error = EADDRNOTAVAIL;
1614 			break;
1615 		}
1616 		imo->imo_multicast_ifp = ifp;
1617 		if (ifindex)
1618 			imo->imo_multicast_addr = addr;
1619 		else
1620 			imo->imo_multicast_addr.s_addr = INADDR_ANY;
1621 		splx(s);
1622 		break;
1623 
1624 	case IP_MULTICAST_TTL:
1625 		/*
1626 		 * Set the IP time-to-live for outgoing multicast packets.
1627 		 * The original multicast API required a char argument,
1628 		 * which is inconsistent with the rest of the socket API.
1629 		 * We allow either a char or an int.
1630 		 */
1631 		if (sopt->sopt_valsize == 1) {
1632 			u_char ttl;
1633 			error = sooptcopyin(sopt, &ttl, 1, 1);
1634 			if (error)
1635 				break;
1636 			imo->imo_multicast_ttl = ttl;
1637 		} else {
1638 			u_int ttl;
1639 			error = sooptcopyin(sopt, &ttl, sizeof ttl,
1640 					    sizeof ttl);
1641 			if (error)
1642 				break;
1643 			if (ttl > 255)
1644 				error = EINVAL;
1645 			else
1646 				imo->imo_multicast_ttl = ttl;
1647 		}
1648 		break;
1649 
1650 	case IP_MULTICAST_LOOP:
1651 		/*
1652 		 * Set the loopback flag for outgoing multicast packets.
1653 		 * Must be zero or one.  The original multicast API required a
1654 		 * char argument, which is inconsistent with the rest
1655 		 * of the socket API.  We allow either a char or an int.
1656 		 */
1657 		if (sopt->sopt_valsize == 1) {
1658 			u_char loop;
1659 			error = sooptcopyin(sopt, &loop, 1, 1);
1660 			if (error)
1661 				break;
1662 			imo->imo_multicast_loop = !!loop;
1663 		} else {
1664 			u_int loop;
1665 			error = sooptcopyin(sopt, &loop, sizeof loop,
1666 					    sizeof loop);
1667 			if (error)
1668 				break;
1669 			imo->imo_multicast_loop = !!loop;
1670 		}
1671 		break;
1672 
1673 	case IP_ADD_MEMBERSHIP:
1674 		/*
1675 		 * Add a multicast group membership.
1676 		 * Group must be a valid IP multicast address.
1677 		 */
1678 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1679 		if (error)
1680 			break;
1681 
1682 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1683 			error = EINVAL;
1684 			break;
1685 		}
1686 		s = splimp();
1687 		/*
1688 		 * If no interface address was provided, use the interface of
1689 		 * the route to the given multicast address.
1690 		 */
1691 		if (mreq.imr_interface.s_addr == INADDR_ANY) {
1692 			bzero((caddr_t)&ro, sizeof(ro));
1693 			dst = (struct sockaddr_in *)&ro.ro_dst;
1694 			dst->sin_len = sizeof(*dst);
1695 			dst->sin_family = AF_INET;
1696 			dst->sin_addr = mreq.imr_multiaddr;
1697 			rtalloc(&ro);
1698 			if (ro.ro_rt == NULL) {
1699 				error = EADDRNOTAVAIL;
1700 				splx(s);
1701 				break;
1702 			}
1703 			ifp = ro.ro_rt->rt_ifp;
1704 			rtfree(ro.ro_rt);
1705 		}
1706 		else {
1707 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1708 		}
1709 
1710 		/*
1711 		 * See if we found an interface, and confirm that it
1712 		 * supports multicast.
1713 		 */
1714 		if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) {
1715 			error = EADDRNOTAVAIL;
1716 			splx(s);
1717 			break;
1718 		}
1719 		/*
1720 		 * See if the membership already exists or if all the
1721 		 * membership slots are full.
1722 		 */
1723 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1724 			if (imo->imo_membership[i]->inm_ifp == ifp &&
1725 			    imo->imo_membership[i]->inm_addr.s_addr
1726 						== mreq.imr_multiaddr.s_addr)
1727 				break;
1728 		}
1729 		if (i < imo->imo_num_memberships) {
1730 			error = EADDRINUSE;
1731 			splx(s);
1732 			break;
1733 		}
1734 		if (i == IP_MAX_MEMBERSHIPS) {
1735 			error = ETOOMANYREFS;
1736 			splx(s);
1737 			break;
1738 		}
1739 		/*
1740 		 * Everything looks good; add a new record to the multicast
1741 		 * address list for the given interface.
1742 		 */
1743 		if ((imo->imo_membership[i] =
1744 		    in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) {
1745 			error = ENOBUFS;
1746 			splx(s);
1747 			break;
1748 		}
1749 		++imo->imo_num_memberships;
1750 		splx(s);
1751 		break;
1752 
1753 	case IP_DROP_MEMBERSHIP:
1754 		/*
1755 		 * Drop a multicast group membership.
1756 		 * Group must be a valid IP multicast address.
1757 		 */
1758 		error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq);
1759 		if (error)
1760 			break;
1761 
1762 		if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) {
1763 			error = EINVAL;
1764 			break;
1765 		}
1766 
1767 		s = splimp();
1768 		/*
1769 		 * If an interface address was specified, get a pointer
1770 		 * to its ifnet structure.
1771 		 */
1772 		if (mreq.imr_interface.s_addr == INADDR_ANY)
1773 			ifp = NULL;
1774 		else {
1775 			ifp = ip_multicast_if(&mreq.imr_interface, NULL);
1776 			if (ifp == NULL) {
1777 				error = EADDRNOTAVAIL;
1778 				splx(s);
1779 				break;
1780 			}
1781 		}
1782 		/*
1783 		 * Find the membership in the membership array.
1784 		 */
1785 		for (i = 0; i < imo->imo_num_memberships; ++i) {
1786 			if ((ifp == NULL ||
1787 			     imo->imo_membership[i]->inm_ifp == ifp) &&
1788 			     imo->imo_membership[i]->inm_addr.s_addr ==
1789 			     mreq.imr_multiaddr.s_addr)
1790 				break;
1791 		}
1792 		if (i == imo->imo_num_memberships) {
1793 			error = EADDRNOTAVAIL;
1794 			splx(s);
1795 			break;
1796 		}
1797 		/*
1798 		 * Give up the multicast address record to which the
1799 		 * membership points.
1800 		 */
1801 		in_delmulti(imo->imo_membership[i]);
1802 		/*
1803 		 * Remove the gap in the membership array.
1804 		 */
1805 		for (++i; i < imo->imo_num_memberships; ++i)
1806 			imo->imo_membership[i-1] = imo->imo_membership[i];
1807 		--imo->imo_num_memberships;
1808 		splx(s);
1809 		break;
1810 
1811 	default:
1812 		error = EOPNOTSUPP;
1813 		break;
1814 	}
1815 
1816 	/*
1817 	 * If all options have default values, no need to keep the mbuf.
1818 	 */
1819 	if (imo->imo_multicast_ifp == NULL &&
1820 	    imo->imo_multicast_vif == -1 &&
1821 	    imo->imo_multicast_ttl == IP_DEFAULT_MULTICAST_TTL &&
1822 	    imo->imo_multicast_loop == IP_DEFAULT_MULTICAST_LOOP &&
1823 	    imo->imo_num_memberships == 0) {
1824 		free(*imop, M_IPMOPTS);
1825 		*imop = NULL;
1826 	}
1827 
1828 	return (error);
1829 }
1830 
1831 /*
1832  * Return the IP multicast options in response to user getsockopt().
1833  */
1834 static int
1835 ip_getmoptions(sopt, imo)
1836 	struct sockopt *sopt;
1837 	register struct ip_moptions *imo;
1838 {
1839 	struct in_addr addr;
1840 	struct in_ifaddr *ia;
1841 	int error, optval;
1842 	u_char coptval;
1843 
1844 	error = 0;
1845 	switch (sopt->sopt_name) {
1846 	case IP_MULTICAST_VIF:
1847 		if (imo != NULL)
1848 			optval = imo->imo_multicast_vif;
1849 		else
1850 			optval = -1;
1851 		error = sooptcopyout(sopt, &optval, sizeof optval);
1852 		break;
1853 
1854 	case IP_MULTICAST_IF:
1855 		if (imo == NULL || imo->imo_multicast_ifp == NULL)
1856 			addr.s_addr = INADDR_ANY;
1857 		else if (imo->imo_multicast_addr.s_addr) {
1858 			/* return the value user has set */
1859 			addr = imo->imo_multicast_addr;
1860 		} else {
1861 			IFP_TO_IA(imo->imo_multicast_ifp, ia);
1862 			addr.s_addr = (ia == NULL) ? INADDR_ANY
1863 				: IA_SIN(ia)->sin_addr.s_addr;
1864 		}
1865 		error = sooptcopyout(sopt, &addr, sizeof addr);
1866 		break;
1867 
1868 	case IP_MULTICAST_TTL:
1869 		if (imo == 0)
1870 			optval = coptval = IP_DEFAULT_MULTICAST_TTL;
1871 		else
1872 			optval = coptval = imo->imo_multicast_ttl;
1873 		if (sopt->sopt_valsize == 1)
1874 			error = sooptcopyout(sopt, &coptval, 1);
1875 		else
1876 			error = sooptcopyout(sopt, &optval, sizeof optval);
1877 		break;
1878 
1879 	case IP_MULTICAST_LOOP:
1880 		if (imo == 0)
1881 			optval = coptval = IP_DEFAULT_MULTICAST_LOOP;
1882 		else
1883 			optval = coptval = imo->imo_multicast_loop;
1884 		if (sopt->sopt_valsize == 1)
1885 			error = sooptcopyout(sopt, &coptval, 1);
1886 		else
1887 			error = sooptcopyout(sopt, &optval, sizeof optval);
1888 		break;
1889 
1890 	default:
1891 		error = ENOPROTOOPT;
1892 		break;
1893 	}
1894 	return (error);
1895 }
1896 
1897 /*
1898  * Discard the IP multicast options.
1899  */
1900 void
1901 ip_freemoptions(imo)
1902 	register struct ip_moptions *imo;
1903 {
1904 	register int i;
1905 
1906 	if (imo != NULL) {
1907 		for (i = 0; i < imo->imo_num_memberships; ++i)
1908 			in_delmulti(imo->imo_membership[i]);
1909 		free(imo, M_IPMOPTS);
1910 	}
1911 }
1912 
1913 /*
1914  * Routine called from ip_output() to loop back a copy of an IP multicast
1915  * packet to the input queue of a specified interface.  Note that this
1916  * calls the output routine of the loopback "driver", but with an interface
1917  * pointer that might NOT be a loopback interface -- evil, but easier than
1918  * replicating that code here.
1919  */
1920 static void
1921 ip_mloopback(ifp, m, dst, hlen)
1922 	struct ifnet *ifp;
1923 	register struct mbuf *m;
1924 	register struct sockaddr_in *dst;
1925 	int hlen;
1926 {
1927 	register struct ip *ip;
1928 	struct mbuf *copym;
1929 
1930 	copym = m_copy(m, 0, M_COPYALL);
1931 	if (copym != NULL && (copym->m_flags & M_EXT || copym->m_len < hlen))
1932 		copym = m_pullup(copym, hlen);
1933 	if (copym != NULL) {
1934 		/*
1935 		 * We don't bother to fragment if the IP length is greater
1936 		 * than the interface's MTU.  Can this possibly matter?
1937 		 */
1938 		ip = mtod(copym, struct ip *);
1939 		HTONS(ip->ip_len);
1940 		HTONS(ip->ip_off);
1941 		ip->ip_sum = 0;
1942 		if (ip->ip_vhl == IP_VHL_BORING) {
1943 			ip->ip_sum = in_cksum_hdr(ip);
1944 		} else {
1945 			ip->ip_sum = in_cksum(copym, hlen);
1946 		}
1947 		/*
1948 		 * NB:
1949 		 * It's not clear whether there are any lingering
1950 		 * reentrancy problems in other areas which might
1951 		 * be exposed by using ip_input directly (in
1952 		 * particular, everything which modifies the packet
1953 		 * in-place).  Yet another option is using the
1954 		 * protosw directly to deliver the looped back
1955 		 * packet.  For the moment, we'll err on the side
1956 		 * of safety by using if_simloop().
1957 		 */
1958 #if 1 /* XXX */
1959 		if (dst->sin_family != AF_INET) {
1960 			printf("ip_mloopback: bad address family %d\n",
1961 						dst->sin_family);
1962 			dst->sin_family = AF_INET;
1963 		}
1964 #endif
1965 
1966 #ifdef notdef
1967 		copym->m_pkthdr.rcvif = ifp;
1968 		ip_input(copym);
1969 #else
1970 		/* if the checksum hasn't been computed, mark it as valid */
1971 		if (copym->m_pkthdr.csum_flags & CSUM_DELAY_DATA) {
1972 			copym->m_pkthdr.csum_flags |=
1973 			    CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1974 			copym->m_pkthdr.csum_data = 0xffff;
1975 		}
1976 		if_simloop(ifp, copym, dst->sin_family, 0);
1977 #endif
1978 	}
1979 }
1980