xref: /freebsd/sys/netinet/ip_input.c (revision 5ebc7e6281887681c3a348a5a4c902e262ccd656)
1 /*
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 3. All advertising materials mentioning features or use of this software
14  *    must display the following acknowledgement:
15  *	This product includes software developed by the University of
16  *	California, Berkeley and its contributors.
17  * 4. Neither the name of the University nor the names of its contributors
18  *    may be used to endorse or promote products derived from this software
19  *    without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31  * SUCH DAMAGE.
32  *
33  *	@(#)ip_input.c	8.2 (Berkeley) 1/4/94
34  * $Id: ip_input.c,v 1.20 1995/05/09 13:35:45 davidg Exp $
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/domain.h>
42 #include <sys/protosw.h>
43 #include <sys/socket.h>
44 #include <sys/errno.h>
45 #include <sys/time.h>
46 #include <sys/kernel.h>
47 #include <sys/syslog.h>
48 
49 #include <vm/vm.h>
50 #include <sys/sysctl.h>
51 
52 #include <net/if.h>
53 #include <net/route.h>
54 #include <net/netisr.h>
55 
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/in_var.h>
59 #include <netinet/ip.h>
60 #include <netinet/in_pcb.h>
61 #include <netinet/in_var.h>
62 #include <netinet/ip_var.h>
63 #include <netinet/ip_icmp.h>
64 
65 #include <netinet/ip_fw.h>
66 
67 #include <sys/socketvar.h>
68 struct socket *ip_rsvpd;
69 
70 #ifndef	IPFORWARDING
71 #ifdef GATEWAY
72 #define	IPFORWARDING	1	/* forward IP packets not for us */
73 #else /* GATEWAY */
74 #define	IPFORWARDING	0	/* don't forward IP packets not for us */
75 #endif /* GATEWAY */
76 #endif /* IPFORWARDING */
77 #ifndef	IPSENDREDIRECTS
78 #define	IPSENDREDIRECTS	1
79 #endif
80 int	ipforwarding = IPFORWARDING;
81 int	ipsendredirects = IPSENDREDIRECTS;
82 int	ip_defttl = IPDEFTTL;
83 int	ip_dosourceroute = 0;
84 #ifdef DIAGNOSTIC
85 int	ipprintfs = 0;
86 #endif
87 
88 extern	struct domain inetdomain;
89 extern	struct protosw inetsw[];
90 u_char	ip_protox[IPPROTO_MAX];
91 int	ipqmaxlen = IFQ_MAXLEN;
92 struct	in_ifaddr *in_ifaddr;			/* first inet address */
93 struct	ifqueue ipintrq;
94 
95 struct ipstat ipstat;
96 struct ipq ipq;
97 
98 /*
99  * We need to save the IP options in case a protocol wants to respond
100  * to an incoming packet over the same route if the packet got here
101  * using IP source routing.  This allows connection establishment and
102  * maintenance when the remote end is on a network that is not known
103  * to us.
104  */
105 int	ip_nhops = 0;
106 static	struct ip_srcrt {
107 	struct	in_addr dst;			/* final destination */
108 	char	nop;				/* one NOP to align */
109 	char	srcopt[IPOPT_OFFSET + 1];	/* OPTVAL, OLEN and OFFSET */
110 	struct	in_addr route[MAX_IPOPTLEN/sizeof(struct in_addr)];
111 } ip_srcrt;
112 
113 static void save_rte __P((u_char *, struct in_addr));
114 /*
115  * IP initialization: fill in IP protocol switch table.
116  * All protocols not implemented in kernel go to raw IP protocol handler.
117  */
118 void
119 ip_init()
120 {
121 	register struct protosw *pr;
122 	register int i;
123 
124 	pr = pffindproto(PF_INET, IPPROTO_RAW, SOCK_RAW);
125 	if (pr == 0)
126 		panic("ip_init");
127 	for (i = 0; i < IPPROTO_MAX; i++)
128 		ip_protox[i] = pr - inetsw;
129 	for (pr = inetdomain.dom_protosw;
130 	    pr < inetdomain.dom_protoswNPROTOSW; pr++)
131 		if (pr->pr_domain->dom_family == PF_INET &&
132 		    pr->pr_protocol && pr->pr_protocol != IPPROTO_RAW)
133 			ip_protox[pr->pr_protocol] = pr - inetsw;
134 	ipq.next = ipq.prev = &ipq;
135 	ip_id = time.tv_sec & 0xffff;
136 	ipintrq.ifq_maxlen = ipqmaxlen;
137 }
138 
139 struct	sockaddr_in ipaddr = { sizeof(ipaddr), AF_INET };
140 struct	route ipforward_rt;
141 
142 /*
143  * Ip input routine.  Checksum and byte swap header.  If fragmented
144  * try to reassemble.  Process options.  Pass to next level.
145  */
146 void
147 ipintr(void)
148 {
149 	register struct ip *ip;
150 	register struct mbuf *m;
151 	register struct ipq *fp;
152 	register struct in_ifaddr *ia;
153 	int hlen, s;
154 
155 next:
156 	/*
157 	 * Get next datagram off input queue and get IP header
158 	 * in first mbuf.
159 	 */
160 	s = splimp();
161 	IF_DEQUEUE(&ipintrq, m);
162 	splx(s);
163 	if (m == 0)
164 		return;
165 #ifdef	DIAGNOSTIC
166 	if ((m->m_flags & M_PKTHDR) == 0)
167 		panic("ipintr no HDR");
168 #endif
169 	/*
170 	 * If no IP addresses have been set yet but the interfaces
171 	 * are receiving, can't do anything with incoming packets yet.
172 	 */
173 	if (in_ifaddr == NULL)
174 		goto bad;
175 	ipstat.ips_total++;
176 	if (m->m_len < sizeof (struct ip) &&
177 	    (m = m_pullup(m, sizeof (struct ip))) == 0) {
178 		ipstat.ips_toosmall++;
179 		goto next;
180 	}
181 	ip = mtod(m, struct ip *);
182 	if (ip->ip_v != IPVERSION) {
183 		ipstat.ips_badvers++;
184 		goto bad;
185 	}
186 	hlen = ip->ip_hl << 2;
187 	if (hlen < sizeof(struct ip)) {	/* minimum header length */
188 		ipstat.ips_badhlen++;
189 		goto bad;
190 	}
191 	if (hlen > m->m_len) {
192 		if ((m = m_pullup(m, hlen)) == 0) {
193 			ipstat.ips_badhlen++;
194 			goto next;
195 		}
196 		ip = mtod(m, struct ip *);
197 	}
198 	ip->ip_sum = in_cksum(m, hlen);
199 	if (ip->ip_sum) {
200 		ipstat.ips_badsum++;
201 		goto bad;
202 	}
203 
204 	/*
205 	 * Convert fields to host representation.
206 	 */
207 	NTOHS(ip->ip_len);
208 	if (ip->ip_len < hlen) {
209 		ipstat.ips_badlen++;
210 		goto bad;
211 	}
212 	NTOHS(ip->ip_id);
213 	NTOHS(ip->ip_off);
214 
215 	/*
216 	 * Check that the amount of data in the buffers
217 	 * is as at least much as the IP header would have us expect.
218 	 * Trim mbufs if longer than we expect.
219 	 * Drop packet if shorter than we expect.
220 	 */
221 	if (m->m_pkthdr.len < ip->ip_len) {
222 		ipstat.ips_tooshort++;
223 		goto bad;
224 	}
225 	if (m->m_pkthdr.len > ip->ip_len) {
226 		if (m->m_len == m->m_pkthdr.len) {
227 			m->m_len = ip->ip_len;
228 			m->m_pkthdr.len = ip->ip_len;
229 		} else
230 			m_adj(m, ip->ip_len - m->m_pkthdr.len);
231 	}
232 	/*
233 	 * IpHack's section.
234 	 * Right now when no processing on packet has done
235 	 * and it is still fresh out of network we do our black
236 	 * deals with it.
237 	 * - Firewall: deny/allow
238 	 * - Wrap: fake packet's addr/port <unimpl.>
239 	 * - Encapsulate: put it in another IP and send out. <unimp.>
240  	 */
241 
242         if (ip_fw_chk_ptr!=NULL)
243                if (!(*ip_fw_chk_ptr)(ip,m->m_pkthdr.rcvif,ip_fw_chain) ) {
244                        goto bad;
245                }
246 
247 	/*
248 	 * Process options and, if not destined for us,
249 	 * ship it on.  ip_dooptions returns 1 when an
250 	 * error was detected (causing an icmp message
251 	 * to be sent and the original packet to be freed).
252 	 */
253 	ip_nhops = 0;		/* for source routed packets */
254 	if (hlen > sizeof (struct ip) && ip_dooptions(m))
255 		goto next;
256 
257         /* greedy RSVP, snatches any PATH packet of the RSVP protocol and no
258          * matter if it is destined to another node, or whether it is
259          * a multicast one, RSVP wants it! and prevents it from being forwarded
260          * anywhere else. Also checks if the rsvp daemon is running before
261 	 * grabbing the packet.
262          */
263 	if (ip_rsvpd != NULL && ip->ip_p==IPPROTO_RSVP)
264 		goto ours;
265 
266 	/*
267 	 * Check our list of addresses, to see if the packet is for us.
268 	 */
269 	for (ia = in_ifaddr; ia; ia = ia->ia_next) {
270 #define	satosin(sa)	((struct sockaddr_in *)(sa))
271 
272 		if (IA_SIN(ia)->sin_addr.s_addr == ip->ip_dst.s_addr)
273 			goto ours;
274 		if (
275 #ifdef	DIRECTED_BROADCAST
276 		    ia->ia_ifp == m->m_pkthdr.rcvif &&
277 #endif
278 		    (ia->ia_ifp->if_flags & IFF_BROADCAST)) {
279 			u_long t;
280 
281 			if (satosin(&ia->ia_broadaddr)->sin_addr.s_addr ==
282 			    ip->ip_dst.s_addr)
283 				goto ours;
284 			if (ip->ip_dst.s_addr == ia->ia_netbroadcast.s_addr)
285 				goto ours;
286 			/*
287 			 * Look for all-0's host part (old broadcast addr),
288 			 * either for subnet or net.
289 			 */
290 			t = ntohl(ip->ip_dst.s_addr);
291 			if (t == ia->ia_subnet)
292 				goto ours;
293 			if (t == ia->ia_net)
294 				goto ours;
295 		}
296 	}
297 	if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
298 		struct in_multi *inm;
299 		if (ip_mrouter) {
300 			/*
301 			 * If we are acting as a multicast router, all
302 			 * incoming multicast packets are passed to the
303 			 * kernel-level multicast forwarding function.
304 			 * The packet is returned (relatively) intact; if
305 			 * ip_mforward() returns a non-zero value, the packet
306 			 * must be discarded, else it may be accepted below.
307 			 *
308 			 * (The IP ident field is put in the same byte order
309 			 * as expected when ip_mforward() is called from
310 			 * ip_output().)
311 			 */
312 			ip->ip_id = htons(ip->ip_id);
313 			if (ip_mforward(ip, m->m_pkthdr.rcvif, m, 0) != 0) {
314 				ipstat.ips_cantforward++;
315 				m_freem(m);
316 				goto next;
317 			}
318 			ip->ip_id = ntohs(ip->ip_id);
319 
320 			/*
321 			 * The process-level routing demon needs to receive
322 			 * all multicast IGMP packets, whether or not this
323 			 * host belongs to their destination groups.
324 			 */
325 			if (ip->ip_p == IPPROTO_IGMP)
326 				goto ours;
327 			ipstat.ips_forward++;
328 		}
329 		/*
330 		 * See if we belong to the destination multicast group on the
331 		 * arrival interface.
332 		 */
333 		IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
334 		if (inm == NULL) {
335 			ipstat.ips_cantforward++;
336 			m_freem(m);
337 			goto next;
338 		}
339 		goto ours;
340 	}
341 	if (ip->ip_dst.s_addr == (u_long)INADDR_BROADCAST)
342 		goto ours;
343 	if (ip->ip_dst.s_addr == INADDR_ANY)
344 		goto ours;
345 
346 	/*
347 	 * Not for us; forward if possible and desirable.
348 	 */
349 	if (ipforwarding == 0) {
350 		ipstat.ips_cantforward++;
351 		m_freem(m);
352 	} else
353 		ip_forward(m, 0);
354 	goto next;
355 
356 ours:
357 
358 		/*
359 		 * If packet came to us we count it...
360 		 * This way we count all incoming packets which has
361 		 * not been forwarded...
362 		 * Do not convert ip_len to host byte order when
363 		 * counting,ppl already made it for us before..
364 		 */
365 	if (ip_acct_cnt_ptr!=NULL)
366 		(*ip_acct_cnt_ptr)(ip,m->m_pkthdr.rcvif,ip_acct_chain,0);
367 
368 	/*
369 	 * If offset or IP_MF are set, must reassemble.
370 	 * Otherwise, nothing need be done.
371 	 * (We could look in the reassembly queue to see
372 	 * if the packet was previously fragmented,
373 	 * but it's not worth the time; just let them time out.)
374 	 */
375 	if (ip->ip_off &~ IP_DF) {
376 		if (m->m_flags & M_EXT) {		/* XXX */
377 			if ((m = m_pullup(m, sizeof (struct ip))) == 0) {
378 				ipstat.ips_toosmall++;
379 				goto next;
380 			}
381 			ip = mtod(m, struct ip *);
382 		}
383 		/*
384 		 * Look for queue of fragments
385 		 * of this datagram.
386 		 */
387 		for (fp = ipq.next; fp != &ipq; fp = fp->next)
388 			if (ip->ip_id == fp->ipq_id &&
389 			    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
390 			    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
391 			    ip->ip_p == fp->ipq_p)
392 				goto found;
393 		fp = 0;
394 found:
395 
396 		/*
397 		 * Adjust ip_len to not reflect header,
398 		 * set ip_mff if more fragments are expected,
399 		 * convert offset of this to bytes.
400 		 */
401 		ip->ip_len -= hlen;
402 		((struct ipasfrag *)ip)->ipf_mff &= ~1;
403 		if (ip->ip_off & IP_MF)
404 			((struct ipasfrag *)ip)->ipf_mff |= 1;
405 		ip->ip_off <<= 3;
406 
407 		/*
408 		 * If datagram marked as having more fragments
409 		 * or if this is not the first fragment,
410 		 * attempt reassembly; if it succeeds, proceed.
411 		 */
412 		if (((struct ipasfrag *)ip)->ipf_mff & 1 || ip->ip_off) {
413 			ipstat.ips_fragments++;
414 			ip = ip_reass((struct ipasfrag *)ip, fp);
415 			if (ip == 0)
416 				goto next;
417 			ipstat.ips_reassembled++;
418 			m = dtom(ip);
419 		} else
420 			if (fp)
421 				ip_freef(fp);
422 	} else
423 		ip->ip_len -= hlen;
424 
425 	/*
426 	 * Switch out to protocol's input routine.
427 	 */
428 	ipstat.ips_delivered++;
429 	(*inetsw[ip_protox[ip->ip_p]].pr_input)(m, hlen);
430 	goto next;
431 bad:
432 	m_freem(m);
433 	goto next;
434 }
435 
436 NETISR_SET(NETISR_IP, ipintr);
437 
438 /*
439  * Take incoming datagram fragment and try to
440  * reassemble it into whole datagram.  If a chain for
441  * reassembly of this datagram already exists, then it
442  * is given as fp; otherwise have to make a chain.
443  */
444 struct ip *
445 ip_reass(ip, fp)
446 	register struct ipasfrag *ip;
447 	register struct ipq *fp;
448 {
449 	register struct mbuf *m = dtom(ip);
450 	register struct ipasfrag *q;
451 	struct mbuf *t;
452 	int hlen = ip->ip_hl << 2;
453 	int i, next;
454 
455 	/*
456 	 * Presence of header sizes in mbufs
457 	 * would confuse code below.
458 	 */
459 	m->m_data += hlen;
460 	m->m_len -= hlen;
461 
462 	/*
463 	 * If first fragment to arrive, create a reassembly queue.
464 	 */
465 	if (fp == 0) {
466 		if ((t = m_get(M_DONTWAIT, MT_FTABLE)) == NULL)
467 			goto dropfrag;
468 		fp = mtod(t, struct ipq *);
469 		insque(fp, &ipq);
470 		fp->ipq_ttl = IPFRAGTTL;
471 		fp->ipq_p = ip->ip_p;
472 		fp->ipq_id = ip->ip_id;
473 		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
474 		fp->ipq_src = ((struct ip *)ip)->ip_src;
475 		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
476 		q = (struct ipasfrag *)fp;
477 		goto insert;
478 	}
479 
480 	/*
481 	 * Find a segment which begins after this one does.
482 	 */
483 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
484 		if (q->ip_off > ip->ip_off)
485 			break;
486 
487 	/*
488 	 * If there is a preceding segment, it may provide some of
489 	 * our data already.  If so, drop the data from the incoming
490 	 * segment.  If it provides all of our data, drop us.
491 	 */
492 	if (q->ipf_prev != (struct ipasfrag *)fp) {
493 		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
494 		if (i > 0) {
495 			if (i >= ip->ip_len)
496 				goto dropfrag;
497 			m_adj(dtom(ip), i);
498 			ip->ip_off += i;
499 			ip->ip_len -= i;
500 		}
501 	}
502 
503 	/*
504 	 * While we overlap succeeding segments trim them or,
505 	 * if they are completely covered, dequeue them.
506 	 */
507 	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
508 		i = (ip->ip_off + ip->ip_len) - q->ip_off;
509 		if (i < q->ip_len) {
510 			q->ip_len -= i;
511 			q->ip_off += i;
512 			m_adj(dtom(q), i);
513 			break;
514 		}
515 		q = q->ipf_next;
516 		m_freem(dtom(q->ipf_prev));
517 		ip_deq(q->ipf_prev);
518 	}
519 
520 insert:
521 	/*
522 	 * Stick new segment in its place;
523 	 * check for complete reassembly.
524 	 */
525 	ip_enq(ip, q->ipf_prev);
526 	next = 0;
527 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
528 		if (q->ip_off != next)
529 			return (0);
530 		next += q->ip_len;
531 	}
532 	if (q->ipf_prev->ipf_mff & 1)
533 		return (0);
534 
535 	/*
536 	 * Reassembly is complete; concatenate fragments.
537 	 */
538 	q = fp->ipq_next;
539 	m = dtom(q);
540 	t = m->m_next;
541 	m->m_next = 0;
542 	m_cat(m, t);
543 	q = q->ipf_next;
544 	while (q != (struct ipasfrag *)fp) {
545 		t = dtom(q);
546 		q = q->ipf_next;
547 		m_cat(m, t);
548 	}
549 
550 	/*
551 	 * Create header for new ip packet by
552 	 * modifying header of first packet;
553 	 * dequeue and discard fragment reassembly header.
554 	 * Make header visible.
555 	 */
556 	ip = fp->ipq_next;
557 	ip->ip_len = next;
558 	ip->ipf_mff &= ~1;
559 	((struct ip *)ip)->ip_src = fp->ipq_src;
560 	((struct ip *)ip)->ip_dst = fp->ipq_dst;
561 	remque(fp);
562 	(void) m_free(dtom(fp));
563 	m = dtom(ip);
564 	m->m_len += (ip->ip_hl << 2);
565 	m->m_data -= (ip->ip_hl << 2);
566 	/* some debugging cruft by sklower, below, will go away soon */
567 	if (m->m_flags & M_PKTHDR) { /* XXX this should be done elsewhere */
568 		register int plen = 0;
569 		for (t = m; m; m = m->m_next)
570 			plen += m->m_len;
571 		t->m_pkthdr.len = plen;
572 	}
573 	return ((struct ip *)ip);
574 
575 dropfrag:
576 	ipstat.ips_fragdropped++;
577 	m_freem(m);
578 	return (0);
579 }
580 
581 /*
582  * Free a fragment reassembly header and all
583  * associated datagrams.
584  */
585 void
586 ip_freef(fp)
587 	struct ipq *fp;
588 {
589 	register struct ipasfrag *q, *p;
590 
591 	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = p) {
592 		p = q->ipf_next;
593 		ip_deq(q);
594 		m_freem(dtom(q));
595 	}
596 	remque(fp);
597 	(void) m_free(dtom(fp));
598 }
599 
600 /*
601  * Put an ip fragment on a reassembly chain.
602  * Like insque, but pointers in middle of structure.
603  */
604 void
605 ip_enq(p, prev)
606 	register struct ipasfrag *p, *prev;
607 {
608 
609 	p->ipf_prev = prev;
610 	p->ipf_next = prev->ipf_next;
611 	prev->ipf_next->ipf_prev = p;
612 	prev->ipf_next = p;
613 }
614 
615 /*
616  * To ip_enq as remque is to insque.
617  */
618 void
619 ip_deq(p)
620 	register struct ipasfrag *p;
621 {
622 
623 	p->ipf_prev->ipf_next = p->ipf_next;
624 	p->ipf_next->ipf_prev = p->ipf_prev;
625 }
626 
627 /*
628  * IP timer processing;
629  * if a timer expires on a reassembly
630  * queue, discard it.
631  */
632 void
633 ip_slowtimo()
634 {
635 	register struct ipq *fp;
636 	int s = splnet();
637 
638 	fp = ipq.next;
639 	if (fp == 0) {
640 		splx(s);
641 		return;
642 	}
643 	while (fp != &ipq) {
644 		--fp->ipq_ttl;
645 		fp = fp->next;
646 		if (fp->prev->ipq_ttl == 0) {
647 			ipstat.ips_fragtimeout++;
648 			ip_freef(fp->prev);
649 		}
650 	}
651 	splx(s);
652 }
653 
654 /*
655  * Drain off all datagram fragments.
656  */
657 void
658 ip_drain()
659 {
660 
661 	while (ipq.next != &ipq) {
662 		ipstat.ips_fragdropped++;
663 		ip_freef(ipq.next);
664 	}
665 }
666 
667 /*
668  * Do option processing on a datagram,
669  * possibly discarding it if bad options are encountered,
670  * or forwarding it if source-routed.
671  * Returns 1 if packet has been forwarded/freed,
672  * 0 if the packet should be processed further.
673  */
674 int
675 ip_dooptions(m)
676 	struct mbuf *m;
677 {
678 	register struct ip *ip = mtod(m, struct ip *);
679 	register u_char *cp;
680 	register struct ip_timestamp *ipt;
681 	register struct in_ifaddr *ia;
682 	int opt, optlen, cnt, off, code, type = ICMP_PARAMPROB, forward = 0;
683 	struct in_addr *sin, dst;
684 	n_time ntime;
685 
686 	dst = ip->ip_dst;
687 	cp = (u_char *)(ip + 1);
688 	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
689 	for (; cnt > 0; cnt -= optlen, cp += optlen) {
690 		opt = cp[IPOPT_OPTVAL];
691 		if (opt == IPOPT_EOL)
692 			break;
693 		if (opt == IPOPT_NOP)
694 			optlen = 1;
695 		else {
696 			optlen = cp[IPOPT_OLEN];
697 			if (optlen <= 0 || optlen > cnt) {
698 				code = &cp[IPOPT_OLEN] - (u_char *)ip;
699 				goto bad;
700 			}
701 		}
702 		switch (opt) {
703 
704 		default:
705 			break;
706 
707 		/*
708 		 * Source routing with record.
709 		 * Find interface with current destination address.
710 		 * If none on this machine then drop if strictly routed,
711 		 * or do nothing if loosely routed.
712 		 * Record interface address and bring up next address
713 		 * component.  If strictly routed make sure next
714 		 * address is on directly accessible net.
715 		 */
716 		case IPOPT_LSRR:
717 		case IPOPT_SSRR:
718 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
719 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
720 				goto bad;
721 			}
722 			ipaddr.sin_addr = ip->ip_dst;
723 			ia = (struct in_ifaddr *)
724 				ifa_ifwithaddr((struct sockaddr *)&ipaddr);
725 			if (ia == 0) {
726 				if (opt == IPOPT_SSRR) {
727 					type = ICMP_UNREACH;
728 					code = ICMP_UNREACH_SRCFAIL;
729 					goto bad;
730 				}
731 				/*
732 				 * Loose routing, and not at next destination
733 				 * yet; nothing to do except forward.
734 				 */
735 				break;
736 			}
737 			off--;			/* 0 origin */
738 			if (off > optlen - sizeof(struct in_addr)) {
739 				/*
740 				 * End of source route.  Should be for us.
741 				 */
742 				save_rte(cp, ip->ip_src);
743 				break;
744 			}
745 
746 			if (!ip_dosourceroute) {
747 				char buf[4*sizeof "123"];
748 				strcpy(buf, inet_ntoa(ip->ip_dst));
749 
750 				log(LOG_WARNING,
751 				    "attempted source route from %s to %s\n",
752 				    inet_ntoa(ip->ip_src), buf);
753 				type = ICMP_UNREACH;
754 				code = ICMP_UNREACH_SRCFAIL;
755 				goto bad;
756 			}
757 
758 			/*
759 			 * locate outgoing interface
760 			 */
761 			(void)memcpy(&ipaddr.sin_addr, cp + off,
762 			    sizeof(ipaddr.sin_addr));
763 
764 			if (opt == IPOPT_SSRR) {
765 #define	INA	struct in_ifaddr *
766 #define	SA	struct sockaddr *
767 			    if ((ia = (INA)ifa_ifwithdstaddr((SA)&ipaddr)) == 0)
768 				ia = (INA)ifa_ifwithnet((SA)&ipaddr);
769 			} else
770 				ia = ip_rtaddr(ipaddr.sin_addr);
771 			if (ia == 0) {
772 				type = ICMP_UNREACH;
773 				code = ICMP_UNREACH_SRCFAIL;
774 				goto bad;
775 			}
776 			ip->ip_dst = ipaddr.sin_addr;
777 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
778 			    sizeof(struct in_addr));
779 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
780 			/*
781 			 * Let ip_intr's mcast routing check handle mcast pkts
782 			 */
783 			forward = !IN_MULTICAST(ntohl(ip->ip_dst.s_addr));
784 			break;
785 
786 		case IPOPT_RR:
787 			if ((off = cp[IPOPT_OFFSET]) < IPOPT_MINOFF) {
788 				code = &cp[IPOPT_OFFSET] - (u_char *)ip;
789 				goto bad;
790 			}
791 			/*
792 			 * If no space remains, ignore.
793 			 */
794 			off--;			/* 0 origin */
795 			if (off > optlen - sizeof(struct in_addr))
796 				break;
797 			(void)memcpy(&ipaddr.sin_addr, &ip->ip_dst,
798 			    sizeof(ipaddr.sin_addr));
799 			/*
800 			 * locate outgoing interface; if we're the destination,
801 			 * use the incoming interface (should be same).
802 			 */
803 			if ((ia = (INA)ifa_ifwithaddr((SA)&ipaddr)) == 0 &&
804 			    (ia = ip_rtaddr(ipaddr.sin_addr)) == 0) {
805 				type = ICMP_UNREACH;
806 				code = ICMP_UNREACH_HOST;
807 				goto bad;
808 			}
809 			(void)memcpy(cp + off, &(IA_SIN(ia)->sin_addr),
810 			    sizeof(struct in_addr));
811 			cp[IPOPT_OFFSET] += sizeof(struct in_addr);
812 			break;
813 
814 		case IPOPT_TS:
815 			code = cp - (u_char *)ip;
816 			ipt = (struct ip_timestamp *)cp;
817 			if (ipt->ipt_len < 5)
818 				goto bad;
819 			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
820 				if (++ipt->ipt_oflw == 0)
821 					goto bad;
822 				break;
823 			}
824 			sin = (struct in_addr *)(cp + ipt->ipt_ptr - 1);
825 			switch (ipt->ipt_flg) {
826 
827 			case IPOPT_TS_TSONLY:
828 				break;
829 
830 			case IPOPT_TS_TSANDADDR:
831 				if (ipt->ipt_ptr + sizeof(n_time) +
832 				    sizeof(struct in_addr) > ipt->ipt_len)
833 					goto bad;
834 				ipaddr.sin_addr = dst;
835 				ia = (INA)ifaof_ifpforaddr((SA)&ipaddr,
836 							    m->m_pkthdr.rcvif);
837 				if (ia == 0)
838 					continue;
839 				(void)memcpy(sin, &IA_SIN(ia)->sin_addr,
840 				    sizeof(struct in_addr));
841 				ipt->ipt_ptr += sizeof(struct in_addr);
842 				break;
843 
844 			case IPOPT_TS_PRESPEC:
845 				if (ipt->ipt_ptr + sizeof(n_time) +
846 				    sizeof(struct in_addr) > ipt->ipt_len)
847 					goto bad;
848 				(void)memcpy(&ipaddr.sin_addr, sin,
849 				    sizeof(struct in_addr));
850 				if (ifa_ifwithaddr((SA)&ipaddr) == 0)
851 					continue;
852 				ipt->ipt_ptr += sizeof(struct in_addr);
853 				break;
854 
855 			default:
856 				goto bad;
857 			}
858 			ntime = iptime();
859 			(void)memcpy(cp + ipt->ipt_ptr - 1, &ntime,
860 			    sizeof(n_time));
861 			ipt->ipt_ptr += sizeof(n_time);
862 		}
863 	}
864 	if (forward) {
865 		ip_forward(m, 1);
866 		return (1);
867 	}
868 	return (0);
869 bad:
870 	ip->ip_len -= ip->ip_hl << 2;   /* XXX icmp_error adds in hdr length */
871 	icmp_error(m, type, code, 0, 0);
872 	ipstat.ips_badoptions++;
873 	return (1);
874 }
875 
876 /*
877  * Given address of next destination (final or next hop),
878  * return internet address info of interface to be used to get there.
879  */
880 struct in_ifaddr *
881 ip_rtaddr(dst)
882 	 struct in_addr dst;
883 {
884 	register struct sockaddr_in *sin;
885 
886 	sin = (struct sockaddr_in *) &ipforward_rt.ro_dst;
887 
888 	if (ipforward_rt.ro_rt == 0 || dst.s_addr != sin->sin_addr.s_addr) {
889 		if (ipforward_rt.ro_rt) {
890 			RTFREE(ipforward_rt.ro_rt);
891 			ipforward_rt.ro_rt = 0;
892 		}
893 		sin->sin_family = AF_INET;
894 		sin->sin_len = sizeof(*sin);
895 		sin->sin_addr = dst;
896 
897 		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
898 	}
899 	if (ipforward_rt.ro_rt == 0)
900 		return ((struct in_ifaddr *)0);
901 	return ((struct in_ifaddr *) ipforward_rt.ro_rt->rt_ifa);
902 }
903 
904 /*
905  * Save incoming source route for use in replies,
906  * to be picked up later by ip_srcroute if the receiver is interested.
907  */
908 void
909 save_rte(option, dst)
910 	u_char *option;
911 	struct in_addr dst;
912 {
913 	unsigned olen;
914 
915 	olen = option[IPOPT_OLEN];
916 #ifdef DIAGNOSTIC
917 	if (ipprintfs)
918 		printf("save_rte: olen %d\n", olen);
919 #endif
920 	if (olen > sizeof(ip_srcrt) - (1 + sizeof(dst)))
921 		return;
922 	(void)memcpy(ip_srcrt.srcopt, option, olen);
923 	ip_nhops = (olen - IPOPT_OFFSET - 1) / sizeof(struct in_addr);
924 	ip_srcrt.dst = dst;
925 }
926 
927 /*
928  * Retrieve incoming source route for use in replies,
929  * in the same form used by setsockopt.
930  * The first hop is placed before the options, will be removed later.
931  */
932 struct mbuf *
933 ip_srcroute()
934 {
935 	register struct in_addr *p, *q;
936 	register struct mbuf *m;
937 
938 	if (ip_nhops == 0)
939 		return ((struct mbuf *)0);
940 	m = m_get(M_DONTWAIT, MT_SOOPTS);
941 	if (m == 0)
942 		return ((struct mbuf *)0);
943 
944 #define OPTSIZ	(sizeof(ip_srcrt.nop) + sizeof(ip_srcrt.srcopt))
945 
946 	/* length is (nhops+1)*sizeof(addr) + sizeof(nop + srcrt header) */
947 	m->m_len = ip_nhops * sizeof(struct in_addr) + sizeof(struct in_addr) +
948 	    OPTSIZ;
949 #ifdef DIAGNOSTIC
950 	if (ipprintfs)
951 		printf("ip_srcroute: nhops %d mlen %d", ip_nhops, m->m_len);
952 #endif
953 
954 	/*
955 	 * First save first hop for return route
956 	 */
957 	p = &ip_srcrt.route[ip_nhops - 1];
958 	*(mtod(m, struct in_addr *)) = *p--;
959 #ifdef DIAGNOSTIC
960 	if (ipprintfs)
961 		printf(" hops %lx", ntohl(mtod(m, struct in_addr *)->s_addr));
962 #endif
963 
964 	/*
965 	 * Copy option fields and padding (nop) to mbuf.
966 	 */
967 	ip_srcrt.nop = IPOPT_NOP;
968 	ip_srcrt.srcopt[IPOPT_OFFSET] = IPOPT_MINOFF;
969 	(void)memcpy(mtod(m, caddr_t) + sizeof(struct in_addr),
970 	    &ip_srcrt.nop, OPTSIZ);
971 	q = (struct in_addr *)(mtod(m, caddr_t) +
972 	    sizeof(struct in_addr) + OPTSIZ);
973 #undef OPTSIZ
974 	/*
975 	 * Record return path as an IP source route,
976 	 * reversing the path (pointers are now aligned).
977 	 */
978 	while (p >= ip_srcrt.route) {
979 #ifdef DIAGNOSTIC
980 		if (ipprintfs)
981 			printf(" %lx", ntohl(q->s_addr));
982 #endif
983 		*q++ = *p--;
984 	}
985 	/*
986 	 * Last hop goes to final destination.
987 	 */
988 	*q = ip_srcrt.dst;
989 #ifdef DIAGNOSTIC
990 	if (ipprintfs)
991 		printf(" %lx\n", ntohl(q->s_addr));
992 #endif
993 	return (m);
994 }
995 
996 /*
997  * Strip out IP options, at higher
998  * level protocol in the kernel.
999  * Second argument is buffer to which options
1000  * will be moved, and return value is their length.
1001  * XXX should be deleted; last arg currently ignored.
1002  */
1003 void
1004 ip_stripoptions(m, mopt)
1005 	register struct mbuf *m;
1006 	struct mbuf *mopt;
1007 {
1008 	register int i;
1009 	struct ip *ip = mtod(m, struct ip *);
1010 	register caddr_t opts;
1011 	int olen;
1012 
1013 	olen = (ip->ip_hl<<2) - sizeof (struct ip);
1014 	opts = (caddr_t)(ip + 1);
1015 	i = m->m_len - (sizeof (struct ip) + olen);
1016 	bcopy(opts + olen, opts, (unsigned)i);
1017 	m->m_len -= olen;
1018 	if (m->m_flags & M_PKTHDR)
1019 		m->m_pkthdr.len -= olen;
1020 	ip->ip_hl = sizeof(struct ip) >> 2;
1021 }
1022 
1023 u_char inetctlerrmap[PRC_NCMDS] = {
1024 	0,		0,		0,		0,
1025 	0,		EMSGSIZE,	EHOSTDOWN,	EHOSTUNREACH,
1026 	EHOSTUNREACH,	EHOSTUNREACH,	ECONNREFUSED,	ECONNREFUSED,
1027 	EMSGSIZE,	EHOSTUNREACH,	0,		0,
1028 	0,		0,		0,		0,
1029 	ENOPROTOOPT
1030 };
1031 
1032 /*
1033  * Forward a packet.  If some error occurs return the sender
1034  * an icmp packet.  Note we can't always generate a meaningful
1035  * icmp message because icmp doesn't have a large enough repertoire
1036  * of codes and types.
1037  *
 * If not forwarding, just drop the packet.  This could be confusing
 * if ipforwarding was zero but some routing protocol was advertising
 * us as a gateway to somewhere.  However, we must let the routing
 * protocol deal with that.
1042  *
1043  * The srcrt parameter indicates whether the packet is being forwarded
1044  * via a source route.
1045  */
1046 void
1047 ip_forward(m, srcrt)
1048 	struct mbuf *m;
1049 	int srcrt;
1050 {
1051 	register struct ip *ip = mtod(m, struct ip *);
1052 	register struct sockaddr_in *sin;
1053 	register struct rtentry *rt;
1054 	int error, type = 0, code = 0;
1055 	struct mbuf *mcopy;
1056 	n_long dest;
1057 	struct ifnet *destifp;
1058 
1059 	dest = 0;
1060 #ifdef DIAGNOSTIC
1061 	if (ipprintfs)
1062 		printf("forward: src %lx dst %lx ttl %x\n",
1063 			ip->ip_src.s_addr, ip->ip_dst.s_addr, ip->ip_ttl);
1064 #endif
1065 
1066 
1067 	if (m->m_flags & M_BCAST || in_canforward(ip->ip_dst) == 0) {
1068 		ipstat.ips_cantforward++;
1069 		m_freem(m);
1070 		return;
1071 	}
1072 	HTONS(ip->ip_id);
1073 	if (ip->ip_ttl <= IPTTLDEC) {
1074 		icmp_error(m, ICMP_TIMXCEED, ICMP_TIMXCEED_INTRANS, dest, 0);
1075 		return;
1076 	}
1077 	ip->ip_ttl -= IPTTLDEC;
1078 
1079 	sin = (struct sockaddr_in *)&ipforward_rt.ro_dst;
1080 	if ((rt = ipforward_rt.ro_rt) == 0 ||
1081 	    ip->ip_dst.s_addr != sin->sin_addr.s_addr) {
1082 		if (ipforward_rt.ro_rt) {
1083 			RTFREE(ipforward_rt.ro_rt);
1084 			ipforward_rt.ro_rt = 0;
1085 		}
1086 		sin->sin_family = AF_INET;
1087 		sin->sin_len = sizeof(*sin);
1088 		sin->sin_addr = ip->ip_dst;
1089 
1090 		rtalloc_ign(&ipforward_rt, RTF_PRCLONING);
1091 		if (ipforward_rt.ro_rt == 0) {
1092 			icmp_error(m, ICMP_UNREACH, ICMP_UNREACH_HOST, dest, 0);
1093 			return;
1094 		}
1095 		rt = ipforward_rt.ro_rt;
1096 	}
1097 
1098 	/*
1099 	 * Save at most 64 bytes of the packet in case
1100 	 * we need to generate an ICMP message to the src.
1101 	 */
1102 	mcopy = m_copy(m, 0, imin((int)ip->ip_len, 64));
1103 
1104 	/*
1105 	 * If forwarding packet using same interface that it came in on,
1106 	 * perhaps should send a redirect to sender to shortcut a hop.
1107 	 * Only send redirect if source is sending directly to us,
1108 	 * and if packet was not source routed (or has any options).
1109 	 * Also, don't send redirect if forwarding using a default route
1110 	 * or a route modified by a redirect.
1111 	 */
1112 #define	satosin(sa)	((struct sockaddr_in *)(sa))
1113 	if (rt->rt_ifp == m->m_pkthdr.rcvif &&
1114 	    (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
1115 	    satosin(rt_key(rt))->sin_addr.s_addr != 0 &&
1116 	    ipsendredirects && !srcrt) {
1117 #define	RTA(rt)	((struct in_ifaddr *)(rt->rt_ifa))
1118 		u_long src = ntohl(ip->ip_src.s_addr);
1119 
1120 		if (RTA(rt) &&
1121 		    (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
1122 		    if (rt->rt_flags & RTF_GATEWAY)
1123 			dest = satosin(rt->rt_gateway)->sin_addr.s_addr;
1124 		    else
1125 			dest = ip->ip_dst.s_addr;
1126 		    /* Router requirements says to only send host redirects */
1127 		    type = ICMP_REDIRECT;
1128 		    code = ICMP_REDIRECT_HOST;
1129 #ifdef DIAGNOSTIC
1130 		    if (ipprintfs)
1131 		        printf("redirect (%d) to %lx\n", code, (u_long)dest);
1132 #endif
1133 		}
1134 	}
1135 
1136 	error = ip_output(m, (struct mbuf *)0, &ipforward_rt, IP_FORWARDING
1137 #ifdef DIRECTED_BROADCAST
1138 			    | IP_ALLOWBROADCAST
1139 #endif
1140 						, 0);
1141 	if (error)
1142 		ipstat.ips_cantforward++;
1143 	else {
1144 		ipstat.ips_forward++;
1145 		if (type)
1146 			ipstat.ips_redirectsent++;
1147 		else {
1148 			if (mcopy)
1149 				m_freem(mcopy);
1150 			return;
1151 		}
1152 	}
1153 	if (mcopy == NULL)
1154 		return;
1155 	destifp = NULL;
1156 
1157 	switch (error) {
1158 
1159 	case 0:				/* forwarded, but need redirect */
1160 		/* type, code set above */
1161 		break;
1162 
1163 	case ENETUNREACH:		/* shouldn't happen, checked above */
1164 	case EHOSTUNREACH:
1165 	case ENETDOWN:
1166 	case EHOSTDOWN:
1167 	default:
1168 		type = ICMP_UNREACH;
1169 		code = ICMP_UNREACH_HOST;
1170 		break;
1171 
1172 	case EMSGSIZE:
1173 		type = ICMP_UNREACH;
1174 		code = ICMP_UNREACH_NEEDFRAG;
1175 		if (ipforward_rt.ro_rt)
1176 			destifp = ipforward_rt.ro_rt->rt_ifp;
1177 		ipstat.ips_cantfrag++;
1178 		break;
1179 
1180 	case ENOBUFS:
1181 		type = ICMP_SOURCEQUENCH;
1182 		code = 0;
1183 		break;
1184 	}
1185 	icmp_error(mcopy, type, code, dest, destifp);
1186 }
1187 
1188 int
1189 ip_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
1190 	int *name;
1191 	u_int namelen;
1192 	void *oldp;
1193 	size_t *oldlenp;
1194 	void *newp;
1195 	size_t newlen;
1196 {
1197 	/* All sysctl names at this level are terminal. */
1198 	if (namelen != 1)
1199 		return (ENOTDIR);
1200 
1201 	switch (name[0]) {
1202 	case IPCTL_FORWARDING:
1203 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ipforwarding));
1204 	case IPCTL_SENDREDIRECTS:
1205 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1206 			&ipsendredirects));
1207 	case IPCTL_DEFTTL:
1208 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_defttl));
1209 	case IPCTL_SOURCEROUTE:
1210 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1211 				   &ip_dosourceroute));
1212 #ifdef notyet
1213 	case IPCTL_DEFMTU:
1214 		return (sysctl_int(oldp, oldlenp, newp, newlen, &ip_mtu));
1215 #endif
1216 	case IPCTL_RTEXPIRE:
1217 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1218 				   &rtq_reallyold));
1219 	case IPCTL_RTMINEXPIRE:
1220 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1221 				   &rtq_minreallyold));
1222 	case IPCTL_RTMAXCACHE:
1223 		return (sysctl_int(oldp, oldlenp, newp, newlen,
1224 				   &rtq_toomany));
1225 	default:
1226 		return (EOPNOTSUPP);
1227 	}
1228 	/* NOTREACHED */
1229 }
1230 
1231 int
1232 ip_rsvp_init(struct socket *so)
1233 {
1234 	if (so->so_type != SOCK_RAW ||
1235 	    so->so_proto->pr_protocol != IPPROTO_RSVP)
1236 	  return EOPNOTSUPP;
1237 
1238 	if (ip_rsvpd != NULL)
1239 	  return EADDRINUSE;
1240 
1241 	ip_rsvpd = so;
1242 
1243 	return 0;
1244 }
1245 
1246 int
1247 ip_rsvp_done(void)
1248 {
1249 	ip_rsvpd = NULL;
1250 	return 0;
1251 }
1252