xref: /illumos-gate/usr/src/stand/lib/inet/ipv4.c (revision 8b80e8cb6855118d46f605e91b5ed4ce83417395)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * ipv4.c, Code implementing the IPv4 internet protocol.
26  */
27 
28 #pragma ident	"%Z%%M%	%I%	%E% SMI"
29 
30 #include <sys/types.h>
31 #include <socket_impl.h>
32 #include <socket_inet.h>
33 #include <sys/sysmacros.h>
34 #include <sys/socket.h>
35 #include <netinet/in_systm.h>
36 #include <netinet/in.h>
37 #include <netinet/ip.h>
38 #include <netinet/udp.h>
39 #include <net/if_arp.h>
40 #include <sys/promif.h>
41 #include <sys/bootconf.h>
42 #include <sys/fcntl.h>
43 #include <sys/salib.h>
44 
45 #include "icmp4.h"
46 #include "ipv4.h"
47 #include "ipv4_impl.h"
48 #include "mac.h"
49 #include "mac_impl.h"
50 #include "v4_sum_impl.h"
51 #include <sys/bootdebug.h>
52 
53 static struct ip_frag	fragment[FRAG_MAX];	/* ip fragment buffers */
54 static int		fragments;		/* Number of fragments */
55 static uint8_t		ttl = MAXTTL;		/* IP ttl */
56 static struct in_addr	myip;			/* our network-order IP addr */
57 static struct in_addr	mynet;			/* net-order netaddr */
58 static struct in_addr	netmask =
59 	{ 0xff, 0xff, 0xff, 0xff };		/* our network-order netmask */
60 static boolean_t	netmask_set = B_FALSE;	/* has anyone set netmask? */
61 static struct in_addr	defaultrouter;		/* net-order defaultrouter */
62 static int		promiscuous;		/* promiscuous mode */
63 static struct routing table[IPV4_ROUTE_TABLE_SIZE];
64 
65 static uint16_t	g_ip_id;
66 
67 #ifdef	DEBUG
68 #define	FRAG_DEBUG
69 #endif	/* DEBUG */
70 
71 #ifdef FRAG_DEBUG
72 /*
73  * display the fragment list. For debugging purposes.
74  */
75 static void
76 frag_disp(uint16_t size)
77 {
78 	int	i;
79 	uint_t	total = 0;
80 
81 	printf("Dumping fragment info: (%d)\n\n", fragments);
82 	printf("More:\tOffset:\tDatap:\t\tIPid:\t\tIPlen:\tIPhlen:\n");
83 	for (i = 0; i < FRAG_MAX; i++) {
84 		if (fragment[i].mp == NULL)
85 			continue;
86 		printf("%d\t%d\t0x%x\t%d\t\t%d\t%d\n", fragment[i].more,
87 		    fragment[i].offset, fragment[i].mp->b_rptr,
88 		    fragment[i].ipid, fragment[i].iplen, fragment[i].iphlen);
89 		total += (fragment[i].iplen - fragment[i].iphlen);
90 	}
91 	printf("Total length is: %d. It should be: %d\n\n", total, size);
92 }
93 #endif /* FRAG_DEBUG */
94 
95 /*
96  * This function returns index of fragment 0 of the current fragmented DGRAM
97  * (which would contain the transport header). Return the fragment number
98  * for success, -1 if we don't yet have the first fragment.
99  */
100 static int
101 frag_first(void)
102 {
103 	int		i;
104 
105 	if (fragments == 0)
106 		return (-1);
107 
108 	for (i = 0; i < FRAG_MAX; i++) {
109 		if (fragment[i].mp != NULL && fragment[i].offset == 0)
110 			return (i);
111 	}
112 	return (-1);
113 }
114 
115 /*
116  * This function returns index of the last fragment of the current DGRAM.
117  * Returns the fragment number for success, -1 if we don't yet have the
118  * last fragment.
119  */
120 static int
121 frag_last(void)
122 {
123 	int		i;
124 
125 	if (fragments == 0)
126 		return (-1);
127 
128 	for (i = 0; i < FRAG_MAX; i++) {
129 		if (fragment[i].mp != NULL && !fragment[i].more)
130 			return (i);
131 	}
132 	return (-1);
133 }
134 
135 /*
136  * This function adds a fragment to the current pkt fragment list. Returns
137  * FRAG_NOSLOTS if there are no more slots, FRAG_DUP if the fragment is
138  * a duplicate, or FRAG_SUCCESS if it is successful.
139  */
140 static int
141 frag_add(int16_t offset, mblk_t *mp, uint16_t ipid,
142     int16_t iplen, int16_t iphlen, uint8_t ipp)
143 {
144 	int	i;
145 	int16_t	true_offset = IPV4_OFFSET(offset);
146 
147 	/* first pass - look for duplicates */
148 	for (i = 0; i < FRAG_MAX; i++) {
149 		if (fragment[i].mp != NULL &&
150 		    fragment[i].offset == true_offset)
151 			return (FRAG_DUP);
152 	}
153 
154 	/* second pass - fill in empty slot */
155 	for (i = 0; i < FRAG_MAX; i++) {
156 		if (fragment[i].mp == NULL) {
157 			fragment[i].more = (offset & IP_MF);
158 			fragment[i].offset = true_offset;
159 			fragment[i].mp = mp;
160 			fragment[i].ipid = ipid;
161 			fragment[i].iplen = iplen;
162 			fragment[i].iphlen = iphlen;
163 			fragment[i].ipp = ipp;
164 			fragments++;
165 			return (FRAG_SUCCESS);
166 		}
167 	}
168 	return (FRAG_NOSLOTS);
169 }
170 
171 /*
172  * Nuke a fragment.
173  */
174 static void
175 frag_free(int index)
176 {
177 	if (fragment[index].mp != NULL) {
178 		freeb(fragment[index].mp);
179 		fragments--;
180 	}
181 	bzero((caddr_t)&fragment[index], sizeof (struct ip_frag));
182 }
183 
184 /*
185  * zero the frag list.
186  */
187 static void
188 frag_flush(void)
189 {
190 	int i;
191 
192 	for (i = 0; i < FRAG_MAX; i++)
193 		frag_free(i);
194 
195 	fragments = 0;
196 }
197 
198 /*
199  * Analyze the fragment list - see if we captured all our fragments.
200  *
201  * Returns TRUE if we've got all the fragments, and FALSE if we don't.
202  */
203 static int
204 frag_chk(void)
205 {
206 	int		i, first_frag, last_frag;
207 	int16_t		actual, total;
208 	uint16_t	ip_id;
209 	uint8_t		ipp;
210 
211 	if (fragments == 0 || (first_frag = frag_first()) < 0 ||
212 	    (last_frag = frag_last()) < 0)
213 		return (FALSE);
214 
215 	/*
216 	 * Validate the ipid's of our fragments - nuke those that don't
217 	 * match the id of the first fragment or don't match the IP
218 	 * protocol of the first fragment.
219 	 */
220 	ip_id = fragment[first_frag].ipid;
221 	ipp = fragment[first_frag].ipp;
222 	for (i = 0; i < FRAG_MAX; i++) {
223 		if (fragment[i].mp != NULL && ip_id != fragment[i].ipid &&
224 			fragment[i].ipp != ipp) {
225 #ifdef FRAG_DEBUG
226 			printf("ipv4: Frag id mismatch: %x != %x\n",
227 			    fragment[i].ipid, ip_id);
228 #endif /* FRAG_DEBUG */
229 			frag_free(i);
230 		}
231 	}
232 
233 	if (frag_last() < 0)
234 		return (FALSE);
235 
236 	total = fragment[last_frag].offset + fragment[last_frag].iplen -
237 	    fragment[last_frag].iphlen;
238 
239 	for (i = 0, actual = 0; i < FRAG_MAX; i++)
240 		actual += (fragment[i].iplen - fragment[i].iphlen);
241 
242 #ifdef FRAG_DEBUG
243 	frag_disp(total);
244 #endif /* FRAG_DEBUG */
245 
246 	return (total == actual);
247 }
248 
249 /*
250  * Load the assembled fragments into igp. Returns 0 for success, nonzero
251  * otherwise.
252  */
253 static int
254 frag_load(struct inetgram *igp)
255 {
256 	int	i;
257 	int16_t	len;
258 	uint_t	total_len;
259 	boolean_t first_frag = B_FALSE;
260 	mblk_t *mp;
261 	struct ip *iph;
262 	int first_iph_len;
263 
264 	if (fragments == 0)
265 		return (ENOENT);
266 
267 	mp = igp->igm_mp;
268 	/* Get the IP header length of the first fragment. */
269 	i = frag_first();
270 	assert(i >= 0);
271 	first_iph_len = fragment[i].iphlen;
272 	for (i = 0, len = 0, total_len = 0; i < FRAG_MAX; i++) {
273 		if (fragment[i].mp != NULL) {
274 			/*
275 			 * Copy just the data (omit the ip header of all
276 			 * fragments except the first one which contains
277 			 * all the info...)
278 			 */
279 			if (fragment[i].offset == 0) {
280 				len = fragment[i].iplen;
281 				first_frag = B_TRUE;
282 			} else {
283 				len = fragment[i].iplen - fragment[i].iphlen;
284 			}
285 			total_len += len;
286 			if (total_len > mp->b_size)
287 				return (E2BIG);
288 			if (first_frag) {
289 				bcopy((caddr_t)(fragment[i].mp->b_rptr),
290 				    (caddr_t)mp->b_rptr, len);
291 				first_frag = B_FALSE;
292 			} else {
293 				bcopy((caddr_t)(fragment[i].mp->b_rptr +
294 				    fragment[i].iphlen),
295 				    (caddr_t)(mp->b_rptr + first_iph_len +
296 				    fragment[i].offset), len);
297 			}
298 			mp->b_wptr += len;
299 		}
300 	}
301 	/* Fix the total length in the IP header. */
302 	iph = (struct ip *)mp->b_rptr;
303 	iph->ip_len = htons(total_len);
304 	return (0);
305 }
306 
307 /*
308  * Locate a routing table entry based upon arguments. IP addresses expected
309  * in network order. Returns index for success, -1 if entry not found.
310  */
311 static int
312 find_route(uint8_t *flagp, struct in_addr *destp, struct in_addr *gatewayp)
313 {
314 	int i, table_entry = -1;
315 
316 	for (i = 0; table_entry == -1 && i < IPV4_ROUTE_TABLE_SIZE; i++) {
317 		if (flagp != NULL) {
318 			if (*flagp & table[i].flag)
319 				table_entry = i;
320 		}
321 		if (destp != NULL) {
322 			if (destp->s_addr == table[i].dest.s_addr)
323 				table_entry = i;
324 			else
325 				table_entry = -1;
326 		}
327 		if (gatewayp != NULL) {
328 			if (gatewayp->s_addr == table[i].gateway.s_addr)
329 				table_entry = i;
330 			else
331 				table_entry = -1;
332 		}
333 	}
334 	return (table_entry);
335 }
336 
337 /*
338  * ADD or DEL a routing table entry. Returns 0 for success, -1 and errno
339  * otherwise. IP addresses are expected in network order.
340  */
341 int
342 ipv4_route(int cmd, uint8_t flag, struct in_addr *destp,
343     struct in_addr *gatewayp)
344 {
345 	static	int	routing_table_initialized;
346 	int		index;
347 	uint8_t 	tmp_flag;
348 
349 	if (gatewayp == NULL) {
350 		errno = EINVAL;
351 		return (-1);
352 	}
353 
354 	/* initialize routing table */
355 	if (routing_table_initialized == 0) {
356 		for (index = 0; index < IPV4_ROUTE_TABLE_SIZE; index++)
357 			table[index].flag = RT_UNUSED;
358 		routing_table_initialized = 1;
359 	}
360 
361 	switch (cmd) {
362 	case IPV4_ADD_ROUTE:
363 		tmp_flag = (uint8_t)RT_UNUSED;
364 		if ((index = find_route(&tmp_flag, NULL, NULL)) == -1) {
365 			dprintf("ipv4_route: routing table full.\n");
366 			errno = ENOSPC;
367 			return (-1);
368 		}
369 		table[index].flag = flag;
370 		if (destp != NULL)
371 			table[index].dest.s_addr = destp->s_addr;
372 		else
373 			table[index].dest.s_addr = htonl(INADDR_ANY);
374 		table[index].gateway.s_addr = gatewayp->s_addr;
375 		break;
376 	case IPV4_BAD_ROUTE:
377 		/* FALLTHRU */
378 	case IPV4_DEL_ROUTE:
379 		if ((index = find_route(&flag, destp, gatewayp)) == -1) {
380 			dprintf("ipv4_route: No such routing entry.\n");
381 			errno = ENOENT;
382 			return (-1);
383 		}
384 		if (cmd == IPV4_DEL_ROUTE) {
385 			table[index].flag = RT_UNUSED;
386 			table[index].dest.s_addr = htonl(INADDR_ANY);
387 			table[index].gateway.s_addr = htonl(INADDR_ANY);
388 		} else
389 			table[index].flag = RT_NG;
390 	default:
391 		errno = EINVAL;
392 		return (-1);
393 	}
394 	return (0);
395 }
396 
397 /*
398  * Return gateway to destination. Returns gateway IP address in network order
399  * for success, NULL if no route to destination exists.
400  */
401 struct in_addr *
402 ipv4_get_route(uint8_t flag, struct in_addr *destp, struct in_addr *gatewayp)
403 {
404 	int index;
405 	if ((index = find_route(&flag, destp, gatewayp)) == -1)
406 		return (NULL);
407 	return (&table[index].gateway);
408 }
409 
410 /*
411  * Initialize the IPv4 generic parts of the socket, as well as the routing
412  * table.
413  */
414 void
415 ipv4_socket_init(struct inetboot_socket *isp)
416 {
417 	isp->input[NETWORK_LVL] = ipv4_input;
418 	isp->output[NETWORK_LVL] = ipv4_output;
419 	isp->close[NETWORK_LVL] = NULL;
420 	isp->headerlen[NETWORK_LVL] = ipv4_header_len;
421 }
422 
423 /*
424  * Initialize a raw ipv4 socket.
425  */
426 void
427 ipv4_raw_socket(struct inetboot_socket *isp, uint8_t proto)
428 {
429 	isp->type = INETBOOT_RAW;
430 	if (proto == 0)
431 		isp->proto = IPPROTO_IP;
432 	else
433 		isp->proto = proto;
434 	isp->input[TRANSPORT_LVL] = NULL;
435 	isp->output[TRANSPORT_LVL] = NULL;
436 	isp->headerlen[TRANSPORT_LVL] = NULL;
437 	isp->ports = NULL;
438 }
439 
440 /*
441  * Return the size of an IPv4 header (no options)
442  */
443 /* ARGSUSED */
444 int
445 ipv4_header_len(struct inetgram *igm)
446 {
447 	return (sizeof (struct ip));
448 }
449 
450 /*
451  * Set our source address.
452  * Argument is assumed to be host order.
453  */
454 void
455 ipv4_setipaddr(struct in_addr *ip)
456 {
457 	myip.s_addr = htonl(ip->s_addr);
458 }
459 
460 /*
461  * Returns our current source address in host order.
462  */
463 void
464 ipv4_getipaddr(struct in_addr *ip)
465 {
466 	ip->s_addr = ntohl(myip.s_addr);
467 }
468 
469 /*
470  * Set our netmask.
471  * Argument is assumed to be host order.
472  */
473 void
474 ipv4_setnetmask(struct in_addr *ip)
475 {
476 	netmask_set = B_TRUE;
477 	netmask.s_addr = htonl(ip->s_addr);
478 	mynet.s_addr = netmask.s_addr & myip.s_addr; /* implicit */
479 }
480 
481 void
482 ipv4_getnetid(struct in_addr *my_netid)
483 {
484 	struct in_addr my_netmask;
485 	if (mynet.s_addr != 0)
486 		my_netid->s_addr = ntohl(mynet.s_addr);
487 	else {
488 		ipv4_getnetmask(&my_netmask);
489 		my_netid->s_addr = my_netmask.s_addr & ntohl(myip.s_addr);
490 	}
491 }
492 
493 /*
494  * Returns our current netmask in host order.
495  * Neither OBP nor the standalone DHCP client mandate
496  * that the netmask be specified, so in the absence of
497  * a netmask, we attempt to derive it using class-based
498  * heuristics.
499  */
500 void
501 ipv4_getnetmask(struct in_addr *ip)
502 {
503 	if (netmask_set || (myip.s_addr == 0))
504 		ip->s_addr = ntohl(netmask.s_addr);
505 	else {
506 		/* base the netmask on our IP address */
507 		if (IN_CLASSA(ntohl(myip.s_addr)))
508 			ip->s_addr = ntohl(IN_CLASSA_NET);
509 		else if (IN_CLASSB(ntohl(myip.s_addr)))
510 			ip->s_addr = ntohl(IN_CLASSB_NET);
511 		else if (IN_CLASSC(ntohl(myip.s_addr)))
512 			ip->s_addr = ntohl(IN_CLASSC_NET);
513 		else
514 			ip->s_addr = ntohl(IN_CLASSE_NET);
515 	}
516 }
517 
518 /*
519  * Set our default router.
520  * Argument is assumed to be host order, and *MUST* be on the same network
521  * as our source IP address.
522  */
523 void
524 ipv4_setdefaultrouter(struct in_addr *ip)
525 {
526 	defaultrouter.s_addr = htonl(ip->s_addr);
527 }
528 
529 /*
530  * Returns our current default router in host order.
531  */
532 void
533 ipv4_getdefaultrouter(struct in_addr *ip)
534 {
535 	ip->s_addr = ntohl(defaultrouter.s_addr);
536 }
537 
538 /*
539  * Toggle promiscuous flag. If set, client disregards destination IP
540  * address. Otherwise, only limited broadcast, network broadcast, and
541  * unicast traffic get through. Returns previous setting.
542  */
543 int
544 ipv4_setpromiscuous(int toggle)
545 {
546 	int old = promiscuous;
547 
548 	promiscuous = toggle;
549 
550 	return (old);
551 }
552 
553 /*
554  * Set IP TTL.
555  */
556 void
557 ipv4_setmaxttl(uint8_t cttl)
558 {
559 	ttl = cttl;
560 }
561 
/*
 * Format an IPv4 address as a dotted-decimal string. The four bytes of the
 * address are printed in the order they sit in memory.
 *
 * Returns a pointer to a static buffer that is overwritten by the next
 * call.
 */
char *
inet_ntoa(struct in_addr ip)
{
	static char	dotted[16];
	const uint8_t	*octet = (const uint8_t *)&ip.s_addr;

	(void) sprintf(dotted, "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (dotted);
}
576 
577 /*
578  * Construct a transport datagram from a series of IP fragments (igp == NULL)
579  * or from a single IP datagram (igp != NULL). Return the address of the
580  * contructed transport datagram.
581  */
582 struct inetgram *
583 make_trans_datagram(int index, struct inetgram *igp, struct in_addr ipsrc,
584     struct in_addr ipdst, uint16_t iphlen)
585 {
586 	uint16_t	trans_len, *transp, new_len;
587 	int		first_frag, last_frag;
588 	boolean_t	fragmented;
589 	struct inetgram	*ngp;
590 	struct ip	*iph;
591 
592 	fragmented = (igp == NULL);
593 
594 	ngp = (struct inetgram *)bkmem_zalloc(sizeof (struct inetgram));
595 	if (ngp == NULL) {
596 		errno = ENOMEM;
597 		if (fragmented)
598 			frag_flush();
599 		return (NULL);
600 	}
601 
602 	if (fragmented) {
603 		last_frag = frag_last();
604 		trans_len = fragment[last_frag].offset +
605 		    fragment[last_frag].iplen - fragment[last_frag].iphlen;
606 		first_frag = frag_first();
607 		/*
608 		 * The returned buffer contains the IP header of the
609 		 * first fragment.
610 		 */
611 		trans_len += fragment[first_frag].iphlen;
612 		transp = (uint16_t *)(fragment[first_frag].mp->b_rptr +
613 		    fragment[first_frag].iphlen);
614 	} else {
615 		/*
616 		 * Note that igm_len may not be the real length of an
617 		 * IP packet because some network interface, such as
618 		 * Ethernet, as a minimum frame size.  So we should not
619 		 * use the interface frame size to determine the
620 		 * length of an IP packet.  We should use the IP
621 		 * length field in the IP header.
622 		 */
623 		iph = (struct ip *)igp->igm_mp->b_rptr;
624 		trans_len = ntohs(iph->ip_len);
625 		transp = (uint16_t *)(igp->igm_mp->b_rptr + iphlen);
626 	}
627 
628 	ngp->igm_saddr.sin_addr.s_addr = ipsrc.s_addr;
629 	ngp->igm_saddr.sin_port = sockets[index].ports(transp, SOURCE);
630 	ngp->igm_target.s_addr = ipdst.s_addr;
631 	ngp->igm_level = TRANSPORT_LVL;
632 
633 	/*
634 	 * Align to 16bit value.  Checksum code may require an extra byte
635 	 * for padding.
636 	 */
637 	new_len = ((trans_len + sizeof (int16_t) - 1) &
638 	    ~(sizeof (int16_t) - 1));
639 	if ((ngp->igm_mp = allocb(new_len, 0)) == NULL) {
640 		errno = ENOMEM;
641 		bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
642 		if (fragmented)
643 			frag_flush();
644 		return (NULL);
645 	}
646 
647 	if (fragmented) {
648 		if (frag_load(ngp) != 0) {
649 			freeb(ngp->igm_mp);
650 			bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
651 			frag_flush();
652 			return (NULL);
653 		}
654 		frag_flush();
655 	} else {
656 		bcopy((caddr_t)(igp->igm_mp->b_rptr),
657 		    (caddr_t)ngp->igm_mp->b_rptr, trans_len);
658 		ngp->igm_mp->b_wptr += trans_len;
659 	}
660 	return (ngp);
661 }
662 
663 /*
664  * ipv4_input: Pull in IPv4 datagrams addressed to us. Handle IP fragmentation
665  * (fragments received in any order) and ICMP at this level.
666  *
667  * Note that because our network is serviced by polling when we expect
668  * something (upon a referenced socket), we don't go through the work of
669  * locating the appropriate socket a datagram is destined for. We'll only
670  * accept data for the referenced socket. This means we don't have
671  * asynchronous networking, but since we can't service the net using an
672  * interrupt handler, it doesn't do us any good to try to service datagrams
673  * destined for sockets other than the referenced one. Data is handled in
674  * a fifo manner.
675  *
676  * The mac layer will grab all frames for us. If we find we don't have all
677  * the necessary fragments to reassemble the datagram, we'll call the mac
678  * layer again for FRAG_ATTEMPTS to see if it has any more frames.
679  *
680  * Supported protocols: IPPROTO_IP, IPPROTO_ICMP, IPPROTO_UDP.
681  *
682  * Returns: number of NETWORK_LVL datagrams placed on socket , -1 if error
683  * occurred.
684  *
685  * Note: errno is set to ETIMEDOUT if fragment reassembly fails.
686  */
687 int
688 ipv4_input(int index)
689 {
690 	int			datagrams = 0;
691 	int			frag_stat, input_attempts = 0;
692 	uint16_t		iphlen, iplen, ip_id;
693 	int16_t			curr_off;
694 	struct ip		*iphp;
695 	struct inetgram		*igp, *newgp = NULL, *ipv4_listp = NULL;
696 	struct in_addr		ipdst, ipsrc;
697 	mblk_t			*mp;
698 	enum SockType		type;
699 
700 #ifdef	DEBUG
701 	printf("ipv4_input(%d): start ######################################\n",
702 	    index);
703 #endif	/* DEBUG */
704 
705 	frag_flush();
706 
707 ipv4_try_again:
708 
709 	while ((igp = sockets[index].inq) != NULL) {
710 		if (igp->igm_level != NETWORK_LVL) {
711 #ifdef	DEBUG
712 			printf("ipv4_input(%d): unexpected frame type: %d\n",
713 			    index, igp->igm_level);
714 #endif	/* DEBUG */
715 			del_gram(&sockets[index].inq, igp, TRUE);
716 			continue;
717 		}
718 		iphp = (struct ip *)igp->igm_mp->b_rptr;
719 		if (iphp->ip_v != IPVERSION) {
720 			dprintf("ipv4_input(%d): IPv%d datagram discarded\n",
721 			index, iphp->ip_v);
722 			del_gram(&sockets[index].inq, igp, TRUE);
723 			continue;
724 		}
725 		iphlen = iphp->ip_hl << 2;
726 		if (iphlen < sizeof (struct ip)) {
727 			dprintf("ipv4_input(%d): IP msg too short (%d < %u)\n",
728 			    index, iphlen, (uint_t)sizeof (struct ip));
729 			del_gram(&sockets[index].inq, igp, TRUE);
730 			continue;
731 		}
732 		iplen = ntohs(iphp->ip_len);
733 		if (iplen > msgdsize(igp->igm_mp)) {
734 			dprintf("ipv4_input(%d): IP len/buffer mismatch "
735 			    "(%d > %lu)\n", index, iplen, igp->igm_mp->b_size);
736 			del_gram(&sockets[index].inq, igp, TRUE);
737 			continue;
738 		}
739 
740 		bcopy((caddr_t)&(iphp->ip_dst), (caddr_t)&ipdst,
741 		    sizeof (ipdst));
742 		bcopy((caddr_t)&(iphp->ip_src), (caddr_t)&ipsrc,
743 		    sizeof (ipsrc));
744 
745 		/* igp->igm_mp->b_datap is guaranteed to be 64 bit aligned] */
746 		if (ipv4cksum((uint16_t *)iphp, iphlen) != 0) {
747 			dprintf("ipv4_input(%d): Bad IP header checksum "
748 			    "(to %s)\n", index, inet_ntoa(ipdst));
749 			del_gram(&sockets[index].inq, igp, TRUE);
750 			continue;
751 		}
752 
753 		if (!promiscuous) {
754 			/* validate destination address */
755 			if (ipdst.s_addr != htonl(INADDR_BROADCAST) &&
756 			    ipdst.s_addr != (mynet.s_addr | ~netmask.s_addr) &&
757 			    ipdst.s_addr != myip.s_addr) {
758 #ifdef	DEBUG
759 				printf("ipv4_input(%d): msg to %s discarded.\n",
760 				    index, inet_ntoa(ipdst));
761 #endif	/* DEBUG */
762 				/* not ours */
763 				del_gram(&sockets[index].inq, igp, TRUE);
764 				continue;
765 			}
766 		}
767 
768 		/* Intercept ICMP first */
769 		if (!promiscuous && (iphp->ip_p == IPPROTO_ICMP)) {
770 			icmp4(igp, iphp, iphlen, ipsrc);
771 			del_gram(&sockets[index].inq, igp, TRUE);
772 			continue;
773 		}
774 
775 #ifdef	DEBUG
776 		printf("ipv4_input(%d): processing ID: 0x%x protocol %d "
777 		    "(0x%x) (0x%x,%d)\n",
778 		    index, ntohs(iphp->ip_id), iphp->ip_p, igp, igp->igm_mp,
779 		    igp->igm_mp->b_size);
780 #endif	/* DEBUG */
781 		type = sockets[index].type;
782 		if (type == INETBOOT_RAW) {
783 			/* No fragmentation - Just the raw packet. */
784 #ifdef	DEBUG
785 			printf("ipv4_input(%d): Raw packet.\n", index);
786 #endif	/* DEBUG */
787 			del_gram(&sockets[index].inq, igp, FALSE);
788 			add_grams(&ipv4_listp, igp);
789 			igp->igm_mp->b_rptr += iphlen;
790 			igp->igm_mp->b_wptr = igp->igm_mp->b_rptr + iplen;
791 			datagrams++;
792 			continue;
793 		}
794 
795 		if ((type == INETBOOT_DGRAM && iphp->ip_p != IPPROTO_UDP) ||
796 		    (type == INETBOOT_STREAM && iphp->ip_p != IPPROTO_TCP)) {
797 			/* Wrong protocol. */
798 			dprintf("ipv4_input(%d): unexpected protocol: "
799 			    "%d for socket type %d\n", index, iphp->ip_p, type);
800 			del_gram(&sockets[index].inq, igp, TRUE);
801 			continue;
802 		}
803 
804 		/*
805 		 * The following code is common to both STREAM and DATAGRAM
806 		 * sockets.
807 		 */
808 
809 		/*
810 		 * Once we process the first fragment, we won't have
811 		 * the transport header, so we'll have to  match on
812 		 * IP id.
813 		 */
814 		curr_off = ntohs(iphp->ip_off);
815 		if ((curr_off & ~(IP_DF | IP_MF)) == 0) {
816 			uint16_t	*transp;
817 
818 			/* Validate transport header. */
819 			mp = igp->igm_mp;
820 			if ((mp->b_wptr - mp->b_rptr - iphlen) <
821 			    sockets[index].headerlen[TRANSPORT_LVL](igp)) {
822 				dprintf("ipv4_input(%d): datagram 0 "
823 				    "too small to hold transport header "
824 				    "(from %s)\n", index, inet_ntoa(ipsrc));
825 				del_gram(&sockets[index].inq, igp, TRUE);
826 				continue;
827 			}
828 
829 			/*
830 			 * check alignment - transport elements are 16
831 			 * bit aligned..
832 			 */
833 			transp = (uint16_t *)(mp->b_rptr + iphlen);
834 			if ((uintptr_t)transp % sizeof (uint16_t)) {
835 				dprintf("ipv4_input(%d): Transport "
836 				    "header is not 16-bit aligned "
837 				    "(0x%lx, from %s)\n", index, (long)transp,
838 				    inet_ntoa(ipsrc));
839 				del_gram(&sockets[index].inq, igp, TRUE);
840 				continue;
841 			}
842 
843 			if (curr_off & IP_MF) {
844 				/* fragment 0 of fragmented datagram */
845 				ip_id = ntohs(iphp->ip_id);
846 				frag_stat = frag_add(curr_off, igp->igm_mp,
847 				    ip_id, iplen, iphlen, iphp->ip_p);
848 				if (frag_stat != FRAG_SUCCESS) {
849 #ifdef	FRAG_DEBUG
850 					if (frag_stat == FRAG_DUP) {
851 						printf("ipv4_input"
852 						    "(%d): Frag dup.\n", index);
853 					} else {
854 						printf("ipv4_input"
855 						    "(%d): too many "
856 						    "frags\n", index);
857 					}
858 #endif	/* FRAG_DEBUG */
859 					del_gram(&sockets[index].inq,
860 					    igp, TRUE);
861 					continue;
862 				}
863 
864 				del_gram(&sockets[index].inq, igp, FALSE);
865 				/* keep the data, lose the inetgram */
866 				bkmem_free((caddr_t)igp,
867 				    sizeof (struct inetgram));
868 #ifdef	FRAG_DEBUG
869 				printf("ipv4_input(%d): Frag/Off/Id "
870 				    "(%d/%d/%x)\n", index, fragments,
871 				    IPV4_OFFSET(curr_off), ip_id);
872 #endif	/* FRAG_DEBUG */
873 			} else {
874 				/* Single, unfragmented datagram */
875 				newgp = make_trans_datagram(index, igp,
876 				    ipsrc, ipdst, iphlen);
877 				if (newgp != NULL) {
878 					add_grams(&ipv4_listp, newgp);
879 					datagrams++;
880 				}
881 				del_gram(&sockets[index].inq, igp,
882 				    TRUE);
883 				continue;
884 			}
885 		} else {
886 			/* fragments other than 0 */
887 			frag_stat = frag_add(curr_off, igp->igm_mp,
888 			    ntohs(iphp->ip_id), iplen, iphlen, iphp->ip_p);
889 
890 			if (frag_stat == FRAG_SUCCESS) {
891 #ifdef	FRAG_DEBUG
892 				printf("ipv4_input(%d): Frag(%d) "
893 				    "off(%d) id(%x)\n", index,
894 				    fragments, IPV4_OFFSET(curr_off),
895 				    ntohs(iphp->ip_id));
896 #endif	/* FRAG_DEBUG */
897 				del_gram(&sockets[index].inq, igp, FALSE);
898 				/* keep the data, lose the inetgram */
899 				bkmem_free((caddr_t)igp,
900 				    sizeof (struct inetgram));
901 			} else {
902 #ifdef	FRAG_DEBUG
903 				if (frag_stat == FRAG_DUP)
904 					printf("ipv4_input(%d): Frag "
905 					    "dup.\n", index);
906 				else {
907 					printf("ipv4_input(%d): too "
908 					    "many frags\n", index);
909 				}
910 #endif	/* FRAG_DEBUG */
911 				del_gram(&sockets[index].inq, igp, TRUE);
912 				continue;
913 			}
914 		}
915 
916 		/*
917 		 * Determine if we have all of the fragments.
918 		 *
919 		 * NOTE: at this point, we've placed the data in the
920 		 * fragment table, and the inetgram (igp) has been
921 		 * deleted.
922 		 */
923 		if (!frag_chk())
924 			continue;
925 
926 		newgp = make_trans_datagram(index, NULL, ipsrc, ipdst, iphlen);
927 		if (newgp == NULL)
928 			continue;
929 		add_grams(&ipv4_listp, newgp);
930 		datagrams++;
931 	}
932 	if (ipv4_listp == NULL && fragments != 0) {
933 		if (++input_attempts > FRAG_ATTEMPTS) {
934 			dprintf("ipv4_input(%d): reassembly(%d) timed out in "
935 			    "%d msecs.\n", index, fragments,
936 			    sockets[index].in_timeout * input_attempts);
937 			frag_flush();
938 			errno = ETIMEDOUT;
939 			return (-1);
940 		} else {
941 			/*
942 			 * Call the media layer again... there may be more
943 			 * packets waiting.
944 			 */
945 			if (sockets[index].input[MEDIA_LVL](index) < 0) {
946 				/* errno will be set appropriately */
947 				frag_flush();
948 				return (-1);
949 			}
950 			goto ipv4_try_again;
951 		}
952 	}
953 
954 	add_grams(&sockets[index].inq, ipv4_listp);
955 
956 	return (datagrams);
957 }
958 
959 /*
960  * ipv4_output: Generate IPv4 datagram(s) for the payload and deliver them.
961  * Routing is handled here as well, by reusing the saddr field to hold the
962  * router's IP address.
963  *
964  * We don't deal with fragmentation on the outgoing side.
965  *
966  * Arguments: index to socket, inetgram to send.
967  *
968  * Returns: 0 for success, -1 if error occurred.
969  */
970 int
971 ipv4_output(int index, struct inetgram *ogp)
972 {
973 	struct ip	*iphp;
974 	uint64_t	iphbuffer[sizeof (struct ip)];
975 
976 #ifdef	DEBUG
977 	printf("ipv4_output(%d): size %d\n", index,
978 	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
979 #endif	/* DEBUG */
980 
981 	/* we don't deal (yet) with fragmentation. Maybe never will */
982 	if ((ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr) > mac_get_mtu()) {
983 		dprintf("ipv4: datagram too big for MAC layer.\n");
984 		errno = E2BIG;
985 		return (-1);
986 	}
987 
988 	if (ogp->igm_level != NETWORK_LVL) {
989 #ifdef	DEBUG
990 		printf("ipv4_output(%d): unexpected frame type: %d\n", index,
991 		    ogp->igm_level);
992 #endif	/* DEBUG */
993 		errno = EINVAL;
994 		return (-1);
995 	}
996 
997 	if (sockets[index].out_flags & SO_DONTROUTE)
998 		ogp->igm_oflags |= MSG_DONTROUTE;
999 
1000 	iphp = (struct ip *)&iphbuffer;
1001 	iphp->ip_v = IPVERSION;
1002 	iphp->ip_hl = sizeof (struct ip) / 4;
1003 	iphp->ip_tos = 0;
1004 	iphp->ip_len = htons(ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr +
1005 	    sizeof (struct ip));
1006 	iphp->ip_id = htons(++g_ip_id);
1007 	iphp->ip_off = htons(IP_DF);
1008 	iphp->ip_p = sockets[index].proto;
1009 	iphp->ip_sum = htons(0);
1010 	iphp->ip_ttl = ttl;
1011 
1012 	/* struct copies */
1013 	iphp->ip_src = myip;
1014 	iphp->ip_dst = ogp->igm_saddr.sin_addr;
1015 
1016 	/*
1017 	 * On local / limited broadcasts, don't route. From a purist's
1018 	 * perspective, we should be setting the TTL to 1. But
1019 	 * operational experience has shown that some BOOTP relay agents
1020 	 * (ciscos) discard our packets. Furthermore, these devices also
1021 	 * *don't* reset the TTL to MAXTTL on the unicast side of the
1022 	 * BOOTP relay agent! Sigh. Thus to work correctly in these
1023 	 * environments, we leave the TTL as it has been been set by
1024 	 * the application layer, and simply don't check for a route.
1025 	 */
1026 	if (iphp->ip_dst.s_addr == htonl(INADDR_BROADCAST) ||
1027 	    (netmask.s_addr != htonl(INADDR_BROADCAST) &&
1028 	    iphp->ip_dst.s_addr == (mynet.s_addr | ~netmask.s_addr))) {
1029 		ogp->igm_oflags |= MSG_DONTROUTE;
1030 	}
1031 
1032 	/* Routing necessary? */
1033 	if ((ogp->igm_oflags & MSG_DONTROUTE) == 0 &&
1034 	    ((iphp->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1035 		struct in_addr *rip;
1036 		if ((rip = ipv4_get_route(RT_HOST, &iphp->ip_dst,
1037 		    NULL)) == NULL) {
1038 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1039 		}
1040 		if (rip == NULL) {
1041 			dprintf("ipv4(%d): No route to %s.\n",
1042 			    index, inet_ntoa(iphp->ip_dst));
1043 			errno = EHOSTUNREACH;
1044 			return (-1);
1045 		}
1046 		ogp->igm_router.s_addr = rip->s_addr;
1047 	} else
1048 		ogp->igm_router.s_addr = htonl(INADDR_ANY);
1049 
1050 	iphp->ip_sum = ipv4cksum((uint16_t *)iphp, sizeof (struct ip));
1051 	ogp->igm_mp->b_rptr -= sizeof (struct ip);
1052 	bcopy((caddr_t)iphp, (caddr_t)(ogp->igm_mp->b_rptr),
1053 	    sizeof (struct ip));
1054 
1055 	ogp->igm_level = MEDIA_LVL;
1056 
1057 	return (0);
1058 }
1059 
1060 /*
1061  * Function to be called by TCP to send out a packet.  This is used
1062  * when TCP wants to send out packets which it has already filled in
1063  * most of the header fields.
1064  */
1065 int
1066 ipv4_tcp_output(int sock_id, mblk_t *pkt)
1067 {
1068 	struct ip *iph;
1069 	struct in_addr *rip = NULL;
1070 	struct inetgram datagram;
1071 
1072 	iph = (struct ip *)pkt->b_rptr;
1073 
1074 	bzero(&datagram, sizeof (struct inetgram));
1075 
1076 	/*
1077 	 * Bootparams doesn't know about subnet masks, so we need to
1078 	 * explicitly check for this flag.
1079 	 */
1080 	if (sockets[sock_id].out_flags & SO_DONTROUTE)
1081 		datagram.igm_oflags |= MSG_DONTROUTE;
1082 
1083 	/* Routing necessary? */
1084 	if (((datagram.igm_oflags & MSG_DONTROUTE) == 0) &&
1085 		((iph->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1086 		if ((rip = ipv4_get_route(RT_HOST, &iph->ip_dst,
1087 		    NULL)) == NULL) {
1088 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1089 		}
1090 		if (rip == NULL) {
1091 			dprintf("ipv4(%d): No route to %s.\n",
1092 			    sock_id, inet_ntoa(iph->ip_dst));
1093 			errno = EHOSTUNREACH;
1094 			return (-1);
1095 		}
1096 	}
1097 
1098 	iph->ip_id = htons(++g_ip_id);
1099 	iph->ip_sum = ipv4cksum((uint16_t *)iph, sizeof (struct ip));
1100 #if DEBUG > 1
1101 	printf("ipv4_tcp_output: dump IP packet(%d)\n", iph->ip_len);
1102 	hexdump((char *)pkt->b_rptr, iph->ip_len);
1103 #endif
1104 	/* Call the MAC layer output routine to send it out. */
1105 	datagram.igm_mp = pkt;
1106 	datagram.igm_level = MEDIA_LVL;
1107 	if (rip != NULL)
1108 		datagram.igm_router.s_addr = rip->s_addr;
1109 	else
1110 		datagram.igm_router.s_addr = 0;
1111 	return (mac_state.mac_output(sock_id, &datagram));
1112 }
1113 
/*
 * Internet address interpretation routine.
 * All the network library routines call this
 * routine to interpret entries in the data bases
 * which are expected to be an address.
 * The value returned is in network order.
 *
 * Accepted forms (each number may be decimal, octal with a leading
 * "0", or hex with a leading "0x"/"0X"):
 *	a.b.c.d	(8.8.8.8 bits)
 *	a.b.c	(8.8.16 bits)
 *	a.b	(8.24 bits)
 *	a	(32 bits)
 * The address may be followed by a NUL or a whitespace character; any
 * other trailing character is an error.
 *
 * On any parse error, including a number that does not fit in 32 bits
 * or in its field, (uint32_t)-1 is returned.  Note that this value is
 * indistinguishable from a successfully parsed 255.255.255.255.
 */
in_addr_t
inet_addr(const char *cp)
{
	uint32_t val, base, digit, n;
	uint32_t parts[4], *pp = parts;
	unsigned char c;

	if (*cp == '\0')
		return ((uint32_t)-1); /* disallow null string in cp */

	for (;;) {
		/*
		 * Collect number up to ``.''.
		 * Values are specified as for C:
		 * 0x=hex, 0=octal, other=decimal.
		 */
		val = 0;
		base = 10;
		if (*cp == '0') {
			if (*++cp == 'x' || *cp == 'X') {
				base = 16;
				cp++;
			} else {
				base = 8;
			}
		}

		/*
		 * The <ctype.h> classifiers are only defined for values
		 * representable as unsigned char, so each character is
		 * converted before it is examined.
		 */
		while ((c = (unsigned char)*cp) != '\0') {
			if (isdigit(c)) {
				digit = c - '0';
				if (digit >= base)
					break;
			} else if (base == 16 && isxdigit(c)) {
				digit = c + 10 - (islower(c) ? 'a' : 'A');
			} else {
				break;
			}

			/* Reject rather than silently wrap past 32 bits. */
			if (val > (0xffffffffU - digit) / base)
				return ((uint32_t)-1);

			val = (val * base) + digit;
			cp++;
		}

		if (*cp != '.')
			break;

		/*
		 * Every component followed by a dot is an 8-bit field,
		 * and at most three such components may appear.
		 */
		if ((pp >= parts + 3) || (val > 0xff))
			return ((uint32_t)-1);
		*pp++ = val;
		cp++;
	}

	/*
	 * Check for trailing characters.
	 */
	if (*cp != '\0' && !isspace((unsigned char)*cp))
		return ((uint32_t)-1);

	*pp++ = val;

	/*
	 * Concoct the address according to
	 * the number of parts specified.
	 */
	n = pp - parts;
	switch (n) {

	case 1:				/* a -- 32 bits */
		val = parts[0];
		break;

	case 2:				/* a.b -- 8.24 bits */
		if (parts[1] > 0xffffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | (parts[1] & 0xffffff);
		break;

	case 3:				/* a.b.c -- 8.8.16 bits */
		if (parts[2] > 0xffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    (parts[2] & 0xffff);
		break;

	case 4:				/* a.b.c.d -- 8.8.8.8 bits */
		if (parts[3] > 0xff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    ((parts[2] & 0xff) << 8) | (parts[3] & 0xff);
		break;

	default:
		return ((uint32_t)-1);
	}

	return (htonl(val));
}
1215 
/*
 * Print a hex and ASCII dump of datalen bytes starting at data.
 *
 * Output is a blank line, then one row per 16 bytes, then another
 * blank line.  Each row shows the decimal offset of its first byte,
 * the bytes in memory order grouped two to a word, and the same bytes
 * as characters with non-printable ones shown as '.'.  A short final
 * row is padded so the character column stays aligned.  Nothing but
 * the two blank lines is printed when datalen is zero or negative.
 *
 * The buffer is read one byte at a time, so data needs no particular
 * alignment.
 */
void
hexdump(char *data, int datalen)
{
	const unsigned char *bytes = (const unsigned char *)data;
	const int chunk = 16;	/* 16 bytes per line */
	int off, len, i;

	printf("\n");

	for (off = 0; off < datalen; off += len) {
		const unsigned char *row = bytes + off;

		/* Number of bytes on this row: a full chunk or the rest. */
		len = datalen - off;
		if (len > chunk)
			len = chunk;

		printf("\t%4d: ", off);

		/* Whole two-byte words, then a lone trailing byte if any. */
		for (i = 0; i + 1 < len; i += 2)
			printf("%02x%02x ", row[i], row[i + 1]);
		if (len % 2)
			printf("%02x   ", row[len - 1]);

		/* Pad out the word columns missing from a short row. */
		for (i = 0; i < (chunk - len) / 2; i++)
			printf("     ");

		printf("   ");
		for (i = 0; i < len; i++)
			printf("%c", isprint(row[i]) ? row[i] : '.');
		printf("\n");
	}

	printf("\n");
}
1247