xref: /illumos-gate/usr/src/stand/lib/inet/ipv4.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  *
25  * ipv4.c, Code implementing the IPv4 internet protocol.
26  */
27 
28 #include <sys/types.h>
29 #include <socket_impl.h>
30 #include <socket_inet.h>
31 #include <sys/sysmacros.h>
32 #include <sys/socket.h>
33 #include <netinet/in_systm.h>
34 #include <netinet/in.h>
35 #include <netinet/ip.h>
36 #include <netinet/udp.h>
37 #include <net/if_arp.h>
38 #include <sys/promif.h>
39 #include <sys/bootconf.h>
40 #include <sys/fcntl.h>
41 #include <sys/salib.h>
42 
43 #include "icmp4.h"
44 #include "ipv4.h"
45 #include "ipv4_impl.h"
46 #include "mac.h"
47 #include "mac_impl.h"
48 #include "v4_sum_impl.h"
49 #include <sys/bootdebug.h>
50 
/*
 * File-private IPv4 state.  All addresses stored here are kept in network
 * byte order; the ipv4_set*()/ipv4_get*() accessors convert to and from
 * host order.
 */
static struct ip_frag	fragment[FRAG_MAX];	/* reassembly slots (mp == NULL: free) */
static int		fragments;		/* number of occupied slots */
static uint8_t		ttl = MAXTTL;		/* TTL stamped on outgoing datagrams */
static struct in_addr	myip;			/* our network-order IP addr */
static struct in_addr	mynet;			/* net-order netaddr (myip & netmask) */
static struct in_addr	netmask =
	{ 0xff, 0xff, 0xff, 0xff };		/* our network-order netmask */
static boolean_t	netmask_set = B_FALSE;	/* has anyone set netmask? */
static struct in_addr	defaultrouter;		/* net-order defaultrouter */
static int		promiscuous;		/* nonzero: skip dest addr filtering */
static struct routing table[IPV4_ROUTE_TABLE_SIZE];	/* routing table */

static uint16_t	g_ip_id;	/* last IP id used; pre-incremented per send */
64 
65 #ifdef	DEBUG
66 #define	FRAG_DEBUG
67 #endif	/* DEBUG */
68 
69 #ifdef FRAG_DEBUG
70 /*
71  * display the fragment list. For debugging purposes.
72  */
73 static void
74 frag_disp(uint16_t size)
75 {
76 	int	i;
77 	uint_t	total = 0;
78 
79 	printf("Dumping fragment info: (%d)\n\n", fragments);
80 	printf("More:\tOffset:\tDatap:\t\tIPid:\t\tIPlen:\tIPhlen:\n");
81 	for (i = 0; i < FRAG_MAX; i++) {
82 		if (fragment[i].mp == NULL)
83 			continue;
84 		printf("%d\t%d\t0x%x\t%d\t\t%d\t%d\n", fragment[i].more,
85 		    fragment[i].offset, fragment[i].mp->b_rptr,
86 		    fragment[i].ipid, fragment[i].iplen, fragment[i].iphlen);
87 		total += (fragment[i].iplen - fragment[i].iphlen);
88 	}
89 	printf("Total length is: %d. It should be: %d\n\n", total, size);
90 }
91 #endif /* FRAG_DEBUG */
92 
93 /*
94  * This function returns index of fragment 0 of the current fragmented DGRAM
95  * (which would contain the transport header). Return the fragment number
96  * for success, -1 if we don't yet have the first fragment.
97  */
98 static int
99 frag_first(void)
100 {
101 	int		i;
102 
103 	if (fragments == 0)
104 		return (-1);
105 
106 	for (i = 0; i < FRAG_MAX; i++) {
107 		if (fragment[i].mp != NULL && fragment[i].offset == 0)
108 			return (i);
109 	}
110 	return (-1);
111 }
112 
113 /*
114  * This function returns index of the last fragment of the current DGRAM.
115  * Returns the fragment number for success, -1 if we don't yet have the
116  * last fragment.
117  */
118 static int
119 frag_last(void)
120 {
121 	int		i;
122 
123 	if (fragments == 0)
124 		return (-1);
125 
126 	for (i = 0; i < FRAG_MAX; i++) {
127 		if (fragment[i].mp != NULL && !fragment[i].more)
128 			return (i);
129 	}
130 	return (-1);
131 }
132 
133 /*
134  * This function adds a fragment to the current pkt fragment list. Returns
135  * FRAG_NOSLOTS if there are no more slots, FRAG_DUP if the fragment is
136  * a duplicate, or FRAG_SUCCESS if it is successful.
137  */
138 static int
139 frag_add(int16_t offset, mblk_t *mp, uint16_t ipid,
140     int16_t iplen, int16_t iphlen, uint8_t ipp)
141 {
142 	int	i;
143 	int16_t	true_offset = IPV4_OFFSET(offset);
144 
145 	/* first pass - look for duplicates */
146 	for (i = 0; i < FRAG_MAX; i++) {
147 		if (fragment[i].mp != NULL &&
148 		    fragment[i].offset == true_offset)
149 			return (FRAG_DUP);
150 	}
151 
152 	/* second pass - fill in empty slot */
153 	for (i = 0; i < FRAG_MAX; i++) {
154 		if (fragment[i].mp == NULL) {
155 			fragment[i].more = (offset & IP_MF);
156 			fragment[i].offset = true_offset;
157 			fragment[i].mp = mp;
158 			fragment[i].ipid = ipid;
159 			fragment[i].iplen = iplen;
160 			fragment[i].iphlen = iphlen;
161 			fragment[i].ipp = ipp;
162 			fragments++;
163 			return (FRAG_SUCCESS);
164 		}
165 	}
166 	return (FRAG_NOSLOTS);
167 }
168 
169 /*
170  * Nuke a fragment.
171  */
172 static void
173 frag_free(int index)
174 {
175 	if (fragment[index].mp != NULL) {
176 		freeb(fragment[index].mp);
177 		fragments--;
178 	}
179 	bzero((caddr_t)&fragment[index], sizeof (struct ip_frag));
180 }
181 
182 /*
183  * zero the frag list.
184  */
185 static void
186 frag_flush(void)
187 {
188 	int i;
189 
190 	for (i = 0; i < FRAG_MAX; i++)
191 		frag_free(i);
192 
193 	fragments = 0;
194 }
195 
196 /*
197  * Analyze the fragment list - see if we captured all our fragments.
198  *
199  * Returns TRUE if we've got all the fragments, and FALSE if we don't.
200  */
201 static int
202 frag_chk(void)
203 {
204 	int		i, first_frag, last_frag;
205 	int16_t		actual, total;
206 	uint16_t	ip_id;
207 	uint8_t		ipp;
208 
209 	if (fragments == 0 || (first_frag = frag_first()) < 0 ||
210 	    (last_frag = frag_last()) < 0)
211 		return (FALSE);
212 
213 	/*
214 	 * Validate the ipid's of our fragments - nuke those that don't
215 	 * match the id of the first fragment or don't match the IP
216 	 * protocol of the first fragment.
217 	 */
218 	ip_id = fragment[first_frag].ipid;
219 	ipp = fragment[first_frag].ipp;
220 	for (i = 0; i < FRAG_MAX; i++) {
221 		if (fragment[i].mp != NULL && ip_id != fragment[i].ipid &&
222 			fragment[i].ipp != ipp) {
223 #ifdef FRAG_DEBUG
224 			printf("ipv4: Frag id mismatch: %x != %x\n",
225 			    fragment[i].ipid, ip_id);
226 #endif /* FRAG_DEBUG */
227 			frag_free(i);
228 		}
229 	}
230 
231 	if (frag_last() < 0)
232 		return (FALSE);
233 
234 	total = fragment[last_frag].offset + fragment[last_frag].iplen -
235 	    fragment[last_frag].iphlen;
236 
237 	for (i = 0, actual = 0; i < FRAG_MAX; i++)
238 		actual += (fragment[i].iplen - fragment[i].iphlen);
239 
240 #ifdef FRAG_DEBUG
241 	frag_disp(total);
242 #endif /* FRAG_DEBUG */
243 
244 	return (total == actual);
245 }
246 
247 /*
248  * Load the assembled fragments into igp. Returns 0 for success, nonzero
249  * otherwise.
250  */
251 static int
252 frag_load(struct inetgram *igp)
253 {
254 	int	i;
255 	int16_t	len;
256 	uint_t	total_len;
257 	boolean_t first_frag = B_FALSE;
258 	mblk_t *mp;
259 	struct ip *iph;
260 	int first_iph_len;
261 
262 	if (fragments == 0)
263 		return (ENOENT);
264 
265 	mp = igp->igm_mp;
266 	/* Get the IP header length of the first fragment. */
267 	i = frag_first();
268 	assert(i >= 0);
269 	first_iph_len = fragment[i].iphlen;
270 	for (i = 0, len = 0, total_len = 0; i < FRAG_MAX; i++) {
271 		if (fragment[i].mp != NULL) {
272 			/*
273 			 * Copy just the data (omit the ip header of all
274 			 * fragments except the first one which contains
275 			 * all the info...)
276 			 */
277 			if (fragment[i].offset == 0) {
278 				len = fragment[i].iplen;
279 				first_frag = B_TRUE;
280 			} else {
281 				len = fragment[i].iplen - fragment[i].iphlen;
282 			}
283 			total_len += len;
284 			if (total_len > mp->b_size)
285 				return (E2BIG);
286 			if (first_frag) {
287 				bcopy((caddr_t)(fragment[i].mp->b_rptr),
288 				    (caddr_t)mp->b_rptr, len);
289 				first_frag = B_FALSE;
290 			} else {
291 				bcopy((caddr_t)(fragment[i].mp->b_rptr +
292 				    fragment[i].iphlen),
293 				    (caddr_t)(mp->b_rptr + first_iph_len +
294 				    fragment[i].offset), len);
295 			}
296 			mp->b_wptr += len;
297 		}
298 	}
299 	/* Fix the total length in the IP header. */
300 	iph = (struct ip *)mp->b_rptr;
301 	iph->ip_len = htons(total_len);
302 	return (0);
303 }
304 
305 /*
306  * Locate a routing table entry based upon arguments. IP addresses expected
307  * in network order. Returns index for success, -1 if entry not found.
308  */
309 static int
310 find_route(uint8_t *flagp, struct in_addr *destp, struct in_addr *gatewayp)
311 {
312 	int i, table_entry = -1;
313 
314 	for (i = 0; table_entry == -1 && i < IPV4_ROUTE_TABLE_SIZE; i++) {
315 		if (flagp != NULL) {
316 			if (*flagp & table[i].flag)
317 				table_entry = i;
318 		}
319 		if (destp != NULL) {
320 			if (destp->s_addr == table[i].dest.s_addr)
321 				table_entry = i;
322 			else
323 				table_entry = -1;
324 		}
325 		if (gatewayp != NULL) {
326 			if (gatewayp->s_addr == table[i].gateway.s_addr)
327 				table_entry = i;
328 			else
329 				table_entry = -1;
330 		}
331 	}
332 	return (table_entry);
333 }
334 
335 /*
336  * ADD or DEL a routing table entry. Returns 0 for success, -1 and errno
337  * otherwise. IP addresses are expected in network order.
338  */
339 int
340 ipv4_route(int cmd, uint8_t flag, struct in_addr *destp,
341     struct in_addr *gatewayp)
342 {
343 	static	int	routing_table_initialized;
344 	int		index;
345 	uint8_t 	tmp_flag;
346 
347 	if (gatewayp == NULL) {
348 		errno = EINVAL;
349 		return (-1);
350 	}
351 
352 	/* initialize routing table */
353 	if (routing_table_initialized == 0) {
354 		for (index = 0; index < IPV4_ROUTE_TABLE_SIZE; index++)
355 			table[index].flag = RT_UNUSED;
356 		routing_table_initialized = 1;
357 	}
358 
359 	switch (cmd) {
360 	case IPV4_ADD_ROUTE:
361 		tmp_flag = (uint8_t)RT_UNUSED;
362 		if ((index = find_route(&tmp_flag, NULL, NULL)) == -1) {
363 			dprintf("ipv4_route: routing table full.\n");
364 			errno = ENOSPC;
365 			return (-1);
366 		}
367 		table[index].flag = flag;
368 		if (destp != NULL)
369 			table[index].dest.s_addr = destp->s_addr;
370 		else
371 			table[index].dest.s_addr = htonl(INADDR_ANY);
372 		table[index].gateway.s_addr = gatewayp->s_addr;
373 		break;
374 	case IPV4_BAD_ROUTE:
375 		/* FALLTHRU */
376 	case IPV4_DEL_ROUTE:
377 		if ((index = find_route(&flag, destp, gatewayp)) == -1) {
378 			dprintf("ipv4_route: No such routing entry.\n");
379 			errno = ENOENT;
380 			return (-1);
381 		}
382 		if (cmd == IPV4_DEL_ROUTE) {
383 			table[index].flag = RT_UNUSED;
384 			table[index].dest.s_addr = htonl(INADDR_ANY);
385 			table[index].gateway.s_addr = htonl(INADDR_ANY);
386 		} else
387 			table[index].flag = RT_NG;
388 	default:
389 		errno = EINVAL;
390 		return (-1);
391 	}
392 	return (0);
393 }
394 
395 /*
396  * Return gateway to destination. Returns gateway IP address in network order
397  * for success, NULL if no route to destination exists.
398  */
399 struct in_addr *
400 ipv4_get_route(uint8_t flag, struct in_addr *destp, struct in_addr *gatewayp)
401 {
402 	int index;
403 	if ((index = find_route(&flag, destp, gatewayp)) == -1)
404 		return (NULL);
405 	return (&table[index].gateway);
406 }
407 
408 /*
409  * Initialize the IPv4 generic parts of the socket, as well as the routing
410  * table.
411  */
412 void
413 ipv4_socket_init(struct inetboot_socket *isp)
414 {
415 	isp->input[NETWORK_LVL] = ipv4_input;
416 	isp->output[NETWORK_LVL] = ipv4_output;
417 	isp->close[NETWORK_LVL] = NULL;
418 	isp->headerlen[NETWORK_LVL] = ipv4_header_len;
419 }
420 
421 /*
422  * Initialize a raw ipv4 socket.
423  */
424 void
425 ipv4_raw_socket(struct inetboot_socket *isp, uint8_t proto)
426 {
427 	isp->type = INETBOOT_RAW;
428 	if (proto == 0)
429 		isp->proto = IPPROTO_IP;
430 	else
431 		isp->proto = proto;
432 	isp->input[TRANSPORT_LVL] = NULL;
433 	isp->output[TRANSPORT_LVL] = NULL;
434 	isp->headerlen[TRANSPORT_LVL] = NULL;
435 	isp->ports = NULL;
436 }
437 
438 /*
439  * Return the size of an IPv4 header (no options)
440  */
441 /* ARGSUSED */
442 int
443 ipv4_header_len(struct inetgram *igm)
444 {
445 	return (sizeof (struct ip));
446 }
447 
448 /*
449  * Set our source address.
450  * Argument is assumed to be host order.
451  */
452 void
453 ipv4_setipaddr(struct in_addr *ip)
454 {
455 	myip.s_addr = htonl(ip->s_addr);
456 }
457 
458 /*
459  * Returns our current source address in host order.
460  */
461 void
462 ipv4_getipaddr(struct in_addr *ip)
463 {
464 	ip->s_addr = ntohl(myip.s_addr);
465 }
466 
467 /*
468  * Set our netmask.
469  * Argument is assumed to be host order.
470  */
471 void
472 ipv4_setnetmask(struct in_addr *ip)
473 {
474 	netmask_set = B_TRUE;
475 	netmask.s_addr = htonl(ip->s_addr);
476 	mynet.s_addr = netmask.s_addr & myip.s_addr; /* implicit */
477 }
478 
479 void
480 ipv4_getnetid(struct in_addr *my_netid)
481 {
482 	struct in_addr my_netmask;
483 	if (mynet.s_addr != 0)
484 		my_netid->s_addr = ntohl(mynet.s_addr);
485 	else {
486 		ipv4_getnetmask(&my_netmask);
487 		my_netid->s_addr = my_netmask.s_addr & ntohl(myip.s_addr);
488 	}
489 }
490 
491 /*
492  * Returns our current netmask in host order.
493  * Neither OBP nor the standalone DHCP client mandate
494  * that the netmask be specified, so in the absence of
495  * a netmask, we attempt to derive it using class-based
496  * heuristics.
497  */
498 void
499 ipv4_getnetmask(struct in_addr *ip)
500 {
501 	if (netmask_set || (myip.s_addr == 0))
502 		ip->s_addr = ntohl(netmask.s_addr);
503 	else {
504 		/* base the netmask on our IP address */
505 		if (IN_CLASSA(ntohl(myip.s_addr)))
506 			ip->s_addr = ntohl(IN_CLASSA_NET);
507 		else if (IN_CLASSB(ntohl(myip.s_addr)))
508 			ip->s_addr = ntohl(IN_CLASSB_NET);
509 		else if (IN_CLASSC(ntohl(myip.s_addr)))
510 			ip->s_addr = ntohl(IN_CLASSC_NET);
511 		else
512 			ip->s_addr = ntohl(IN_CLASSE_NET);
513 	}
514 }
515 
516 /*
517  * Set our default router.
518  * Argument is assumed to be host order, and *MUST* be on the same network
519  * as our source IP address.
520  */
521 void
522 ipv4_setdefaultrouter(struct in_addr *ip)
523 {
524 	defaultrouter.s_addr = htonl(ip->s_addr);
525 }
526 
527 /*
528  * Returns our current default router in host order.
529  */
530 void
531 ipv4_getdefaultrouter(struct in_addr *ip)
532 {
533 	ip->s_addr = ntohl(defaultrouter.s_addr);
534 }
535 
536 /*
537  * Toggle promiscuous flag. If set, client disregards destination IP
538  * address. Otherwise, only limited broadcast, network broadcast, and
539  * unicast traffic get through. Returns previous setting.
540  */
541 int
542 ipv4_setpromiscuous(int toggle)
543 {
544 	int old = promiscuous;
545 
546 	promiscuous = toggle;
547 
548 	return (old);
549 }
550 
551 /*
552  * Set IP TTL.
553  */
554 void
555 ipv4_setmaxttl(uint8_t cttl)
556 {
557 	ttl = cttl;
558 }
559 
/*
 * Format an IPv4 address as a dotted-decimal string.  The four bytes of
 * ip.s_addr are printed in memory order.
 *
 * Returns a pointer to a static buffer, which is overwritten by the next
 * call.
 */
char *
inet_ntoa(struct in_addr ip)
{
	static char	dotted[16];
	const uint8_t	*octet = (const uint8_t *)&ip.s_addr;

	(void) sprintf(dotted, "%u.%u.%u.%u",
	    octet[0], octet[1], octet[2], octet[3]);
	return (dotted);
}
574 
575 /*
576  * Construct a transport datagram from a series of IP fragments (igp == NULL)
577  * or from a single IP datagram (igp != NULL). Return the address of the
578  * contructed transport datagram.
579  */
580 struct inetgram *
581 make_trans_datagram(int index, struct inetgram *igp, struct in_addr ipsrc,
582     struct in_addr ipdst, uint16_t iphlen)
583 {
584 	uint16_t	trans_len, *transp, new_len;
585 	int		first_frag, last_frag;
586 	boolean_t	fragmented;
587 	struct inetgram	*ngp;
588 	struct ip	*iph;
589 
590 	fragmented = (igp == NULL);
591 
592 	ngp = (struct inetgram *)bkmem_zalloc(sizeof (struct inetgram));
593 	if (ngp == NULL) {
594 		errno = ENOMEM;
595 		if (fragmented)
596 			frag_flush();
597 		return (NULL);
598 	}
599 
600 	if (fragmented) {
601 		last_frag = frag_last();
602 		trans_len = fragment[last_frag].offset +
603 		    fragment[last_frag].iplen - fragment[last_frag].iphlen;
604 		first_frag = frag_first();
605 		/*
606 		 * The returned buffer contains the IP header of the
607 		 * first fragment.
608 		 */
609 		trans_len += fragment[first_frag].iphlen;
610 		transp = (uint16_t *)(fragment[first_frag].mp->b_rptr +
611 		    fragment[first_frag].iphlen);
612 	} else {
613 		/*
614 		 * Note that igm_len may not be the real length of an
615 		 * IP packet because some network interface, such as
616 		 * Ethernet, as a minimum frame size.  So we should not
617 		 * use the interface frame size to determine the
618 		 * length of an IP packet.  We should use the IP
619 		 * length field in the IP header.
620 		 */
621 		iph = (struct ip *)igp->igm_mp->b_rptr;
622 		trans_len = ntohs(iph->ip_len);
623 		transp = (uint16_t *)(igp->igm_mp->b_rptr + iphlen);
624 	}
625 
626 	ngp->igm_saddr.sin_addr.s_addr = ipsrc.s_addr;
627 	ngp->igm_saddr.sin_port = sockets[index].ports(transp, SOURCE);
628 	ngp->igm_target.s_addr = ipdst.s_addr;
629 	ngp->igm_level = TRANSPORT_LVL;
630 
631 	/*
632 	 * Align to 16bit value.  Checksum code may require an extra byte
633 	 * for padding.
634 	 */
635 	new_len = ((trans_len + sizeof (int16_t) - 1) &
636 	    ~(sizeof (int16_t) - 1));
637 	if ((ngp->igm_mp = allocb(new_len, 0)) == NULL) {
638 		errno = ENOMEM;
639 		bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
640 		if (fragmented)
641 			frag_flush();
642 		return (NULL);
643 	}
644 
645 	if (fragmented) {
646 		if (frag_load(ngp) != 0) {
647 			freeb(ngp->igm_mp);
648 			bkmem_free((caddr_t)ngp, sizeof (struct inetgram));
649 			frag_flush();
650 			return (NULL);
651 		}
652 		frag_flush();
653 	} else {
654 		bcopy((caddr_t)(igp->igm_mp->b_rptr),
655 		    (caddr_t)ngp->igm_mp->b_rptr, trans_len);
656 		ngp->igm_mp->b_wptr += trans_len;
657 	}
658 	return (ngp);
659 }
660 
661 /*
662  * ipv4_input: Pull in IPv4 datagrams addressed to us. Handle IP fragmentation
663  * (fragments received in any order) and ICMP at this level.
664  *
665  * Note that because our network is serviced by polling when we expect
666  * something (upon a referenced socket), we don't go through the work of
667  * locating the appropriate socket a datagram is destined for. We'll only
668  * accept data for the referenced socket. This means we don't have
669  * asynchronous networking, but since we can't service the net using an
670  * interrupt handler, it doesn't do us any good to try to service datagrams
671  * destined for sockets other than the referenced one. Data is handled in
672  * a fifo manner.
673  *
674  * The mac layer will grab all frames for us. If we find we don't have all
675  * the necessary fragments to reassemble the datagram, we'll call the mac
676  * layer again for FRAG_ATTEMPTS to see if it has any more frames.
677  *
678  * Supported protocols: IPPROTO_IP, IPPROTO_ICMP, IPPROTO_UDP.
679  *
680  * Returns: number of NETWORK_LVL datagrams placed on socket , -1 if error
681  * occurred.
682  *
683  * Note: errno is set to ETIMEDOUT if fragment reassembly fails.
684  */
685 int
686 ipv4_input(int index)
687 {
688 	int			datagrams = 0;
689 	int			frag_stat, input_attempts = 0;
690 	uint16_t		iphlen, iplen, ip_id;
691 	int16_t			curr_off;
692 	struct ip		*iphp;
693 	struct inetgram		*igp, *newgp = NULL, *ipv4_listp = NULL;
694 	struct in_addr		ipdst, ipsrc;
695 	mblk_t			*mp;
696 	enum SockType		type;
697 
698 #ifdef	DEBUG
699 	printf("ipv4_input(%d): start ######################################\n",
700 	    index);
701 #endif	/* DEBUG */
702 
703 	frag_flush();
704 
705 ipv4_try_again:
706 
707 	while ((igp = sockets[index].inq) != NULL) {
708 		if (igp->igm_level != NETWORK_LVL) {
709 #ifdef	DEBUG
710 			printf("ipv4_input(%d): unexpected frame type: %d\n",
711 			    index, igp->igm_level);
712 #endif	/* DEBUG */
713 			del_gram(&sockets[index].inq, igp, TRUE);
714 			continue;
715 		}
716 		iphp = (struct ip *)igp->igm_mp->b_rptr;
717 		if (iphp->ip_v != IPVERSION) {
718 			dprintf("ipv4_input(%d): IPv%d datagram discarded\n",
719 			index, iphp->ip_v);
720 			del_gram(&sockets[index].inq, igp, TRUE);
721 			continue;
722 		}
723 		iphlen = iphp->ip_hl << 2;
724 		if (iphlen < sizeof (struct ip)) {
725 			dprintf("ipv4_input(%d): IP msg too short (%d < %u)\n",
726 			    index, iphlen, (uint_t)sizeof (struct ip));
727 			del_gram(&sockets[index].inq, igp, TRUE);
728 			continue;
729 		}
730 		iplen = ntohs(iphp->ip_len);
731 		if (iplen > msgdsize(igp->igm_mp)) {
732 			dprintf("ipv4_input(%d): IP len/buffer mismatch "
733 			    "(%d > %lu)\n", index, iplen, igp->igm_mp->b_size);
734 			del_gram(&sockets[index].inq, igp, TRUE);
735 			continue;
736 		}
737 
738 		bcopy((caddr_t)&(iphp->ip_dst), (caddr_t)&ipdst,
739 		    sizeof (ipdst));
740 		bcopy((caddr_t)&(iphp->ip_src), (caddr_t)&ipsrc,
741 		    sizeof (ipsrc));
742 
743 		/* igp->igm_mp->b_datap is guaranteed to be 64 bit aligned] */
744 		if (ipv4cksum((uint16_t *)iphp, iphlen) != 0) {
745 			dprintf("ipv4_input(%d): Bad IP header checksum "
746 			    "(to %s)\n", index, inet_ntoa(ipdst));
747 			del_gram(&sockets[index].inq, igp, TRUE);
748 			continue;
749 		}
750 
751 		if (!promiscuous) {
752 			/* validate destination address */
753 			if (ipdst.s_addr != htonl(INADDR_BROADCAST) &&
754 			    ipdst.s_addr != (mynet.s_addr | ~netmask.s_addr) &&
755 			    ipdst.s_addr != myip.s_addr) {
756 #ifdef	DEBUG
757 				printf("ipv4_input(%d): msg to %s discarded.\n",
758 				    index, inet_ntoa(ipdst));
759 #endif	/* DEBUG */
760 				/* not ours */
761 				del_gram(&sockets[index].inq, igp, TRUE);
762 				continue;
763 			}
764 		}
765 
766 		/* Intercept ICMP first */
767 		if (!promiscuous && (iphp->ip_p == IPPROTO_ICMP)) {
768 			icmp4(igp, iphp, iphlen, ipsrc);
769 			del_gram(&sockets[index].inq, igp, TRUE);
770 			continue;
771 		}
772 
773 #ifdef	DEBUG
774 		printf("ipv4_input(%d): processing ID: 0x%x protocol %d "
775 		    "(0x%x) (0x%x,%d)\n",
776 		    index, ntohs(iphp->ip_id), iphp->ip_p, igp, igp->igm_mp,
777 		    igp->igm_mp->b_size);
778 #endif	/* DEBUG */
779 		type = sockets[index].type;
780 		if (type == INETBOOT_RAW) {
781 			/* No fragmentation - Just the raw packet. */
782 #ifdef	DEBUG
783 			printf("ipv4_input(%d): Raw packet.\n", index);
784 #endif	/* DEBUG */
785 			del_gram(&sockets[index].inq, igp, FALSE);
786 			add_grams(&ipv4_listp, igp);
787 			igp->igm_mp->b_rptr += iphlen;
788 			igp->igm_mp->b_wptr = igp->igm_mp->b_rptr + iplen;
789 			datagrams++;
790 			continue;
791 		}
792 
793 		if ((type == INETBOOT_DGRAM && iphp->ip_p != IPPROTO_UDP) ||
794 		    (type == INETBOOT_STREAM && iphp->ip_p != IPPROTO_TCP)) {
795 			/* Wrong protocol. */
796 			dprintf("ipv4_input(%d): unexpected protocol: "
797 			    "%d for socket type %d\n", index, iphp->ip_p, type);
798 			del_gram(&sockets[index].inq, igp, TRUE);
799 			continue;
800 		}
801 
802 		/*
803 		 * The following code is common to both STREAM and DATAGRAM
804 		 * sockets.
805 		 */
806 
807 		/*
808 		 * Once we process the first fragment, we won't have
809 		 * the transport header, so we'll have to  match on
810 		 * IP id.
811 		 */
812 		curr_off = ntohs(iphp->ip_off);
813 		if ((curr_off & ~(IP_DF | IP_MF)) == 0) {
814 			uint16_t	*transp;
815 
816 			/* Validate transport header. */
817 			mp = igp->igm_mp;
818 			if ((mp->b_wptr - mp->b_rptr - iphlen) <
819 			    sockets[index].headerlen[TRANSPORT_LVL](igp)) {
820 				dprintf("ipv4_input(%d): datagram 0 "
821 				    "too small to hold transport header "
822 				    "(from %s)\n", index, inet_ntoa(ipsrc));
823 				del_gram(&sockets[index].inq, igp, TRUE);
824 				continue;
825 			}
826 
827 			/*
828 			 * check alignment - transport elements are 16
829 			 * bit aligned..
830 			 */
831 			transp = (uint16_t *)(mp->b_rptr + iphlen);
832 			if ((uintptr_t)transp % sizeof (uint16_t)) {
833 				dprintf("ipv4_input(%d): Transport "
834 				    "header is not 16-bit aligned "
835 				    "(0x%lx, from %s)\n", index, (long)transp,
836 				    inet_ntoa(ipsrc));
837 				del_gram(&sockets[index].inq, igp, TRUE);
838 				continue;
839 			}
840 
841 			if (curr_off & IP_MF) {
842 				/* fragment 0 of fragmented datagram */
843 				ip_id = ntohs(iphp->ip_id);
844 				frag_stat = frag_add(curr_off, igp->igm_mp,
845 				    ip_id, iplen, iphlen, iphp->ip_p);
846 				if (frag_stat != FRAG_SUCCESS) {
847 #ifdef	FRAG_DEBUG
848 					if (frag_stat == FRAG_DUP) {
849 						printf("ipv4_input"
850 						    "(%d): Frag dup.\n", index);
851 					} else {
852 						printf("ipv4_input"
853 						    "(%d): too many "
854 						    "frags\n", index);
855 					}
856 #endif	/* FRAG_DEBUG */
857 					del_gram(&sockets[index].inq,
858 					    igp, TRUE);
859 					continue;
860 				}
861 
862 				del_gram(&sockets[index].inq, igp, FALSE);
863 				/* keep the data, lose the inetgram */
864 				bkmem_free((caddr_t)igp,
865 				    sizeof (struct inetgram));
866 #ifdef	FRAG_DEBUG
867 				printf("ipv4_input(%d): Frag/Off/Id "
868 				    "(%d/%d/%x)\n", index, fragments,
869 				    IPV4_OFFSET(curr_off), ip_id);
870 #endif	/* FRAG_DEBUG */
871 			} else {
872 				/* Single, unfragmented datagram */
873 				newgp = make_trans_datagram(index, igp,
874 				    ipsrc, ipdst, iphlen);
875 				if (newgp != NULL) {
876 					add_grams(&ipv4_listp, newgp);
877 					datagrams++;
878 				}
879 				del_gram(&sockets[index].inq, igp,
880 				    TRUE);
881 				continue;
882 			}
883 		} else {
884 			/* fragments other than 0 */
885 			frag_stat = frag_add(curr_off, igp->igm_mp,
886 			    ntohs(iphp->ip_id), iplen, iphlen, iphp->ip_p);
887 
888 			if (frag_stat == FRAG_SUCCESS) {
889 #ifdef	FRAG_DEBUG
890 				printf("ipv4_input(%d): Frag(%d) "
891 				    "off(%d) id(%x)\n", index,
892 				    fragments, IPV4_OFFSET(curr_off),
893 				    ntohs(iphp->ip_id));
894 #endif	/* FRAG_DEBUG */
895 				del_gram(&sockets[index].inq, igp, FALSE);
896 				/* keep the data, lose the inetgram */
897 				bkmem_free((caddr_t)igp,
898 				    sizeof (struct inetgram));
899 			} else {
900 #ifdef	FRAG_DEBUG
901 				if (frag_stat == FRAG_DUP)
902 					printf("ipv4_input(%d): Frag "
903 					    "dup.\n", index);
904 				else {
905 					printf("ipv4_input(%d): too "
906 					    "many frags\n", index);
907 				}
908 #endif	/* FRAG_DEBUG */
909 				del_gram(&sockets[index].inq, igp, TRUE);
910 				continue;
911 			}
912 		}
913 
914 		/*
915 		 * Determine if we have all of the fragments.
916 		 *
917 		 * NOTE: at this point, we've placed the data in the
918 		 * fragment table, and the inetgram (igp) has been
919 		 * deleted.
920 		 */
921 		if (!frag_chk())
922 			continue;
923 
924 		newgp = make_trans_datagram(index, NULL, ipsrc, ipdst, iphlen);
925 		if (newgp == NULL)
926 			continue;
927 		add_grams(&ipv4_listp, newgp);
928 		datagrams++;
929 	}
930 	if (ipv4_listp == NULL && fragments != 0) {
931 		if (++input_attempts > FRAG_ATTEMPTS) {
932 			dprintf("ipv4_input(%d): reassembly(%d) timed out in "
933 			    "%d msecs.\n", index, fragments,
934 			    sockets[index].in_timeout * input_attempts);
935 			frag_flush();
936 			errno = ETIMEDOUT;
937 			return (-1);
938 		} else {
939 			/*
940 			 * Call the media layer again... there may be more
941 			 * packets waiting.
942 			 */
943 			if (sockets[index].input[MEDIA_LVL](index) < 0) {
944 				/* errno will be set appropriately */
945 				frag_flush();
946 				return (-1);
947 			}
948 			goto ipv4_try_again;
949 		}
950 	}
951 
952 	add_grams(&sockets[index].inq, ipv4_listp);
953 
954 	return (datagrams);
955 }
956 
957 /*
958  * ipv4_output: Generate IPv4 datagram(s) for the payload and deliver them.
959  * Routing is handled here as well, by reusing the saddr field to hold the
960  * router's IP address.
961  *
962  * We don't deal with fragmentation on the outgoing side.
963  *
964  * Arguments: index to socket, inetgram to send.
965  *
966  * Returns: 0 for success, -1 if error occurred.
967  */
968 int
969 ipv4_output(int index, struct inetgram *ogp)
970 {
971 	struct ip	*iphp;
972 	uint64_t	iphbuffer[sizeof (struct ip)];
973 
974 #ifdef	DEBUG
975 	printf("ipv4_output(%d): size %d\n", index,
976 	    ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr);
977 #endif	/* DEBUG */
978 
979 	/* we don't deal (yet) with fragmentation. Maybe never will */
980 	if ((ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr) > mac_get_mtu()) {
981 		dprintf("ipv4: datagram too big for MAC layer.\n");
982 		errno = E2BIG;
983 		return (-1);
984 	}
985 
986 	if (ogp->igm_level != NETWORK_LVL) {
987 #ifdef	DEBUG
988 		printf("ipv4_output(%d): unexpected frame type: %d\n", index,
989 		    ogp->igm_level);
990 #endif	/* DEBUG */
991 		errno = EINVAL;
992 		return (-1);
993 	}
994 
995 	if (sockets[index].out_flags & SO_DONTROUTE)
996 		ogp->igm_oflags |= MSG_DONTROUTE;
997 
998 	iphp = (struct ip *)&iphbuffer;
999 	iphp->ip_v = IPVERSION;
1000 	iphp->ip_hl = sizeof (struct ip) / 4;
1001 	iphp->ip_tos = 0;
1002 	iphp->ip_len = htons(ogp->igm_mp->b_wptr - ogp->igm_mp->b_rptr +
1003 	    sizeof (struct ip));
1004 	iphp->ip_id = htons(++g_ip_id);
1005 	iphp->ip_off = htons(IP_DF);
1006 	iphp->ip_p = sockets[index].proto;
1007 	iphp->ip_sum = htons(0);
1008 	iphp->ip_ttl = ttl;
1009 
1010 	/* struct copies */
1011 	iphp->ip_src = myip;
1012 	iphp->ip_dst = ogp->igm_saddr.sin_addr;
1013 
1014 	/*
1015 	 * On local / limited broadcasts, don't route. From a purist's
1016 	 * perspective, we should be setting the TTL to 1. But
1017 	 * operational experience has shown that some BOOTP relay agents
1018 	 * (ciscos) discard our packets. Furthermore, these devices also
1019 	 * *don't* reset the TTL to MAXTTL on the unicast side of the
1020 	 * BOOTP relay agent! Sigh. Thus to work correctly in these
1021 	 * environments, we leave the TTL as it has been been set by
1022 	 * the application layer, and simply don't check for a route.
1023 	 */
1024 	if (iphp->ip_dst.s_addr == htonl(INADDR_BROADCAST) ||
1025 	    (netmask.s_addr != htonl(INADDR_BROADCAST) &&
1026 	    iphp->ip_dst.s_addr == (mynet.s_addr | ~netmask.s_addr))) {
1027 		ogp->igm_oflags |= MSG_DONTROUTE;
1028 	}
1029 
1030 	/* Routing necessary? */
1031 	if ((ogp->igm_oflags & MSG_DONTROUTE) == 0 &&
1032 	    ((iphp->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1033 		struct in_addr *rip;
1034 		if ((rip = ipv4_get_route(RT_HOST, &iphp->ip_dst,
1035 		    NULL)) == NULL) {
1036 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1037 		}
1038 		if (rip == NULL) {
1039 			dprintf("ipv4(%d): No route to %s.\n",
1040 			    index, inet_ntoa(iphp->ip_dst));
1041 			errno = EHOSTUNREACH;
1042 			return (-1);
1043 		}
1044 		ogp->igm_router.s_addr = rip->s_addr;
1045 	} else
1046 		ogp->igm_router.s_addr = htonl(INADDR_ANY);
1047 
1048 	iphp->ip_sum = ipv4cksum((uint16_t *)iphp, sizeof (struct ip));
1049 	ogp->igm_mp->b_rptr -= sizeof (struct ip);
1050 	bcopy((caddr_t)iphp, (caddr_t)(ogp->igm_mp->b_rptr),
1051 	    sizeof (struct ip));
1052 
1053 	ogp->igm_level = MEDIA_LVL;
1054 
1055 	return (0);
1056 }
1057 
1058 /*
1059  * Function to be called by TCP to send out a packet.  This is used
1060  * when TCP wants to send out packets which it has already filled in
1061  * most of the header fields.
1062  */
1063 int
1064 ipv4_tcp_output(int sock_id, mblk_t *pkt)
1065 {
1066 	struct ip *iph;
1067 	struct in_addr *rip = NULL;
1068 	struct inetgram datagram;
1069 
1070 	iph = (struct ip *)pkt->b_rptr;
1071 
1072 	bzero(&datagram, sizeof (struct inetgram));
1073 
1074 	/*
1075 	 * Bootparams doesn't know about subnet masks, so we need to
1076 	 * explicitly check for this flag.
1077 	 */
1078 	if (sockets[sock_id].out_flags & SO_DONTROUTE)
1079 		datagram.igm_oflags |= MSG_DONTROUTE;
1080 
1081 	/* Routing necessary? */
1082 	if (((datagram.igm_oflags & MSG_DONTROUTE) == 0) &&
1083 		((iph->ip_dst.s_addr & netmask.s_addr) != mynet.s_addr)) {
1084 		if ((rip = ipv4_get_route(RT_HOST, &iph->ip_dst,
1085 		    NULL)) == NULL) {
1086 			rip = ipv4_get_route(RT_DEFAULT, NULL, NULL);
1087 		}
1088 		if (rip == NULL) {
1089 			dprintf("ipv4(%d): No route to %s.\n",
1090 			    sock_id, inet_ntoa(iph->ip_dst));
1091 			errno = EHOSTUNREACH;
1092 			return (-1);
1093 		}
1094 	}
1095 
1096 	iph->ip_id = htons(++g_ip_id);
1097 	iph->ip_sum = ipv4cksum((uint16_t *)iph, sizeof (struct ip));
1098 #if DEBUG > 1
1099 	printf("ipv4_tcp_output: dump IP packet(%d)\n", iph->ip_len);
1100 	hexdump((char *)pkt->b_rptr, iph->ip_len);
1101 #endif
1102 	/* Call the MAC layer output routine to send it out. */
1103 	datagram.igm_mp = pkt;
1104 	datagram.igm_level = MEDIA_LVL;
1105 	if (rip != NULL)
1106 		datagram.igm_router.s_addr = rip->s_addr;
1107 	else
1108 		datagram.igm_router.s_addr = 0;
1109 	return (mac_state.mac_output(sock_id, &datagram));
1110 }
1111 
/*
 * Internet address interpretation routine.
 * All the network library routines call this
 * routine to interpret entries in the data bases
 * which are expected to be an address.
 * The value returned is in network order.
 *
 * Accepted forms are a, a.b, a.b.c and a.b.c.d.  Each number is written
 * as in C (0x = hex, leading 0 = octal, anything else = decimal); all
 * numbers but the last must fit in 8 bits and the last one fills the
 * remaining bytes.  The address may only be followed by white space or
 * the end of the string.
 *
 * Returns (uint32_t)-1 when the string is malformed: an empty string, a
 * component without any digit (".", "1..2", "0x", leading white space),
 * a number too large for its field or for 32 bits, more than four
 * parts, or trailing garbage.  Note that "255.255.255.255" converts to
 * the same value as the error return.
 */
in_addr_t
inet_addr(const char *cp)
{
	uint32_t val, base, digit, n;
	uint32_t parts[4], *pp = parts;
	unsigned char c;
	int seen;		/* non-zero once this part has a digit */

	if (*cp == '\0')
		return ((uint32_t)-1); /* disallow null string in cp */

	for (;;) {
		/*
		 * Collect number up to ``.''.
		 * Values are specified as for C:
		 * 0x=hex, 0=octal, other=decimal.
		 */
		val = 0; base = 10; seen = 0;
		if (*cp == '0') {
			if (*++cp == 'x' || *cp == 'X') {
				base = 16;
				cp++;
			} else {
				base = 8;
				seen = 1;	/* the leading 0 is a digit */
			}
		}
		/*
		 * The ctype routines are only defined for values that
		 * fit in an unsigned char, so never hand them a plain
		 * (possibly negative) char.
		 */
		while ((c = (unsigned char)*cp) != '\0') {
			if (isdigit(c)) {
				digit = c - '0';
				if (digit >= base)
					break;
			} else if (base == 16 && isxdigit(c)) {
				digit = c + 10 - (islower(c) ? 'a' : 'A');
			} else {
				break;
			}
			/* Refuse to wrap silently past 32 bits. */
			if (val > (0xffffffffU - digit) / base)
				return ((uint32_t)-1);
			val = (val * base) + digit;
			seen = 1;
			cp++;
		}
		/* A part with no digits at all is not a number. */
		if (!seen)
			return ((uint32_t)-1);
		if (*cp != '.')
			break;
		/*
		 * Internet format:
		 *	a.b.c.d
		 *	a.b.c	(with c treated as 16-bits)
		 *	a.b	(with b treated as 24 bits)
		 */
		if ((pp >= parts + 3) || (val > 0xff))
			return ((uint32_t)-1);
		*pp++ = val;
		cp++;
	}
	/*
	 * Check for trailing characters.
	 */
	if (*cp != '\0' && !isspace((unsigned char)*cp))
		return ((uint32_t)-1);
	*pp++ = val;
	/*
	 * Concoct the address according to
	 * the number of parts specified.
	 */
	n = pp - parts;
	switch (n) {

	case 1:				/* a -- 32 bits */
		val = parts[0];
		break;

	case 2:				/* a.b -- 8.24 bits */
		if (parts[1] > 0xffffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | (parts[1] & 0xffffff);
		break;

	case 3:				/* a.b.c -- 8.8.16 bits */
		if (parts[2] > 0xffff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    (parts[2] & 0xffff);
		break;

	case 4:				/* a.b.c.d -- 8.8.8.8 bits */
		if (parts[3] > 0xff)
			return ((uint32_t)-1);
		val = (parts[0] << 24) | ((parts[1] & 0xff) << 16) |
		    ((parts[2] & 0xff) << 8) | (parts[3] & 0xff);
		break;

	default:
		return ((uint32_t)-1);
	}
	return (htonl(val));
}
1213 
/*
 * Print datalen bytes starting at data, 16 bytes per output line.
 * Each line shows the decimal offset of its first byte, the bytes
 * grouped in pairs as four hex digits (a trailing odd byte is shown as
 * two digits), padding so the columns line up, and finally the same
 * bytes as characters with '.' in place of anything isprint() rejects.
 * When datalen is zero or negative only the surrounding blank lines
 * are printed.
 *
 * The buffer is read one byte at a time, so it may have any alignment.
 */
void
hexdump(char *data, int datalen)
{
	const unsigned char *bytes = (const unsigned char *)data;
	int chunk = 16;  /* 16 bytes per line */
	int off, i, left, len;

	printf("\n");

	/* len equals chunk on every line but the last, so off never */
	/* steps past datalen. */
	for (off = 0; off < datalen; off += len) {
		const unsigned char *line = bytes + off;

		printf("\t%4d: ", off);
		left = datalen - off;
		len = (left < chunk) ? left : chunk;

		/* Byte pairs, most significant byte first. */
		for (i = 0; i + 1 < len; i += 2) {
			printf("%04x ",
			    ((unsigned int)line[i] << 8) | line[i + 1]);
		}
		if (len % 2) {
			printf("%02x   ", line[len - 1]);
		}
		/* Pad a short final line out to the text column. */
		for (i = 0; i < (chunk - left) / 2; i++)
			printf("     ");

		printf("   ");
		/* isprint() needs a value representable as unsigned char. */
		for (i = 0; i < len; i++)
			printf("%c", isprint(line[i]) ? line[i] : '.');
		printf("\n");
	}

	printf("\n");
}
1245