xref: /illumos-gate/usr/src/stand/lib/sock/socket.c (revision 032624d56c174c5c55126582b32e314a6af15522)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  *
26  * socket.c, Code implementing a simple socket interface.
27  */
28 
29 #pragma ident	"%Z%%M%	%I%	%E% SMI"
30 
31 #include <sys/types.h>
32 #include "socket_impl.h"
33 #include <sys/isa_defs.h>
34 #include <sys/sysmacros.h>
35 #include <sys/bootconf.h>
36 #include <sys/socket.h>
37 #include <netinet/in.h>
38 #include <netinet/ip.h>
39 #include <netinet/tcp.h>
40 #include <sys/uio.h>
41 #include <sys/salib.h>
42 #include "socket_inet.h"
43 #include "ipv4.h"
44 #include "ipv4_impl.h"
45 #include "udp_inet.h"
46 #include "tcp_inet.h"
47 #include "mac.h"
48 #include "mac_impl.h"
49 #include <sys/promif.h>
50 
/*
 * Table of socket slots.  so_check_fd() maps a descriptor to an index into
 * this array, and socket() hands out the first slot whose type is
 * INETBOOT_UNUSED.
 */
struct inetboot_socket	sockets[MAXSOCKET] = { 0 };
52 
/*
 * Default send and receive socket buffer size.
 *
 * The expansions are parenthesized so the macros behave as single values
 * wherever they are used (e.g. next to `%', `/' or a unary operator).
 */
#define	SO_DEF_SNDBUF	(48 * 1024)
#define	SO_DEF_RCVBUF	(48 * 1024)

/* Default max socket buffer size */
#define	SO_MAX_BUF	(4 * 1024 * 1024)
59 
/* Forward declarations of the file-local helpers defined further below. */
static ssize_t dgram_sendto(int, const void *, size_t, int,
    const struct sockaddr *, int);
static ssize_t stream_sendto(int, const void *, size_t, int);
static int bind_check(int, const struct sockaddr *);
static int quickbind(int);
65 
66 /* Check the validity of a fd and return the socket index of that fd. */
67 int
68 so_check_fd(int fd, int *errno)
69 {
70 	int i;
71 
72 	i = FD_TO_SOCKET(fd);
73 	if (i < 0 || i >= MAXSOCKET) {
74 		*errno = ENOTSOCK;
75 		return (-1);
76 	}
77 	if (sockets[i].type == INETBOOT_UNUSED) {
78 		*errno = ENOTSOCK;
79 		return (-1);
80 	}
81 	return (i);
82 }
83 
84 /*
85  * Create an endpoint for network communication. Returns a descriptor.
86  *
87  * Notes:
88  *	Only PF_INET communication domains are supported. Within
89  * 	this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are
90  *	supported.
91  */
92 int
93 socket(int domain, int type, int protocol)
94 {
95 	static int sock_initialized;
96 	int i;
97 
98 	errno = 0;
99 
100 	if (!sock_initialized) {
101 		for (i = 0; i < MAXSOCKET; i++)
102 			sockets[i].type = INETBOOT_UNUSED;
103 		sock_initialized = B_TRUE;
104 	}
105 	if (domain != AF_INET) {
106 		errno = EPROTONOSUPPORT;
107 		return (-1);
108 	}
109 
110 	/* Find available socket */
111 	for (i = 0; i < MAXSOCKET; i++) {
112 		if (sockets[i].type == INETBOOT_UNUSED)
113 			break;
114 	}
115 	if (i >= MAXSOCKET) {
116 		errno = EMFILE;	/* No slots left. */
117 		return (-1);
118 	}
119 
120 	/* Some socket initialization... */
121 	sockets[i].so_rcvbuf = SO_DEF_RCVBUF;
122 	sockets[i].so_sndbuf = SO_DEF_SNDBUF;
123 
124 	/*
125 	 * Note that we ignore the protocol field for SOCK_DGRAM and
126 	 * SOCK_STREAM.  When we support different protocols in future,
127 	 * this needs to be changed.
128 	 */
129 	switch (type) {
130 	case SOCK_RAW:
131 		ipv4_raw_socket(&sockets[i], (uint8_t)protocol);
132 		break;
133 	case SOCK_DGRAM:
134 		udp_socket_init(&sockets[i]);
135 		break;
136 	case SOCK_STREAM:
137 		tcp_socket_init(&sockets[i]);
138 		break;
139 	default:
140 		errno = EPROTOTYPE;
141 		break;
142 	}
143 
144 	if (errno != 0)
145 		return (-1);
146 
147 	/* IPv4 generic initialization. */
148 	ipv4_socket_init(&sockets[i]);
149 
150 	/* MAC generic initialization. */
151 	mac_socket_init(&sockets[i]);
152 
153 	return (i + SOCKETTYPE);
154 }
155 
156 int
157 getsockname(int s, struct sockaddr *name,  socklen_t *namelen)
158 {
159 	int i;
160 
161 	errno = 0;
162 	if ((i = so_check_fd(s, &errno)) == -1)
163 		return (-1);
164 
165 	if (*namelen < sizeof (struct sockaddr_in)) {
166 		errno = ENOMEM;
167 		return (-1);
168 	}
169 
170 	/* Structure assignment... */
171 	*((struct sockaddr_in *)name) = sockets[i].bind;
172 	*namelen = sizeof (struct sockaddr_in);
173 	return (0);
174 }
175 
176 /*
177  * The socket options we support are:
178  * SO_RCVTIMEO	-	Value is in msecs, and is of uint32_t.
179  * SO_DONTROUTE	-	Value is an int, and is a boolean (nonzero if set).
180  * SO_REUSEADDR -	Value is an int boolean.
181  * SO_RCVBUF -		Value is an int.
182  * SO_SNDBUF -		Value is an int.
183  */
184 int
185 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen)
186 {
187 	int i;
188 
189 	errno = 0;
190 	if ((i = so_check_fd(s, &errno)) == -1)
191 		return (-1);
192 
193 	switch (level) {
194 	case SOL_SOCKET: {
195 		switch (option) {
196 		case SO_RCVTIMEO:
197 			if (*optlen == sizeof (uint32_t)) {
198 				*(uint32_t *)optval = sockets[i].in_timeout;
199 			} else {
200 				*optlen = 0;
201 				errno = EINVAL;
202 			}
203 			break;
204 		case SO_DONTROUTE:
205 			if (*optlen == sizeof (int)) {
206 				*(int *)optval =
207 				    (sockets[i].out_flags & SO_DONTROUTE);
208 			} else {
209 				*optlen = 0;
210 				errno = EINVAL;
211 			}
212 			break;
213 		case SO_REUSEADDR:
214 			if (*optlen == sizeof (int)) {
215 				*(int *)optval =
216 				    (sockets[i].so_opt & SO_REUSEADDR);
217 			} else {
218 				*optlen = 0;
219 				errno = EINVAL;
220 			}
221 			break;
222 		case SO_RCVBUF:
223 			if (*optlen == sizeof (int)) {
224 				*(int *)optval = sockets[i].so_rcvbuf;
225 			} else {
226 				*optlen = 0;
227 				errno = EINVAL;
228 			}
229 			break;
230 		case SO_SNDBUF:
231 			if (*optlen == sizeof (int)) {
232 				*(int *)optval = sockets[i].so_sndbuf;
233 			} else {
234 				*optlen = 0;
235 				errno = EINVAL;
236 			}
237 			break;
238 		case SO_LINGER:
239 			if (*optlen == sizeof (struct linger)) {
240 				/* struct copy */
241 				*(struct linger *)optval = sockets[i].so_linger;
242 			} else {
243 				*optlen = 0;
244 				errno = EINVAL;
245 			}
246 		default:
247 			errno = ENOPROTOOPT;
248 			break;
249 		}
250 		break;
251 	} /* case SOL_SOCKET */
252 	case IPPROTO_TCP:
253 	case IPPROTO_IP: {
254 		switch (option) {
255 		default:
256 			*optlen = 0;
257 			errno = ENOPROTOOPT;
258 			break;
259 		}
260 		break;
261 	} /* case IPPROTO_IP or IPPROTO_TCP */
262 	default:
263 		errno = ENOPROTOOPT;
264 		break;
265 	} /* switch (level) */
266 
267 	if (errno != 0)
268 		return (-1);
269 	else
270 		return (0);
271 }
272 
273 /*
274  * Generate a network-order source port from the privileged range if
275  * "reserved" is true, dynamic/private range otherwise. We consider the
276  * range of 512-1023 privileged ports as ports we can use. This mirrors
277  * historical rpc client practice for privileged port selection.
278  */
279 in_port_t
280 get_source_port(boolean_t reserved)
281 {
282 	static in_port_t	dynamic = IPPORT_DYNAMIC_START - 1,
283 				    rsvdport = (IPPORT_RESERVED / 2) - 1;
284 	in_port_t		p;
285 
286 	if (reserved) {
287 		if (++rsvdport >= IPPORT_RESERVED)
288 			p = rsvdport = IPPORT_RESERVED / 2;
289 		else
290 			p = rsvdport;
291 	} else
292 		p = ++dynamic;
293 
294 	return (htons(p));
295 }
296 
297 /*
298  * The socket options we support are:
299  * SO_RECVTIMEO	-	Value is uint32_t msecs.
300  * SO_DONTROUTE	-	Value is int boolean (nonzero == TRUE, zero == FALSE).
301  * SO_REUSEADDR -	value is int boolean.
302  * SO_RCVBUF -		Value is int.
303  * SO_SNDBUF -		Value is int.
304  */
305 int
306 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen)
307 {
308 	int i;
309 
310 	errno = 0;
311 	if ((i = so_check_fd(s, &errno)) == -1)
312 		return (-1);
313 
314 	switch (level) {
315 	case SOL_SOCKET: {
316 		switch (option) {
317 		case SO_RCVTIMEO:
318 			if (optlen == sizeof (uint32_t))
319 				sockets[i].in_timeout = *(uint32_t *)optval;
320 			else {
321 				errno = EINVAL;
322 			}
323 			break;
324 		case SO_DONTROUTE:
325 			if (optlen == sizeof (int)) {
326 				if (*(int *)optval)
327 					sockets[i].out_flags |= SO_DONTROUTE;
328 				else
329 					sockets[i].out_flags &= ~SO_DONTROUTE;
330 			} else {
331 				errno = EINVAL;
332 			}
333 			break;
334 		case SO_REUSEADDR:
335 			if (optlen == sizeof (int)) {
336 				if (*(int *)optval)
337 					sockets[i].so_opt |= SO_REUSEADDR;
338 				else
339 					sockets[i].so_opt &= ~SO_REUSEADDR;
340 			} else {
341 				errno = EINVAL;
342 			}
343 			break;
344 		case SO_RCVBUF:
345 			if (optlen == sizeof (int)) {
346 				sockets[i].so_rcvbuf = *(int *)optval;
347 				if (sockets[i].so_rcvbuf > SO_MAX_BUF)
348 					sockets[i].so_rcvbuf = SO_MAX_BUF;
349 				(void) tcp_opt_set(sockets[i].pcb,
350 				    level, option, optval, optlen);
351 			} else {
352 				errno = EINVAL;
353 			}
354 			break;
355 		case SO_SNDBUF:
356 			if (optlen == sizeof (int)) {
357 				sockets[i].so_sndbuf = *(int *)optval;
358 				if (sockets[i].so_sndbuf > SO_MAX_BUF)
359 					sockets[i].so_sndbuf = SO_MAX_BUF;
360 				(void) tcp_opt_set(sockets[i].pcb,
361 				    level, option, optval, optlen);
362 			} else {
363 				errno = EINVAL;
364 			}
365 			break;
366 		case SO_LINGER:
367 			if (optlen == sizeof (struct linger)) {
368 				/* struct copy */
369 				sockets[i].so_linger = *(struct linger *)optval;
370 				(void) tcp_opt_set(sockets[i].pcb,
371 				    level, option, optval, optlen);
372 			} else {
373 				errno = EINVAL;
374 			}
375 			break;
376 		default:
377 			errno = ENOPROTOOPT;
378 			break;
379 		}
380 		break;
381 	} /* case SOL_SOCKET */
382 	case IPPROTO_TCP:
383 	case IPPROTO_IP: {
384 		switch (option) {
385 		default:
386 			errno = ENOPROTOOPT;
387 			break;
388 		}
389 		break;
390 	} /* case IPPROTO_IP  or IPPROTO_TCP */
391 	default:
392 		errno = ENOPROTOOPT;
393 		break;
394 	} /* switch (level) */
395 
396 	if (errno != 0)
397 		return (-1);
398 	else
399 		return (0);
400 }
401 
402 /*
403  * Shut down part of a full-duplex connection.
404  *
405  * Only supported for TCP sockets
406  */
407 int
408 shutdown(int s, int how)
409 {
410 	int sock_id;
411 	int i;
412 
413 	errno = 0;
414 	if ((sock_id = so_check_fd(s, &errno)) == -1)
415 		return (-1);
416 
417 	/* shutdown only supported for TCP sockets */
418 	if (sockets[sock_id].type != INETBOOT_STREAM) {
419 		errno = EOPNOTSUPP;
420 		return (-1);
421 	}
422 
423 	if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) {
424 		errno = ENOTCONN;
425 		return (-1);
426 	}
427 
428 	switch (how) {
429 	case 0:
430 		sockets[sock_id].so_state |= SS_CANTRCVMORE;
431 		break;
432 	case 1:
433 		sockets[sock_id].so_state |= SS_CANTSENDMORE;
434 		break;
435 	case 2:
436 		sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE);
437 		break;
438 	default:
439 		errno = EINVAL;
440 		return (-1);
441 	}
442 
443 	switch (sockets[sock_id].so_state &
444 				(SS_CANTRCVMORE | SS_CANTSENDMORE)) {
445 	case (SS_CANTRCVMORE | SS_CANTSENDMORE):
446 		/* Call lower level protocol close routine. */
447 		for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
448 			if (sockets[sock_id].close[i] != NULL) {
449 				(void) sockets[sock_id].close[i](sock_id);
450 			}
451 		}
452 		nuke_grams(&sockets[sock_id].inq);
453 		break;
454 	case SS_CANTRCVMORE:
455 		nuke_grams(&sockets[sock_id].inq);
456 		break;
457 	case SS_CANTSENDMORE:
458 		/* Call lower level protocol close routine. */
459 		if (tcp_shutdown(sock_id) < 0)
460 			return (-1);
461 		break;
462 	default:
463 		errno = EINVAL;
464 		return (-1);
465 	}
466 
467 	return (0);
468 }
469 
470 /*
471  * "close" a socket.
472  */
473 int
474 socket_close(int s)
475 {
476 	int sock_id, i;
477 
478 	errno = 0;
479 	if ((sock_id = so_check_fd(s, &errno)) == -1)
480 		return (-1);
481 
482 	/* Call lower level protocol close routine. */
483 	for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
484 		if (sockets[sock_id].close[i] != NULL) {
485 			/*
486 			 * Note that the close() routine of other
487 			 * layers can return an error.  But right
488 			 * now, the only mechanism to report that
489 			 * back is for the close() routine to set
490 			 * the errno and socket_close() will return
491 			 * an error.  But the close operation will
492 			 * not be stopped.
493 			 */
494 			(void) sockets[sock_id].close[i](sock_id);
495 		}
496 	}
497 
498 	/*
499 	 * Clear the input queue.  This has to be done
500 	 * after the lower level protocol close routines have been
501 	 * called as they may want to do something about the queue.
502 	 */
503 	nuke_grams(&sockets[sock_id].inq);
504 
505 	bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket));
506 	sockets[sock_id].type = INETBOOT_UNUSED;
507 
508 	return (0);
509 }
510 
511 /*
512  * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero,
513  * then give up after `read_timeout' seconds.  Returns the number of
514  * bytes read, or -1 on failure.
515  */
516 int
517 socket_read(int s, void *buf, size_t nbyte, int read_timeout)
518 {
519 	ssize_t	n;
520 	uint_t	start, diff;
521 	struct sockaddr from;
522 	uint_t fromlen = sizeof (from);
523 
524 	/*
525 	 * keep calling non-blocking recvfrom until something received
526 	 * or an error occurs
527 	 */
528 	start = prom_gettime();
529 	for (;;) {
530 		n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL);
531 		if (n == -1 && errno == EWOULDBLOCK) {
532 			diff = (uint_t)((prom_gettime() - start) + 500) / 1000;
533 			if (read_timeout != 0 && diff > read_timeout) {
534 				errno = EINTR;
535 				return (-1);
536 			}
537 		} else {
538 			return (n);
539 		}
540 	}
541 }
542 
/*
 * Write up to `nbyte' bytes of data from `buf' to the address pointed to
 * by `addr' using socket `s'.  This is a thin wrapper around sendto() with
 * no flags and an address length of sizeof (struct sockaddr_in).  Returns
 * the number of bytes written on success, or -1 on failure.
 */
int
socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr)
{
	struct sockaddr *dest = (struct sockaddr *)addr;

	return (sendto(s, buf, nbyte, 0, dest, sizeof (struct sockaddr_in)));
}
554 
555 static int
556 bind_check(int sock_id, const struct sockaddr *addr)
557 {
558 	int k;
559 	struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
560 
561 	/* Do not check for duplicate bind() if SO_REUSEADDR option is set. */
562 	if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) {
563 		for (k = 0; k < MAXSOCKET; k++) {
564 			if (sockets[k].type != INETBOOT_UNUSED &&
565 			    sockets[k].proto == sockets[sock_id].proto &&
566 			    sockets[k].bound) {
567 				if ((sockets[k].bind.sin_addr.s_addr ==
568 				    in_addr->sin_addr.s_addr) &&
569 				    (sockets[k].bind.sin_port ==
570 				    in_addr->sin_port)) {
571 					errno = EADDRINUSE;
572 					return (-1);
573 				}
574 			}
575 		}
576 	}
577 	return (0);
578 }
579 
580 /* Assign a name to an unnamed socket. */
581 int
582 bind(int s, const struct sockaddr *name, socklen_t namelen)
583 {
584 	int i;
585 
586 	errno = 0;
587 
588 	if ((i = so_check_fd(s, &errno)) == -1)
589 		return (-1);
590 
591 	if (name == NULL) {
592 		/* unbind */
593 		if (sockets[i].bound) {
594 			bzero((caddr_t)&sockets[i].bind,
595 			    sizeof (struct sockaddr_in));
596 			sockets[i].bound = B_FALSE;
597 		}
598 		return (0);
599 	}
600 	if (namelen != sizeof (struct sockaddr_in) || name == NULL) {
601 		errno = EINVAL;
602 		return (-1);
603 	}
604 	if (name->sa_family != AF_INET) {
605 		errno = EAFNOSUPPORT;
606 		return (-1);
607 	}
608 	if (sockets[i].bound) {
609 		if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name,
610 		    namelen) == 0) {
611 			/* attempt to bind to same address ok... */
612 			return (0);
613 		}
614 		errno = EINVAL;	/* already bound */
615 		return (-1);
616 	}
617 
618 	if (errno != 0) {
619 		return (-1);
620 	}
621 
622 	/* Check for duplicate bind(). */
623 	if (bind_check(i, name) < 0)
624 		return (-1);
625 
626 	bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen);
627 	if (sockets[i].type == INETBOOT_STREAM) {
628 		if (tcp_bind(i) < 0) {
629 			return (-1);
630 		}
631 	}
632 	sockets[i].bound = B_TRUE;
633 
634 	return (0);
635 }
636 
637 static int
638 quickbind(int sock_id)
639 {
640 	int i;
641 	struct sockaddr_in addr;
642 
643 	/*
644 	 * XXX This needs more work.  Right now, if ipv4_setipaddr()
645 	 * have not been called, this will be wrong.  But we need
646 	 * something better.  Need to be revisited.
647 	 */
648 	ipv4_getipaddr(&addr.sin_addr);
649 	addr.sin_family = AF_INET;
650 
651 	for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) {
652 		addr.sin_port = htons(i);
653 		if (bind_check(sock_id, (struct sockaddr *)&addr) == 0)
654 			break;
655 	}
656 	/* Need to clear errno as it is probably set by bind_check(). */
657 	errno = 0;
658 
659 	if (i <= LARGEST_ANON_PORT) {
660 		bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind,
661 		    sizeof (struct sockaddr_in));
662 		sockets[sock_id].bound = B_TRUE;
663 #ifdef DEBUG
664 		printf("quick bind done addr %s port %d\n",
665 		    inet_ntoa(sockets[sock_id].bind.sin_addr),
666 			ntohs(sockets[sock_id].bind.sin_port));
667 #endif
668 		return (0);
669 	} else {
670 		return (-1);
671 	}
672 }
673 
674 int
675 listen(int fd, int backlog)
676 {
677 	int sock_id;
678 
679 	errno = 0;
680 	if ((sock_id = so_check_fd(fd, &errno)) == -1)
681 		return (-1);
682 
683 	if (sockets[sock_id].type != INETBOOT_STREAM) {
684 		errno = EOPNOTSUPP;
685 		return (-1);
686 	}
687 	if (sockets[sock_id].so_error != 0) {
688 		errno = sockets[sock_id].so_error;
689 		return (-1);
690 	}
691 	return (tcp_listen(sock_id, backlog));
692 }
693 
694 int
695 accept(int fd, struct sockaddr *addr,  socklen_t *addr_len)
696 {
697 	int sock_id;
698 	int new_sd;
699 
700 	errno = 0;
701 	if ((sock_id = so_check_fd(fd, &errno)) == -1)
702 		return (-1);
703 
704 	if (sockets[sock_id].type != INETBOOT_STREAM) {
705 		errno = EOPNOTSUPP;
706 		return (-1);
707 	}
708 	if (sockets[sock_id].so_error != 0) {
709 		errno = sockets[sock_id].so_error;
710 		return (-1);
711 	}
712 	if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1)
713 		return (-1);
714 	sock_id = so_check_fd(new_sd, &errno);
715 	sockets[sock_id].so_state |= SS_ISCONNECTED;
716 	return (new_sd);
717 }
718 
719 int
720 connect(int fd, const  struct sockaddr *addr, socklen_t addr_len)
721 {
722 	int sock_id;
723 	int so_type;
724 
725 	errno = 0;
726 	if ((sock_id = so_check_fd(fd, &errno)) == -1)
727 		return (-1);
728 
729 	so_type = sockets[sock_id].type;
730 
731 	if (addr == NULL || addr_len == 0) {
732 		errno = EINVAL;
733 		return (-1);
734 	}
735 	/* Don't allow connect for raw socket. */
736 	if (so_type == INETBOOT_RAW) {
737 		errno = EPROTONOSUPPORT;
738 		return (-1);
739 	}
740 
741 	if (sockets[sock_id].so_state & SS_ISCONNECTED) {
742 		errno = EINVAL;
743 		return (-1);
744 	}
745 
746 	if (sockets[sock_id].so_error != 0) {
747 		errno = sockets[sock_id].so_error;
748 		return (-1);
749 	}
750 
751 	/* If the socket is not bound, we need to do a quick bind. */
752 	if (!sockets[sock_id].bound) {
753 		/* For TCP socket, just call tcp_bind(). */
754 		if (so_type == INETBOOT_STREAM) {
755 			if (tcp_bind(sock_id) < 0)
756 				return (-1);
757 		} else {
758 			if (quickbind(sock_id) < 0) {
759 				errno = EADDRNOTAVAIL;
760 				return (-1);
761 			}
762 		}
763 	}
764 	/* Should do some sanity check for addr .... */
765 	bcopy((caddr_t)addr, &sockets[sock_id].remote,
766 	    sizeof (struct sockaddr_in));
767 
768 	if (sockets[sock_id].type == INETBOOT_STREAM) {
769 		/* Call TCP connect routine. */
770 		if (tcp_connect(sock_id) == 0)
771 			sockets[sock_id].so_state |= SS_ISCONNECTED;
772 		else {
773 			if (sockets[sock_id].so_error != 0)
774 				errno = sockets[sock_id].so_error;
775 			return (-1);
776 		}
777 	} else {
778 		sockets[sock_id].so_state |= SS_ISCONNECTED;
779 	}
780 	return (0);
781 }
782 
783 /* Just a wrapper around recvfrom(). */
784 ssize_t
785 recv(int s, void *buf, size_t len, int flags)
786 {
787 	return (recvfrom(s, buf, len, flags, NULL, NULL));
788 }
789 
790 /*
791  * Receive messages from a connectionless socket. Legal flags are 0 and
792  * MSG_DONTWAIT. MSG_WAITALL is not currently supported.
793  *
794  * Returns length of message for success, -1 if error occurred.
795  */
796 ssize_t
797 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
798     socklen_t *fromlen)
799 {
800 	int			sock_id, i;
801 	ssize_t			datalen, bytes = 0;
802 	struct inetgram		*icp;
803 	enum SockType		so_type;
804 	char			*tmp_buf;
805 	mblk_t			*mp;
806 
807 	errno = 0;
808 
809 	if ((sock_id = so_check_fd(s, &errno)) == -1) {
810 		errno = EINVAL;
811 		return (-1);
812 	}
813 
814 	if (sockets[sock_id].type == INETBOOT_STREAM &&
815 	    !(sockets[sock_id].so_state & SS_ISCONNECTED)) {
816 		errno = ENOTCONN;
817 		return (-1);
818 	}
819 
820 	if (buf == NULL || len == 0) {
821 		errno = EINVAL;
822 		return (-1);
823 	}
824 	/* Yup - MSG_WAITALL not implemented */
825 	if ((flags & ~MSG_DONTWAIT) != 0) {
826 		errno = EINVAL;
827 		return (-1);
828 	}
829 
830 retry:
831 	if (sockets[sock_id].inq == NULL) {
832 		/* Go out and check the wire */
833 		for (i = MEDIA_LVL; i < APP_LVL; i++) {
834 			if (sockets[sock_id].input[i] != NULL) {
835 				if (sockets[sock_id].input[i](sock_id) < 0) {
836 					if (sockets[sock_id].so_error != 0) {
837 						errno =
838 						    sockets[sock_id].so_error;
839 					}
840 					return (-1);
841 				}
842 			}
843 		}
844 	}
845 
846 	so_type = sockets[sock_id].type;
847 
848 	/* Remove unknown inetgrams from the head of inq.  Can this happen? */
849 	while ((icp = sockets[sock_id].inq) != NULL) {
850 		if ((so_type == INETBOOT_DGRAM ||
851 		    so_type == INETBOOT_STREAM) &&
852 		    icp->igm_level != APP_LVL) {
853 #ifdef	DEBUG
854 			printf("recvfrom: unexpected level %d frame found\n",
855 			    icp->igm_level);
856 #endif	/* DEBUG */
857 			del_gram(&sockets[sock_id].inq, icp, B_TRUE);
858 			continue;
859 		} else {
860 			break;
861 		}
862 	}
863 
864 
865 	if (icp == NULL) {
866 		/*
867 		 * Checking for error should be done everytime a lower layer
868 		 * input routing is called.  For example, if TCP gets a RST,
869 		 * this should be reported asap.
870 		 */
871 		if (sockets[sock_id].so_state & SS_CANTRCVMORE) {
872 			if (sockets[sock_id].so_error != 0) {
873 				errno = sockets[sock_id].so_error;
874 				return (-1);
875 			} else {
876 				return (0);
877 			}
878 		}
879 
880 		if ((flags & MSG_DONTWAIT) == 0)
881 			goto retry;	/* wait forever */
882 
883 		/* no data */
884 		errno = EWOULDBLOCK;
885 		return (-1);
886 	}
887 
888 	if (from != NULL && fromlen != NULL) {
889 		switch (so_type) {
890 		case INETBOOT_STREAM:
891 			/* Need to copy from the socket's remote address. */
892 			bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen,
893 			    sizeof (struct sockaddr_in)));
894 			break;
895 		case INETBOOT_RAW:
896 		case INETBOOT_DGRAM:
897 		default:
898 			if (*fromlen > sizeof (icp->igm_saddr))
899 				*fromlen = sizeof (icp->igm_saddr);
900 			bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from,
901 			    MIN(*fromlen, sizeof (struct sockaddr_in)));
902 			break;
903 		}
904 	}
905 
906 	mp = icp->igm_mp;
907 	switch (so_type) {
908 	case INETBOOT_STREAM:
909 		/*
910 		 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need
911 		 * to drain the data held by tcp and try again.
912 		 */
913 		if (icp->igm_id == TCP_CALLB_MAGIC_ID) {
914 			del_gram(&sockets[sock_id].inq, icp, B_TRUE);
915 			tcp_rcv_drain_sock(sock_id);
916 			goto retry;
917 		}
918 
919 		/* TCP should put only user data in the inetgram. */
920 		tmp_buf = (char *)buf;
921 		while (len > 0 && icp != NULL) {
922 			datalen = mp->b_wptr - mp->b_rptr;
923 			if (len < datalen) {
924 				bcopy(mp->b_rptr, tmp_buf, len);
925 				bytes += len;
926 				mp->b_rptr += len;
927 				break;
928 			} else {
929 				bcopy(mp->b_rptr, tmp_buf, datalen);
930 				len -= datalen;
931 				bytes += datalen;
932 				tmp_buf += datalen;
933 				del_gram(&sockets[sock_id].inq, icp, B_TRUE);
934 
935 				/*
936 				 * If we have any embedded magic messages just
937 				 * drop them.
938 				 */
939 				while ((icp = sockets[sock_id].inq) != NULL) {
940 					if (icp->igm_id != TCP_CALLB_MAGIC_ID)
941 						break;
942 					del_gram(&sockets[sock_id].inq, icp,
943 						B_TRUE);
944 				}
945 
946 				if (icp == NULL)
947 					break;
948 				mp = icp->igm_mp;
949 			}
950 		}
951 		sockets[sock_id].so_rcvbuf += (int32_t)bytes;
952 		break;
953 	case INETBOOT_DGRAM:
954 		datalen = mp->b_wptr - mp->b_rptr;
955 		if (len < datalen)
956 			bytes = len;
957 		else
958 			bytes = datalen;
959 		bcopy(mp->b_rptr, buf, bytes);
960 		del_gram(&sockets[sock_id].inq, icp, B_TRUE);
961 		break;
962 	case INETBOOT_RAW:
963 	default:
964 		datalen = mp->b_wptr - mp->b_rptr;
965 		if (len < datalen)
966 			bytes = len;
967 		else
968 			bytes = datalen;
969 		bcopy(mp->b_rptr, buf, bytes);
970 		del_gram(&sockets[sock_id].inq, icp, B_TRUE);
971 		break;
972 	}
973 
974 #ifdef	DEBUG
975 	printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id,
976 	    (icp != NULL) ? icp->igm_mp : 0, bytes);
977 #endif	/* DEBUG */
978 	return (bytes);
979 }
980 
981 
982 /* Just a wrapper around sendto(). */
983 ssize_t
984 send(int s, const void *msg, size_t len, int flags)
985 {
986 	return (sendto(s, msg, len, flags, NULL, 0));
987 }
988 
989 /*
990  * Transmit a message through a socket.
991  *
992  * Supported flags: MSG_DONTROUTE or 0.
993  */
994 ssize_t
995 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to,
996     socklen_t tolen)
997 {
998 	enum SockType so_type;
999 	int sock_id;
1000 	ssize_t bytes;
1001 
1002 	errno = 0;
1003 
1004 	if ((sock_id = so_check_fd(s, &errno)) == -1) {
1005 		return (-1);
1006 	}
1007 	if (msg == NULL) {
1008 		errno = EINVAL;
1009 		return (-1);
1010 	}
1011 	so_type = sockets[sock_id].type;
1012 	if ((flags & ~MSG_DONTROUTE) != 0) {
1013 		errno = EINVAL;
1014 		return (-1);
1015 	}
1016 	if (sockets[sock_id].so_error != 0) {
1017 		errno = sockets[sock_id].so_error;
1018 		return (-1);
1019 	}
1020 	if (to != NULL && to->sa_family != AF_INET) {
1021 		errno = EAFNOSUPPORT;
1022 		return (-1);
1023 	}
1024 
1025 	switch (so_type) {
1026 	case INETBOOT_RAW:
1027 	case INETBOOT_DGRAM:
1028 		if (!(sockets[sock_id].so_state & SS_ISCONNECTED) &&
1029 		    (to == NULL || tolen != sizeof (struct sockaddr_in))) {
1030 			errno = EINVAL;
1031 			return (-1);
1032 		}
1033 		bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen);
1034 		break;
1035 	case INETBOOT_STREAM:
1036 		if (!((sockets[sock_id].so_state & SS_ISCONNECTED) ||
1037 		    (sockets[sock_id].so_state & SS_ISCONNECTING))) {
1038 			errno = EINVAL;
1039 			return (-1);
1040 		}
1041 		if (sockets[sock_id].so_state & SS_CANTSENDMORE) {
1042 			errno = EPIPE;
1043 			return (-1);
1044 		}
1045 		bytes = stream_sendto(sock_id, msg, len, flags);
1046 		break;
1047 	default:
1048 		/* Should not happen... */
1049 		errno = EPROTOTYPE;
1050 		return (-1);
1051 	}
1052 	return (bytes);
1053 }
1054 
1055 static ssize_t
1056 dgram_sendto(int i, const void *msg, size_t len, int flags,
1057     const struct sockaddr *to, int tolen)
1058 {
1059 	struct inetgram		oc;
1060 	int			l, offset;
1061 	size_t			tlen;
1062 	mblk_t			*mp;
1063 
1064 #ifdef	DEBUG
1065 	{
1066 	struct sockaddr_in *sin = (struct sockaddr_in *)to;
1067 	printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n",
1068 	    i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr));
1069 	}
1070 #endif	/* DEBUG */
1071 
1072 	nuke_grams(&sockets[i].inq); /* flush the input queue */
1073 
1074 	/* calculate offset for data */
1075 	offset = sockets[i].headerlen[MEDIA_LVL](NULL) +
1076 	    (sockets[i].headerlen[NETWORK_LVL])(NULL);
1077 
1078 	bzero((caddr_t)&oc, sizeof (oc));
1079 	if (sockets[i].type != INETBOOT_RAW) {
1080 		offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL);
1081 		oc.igm_level = TRANSPORT_LVL;
1082 	} else
1083 		oc.igm_level = NETWORK_LVL;
1084 	oc.igm_oflags = flags;
1085 
1086 	if (to != NULL) {
1087 		bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen);
1088 	} else {
1089 		bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr,
1090 		    sizeof (struct sockaddr_in));
1091 	}
1092 
1093 	/* Get a legal source port if the socket isn't bound. */
1094 	if (sockets[i].bound == B_FALSE &&
1095 	    ntohs(oc.igm_saddr.sin_port == 0)) {
1096 		((struct sockaddr_in *)&oc.igm_saddr)->sin_port =
1097 		    get_source_port(B_FALSE);
1098 	}
1099 
1100 	/* Round up to 16bit value for checksum purposes */
1101 	if (sockets[i].type == INETBOOT_DGRAM) {
1102 		tlen = ((len + sizeof (uint16_t) - 1) &
1103 		    ~(sizeof (uint16_t) - 1));
1104 	} else
1105 		tlen = len;
1106 
1107 	if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) {
1108 		errno = ENOMEM;
1109 		return (-1);
1110 	}
1111 	mp = oc.igm_mp;
1112 	mp->b_rptr = mp->b_wptr += offset;
1113 	bcopy((caddr_t)msg, mp->b_wptr, len);
1114 	mp->b_wptr += len;
1115 	for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) {
1116 		if (sockets[i].output[l] != NULL) {
1117 			if (sockets[i].output[l](i, &oc) < 0) {
1118 				freeb(mp);
1119 				if (errno == 0)
1120 					errno = EIO;
1121 				return (-1);
1122 			}
1123 		}
1124 	}
1125 	freeb(mp);
1126 	return (len);
1127 }
1128 
1129 /* ARGSUSED */
1130 static ssize_t
1131 stream_sendto(int i, const void *msg, size_t len, int flags)
1132 {
1133 	int cnt;
1134 
1135 	assert(sockets[i].pcb != NULL);
1136 
1137 	/*
1138 	 * Call directly TCP's send routine.  We do this because TCP
1139 	 * needs to decide whether to send out the data.
1140 	 *
1141 	 * Note also that currently, TCP ignores all flags passed in for
1142 	 * TCP socket.
1143 	 */
1144 	if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) {
1145 		if (sockets[i].so_error != 0)
1146 			errno = sockets[i].so_error;
1147 		return (-1);
1148 	} else {
1149 		return (cnt);
1150 	}
1151 }
1152 
1153 /*
1154  * Returns ptr to the last inetgram in the list, or null if list is null
1155  */
1156 struct inetgram *
1157 last_gram(struct inetgram *igp)
1158 {
1159 	struct inetgram	*wp;
1160 	for (wp = igp; wp != NULL; wp = wp->igm_next) {
1161 		if (wp->igm_next == NULL)
1162 			return (wp);
1163 	}
1164 	return (NULL);
1165 }
1166 
1167 /*
1168  * Adds an inetgram or list of inetgrams to the end of the list.
1169  */
1170 void
1171 add_grams(struct inetgram **igpp, struct inetgram *newgp)
1172 {
1173 	struct inetgram	 *wp;
1174 
1175 	if (newgp == NULL)
1176 		return;
1177 
1178 	if (*igpp == NULL)
1179 		*igpp = newgp;
1180 	else {
1181 		wp = last_gram(*igpp);
1182 		wp->igm_next = newgp;
1183 	}
1184 }
1185 
1186 /*
1187  * Nuke a whole list of grams.
1188  */
1189 void
1190 nuke_grams(struct inetgram **lgpp)
1191 {
1192 	while (*lgpp != NULL)
1193 		del_gram(lgpp, *lgpp, B_TRUE);
1194 }
1195 
1196 /*
1197  * Remove the referenced inetgram. List is altered accordingly. Destroy the
1198  * referenced inetgram if freeit is B_TRUE.
1199  */
1200 void
1201 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit)
1202 {
1203 	struct inetgram	*wp, *pp = NULL;
1204 
1205 	if (lgpp == NULL || igp == NULL)
1206 		return;
1207 
1208 	wp = *lgpp;
1209 	while (wp != NULL) {
1210 		if (wp == igp) {
1211 			/* detach wp from the list */
1212 			if (*lgpp == wp)
1213 				*lgpp = (*lgpp)->igm_next;
1214 			else
1215 				pp->igm_next = wp->igm_next;
1216 			igp->igm_next = NULL;
1217 
1218 			if (freeit) {
1219 				if (igp->igm_mp != NULL)
1220 					freeb(igp->igm_mp);
1221 				bkmem_free((caddr_t)igp,
1222 				    sizeof (struct inetgram));
1223 			}
1224 			break;
1225 		}
1226 		pp = wp;
1227 		wp = wp->igm_next;
1228 	}
1229 }
1230 
1231 struct nct_t nct[] = {
1232 	"bootp",	NCT_BOOTP_DHCP,
1233 	"dhcp",		NCT_BOOTP_DHCP,
1234 	"rarp",		NCT_RARP_BOOTPARAMS,
1235 	"manual",	NCT_MANUAL
1236 };
1237 int	nct_entries = sizeof (nct) / sizeof (nct[0]);
1238 
1239 /*
1240  * Figure out from the bootpath what kind of network configuration strategy
1241  * we should use. Returns the network config strategy.
1242  */
1243 int
1244 get_netconfig_strategy(void)
1245 {
1246 	int	i;
1247 #if !defined(__i386)
1248 	/* sparc */
1249 #define	ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0')
1250 	char	lbootpath[OBP_MAXPATHLEN];
1251 	char	net_options[NCT_BUFSIZE];
1252 	char	*op, *nop, *sp;
1253 	dnode_t	cn;
1254 	int	proplen;
1255 
1256 	/* If the PROM DHCP cache exists, we're done */
1257 	if (prom_cached_reply(B_TRUE))
1258 		return (NCT_BOOTP_DHCP);
1259 
1260 	/*
1261 	 *	Newer (version 4) PROMs will put the name in the
1262 	 *	"net-config-strategy" property.
1263 	 */
1264 	cn = prom_finddevice("/chosen");
1265 	if ((proplen = prom_getproplen(cn, "net-config-strategy")) <
1266 	    sizeof (net_options)) {
1267 		(void) prom_getprop(cn, "net-config-strategy", net_options);
1268 		net_options[proplen] = '\0';
1269 	} else {
1270 
1271 		/*
1272 		 * We're reduced to sacanning bootpath for the prototol to use.
1273 		 * Since there was no "net-config-strategy" property, this is
1274 		 * an old PROM, so we need to excise any extraneous key/value
1275 		 * initializations from bootpath[].
1276 		 */
1277 		for (op = prom_bootpath(), sp = lbootpath; op != NULL &&
1278 		    !ISSPACE(*op); sp++, op++)
1279 			*sp = *op;
1280 		*sp = '\0';
1281 		/* find the last '/' (in the device path) */
1282 		if ((op = strrchr(lbootpath, '/')) == NULL)	/* last '/' */
1283 			op = lbootpath;
1284 		else
1285 			op++;
1286 		/* then look for the ':' separating it from the protocol */
1287 		while (*op != ':' && *op != '\0')
1288 			op++;
1289 
1290 		if (*op == ':') {
1291 			for (nop = net_options, op++;
1292 			    *op != '\0' && *op != '/' && !ISSPACE(*op) &&
1293 			    nop < &net_options[NCT_BUFSIZE]; nop++, op++)
1294 				*nop = *op;
1295 			*nop = '\0';
1296 		} else
1297 			net_options[0] = '\0';
1298 	}
1299 
1300 #undef	ISSPACE
1301 #else
1302 	/* i86 */
1303 	extern struct bootops bootops;
1304 	extern int bgetprop(struct bootops *, char *, caddr_t, int, phandle_t);
1305 	char	net_options[MAXNAMELEN];
1306 
1307 	/*
1308 	 * Look at net-config-strategy boot property to determine what protocol
1309 	 * will be used.
1310 	 */
1311 	(void) bgetprop(&bootops, "net-config-strategy", net_options,
1312 	    sizeof (net_options), 0);
1313 
1314 #endif	/* __i386 */
1315 
1316 	for (i = 0; i < nct_entries; i++)
1317 		if (strcmp(net_options, nct[i].p_name) == 0)
1318 			return (nct[i].p_id);
1319 
1320 	return (NCT_DEFAULT);
1321 }
1322 
1323 /* Modified STREAM routines for ease of porting core TCP code. */
1324 
1325 /*ARGSUSED*/
1326 mblk_t *
1327 allocb(size_t size, uint_t pri)
1328 {
1329 	unsigned char *base;
1330 	mblk_t *mp;
1331 
1332 	if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL)
1333 		return (NULL);
1334 	if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL)
1335 		return (NULL);
1336 
1337 	mp->b_next = mp->b_prev = mp->b_cont = NULL;
1338 	mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base;
1339 	mp->b_size = size;
1340 
1341 	return (mp);
1342 }
1343 
1344 void
1345 freeb(mblk_t *mp)
1346 {
1347 #ifdef DEBUG
1348 	printf("freeb datap %x\n", mp->b_datap);
1349 #endif
1350 	bkmem_free((caddr_t)(mp->b_datap), mp->b_size);
1351 #ifdef DEBUG
1352 	printf("freeb mp %x\n", mp);
1353 #endif
1354 	bkmem_free((caddr_t)mp, sizeof (mblk_t));
1355 }
1356 
1357 void
1358 freemsg(mblk_t *mp)
1359 {
1360 	while (mp) {
1361 		mblk_t *mp_cont = mp->b_cont;
1362 
1363 		freeb(mp);
1364 		mp = mp_cont;
1365 	}
1366 }
1367 
1368 mblk_t *
1369 copyb(mblk_t *bp)
1370 {
1371 	mblk_t *nbp;
1372 	unsigned char *ndp;
1373 
1374 	assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0);
1375 
1376 	if (!(nbp = allocb(bp->b_size, 0)))
1377 		return (NULL);
1378 	nbp->b_cont = NULL;
1379 	ndp = nbp->b_datap;
1380 
1381 	nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap);
1382 	nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr);
1383 	bcopy(bp->b_datap, nbp->b_datap, bp->b_size);
1384 	return (nbp);
1385 }
1386 
1387 /* To simplify things, dupb() is implemented as copyb(). */
1388 mblk_t *
1389 dupb(mblk_t *mp)
1390 {
1391 	return (copyb(mp));
1392 }
1393 
1394 /*
1395  * get number of data bytes in message
1396  */
1397 size_t
1398 msgdsize(mblk_t *bp)
1399 {
1400 	size_t count = 0;
1401 
1402 	for (; bp != NULL; bp = bp->b_cont) {
1403 		assert(bp->b_wptr >= bp->b_rptr);
1404 		count += bp->b_wptr - bp->b_rptr;
1405 	}
1406 	return (count);
1407 }
1408