1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * socket.c, Code implementing a simple socket interface.
26 */
27
28 #include <sys/types.h>
29 #include "socket_impl.h"
30 #include <sys/isa_defs.h>
31 #include <sys/sysmacros.h>
32 #include <sys/bootconf.h>
33 #include <sys/socket.h>
34 #include <netinet/in.h>
35 #include <netinet/ip.h>
36 #include <netinet/tcp.h>
37 #include <sys/uio.h>
38 #include <sys/salib.h>
39 #include "socket_inet.h"
40 #include "ipv4.h"
41 #include "ipv4_impl.h"
42 #include "udp_inet.h"
43 #include "tcp_inet.h"
44 #include "mac.h"
45 #include "mac_impl.h"
46 #include <sys/promif.h>
47
48 struct inetboot_socket sockets[MAXSOCKET] = { 0 };
49
/*
 * Default send and receive socket buffer size.  The replacement lists are
 * parenthesized so each macro behaves as a single value no matter what
 * expression it is expanded into (e.g. x / SO_DEF_SNDBUF).
 */
#define	SO_DEF_SNDBUF	(48 * 1024)
#define	SO_DEF_RCVBUF	(48 * 1024)

/* Default max socket buffer size */
#define	SO_MAX_BUF	(4 * 1024 * 1024)
56
/* Forward declarations of the file-local helpers defined further down. */
static ssize_t dgram_sendto(int i, const void *msg, size_t len, int flags,
    const struct sockaddr *to, int tolen);
static ssize_t stream_sendto(int i, const void *msg, size_t len, int flags);
static int bind_check(int sock_id, const struct sockaddr *addr);
static int quickbind(int sock_id);
62
63 /* Check the validity of a fd and return the socket index of that fd. */
64 int
so_check_fd(int fd,int * errno)65 so_check_fd(int fd, int *errno)
66 {
67 int i;
68
69 i = FD_TO_SOCKET(fd);
70 if (i < 0 || i >= MAXSOCKET) {
71 *errno = ENOTSOCK;
72 return (-1);
73 }
74 if (sockets[i].type == INETBOOT_UNUSED) {
75 *errno = ENOTSOCK;
76 return (-1);
77 }
78 return (i);
79 }
80
81 /*
82 * Create an endpoint for network communication. Returns a descriptor.
83 *
84 * Notes:
85 * Only PF_INET communication domains are supported. Within
86 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are
87 * supported.
88 */
89 int
socket(int domain,int type,int protocol)90 socket(int domain, int type, int protocol)
91 {
92 static int sock_initialized;
93 int i;
94
95 errno = 0;
96
97 if (!sock_initialized) {
98 for (i = 0; i < MAXSOCKET; i++)
99 sockets[i].type = INETBOOT_UNUSED;
100 sock_initialized = B_TRUE;
101 }
102 if (domain != AF_INET) {
103 errno = EPROTONOSUPPORT;
104 return (-1);
105 }
106
107 /* Find available socket */
108 for (i = 0; i < MAXSOCKET; i++) {
109 if (sockets[i].type == INETBOOT_UNUSED)
110 break;
111 }
112 if (i >= MAXSOCKET) {
113 errno = EMFILE; /* No slots left. */
114 return (-1);
115 }
116
117 /* Some socket initialization... */
118 sockets[i].so_rcvbuf = SO_DEF_RCVBUF;
119 sockets[i].so_sndbuf = SO_DEF_SNDBUF;
120
121 /*
122 * Note that we ignore the protocol field for SOCK_DGRAM and
123 * SOCK_STREAM. When we support different protocols in future,
124 * this needs to be changed.
125 */
126 switch (type) {
127 case SOCK_RAW:
128 ipv4_raw_socket(&sockets[i], (uint8_t)protocol);
129 break;
130 case SOCK_DGRAM:
131 udp_socket_init(&sockets[i]);
132 break;
133 case SOCK_STREAM:
134 tcp_socket_init(&sockets[i]);
135 break;
136 default:
137 errno = EPROTOTYPE;
138 break;
139 }
140
141 if (errno != 0)
142 return (-1);
143
144 /* IPv4 generic initialization. */
145 ipv4_socket_init(&sockets[i]);
146
147 /* MAC generic initialization. */
148 mac_socket_init(&sockets[i]);
149
150 return (i + SOCKETTYPE);
151 }
152
153 int
getsockname(int s,struct sockaddr * name,socklen_t * namelen)154 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
155 {
156 int i;
157
158 errno = 0;
159 if ((i = so_check_fd(s, &errno)) == -1)
160 return (-1);
161
162 if (*namelen < sizeof (struct sockaddr_in)) {
163 errno = ENOMEM;
164 return (-1);
165 }
166
167 /* Structure assignment... */
168 *((struct sockaddr_in *)name) = sockets[i].bind;
169 *namelen = sizeof (struct sockaddr_in);
170 return (0);
171 }
172
173 /*
174 * The socket options we support are:
175 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t.
176 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set).
177 * SO_REUSEADDR - Value is an int boolean.
178 * SO_RCVBUF - Value is an int.
179 * SO_SNDBUF - Value is an int.
180 */
181 int
getsockopt(int s,int level,int option,void * optval,socklen_t * optlen)182 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen)
183 {
184 int i;
185
186 errno = 0;
187 if ((i = so_check_fd(s, &errno)) == -1)
188 return (-1);
189
190 switch (level) {
191 case SOL_SOCKET: {
192 switch (option) {
193 case SO_RCVTIMEO:
194 if (*optlen == sizeof (uint32_t)) {
195 *(uint32_t *)optval = sockets[i].in_timeout;
196 } else {
197 *optlen = 0;
198 errno = EINVAL;
199 }
200 break;
201 case SO_DONTROUTE:
202 if (*optlen == sizeof (int)) {
203 *(int *)optval =
204 (sockets[i].out_flags & SO_DONTROUTE);
205 } else {
206 *optlen = 0;
207 errno = EINVAL;
208 }
209 break;
210 case SO_REUSEADDR:
211 if (*optlen == sizeof (int)) {
212 *(int *)optval =
213 (sockets[i].so_opt & SO_REUSEADDR);
214 } else {
215 *optlen = 0;
216 errno = EINVAL;
217 }
218 break;
219 case SO_RCVBUF:
220 if (*optlen == sizeof (int)) {
221 *(int *)optval = sockets[i].so_rcvbuf;
222 } else {
223 *optlen = 0;
224 errno = EINVAL;
225 }
226 break;
227 case SO_SNDBUF:
228 if (*optlen == sizeof (int)) {
229 *(int *)optval = sockets[i].so_sndbuf;
230 } else {
231 *optlen = 0;
232 errno = EINVAL;
233 }
234 break;
235 case SO_LINGER:
236 if (*optlen == sizeof (struct linger)) {
237 /* struct copy */
238 *(struct linger *)optval = sockets[i].so_linger;
239 } else {
240 *optlen = 0;
241 errno = EINVAL;
242 }
243 break;
244 default:
245 errno = ENOPROTOOPT;
246 break;
247 }
248 break;
249 } /* case SOL_SOCKET */
250 case IPPROTO_TCP:
251 case IPPROTO_IP: {
252 switch (option) {
253 default:
254 *optlen = 0;
255 errno = ENOPROTOOPT;
256 break;
257 }
258 break;
259 } /* case IPPROTO_IP or IPPROTO_TCP */
260 default:
261 errno = ENOPROTOOPT;
262 break;
263 } /* switch (level) */
264
265 if (errno != 0)
266 return (-1);
267 else
268 return (0);
269 }
270
271 /*
272 * Generate a network-order source port from the privileged range if
273 * "reserved" is true, dynamic/private range otherwise. We consider the
274 * range of 512-1023 privileged ports as ports we can use. This mirrors
275 * historical rpc client practice for privileged port selection.
276 */
277 in_port_t
get_source_port(boolean_t reserved)278 get_source_port(boolean_t reserved)
279 {
280 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1,
281 rsvdport = (IPPORT_RESERVED / 2) - 1;
282 in_port_t p;
283
284 if (reserved) {
285 if (++rsvdport >= IPPORT_RESERVED)
286 p = rsvdport = IPPORT_RESERVED / 2;
287 else
288 p = rsvdport;
289 } else
290 p = ++dynamic;
291
292 return (htons(p));
293 }
294
295 /*
296 * The socket options we support are:
297 * SO_RECVTIMEO - Value is uint32_t msecs.
298 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE).
299 * SO_REUSEADDR - value is int boolean.
300 * SO_RCVBUF - Value is int.
301 * SO_SNDBUF - Value is int.
302 */
303 int
setsockopt(int s,int level,int option,const void * optval,socklen_t optlen)304 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen)
305 {
306 int i;
307
308 errno = 0;
309 if ((i = so_check_fd(s, &errno)) == -1)
310 return (-1);
311
312 switch (level) {
313 case SOL_SOCKET: {
314 switch (option) {
315 case SO_RCVTIMEO:
316 if (optlen == sizeof (uint32_t))
317 sockets[i].in_timeout = *(uint32_t *)optval;
318 else {
319 errno = EINVAL;
320 }
321 break;
322 case SO_DONTROUTE:
323 if (optlen == sizeof (int)) {
324 if (*(int *)optval)
325 sockets[i].out_flags |= SO_DONTROUTE;
326 else
327 sockets[i].out_flags &= ~SO_DONTROUTE;
328 } else {
329 errno = EINVAL;
330 }
331 break;
332 case SO_REUSEADDR:
333 if (optlen == sizeof (int)) {
334 if (*(int *)optval)
335 sockets[i].so_opt |= SO_REUSEADDR;
336 else
337 sockets[i].so_opt &= ~SO_REUSEADDR;
338 } else {
339 errno = EINVAL;
340 }
341 break;
342 case SO_RCVBUF:
343 if (optlen == sizeof (int)) {
344 sockets[i].so_rcvbuf = *(int *)optval;
345 if (sockets[i].so_rcvbuf > SO_MAX_BUF)
346 sockets[i].so_rcvbuf = SO_MAX_BUF;
347 (void) tcp_opt_set(sockets[i].pcb,
348 level, option, optval, optlen);
349 } else {
350 errno = EINVAL;
351 }
352 break;
353 case SO_SNDBUF:
354 if (optlen == sizeof (int)) {
355 sockets[i].so_sndbuf = *(int *)optval;
356 if (sockets[i].so_sndbuf > SO_MAX_BUF)
357 sockets[i].so_sndbuf = SO_MAX_BUF;
358 (void) tcp_opt_set(sockets[i].pcb,
359 level, option, optval, optlen);
360 } else {
361 errno = EINVAL;
362 }
363 break;
364 case SO_LINGER:
365 if (optlen == sizeof (struct linger)) {
366 /* struct copy */
367 sockets[i].so_linger = *(struct linger *)optval;
368 (void) tcp_opt_set(sockets[i].pcb,
369 level, option, optval, optlen);
370 } else {
371 errno = EINVAL;
372 }
373 break;
374 default:
375 errno = ENOPROTOOPT;
376 break;
377 }
378 break;
379 } /* case SOL_SOCKET */
380 case IPPROTO_TCP:
381 case IPPROTO_IP: {
382 switch (option) {
383 default:
384 errno = ENOPROTOOPT;
385 break;
386 }
387 break;
388 } /* case IPPROTO_IP or IPPROTO_TCP */
389 default:
390 errno = ENOPROTOOPT;
391 break;
392 } /* switch (level) */
393
394 if (errno != 0)
395 return (-1);
396 else
397 return (0);
398 }
399
400 /*
401 * Shut down part of a full-duplex connection.
402 *
403 * Only supported for TCP sockets
404 */
405 int
shutdown(int s,int how)406 shutdown(int s, int how)
407 {
408 int sock_id;
409 int i;
410
411 errno = 0;
412 if ((sock_id = so_check_fd(s, &errno)) == -1)
413 return (-1);
414
415 /* shutdown only supported for TCP sockets */
416 if (sockets[sock_id].type != INETBOOT_STREAM) {
417 errno = EOPNOTSUPP;
418 return (-1);
419 }
420
421 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) {
422 errno = ENOTCONN;
423 return (-1);
424 }
425
426 switch (how) {
427 case 0:
428 sockets[sock_id].so_state |= SS_CANTRCVMORE;
429 break;
430 case 1:
431 sockets[sock_id].so_state |= SS_CANTSENDMORE;
432 break;
433 case 2:
434 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE);
435 break;
436 default:
437 errno = EINVAL;
438 return (-1);
439 }
440
441 switch (sockets[sock_id].so_state &
442 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
443 case (SS_CANTRCVMORE | SS_CANTSENDMORE):
444 /* Call lower level protocol close routine. */
445 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
446 if (sockets[sock_id].close[i] != NULL) {
447 (void) sockets[sock_id].close[i](sock_id);
448 }
449 }
450 nuke_grams(&sockets[sock_id].inq);
451 break;
452 case SS_CANTRCVMORE:
453 nuke_grams(&sockets[sock_id].inq);
454 break;
455 case SS_CANTSENDMORE:
456 /* Call lower level protocol close routine. */
457 if (tcp_shutdown(sock_id) < 0)
458 return (-1);
459 break;
460 default:
461 errno = EINVAL;
462 return (-1);
463 }
464
465 return (0);
466 }
467
468 /*
469 * "close" a socket.
470 */
471 int
socket_close(int s)472 socket_close(int s)
473 {
474 int sock_id, i;
475
476 errno = 0;
477 if ((sock_id = so_check_fd(s, &errno)) == -1)
478 return (-1);
479
480 /* Call lower level protocol close routine. */
481 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
482 if (sockets[sock_id].close[i] != NULL) {
483 /*
484 * Note that the close() routine of other
485 * layers can return an error. But right
486 * now, the only mechanism to report that
487 * back is for the close() routine to set
488 * the errno and socket_close() will return
489 * an error. But the close operation will
490 * not be stopped.
491 */
492 (void) sockets[sock_id].close[i](sock_id);
493 }
494 }
495
496 /*
497 * Clear the input queue. This has to be done
498 * after the lower level protocol close routines have been
499 * called as they may want to do something about the queue.
500 */
501 nuke_grams(&sockets[sock_id].inq);
502
503 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket));
504 sockets[sock_id].type = INETBOOT_UNUSED;
505
506 return (0);
507 }
508
509 /*
510 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero,
511 * then give up after `read_timeout' seconds. Returns the number of
512 * bytes read, or -1 on failure.
513 */
514 int
socket_read(int s,void * buf,size_t nbyte,int read_timeout)515 socket_read(int s, void *buf, size_t nbyte, int read_timeout)
516 {
517 ssize_t n;
518 uint_t start, diff;
519
520 /*
521 * keep calling non-blocking recvfrom until something received
522 * or an error occurs
523 */
524 start = prom_gettime();
525 for (;;) {
526 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL);
527 if (n == -1 && errno == EWOULDBLOCK) {
528 diff = (uint_t)((prom_gettime() - start) + 500) / 1000;
529 if (read_timeout != 0 && diff > read_timeout) {
530 errno = EINTR;
531 return (-1);
532 }
533 } else {
534 return (n);
535 }
536 }
537 }
538
/*
 * Write up to `nbyte' bytes of data from `buf' to the address pointed to
 * by `addr' using socket `s'.  Returns the number of bytes written on
 * success, or -1 on failure.
 */
int
socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr)
{
	struct sockaddr *dest = (struct sockaddr *)addr;

	return (sendto(s, buf, nbyte, 0, dest, sizeof (*addr)));
}
550
551 static int
bind_check(int sock_id,const struct sockaddr * addr)552 bind_check(int sock_id, const struct sockaddr *addr)
553 {
554 int k;
555 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
556
557 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */
558 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) {
559 for (k = 0; k < MAXSOCKET; k++) {
560 if (sockets[k].type != INETBOOT_UNUSED &&
561 sockets[k].proto == sockets[sock_id].proto &&
562 sockets[k].bound) {
563 if ((sockets[k].bind.sin_addr.s_addr ==
564 in_addr->sin_addr.s_addr) &&
565 (sockets[k].bind.sin_port ==
566 in_addr->sin_port)) {
567 errno = EADDRINUSE;
568 return (-1);
569 }
570 }
571 }
572 }
573 return (0);
574 }
575
576 /* Assign a name to an unnamed socket. */
577 int
bind(int s,const struct sockaddr * name,socklen_t namelen)578 bind(int s, const struct sockaddr *name, socklen_t namelen)
579 {
580 int i;
581
582 errno = 0;
583
584 if ((i = so_check_fd(s, &errno)) == -1)
585 return (-1);
586
587 if (name == NULL) {
588 /* unbind */
589 if (sockets[i].bound) {
590 bzero((caddr_t)&sockets[i].bind,
591 sizeof (struct sockaddr_in));
592 sockets[i].bound = B_FALSE;
593 }
594 return (0);
595 }
596 if (namelen != sizeof (struct sockaddr_in) || name == NULL) {
597 errno = EINVAL;
598 return (-1);
599 }
600 if (name->sa_family != AF_INET) {
601 errno = EAFNOSUPPORT;
602 return (-1);
603 }
604 if (sockets[i].bound) {
605 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name,
606 namelen) == 0) {
607 /* attempt to bind to same address ok... */
608 return (0);
609 }
610 errno = EINVAL; /* already bound */
611 return (-1);
612 }
613
614 if (errno != 0) {
615 return (-1);
616 }
617
618 /* Check for duplicate bind(). */
619 if (bind_check(i, name) < 0)
620 return (-1);
621
622 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen);
623 if (sockets[i].type == INETBOOT_STREAM) {
624 if (tcp_bind(i) < 0) {
625 return (-1);
626 }
627 }
628 sockets[i].bound = B_TRUE;
629
630 return (0);
631 }
632
633 static int
quickbind(int sock_id)634 quickbind(int sock_id)
635 {
636 int i;
637 struct sockaddr_in addr;
638
639 /*
640 * XXX This needs more work. Right now, if ipv4_setipaddr()
641 * have not been called, this will be wrong. But we need
642 * something better. Need to be revisited.
643 */
644 ipv4_getipaddr(&addr.sin_addr);
645 addr.sin_family = AF_INET;
646
647 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) {
648 addr.sin_port = htons(i);
649 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0)
650 break;
651 }
652 /* Need to clear errno as it is probably set by bind_check(). */
653 errno = 0;
654
655 if (i <= LARGEST_ANON_PORT) {
656 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind,
657 sizeof (struct sockaddr_in));
658 sockets[sock_id].bound = B_TRUE;
659 #ifdef DEBUG
660 printf("quick bind done addr %s port %d\n",
661 inet_ntoa(sockets[sock_id].bind.sin_addr),
662 ntohs(sockets[sock_id].bind.sin_port));
663 #endif
664 return (0);
665 } else {
666 return (-1);
667 }
668 }
669
670 int
listen(int fd,int backlog)671 listen(int fd, int backlog)
672 {
673 int sock_id;
674
675 errno = 0;
676 if ((sock_id = so_check_fd(fd, &errno)) == -1)
677 return (-1);
678
679 if (sockets[sock_id].type != INETBOOT_STREAM) {
680 errno = EOPNOTSUPP;
681 return (-1);
682 }
683 if (sockets[sock_id].so_error != 0) {
684 errno = sockets[sock_id].so_error;
685 return (-1);
686 }
687 return (tcp_listen(sock_id, backlog));
688 }
689
690 int
accept(int fd,struct sockaddr * addr,socklen_t * addr_len)691 accept(int fd, struct sockaddr *addr, socklen_t *addr_len)
692 {
693 int sock_id;
694 int new_sd;
695
696 errno = 0;
697 if ((sock_id = so_check_fd(fd, &errno)) == -1)
698 return (-1);
699
700 if (sockets[sock_id].type != INETBOOT_STREAM) {
701 errno = EOPNOTSUPP;
702 return (-1);
703 }
704 if (sockets[sock_id].so_error != 0) {
705 errno = sockets[sock_id].so_error;
706 return (-1);
707 }
708 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1)
709 return (-1);
710 sock_id = so_check_fd(new_sd, &errno);
711 sockets[sock_id].so_state |= SS_ISCONNECTED;
712 return (new_sd);
713 }
714
715 int
connect(int fd,const struct sockaddr * addr,socklen_t addr_len)716 connect(int fd, const struct sockaddr *addr, socklen_t addr_len)
717 {
718 int sock_id;
719 int so_type;
720
721 errno = 0;
722 if ((sock_id = so_check_fd(fd, &errno)) == -1)
723 return (-1);
724
725 so_type = sockets[sock_id].type;
726
727 if (addr == NULL || addr_len == 0) {
728 errno = EINVAL;
729 return (-1);
730 }
731 /* Don't allow connect for raw socket. */
732 if (so_type == INETBOOT_RAW) {
733 errno = EPROTONOSUPPORT;
734 return (-1);
735 }
736
737 if (sockets[sock_id].so_state & SS_ISCONNECTED) {
738 errno = EINVAL;
739 return (-1);
740 }
741
742 if (sockets[sock_id].so_error != 0) {
743 errno = sockets[sock_id].so_error;
744 return (-1);
745 }
746
747 /* If the socket is not bound, we need to do a quick bind. */
748 if (!sockets[sock_id].bound) {
749 /* For TCP socket, just call tcp_bind(). */
750 if (so_type == INETBOOT_STREAM) {
751 if (tcp_bind(sock_id) < 0)
752 return (-1);
753 } else {
754 if (quickbind(sock_id) < 0) {
755 errno = EADDRNOTAVAIL;
756 return (-1);
757 }
758 }
759 }
760 /* Should do some sanity check for addr .... */
761 bcopy((caddr_t)addr, &sockets[sock_id].remote,
762 sizeof (struct sockaddr_in));
763
764 if (sockets[sock_id].type == INETBOOT_STREAM) {
765 /* Call TCP connect routine. */
766 if (tcp_connect(sock_id) == 0)
767 sockets[sock_id].so_state |= SS_ISCONNECTED;
768 else {
769 if (sockets[sock_id].so_error != 0)
770 errno = sockets[sock_id].so_error;
771 return (-1);
772 }
773 } else {
774 sockets[sock_id].so_state |= SS_ISCONNECTED;
775 }
776 return (0);
777 }
778
779 /* Just a wrapper around recvfrom(). */
780 ssize_t
recv(int s,void * buf,size_t len,int flags)781 recv(int s, void *buf, size_t len, int flags)
782 {
783 return (recvfrom(s, buf, len, flags, NULL, NULL));
784 }
785
786 /*
787 * Receive messages from a connectionless socket. Legal flags are 0 and
788 * MSG_DONTWAIT. MSG_WAITALL is not currently supported.
789 *
790 * Returns length of message for success, -1 if error occurred.
791 */
792 ssize_t
recvfrom(int s,void * buf,size_t len,int flags,struct sockaddr * from,socklen_t * fromlen)793 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
794 socklen_t *fromlen)
795 {
796 int sock_id, i;
797 ssize_t datalen, bytes = 0;
798 struct inetgram *icp;
799 enum SockType so_type;
800 char *tmp_buf;
801 mblk_t *mp;
802
803 errno = 0;
804
805 if ((sock_id = so_check_fd(s, &errno)) == -1) {
806 errno = EINVAL;
807 return (-1);
808 }
809
810 if (sockets[sock_id].type == INETBOOT_STREAM &&
811 !(sockets[sock_id].so_state & SS_ISCONNECTED)) {
812 errno = ENOTCONN;
813 return (-1);
814 }
815
816 if (buf == NULL || len == 0) {
817 errno = EINVAL;
818 return (-1);
819 }
820 /* Yup - MSG_WAITALL not implemented */
821 if ((flags & ~MSG_DONTWAIT) != 0) {
822 errno = EINVAL;
823 return (-1);
824 }
825
826 retry:
827 if (sockets[sock_id].inq == NULL) {
828 /* Go out and check the wire */
829 for (i = MEDIA_LVL; i < APP_LVL; i++) {
830 if (sockets[sock_id].input[i] != NULL) {
831 if (sockets[sock_id].input[i](sock_id) < 0) {
832 if (sockets[sock_id].so_error != 0) {
833 errno =
834 sockets[sock_id].so_error;
835 }
836 return (-1);
837 }
838 }
839 }
840 }
841
842 so_type = sockets[sock_id].type;
843
844 /* Remove unknown inetgrams from the head of inq. Can this happen? */
845 while ((icp = sockets[sock_id].inq) != NULL) {
846 if ((so_type == INETBOOT_DGRAM ||
847 so_type == INETBOOT_STREAM) &&
848 icp->igm_level != APP_LVL) {
849 #ifdef DEBUG
850 printf("recvfrom: unexpected level %d frame found\n",
851 icp->igm_level);
852 #endif /* DEBUG */
853 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
854 continue;
855 } else {
856 break;
857 }
858 }
859
860
861 if (icp == NULL) {
862 /*
863 * Checking for error should be done everytime a lower layer
864 * input routing is called. For example, if TCP gets a RST,
865 * this should be reported asap.
866 */
867 if (sockets[sock_id].so_state & SS_CANTRCVMORE) {
868 if (sockets[sock_id].so_error != 0) {
869 errno = sockets[sock_id].so_error;
870 return (-1);
871 } else {
872 return (0);
873 }
874 }
875
876 if ((flags & MSG_DONTWAIT) == 0)
877 goto retry; /* wait forever */
878
879 /* no data */
880 errno = EWOULDBLOCK;
881 return (-1);
882 }
883
884 if (from != NULL && fromlen != NULL) {
885 switch (so_type) {
886 case INETBOOT_STREAM:
887 /* Need to copy from the socket's remote address. */
888 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen,
889 sizeof (struct sockaddr_in)));
890 break;
891 case INETBOOT_RAW:
892 case INETBOOT_DGRAM:
893 default:
894 if (*fromlen > sizeof (icp->igm_saddr))
895 *fromlen = sizeof (icp->igm_saddr);
896 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from,
897 MIN(*fromlen, sizeof (struct sockaddr_in)));
898 break;
899 }
900 }
901
902 mp = icp->igm_mp;
903 switch (so_type) {
904 case INETBOOT_STREAM:
905 /*
906 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need
907 * to drain the data held by tcp and try again.
908 */
909 if (icp->igm_id == TCP_CALLB_MAGIC_ID) {
910 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
911 tcp_rcv_drain_sock(sock_id);
912 goto retry;
913 }
914
915 /* TCP should put only user data in the inetgram. */
916 tmp_buf = (char *)buf;
917 while (len > 0 && icp != NULL) {
918 datalen = mp->b_wptr - mp->b_rptr;
919 if (len < datalen) {
920 bcopy(mp->b_rptr, tmp_buf, len);
921 bytes += len;
922 mp->b_rptr += len;
923 break;
924 } else {
925 bcopy(mp->b_rptr, tmp_buf, datalen);
926 len -= datalen;
927 bytes += datalen;
928 tmp_buf += datalen;
929 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
930
931 /*
932 * If we have any embedded magic messages just
933 * drop them.
934 */
935 while ((icp = sockets[sock_id].inq) != NULL) {
936 if (icp->igm_id != TCP_CALLB_MAGIC_ID)
937 break;
938 del_gram(&sockets[sock_id].inq, icp,
939 B_TRUE);
940 }
941
942 if (icp == NULL)
943 break;
944 mp = icp->igm_mp;
945 }
946 }
947 sockets[sock_id].so_rcvbuf += (int32_t)bytes;
948 break;
949 case INETBOOT_DGRAM:
950 datalen = mp->b_wptr - mp->b_rptr;
951 if (len < datalen)
952 bytes = len;
953 else
954 bytes = datalen;
955 bcopy(mp->b_rptr, buf, bytes);
956 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
957 break;
958 case INETBOOT_RAW:
959 default:
960 datalen = mp->b_wptr - mp->b_rptr;
961 if (len < datalen)
962 bytes = len;
963 else
964 bytes = datalen;
965 bcopy(mp->b_rptr, buf, bytes);
966 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
967 break;
968 }
969
970 #ifdef DEBUG
971 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id,
972 (icp != NULL) ? icp->igm_mp : 0, bytes);
973 #endif /* DEBUG */
974 return (bytes);
975 }
976
977
978 /* Just a wrapper around sendto(). */
979 ssize_t
send(int s,const void * msg,size_t len,int flags)980 send(int s, const void *msg, size_t len, int flags)
981 {
982 return (sendto(s, msg, len, flags, NULL, 0));
983 }
984
985 /*
986 * Transmit a message through a socket.
987 *
988 * Supported flags: MSG_DONTROUTE or 0.
989 */
990 ssize_t
sendto(int s,const void * msg,size_t len,int flags,const struct sockaddr * to,socklen_t tolen)991 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to,
992 socklen_t tolen)
993 {
994 enum SockType so_type;
995 int sock_id;
996 ssize_t bytes;
997
998 errno = 0;
999
1000 if ((sock_id = so_check_fd(s, &errno)) == -1) {
1001 return (-1);
1002 }
1003 if (msg == NULL) {
1004 errno = EINVAL;
1005 return (-1);
1006 }
1007 so_type = sockets[sock_id].type;
1008 if ((flags & ~MSG_DONTROUTE) != 0) {
1009 errno = EINVAL;
1010 return (-1);
1011 }
1012 if (sockets[sock_id].so_error != 0) {
1013 errno = sockets[sock_id].so_error;
1014 return (-1);
1015 }
1016 if (to != NULL && to->sa_family != AF_INET) {
1017 errno = EAFNOSUPPORT;
1018 return (-1);
1019 }
1020
1021 switch (so_type) {
1022 case INETBOOT_RAW:
1023 case INETBOOT_DGRAM:
1024 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) &&
1025 (to == NULL || tolen != sizeof (struct sockaddr_in))) {
1026 errno = EINVAL;
1027 return (-1);
1028 }
1029 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen);
1030 break;
1031 case INETBOOT_STREAM:
1032 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) ||
1033 (sockets[sock_id].so_state & SS_ISCONNECTING))) {
1034 errno = EINVAL;
1035 return (-1);
1036 }
1037 if (sockets[sock_id].so_state & SS_CANTSENDMORE) {
1038 errno = EPIPE;
1039 return (-1);
1040 }
1041 bytes = stream_sendto(sock_id, msg, len, flags);
1042 break;
1043 default:
1044 /* Should not happen... */
1045 errno = EPROTOTYPE;
1046 return (-1);
1047 }
1048 return (bytes);
1049 }
1050
1051 static ssize_t
dgram_sendto(int i,const void * msg,size_t len,int flags,const struct sockaddr * to,int tolen)1052 dgram_sendto(int i, const void *msg, size_t len, int flags,
1053 const struct sockaddr *to, int tolen)
1054 {
1055 struct inetgram oc;
1056 int l, offset;
1057 size_t tlen;
1058 mblk_t *mp;
1059
1060 #ifdef DEBUG
1061 {
1062 struct sockaddr_in *sin = (struct sockaddr_in *)to;
1063 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n",
1064 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr));
1065 }
1066 #endif /* DEBUG */
1067
1068 nuke_grams(&sockets[i].inq); /* flush the input queue */
1069
1070 /* calculate offset for data */
1071 offset = sockets[i].headerlen[MEDIA_LVL](NULL) +
1072 (sockets[i].headerlen[NETWORK_LVL])(NULL);
1073
1074 bzero((caddr_t)&oc, sizeof (oc));
1075 if (sockets[i].type != INETBOOT_RAW) {
1076 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL);
1077 oc.igm_level = TRANSPORT_LVL;
1078 } else
1079 oc.igm_level = NETWORK_LVL;
1080 oc.igm_oflags = flags;
1081
1082 if (to != NULL) {
1083 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen);
1084 } else {
1085 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr,
1086 sizeof (struct sockaddr_in));
1087 }
1088
1089 /* Get a legal source port if the socket isn't bound. */
1090 if (sockets[i].bound == B_FALSE &&
1091 ntohs(oc.igm_saddr.sin_port == 0)) {
1092 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port =
1093 get_source_port(B_FALSE);
1094 }
1095
1096 /* Round up to 16bit value for checksum purposes */
1097 if (sockets[i].type == INETBOOT_DGRAM) {
1098 tlen = ((len + sizeof (uint16_t) - 1) &
1099 ~(sizeof (uint16_t) - 1));
1100 } else
1101 tlen = len;
1102
1103 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) {
1104 errno = ENOMEM;
1105 return (-1);
1106 }
1107 mp = oc.igm_mp;
1108 mp->b_rptr = mp->b_wptr += offset;
1109 bcopy((caddr_t)msg, mp->b_wptr, len);
1110 mp->b_wptr += len;
1111 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) {
1112 if (sockets[i].output[l] != NULL) {
1113 if (sockets[i].output[l](i, &oc) < 0) {
1114 freeb(mp);
1115 if (errno == 0)
1116 errno = EIO;
1117 return (-1);
1118 }
1119 }
1120 }
1121 freeb(mp);
1122 return (len);
1123 }
1124
1125 /* ARGSUSED */
1126 static ssize_t
stream_sendto(int i,const void * msg,size_t len,int flags)1127 stream_sendto(int i, const void *msg, size_t len, int flags)
1128 {
1129 int cnt;
1130
1131 assert(sockets[i].pcb != NULL);
1132
1133 /*
1134 * Call directly TCP's send routine. We do this because TCP
1135 * needs to decide whether to send out the data.
1136 *
1137 * Note also that currently, TCP ignores all flags passed in for
1138 * TCP socket.
1139 */
1140 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) {
1141 if (sockets[i].so_error != 0)
1142 errno = sockets[i].so_error;
1143 return (-1);
1144 } else {
1145 return (cnt);
1146 }
1147 }
1148
1149 /*
1150 * Returns ptr to the last inetgram in the list, or null if list is null
1151 */
1152 struct inetgram *
last_gram(struct inetgram * igp)1153 last_gram(struct inetgram *igp)
1154 {
1155 struct inetgram *wp;
1156 for (wp = igp; wp != NULL; wp = wp->igm_next) {
1157 if (wp->igm_next == NULL)
1158 return (wp);
1159 }
1160 return (NULL);
1161 }
1162
1163 /*
1164 * Adds an inetgram or list of inetgrams to the end of the list.
1165 */
1166 void
add_grams(struct inetgram ** igpp,struct inetgram * newgp)1167 add_grams(struct inetgram **igpp, struct inetgram *newgp)
1168 {
1169 struct inetgram *wp;
1170
1171 if (newgp == NULL)
1172 return;
1173
1174 if (*igpp == NULL)
1175 *igpp = newgp;
1176 else {
1177 wp = last_gram(*igpp);
1178 wp->igm_next = newgp;
1179 }
1180 }
1181
1182 /*
1183 * Nuke a whole list of grams.
1184 */
1185 void
nuke_grams(struct inetgram ** lgpp)1186 nuke_grams(struct inetgram **lgpp)
1187 {
1188 while (*lgpp != NULL)
1189 del_gram(lgpp, *lgpp, B_TRUE);
1190 }
1191
1192 /*
1193 * Remove the referenced inetgram. List is altered accordingly. Destroy the
1194 * referenced inetgram if freeit is B_TRUE.
1195 */
1196 void
del_gram(struct inetgram ** lgpp,struct inetgram * igp,int freeit)1197 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit)
1198 {
1199 struct inetgram *wp, *pp = NULL;
1200
1201 if (lgpp == NULL || igp == NULL)
1202 return;
1203
1204 wp = *lgpp;
1205 while (wp != NULL) {
1206 if (wp == igp) {
1207 /* detach wp from the list */
1208 if (*lgpp == wp)
1209 *lgpp = (*lgpp)->igm_next;
1210 else
1211 pp->igm_next = wp->igm_next;
1212 igp->igm_next = NULL;
1213
1214 if (freeit) {
1215 if (igp->igm_mp != NULL)
1216 freeb(igp->igm_mp);
1217 bkmem_free((caddr_t)igp,
1218 sizeof (struct inetgram));
1219 }
1220 break;
1221 }
1222 pp = wp;
1223 wp = wp->igm_next;
1224 }
1225 }
1226
1227 struct nct_t nct[] = {
1228 "bootp", NCT_BOOTP_DHCP,
1229 "dhcp", NCT_BOOTP_DHCP,
1230 "rarp", NCT_RARP_BOOTPARAMS,
1231 "manual", NCT_MANUAL
1232 };
1233 int nct_entries = sizeof (nct) / sizeof (nct[0]);
1234
1235 /*
1236 * Figure out from the bootpath what kind of network configuration strategy
1237 * we should use. Returns the network config strategy.
1238 */
1239 int
get_netconfig_strategy(void)1240 get_netconfig_strategy(void)
1241 {
1242 int i;
1243 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0')
1244 char lbootpath[OBP_MAXPATHLEN];
1245 char net_options[NCT_BUFSIZE];
1246 char *op, *nop, *sp;
1247 pnode_t cn;
1248 int proplen;
1249
1250 /* If the PROM DHCP cache exists, we're done */
1251 if (prom_cached_reply(B_TRUE))
1252 return (NCT_BOOTP_DHCP);
1253
1254 /*
1255 * Newer (version 4) PROMs will put the name in the
1256 * "net-config-strategy" property.
1257 */
1258 cn = prom_finddevice("/chosen");
1259 if ((proplen = prom_getproplen(cn, "net-config-strategy")) <
1260 sizeof (net_options)) {
1261 (void) prom_getprop(cn, "net-config-strategy", net_options);
1262 net_options[proplen] = '\0';
1263 } else {
1264
1265 /*
1266 * We're reduced to sacanning bootpath for the prototol to use.
1267 * Since there was no "net-config-strategy" property, this is
1268 * an old PROM, so we need to excise any extraneous key/value
1269 * initializations from bootpath[].
1270 */
1271 for (op = prom_bootpath(), sp = lbootpath; op != NULL &&
1272 !ISSPACE(*op); sp++, op++)
1273 *sp = *op;
1274 *sp = '\0';
1275 /* find the last '/' (in the device path) */
1276 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */
1277 op = lbootpath;
1278 else
1279 op++;
1280 /* then look for the ':' separating it from the protocol */
1281 while (*op != ':' && *op != '\0')
1282 op++;
1283
1284 if (*op == ':') {
1285 for (nop = net_options, op++;
1286 *op != '\0' && *op != '/' && !ISSPACE(*op) &&
1287 nop < &net_options[NCT_BUFSIZE]; nop++, op++)
1288 *nop = *op;
1289 *nop = '\0';
1290 } else
1291 net_options[0] = '\0';
1292 }
1293
1294 #undef ISSPACE
1295
1296 for (i = 0; i < nct_entries; i++)
1297 if (strcmp(net_options, nct[i].p_name) == 0)
1298 return (nct[i].p_id);
1299
1300 return (NCT_DEFAULT);
1301 }
1302
1303 /* Modified STREAM routines for ease of porting core TCP code. */
1304
1305 /*ARGSUSED*/
1306 mblk_t *
allocb(size_t size,uint_t pri)1307 allocb(size_t size, uint_t pri)
1308 {
1309 unsigned char *base;
1310 mblk_t *mp;
1311
1312 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL)
1313 return (NULL);
1314 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL)
1315 return (NULL);
1316
1317 mp->b_next = mp->b_prev = mp->b_cont = NULL;
1318 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base;
1319 mp->b_size = size;
1320
1321 return (mp);
1322 }
1323
1324 void
freeb(mblk_t * mp)1325 freeb(mblk_t *mp)
1326 {
1327 #ifdef DEBUG
1328 printf("freeb datap %x\n", mp->b_datap);
1329 #endif
1330 bkmem_free((caddr_t)(mp->b_datap), mp->b_size);
1331 #ifdef DEBUG
1332 printf("freeb mp %x\n", mp);
1333 #endif
1334 bkmem_free((caddr_t)mp, sizeof (mblk_t));
1335 }
1336
1337 void
freemsg(mblk_t * mp)1338 freemsg(mblk_t *mp)
1339 {
1340 while (mp) {
1341 mblk_t *mp_cont = mp->b_cont;
1342
1343 freeb(mp);
1344 mp = mp_cont;
1345 }
1346 }
1347
1348 mblk_t *
copyb(mblk_t * bp)1349 copyb(mblk_t *bp)
1350 {
1351 mblk_t *nbp;
1352 unsigned char *ndp;
1353
1354 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0);
1355
1356 if (!(nbp = allocb(bp->b_size, 0)))
1357 return (NULL);
1358 nbp->b_cont = NULL;
1359 ndp = nbp->b_datap;
1360
1361 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap);
1362 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr);
1363 bcopy(bp->b_datap, nbp->b_datap, bp->b_size);
1364 return (nbp);
1365 }
1366
1367 /* To simplify things, dupb() is implemented as copyb(). */
1368 mblk_t *
dupb(mblk_t * mp)1369 dupb(mblk_t *mp)
1370 {
1371 return (copyb(mp));
1372 }
1373
1374 /*
1375 * get number of data bytes in message
1376 */
1377 size_t
msgdsize(mblk_t * bp)1378 msgdsize(mblk_t *bp)
1379 {
1380 size_t count = 0;
1381
1382 for (; bp != NULL; bp = bp->b_cont) {
1383 assert(bp->b_wptr >= bp->b_rptr);
1384 count += bp->b_wptr - bp->b_rptr;
1385 }
1386 return (count);
1387 }
1388