1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 *
25 * socket.c, Code implementing a simple socket interface.
26 */
27
28 #pragma ident "%Z%%M% %I% %E% SMI"
29
30 #include <sys/types.h>
31 #include "socket_impl.h"
32 #include <sys/isa_defs.h>
33 #include <sys/sysmacros.h>
34 #include <sys/bootconf.h>
35 #include <sys/socket.h>
36 #include <netinet/in.h>
37 #include <netinet/ip.h>
38 #include <netinet/tcp.h>
39 #include <sys/uio.h>
40 #include <sys/salib.h>
41 #include "socket_inet.h"
42 #include "ipv4.h"
43 #include "ipv4_impl.h"
44 #include "udp_inet.h"
45 #include "tcp_inet.h"
46 #include "mac.h"
47 #include "mac_impl.h"
48 #include <sys/promif.h>
49
50 struct inetboot_socket sockets[MAXSOCKET] = { 0 };
51
/*
 * Default send and receive socket buffer size.
 *
 * The expansions are parenthesized so that the macros behave as single
 * values inside larger expressions (e.g. "x % SO_DEF_SNDBUF").
 */
#define	SO_DEF_SNDBUF	(48 * 1024)
#define	SO_DEF_RCVBUF	(48 * 1024)

/* Default max socket buffer size */
#define	SO_MAX_BUF	(4 * 1024 * 1024)
58
/* Forward declarations of the file-local helpers defined further down. */
static ssize_t	dgram_sendto(int i, const void *msg, size_t len, int flags,
    const struct sockaddr *to, int tolen);
static ssize_t	stream_sendto(int i, const void *msg, size_t len, int flags);
static int	bind_check(int sock_id, const struct sockaddr *addr);
static int	quickbind(int sock_id);
64
65 /* Check the validity of a fd and return the socket index of that fd. */
66 int
so_check_fd(int fd,int * errno)67 so_check_fd(int fd, int *errno)
68 {
69 int i;
70
71 i = FD_TO_SOCKET(fd);
72 if (i < 0 || i >= MAXSOCKET) {
73 *errno = ENOTSOCK;
74 return (-1);
75 }
76 if (sockets[i].type == INETBOOT_UNUSED) {
77 *errno = ENOTSOCK;
78 return (-1);
79 }
80 return (i);
81 }
82
83 /*
84 * Create an endpoint for network communication. Returns a descriptor.
85 *
86 * Notes:
87 * Only PF_INET communication domains are supported. Within
88 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are
89 * supported.
90 */
91 int
socket(int domain,int type,int protocol)92 socket(int domain, int type, int protocol)
93 {
94 static int sock_initialized;
95 int i;
96
97 errno = 0;
98
99 if (!sock_initialized) {
100 for (i = 0; i < MAXSOCKET; i++)
101 sockets[i].type = INETBOOT_UNUSED;
102 sock_initialized = B_TRUE;
103 }
104 if (domain != AF_INET) {
105 errno = EPROTONOSUPPORT;
106 return (-1);
107 }
108
109 /* Find available socket */
110 for (i = 0; i < MAXSOCKET; i++) {
111 if (sockets[i].type == INETBOOT_UNUSED)
112 break;
113 }
114 if (i >= MAXSOCKET) {
115 errno = EMFILE; /* No slots left. */
116 return (-1);
117 }
118
119 /* Some socket initialization... */
120 sockets[i].so_rcvbuf = SO_DEF_RCVBUF;
121 sockets[i].so_sndbuf = SO_DEF_SNDBUF;
122
123 /*
124 * Note that we ignore the protocol field for SOCK_DGRAM and
125 * SOCK_STREAM. When we support different protocols in future,
126 * this needs to be changed.
127 */
128 switch (type) {
129 case SOCK_RAW:
130 ipv4_raw_socket(&sockets[i], (uint8_t)protocol);
131 break;
132 case SOCK_DGRAM:
133 udp_socket_init(&sockets[i]);
134 break;
135 case SOCK_STREAM:
136 tcp_socket_init(&sockets[i]);
137 break;
138 default:
139 errno = EPROTOTYPE;
140 break;
141 }
142
143 if (errno != 0)
144 return (-1);
145
146 /* IPv4 generic initialization. */
147 ipv4_socket_init(&sockets[i]);
148
149 /* MAC generic initialization. */
150 mac_socket_init(&sockets[i]);
151
152 return (i + SOCKETTYPE);
153 }
154
155 int
getsockname(int s,struct sockaddr * name,socklen_t * namelen)156 getsockname(int s, struct sockaddr *name, socklen_t *namelen)
157 {
158 int i;
159
160 errno = 0;
161 if ((i = so_check_fd(s, &errno)) == -1)
162 return (-1);
163
164 if (*namelen < sizeof (struct sockaddr_in)) {
165 errno = ENOMEM;
166 return (-1);
167 }
168
169 /* Structure assignment... */
170 *((struct sockaddr_in *)name) = sockets[i].bind;
171 *namelen = sizeof (struct sockaddr_in);
172 return (0);
173 }
174
175 /*
176 * The socket options we support are:
177 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t.
178 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set).
179 * SO_REUSEADDR - Value is an int boolean.
180 * SO_RCVBUF - Value is an int.
181 * SO_SNDBUF - Value is an int.
182 */
183 int
getsockopt(int s,int level,int option,void * optval,socklen_t * optlen)184 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen)
185 {
186 int i;
187
188 errno = 0;
189 if ((i = so_check_fd(s, &errno)) == -1)
190 return (-1);
191
192 switch (level) {
193 case SOL_SOCKET: {
194 switch (option) {
195 case SO_RCVTIMEO:
196 if (*optlen == sizeof (uint32_t)) {
197 *(uint32_t *)optval = sockets[i].in_timeout;
198 } else {
199 *optlen = 0;
200 errno = EINVAL;
201 }
202 break;
203 case SO_DONTROUTE:
204 if (*optlen == sizeof (int)) {
205 *(int *)optval =
206 (sockets[i].out_flags & SO_DONTROUTE);
207 } else {
208 *optlen = 0;
209 errno = EINVAL;
210 }
211 break;
212 case SO_REUSEADDR:
213 if (*optlen == sizeof (int)) {
214 *(int *)optval =
215 (sockets[i].so_opt & SO_REUSEADDR);
216 } else {
217 *optlen = 0;
218 errno = EINVAL;
219 }
220 break;
221 case SO_RCVBUF:
222 if (*optlen == sizeof (int)) {
223 *(int *)optval = sockets[i].so_rcvbuf;
224 } else {
225 *optlen = 0;
226 errno = EINVAL;
227 }
228 break;
229 case SO_SNDBUF:
230 if (*optlen == sizeof (int)) {
231 *(int *)optval = sockets[i].so_sndbuf;
232 } else {
233 *optlen = 0;
234 errno = EINVAL;
235 }
236 break;
237 case SO_LINGER:
238 if (*optlen == sizeof (struct linger)) {
239 /* struct copy */
240 *(struct linger *)optval = sockets[i].so_linger;
241 } else {
242 *optlen = 0;
243 errno = EINVAL;
244 }
245 default:
246 errno = ENOPROTOOPT;
247 break;
248 }
249 break;
250 } /* case SOL_SOCKET */
251 case IPPROTO_TCP:
252 case IPPROTO_IP: {
253 switch (option) {
254 default:
255 *optlen = 0;
256 errno = ENOPROTOOPT;
257 break;
258 }
259 break;
260 } /* case IPPROTO_IP or IPPROTO_TCP */
261 default:
262 errno = ENOPROTOOPT;
263 break;
264 } /* switch (level) */
265
266 if (errno != 0)
267 return (-1);
268 else
269 return (0);
270 }
271
272 /*
273 * Generate a network-order source port from the privileged range if
274 * "reserved" is true, dynamic/private range otherwise. We consider the
275 * range of 512-1023 privileged ports as ports we can use. This mirrors
276 * historical rpc client practice for privileged port selection.
277 */
278 in_port_t
get_source_port(boolean_t reserved)279 get_source_port(boolean_t reserved)
280 {
281 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1,
282 rsvdport = (IPPORT_RESERVED / 2) - 1;
283 in_port_t p;
284
285 if (reserved) {
286 if (++rsvdport >= IPPORT_RESERVED)
287 p = rsvdport = IPPORT_RESERVED / 2;
288 else
289 p = rsvdport;
290 } else
291 p = ++dynamic;
292
293 return (htons(p));
294 }
295
296 /*
297 * The socket options we support are:
298 * SO_RECVTIMEO - Value is uint32_t msecs.
299 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE).
300 * SO_REUSEADDR - value is int boolean.
301 * SO_RCVBUF - Value is int.
302 * SO_SNDBUF - Value is int.
303 */
304 int
setsockopt(int s,int level,int option,const void * optval,socklen_t optlen)305 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen)
306 {
307 int i;
308
309 errno = 0;
310 if ((i = so_check_fd(s, &errno)) == -1)
311 return (-1);
312
313 switch (level) {
314 case SOL_SOCKET: {
315 switch (option) {
316 case SO_RCVTIMEO:
317 if (optlen == sizeof (uint32_t))
318 sockets[i].in_timeout = *(uint32_t *)optval;
319 else {
320 errno = EINVAL;
321 }
322 break;
323 case SO_DONTROUTE:
324 if (optlen == sizeof (int)) {
325 if (*(int *)optval)
326 sockets[i].out_flags |= SO_DONTROUTE;
327 else
328 sockets[i].out_flags &= ~SO_DONTROUTE;
329 } else {
330 errno = EINVAL;
331 }
332 break;
333 case SO_REUSEADDR:
334 if (optlen == sizeof (int)) {
335 if (*(int *)optval)
336 sockets[i].so_opt |= SO_REUSEADDR;
337 else
338 sockets[i].so_opt &= ~SO_REUSEADDR;
339 } else {
340 errno = EINVAL;
341 }
342 break;
343 case SO_RCVBUF:
344 if (optlen == sizeof (int)) {
345 sockets[i].so_rcvbuf = *(int *)optval;
346 if (sockets[i].so_rcvbuf > SO_MAX_BUF)
347 sockets[i].so_rcvbuf = SO_MAX_BUF;
348 (void) tcp_opt_set(sockets[i].pcb,
349 level, option, optval, optlen);
350 } else {
351 errno = EINVAL;
352 }
353 break;
354 case SO_SNDBUF:
355 if (optlen == sizeof (int)) {
356 sockets[i].so_sndbuf = *(int *)optval;
357 if (sockets[i].so_sndbuf > SO_MAX_BUF)
358 sockets[i].so_sndbuf = SO_MAX_BUF;
359 (void) tcp_opt_set(sockets[i].pcb,
360 level, option, optval, optlen);
361 } else {
362 errno = EINVAL;
363 }
364 break;
365 case SO_LINGER:
366 if (optlen == sizeof (struct linger)) {
367 /* struct copy */
368 sockets[i].so_linger = *(struct linger *)optval;
369 (void) tcp_opt_set(sockets[i].pcb,
370 level, option, optval, optlen);
371 } else {
372 errno = EINVAL;
373 }
374 break;
375 default:
376 errno = ENOPROTOOPT;
377 break;
378 }
379 break;
380 } /* case SOL_SOCKET */
381 case IPPROTO_TCP:
382 case IPPROTO_IP: {
383 switch (option) {
384 default:
385 errno = ENOPROTOOPT;
386 break;
387 }
388 break;
389 } /* case IPPROTO_IP or IPPROTO_TCP */
390 default:
391 errno = ENOPROTOOPT;
392 break;
393 } /* switch (level) */
394
395 if (errno != 0)
396 return (-1);
397 else
398 return (0);
399 }
400
401 /*
402 * Shut down part of a full-duplex connection.
403 *
404 * Only supported for TCP sockets
405 */
406 int
shutdown(int s,int how)407 shutdown(int s, int how)
408 {
409 int sock_id;
410 int i;
411
412 errno = 0;
413 if ((sock_id = so_check_fd(s, &errno)) == -1)
414 return (-1);
415
416 /* shutdown only supported for TCP sockets */
417 if (sockets[sock_id].type != INETBOOT_STREAM) {
418 errno = EOPNOTSUPP;
419 return (-1);
420 }
421
422 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) {
423 errno = ENOTCONN;
424 return (-1);
425 }
426
427 switch (how) {
428 case 0:
429 sockets[sock_id].so_state |= SS_CANTRCVMORE;
430 break;
431 case 1:
432 sockets[sock_id].so_state |= SS_CANTSENDMORE;
433 break;
434 case 2:
435 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE);
436 break;
437 default:
438 errno = EINVAL;
439 return (-1);
440 }
441
442 switch (sockets[sock_id].so_state &
443 (SS_CANTRCVMORE | SS_CANTSENDMORE)) {
444 case (SS_CANTRCVMORE | SS_CANTSENDMORE):
445 /* Call lower level protocol close routine. */
446 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
447 if (sockets[sock_id].close[i] != NULL) {
448 (void) sockets[sock_id].close[i](sock_id);
449 }
450 }
451 nuke_grams(&sockets[sock_id].inq);
452 break;
453 case SS_CANTRCVMORE:
454 nuke_grams(&sockets[sock_id].inq);
455 break;
456 case SS_CANTSENDMORE:
457 /* Call lower level protocol close routine. */
458 if (tcp_shutdown(sock_id) < 0)
459 return (-1);
460 break;
461 default:
462 errno = EINVAL;
463 return (-1);
464 }
465
466 return (0);
467 }
468
469 /*
470 * "close" a socket.
471 */
472 int
socket_close(int s)473 socket_close(int s)
474 {
475 int sock_id, i;
476
477 errno = 0;
478 if ((sock_id = so_check_fd(s, &errno)) == -1)
479 return (-1);
480
481 /* Call lower level protocol close routine. */
482 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) {
483 if (sockets[sock_id].close[i] != NULL) {
484 /*
485 * Note that the close() routine of other
486 * layers can return an error. But right
487 * now, the only mechanism to report that
488 * back is for the close() routine to set
489 * the errno and socket_close() will return
490 * an error. But the close operation will
491 * not be stopped.
492 */
493 (void) sockets[sock_id].close[i](sock_id);
494 }
495 }
496
497 /*
498 * Clear the input queue. This has to be done
499 * after the lower level protocol close routines have been
500 * called as they may want to do something about the queue.
501 */
502 nuke_grams(&sockets[sock_id].inq);
503
504 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket));
505 sockets[sock_id].type = INETBOOT_UNUSED;
506
507 return (0);
508 }
509
510 /*
511 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero,
512 * then give up after `read_timeout' seconds. Returns the number of
513 * bytes read, or -1 on failure.
514 */
515 int
socket_read(int s,void * buf,size_t nbyte,int read_timeout)516 socket_read(int s, void *buf, size_t nbyte, int read_timeout)
517 {
518 ssize_t n;
519 uint_t start, diff;
520
521 /*
522 * keep calling non-blocking recvfrom until something received
523 * or an error occurs
524 */
525 start = prom_gettime();
526 for (;;) {
527 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL);
528 if (n == -1 && errno == EWOULDBLOCK) {
529 diff = (uint_t)((prom_gettime() - start) + 500) / 1000;
530 if (read_timeout != 0 && diff > read_timeout) {
531 errno = EINTR;
532 return (-1);
533 }
534 } else {
535 return (n);
536 }
537 }
538 }
539
/*
 * Write up to `nbyte' bytes of data from `buf' to the address pointed to
 * by `addr' using socket `s'.  Returns the number of bytes written on
 * success, or -1 on failure.  This is a thin wrapper around sendto() with
 * no flags.
 */
int
socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr)
{
	struct sockaddr *dest = (struct sockaddr *)addr;

	return (sendto(s, buf, nbyte, 0, dest, sizeof (*addr)));
}
551
552 static int
bind_check(int sock_id,const struct sockaddr * addr)553 bind_check(int sock_id, const struct sockaddr *addr)
554 {
555 int k;
556 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr;
557
558 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */
559 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) {
560 for (k = 0; k < MAXSOCKET; k++) {
561 if (sockets[k].type != INETBOOT_UNUSED &&
562 sockets[k].proto == sockets[sock_id].proto &&
563 sockets[k].bound) {
564 if ((sockets[k].bind.sin_addr.s_addr ==
565 in_addr->sin_addr.s_addr) &&
566 (sockets[k].bind.sin_port ==
567 in_addr->sin_port)) {
568 errno = EADDRINUSE;
569 return (-1);
570 }
571 }
572 }
573 }
574 return (0);
575 }
576
577 /* Assign a name to an unnamed socket. */
578 int
bind(int s,const struct sockaddr * name,socklen_t namelen)579 bind(int s, const struct sockaddr *name, socklen_t namelen)
580 {
581 int i;
582
583 errno = 0;
584
585 if ((i = so_check_fd(s, &errno)) == -1)
586 return (-1);
587
588 if (name == NULL) {
589 /* unbind */
590 if (sockets[i].bound) {
591 bzero((caddr_t)&sockets[i].bind,
592 sizeof (struct sockaddr_in));
593 sockets[i].bound = B_FALSE;
594 }
595 return (0);
596 }
597 if (namelen != sizeof (struct sockaddr_in) || name == NULL) {
598 errno = EINVAL;
599 return (-1);
600 }
601 if (name->sa_family != AF_INET) {
602 errno = EAFNOSUPPORT;
603 return (-1);
604 }
605 if (sockets[i].bound) {
606 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name,
607 namelen) == 0) {
608 /* attempt to bind to same address ok... */
609 return (0);
610 }
611 errno = EINVAL; /* already bound */
612 return (-1);
613 }
614
615 if (errno != 0) {
616 return (-1);
617 }
618
619 /* Check for duplicate bind(). */
620 if (bind_check(i, name) < 0)
621 return (-1);
622
623 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen);
624 if (sockets[i].type == INETBOOT_STREAM) {
625 if (tcp_bind(i) < 0) {
626 return (-1);
627 }
628 }
629 sockets[i].bound = B_TRUE;
630
631 return (0);
632 }
633
634 static int
quickbind(int sock_id)635 quickbind(int sock_id)
636 {
637 int i;
638 struct sockaddr_in addr;
639
640 /*
641 * XXX This needs more work. Right now, if ipv4_setipaddr()
642 * have not been called, this will be wrong. But we need
643 * something better. Need to be revisited.
644 */
645 ipv4_getipaddr(&addr.sin_addr);
646 addr.sin_family = AF_INET;
647
648 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) {
649 addr.sin_port = htons(i);
650 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0)
651 break;
652 }
653 /* Need to clear errno as it is probably set by bind_check(). */
654 errno = 0;
655
656 if (i <= LARGEST_ANON_PORT) {
657 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind,
658 sizeof (struct sockaddr_in));
659 sockets[sock_id].bound = B_TRUE;
660 #ifdef DEBUG
661 printf("quick bind done addr %s port %d\n",
662 inet_ntoa(sockets[sock_id].bind.sin_addr),
663 ntohs(sockets[sock_id].bind.sin_port));
664 #endif
665 return (0);
666 } else {
667 return (-1);
668 }
669 }
670
671 int
listen(int fd,int backlog)672 listen(int fd, int backlog)
673 {
674 int sock_id;
675
676 errno = 0;
677 if ((sock_id = so_check_fd(fd, &errno)) == -1)
678 return (-1);
679
680 if (sockets[sock_id].type != INETBOOT_STREAM) {
681 errno = EOPNOTSUPP;
682 return (-1);
683 }
684 if (sockets[sock_id].so_error != 0) {
685 errno = sockets[sock_id].so_error;
686 return (-1);
687 }
688 return (tcp_listen(sock_id, backlog));
689 }
690
691 int
accept(int fd,struct sockaddr * addr,socklen_t * addr_len)692 accept(int fd, struct sockaddr *addr, socklen_t *addr_len)
693 {
694 int sock_id;
695 int new_sd;
696
697 errno = 0;
698 if ((sock_id = so_check_fd(fd, &errno)) == -1)
699 return (-1);
700
701 if (sockets[sock_id].type != INETBOOT_STREAM) {
702 errno = EOPNOTSUPP;
703 return (-1);
704 }
705 if (sockets[sock_id].so_error != 0) {
706 errno = sockets[sock_id].so_error;
707 return (-1);
708 }
709 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1)
710 return (-1);
711 sock_id = so_check_fd(new_sd, &errno);
712 sockets[sock_id].so_state |= SS_ISCONNECTED;
713 return (new_sd);
714 }
715
716 int
connect(int fd,const struct sockaddr * addr,socklen_t addr_len)717 connect(int fd, const struct sockaddr *addr, socklen_t addr_len)
718 {
719 int sock_id;
720 int so_type;
721
722 errno = 0;
723 if ((sock_id = so_check_fd(fd, &errno)) == -1)
724 return (-1);
725
726 so_type = sockets[sock_id].type;
727
728 if (addr == NULL || addr_len == 0) {
729 errno = EINVAL;
730 return (-1);
731 }
732 /* Don't allow connect for raw socket. */
733 if (so_type == INETBOOT_RAW) {
734 errno = EPROTONOSUPPORT;
735 return (-1);
736 }
737
738 if (sockets[sock_id].so_state & SS_ISCONNECTED) {
739 errno = EINVAL;
740 return (-1);
741 }
742
743 if (sockets[sock_id].so_error != 0) {
744 errno = sockets[sock_id].so_error;
745 return (-1);
746 }
747
748 /* If the socket is not bound, we need to do a quick bind. */
749 if (!sockets[sock_id].bound) {
750 /* For TCP socket, just call tcp_bind(). */
751 if (so_type == INETBOOT_STREAM) {
752 if (tcp_bind(sock_id) < 0)
753 return (-1);
754 } else {
755 if (quickbind(sock_id) < 0) {
756 errno = EADDRNOTAVAIL;
757 return (-1);
758 }
759 }
760 }
761 /* Should do some sanity check for addr .... */
762 bcopy((caddr_t)addr, &sockets[sock_id].remote,
763 sizeof (struct sockaddr_in));
764
765 if (sockets[sock_id].type == INETBOOT_STREAM) {
766 /* Call TCP connect routine. */
767 if (tcp_connect(sock_id) == 0)
768 sockets[sock_id].so_state |= SS_ISCONNECTED;
769 else {
770 if (sockets[sock_id].so_error != 0)
771 errno = sockets[sock_id].so_error;
772 return (-1);
773 }
774 } else {
775 sockets[sock_id].so_state |= SS_ISCONNECTED;
776 }
777 return (0);
778 }
779
780 /* Just a wrapper around recvfrom(). */
781 ssize_t
recv(int s,void * buf,size_t len,int flags)782 recv(int s, void *buf, size_t len, int flags)
783 {
784 return (recvfrom(s, buf, len, flags, NULL, NULL));
785 }
786
787 /*
788 * Receive messages from a connectionless socket. Legal flags are 0 and
789 * MSG_DONTWAIT. MSG_WAITALL is not currently supported.
790 *
791 * Returns length of message for success, -1 if error occurred.
792 */
793 ssize_t
recvfrom(int s,void * buf,size_t len,int flags,struct sockaddr * from,socklen_t * fromlen)794 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from,
795 socklen_t *fromlen)
796 {
797 int sock_id, i;
798 ssize_t datalen, bytes = 0;
799 struct inetgram *icp;
800 enum SockType so_type;
801 char *tmp_buf;
802 mblk_t *mp;
803
804 errno = 0;
805
806 if ((sock_id = so_check_fd(s, &errno)) == -1) {
807 errno = EINVAL;
808 return (-1);
809 }
810
811 if (sockets[sock_id].type == INETBOOT_STREAM &&
812 !(sockets[sock_id].so_state & SS_ISCONNECTED)) {
813 errno = ENOTCONN;
814 return (-1);
815 }
816
817 if (buf == NULL || len == 0) {
818 errno = EINVAL;
819 return (-1);
820 }
821 /* Yup - MSG_WAITALL not implemented */
822 if ((flags & ~MSG_DONTWAIT) != 0) {
823 errno = EINVAL;
824 return (-1);
825 }
826
827 retry:
828 if (sockets[sock_id].inq == NULL) {
829 /* Go out and check the wire */
830 for (i = MEDIA_LVL; i < APP_LVL; i++) {
831 if (sockets[sock_id].input[i] != NULL) {
832 if (sockets[sock_id].input[i](sock_id) < 0) {
833 if (sockets[sock_id].so_error != 0) {
834 errno =
835 sockets[sock_id].so_error;
836 }
837 return (-1);
838 }
839 }
840 }
841 }
842
843 so_type = sockets[sock_id].type;
844
845 /* Remove unknown inetgrams from the head of inq. Can this happen? */
846 while ((icp = sockets[sock_id].inq) != NULL) {
847 if ((so_type == INETBOOT_DGRAM ||
848 so_type == INETBOOT_STREAM) &&
849 icp->igm_level != APP_LVL) {
850 #ifdef DEBUG
851 printf("recvfrom: unexpected level %d frame found\n",
852 icp->igm_level);
853 #endif /* DEBUG */
854 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
855 continue;
856 } else {
857 break;
858 }
859 }
860
861
862 if (icp == NULL) {
863 /*
864 * Checking for error should be done everytime a lower layer
865 * input routing is called. For example, if TCP gets a RST,
866 * this should be reported asap.
867 */
868 if (sockets[sock_id].so_state & SS_CANTRCVMORE) {
869 if (sockets[sock_id].so_error != 0) {
870 errno = sockets[sock_id].so_error;
871 return (-1);
872 } else {
873 return (0);
874 }
875 }
876
877 if ((flags & MSG_DONTWAIT) == 0)
878 goto retry; /* wait forever */
879
880 /* no data */
881 errno = EWOULDBLOCK;
882 return (-1);
883 }
884
885 if (from != NULL && fromlen != NULL) {
886 switch (so_type) {
887 case INETBOOT_STREAM:
888 /* Need to copy from the socket's remote address. */
889 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen,
890 sizeof (struct sockaddr_in)));
891 break;
892 case INETBOOT_RAW:
893 case INETBOOT_DGRAM:
894 default:
895 if (*fromlen > sizeof (icp->igm_saddr))
896 *fromlen = sizeof (icp->igm_saddr);
897 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from,
898 MIN(*fromlen, sizeof (struct sockaddr_in)));
899 break;
900 }
901 }
902
903 mp = icp->igm_mp;
904 switch (so_type) {
905 case INETBOOT_STREAM:
906 /*
907 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need
908 * to drain the data held by tcp and try again.
909 */
910 if (icp->igm_id == TCP_CALLB_MAGIC_ID) {
911 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
912 tcp_rcv_drain_sock(sock_id);
913 goto retry;
914 }
915
916 /* TCP should put only user data in the inetgram. */
917 tmp_buf = (char *)buf;
918 while (len > 0 && icp != NULL) {
919 datalen = mp->b_wptr - mp->b_rptr;
920 if (len < datalen) {
921 bcopy(mp->b_rptr, tmp_buf, len);
922 bytes += len;
923 mp->b_rptr += len;
924 break;
925 } else {
926 bcopy(mp->b_rptr, tmp_buf, datalen);
927 len -= datalen;
928 bytes += datalen;
929 tmp_buf += datalen;
930 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
931
932 /*
933 * If we have any embedded magic messages just
934 * drop them.
935 */
936 while ((icp = sockets[sock_id].inq) != NULL) {
937 if (icp->igm_id != TCP_CALLB_MAGIC_ID)
938 break;
939 del_gram(&sockets[sock_id].inq, icp,
940 B_TRUE);
941 }
942
943 if (icp == NULL)
944 break;
945 mp = icp->igm_mp;
946 }
947 }
948 sockets[sock_id].so_rcvbuf += (int32_t)bytes;
949 break;
950 case INETBOOT_DGRAM:
951 datalen = mp->b_wptr - mp->b_rptr;
952 if (len < datalen)
953 bytes = len;
954 else
955 bytes = datalen;
956 bcopy(mp->b_rptr, buf, bytes);
957 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
958 break;
959 case INETBOOT_RAW:
960 default:
961 datalen = mp->b_wptr - mp->b_rptr;
962 if (len < datalen)
963 bytes = len;
964 else
965 bytes = datalen;
966 bcopy(mp->b_rptr, buf, bytes);
967 del_gram(&sockets[sock_id].inq, icp, B_TRUE);
968 break;
969 }
970
971 #ifdef DEBUG
972 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id,
973 (icp != NULL) ? icp->igm_mp : 0, bytes);
974 #endif /* DEBUG */
975 return (bytes);
976 }
977
978
979 /* Just a wrapper around sendto(). */
980 ssize_t
send(int s,const void * msg,size_t len,int flags)981 send(int s, const void *msg, size_t len, int flags)
982 {
983 return (sendto(s, msg, len, flags, NULL, 0));
984 }
985
986 /*
987 * Transmit a message through a socket.
988 *
989 * Supported flags: MSG_DONTROUTE or 0.
990 */
991 ssize_t
sendto(int s,const void * msg,size_t len,int flags,const struct sockaddr * to,socklen_t tolen)992 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to,
993 socklen_t tolen)
994 {
995 enum SockType so_type;
996 int sock_id;
997 ssize_t bytes;
998
999 errno = 0;
1000
1001 if ((sock_id = so_check_fd(s, &errno)) == -1) {
1002 return (-1);
1003 }
1004 if (msg == NULL) {
1005 errno = EINVAL;
1006 return (-1);
1007 }
1008 so_type = sockets[sock_id].type;
1009 if ((flags & ~MSG_DONTROUTE) != 0) {
1010 errno = EINVAL;
1011 return (-1);
1012 }
1013 if (sockets[sock_id].so_error != 0) {
1014 errno = sockets[sock_id].so_error;
1015 return (-1);
1016 }
1017 if (to != NULL && to->sa_family != AF_INET) {
1018 errno = EAFNOSUPPORT;
1019 return (-1);
1020 }
1021
1022 switch (so_type) {
1023 case INETBOOT_RAW:
1024 case INETBOOT_DGRAM:
1025 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) &&
1026 (to == NULL || tolen != sizeof (struct sockaddr_in))) {
1027 errno = EINVAL;
1028 return (-1);
1029 }
1030 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen);
1031 break;
1032 case INETBOOT_STREAM:
1033 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) ||
1034 (sockets[sock_id].so_state & SS_ISCONNECTING))) {
1035 errno = EINVAL;
1036 return (-1);
1037 }
1038 if (sockets[sock_id].so_state & SS_CANTSENDMORE) {
1039 errno = EPIPE;
1040 return (-1);
1041 }
1042 bytes = stream_sendto(sock_id, msg, len, flags);
1043 break;
1044 default:
1045 /* Should not happen... */
1046 errno = EPROTOTYPE;
1047 return (-1);
1048 }
1049 return (bytes);
1050 }
1051
1052 static ssize_t
dgram_sendto(int i,const void * msg,size_t len,int flags,const struct sockaddr * to,int tolen)1053 dgram_sendto(int i, const void *msg, size_t len, int flags,
1054 const struct sockaddr *to, int tolen)
1055 {
1056 struct inetgram oc;
1057 int l, offset;
1058 size_t tlen;
1059 mblk_t *mp;
1060
1061 #ifdef DEBUG
1062 {
1063 struct sockaddr_in *sin = (struct sockaddr_in *)to;
1064 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n",
1065 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr));
1066 }
1067 #endif /* DEBUG */
1068
1069 nuke_grams(&sockets[i].inq); /* flush the input queue */
1070
1071 /* calculate offset for data */
1072 offset = sockets[i].headerlen[MEDIA_LVL](NULL) +
1073 (sockets[i].headerlen[NETWORK_LVL])(NULL);
1074
1075 bzero((caddr_t)&oc, sizeof (oc));
1076 if (sockets[i].type != INETBOOT_RAW) {
1077 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL);
1078 oc.igm_level = TRANSPORT_LVL;
1079 } else
1080 oc.igm_level = NETWORK_LVL;
1081 oc.igm_oflags = flags;
1082
1083 if (to != NULL) {
1084 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen);
1085 } else {
1086 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr,
1087 sizeof (struct sockaddr_in));
1088 }
1089
1090 /* Get a legal source port if the socket isn't bound. */
1091 if (sockets[i].bound == B_FALSE &&
1092 ntohs(oc.igm_saddr.sin_port == 0)) {
1093 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port =
1094 get_source_port(B_FALSE);
1095 }
1096
1097 /* Round up to 16bit value for checksum purposes */
1098 if (sockets[i].type == INETBOOT_DGRAM) {
1099 tlen = ((len + sizeof (uint16_t) - 1) &
1100 ~(sizeof (uint16_t) - 1));
1101 } else
1102 tlen = len;
1103
1104 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) {
1105 errno = ENOMEM;
1106 return (-1);
1107 }
1108 mp = oc.igm_mp;
1109 mp->b_rptr = mp->b_wptr += offset;
1110 bcopy((caddr_t)msg, mp->b_wptr, len);
1111 mp->b_wptr += len;
1112 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) {
1113 if (sockets[i].output[l] != NULL) {
1114 if (sockets[i].output[l](i, &oc) < 0) {
1115 freeb(mp);
1116 if (errno == 0)
1117 errno = EIO;
1118 return (-1);
1119 }
1120 }
1121 }
1122 freeb(mp);
1123 return (len);
1124 }
1125
1126 /* ARGSUSED */
1127 static ssize_t
stream_sendto(int i,const void * msg,size_t len,int flags)1128 stream_sendto(int i, const void *msg, size_t len, int flags)
1129 {
1130 int cnt;
1131
1132 assert(sockets[i].pcb != NULL);
1133
1134 /*
1135 * Call directly TCP's send routine. We do this because TCP
1136 * needs to decide whether to send out the data.
1137 *
1138 * Note also that currently, TCP ignores all flags passed in for
1139 * TCP socket.
1140 */
1141 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) {
1142 if (sockets[i].so_error != 0)
1143 errno = sockets[i].so_error;
1144 return (-1);
1145 } else {
1146 return (cnt);
1147 }
1148 }
1149
1150 /*
1151 * Returns ptr to the last inetgram in the list, or null if list is null
1152 */
1153 struct inetgram *
last_gram(struct inetgram * igp)1154 last_gram(struct inetgram *igp)
1155 {
1156 struct inetgram *wp;
1157 for (wp = igp; wp != NULL; wp = wp->igm_next) {
1158 if (wp->igm_next == NULL)
1159 return (wp);
1160 }
1161 return (NULL);
1162 }
1163
1164 /*
1165 * Adds an inetgram or list of inetgrams to the end of the list.
1166 */
1167 void
add_grams(struct inetgram ** igpp,struct inetgram * newgp)1168 add_grams(struct inetgram **igpp, struct inetgram *newgp)
1169 {
1170 struct inetgram *wp;
1171
1172 if (newgp == NULL)
1173 return;
1174
1175 if (*igpp == NULL)
1176 *igpp = newgp;
1177 else {
1178 wp = last_gram(*igpp);
1179 wp->igm_next = newgp;
1180 }
1181 }
1182
1183 /*
1184 * Nuke a whole list of grams.
1185 */
1186 void
nuke_grams(struct inetgram ** lgpp)1187 nuke_grams(struct inetgram **lgpp)
1188 {
1189 while (*lgpp != NULL)
1190 del_gram(lgpp, *lgpp, B_TRUE);
1191 }
1192
1193 /*
1194 * Remove the referenced inetgram. List is altered accordingly. Destroy the
1195 * referenced inetgram if freeit is B_TRUE.
1196 */
1197 void
del_gram(struct inetgram ** lgpp,struct inetgram * igp,int freeit)1198 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit)
1199 {
1200 struct inetgram *wp, *pp = NULL;
1201
1202 if (lgpp == NULL || igp == NULL)
1203 return;
1204
1205 wp = *lgpp;
1206 while (wp != NULL) {
1207 if (wp == igp) {
1208 /* detach wp from the list */
1209 if (*lgpp == wp)
1210 *lgpp = (*lgpp)->igm_next;
1211 else
1212 pp->igm_next = wp->igm_next;
1213 igp->igm_next = NULL;
1214
1215 if (freeit) {
1216 if (igp->igm_mp != NULL)
1217 freeb(igp->igm_mp);
1218 bkmem_free((caddr_t)igp,
1219 sizeof (struct inetgram));
1220 }
1221 break;
1222 }
1223 pp = wp;
1224 wp = wp->igm_next;
1225 }
1226 }
1227
1228 struct nct_t nct[] = {
1229 "bootp", NCT_BOOTP_DHCP,
1230 "dhcp", NCT_BOOTP_DHCP,
1231 "rarp", NCT_RARP_BOOTPARAMS,
1232 "manual", NCT_MANUAL
1233 };
1234 int nct_entries = sizeof (nct) / sizeof (nct[0]);
1235
1236 /*
1237 * Figure out from the bootpath what kind of network configuration strategy
1238 * we should use. Returns the network config strategy.
1239 */
1240 int
get_netconfig_strategy(void)1241 get_netconfig_strategy(void)
1242 {
1243 int i;
1244 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0')
1245 char lbootpath[OBP_MAXPATHLEN];
1246 char net_options[NCT_BUFSIZE];
1247 char *op, *nop, *sp;
1248 pnode_t cn;
1249 int proplen;
1250
1251 /* If the PROM DHCP cache exists, we're done */
1252 if (prom_cached_reply(B_TRUE))
1253 return (NCT_BOOTP_DHCP);
1254
1255 /*
1256 * Newer (version 4) PROMs will put the name in the
1257 * "net-config-strategy" property.
1258 */
1259 cn = prom_finddevice("/chosen");
1260 if ((proplen = prom_getproplen(cn, "net-config-strategy")) <
1261 sizeof (net_options)) {
1262 (void) prom_getprop(cn, "net-config-strategy", net_options);
1263 net_options[proplen] = '\0';
1264 } else {
1265
1266 /*
1267 * We're reduced to sacanning bootpath for the prototol to use.
1268 * Since there was no "net-config-strategy" property, this is
1269 * an old PROM, so we need to excise any extraneous key/value
1270 * initializations from bootpath[].
1271 */
1272 for (op = prom_bootpath(), sp = lbootpath; op != NULL &&
1273 !ISSPACE(*op); sp++, op++)
1274 *sp = *op;
1275 *sp = '\0';
1276 /* find the last '/' (in the device path) */
1277 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */
1278 op = lbootpath;
1279 else
1280 op++;
1281 /* then look for the ':' separating it from the protocol */
1282 while (*op != ':' && *op != '\0')
1283 op++;
1284
1285 if (*op == ':') {
1286 for (nop = net_options, op++;
1287 *op != '\0' && *op != '/' && !ISSPACE(*op) &&
1288 nop < &net_options[NCT_BUFSIZE]; nop++, op++)
1289 *nop = *op;
1290 *nop = '\0';
1291 } else
1292 net_options[0] = '\0';
1293 }
1294
1295 #undef ISSPACE
1296
1297 for (i = 0; i < nct_entries; i++)
1298 if (strcmp(net_options, nct[i].p_name) == 0)
1299 return (nct[i].p_id);
1300
1301 return (NCT_DEFAULT);
1302 }
1303
1304 /* Modified STREAM routines for ease of porting core TCP code. */
1305
1306 /*ARGSUSED*/
1307 mblk_t *
allocb(size_t size,uint_t pri)1308 allocb(size_t size, uint_t pri)
1309 {
1310 unsigned char *base;
1311 mblk_t *mp;
1312
1313 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL)
1314 return (NULL);
1315 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL)
1316 return (NULL);
1317
1318 mp->b_next = mp->b_prev = mp->b_cont = NULL;
1319 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base;
1320 mp->b_size = size;
1321
1322 return (mp);
1323 }
1324
1325 void
freeb(mblk_t * mp)1326 freeb(mblk_t *mp)
1327 {
1328 #ifdef DEBUG
1329 printf("freeb datap %x\n", mp->b_datap);
1330 #endif
1331 bkmem_free((caddr_t)(mp->b_datap), mp->b_size);
1332 #ifdef DEBUG
1333 printf("freeb mp %x\n", mp);
1334 #endif
1335 bkmem_free((caddr_t)mp, sizeof (mblk_t));
1336 }
1337
1338 void
freemsg(mblk_t * mp)1339 freemsg(mblk_t *mp)
1340 {
1341 while (mp) {
1342 mblk_t *mp_cont = mp->b_cont;
1343
1344 freeb(mp);
1345 mp = mp_cont;
1346 }
1347 }
1348
1349 mblk_t *
copyb(mblk_t * bp)1350 copyb(mblk_t *bp)
1351 {
1352 mblk_t *nbp;
1353 unsigned char *ndp;
1354
1355 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0);
1356
1357 if (!(nbp = allocb(bp->b_size, 0)))
1358 return (NULL);
1359 nbp->b_cont = NULL;
1360 ndp = nbp->b_datap;
1361
1362 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap);
1363 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr);
1364 bcopy(bp->b_datap, nbp->b_datap, bp->b_size);
1365 return (nbp);
1366 }
1367
1368 /* To simplify things, dupb() is implemented as copyb(). */
1369 mblk_t *
dupb(mblk_t * mp)1370 dupb(mblk_t *mp)
1371 {
1372 return (copyb(mp));
1373 }
1374
1375 /*
1376 * get number of data bytes in message
1377 */
1378 size_t
msgdsize(mblk_t * bp)1379 msgdsize(mblk_t *bp)
1380 {
1381 size_t count = 0;
1382
1383 for (; bp != NULL; bp = bp->b_cont) {
1384 assert(bp->b_wptr >= bp->b_rptr);
1385 count += bp->b_wptr - bp->b_rptr;
1386 }
1387 return (count);
1388 }
1389