1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * socket.c, Code implementing a simple socket interface. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include "socket_impl.h" 33 #include <sys/isa_defs.h> 34 #include <sys/sysmacros.h> 35 #include <sys/bootconf.h> 36 #include <sys/socket.h> 37 #include <netinet/in.h> 38 #include <netinet/ip.h> 39 #include <netinet/tcp.h> 40 #include <sys/uio.h> 41 #include <sys/salib.h> 42 #include "socket_inet.h" 43 #include "ipv4.h" 44 #include "ipv4_impl.h" 45 #include "udp_inet.h" 46 #include "tcp_inet.h" 47 #include "mac.h" 48 #include "mac_impl.h" 49 #include <sys/promif.h> 50 51 struct inetboot_socket sockets[MAXSOCKET] = { 0 }; 52 53 /* Default send and receive socket buffer size */ 54 #define SO_DEF_SNDBUF 48*1024 55 #define SO_DEF_RCVBUF 48*1024 56 57 /* Default max socket buffer size */ 58 #define SO_MAX_BUF 4*1024*1024 59 60 static ssize_t dgram_sendto(int, const void *, size_t, int, 61 const struct sockaddr *, int); 62 static ssize_t stream_sendto(int, const void *, size_t, int); 63 static int bind_check(int, const struct sockaddr *); 64 static int quickbind(int); 65 66 /* Check the validity of a fd and return the socket index of that fd. */ 67 int 68 so_check_fd(int fd, int *errno) 69 { 70 int i; 71 72 i = FD_TO_SOCKET(fd); 73 if (i < 0 || i >= MAXSOCKET) { 74 *errno = ENOTSOCK; 75 return (-1); 76 } 77 if (sockets[i].type == INETBOOT_UNUSED) { 78 *errno = ENOTSOCK; 79 return (-1); 80 } 81 return (i); 82 } 83 84 /* 85 * Create an endpoint for network communication. Returns a descriptor. 86 * 87 * Notes: 88 * Only PF_INET communication domains are supported. Within 89 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are 90 * supported. 91 */ 92 int 93 socket(int domain, int type, int protocol) 94 { 95 static int sock_initialized; 96 int i; 97 98 errno = 0; 99 100 if (!sock_initialized) { 101 for (i = 0; i < MAXSOCKET; i++) 102 sockets[i].type = INETBOOT_UNUSED; 103 sock_initialized = B_TRUE; 104 } 105 if (domain != AF_INET) { 106 errno = EPROTONOSUPPORT; 107 return (-1); 108 } 109 110 /* Find available socket */ 111 for (i = 0; i < MAXSOCKET; i++) { 112 if (sockets[i].type == INETBOOT_UNUSED) 113 break; 114 } 115 if (i >= MAXSOCKET) { 116 errno = EMFILE; /* No slots left. */ 117 return (-1); 118 } 119 120 /* Some socket initialization... */ 121 sockets[i].so_rcvbuf = SO_DEF_RCVBUF; 122 sockets[i].so_sndbuf = SO_DEF_SNDBUF; 123 124 /* 125 * Note that we ignore the protocol field for SOCK_DGRAM and 126 * SOCK_STREAM. When we support different protocols in future, 127 * this needs to be changed. 128 */ 129 switch (type) { 130 case SOCK_RAW: 131 ipv4_raw_socket(&sockets[i], (uint8_t)protocol); 132 break; 133 case SOCK_DGRAM: 134 udp_socket_init(&sockets[i]); 135 break; 136 case SOCK_STREAM: 137 tcp_socket_init(&sockets[i]); 138 break; 139 default: 140 errno = EPROTOTYPE; 141 break; 142 } 143 144 if (errno != 0) 145 return (-1); 146 147 /* IPv4 generic initialization. */ 148 ipv4_socket_init(&sockets[i]); 149 150 /* MAC generic initialization. */ 151 mac_socket_init(&sockets[i]); 152 153 return (i + SOCKETTYPE); 154 } 155 156 int 157 getsockname(int s, struct sockaddr *name, socklen_t *namelen) 158 { 159 int i; 160 161 errno = 0; 162 if ((i = so_check_fd(s, &errno)) == -1) 163 return (-1); 164 165 if (*namelen < sizeof (struct sockaddr_in)) { 166 errno = ENOMEM; 167 return (-1); 168 } 169 170 /* Structure assignment... */ 171 *((struct sockaddr_in *)name) = sockets[i].bind; 172 *namelen = sizeof (struct sockaddr_in); 173 return (0); 174 } 175 176 /* 177 * The socket options we support are: 178 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t. 179 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set). 180 * SO_REUSEADDR - Value is an int boolean. 181 * SO_RCVBUF - Value is an int. 182 * SO_SNDBUF - Value is an int. 183 */ 184 int 185 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen) 186 { 187 int i; 188 189 errno = 0; 190 if ((i = so_check_fd(s, &errno)) == -1) 191 return (-1); 192 193 switch (level) { 194 case SOL_SOCKET: { 195 switch (option) { 196 case SO_RCVTIMEO: 197 if (*optlen == sizeof (uint32_t)) { 198 *(uint32_t *)optval = sockets[i].in_timeout; 199 } else { 200 *optlen = 0; 201 errno = EINVAL; 202 } 203 break; 204 case SO_DONTROUTE: 205 if (*optlen == sizeof (int)) { 206 *(int *)optval = 207 (sockets[i].out_flags & SO_DONTROUTE); 208 } else { 209 *optlen = 0; 210 errno = EINVAL; 211 } 212 break; 213 case SO_REUSEADDR: 214 if (*optlen == sizeof (int)) { 215 *(int *)optval = 216 (sockets[i].so_opt & SO_REUSEADDR); 217 } else { 218 *optlen = 0; 219 errno = EINVAL; 220 } 221 break; 222 case SO_RCVBUF: 223 if (*optlen == sizeof (int)) { 224 *(int *)optval = sockets[i].so_rcvbuf; 225 } else { 226 *optlen = 0; 227 errno = EINVAL; 228 } 229 break; 230 case SO_SNDBUF: 231 if (*optlen == sizeof (int)) { 232 *(int *)optval = sockets[i].so_sndbuf; 233 } else { 234 *optlen = 0; 235 errno = EINVAL; 236 } 237 break; 238 case SO_LINGER: 239 if (*optlen == sizeof (struct linger)) { 240 /* struct copy */ 241 *(struct linger *)optval = sockets[i].so_linger; 242 } else { 243 *optlen = 0; 244 errno = EINVAL; 245 } 246 default: 247 errno = ENOPROTOOPT; 248 break; 249 } 250 break; 251 } /* case SOL_SOCKET */ 252 case IPPROTO_TCP: 253 case IPPROTO_IP: { 254 switch (option) { 255 default: 256 *optlen = 0; 257 errno = ENOPROTOOPT; 258 break; 259 } 260 break; 261 } /* case IPPROTO_IP or IPPROTO_TCP */ 262 default: 263 errno = ENOPROTOOPT; 264 break; 265 } /* switch (level) */ 266 267 if (errno != 0) 268 return (-1); 269 else 270 return (0); 271 } 272 273 /* 274 * Generate a network-order source port from the privileged range if 275 * "reserved" is true, dynamic/private range otherwise. We consider the 276 * range of 512-1023 privileged ports as ports we can use. This mirrors 277 * historical rpc client practice for privileged port selection. 278 */ 279 in_port_t 280 get_source_port(boolean_t reserved) 281 { 282 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1, 283 rsvdport = (IPPORT_RESERVED / 2) - 1; 284 in_port_t p; 285 286 if (reserved) { 287 if (++rsvdport >= IPPORT_RESERVED) 288 p = rsvdport = IPPORT_RESERVED / 2; 289 else 290 p = rsvdport; 291 } else 292 p = ++dynamic; 293 294 return (htons(p)); 295 } 296 297 /* 298 * The socket options we support are: 299 * SO_RECVTIMEO - Value is uint32_t msecs. 300 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE). 301 * SO_REUSEADDR - value is int boolean. 302 * SO_RCVBUF - Value is int. 303 * SO_SNDBUF - Value is int. 304 */ 305 int 306 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen) 307 { 308 int i; 309 310 errno = 0; 311 if ((i = so_check_fd(s, &errno)) == -1) 312 return (-1); 313 314 switch (level) { 315 case SOL_SOCKET: { 316 switch (option) { 317 case SO_RCVTIMEO: 318 if (optlen == sizeof (uint32_t)) 319 sockets[i].in_timeout = *(uint32_t *)optval; 320 else { 321 errno = EINVAL; 322 } 323 break; 324 case SO_DONTROUTE: 325 if (optlen == sizeof (int)) { 326 if (*(int *)optval) 327 sockets[i].out_flags |= SO_DONTROUTE; 328 else 329 sockets[i].out_flags &= ~SO_DONTROUTE; 330 } else { 331 errno = EINVAL; 332 } 333 break; 334 case SO_REUSEADDR: 335 if (optlen == sizeof (int)) { 336 if (*(int *)optval) 337 sockets[i].so_opt |= SO_REUSEADDR; 338 else 339 sockets[i].so_opt &= ~SO_REUSEADDR; 340 } else { 341 errno = EINVAL; 342 } 343 break; 344 case SO_RCVBUF: 345 if (optlen == sizeof (int)) { 346 sockets[i].so_rcvbuf = *(int *)optval; 347 if (sockets[i].so_rcvbuf > SO_MAX_BUF) 348 sockets[i].so_rcvbuf = SO_MAX_BUF; 349 (void) tcp_opt_set(sockets[i].pcb, 350 level, option, optval, optlen); 351 } else { 352 errno = EINVAL; 353 } 354 break; 355 case SO_SNDBUF: 356 if (optlen == sizeof (int)) { 357 sockets[i].so_sndbuf = *(int *)optval; 358 if (sockets[i].so_sndbuf > SO_MAX_BUF) 359 sockets[i].so_sndbuf = SO_MAX_BUF; 360 (void) tcp_opt_set(sockets[i].pcb, 361 level, option, optval, optlen); 362 } else { 363 errno = EINVAL; 364 } 365 break; 366 case SO_LINGER: 367 if (optlen == sizeof (struct linger)) { 368 /* struct copy */ 369 sockets[i].so_linger = *(struct linger *)optval; 370 (void) tcp_opt_set(sockets[i].pcb, 371 level, option, optval, optlen); 372 } else { 373 errno = EINVAL; 374 } 375 break; 376 default: 377 errno = ENOPROTOOPT; 378 break; 379 } 380 break; 381 } /* case SOL_SOCKET */ 382 case IPPROTO_TCP: 383 case IPPROTO_IP: { 384 switch (option) { 385 default: 386 errno = ENOPROTOOPT; 387 break; 388 } 389 break; 390 } /* case IPPROTO_IP or IPPROTO_TCP */ 391 default: 392 errno = ENOPROTOOPT; 393 break; 394 } /* switch (level) */ 395 396 if (errno != 0) 397 return (-1); 398 else 399 return (0); 400 } 401 402 /* 403 * Shut down part of a full-duplex connection. 404 * 405 * Only supported for TCP sockets 406 */ 407 int 408 shutdown(int s, int how) 409 { 410 int sock_id; 411 int i; 412 413 errno = 0; 414 if ((sock_id = so_check_fd(s, &errno)) == -1) 415 return (-1); 416 417 /* shutdown only supported for TCP sockets */ 418 if (sockets[sock_id].type != INETBOOT_STREAM) { 419 errno = EOPNOTSUPP; 420 return (-1); 421 } 422 423 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) { 424 errno = ENOTCONN; 425 return (-1); 426 } 427 428 switch (how) { 429 case 0: 430 sockets[sock_id].so_state |= SS_CANTRCVMORE; 431 break; 432 case 1: 433 sockets[sock_id].so_state |= SS_CANTSENDMORE; 434 break; 435 case 2: 436 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE); 437 break; 438 default: 439 errno = EINVAL; 440 return (-1); 441 } 442 443 switch (sockets[sock_id].so_state & 444 (SS_CANTRCVMORE | SS_CANTSENDMORE)) { 445 case (SS_CANTRCVMORE | SS_CANTSENDMORE): 446 /* Call lower level protocol close routine. */ 447 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 448 if (sockets[sock_id].close[i] != NULL) { 449 (void) sockets[sock_id].close[i](sock_id); 450 } 451 } 452 nuke_grams(&sockets[sock_id].inq); 453 break; 454 case SS_CANTRCVMORE: 455 nuke_grams(&sockets[sock_id].inq); 456 break; 457 case SS_CANTSENDMORE: 458 /* Call lower level protocol close routine. */ 459 if (tcp_shutdown(sock_id) < 0) 460 return (-1); 461 break; 462 default: 463 errno = EINVAL; 464 return (-1); 465 } 466 467 return (0); 468 } 469 470 /* 471 * "close" a socket. 472 */ 473 int 474 socket_close(int s) 475 { 476 int sock_id, i; 477 478 errno = 0; 479 if ((sock_id = so_check_fd(s, &errno)) == -1) 480 return (-1); 481 482 /* Call lower level protocol close routine. */ 483 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 484 if (sockets[sock_id].close[i] != NULL) { 485 /* 486 * Note that the close() routine of other 487 * layers can return an error. But right 488 * now, the only mechanism to report that 489 * back is for the close() routine to set 490 * the errno and socket_close() will return 491 * an error. But the close operation will 492 * not be stopped. 493 */ 494 (void) sockets[sock_id].close[i](sock_id); 495 } 496 } 497 498 /* 499 * Clear the input queue. This has to be done 500 * after the lower level protocol close routines have been 501 * called as they may want to do something about the queue. 502 */ 503 nuke_grams(&sockets[sock_id].inq); 504 505 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket)); 506 sockets[sock_id].type = INETBOOT_UNUSED; 507 508 return (0); 509 } 510 511 /* 512 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero, 513 * then give up after `read_timeout' seconds. Returns the number of 514 * bytes read, or -1 on failure. 515 */ 516 int 517 socket_read(int s, void *buf, size_t nbyte, int read_timeout) 518 { 519 ssize_t n; 520 uint_t start, diff; 521 struct sockaddr from; 522 uint_t fromlen = sizeof (from); 523 524 /* 525 * keep calling non-blocking recvfrom until something received 526 * or an error occurs 527 */ 528 start = prom_gettime(); 529 for (;;) { 530 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL); 531 if (n == -1 && errno == EWOULDBLOCK) { 532 diff = (uint_t)((prom_gettime() - start) + 500) / 1000; 533 if (read_timeout != 0 && diff > read_timeout) { 534 errno = EINTR; 535 return (-1); 536 } 537 } else { 538 return (n); 539 } 540 } 541 } 542 543 /* 544 * Write up to `nbyte' bytes of data from `buf' to the address pointed to 545 * `addr' using socket `s'. Returns the number of bytes writte on success, 546 * or -1 on failure. 547 */ 548 int 549 socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr) 550 { 551 return (sendto(s, buf, nbyte, 0, (struct sockaddr *)addr, 552 sizeof (*addr))); 553 } 554 555 static int 556 bind_check(int sock_id, const struct sockaddr *addr) 557 { 558 int k; 559 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; 560 561 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */ 562 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) { 563 for (k = 0; k < MAXSOCKET; k++) { 564 if (sockets[k].type != INETBOOT_UNUSED && 565 sockets[k].proto == sockets[sock_id].proto && 566 sockets[k].bound) { 567 if ((sockets[k].bind.sin_addr.s_addr == 568 in_addr->sin_addr.s_addr) && 569 (sockets[k].bind.sin_port == 570 in_addr->sin_port)) { 571 errno = EADDRINUSE; 572 return (-1); 573 } 574 } 575 } 576 } 577 return (0); 578 } 579 580 /* Assign a name to an unnamed socket. */ 581 int 582 bind(int s, const struct sockaddr *name, socklen_t namelen) 583 { 584 int i; 585 586 errno = 0; 587 588 if ((i = so_check_fd(s, &errno)) == -1) 589 return (-1); 590 591 if (name == NULL) { 592 /* unbind */ 593 if (sockets[i].bound) { 594 bzero((caddr_t)&sockets[i].bind, 595 sizeof (struct sockaddr_in)); 596 sockets[i].bound = B_FALSE; 597 } 598 return (0); 599 } 600 if (namelen != sizeof (struct sockaddr_in) || name == NULL) { 601 errno = EINVAL; 602 return (-1); 603 } 604 if (name->sa_family != AF_INET) { 605 errno = EAFNOSUPPORT; 606 return (-1); 607 } 608 if (sockets[i].bound) { 609 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name, 610 namelen) == 0) { 611 /* attempt to bind to same address ok... */ 612 return (0); 613 } 614 errno = EINVAL; /* already bound */ 615 return (-1); 616 } 617 618 if (errno != 0) { 619 return (-1); 620 } 621 622 /* Check for duplicate bind(). */ 623 if (bind_check(i, name) < 0) 624 return (-1); 625 626 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen); 627 if (sockets[i].type == INETBOOT_STREAM) { 628 if (tcp_bind(i) < 0) { 629 return (-1); 630 } 631 } 632 sockets[i].bound = B_TRUE; 633 634 return (0); 635 } 636 637 static int 638 quickbind(int sock_id) 639 { 640 int i; 641 struct sockaddr_in addr; 642 643 /* 644 * XXX This needs more work. Right now, if ipv4_setipaddr() 645 * have not been called, this will be wrong. But we need 646 * something better. Need to be revisited. 647 */ 648 ipv4_getipaddr(&addr.sin_addr); 649 addr.sin_family = AF_INET; 650 651 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) { 652 addr.sin_port = htons(i); 653 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0) 654 break; 655 } 656 /* Need to clear errno as it is probably set by bind_check(). */ 657 errno = 0; 658 659 if (i <= LARGEST_ANON_PORT) { 660 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind, 661 sizeof (struct sockaddr_in)); 662 sockets[sock_id].bound = B_TRUE; 663 #ifdef DEBUG 664 printf("quick bind done addr %s port %d\n", 665 inet_ntoa(sockets[sock_id].bind.sin_addr), 666 ntohs(sockets[sock_id].bind.sin_port)); 667 #endif 668 return (0); 669 } else { 670 return (-1); 671 } 672 } 673 674 int 675 listen(int fd, int backlog) 676 { 677 int sock_id; 678 679 errno = 0; 680 if ((sock_id = so_check_fd(fd, &errno)) == -1) 681 return (-1); 682 683 if (sockets[sock_id].type != INETBOOT_STREAM) { 684 errno = EOPNOTSUPP; 685 return (-1); 686 } 687 if (sockets[sock_id].so_error != 0) { 688 errno = sockets[sock_id].so_error; 689 return (-1); 690 } 691 return (tcp_listen(sock_id, backlog)); 692 } 693 694 int 695 accept(int fd, struct sockaddr *addr, socklen_t *addr_len) 696 { 697 int sock_id; 698 int new_sd; 699 700 errno = 0; 701 if ((sock_id = so_check_fd(fd, &errno)) == -1) 702 return (-1); 703 704 if (sockets[sock_id].type != INETBOOT_STREAM) { 705 errno = EOPNOTSUPP; 706 return (-1); 707 } 708 if (sockets[sock_id].so_error != 0) { 709 errno = sockets[sock_id].so_error; 710 return (-1); 711 } 712 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1) 713 return (-1); 714 sock_id = so_check_fd(new_sd, &errno); 715 sockets[sock_id].so_state |= SS_ISCONNECTED; 716 return (new_sd); 717 } 718 719 int 720 connect(int fd, const struct sockaddr *addr, socklen_t addr_len) 721 { 722 int sock_id; 723 int so_type; 724 725 errno = 0; 726 if ((sock_id = so_check_fd(fd, &errno)) == -1) 727 return (-1); 728 729 so_type = sockets[sock_id].type; 730 731 if (addr == NULL || addr_len == 0) { 732 errno = EINVAL; 733 return (-1); 734 } 735 /* Don't allow connect for raw socket. */ 736 if (so_type == INETBOOT_RAW) { 737 errno = EPROTONOSUPPORT; 738 return (-1); 739 } 740 741 if (sockets[sock_id].so_state & SS_ISCONNECTED) { 742 errno = EINVAL; 743 return (-1); 744 } 745 746 if (sockets[sock_id].so_error != 0) { 747 errno = sockets[sock_id].so_error; 748 return (-1); 749 } 750 751 /* If the socket is not bound, we need to do a quick bind. */ 752 if (!sockets[sock_id].bound) { 753 /* For TCP socket, just call tcp_bind(). */ 754 if (so_type == INETBOOT_STREAM) { 755 if (tcp_bind(sock_id) < 0) 756 return (-1); 757 } else { 758 if (quickbind(sock_id) < 0) { 759 errno = EADDRNOTAVAIL; 760 return (-1); 761 } 762 } 763 } 764 /* Should do some sanity check for addr .... */ 765 bcopy((caddr_t)addr, &sockets[sock_id].remote, 766 sizeof (struct sockaddr_in)); 767 768 if (sockets[sock_id].type == INETBOOT_STREAM) { 769 /* Call TCP connect routine. */ 770 if (tcp_connect(sock_id) == 0) 771 sockets[sock_id].so_state |= SS_ISCONNECTED; 772 else { 773 if (sockets[sock_id].so_error != 0) 774 errno = sockets[sock_id].so_error; 775 return (-1); 776 } 777 } else { 778 sockets[sock_id].so_state |= SS_ISCONNECTED; 779 } 780 return (0); 781 } 782 783 /* Just a wrapper around recvfrom(). */ 784 ssize_t 785 recv(int s, void *buf, size_t len, int flags) 786 { 787 return (recvfrom(s, buf, len, flags, NULL, NULL)); 788 } 789 790 /* 791 * Receive messages from a connectionless socket. Legal flags are 0 and 792 * MSG_DONTWAIT. MSG_WAITALL is not currently supported. 793 * 794 * Returns length of message for success, -1 if error occurred. 795 */ 796 ssize_t 797 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, 798 socklen_t *fromlen) 799 { 800 int sock_id, i; 801 ssize_t datalen, bytes = 0; 802 struct inetgram *icp; 803 enum SockType so_type; 804 char *tmp_buf; 805 mblk_t *mp; 806 807 errno = 0; 808 809 if ((sock_id = so_check_fd(s, &errno)) == -1) { 810 errno = EINVAL; 811 return (-1); 812 } 813 814 if (sockets[sock_id].type == INETBOOT_STREAM && 815 !(sockets[sock_id].so_state & SS_ISCONNECTED)) { 816 errno = ENOTCONN; 817 return (-1); 818 } 819 820 if (buf == NULL || len == 0) { 821 errno = EINVAL; 822 return (-1); 823 } 824 /* Yup - MSG_WAITALL not implemented */ 825 if ((flags & ~MSG_DONTWAIT) != 0) { 826 errno = EINVAL; 827 return (-1); 828 } 829 830 retry: 831 if (sockets[sock_id].inq == NULL) { 832 /* Go out and check the wire */ 833 for (i = MEDIA_LVL; i < APP_LVL; i++) { 834 if (sockets[sock_id].input[i] != NULL) { 835 if (sockets[sock_id].input[i](sock_id) < 0) { 836 if (sockets[sock_id].so_error != 0) { 837 errno = 838 sockets[sock_id].so_error; 839 } 840 return (-1); 841 } 842 } 843 } 844 } 845 846 so_type = sockets[sock_id].type; 847 848 /* Remove unknown inetgrams from the head of inq. Can this happen? */ 849 while ((icp = sockets[sock_id].inq) != NULL) { 850 if ((so_type == INETBOOT_DGRAM || 851 so_type == INETBOOT_STREAM) && 852 icp->igm_level != APP_LVL) { 853 #ifdef DEBUG 854 printf("recvfrom: unexpected level %d frame found\n", 855 icp->igm_level); 856 #endif /* DEBUG */ 857 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 858 continue; 859 } else { 860 break; 861 } 862 } 863 864 865 if (icp == NULL) { 866 /* 867 * Checking for error should be done everytime a lower layer 868 * input routing is called. For example, if TCP gets a RST, 869 * this should be reported asap. 870 */ 871 if (sockets[sock_id].so_state & SS_CANTRCVMORE) { 872 if (sockets[sock_id].so_error != 0) { 873 errno = sockets[sock_id].so_error; 874 return (-1); 875 } else { 876 return (0); 877 } 878 } 879 880 if ((flags & MSG_DONTWAIT) == 0) 881 goto retry; /* wait forever */ 882 883 /* no data */ 884 errno = EWOULDBLOCK; 885 return (-1); 886 } 887 888 if (from != NULL && fromlen != NULL) { 889 switch (so_type) { 890 case INETBOOT_STREAM: 891 /* Need to copy from the socket's remote address. */ 892 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen, 893 sizeof (struct sockaddr_in))); 894 break; 895 case INETBOOT_RAW: 896 case INETBOOT_DGRAM: 897 default: 898 if (*fromlen > sizeof (icp->igm_saddr)) 899 *fromlen = sizeof (icp->igm_saddr); 900 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from, 901 MIN(*fromlen, sizeof (struct sockaddr_in))); 902 break; 903 } 904 } 905 906 mp = icp->igm_mp; 907 switch (so_type) { 908 case INETBOOT_STREAM: 909 /* 910 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need 911 * to drain the data held by tcp and try again. 912 */ 913 if (icp->igm_id == TCP_CALLB_MAGIC_ID) { 914 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 915 tcp_rcv_drain_sock(sock_id); 916 goto retry; 917 } 918 919 /* TCP should put only user data in the inetgram. */ 920 tmp_buf = (char *)buf; 921 while (len > 0 && icp != NULL) { 922 datalen = mp->b_wptr - mp->b_rptr; 923 if (len < datalen) { 924 bcopy(mp->b_rptr, tmp_buf, len); 925 bytes += len; 926 mp->b_rptr += len; 927 break; 928 } else { 929 bcopy(mp->b_rptr, tmp_buf, datalen); 930 len -= datalen; 931 bytes += datalen; 932 tmp_buf += datalen; 933 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 934 935 /* 936 * If we have any embedded magic messages just 937 * drop them. 938 */ 939 while ((icp = sockets[sock_id].inq) != NULL) { 940 if (icp->igm_id != TCP_CALLB_MAGIC_ID) 941 break; 942 del_gram(&sockets[sock_id].inq, icp, 943 B_TRUE); 944 } 945 946 if (icp == NULL) 947 break; 948 mp = icp->igm_mp; 949 } 950 } 951 sockets[sock_id].so_rcvbuf += (int32_t)bytes; 952 break; 953 case INETBOOT_DGRAM: 954 datalen = mp->b_wptr - mp->b_rptr; 955 if (len < datalen) 956 bytes = len; 957 else 958 bytes = datalen; 959 bcopy(mp->b_rptr, buf, bytes); 960 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 961 break; 962 case INETBOOT_RAW: 963 default: 964 datalen = mp->b_wptr - mp->b_rptr; 965 if (len < datalen) 966 bytes = len; 967 else 968 bytes = datalen; 969 bcopy(mp->b_rptr, buf, bytes); 970 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 971 break; 972 } 973 974 #ifdef DEBUG 975 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id, 976 (icp != NULL) ? icp->igm_mp : 0, bytes); 977 #endif /* DEBUG */ 978 return (bytes); 979 } 980 981 982 /* Just a wrapper around sendto(). */ 983 ssize_t 984 send(int s, const void *msg, size_t len, int flags) 985 { 986 return (sendto(s, msg, len, flags, NULL, 0)); 987 } 988 989 /* 990 * Transmit a message through a socket. 991 * 992 * Supported flags: MSG_DONTROUTE or 0. 993 */ 994 ssize_t 995 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to, 996 socklen_t tolen) 997 { 998 enum SockType so_type; 999 int sock_id; 1000 ssize_t bytes; 1001 1002 errno = 0; 1003 1004 if ((sock_id = so_check_fd(s, &errno)) == -1) { 1005 return (-1); 1006 } 1007 if (msg == NULL) { 1008 errno = EINVAL; 1009 return (-1); 1010 } 1011 so_type = sockets[sock_id].type; 1012 if ((flags & ~MSG_DONTROUTE) != 0) { 1013 errno = EINVAL; 1014 return (-1); 1015 } 1016 if (sockets[sock_id].so_error != 0) { 1017 errno = sockets[sock_id].so_error; 1018 return (-1); 1019 } 1020 if (to != NULL && to->sa_family != AF_INET) { 1021 errno = EAFNOSUPPORT; 1022 return (-1); 1023 } 1024 1025 switch (so_type) { 1026 case INETBOOT_RAW: 1027 case INETBOOT_DGRAM: 1028 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) && 1029 (to == NULL || tolen != sizeof (struct sockaddr_in))) { 1030 errno = EINVAL; 1031 return (-1); 1032 } 1033 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen); 1034 break; 1035 case INETBOOT_STREAM: 1036 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) || 1037 (sockets[sock_id].so_state & SS_ISCONNECTING))) { 1038 errno = EINVAL; 1039 return (-1); 1040 } 1041 if (sockets[sock_id].so_state & SS_CANTSENDMORE) { 1042 errno = EPIPE; 1043 return (-1); 1044 } 1045 bytes = stream_sendto(sock_id, msg, len, flags); 1046 break; 1047 default: 1048 /* Should not happen... */ 1049 errno = EPROTOTYPE; 1050 return (-1); 1051 } 1052 return (bytes); 1053 } 1054 1055 static ssize_t 1056 dgram_sendto(int i, const void *msg, size_t len, int flags, 1057 const struct sockaddr *to, int tolen) 1058 { 1059 struct inetgram oc; 1060 int l, offset; 1061 size_t tlen; 1062 mblk_t *mp; 1063 1064 #ifdef DEBUG 1065 { 1066 struct sockaddr_in *sin = (struct sockaddr_in *)to; 1067 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n", 1068 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); 1069 } 1070 #endif /* DEBUG */ 1071 1072 nuke_grams(&sockets[i].inq); /* flush the input queue */ 1073 1074 /* calculate offset for data */ 1075 offset = sockets[i].headerlen[MEDIA_LVL](NULL) + 1076 (sockets[i].headerlen[NETWORK_LVL])(NULL); 1077 1078 bzero((caddr_t)&oc, sizeof (oc)); 1079 if (sockets[i].type != INETBOOT_RAW) { 1080 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL); 1081 oc.igm_level = TRANSPORT_LVL; 1082 } else 1083 oc.igm_level = NETWORK_LVL; 1084 oc.igm_oflags = flags; 1085 1086 if (to != NULL) { 1087 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen); 1088 } else { 1089 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr, 1090 sizeof (struct sockaddr_in)); 1091 } 1092 1093 /* Get a legal source port if the socket isn't bound. */ 1094 if (sockets[i].bound == B_FALSE && 1095 ntohs(oc.igm_saddr.sin_port == 0)) { 1096 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port = 1097 get_source_port(B_FALSE); 1098 } 1099 1100 /* Round up to 16bit value for checksum purposes */ 1101 if (sockets[i].type == INETBOOT_DGRAM) { 1102 tlen = ((len + sizeof (uint16_t) - 1) & 1103 ~(sizeof (uint16_t) - 1)); 1104 } else 1105 tlen = len; 1106 1107 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) { 1108 errno = ENOMEM; 1109 return (-1); 1110 } 1111 mp = oc.igm_mp; 1112 mp->b_rptr = mp->b_wptr += offset; 1113 bcopy((caddr_t)msg, mp->b_wptr, len); 1114 mp->b_wptr += len; 1115 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) { 1116 if (sockets[i].output[l] != NULL) { 1117 if (sockets[i].output[l](i, &oc) < 0) { 1118 freeb(mp); 1119 if (errno == 0) 1120 errno = EIO; 1121 return (-1); 1122 } 1123 } 1124 } 1125 freeb(mp); 1126 return (len); 1127 } 1128 1129 /* ARGSUSED */ 1130 static ssize_t 1131 stream_sendto(int i, const void *msg, size_t len, int flags) 1132 { 1133 int cnt; 1134 1135 assert(sockets[i].pcb != NULL); 1136 1137 /* 1138 * Call directly TCP's send routine. We do this because TCP 1139 * needs to decide whether to send out the data. 1140 * 1141 * Note also that currently, TCP ignores all flags passed in for 1142 * TCP socket. 1143 */ 1144 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) { 1145 if (sockets[i].so_error != 0) 1146 errno = sockets[i].so_error; 1147 return (-1); 1148 } else { 1149 return (cnt); 1150 } 1151 } 1152 1153 /* 1154 * Returns ptr to the last inetgram in the list, or null if list is null 1155 */ 1156 struct inetgram * 1157 last_gram(struct inetgram *igp) 1158 { 1159 struct inetgram *wp; 1160 for (wp = igp; wp != NULL; wp = wp->igm_next) { 1161 if (wp->igm_next == NULL) 1162 return (wp); 1163 } 1164 return (NULL); 1165 } 1166 1167 /* 1168 * Adds an inetgram or list of inetgrams to the end of the list. 1169 */ 1170 void 1171 add_grams(struct inetgram **igpp, struct inetgram *newgp) 1172 { 1173 struct inetgram *wp; 1174 1175 if (newgp == NULL) 1176 return; 1177 1178 if (*igpp == NULL) 1179 *igpp = newgp; 1180 else { 1181 wp = last_gram(*igpp); 1182 wp->igm_next = newgp; 1183 } 1184 } 1185 1186 /* 1187 * Nuke a whole list of grams. 1188 */ 1189 void 1190 nuke_grams(struct inetgram **lgpp) 1191 { 1192 while (*lgpp != NULL) 1193 del_gram(lgpp, *lgpp, B_TRUE); 1194 } 1195 1196 /* 1197 * Remove the referenced inetgram. List is altered accordingly. Destroy the 1198 * referenced inetgram if freeit is B_TRUE. 1199 */ 1200 void 1201 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit) 1202 { 1203 struct inetgram *wp, *pp = NULL; 1204 1205 if (lgpp == NULL || igp == NULL) 1206 return; 1207 1208 wp = *lgpp; 1209 while (wp != NULL) { 1210 if (wp == igp) { 1211 /* detach wp from the list */ 1212 if (*lgpp == wp) 1213 *lgpp = (*lgpp)->igm_next; 1214 else 1215 pp->igm_next = wp->igm_next; 1216 igp->igm_next = NULL; 1217 1218 if (freeit) { 1219 if (igp->igm_mp != NULL) 1220 freeb(igp->igm_mp); 1221 bkmem_free((caddr_t)igp, 1222 sizeof (struct inetgram)); 1223 } 1224 break; 1225 } 1226 pp = wp; 1227 wp = wp->igm_next; 1228 } 1229 } 1230 1231 struct nct_t nct[] = { 1232 "bootp", NCT_BOOTP_DHCP, 1233 "dhcp", NCT_BOOTP_DHCP, 1234 "rarp", NCT_RARP_BOOTPARAMS, 1235 "manual", NCT_MANUAL 1236 }; 1237 int nct_entries = sizeof (nct) / sizeof (nct[0]); 1238 1239 /* 1240 * Figure out from the bootpath what kind of network configuration strategy 1241 * we should use. Returns the network config strategy. 1242 */ 1243 int 1244 get_netconfig_strategy(void) 1245 { 1246 int i; 1247 #if !defined(__i386) 1248 /* sparc */ 1249 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0') 1250 char lbootpath[OBP_MAXPATHLEN]; 1251 char net_options[NCT_BUFSIZE]; 1252 char *op, *nop, *sp; 1253 dnode_t cn; 1254 int proplen; 1255 1256 /* If the PROM DHCP cache exists, we're done */ 1257 if (prom_cached_reply(B_TRUE)) 1258 return (NCT_BOOTP_DHCP); 1259 1260 /* 1261 * Newer (version 4) PROMs will put the name in the 1262 * "net-config-strategy" property. 1263 */ 1264 cn = prom_finddevice("/chosen"); 1265 if ((proplen = prom_getproplen(cn, "net-config-strategy")) < 1266 sizeof (net_options)) { 1267 (void) prom_getprop(cn, "net-config-strategy", net_options); 1268 net_options[proplen] = '\0'; 1269 } else { 1270 1271 /* 1272 * We're reduced to sacanning bootpath for the prototol to use. 1273 * Since there was no "net-config-strategy" property, this is 1274 * an old PROM, so we need to excise any extraneous key/value 1275 * initializations from bootpath[]. 1276 */ 1277 for (op = prom_bootpath(), sp = lbootpath; op != NULL && 1278 !ISSPACE(*op); sp++, op++) 1279 *sp = *op; 1280 *sp = '\0'; 1281 /* find the last '/' (in the device path) */ 1282 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */ 1283 op = lbootpath; 1284 else 1285 op++; 1286 /* then look for the ':' separating it from the protocol */ 1287 while (*op != ':' && *op != '\0') 1288 op++; 1289 1290 if (*op == ':') { 1291 for (nop = net_options, op++; 1292 *op != '\0' && *op != '/' && !ISSPACE(*op) && 1293 nop < &net_options[NCT_BUFSIZE]; nop++, op++) 1294 *nop = *op; 1295 *nop = '\0'; 1296 } else 1297 net_options[0] = '\0'; 1298 } 1299 1300 #undef ISSPACE 1301 #else 1302 /* i86 */ 1303 extern struct bootops bootops; 1304 extern int bgetprop(struct bootops *, char *, caddr_t, int, phandle_t); 1305 char net_options[MAXNAMELEN]; 1306 1307 /* 1308 * Look at net-config-strategy boot property to determine what protocol 1309 * will be used. 1310 */ 1311 (void) bgetprop(&bootops, "net-config-strategy", net_options, 1312 sizeof (net_options), 0); 1313 1314 #endif /* __i386 */ 1315 1316 for (i = 0; i < nct_entries; i++) 1317 if (strcmp(net_options, nct[i].p_name) == 0) 1318 return (nct[i].p_id); 1319 1320 return (NCT_DEFAULT); 1321 } 1322 1323 /* Modified STREAM routines for ease of porting core TCP code. */ 1324 1325 /*ARGSUSED*/ 1326 mblk_t * 1327 allocb(size_t size, uint_t pri) 1328 { 1329 unsigned char *base; 1330 mblk_t *mp; 1331 1332 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL) 1333 return (NULL); 1334 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL) 1335 return (NULL); 1336 1337 mp->b_next = mp->b_prev = mp->b_cont = NULL; 1338 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base; 1339 mp->b_size = size; 1340 1341 return (mp); 1342 } 1343 1344 void 1345 freeb(mblk_t *mp) 1346 { 1347 #ifdef DEBUG 1348 printf("freeb datap %x\n", mp->b_datap); 1349 #endif 1350 bkmem_free((caddr_t)(mp->b_datap), mp->b_size); 1351 #ifdef DEBUG 1352 printf("freeb mp %x\n", mp); 1353 #endif 1354 bkmem_free((caddr_t)mp, sizeof (mblk_t)); 1355 } 1356 1357 void 1358 freemsg(mblk_t *mp) 1359 { 1360 while (mp) { 1361 mblk_t *mp_cont = mp->b_cont; 1362 1363 freeb(mp); 1364 mp = mp_cont; 1365 } 1366 } 1367 1368 mblk_t * 1369 copyb(mblk_t *bp) 1370 { 1371 mblk_t *nbp; 1372 unsigned char *ndp; 1373 1374 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0); 1375 1376 if (!(nbp = allocb(bp->b_size, 0))) 1377 return (NULL); 1378 nbp->b_cont = NULL; 1379 ndp = nbp->b_datap; 1380 1381 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap); 1382 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr); 1383 bcopy(bp->b_datap, nbp->b_datap, bp->b_size); 1384 return (nbp); 1385 } 1386 1387 /* To simplify things, dupb() is implemented as copyb(). */ 1388 mblk_t * 1389 dupb(mblk_t *mp) 1390 { 1391 return (copyb(mp)); 1392 } 1393 1394 /* 1395 * get number of data bytes in message 1396 */ 1397 size_t 1398 msgdsize(mblk_t *bp) 1399 { 1400 size_t count = 0; 1401 1402 for (; bp != NULL; bp = bp->b_cont) { 1403 assert(bp->b_wptr >= bp->b_rptr); 1404 count += bp->b_wptr - bp->b_rptr; 1405 } 1406 return (count); 1407 } 1408