1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * socket.c, Code implementing a simple socket interface. 26 */ 27 28 #include <sys/types.h> 29 #include "socket_impl.h" 30 #include <sys/isa_defs.h> 31 #include <sys/sysmacros.h> 32 #include <sys/bootconf.h> 33 #include <sys/socket.h> 34 #include <netinet/in.h> 35 #include <netinet/ip.h> 36 #include <netinet/tcp.h> 37 #include <sys/uio.h> 38 #include <sys/salib.h> 39 #include "socket_inet.h" 40 #include "ipv4.h" 41 #include "ipv4_impl.h" 42 #include "udp_inet.h" 43 #include "tcp_inet.h" 44 #include "mac.h" 45 #include "mac_impl.h" 46 #include <sys/promif.h> 47 48 struct inetboot_socket sockets[MAXSOCKET] = { 0 }; 49 50 /* Default send and receive socket buffer size */ 51 #define SO_DEF_SNDBUF 48*1024 52 #define SO_DEF_RCVBUF 48*1024 53 54 /* Default max socket buffer size */ 55 #define SO_MAX_BUF 4*1024*1024 56 57 static ssize_t dgram_sendto(int, const void *, size_t, int, 58 const struct sockaddr *, int); 59 static ssize_t stream_sendto(int, const void *, size_t, int); 60 static int bind_check(int, const struct sockaddr *); 61 static int quickbind(int); 62 63 /* Check the validity of a fd and return the socket index of that fd. */ 64 int 65 so_check_fd(int fd, int *errno) 66 { 67 int i; 68 69 i = FD_TO_SOCKET(fd); 70 if (i < 0 || i >= MAXSOCKET) { 71 *errno = ENOTSOCK; 72 return (-1); 73 } 74 if (sockets[i].type == INETBOOT_UNUSED) { 75 *errno = ENOTSOCK; 76 return (-1); 77 } 78 return (i); 79 } 80 81 /* 82 * Create an endpoint for network communication. Returns a descriptor. 83 * 84 * Notes: 85 * Only PF_INET communication domains are supported. Within 86 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are 87 * supported. 88 */ 89 int 90 socket(int domain, int type, int protocol) 91 { 92 static int sock_initialized; 93 int i; 94 95 errno = 0; 96 97 if (!sock_initialized) { 98 for (i = 0; i < MAXSOCKET; i++) 99 sockets[i].type = INETBOOT_UNUSED; 100 sock_initialized = B_TRUE; 101 } 102 if (domain != AF_INET) { 103 errno = EPROTONOSUPPORT; 104 return (-1); 105 } 106 107 /* Find available socket */ 108 for (i = 0; i < MAXSOCKET; i++) { 109 if (sockets[i].type == INETBOOT_UNUSED) 110 break; 111 } 112 if (i >= MAXSOCKET) { 113 errno = EMFILE; /* No slots left. */ 114 return (-1); 115 } 116 117 /* Some socket initialization... */ 118 sockets[i].so_rcvbuf = SO_DEF_RCVBUF; 119 sockets[i].so_sndbuf = SO_DEF_SNDBUF; 120 121 /* 122 * Note that we ignore the protocol field for SOCK_DGRAM and 123 * SOCK_STREAM. When we support different protocols in future, 124 * this needs to be changed. 125 */ 126 switch (type) { 127 case SOCK_RAW: 128 ipv4_raw_socket(&sockets[i], (uint8_t)protocol); 129 break; 130 case SOCK_DGRAM: 131 udp_socket_init(&sockets[i]); 132 break; 133 case SOCK_STREAM: 134 tcp_socket_init(&sockets[i]); 135 break; 136 default: 137 errno = EPROTOTYPE; 138 break; 139 } 140 141 if (errno != 0) 142 return (-1); 143 144 /* IPv4 generic initialization. */ 145 ipv4_socket_init(&sockets[i]); 146 147 /* MAC generic initialization. */ 148 mac_socket_init(&sockets[i]); 149 150 return (i + SOCKETTYPE); 151 } 152 153 int 154 getsockname(int s, struct sockaddr *name, socklen_t *namelen) 155 { 156 int i; 157 158 errno = 0; 159 if ((i = so_check_fd(s, &errno)) == -1) 160 return (-1); 161 162 if (*namelen < sizeof (struct sockaddr_in)) { 163 errno = ENOMEM; 164 return (-1); 165 } 166 167 /* Structure assignment... */ 168 *((struct sockaddr_in *)name) = sockets[i].bind; 169 *namelen = sizeof (struct sockaddr_in); 170 return (0); 171 } 172 173 /* 174 * The socket options we support are: 175 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t. 176 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set). 177 * SO_REUSEADDR - Value is an int boolean. 178 * SO_RCVBUF - Value is an int. 179 * SO_SNDBUF - Value is an int. 180 */ 181 int 182 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen) 183 { 184 int i; 185 186 errno = 0; 187 if ((i = so_check_fd(s, &errno)) == -1) 188 return (-1); 189 190 switch (level) { 191 case SOL_SOCKET: { 192 switch (option) { 193 case SO_RCVTIMEO: 194 if (*optlen == sizeof (uint32_t)) { 195 *(uint32_t *)optval = sockets[i].in_timeout; 196 } else { 197 *optlen = 0; 198 errno = EINVAL; 199 } 200 break; 201 case SO_DONTROUTE: 202 if (*optlen == sizeof (int)) { 203 *(int *)optval = 204 (sockets[i].out_flags & SO_DONTROUTE); 205 } else { 206 *optlen = 0; 207 errno = EINVAL; 208 } 209 break; 210 case SO_REUSEADDR: 211 if (*optlen == sizeof (int)) { 212 *(int *)optval = 213 (sockets[i].so_opt & SO_REUSEADDR); 214 } else { 215 *optlen = 0; 216 errno = EINVAL; 217 } 218 break; 219 case SO_RCVBUF: 220 if (*optlen == sizeof (int)) { 221 *(int *)optval = sockets[i].so_rcvbuf; 222 } else { 223 *optlen = 0; 224 errno = EINVAL; 225 } 226 break; 227 case SO_SNDBUF: 228 if (*optlen == sizeof (int)) { 229 *(int *)optval = sockets[i].so_sndbuf; 230 } else { 231 *optlen = 0; 232 errno = EINVAL; 233 } 234 break; 235 case SO_LINGER: 236 if (*optlen == sizeof (struct linger)) { 237 /* struct copy */ 238 *(struct linger *)optval = sockets[i].so_linger; 239 } else { 240 *optlen = 0; 241 errno = EINVAL; 242 } 243 break; 244 default: 245 errno = ENOPROTOOPT; 246 break; 247 } 248 break; 249 } /* case SOL_SOCKET */ 250 case IPPROTO_TCP: 251 case IPPROTO_IP: { 252 switch (option) { 253 default: 254 *optlen = 0; 255 errno = ENOPROTOOPT; 256 break; 257 } 258 break; 259 } /* case IPPROTO_IP or IPPROTO_TCP */ 260 default: 261 errno = ENOPROTOOPT; 262 break; 263 } /* switch (level) */ 264 265 if (errno != 0) 266 return (-1); 267 else 268 return (0); 269 } 270 271 /* 272 * Generate a network-order source port from the privileged range if 273 * "reserved" is true, dynamic/private range otherwise. We consider the 274 * range of 512-1023 privileged ports as ports we can use. This mirrors 275 * historical rpc client practice for privileged port selection. 276 */ 277 in_port_t 278 get_source_port(boolean_t reserved) 279 { 280 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1, 281 rsvdport = (IPPORT_RESERVED / 2) - 1; 282 in_port_t p; 283 284 if (reserved) { 285 if (++rsvdport >= IPPORT_RESERVED) 286 p = rsvdport = IPPORT_RESERVED / 2; 287 else 288 p = rsvdport; 289 } else 290 p = ++dynamic; 291 292 return (htons(p)); 293 } 294 295 /* 296 * The socket options we support are: 297 * SO_RECVTIMEO - Value is uint32_t msecs. 298 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE). 299 * SO_REUSEADDR - value is int boolean. 300 * SO_RCVBUF - Value is int. 301 * SO_SNDBUF - Value is int. 302 */ 303 int 304 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen) 305 { 306 int i; 307 308 errno = 0; 309 if ((i = so_check_fd(s, &errno)) == -1) 310 return (-1); 311 312 switch (level) { 313 case SOL_SOCKET: { 314 switch (option) { 315 case SO_RCVTIMEO: 316 if (optlen == sizeof (uint32_t)) 317 sockets[i].in_timeout = *(uint32_t *)optval; 318 else { 319 errno = EINVAL; 320 } 321 break; 322 case SO_DONTROUTE: 323 if (optlen == sizeof (int)) { 324 if (*(int *)optval) 325 sockets[i].out_flags |= SO_DONTROUTE; 326 else 327 sockets[i].out_flags &= ~SO_DONTROUTE; 328 } else { 329 errno = EINVAL; 330 } 331 break; 332 case SO_REUSEADDR: 333 if (optlen == sizeof (int)) { 334 if (*(int *)optval) 335 sockets[i].so_opt |= SO_REUSEADDR; 336 else 337 sockets[i].so_opt &= ~SO_REUSEADDR; 338 } else { 339 errno = EINVAL; 340 } 341 break; 342 case SO_RCVBUF: 343 if (optlen == sizeof (int)) { 344 sockets[i].so_rcvbuf = *(int *)optval; 345 if (sockets[i].so_rcvbuf > SO_MAX_BUF) 346 sockets[i].so_rcvbuf = SO_MAX_BUF; 347 (void) tcp_opt_set(sockets[i].pcb, 348 level, option, optval, optlen); 349 } else { 350 errno = EINVAL; 351 } 352 break; 353 case SO_SNDBUF: 354 if (optlen == sizeof (int)) { 355 sockets[i].so_sndbuf = *(int *)optval; 356 if (sockets[i].so_sndbuf > SO_MAX_BUF) 357 sockets[i].so_sndbuf = SO_MAX_BUF; 358 (void) tcp_opt_set(sockets[i].pcb, 359 level, option, optval, optlen); 360 } else { 361 errno = EINVAL; 362 } 363 break; 364 case SO_LINGER: 365 if (optlen == sizeof (struct linger)) { 366 /* struct copy */ 367 sockets[i].so_linger = *(struct linger *)optval; 368 (void) tcp_opt_set(sockets[i].pcb, 369 level, option, optval, optlen); 370 } else { 371 errno = EINVAL; 372 } 373 break; 374 default: 375 errno = ENOPROTOOPT; 376 break; 377 } 378 break; 379 } /* case SOL_SOCKET */ 380 case IPPROTO_TCP: 381 case IPPROTO_IP: { 382 switch (option) { 383 default: 384 errno = ENOPROTOOPT; 385 break; 386 } 387 break; 388 } /* case IPPROTO_IP or IPPROTO_TCP */ 389 default: 390 errno = ENOPROTOOPT; 391 break; 392 } /* switch (level) */ 393 394 if (errno != 0) 395 return (-1); 396 else 397 return (0); 398 } 399 400 /* 401 * Shut down part of a full-duplex connection. 402 * 403 * Only supported for TCP sockets 404 */ 405 int 406 shutdown(int s, int how) 407 { 408 int sock_id; 409 int i; 410 411 errno = 0; 412 if ((sock_id = so_check_fd(s, &errno)) == -1) 413 return (-1); 414 415 /* shutdown only supported for TCP sockets */ 416 if (sockets[sock_id].type != INETBOOT_STREAM) { 417 errno = EOPNOTSUPP; 418 return (-1); 419 } 420 421 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) { 422 errno = ENOTCONN; 423 return (-1); 424 } 425 426 switch (how) { 427 case 0: 428 sockets[sock_id].so_state |= SS_CANTRCVMORE; 429 break; 430 case 1: 431 sockets[sock_id].so_state |= SS_CANTSENDMORE; 432 break; 433 case 2: 434 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE); 435 break; 436 default: 437 errno = EINVAL; 438 return (-1); 439 } 440 441 switch (sockets[sock_id].so_state & 442 (SS_CANTRCVMORE | SS_CANTSENDMORE)) { 443 case (SS_CANTRCVMORE | SS_CANTSENDMORE): 444 /* Call lower level protocol close routine. */ 445 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 446 if (sockets[sock_id].close[i] != NULL) { 447 (void) sockets[sock_id].close[i](sock_id); 448 } 449 } 450 nuke_grams(&sockets[sock_id].inq); 451 break; 452 case SS_CANTRCVMORE: 453 nuke_grams(&sockets[sock_id].inq); 454 break; 455 case SS_CANTSENDMORE: 456 /* Call lower level protocol close routine. */ 457 if (tcp_shutdown(sock_id) < 0) 458 return (-1); 459 break; 460 default: 461 errno = EINVAL; 462 return (-1); 463 } 464 465 return (0); 466 } 467 468 /* 469 * "close" a socket. 470 */ 471 int 472 socket_close(int s) 473 { 474 int sock_id, i; 475 476 errno = 0; 477 if ((sock_id = so_check_fd(s, &errno)) == -1) 478 return (-1); 479 480 /* Call lower level protocol close routine. */ 481 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 482 if (sockets[sock_id].close[i] != NULL) { 483 /* 484 * Note that the close() routine of other 485 * layers can return an error. But right 486 * now, the only mechanism to report that 487 * back is for the close() routine to set 488 * the errno and socket_close() will return 489 * an error. But the close operation will 490 * not be stopped. 491 */ 492 (void) sockets[sock_id].close[i](sock_id); 493 } 494 } 495 496 /* 497 * Clear the input queue. This has to be done 498 * after the lower level protocol close routines have been 499 * called as they may want to do something about the queue. 500 */ 501 nuke_grams(&sockets[sock_id].inq); 502 503 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket)); 504 sockets[sock_id].type = INETBOOT_UNUSED; 505 506 return (0); 507 } 508 509 /* 510 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero, 511 * then give up after `read_timeout' seconds. Returns the number of 512 * bytes read, or -1 on failure. 513 */ 514 int 515 socket_read(int s, void *buf, size_t nbyte, int read_timeout) 516 { 517 ssize_t n; 518 uint_t start, diff; 519 520 /* 521 * keep calling non-blocking recvfrom until something received 522 * or an error occurs 523 */ 524 start = prom_gettime(); 525 for (;;) { 526 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL); 527 if (n == -1 && errno == EWOULDBLOCK) { 528 diff = (uint_t)((prom_gettime() - start) + 500) / 1000; 529 if (read_timeout != 0 && diff > read_timeout) { 530 errno = EINTR; 531 return (-1); 532 } 533 } else { 534 return (n); 535 } 536 } 537 } 538 539 /* 540 * Write up to `nbyte' bytes of data from `buf' to the address pointed to 541 * `addr' using socket `s'. Returns the number of bytes writte on success, 542 * or -1 on failure. 543 */ 544 int 545 socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr) 546 { 547 return (sendto(s, buf, nbyte, 0, (struct sockaddr *)addr, 548 sizeof (*addr))); 549 } 550 551 static int 552 bind_check(int sock_id, const struct sockaddr *addr) 553 { 554 int k; 555 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; 556 557 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */ 558 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) { 559 for (k = 0; k < MAXSOCKET; k++) { 560 if (sockets[k].type != INETBOOT_UNUSED && 561 sockets[k].proto == sockets[sock_id].proto && 562 sockets[k].bound) { 563 if ((sockets[k].bind.sin_addr.s_addr == 564 in_addr->sin_addr.s_addr) && 565 (sockets[k].bind.sin_port == 566 in_addr->sin_port)) { 567 errno = EADDRINUSE; 568 return (-1); 569 } 570 } 571 } 572 } 573 return (0); 574 } 575 576 /* Assign a name to an unnamed socket. */ 577 int 578 bind(int s, const struct sockaddr *name, socklen_t namelen) 579 { 580 int i; 581 582 errno = 0; 583 584 if ((i = so_check_fd(s, &errno)) == -1) 585 return (-1); 586 587 if (name == NULL) { 588 /* unbind */ 589 if (sockets[i].bound) { 590 bzero((caddr_t)&sockets[i].bind, 591 sizeof (struct sockaddr_in)); 592 sockets[i].bound = B_FALSE; 593 } 594 return (0); 595 } 596 if (namelen != sizeof (struct sockaddr_in) || name == NULL) { 597 errno = EINVAL; 598 return (-1); 599 } 600 if (name->sa_family != AF_INET) { 601 errno = EAFNOSUPPORT; 602 return (-1); 603 } 604 if (sockets[i].bound) { 605 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name, 606 namelen) == 0) { 607 /* attempt to bind to same address ok... */ 608 return (0); 609 } 610 errno = EINVAL; /* already bound */ 611 return (-1); 612 } 613 614 if (errno != 0) { 615 return (-1); 616 } 617 618 /* Check for duplicate bind(). */ 619 if (bind_check(i, name) < 0) 620 return (-1); 621 622 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen); 623 if (sockets[i].type == INETBOOT_STREAM) { 624 if (tcp_bind(i) < 0) { 625 return (-1); 626 } 627 } 628 sockets[i].bound = B_TRUE; 629 630 return (0); 631 } 632 633 static int 634 quickbind(int sock_id) 635 { 636 int i; 637 struct sockaddr_in addr; 638 639 /* 640 * XXX This needs more work. Right now, if ipv4_setipaddr() 641 * have not been called, this will be wrong. But we need 642 * something better. Need to be revisited. 643 */ 644 ipv4_getipaddr(&addr.sin_addr); 645 addr.sin_family = AF_INET; 646 647 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) { 648 addr.sin_port = htons(i); 649 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0) 650 break; 651 } 652 /* Need to clear errno as it is probably set by bind_check(). */ 653 errno = 0; 654 655 if (i <= LARGEST_ANON_PORT) { 656 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind, 657 sizeof (struct sockaddr_in)); 658 sockets[sock_id].bound = B_TRUE; 659 #ifdef DEBUG 660 printf("quick bind done addr %s port %d\n", 661 inet_ntoa(sockets[sock_id].bind.sin_addr), 662 ntohs(sockets[sock_id].bind.sin_port)); 663 #endif 664 return (0); 665 } else { 666 return (-1); 667 } 668 } 669 670 int 671 listen(int fd, int backlog) 672 { 673 int sock_id; 674 675 errno = 0; 676 if ((sock_id = so_check_fd(fd, &errno)) == -1) 677 return (-1); 678 679 if (sockets[sock_id].type != INETBOOT_STREAM) { 680 errno = EOPNOTSUPP; 681 return (-1); 682 } 683 if (sockets[sock_id].so_error != 0) { 684 errno = sockets[sock_id].so_error; 685 return (-1); 686 } 687 return (tcp_listen(sock_id, backlog)); 688 } 689 690 int 691 accept(int fd, struct sockaddr *addr, socklen_t *addr_len) 692 { 693 int sock_id; 694 int new_sd; 695 696 errno = 0; 697 if ((sock_id = so_check_fd(fd, &errno)) == -1) 698 return (-1); 699 700 if (sockets[sock_id].type != INETBOOT_STREAM) { 701 errno = EOPNOTSUPP; 702 return (-1); 703 } 704 if (sockets[sock_id].so_error != 0) { 705 errno = sockets[sock_id].so_error; 706 return (-1); 707 } 708 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1) 709 return (-1); 710 sock_id = so_check_fd(new_sd, &errno); 711 sockets[sock_id].so_state |= SS_ISCONNECTED; 712 return (new_sd); 713 } 714 715 int 716 connect(int fd, const struct sockaddr *addr, socklen_t addr_len) 717 { 718 int sock_id; 719 int so_type; 720 721 errno = 0; 722 if ((sock_id = so_check_fd(fd, &errno)) == -1) 723 return (-1); 724 725 so_type = sockets[sock_id].type; 726 727 if (addr == NULL || addr_len == 0) { 728 errno = EINVAL; 729 return (-1); 730 } 731 /* Don't allow connect for raw socket. */ 732 if (so_type == INETBOOT_RAW) { 733 errno = EPROTONOSUPPORT; 734 return (-1); 735 } 736 737 if (sockets[sock_id].so_state & SS_ISCONNECTED) { 738 errno = EINVAL; 739 return (-1); 740 } 741 742 if (sockets[sock_id].so_error != 0) { 743 errno = sockets[sock_id].so_error; 744 return (-1); 745 } 746 747 /* If the socket is not bound, we need to do a quick bind. */ 748 if (!sockets[sock_id].bound) { 749 /* For TCP socket, just call tcp_bind(). */ 750 if (so_type == INETBOOT_STREAM) { 751 if (tcp_bind(sock_id) < 0) 752 return (-1); 753 } else { 754 if (quickbind(sock_id) < 0) { 755 errno = EADDRNOTAVAIL; 756 return (-1); 757 } 758 } 759 } 760 /* Should do some sanity check for addr .... */ 761 bcopy((caddr_t)addr, &sockets[sock_id].remote, 762 sizeof (struct sockaddr_in)); 763 764 if (sockets[sock_id].type == INETBOOT_STREAM) { 765 /* Call TCP connect routine. */ 766 if (tcp_connect(sock_id) == 0) 767 sockets[sock_id].so_state |= SS_ISCONNECTED; 768 else { 769 if (sockets[sock_id].so_error != 0) 770 errno = sockets[sock_id].so_error; 771 return (-1); 772 } 773 } else { 774 sockets[sock_id].so_state |= SS_ISCONNECTED; 775 } 776 return (0); 777 } 778 779 /* Just a wrapper around recvfrom(). */ 780 ssize_t 781 recv(int s, void *buf, size_t len, int flags) 782 { 783 return (recvfrom(s, buf, len, flags, NULL, NULL)); 784 } 785 786 /* 787 * Receive messages from a connectionless socket. Legal flags are 0 and 788 * MSG_DONTWAIT. MSG_WAITALL is not currently supported. 789 * 790 * Returns length of message for success, -1 if error occurred. 791 */ 792 ssize_t 793 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, 794 socklen_t *fromlen) 795 { 796 int sock_id, i; 797 ssize_t datalen, bytes = 0; 798 struct inetgram *icp; 799 enum SockType so_type; 800 char *tmp_buf; 801 mblk_t *mp; 802 803 errno = 0; 804 805 if ((sock_id = so_check_fd(s, &errno)) == -1) { 806 errno = EINVAL; 807 return (-1); 808 } 809 810 if (sockets[sock_id].type == INETBOOT_STREAM && 811 !(sockets[sock_id].so_state & SS_ISCONNECTED)) { 812 errno = ENOTCONN; 813 return (-1); 814 } 815 816 if (buf == NULL || len == 0) { 817 errno = EINVAL; 818 return (-1); 819 } 820 /* Yup - MSG_WAITALL not implemented */ 821 if ((flags & ~MSG_DONTWAIT) != 0) { 822 errno = EINVAL; 823 return (-1); 824 } 825 826 retry: 827 if (sockets[sock_id].inq == NULL) { 828 /* Go out and check the wire */ 829 for (i = MEDIA_LVL; i < APP_LVL; i++) { 830 if (sockets[sock_id].input[i] != NULL) { 831 if (sockets[sock_id].input[i](sock_id) < 0) { 832 if (sockets[sock_id].so_error != 0) { 833 errno = 834 sockets[sock_id].so_error; 835 } 836 return (-1); 837 } 838 } 839 } 840 } 841 842 so_type = sockets[sock_id].type; 843 844 /* Remove unknown inetgrams from the head of inq. Can this happen? */ 845 while ((icp = sockets[sock_id].inq) != NULL) { 846 if ((so_type == INETBOOT_DGRAM || 847 so_type == INETBOOT_STREAM) && 848 icp->igm_level != APP_LVL) { 849 #ifdef DEBUG 850 printf("recvfrom: unexpected level %d frame found\n", 851 icp->igm_level); 852 #endif /* DEBUG */ 853 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 854 continue; 855 } else { 856 break; 857 } 858 } 859 860 861 if (icp == NULL) { 862 /* 863 * Checking for error should be done everytime a lower layer 864 * input routing is called. For example, if TCP gets a RST, 865 * this should be reported asap. 866 */ 867 if (sockets[sock_id].so_state & SS_CANTRCVMORE) { 868 if (sockets[sock_id].so_error != 0) { 869 errno = sockets[sock_id].so_error; 870 return (-1); 871 } else { 872 return (0); 873 } 874 } 875 876 if ((flags & MSG_DONTWAIT) == 0) 877 goto retry; /* wait forever */ 878 879 /* no data */ 880 errno = EWOULDBLOCK; 881 return (-1); 882 } 883 884 if (from != NULL && fromlen != NULL) { 885 switch (so_type) { 886 case INETBOOT_STREAM: 887 /* Need to copy from the socket's remote address. */ 888 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen, 889 sizeof (struct sockaddr_in))); 890 break; 891 case INETBOOT_RAW: 892 case INETBOOT_DGRAM: 893 default: 894 if (*fromlen > sizeof (icp->igm_saddr)) 895 *fromlen = sizeof (icp->igm_saddr); 896 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from, 897 MIN(*fromlen, sizeof (struct sockaddr_in))); 898 break; 899 } 900 } 901 902 mp = icp->igm_mp; 903 switch (so_type) { 904 case INETBOOT_STREAM: 905 /* 906 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need 907 * to drain the data held by tcp and try again. 908 */ 909 if (icp->igm_id == TCP_CALLB_MAGIC_ID) { 910 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 911 tcp_rcv_drain_sock(sock_id); 912 goto retry; 913 } 914 915 /* TCP should put only user data in the inetgram. */ 916 tmp_buf = (char *)buf; 917 while (len > 0 && icp != NULL) { 918 datalen = mp->b_wptr - mp->b_rptr; 919 if (len < datalen) { 920 bcopy(mp->b_rptr, tmp_buf, len); 921 bytes += len; 922 mp->b_rptr += len; 923 break; 924 } else { 925 bcopy(mp->b_rptr, tmp_buf, datalen); 926 len -= datalen; 927 bytes += datalen; 928 tmp_buf += datalen; 929 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 930 931 /* 932 * If we have any embedded magic messages just 933 * drop them. 934 */ 935 while ((icp = sockets[sock_id].inq) != NULL) { 936 if (icp->igm_id != TCP_CALLB_MAGIC_ID) 937 break; 938 del_gram(&sockets[sock_id].inq, icp, 939 B_TRUE); 940 } 941 942 if (icp == NULL) 943 break; 944 mp = icp->igm_mp; 945 } 946 } 947 sockets[sock_id].so_rcvbuf += (int32_t)bytes; 948 break; 949 case INETBOOT_DGRAM: 950 datalen = mp->b_wptr - mp->b_rptr; 951 if (len < datalen) 952 bytes = len; 953 else 954 bytes = datalen; 955 bcopy(mp->b_rptr, buf, bytes); 956 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 957 break; 958 case INETBOOT_RAW: 959 default: 960 datalen = mp->b_wptr - mp->b_rptr; 961 if (len < datalen) 962 bytes = len; 963 else 964 bytes = datalen; 965 bcopy(mp->b_rptr, buf, bytes); 966 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 967 break; 968 } 969 970 #ifdef DEBUG 971 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id, 972 (icp != NULL) ? icp->igm_mp : 0, bytes); 973 #endif /* DEBUG */ 974 return (bytes); 975 } 976 977 978 /* Just a wrapper around sendto(). */ 979 ssize_t 980 send(int s, const void *msg, size_t len, int flags) 981 { 982 return (sendto(s, msg, len, flags, NULL, 0)); 983 } 984 985 /* 986 * Transmit a message through a socket. 987 * 988 * Supported flags: MSG_DONTROUTE or 0. 989 */ 990 ssize_t 991 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to, 992 socklen_t tolen) 993 { 994 enum SockType so_type; 995 int sock_id; 996 ssize_t bytes; 997 998 errno = 0; 999 1000 if ((sock_id = so_check_fd(s, &errno)) == -1) { 1001 return (-1); 1002 } 1003 if (msg == NULL) { 1004 errno = EINVAL; 1005 return (-1); 1006 } 1007 so_type = sockets[sock_id].type; 1008 if ((flags & ~MSG_DONTROUTE) != 0) { 1009 errno = EINVAL; 1010 return (-1); 1011 } 1012 if (sockets[sock_id].so_error != 0) { 1013 errno = sockets[sock_id].so_error; 1014 return (-1); 1015 } 1016 if (to != NULL && to->sa_family != AF_INET) { 1017 errno = EAFNOSUPPORT; 1018 return (-1); 1019 } 1020 1021 switch (so_type) { 1022 case INETBOOT_RAW: 1023 case INETBOOT_DGRAM: 1024 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) && 1025 (to == NULL || tolen != sizeof (struct sockaddr_in))) { 1026 errno = EINVAL; 1027 return (-1); 1028 } 1029 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen); 1030 break; 1031 case INETBOOT_STREAM: 1032 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) || 1033 (sockets[sock_id].so_state & SS_ISCONNECTING))) { 1034 errno = EINVAL; 1035 return (-1); 1036 } 1037 if (sockets[sock_id].so_state & SS_CANTSENDMORE) { 1038 errno = EPIPE; 1039 return (-1); 1040 } 1041 bytes = stream_sendto(sock_id, msg, len, flags); 1042 break; 1043 default: 1044 /* Should not happen... */ 1045 errno = EPROTOTYPE; 1046 return (-1); 1047 } 1048 return (bytes); 1049 } 1050 1051 static ssize_t 1052 dgram_sendto(int i, const void *msg, size_t len, int flags, 1053 const struct sockaddr *to, int tolen) 1054 { 1055 struct inetgram oc; 1056 int l, offset; 1057 size_t tlen; 1058 mblk_t *mp; 1059 1060 #ifdef DEBUG 1061 { 1062 struct sockaddr_in *sin = (struct sockaddr_in *)to; 1063 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n", 1064 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); 1065 } 1066 #endif /* DEBUG */ 1067 1068 nuke_grams(&sockets[i].inq); /* flush the input queue */ 1069 1070 /* calculate offset for data */ 1071 offset = sockets[i].headerlen[MEDIA_LVL](NULL) + 1072 (sockets[i].headerlen[NETWORK_LVL])(NULL); 1073 1074 bzero((caddr_t)&oc, sizeof (oc)); 1075 if (sockets[i].type != INETBOOT_RAW) { 1076 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL); 1077 oc.igm_level = TRANSPORT_LVL; 1078 } else 1079 oc.igm_level = NETWORK_LVL; 1080 oc.igm_oflags = flags; 1081 1082 if (to != NULL) { 1083 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen); 1084 } else { 1085 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr, 1086 sizeof (struct sockaddr_in)); 1087 } 1088 1089 /* Get a legal source port if the socket isn't bound. */ 1090 if (sockets[i].bound == B_FALSE && 1091 ntohs(oc.igm_saddr.sin_port == 0)) { 1092 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port = 1093 get_source_port(B_FALSE); 1094 } 1095 1096 /* Round up to 16bit value for checksum purposes */ 1097 if (sockets[i].type == INETBOOT_DGRAM) { 1098 tlen = ((len + sizeof (uint16_t) - 1) & 1099 ~(sizeof (uint16_t) - 1)); 1100 } else 1101 tlen = len; 1102 1103 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) { 1104 errno = ENOMEM; 1105 return (-1); 1106 } 1107 mp = oc.igm_mp; 1108 mp->b_rptr = mp->b_wptr += offset; 1109 bcopy((caddr_t)msg, mp->b_wptr, len); 1110 mp->b_wptr += len; 1111 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) { 1112 if (sockets[i].output[l] != NULL) { 1113 if (sockets[i].output[l](i, &oc) < 0) { 1114 freeb(mp); 1115 if (errno == 0) 1116 errno = EIO; 1117 return (-1); 1118 } 1119 } 1120 } 1121 freeb(mp); 1122 return (len); 1123 } 1124 1125 /* ARGSUSED */ 1126 static ssize_t 1127 stream_sendto(int i, const void *msg, size_t len, int flags) 1128 { 1129 int cnt; 1130 1131 assert(sockets[i].pcb != NULL); 1132 1133 /* 1134 * Call directly TCP's send routine. We do this because TCP 1135 * needs to decide whether to send out the data. 1136 * 1137 * Note also that currently, TCP ignores all flags passed in for 1138 * TCP socket. 1139 */ 1140 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) { 1141 if (sockets[i].so_error != 0) 1142 errno = sockets[i].so_error; 1143 return (-1); 1144 } else { 1145 return (cnt); 1146 } 1147 } 1148 1149 /* 1150 * Returns ptr to the last inetgram in the list, or null if list is null 1151 */ 1152 struct inetgram * 1153 last_gram(struct inetgram *igp) 1154 { 1155 struct inetgram *wp; 1156 for (wp = igp; wp != NULL; wp = wp->igm_next) { 1157 if (wp->igm_next == NULL) 1158 return (wp); 1159 } 1160 return (NULL); 1161 } 1162 1163 /* 1164 * Adds an inetgram or list of inetgrams to the end of the list. 1165 */ 1166 void 1167 add_grams(struct inetgram **igpp, struct inetgram *newgp) 1168 { 1169 struct inetgram *wp; 1170 1171 if (newgp == NULL) 1172 return; 1173 1174 if (*igpp == NULL) 1175 *igpp = newgp; 1176 else { 1177 wp = last_gram(*igpp); 1178 wp->igm_next = newgp; 1179 } 1180 } 1181 1182 /* 1183 * Nuke a whole list of grams. 1184 */ 1185 void 1186 nuke_grams(struct inetgram **lgpp) 1187 { 1188 while (*lgpp != NULL) 1189 del_gram(lgpp, *lgpp, B_TRUE); 1190 } 1191 1192 /* 1193 * Remove the referenced inetgram. List is altered accordingly. Destroy the 1194 * referenced inetgram if freeit is B_TRUE. 1195 */ 1196 void 1197 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit) 1198 { 1199 struct inetgram *wp, *pp = NULL; 1200 1201 if (lgpp == NULL || igp == NULL) 1202 return; 1203 1204 wp = *lgpp; 1205 while (wp != NULL) { 1206 if (wp == igp) { 1207 /* detach wp from the list */ 1208 if (*lgpp == wp) 1209 *lgpp = (*lgpp)->igm_next; 1210 else 1211 pp->igm_next = wp->igm_next; 1212 igp->igm_next = NULL; 1213 1214 if (freeit) { 1215 if (igp->igm_mp != NULL) 1216 freeb(igp->igm_mp); 1217 bkmem_free((caddr_t)igp, 1218 sizeof (struct inetgram)); 1219 } 1220 break; 1221 } 1222 pp = wp; 1223 wp = wp->igm_next; 1224 } 1225 } 1226 1227 struct nct_t nct[] = { 1228 "bootp", NCT_BOOTP_DHCP, 1229 "dhcp", NCT_BOOTP_DHCP, 1230 "rarp", NCT_RARP_BOOTPARAMS, 1231 "manual", NCT_MANUAL 1232 }; 1233 int nct_entries = sizeof (nct) / sizeof (nct[0]); 1234 1235 /* 1236 * Figure out from the bootpath what kind of network configuration strategy 1237 * we should use. Returns the network config strategy. 1238 */ 1239 int 1240 get_netconfig_strategy(void) 1241 { 1242 int i; 1243 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0') 1244 char lbootpath[OBP_MAXPATHLEN]; 1245 char net_options[NCT_BUFSIZE]; 1246 char *op, *nop, *sp; 1247 pnode_t cn; 1248 int proplen; 1249 1250 /* If the PROM DHCP cache exists, we're done */ 1251 if (prom_cached_reply(B_TRUE)) 1252 return (NCT_BOOTP_DHCP); 1253 1254 /* 1255 * Newer (version 4) PROMs will put the name in the 1256 * "net-config-strategy" property. 1257 */ 1258 cn = prom_finddevice("/chosen"); 1259 if ((proplen = prom_getproplen(cn, "net-config-strategy")) < 1260 sizeof (net_options)) { 1261 (void) prom_getprop(cn, "net-config-strategy", net_options); 1262 net_options[proplen] = '\0'; 1263 } else { 1264 1265 /* 1266 * We're reduced to sacanning bootpath for the prototol to use. 1267 * Since there was no "net-config-strategy" property, this is 1268 * an old PROM, so we need to excise any extraneous key/value 1269 * initializations from bootpath[]. 1270 */ 1271 for (op = prom_bootpath(), sp = lbootpath; op != NULL && 1272 !ISSPACE(*op); sp++, op++) 1273 *sp = *op; 1274 *sp = '\0'; 1275 /* find the last '/' (in the device path) */ 1276 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */ 1277 op = lbootpath; 1278 else 1279 op++; 1280 /* then look for the ':' separating it from the protocol */ 1281 while (*op != ':' && *op != '\0') 1282 op++; 1283 1284 if (*op == ':') { 1285 for (nop = net_options, op++; 1286 *op != '\0' && *op != '/' && !ISSPACE(*op) && 1287 nop < &net_options[NCT_BUFSIZE]; nop++, op++) 1288 *nop = *op; 1289 *nop = '\0'; 1290 } else 1291 net_options[0] = '\0'; 1292 } 1293 1294 #undef ISSPACE 1295 1296 for (i = 0; i < nct_entries; i++) 1297 if (strcmp(net_options, nct[i].p_name) == 0) 1298 return (nct[i].p_id); 1299 1300 return (NCT_DEFAULT); 1301 } 1302 1303 /* Modified STREAM routines for ease of porting core TCP code. */ 1304 1305 /*ARGSUSED*/ 1306 mblk_t * 1307 allocb(size_t size, uint_t pri) 1308 { 1309 unsigned char *base; 1310 mblk_t *mp; 1311 1312 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL) 1313 return (NULL); 1314 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL) 1315 return (NULL); 1316 1317 mp->b_next = mp->b_prev = mp->b_cont = NULL; 1318 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base; 1319 mp->b_size = size; 1320 1321 return (mp); 1322 } 1323 1324 void 1325 freeb(mblk_t *mp) 1326 { 1327 #ifdef DEBUG 1328 printf("freeb datap %x\n", mp->b_datap); 1329 #endif 1330 bkmem_free((caddr_t)(mp->b_datap), mp->b_size); 1331 #ifdef DEBUG 1332 printf("freeb mp %x\n", mp); 1333 #endif 1334 bkmem_free((caddr_t)mp, sizeof (mblk_t)); 1335 } 1336 1337 void 1338 freemsg(mblk_t *mp) 1339 { 1340 while (mp) { 1341 mblk_t *mp_cont = mp->b_cont; 1342 1343 freeb(mp); 1344 mp = mp_cont; 1345 } 1346 } 1347 1348 mblk_t * 1349 copyb(mblk_t *bp) 1350 { 1351 mblk_t *nbp; 1352 unsigned char *ndp; 1353 1354 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0); 1355 1356 if (!(nbp = allocb(bp->b_size, 0))) 1357 return (NULL); 1358 nbp->b_cont = NULL; 1359 ndp = nbp->b_datap; 1360 1361 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap); 1362 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr); 1363 bcopy(bp->b_datap, nbp->b_datap, bp->b_size); 1364 return (nbp); 1365 } 1366 1367 /* To simplify things, dupb() is implemented as copyb(). */ 1368 mblk_t * 1369 dupb(mblk_t *mp) 1370 { 1371 return (copyb(mp)); 1372 } 1373 1374 /* 1375 * get number of data bytes in message 1376 */ 1377 size_t 1378 msgdsize(mblk_t *bp) 1379 { 1380 size_t count = 0; 1381 1382 for (; bp != NULL; bp = bp->b_cont) { 1383 assert(bp->b_wptr >= bp->b_rptr); 1384 count += bp->b_wptr - bp->b_rptr; 1385 } 1386 return (count); 1387 } 1388