1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 23 * Use is subject to license terms. 24 * 25 * socket.c, Code implementing a simple socket interface. 26 */ 27 28 #pragma ident "%Z%%M% %I% %E% SMI" 29 30 #include <sys/types.h> 31 #include "socket_impl.h" 32 #include <sys/isa_defs.h> 33 #include <sys/sysmacros.h> 34 #include <sys/bootconf.h> 35 #include <sys/socket.h> 36 #include <netinet/in.h> 37 #include <netinet/ip.h> 38 #include <netinet/tcp.h> 39 #include <sys/uio.h> 40 #include <sys/salib.h> 41 #include "socket_inet.h" 42 #include "ipv4.h" 43 #include "ipv4_impl.h" 44 #include "udp_inet.h" 45 #include "tcp_inet.h" 46 #include "mac.h" 47 #include "mac_impl.h" 48 #include <sys/promif.h> 49 50 struct inetboot_socket sockets[MAXSOCKET] = { 0 }; 51 52 /* Default send and receive socket buffer size */ 53 #define SO_DEF_SNDBUF 48*1024 54 #define SO_DEF_RCVBUF 48*1024 55 56 /* Default max socket buffer size */ 57 #define SO_MAX_BUF 4*1024*1024 58 59 static ssize_t dgram_sendto(int, const void *, size_t, int, 60 const struct sockaddr *, int); 61 static ssize_t stream_sendto(int, const void *, size_t, int); 62 static int bind_check(int, const struct sockaddr *); 63 static int quickbind(int); 64 65 /* Check the validity of a fd and return the socket index of that fd. */ 66 int 67 so_check_fd(int fd, int *errno) 68 { 69 int i; 70 71 i = FD_TO_SOCKET(fd); 72 if (i < 0 || i >= MAXSOCKET) { 73 *errno = ENOTSOCK; 74 return (-1); 75 } 76 if (sockets[i].type == INETBOOT_UNUSED) { 77 *errno = ENOTSOCK; 78 return (-1); 79 } 80 return (i); 81 } 82 83 /* 84 * Create an endpoint for network communication. Returns a descriptor. 85 * 86 * Notes: 87 * Only PF_INET communication domains are supported. Within 88 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are 89 * supported. 90 */ 91 int 92 socket(int domain, int type, int protocol) 93 { 94 static int sock_initialized; 95 int i; 96 97 errno = 0; 98 99 if (!sock_initialized) { 100 for (i = 0; i < MAXSOCKET; i++) 101 sockets[i].type = INETBOOT_UNUSED; 102 sock_initialized = B_TRUE; 103 } 104 if (domain != AF_INET) { 105 errno = EPROTONOSUPPORT; 106 return (-1); 107 } 108 109 /* Find available socket */ 110 for (i = 0; i < MAXSOCKET; i++) { 111 if (sockets[i].type == INETBOOT_UNUSED) 112 break; 113 } 114 if (i >= MAXSOCKET) { 115 errno = EMFILE; /* No slots left. */ 116 return (-1); 117 } 118 119 /* Some socket initialization... */ 120 sockets[i].so_rcvbuf = SO_DEF_RCVBUF; 121 sockets[i].so_sndbuf = SO_DEF_SNDBUF; 122 123 /* 124 * Note that we ignore the protocol field for SOCK_DGRAM and 125 * SOCK_STREAM. When we support different protocols in future, 126 * this needs to be changed. 127 */ 128 switch (type) { 129 case SOCK_RAW: 130 ipv4_raw_socket(&sockets[i], (uint8_t)protocol); 131 break; 132 case SOCK_DGRAM: 133 udp_socket_init(&sockets[i]); 134 break; 135 case SOCK_STREAM: 136 tcp_socket_init(&sockets[i]); 137 break; 138 default: 139 errno = EPROTOTYPE; 140 break; 141 } 142 143 if (errno != 0) 144 return (-1); 145 146 /* IPv4 generic initialization. */ 147 ipv4_socket_init(&sockets[i]); 148 149 /* MAC generic initialization. */ 150 mac_socket_init(&sockets[i]); 151 152 return (i + SOCKETTYPE); 153 } 154 155 int 156 getsockname(int s, struct sockaddr *name, socklen_t *namelen) 157 { 158 int i; 159 160 errno = 0; 161 if ((i = so_check_fd(s, &errno)) == -1) 162 return (-1); 163 164 if (*namelen < sizeof (struct sockaddr_in)) { 165 errno = ENOMEM; 166 return (-1); 167 } 168 169 /* Structure assignment... */ 170 *((struct sockaddr_in *)name) = sockets[i].bind; 171 *namelen = sizeof (struct sockaddr_in); 172 return (0); 173 } 174 175 /* 176 * The socket options we support are: 177 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t. 178 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set). 179 * SO_REUSEADDR - Value is an int boolean. 180 * SO_RCVBUF - Value is an int. 181 * SO_SNDBUF - Value is an int. 182 */ 183 int 184 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen) 185 { 186 int i; 187 188 errno = 0; 189 if ((i = so_check_fd(s, &errno)) == -1) 190 return (-1); 191 192 switch (level) { 193 case SOL_SOCKET: { 194 switch (option) { 195 case SO_RCVTIMEO: 196 if (*optlen == sizeof (uint32_t)) { 197 *(uint32_t *)optval = sockets[i].in_timeout; 198 } else { 199 *optlen = 0; 200 errno = EINVAL; 201 } 202 break; 203 case SO_DONTROUTE: 204 if (*optlen == sizeof (int)) { 205 *(int *)optval = 206 (sockets[i].out_flags & SO_DONTROUTE); 207 } else { 208 *optlen = 0; 209 errno = EINVAL; 210 } 211 break; 212 case SO_REUSEADDR: 213 if (*optlen == sizeof (int)) { 214 *(int *)optval = 215 (sockets[i].so_opt & SO_REUSEADDR); 216 } else { 217 *optlen = 0; 218 errno = EINVAL; 219 } 220 break; 221 case SO_RCVBUF: 222 if (*optlen == sizeof (int)) { 223 *(int *)optval = sockets[i].so_rcvbuf; 224 } else { 225 *optlen = 0; 226 errno = EINVAL; 227 } 228 break; 229 case SO_SNDBUF: 230 if (*optlen == sizeof (int)) { 231 *(int *)optval = sockets[i].so_sndbuf; 232 } else { 233 *optlen = 0; 234 errno = EINVAL; 235 } 236 break; 237 case SO_LINGER: 238 if (*optlen == sizeof (struct linger)) { 239 /* struct copy */ 240 *(struct linger *)optval = sockets[i].so_linger; 241 } else { 242 *optlen = 0; 243 errno = EINVAL; 244 } 245 default: 246 errno = ENOPROTOOPT; 247 break; 248 } 249 break; 250 } /* case SOL_SOCKET */ 251 case IPPROTO_TCP: 252 case IPPROTO_IP: { 253 switch (option) { 254 default: 255 *optlen = 0; 256 errno = ENOPROTOOPT; 257 break; 258 } 259 break; 260 } /* case IPPROTO_IP or IPPROTO_TCP */ 261 default: 262 errno = ENOPROTOOPT; 263 break; 264 } /* switch (level) */ 265 266 if (errno != 0) 267 return (-1); 268 else 269 return (0); 270 } 271 272 /* 273 * Generate a network-order source port from the privileged range if 274 * "reserved" is true, dynamic/private range otherwise. We consider the 275 * range of 512-1023 privileged ports as ports we can use. This mirrors 276 * historical rpc client practice for privileged port selection. 277 */ 278 in_port_t 279 get_source_port(boolean_t reserved) 280 { 281 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1, 282 rsvdport = (IPPORT_RESERVED / 2) - 1; 283 in_port_t p; 284 285 if (reserved) { 286 if (++rsvdport >= IPPORT_RESERVED) 287 p = rsvdport = IPPORT_RESERVED / 2; 288 else 289 p = rsvdport; 290 } else 291 p = ++dynamic; 292 293 return (htons(p)); 294 } 295 296 /* 297 * The socket options we support are: 298 * SO_RECVTIMEO - Value is uint32_t msecs. 299 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE). 300 * SO_REUSEADDR - value is int boolean. 301 * SO_RCVBUF - Value is int. 302 * SO_SNDBUF - Value is int. 303 */ 304 int 305 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen) 306 { 307 int i; 308 309 errno = 0; 310 if ((i = so_check_fd(s, &errno)) == -1) 311 return (-1); 312 313 switch (level) { 314 case SOL_SOCKET: { 315 switch (option) { 316 case SO_RCVTIMEO: 317 if (optlen == sizeof (uint32_t)) 318 sockets[i].in_timeout = *(uint32_t *)optval; 319 else { 320 errno = EINVAL; 321 } 322 break; 323 case SO_DONTROUTE: 324 if (optlen == sizeof (int)) { 325 if (*(int *)optval) 326 sockets[i].out_flags |= SO_DONTROUTE; 327 else 328 sockets[i].out_flags &= ~SO_DONTROUTE; 329 } else { 330 errno = EINVAL; 331 } 332 break; 333 case SO_REUSEADDR: 334 if (optlen == sizeof (int)) { 335 if (*(int *)optval) 336 sockets[i].so_opt |= SO_REUSEADDR; 337 else 338 sockets[i].so_opt &= ~SO_REUSEADDR; 339 } else { 340 errno = EINVAL; 341 } 342 break; 343 case SO_RCVBUF: 344 if (optlen == sizeof (int)) { 345 sockets[i].so_rcvbuf = *(int *)optval; 346 if (sockets[i].so_rcvbuf > SO_MAX_BUF) 347 sockets[i].so_rcvbuf = SO_MAX_BUF; 348 (void) tcp_opt_set(sockets[i].pcb, 349 level, option, optval, optlen); 350 } else { 351 errno = EINVAL; 352 } 353 break; 354 case SO_SNDBUF: 355 if (optlen == sizeof (int)) { 356 sockets[i].so_sndbuf = *(int *)optval; 357 if (sockets[i].so_sndbuf > SO_MAX_BUF) 358 sockets[i].so_sndbuf = SO_MAX_BUF; 359 (void) tcp_opt_set(sockets[i].pcb, 360 level, option, optval, optlen); 361 } else { 362 errno = EINVAL; 363 } 364 break; 365 case SO_LINGER: 366 if (optlen == sizeof (struct linger)) { 367 /* struct copy */ 368 sockets[i].so_linger = *(struct linger *)optval; 369 (void) tcp_opt_set(sockets[i].pcb, 370 level, option, optval, optlen); 371 } else { 372 errno = EINVAL; 373 } 374 break; 375 default: 376 errno = ENOPROTOOPT; 377 break; 378 } 379 break; 380 } /* case SOL_SOCKET */ 381 case IPPROTO_TCP: 382 case IPPROTO_IP: { 383 switch (option) { 384 default: 385 errno = ENOPROTOOPT; 386 break; 387 } 388 break; 389 } /* case IPPROTO_IP or IPPROTO_TCP */ 390 default: 391 errno = ENOPROTOOPT; 392 break; 393 } /* switch (level) */ 394 395 if (errno != 0) 396 return (-1); 397 else 398 return (0); 399 } 400 401 /* 402 * Shut down part of a full-duplex connection. 403 * 404 * Only supported for TCP sockets 405 */ 406 int 407 shutdown(int s, int how) 408 { 409 int sock_id; 410 int i; 411 412 errno = 0; 413 if ((sock_id = so_check_fd(s, &errno)) == -1) 414 return (-1); 415 416 /* shutdown only supported for TCP sockets */ 417 if (sockets[sock_id].type != INETBOOT_STREAM) { 418 errno = EOPNOTSUPP; 419 return (-1); 420 } 421 422 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) { 423 errno = ENOTCONN; 424 return (-1); 425 } 426 427 switch (how) { 428 case 0: 429 sockets[sock_id].so_state |= SS_CANTRCVMORE; 430 break; 431 case 1: 432 sockets[sock_id].so_state |= SS_CANTSENDMORE; 433 break; 434 case 2: 435 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE); 436 break; 437 default: 438 errno = EINVAL; 439 return (-1); 440 } 441 442 switch (sockets[sock_id].so_state & 443 (SS_CANTRCVMORE | SS_CANTSENDMORE)) { 444 case (SS_CANTRCVMORE | SS_CANTSENDMORE): 445 /* Call lower level protocol close routine. */ 446 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 447 if (sockets[sock_id].close[i] != NULL) { 448 (void) sockets[sock_id].close[i](sock_id); 449 } 450 } 451 nuke_grams(&sockets[sock_id].inq); 452 break; 453 case SS_CANTRCVMORE: 454 nuke_grams(&sockets[sock_id].inq); 455 break; 456 case SS_CANTSENDMORE: 457 /* Call lower level protocol close routine. */ 458 if (tcp_shutdown(sock_id) < 0) 459 return (-1); 460 break; 461 default: 462 errno = EINVAL; 463 return (-1); 464 } 465 466 return (0); 467 } 468 469 /* 470 * "close" a socket. 471 */ 472 int 473 socket_close(int s) 474 { 475 int sock_id, i; 476 477 errno = 0; 478 if ((sock_id = so_check_fd(s, &errno)) == -1) 479 return (-1); 480 481 /* Call lower level protocol close routine. */ 482 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 483 if (sockets[sock_id].close[i] != NULL) { 484 /* 485 * Note that the close() routine of other 486 * layers can return an error. But right 487 * now, the only mechanism to report that 488 * back is for the close() routine to set 489 * the errno and socket_close() will return 490 * an error. But the close operation will 491 * not be stopped. 492 */ 493 (void) sockets[sock_id].close[i](sock_id); 494 } 495 } 496 497 /* 498 * Clear the input queue. This has to be done 499 * after the lower level protocol close routines have been 500 * called as they may want to do something about the queue. 501 */ 502 nuke_grams(&sockets[sock_id].inq); 503 504 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket)); 505 sockets[sock_id].type = INETBOOT_UNUSED; 506 507 return (0); 508 } 509 510 /* 511 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero, 512 * then give up after `read_timeout' seconds. Returns the number of 513 * bytes read, or -1 on failure. 514 */ 515 int 516 socket_read(int s, void *buf, size_t nbyte, int read_timeout) 517 { 518 ssize_t n; 519 uint_t start, diff; 520 521 /* 522 * keep calling non-blocking recvfrom until something received 523 * or an error occurs 524 */ 525 start = prom_gettime(); 526 for (;;) { 527 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL); 528 if (n == -1 && errno == EWOULDBLOCK) { 529 diff = (uint_t)((prom_gettime() - start) + 500) / 1000; 530 if (read_timeout != 0 && diff > read_timeout) { 531 errno = EINTR; 532 return (-1); 533 } 534 } else { 535 return (n); 536 } 537 } 538 } 539 540 /* 541 * Write up to `nbyte' bytes of data from `buf' to the address pointed to 542 * `addr' using socket `s'. Returns the number of bytes writte on success, 543 * or -1 on failure. 544 */ 545 int 546 socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr) 547 { 548 return (sendto(s, buf, nbyte, 0, (struct sockaddr *)addr, 549 sizeof (*addr))); 550 } 551 552 static int 553 bind_check(int sock_id, const struct sockaddr *addr) 554 { 555 int k; 556 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; 557 558 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */ 559 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) { 560 for (k = 0; k < MAXSOCKET; k++) { 561 if (sockets[k].type != INETBOOT_UNUSED && 562 sockets[k].proto == sockets[sock_id].proto && 563 sockets[k].bound) { 564 if ((sockets[k].bind.sin_addr.s_addr == 565 in_addr->sin_addr.s_addr) && 566 (sockets[k].bind.sin_port == 567 in_addr->sin_port)) { 568 errno = EADDRINUSE; 569 return (-1); 570 } 571 } 572 } 573 } 574 return (0); 575 } 576 577 /* Assign a name to an unnamed socket. */ 578 int 579 bind(int s, const struct sockaddr *name, socklen_t namelen) 580 { 581 int i; 582 583 errno = 0; 584 585 if ((i = so_check_fd(s, &errno)) == -1) 586 return (-1); 587 588 if (name == NULL) { 589 /* unbind */ 590 if (sockets[i].bound) { 591 bzero((caddr_t)&sockets[i].bind, 592 sizeof (struct sockaddr_in)); 593 sockets[i].bound = B_FALSE; 594 } 595 return (0); 596 } 597 if (namelen != sizeof (struct sockaddr_in) || name == NULL) { 598 errno = EINVAL; 599 return (-1); 600 } 601 if (name->sa_family != AF_INET) { 602 errno = EAFNOSUPPORT; 603 return (-1); 604 } 605 if (sockets[i].bound) { 606 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name, 607 namelen) == 0) { 608 /* attempt to bind to same address ok... */ 609 return (0); 610 } 611 errno = EINVAL; /* already bound */ 612 return (-1); 613 } 614 615 if (errno != 0) { 616 return (-1); 617 } 618 619 /* Check for duplicate bind(). */ 620 if (bind_check(i, name) < 0) 621 return (-1); 622 623 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen); 624 if (sockets[i].type == INETBOOT_STREAM) { 625 if (tcp_bind(i) < 0) { 626 return (-1); 627 } 628 } 629 sockets[i].bound = B_TRUE; 630 631 return (0); 632 } 633 634 static int 635 quickbind(int sock_id) 636 { 637 int i; 638 struct sockaddr_in addr; 639 640 /* 641 * XXX This needs more work. Right now, if ipv4_setipaddr() 642 * have not been called, this will be wrong. But we need 643 * something better. Need to be revisited. 644 */ 645 ipv4_getipaddr(&addr.sin_addr); 646 addr.sin_family = AF_INET; 647 648 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) { 649 addr.sin_port = htons(i); 650 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0) 651 break; 652 } 653 /* Need to clear errno as it is probably set by bind_check(). */ 654 errno = 0; 655 656 if (i <= LARGEST_ANON_PORT) { 657 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind, 658 sizeof (struct sockaddr_in)); 659 sockets[sock_id].bound = B_TRUE; 660 #ifdef DEBUG 661 printf("quick bind done addr %s port %d\n", 662 inet_ntoa(sockets[sock_id].bind.sin_addr), 663 ntohs(sockets[sock_id].bind.sin_port)); 664 #endif 665 return (0); 666 } else { 667 return (-1); 668 } 669 } 670 671 int 672 listen(int fd, int backlog) 673 { 674 int sock_id; 675 676 errno = 0; 677 if ((sock_id = so_check_fd(fd, &errno)) == -1) 678 return (-1); 679 680 if (sockets[sock_id].type != INETBOOT_STREAM) { 681 errno = EOPNOTSUPP; 682 return (-1); 683 } 684 if (sockets[sock_id].so_error != 0) { 685 errno = sockets[sock_id].so_error; 686 return (-1); 687 } 688 return (tcp_listen(sock_id, backlog)); 689 } 690 691 int 692 accept(int fd, struct sockaddr *addr, socklen_t *addr_len) 693 { 694 int sock_id; 695 int new_sd; 696 697 errno = 0; 698 if ((sock_id = so_check_fd(fd, &errno)) == -1) 699 return (-1); 700 701 if (sockets[sock_id].type != INETBOOT_STREAM) { 702 errno = EOPNOTSUPP; 703 return (-1); 704 } 705 if (sockets[sock_id].so_error != 0) { 706 errno = sockets[sock_id].so_error; 707 return (-1); 708 } 709 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1) 710 return (-1); 711 sock_id = so_check_fd(new_sd, &errno); 712 sockets[sock_id].so_state |= SS_ISCONNECTED; 713 return (new_sd); 714 } 715 716 int 717 connect(int fd, const struct sockaddr *addr, socklen_t addr_len) 718 { 719 int sock_id; 720 int so_type; 721 722 errno = 0; 723 if ((sock_id = so_check_fd(fd, &errno)) == -1) 724 return (-1); 725 726 so_type = sockets[sock_id].type; 727 728 if (addr == NULL || addr_len == 0) { 729 errno = EINVAL; 730 return (-1); 731 } 732 /* Don't allow connect for raw socket. */ 733 if (so_type == INETBOOT_RAW) { 734 errno = EPROTONOSUPPORT; 735 return (-1); 736 } 737 738 if (sockets[sock_id].so_state & SS_ISCONNECTED) { 739 errno = EINVAL; 740 return (-1); 741 } 742 743 if (sockets[sock_id].so_error != 0) { 744 errno = sockets[sock_id].so_error; 745 return (-1); 746 } 747 748 /* If the socket is not bound, we need to do a quick bind. */ 749 if (!sockets[sock_id].bound) { 750 /* For TCP socket, just call tcp_bind(). */ 751 if (so_type == INETBOOT_STREAM) { 752 if (tcp_bind(sock_id) < 0) 753 return (-1); 754 } else { 755 if (quickbind(sock_id) < 0) { 756 errno = EADDRNOTAVAIL; 757 return (-1); 758 } 759 } 760 } 761 /* Should do some sanity check for addr .... */ 762 bcopy((caddr_t)addr, &sockets[sock_id].remote, 763 sizeof (struct sockaddr_in)); 764 765 if (sockets[sock_id].type == INETBOOT_STREAM) { 766 /* Call TCP connect routine. */ 767 if (tcp_connect(sock_id) == 0) 768 sockets[sock_id].so_state |= SS_ISCONNECTED; 769 else { 770 if (sockets[sock_id].so_error != 0) 771 errno = sockets[sock_id].so_error; 772 return (-1); 773 } 774 } else { 775 sockets[sock_id].so_state |= SS_ISCONNECTED; 776 } 777 return (0); 778 } 779 780 /* Just a wrapper around recvfrom(). */ 781 ssize_t 782 recv(int s, void *buf, size_t len, int flags) 783 { 784 return (recvfrom(s, buf, len, flags, NULL, NULL)); 785 } 786 787 /* 788 * Receive messages from a connectionless socket. Legal flags are 0 and 789 * MSG_DONTWAIT. MSG_WAITALL is not currently supported. 790 * 791 * Returns length of message for success, -1 if error occurred. 792 */ 793 ssize_t 794 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, 795 socklen_t *fromlen) 796 { 797 int sock_id, i; 798 ssize_t datalen, bytes = 0; 799 struct inetgram *icp; 800 enum SockType so_type; 801 char *tmp_buf; 802 mblk_t *mp; 803 804 errno = 0; 805 806 if ((sock_id = so_check_fd(s, &errno)) == -1) { 807 errno = EINVAL; 808 return (-1); 809 } 810 811 if (sockets[sock_id].type == INETBOOT_STREAM && 812 !(sockets[sock_id].so_state & SS_ISCONNECTED)) { 813 errno = ENOTCONN; 814 return (-1); 815 } 816 817 if (buf == NULL || len == 0) { 818 errno = EINVAL; 819 return (-1); 820 } 821 /* Yup - MSG_WAITALL not implemented */ 822 if ((flags & ~MSG_DONTWAIT) != 0) { 823 errno = EINVAL; 824 return (-1); 825 } 826 827 retry: 828 if (sockets[sock_id].inq == NULL) { 829 /* Go out and check the wire */ 830 for (i = MEDIA_LVL; i < APP_LVL; i++) { 831 if (sockets[sock_id].input[i] != NULL) { 832 if (sockets[sock_id].input[i](sock_id) < 0) { 833 if (sockets[sock_id].so_error != 0) { 834 errno = 835 sockets[sock_id].so_error; 836 } 837 return (-1); 838 } 839 } 840 } 841 } 842 843 so_type = sockets[sock_id].type; 844 845 /* Remove unknown inetgrams from the head of inq. Can this happen? */ 846 while ((icp = sockets[sock_id].inq) != NULL) { 847 if ((so_type == INETBOOT_DGRAM || 848 so_type == INETBOOT_STREAM) && 849 icp->igm_level != APP_LVL) { 850 #ifdef DEBUG 851 printf("recvfrom: unexpected level %d frame found\n", 852 icp->igm_level); 853 #endif /* DEBUG */ 854 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 855 continue; 856 } else { 857 break; 858 } 859 } 860 861 862 if (icp == NULL) { 863 /* 864 * Checking for error should be done everytime a lower layer 865 * input routing is called. For example, if TCP gets a RST, 866 * this should be reported asap. 867 */ 868 if (sockets[sock_id].so_state & SS_CANTRCVMORE) { 869 if (sockets[sock_id].so_error != 0) { 870 errno = sockets[sock_id].so_error; 871 return (-1); 872 } else { 873 return (0); 874 } 875 } 876 877 if ((flags & MSG_DONTWAIT) == 0) 878 goto retry; /* wait forever */ 879 880 /* no data */ 881 errno = EWOULDBLOCK; 882 return (-1); 883 } 884 885 if (from != NULL && fromlen != NULL) { 886 switch (so_type) { 887 case INETBOOT_STREAM: 888 /* Need to copy from the socket's remote address. */ 889 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen, 890 sizeof (struct sockaddr_in))); 891 break; 892 case INETBOOT_RAW: 893 case INETBOOT_DGRAM: 894 default: 895 if (*fromlen > sizeof (icp->igm_saddr)) 896 *fromlen = sizeof (icp->igm_saddr); 897 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from, 898 MIN(*fromlen, sizeof (struct sockaddr_in))); 899 break; 900 } 901 } 902 903 mp = icp->igm_mp; 904 switch (so_type) { 905 case INETBOOT_STREAM: 906 /* 907 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need 908 * to drain the data held by tcp and try again. 909 */ 910 if (icp->igm_id == TCP_CALLB_MAGIC_ID) { 911 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 912 tcp_rcv_drain_sock(sock_id); 913 goto retry; 914 } 915 916 /* TCP should put only user data in the inetgram. */ 917 tmp_buf = (char *)buf; 918 while (len > 0 && icp != NULL) { 919 datalen = mp->b_wptr - mp->b_rptr; 920 if (len < datalen) { 921 bcopy(mp->b_rptr, tmp_buf, len); 922 bytes += len; 923 mp->b_rptr += len; 924 break; 925 } else { 926 bcopy(mp->b_rptr, tmp_buf, datalen); 927 len -= datalen; 928 bytes += datalen; 929 tmp_buf += datalen; 930 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 931 932 /* 933 * If we have any embedded magic messages just 934 * drop them. 935 */ 936 while ((icp = sockets[sock_id].inq) != NULL) { 937 if (icp->igm_id != TCP_CALLB_MAGIC_ID) 938 break; 939 del_gram(&sockets[sock_id].inq, icp, 940 B_TRUE); 941 } 942 943 if (icp == NULL) 944 break; 945 mp = icp->igm_mp; 946 } 947 } 948 sockets[sock_id].so_rcvbuf += (int32_t)bytes; 949 break; 950 case INETBOOT_DGRAM: 951 datalen = mp->b_wptr - mp->b_rptr; 952 if (len < datalen) 953 bytes = len; 954 else 955 bytes = datalen; 956 bcopy(mp->b_rptr, buf, bytes); 957 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 958 break; 959 case INETBOOT_RAW: 960 default: 961 datalen = mp->b_wptr - mp->b_rptr; 962 if (len < datalen) 963 bytes = len; 964 else 965 bytes = datalen; 966 bcopy(mp->b_rptr, buf, bytes); 967 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 968 break; 969 } 970 971 #ifdef DEBUG 972 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id, 973 (icp != NULL) ? icp->igm_mp : 0, bytes); 974 #endif /* DEBUG */ 975 return (bytes); 976 } 977 978 979 /* Just a wrapper around sendto(). */ 980 ssize_t 981 send(int s, const void *msg, size_t len, int flags) 982 { 983 return (sendto(s, msg, len, flags, NULL, 0)); 984 } 985 986 /* 987 * Transmit a message through a socket. 988 * 989 * Supported flags: MSG_DONTROUTE or 0. 990 */ 991 ssize_t 992 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to, 993 socklen_t tolen) 994 { 995 enum SockType so_type; 996 int sock_id; 997 ssize_t bytes; 998 999 errno = 0; 1000 1001 if ((sock_id = so_check_fd(s, &errno)) == -1) { 1002 return (-1); 1003 } 1004 if (msg == NULL) { 1005 errno = EINVAL; 1006 return (-1); 1007 } 1008 so_type = sockets[sock_id].type; 1009 if ((flags & ~MSG_DONTROUTE) != 0) { 1010 errno = EINVAL; 1011 return (-1); 1012 } 1013 if (sockets[sock_id].so_error != 0) { 1014 errno = sockets[sock_id].so_error; 1015 return (-1); 1016 } 1017 if (to != NULL && to->sa_family != AF_INET) { 1018 errno = EAFNOSUPPORT; 1019 return (-1); 1020 } 1021 1022 switch (so_type) { 1023 case INETBOOT_RAW: 1024 case INETBOOT_DGRAM: 1025 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) && 1026 (to == NULL || tolen != sizeof (struct sockaddr_in))) { 1027 errno = EINVAL; 1028 return (-1); 1029 } 1030 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen); 1031 break; 1032 case INETBOOT_STREAM: 1033 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) || 1034 (sockets[sock_id].so_state & SS_ISCONNECTING))) { 1035 errno = EINVAL; 1036 return (-1); 1037 } 1038 if (sockets[sock_id].so_state & SS_CANTSENDMORE) { 1039 errno = EPIPE; 1040 return (-1); 1041 } 1042 bytes = stream_sendto(sock_id, msg, len, flags); 1043 break; 1044 default: 1045 /* Should not happen... */ 1046 errno = EPROTOTYPE; 1047 return (-1); 1048 } 1049 return (bytes); 1050 } 1051 1052 static ssize_t 1053 dgram_sendto(int i, const void *msg, size_t len, int flags, 1054 const struct sockaddr *to, int tolen) 1055 { 1056 struct inetgram oc; 1057 int l, offset; 1058 size_t tlen; 1059 mblk_t *mp; 1060 1061 #ifdef DEBUG 1062 { 1063 struct sockaddr_in *sin = (struct sockaddr_in *)to; 1064 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n", 1065 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); 1066 } 1067 #endif /* DEBUG */ 1068 1069 nuke_grams(&sockets[i].inq); /* flush the input queue */ 1070 1071 /* calculate offset for data */ 1072 offset = sockets[i].headerlen[MEDIA_LVL](NULL) + 1073 (sockets[i].headerlen[NETWORK_LVL])(NULL); 1074 1075 bzero((caddr_t)&oc, sizeof (oc)); 1076 if (sockets[i].type != INETBOOT_RAW) { 1077 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL); 1078 oc.igm_level = TRANSPORT_LVL; 1079 } else 1080 oc.igm_level = NETWORK_LVL; 1081 oc.igm_oflags = flags; 1082 1083 if (to != NULL) { 1084 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen); 1085 } else { 1086 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr, 1087 sizeof (struct sockaddr_in)); 1088 } 1089 1090 /* Get a legal source port if the socket isn't bound. */ 1091 if (sockets[i].bound == B_FALSE && 1092 ntohs(oc.igm_saddr.sin_port == 0)) { 1093 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port = 1094 get_source_port(B_FALSE); 1095 } 1096 1097 /* Round up to 16bit value for checksum purposes */ 1098 if (sockets[i].type == INETBOOT_DGRAM) { 1099 tlen = ((len + sizeof (uint16_t) - 1) & 1100 ~(sizeof (uint16_t) - 1)); 1101 } else 1102 tlen = len; 1103 1104 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) { 1105 errno = ENOMEM; 1106 return (-1); 1107 } 1108 mp = oc.igm_mp; 1109 mp->b_rptr = mp->b_wptr += offset; 1110 bcopy((caddr_t)msg, mp->b_wptr, len); 1111 mp->b_wptr += len; 1112 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) { 1113 if (sockets[i].output[l] != NULL) { 1114 if (sockets[i].output[l](i, &oc) < 0) { 1115 freeb(mp); 1116 if (errno == 0) 1117 errno = EIO; 1118 return (-1); 1119 } 1120 } 1121 } 1122 freeb(mp); 1123 return (len); 1124 } 1125 1126 /* ARGSUSED */ 1127 static ssize_t 1128 stream_sendto(int i, const void *msg, size_t len, int flags) 1129 { 1130 int cnt; 1131 1132 assert(sockets[i].pcb != NULL); 1133 1134 /* 1135 * Call directly TCP's send routine. We do this because TCP 1136 * needs to decide whether to send out the data. 1137 * 1138 * Note also that currently, TCP ignores all flags passed in for 1139 * TCP socket. 1140 */ 1141 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) { 1142 if (sockets[i].so_error != 0) 1143 errno = sockets[i].so_error; 1144 return (-1); 1145 } else { 1146 return (cnt); 1147 } 1148 } 1149 1150 /* 1151 * Returns ptr to the last inetgram in the list, or null if list is null 1152 */ 1153 struct inetgram * 1154 last_gram(struct inetgram *igp) 1155 { 1156 struct inetgram *wp; 1157 for (wp = igp; wp != NULL; wp = wp->igm_next) { 1158 if (wp->igm_next == NULL) 1159 return (wp); 1160 } 1161 return (NULL); 1162 } 1163 1164 /* 1165 * Adds an inetgram or list of inetgrams to the end of the list. 1166 */ 1167 void 1168 add_grams(struct inetgram **igpp, struct inetgram *newgp) 1169 { 1170 struct inetgram *wp; 1171 1172 if (newgp == NULL) 1173 return; 1174 1175 if (*igpp == NULL) 1176 *igpp = newgp; 1177 else { 1178 wp = last_gram(*igpp); 1179 wp->igm_next = newgp; 1180 } 1181 } 1182 1183 /* 1184 * Nuke a whole list of grams. 1185 */ 1186 void 1187 nuke_grams(struct inetgram **lgpp) 1188 { 1189 while (*lgpp != NULL) 1190 del_gram(lgpp, *lgpp, B_TRUE); 1191 } 1192 1193 /* 1194 * Remove the referenced inetgram. List is altered accordingly. Destroy the 1195 * referenced inetgram if freeit is B_TRUE. 1196 */ 1197 void 1198 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit) 1199 { 1200 struct inetgram *wp, *pp = NULL; 1201 1202 if (lgpp == NULL || igp == NULL) 1203 return; 1204 1205 wp = *lgpp; 1206 while (wp != NULL) { 1207 if (wp == igp) { 1208 /* detach wp from the list */ 1209 if (*lgpp == wp) 1210 *lgpp = (*lgpp)->igm_next; 1211 else 1212 pp->igm_next = wp->igm_next; 1213 igp->igm_next = NULL; 1214 1215 if (freeit) { 1216 if (igp->igm_mp != NULL) 1217 freeb(igp->igm_mp); 1218 bkmem_free((caddr_t)igp, 1219 sizeof (struct inetgram)); 1220 } 1221 break; 1222 } 1223 pp = wp; 1224 wp = wp->igm_next; 1225 } 1226 } 1227 1228 struct nct_t nct[] = { 1229 "bootp", NCT_BOOTP_DHCP, 1230 "dhcp", NCT_BOOTP_DHCP, 1231 "rarp", NCT_RARP_BOOTPARAMS, 1232 "manual", NCT_MANUAL 1233 }; 1234 int nct_entries = sizeof (nct) / sizeof (nct[0]); 1235 1236 /* 1237 * Figure out from the bootpath what kind of network configuration strategy 1238 * we should use. Returns the network config strategy. 1239 */ 1240 int 1241 get_netconfig_strategy(void) 1242 { 1243 int i; 1244 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0') 1245 char lbootpath[OBP_MAXPATHLEN]; 1246 char net_options[NCT_BUFSIZE]; 1247 char *op, *nop, *sp; 1248 pnode_t cn; 1249 int proplen; 1250 1251 /* If the PROM DHCP cache exists, we're done */ 1252 if (prom_cached_reply(B_TRUE)) 1253 return (NCT_BOOTP_DHCP); 1254 1255 /* 1256 * Newer (version 4) PROMs will put the name in the 1257 * "net-config-strategy" property. 1258 */ 1259 cn = prom_finddevice("/chosen"); 1260 if ((proplen = prom_getproplen(cn, "net-config-strategy")) < 1261 sizeof (net_options)) { 1262 (void) prom_getprop(cn, "net-config-strategy", net_options); 1263 net_options[proplen] = '\0'; 1264 } else { 1265 1266 /* 1267 * We're reduced to sacanning bootpath for the prototol to use. 1268 * Since there was no "net-config-strategy" property, this is 1269 * an old PROM, so we need to excise any extraneous key/value 1270 * initializations from bootpath[]. 1271 */ 1272 for (op = prom_bootpath(), sp = lbootpath; op != NULL && 1273 !ISSPACE(*op); sp++, op++) 1274 *sp = *op; 1275 *sp = '\0'; 1276 /* find the last '/' (in the device path) */ 1277 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */ 1278 op = lbootpath; 1279 else 1280 op++; 1281 /* then look for the ':' separating it from the protocol */ 1282 while (*op != ':' && *op != '\0') 1283 op++; 1284 1285 if (*op == ':') { 1286 for (nop = net_options, op++; 1287 *op != '\0' && *op != '/' && !ISSPACE(*op) && 1288 nop < &net_options[NCT_BUFSIZE]; nop++, op++) 1289 *nop = *op; 1290 *nop = '\0'; 1291 } else 1292 net_options[0] = '\0'; 1293 } 1294 1295 #undef ISSPACE 1296 1297 for (i = 0; i < nct_entries; i++) 1298 if (strcmp(net_options, nct[i].p_name) == 0) 1299 return (nct[i].p_id); 1300 1301 return (NCT_DEFAULT); 1302 } 1303 1304 /* Modified STREAM routines for ease of porting core TCP code. */ 1305 1306 /*ARGSUSED*/ 1307 mblk_t * 1308 allocb(size_t size, uint_t pri) 1309 { 1310 unsigned char *base; 1311 mblk_t *mp; 1312 1313 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL) 1314 return (NULL); 1315 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL) 1316 return (NULL); 1317 1318 mp->b_next = mp->b_prev = mp->b_cont = NULL; 1319 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base; 1320 mp->b_size = size; 1321 1322 return (mp); 1323 } 1324 1325 void 1326 freeb(mblk_t *mp) 1327 { 1328 #ifdef DEBUG 1329 printf("freeb datap %x\n", mp->b_datap); 1330 #endif 1331 bkmem_free((caddr_t)(mp->b_datap), mp->b_size); 1332 #ifdef DEBUG 1333 printf("freeb mp %x\n", mp); 1334 #endif 1335 bkmem_free((caddr_t)mp, sizeof (mblk_t)); 1336 } 1337 1338 void 1339 freemsg(mblk_t *mp) 1340 { 1341 while (mp) { 1342 mblk_t *mp_cont = mp->b_cont; 1343 1344 freeb(mp); 1345 mp = mp_cont; 1346 } 1347 } 1348 1349 mblk_t * 1350 copyb(mblk_t *bp) 1351 { 1352 mblk_t *nbp; 1353 unsigned char *ndp; 1354 1355 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0); 1356 1357 if (!(nbp = allocb(bp->b_size, 0))) 1358 return (NULL); 1359 nbp->b_cont = NULL; 1360 ndp = nbp->b_datap; 1361 1362 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap); 1363 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr); 1364 bcopy(bp->b_datap, nbp->b_datap, bp->b_size); 1365 return (nbp); 1366 } 1367 1368 /* To simplify things, dupb() is implemented as copyb(). */ 1369 mblk_t * 1370 dupb(mblk_t *mp) 1371 { 1372 return (copyb(mp)); 1373 } 1374 1375 /* 1376 * get number of data bytes in message 1377 */ 1378 size_t 1379 msgdsize(mblk_t *bp) 1380 { 1381 size_t count = 0; 1382 1383 for (; bp != NULL; bp = bp->b_cont) { 1384 assert(bp->b_wptr >= bp->b_rptr); 1385 count += bp->b_wptr - bp->b_rptr; 1386 } 1387 return (count); 1388 } 1389