1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 * 26 * socket.c, Code implementing a simple socket interface. 27 */ 28 29 #pragma ident "%Z%%M% %I% %E% SMI" 30 31 #include <sys/types.h> 32 #include "socket_impl.h" 33 #include <sys/isa_defs.h> 34 #include <sys/sysmacros.h> 35 #include <sys/bootconf.h> 36 #include <sys/socket.h> 37 #include <netinet/in.h> 38 #include <netinet/ip.h> 39 #include <netinet/tcp.h> 40 #include <sys/uio.h> 41 #include <sys/salib.h> 42 #include "socket_inet.h" 43 #include "ipv4.h" 44 #include "ipv4_impl.h" 45 #include "udp_inet.h" 46 #include "tcp_inet.h" 47 #include "mac.h" 48 #include "mac_impl.h" 49 #include <sys/promif.h> 50 51 struct inetboot_socket sockets[MAXSOCKET] = { 0 }; 52 53 /* Default send and receive socket buffer size */ 54 #define SO_DEF_SNDBUF 48*1024 55 #define SO_DEF_RCVBUF 48*1024 56 57 /* Default max socket buffer size */ 58 #define SO_MAX_BUF 4*1024*1024 59 60 static ssize_t dgram_sendto(int, const void *, size_t, int, 61 const struct sockaddr *, int); 62 static ssize_t stream_sendto(int, const void *, size_t, int); 63 static int bind_check(int, const struct sockaddr *); 64 static int quickbind(int); 65 66 /* Check the validity of a fd and return the socket index of that fd. */ 67 int 68 so_check_fd(int fd, int *errno) 69 { 70 int i; 71 72 i = FD_TO_SOCKET(fd); 73 if (i < 0 || i >= MAXSOCKET) { 74 *errno = ENOTSOCK; 75 return (-1); 76 } 77 if (sockets[i].type == INETBOOT_UNUSED) { 78 *errno = ENOTSOCK; 79 return (-1); 80 } 81 return (i); 82 } 83 84 /* 85 * Create an endpoint for network communication. Returns a descriptor. 86 * 87 * Notes: 88 * Only PF_INET communication domains are supported. Within 89 * this domain, only SOCK_RAW, SOCK_DGRAM and SOCK_STREAM types are 90 * supported. 91 */ 92 int 93 socket(int domain, int type, int protocol) 94 { 95 static int sock_initialized; 96 int i; 97 98 errno = 0; 99 100 if (!sock_initialized) { 101 for (i = 0; i < MAXSOCKET; i++) 102 sockets[i].type = INETBOOT_UNUSED; 103 sock_initialized = B_TRUE; 104 } 105 if (domain != AF_INET) { 106 errno = EPROTONOSUPPORT; 107 return (-1); 108 } 109 110 /* Find available socket */ 111 for (i = 0; i < MAXSOCKET; i++) { 112 if (sockets[i].type == INETBOOT_UNUSED) 113 break; 114 } 115 if (i >= MAXSOCKET) { 116 errno = EMFILE; /* No slots left. */ 117 return (-1); 118 } 119 120 /* Some socket initialization... */ 121 sockets[i].so_rcvbuf = SO_DEF_RCVBUF; 122 sockets[i].so_sndbuf = SO_DEF_SNDBUF; 123 124 /* 125 * Note that we ignore the protocol field for SOCK_DGRAM and 126 * SOCK_STREAM. When we support different protocols in future, 127 * this needs to be changed. 128 */ 129 switch (type) { 130 case SOCK_RAW: 131 ipv4_raw_socket(&sockets[i], (uint8_t)protocol); 132 break; 133 case SOCK_DGRAM: 134 udp_socket_init(&sockets[i]); 135 break; 136 case SOCK_STREAM: 137 tcp_socket_init(&sockets[i]); 138 break; 139 default: 140 errno = EPROTOTYPE; 141 break; 142 } 143 144 if (errno != 0) 145 return (-1); 146 147 /* IPv4 generic initialization. */ 148 ipv4_socket_init(&sockets[i]); 149 150 /* MAC generic initialization. */ 151 mac_socket_init(&sockets[i]); 152 153 return (i + SOCKETTYPE); 154 } 155 156 int 157 getsockname(int s, struct sockaddr *name, socklen_t *namelen) 158 { 159 int i; 160 161 errno = 0; 162 if ((i = so_check_fd(s, &errno)) == -1) 163 return (-1); 164 165 if (*namelen < sizeof (struct sockaddr_in)) { 166 errno = ENOMEM; 167 return (-1); 168 } 169 170 /* Structure assignment... */ 171 *((struct sockaddr_in *)name) = sockets[i].bind; 172 *namelen = sizeof (struct sockaddr_in); 173 return (0); 174 } 175 176 /* 177 * The socket options we support are: 178 * SO_RCVTIMEO - Value is in msecs, and is of uint32_t. 179 * SO_DONTROUTE - Value is an int, and is a boolean (nonzero if set). 180 * SO_REUSEADDR - Value is an int boolean. 181 * SO_RCVBUF - Value is an int. 182 * SO_SNDBUF - Value is an int. 183 */ 184 int 185 getsockopt(int s, int level, int option, void *optval, socklen_t *optlen) 186 { 187 int i; 188 189 errno = 0; 190 if ((i = so_check_fd(s, &errno)) == -1) 191 return (-1); 192 193 switch (level) { 194 case SOL_SOCKET: { 195 switch (option) { 196 case SO_RCVTIMEO: 197 if (*optlen == sizeof (uint32_t)) { 198 *(uint32_t *)optval = sockets[i].in_timeout; 199 } else { 200 *optlen = 0; 201 errno = EINVAL; 202 } 203 break; 204 case SO_DONTROUTE: 205 if (*optlen == sizeof (int)) { 206 *(int *)optval = 207 (sockets[i].out_flags & SO_DONTROUTE); 208 } else { 209 *optlen = 0; 210 errno = EINVAL; 211 } 212 break; 213 case SO_REUSEADDR: 214 if (*optlen == sizeof (int)) { 215 *(int *)optval = 216 (sockets[i].so_opt & SO_REUSEADDR); 217 } else { 218 *optlen = 0; 219 errno = EINVAL; 220 } 221 break; 222 case SO_RCVBUF: 223 if (*optlen == sizeof (int)) { 224 *(int *)optval = sockets[i].so_rcvbuf; 225 } else { 226 *optlen = 0; 227 errno = EINVAL; 228 } 229 break; 230 case SO_SNDBUF: 231 if (*optlen == sizeof (int)) { 232 *(int *)optval = sockets[i].so_sndbuf; 233 } else { 234 *optlen = 0; 235 errno = EINVAL; 236 } 237 break; 238 case SO_LINGER: 239 if (*optlen == sizeof (struct linger)) { 240 /* struct copy */ 241 *(struct linger *)optval = sockets[i].so_linger; 242 } else { 243 *optlen = 0; 244 errno = EINVAL; 245 } 246 default: 247 errno = ENOPROTOOPT; 248 break; 249 } 250 break; 251 } /* case SOL_SOCKET */ 252 case IPPROTO_TCP: 253 case IPPROTO_IP: { 254 switch (option) { 255 default: 256 *optlen = 0; 257 errno = ENOPROTOOPT; 258 break; 259 } 260 break; 261 } /* case IPPROTO_IP or IPPROTO_TCP */ 262 default: 263 errno = ENOPROTOOPT; 264 break; 265 } /* switch (level) */ 266 267 if (errno != 0) 268 return (-1); 269 else 270 return (0); 271 } 272 273 /* 274 * Generate a network-order source port from the privileged range if 275 * "reserved" is true, dynamic/private range otherwise. We consider the 276 * range of 512-1023 privileged ports as ports we can use. This mirrors 277 * historical rpc client practice for privileged port selection. 278 */ 279 in_port_t 280 get_source_port(boolean_t reserved) 281 { 282 static in_port_t dynamic = IPPORT_DYNAMIC_START - 1, 283 rsvdport = (IPPORT_RESERVED / 2) - 1; 284 in_port_t p; 285 286 if (reserved) { 287 if (++rsvdport >= IPPORT_RESERVED) 288 p = rsvdport = IPPORT_RESERVED / 2; 289 else 290 p = rsvdport; 291 } else 292 p = ++dynamic; 293 294 return (htons(p)); 295 } 296 297 /* 298 * The socket options we support are: 299 * SO_RECVTIMEO - Value is uint32_t msecs. 300 * SO_DONTROUTE - Value is int boolean (nonzero == TRUE, zero == FALSE). 301 * SO_REUSEADDR - value is int boolean. 302 * SO_RCVBUF - Value is int. 303 * SO_SNDBUF - Value is int. 304 */ 305 int 306 setsockopt(int s, int level, int option, const void *optval, socklen_t optlen) 307 { 308 int i; 309 310 errno = 0; 311 if ((i = so_check_fd(s, &errno)) == -1) 312 return (-1); 313 314 switch (level) { 315 case SOL_SOCKET: { 316 switch (option) { 317 case SO_RCVTIMEO: 318 if (optlen == sizeof (uint32_t)) 319 sockets[i].in_timeout = *(uint32_t *)optval; 320 else { 321 errno = EINVAL; 322 } 323 break; 324 case SO_DONTROUTE: 325 if (optlen == sizeof (int)) { 326 if (*(int *)optval) 327 sockets[i].out_flags |= SO_DONTROUTE; 328 else 329 sockets[i].out_flags &= ~SO_DONTROUTE; 330 } else { 331 errno = EINVAL; 332 } 333 break; 334 case SO_REUSEADDR: 335 if (optlen == sizeof (int)) { 336 if (*(int *)optval) 337 sockets[i].so_opt |= SO_REUSEADDR; 338 else 339 sockets[i].so_opt &= ~SO_REUSEADDR; 340 } else { 341 errno = EINVAL; 342 } 343 break; 344 case SO_RCVBUF: 345 if (optlen == sizeof (int)) { 346 sockets[i].so_rcvbuf = *(int *)optval; 347 if (sockets[i].so_rcvbuf > SO_MAX_BUF) 348 sockets[i].so_rcvbuf = SO_MAX_BUF; 349 (void) tcp_opt_set(sockets[i].pcb, 350 level, option, optval, optlen); 351 } else { 352 errno = EINVAL; 353 } 354 break; 355 case SO_SNDBUF: 356 if (optlen == sizeof (int)) { 357 sockets[i].so_sndbuf = *(int *)optval; 358 if (sockets[i].so_sndbuf > SO_MAX_BUF) 359 sockets[i].so_sndbuf = SO_MAX_BUF; 360 (void) tcp_opt_set(sockets[i].pcb, 361 level, option, optval, optlen); 362 } else { 363 errno = EINVAL; 364 } 365 break; 366 case SO_LINGER: 367 if (optlen == sizeof (struct linger)) { 368 /* struct copy */ 369 sockets[i].so_linger = *(struct linger *)optval; 370 (void) tcp_opt_set(sockets[i].pcb, 371 level, option, optval, optlen); 372 } else { 373 errno = EINVAL; 374 } 375 break; 376 default: 377 errno = ENOPROTOOPT; 378 break; 379 } 380 break; 381 } /* case SOL_SOCKET */ 382 case IPPROTO_TCP: 383 case IPPROTO_IP: { 384 switch (option) { 385 default: 386 errno = ENOPROTOOPT; 387 break; 388 } 389 break; 390 } /* case IPPROTO_IP or IPPROTO_TCP */ 391 default: 392 errno = ENOPROTOOPT; 393 break; 394 } /* switch (level) */ 395 396 if (errno != 0) 397 return (-1); 398 else 399 return (0); 400 } 401 402 /* 403 * Shut down part of a full-duplex connection. 404 * 405 * Only supported for TCP sockets 406 */ 407 int 408 shutdown(int s, int how) 409 { 410 int sock_id; 411 int i; 412 413 errno = 0; 414 if ((sock_id = so_check_fd(s, &errno)) == -1) 415 return (-1); 416 417 /* shutdown only supported for TCP sockets */ 418 if (sockets[sock_id].type != INETBOOT_STREAM) { 419 errno = EOPNOTSUPP; 420 return (-1); 421 } 422 423 if (!(sockets[sock_id].so_state & SS_ISCONNECTED)) { 424 errno = ENOTCONN; 425 return (-1); 426 } 427 428 switch (how) { 429 case 0: 430 sockets[sock_id].so_state |= SS_CANTRCVMORE; 431 break; 432 case 1: 433 sockets[sock_id].so_state |= SS_CANTSENDMORE; 434 break; 435 case 2: 436 sockets[sock_id].so_state |= (SS_CANTRCVMORE | SS_CANTSENDMORE); 437 break; 438 default: 439 errno = EINVAL; 440 return (-1); 441 } 442 443 switch (sockets[sock_id].so_state & 444 (SS_CANTRCVMORE | SS_CANTSENDMORE)) { 445 case (SS_CANTRCVMORE | SS_CANTSENDMORE): 446 /* Call lower level protocol close routine. */ 447 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 448 if (sockets[sock_id].close[i] != NULL) { 449 (void) sockets[sock_id].close[i](sock_id); 450 } 451 } 452 nuke_grams(&sockets[sock_id].inq); 453 break; 454 case SS_CANTRCVMORE: 455 nuke_grams(&sockets[sock_id].inq); 456 break; 457 case SS_CANTSENDMORE: 458 /* Call lower level protocol close routine. */ 459 if (tcp_shutdown(sock_id) < 0) 460 return (-1); 461 break; 462 default: 463 errno = EINVAL; 464 return (-1); 465 } 466 467 return (0); 468 } 469 470 /* 471 * "close" a socket. 472 */ 473 int 474 socket_close(int s) 475 { 476 int sock_id, i; 477 478 errno = 0; 479 if ((sock_id = so_check_fd(s, &errno)) == -1) 480 return (-1); 481 482 /* Call lower level protocol close routine. */ 483 for (i = TRANSPORT_LVL; i >= MEDIA_LVL; i--) { 484 if (sockets[sock_id].close[i] != NULL) { 485 /* 486 * Note that the close() routine of other 487 * layers can return an error. But right 488 * now, the only mechanism to report that 489 * back is for the close() routine to set 490 * the errno and socket_close() will return 491 * an error. But the close operation will 492 * not be stopped. 493 */ 494 (void) sockets[sock_id].close[i](sock_id); 495 } 496 } 497 498 /* 499 * Clear the input queue. This has to be done 500 * after the lower level protocol close routines have been 501 * called as they may want to do something about the queue. 502 */ 503 nuke_grams(&sockets[sock_id].inq); 504 505 bzero((caddr_t)&sockets[sock_id], sizeof (struct inetboot_socket)); 506 sockets[sock_id].type = INETBOOT_UNUSED; 507 508 return (0); 509 } 510 511 /* 512 * Read up to `nbyte' of data from socket `s' into `buf'; if non-zero, 513 * then give up after `read_timeout' seconds. Returns the number of 514 * bytes read, or -1 on failure. 515 */ 516 int 517 socket_read(int s, void *buf, size_t nbyte, int read_timeout) 518 { 519 ssize_t n; 520 uint_t start, diff; 521 522 /* 523 * keep calling non-blocking recvfrom until something received 524 * or an error occurs 525 */ 526 start = prom_gettime(); 527 for (;;) { 528 n = recvfrom(s, buf, nbyte, MSG_DONTWAIT, NULL, NULL); 529 if (n == -1 && errno == EWOULDBLOCK) { 530 diff = (uint_t)((prom_gettime() - start) + 500) / 1000; 531 if (read_timeout != 0 && diff > read_timeout) { 532 errno = EINTR; 533 return (-1); 534 } 535 } else { 536 return (n); 537 } 538 } 539 } 540 541 /* 542 * Write up to `nbyte' bytes of data from `buf' to the address pointed to 543 * `addr' using socket `s'. Returns the number of bytes writte on success, 544 * or -1 on failure. 545 */ 546 int 547 socket_write(int s, const void *buf, size_t nbyte, struct sockaddr_in *addr) 548 { 549 return (sendto(s, buf, nbyte, 0, (struct sockaddr *)addr, 550 sizeof (*addr))); 551 } 552 553 static int 554 bind_check(int sock_id, const struct sockaddr *addr) 555 { 556 int k; 557 struct sockaddr_in *in_addr = (struct sockaddr_in *)addr; 558 559 /* Do not check for duplicate bind() if SO_REUSEADDR option is set. */ 560 if (! (sockets[sock_id].so_opt & SO_REUSEADDR)) { 561 for (k = 0; k < MAXSOCKET; k++) { 562 if (sockets[k].type != INETBOOT_UNUSED && 563 sockets[k].proto == sockets[sock_id].proto && 564 sockets[k].bound) { 565 if ((sockets[k].bind.sin_addr.s_addr == 566 in_addr->sin_addr.s_addr) && 567 (sockets[k].bind.sin_port == 568 in_addr->sin_port)) { 569 errno = EADDRINUSE; 570 return (-1); 571 } 572 } 573 } 574 } 575 return (0); 576 } 577 578 /* Assign a name to an unnamed socket. */ 579 int 580 bind(int s, const struct sockaddr *name, socklen_t namelen) 581 { 582 int i; 583 584 errno = 0; 585 586 if ((i = so_check_fd(s, &errno)) == -1) 587 return (-1); 588 589 if (name == NULL) { 590 /* unbind */ 591 if (sockets[i].bound) { 592 bzero((caddr_t)&sockets[i].bind, 593 sizeof (struct sockaddr_in)); 594 sockets[i].bound = B_FALSE; 595 } 596 return (0); 597 } 598 if (namelen != sizeof (struct sockaddr_in) || name == NULL) { 599 errno = EINVAL; 600 return (-1); 601 } 602 if (name->sa_family != AF_INET) { 603 errno = EAFNOSUPPORT; 604 return (-1); 605 } 606 if (sockets[i].bound) { 607 if (bcmp((caddr_t)&sockets[i].bind, (caddr_t)name, 608 namelen) == 0) { 609 /* attempt to bind to same address ok... */ 610 return (0); 611 } 612 errno = EINVAL; /* already bound */ 613 return (-1); 614 } 615 616 if (errno != 0) { 617 return (-1); 618 } 619 620 /* Check for duplicate bind(). */ 621 if (bind_check(i, name) < 0) 622 return (-1); 623 624 bcopy((caddr_t)name, (caddr_t)&sockets[i].bind, namelen); 625 if (sockets[i].type == INETBOOT_STREAM) { 626 if (tcp_bind(i) < 0) { 627 return (-1); 628 } 629 } 630 sockets[i].bound = B_TRUE; 631 632 return (0); 633 } 634 635 static int 636 quickbind(int sock_id) 637 { 638 int i; 639 struct sockaddr_in addr; 640 641 /* 642 * XXX This needs more work. Right now, if ipv4_setipaddr() 643 * have not been called, this will be wrong. But we need 644 * something better. Need to be revisited. 645 */ 646 ipv4_getipaddr(&addr.sin_addr); 647 addr.sin_family = AF_INET; 648 649 for (i = SMALLEST_ANON_PORT; i <= LARGEST_ANON_PORT; i++) { 650 addr.sin_port = htons(i); 651 if (bind_check(sock_id, (struct sockaddr *)&addr) == 0) 652 break; 653 } 654 /* Need to clear errno as it is probably set by bind_check(). */ 655 errno = 0; 656 657 if (i <= LARGEST_ANON_PORT) { 658 bcopy((caddr_t)&addr, (caddr_t)&sockets[sock_id].bind, 659 sizeof (struct sockaddr_in)); 660 sockets[sock_id].bound = B_TRUE; 661 #ifdef DEBUG 662 printf("quick bind done addr %s port %d\n", 663 inet_ntoa(sockets[sock_id].bind.sin_addr), 664 ntohs(sockets[sock_id].bind.sin_port)); 665 #endif 666 return (0); 667 } else { 668 return (-1); 669 } 670 } 671 672 int 673 listen(int fd, int backlog) 674 { 675 int sock_id; 676 677 errno = 0; 678 if ((sock_id = so_check_fd(fd, &errno)) == -1) 679 return (-1); 680 681 if (sockets[sock_id].type != INETBOOT_STREAM) { 682 errno = EOPNOTSUPP; 683 return (-1); 684 } 685 if (sockets[sock_id].so_error != 0) { 686 errno = sockets[sock_id].so_error; 687 return (-1); 688 } 689 return (tcp_listen(sock_id, backlog)); 690 } 691 692 int 693 accept(int fd, struct sockaddr *addr, socklen_t *addr_len) 694 { 695 int sock_id; 696 int new_sd; 697 698 errno = 0; 699 if ((sock_id = so_check_fd(fd, &errno)) == -1) 700 return (-1); 701 702 if (sockets[sock_id].type != INETBOOT_STREAM) { 703 errno = EOPNOTSUPP; 704 return (-1); 705 } 706 if (sockets[sock_id].so_error != 0) { 707 errno = sockets[sock_id].so_error; 708 return (-1); 709 } 710 if ((new_sd = tcp_accept(sock_id, addr, addr_len)) == -1) 711 return (-1); 712 sock_id = so_check_fd(new_sd, &errno); 713 sockets[sock_id].so_state |= SS_ISCONNECTED; 714 return (new_sd); 715 } 716 717 int 718 connect(int fd, const struct sockaddr *addr, socklen_t addr_len) 719 { 720 int sock_id; 721 int so_type; 722 723 errno = 0; 724 if ((sock_id = so_check_fd(fd, &errno)) == -1) 725 return (-1); 726 727 so_type = sockets[sock_id].type; 728 729 if (addr == NULL || addr_len == 0) { 730 errno = EINVAL; 731 return (-1); 732 } 733 /* Don't allow connect for raw socket. */ 734 if (so_type == INETBOOT_RAW) { 735 errno = EPROTONOSUPPORT; 736 return (-1); 737 } 738 739 if (sockets[sock_id].so_state & SS_ISCONNECTED) { 740 errno = EINVAL; 741 return (-1); 742 } 743 744 if (sockets[sock_id].so_error != 0) { 745 errno = sockets[sock_id].so_error; 746 return (-1); 747 } 748 749 /* If the socket is not bound, we need to do a quick bind. */ 750 if (!sockets[sock_id].bound) { 751 /* For TCP socket, just call tcp_bind(). */ 752 if (so_type == INETBOOT_STREAM) { 753 if (tcp_bind(sock_id) < 0) 754 return (-1); 755 } else { 756 if (quickbind(sock_id) < 0) { 757 errno = EADDRNOTAVAIL; 758 return (-1); 759 } 760 } 761 } 762 /* Should do some sanity check for addr .... */ 763 bcopy((caddr_t)addr, &sockets[sock_id].remote, 764 sizeof (struct sockaddr_in)); 765 766 if (sockets[sock_id].type == INETBOOT_STREAM) { 767 /* Call TCP connect routine. */ 768 if (tcp_connect(sock_id) == 0) 769 sockets[sock_id].so_state |= SS_ISCONNECTED; 770 else { 771 if (sockets[sock_id].so_error != 0) 772 errno = sockets[sock_id].so_error; 773 return (-1); 774 } 775 } else { 776 sockets[sock_id].so_state |= SS_ISCONNECTED; 777 } 778 return (0); 779 } 780 781 /* Just a wrapper around recvfrom(). */ 782 ssize_t 783 recv(int s, void *buf, size_t len, int flags) 784 { 785 return (recvfrom(s, buf, len, flags, NULL, NULL)); 786 } 787 788 /* 789 * Receive messages from a connectionless socket. Legal flags are 0 and 790 * MSG_DONTWAIT. MSG_WAITALL is not currently supported. 791 * 792 * Returns length of message for success, -1 if error occurred. 793 */ 794 ssize_t 795 recvfrom(int s, void *buf, size_t len, int flags, struct sockaddr *from, 796 socklen_t *fromlen) 797 { 798 int sock_id, i; 799 ssize_t datalen, bytes = 0; 800 struct inetgram *icp; 801 enum SockType so_type; 802 char *tmp_buf; 803 mblk_t *mp; 804 805 errno = 0; 806 807 if ((sock_id = so_check_fd(s, &errno)) == -1) { 808 errno = EINVAL; 809 return (-1); 810 } 811 812 if (sockets[sock_id].type == INETBOOT_STREAM && 813 !(sockets[sock_id].so_state & SS_ISCONNECTED)) { 814 errno = ENOTCONN; 815 return (-1); 816 } 817 818 if (buf == NULL || len == 0) { 819 errno = EINVAL; 820 return (-1); 821 } 822 /* Yup - MSG_WAITALL not implemented */ 823 if ((flags & ~MSG_DONTWAIT) != 0) { 824 errno = EINVAL; 825 return (-1); 826 } 827 828 retry: 829 if (sockets[sock_id].inq == NULL) { 830 /* Go out and check the wire */ 831 for (i = MEDIA_LVL; i < APP_LVL; i++) { 832 if (sockets[sock_id].input[i] != NULL) { 833 if (sockets[sock_id].input[i](sock_id) < 0) { 834 if (sockets[sock_id].so_error != 0) { 835 errno = 836 sockets[sock_id].so_error; 837 } 838 return (-1); 839 } 840 } 841 } 842 } 843 844 so_type = sockets[sock_id].type; 845 846 /* Remove unknown inetgrams from the head of inq. Can this happen? */ 847 while ((icp = sockets[sock_id].inq) != NULL) { 848 if ((so_type == INETBOOT_DGRAM || 849 so_type == INETBOOT_STREAM) && 850 icp->igm_level != APP_LVL) { 851 #ifdef DEBUG 852 printf("recvfrom: unexpected level %d frame found\n", 853 icp->igm_level); 854 #endif /* DEBUG */ 855 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 856 continue; 857 } else { 858 break; 859 } 860 } 861 862 863 if (icp == NULL) { 864 /* 865 * Checking for error should be done everytime a lower layer 866 * input routing is called. For example, if TCP gets a RST, 867 * this should be reported asap. 868 */ 869 if (sockets[sock_id].so_state & SS_CANTRCVMORE) { 870 if (sockets[sock_id].so_error != 0) { 871 errno = sockets[sock_id].so_error; 872 return (-1); 873 } else { 874 return (0); 875 } 876 } 877 878 if ((flags & MSG_DONTWAIT) == 0) 879 goto retry; /* wait forever */ 880 881 /* no data */ 882 errno = EWOULDBLOCK; 883 return (-1); 884 } 885 886 if (from != NULL && fromlen != NULL) { 887 switch (so_type) { 888 case INETBOOT_STREAM: 889 /* Need to copy from the socket's remote address. */ 890 bcopy(&(sockets[sock_id].remote), from, MIN(*fromlen, 891 sizeof (struct sockaddr_in))); 892 break; 893 case INETBOOT_RAW: 894 case INETBOOT_DGRAM: 895 default: 896 if (*fromlen > sizeof (icp->igm_saddr)) 897 *fromlen = sizeof (icp->igm_saddr); 898 bcopy((caddr_t)&(icp->igm_saddr), (caddr_t)from, 899 MIN(*fromlen, sizeof (struct sockaddr_in))); 900 break; 901 } 902 } 903 904 mp = icp->igm_mp; 905 switch (so_type) { 906 case INETBOOT_STREAM: 907 /* 908 * If the message has igm_id == TCP_CALLB_MAGIC_ID, we need 909 * to drain the data held by tcp and try again. 910 */ 911 if (icp->igm_id == TCP_CALLB_MAGIC_ID) { 912 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 913 tcp_rcv_drain_sock(sock_id); 914 goto retry; 915 } 916 917 /* TCP should put only user data in the inetgram. */ 918 tmp_buf = (char *)buf; 919 while (len > 0 && icp != NULL) { 920 datalen = mp->b_wptr - mp->b_rptr; 921 if (len < datalen) { 922 bcopy(mp->b_rptr, tmp_buf, len); 923 bytes += len; 924 mp->b_rptr += len; 925 break; 926 } else { 927 bcopy(mp->b_rptr, tmp_buf, datalen); 928 len -= datalen; 929 bytes += datalen; 930 tmp_buf += datalen; 931 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 932 933 /* 934 * If we have any embedded magic messages just 935 * drop them. 936 */ 937 while ((icp = sockets[sock_id].inq) != NULL) { 938 if (icp->igm_id != TCP_CALLB_MAGIC_ID) 939 break; 940 del_gram(&sockets[sock_id].inq, icp, 941 B_TRUE); 942 } 943 944 if (icp == NULL) 945 break; 946 mp = icp->igm_mp; 947 } 948 } 949 sockets[sock_id].so_rcvbuf += (int32_t)bytes; 950 break; 951 case INETBOOT_DGRAM: 952 datalen = mp->b_wptr - mp->b_rptr; 953 if (len < datalen) 954 bytes = len; 955 else 956 bytes = datalen; 957 bcopy(mp->b_rptr, buf, bytes); 958 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 959 break; 960 case INETBOOT_RAW: 961 default: 962 datalen = mp->b_wptr - mp->b_rptr; 963 if (len < datalen) 964 bytes = len; 965 else 966 bytes = datalen; 967 bcopy(mp->b_rptr, buf, bytes); 968 del_gram(&sockets[sock_id].inq, icp, B_TRUE); 969 break; 970 } 971 972 #ifdef DEBUG 973 printf("recvfrom(%d): data: (0x%x,%d)\n", sock_id, 974 (icp != NULL) ? icp->igm_mp : 0, bytes); 975 #endif /* DEBUG */ 976 return (bytes); 977 } 978 979 980 /* Just a wrapper around sendto(). */ 981 ssize_t 982 send(int s, const void *msg, size_t len, int flags) 983 { 984 return (sendto(s, msg, len, flags, NULL, 0)); 985 } 986 987 /* 988 * Transmit a message through a socket. 989 * 990 * Supported flags: MSG_DONTROUTE or 0. 991 */ 992 ssize_t 993 sendto(int s, const void *msg, size_t len, int flags, const struct sockaddr *to, 994 socklen_t tolen) 995 { 996 enum SockType so_type; 997 int sock_id; 998 ssize_t bytes; 999 1000 errno = 0; 1001 1002 if ((sock_id = so_check_fd(s, &errno)) == -1) { 1003 return (-1); 1004 } 1005 if (msg == NULL) { 1006 errno = EINVAL; 1007 return (-1); 1008 } 1009 so_type = sockets[sock_id].type; 1010 if ((flags & ~MSG_DONTROUTE) != 0) { 1011 errno = EINVAL; 1012 return (-1); 1013 } 1014 if (sockets[sock_id].so_error != 0) { 1015 errno = sockets[sock_id].so_error; 1016 return (-1); 1017 } 1018 if (to != NULL && to->sa_family != AF_INET) { 1019 errno = EAFNOSUPPORT; 1020 return (-1); 1021 } 1022 1023 switch (so_type) { 1024 case INETBOOT_RAW: 1025 case INETBOOT_DGRAM: 1026 if (!(sockets[sock_id].so_state & SS_ISCONNECTED) && 1027 (to == NULL || tolen != sizeof (struct sockaddr_in))) { 1028 errno = EINVAL; 1029 return (-1); 1030 } 1031 bytes = dgram_sendto(sock_id, msg, len, flags, to, tolen); 1032 break; 1033 case INETBOOT_STREAM: 1034 if (!((sockets[sock_id].so_state & SS_ISCONNECTED) || 1035 (sockets[sock_id].so_state & SS_ISCONNECTING))) { 1036 errno = EINVAL; 1037 return (-1); 1038 } 1039 if (sockets[sock_id].so_state & SS_CANTSENDMORE) { 1040 errno = EPIPE; 1041 return (-1); 1042 } 1043 bytes = stream_sendto(sock_id, msg, len, flags); 1044 break; 1045 default: 1046 /* Should not happen... */ 1047 errno = EPROTOTYPE; 1048 return (-1); 1049 } 1050 return (bytes); 1051 } 1052 1053 static ssize_t 1054 dgram_sendto(int i, const void *msg, size_t len, int flags, 1055 const struct sockaddr *to, int tolen) 1056 { 1057 struct inetgram oc; 1058 int l, offset; 1059 size_t tlen; 1060 mblk_t *mp; 1061 1062 #ifdef DEBUG 1063 { 1064 struct sockaddr_in *sin = (struct sockaddr_in *)to; 1065 printf("sendto(%d): msg of length: %d sent to port %d and host: %s\n", 1066 i, len, ntohs(sin->sin_port), inet_ntoa(sin->sin_addr)); 1067 } 1068 #endif /* DEBUG */ 1069 1070 nuke_grams(&sockets[i].inq); /* flush the input queue */ 1071 1072 /* calculate offset for data */ 1073 offset = sockets[i].headerlen[MEDIA_LVL](NULL) + 1074 (sockets[i].headerlen[NETWORK_LVL])(NULL); 1075 1076 bzero((caddr_t)&oc, sizeof (oc)); 1077 if (sockets[i].type != INETBOOT_RAW) { 1078 offset += (sockets[i].headerlen[TRANSPORT_LVL])(NULL); 1079 oc.igm_level = TRANSPORT_LVL; 1080 } else 1081 oc.igm_level = NETWORK_LVL; 1082 oc.igm_oflags = flags; 1083 1084 if (to != NULL) { 1085 bcopy((caddr_t)to, (caddr_t)&oc.igm_saddr, tolen); 1086 } else { 1087 bcopy((caddr_t)&sockets[i].remote, (caddr_t)&oc.igm_saddr, 1088 sizeof (struct sockaddr_in)); 1089 } 1090 1091 /* Get a legal source port if the socket isn't bound. */ 1092 if (sockets[i].bound == B_FALSE && 1093 ntohs(oc.igm_saddr.sin_port == 0)) { 1094 ((struct sockaddr_in *)&oc.igm_saddr)->sin_port = 1095 get_source_port(B_FALSE); 1096 } 1097 1098 /* Round up to 16bit value for checksum purposes */ 1099 if (sockets[i].type == INETBOOT_DGRAM) { 1100 tlen = ((len + sizeof (uint16_t) - 1) & 1101 ~(sizeof (uint16_t) - 1)); 1102 } else 1103 tlen = len; 1104 1105 if ((oc.igm_mp = allocb(tlen + offset, 0)) == NULL) { 1106 errno = ENOMEM; 1107 return (-1); 1108 } 1109 mp = oc.igm_mp; 1110 mp->b_rptr = mp->b_wptr += offset; 1111 bcopy((caddr_t)msg, mp->b_wptr, len); 1112 mp->b_wptr += len; 1113 for (l = TRANSPORT_LVL; l >= MEDIA_LVL; l--) { 1114 if (sockets[i].output[l] != NULL) { 1115 if (sockets[i].output[l](i, &oc) < 0) { 1116 freeb(mp); 1117 if (errno == 0) 1118 errno = EIO; 1119 return (-1); 1120 } 1121 } 1122 } 1123 freeb(mp); 1124 return (len); 1125 } 1126 1127 /* ARGSUSED */ 1128 static ssize_t 1129 stream_sendto(int i, const void *msg, size_t len, int flags) 1130 { 1131 int cnt; 1132 1133 assert(sockets[i].pcb != NULL); 1134 1135 /* 1136 * Call directly TCP's send routine. We do this because TCP 1137 * needs to decide whether to send out the data. 1138 * 1139 * Note also that currently, TCP ignores all flags passed in for 1140 * TCP socket. 1141 */ 1142 if ((cnt = tcp_send(i, sockets[i].pcb, msg, len)) < 0) { 1143 if (sockets[i].so_error != 0) 1144 errno = sockets[i].so_error; 1145 return (-1); 1146 } else { 1147 return (cnt); 1148 } 1149 } 1150 1151 /* 1152 * Returns ptr to the last inetgram in the list, or null if list is null 1153 */ 1154 struct inetgram * 1155 last_gram(struct inetgram *igp) 1156 { 1157 struct inetgram *wp; 1158 for (wp = igp; wp != NULL; wp = wp->igm_next) { 1159 if (wp->igm_next == NULL) 1160 return (wp); 1161 } 1162 return (NULL); 1163 } 1164 1165 /* 1166 * Adds an inetgram or list of inetgrams to the end of the list. 1167 */ 1168 void 1169 add_grams(struct inetgram **igpp, struct inetgram *newgp) 1170 { 1171 struct inetgram *wp; 1172 1173 if (newgp == NULL) 1174 return; 1175 1176 if (*igpp == NULL) 1177 *igpp = newgp; 1178 else { 1179 wp = last_gram(*igpp); 1180 wp->igm_next = newgp; 1181 } 1182 } 1183 1184 /* 1185 * Nuke a whole list of grams. 1186 */ 1187 void 1188 nuke_grams(struct inetgram **lgpp) 1189 { 1190 while (*lgpp != NULL) 1191 del_gram(lgpp, *lgpp, B_TRUE); 1192 } 1193 1194 /* 1195 * Remove the referenced inetgram. List is altered accordingly. Destroy the 1196 * referenced inetgram if freeit is B_TRUE. 1197 */ 1198 void 1199 del_gram(struct inetgram **lgpp, struct inetgram *igp, int freeit) 1200 { 1201 struct inetgram *wp, *pp = NULL; 1202 1203 if (lgpp == NULL || igp == NULL) 1204 return; 1205 1206 wp = *lgpp; 1207 while (wp != NULL) { 1208 if (wp == igp) { 1209 /* detach wp from the list */ 1210 if (*lgpp == wp) 1211 *lgpp = (*lgpp)->igm_next; 1212 else 1213 pp->igm_next = wp->igm_next; 1214 igp->igm_next = NULL; 1215 1216 if (freeit) { 1217 if (igp->igm_mp != NULL) 1218 freeb(igp->igm_mp); 1219 bkmem_free((caddr_t)igp, 1220 sizeof (struct inetgram)); 1221 } 1222 break; 1223 } 1224 pp = wp; 1225 wp = wp->igm_next; 1226 } 1227 } 1228 1229 struct nct_t nct[] = { 1230 "bootp", NCT_BOOTP_DHCP, 1231 "dhcp", NCT_BOOTP_DHCP, 1232 "rarp", NCT_RARP_BOOTPARAMS, 1233 "manual", NCT_MANUAL 1234 }; 1235 int nct_entries = sizeof (nct) / sizeof (nct[0]); 1236 1237 /* 1238 * Figure out from the bootpath what kind of network configuration strategy 1239 * we should use. Returns the network config strategy. 1240 */ 1241 int 1242 get_netconfig_strategy(void) 1243 { 1244 int i; 1245 #if !defined(__i386) 1246 /* sparc */ 1247 #define ISSPACE(c) (c == ' ' || c == '\t' || c == '\n' || c == '\0') 1248 char lbootpath[OBP_MAXPATHLEN]; 1249 char net_options[NCT_BUFSIZE]; 1250 char *op, *nop, *sp; 1251 pnode_t cn; 1252 int proplen; 1253 1254 /* If the PROM DHCP cache exists, we're done */ 1255 if (prom_cached_reply(B_TRUE)) 1256 return (NCT_BOOTP_DHCP); 1257 1258 /* 1259 * Newer (version 4) PROMs will put the name in the 1260 * "net-config-strategy" property. 1261 */ 1262 cn = prom_finddevice("/chosen"); 1263 if ((proplen = prom_getproplen(cn, "net-config-strategy")) < 1264 sizeof (net_options)) { 1265 (void) prom_getprop(cn, "net-config-strategy", net_options); 1266 net_options[proplen] = '\0'; 1267 } else { 1268 1269 /* 1270 * We're reduced to sacanning bootpath for the prototol to use. 1271 * Since there was no "net-config-strategy" property, this is 1272 * an old PROM, so we need to excise any extraneous key/value 1273 * initializations from bootpath[]. 1274 */ 1275 for (op = prom_bootpath(), sp = lbootpath; op != NULL && 1276 !ISSPACE(*op); sp++, op++) 1277 *sp = *op; 1278 *sp = '\0'; 1279 /* find the last '/' (in the device path) */ 1280 if ((op = strrchr(lbootpath, '/')) == NULL) /* last '/' */ 1281 op = lbootpath; 1282 else 1283 op++; 1284 /* then look for the ':' separating it from the protocol */ 1285 while (*op != ':' && *op != '\0') 1286 op++; 1287 1288 if (*op == ':') { 1289 for (nop = net_options, op++; 1290 *op != '\0' && *op != '/' && !ISSPACE(*op) && 1291 nop < &net_options[NCT_BUFSIZE]; nop++, op++) 1292 *nop = *op; 1293 *nop = '\0'; 1294 } else 1295 net_options[0] = '\0'; 1296 } 1297 1298 #undef ISSPACE 1299 #else 1300 /* i86 */ 1301 extern struct bootops bootops; 1302 extern int bgetprop(struct bootops *, char *, caddr_t, int, phandle_t); 1303 char net_options[MAXNAMELEN]; 1304 1305 /* 1306 * Look at net-config-strategy boot property to determine what protocol 1307 * will be used. 1308 */ 1309 (void) bgetprop(&bootops, "net-config-strategy", net_options, 1310 sizeof (net_options), 0); 1311 1312 #endif /* __i386 */ 1313 1314 for (i = 0; i < nct_entries; i++) 1315 if (strcmp(net_options, nct[i].p_name) == 0) 1316 return (nct[i].p_id); 1317 1318 return (NCT_DEFAULT); 1319 } 1320 1321 /* Modified STREAM routines for ease of porting core TCP code. */ 1322 1323 /*ARGSUSED*/ 1324 mblk_t * 1325 allocb(size_t size, uint_t pri) 1326 { 1327 unsigned char *base; 1328 mblk_t *mp; 1329 1330 if ((mp = (mblk_t *)bkmem_zalloc(sizeof (mblk_t))) == NULL) 1331 return (NULL); 1332 if ((base = (unsigned char *)bkmem_zalloc(size)) == NULL) 1333 return (NULL); 1334 1335 mp->b_next = mp->b_prev = mp->b_cont = NULL; 1336 mp->b_rptr = mp->b_wptr = mp->b_datap = (unsigned char *)base; 1337 mp->b_size = size; 1338 1339 return (mp); 1340 } 1341 1342 void 1343 freeb(mblk_t *mp) 1344 { 1345 #ifdef DEBUG 1346 printf("freeb datap %x\n", mp->b_datap); 1347 #endif 1348 bkmem_free((caddr_t)(mp->b_datap), mp->b_size); 1349 #ifdef DEBUG 1350 printf("freeb mp %x\n", mp); 1351 #endif 1352 bkmem_free((caddr_t)mp, sizeof (mblk_t)); 1353 } 1354 1355 void 1356 freemsg(mblk_t *mp) 1357 { 1358 while (mp) { 1359 mblk_t *mp_cont = mp->b_cont; 1360 1361 freeb(mp); 1362 mp = mp_cont; 1363 } 1364 } 1365 1366 mblk_t * 1367 copyb(mblk_t *bp) 1368 { 1369 mblk_t *nbp; 1370 unsigned char *ndp; 1371 1372 assert((uintptr_t)(bp->b_wptr - bp->b_rptr) >= 0); 1373 1374 if (!(nbp = allocb(bp->b_size, 0))) 1375 return (NULL); 1376 nbp->b_cont = NULL; 1377 ndp = nbp->b_datap; 1378 1379 nbp->b_rptr = ndp + (bp->b_rptr - bp->b_datap); 1380 nbp->b_wptr = nbp->b_rptr + (bp->b_wptr - bp->b_rptr); 1381 bcopy(bp->b_datap, nbp->b_datap, bp->b_size); 1382 return (nbp); 1383 } 1384 1385 /* To simplify things, dupb() is implemented as copyb(). */ 1386 mblk_t * 1387 dupb(mblk_t *mp) 1388 { 1389 return (copyb(mp)); 1390 } 1391 1392 /* 1393 * get number of data bytes in message 1394 */ 1395 size_t 1396 msgdsize(mblk_t *bp) 1397 { 1398 size_t count = 0; 1399 1400 for (; bp != NULL; bp = bp->b_cont) { 1401 assert(bp->b_wptr >= bp->b_rptr); 1402 count += bp->b_wptr - bp->b_rptr; 1403 } 1404 return (count); 1405 } 1406