1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #include <sys/types.h> 28 #include <sys/param.h> 29 #include <sys/systm.h> 30 #include <sys/sysmacros.h> 31 #include <sys/debug.h> 32 #include <sys/cmn_err.h> 33 34 #include <sys/stropts.h> 35 #include <sys/socket.h> 36 #include <sys/socketvar.h> 37 38 #define _SUN_TPI_VERSION 2 39 #include <sys/tihdr.h> 40 #include <sys/sockio.h> 41 #include <sys/sodirect.h> 42 #include <sys/kmem_impl.h> 43 44 #include <sys/strsubr.h> 45 #include <sys/strsun.h> 46 #include <sys/ddi.h> 47 #include <netinet/in.h> 48 #include <inet/ip.h> 49 50 #include <fs/sockfs/sockcommon.h> 51 52 #include <sys/socket_proto.h> 53 54 #include <fs/sockfs/socktpi_impl.h> 55 #include <sys/tihdr.h> 56 #include <fs/sockfs/nl7c.h> 57 #include <inet/kssl/ksslapi.h> 58 59 60 extern int xnet_skip_checks; 61 extern int xnet_check_print; 62 63 static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t); 64 65 66 /*ARGSUSED*/ 67 int 68 so_accept_notsupp(struct sonode *lso, int fflag, 69 struct cred *cr, struct sonode **nsop) 70 { 71 return 
(EOPNOTSUPP); 72 } 73 74 /*ARGSUSED*/ 75 int 76 so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr) 77 { 78 return (EOPNOTSUPP); 79 } 80 81 /*ARGSUSED*/ 82 int 83 so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa, 84 socklen_t *len, struct cred *cr) 85 { 86 return (EOPNOTSUPP); 87 } 88 89 /*ARGSUSED*/ 90 int 91 so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr, 92 socklen_t *addrlen, boolean_t accept, struct cred *cr) 93 { 94 return (EOPNOTSUPP); 95 } 96 97 /*ARGSUSED*/ 98 int 99 so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr) 100 { 101 return (EOPNOTSUPP); 102 } 103 104 /*ARGSUSED*/ 105 int 106 so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag, 107 struct cred *cr, mblk_t **mpp) 108 { 109 return (EOPNOTSUPP); 110 } 111 112 /* 113 * Generic Socket Ops 114 */ 115 116 /* ARGSUSED */ 117 int 118 so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags) 119 { 120 return (socket_init_common(so, pso, flags, cr)); 121 } 122 123 int 124 so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen, 125 int flags, struct cred *cr) 126 { 127 int error; 128 129 SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr)); 130 131 ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD); 132 133 /* X/Open requires this check */ 134 if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 135 if (xnet_check_print) { 136 printf("sockfs: X/Open bind state check " 137 "caused EINVAL\n"); 138 } 139 error = EINVAL; 140 goto done; 141 } 142 143 /* 144 * a bind to a NULL address is interpreted as unbind. So just 145 * do the downcall. 146 */ 147 if (name == NULL) 148 goto dobind; 149 150 switch (so->so_family) { 151 case AF_INET: 152 if ((size_t)namelen != sizeof (sin_t)) { 153 error = name->sa_family != so->so_family ? 
154 EAFNOSUPPORT : EINVAL; 155 eprintsoline(so, error); 156 goto done; 157 } 158 159 if ((flags & _SOBIND_XPG4_2) && 160 (name->sa_family != so->so_family)) { 161 /* 162 * This check has to be made for X/Open 163 * sockets however application failures have 164 * been observed when it is applied to 165 * all sockets. 166 */ 167 error = EAFNOSUPPORT; 168 eprintsoline(so, error); 169 goto done; 170 } 171 /* 172 * Force a zero sa_family to match so_family. 173 * 174 * Some programs like inetd(1M) don't set the 175 * family field. Other programs leave 176 * sin_family set to garbage - SunOS 4.X does 177 * not check the family field on a bind. 178 * We use the family field that 179 * was passed in to the socket() call. 180 */ 181 name->sa_family = so->so_family; 182 break; 183 184 case AF_INET6: { 185 #ifdef DEBUG 186 sin6_t *sin6 = (sin6_t *)name; 187 #endif 188 if ((size_t)namelen != sizeof (sin6_t)) { 189 error = name->sa_family != so->so_family ? 190 EAFNOSUPPORT : EINVAL; 191 eprintsoline(so, error); 192 goto done; 193 } 194 195 if (name->sa_family != so->so_family) { 196 /* 197 * With IPv6 we require the family to match 198 * unlike in IPv4. 199 */ 200 error = EAFNOSUPPORT; 201 eprintsoline(so, error); 202 goto done; 203 } 204 #ifdef DEBUG 205 /* 206 * Verify that apps don't forget to clear 207 * sin6_scope_id etc 208 */ 209 if (sin6->sin6_scope_id != 0 && 210 !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) { 211 zcmn_err(getzoneid(), CE_WARN, 212 "bind with uninitialized sin6_scope_id " 213 "(%d) on socket. Pid = %d\n", 214 (int)sin6->sin6_scope_id, 215 (int)curproc->p_pid); 216 } 217 if (sin6->__sin6_src_id != 0) { 218 zcmn_err(getzoneid(), CE_WARN, 219 "bind with uninitialized __sin6_src_id " 220 "(%d) on socket. 
Pid = %d\n", 221 (int)sin6->__sin6_src_id, 222 (int)curproc->p_pid); 223 } 224 #endif /* DEBUG */ 225 226 break; 227 } 228 default: 229 /* Just pass the request to the protocol */ 230 goto dobind; 231 } 232 233 /* 234 * First we check if either NCA or KSSL has been enabled for 235 * the requested address, and if so, we fall back to TPI. 236 * If neither of those two services are enabled, then we just 237 * pass the request to the protocol. 238 * 239 * Note that KSSL can only be enabled on a socket if NCA is NOT 240 * enabled for that socket, hence the else-statement below. 241 */ 242 if (nl7c_enabled && ((so->so_family == AF_INET || 243 so->so_family == AF_INET6) && 244 nl7c_lookup_addr(name, namelen) != NULL)) { 245 /* 246 * NL7C is not supported in non-global zones, 247 * we enforce this restriction here. 248 */ 249 if (so->so_zoneid == GLOBAL_ZONEID) { 250 /* NCA should be used, so fall back to TPI */ 251 error = so_tpi_fallback(so, cr); 252 SO_UNBLOCK_FALLBACK(so); 253 if (error) 254 return (error); 255 else 256 return (SOP_BIND(so, name, namelen, flags, cr)); 257 } 258 } else if (so->so_type == SOCK_STREAM) { 259 /* Check if KSSL has been configured for this address */ 260 kssl_ent_t ent; 261 kssl_endpt_type_t type; 262 struct T_bind_req bind_req; 263 mblk_t *mp; 264 265 /* 266 * TODO: Check with KSSL team if we could add a function call 267 * that only queries whether KSSL is enabled for the given 268 * address. 269 */ 270 bind_req.PRIM_type = T_BIND_REQ; 271 bind_req.ADDR_length = namelen; 272 bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req); 273 mp = soallocproto2(&bind_req, sizeof (bind_req), 274 name, namelen, 0, _ALLOC_SLEEP, cr); 275 276 type = kssl_check_proxy(mp, so, &ent); 277 freemsg(mp); 278 279 if (type != KSSL_NO_PROXY) { 280 /* 281 * KSSL has been configured for this address, so 282 * we must fall back to TPI. 
283 */ 284 kssl_release_ent(ent, so, type); 285 error = so_tpi_fallback(so, cr); 286 SO_UNBLOCK_FALLBACK(so); 287 if (error) 288 return (error); 289 else 290 return (SOP_BIND(so, name, namelen, flags, cr)); 291 } 292 } 293 294 dobind: 295 error = (*so->so_downcalls->sd_bind) 296 (so->so_proto_handle, name, namelen, cr); 297 done: 298 SO_UNBLOCK_FALLBACK(so); 299 300 return (error); 301 } 302 303 int 304 so_listen(struct sonode *so, int backlog, struct cred *cr) 305 { 306 int error = 0; 307 308 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 309 SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr)); 310 311 error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog, 312 cr); 313 314 SO_UNBLOCK_FALLBACK(so); 315 316 return (error); 317 } 318 319 320 int 321 so_connect(struct sonode *so, const struct sockaddr *name, 322 socklen_t namelen, int fflag, int flags, struct cred *cr) 323 { 324 int error = 0; 325 sock_connid_t id; 326 327 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 328 SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr)); 329 330 /* 331 * If there is a pending error, return error 332 * This can happen if a non blocking operation caused an error. 
333 */ 334 335 if (so->so_error != 0) { 336 mutex_enter(&so->so_lock); 337 error = sogeterr(so, B_TRUE); 338 mutex_exit(&so->so_lock); 339 if (error != 0) 340 goto done; 341 } 342 343 error = (*so->so_downcalls->sd_connect)(so->so_proto_handle, 344 name, namelen, &id, cr); 345 346 if (error == EINPROGRESS) 347 error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id); 348 349 done: 350 SO_UNBLOCK_FALLBACK(so); 351 return (error); 352 } 353 354 /*ARGSUSED*/ 355 int 356 so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop) 357 { 358 int error = 0; 359 struct sonode *nso; 360 361 *nsop = NULL; 362 363 SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop)); 364 if ((so->so_state & SS_ACCEPTCONN) == 0) { 365 SO_UNBLOCK_FALLBACK(so); 366 return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ? 367 EOPNOTSUPP : EINVAL); 368 } 369 370 if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)), 371 &nso)) == 0) { 372 ASSERT(nso != NULL); 373 374 /* finish the accept */ 375 error = (*so->so_downcalls->sd_accept)(so->so_proto_handle, 376 nso->so_proto_handle, (sock_upper_handle_t)nso, cr); 377 if (error != 0) { 378 (void) socket_close(nso, 0, cr); 379 socket_destroy(nso); 380 } else { 381 *nsop = nso; 382 } 383 } 384 385 SO_UNBLOCK_FALLBACK(so); 386 return (error); 387 } 388 389 int 390 so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 391 struct cred *cr) 392 { 393 int error, flags; 394 boolean_t dontblock; 395 ssize_t orig_resid; 396 mblk_t *mp; 397 398 SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr)); 399 400 flags = msg->msg_flags; 401 error = 0; 402 dontblock = (flags & MSG_DONTWAIT) || 403 (uiop->uio_fmode & (FNONBLOCK|FNDELAY)); 404 405 if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) { 406 /* 407 * Old way of passing fd's is not supported 408 */ 409 SO_UNBLOCK_FALLBACK(so); 410 return (EOPNOTSUPP); 411 } 412 413 if ((so->so_mode & SM_ATOMIC) && 414 uiop->uio_resid > 
so->so_proto_props.sopp_maxpsz && 415 so->so_proto_props.sopp_maxpsz != -1) { 416 SO_UNBLOCK_FALLBACK(so); 417 return (EMSGSIZE); 418 } 419 420 /* 421 * For atomic sends we will only do one iteration. 422 */ 423 do { 424 if (so->so_state & SS_CANTSENDMORE) { 425 error = EPIPE; 426 break; 427 } 428 429 if (so->so_error != 0) { 430 mutex_enter(&so->so_lock); 431 error = sogeterr(so, B_TRUE); 432 mutex_exit(&so->so_lock); 433 if (error != 0) 434 break; 435 } 436 437 /* 438 * Send down OOB messages even if the send path is being 439 * flow controlled (assuming the protocol supports OOB data). 440 */ 441 if (flags & MSG_OOB) { 442 if ((so->so_mode & SM_EXDATA) == 0) { 443 error = EOPNOTSUPP; 444 break; 445 } 446 } else if (so->so_snd_qfull) { 447 /* 448 * Need to wait until the protocol is ready to receive 449 * more data for transmission. 450 */ 451 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 452 break; 453 } 454 455 /* 456 * Time to send data to the protocol. We either copy the 457 * data into mblks or pass the uio directly to the protocol. 458 * We decide what to do based on the available down calls. 459 */ 460 if (so->so_downcalls->sd_send_uio != NULL) { 461 error = (*so->so_downcalls->sd_send_uio) 462 (so->so_proto_handle, uiop, msg, cr); 463 if (error != 0) 464 break; 465 } else { 466 /* save the resid in case of failure */ 467 orig_resid = uiop->uio_resid; 468 469 if ((mp = socopyinuio(uiop, 470 so->so_proto_props.sopp_maxpsz, 471 so->so_proto_props.sopp_wroff, 472 so->so_proto_props.sopp_maxblk, 473 so->so_proto_props.sopp_tail, &error, 474 cr)) == NULL) { 475 break; 476 } 477 ASSERT(uiop->uio_resid >= 0); 478 479 error = (*so->so_downcalls->sd_send) 480 (so->so_proto_handle, mp, msg, cr); 481 if (error != 0) { 482 /* 483 * The send failed. We do not have to free the 484 * mblks, because that is the protocol's 485 * responsibility. However, uio_resid must 486 * remain accurate, so adjust that here. 
487 */ 488 uiop->uio_resid = orig_resid; 489 break; 490 } 491 } 492 } while (uiop->uio_resid > 0); 493 494 SO_UNBLOCK_FALLBACK(so); 495 496 return (error); 497 } 498 499 int 500 so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag, 501 struct cred *cr, mblk_t **mpp) 502 { 503 int error; 504 boolean_t dontblock; 505 size_t size; 506 mblk_t *mp = *mpp; 507 508 SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp)); 509 510 error = 0; 511 dontblock = (msg->msg_flags & MSG_DONTWAIT) || 512 (fflag & (FNONBLOCK|FNDELAY)); 513 size = msgdsize(mp); 514 515 if ((so->so_mode & SM_SENDFILESUPP) == 0 || 516 so->so_downcalls->sd_send == NULL) { 517 SO_UNBLOCK_FALLBACK(so); 518 return (EOPNOTSUPP); 519 } 520 521 if ((so->so_mode & SM_ATOMIC) && 522 size > so->so_proto_props.sopp_maxpsz && 523 so->so_proto_props.sopp_maxpsz != -1) { 524 SO_UNBLOCK_FALLBACK(so); 525 return (EMSGSIZE); 526 } 527 528 while (mp != NULL) { 529 mblk_t *nmp, *last_mblk; 530 size_t mlen; 531 532 if (so->so_state & SS_CANTSENDMORE) { 533 error = EPIPE; 534 break; 535 } 536 if (so->so_error != 0) { 537 mutex_enter(&so->so_lock); 538 error = sogeterr(so, B_TRUE); 539 mutex_exit(&so->so_lock); 540 if (error != 0) 541 break; 542 } 543 if (so->so_snd_qfull) { 544 /* 545 * Need to wait until the protocol is ready to receive 546 * more data for transmission. 547 */ 548 if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0) 549 break; 550 } 551 552 /* 553 * We only allow so_maxpsz of data to be sent down to 554 * the protocol at time. 555 */ 556 mlen = MBLKL(mp); 557 nmp = mp->b_cont; 558 last_mblk = mp; 559 while (nmp != NULL) { 560 mlen += MBLKL(nmp); 561 if (mlen > so->so_proto_props.sopp_maxpsz) { 562 last_mblk->b_cont = NULL; 563 break; 564 } 565 last_mblk = nmp; 566 nmp = nmp->b_cont; 567 } 568 569 error = (*so->so_downcalls->sd_send) 570 (so->so_proto_handle, mp, msg, cr); 571 if (error != 0) { 572 /* 573 * The send failed. The protocol will free the mblks 574 * that were sent down. 
Let the caller deal with the 575 * rest. 576 */ 577 *mpp = nmp; 578 break; 579 } 580 581 *mpp = mp = nmp; 582 } 583 584 SO_UNBLOCK_FALLBACK(so); 585 586 return (error); 587 } 588 589 int 590 so_shutdown(struct sonode *so, int how, struct cred *cr) 591 { 592 int error; 593 594 SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr)); 595 596 /* 597 * SunOS 4.X has no check for datagram sockets. 598 * 5.X checks that it is connected (ENOTCONN) 599 * X/Open requires that we check the connected state. 600 */ 601 if (!(so->so_state & SS_ISCONNECTED)) { 602 if (!xnet_skip_checks) { 603 error = ENOTCONN; 604 if (xnet_check_print) { 605 printf("sockfs: X/Open shutdown check " 606 "caused ENOTCONN\n"); 607 } 608 } 609 goto done; 610 } 611 612 error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle, 613 how, cr)); 614 615 /* 616 * Protocol agreed to shutdown. We need to flush the 617 * receive buffer if the receive side is being shutdown. 618 */ 619 if (error == 0 && how != SHUT_WR) { 620 mutex_enter(&so->so_lock); 621 /* wait for active reader to finish */ 622 (void) so_lock_read(so, 0); 623 624 so_rcv_flush(so); 625 626 so_unlock_read(so); 627 mutex_exit(&so->so_lock); 628 } 629 630 done: 631 SO_UNBLOCK_FALLBACK(so); 632 return (error); 633 } 634 635 int 636 so_getsockname(struct sonode *so, struct sockaddr *addr, 637 socklen_t *addrlen, struct cred *cr) 638 { 639 int error; 640 641 SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr)); 642 643 error = (*so->so_downcalls->sd_getsockname) 644 (so->so_proto_handle, addr, addrlen, cr); 645 646 SO_UNBLOCK_FALLBACK(so); 647 return (error); 648 } 649 650 int 651 so_getpeername(struct sonode *so, struct sockaddr *addr, 652 socklen_t *addrlen, boolean_t accept, struct cred *cr) 653 { 654 int error; 655 656 SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr)); 657 658 if (accept) { 659 error = (*so->so_downcalls->sd_getpeername) 660 (so->so_proto_handle, addr, addrlen, cr); 661 } else if (!(so->so_state & 
SS_ISCONNECTED)) { 662 error = ENOTCONN; 663 } else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) { 664 /* Added this check for X/Open */ 665 error = EINVAL; 666 if (xnet_check_print) { 667 printf("sockfs: X/Open getpeername check => EINVAL\n"); 668 } 669 } else { 670 error = (*so->so_downcalls->sd_getpeername) 671 (so->so_proto_handle, addr, addrlen, cr); 672 } 673 674 SO_UNBLOCK_FALLBACK(so); 675 return (error); 676 } 677 678 int 679 so_getsockopt(struct sonode *so, int level, int option_name, 680 void *optval, socklen_t *optlenp, int flags, struct cred *cr) 681 { 682 int error = 0; 683 684 ASSERT(MUTEX_NOT_HELD(&so->so_lock)); 685 SO_BLOCK_FALLBACK(so, 686 SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr)); 687 688 error = socket_getopt_common(so, level, option_name, optval, optlenp, 689 flags); 690 if (error < 0) { 691 error = (*so->so_downcalls->sd_getsockopt) 692 (so->so_proto_handle, level, option_name, optval, optlenp, 693 cr); 694 if (error == ENOPROTOOPT) { 695 if (level == SOL_SOCKET) { 696 /* 697 * If a protocol does not support a particular 698 * socket option, set can fail (not allowed) 699 * but get can not fail. This is the previous 700 * sockfs bahvior. 
701 */ 702 switch (option_name) { 703 case SO_LINGER: 704 if (*optlenp < (t_uscalar_t) 705 sizeof (struct linger)) { 706 error = EINVAL; 707 break; 708 } 709 error = 0; 710 bzero(optval, sizeof (struct linger)); 711 *optlenp = sizeof (struct linger); 712 break; 713 case SO_RCVTIMEO: 714 case SO_SNDTIMEO: 715 if (*optlenp < (t_uscalar_t) 716 sizeof (struct timeval)) { 717 error = EINVAL; 718 break; 719 } 720 error = 0; 721 bzero(optval, sizeof (struct timeval)); 722 *optlenp = sizeof (struct timeval); 723 break; 724 case SO_SND_BUFINFO: 725 if (*optlenp < (t_uscalar_t) 726 sizeof (struct so_snd_bufinfo)) { 727 error = EINVAL; 728 break; 729 } 730 error = 0; 731 bzero(optval, 732 sizeof (struct so_snd_bufinfo)); 733 *optlenp = 734 sizeof (struct so_snd_bufinfo); 735 break; 736 case SO_DEBUG: 737 case SO_REUSEADDR: 738 case SO_KEEPALIVE: 739 case SO_DONTROUTE: 740 case SO_BROADCAST: 741 case SO_USELOOPBACK: 742 case SO_OOBINLINE: 743 case SO_DGRAM_ERRIND: 744 case SO_SNDBUF: 745 case SO_RCVBUF: 746 error = 0; 747 *((int32_t *)optval) = 0; 748 *optlenp = sizeof (int32_t); 749 break; 750 default: 751 break; 752 } 753 } 754 } 755 } 756 757 SO_UNBLOCK_FALLBACK(so); 758 return (error); 759 } 760 761 int 762 so_setsockopt(struct sonode *so, int level, int option_name, 763 const void *optval, socklen_t optlen, struct cred *cr) 764 { 765 int error = 0; 766 struct timeval tl; 767 const void *opt = optval; 768 769 SO_BLOCK_FALLBACK(so, 770 SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr)); 771 772 /* X/Open requires this check */ 773 if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) { 774 SO_UNBLOCK_FALLBACK(so); 775 if (xnet_check_print) 776 printf("sockfs: X/Open setsockopt check => EINVAL\n"); 777 return (EINVAL); 778 } 779 780 if (level == SOL_SOCKET) { 781 switch (option_name) { 782 case SO_RCVTIMEO: 783 case SO_SNDTIMEO: { 784 /* 785 * We pass down these two options to protocol in order 786 * to support some third part protocols which need to 787 * know 
them. For those protocols which don't care 788 * these two options, simply return 0. 789 */ 790 clock_t t_usec; 791 792 if (get_udatamodel() == DATAMODEL_NONE || 793 get_udatamodel() == DATAMODEL_NATIVE) { 794 if (optlen != sizeof (struct timeval)) { 795 error = EINVAL; 796 goto done; 797 } 798 bcopy((struct timeval *)optval, &tl, 799 sizeof (struct timeval)); 800 } else { 801 if (optlen != sizeof (struct timeval32)) { 802 error = EINVAL; 803 goto done; 804 } 805 TIMEVAL32_TO_TIMEVAL(&tl, 806 (struct timeval32 *)optval); 807 } 808 opt = &tl; 809 optlen = sizeof (tl); 810 t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec; 811 mutex_enter(&so->so_lock); 812 if (option_name == SO_RCVTIMEO) 813 so->so_rcvtimeo = drv_usectohz(t_usec); 814 else 815 so->so_sndtimeo = drv_usectohz(t_usec); 816 mutex_exit(&so->so_lock); 817 break; 818 } 819 case SO_RCVBUF: 820 /* 821 * XXX XPG 4.2 applications retrieve SO_RCVBUF from 822 * sockfs since the transport might adjust the value 823 * and not return exactly what was set by the 824 * application. 825 */ 826 so->so_xpg_rcvbuf = *(int32_t *)optval; 827 break; 828 } 829 } 830 error = (*so->so_downcalls->sd_setsockopt) 831 (so->so_proto_handle, level, option_name, opt, optlen, cr); 832 done: 833 SO_UNBLOCK_FALLBACK(so); 834 return (error); 835 } 836 837 int 838 so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode, 839 struct cred *cr, int32_t *rvalp) 840 { 841 int error = 0; 842 843 SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp)); 844 845 /* 846 * If there is a pending error, return error 847 * This can happen if a non blocking operation caused an error. 848 */ 849 if (so->so_error != 0) { 850 mutex_enter(&so->so_lock); 851 error = sogeterr(so, B_TRUE); 852 mutex_exit(&so->so_lock); 853 if (error != 0) 854 goto done; 855 } 856 857 /* 858 * calling strioc can result in the socket falling back to TPI, 859 * if that is supported. 
860 */ 861 if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 && 862 (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) { 863 error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle, 864 cmd, arg, mode, rvalp, cr); 865 } 866 867 done: 868 SO_UNBLOCK_FALLBACK(so); 869 870 return (error); 871 } 872 873 int 874 so_poll(struct sonode *so, short events, int anyyet, short *reventsp, 875 struct pollhead **phpp) 876 { 877 int state = so->so_state; 878 *reventsp = 0; 879 880 if (so->so_error != 0 && 881 ((POLLIN|POLLRDNORM|POLLOUT) & events) != 0) { 882 *reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events; 883 return (0); 884 } 885 886 /* 887 * As long as there is buffer to send data, and the socket is 888 * in a state where it can send data (i.e., connected for 889 * connection oriented protocols), then turn on POLLOUT events 890 */ 891 if (!so->so_snd_qfull && ((so->so_mode & SM_CONNREQUIRED) == 0 || 892 state & SS_ISCONNECTED)) { 893 *reventsp |= POLLOUT & events; 894 } 895 896 /* 897 * Turn on POLLIN whenever there is data on the receive queue, 898 * or the socket is in a state where no more data will be received. 899 * Also, if the socket is accepting connections, flip the bit if 900 * there is something on the queue. 901 * 902 * We do an initial check for events without holding locks. However, 903 * if there are no event available, then we redo the check for POLLIN 904 * events under the lock. 
905 */ 906 907 /* Pending connections */ 908 if (so->so_acceptq_len > 0) 909 *reventsp |= (POLLIN|POLLRDNORM) & events; 910 911 /* Data */ 912 /* so_downcalls is null for sctp */ 913 if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) { 914 *reventsp |= (*so->so_downcalls->sd_poll) 915 (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet, 916 CRED()) & events; 917 ASSERT((*reventsp & ~events) == 0); 918 /* do not recheck events */ 919 events &= ~SO_PROTO_POLLEV; 920 } else { 921 if (SO_HAVE_DATA(so)) 922 *reventsp |= (POLLIN|POLLRDNORM) & events; 923 924 /* Urgent data */ 925 if ((state & SS_OOBPEND) != 0) 926 *reventsp |= (POLLRDBAND) & events; 927 } 928 929 if (!*reventsp && !anyyet) { 930 /* Check for read events again, but this time under lock */ 931 if (events & (POLLIN|POLLRDNORM)) { 932 mutex_enter(&so->so_lock); 933 if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) { 934 mutex_exit(&so->so_lock); 935 *reventsp |= (POLLIN|POLLRDNORM) & events; 936 return (0); 937 } else { 938 so->so_pollev |= SO_POLLEV_IN; 939 mutex_exit(&so->so_lock); 940 } 941 } 942 *phpp = &so->so_poll_list; 943 } 944 return (0); 945 } 946 947 /* 948 * Generic Upcalls 949 */ 950 void 951 so_connected(sock_upper_handle_t sock_handle, sock_connid_t id, 952 cred_t *peer_cred, pid_t peer_cpid) 953 { 954 struct sonode *so = (struct sonode *)sock_handle; 955 956 mutex_enter(&so->so_lock); 957 ASSERT(so->so_proto_handle != NULL); 958 959 if (peer_cred != NULL) { 960 if (so->so_peercred != NULL) 961 crfree(so->so_peercred); 962 crhold(peer_cred); 963 so->so_peercred = peer_cred; 964 so->so_cpid = peer_cpid; 965 } 966 967 so->so_proto_connid = id; 968 soisconnected(so); 969 /* 970 * Wake ones who're waiting for conn to become established. 
971 */ 972 so_notify_connected(so); 973 } 974 975 int 976 so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error) 977 { 978 struct sonode *so = (struct sonode *)sock_handle; 979 980 mutex_enter(&so->so_lock); 981 982 so->so_proto_connid = id; 983 soisdisconnected(so, error); 984 so_notify_disconnected(so, error); 985 986 return (0); 987 } 988 989 void 990 so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action, 991 uintptr_t arg) 992 { 993 struct sonode *so = (struct sonode *)sock_handle; 994 995 switch (action) { 996 case SOCK_OPCTL_SHUT_SEND: 997 mutex_enter(&so->so_lock); 998 socantsendmore(so); 999 so_notify_disconnecting(so); 1000 break; 1001 case SOCK_OPCTL_SHUT_RECV: { 1002 mutex_enter(&so->so_lock); 1003 socantrcvmore(so); 1004 so_notify_eof(so); 1005 break; 1006 } 1007 case SOCK_OPCTL_ENAB_ACCEPT: 1008 mutex_enter(&so->so_lock); 1009 so->so_state |= SS_ACCEPTCONN; 1010 so->so_backlog = (unsigned int)arg; 1011 mutex_exit(&so->so_lock); 1012 break; 1013 default: 1014 ASSERT(0); 1015 break; 1016 } 1017 } 1018 1019 void 1020 so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull) 1021 { 1022 struct sonode *so = (struct sonode *)sock_handle; 1023 1024 if (qfull) { 1025 so_snd_qfull(so); 1026 } else { 1027 so_snd_qnotfull(so); 1028 mutex_enter(&so->so_lock); 1029 so_notify_writable(so); 1030 } 1031 } 1032 1033 sock_upper_handle_t 1034 so_newconn(sock_upper_handle_t parenthandle, 1035 sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls, 1036 struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp) 1037 { 1038 struct sonode *so = (struct sonode *)parenthandle; 1039 struct sonode *nso; 1040 int error; 1041 1042 ASSERT(proto_handle != NULL); 1043 1044 if ((so->so_state & SS_ACCEPTCONN) == 0 || 1045 so->so_acceptq_len >= so->so_backlog) 1046 return (NULL); 1047 1048 nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP, 1049 &error); 1050 if (nso == NULL) 1051 return (NULL); 1052 
1053 if (peer_cred != NULL) { 1054 crhold(peer_cred); 1055 nso->so_peercred = peer_cred; 1056 nso->so_cpid = peer_cpid; 1057 } 1058 1059 /* 1060 * The new socket (nso), proto_handle and sock_upcallsp are all 1061 * valid at this point. But as soon as nso is placed in the accept 1062 * queue that can no longer be assumed (since an accept() thread may 1063 * pull it off the queue and close the socket). 1064 */ 1065 *sock_upcallsp = &so_upcalls; 1066 1067 (void) so_acceptq_enqueue(so, nso); 1068 1069 mutex_enter(&so->so_lock); 1070 so_notify_newconn(so); 1071 1072 return ((sock_upper_handle_t)nso); 1073 } 1074 1075 void 1076 so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp) 1077 { 1078 struct sonode *so; 1079 1080 so = (struct sonode *)sock_handle; 1081 1082 mutex_enter(&so->so_lock); 1083 1084 if (soppp->sopp_flags & SOCKOPT_MAXBLK) 1085 so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk; 1086 if (soppp->sopp_flags & SOCKOPT_WROFF) 1087 so->so_proto_props.sopp_wroff = soppp->sopp_wroff; 1088 if (soppp->sopp_flags & SOCKOPT_TAIL) 1089 so->so_proto_props.sopp_tail = soppp->sopp_tail; 1090 if (soppp->sopp_flags & SOCKOPT_RCVHIWAT) 1091 so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat; 1092 if (soppp->sopp_flags & SOCKOPT_RCVLOWAT) 1093 so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat; 1094 if (soppp->sopp_flags & SOCKOPT_MAXPSZ) 1095 so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz; 1096 if (soppp->sopp_flags & SOCKOPT_MINPSZ) 1097 so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz; 1098 if (soppp->sopp_flags & SOCKOPT_ZCOPY) { 1099 if (soppp->sopp_zcopyflag & ZCVMSAFE) { 1100 so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE; 1101 so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE; 1102 } else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) { 1103 so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE; 1104 so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE; 1105 } 1106 1107 if (soppp->sopp_zcopyflag & COPYCACHED) { 1108 
so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED; 1109 } 1110 } 1111 if (soppp->sopp_flags & SOCKOPT_OOBINLINE) 1112 so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline; 1113 if (soppp->sopp_flags & SOCKOPT_RCVTIMER) 1114 so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer; 1115 if (soppp->sopp_flags & SOCKOPT_RCVTHRESH) 1116 so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh; 1117 if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN) 1118 so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen; 1119 1120 mutex_exit(&so->so_lock); 1121 1122 #ifdef DEBUG 1123 soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL | 1124 SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ | 1125 SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER | 1126 SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ); 1127 ASSERT(soppp->sopp_flags == 0); 1128 #endif 1129 } 1130 1131 /* ARGSUSED */ 1132 ssize_t 1133 so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp, 1134 size_t msg_size, int flags, int *errorp, boolean_t *force_pushp) 1135 { 1136 struct sonode *so = (struct sonode *)sock_handle; 1137 boolean_t force_push = B_TRUE; 1138 int space_left; 1139 sodirect_t *sodp = so->so_direct; 1140 1141 ASSERT(errorp != NULL); 1142 *errorp = 0; 1143 if (mp == NULL) { 1144 if (msg_size > 0) { 1145 ASSERT(so->so_downcalls->sd_recv_uio != NULL); 1146 mutex_enter(&so->so_lock); 1147 /* the notify functions will drop the lock */ 1148 if (flags & MSG_OOB) 1149 so_notify_oobdata(so, IS_SO_OOB_INLINE(so)); 1150 else 1151 so_notify_data(so, msg_size); 1152 return (0); 1153 } 1154 /* 1155 * recv space check 1156 */ 1157 mutex_enter(&so->so_lock); 1158 space_left = so->so_rcvbuf - so->so_rcv_queued; 1159 if (space_left <= 0) { 1160 so->so_flowctrld = B_TRUE; 1161 *errorp = ENOSPC; 1162 space_left = -1; 1163 } 1164 goto done_unlock; 1165 } 1166 1167 ASSERT(mp->b_next == NULL); 1168 ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO); 1169 ASSERT(msg_size == 
msgdsize(mp)); 1170 1171 if (flags & MSG_OOB) { 1172 so_queue_oob(sock_handle, mp, msg_size); 1173 return (0); 1174 } 1175 1176 if (force_pushp != NULL) 1177 force_push = *force_pushp; 1178 1179 if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) { 1180 /* The read pointer is not aligned correctly for TPI */ 1181 zcmn_err(getzoneid(), CE_WARN, 1182 "sockfs: Unaligned TPI message received. rptr = %p\n", 1183 (void *)mp->b_rptr); 1184 freemsg(mp); 1185 mutex_enter(sodp->sod_lockp); 1186 SOD_UIOAFINI(sodp); 1187 mutex_exit(sodp->sod_lockp); 1188 1189 return (so->so_rcvbuf - so->so_rcv_queued); 1190 } 1191 1192 mutex_enter(&so->so_lock); 1193 if (so->so_state & (SS_FALLBACK_PENDING | SS_FALLBACK_COMP)) { 1194 SOD_DISABLE(sodp); 1195 mutex_exit(&so->so_lock); 1196 *errorp = EOPNOTSUPP; 1197 return (-1); 1198 } 1199 if (so->so_state & SS_CANTRCVMORE) { 1200 freemsg(mp); 1201 SOD_DISABLE(sodp); 1202 mutex_exit(&so->so_lock); 1203 return (0); 1204 } 1205 1206 /* process the mblk via I/OAT if capable */ 1207 if (sodp != NULL && (sodp->sod_state & SOD_ENABLED)) { 1208 if (DB_TYPE(mp) == M_DATA) { 1209 (void) sod_uioa_mblk_init(sodp, mp, msg_size); 1210 } else { 1211 SOD_UIOAFINI(sodp); 1212 } 1213 } 1214 1215 if (mp->b_next == NULL) { 1216 so_enqueue_msg(so, mp, msg_size); 1217 } else { 1218 do { 1219 mblk_t *nmp; 1220 1221 if ((nmp = mp->b_next) != NULL) { 1222 mp->b_next = NULL; 1223 } 1224 so_enqueue_msg(so, mp, msgdsize(mp)); 1225 mp = nmp; 1226 } while (mp != NULL); 1227 } 1228 1229 space_left = so->so_rcvbuf - so->so_rcv_queued; 1230 if (space_left <= 0) { 1231 so->so_flowctrld = B_TRUE; 1232 *errorp = ENOSPC; 1233 space_left = -1; 1234 } 1235 1236 if (force_push || so->so_rcv_queued >= so->so_rcv_thresh || 1237 so->so_rcv_queued >= so->so_rcv_wanted || 1238 (sodp != NULL && so->so_rcv_queued >= sodp->sod_want)) { 1239 SOCKET_TIMER_CANCEL(so); 1240 /* 1241 * so_notify_data will release the lock 1242 */ 1243 so_notify_data(so, so->so_rcv_queued); 1244 1245 
		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		/* so_lock was released by so_notify_data() above */
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);
}

/*
 * Set the offset of where the oob data is relative to the bytes in
 * queued. Also generate SIGURG.
 *
 * Upcall from the protocol when new urgent (out-of-band) data is on
 * the way; offset is relative to the data currently queued.
 * NOTE(review): there is no mutex_exit() here — so_notify_oobsig() is
 * expected to drop so_lock (same hand-off pattern as so_notify_data);
 * confirm against the so_notify_* implementations.
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	/* Discard any previously queued, unconsumed OOB byte */
	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * set the offset where the urgent byte is
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	/* Generates SIGURG; expected to release so_lock */
	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte.
 *
 * If the socket is not SO_OOBINLINE the byte is held aside in
 * so_oobmsg (any stale so_oobmsg was already cleared by
 * so_signal_oob()); otherwise it is enqueued with the normal data.
 * NOTE(review): so_notify_oobdata() is expected to drop so_lock —
 * no mutex_exit() appears here; confirm.
 */
static void
so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		so_enqueue_msg(so, mp, len);
	}

	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

/*
 * Generic close: ask the protocol to close first, then flush
 * anything left on the receive queue.
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error =
	    (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);

	/*
	 * At this point there will be no more upcalls from the protocol,
	 * so it is safe to flush the receive queue under so_lock.
	 */
	mutex_enter(&so->so_lock);

	ASSERT(so_verify_oobstate(so));

	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	/* Return whatever the protocol's sd_close reported */
	return (error);
}

/*
 * Upcall from the protocol: a zero-copy operation has completed
 * (presumably a transmit — confirm against callers of su_zcopy_notify).
 * Wake any thread blocked on so_copy_cv waiting for STZCNOTIFY.
 */
void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Upcall from the protocol to post an asynchronous error on the socket.
 * NOTE(review): no mutex_exit() here — so_notify_error() is expected to
 * release so_lock (same hand-off convention as the other so_notify_*
 * calls in this file); confirm.
 */
void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 * so_recvmsg - read data from the socket
 *
 * There are two ways of obtaining data; either we ask the protocol to
 * copy directly into the supplied buffer, or we copy data from the
 * sonode's receive queue. The decision which one to use depends on
 * whether the protocol has a sd_recv_uio down call.
1375 */ 1376 int 1377 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1378 struct cred *cr) 1379 { 1380 rval_t rval; 1381 int flags = 0; 1382 t_uscalar_t controllen, namelen; 1383 int error = 0; 1384 int ret; 1385 mblk_t *mctlp = NULL; 1386 union T_primitives *tpr; 1387 void *control; 1388 ssize_t saved_resid; 1389 struct uio *suiop; 1390 1391 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1392 1393 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1394 (so->so_mode & SM_CONNREQUIRED)) { 1395 SO_UNBLOCK_FALLBACK(so); 1396 return (ENOTCONN); 1397 } 1398 1399 if (msg->msg_flags & MSG_PEEK) 1400 msg->msg_flags &= ~MSG_WAITALL; 1401 1402 if (so->so_mode & SM_ATOMIC) 1403 msg->msg_flags |= MSG_TRUNC; 1404 1405 if (msg->msg_flags & MSG_OOB) { 1406 if ((so->so_mode & SM_EXDATA) == 0) { 1407 error = EOPNOTSUPP; 1408 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1409 error = (*so->so_downcalls->sd_recv_uio) 1410 (so->so_proto_handle, uiop, msg, cr); 1411 } else { 1412 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1413 IS_SO_OOB_INLINE(so)); 1414 } 1415 SO_UNBLOCK_FALLBACK(so); 1416 return (error); 1417 } 1418 1419 /* 1420 * If the protocol has the recv down call, then pass the request 1421 * down. 1422 */ 1423 if (so->so_downcalls->sd_recv_uio != NULL) { 1424 error = (*so->so_downcalls->sd_recv_uio) 1425 (so->so_proto_handle, uiop, msg, cr); 1426 SO_UNBLOCK_FALLBACK(so); 1427 return (error); 1428 } 1429 1430 /* 1431 * Reading data from the socket buffer 1432 */ 1433 flags = msg->msg_flags; 1434 msg->msg_flags = 0; 1435 1436 /* 1437 * Set msg_controllen and msg_namelen to zero here to make it 1438 * simpler in the cases that no control or name is returned. 
1439 */ 1440 controllen = msg->msg_controllen; 1441 namelen = msg->msg_namelen; 1442 msg->msg_controllen = 0; 1443 msg->msg_namelen = 0; 1444 1445 mutex_enter(&so->so_lock); 1446 /* Set SOREADLOCKED */ 1447 error = so_lock_read_intr(so, 1448 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1449 mutex_exit(&so->so_lock); 1450 if (error) { 1451 SO_UNBLOCK_FALLBACK(so); 1452 return (error); 1453 } 1454 1455 suiop = sod_rcv_init(so, flags, &uiop); 1456 retry: 1457 saved_resid = uiop->uio_resid; 1458 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1459 if (error != 0) { 1460 goto out; 1461 } 1462 /* 1463 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1464 * For non-datagrams MOREDATA is used to set MSG_EOR. 1465 */ 1466 ASSERT(!(rval.r_val1 & MORECTL)); 1467 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1468 msg->msg_flags |= MSG_TRUNC; 1469 if (mctlp == NULL) { 1470 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1471 1472 mutex_enter(&so->so_lock); 1473 /* Set MSG_EOR based on MOREDATA */ 1474 if (!(rval.r_val1 & MOREDATA)) { 1475 if (so->so_state & SS_SAVEDEOR) { 1476 msg->msg_flags |= MSG_EOR; 1477 so->so_state &= ~SS_SAVEDEOR; 1478 } 1479 } 1480 /* 1481 * If some data was received (i.e. not EOF) and the 1482 * read/recv* has not been satisfied wait for some more. 1483 */ 1484 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1485 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1486 mutex_exit(&so->so_lock); 1487 goto retry; 1488 } 1489 1490 goto out_locked; 1491 } 1492 /* strsock_proto has already verified length and alignment */ 1493 tpr = (union T_primitives *)mctlp->b_rptr; 1494 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1495 switch (tpr->type) { 1496 case T_DATA_IND: { 1497 /* 1498 * Set msg_flags to MSG_EOR based on 1499 * MORE_flag and MOREDATA. 
1500 */ 1501 mutex_enter(&so->so_lock); 1502 so->so_state &= ~SS_SAVEDEOR; 1503 if (!(tpr->data_ind.MORE_flag & 1)) { 1504 if (!(rval.r_val1 & MOREDATA)) 1505 msg->msg_flags |= MSG_EOR; 1506 else 1507 so->so_state |= SS_SAVEDEOR; 1508 } 1509 freemsg(mctlp); 1510 /* 1511 * If some data was received (i.e. not EOF) and the 1512 * read/recv* has not been satisfied wait for some more. 1513 */ 1514 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1515 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1516 mutex_exit(&so->so_lock); 1517 goto retry; 1518 } 1519 goto out_locked; 1520 } 1521 case T_UNITDATA_IND: { 1522 void *addr; 1523 t_uscalar_t addrlen; 1524 void *abuf; 1525 t_uscalar_t optlen; 1526 void *opt; 1527 1528 if (namelen != 0) { 1529 /* Caller wants source address */ 1530 addrlen = tpr->unitdata_ind.SRC_length; 1531 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1532 addrlen, 1); 1533 if (addr == NULL) { 1534 freemsg(mctlp); 1535 error = EPROTO; 1536 eprintsoline(so, error); 1537 goto out; 1538 } 1539 ASSERT(so->so_family != AF_UNIX); 1540 } 1541 optlen = tpr->unitdata_ind.OPT_length; 1542 if (optlen != 0) { 1543 t_uscalar_t ncontrollen; 1544 1545 /* 1546 * Extract any source address option. 1547 * Determine how large cmsg buffer is needed. 1548 */ 1549 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1550 optlen, __TPI_ALIGN_SIZE); 1551 1552 if (opt == NULL) { 1553 freemsg(mctlp); 1554 error = EPROTO; 1555 eprintsoline(so, error); 1556 goto out; 1557 } 1558 if (so->so_family == AF_UNIX) 1559 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1560 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1561 !(flags & MSG_XPG4_2)); 1562 if (controllen != 0) 1563 controllen = ncontrollen; 1564 else if (ncontrollen != 0) 1565 msg->msg_flags |= MSG_CTRUNC; 1566 } else { 1567 controllen = 0; 1568 } 1569 1570 if (namelen != 0) { 1571 /* 1572 * Return address to caller. 1573 * Caller handles truncation if length 1574 * exceeds msg_namelen. 
1575 * NOTE: AF_UNIX NUL termination is ensured by 1576 * the sender's copyin_name(). 1577 */ 1578 abuf = kmem_alloc(addrlen, KM_SLEEP); 1579 1580 bcopy(addr, abuf, addrlen); 1581 msg->msg_name = abuf; 1582 msg->msg_namelen = addrlen; 1583 } 1584 1585 if (controllen != 0) { 1586 /* 1587 * Return control msg to caller. 1588 * Caller handles truncation if length 1589 * exceeds msg_controllen. 1590 */ 1591 control = kmem_zalloc(controllen, KM_SLEEP); 1592 1593 error = so_opt2cmsg(mctlp, opt, optlen, 1594 !(flags & MSG_XPG4_2), control, controllen); 1595 if (error) { 1596 freemsg(mctlp); 1597 if (msg->msg_namelen != 0) 1598 kmem_free(msg->msg_name, 1599 msg->msg_namelen); 1600 kmem_free(control, controllen); 1601 eprintsoline(so, error); 1602 goto out; 1603 } 1604 msg->msg_control = control; 1605 msg->msg_controllen = controllen; 1606 } 1607 1608 freemsg(mctlp); 1609 goto out; 1610 } 1611 case T_OPTDATA_IND: { 1612 struct T_optdata_req *tdr; 1613 void *opt; 1614 t_uscalar_t optlen; 1615 1616 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1617 optlen = tdr->OPT_length; 1618 if (optlen != 0) { 1619 t_uscalar_t ncontrollen; 1620 /* 1621 * Determine how large cmsg buffer is needed. 1622 */ 1623 opt = sogetoff(mctlp, 1624 tpr->optdata_ind.OPT_offset, optlen, 1625 __TPI_ALIGN_SIZE); 1626 1627 if (opt == NULL) { 1628 freemsg(mctlp); 1629 error = EPROTO; 1630 eprintsoline(so, error); 1631 goto out; 1632 } 1633 1634 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1635 !(flags & MSG_XPG4_2)); 1636 if (controllen != 0) 1637 controllen = ncontrollen; 1638 else if (ncontrollen != 0) 1639 msg->msg_flags |= MSG_CTRUNC; 1640 } else { 1641 controllen = 0; 1642 } 1643 1644 if (controllen != 0) { 1645 /* 1646 * Return control msg to caller. 1647 * Caller handles truncation if length 1648 * exceeds msg_controllen. 
1649 */ 1650 control = kmem_zalloc(controllen, KM_SLEEP); 1651 1652 error = so_opt2cmsg(mctlp, opt, optlen, 1653 !(flags & MSG_XPG4_2), control, controllen); 1654 if (error) { 1655 freemsg(mctlp); 1656 kmem_free(control, controllen); 1657 eprintsoline(so, error); 1658 goto out; 1659 } 1660 msg->msg_control = control; 1661 msg->msg_controllen = controllen; 1662 } 1663 1664 /* 1665 * Set msg_flags to MSG_EOR based on 1666 * DATA_flag and MOREDATA. 1667 */ 1668 mutex_enter(&so->so_lock); 1669 so->so_state &= ~SS_SAVEDEOR; 1670 if (!(tpr->data_ind.MORE_flag & 1)) { 1671 if (!(rval.r_val1 & MOREDATA)) 1672 msg->msg_flags |= MSG_EOR; 1673 else 1674 so->so_state |= SS_SAVEDEOR; 1675 } 1676 freemsg(mctlp); 1677 /* 1678 * If some data was received (i.e. not EOF) and the 1679 * read/recv* has not been satisfied wait for some more. 1680 * Not possible to wait if control info was received. 1681 */ 1682 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1683 controllen == 0 && 1684 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1685 mutex_exit(&so->so_lock); 1686 goto retry; 1687 } 1688 goto out_locked; 1689 } 1690 default: 1691 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1692 tpr->type); 1693 freemsg(mctlp); 1694 error = EPROTO; 1695 ASSERT(0); 1696 } 1697 out: 1698 mutex_enter(&so->so_lock); 1699 out_locked: 1700 /* The sod_lockp pointers to the sonode so_lock */ 1701 ret = sod_rcv_done(so, suiop, uiop); 1702 if (ret != 0 && error == 0) 1703 error = ret; 1704 1705 so_unlock_read(so); /* Clear SOREADLOCKED */ 1706 mutex_exit(&so->so_lock); 1707 1708 SO_UNBLOCK_FALLBACK(so); 1709 1710 return (error); 1711 } 1712 1713 sonodeops_t so_sonodeops = { 1714 so_init, /* sop_init */ 1715 so_accept, /* sop_accept */ 1716 so_bind, /* sop_bind */ 1717 so_listen, /* sop_listen */ 1718 so_connect, /* sop_connect */ 1719 so_recvmsg, /* sop_recvmsg */ 1720 so_sendmsg, /* sop_sendmsg */ 1721 so_sendmblk, /* sop_sendmblk */ 1722 so_getpeername, /* sop_getpeername */ 1723 
	so_getsockname,		/* sop_getsockname */
	so_shutdown,		/* sop_shutdown */
	so_getsockopt,		/* sop_getsockopt */
	so_setsockopt,		/* sop_setsockopt */
	so_ioctl,		/* sop_ioctl */
	so_poll,		/* sop_poll */
	so_close,		/* sop_close */
};

/*
 * Upcalls handed to the protocol so it can notify sockfs of events.
 * NOTE(review): slot comments below are inferred from the function
 * names; confirm the ordering against the sock_upcalls_t declaration.
 */
sock_upcalls_t so_upcalls = {
	so_newconn,		/* new incoming connection */
	so_connected,		/* connection established */
	so_disconnected,	/* connection torn down */
	so_opctl,		/* miscellaneous operation control */
	so_queue_msg,		/* deliver received data */
	so_set_prop,		/* set socket properties */
	so_txq_full,		/* transmit queue full/unblocked */
	so_signal_oob,		/* urgent data signal */
	so_zcopy_notify,	/* zero-copy completion */
	so_set_error		/* post asynchronous error */
};