/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysmacros.h>
#include <sys/debug.h>
#include <sys/cmn_err.h>

#include <sys/stropts.h>
#include <sys/socket.h>
#include <sys/socketvar.h>

#define	_SUN_TPI_VERSION	2
#include <sys/tihdr.h>
#include <sys/sockio.h>
#include <sys/kmem_impl.h>

#include <sys/strsubr.h>
#include <sys/strsun.h>
#include <sys/ddi.h>
#include <netinet/in.h>
#include <inet/ip.h>

#include <fs/sockfs/sockcommon.h>

#include <sys/socket_proto.h>

#include <fs/sockfs/socktpi_impl.h>
#include <fs/sockfs/sodirect.h>
#include <sys/tihdr.h>
#include <fs/sockfs/nl7c.h>
#include <inet/kssl/ksslapi.h>


/* Tunables controlling the X/Open (XPG4.2) state checks below. */
extern int xnet_skip_checks;
extern int xnet_check_print;

static void so_queue_oob(sock_upper_handle_t, mblk_t *, size_t);


/*
 * "Not supported" stubs, used as sonodeops entries by socket types that
 * do not implement the corresponding operation.  Each simply fails with
 * EOPNOTSUPP.
 */

/*ARGSUSED*/
int
so_accept_notsupp(struct sonode *lso, int fflag,
    struct cred *cr, struct sonode **nsop)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_listen_notsupp(struct sonode *so, int backlog, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getsockname_notsupp(struct sonode *so, struct sockaddr *sa,
    socklen_t *len, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_getpeername_notsupp(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_shutdown_notsupp(struct sonode *so, int how, struct cred *cr)
{
	return (EOPNOTSUPP);
}

/*ARGSUSED*/
int
so_sendmblk_notsupp(struct sonode *so, struct msghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	return (EOPNOTSUPP);
}

/*
 * Generic Socket Ops
 */

/*
 * Generic socket init; delegates to the common initialization code.
 */
/* ARGSUSED */
int
so_init(struct sonode *so, struct sonode *pso, struct cred *cr, int flags)
{
	return (socket_init_common(so, pso, flags, cr));
}

/*
 * Bind the socket to a local name.  Performs X/Open and address-family
 * sanity checks, then either falls back to TPI (when NL7C/NCA or KSSL
 * is configured for the address) or passes the request down to the
 * protocol via the sd_bind downcall.
 *
 * A NULL name is interpreted as an unbind request and is passed straight
 * down to the protocol.
 */
int
so_bind(struct sonode *so, struct sockaddr *name, socklen_t namelen,
    int flags, struct cred *cr)
{
	int error;

	/*
	 * If a fallback to TPI is in progress, this dispatches the
	 * operation through the TPI sonodeops instead.
	 */
	SO_BLOCK_FALLBACK(so, SOP_BIND(so, name, namelen, flags, cr));

	ASSERT(flags == _SOBIND_XPG4_2 || flags == _SOBIND_SOCKBSD);

	/* X/Open requires this check */
	if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		if (xnet_check_print) {
			printf("sockfs: X/Open bind state check "
			    "caused EINVAL\n");
		}
		error = EINVAL;
		goto done;
	}

	/*
	 * a bind to a NULL address is interpreted as unbind. So just
	 * do the downcall.
	 */
	if (name == NULL)
		goto dobind;

	switch (so->so_family) {
	case AF_INET:
		if ((size_t)namelen != sizeof (sin_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if ((flags & _SOBIND_XPG4_2) &&
		    (name->sa_family != so->so_family)) {
			/*
			 * This check has to be made for X/Open
			 * sockets however application failures have
			 * been observed when it is applied to
			 * all sockets.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
		/*
		 * Force a zero sa_family to match so_family.
		 *
		 * Some programs like inetd(1M) don't set the
		 * family field. Other programs leave
		 * sin_family set to garbage - SunOS 4.X does
		 * not check the family field on a bind.
		 * We use the family field that
		 * was passed in to the socket() call.
		 */
		name->sa_family = so->so_family;
		break;

	case AF_INET6: {
#ifdef DEBUG
		sin6_t *sin6 = (sin6_t *)name;
#endif
		if ((size_t)namelen != sizeof (sin6_t)) {
			error = name->sa_family != so->so_family ?
			    EAFNOSUPPORT : EINVAL;
			eprintsoline(so, error);
			goto done;
		}

		if (name->sa_family != so->so_family) {
			/*
			 * With IPv6 we require the family to match
			 * unlike in IPv4.
			 */
			error = EAFNOSUPPORT;
			eprintsoline(so, error);
			goto done;
		}
#ifdef DEBUG
		/*
		 * Verify that apps don't forget to clear
		 * sin6_scope_id etc
		 */
		if (sin6->sin6_scope_id != 0 &&
		    !IN6_IS_ADDR_LINKSCOPE(&sin6->sin6_addr)) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized sin6_scope_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->sin6_scope_id,
			    (int)curproc->p_pid);
		}
		if (sin6->__sin6_src_id != 0) {
			zcmn_err(getzoneid(), CE_WARN,
			    "bind with uninitialized __sin6_src_id "
			    "(%d) on socket. Pid = %d\n",
			    (int)sin6->__sin6_src_id,
			    (int)curproc->p_pid);
		}
#endif /* DEBUG */

		break;
	}
	default:
		/* Just pass the request to the protocol */
		goto dobind;
	}

	/*
	 * First we check if either NCA or KSSL has been enabled for
	 * the requested address, and if so, we fall back to TPI.
	 * If neither of those two services are enabled, then we just
	 * pass the request to the protocol.
	 *
	 * Note that KSSL can only be enabled on a socket if NCA is NOT
	 * enabled for that socket, hence the else-statement below.
	 */
	if (nl7c_enabled && ((so->so_family == AF_INET ||
	    so->so_family == AF_INET6) &&
	    nl7c_lookup_addr(name, namelen) != NULL)) {
		/*
		 * NL7C is not supported in non-global zones,
		 * we enforce this restriction here.
		 */
		if (so->so_zoneid == GLOBAL_ZONEID) {
			/* NCA should be used, so fall back to TPI */
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	} else if (so->so_type == SOCK_STREAM) {
		/* Check if KSSL has been configured for this address */
		kssl_ent_t ent;
		kssl_endpt_type_t type;
		struct T_bind_req bind_req;
		mblk_t *mp;

		/*
		 * TODO: Check with KSSL team if we could add a function call
		 * that only queries whether KSSL is enabled for the given
		 * address.
		 */
		bind_req.PRIM_type = T_BIND_REQ;
		bind_req.ADDR_length = namelen;
		bind_req.ADDR_offset = (t_scalar_t)sizeof (bind_req);
		mp = soallocproto2(&bind_req, sizeof (bind_req),
		    name, namelen, 0, _ALLOC_SLEEP, cr);

		type = kssl_check_proxy(mp, so, &ent);
		freemsg(mp);

		if (type != KSSL_NO_PROXY) {
			/*
			 * KSSL has been configured for this address, so
			 * we must fall back to TPI.
			 */
			kssl_release_ent(ent, so, type);
			error = so_tpi_fallback(so, cr);
			SO_UNBLOCK_FALLBACK(so);
			if (error)
				return (error);
			else
				return (SOP_BIND(so, name, namelen, flags, cr));
		}
	}

dobind:
	error = (*so->so_downcalls->sd_bind)
	    (so->so_proto_handle, name, namelen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Turn the socket into a listener by passing the backlog down to the
 * protocol via the sd_listen downcall.
 */
int
so_listen(struct sonode *so, int backlog, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_LISTEN(so, backlog, cr));

	error = (*so->so_downcalls->sd_listen)(so->so_proto_handle, backlog,
	    cr);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}


/*
 * Connect the socket to a peer.  Any pending asynchronous error is
 * returned first; otherwise the request is handed to the protocol.  If
 * the protocol reports EINPROGRESS, block (unless non-blocking I/O was
 * requested) until the connection identified by `id' is established.
 */
int
so_connect(struct sonode *so, const struct sockaddr *name,
    socklen_t namelen, int fflag, int flags, struct cred *cr)
{
	int error = 0;
	sock_connid_t id;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so, SOP_CONNECT(so, name, namelen, fflag, flags, cr));

	/*
	 * If there is a pending error, return error
	 * This can happen if a non blocking operation caused an error.
	 */

	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	error = (*so->so_downcalls->sd_connect)(so->so_proto_handle,
	    name, namelen, &id, cr);

	if (error == EINPROGRESS)
		error = so_wait_connected(so, fflag & (FNONBLOCK|FNDELAY), id);

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Accept an incoming connection: dequeue a fully-connected sonode from
 * the accept queue (blocking unless FNONBLOCK/FNDELAY) and let the
 * protocol finish the accept via the sd_accept downcall.  On downcall
 * failure the new sonode is closed and destroyed here.
 */
/*ARGSUSED*/
int
so_accept(struct sonode *so, int fflag, struct cred *cr, struct sonode **nsop)
{
	int error = 0;
	struct sonode *nso;

	*nsop = NULL;

	SO_BLOCK_FALLBACK(so, SOP_ACCEPT(so, fflag, cr, nsop));
	if ((so->so_state & SS_ACCEPTCONN) == 0) {
		SO_UNBLOCK_FALLBACK(so);
		return ((so->so_type == SOCK_DGRAM || so->so_type == SOCK_RAW) ?
		    EOPNOTSUPP : EINVAL);
	}

	if ((error = so_acceptq_dequeue(so, (fflag & (FNONBLOCK|FNDELAY)),
	    &nso)) == 0) {
		ASSERT(nso != NULL);

		/* finish the accept */
		error = (*so->so_downcalls->sd_accept)(so->so_proto_handle,
		    nso->so_proto_handle, (sock_upper_handle_t)nso, cr);
		if (error != 0) {
			(void) socket_close(nso, 0, cr);
			socket_destroy(nso);
		} else {
			*nsop = nso;
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Send data on the socket.  Data is either handed to the protocol as a
 * uio (sd_send_uio downcall, when available) or copied into mblks and
 * passed via sd_send.  For atomic (message-oriented) sockets only a
 * single iteration is performed; stream sockets loop until uio_resid is
 * drained, waiting for flow control to lift as needed.  OOB data is sent
 * even when the send side is flow controlled.
 */
int
so_sendmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop,
    struct cred *cr)
{
	int error, flags;
	boolean_t dontblock;
	ssize_t orig_resid;
	mblk_t  *mp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMSG(so, msg, uiop, cr));

	flags = msg->msg_flags;
	error = 0;
	dontblock = (flags & MSG_DONTWAIT) ||
	    (uiop->uio_fmode & (FNONBLOCK|FNDELAY));

	if (!(flags & MSG_XPG4_2) && msg->msg_controllen != 0) {
		/*
		 * Old way of passing fd's is not supported
		 */
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    uiop->uio_resid > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	/*
	 * For atomic sends we will only do one iteration.
	 */
	do {
		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}

		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}

		/*
		 * Send down OOB messages even if the send path is being
		 * flow controlled (assuming the protocol supports OOB data).
		 */
		if (flags & MSG_OOB) {
			if ((so->so_mode & SM_EXDATA) == 0) {
				error = EOPNOTSUPP;
				break;
			}
		} else if (so->so_snd_qfull) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * Time to send data to the protocol. We either copy the
		 * data into mblks or pass the uio directly to the protocol.
		 * We decide what to do based on the available down calls.
		 */
		if (so->so_downcalls->sd_send_uio != NULL) {
			error = (*so->so_downcalls->sd_send_uio)
			    (so->so_proto_handle, uiop, msg, cr);
			if (error != 0)
				break;
		} else {
			/* save the resid in case of failure */
			orig_resid = uiop->uio_resid;

			if ((mp = socopyinuio(uiop,
			    so->so_proto_props.sopp_maxpsz,
			    so->so_proto_props.sopp_wroff,
			    so->so_proto_props.sopp_maxblk,
			    so->so_proto_props.sopp_tail, &error)) == NULL) {
				break;
			}
			ASSERT(uiop->uio_resid >= 0);

			error = (*so->so_downcalls->sd_send)
			    (so->so_proto_handle, mp, msg, cr);
			if (error != 0) {
				/*
				 * The send failed. We do not have to free the
				 * mblks, because that is the protocol's
				 * responsibility. However, uio_resid must
				 * remain accurate, so adjust that here.
				 */
				uiop->uio_resid = orig_resid;
				break;
			}
		}
	} while (uiop->uio_resid > 0);

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Send a chain of mblks (e.g. from sendfile) down to the protocol.  The
 * chain is split so that at most sopp_maxpsz bytes are passed to the
 * sd_send downcall at a time.  On failure the protocol frees what was
 * sent down and *mpp is set to the unsent remainder for the caller.
 */
int
so_sendmblk(struct sonode *so, struct nmsghdr *msg, int fflag,
    struct cred *cr, mblk_t **mpp)
{
	int error;
	boolean_t dontblock;
	size_t size;
	mblk_t *mp = *mpp;

	SO_BLOCK_FALLBACK(so, SOP_SENDMBLK(so, msg, fflag, cr, mpp));

	error = 0;
	dontblock = (msg->msg_flags & MSG_DONTWAIT) ||
	    (fflag & (FNONBLOCK|FNDELAY));
	size = msgdsize(mp);

	if ((so->so_mode & SM_SENDFILESUPP) == 0 ||
	    so->so_downcalls->sd_send == NULL) {
		SO_UNBLOCK_FALLBACK(so);
		return (EOPNOTSUPP);
	}

	if ((so->so_mode & SM_ATOMIC) &&
	    size > so->so_proto_props.sopp_maxpsz &&
	    so->so_proto_props.sopp_maxpsz != -1) {
		SO_UNBLOCK_FALLBACK(so);
		return (EMSGSIZE);
	}

	while (mp != NULL) {
		mblk_t *nmp, *last_mblk;
		size_t mlen;

		if (so->so_state & SS_CANTSENDMORE) {
			error = EPIPE;
			break;
		}
		if (so->so_error != 0) {
			mutex_enter(&so->so_lock);
			error = sogeterr(so, B_TRUE);
			mutex_exit(&so->so_lock);
			if (error != 0)
				break;
		}
		if (so->so_snd_qfull) {
			/*
			 * Need to wait until the protocol is ready to receive
			 * more data for transmission.
			 */
			if ((error = so_snd_wait_qnotfull(so, dontblock)) != 0)
				break;
		}

		/*
		 * We only allow so_maxpsz of data to be sent down to
		 * the protocol at time.
		 */
		mlen = MBLKL(mp);
		nmp = mp->b_cont;
		last_mblk = mp;
		while (nmp != NULL) {
			mlen += MBLKL(nmp);
			if (mlen > so->so_proto_props.sopp_maxpsz) {
				last_mblk->b_cont = NULL;
				break;
			}
			last_mblk = nmp;
			nmp = nmp->b_cont;
		}

		error = (*so->so_downcalls->sd_send)
		    (so->so_proto_handle, mp, msg, cr);
		if (error != 0) {
			/*
			 * The send failed. The protocol will free the mblks
			 * that were sent down. Let the caller deal with the
			 * rest.
			 */
			*mpp = nmp;
			break;
		}

		*mpp = mp = nmp;
	}

	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Shut down the send and/or receive side of a connection.  After the
 * protocol agrees to a receive-side shutdown, the receive buffer is
 * flushed (waiting for any active reader to finish first).
 */
int
so_shutdown(struct sonode *so, int how, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_SHUTDOWN(so, how, cr));

	/*
	 * SunOS 4.X has no check for datagram sockets.
	 * 5.X checks that it is connected (ENOTCONN)
	 * X/Open requires that we check the connected state.
	 */
	if (!(so->so_state & SS_ISCONNECTED)) {
		if (!xnet_skip_checks) {
			error = ENOTCONN;
			if (xnet_check_print) {
				printf("sockfs: X/Open shutdown check "
				    "caused ENOTCONN\n");
			}
		}
		goto done;
	}

	error = ((*so->so_downcalls->sd_shutdown)(so->so_proto_handle,
	    how, cr));

	/*
	 * Protocol agreed to shutdown. We need to flush the
	 * receive buffer if the receive side is being shutdown.
	 */
	if (error == 0 && how != SHUT_WR) {
		mutex_enter(&so->so_lock);
		/* wait for active reader to finish */
		(void) so_lock_read(so, 0);

		so_rcv_flush(so);

		so_unlock_read(so);
		mutex_exit(&so->so_lock);
	}

done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Return the locally bound name via the sd_getsockname downcall.
 */
int
so_getsockname(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETSOCKNAME(so, addr, addrlen, cr));

	error = (*so->so_downcalls->sd_getsockname)
	    (so->so_proto_handle, addr, addrlen, cr);

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Return the peer's name.  When called internally on behalf of accept()
 * (accept == B_TRUE) the connected-state and X/Open checks are skipped.
 */
int
so_getpeername(struct sonode *so, struct sockaddr *addr,
    socklen_t *addrlen, boolean_t accept, struct cred *cr)
{
	int error;

	SO_BLOCK_FALLBACK(so, SOP_GETPEERNAME(so, addr, addrlen, accept, cr));

	if (accept) {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	} else if (!(so->so_state & SS_ISCONNECTED)) {
		error = ENOTCONN;
	} else if ((so->so_state & SS_CANTSENDMORE) && !xnet_skip_checks) {
		/* Added this check for X/Open */
		error = EINVAL;
		if (xnet_check_print) {
			printf("sockfs: X/Open getpeername check => EINVAL\n");
		}
	} else {
		error = (*so->so_downcalls->sd_getpeername)
		    (so->so_proto_handle, addr, addrlen, cr);
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Get a socket option.  Options that sockfs handles itself are answered
 * by socket_getopt_common() (negative return means "not handled"); the
 * rest go to the protocol.  If the protocol does not implement a
 * SOL_SOCKET option, a zeroed default of the proper size is returned
 * instead of failing, to preserve historical sockfs behavior.
 */
int
so_getsockopt(struct sonode *so, int level, int option_name,
    void *optval, socklen_t *optlenp, int flags, struct cred *cr)
{
	int error = 0;

	ASSERT(MUTEX_NOT_HELD(&so->so_lock));
	SO_BLOCK_FALLBACK(so,
	    SOP_GETSOCKOPT(so, level, option_name, optval, optlenp, flags, cr));

	error = socket_getopt_common(so, level, option_name, optval, optlenp,
	    flags);
	if (error < 0) {
		error = (*so->so_downcalls->sd_getsockopt)
		    (so->so_proto_handle, level, option_name, optval, optlenp,
		    cr);
		if (error ==  ENOPROTOOPT) {
			if (level == SOL_SOCKET) {
				/*
				 * If a protocol does not support a particular
				 * socket option, set can fail (not allowed)
				 * but get can not fail. This is the previous
				 * sockfs bahvior.
				 */
				switch (option_name) {
				case SO_LINGER:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct linger)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct linger));
					*optlenp = sizeof (struct linger);
					break;
				case SO_RCVTIMEO:
				case SO_SNDTIMEO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct timeval)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval, sizeof (struct timeval));
					*optlenp = sizeof (struct timeval);
					break;
				case SO_SND_BUFINFO:
					if (*optlenp < (t_uscalar_t)
					    sizeof (struct so_snd_bufinfo)) {
						error = EINVAL;
						break;
					}
					error = 0;
					bzero(optval,
					    sizeof (struct so_snd_bufinfo));
					*optlenp =
					    sizeof (struct so_snd_bufinfo);
					break;
				case SO_DEBUG:
				case SO_REUSEADDR:
				case SO_KEEPALIVE:
				case SO_DONTROUTE:
				case SO_BROADCAST:
				case SO_USELOOPBACK:
				case SO_OOBINLINE:
				case SO_DGRAM_ERRIND:
				case SO_SNDBUF:
				case SO_RCVBUF:
					error = 0;
					*((int32_t *)optval) = 0;
					*optlenp = sizeof (int32_t);
					break;
				default:
					break;
				}
			}
		}
	}

	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Set a socket option.  SO_RCVTIMEO/SO_SNDTIMEO are recorded in the
 * sonode (converting from the caller's data model as needed) and are
 * also passed down so that protocols that care can see them.  All
 * options are ultimately handed to the sd_setsockopt downcall.
 */
int
so_setsockopt(struct sonode *so, int level, int option_name,
    const void *optval, socklen_t optlen, struct cred *cr)
{
	int error = 0;
	struct timeval tl;
	const void *opt = optval;

	SO_BLOCK_FALLBACK(so,
	    SOP_SETSOCKOPT(so, level, option_name, optval, optlen, cr));

	/* X/Open requires this check */
	if (so->so_state & SS_CANTSENDMORE && !xnet_skip_checks) {
		SO_UNBLOCK_FALLBACK(so);
		if (xnet_check_print)
			printf("sockfs: X/Open setsockopt check => EINVAL\n");
		return (EINVAL);
	}

	if (level == SOL_SOCKET) {
		switch (option_name) {
		case SO_RCVTIMEO:
		case SO_SNDTIMEO: {
			/*
			 * We pass down these two options to protocol in order
			 * to support some third part protocols which need to
			 * know them. For those protocols which don't care
			 * these two options, simply return 0.
			 */
			clock_t t_usec;

			if (get_udatamodel() == DATAMODEL_NONE ||
			    get_udatamodel() == DATAMODEL_NATIVE) {
				if (optlen != sizeof (struct timeval)) {
					error = EINVAL;
					goto done;
				}
				bcopy((struct timeval *)optval, &tl,
				    sizeof (struct timeval));
			} else {
				if (optlen != sizeof (struct timeval32)) {
					error = EINVAL;
					goto done;
				}
				TIMEVAL32_TO_TIMEVAL(&tl,
				    (struct timeval32 *)optval);
			}
			opt = &tl;
			optlen = sizeof (tl);
			t_usec = tl.tv_sec * 1000 * 1000 + tl.tv_usec;
			mutex_enter(&so->so_lock);
			if (option_name == SO_RCVTIMEO)
				so->so_rcvtimeo = drv_usectohz(t_usec);
			else
				so->so_sndtimeo = drv_usectohz(t_usec);
			mutex_exit(&so->so_lock);
			break;
		}
		case SO_RCVBUF:
			/*
			 * XXX XPG 4.2 applications retrieve SO_RCVBUF from
			 * sockfs since the transport might adjust the value
			 * and not return exactly what was set by the
			 * application.
			 */
			so->so_xpg_rcvbuf = *(int32_t *)optval;
			break;
		}
	}
	error = (*so->so_downcalls->sd_setsockopt)
	    (so->so_proto_handle, level, option_name, opt, optlen, cr);
done:
	SO_UNBLOCK_FALLBACK(so);
	return (error);
}

/*
 * Socket ioctl.  First any pending error is returned; then the request
 * is tried against the common and STREAMS ioctl handlers (negative
 * return means "not handled"), and finally passed to the protocol via
 * the sd_ioctl downcall.
 */
int
so_ioctl(struct sonode *so, int cmd, intptr_t arg, int mode,
    struct cred *cr, int32_t *rvalp)
{
	int error = 0;

	SO_BLOCK_FALLBACK(so, SOP_IOCTL(so, cmd, arg, mode, cr, rvalp));

	/*
	 * If there is a pending error, return error
	 * This can happen if a non blocking operation caused an error.
	 */
	if (so->so_error != 0) {
		mutex_enter(&so->so_lock);
		error = sogeterr(so, B_TRUE);
		mutex_exit(&so->so_lock);
		if (error != 0)
			goto done;
	}

	/*
	 * calling strioc can result in the socket falling back to TPI,
	 * if that is supported.
	 */
	if ((error = socket_ioctl_common(so, cmd, arg, mode, cr, rvalp)) < 0 &&
	    (error = socket_strioc_common(so, cmd, arg, mode, cr, rvalp)) < 0) {
		error = (*so->so_downcalls->sd_ioctl)(so->so_proto_handle,
		    cmd, arg, mode, rvalp, cr);
	}

done:
	SO_UNBLOCK_FALLBACK(so);

	return (error);
}

/*
 * Poll for socket events.  Errors are reported as input/output events.
 * The initial event checks are done without so_lock; only if no events
 * are found (and the caller will sleep) is the POLLIN check redone under
 * the lock so that SO_POLLEV_IN is set race-free before returning the
 * pollhead.
 */
int
so_poll(struct sonode *so, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	int state = so->so_state;
	*reventsp = 0;

	/*
	 * In sockets the errors are represented as input/output events
	 */
	if (so->so_error != 0 &&
	    ((POLLIN|POLLRDNORM|POLLOUT) & events)  != 0) {
		*reventsp = (POLLIN|POLLRDNORM|POLLOUT) & events;
		return (0);
	}

	/*
	 * If the socket is in a state where it can send data
	 * turn on POLLWRBAND and POLLOUT events.
	 */
	if ((so->so_mode & SM_CONNREQUIRED) == 0 || (state & SS_ISCONNECTED)) {
		/*
		 * out of band data is allowed even if the connection
		 * is flow controlled
		 */
		*reventsp |= POLLWRBAND & events;
		if (!so->so_snd_qfull) {
			/*
			 * As long as there is buffer to send data
			 * turn on POLLOUT events
			 */
			*reventsp |= POLLOUT & events;
		}
	}

	/*
	 * Turn on POLLIN whenever there is data on the receive queue,
	 * or the socket is in a state where no more data will be received.
	 * Also, if the socket is accepting connections, flip the bit if
	 * there is something on the queue.
	 *
	 * We do an initial check for events without holding locks. However,
	 * if there are no event available, then we redo the check for POLLIN
	 * events under the lock.
	 */

	/* Pending connections */
	if (so->so_acceptq_len > 0)
		*reventsp |= (POLLIN|POLLRDNORM) & events;

	/* Data */
	/* so_downcalls is null for sctp */
	if (so->so_downcalls != NULL && so->so_downcalls->sd_poll != NULL) {
		*reventsp |= (*so->so_downcalls->sd_poll)
		    (so->so_proto_handle, events & SO_PROTO_POLLEV, anyyet,
		    CRED()) & events;
		ASSERT((*reventsp & ~events) == 0);
		/* do not recheck events */
		events &= ~SO_PROTO_POLLEV;
	} else {
		if (SO_HAVE_DATA(so))
			*reventsp |= (POLLIN|POLLRDNORM) & events;

		/* Urgent data */
		if ((state & SS_OOBPEND) != 0) {
			*reventsp |= (POLLRDBAND | POLLPRI) & events;
		}
	}

	if (!*reventsp && !anyyet) {
		/* Check for read events again, but this time under lock */
		if (events & (POLLIN|POLLRDNORM)) {
			mutex_enter(&so->so_lock);
			if (SO_HAVE_DATA(so) || so->so_acceptq_len > 0) {
				mutex_exit(&so->so_lock);
				*reventsp |= (POLLIN|POLLRDNORM) & events;
				return (0);
			} else {
				so->so_pollev |= SO_POLLEV_IN;
				mutex_exit(&so->so_lock);
			}
		}
		*phpp = &so->so_poll_list;
	}
	return (0);
}

/*
 * Generic Upcalls
 */

/*
 * Upcall from the protocol: the connection identified by `id' is now
 * established.  Records the peer credentials (if supplied), marks the
 * socket connected, and wakes threads waiting in connect().
 *
 * NOTE(review): so_notify_connected() is expected to drop so_lock, as
 * with the other so_notify_* calls below.
 */
void
so_connected(sock_upper_handle_t sock_handle, sock_connid_t id,
    cred_t *peer_cred, pid_t peer_cpid)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	ASSERT(so->so_proto_handle != NULL);

	if (peer_cred != NULL) {
		if (so->so_peercred != NULL)
			crfree(so->so_peercred);
		crhold(peer_cred);
		so->so_peercred = peer_cred;
		so->so_cpid = peer_cpid;
	}

	so->so_proto_connid = id;
	soisconnected(so);
	/*
	 * Wake ones who're waiting for conn to become established.
	 */
	so_notify_connected(so);
}

/*
 * Upcall from the protocol: the connection has been disconnected with
 * `error'.  Marks the socket disconnected and notifies waiters.
 */
int
so_disconnected(sock_upper_handle_t sock_handle, sock_connid_t id, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	so->so_proto_connid = id;
	soisdisconnected(so, error);
	so_notify_disconnected(so, error);

	return (0);
}

/*
 * Upcall from the protocol to change socket operational state: shut
 * down the send or receive side, or enable accepting of connections
 * with the given backlog (arg).  The so_notify_* calls drop so_lock.
 */
void
so_opctl(sock_upper_handle_t sock_handle, sock_opctl_action_t action,
    uintptr_t arg)
{
	struct sonode *so = (struct sonode *)sock_handle;

	switch (action) {
	case SOCK_OPCTL_SHUT_SEND:
		mutex_enter(&so->so_lock);
		socantsendmore(so);
		so_notify_disconnecting(so);
		break;
	case SOCK_OPCTL_SHUT_RECV: {
		mutex_enter(&so->so_lock);
		socantrcvmore(so);
		so_notify_eof(so);
		break;
	}
	case SOCK_OPCTL_ENAB_ACCEPT:
		mutex_enter(&so->so_lock);
		so->so_state |= SS_ACCEPTCONN;
		so->so_backlog = (unsigned int)arg;
		mutex_exit(&so->so_lock);
		break;
	default:
		ASSERT(0);
		break;
	}
}

/*
 * Upcall from the protocol reporting a transmit flow-control transition.
 * When flow control lifts, writers are woken (so_notify_writable drops
 * so_lock).
 */
void
so_txq_full(sock_upper_handle_t sock_handle, boolean_t qfull)
{
	struct sonode *so = (struct sonode *)sock_handle;

	if (qfull) {
		so_snd_qfull(so);
	} else {
		so_snd_qnotfull(so);
		mutex_enter(&so->so_lock);
		so_notify_writable(so);
	}
}

/*
 * Upcall from the protocol announcing a new incoming connection on a
 * listener.  Creates a new sonode for proto_handle and places it on the
 * listener's accept queue.  Returns NULL if the socket is not accepting
 * connections or the backlog is full, in which case the protocol must
 * drop the connection.
 */
sock_upper_handle_t
so_newconn(sock_upper_handle_t parenthandle,
    sock_lower_handle_t proto_handle, sock_downcalls_t *sock_downcalls,
    struct cred *peer_cred, pid_t peer_cpid, sock_upcalls_t **sock_upcallsp)
{
	struct sonode	*so = (struct sonode *)parenthandle;
	struct sonode	*nso;
	int error;

	ASSERT(proto_handle != NULL);

	if ((so->so_state & SS_ACCEPTCONN) == 0 ||
	    so->so_acceptq_len >= so->so_backlog)
		return (NULL);

	nso = socket_newconn(so, proto_handle, sock_downcalls, SOCKET_NOSLEEP,
	    &error);
	if (nso == NULL)
		return (NULL);

	if (peer_cred != NULL) {
		crhold(peer_cred);
		nso->so_peercred = peer_cred;
		nso->so_cpid = peer_cpid;
	}

	/*
	 * The new socket (nso), proto_handle and sock_upcallsp are all
	 * valid at this point. But as soon as nso is placed in the accept
	 * queue that can no longer be assumed (since an accept() thread may
	 * pull it off the queue and close the socket).
	 */
	*sock_upcallsp = &so_upcalls;

	(void) so_acceptq_enqueue(so, nso);

	mutex_enter(&so->so_lock);
	so_notify_newconn(so);

	return ((sock_upper_handle_t)nso);
}

/*
 * Upcall from the protocol to set/update protocol properties (write
 * offset, high/low watermarks, maximum packet size, etc.).  Only the
 * properties flagged in sopp_flags are updated.
 */
void
so_set_prop(sock_upper_handle_t sock_handle, struct sock_proto_props *soppp)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	if (soppp->sopp_flags & SOCKOPT_MAXBLK)
		so->so_proto_props.sopp_maxblk = soppp->sopp_maxblk;
	if (soppp->sopp_flags & SOCKOPT_WROFF)
		so->so_proto_props.sopp_wroff = soppp->sopp_wroff;
	if (soppp->sopp_flags & SOCKOPT_TAIL)
		so->so_proto_props.sopp_tail = soppp->sopp_tail;
	if (soppp->sopp_flags & SOCKOPT_RCVHIWAT)
		so->so_proto_props.sopp_rxhiwat = soppp->sopp_rxhiwat;
	if (soppp->sopp_flags & SOCKOPT_RCVLOWAT)
		so->so_proto_props.sopp_rxlowat = soppp->sopp_rxlowat;
	if (soppp->sopp_flags & SOCKOPT_MAXPSZ)
		so->so_proto_props.sopp_maxpsz = soppp->sopp_maxpsz;
	if (soppp->sopp_flags & SOCKOPT_MINPSZ)
		so->so_proto_props.sopp_minpsz = soppp->sopp_minpsz;
	if (soppp->sopp_flags & SOCKOPT_ZCOPY) {
		if (soppp->sopp_zcopyflag & ZCVMSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMUNSAFE;
		} else if (soppp->sopp_zcopyflag & ZCVMUNSAFE) {
			so->so_proto_props.sopp_zcopyflag |= STZCVMUNSAFE;
			so->so_proto_props.sopp_zcopyflag &= ~STZCVMSAFE;
		}

		if (soppp->sopp_zcopyflag & COPYCACHED) {
			so->so_proto_props.sopp_zcopyflag |= STRCOPYCACHED;
		}
	}
	if (soppp->sopp_flags & SOCKOPT_OOBINLINE)
		so->so_proto_props.sopp_oobinline = soppp->sopp_oobinline;
	if (soppp->sopp_flags & SOCKOPT_RCVTIMER)
		so->so_proto_props.sopp_rcvtimer = soppp->sopp_rcvtimer;
	if (soppp->sopp_flags & SOCKOPT_RCVTHRESH)
		so->so_proto_props.sopp_rcvthresh = soppp->sopp_rcvthresh;
	if (soppp->sopp_flags & SOCKOPT_MAXADDRLEN)
		so->so_proto_props.sopp_maxaddrlen = soppp->sopp_maxaddrlen;
	if (soppp->sopp_flags & SOCKOPT_LOOPBACK)
		so->so_proto_props.sopp_loopback = soppp->sopp_loopback;

	mutex_exit(&so->so_lock);

#ifdef DEBUG
	/* Every flag the protocol may set must be handled above. */
	soppp->sopp_flags &= ~(SOCKOPT_MAXBLK | SOCKOPT_WROFF | SOCKOPT_TAIL |
	    SOCKOPT_RCVHIWAT | SOCKOPT_RCVLOWAT | SOCKOPT_MAXPSZ |
	    SOCKOPT_ZCOPY | SOCKOPT_OOBINLINE | SOCKOPT_RCVTIMER |
	    SOCKOPT_RCVTHRESH | SOCKOPT_MAXADDRLEN | SOCKOPT_MINPSZ |
	    SOCKOPT_LOOPBACK);
	ASSERT(soppp->sopp_flags == 0);
#endif
}

/*
 * Upcall from the protocol delivering received data.  An mp of NULL with
 * msg_size > 0 means the data is held by the protocol (uioa/direct
 * receive); NULL with msg_size == 0 is a query for remaining receive
 * space.  Returns the space left in the receive buffer, 0 for OOB/notify
 * cases, or -1 with *errorp set (ENOSPC when flow controlled, EOPNOTSUPP
 * during fallback).
 */
/* ARGSUSED */
ssize_t
so_queue_msg(sock_upper_handle_t sock_handle, mblk_t *mp,
    size_t msg_size, int flags, int *errorp,  boolean_t *force_pushp)
{
	struct sonode *so = (struct sonode *)sock_handle;
	boolean_t force_push = B_TRUE;
	int space_left;
	sodirect_t *sodp = so->so_direct;

	ASSERT(errorp != NULL);
	*errorp = 0;
	if (mp == NULL) {
		if (msg_size > 0) {
			ASSERT(so->so_downcalls->sd_recv_uio != NULL);
			mutex_enter(&so->so_lock);
			/* the notify functions will drop the lock */
			if (flags & MSG_OOB)
				so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
			else
				so_notify_data(so, msg_size);
			return (0);
		}
		/*
		 * recv space check
		 */
		mutex_enter(&so->so_lock);
		space_left = so->so_rcvbuf - so->so_rcv_queued;
		if (space_left <= 0) {
			so->so_flowctrld = B_TRUE;
			*errorp = ENOSPC;
			space_left = -1;
		}
		goto done_unlock;
	}

	ASSERT(mp->b_next == NULL);
	ASSERT(DB_TYPE(mp) == M_DATA || DB_TYPE(mp) == M_PROTO);
	ASSERT(msg_size == msgdsize(mp));

	if (flags & MSG_OOB) {
		so_queue_oob(sock_handle, mp, msg_size);
		return (0);
	}

	if (force_pushp != NULL)
		force_push = *force_pushp;

	if (DB_TYPE(mp) == M_PROTO && !__TPI_PRIM_ISALIGNED(mp->b_rptr)) {
		/* The read pointer is not aligned correctly for TPI */
		zcmn_err(getzoneid(), CE_WARN,
		    "sockfs: Unaligned TPI message received. rptr = %p\n",
		    (void *)mp->b_rptr);
		freemsg(mp);
		mutex_enter(&so->so_lock);
		if (sodp != NULL)
			SOD_UIOAFINI(sodp);
		mutex_exit(&so->so_lock);

		return (so->so_rcvbuf - so->so_rcv_queued);
	}

	mutex_enter(&so->so_lock);
	if (so->so_state & (SS_FALLBACK_DRAIN | SS_FALLBACK_COMP)) {
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		*errorp = EOPNOTSUPP;
		return (-1);
	}
	if (so->so_state & SS_CANTRCVMORE) {
		freemsg(mp);
		if (sodp != NULL)
			SOD_DISABLE(sodp);
		mutex_exit(&so->so_lock);
		return (0);
	}

	/* process the mblk via I/OAT if capable */
	if (sodp != NULL && sodp->sod_enabled) {
		if (DB_TYPE(mp) == M_DATA) {
			sod_uioa_mblk_init(sodp, mp, msg_size);
		} else {
			SOD_UIOAFINI(sodp);
		}
	}

	if (mp->b_next == NULL) {
		so_enqueue_msg(so, mp, msg_size);
	} else {
		do {
			mblk_t *nmp;

			if ((nmp = mp->b_next) != NULL) {
				mp->b_next = NULL;
			}
			so_enqueue_msg(so, mp, msgdsize(mp));
			mp = nmp;
		} while (mp != NULL);
	}

	space_left = so->so_rcvbuf - so->so_rcv_queued;
	if (space_left <= 0) {
		so->so_flowctrld = B_TRUE;
		*errorp = ENOSPC;
		space_left = -1;
	}

	if (force_push || so->so_rcv_queued >= so->so_rcv_thresh ||
	    so->so_rcv_queued >= so->so_rcv_wanted) {
		SOCKET_TIMER_CANCEL(so);
		/*
		 * so_notify_data will release the lock
		 */
		so_notify_data(so, so->so_rcv_queued);

		if (force_pushp != NULL)
			*force_pushp = B_TRUE;
		goto done;
	} else if (so->so_rcv_timer_tid == 0) {
		/* Make sure the recv push timer is running */
		SOCKET_TIMER_START(so);
	}

done_unlock:
	mutex_exit(&so->so_lock);
done:
	return (space_left);
}

/*
 * Set the offset of where the oob data is relative to the bytes in
 * queued. Also generate SIGURG
 */
void
so_signal_oob(sock_upper_handle_t sock_handle, ssize_t offset)
{
	struct sonode *so;

	ASSERT(offset >= 0);
	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	/*
	 * New urgent data on the way so forget about any old
	 * urgent data.
	 */
	so->so_state &= ~(SS_HAVEOOBDATA|SS_HADOOBDATA);

	/*
	 * Record that urgent data is pending.
	 */
	so->so_state |= SS_OOBPEND;

	if (so->so_oobmsg != NULL) {
		dprintso(so, 1, ("sock: discarding old oob\n"));
		freemsg(so->so_oobmsg);
		so->so_oobmsg = NULL;
	}

	/*
	 * set the offset where the urgent byte is
	 */
	so->so_oobmark = so->so_rcv_queued + offset;
	if (so->so_oobmark == 0)
		so->so_state |= SS_RCVATMARK;
	else
		so->so_state &= ~SS_RCVATMARK;

	/* so_notify_oobsig drops so_lock */
	so_notify_oobsig(so);
}

/*
 * Queue the OOB byte
 */
static void
so_queue_oob(sock_upper_handle_t sock_handle, mblk_t *mp, size_t len)
{
	struct sonode *so;

	so = (struct sonode *)sock_handle;
	mutex_enter(&so->so_lock);
	if (so->so_direct != NULL)
		SOD_UIOAFINI(so->so_direct);

	ASSERT(mp != NULL);
	if (!IS_SO_OOB_INLINE(so)) {
		/* OOB byte kept out of band in so_oobmsg */
		so->so_oobmsg = mp;
		so->so_state |= SS_HAVEOOBDATA;
	} else {
		/* OOB byte delivered inline with normal data */
		so_enqueue_msg(so, mp, len);
	}

	/* so_notify_oobdata drops so_lock */
	so_notify_oobdata(so, IS_SO_OOB_INLINE(so));
}

/*
 * Close the protocol side of the socket via the sd_close downcall, then
 * flush any remaining received data.  After sd_close returns no further
 * upcalls will arrive from the protocol.
 */
int
so_close(struct sonode *so, int flag, struct cred *cr)
{
	int error;

	error = (*so->so_downcalls->sd_close)(so->so_proto_handle, flag, cr);

	/*
	 * At this point there will be no more upcalls from the protocol
	 */
	mutex_enter(&so->so_lock);

	ASSERT(so_verify_oobstate(so));

	so_rcv_flush(so);
	mutex_exit(&so->so_lock);

	return (error);
}

/*
 * Upcall from the protocol: a zero-copy transmit has completed; wake
 * any thread waiting for the zero-copy notification.
 */
void
so_zcopy_notify(sock_upper_handle_t sock_handle)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);
	so->so_copyflag |= STZCNOTIFY;
	cv_broadcast(&so->so_copy_cv);
	mutex_exit(&so->so_lock);
}

/*
 * Upcall from the protocol to post an asynchronous error on the socket.
 * so_notify_error drops so_lock.
 */
void
so_set_error(sock_upper_handle_t sock_handle, int error)
{
	struct sonode *so = (struct sonode *)sock_handle;

	mutex_enter(&so->so_lock);

	soseterror(so, error);

	so_notify_error(so);
}

/*
 *
so_recvmsg - read data from the socket 1389 * 1390 * There are two ways of obtaining data; either we ask the protocol to 1391 * copy directly into the supplied buffer, or we copy data from the 1392 * sonode's receive queue. The decision which one to use depends on 1393 * whether the protocol has a sd_recv_uio down call. 1394 */ 1395 int 1396 so_recvmsg(struct sonode *so, struct nmsghdr *msg, struct uio *uiop, 1397 struct cred *cr) 1398 { 1399 rval_t rval; 1400 int flags = 0; 1401 t_uscalar_t controllen, namelen; 1402 int error = 0; 1403 int ret; 1404 mblk_t *mctlp = NULL; 1405 union T_primitives *tpr; 1406 void *control; 1407 ssize_t saved_resid; 1408 struct uio *suiop; 1409 1410 SO_BLOCK_FALLBACK(so, SOP_RECVMSG(so, msg, uiop, cr)); 1411 1412 if ((so->so_state & (SS_ISCONNECTED|SS_CANTRCVMORE)) == 0 && 1413 (so->so_mode & SM_CONNREQUIRED)) { 1414 SO_UNBLOCK_FALLBACK(so); 1415 return (ENOTCONN); 1416 } 1417 1418 if (msg->msg_flags & MSG_PEEK) 1419 msg->msg_flags &= ~MSG_WAITALL; 1420 1421 if (so->so_mode & SM_ATOMIC) 1422 msg->msg_flags |= MSG_TRUNC; 1423 1424 if (msg->msg_flags & MSG_OOB) { 1425 if ((so->so_mode & SM_EXDATA) == 0) { 1426 error = EOPNOTSUPP; 1427 } else if (so->so_downcalls->sd_recv_uio != NULL) { 1428 error = (*so->so_downcalls->sd_recv_uio) 1429 (so->so_proto_handle, uiop, msg, cr); 1430 } else { 1431 error = sorecvoob(so, msg, uiop, msg->msg_flags, 1432 IS_SO_OOB_INLINE(so)); 1433 } 1434 SO_UNBLOCK_FALLBACK(so); 1435 return (error); 1436 } 1437 1438 /* 1439 * If the protocol has the recv down call, then pass the request 1440 * down. 
1441 */ 1442 if (so->so_downcalls->sd_recv_uio != NULL) { 1443 error = (*so->so_downcalls->sd_recv_uio) 1444 (so->so_proto_handle, uiop, msg, cr); 1445 SO_UNBLOCK_FALLBACK(so); 1446 return (error); 1447 } 1448 1449 /* 1450 * Reading data from the socket buffer 1451 */ 1452 flags = msg->msg_flags; 1453 msg->msg_flags = 0; 1454 1455 /* 1456 * Set msg_controllen and msg_namelen to zero here to make it 1457 * simpler in the cases that no control or name is returned. 1458 */ 1459 controllen = msg->msg_controllen; 1460 namelen = msg->msg_namelen; 1461 msg->msg_controllen = 0; 1462 msg->msg_namelen = 0; 1463 1464 mutex_enter(&so->so_lock); 1465 /* Set SOREADLOCKED */ 1466 error = so_lock_read_intr(so, 1467 uiop->uio_fmode | ((flags & MSG_DONTWAIT) ? FNONBLOCK : 0)); 1468 mutex_exit(&so->so_lock); 1469 if (error) { 1470 SO_UNBLOCK_FALLBACK(so); 1471 return (error); 1472 } 1473 1474 suiop = sod_rcv_init(so, flags, &uiop); 1475 retry: 1476 saved_resid = uiop->uio_resid; 1477 error = so_dequeue_msg(so, &mctlp, uiop, &rval, flags); 1478 if (error != 0) { 1479 goto out; 1480 } 1481 /* 1482 * For datagrams the MOREDATA flag is used to set MSG_TRUNC. 1483 * For non-datagrams MOREDATA is used to set MSG_EOR. 1484 */ 1485 ASSERT(!(rval.r_val1 & MORECTL)); 1486 if ((rval.r_val1 & MOREDATA) && (so->so_mode & SM_ATOMIC)) 1487 msg->msg_flags |= MSG_TRUNC; 1488 if (mctlp == NULL) { 1489 dprintso(so, 1, ("so_recvmsg: got M_DATA\n")); 1490 1491 mutex_enter(&so->so_lock); 1492 /* Set MSG_EOR based on MOREDATA */ 1493 if (!(rval.r_val1 & MOREDATA)) { 1494 if (so->so_state & SS_SAVEDEOR) { 1495 msg->msg_flags |= MSG_EOR; 1496 so->so_state &= ~SS_SAVEDEOR; 1497 } 1498 } 1499 /* 1500 * If some data was received (i.e. not EOF) and the 1501 * read/recv* has not been satisfied wait for some more. 
1502 */ 1503 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1504 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1505 mutex_exit(&so->so_lock); 1506 flags |= MSG_NOMARK; 1507 goto retry; 1508 } 1509 1510 goto out_locked; 1511 } 1512 /* so_queue_msg has already verified length and alignment */ 1513 tpr = (union T_primitives *)mctlp->b_rptr; 1514 dprintso(so, 1, ("so_recvmsg: type %d\n", tpr->type)); 1515 switch (tpr->type) { 1516 case T_DATA_IND: { 1517 /* 1518 * Set msg_flags to MSG_EOR based on 1519 * MORE_flag and MOREDATA. 1520 */ 1521 mutex_enter(&so->so_lock); 1522 so->so_state &= ~SS_SAVEDEOR; 1523 if (!(tpr->data_ind.MORE_flag & 1)) { 1524 if (!(rval.r_val1 & MOREDATA)) 1525 msg->msg_flags |= MSG_EOR; 1526 else 1527 so->so_state |= SS_SAVEDEOR; 1528 } 1529 freemsg(mctlp); 1530 /* 1531 * If some data was received (i.e. not EOF) and the 1532 * read/recv* has not been satisfied wait for some more. 1533 */ 1534 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1535 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1536 mutex_exit(&so->so_lock); 1537 flags |= MSG_NOMARK; 1538 goto retry; 1539 } 1540 goto out_locked; 1541 } 1542 case T_UNITDATA_IND: { 1543 void *addr; 1544 t_uscalar_t addrlen; 1545 void *abuf; 1546 t_uscalar_t optlen; 1547 void *opt; 1548 1549 if (namelen != 0) { 1550 /* Caller wants source address */ 1551 addrlen = tpr->unitdata_ind.SRC_length; 1552 addr = sogetoff(mctlp, tpr->unitdata_ind.SRC_offset, 1553 addrlen, 1); 1554 if (addr == NULL) { 1555 freemsg(mctlp); 1556 error = EPROTO; 1557 eprintsoline(so, error); 1558 goto out; 1559 } 1560 ASSERT(so->so_family != AF_UNIX); 1561 } 1562 optlen = tpr->unitdata_ind.OPT_length; 1563 if (optlen != 0) { 1564 t_uscalar_t ncontrollen; 1565 1566 /* 1567 * Extract any source address option. 1568 * Determine how large cmsg buffer is needed. 
1569 */ 1570 opt = sogetoff(mctlp, tpr->unitdata_ind.OPT_offset, 1571 optlen, __TPI_ALIGN_SIZE); 1572 1573 if (opt == NULL) { 1574 freemsg(mctlp); 1575 error = EPROTO; 1576 eprintsoline(so, error); 1577 goto out; 1578 } 1579 if (so->so_family == AF_UNIX) 1580 so_getopt_srcaddr(opt, optlen, &addr, &addrlen); 1581 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1582 !(flags & MSG_XPG4_2)); 1583 if (controllen != 0) 1584 controllen = ncontrollen; 1585 else if (ncontrollen != 0) 1586 msg->msg_flags |= MSG_CTRUNC; 1587 } else { 1588 controllen = 0; 1589 } 1590 1591 if (namelen != 0) { 1592 /* 1593 * Return address to caller. 1594 * Caller handles truncation if length 1595 * exceeds msg_namelen. 1596 * NOTE: AF_UNIX NUL termination is ensured by 1597 * the sender's copyin_name(). 1598 */ 1599 abuf = kmem_alloc(addrlen, KM_SLEEP); 1600 1601 bcopy(addr, abuf, addrlen); 1602 msg->msg_name = abuf; 1603 msg->msg_namelen = addrlen; 1604 } 1605 1606 if (controllen != 0) { 1607 /* 1608 * Return control msg to caller. 1609 * Caller handles truncation if length 1610 * exceeds msg_controllen. 1611 */ 1612 control = kmem_zalloc(controllen, KM_SLEEP); 1613 1614 error = so_opt2cmsg(mctlp, opt, optlen, 1615 !(flags & MSG_XPG4_2), control, controllen); 1616 if (error) { 1617 freemsg(mctlp); 1618 if (msg->msg_namelen != 0) 1619 kmem_free(msg->msg_name, 1620 msg->msg_namelen); 1621 kmem_free(control, controllen); 1622 eprintsoline(so, error); 1623 goto out; 1624 } 1625 msg->msg_control = control; 1626 msg->msg_controllen = controllen; 1627 } 1628 1629 freemsg(mctlp); 1630 goto out; 1631 } 1632 case T_OPTDATA_IND: { 1633 struct T_optdata_req *tdr; 1634 void *opt; 1635 t_uscalar_t optlen; 1636 1637 tdr = (struct T_optdata_req *)mctlp->b_rptr; 1638 optlen = tdr->OPT_length; 1639 if (optlen != 0) { 1640 t_uscalar_t ncontrollen; 1641 /* 1642 * Determine how large cmsg buffer is needed. 
1643 */ 1644 opt = sogetoff(mctlp, 1645 tpr->optdata_ind.OPT_offset, optlen, 1646 __TPI_ALIGN_SIZE); 1647 1648 if (opt == NULL) { 1649 freemsg(mctlp); 1650 error = EPROTO; 1651 eprintsoline(so, error); 1652 goto out; 1653 } 1654 1655 ncontrollen = so_cmsglen(mctlp, opt, optlen, 1656 !(flags & MSG_XPG4_2)); 1657 if (controllen != 0) 1658 controllen = ncontrollen; 1659 else if (ncontrollen != 0) 1660 msg->msg_flags |= MSG_CTRUNC; 1661 } else { 1662 controllen = 0; 1663 } 1664 1665 if (controllen != 0) { 1666 /* 1667 * Return control msg to caller. 1668 * Caller handles truncation if length 1669 * exceeds msg_controllen. 1670 */ 1671 control = kmem_zalloc(controllen, KM_SLEEP); 1672 1673 error = so_opt2cmsg(mctlp, opt, optlen, 1674 !(flags & MSG_XPG4_2), control, controllen); 1675 if (error) { 1676 freemsg(mctlp); 1677 kmem_free(control, controllen); 1678 eprintsoline(so, error); 1679 goto out; 1680 } 1681 msg->msg_control = control; 1682 msg->msg_controllen = controllen; 1683 } 1684 1685 /* 1686 * Set msg_flags to MSG_EOR based on 1687 * DATA_flag and MOREDATA. 1688 */ 1689 mutex_enter(&so->so_lock); 1690 so->so_state &= ~SS_SAVEDEOR; 1691 if (!(tpr->data_ind.MORE_flag & 1)) { 1692 if (!(rval.r_val1 & MOREDATA)) 1693 msg->msg_flags |= MSG_EOR; 1694 else 1695 so->so_state |= SS_SAVEDEOR; 1696 } 1697 freemsg(mctlp); 1698 /* 1699 * If some data was received (i.e. not EOF) and the 1700 * read/recv* has not been satisfied wait for some more. 1701 * Not possible to wait if control info was received. 
1702 */ 1703 if ((flags & MSG_WAITALL) && !(msg->msg_flags & MSG_EOR) && 1704 controllen == 0 && 1705 uiop->uio_resid != saved_resid && uiop->uio_resid > 0) { 1706 mutex_exit(&so->so_lock); 1707 flags |= MSG_NOMARK; 1708 goto retry; 1709 } 1710 goto out_locked; 1711 } 1712 default: 1713 cmn_err(CE_CONT, "so_recvmsg bad type %x \n", 1714 tpr->type); 1715 freemsg(mctlp); 1716 error = EPROTO; 1717 ASSERT(0); 1718 } 1719 out: 1720 mutex_enter(&so->so_lock); 1721 out_locked: 1722 ret = sod_rcv_done(so, suiop, uiop); 1723 if (ret != 0 && error == 0) 1724 error = ret; 1725 1726 so_unlock_read(so); /* Clear SOREADLOCKED */ 1727 mutex_exit(&so->so_lock); 1728 1729 SO_UNBLOCK_FALLBACK(so); 1730 1731 return (error); 1732 } 1733 1734 sonodeops_t so_sonodeops = { 1735 so_init, /* sop_init */ 1736 so_accept, /* sop_accept */ 1737 so_bind, /* sop_bind */ 1738 so_listen, /* sop_listen */ 1739 so_connect, /* sop_connect */ 1740 so_recvmsg, /* sop_recvmsg */ 1741 so_sendmsg, /* sop_sendmsg */ 1742 so_sendmblk, /* sop_sendmblk */ 1743 so_getpeername, /* sop_getpeername */ 1744 so_getsockname, /* sop_getsockname */ 1745 so_shutdown, /* sop_shutdown */ 1746 so_getsockopt, /* sop_getsockopt */ 1747 so_setsockopt, /* sop_setsockopt */ 1748 so_ioctl, /* sop_ioctl */ 1749 so_poll, /* sop_poll */ 1750 so_close, /* sop_close */ 1751 }; 1752 1753 sock_upcalls_t so_upcalls = { 1754 so_newconn, 1755 so_connected, 1756 so_disconnected, 1757 so_opctl, 1758 so_queue_msg, 1759 so_set_prop, 1760 so_txq_full, 1761 so_signal_oob, 1762 so_zcopy_notify, 1763 so_set_error 1764 }; 1765